181ad6265SDimitry Andric #ifndef LLVM_PROFILEDATA_MEMPROF_H_ 281ad6265SDimitry Andric #define LLVM_PROFILEDATA_MEMPROF_H_ 381ad6265SDimitry Andric 4*0fca6ea1SDimitry Andric #include "llvm/ADT/MapVector.h" 5*0fca6ea1SDimitry Andric #include "llvm/ADT/STLForwardCompat.h" 681ad6265SDimitry Andric #include "llvm/ADT/STLFunctionalExtras.h" 781ad6265SDimitry Andric #include "llvm/ADT/SmallVector.h" 881ad6265SDimitry Andric #include "llvm/IR/GlobalValue.h" 981ad6265SDimitry Andric #include "llvm/ProfileData/MemProfData.inc" 1081ad6265SDimitry Andric #include "llvm/Support/Endian.h" 1181ad6265SDimitry Andric #include "llvm/Support/EndianStream.h" 1281ad6265SDimitry Andric #include "llvm/Support/raw_ostream.h" 1381ad6265SDimitry Andric 14*0fca6ea1SDimitry Andric #include <bitset> 1581ad6265SDimitry Andric #include <cstdint> 16bdd1243dSDimitry Andric #include <optional> 1781ad6265SDimitry Andric 1881ad6265SDimitry Andric namespace llvm { 1981ad6265SDimitry Andric namespace memprof { 2081ad6265SDimitry Andric 21*0fca6ea1SDimitry Andric struct MemProfRecord; 22*0fca6ea1SDimitry Andric 23*0fca6ea1SDimitry Andric // The versions of the indexed MemProf format 24*0fca6ea1SDimitry Andric enum IndexedVersion : uint64_t { 25*0fca6ea1SDimitry Andric // Version 0: This version didn't have a version field. 26*0fca6ea1SDimitry Andric Version0 = 0, 27*0fca6ea1SDimitry Andric // Version 1: Added a version field to the header. 28*0fca6ea1SDimitry Andric Version1 = 1, 29*0fca6ea1SDimitry Andric // Version 2: Added a call stack table. 30*0fca6ea1SDimitry Andric Version2 = 2, 31*0fca6ea1SDimitry Andric // Version 3: Added a radix tree for call stacks. Switched to linear IDs for 32*0fca6ea1SDimitry Andric // frames and call stacks. 33*0fca6ea1SDimitry Andric Version3 = 3, 34*0fca6ea1SDimitry Andric }; 35*0fca6ea1SDimitry Andric 36*0fca6ea1SDimitry Andric constexpr uint64_t MinimumSupportedVersion = Version0; 37*0fca6ea1SDimitry Andric constexpr uint64_t MaximumSupportedVersion = Version3; 38*0fca6ea1SDimitry Andric 39*0fca6ea1SDimitry Andric // Verify that the minimum and maximum satisfy the obvious constraint. 40*0fca6ea1SDimitry Andric static_assert(MinimumSupportedVersion <= MaximumSupportedVersion); 41*0fca6ea1SDimitry Andric 4281ad6265SDimitry Andric enum class Meta : uint64_t { 4381ad6265SDimitry Andric Start = 0, 4481ad6265SDimitry Andric #define MIBEntryDef(NameTag, Name, Type) NameTag, 4581ad6265SDimitry Andric #include "llvm/ProfileData/MIBEntryDef.inc" 4681ad6265SDimitry Andric #undef MIBEntryDef 4781ad6265SDimitry Andric Size 4881ad6265SDimitry Andric }; 4981ad6265SDimitry Andric 5081ad6265SDimitry Andric using MemProfSchema = llvm::SmallVector<Meta, static_cast<int>(Meta::Size)>; 5181ad6265SDimitry Andric 52*0fca6ea1SDimitry Andric // Returns the full schema currently in use. 53*0fca6ea1SDimitry Andric MemProfSchema getFullSchema(); 54*0fca6ea1SDimitry Andric 55*0fca6ea1SDimitry Andric // Returns the schema consisting of the fields used for hot cold memory hinting. 56*0fca6ea1SDimitry Andric MemProfSchema getHotColdSchema(); 57*0fca6ea1SDimitry Andric 5881ad6265SDimitry Andric // Holds the actual MemInfoBlock data with all fields. Contents may be read or 5981ad6265SDimitry Andric // written partially by providing an appropriate schema to the serialize and 6081ad6265SDimitry Andric // deserialize methods. 6181ad6265SDimitry Andric struct PortableMemInfoBlock { 6281ad6265SDimitry Andric PortableMemInfoBlock() = default; 63*0fca6ea1SDimitry Andric explicit PortableMemInfoBlock(const MemInfoBlock &Block, 64*0fca6ea1SDimitry Andric const MemProfSchema &IncomingSchema) { 65*0fca6ea1SDimitry Andric for (const Meta Id : IncomingSchema) 66*0fca6ea1SDimitry Andric Schema.set(llvm::to_underlying(Id)); 6781ad6265SDimitry Andric #define MIBEntryDef(NameTag, Name, Type) Name = Block.Name; 6881ad6265SDimitry Andric #include "llvm/ProfileData/MIBEntryDef.inc" 6981ad6265SDimitry Andric #undef MIBEntryDef 7081ad6265SDimitry Andric } 7181ad6265SDimitry Andric 7281ad6265SDimitry Andric PortableMemInfoBlock(const MemProfSchema &Schema, const unsigned char *Ptr) { 7381ad6265SDimitry Andric deserialize(Schema, Ptr); 7481ad6265SDimitry Andric } 7581ad6265SDimitry Andric 7681ad6265SDimitry Andric // Read the contents of \p Ptr based on the \p Schema to populate the 7781ad6265SDimitry Andric // MemInfoBlock member. 78*0fca6ea1SDimitry Andric void deserialize(const MemProfSchema &IncomingSchema, 79*0fca6ea1SDimitry Andric const unsigned char *Ptr) { 8081ad6265SDimitry Andric using namespace support; 8181ad6265SDimitry Andric 82*0fca6ea1SDimitry Andric Schema.reset(); 83*0fca6ea1SDimitry Andric for (const Meta Id : IncomingSchema) { 8481ad6265SDimitry Andric switch (Id) { 8581ad6265SDimitry Andric #define MIBEntryDef(NameTag, Name, Type) \ 8681ad6265SDimitry Andric case Meta::Name: { \ 87*0fca6ea1SDimitry Andric Name = endian::readNext<Type, llvm::endianness::little>(Ptr); \ 8881ad6265SDimitry Andric } break; 8981ad6265SDimitry Andric #include "llvm/ProfileData/MIBEntryDef.inc" 9081ad6265SDimitry Andric #undef MIBEntryDef 9181ad6265SDimitry Andric default: 9281ad6265SDimitry Andric llvm_unreachable("Unknown meta type id, is the profile collected from " 9381ad6265SDimitry Andric "a newer version of the runtime?"); 9481ad6265SDimitry Andric } 95*0fca6ea1SDimitry Andric 96*0fca6ea1SDimitry Andric Schema.set(llvm::to_underlying(Id)); 9781ad6265SDimitry Andric } 9881ad6265SDimitry Andric } 9981ad6265SDimitry Andric 10081ad6265SDimitry Andric // Write the contents of the MemInfoBlock based on the \p Schema provided to 10181ad6265SDimitry Andric // the raw_ostream \p OS. 10281ad6265SDimitry Andric void serialize(const MemProfSchema &Schema, raw_ostream &OS) const { 10381ad6265SDimitry Andric using namespace support; 10481ad6265SDimitry Andric 1055f757f3fSDimitry Andric endian::Writer LE(OS, llvm::endianness::little); 10681ad6265SDimitry Andric for (const Meta Id : Schema) { 10781ad6265SDimitry Andric switch (Id) { 10881ad6265SDimitry Andric #define MIBEntryDef(NameTag, Name, Type) \ 10981ad6265SDimitry Andric case Meta::Name: { \ 11081ad6265SDimitry Andric LE.write<Type>(Name); \ 11181ad6265SDimitry Andric } break; 11281ad6265SDimitry Andric #include "llvm/ProfileData/MIBEntryDef.inc" 11381ad6265SDimitry Andric #undef MIBEntryDef 11481ad6265SDimitry Andric default: 11581ad6265SDimitry Andric llvm_unreachable("Unknown meta type id, invalid input?"); 11681ad6265SDimitry Andric } 11781ad6265SDimitry Andric } 11881ad6265SDimitry Andric } 11981ad6265SDimitry Andric 12081ad6265SDimitry Andric // Print out the contents of the MemInfoBlock in YAML format. 12181ad6265SDimitry Andric void printYAML(raw_ostream &OS) const { 12281ad6265SDimitry Andric OS << " MemInfoBlock:\n"; 12381ad6265SDimitry Andric #define MIBEntryDef(NameTag, Name, Type) \ 12481ad6265SDimitry Andric OS << " " << #Name << ": " << Name << "\n"; 12581ad6265SDimitry Andric #include "llvm/ProfileData/MIBEntryDef.inc" 12681ad6265SDimitry Andric #undef MIBEntryDef 127*0fca6ea1SDimitry Andric if (AccessHistogramSize > 0) { 128*0fca6ea1SDimitry Andric OS << " " << "AccessHistogramValues" << ":"; 129*0fca6ea1SDimitry Andric for (uint32_t I = 0; I < AccessHistogramSize; ++I) { 130*0fca6ea1SDimitry Andric OS << " " << ((uint64_t *)AccessHistogram)[I]; 131*0fca6ea1SDimitry Andric } 132*0fca6ea1SDimitry Andric OS << "\n"; 133*0fca6ea1SDimitry Andric } 134*0fca6ea1SDimitry Andric } 135*0fca6ea1SDimitry Andric 136*0fca6ea1SDimitry Andric // Return the schema, only for unit tests. 137*0fca6ea1SDimitry Andric std::bitset<llvm::to_underlying(Meta::Size)> getSchema() const { 138*0fca6ea1SDimitry Andric return Schema; 13981ad6265SDimitry Andric } 14081ad6265SDimitry Andric 14181ad6265SDimitry Andric // Define getters for each type which can be called by analyses. 14281ad6265SDimitry Andric #define MIBEntryDef(NameTag, Name, Type) \ 143*0fca6ea1SDimitry Andric Type get##Name() const { \ 144*0fca6ea1SDimitry Andric assert(Schema[llvm::to_underlying(Meta::Name)]); \ 145*0fca6ea1SDimitry Andric return Name; \ 146*0fca6ea1SDimitry Andric } 14781ad6265SDimitry Andric #include "llvm/ProfileData/MIBEntryDef.inc" 14881ad6265SDimitry Andric #undef MIBEntryDef 14981ad6265SDimitry Andric 15081ad6265SDimitry Andric void clear() { *this = PortableMemInfoBlock(); } 15181ad6265SDimitry Andric 15281ad6265SDimitry Andric bool operator==(const PortableMemInfoBlock &Other) const { 153*0fca6ea1SDimitry Andric if (Other.Schema != Schema) 154*0fca6ea1SDimitry Andric return false; 155*0fca6ea1SDimitry Andric 15681ad6265SDimitry Andric #define MIBEntryDef(NameTag, Name, Type) \ 157*0fca6ea1SDimitry Andric if (Schema[llvm::to_underlying(Meta::Name)] && \ 158*0fca6ea1SDimitry Andric Other.get##Name() != get##Name()) \ 15981ad6265SDimitry Andric return false; 16081ad6265SDimitry Andric #include "llvm/ProfileData/MIBEntryDef.inc" 16181ad6265SDimitry Andric #undef MIBEntryDef 16281ad6265SDimitry Andric return true; 16381ad6265SDimitry Andric } 16481ad6265SDimitry Andric 16581ad6265SDimitry Andric bool operator!=(const PortableMemInfoBlock &Other) const { 16681ad6265SDimitry Andric return !operator==(Other); 16781ad6265SDimitry Andric } 16881ad6265SDimitry Andric 169*0fca6ea1SDimitry Andric static size_t serializedSize(const MemProfSchema &Schema) { 17081ad6265SDimitry Andric size_t Result = 0; 171*0fca6ea1SDimitry Andric 172*0fca6ea1SDimitry Andric for (const Meta Id : Schema) { 173*0fca6ea1SDimitry Andric switch (Id) { 174*0fca6ea1SDimitry Andric #define MIBEntryDef(NameTag, Name, Type) \ 175*0fca6ea1SDimitry Andric case Meta::Name: { \ 176*0fca6ea1SDimitry Andric Result += sizeof(Type); \ 177*0fca6ea1SDimitry Andric } break; 17881ad6265SDimitry Andric #include "llvm/ProfileData/MIBEntryDef.inc" 17981ad6265SDimitry Andric #undef MIBEntryDef 180*0fca6ea1SDimitry Andric default: 181*0fca6ea1SDimitry Andric llvm_unreachable("Unknown meta type id, invalid input?"); 182*0fca6ea1SDimitry Andric } 183*0fca6ea1SDimitry Andric } 184*0fca6ea1SDimitry Andric 18581ad6265SDimitry Andric return Result; 18681ad6265SDimitry Andric } 18781ad6265SDimitry Andric 18881ad6265SDimitry Andric private: 189*0fca6ea1SDimitry Andric // The set of available fields, indexed by Meta::Name. 190*0fca6ea1SDimitry Andric std::bitset<llvm::to_underlying(Meta::Size)> Schema; 191*0fca6ea1SDimitry Andric 19281ad6265SDimitry Andric #define MIBEntryDef(NameTag, Name, Type) Type Name = Type(); 19381ad6265SDimitry Andric #include "llvm/ProfileData/MIBEntryDef.inc" 19481ad6265SDimitry Andric #undef MIBEntryDef 19581ad6265SDimitry Andric }; 19681ad6265SDimitry Andric 19781ad6265SDimitry Andric // A type representing the id generated by hashing the contents of the Frame. 19881ad6265SDimitry Andric using FrameId = uint64_t; 199*0fca6ea1SDimitry Andric // A type representing the id to index into the frame array. 200*0fca6ea1SDimitry Andric using LinearFrameId = uint32_t; 20181ad6265SDimitry Andric // Describes a call frame for a dynamic allocation context. The contents of 20281ad6265SDimitry Andric // the frame are populated by symbolizing the stack depot call frame from the 20381ad6265SDimitry Andric // compiler runtime. 20481ad6265SDimitry Andric struct Frame { 20581ad6265SDimitry Andric // A uuid (uint64_t) identifying the function. It is obtained by 20681ad6265SDimitry Andric // llvm::md5(FunctionName) which returns the lower 64 bits. 20781ad6265SDimitry Andric GlobalValue::GUID Function; 20881ad6265SDimitry Andric // The symbol name for the function. Only populated in the Frame by the reader 20981ad6265SDimitry Andric // if requested during initialization. This field should not be serialized. 210*0fca6ea1SDimitry Andric std::unique_ptr<std::string> SymbolName; 21181ad6265SDimitry Andric // The source line offset of the call from the beginning of parent function. 21281ad6265SDimitry Andric uint32_t LineOffset; 21381ad6265SDimitry Andric // The source column number of the call to help distinguish multiple calls 21481ad6265SDimitry Andric // on the same line. 21581ad6265SDimitry Andric uint32_t Column; 21681ad6265SDimitry Andric // Whether the current frame is inlined. 21781ad6265SDimitry Andric bool IsInlineFrame; 21881ad6265SDimitry Andric 21981ad6265SDimitry Andric Frame(const Frame &Other) { 22081ad6265SDimitry Andric Function = Other.Function; 221*0fca6ea1SDimitry Andric SymbolName = Other.SymbolName 222*0fca6ea1SDimitry Andric ? std::make_unique<std::string>(*Other.SymbolName) 223*0fca6ea1SDimitry Andric : nullptr; 22481ad6265SDimitry Andric LineOffset = Other.LineOffset; 22581ad6265SDimitry Andric Column = Other.Column; 22681ad6265SDimitry Andric IsInlineFrame = Other.IsInlineFrame; 22781ad6265SDimitry Andric } 22881ad6265SDimitry Andric 229*0fca6ea1SDimitry Andric Frame(GlobalValue::GUID Hash, uint32_t Off, uint32_t Col, bool Inline) 23081ad6265SDimitry Andric : Function(Hash), LineOffset(Off), Column(Col), IsInlineFrame(Inline) {} 23181ad6265SDimitry Andric 23281ad6265SDimitry Andric bool operator==(const Frame &Other) const { 23381ad6265SDimitry Andric // Ignore the SymbolName field to avoid a string compare. Comparing the 23481ad6265SDimitry Andric // function hash serves the same purpose. 23581ad6265SDimitry Andric return Other.Function == Function && Other.LineOffset == LineOffset && 23681ad6265SDimitry Andric Other.Column == Column && Other.IsInlineFrame == IsInlineFrame; 23781ad6265SDimitry Andric } 23881ad6265SDimitry Andric 23981ad6265SDimitry Andric Frame &operator=(const Frame &Other) { 24081ad6265SDimitry Andric Function = Other.Function; 241*0fca6ea1SDimitry Andric SymbolName = Other.SymbolName 242*0fca6ea1SDimitry Andric ? std::make_unique<std::string>(*Other.SymbolName) 243*0fca6ea1SDimitry Andric : nullptr; 24481ad6265SDimitry Andric LineOffset = Other.LineOffset; 24581ad6265SDimitry Andric Column = Other.Column; 24681ad6265SDimitry Andric IsInlineFrame = Other.IsInlineFrame; 24781ad6265SDimitry Andric return *this; 24881ad6265SDimitry Andric } 24981ad6265SDimitry Andric 25081ad6265SDimitry Andric bool operator!=(const Frame &Other) const { return !operator==(Other); } 25181ad6265SDimitry Andric 252*0fca6ea1SDimitry Andric bool hasSymbolName() const { return !!SymbolName; } 253*0fca6ea1SDimitry Andric 254*0fca6ea1SDimitry Andric StringRef getSymbolName() const { 255*0fca6ea1SDimitry Andric assert(hasSymbolName()); 256*0fca6ea1SDimitry Andric return *SymbolName; 257*0fca6ea1SDimitry Andric } 258*0fca6ea1SDimitry Andric 259*0fca6ea1SDimitry Andric std::string getSymbolNameOr(StringRef Alt) const { 260*0fca6ea1SDimitry Andric return std::string(hasSymbolName() ? getSymbolName() : Alt); 261*0fca6ea1SDimitry Andric } 262*0fca6ea1SDimitry Andric 26381ad6265SDimitry Andric // Write the contents of the frame to the ostream \p OS. 26481ad6265SDimitry Andric void serialize(raw_ostream &OS) const { 26581ad6265SDimitry Andric using namespace support; 26681ad6265SDimitry Andric 2675f757f3fSDimitry Andric endian::Writer LE(OS, llvm::endianness::little); 26881ad6265SDimitry Andric 26981ad6265SDimitry Andric // If the type of the GlobalValue::GUID changes, then we need to update 27081ad6265SDimitry Andric // the reader and the writer. 27181ad6265SDimitry Andric static_assert(std::is_same<GlobalValue::GUID, uint64_t>::value, 27281ad6265SDimitry Andric "Expect GUID to be uint64_t."); 27381ad6265SDimitry Andric LE.write<uint64_t>(Function); 27481ad6265SDimitry Andric 27581ad6265SDimitry Andric LE.write<uint32_t>(LineOffset); 27681ad6265SDimitry Andric LE.write<uint32_t>(Column); 27781ad6265SDimitry Andric LE.write<bool>(IsInlineFrame); 27881ad6265SDimitry Andric } 27981ad6265SDimitry Andric 28081ad6265SDimitry Andric // Read a frame from char data which has been serialized as little endian. 28181ad6265SDimitry Andric static Frame deserialize(const unsigned char *Ptr) { 28281ad6265SDimitry Andric using namespace support; 28381ad6265SDimitry Andric 2845f757f3fSDimitry Andric const uint64_t F = 285*0fca6ea1SDimitry Andric endian::readNext<uint64_t, llvm::endianness::little>(Ptr); 2865f757f3fSDimitry Andric const uint32_t L = 287*0fca6ea1SDimitry Andric endian::readNext<uint32_t, llvm::endianness::little>(Ptr); 2885f757f3fSDimitry Andric const uint32_t C = 289*0fca6ea1SDimitry Andric endian::readNext<uint32_t, llvm::endianness::little>(Ptr); 290*0fca6ea1SDimitry Andric const bool I = endian::readNext<bool, llvm::endianness::little>(Ptr); 29181ad6265SDimitry Andric return Frame(/*Function=*/F, /*LineOffset=*/L, /*Column=*/C, 29281ad6265SDimitry Andric /*IsInlineFrame=*/I); 29381ad6265SDimitry Andric } 29481ad6265SDimitry Andric 29581ad6265SDimitry Andric // Returns the size of the frame information. 29681ad6265SDimitry Andric static constexpr size_t serializedSize() { 29781ad6265SDimitry Andric return sizeof(Frame::Function) + sizeof(Frame::LineOffset) + 29881ad6265SDimitry Andric sizeof(Frame::Column) + sizeof(Frame::IsInlineFrame); 29981ad6265SDimitry Andric } 30081ad6265SDimitry Andric 30181ad6265SDimitry Andric // Print the frame information in YAML format. 30281ad6265SDimitry Andric void printYAML(raw_ostream &OS) const { 30381ad6265SDimitry Andric OS << " -\n" 30481ad6265SDimitry Andric << " Function: " << Function << "\n" 305*0fca6ea1SDimitry Andric << " SymbolName: " << getSymbolNameOr("<None>") << "\n" 30681ad6265SDimitry Andric << " LineOffset: " << LineOffset << "\n" 30781ad6265SDimitry Andric << " Column: " << Column << "\n" 30881ad6265SDimitry Andric << " Inline: " << IsInlineFrame << "\n"; 30981ad6265SDimitry Andric } 31081ad6265SDimitry Andric 31181ad6265SDimitry Andric // Return a hash value based on the contents of the frame. Here we don't use 31281ad6265SDimitry Andric // hashing from llvm ADT since we are going to persist the hash id, the hash 31381ad6265SDimitry Andric // combine algorithm in ADT uses a new randomized seed each time. 31481ad6265SDimitry Andric inline FrameId hash() const { 31581ad6265SDimitry Andric auto HashCombine = [](auto Value, size_t Seed) { 31681ad6265SDimitry Andric std::hash<decltype(Value)> Hasher; 31781ad6265SDimitry Andric // The constant used below is the 64 bit representation of the fractional 31881ad6265SDimitry Andric // part of the golden ratio. Used here for the randomness in their bit 31981ad6265SDimitry Andric // pattern. 32081ad6265SDimitry Andric return Hasher(Value) + 0x9e3779b97f4a7c15 + (Seed << 6) + (Seed >> 2); 32181ad6265SDimitry Andric }; 32281ad6265SDimitry Andric 32381ad6265SDimitry Andric size_t Result = 0; 32481ad6265SDimitry Andric Result ^= HashCombine(Function, Result); 32581ad6265SDimitry Andric Result ^= HashCombine(LineOffset, Result); 32681ad6265SDimitry Andric Result ^= HashCombine(Column, Result); 32781ad6265SDimitry Andric Result ^= HashCombine(IsInlineFrame, Result); 32881ad6265SDimitry Andric return static_cast<FrameId>(Result); 32981ad6265SDimitry Andric } 33081ad6265SDimitry Andric }; 33181ad6265SDimitry Andric 332*0fca6ea1SDimitry Andric // A type representing the index into the table of call stacks. 333*0fca6ea1SDimitry Andric using CallStackId = uint64_t; 334*0fca6ea1SDimitry Andric 335*0fca6ea1SDimitry Andric // A type representing the index into the call stack array. 336*0fca6ea1SDimitry Andric using LinearCallStackId = uint32_t; 337*0fca6ea1SDimitry Andric 33881ad6265SDimitry Andric // Holds allocation information in a space efficient format where frames are 33981ad6265SDimitry Andric // represented using unique identifiers. 34081ad6265SDimitry Andric struct IndexedAllocationInfo { 34181ad6265SDimitry Andric // The dynamic calling context for the allocation in bottom-up (leaf-to-root) 34281ad6265SDimitry Andric // order. Frame contents are stored out-of-line. 343*0fca6ea1SDimitry Andric // TODO: Remove once we fully transition to CSId. 34481ad6265SDimitry Andric llvm::SmallVector<FrameId> CallStack; 345*0fca6ea1SDimitry Andric // Conceptually the same as above. We are going to keep both CallStack and 346*0fca6ea1SDimitry Andric // CallStackId while we are transitioning from CallStack to CallStackId. 347*0fca6ea1SDimitry Andric CallStackId CSId = 0; 34881ad6265SDimitry Andric // The statistics obtained from the runtime for the allocation. 34981ad6265SDimitry Andric PortableMemInfoBlock Info; 35081ad6265SDimitry Andric 35181ad6265SDimitry Andric IndexedAllocationInfo() = default; 352*0fca6ea1SDimitry Andric IndexedAllocationInfo(ArrayRef<FrameId> CS, CallStackId CSId, 353*0fca6ea1SDimitry Andric const MemInfoBlock &MB, 354*0fca6ea1SDimitry Andric const MemProfSchema &Schema = getFullSchema()) 355*0fca6ea1SDimitry Andric : CallStack(CS.begin(), CS.end()), CSId(CSId), Info(MB, Schema) {} 35681ad6265SDimitry Andric 35781ad6265SDimitry Andric // Returns the size in bytes when this allocation info struct is serialized. 358*0fca6ea1SDimitry Andric size_t serializedSize(const MemProfSchema &Schema, 359*0fca6ea1SDimitry Andric IndexedVersion Version) const; 36081ad6265SDimitry Andric 36181ad6265SDimitry Andric bool operator==(const IndexedAllocationInfo &Other) const { 36281ad6265SDimitry Andric if (Other.Info != Info) 36381ad6265SDimitry Andric return false; 36481ad6265SDimitry Andric 365*0fca6ea1SDimitry Andric if (Other.CSId != CSId) 36681ad6265SDimitry Andric return false; 36781ad6265SDimitry Andric return true; 36881ad6265SDimitry Andric } 36981ad6265SDimitry Andric 37081ad6265SDimitry Andric bool operator!=(const IndexedAllocationInfo &Other) const { 37181ad6265SDimitry Andric return !operator==(Other); 37281ad6265SDimitry Andric } 37381ad6265SDimitry Andric }; 37481ad6265SDimitry Andric 37581ad6265SDimitry Andric // Holds allocation information with frame contents inline. The type should 37681ad6265SDimitry Andric // be used for temporary in-memory instances. 37781ad6265SDimitry Andric struct AllocationInfo { 37881ad6265SDimitry Andric // Same as IndexedAllocationInfo::CallStack with the frame contents inline. 379*0fca6ea1SDimitry Andric std::vector<Frame> CallStack; 38081ad6265SDimitry Andric // Same as IndexedAllocationInfo::Info; 38181ad6265SDimitry Andric PortableMemInfoBlock Info; 38281ad6265SDimitry Andric 38381ad6265SDimitry Andric AllocationInfo() = default; 38481ad6265SDimitry Andric AllocationInfo( 38581ad6265SDimitry Andric const IndexedAllocationInfo &IndexedAI, 38681ad6265SDimitry Andric llvm::function_ref<const Frame(const FrameId)> IdToFrameCallback) { 38781ad6265SDimitry Andric for (const FrameId &Id : IndexedAI.CallStack) { 38881ad6265SDimitry Andric CallStack.push_back(IdToFrameCallback(Id)); 38981ad6265SDimitry Andric } 39081ad6265SDimitry Andric Info = IndexedAI.Info; 39181ad6265SDimitry Andric } 39281ad6265SDimitry Andric 39381ad6265SDimitry Andric void printYAML(raw_ostream &OS) const { 39481ad6265SDimitry Andric OS << " -\n"; 39581ad6265SDimitry Andric OS << " Callstack:\n"; 39681ad6265SDimitry Andric // TODO: Print out the frame on one line with to make it easier for deep 39781ad6265SDimitry Andric // callstacks once we have a test to check valid YAML is generated. 39881ad6265SDimitry Andric for (const Frame &F : CallStack) { 39981ad6265SDimitry Andric F.printYAML(OS); 40081ad6265SDimitry Andric } 40181ad6265SDimitry Andric Info.printYAML(OS); 40281ad6265SDimitry Andric } 40381ad6265SDimitry Andric }; 40481ad6265SDimitry Andric 40581ad6265SDimitry Andric // Holds the memprof profile information for a function. The internal 40681ad6265SDimitry Andric // representation stores frame ids for efficiency. This representation should 40781ad6265SDimitry Andric // be used in the profile conversion and manipulation tools. 40881ad6265SDimitry Andric struct IndexedMemProfRecord { 40981ad6265SDimitry Andric // Memory allocation sites in this function for which we have memory 41081ad6265SDimitry Andric // profiling data. 41181ad6265SDimitry Andric llvm::SmallVector<IndexedAllocationInfo> AllocSites; 41281ad6265SDimitry Andric // Holds call sites in this function which are part of some memory 41381ad6265SDimitry Andric // allocation context. We store this as a list of locations, each with its 41481ad6265SDimitry Andric // list of inline locations in bottom-up order i.e. from leaf to root. The 41581ad6265SDimitry Andric // inline location list may include additional entries, users should pick 41681ad6265SDimitry Andric // the last entry in the list with the same function GUID. 41781ad6265SDimitry Andric llvm::SmallVector<llvm::SmallVector<FrameId>> CallSites; 418*0fca6ea1SDimitry Andric // Conceptually the same as above. We are going to keep both CallSites and 419*0fca6ea1SDimitry Andric // CallSiteIds while we are transitioning from CallSites to CallSiteIds. 420*0fca6ea1SDimitry Andric llvm::SmallVector<CallStackId> CallSiteIds; 42181ad6265SDimitry Andric 42281ad6265SDimitry Andric void clear() { 42381ad6265SDimitry Andric AllocSites.clear(); 42481ad6265SDimitry Andric CallSites.clear(); 42581ad6265SDimitry Andric } 42681ad6265SDimitry Andric 42781ad6265SDimitry Andric void merge(const IndexedMemProfRecord &Other) { 42881ad6265SDimitry Andric // TODO: Filter out duplicates which may occur if multiple memprof 42981ad6265SDimitry Andric // profiles are merged together using llvm-profdata. 43081ad6265SDimitry Andric AllocSites.append(Other.AllocSites); 43181ad6265SDimitry Andric CallSites.append(Other.CallSites); 43281ad6265SDimitry Andric } 43381ad6265SDimitry Andric 434*0fca6ea1SDimitry Andric size_t serializedSize(const MemProfSchema &Schema, 435*0fca6ea1SDimitry Andric IndexedVersion Version) const; 43681ad6265SDimitry Andric 43781ad6265SDimitry Andric bool operator==(const IndexedMemProfRecord &Other) const { 438*0fca6ea1SDimitry Andric if (Other.AllocSites != AllocSites) 43981ad6265SDimitry Andric return false; 44081ad6265SDimitry Andric 441*0fca6ea1SDimitry Andric if (Other.CallSiteIds != CallSiteIds) 44281ad6265SDimitry Andric return false; 44381ad6265SDimitry Andric return true; 44481ad6265SDimitry Andric } 44581ad6265SDimitry Andric 44681ad6265SDimitry Andric // Serializes the memprof records in \p Records to the ostream \p OS based 44781ad6265SDimitry Andric // on the schema provided in \p Schema. 448*0fca6ea1SDimitry Andric void serialize(const MemProfSchema &Schema, raw_ostream &OS, 449*0fca6ea1SDimitry Andric IndexedVersion Version, 450*0fca6ea1SDimitry Andric llvm::DenseMap<CallStackId, LinearCallStackId> 451*0fca6ea1SDimitry Andric *MemProfCallStackIndexes = nullptr) const; 45281ad6265SDimitry Andric 45381ad6265SDimitry Andric // Deserializes memprof records from the Buffer. 45481ad6265SDimitry Andric static IndexedMemProfRecord deserialize(const MemProfSchema &Schema, 455*0fca6ea1SDimitry Andric const unsigned char *Buffer, 456*0fca6ea1SDimitry Andric IndexedVersion Version); 457*0fca6ea1SDimitry Andric 458*0fca6ea1SDimitry Andric // Convert IndexedMemProfRecord to MemProfRecord. Callback is used to 459*0fca6ea1SDimitry Andric // translate CallStackId to call stacks with frames inline. 460*0fca6ea1SDimitry Andric MemProfRecord toMemProfRecord( 461*0fca6ea1SDimitry Andric llvm::function_ref<std::vector<Frame>(const CallStackId)> Callback) const; 46281ad6265SDimitry Andric 46381ad6265SDimitry Andric // Returns the GUID for the function name after canonicalization. For 46481ad6265SDimitry Andric // memprof, we remove any .llvm suffix added by LTO. MemProfRecords are 46581ad6265SDimitry Andric // mapped to functions using this GUID. 46681ad6265SDimitry Andric static GlobalValue::GUID getGUID(const StringRef FunctionName); 46781ad6265SDimitry Andric }; 46881ad6265SDimitry Andric 46981ad6265SDimitry Andric // Holds the memprof profile information for a function. The internal 47081ad6265SDimitry Andric // representation stores frame contents inline. This representation should 47181ad6265SDimitry Andric // be used for small amount of temporary, in memory instances. 47281ad6265SDimitry Andric struct MemProfRecord { 47381ad6265SDimitry Andric // Same as IndexedMemProfRecord::AllocSites with frame contents inline. 47481ad6265SDimitry Andric llvm::SmallVector<AllocationInfo> AllocSites; 47581ad6265SDimitry Andric // Same as IndexedMemProfRecord::CallSites with frame contents inline. 476*0fca6ea1SDimitry Andric llvm::SmallVector<std::vector<Frame>> CallSites; 47781ad6265SDimitry Andric 47881ad6265SDimitry Andric MemProfRecord() = default; 47981ad6265SDimitry Andric MemProfRecord( 48081ad6265SDimitry Andric const IndexedMemProfRecord &Record, 48181ad6265SDimitry Andric llvm::function_ref<const Frame(const FrameId Id)> IdToFrameCallback) { 48281ad6265SDimitry Andric for (const IndexedAllocationInfo &IndexedAI : Record.AllocSites) { 48381ad6265SDimitry Andric AllocSites.emplace_back(IndexedAI, IdToFrameCallback); 48481ad6265SDimitry Andric } 48581ad6265SDimitry Andric for (const ArrayRef<FrameId> Site : Record.CallSites) { 486*0fca6ea1SDimitry Andric std::vector<Frame> Frames; 48781ad6265SDimitry Andric for (const FrameId Id : Site) { 48881ad6265SDimitry Andric Frames.push_back(IdToFrameCallback(Id)); 48981ad6265SDimitry Andric } 49081ad6265SDimitry Andric CallSites.push_back(Frames); 49181ad6265SDimitry Andric } 49281ad6265SDimitry Andric } 49381ad6265SDimitry Andric 49481ad6265SDimitry Andric // Prints out the contents of the memprof record in YAML. 49581ad6265SDimitry Andric void print(llvm::raw_ostream &OS) const { 49681ad6265SDimitry Andric if (!AllocSites.empty()) { 49781ad6265SDimitry Andric OS << " AllocSites:\n"; 49881ad6265SDimitry Andric for (const AllocationInfo &N : AllocSites) 49981ad6265SDimitry Andric N.printYAML(OS); 50081ad6265SDimitry Andric } 50181ad6265SDimitry Andric 50281ad6265SDimitry Andric if (!CallSites.empty()) { 50381ad6265SDimitry Andric OS << " CallSites:\n"; 504*0fca6ea1SDimitry Andric for (const std::vector<Frame> &Frames : CallSites) { 50581ad6265SDimitry Andric for (const Frame &F : Frames) { 50681ad6265SDimitry Andric OS << " -\n"; 50781ad6265SDimitry Andric F.printYAML(OS); 50881ad6265SDimitry Andric } 50981ad6265SDimitry Andric } 51081ad6265SDimitry Andric } 51181ad6265SDimitry Andric } 51281ad6265SDimitry Andric }; 51381ad6265SDimitry Andric 51481ad6265SDimitry Andric // Reads a memprof schema from a buffer. All entries in the buffer are 51581ad6265SDimitry Andric // interpreted as uint64_t. The first entry in the buffer denotes the number of 51681ad6265SDimitry Andric // ids in the schema. Subsequent entries are integers which map to memprof::Meta 51781ad6265SDimitry Andric // enum class entries. After successfully reading the schema, the pointer is one 51881ad6265SDimitry Andric // byte past the schema contents. 51981ad6265SDimitry Andric Expected<MemProfSchema> readMemProfSchema(const unsigned char *&Buffer); 52081ad6265SDimitry Andric 52181ad6265SDimitry Andric // Trait for reading IndexedMemProfRecord data from the on-disk hash table. 52281ad6265SDimitry Andric class RecordLookupTrait { 52381ad6265SDimitry Andric public: 52481ad6265SDimitry Andric using data_type = const IndexedMemProfRecord &; 52581ad6265SDimitry Andric using internal_key_type = uint64_t; 52681ad6265SDimitry Andric using external_key_type = uint64_t; 52781ad6265SDimitry Andric using hash_value_type = uint64_t; 52881ad6265SDimitry Andric using offset_type = uint64_t; 52981ad6265SDimitry Andric 53081ad6265SDimitry Andric RecordLookupTrait() = delete; 531*0fca6ea1SDimitry Andric RecordLookupTrait(IndexedVersion V, const MemProfSchema &S) 532*0fca6ea1SDimitry Andric : Version(V), Schema(S) {} 53381ad6265SDimitry Andric 53481ad6265SDimitry Andric static bool EqualKey(uint64_t A, uint64_t B) { return A == B; } 53581ad6265SDimitry Andric static uint64_t GetInternalKey(uint64_t K) { return K; } 53681ad6265SDimitry Andric static uint64_t GetExternalKey(uint64_t K) { return K; } 53781ad6265SDimitry Andric 53881ad6265SDimitry Andric hash_value_type ComputeHash(uint64_t K) { return K; } 53981ad6265SDimitry Andric 54081ad6265SDimitry Andric static std::pair<offset_type, offset_type> 54181ad6265SDimitry Andric ReadKeyDataLength(const unsigned char *&D) { 54281ad6265SDimitry Andric using namespace support; 54381ad6265SDimitry Andric 5445f757f3fSDimitry Andric offset_type KeyLen = 545*0fca6ea1SDimitry Andric endian::readNext<offset_type, llvm::endianness::little>(D); 5465f757f3fSDimitry Andric offset_type DataLen = 547*0fca6ea1SDimitry Andric endian::readNext<offset_type, llvm::endianness::little>(D); 54881ad6265SDimitry Andric return std::make_pair(KeyLen, DataLen); 54981ad6265SDimitry Andric } 55081ad6265SDimitry Andric 55181ad6265SDimitry Andric uint64_t ReadKey(const unsigned char *D, offset_type /*Unused*/) { 55281ad6265SDimitry Andric using namespace support; 553*0fca6ea1SDimitry Andric return endian::readNext<external_key_type, llvm::endianness::little>(D); 55481ad6265SDimitry Andric } 55581ad6265SDimitry Andric 55681ad6265SDimitry Andric data_type ReadData(uint64_t K, const unsigned char *D, 55781ad6265SDimitry Andric offset_type /*Unused*/) { 558*0fca6ea1SDimitry Andric Record = IndexedMemProfRecord::deserialize(Schema, D, Version); 55981ad6265SDimitry Andric return Record; 56081ad6265SDimitry Andric } 56181ad6265SDimitry Andric 56281ad6265SDimitry Andric private: 563*0fca6ea1SDimitry Andric // Holds the MemProf version. 564*0fca6ea1SDimitry Andric IndexedVersion Version; 56581ad6265SDimitry Andric // Holds the memprof schema used to deserialize records. 56681ad6265SDimitry Andric MemProfSchema Schema; 56781ad6265SDimitry Andric // Holds the records from one function deserialized from the indexed format. 56881ad6265SDimitry Andric IndexedMemProfRecord Record; 56981ad6265SDimitry Andric }; 57081ad6265SDimitry Andric 57181ad6265SDimitry Andric // Trait for writing IndexedMemProfRecord data to the on-disk hash table. 57281ad6265SDimitry Andric class RecordWriterTrait { 57381ad6265SDimitry Andric public: 57481ad6265SDimitry Andric using key_type = uint64_t; 57581ad6265SDimitry Andric using key_type_ref = uint64_t; 57681ad6265SDimitry Andric 57781ad6265SDimitry Andric using data_type = IndexedMemProfRecord; 57881ad6265SDimitry Andric using data_type_ref = IndexedMemProfRecord &; 57981ad6265SDimitry Andric 58081ad6265SDimitry Andric using hash_value_type = uint64_t; 58181ad6265SDimitry Andric using offset_type = uint64_t; 58281ad6265SDimitry Andric 583*0fca6ea1SDimitry Andric private: 584*0fca6ea1SDimitry Andric // Pointer to the memprof schema to use for the generator. 585*0fca6ea1SDimitry Andric const MemProfSchema *Schema; 586*0fca6ea1SDimitry Andric // The MemProf version to use for the serialization. 587*0fca6ea1SDimitry Andric IndexedVersion Version; 58881ad6265SDimitry Andric 589*0fca6ea1SDimitry Andric // Mappings from CallStackId to the indexes into the call stack array. 590*0fca6ea1SDimitry Andric llvm::DenseMap<CallStackId, LinearCallStackId> *MemProfCallStackIndexes; 591*0fca6ea1SDimitry Andric 592*0fca6ea1SDimitry Andric public: 593*0fca6ea1SDimitry Andric // We do not support the default constructor, which does not set Version. 594*0fca6ea1SDimitry Andric RecordWriterTrait() = delete; 595*0fca6ea1SDimitry Andric RecordWriterTrait( 596*0fca6ea1SDimitry Andric const MemProfSchema *Schema, IndexedVersion V, 597*0fca6ea1SDimitry Andric llvm::DenseMap<CallStackId, LinearCallStackId> *MemProfCallStackIndexes) 598*0fca6ea1SDimitry Andric : Schema(Schema), Version(V), 599*0fca6ea1SDimitry Andric MemProfCallStackIndexes(MemProfCallStackIndexes) {} 60081ad6265SDimitry Andric 60181ad6265SDimitry Andric static hash_value_type ComputeHash(key_type_ref K) { return K; } 60281ad6265SDimitry Andric 603*0fca6ea1SDimitry Andric std::pair<offset_type, offset_type> 60481ad6265SDimitry Andric EmitKeyDataLength(raw_ostream &Out, key_type_ref K, data_type_ref V) { 60581ad6265SDimitry Andric using namespace support; 60681ad6265SDimitry Andric 6075f757f3fSDimitry Andric endian::Writer LE(Out, llvm::endianness::little); 60881ad6265SDimitry Andric offset_type N = sizeof(K); 60981ad6265SDimitry Andric LE.write<offset_type>(N); 610*0fca6ea1SDimitry Andric offset_type M = V.serializedSize(*Schema, Version); 61181ad6265SDimitry Andric LE.write<offset_type>(M); 61281ad6265SDimitry Andric return std::make_pair(N, M); 61381ad6265SDimitry Andric } 61481ad6265SDimitry Andric 61581ad6265SDimitry Andric void EmitKey(raw_ostream &Out, key_type_ref K, offset_type /*Unused*/) { 61681ad6265SDimitry Andric using namespace support; 6175f757f3fSDimitry Andric endian::Writer LE(Out, llvm::endianness::little); 61881ad6265SDimitry Andric LE.write<uint64_t>(K); 61981ad6265SDimitry Andric } 62081ad6265SDimitry Andric 62181ad6265SDimitry Andric void EmitData(raw_ostream &Out, key_type_ref /*Unused*/, data_type_ref V, 62281ad6265SDimitry Andric offset_type /*Unused*/) { 62381ad6265SDimitry Andric assert(Schema != nullptr && "MemProf schema is not initialized!"); 624*0fca6ea1SDimitry Andric V.serialize(*Schema, Out, Version, MemProfCallStackIndexes); 6255f757f3fSDimitry Andric // Clear the IndexedMemProfRecord which results in clearing/freeing its 6265f757f3fSDimitry Andric // vectors of allocs and callsites. This is owned by the associated on-disk 6275f757f3fSDimitry Andric // hash table, but unused after this point. See also the comment added to 6285f757f3fSDimitry Andric // the client which constructs the on-disk hash table for this trait. 6295f757f3fSDimitry Andric V.clear(); 63081ad6265SDimitry Andric } 63181ad6265SDimitry Andric }; 63281ad6265SDimitry Andric 63381ad6265SDimitry Andric // Trait for writing frame mappings to the on-disk hash table. 63481ad6265SDimitry Andric class FrameWriterTrait { 63581ad6265SDimitry Andric public: 63681ad6265SDimitry Andric using key_type = FrameId; 63781ad6265SDimitry Andric using key_type_ref = FrameId; 63881ad6265SDimitry Andric 63981ad6265SDimitry Andric using data_type = Frame; 64081ad6265SDimitry Andric using data_type_ref = Frame &; 64181ad6265SDimitry Andric 64281ad6265SDimitry Andric using hash_value_type = FrameId; 64381ad6265SDimitry Andric using offset_type = uint64_t; 64481ad6265SDimitry Andric 64581ad6265SDimitry Andric static hash_value_type ComputeHash(key_type_ref K) { return K; } 64681ad6265SDimitry Andric 64781ad6265SDimitry Andric static std::pair<offset_type, offset_type> 64881ad6265SDimitry Andric EmitKeyDataLength(raw_ostream &Out, key_type_ref K, data_type_ref V) { 64981ad6265SDimitry Andric using namespace support; 6505f757f3fSDimitry Andric endian::Writer LE(Out, llvm::endianness::little); 65181ad6265SDimitry Andric offset_type N = sizeof(K); 65281ad6265SDimitry Andric LE.write<offset_type>(N); 65381ad6265SDimitry Andric offset_type M = V.serializedSize(); 65481ad6265SDimitry Andric LE.write<offset_type>(M); 65581ad6265SDimitry Andric return std::make_pair(N, M); 65681ad6265SDimitry Andric } 65781ad6265SDimitry Andric 65881ad6265SDimitry Andric void EmitKey(raw_ostream &Out, key_type_ref K, offset_type /*Unused*/) { 65981ad6265SDimitry Andric using namespace support; 6605f757f3fSDimitry Andric endian::Writer LE(Out, llvm::endianness::little); 66181ad6265SDimitry Andric LE.write<key_type>(K); 66281ad6265SDimitry Andric } 66381ad6265SDimitry Andric 66481ad6265SDimitry Andric void EmitData(raw_ostream &Out, key_type_ref /*Unused*/, data_type_ref V, 66581ad6265SDimitry Andric offset_type /*Unused*/) { 66681ad6265SDimitry Andric V.serialize(Out); 66781ad6265SDimitry Andric } 66881ad6265SDimitry Andric }; 66981ad6265SDimitry Andric 67081ad6265SDimitry Andric // Trait for reading frame mappings from the on-disk hash table. 67181ad6265SDimitry Andric class FrameLookupTrait { 67281ad6265SDimitry Andric public: 67381ad6265SDimitry Andric using data_type = const Frame; 67481ad6265SDimitry Andric using internal_key_type = FrameId; 67581ad6265SDimitry Andric using external_key_type = FrameId; 67681ad6265SDimitry Andric using hash_value_type = FrameId; 67781ad6265SDimitry Andric using offset_type = uint64_t; 67881ad6265SDimitry Andric 67981ad6265SDimitry Andric static bool EqualKey(internal_key_type A, internal_key_type B) { 68081ad6265SDimitry Andric return A == B; 68181ad6265SDimitry Andric } 68281ad6265SDimitry Andric static uint64_t GetInternalKey(internal_key_type K) { return K; } 68381ad6265SDimitry Andric static uint64_t GetExternalKey(external_key_type K) { return K; } 68481ad6265SDimitry Andric 68581ad6265SDimitry Andric hash_value_type ComputeHash(internal_key_type K) { return K; } 68681ad6265SDimitry Andric 68781ad6265SDimitry Andric static std::pair<offset_type, offset_type> 68881ad6265SDimitry Andric ReadKeyDataLength(const unsigned char *&D) { 68981ad6265SDimitry Andric using namespace support; 69081ad6265SDimitry Andric 6915f757f3fSDimitry Andric offset_type KeyLen = 692*0fca6ea1SDimitry Andric endian::readNext<offset_type, llvm::endianness::little>(D); 6935f757f3fSDimitry Andric offset_type DataLen = 694*0fca6ea1SDimitry Andric endian::readNext<offset_type, llvm::endianness::little>(D); 69581ad6265SDimitry Andric return std::make_pair(KeyLen, DataLen); 69681ad6265SDimitry Andric } 69781ad6265SDimitry Andric 69881ad6265SDimitry Andric uint64_t ReadKey(const unsigned char *D, offset_type /*Unused*/) { 69981ad6265SDimitry Andric using namespace support; 700*0fca6ea1SDimitry Andric return endian::readNext<external_key_type, llvm::endianness::little>(D); 70181ad6265SDimitry Andric } 70281ad6265SDimitry Andric 70381ad6265SDimitry Andric data_type ReadData(uint64_t K, const unsigned char *D, 70481ad6265SDimitry Andric offset_type /*Unused*/) { 70581ad6265SDimitry Andric return Frame::deserialize(D); 70681ad6265SDimitry Andric } 70781ad6265SDimitry Andric }; 708*0fca6ea1SDimitry Andric 709*0fca6ea1SDimitry Andric // Trait for writing call stacks to the on-disk hash table. 710*0fca6ea1SDimitry Andric class CallStackWriterTrait { 711*0fca6ea1SDimitry Andric public: 712*0fca6ea1SDimitry Andric using key_type = CallStackId; 713*0fca6ea1SDimitry Andric using key_type_ref = CallStackId; 714*0fca6ea1SDimitry Andric 715*0fca6ea1SDimitry Andric using data_type = llvm::SmallVector<FrameId>; 716*0fca6ea1SDimitry Andric using data_type_ref = llvm::SmallVector<FrameId> &; 717*0fca6ea1SDimitry Andric 718*0fca6ea1SDimitry Andric using hash_value_type = CallStackId; 719*0fca6ea1SDimitry Andric using offset_type = uint64_t; 720*0fca6ea1SDimitry Andric 721*0fca6ea1SDimitry Andric static hash_value_type ComputeHash(key_type_ref K) { return K; } 722*0fca6ea1SDimitry Andric 723*0fca6ea1SDimitry Andric static std::pair<offset_type, offset_type> 724*0fca6ea1SDimitry Andric EmitKeyDataLength(raw_ostream &Out, key_type_ref K, data_type_ref V) { 725*0fca6ea1SDimitry Andric using namespace support; 726*0fca6ea1SDimitry Andric endian::Writer LE(Out, llvm::endianness::little); 727*0fca6ea1SDimitry Andric // We do not explicitly emit the key length because it is a constant. 728*0fca6ea1SDimitry Andric offset_type N = sizeof(K); 729*0fca6ea1SDimitry Andric offset_type M = sizeof(FrameId) * V.size(); 730*0fca6ea1SDimitry Andric LE.write<offset_type>(M); 731*0fca6ea1SDimitry Andric return std::make_pair(N, M); 732*0fca6ea1SDimitry Andric } 733*0fca6ea1SDimitry Andric 734*0fca6ea1SDimitry Andric void EmitKey(raw_ostream &Out, key_type_ref K, offset_type /*Unused*/) { 735*0fca6ea1SDimitry Andric using namespace support; 736*0fca6ea1SDimitry Andric endian::Writer LE(Out, llvm::endianness::little); 737*0fca6ea1SDimitry Andric LE.write<key_type>(K); 738*0fca6ea1SDimitry Andric } 739*0fca6ea1SDimitry Andric 740*0fca6ea1SDimitry Andric void EmitData(raw_ostream &Out, key_type_ref /*Unused*/, data_type_ref V, 741*0fca6ea1SDimitry Andric offset_type /*Unused*/) { 742*0fca6ea1SDimitry Andric using namespace support; 743*0fca6ea1SDimitry Andric endian::Writer LE(Out, llvm::endianness::little); 744*0fca6ea1SDimitry Andric // Emit the frames. We do not explicitly emit the length of the vector 745*0fca6ea1SDimitry Andric // because it can be inferred from the data length. 746*0fca6ea1SDimitry Andric for (FrameId F : V) 747*0fca6ea1SDimitry Andric LE.write<FrameId>(F); 748*0fca6ea1SDimitry Andric } 749*0fca6ea1SDimitry Andric }; 750*0fca6ea1SDimitry Andric 751*0fca6ea1SDimitry Andric // Trait for reading call stack mappings from the on-disk hash table. 752*0fca6ea1SDimitry Andric class CallStackLookupTrait { 753*0fca6ea1SDimitry Andric public: 754*0fca6ea1SDimitry Andric using data_type = const llvm::SmallVector<FrameId>; 755*0fca6ea1SDimitry Andric using internal_key_type = CallStackId; 756*0fca6ea1SDimitry Andric using external_key_type = CallStackId; 757*0fca6ea1SDimitry Andric using hash_value_type = CallStackId; 758*0fca6ea1SDimitry Andric using offset_type = uint64_t; 759*0fca6ea1SDimitry Andric 760*0fca6ea1SDimitry Andric static bool EqualKey(internal_key_type A, internal_key_type B) { 761*0fca6ea1SDimitry Andric return A == B; 762*0fca6ea1SDimitry Andric } 763*0fca6ea1SDimitry Andric static uint64_t GetInternalKey(internal_key_type K) { return K; } 764*0fca6ea1SDimitry Andric static uint64_t GetExternalKey(external_key_type K) { return K; } 765*0fca6ea1SDimitry Andric 766*0fca6ea1SDimitry Andric hash_value_type ComputeHash(internal_key_type K) { return K; } 767*0fca6ea1SDimitry Andric 768*0fca6ea1SDimitry Andric static std::pair<offset_type, offset_type> 769*0fca6ea1SDimitry Andric ReadKeyDataLength(const unsigned char *&D) { 770*0fca6ea1SDimitry Andric using namespace support; 771*0fca6ea1SDimitry Andric 772*0fca6ea1SDimitry Andric // We do not explicitly read the key length because it is a constant. 773*0fca6ea1SDimitry Andric offset_type KeyLen = sizeof(external_key_type); 774*0fca6ea1SDimitry Andric offset_type DataLen = 775*0fca6ea1SDimitry Andric endian::readNext<offset_type, llvm::endianness::little>(D); 776*0fca6ea1SDimitry Andric return std::make_pair(KeyLen, DataLen); 777*0fca6ea1SDimitry Andric } 778*0fca6ea1SDimitry Andric 779*0fca6ea1SDimitry Andric uint64_t ReadKey(const unsigned char *D, offset_type /*Unused*/) { 780*0fca6ea1SDimitry Andric using namespace support; 781*0fca6ea1SDimitry Andric return endian::readNext<external_key_type, llvm::endianness::little>(D); 782*0fca6ea1SDimitry Andric } 783*0fca6ea1SDimitry Andric 784*0fca6ea1SDimitry Andric data_type ReadData(uint64_t K, const unsigned char *D, offset_type Length) { 785*0fca6ea1SDimitry Andric using namespace support; 786*0fca6ea1SDimitry Andric llvm::SmallVector<FrameId> CS; 787*0fca6ea1SDimitry Andric // Derive the number of frames from the data length. 788*0fca6ea1SDimitry Andric uint64_t NumFrames = Length / sizeof(FrameId); 789*0fca6ea1SDimitry Andric assert(Length % sizeof(FrameId) == 0); 790*0fca6ea1SDimitry Andric CS.reserve(NumFrames); 791*0fca6ea1SDimitry Andric for (size_t I = 0; I != NumFrames; ++I) { 792*0fca6ea1SDimitry Andric FrameId F = endian::readNext<FrameId, llvm::endianness::little>(D); 793*0fca6ea1SDimitry Andric CS.push_back(F); 794*0fca6ea1SDimitry Andric } 795*0fca6ea1SDimitry Andric return CS; 796*0fca6ea1SDimitry Andric } 797*0fca6ea1SDimitry Andric }; 798*0fca6ea1SDimitry Andric 799*0fca6ea1SDimitry Andric // Compute a CallStackId for a given call stack. 800*0fca6ea1SDimitry Andric CallStackId hashCallStack(ArrayRef<FrameId> CS); 801*0fca6ea1SDimitry Andric 802*0fca6ea1SDimitry Andric namespace detail { 803*0fca6ea1SDimitry Andric // "Dereference" the iterator from DenseMap or OnDiskChainedHashTable. We have 804*0fca6ea1SDimitry Andric // to do so in one of two different ways depending on the type of the hash 805*0fca6ea1SDimitry Andric // table. 806*0fca6ea1SDimitry Andric template <typename value_type, typename IterTy> 807*0fca6ea1SDimitry Andric value_type DerefIterator(IterTy Iter) { 808*0fca6ea1SDimitry Andric using deref_type = llvm::remove_cvref_t<decltype(*Iter)>; 809*0fca6ea1SDimitry Andric if constexpr (std::is_same_v<deref_type, value_type>) 810*0fca6ea1SDimitry Andric return *Iter; 811*0fca6ea1SDimitry Andric else 812*0fca6ea1SDimitry Andric return Iter->second; 813*0fca6ea1SDimitry Andric } 814*0fca6ea1SDimitry Andric } // namespace detail 815*0fca6ea1SDimitry Andric 816*0fca6ea1SDimitry Andric // A function object that returns a frame for a given FrameId. 817*0fca6ea1SDimitry Andric template <typename MapTy> struct FrameIdConverter { 818*0fca6ea1SDimitry Andric std::optional<FrameId> LastUnmappedId; 819*0fca6ea1SDimitry Andric MapTy ⤅ 820*0fca6ea1SDimitry Andric 821*0fca6ea1SDimitry Andric FrameIdConverter() = delete; 822*0fca6ea1SDimitry Andric FrameIdConverter(MapTy &Map) : Map(Map) {} 823*0fca6ea1SDimitry Andric 824*0fca6ea1SDimitry Andric // Delete the copy constructor and copy assignment operator to avoid a 825*0fca6ea1SDimitry Andric // situation where a copy of FrameIdConverter gets an error in LastUnmappedId 826*0fca6ea1SDimitry Andric // while the original instance doesn't. 827*0fca6ea1SDimitry Andric FrameIdConverter(const FrameIdConverter &) = delete; 828*0fca6ea1SDimitry Andric FrameIdConverter &operator=(const FrameIdConverter &) = delete; 829*0fca6ea1SDimitry Andric 830*0fca6ea1SDimitry Andric Frame operator()(FrameId Id) { 831*0fca6ea1SDimitry Andric auto Iter = Map.find(Id); 832*0fca6ea1SDimitry Andric if (Iter == Map.end()) { 833*0fca6ea1SDimitry Andric LastUnmappedId = Id; 834*0fca6ea1SDimitry Andric return Frame(0, 0, 0, false); 835*0fca6ea1SDimitry Andric } 836*0fca6ea1SDimitry Andric return detail::DerefIterator<Frame>(Iter); 837*0fca6ea1SDimitry Andric } 838*0fca6ea1SDimitry Andric }; 839*0fca6ea1SDimitry Andric 840*0fca6ea1SDimitry Andric // A function object that returns a call stack for a given CallStackId. 841*0fca6ea1SDimitry Andric template <typename MapTy> struct CallStackIdConverter { 842*0fca6ea1SDimitry Andric std::optional<CallStackId> LastUnmappedId; 843*0fca6ea1SDimitry Andric MapTy ⤅ 844*0fca6ea1SDimitry Andric llvm::function_ref<Frame(FrameId)> FrameIdToFrame; 845*0fca6ea1SDimitry Andric 846*0fca6ea1SDimitry Andric CallStackIdConverter() = delete; 847*0fca6ea1SDimitry Andric CallStackIdConverter(MapTy &Map, 848*0fca6ea1SDimitry Andric llvm::function_ref<Frame(FrameId)> FrameIdToFrame) 849*0fca6ea1SDimitry Andric : Map(Map), FrameIdToFrame(FrameIdToFrame) {} 850*0fca6ea1SDimitry Andric 851*0fca6ea1SDimitry Andric // Delete the copy constructor and copy assignment operator to avoid a 852*0fca6ea1SDimitry Andric // situation where a copy of CallStackIdConverter gets an error in 853*0fca6ea1SDimitry Andric // LastUnmappedId while the original instance doesn't. 854*0fca6ea1SDimitry Andric CallStackIdConverter(const CallStackIdConverter &) = delete; 855*0fca6ea1SDimitry Andric CallStackIdConverter &operator=(const CallStackIdConverter &) = delete; 856*0fca6ea1SDimitry Andric 857*0fca6ea1SDimitry Andric std::vector<Frame> operator()(CallStackId CSId) { 858*0fca6ea1SDimitry Andric std::vector<Frame> Frames; 859*0fca6ea1SDimitry Andric auto CSIter = Map.find(CSId); 860*0fca6ea1SDimitry Andric if (CSIter == Map.end()) { 861*0fca6ea1SDimitry Andric LastUnmappedId = CSId; 862*0fca6ea1SDimitry Andric } else { 863*0fca6ea1SDimitry Andric llvm::SmallVector<FrameId> CS = 864*0fca6ea1SDimitry Andric detail::DerefIterator<llvm::SmallVector<FrameId>>(CSIter); 865*0fca6ea1SDimitry Andric Frames.reserve(CS.size()); 866*0fca6ea1SDimitry Andric for (FrameId Id : CS) 867*0fca6ea1SDimitry Andric Frames.push_back(FrameIdToFrame(Id)); 868*0fca6ea1SDimitry Andric } 869*0fca6ea1SDimitry Andric return Frames; 870*0fca6ea1SDimitry Andric } 871*0fca6ea1SDimitry Andric }; 872*0fca6ea1SDimitry Andric 873*0fca6ea1SDimitry Andric // A function object that returns a Frame stored at a given index into the Frame 874*0fca6ea1SDimitry Andric // array in the profile. 875*0fca6ea1SDimitry Andric struct LinearFrameIdConverter { 876*0fca6ea1SDimitry Andric const unsigned char *FrameBase; 877*0fca6ea1SDimitry Andric 878*0fca6ea1SDimitry Andric LinearFrameIdConverter() = delete; 879*0fca6ea1SDimitry Andric LinearFrameIdConverter(const unsigned char *FrameBase) 880*0fca6ea1SDimitry Andric : FrameBase(FrameBase) {} 881*0fca6ea1SDimitry Andric 882*0fca6ea1SDimitry Andric Frame operator()(LinearFrameId LinearId) { 883*0fca6ea1SDimitry Andric uint64_t Offset = static_cast<uint64_t>(LinearId) * Frame::serializedSize(); 884*0fca6ea1SDimitry Andric return Frame::deserialize(FrameBase + Offset); 885*0fca6ea1SDimitry Andric } 886*0fca6ea1SDimitry Andric }; 887*0fca6ea1SDimitry Andric 888*0fca6ea1SDimitry Andric // A function object that returns a call stack stored at a given index into the 889*0fca6ea1SDimitry Andric // call stack array in the profile. 890*0fca6ea1SDimitry Andric struct LinearCallStackIdConverter { 891*0fca6ea1SDimitry Andric const unsigned char *CallStackBase; 892*0fca6ea1SDimitry Andric std::function<Frame(LinearFrameId)> FrameIdToFrame; 893*0fca6ea1SDimitry Andric 894*0fca6ea1SDimitry Andric LinearCallStackIdConverter() = delete; 895*0fca6ea1SDimitry Andric LinearCallStackIdConverter(const unsigned char *CallStackBase, 896*0fca6ea1SDimitry Andric std::function<Frame(LinearFrameId)> FrameIdToFrame) 897*0fca6ea1SDimitry Andric : CallStackBase(CallStackBase), FrameIdToFrame(FrameIdToFrame) {} 898*0fca6ea1SDimitry Andric 899*0fca6ea1SDimitry Andric std::vector<Frame> operator()(LinearCallStackId LinearCSId) { 900*0fca6ea1SDimitry Andric std::vector<Frame> Frames; 901*0fca6ea1SDimitry Andric 902*0fca6ea1SDimitry Andric const unsigned char *Ptr = 903*0fca6ea1SDimitry Andric CallStackBase + 904*0fca6ea1SDimitry Andric static_cast<uint64_t>(LinearCSId) * sizeof(LinearFrameId); 905*0fca6ea1SDimitry Andric uint32_t NumFrames = 906*0fca6ea1SDimitry Andric support::endian::readNext<uint32_t, llvm::endianness::little>(Ptr); 907*0fca6ea1SDimitry Andric Frames.reserve(NumFrames); 908*0fca6ea1SDimitry Andric for (; NumFrames; --NumFrames) { 909*0fca6ea1SDimitry Andric LinearFrameId Elem = 910*0fca6ea1SDimitry Andric support::endian::read<LinearFrameId, llvm::endianness::little>(Ptr); 911*0fca6ea1SDimitry Andric // Follow a pointer to the parent, if any. See comments below on 912*0fca6ea1SDimitry Andric // CallStackRadixTreeBuilder for the description of the radix tree format. 913*0fca6ea1SDimitry Andric if (static_cast<std::make_signed_t<LinearFrameId>>(Elem) < 0) { 914*0fca6ea1SDimitry Andric Ptr += (-Elem) * sizeof(LinearFrameId); 915*0fca6ea1SDimitry Andric Elem = 916*0fca6ea1SDimitry Andric support::endian::read<LinearFrameId, llvm::endianness::little>(Ptr); 917*0fca6ea1SDimitry Andric } 918*0fca6ea1SDimitry Andric // We shouldn't encounter another pointer. 919*0fca6ea1SDimitry Andric assert(static_cast<std::make_signed_t<LinearFrameId>>(Elem) >= 0); 920*0fca6ea1SDimitry Andric Frames.push_back(FrameIdToFrame(Elem)); 921*0fca6ea1SDimitry Andric Ptr += sizeof(LinearFrameId); 922*0fca6ea1SDimitry Andric } 923*0fca6ea1SDimitry Andric 924*0fca6ea1SDimitry Andric return Frames; 925*0fca6ea1SDimitry Andric } 926*0fca6ea1SDimitry Andric }; 927*0fca6ea1SDimitry Andric 928*0fca6ea1SDimitry Andric struct IndexedMemProfData { 929*0fca6ea1SDimitry Andric // A map to hold memprof data per function. The lower 64 bits obtained from 930*0fca6ea1SDimitry Andric // the md5 hash of the function name is used to index into the map. 931*0fca6ea1SDimitry Andric llvm::MapVector<GlobalValue::GUID, IndexedMemProfRecord> Records; 932*0fca6ea1SDimitry Andric 933*0fca6ea1SDimitry Andric // A map to hold frame id to frame mappings. The mappings are used to 934*0fca6ea1SDimitry Andric // convert IndexedMemProfRecord to MemProfRecords with frame information 935*0fca6ea1SDimitry Andric // inline. 936*0fca6ea1SDimitry Andric llvm::MapVector<FrameId, Frame> Frames; 937*0fca6ea1SDimitry Andric 938*0fca6ea1SDimitry Andric // A map to hold call stack id to call stacks. 939*0fca6ea1SDimitry Andric llvm::MapVector<CallStackId, llvm::SmallVector<FrameId>> CallStacks; 940*0fca6ea1SDimitry Andric }; 941*0fca6ea1SDimitry Andric 942*0fca6ea1SDimitry Andric struct FrameStat { 943*0fca6ea1SDimitry Andric // The number of occurrences of a given FrameId. 944*0fca6ea1SDimitry Andric uint64_t Count = 0; 945*0fca6ea1SDimitry Andric // The sum of indexes where a given FrameId shows up. 946*0fca6ea1SDimitry Andric uint64_t PositionSum = 0; 947*0fca6ea1SDimitry Andric }; 948*0fca6ea1SDimitry Andric 949*0fca6ea1SDimitry Andric // Compute a histogram of Frames in call stacks. 950*0fca6ea1SDimitry Andric llvm::DenseMap<FrameId, FrameStat> 951*0fca6ea1SDimitry Andric computeFrameHistogram(llvm::MapVector<CallStackId, llvm::SmallVector<FrameId>> 952*0fca6ea1SDimitry Andric &MemProfCallStackData); 953*0fca6ea1SDimitry Andric 954*0fca6ea1SDimitry Andric // Construct a radix tree of call stacks. 955*0fca6ea1SDimitry Andric // 956*0fca6ea1SDimitry Andric // A set of call stacks might look like: 957*0fca6ea1SDimitry Andric // 958*0fca6ea1SDimitry Andric // CallStackId 1: f1 -> f2 -> f3 959*0fca6ea1SDimitry Andric // CallStackId 2: f1 -> f2 -> f4 -> f5 960*0fca6ea1SDimitry Andric // CallStackId 3: f1 -> f2 -> f4 -> f6 961*0fca6ea1SDimitry Andric // CallStackId 4: f7 -> f8 -> f9 962*0fca6ea1SDimitry Andric // 963*0fca6ea1SDimitry Andric // where each fn refers to a stack frame. 964*0fca6ea1SDimitry Andric // 965*0fca6ea1SDimitry Andric // Since we expect a lot of common prefixes, we can compress the call stacks 966*0fca6ea1SDimitry Andric // into a radix tree like: 967*0fca6ea1SDimitry Andric // 968*0fca6ea1SDimitry Andric // CallStackId 1: f1 -> f2 -> f3 969*0fca6ea1SDimitry Andric // | 970*0fca6ea1SDimitry Andric // CallStackId 2: +---> f4 -> f5 971*0fca6ea1SDimitry Andric // | 972*0fca6ea1SDimitry Andric // CallStackId 3: +---> f6 973*0fca6ea1SDimitry Andric // 974*0fca6ea1SDimitry Andric // CallStackId 4: f7 -> f8 -> f9 975*0fca6ea1SDimitry Andric // 976*0fca6ea1SDimitry Andric // Now, we are interested in retrieving call stacks for a given CallStackId, so 977*0fca6ea1SDimitry Andric // we just need a pointer from a given call stack to its parent. For example, 978*0fca6ea1SDimitry Andric // CallStackId 2 would point to CallStackId 1 as a parent. 979*0fca6ea1SDimitry Andric // 980*0fca6ea1SDimitry Andric // We serialize the radix tree above into a single array along with the length 981*0fca6ea1SDimitry Andric // of each call stack and pointers to the parent call stacks. 982*0fca6ea1SDimitry Andric // 983*0fca6ea1SDimitry Andric // Index: 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 984*0fca6ea1SDimitry Andric // Array: L3 f9 f8 f7 L4 f6 J3 L4 f5 f4 J3 L3 f3 f2 f1 985*0fca6ea1SDimitry Andric // ^ ^ ^ ^ 986*0fca6ea1SDimitry Andric // | | | | 987*0fca6ea1SDimitry Andric // CallStackId 4: 0 --+ | | | 988*0fca6ea1SDimitry Andric // CallStackId 3: 4 --------------+ | | 989*0fca6ea1SDimitry Andric // CallStackId 2: 7 -----------------------+ | 990*0fca6ea1SDimitry Andric // CallStackId 1: 11 -----------------------------------+ 991*0fca6ea1SDimitry Andric // 992*0fca6ea1SDimitry Andric // - LN indicates the length of a call stack, encoded as ordinary integer N. 993*0fca6ea1SDimitry Andric // 994*0fca6ea1SDimitry Andric // - JN indicates a pointer to the parent, encoded as -N. 995*0fca6ea1SDimitry Andric // 996*0fca6ea1SDimitry Andric // The radix tree allows us to reconstruct call stacks in the leaf-to-root 997*0fca6ea1SDimitry Andric // order as we scan the array from left ro right while following pointers to 998*0fca6ea1SDimitry Andric // parents along the way. 999*0fca6ea1SDimitry Andric // 1000*0fca6ea1SDimitry Andric // For example, if we are decoding CallStackId 2, we start a forward traversal 1001*0fca6ea1SDimitry Andric // at Index 7, noting the call stack length of 4 and obtaining f5 and f4. When 1002*0fca6ea1SDimitry Andric // we see J3 at Index 10, we resume a forward traversal at Index 13 = 10 + 3, 1003*0fca6ea1SDimitry Andric // picking up f2 and f1. We are done after collecting 4 frames as indicated at 1004*0fca6ea1SDimitry Andric // the beginning of the traversal. 1005*0fca6ea1SDimitry Andric // 1006*0fca6ea1SDimitry Andric // On-disk IndexedMemProfRecord will refer to call stacks by their indexes into 1007*0fca6ea1SDimitry Andric // the radix tree array, so we do not explicitly encode mappings like: 1008*0fca6ea1SDimitry Andric // "CallStackId 1 -> 11". 1009*0fca6ea1SDimitry Andric class CallStackRadixTreeBuilder { 1010*0fca6ea1SDimitry Andric // The radix tree array. 1011*0fca6ea1SDimitry Andric std::vector<LinearFrameId> RadixArray; 1012*0fca6ea1SDimitry Andric 1013*0fca6ea1SDimitry Andric // Mapping from CallStackIds to indexes into RadixArray. 1014*0fca6ea1SDimitry Andric llvm::DenseMap<CallStackId, LinearCallStackId> CallStackPos; 1015*0fca6ea1SDimitry Andric 1016*0fca6ea1SDimitry Andric // In build, we partition a given call stack into two parts -- the prefix 1017*0fca6ea1SDimitry Andric // that's common with the previously encoded call stack and the frames beyond 1018*0fca6ea1SDimitry Andric // the common prefix -- the unique portion. Then we want to find out where 1019*0fca6ea1SDimitry Andric // the common prefix is stored in RadixArray so that we can link the unique 1020*0fca6ea1SDimitry Andric // portion to the common prefix. Indexes, declared below, helps with our 1021*0fca6ea1SDimitry Andric // needs. Intuitively, Indexes tells us where each of the previously encoded 1022*0fca6ea1SDimitry Andric // call stack is stored in RadixArray. More formally, Indexes satisfies: 1023*0fca6ea1SDimitry Andric // 1024*0fca6ea1SDimitry Andric // RadixArray[Indexes[I]] == Prev[I] 1025*0fca6ea1SDimitry Andric // 1026*0fca6ea1SDimitry Andric // for every I, where Prev is the the call stack in the root-to-leaf order 1027*0fca6ea1SDimitry Andric // previously encoded by build. (Note that Prev, as passed to 1028*0fca6ea1SDimitry Andric // encodeCallStack, is in the leaf-to-root order.) 1029*0fca6ea1SDimitry Andric // 1030*0fca6ea1SDimitry Andric // For example, if the call stack being encoded shares 5 frames at the root of 1031*0fca6ea1SDimitry Andric // the call stack with the previously encoded call stack, 1032*0fca6ea1SDimitry Andric // RadixArray[Indexes[0]] is the root frame of the common prefix. 1033*0fca6ea1SDimitry Andric // RadixArray[Indexes[5 - 1]] is the last frame of the common prefix. 1034*0fca6ea1SDimitry Andric std::vector<LinearCallStackId> Indexes; 1035*0fca6ea1SDimitry Andric 1036*0fca6ea1SDimitry Andric using CSIdPair = std::pair<CallStackId, llvm::SmallVector<FrameId>>; 1037*0fca6ea1SDimitry Andric 1038*0fca6ea1SDimitry Andric // Encode a call stack into RadixArray. Return the starting index within 1039*0fca6ea1SDimitry Andric // RadixArray. 1040*0fca6ea1SDimitry Andric LinearCallStackId encodeCallStack( 1041*0fca6ea1SDimitry Andric const llvm::SmallVector<FrameId> *CallStack, 1042*0fca6ea1SDimitry Andric const llvm::SmallVector<FrameId> *Prev, 1043*0fca6ea1SDimitry Andric const llvm::DenseMap<FrameId, LinearFrameId> &MemProfFrameIndexes); 1044*0fca6ea1SDimitry Andric 1045*0fca6ea1SDimitry Andric public: 1046*0fca6ea1SDimitry Andric CallStackRadixTreeBuilder() = default; 1047*0fca6ea1SDimitry Andric 1048*0fca6ea1SDimitry Andric // Build a radix tree array. 1049*0fca6ea1SDimitry Andric void build(llvm::MapVector<CallStackId, llvm::SmallVector<FrameId>> 1050*0fca6ea1SDimitry Andric &&MemProfCallStackData, 1051*0fca6ea1SDimitry Andric const llvm::DenseMap<FrameId, LinearFrameId> &MemProfFrameIndexes, 1052*0fca6ea1SDimitry Andric llvm::DenseMap<FrameId, FrameStat> &FrameHistogram); 1053*0fca6ea1SDimitry Andric 1054*0fca6ea1SDimitry Andric const std::vector<LinearFrameId> &getRadixArray() const { return RadixArray; } 1055*0fca6ea1SDimitry Andric 1056*0fca6ea1SDimitry Andric llvm::DenseMap<CallStackId, LinearCallStackId> takeCallStackPos() { 1057*0fca6ea1SDimitry Andric return std::move(CallStackPos); 1058*0fca6ea1SDimitry Andric } 1059*0fca6ea1SDimitry Andric }; 1060*0fca6ea1SDimitry Andric 1061*0fca6ea1SDimitry Andric // Verify that each CallStackId is computed with hashCallStack. This function 1062*0fca6ea1SDimitry Andric // is intended to help transition from CallStack to CSId in 1063*0fca6ea1SDimitry Andric // IndexedAllocationInfo. 1064*0fca6ea1SDimitry Andric void verifyIndexedMemProfRecord(const IndexedMemProfRecord &Record); 1065*0fca6ea1SDimitry Andric 1066*0fca6ea1SDimitry Andric // Verify that each CallStackId is computed with hashCallStack. This function 1067*0fca6ea1SDimitry Andric // is intended to help transition from CallStack to CSId in 1068*0fca6ea1SDimitry Andric // IndexedAllocationInfo. 1069*0fca6ea1SDimitry Andric void verifyFunctionProfileData( 1070*0fca6ea1SDimitry Andric const llvm::MapVector<GlobalValue::GUID, IndexedMemProfRecord> 1071*0fca6ea1SDimitry Andric &FunctionProfileData); 107281ad6265SDimitry Andric } // namespace memprof 107381ad6265SDimitry Andric } // namespace llvm 107481ad6265SDimitry Andric 107581ad6265SDimitry Andric #endif // LLVM_PROFILEDATA_MEMPROF_H_ 1076