#include "llvm/ProfileData/MemProf.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/IR/Function.h"
#include "llvm/ProfileData/InstrProf.h"
#include "llvm/ProfileData/SampleProf.h"
#include "llvm/Support/BLAKE3.h"
#include "llvm/Support/Endian.h"
#include "llvm/Support/EndianStream.h"
#include "llvm/Support/HashBuilder.h"

namespace llvm {
namespace memprof {
MemProfSchema getFullSchema() {
  MemProfSchema List;
#define MIBEntryDef(NameTag, Name, Type) List.push_back(Meta::Name);
#include "llvm/ProfileData/MIBEntryDef.inc"
#undef MIBEntryDef
  return List;
}
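
// The X-macro above pulls in one MIBEntryDef(NameTag, Name, Type) line per
// MemInfoBlock field from MIBEntryDef.inc, so the body of getFullSchema
// expands to one push_back per field, roughly:
//
//   List.push_back(Meta::AllocCount);
//   List.push_back(Meta::TotalSize);
//   ...
//
// (The field list shown is illustrative; MIBEntryDef.inc is authoritative.)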

MemProfSchema getHotColdSchema() {
  return {Meta::AllocCount, Meta::TotalSize, Meta::TotalLifetime,
          Meta::TotalLifetimeAccessDensity};
}

static size_t serializedSizeV0(const IndexedAllocationInfo &IAI,
                               const MemProfSchema &Schema) {
  size_t Size = 0;
  // The number of frames to serialize.
  Size += sizeof(uint64_t);
  // The callstack frame ids.
  Size += sizeof(FrameId) * IAI.CallStack.size();
  // The size of the payload.
  Size += PortableMemInfoBlock::serializedSize(Schema);
  return Size;
}

static size_t serializedSizeV2(const IndexedAllocationInfo &IAI,
                               const MemProfSchema &Schema) {
  size_t Size = 0;
  // The CallStackId.
  Size += sizeof(CallStackId);
  // The size of the payload.
  Size += PortableMemInfoBlock::serializedSize(Schema);
  return Size;
}

static size_t serializedSizeV3(const IndexedAllocationInfo &IAI,
                               const MemProfSchema &Schema) {
  size_t Size = 0;
  // The linear call stack ID.
  Size += sizeof(LinearCallStackId);
  // The size of the payload.
  Size += PortableMemInfoBlock::serializedSize(Schema);
  return Size;
}

size_t IndexedAllocationInfo::serializedSize(const MemProfSchema &Schema,
                                             IndexedVersion Version) const {
  switch (Version) {
  case Version0:
  case Version1:
    return serializedSizeV0(*this, Schema);
  case Version2:
    return serializedSizeV2(*this, Schema);
  case Version3:
    return serializedSizeV3(*this, Schema);
  }
  llvm_unreachable("unsupported MemProf version");
}
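
// Per-version layout of a single IndexedAllocationInfo, as implied by the
// size helpers above and the serialize functions below (all fields
// little-endian, payload fields in schema order):
//
//   V0/V1: uint64_t NumFrames, FrameId Frames[NumFrames], payload
//   V2:    CallStackId CSId (hash of the call stack), payload
//   V3:    LinearCallStackId CSId (position in the radix tree), payload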

static size_t serializedSizeV0(const IndexedMemProfRecord &Record,
                               const MemProfSchema &Schema) {
  // The number of alloc sites to serialize.
  size_t Result = sizeof(uint64_t);
  for (const IndexedAllocationInfo &N : Record.AllocSites)
    Result += N.serializedSize(Schema, Version0);

  // The number of callsites we have information for.
  Result += sizeof(uint64_t);
  for (const auto &Frames : Record.CallSites) {
    // The number of frame ids to serialize.
    Result += sizeof(uint64_t);
    Result += Frames.size() * sizeof(FrameId);
  }
  return Result;
}

static size_t serializedSizeV2(const IndexedMemProfRecord &Record,
                               const MemProfSchema &Schema) {
  // The number of alloc sites to serialize.
  size_t Result = sizeof(uint64_t);
  for (const IndexedAllocationInfo &N : Record.AllocSites)
    Result += N.serializedSize(Schema, Version2);

  // The number of callsites we have information for.
  Result += sizeof(uint64_t);
  // The CallStackIds.
  Result += Record.CallSiteIds.size() * sizeof(CallStackId);
  return Result;
}

static size_t serializedSizeV3(const IndexedMemProfRecord &Record,
                               const MemProfSchema &Schema) {
  // The number of alloc sites to serialize.
  size_t Result = sizeof(uint64_t);
  for (const IndexedAllocationInfo &N : Record.AllocSites)
    Result += N.serializedSize(Schema, Version3);

  // The number of callsites we have information for.
  Result += sizeof(uint64_t);
  // The linear call stack IDs.
  Result += Record.CallSiteIds.size() * sizeof(LinearCallStackId);
  return Result;
}

size_t IndexedMemProfRecord::serializedSize(const MemProfSchema &Schema,
                                            IndexedVersion Version) const {
  switch (Version) {
  case Version0:
  case Version1:
    return serializedSizeV0(*this, Schema);
  case Version2:
    return serializedSizeV2(*this, Schema);
  case Version3:
    return serializedSizeV3(*this, Schema);
  }
  llvm_unreachable("unsupported MemProf version");
}
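
// For reference, the byte stream produced below for one IndexedMemProfRecord
// in V2 (V0/V1 and V3 differ only in how call stacks are referenced, as
// sketched above):
//
//   uint64_t NumAllocSites
//   repeated NumAllocSites times: CallStackId CSId, payload
//   uint64_t NumCallSiteIds
//   CallStackId CallSiteIds[NumCallSiteIds]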

static void serializeV0(const IndexedMemProfRecord &Record,
                        const MemProfSchema &Schema, raw_ostream &OS) {
  using namespace support;

  endian::Writer LE(OS, llvm::endianness::little);

  LE.write<uint64_t>(Record.AllocSites.size());
  for (const IndexedAllocationInfo &N : Record.AllocSites) {
    LE.write<uint64_t>(N.CallStack.size());
    for (const FrameId &Id : N.CallStack)
      LE.write<FrameId>(Id);
    N.Info.serialize(Schema, OS);
  }

  // Related contexts.
  LE.write<uint64_t>(Record.CallSites.size());
  for (const auto &Frames : Record.CallSites) {
    LE.write<uint64_t>(Frames.size());
    for (const FrameId &Id : Frames)
      LE.write<FrameId>(Id);
  }
}

static void serializeV2(const IndexedMemProfRecord &Record,
                        const MemProfSchema &Schema, raw_ostream &OS) {
  using namespace support;

  endian::Writer LE(OS, llvm::endianness::little);

  LE.write<uint64_t>(Record.AllocSites.size());
  for (const IndexedAllocationInfo &N : Record.AllocSites) {
    LE.write<CallStackId>(N.CSId);
    N.Info.serialize(Schema, OS);
  }

  // Related contexts.
  LE.write<uint64_t>(Record.CallSiteIds.size());
  for (const auto &CSId : Record.CallSiteIds)
    LE.write<CallStackId>(CSId);
}

static void serializeV3(
    const IndexedMemProfRecord &Record, const MemProfSchema &Schema,
    raw_ostream &OS,
    llvm::DenseMap<CallStackId, LinearCallStackId> &MemProfCallStackIndexes) {
  using namespace support;

  endian::Writer LE(OS, llvm::endianness::little);

  LE.write<uint64_t>(Record.AllocSites.size());
  for (const IndexedAllocationInfo &N : Record.AllocSites) {
    assert(MemProfCallStackIndexes.contains(N.CSId));
    LE.write<LinearCallStackId>(MemProfCallStackIndexes[N.CSId]);
    N.Info.serialize(Schema, OS);
  }

  // Related contexts.
  LE.write<uint64_t>(Record.CallSiteIds.size());
  for (const auto &CSId : Record.CallSiteIds) {
    assert(MemProfCallStackIndexes.contains(CSId));
    LE.write<LinearCallStackId>(MemProfCallStackIndexes[CSId]);
  }
}
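
// Note for callers: MemProfCallStackIndexes may be null for Version0 through
// Version2, but the Version3 path dereferences it unconditionally, so a V3
// writer must first build the CallStackId -> LinearCallStackId mapping (see
// CallStackRadixTreeBuilder below) and pass it in.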

void IndexedMemProfRecord::serialize(
    const MemProfSchema &Schema, raw_ostream &OS, IndexedVersion Version,
    llvm::DenseMap<CallStackId, LinearCallStackId> *MemProfCallStackIndexes)
    const {
  switch (Version) {
  case Version0:
  case Version1:
    serializeV0(*this, Schema, OS);
    return;
  case Version2:
    serializeV2(*this, Schema, OS);
    return;
  case Version3:
    serializeV3(*this, Schema, OS, *MemProfCallStackIndexes);
    return;
  }
  llvm_unreachable("unsupported MemProf version");
}

static IndexedMemProfRecord deserializeV0(const MemProfSchema &Schema,
                                          const unsigned char *Ptr) {
  using namespace support;

  IndexedMemProfRecord Record;

  // Read the meminfo nodes.
  const uint64_t NumNodes =
      endian::readNext<uint64_t, llvm::endianness::little>(Ptr);
  for (uint64_t I = 0; I < NumNodes; I++) {
    IndexedAllocationInfo Node;
    const uint64_t NumFrames =
        endian::readNext<uint64_t, llvm::endianness::little>(Ptr);
    for (uint64_t J = 0; J < NumFrames; J++) {
      const FrameId Id =
          endian::readNext<FrameId, llvm::endianness::little>(Ptr);
      Node.CallStack.push_back(Id);
    }
    Node.CSId = hashCallStack(Node.CallStack);
    Node.Info.deserialize(Schema, Ptr);
    Ptr += PortableMemInfoBlock::serializedSize(Schema);
    Record.AllocSites.push_back(Node);
  }

  // Read the callsite information.
  const uint64_t NumCtxs =
      endian::readNext<uint64_t, llvm::endianness::little>(Ptr);
  for (uint64_t J = 0; J < NumCtxs; J++) {
    const uint64_t NumFrames =
        endian::readNext<uint64_t, llvm::endianness::little>(Ptr);
    llvm::SmallVector<FrameId> Frames;
    Frames.reserve(NumFrames);
    for (uint64_t K = 0; K < NumFrames; K++) {
      const FrameId Id =
          endian::readNext<FrameId, llvm::endianness::little>(Ptr);
      Frames.push_back(Id);
    }
    Record.CallSites.push_back(Frames);
    Record.CallSiteIds.push_back(hashCallStack(Frames));
  }

  return Record;
}
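
// Note that deserializeV0 above recomputes CSId and CallSiteIds with
// hashCallStack even though V0/V1 profiles never stored them, so records
// loaded from old profiles carry the same hash-based identifiers that newer
// code paths expect.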

static IndexedMemProfRecord deserializeV2(const MemProfSchema &Schema,
                                          const unsigned char *Ptr) {
  using namespace support;

  IndexedMemProfRecord Record;

  // Read the meminfo nodes.
  const uint64_t NumNodes =
      endian::readNext<uint64_t, llvm::endianness::little>(Ptr);
  Record.AllocSites.reserve(NumNodes);
  for (uint64_t I = 0; I < NumNodes; I++) {
    IndexedAllocationInfo Node;
    Node.CSId = endian::readNext<CallStackId, llvm::endianness::little>(Ptr);
    Node.Info.deserialize(Schema, Ptr);
    Ptr += PortableMemInfoBlock::serializedSize(Schema);
    Record.AllocSites.push_back(Node);
  }

  // Read the callsite information.
  const uint64_t NumCtxs =
      endian::readNext<uint64_t, llvm::endianness::little>(Ptr);
  Record.CallSiteIds.reserve(NumCtxs);
  for (uint64_t J = 0; J < NumCtxs; J++) {
    CallStackId CSId =
        endian::readNext<CallStackId, llvm::endianness::little>(Ptr);
    Record.CallSiteIds.push_back(CSId);
  }

  return Record;
}

static IndexedMemProfRecord deserializeV3(const MemProfSchema &Schema,
                                          const unsigned char *Ptr) {
  using namespace support;

  IndexedMemProfRecord Record;

  // Read the meminfo nodes.
  const uint64_t NumNodes =
      endian::readNext<uint64_t, llvm::endianness::little>(Ptr);
  Record.AllocSites.reserve(NumNodes);
  for (uint64_t I = 0; I < NumNodes; I++) {
    IndexedAllocationInfo Node;
    Node.CSId =
        endian::readNext<LinearCallStackId, llvm::endianness::little>(Ptr);
    Node.Info.deserialize(Schema, Ptr);
    Ptr += PortableMemInfoBlock::serializedSize(Schema);
    Record.AllocSites.push_back(Node);
  }

  // Read the callsite information.
  const uint64_t NumCtxs =
      endian::readNext<uint64_t, llvm::endianness::little>(Ptr);
  Record.CallSiteIds.reserve(NumCtxs);
  for (uint64_t J = 0; J < NumCtxs; J++) {
    // We are storing LinearCallStackId in CallSiteIds, which is a vector of
    // CallStackId. Assert that CallStackId is no smaller than
    // LinearCallStackId.
    static_assert(sizeof(LinearCallStackId) <= sizeof(CallStackId));
    LinearCallStackId CSId =
        endian::readNext<LinearCallStackId, llvm::endianness::little>(Ptr);
    Record.CallSiteIds.push_back(CSId);
  }

  return Record;
}
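
// Usage sketch (illustrative only): deserialize expects Ptr to point at a
// record written by serialize with the same Schema and Version, so a
// round-trip looks like
//
//   std::string Buf;
//   raw_string_ostream OS(Buf);
//   Record.serialize(Schema, OS, Version2, /*MemProfCallStackIndexes=*/nullptr);
//   IndexedMemProfRecord Copy = IndexedMemProfRecord::deserialize(
//       Schema, reinterpret_cast<const unsigned char *>(Buf.data()), Version2);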

IndexedMemProfRecord
IndexedMemProfRecord::deserialize(const MemProfSchema &Schema,
                                  const unsigned char *Ptr,
                                  IndexedVersion Version) {
  switch (Version) {
  case Version0:
  case Version1:
    return deserializeV0(Schema, Ptr);
  case Version2:
    return deserializeV2(Schema, Ptr);
  case Version3:
    return deserializeV3(Schema, Ptr);
  }
  llvm_unreachable("unsupported MemProf version");
}
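
// toMemProfRecord converts an indexed record back to a symbolized
// MemProfRecord. The callback maps a CallStackId to the frames of that call
// stack; for example, a hypothetical caller holding the mapping in a DenseMap
// might write:
//
//   llvm::DenseMap<CallStackId, std::vector<Frame>> CallStacks = ...;
//   MemProfRecord R = IndexedRecord.toMemProfRecord(
//       [&](CallStackId CSId) { return CallStacks.lookup(CSId); });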

MemProfRecord IndexedMemProfRecord::toMemProfRecord(
    llvm::function_ref<std::vector<Frame>(const CallStackId)> Callback) const {
  MemProfRecord Record;

  Record.AllocSites.reserve(AllocSites.size());
  for (const IndexedAllocationInfo &IndexedAI : AllocSites) {
    AllocationInfo AI;
    AI.Info = IndexedAI.Info;
    AI.CallStack = Callback(IndexedAI.CSId);
    Record.AllocSites.push_back(std::move(AI));
  }

  Record.CallSites.reserve(CallSiteIds.size());
  for (CallStackId CSId : CallSiteIds)
    Record.CallSites.push_back(Callback(CSId));

  return Record;
}

GlobalValue::GUID IndexedMemProfRecord::getGUID(const StringRef FunctionName) {
  // Canonicalize the function name to drop suffixes such as ".llvm.". Note
  // we do not drop any ".__uniq." suffixes, as getCanonicalFnName does not drop
  // those by default. This is by design to differentiate internal linkage
  // functions during matching. By dropping the other suffixes we can then match
  // functions in the profile use phase prior to their addition. Note that this
  // applies to both instrumented and sampled function names.
  StringRef CanonicalName =
      sampleprof::FunctionSamples::getCanonicalFnName(FunctionName);

  // We use the function guid which we expect to be a uint64_t. At
  // this time, it is the lower 64 bits of the md5 of the canonical
  // function name.
  return Function::getGUID(CanonicalName);
}

Expected<MemProfSchema> readMemProfSchema(const unsigned char *&Buffer) {
  using namespace support;

  const unsigned char *Ptr = Buffer;
  const uint64_t NumSchemaIds =
      endian::readNext<uint64_t, llvm::endianness::little>(Ptr);
  if (NumSchemaIds > static_cast<uint64_t>(Meta::Size)) {
    return make_error<InstrProfError>(instrprof_error::malformed,
                                      "memprof schema invalid");
  }

  MemProfSchema Result;
  for (size_t I = 0; I < NumSchemaIds; I++) {
    const uint64_t Tag =
        endian::readNext<uint64_t, llvm::endianness::little>(Ptr);
    if (Tag >= static_cast<uint64_t>(Meta::Size)) {
      return make_error<InstrProfError>(instrprof_error::malformed,
                                        "memprof schema invalid");
    }
    Result.push_back(static_cast<Meta>(Tag));
  }
  // Advance the buffer to one past the schema if we succeeded.
  Buffer = Ptr;
  return Result;
}

CallStackId hashCallStack(ArrayRef<FrameId> CS) {
  llvm::HashBuilder<llvm::TruncatedBLAKE3<8>, llvm::endianness::little>
      HashBuilder;
  for (FrameId F : CS)
    HashBuilder.add(F);
  llvm::BLAKE3Result<8> Hash = HashBuilder.final();
  CallStackId CSId;
  std::memcpy(&CSId, Hash.data(), sizeof(Hash));
  return CSId;
}
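
// hashCallStack derives a CallStackId from an 8-byte truncated BLAKE3 hash
// over the frame ids, so identical call stacks always share an id while
// distinct stacks are assumed not to collide in the 64-bit space;
// verifyIndexedMemProfRecord below re-derives the hash as a consistency check.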

// Encode a call stack into RadixArray. Return the starting index within
// RadixArray. For each call stack we encode, we emit two or three components
// into RadixArray. If a given call stack doesn't have a common prefix relative
// to the previous one, we emit:
//
// - the frames in the given call stack in the root-to-leaf order
//
// - the length of the given call stack
//
// If a given call stack has a non-empty common prefix relative to the previous
// one, we emit:
//
// - the relative location of the common prefix, encoded as a negative number.
//
// - a portion of the given call stack that's beyond the common prefix
//
// - the length of the given call stack, including the length of the common
//   prefix.
//
// The resulting RadixArray requires a somewhat unintuitive backward traversal
// to reconstruct a call stack -- read the call stack length and scan backward
// while collecting frames in the leaf-to-root order. build, the caller of this
// function, reverses RadixArray in place so that we can reconstruct a call
// stack as if we were deserializing an array in a typical way -- the call
// stack length followed by the frames in the leaf-to-root order except that we
// need to handle pointers to parents along the way.
//
// To quickly determine the location of the common prefix within RadixArray,
// Indexes caches the indexes of the previous call stack's frames within
// RadixArray.
LinearCallStackId CallStackRadixTreeBuilder::encodeCallStack(
    const llvm::SmallVector<FrameId> *CallStack,
    const llvm::SmallVector<FrameId> *Prev,
    const llvm::DenseMap<FrameId, LinearFrameId> &MemProfFrameIndexes) {
  // Compute the length of the common root prefix between Prev and CallStack.
  uint32_t CommonLen = 0;
  if (Prev) {
    auto Pos = std::mismatch(Prev->rbegin(), Prev->rend(), CallStack->rbegin(),
                             CallStack->rend());
    CommonLen = std::distance(CallStack->rbegin(), Pos.second);
  }

  // Drop the portion beyond CommonLen.
  assert(CommonLen <= Indexes.size());
  Indexes.resize(CommonLen);

  // Append a pointer to the parent.
  if (CommonLen) {
    uint32_t CurrentIndex = RadixArray.size();
    uint32_t ParentIndex = Indexes.back();
    // The offset to the parent must be negative because we are pointing to an
    // element we've already added to RadixArray.
    assert(ParentIndex < CurrentIndex);
    RadixArray.push_back(ParentIndex - CurrentIndex);
  }

  // Copy the part of the call stack beyond the common prefix to RadixArray.
  assert(CommonLen <= CallStack->size());
  for (FrameId F : llvm::drop_begin(llvm::reverse(*CallStack), CommonLen)) {
    // Remember the index of F in RadixArray.
    Indexes.push_back(RadixArray.size());
    RadixArray.push_back(MemProfFrameIndexes.find(F)->second);
  }
  assert(CallStack->size() == Indexes.size());

  // End with the call stack length.
  RadixArray.push_back(CallStack->size());

  // Return the index within RadixArray where we can start reconstructing a
  // given call stack from.
  return RadixArray.size() - 1;
}
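
// A worked example of the encoding above (illustrative only). Assume
// MemProfFrameIndexes maps each frame to itself, call stacks are stored
// leaf-to-root, and build encodes CS2 = [F4, F2, F1] followed by
// CS1 = [F3, F2, F1]:
//
//   After CS2 (no previous stack, so no common prefix):
//     RadixArray = [F1, F2, F4, 3]                 Pos(CS2) = 3
//   After CS1 (shares the root prefix F1 -> F2 with CS2):
//     RadixArray = [F1, F2, F4, 3, -3, F3, 3]      Pos(CS1) = 6
//
// The -3 at index 4 points back to index 1, which holds F2, the deepest frame
// of the common prefix. build then reverses the whole array and remaps the
// recorded positions accordingly.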

void CallStackRadixTreeBuilder::build(
    llvm::MapVector<CallStackId, llvm::SmallVector<FrameId>>
        &&MemProfCallStackData,
    const llvm::DenseMap<FrameId, LinearFrameId> &MemProfFrameIndexes,
    llvm::DenseMap<FrameId, FrameStat> &FrameHistogram) {
  // Take the vector portion of MemProfCallStackData. The vector is exactly
  // what we need to sort. Also, we no longer need its lookup capability.
  llvm::SmallVector<CSIdPair, 0> CallStacks = MemProfCallStackData.takeVector();

  // Return early if we have no work to do.
  if (CallStacks.empty()) {
    RadixArray.clear();
    CallStackPos.clear();
    return;
  }

  // Sorting the list of call stacks in the dictionary order is sufficient to
  // maximize the length of the common prefix between two adjacent call stacks
  // and thus minimize the length of RadixArray. However, we go one step
  // further and try to reduce the number of times we follow pointers to
  // parents during deserialization. Consider a poorly encoded radix tree:
  //
  // CallStackId 1:  f1 -> f2 -> f3
  //                  |
  // CallStackId 2:   +--- f4 -> f5
  //                        |
  // CallStackId 3:         +--> f6
  //
  // Here, f2 and f4 appear once and twice, respectively, in the call stacks.
  // Once we encode CallStackId 1 into RadixArray, every other call stack with
  // common prefix f1 ends up pointing to CallStackId 1. Since CallStackId 3
  // shares "f1 f4" with CallStackId 2, CallStackId 3 needs to follow pointers
  // to parents twice.
  //
  // We try to alleviate the situation by sorting the list of call stacks by
  // comparing the popularity of frames rather than the integer values of
  // FrameIds. In the example above, f4 is more popular than f2, so we sort the
  // call stacks and encode them as:
  //
  // CallStackId 2:  f1 -- f4 -> f5
  //                  |     |
  // CallStackId 3:   |     +--> f6
  //                  |
  // CallStackId 1:   +--> f2 -> f3
  //
  // Notice that CallStackId 3 follows a pointer to a parent only once.
  //
  // All this is a quick-n-dirty trick to reduce the number of jumps. The
  // proper way would be to compute the weight of each radix tree node -- how
  // many call stacks use a given radix tree node, and encode a radix tree from
  // the heaviest node first. We do not do so because that's a lot of work.
  llvm::sort(CallStacks, [&](const CSIdPair &L, const CSIdPair &R) {
    // Call stacks are stored from leaf to root. Perform comparisons from the
    // root.
    return std::lexicographical_compare(
        L.second.rbegin(), L.second.rend(), R.second.rbegin(), R.second.rend(),
        [&](FrameId F1, FrameId F2) {
          uint64_t H1 = FrameHistogram[F1].Count;
          uint64_t H2 = FrameHistogram[F2].Count;
          // Popular frames should come later because we encode call stacks
          // from the last one in the list.
          if (H1 != H2)
            return H1 < H2;
          // For sort stability.
          return F1 < F2;
        });
  });

  // Reserve some reasonable amount of storage.
  RadixArray.clear();
  RadixArray.reserve(CallStacks.size() * 8);

  // Indexes will grow as long as the longest call stack.
  Indexes.clear();
  Indexes.reserve(512);

  // CallStackPos will grow to exactly CallStacks.size() entries.
  CallStackPos.clear();
  CallStackPos.reserve(CallStacks.size());

  // Compute the radix array. We encode one call stack at a time, computing the
  // longest prefix that's shared with the previous call stack we encode. For
  // each call stack we encode, we remember a mapping from CallStackId to its
  // position within RadixArray.
  //
  // As an optimization, we encode from the last call stack in CallStacks to
  // reduce the number of times we follow pointers to the parents. Consider the
  // list of call stacks that has been sorted in the dictionary order:
  //
  // Call Stack 1: F1
  // Call Stack 2: F1 -> F2
  // Call Stack 3: F1 -> F2 -> F3
  //
  // If we traversed CallStacks in the forward order, we would end up with a
  // radix tree like:
  //
  // Call Stack 1: F1
  //                |
  // Call Stack 2:  +---> F2
  //                       |
  // Call Stack 3:         +---> F3
  //
  // Notice that each call stack jumps to the previous one. However, if we
  // traverse CallStacks in the reverse order, then Call Stack 3 has the
  // complete call stack encoded without any pointers. Call Stacks 1 and 2
  // point to appropriate prefixes of Call Stack 3.
  const llvm::SmallVector<FrameId> *Prev = nullptr;
  for (const auto &[CSId, CallStack] : llvm::reverse(CallStacks)) {
    LinearCallStackId Pos =
        encodeCallStack(&CallStack, Prev, MemProfFrameIndexes);
    CallStackPos.insert({CSId, Pos});
    Prev = &CallStack;
  }

  // "RadixArray.size() - 1" below is problematic if RadixArray is empty.
  assert(!RadixArray.empty());

  // Reverse the radix array in place. We do so mostly for intuitive
  // deserialization where we would read the length field and then the call
  // stack frames proper just like any other array deserialization, except
  // that we have occasional jumps to take advantage of prefixes.
  for (size_t I = 0, J = RadixArray.size() - 1; I < J; ++I, --J)
    std::swap(RadixArray[I], RadixArray[J]);

  // "Reverse" the indexes stored in CallStackPos.
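  // An entry that sat at index I before the reversal now sits at index
  // RadixArray.size() - 1 - I, so remap each recorded starting position with
  // the same formula.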
  for (auto &[K, V] : CallStackPos)
    V = RadixArray.size() - 1 - V;
}

llvm::DenseMap<FrameId, FrameStat>
computeFrameHistogram(llvm::MapVector<CallStackId, llvm::SmallVector<FrameId>>
                          &MemProfCallStackData) {
  llvm::DenseMap<FrameId, FrameStat> Histogram;

  for (const auto &KV : MemProfCallStackData) {
    const auto &CS = KV.second;
    for (unsigned I = 0, E = CS.size(); I != E; ++I) {
      auto &S = Histogram[CS[I]];
      ++S.Count;
      S.PositionSum += I;
    }
  }
  return Histogram;
}

void verifyIndexedMemProfRecord(const IndexedMemProfRecord &Record) {
  for (const auto &AS : Record.AllocSites) {
    assert(AS.CSId == hashCallStack(AS.CallStack));
    (void)AS;
  }
}

void verifyFunctionProfileData(
    const llvm::MapVector<GlobalValue::GUID, IndexedMemProfRecord>
        &FunctionProfileData) {
  for (const auto &[GUID, Record] : FunctionProfileData) {
    (void)GUID;
    verifyIndexedMemProfRecord(Record);
  }
}

} // namespace memprof
} // namespace llvm