#ifndef LLVM_PROFILEDATA_MEMPROF_H_
#define LLVM_PROFILEDATA_MEMPROF_H_

#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/STLForwardCompat.h"
#include "llvm/ADT/STLFunctionalExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/ProfileData/MemProfData.inc"
#include "llvm/Support/Endian.h"
#include "llvm/Support/EndianStream.h"
#include "llvm/Support/raw_ostream.h"

#include <bitset>
#include <cstdint>
#include <optional>

namespace llvm {
namespace memprof {

struct MemProfRecord;

// The versions of the indexed MemProf format.
enum IndexedVersion : uint64_t {
  // Version 0: This version didn't have a version field.
  Version0 = 0,
  // Version 1: Added a version field to the header.
  Version1 = 1,
  // Version 2: Added a call stack table.
  Version2 = 2,
  // Version 3: Added a radix tree for call stacks. Switched to linear IDs for
  // frames and call stacks.
  Version3 = 3,
};

constexpr uint64_t MinimumSupportedVersion = Version0;
constexpr uint64_t MaximumSupportedVersion = Version3;

// Verify that the minimum and maximum satisfy the obvious constraint.
static_assert(MinimumSupportedVersion <= MaximumSupportedVersion);

enum class Meta : uint64_t {
  Start = 0,
#define MIBEntryDef(NameTag, Name, Type) NameTag,
#include "llvm/ProfileData/MIBEntryDef.inc"
#undef MIBEntryDef
  Size
};

using MemProfSchema = llvm::SmallVector<Meta, static_cast<int>(Meta::Size)>;

// Returns the full schema currently in use.
MemProfSchema getFullSchema();

// Returns the schema consisting of the fields used for hot cold memory
// hinting.
MemProfSchema getHotColdSchema();

// Holds the actual MemInfoBlock data with all fields. Contents may be read or
// written partially by providing an appropriate schema to the serialize and
// deserialize methods.
struct PortableMemInfoBlock {
  PortableMemInfoBlock() = default;
  explicit PortableMemInfoBlock(const MemInfoBlock &Block,
                                const MemProfSchema &IncomingSchema) {
    for (const Meta Id : IncomingSchema)
      Schema.set(llvm::to_underlying(Id));
#define MIBEntryDef(NameTag, Name, Type) Name = Block.Name;
#include "llvm/ProfileData/MIBEntryDef.inc"
#undef MIBEntryDef
  }

  PortableMemInfoBlock(const MemProfSchema &Schema, const unsigned char *Ptr) {
    deserialize(Schema, Ptr);
  }

  // Read the contents of \p Ptr based on the \p Schema to populate the
  // MemInfoBlock member.
  void deserialize(const MemProfSchema &IncomingSchema,
                   const unsigned char *Ptr) {
    using namespace support;

    Schema.reset();
    for (const Meta Id : IncomingSchema) {
      switch (Id) {
#define MIBEntryDef(NameTag, Name, Type)                                       \
  case Meta::Name: {                                                           \
    Name = endian::readNext<Type, llvm::endianness::little>(Ptr);              \
  } break;
#include "llvm/ProfileData/MIBEntryDef.inc"
#undef MIBEntryDef
      default:
        llvm_unreachable("Unknown meta type id, is the profile collected from "
                         "a newer version of the runtime?");
      }

      Schema.set(llvm::to_underlying(Id));
    }
  }

  // Write the contents of the MemInfoBlock based on the \p Schema provided to
  // the raw_ostream \p OS.
  void serialize(const MemProfSchema &Schema, raw_ostream &OS) const {
    using namespace support;

    endian::Writer LE(OS, llvm::endianness::little);
    for (const Meta Id : Schema) {
      switch (Id) {
#define MIBEntryDef(NameTag, Name, Type)                                       \
  case Meta::Name: {                                                           \
    LE.write<Type>(Name);                                                      \
  } break;
#include "llvm/ProfileData/MIBEntryDef.inc"
#undef MIBEntryDef
      default:
        llvm_unreachable("Unknown meta type id, invalid input?");
      }
    }
  }

  // Print out the contents of the MemInfoBlock in YAML format.
  void printYAML(raw_ostream &OS) const {
    OS << "      MemInfoBlock:\n";
#define MIBEntryDef(NameTag, Name, Type)                                       \
  OS << "        " << #Name << ": " << Name << "\n";
#include "llvm/ProfileData/MIBEntryDef.inc"
#undef MIBEntryDef
    if (AccessHistogramSize > 0) {
      OS << "        " << "AccessHistogramValues" << ":";
      for (uint32_t I = 0; I < AccessHistogramSize; ++I) {
        OS << " " << ((uint64_t *)AccessHistogram)[I];
      }
      OS << "\n";
    }
  }

  // Return the schema, only for unit tests.
  std::bitset<llvm::to_underlying(Meta::Size)> getSchema() const {
    return Schema;
  }

  // Define getters for each type which can be called by analyses.
#define MIBEntryDef(NameTag, Name, Type)                                       \
  Type get##Name() const {                                                     \
    assert(Schema[llvm::to_underlying(Meta::Name)]);                           \
    return Name;                                                               \
  }
#include "llvm/ProfileData/MIBEntryDef.inc"
#undef MIBEntryDef

  void clear() { *this = PortableMemInfoBlock(); }

  bool operator==(const PortableMemInfoBlock &Other) const {
    if (Other.Schema != Schema)
      return false;

#define MIBEntryDef(NameTag, Name, Type)                                       \
  if (Schema[llvm::to_underlying(Meta::Name)] &&                               \
      Other.get##Name() != get##Name())                                        \
    return false;
#include "llvm/ProfileData/MIBEntryDef.inc"
#undef MIBEntryDef
    return true;
  }

  bool operator!=(const PortableMemInfoBlock &Other) const {
    return !operator==(Other);
  }

  static size_t serializedSize(const MemProfSchema &Schema) {
    size_t Result = 0;

    for (const Meta Id : Schema) {
      switch (Id) {
#define MIBEntryDef(NameTag, Name, Type)                                       \
  case Meta::Name: {                                                           \
    Result += sizeof(Type);                                                    \
  } break;
#include "llvm/ProfileData/MIBEntryDef.inc"
#undef MIBEntryDef
      default:
        llvm_unreachable("Unknown meta type id, invalid input?");
      }
    }

    return Result;
  }

private:
  // The set of available fields, indexed by Meta::Name.
  std::bitset<llvm::to_underlying(Meta::Size)> Schema;

#define MIBEntryDef(NameTag, Name, Type) Type Name = Type();
#include "llvm/ProfileData/MIBEntryDef.inc"
#undef MIBEntryDef
};

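// Illustrative sketch (not part of the header): a serialize/deserialize round
// trip through a reduced schema. `MIB` is an assumed MemInfoBlock from the
// runtime; only the fields named in the schema survive the round trip.
//
//   MemProfSchema Schema = getHotColdSchema();
//   PortableMemInfoBlock Block(MIB, Schema);
//   std::string Buf;
//   llvm::raw_string_ostream OS(Buf);
//   Block.serialize(Schema, OS);
//   PortableMemInfoBlock Copy(
//       Schema, reinterpret_cast<const unsigned char *>(Buf.data()));
//   assert(Copy == Block); // operator== only compares fields in the schema.
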
// A type representing the id generated by hashing the contents of the Frame.
using FrameId = uint64_t;
// A type representing the id to index into the frame array.
using LinearFrameId = uint32_t;
// Describes a call frame for a dynamic allocation context. The contents of
// the frame are populated by symbolizing the stack depot call frame from the
// compiler runtime.
struct Frame {
  // A uuid (uint64_t) identifying the function. It is obtained by
  // llvm::md5(FunctionName) which returns the lower 64 bits.
  GlobalValue::GUID Function;
  // The symbol name for the function. Only populated in the Frame by the
  // reader if requested during initialization. This field should not be
  // serialized.
  std::unique_ptr<std::string> SymbolName;
  // The source line offset of the call from the beginning of the parent
  // function.
  uint32_t LineOffset;
  // The source column number of the call to help distinguish multiple calls
  // on the same line.
  uint32_t Column;
  // Whether the current frame is inlined.
  bool IsInlineFrame;

  Frame(const Frame &Other) {
    Function = Other.Function;
    SymbolName = Other.SymbolName
                     ? std::make_unique<std::string>(*Other.SymbolName)
                     : nullptr;
    LineOffset = Other.LineOffset;
    Column = Other.Column;
    IsInlineFrame = Other.IsInlineFrame;
  }

  Frame(GlobalValue::GUID Hash, uint32_t Off, uint32_t Col, bool Inline)
      : Function(Hash), LineOffset(Off), Column(Col), IsInlineFrame(Inline) {}

  bool operator==(const Frame &Other) const {
    // Ignore the SymbolName field to avoid a string compare. Comparing the
    // function hash serves the same purpose.
    return Other.Function == Function && Other.LineOffset == LineOffset &&
           Other.Column == Column && Other.IsInlineFrame == IsInlineFrame;
  }

  Frame &operator=(const Frame &Other) {
    Function = Other.Function;
    SymbolName = Other.SymbolName
                     ? std::make_unique<std::string>(*Other.SymbolName)
                     : nullptr;
    LineOffset = Other.LineOffset;
    Column = Other.Column;
    IsInlineFrame = Other.IsInlineFrame;
    return *this;
  }

  bool operator!=(const Frame &Other) const { return !operator==(Other); }

  bool hasSymbolName() const { return !!SymbolName; }

  StringRef getSymbolName() const {
    assert(hasSymbolName());
    return *SymbolName;
  }

  std::string getSymbolNameOr(StringRef Alt) const {
    return std::string(hasSymbolName() ? getSymbolName() : Alt);
  }

  // Write the contents of the frame to the ostream \p OS.
  void serialize(raw_ostream &OS) const {
    using namespace support;

    endian::Writer LE(OS, llvm::endianness::little);

    // If the type of the GlobalValue::GUID changes, then we need to update
    // the reader and the writer.
    static_assert(std::is_same<GlobalValue::GUID, uint64_t>::value,
                  "Expect GUID to be uint64_t.");
    LE.write<uint64_t>(Function);

    LE.write<uint32_t>(LineOffset);
    LE.write<uint32_t>(Column);
    LE.write<bool>(IsInlineFrame);
  }

  // Read a frame from char data which has been serialized as little endian.
  static Frame deserialize(const unsigned char *Ptr) {
    using namespace support;

    const uint64_t F =
        endian::readNext<uint64_t, llvm::endianness::little>(Ptr);
    const uint32_t L =
        endian::readNext<uint32_t, llvm::endianness::little>(Ptr);
    const uint32_t C =
        endian::readNext<uint32_t, llvm::endianness::little>(Ptr);
    const bool I = endian::readNext<bool, llvm::endianness::little>(Ptr);
    return Frame(/*Function=*/F, /*LineOffset=*/L, /*Column=*/C,
                 /*IsInlineFrame=*/I);
  }

  // Returns the size of the frame information.
  static constexpr size_t serializedSize() {
    return sizeof(Frame::Function) + sizeof(Frame::LineOffset) +
           sizeof(Frame::Column) + sizeof(Frame::IsInlineFrame);
  }

  // Print the frame information in YAML format.
  void printYAML(raw_ostream &OS) const {
    OS << "      -\n"
       << "        Function: " << Function << "\n"
       << "        SymbolName: " << getSymbolNameOr("<None>") << "\n"
       << "        LineOffset: " << LineOffset << "\n"
       << "        Column: " << Column << "\n"
       << "        Inline: " << IsInlineFrame << "\n";
  }

  // Return a hash value based on the contents of the frame. We don't use the
  // hash routines from LLVM's ADT here because we are going to persist the
  // hash id, and the hash combine algorithm in ADT uses a new randomized seed
  // on each run.
  inline FrameId hash() const {
    auto HashCombine = [](auto Value, size_t Seed) {
      std::hash<decltype(Value)> Hasher;
      // The constant used below is the 64-bit representation of the
      // fractional part of the golden ratio, used here for the randomness in
      // its bit pattern.
      return Hasher(Value) + 0x9e3779b97f4a7c15 + (Seed << 6) + (Seed >> 2);
    };

    size_t Result = 0;
    Result ^= HashCombine(Function, Result);
    Result ^= HashCombine(LineOffset, Result);
    Result ^= HashCombine(Column, Result);
    Result ^= HashCombine(IsInlineFrame, Result);
    return static_cast<FrameId>(Result);
  }
};

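// Illustrative sketch (not part of the header): a FrameId is derived from the
// frame contents, so logically identical frames hash to the same id across
// runs. The GUID and location values below are hypothetical.
//
//   Frame F(/*Function=*/0x1234abcdULL, /*LineOffset=*/3, /*Column=*/7,
//           /*IsInlineFrame=*/false);
//   FrameId Id = F.hash(); // Stable across runs, unlike llvm::hash_combine.
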
// A type representing the index into the table of call stacks.
using CallStackId = uint64_t;

// A type representing the index into the call stack array.
using LinearCallStackId = uint32_t;

// Holds allocation information in a space efficient format where frames are
// represented using unique identifiers.
struct IndexedAllocationInfo {
  // The dynamic calling context for the allocation in bottom-up (leaf-to-root)
  // order. Frame contents are stored out-of-line.
  // TODO: Remove once we fully transition to CSId.
  llvm::SmallVector<FrameId> CallStack;
  // Conceptually the same as above. We are going to keep both CallStack and
  // CallStackId while we are transitioning from CallStack to CallStackId.
  CallStackId CSId = 0;
  // The statistics obtained from the runtime for the allocation.
  PortableMemInfoBlock Info;

  IndexedAllocationInfo() = default;
  IndexedAllocationInfo(ArrayRef<FrameId> CS, CallStackId CSId,
                        const MemInfoBlock &MB,
                        const MemProfSchema &Schema = getFullSchema())
      : CallStack(CS.begin(), CS.end()), CSId(CSId), Info(MB, Schema) {}

  // Returns the size in bytes when this allocation info struct is serialized.
  size_t serializedSize(const MemProfSchema &Schema,
                        IndexedVersion Version) const;

  bool operator==(const IndexedAllocationInfo &Other) const {
    if (Other.Info != Info)
      return false;

    if (Other.CSId != CSId)
      return false;
    return true;
  }

  bool operator!=(const IndexedAllocationInfo &Other) const {
    return !operator==(Other);
  }
};

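// Illustrative sketch (not part of the header): building an
// IndexedAllocationInfo from a symbolized call stack and the runtime's
// MemInfoBlock. `F1`, `F2`, and `MIB` are assumed inputs; hashCallStack is
// declared later in this header.
//
//   llvm::SmallVector<FrameId> CallStack = {F1.hash(), F2.hash()};
//   CallStackId CSId = hashCallStack(CallStack);
//   IndexedAllocationInfo AI(CallStack, CSId, MIB, getFullSchema());
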
// Holds allocation information with frame contents inline. The type should
// be used for temporary in-memory instances.
struct AllocationInfo {
  // Same as IndexedAllocationInfo::CallStack with the frame contents inline.
  std::vector<Frame> CallStack;
  // Same as IndexedAllocationInfo::Info.
  PortableMemInfoBlock Info;

  AllocationInfo() = default;
  AllocationInfo(
      const IndexedAllocationInfo &IndexedAI,
      llvm::function_ref<const Frame(const FrameId)> IdToFrameCallback) {
    for (const FrameId &Id : IndexedAI.CallStack) {
      CallStack.push_back(IdToFrameCallback(Id));
    }
    Info = IndexedAI.Info;
  }

  void printYAML(raw_ostream &OS) const {
    OS << "    -\n";
    OS << "      Callstack:\n";
    // TODO: Print out the frame on one line to make deep call stacks easier
    // to read once we have a test to check that valid YAML is generated.
    for (const Frame &F : CallStack) {
      F.printYAML(OS);
    }
    Info.printYAML(OS);
  }
};

// Holds the memprof profile information for a function. The internal
// representation stores frame ids for efficiency. This representation should
// be used in the profile conversion and manipulation tools.
struct IndexedMemProfRecord {
  // Memory allocation sites in this function for which we have memory
  // profiling data.
  llvm::SmallVector<IndexedAllocationInfo> AllocSites;
  // Holds call sites in this function which are part of some memory
  // allocation context. We store this as a list of locations, each with its
  // list of inline locations in bottom-up order, i.e. from leaf to root. The
  // inline location list may include additional entries; users should pick
  // the last entry in the list with the same function GUID.
  llvm::SmallVector<llvm::SmallVector<FrameId>> CallSites;
  // Conceptually the same as above. We are going to keep both CallSites and
  // CallSiteIds while we are transitioning from CallSites to CallSiteIds.
  llvm::SmallVector<CallStackId> CallSiteIds;

  void clear() {
    AllocSites.clear();
    CallSites.clear();
  }

  void merge(const IndexedMemProfRecord &Other) {
    // TODO: Filter out duplicates which may occur if multiple memprof
    // profiles are merged together using llvm-profdata.
    AllocSites.append(Other.AllocSites);
    CallSites.append(Other.CallSites);
  }

  size_t serializedSize(const MemProfSchema &Schema,
                        IndexedVersion Version) const;

  bool operator==(const IndexedMemProfRecord &Other) const {
    if (Other.AllocSites != AllocSites)
      return false;

    if (Other.CallSiteIds != CallSiteIds)
      return false;
    return true;
  }

  // Serializes the memprof records in \p Records to the ostream \p OS based
  // on the schema provided in \p Schema.
  void serialize(const MemProfSchema &Schema, raw_ostream &OS,
                 IndexedVersion Version,
                 llvm::DenseMap<CallStackId, LinearCallStackId>
                     *MemProfCallStackIndexes = nullptr) const;

  // Deserializes memprof records from the Buffer.
  static IndexedMemProfRecord deserialize(const MemProfSchema &Schema,
                                          const unsigned char *Buffer,
                                          IndexedVersion Version);

  // Convert IndexedMemProfRecord to MemProfRecord. Callback is used to
  // translate CallStackId to call stacks with frames inline.
  MemProfRecord toMemProfRecord(
      llvm::function_ref<std::vector<Frame>(const CallStackId)> Callback) const;

  // Returns the GUID for the function name after canonicalization. For
  // memprof, we remove any .llvm suffix added by LTO. MemProfRecords are
  // mapped to functions using this GUID.
  static GlobalValue::GUID getGUID(const StringRef FunctionName);
};

// Holds the memprof profile information for a function. The internal
// representation stores frame contents inline. This representation should
// be used for a small number of temporary, in-memory instances.
struct MemProfRecord {
  // Same as IndexedMemProfRecord::AllocSites with frame contents inline.
  llvm::SmallVector<AllocationInfo> AllocSites;
  // Same as IndexedMemProfRecord::CallSites with frame contents inline.
  llvm::SmallVector<std::vector<Frame>> CallSites;

  MemProfRecord() = default;
  MemProfRecord(
      const IndexedMemProfRecord &Record,
      llvm::function_ref<const Frame(const FrameId Id)> IdToFrameCallback) {
    for (const IndexedAllocationInfo &IndexedAI : Record.AllocSites) {
      AllocSites.emplace_back(IndexedAI, IdToFrameCallback);
    }
    for (const ArrayRef<FrameId> Site : Record.CallSites) {
      std::vector<Frame> Frames;
      for (const FrameId Id : Site) {
        Frames.push_back(IdToFrameCallback(Id));
      }
      CallSites.push_back(Frames);
    }
  }

  // Prints out the contents of the memprof record in YAML.
  void print(llvm::raw_ostream &OS) const {
    if (!AllocSites.empty()) {
      OS << "    AllocSites:\n";
      for (const AllocationInfo &N : AllocSites)
        N.printYAML(OS);
    }

    if (!CallSites.empty()) {
      OS << "    CallSites:\n";
      for (const std::vector<Frame> &Frames : CallSites) {
        for (const Frame &F : Frames) {
          OS << "    -\n";
          F.printYAML(OS);
        }
      }
    }
  }
};

// Reads a memprof schema from a buffer. All entries in the buffer are
// interpreted as uint64_t. The first entry in the buffer denotes the number of
// ids in the schema. Subsequent entries are integers which map to
// memprof::Meta enum class entries. After successfully reading the schema, the
// pointer is one byte past the schema contents.
Expected<MemProfSchema> readMemProfSchema(const unsigned char *&Buffer);

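// Illustrative sketch (not part of the header): the buffer layout expected by
// readMemProfSchema for a hypothetical two-field schema is a count followed by
// the Meta values, each stored as a uint64_t (the exact numeric values come
// from MIBEntryDef.inc):
//
//   [ 2, to_underlying(Meta::AllocCount), to_underlying(Meta::TotalSize) ]
//
//   const unsigned char *Ptr = Buffer; // Assumed serialized bytes.
//   Expected<MemProfSchema> SchemaOr = readMemProfSchema(Ptr);
//   if (!SchemaOr)
//     return SchemaOr.takeError(); // Unknown or malformed schema entry.
//   // On success, Ptr now points past the schema contents.
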
// Trait for reading IndexedMemProfRecord data from the on-disk hash table.
class RecordLookupTrait {
public:
  using data_type = const IndexedMemProfRecord &;
  using internal_key_type = uint64_t;
  using external_key_type = uint64_t;
  using hash_value_type = uint64_t;
  using offset_type = uint64_t;

  RecordLookupTrait() = delete;
  RecordLookupTrait(IndexedVersion V, const MemProfSchema &S)
      : Version(V), Schema(S) {}

  static bool EqualKey(uint64_t A, uint64_t B) { return A == B; }
  static uint64_t GetInternalKey(uint64_t K) { return K; }
  static uint64_t GetExternalKey(uint64_t K) { return K; }

  hash_value_type ComputeHash(uint64_t K) { return K; }

  static std::pair<offset_type, offset_type>
  ReadKeyDataLength(const unsigned char *&D) {
    using namespace support;

    offset_type KeyLen =
        endian::readNext<offset_type, llvm::endianness::little>(D);
    offset_type DataLen =
        endian::readNext<offset_type, llvm::endianness::little>(D);
    return std::make_pair(KeyLen, DataLen);
  }

  uint64_t ReadKey(const unsigned char *D, offset_type /*Unused*/) {
    using namespace support;
    return endian::readNext<external_key_type, llvm::endianness::little>(D);
  }

  data_type ReadData(uint64_t K, const unsigned char *D,
                     offset_type /*Unused*/) {
    Record = IndexedMemProfRecord::deserialize(Schema, D, Version);
    return Record;
  }

private:
  // Holds the MemProf version.
  IndexedVersion Version;
  // Holds the memprof schema used to deserialize records.
  MemProfSchema Schema;
  // Holds the records from one function deserialized from the indexed format.
  IndexedMemProfRecord Record;
};

// Trait for writing IndexedMemProfRecord data to the on-disk hash table.
class RecordWriterTrait {
public:
  using key_type = uint64_t;
  using key_type_ref = uint64_t;

  using data_type = IndexedMemProfRecord;
  using data_type_ref = IndexedMemProfRecord &;

  using hash_value_type = uint64_t;
  using offset_type = uint64_t;

private:
  // Pointer to the memprof schema to use for the generator.
  const MemProfSchema *Schema;
  // The MemProf version to use for the serialization.
  IndexedVersion Version;

  // Mappings from CallStackId to the indexes into the call stack array.
  llvm::DenseMap<CallStackId, LinearCallStackId> *MemProfCallStackIndexes;

public:
  // We do not support the default constructor, which does not set Version.
  RecordWriterTrait() = delete;
  RecordWriterTrait(
      const MemProfSchema *Schema, IndexedVersion V,
      llvm::DenseMap<CallStackId, LinearCallStackId> *MemProfCallStackIndexes)
      : Schema(Schema), Version(V),
        MemProfCallStackIndexes(MemProfCallStackIndexes) {}

  static hash_value_type ComputeHash(key_type_ref K) { return K; }

  std::pair<offset_type, offset_type>
  EmitKeyDataLength(raw_ostream &Out, key_type_ref K, data_type_ref V) {
    using namespace support;

    endian::Writer LE(Out, llvm::endianness::little);
    offset_type N = sizeof(K);
    LE.write<offset_type>(N);
    offset_type M = V.serializedSize(*Schema, Version);
    LE.write<offset_type>(M);
    return std::make_pair(N, M);
  }

  void EmitKey(raw_ostream &Out, key_type_ref K, offset_type /*Unused*/) {
    using namespace support;
    endian::Writer LE(Out, llvm::endianness::little);
    LE.write<uint64_t>(K);
  }

  void EmitData(raw_ostream &Out, key_type_ref /*Unused*/, data_type_ref V,
                offset_type /*Unused*/) {
    assert(Schema != nullptr && "MemProf schema is not initialized!");
    V.serialize(*Schema, Out, Version, MemProfCallStackIndexes);
    // Clear the IndexedMemProfRecord which results in clearing/freeing its
    // vectors of allocs and callsites. This is owned by the associated on-disk
    // hash table, but unused after this point. See also the comment added to
    // the client which constructs the on-disk hash table for this trait.
    V.clear();
  }
};

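// Illustrative sketch (not part of the header): these traits are intended to
// be plugged into the generic on-disk hash table machinery from
// llvm/Support/OnDiskHashTable.h. Roughly, the writer side looks like the
// following, where FunctionProfileData, Schema, MemProfCallStackIndexes, and
// OS are assumed inputs (see InstrProfWriter for the actual call sites):
//
//   OnDiskChainedHashTableGenerator<memprof::RecordWriterTrait> Generator;
//   memprof::RecordWriterTrait Trait(&Schema, memprof::Version3,
//                                    &MemProfCallStackIndexes);
//   for (auto &[GUID, Record] : FunctionProfileData)
//     Generator.insert(GUID, Record, Trait);
//   uint64_t TableOffset = Generator.Emit(OS, Trait);
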
// Trait for writing frame mappings to the on-disk hash table.
class FrameWriterTrait {
public:
  using key_type = FrameId;
  using key_type_ref = FrameId;

  using data_type = Frame;
  using data_type_ref = Frame &;

  using hash_value_type = FrameId;
  using offset_type = uint64_t;

  static hash_value_type ComputeHash(key_type_ref K) { return K; }

  static std::pair<offset_type, offset_type>
  EmitKeyDataLength(raw_ostream &Out, key_type_ref K, data_type_ref V) {
    using namespace support;
    endian::Writer LE(Out, llvm::endianness::little);
    offset_type N = sizeof(K);
    LE.write<offset_type>(N);
    offset_type M = V.serializedSize();
    LE.write<offset_type>(M);
    return std::make_pair(N, M);
  }

  void EmitKey(raw_ostream &Out, key_type_ref K, offset_type /*Unused*/) {
    using namespace support;
    endian::Writer LE(Out, llvm::endianness::little);
    LE.write<key_type>(K);
  }

  void EmitData(raw_ostream &Out, key_type_ref /*Unused*/, data_type_ref V,
                offset_type /*Unused*/) {
    V.serialize(Out);
  }
};

// Trait for reading frame mappings from the on-disk hash table.
class FrameLookupTrait {
public:
  using data_type = const Frame;
  using internal_key_type = FrameId;
  using external_key_type = FrameId;
  using hash_value_type = FrameId;
  using offset_type = uint64_t;

  static bool EqualKey(internal_key_type A, internal_key_type B) {
    return A == B;
  }
  static uint64_t GetInternalKey(internal_key_type K) { return K; }
  static uint64_t GetExternalKey(external_key_type K) { return K; }

  hash_value_type ComputeHash(internal_key_type K) { return K; }

  static std::pair<offset_type, offset_type>
  ReadKeyDataLength(const unsigned char *&D) {
    using namespace support;

    offset_type KeyLen =
        endian::readNext<offset_type, llvm::endianness::little>(D);
    offset_type DataLen =
        endian::readNext<offset_type, llvm::endianness::little>(D);
    return std::make_pair(KeyLen, DataLen);
  }

  uint64_t ReadKey(const unsigned char *D, offset_type /*Unused*/) {
    using namespace support;
    return endian::readNext<external_key_type, llvm::endianness::little>(D);
  }

  data_type ReadData(uint64_t K, const unsigned char *D,
                     offset_type /*Unused*/) {
    return Frame::deserialize(D);
  }
};

// Trait for writing call stacks to the on-disk hash table.
class CallStackWriterTrait {
public:
  using key_type = CallStackId;
  using key_type_ref = CallStackId;

  using data_type = llvm::SmallVector<FrameId>;
  using data_type_ref = llvm::SmallVector<FrameId> &;

  using hash_value_type = CallStackId;
  using offset_type = uint64_t;

  static hash_value_type ComputeHash(key_type_ref K) { return K; }

  static std::pair<offset_type, offset_type>
  EmitKeyDataLength(raw_ostream &Out, key_type_ref K, data_type_ref V) {
    using namespace support;
    endian::Writer LE(Out, llvm::endianness::little);
    // We do not explicitly emit the key length because it is a constant.
    offset_type N = sizeof(K);
    offset_type M = sizeof(FrameId) * V.size();
    LE.write<offset_type>(M);
    return std::make_pair(N, M);
  }

  void EmitKey(raw_ostream &Out, key_type_ref K, offset_type /*Unused*/) {
    using namespace support;
    endian::Writer LE(Out, llvm::endianness::little);
    LE.write<key_type>(K);
  }

  void EmitData(raw_ostream &Out, key_type_ref /*Unused*/, data_type_ref V,
                offset_type /*Unused*/) {
    using namespace support;
    endian::Writer LE(Out, llvm::endianness::little);
    // Emit the frames. We do not explicitly emit the length of the vector
    // because it can be inferred from the data length.
    for (FrameId F : V)
      LE.write<FrameId>(F);
  }
};

// Trait for reading call stack mappings from the on-disk hash table.
class CallStackLookupTrait {
public:
  using data_type = const llvm::SmallVector<FrameId>;
  using internal_key_type = CallStackId;
  using external_key_type = CallStackId;
  using hash_value_type = CallStackId;
  using offset_type = uint64_t;

  static bool EqualKey(internal_key_type A, internal_key_type B) {
    return A == B;
  }
  static uint64_t GetInternalKey(internal_key_type K) { return K; }
  static uint64_t GetExternalKey(external_key_type K) { return K; }

  hash_value_type ComputeHash(internal_key_type K) { return K; }

  static std::pair<offset_type, offset_type>
  ReadKeyDataLength(const unsigned char *&D) {
    using namespace support;

    // We do not explicitly read the key length because it is a constant.
    offset_type KeyLen = sizeof(external_key_type);
    offset_type DataLen =
        endian::readNext<offset_type, llvm::endianness::little>(D);
    return std::make_pair(KeyLen, DataLen);
  }

  uint64_t ReadKey(const unsigned char *D, offset_type /*Unused*/) {
    using namespace support;
    return endian::readNext<external_key_type, llvm::endianness::little>(D);
  }

  data_type ReadData(uint64_t K, const unsigned char *D, offset_type Length) {
    using namespace support;
    llvm::SmallVector<FrameId> CS;
    // Derive the number of frames from the data length.
    uint64_t NumFrames = Length / sizeof(FrameId);
    assert(Length % sizeof(FrameId) == 0);
    CS.reserve(NumFrames);
    for (size_t I = 0; I != NumFrames; ++I) {
      FrameId F = endian::readNext<FrameId, llvm::endianness::little>(D);
      CS.push_back(F);
    }
    return CS;
  }
};

// Compute a CallStackId for a given call stack.
CallStackId hashCallStack(ArrayRef<FrameId> CS);

namespace detail {
// "Dereference" the iterator from DenseMap or OnDiskChainedHashTable. We have
// to do so in one of two different ways depending on the type of the hash
// table.
template <typename value_type, typename IterTy>
value_type DerefIterator(IterTy Iter) {
  using deref_type = llvm::remove_cvref_t<decltype(*Iter)>;
  if constexpr (std::is_same_v<deref_type, value_type>)
    return *Iter;
  else
    return Iter->second;
}
} // namespace detail

// A function object that returns a frame for a given FrameId.
template <typename MapTy> struct FrameIdConverter {
  std::optional<FrameId> LastUnmappedId;
  MapTy &Map;

  FrameIdConverter() = delete;
  FrameIdConverter(MapTy &Map) : Map(Map) {}

  // Delete the copy constructor and copy assignment operator to avoid a
  // situation where a copy of FrameIdConverter gets an error in LastUnmappedId
  // while the original instance doesn't.
  FrameIdConverter(const FrameIdConverter &) = delete;
  FrameIdConverter &operator=(const FrameIdConverter &) = delete;

  Frame operator()(FrameId Id) {
    auto Iter = Map.find(Id);
    if (Iter == Map.end()) {
      LastUnmappedId = Id;
      return Frame(0, 0, 0, false);
    }
    return detail::DerefIterator<Frame>(Iter);
  }
};

// A function object that returns a call stack for a given CallStackId.
template <typename MapTy> struct CallStackIdConverter {
  std::optional<CallStackId> LastUnmappedId;
  MapTy &Map;
  llvm::function_ref<Frame(FrameId)> FrameIdToFrame;

  CallStackIdConverter() = delete;
  CallStackIdConverter(MapTy &Map,
                       llvm::function_ref<Frame(FrameId)> FrameIdToFrame)
      : Map(Map), FrameIdToFrame(FrameIdToFrame) {}

  // Delete the copy constructor and copy assignment operator to avoid a
  // situation where a copy of CallStackIdConverter gets an error in
  // LastUnmappedId while the original instance doesn't.
  CallStackIdConverter(const CallStackIdConverter &) = delete;
  CallStackIdConverter &operator=(const CallStackIdConverter &) = delete;

  std::vector<Frame> operator()(CallStackId CSId) {
    std::vector<Frame> Frames;
    auto CSIter = Map.find(CSId);
    if (CSIter == Map.end()) {
      LastUnmappedId = CSId;
    } else {
      llvm::SmallVector<FrameId> CS =
          detail::DerefIterator<llvm::SmallVector<FrameId>>(CSIter);
      Frames.reserve(CS.size());
      for (FrameId Id : CS)
        Frames.push_back(FrameIdToFrame(Id));
    }
    return Frames;
  }
};

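// Illustrative sketch (not part of the header): the converters above are meant
// to feed IndexedMemProfRecord::toMemProfRecord. `MemProfData` is an assumed
// IndexedMemProfData instance (defined further below) and `IndexedRecord` is
// an assumed IndexedMemProfRecord.
//
//   FrameIdConverter<decltype(MemProfData.Frames)> FrameIdConv(
//       MemProfData.Frames);
//   CallStackIdConverter<decltype(MemProfData.CallStacks)> CSIdConv(
//       MemProfData.CallStacks, FrameIdConv);
//   MemProfRecord Record = IndexedRecord.toMemProfRecord(CSIdConv);
//   if (FrameIdConv.LastUnmappedId || CSIdConv.LastUnmappedId) {
//     // A referenced frame or call stack was missing from the maps.
//   }
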
// A function object that returns a Frame stored at a given index into the
// Frame array in the profile.
struct LinearFrameIdConverter {
  const unsigned char *FrameBase;

  LinearFrameIdConverter() = delete;
  LinearFrameIdConverter(const unsigned char *FrameBase)
      : FrameBase(FrameBase) {}

  Frame operator()(LinearFrameId LinearId) {
    uint64_t Offset = static_cast<uint64_t>(LinearId) * Frame::serializedSize();
    return Frame::deserialize(FrameBase + Offset);
  }
};

// A function object that returns a call stack stored at a given index into
// the call stack array in the profile.
struct LinearCallStackIdConverter {
  const unsigned char *CallStackBase;
  std::function<Frame(LinearFrameId)> FrameIdToFrame;

  LinearCallStackIdConverter() = delete;
  LinearCallStackIdConverter(const unsigned char *CallStackBase,
                             std::function<Frame(LinearFrameId)> FrameIdToFrame)
      : CallStackBase(CallStackBase), FrameIdToFrame(FrameIdToFrame) {}

  std::vector<Frame> operator()(LinearCallStackId LinearCSId) {
    std::vector<Frame> Frames;

    const unsigned char *Ptr =
        CallStackBase +
        static_cast<uint64_t>(LinearCSId) * sizeof(LinearFrameId);
    uint32_t NumFrames =
        support::endian::readNext<uint32_t, llvm::endianness::little>(Ptr);
    Frames.reserve(NumFrames);
    for (; NumFrames; --NumFrames) {
      LinearFrameId Elem =
          support::endian::read<LinearFrameId, llvm::endianness::little>(Ptr);
      // Follow a pointer to the parent, if any. See comments below on
      // CallStackRadixTreeBuilder for the description of the radix tree
      // format.
      if (static_cast<std::make_signed_t<LinearFrameId>>(Elem) < 0) {
        Ptr += (-Elem) * sizeof(LinearFrameId);
        Elem =
            support::endian::read<LinearFrameId, llvm::endianness::little>(Ptr);
      }
      // We shouldn't encounter another pointer.
      assert(static_cast<std::make_signed_t<LinearFrameId>>(Elem) >= 0);
      Frames.push_back(FrameIdToFrame(Elem));
      Ptr += sizeof(LinearFrameId);
    }

    return Frames;
  }
};

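// Illustrative sketch (not part of the header): for the Version 3 format the
// two linear converters are chained, mirroring the id-based converters above.
// FrameBase and CallStackBase are assumed pointers to the start of the frame
// array and the call stack (radix tree) array in the profile buffer, and
// LinearCSId is an assumed index taken from an on-disk record.
//
//   LinearFrameIdConverter FrameIdConv(FrameBase);
//   LinearCallStackIdConverter CSIdConv(CallStackBase, FrameIdConv);
//   std::vector<Frame> CallStack = CSIdConv(LinearCSId);
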
struct IndexedMemProfData {
  // A map to hold memprof data per function. The lower 64 bits obtained from
  // the md5 hash of the function name are used to index into the map.
  llvm::MapVector<GlobalValue::GUID, IndexedMemProfRecord> Records;

  // A map to hold frame id to frame mappings. The mappings are used to
  // convert IndexedMemProfRecord to MemProfRecord with frame information
  // inline.
  llvm::MapVector<FrameId, Frame> Frames;

  // A map to hold call stack id to call stacks.
  llvm::MapVector<CallStackId, llvm::SmallVector<FrameId>> CallStacks;
};

struct FrameStat {
  // The number of occurrences of a given FrameId.
  uint64_t Count = 0;
  // The sum of indexes where a given FrameId shows up.
  uint64_t PositionSum = 0;
};

// Compute a histogram of Frames in call stacks.
llvm::DenseMap<FrameId, FrameStat>
computeFrameHistogram(llvm::MapVector<CallStackId, llvm::SmallVector<FrameId>>
                          &MemProfCallStackData);

// Construct a radix tree of call stacks.
//
// A set of call stacks might look like:
//
// CallStackId 1:  f1 -> f2 -> f3
// CallStackId 2:  f1 -> f2 -> f4 -> f5
// CallStackId 3:  f1 -> f2 -> f4 -> f6
// CallStackId 4:  f7 -> f8 -> f9
//
// where each fn refers to a stack frame.
//
// Since we expect a lot of common prefixes, we can compress the call stacks
// into a radix tree like:
//
// CallStackId 1:  f1 -> f2 -> f3
//                        |
// CallStackId 2:         +---> f4 -> f5
//                               |
// CallStackId 3:                +---> f6
//
// CallStackId 4:  f7 -> f8 -> f9
//
// Now, we are interested in retrieving call stacks for a given CallStackId, so
// we just need a pointer from a given call stack to its parent. For example,
// CallStackId 2 would point to CallStackId 1 as a parent.
//
// We serialize the radix tree above into a single array along with the length
// of each call stack and pointers to the parent call stacks.
//
// Index:           0  1  2  3  4  5  6  7  8  9 10 11 12 13 14
// Array:          L3 f9 f8 f7 L4 f6 J3 L4 f5 f4 J3 L3 f3 f2 f1
//                  ^           ^        ^           ^
//                  |           |        |           |
// CallStackId 4:  0 ----------+|        |           |
// CallStackId 3:  4 -----------+        |           |
// CallStackId 2:  7 --------------------+           |
// CallStackId 1: 11 ---------------------------------+
//
// - LN indicates the length of a call stack, encoded as ordinary integer N.
//
// - JN indicates a pointer to the parent, encoded as -N.
//
// The radix tree allows us to reconstruct call stacks in the leaf-to-root
// order as we scan the array from left to right while following pointers to
// parents along the way.
//
// For example, if we are decoding CallStackId 2, we start a forward traversal
// at Index 7, noting the call stack length of 4 and obtaining f5 and f4. When
// we see J3 at Index 10, we resume a forward traversal at Index 13 = 10 + 3,
// picking up f2 and f1. We are done after collecting 4 frames as indicated at
// the beginning of the traversal.
//
// On-disk IndexedMemProfRecord will refer to call stacks by their indexes into
// the radix tree array, so we do not explicitly encode mappings like:
// "CallStackId 1 -> 11".
class CallStackRadixTreeBuilder {
  // The radix tree array.
  std::vector<LinearFrameId> RadixArray;

  // Mapping from CallStackIds to indexes into RadixArray.
  llvm::DenseMap<CallStackId, LinearCallStackId> CallStackPos;

  // In build, we partition a given call stack into two parts -- the prefix
  // that's common with the previously encoded call stack and the frames beyond
  // the common prefix -- the unique portion. Then we want to find out where
  // the common prefix is stored in RadixArray so that we can link the unique
  // portion to the common prefix. Indexes, declared below, helps us with that.
  // Intuitively, Indexes tells us where each frame of the previously encoded
  // call stack is stored in RadixArray. More formally, Indexes satisfies:
  //
  //   RadixArray[Indexes[I]] == Prev[I]
  //
  // for every I, where Prev is the call stack, in root-to-leaf order,
  // previously encoded by build. (Note that Prev, as passed to
  // encodeCallStack, is in the leaf-to-root order.)
  //
  // For example, if the call stack being encoded shares 5 frames at the root
  // of the call stack with the previously encoded call stack,
  // RadixArray[Indexes[0]] is the root frame of the common prefix.
  // RadixArray[Indexes[5 - 1]] is the last frame of the common prefix.
  std::vector<LinearCallStackId> Indexes;

  using CSIdPair = std::pair<CallStackId, llvm::SmallVector<FrameId>>;

  // Encode a call stack into RadixArray. Return the starting index within
  // RadixArray.
  LinearCallStackId encodeCallStack(
      const llvm::SmallVector<FrameId> *CallStack,
      const llvm::SmallVector<FrameId> *Prev,
      const llvm::DenseMap<FrameId, LinearFrameId> &MemProfFrameIndexes);

public:
  CallStackRadixTreeBuilder() = default;

  // Build a radix tree array.
  void build(llvm::MapVector<CallStackId, llvm::SmallVector<FrameId>>
                 &&MemProfCallStackData,
             const llvm::DenseMap<FrameId, LinearFrameId> &MemProfFrameIndexes,
             llvm::DenseMap<FrameId, FrameStat> &FrameHistogram);

  const std::vector<LinearFrameId> &getRadixArray() const { return RadixArray; }

  llvm::DenseMap<CallStackId, LinearCallStackId> takeCallStackPos() {
    return std::move(CallStackPos);
  }
};

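// Illustrative sketch (not part of the header): a typical writer-side flow for
// Version 3, assuming MemProfCallStackData and MemProfFrameIndexes have
// already been populated.
//
//   llvm::DenseMap<FrameId, FrameStat> Histogram =
//       computeFrameHistogram(MemProfCallStackData);
//   CallStackRadixTreeBuilder Builder;
//   Builder.build(std::move(MemProfCallStackData), MemProfFrameIndexes,
//                 Histogram);
//   ArrayRef<LinearFrameId> RadixArray = Builder.getRadixArray();
//   llvm::DenseMap<CallStackId, LinearCallStackId> CallStackPos =
//       Builder.takeCallStackPos();
//
// RadixArray is what gets written out as the call stack array, and
// CallStackPos supplies the MemProfCallStackIndexes mapping consumed by
// RecordWriterTrait above.
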
// Verify that each CallStackId is computed with hashCallStack. This function
// is intended to help transition from CallStack to CSId in
// IndexedAllocationInfo.
void verifyIndexedMemProfRecord(const IndexedMemProfRecord &Record);

// Verify that each CallStackId is computed with hashCallStack. This function
// is intended to help transition from CallStack to CSId in
// IndexedAllocationInfo.
void verifyFunctionProfileData(
    const llvm::MapVector<GlobalValue::GUID, IndexedMemProfRecord>
        &FunctionProfileData);
} // namespace memprof
} // namespace llvm

#endif // LLVM_PROFILEDATA_MEMPROF_H_