1 #ifndef LLVM_PROFILEDATA_MEMPROF_H_ 2 #define LLVM_PROFILEDATA_MEMPROF_H_ 3 4 #include "llvm/ADT/BitVector.h" 5 #include "llvm/ADT/MapVector.h" 6 #include "llvm/ADT/STLForwardCompat.h" 7 #include "llvm/ADT/STLFunctionalExtras.h" 8 #include "llvm/ADT/SmallVector.h" 9 #include "llvm/IR/GlobalValue.h" 10 #include "llvm/ProfileData/MemProfData.inc" 11 #include "llvm/Support/BLAKE3.h" 12 #include "llvm/Support/Endian.h" 13 #include "llvm/Support/EndianStream.h" 14 #include "llvm/Support/HashBuilder.h" 15 #include "llvm/Support/raw_ostream.h" 16 17 #include <bitset> 18 #include <cstdint> 19 #include <optional> 20 21 namespace llvm { 22 namespace yaml { 23 template <typename T> struct CustomMappingTraits; 24 } // namespace yaml 25 26 namespace memprof { 27 28 struct MemProfRecord; 29 30 // The versions of the indexed MemProf format 31 enum IndexedVersion : uint64_t { 32 // Version 2: Added a call stack table. 33 Version2 = 2, 34 // Version 3: Added a radix tree for call stacks. Switched to linear IDs for 35 // frames and call stacks. 36 Version3 = 3, 37 }; 38 39 constexpr uint64_t MinimumSupportedVersion = Version2; 40 constexpr uint64_t MaximumSupportedVersion = Version3; 41 42 // Verify that the minimum and maximum satisfy the obvious constraint. 43 static_assert(MinimumSupportedVersion <= MaximumSupportedVersion); 44 45 enum class Meta : uint64_t { 46 Start = 0, 47 #define MIBEntryDef(NameTag, Name, Type) NameTag, 48 #include "llvm/ProfileData/MIBEntryDef.inc" 49 #undef MIBEntryDef 50 Size 51 }; 52 53 using MemProfSchema = llvm::SmallVector<Meta, static_cast<int>(Meta::Size)>; 54 55 // Returns the full schema currently in use. 56 MemProfSchema getFullSchema(); 57 58 // Returns the schema consisting of the fields used for hot cold memory hinting. 59 MemProfSchema getHotColdSchema(); 60 61 // Holds the actual MemInfoBlock data with all fields. Contents may be read or 62 // written partially by providing an appropriate schema to the serialize and 63 // deserialize methods. 64 struct PortableMemInfoBlock { 65 PortableMemInfoBlock() = default; 66 explicit PortableMemInfoBlock(const MemInfoBlock &Block, 67 const MemProfSchema &IncomingSchema) { 68 for (const Meta Id : IncomingSchema) 69 Schema.set(llvm::to_underlying(Id)); 70 #define MIBEntryDef(NameTag, Name, Type) Name = Block.Name; 71 #include "llvm/ProfileData/MIBEntryDef.inc" 72 #undef MIBEntryDef 73 } 74 75 PortableMemInfoBlock(const MemProfSchema &Schema, const unsigned char *Ptr) { 76 deserialize(Schema, Ptr); 77 } 78 79 // Read the contents of \p Ptr based on the \p Schema to populate the 80 // MemInfoBlock member. 81 void deserialize(const MemProfSchema &IncomingSchema, 82 const unsigned char *Ptr) { 83 using namespace support; 84 85 Schema.reset(); 86 for (const Meta Id : IncomingSchema) { 87 switch (Id) { 88 #define MIBEntryDef(NameTag, Name, Type) \ 89 case Meta::Name: { \ 90 Name = endian::readNext<Type, llvm::endianness::little>(Ptr); \ 91 } break; 92 #include "llvm/ProfileData/MIBEntryDef.inc" 93 #undef MIBEntryDef 94 default: 95 llvm_unreachable("Unknown meta type id, is the profile collected from " 96 "a newer version of the runtime?"); 97 } 98 99 Schema.set(llvm::to_underlying(Id)); 100 } 101 } 102 103 // Write the contents of the MemInfoBlock based on the \p Schema provided to 104 // the raw_ostream \p OS. 105 void serialize(const MemProfSchema &Schema, raw_ostream &OS) const { 106 using namespace support; 107 108 endian::Writer LE(OS, llvm::endianness::little); 109 for (const Meta Id : Schema) { 110 switch (Id) { 111 #define MIBEntryDef(NameTag, Name, Type) \ 112 case Meta::Name: { \ 113 LE.write<Type>(Name); \ 114 } break; 115 #include "llvm/ProfileData/MIBEntryDef.inc" 116 #undef MIBEntryDef 117 default: 118 llvm_unreachable("Unknown meta type id, invalid input?"); 119 } 120 } 121 } 122 123 // Print out the contents of the MemInfoBlock in YAML format. 124 void printYAML(raw_ostream &OS) const { 125 OS << " MemInfoBlock:\n"; 126 #define MIBEntryDef(NameTag, Name, Type) \ 127 OS << " " << #Name << ": " << Name << "\n"; 128 #include "llvm/ProfileData/MIBEntryDef.inc" 129 #undef MIBEntryDef 130 if (AccessHistogramSize > 0) { 131 OS << " " << "AccessHistogramValues" << ":"; 132 for (uint32_t I = 0; I < AccessHistogramSize; ++I) { 133 OS << " " << ((uint64_t *)AccessHistogram)[I]; 134 } 135 OS << "\n"; 136 } 137 } 138 139 // Return the schema, only for unit tests. 140 std::bitset<llvm::to_underlying(Meta::Size)> getSchema() const { 141 return Schema; 142 } 143 144 // Define getters for each type which can be called by analyses. 145 #define MIBEntryDef(NameTag, Name, Type) \ 146 Type get##Name() const { \ 147 assert(Schema[llvm::to_underlying(Meta::Name)]); \ 148 return Name; \ 149 } 150 #include "llvm/ProfileData/MIBEntryDef.inc" 151 #undef MIBEntryDef 152 153 // Define setters for each type which can be called by the writer. 154 #define MIBEntryDef(NameTag, Name, Type) \ 155 void set##Name(Type NewVal) { \ 156 assert(Schema[llvm::to_underlying(Meta::Name)]); \ 157 Name = NewVal; \ 158 } 159 #include "llvm/ProfileData/MIBEntryDef.inc" 160 #undef MIBEntryDef 161 162 void clear() { *this = PortableMemInfoBlock(); } 163 164 bool operator==(const PortableMemInfoBlock &Other) const { 165 if (Other.Schema != Schema) 166 return false; 167 168 #define MIBEntryDef(NameTag, Name, Type) \ 169 if (Schema[llvm::to_underlying(Meta::Name)] && \ 170 Other.get##Name() != get##Name()) \ 171 return false; 172 #include "llvm/ProfileData/MIBEntryDef.inc" 173 #undef MIBEntryDef 174 return true; 175 } 176 177 bool operator!=(const PortableMemInfoBlock &Other) const { 178 return !operator==(Other); 179 } 180 181 static size_t serializedSize(const MemProfSchema &Schema) { 182 size_t Result = 0; 183 184 for (const Meta Id : Schema) { 185 switch (Id) { 186 #define MIBEntryDef(NameTag, Name, Type) \ 187 case Meta::Name: { \ 188 Result += sizeof(Type); \ 189 } break; 190 #include "llvm/ProfileData/MIBEntryDef.inc" 191 #undef MIBEntryDef 192 default: 193 llvm_unreachable("Unknown meta type id, invalid input?"); 194 } 195 } 196 197 return Result; 198 } 199 200 // Give YAML access to the individual MIB fields. 201 friend struct yaml::CustomMappingTraits<memprof::PortableMemInfoBlock>; 202 203 private: 204 // The set of available fields, indexed by Meta::Name. 205 std::bitset<llvm::to_underlying(Meta::Size)> Schema; 206 207 #define MIBEntryDef(NameTag, Name, Type) Type Name = Type(); 208 #include "llvm/ProfileData/MIBEntryDef.inc" 209 #undef MIBEntryDef 210 }; 211 212 // A type representing the id generated by hashing the contents of the Frame. 213 using FrameId = uint64_t; 214 // A type representing the id to index into the frame array. 215 using LinearFrameId = uint32_t; 216 // Describes a call frame for a dynamic allocation context. The contents of 217 // the frame are populated by symbolizing the stack depot call frame from the 218 // compiler runtime. 219 struct Frame { 220 // A uuid (uint64_t) identifying the function. It is obtained by 221 // llvm::md5(FunctionName) which returns the lower 64 bits. 222 GlobalValue::GUID Function = 0; 223 // The symbol name for the function. Only populated in the Frame by the reader 224 // if requested during initialization. This field should not be serialized. 225 std::unique_ptr<std::string> SymbolName; 226 // The source line offset of the call from the beginning of parent function. 227 uint32_t LineOffset = 0; 228 // The source column number of the call to help distinguish multiple calls 229 // on the same line. 230 uint32_t Column = 0; 231 // Whether the current frame is inlined. 232 bool IsInlineFrame = false; 233 234 Frame() = default; 235 Frame(const Frame &Other) { 236 Function = Other.Function; 237 SymbolName = Other.SymbolName 238 ? std::make_unique<std::string>(*Other.SymbolName) 239 : nullptr; 240 LineOffset = Other.LineOffset; 241 Column = Other.Column; 242 IsInlineFrame = Other.IsInlineFrame; 243 } 244 245 Frame(GlobalValue::GUID Hash, uint32_t Off, uint32_t Col, bool Inline) 246 : Function(Hash), LineOffset(Off), Column(Col), IsInlineFrame(Inline) {} 247 248 bool operator==(const Frame &Other) const { 249 // Ignore the SymbolName field to avoid a string compare. Comparing the 250 // function hash serves the same purpose. 251 return Other.Function == Function && Other.LineOffset == LineOffset && 252 Other.Column == Column && Other.IsInlineFrame == IsInlineFrame; 253 } 254 255 Frame &operator=(const Frame &Other) { 256 Function = Other.Function; 257 SymbolName = Other.SymbolName 258 ? std::make_unique<std::string>(*Other.SymbolName) 259 : nullptr; 260 LineOffset = Other.LineOffset; 261 Column = Other.Column; 262 IsInlineFrame = Other.IsInlineFrame; 263 return *this; 264 } 265 266 bool operator!=(const Frame &Other) const { return !operator==(Other); } 267 268 bool hasSymbolName() const { return !!SymbolName; } 269 270 StringRef getSymbolName() const { 271 assert(hasSymbolName()); 272 return *SymbolName; 273 } 274 275 std::string getSymbolNameOr(StringRef Alt) const { 276 return std::string(hasSymbolName() ? getSymbolName() : Alt); 277 } 278 279 // Write the contents of the frame to the ostream \p OS. 280 void serialize(raw_ostream &OS) const { 281 using namespace support; 282 283 endian::Writer LE(OS, llvm::endianness::little); 284 285 // If the type of the GlobalValue::GUID changes, then we need to update 286 // the reader and the writer. 287 static_assert(std::is_same<GlobalValue::GUID, uint64_t>::value, 288 "Expect GUID to be uint64_t."); 289 LE.write<uint64_t>(Function); 290 291 LE.write<uint32_t>(LineOffset); 292 LE.write<uint32_t>(Column); 293 LE.write<bool>(IsInlineFrame); 294 } 295 296 // Read a frame from char data which has been serialized as little endian. 297 static Frame deserialize(const unsigned char *Ptr) { 298 using namespace support; 299 300 const uint64_t F = 301 endian::readNext<uint64_t, llvm::endianness::little>(Ptr); 302 const uint32_t L = 303 endian::readNext<uint32_t, llvm::endianness::little>(Ptr); 304 const uint32_t C = 305 endian::readNext<uint32_t, llvm::endianness::little>(Ptr); 306 const bool I = endian::readNext<bool, llvm::endianness::little>(Ptr); 307 return Frame(/*Function=*/F, /*LineOffset=*/L, /*Column=*/C, 308 /*IsInlineFrame=*/I); 309 } 310 311 // Returns the size of the frame information. 312 static constexpr size_t serializedSize() { 313 return sizeof(Frame::Function) + sizeof(Frame::LineOffset) + 314 sizeof(Frame::Column) + sizeof(Frame::IsInlineFrame); 315 } 316 317 // Print the frame information in YAML format. 318 void printYAML(raw_ostream &OS) const { 319 OS << " -\n" 320 << " Function: " << Function << "\n" 321 << " SymbolName: " << getSymbolNameOr("<None>") << "\n" 322 << " LineOffset: " << LineOffset << "\n" 323 << " Column: " << Column << "\n" 324 << " Inline: " << IsInlineFrame << "\n"; 325 } 326 }; 327 328 // A type representing the index into the table of call stacks. 329 using CallStackId = uint64_t; 330 331 // A type representing the index into the call stack array. 332 using LinearCallStackId = uint32_t; 333 334 // Holds allocation information in a space efficient format where frames are 335 // represented using unique identifiers. 336 struct IndexedAllocationInfo { 337 // The dynamic calling context for the allocation in bottom-up (leaf-to-root) 338 // order. Frame contents are stored out-of-line. 339 CallStackId CSId = 0; 340 // The statistics obtained from the runtime for the allocation. 341 PortableMemInfoBlock Info; 342 343 IndexedAllocationInfo() = default; 344 IndexedAllocationInfo(CallStackId CSId, const MemInfoBlock &MB, 345 const MemProfSchema &Schema = getFullSchema()) 346 : CSId(CSId), Info(MB, Schema) {} 347 IndexedAllocationInfo(CallStackId CSId, const PortableMemInfoBlock &MB) 348 : CSId(CSId), Info(MB) {} 349 350 // Returns the size in bytes when this allocation info struct is serialized. 351 size_t serializedSize(const MemProfSchema &Schema, 352 IndexedVersion Version) const; 353 354 bool operator==(const IndexedAllocationInfo &Other) const { 355 if (Other.Info != Info) 356 return false; 357 358 if (Other.CSId != CSId) 359 return false; 360 return true; 361 } 362 363 bool operator!=(const IndexedAllocationInfo &Other) const { 364 return !operator==(Other); 365 } 366 }; 367 368 // Holds allocation information with frame contents inline. The type should 369 // be used for temporary in-memory instances. 370 struct AllocationInfo { 371 // Same as IndexedAllocationInfo::CallStack with the frame contents inline. 372 std::vector<Frame> CallStack; 373 // Same as IndexedAllocationInfo::Info; 374 PortableMemInfoBlock Info; 375 376 AllocationInfo() = default; 377 378 void printYAML(raw_ostream &OS) const { 379 OS << " -\n"; 380 OS << " Callstack:\n"; 381 // TODO: Print out the frame on one line with to make it easier for deep 382 // callstacks once we have a test to check valid YAML is generated. 383 for (const Frame &F : CallStack) { 384 F.printYAML(OS); 385 } 386 Info.printYAML(OS); 387 } 388 }; 389 390 // Holds the memprof profile information for a function. The internal 391 // representation stores frame ids for efficiency. This representation should 392 // be used in the profile conversion and manipulation tools. 393 struct IndexedMemProfRecord { 394 // Memory allocation sites in this function for which we have memory 395 // profiling data. 396 llvm::SmallVector<IndexedAllocationInfo> AllocSites; 397 // Holds call sites in this function which are part of some memory 398 // allocation context. We store this as a list of locations, each with its 399 // list of inline locations in bottom-up order i.e. from leaf to root. The 400 // inline location list may include additional entries, users should pick 401 // the last entry in the list with the same function GUID. 402 llvm::SmallVector<CallStackId> CallSiteIds; 403 404 void clear() { *this = IndexedMemProfRecord(); } 405 406 void merge(const IndexedMemProfRecord &Other) { 407 // TODO: Filter out duplicates which may occur if multiple memprof 408 // profiles are merged together using llvm-profdata. 409 AllocSites.append(Other.AllocSites); 410 } 411 412 size_t serializedSize(const MemProfSchema &Schema, 413 IndexedVersion Version) const; 414 415 bool operator==(const IndexedMemProfRecord &Other) const { 416 if (Other.AllocSites != AllocSites) 417 return false; 418 419 if (Other.CallSiteIds != CallSiteIds) 420 return false; 421 return true; 422 } 423 424 // Serializes the memprof records in \p Records to the ostream \p OS based 425 // on the schema provided in \p Schema. 426 void serialize(const MemProfSchema &Schema, raw_ostream &OS, 427 IndexedVersion Version, 428 llvm::DenseMap<CallStackId, LinearCallStackId> 429 *MemProfCallStackIndexes = nullptr) const; 430 431 // Deserializes memprof records from the Buffer. 432 static IndexedMemProfRecord deserialize(const MemProfSchema &Schema, 433 const unsigned char *Buffer, 434 IndexedVersion Version); 435 436 // Convert IndexedMemProfRecord to MemProfRecord. Callback is used to 437 // translate CallStackId to call stacks with frames inline. 438 MemProfRecord toMemProfRecord( 439 llvm::function_ref<std::vector<Frame>(const CallStackId)> Callback) const; 440 441 // Returns the GUID for the function name after canonicalization. For 442 // memprof, we remove any .llvm suffix added by LTO. MemProfRecords are 443 // mapped to functions using this GUID. 444 static GlobalValue::GUID getGUID(const StringRef FunctionName); 445 }; 446 447 // Holds the memprof profile information for a function. The internal 448 // representation stores frame contents inline. This representation should 449 // be used for small amount of temporary, in memory instances. 450 struct MemProfRecord { 451 // Same as IndexedMemProfRecord::AllocSites with frame contents inline. 452 llvm::SmallVector<AllocationInfo> AllocSites; 453 // Same as IndexedMemProfRecord::CallSites with frame contents inline. 454 llvm::SmallVector<std::vector<Frame>> CallSites; 455 456 MemProfRecord() = default; 457 458 // Prints out the contents of the memprof record in YAML. 459 void print(llvm::raw_ostream &OS) const { 460 if (!AllocSites.empty()) { 461 OS << " AllocSites:\n"; 462 for (const AllocationInfo &N : AllocSites) 463 N.printYAML(OS); 464 } 465 466 if (!CallSites.empty()) { 467 OS << " CallSites:\n"; 468 for (const std::vector<Frame> &Frames : CallSites) { 469 for (const Frame &F : Frames) { 470 OS << " -\n"; 471 F.printYAML(OS); 472 } 473 } 474 } 475 } 476 }; 477 478 // Reads a memprof schema from a buffer. All entries in the buffer are 479 // interpreted as uint64_t. The first entry in the buffer denotes the number of 480 // ids in the schema. Subsequent entries are integers which map to memprof::Meta 481 // enum class entries. After successfully reading the schema, the pointer is one 482 // byte past the schema contents. 483 Expected<MemProfSchema> readMemProfSchema(const unsigned char *&Buffer); 484 485 // Trait for reading IndexedMemProfRecord data from the on-disk hash table. 486 class RecordLookupTrait { 487 public: 488 using data_type = const IndexedMemProfRecord &; 489 using internal_key_type = uint64_t; 490 using external_key_type = uint64_t; 491 using hash_value_type = uint64_t; 492 using offset_type = uint64_t; 493 494 RecordLookupTrait() = delete; 495 RecordLookupTrait(IndexedVersion V, const MemProfSchema &S) 496 : Version(V), Schema(S) {} 497 498 static bool EqualKey(uint64_t A, uint64_t B) { return A == B; } 499 static uint64_t GetInternalKey(uint64_t K) { return K; } 500 static uint64_t GetExternalKey(uint64_t K) { return K; } 501 502 hash_value_type ComputeHash(uint64_t K) { return K; } 503 504 static std::pair<offset_type, offset_type> 505 ReadKeyDataLength(const unsigned char *&D) { 506 using namespace support; 507 508 offset_type KeyLen = 509 endian::readNext<offset_type, llvm::endianness::little>(D); 510 offset_type DataLen = 511 endian::readNext<offset_type, llvm::endianness::little>(D); 512 return std::make_pair(KeyLen, DataLen); 513 } 514 515 uint64_t ReadKey(const unsigned char *D, offset_type /*Unused*/) { 516 using namespace support; 517 return endian::readNext<external_key_type, llvm::endianness::little>(D); 518 } 519 520 data_type ReadData(uint64_t K, const unsigned char *D, 521 offset_type /*Unused*/) { 522 Record = IndexedMemProfRecord::deserialize(Schema, D, Version); 523 return Record; 524 } 525 526 private: 527 // Holds the MemProf version. 528 IndexedVersion Version; 529 // Holds the memprof schema used to deserialize records. 530 MemProfSchema Schema; 531 // Holds the records from one function deserialized from the indexed format. 532 IndexedMemProfRecord Record; 533 }; 534 535 // Trait for writing IndexedMemProfRecord data to the on-disk hash table. 536 class RecordWriterTrait { 537 public: 538 using key_type = uint64_t; 539 using key_type_ref = uint64_t; 540 541 using data_type = IndexedMemProfRecord; 542 using data_type_ref = IndexedMemProfRecord &; 543 544 using hash_value_type = uint64_t; 545 using offset_type = uint64_t; 546 547 private: 548 // Pointer to the memprof schema to use for the generator. 549 const MemProfSchema *Schema; 550 // The MemProf version to use for the serialization. 551 IndexedVersion Version; 552 553 // Mappings from CallStackId to the indexes into the call stack array. 554 llvm::DenseMap<CallStackId, LinearCallStackId> *MemProfCallStackIndexes; 555 556 public: 557 // We do not support the default constructor, which does not set Version. 558 RecordWriterTrait() = delete; 559 RecordWriterTrait( 560 const MemProfSchema *Schema, IndexedVersion V, 561 llvm::DenseMap<CallStackId, LinearCallStackId> *MemProfCallStackIndexes) 562 : Schema(Schema), Version(V), 563 MemProfCallStackIndexes(MemProfCallStackIndexes) {} 564 565 static hash_value_type ComputeHash(key_type_ref K) { return K; } 566 567 std::pair<offset_type, offset_type> 568 EmitKeyDataLength(raw_ostream &Out, key_type_ref K, data_type_ref V) { 569 using namespace support; 570 571 endian::Writer LE(Out, llvm::endianness::little); 572 offset_type N = sizeof(K); 573 LE.write<offset_type>(N); 574 offset_type M = V.serializedSize(*Schema, Version); 575 LE.write<offset_type>(M); 576 return std::make_pair(N, M); 577 } 578 579 void EmitKey(raw_ostream &Out, key_type_ref K, offset_type /*Unused*/) { 580 using namespace support; 581 endian::Writer LE(Out, llvm::endianness::little); 582 LE.write<uint64_t>(K); 583 } 584 585 void EmitData(raw_ostream &Out, key_type_ref /*Unused*/, data_type_ref V, 586 offset_type /*Unused*/) { 587 assert(Schema != nullptr && "MemProf schema is not initialized!"); 588 V.serialize(*Schema, Out, Version, MemProfCallStackIndexes); 589 // Clear the IndexedMemProfRecord which results in clearing/freeing its 590 // vectors of allocs and callsites. This is owned by the associated on-disk 591 // hash table, but unused after this point. See also the comment added to 592 // the client which constructs the on-disk hash table for this trait. 593 V.clear(); 594 } 595 }; 596 597 // Trait for writing frame mappings to the on-disk hash table. 598 class FrameWriterTrait { 599 public: 600 using key_type = FrameId; 601 using key_type_ref = FrameId; 602 603 using data_type = Frame; 604 using data_type_ref = Frame &; 605 606 using hash_value_type = FrameId; 607 using offset_type = uint64_t; 608 609 static hash_value_type ComputeHash(key_type_ref K) { return K; } 610 611 static std::pair<offset_type, offset_type> 612 EmitKeyDataLength(raw_ostream &Out, key_type_ref K, data_type_ref V) { 613 using namespace support; 614 endian::Writer LE(Out, llvm::endianness::little); 615 offset_type N = sizeof(K); 616 LE.write<offset_type>(N); 617 offset_type M = V.serializedSize(); 618 LE.write<offset_type>(M); 619 return std::make_pair(N, M); 620 } 621 622 void EmitKey(raw_ostream &Out, key_type_ref K, offset_type /*Unused*/) { 623 using namespace support; 624 endian::Writer LE(Out, llvm::endianness::little); 625 LE.write<key_type>(K); 626 } 627 628 void EmitData(raw_ostream &Out, key_type_ref /*Unused*/, data_type_ref V, 629 offset_type /*Unused*/) { 630 V.serialize(Out); 631 } 632 }; 633 634 // Trait for reading frame mappings from the on-disk hash table. 635 class FrameLookupTrait { 636 public: 637 using data_type = const Frame; 638 using internal_key_type = FrameId; 639 using external_key_type = FrameId; 640 using hash_value_type = FrameId; 641 using offset_type = uint64_t; 642 643 static bool EqualKey(internal_key_type A, internal_key_type B) { 644 return A == B; 645 } 646 static uint64_t GetInternalKey(internal_key_type K) { return K; } 647 static uint64_t GetExternalKey(external_key_type K) { return K; } 648 649 hash_value_type ComputeHash(internal_key_type K) { return K; } 650 651 static std::pair<offset_type, offset_type> 652 ReadKeyDataLength(const unsigned char *&D) { 653 using namespace support; 654 655 offset_type KeyLen = 656 endian::readNext<offset_type, llvm::endianness::little>(D); 657 offset_type DataLen = 658 endian::readNext<offset_type, llvm::endianness::little>(D); 659 return std::make_pair(KeyLen, DataLen); 660 } 661 662 uint64_t ReadKey(const unsigned char *D, offset_type /*Unused*/) { 663 using namespace support; 664 return endian::readNext<external_key_type, llvm::endianness::little>(D); 665 } 666 667 data_type ReadData(uint64_t K, const unsigned char *D, 668 offset_type /*Unused*/) { 669 return Frame::deserialize(D); 670 } 671 }; 672 673 // Trait for writing call stacks to the on-disk hash table. 674 class CallStackWriterTrait { 675 public: 676 using key_type = CallStackId; 677 using key_type_ref = CallStackId; 678 679 using data_type = llvm::SmallVector<FrameId>; 680 using data_type_ref = llvm::SmallVector<FrameId> &; 681 682 using hash_value_type = CallStackId; 683 using offset_type = uint64_t; 684 685 static hash_value_type ComputeHash(key_type_ref K) { return K; } 686 687 static std::pair<offset_type, offset_type> 688 EmitKeyDataLength(raw_ostream &Out, key_type_ref K, data_type_ref V) { 689 using namespace support; 690 endian::Writer LE(Out, llvm::endianness::little); 691 // We do not explicitly emit the key length because it is a constant. 692 offset_type N = sizeof(K); 693 offset_type M = sizeof(FrameId) * V.size(); 694 LE.write<offset_type>(M); 695 return std::make_pair(N, M); 696 } 697 698 void EmitKey(raw_ostream &Out, key_type_ref K, offset_type /*Unused*/) { 699 using namespace support; 700 endian::Writer LE(Out, llvm::endianness::little); 701 LE.write<key_type>(K); 702 } 703 704 void EmitData(raw_ostream &Out, key_type_ref /*Unused*/, data_type_ref V, 705 offset_type /*Unused*/) { 706 using namespace support; 707 endian::Writer LE(Out, llvm::endianness::little); 708 // Emit the frames. We do not explicitly emit the length of the vector 709 // because it can be inferred from the data length. 710 for (FrameId F : V) 711 LE.write<FrameId>(F); 712 } 713 }; 714 715 // Trait for reading call stack mappings from the on-disk hash table. 716 class CallStackLookupTrait { 717 public: 718 using data_type = const llvm::SmallVector<FrameId>; 719 using internal_key_type = CallStackId; 720 using external_key_type = CallStackId; 721 using hash_value_type = CallStackId; 722 using offset_type = uint64_t; 723 724 static bool EqualKey(internal_key_type A, internal_key_type B) { 725 return A == B; 726 } 727 static uint64_t GetInternalKey(internal_key_type K) { return K; } 728 static uint64_t GetExternalKey(external_key_type K) { return K; } 729 730 hash_value_type ComputeHash(internal_key_type K) { return K; } 731 732 static std::pair<offset_type, offset_type> 733 ReadKeyDataLength(const unsigned char *&D) { 734 using namespace support; 735 736 // We do not explicitly read the key length because it is a constant. 737 offset_type KeyLen = sizeof(external_key_type); 738 offset_type DataLen = 739 endian::readNext<offset_type, llvm::endianness::little>(D); 740 return std::make_pair(KeyLen, DataLen); 741 } 742 743 uint64_t ReadKey(const unsigned char *D, offset_type /*Unused*/) { 744 using namespace support; 745 return endian::readNext<external_key_type, llvm::endianness::little>(D); 746 } 747 748 data_type ReadData(uint64_t K, const unsigned char *D, offset_type Length) { 749 using namespace support; 750 llvm::SmallVector<FrameId> CS; 751 // Derive the number of frames from the data length. 752 uint64_t NumFrames = Length / sizeof(FrameId); 753 assert(Length % sizeof(FrameId) == 0); 754 CS.reserve(NumFrames); 755 for (size_t I = 0; I != NumFrames; ++I) { 756 FrameId F = endian::readNext<FrameId, llvm::endianness::little>(D); 757 CS.push_back(F); 758 } 759 return CS; 760 } 761 }; 762 763 namespace detail { 764 // "Dereference" the iterator from DenseMap or OnDiskChainedHashTable. We have 765 // to do so in one of two different ways depending on the type of the hash 766 // table. 767 template <typename value_type, typename IterTy> 768 value_type DerefIterator(IterTy Iter) { 769 using deref_type = llvm::remove_cvref_t<decltype(*Iter)>; 770 if constexpr (std::is_same_v<deref_type, value_type>) 771 return *Iter; 772 else 773 return Iter->second; 774 } 775 } // namespace detail 776 777 // A function object that returns a frame for a given FrameId. 778 template <typename MapTy> struct FrameIdConverter { 779 std::optional<FrameId> LastUnmappedId; 780 MapTy ⤅ 781 782 FrameIdConverter() = delete; 783 FrameIdConverter(MapTy &Map) : Map(Map) {} 784 785 // Delete the copy constructor and copy assignment operator to avoid a 786 // situation where a copy of FrameIdConverter gets an error in LastUnmappedId 787 // while the original instance doesn't. 788 FrameIdConverter(const FrameIdConverter &) = delete; 789 FrameIdConverter &operator=(const FrameIdConverter &) = delete; 790 791 Frame operator()(FrameId Id) { 792 auto Iter = Map.find(Id); 793 if (Iter == Map.end()) { 794 LastUnmappedId = Id; 795 return Frame(); 796 } 797 return detail::DerefIterator<Frame>(Iter); 798 } 799 }; 800 801 // A function object that returns a call stack for a given CallStackId. 802 template <typename MapTy> struct CallStackIdConverter { 803 std::optional<CallStackId> LastUnmappedId; 804 MapTy ⤅ 805 llvm::function_ref<Frame(FrameId)> FrameIdToFrame; 806 807 CallStackIdConverter() = delete; 808 CallStackIdConverter(MapTy &Map, 809 llvm::function_ref<Frame(FrameId)> FrameIdToFrame) 810 : Map(Map), FrameIdToFrame(FrameIdToFrame) {} 811 812 // Delete the copy constructor and copy assignment operator to avoid a 813 // situation where a copy of CallStackIdConverter gets an error in 814 // LastUnmappedId while the original instance doesn't. 815 CallStackIdConverter(const CallStackIdConverter &) = delete; 816 CallStackIdConverter &operator=(const CallStackIdConverter &) = delete; 817 818 std::vector<Frame> operator()(CallStackId CSId) { 819 std::vector<Frame> Frames; 820 auto CSIter = Map.find(CSId); 821 if (CSIter == Map.end()) { 822 LastUnmappedId = CSId; 823 } else { 824 llvm::SmallVector<FrameId> CS = 825 detail::DerefIterator<llvm::SmallVector<FrameId>>(CSIter); 826 Frames.reserve(CS.size()); 827 for (FrameId Id : CS) 828 Frames.push_back(FrameIdToFrame(Id)); 829 } 830 return Frames; 831 } 832 }; 833 834 // A function object that returns a Frame stored at a given index into the Frame 835 // array in the profile. 836 struct LinearFrameIdConverter { 837 const unsigned char *FrameBase; 838 839 LinearFrameIdConverter() = delete; 840 LinearFrameIdConverter(const unsigned char *FrameBase) 841 : FrameBase(FrameBase) {} 842 843 Frame operator()(LinearFrameId LinearId) { 844 uint64_t Offset = static_cast<uint64_t>(LinearId) * Frame::serializedSize(); 845 return Frame::deserialize(FrameBase + Offset); 846 } 847 }; 848 849 // A function object that returns a call stack stored at a given index into the 850 // call stack array in the profile. 851 struct LinearCallStackIdConverter { 852 const unsigned char *CallStackBase; 853 llvm::function_ref<Frame(LinearFrameId)> FrameIdToFrame; 854 855 LinearCallStackIdConverter() = delete; 856 LinearCallStackIdConverter( 857 const unsigned char *CallStackBase, 858 llvm::function_ref<Frame(LinearFrameId)> FrameIdToFrame) 859 : CallStackBase(CallStackBase), FrameIdToFrame(FrameIdToFrame) {} 860 861 std::vector<Frame> operator()(LinearCallStackId LinearCSId) { 862 std::vector<Frame> Frames; 863 864 const unsigned char *Ptr = 865 CallStackBase + 866 static_cast<uint64_t>(LinearCSId) * sizeof(LinearFrameId); 867 uint32_t NumFrames = 868 support::endian::readNext<uint32_t, llvm::endianness::little>(Ptr); 869 Frames.reserve(NumFrames); 870 for (; NumFrames; --NumFrames) { 871 LinearFrameId Elem = 872 support::endian::read<LinearFrameId, llvm::endianness::little>(Ptr); 873 // Follow a pointer to the parent, if any. See comments below on 874 // CallStackRadixTreeBuilder for the description of the radix tree format. 875 if (static_cast<std::make_signed_t<LinearFrameId>>(Elem) < 0) { 876 Ptr += (-Elem) * sizeof(LinearFrameId); 877 Elem = 878 support::endian::read<LinearFrameId, llvm::endianness::little>(Ptr); 879 } 880 // We shouldn't encounter another pointer. 881 assert(static_cast<std::make_signed_t<LinearFrameId>>(Elem) >= 0); 882 Frames.push_back(FrameIdToFrame(Elem)); 883 Ptr += sizeof(LinearFrameId); 884 } 885 886 return Frames; 887 } 888 }; 889 890 struct LineLocation { 891 LineLocation(uint32_t L, uint32_t D) : LineOffset(L), Column(D) {} 892 893 bool operator<(const LineLocation &O) const { 894 return LineOffset < O.LineOffset || 895 (LineOffset == O.LineOffset && Column < O.Column); 896 } 897 898 bool operator==(const LineLocation &O) const { 899 return LineOffset == O.LineOffset && Column == O.Column; 900 } 901 902 bool operator!=(const LineLocation &O) const { 903 return LineOffset != O.LineOffset || Column != O.Column; 904 } 905 906 uint64_t getHashCode() const { return ((uint64_t)Column << 32) | LineOffset; } 907 908 uint32_t LineOffset; 909 uint32_t Column; 910 }; 911 912 // A pair of a call site location and its corresponding callee GUID. 913 using CallEdgeTy = std::pair<LineLocation, uint64_t>; 914 915 // Used to extract caller-callee pairs from the call stack array. The leaf 916 // frame is assumed to call a heap allocation function with GUID 0. The 917 // resulting pairs are accumulated in CallerCalleePairs. Users can take it 918 // with: 919 // 920 // auto Pairs = std::move(Extractor.CallerCalleePairs); 921 struct CallerCalleePairExtractor { 922 // The base address of the radix tree array. 923 const unsigned char *CallStackBase; 924 // A functor to convert a linear FrameId to a Frame. 925 llvm::function_ref<Frame(LinearFrameId)> FrameIdToFrame; 926 // A map from caller GUIDs to lists of call sites in respective callers. 927 DenseMap<uint64_t, SmallVector<CallEdgeTy, 0>> CallerCalleePairs; 928 929 // The set of linear call stack IDs that we've visited. 930 BitVector Visited; 931 932 CallerCalleePairExtractor() = delete; 933 CallerCalleePairExtractor( 934 const unsigned char *CallStackBase, 935 llvm::function_ref<Frame(LinearFrameId)> FrameIdToFrame, 936 unsigned RadixTreeSize) 937 : CallStackBase(CallStackBase), FrameIdToFrame(FrameIdToFrame), 938 Visited(RadixTreeSize) {} 939 940 void operator()(LinearCallStackId LinearCSId) { 941 const unsigned char *Ptr = 942 CallStackBase + 943 static_cast<uint64_t>(LinearCSId) * sizeof(LinearFrameId); 944 uint32_t NumFrames = 945 support::endian::readNext<uint32_t, llvm::endianness::little>(Ptr); 946 // The leaf frame calls a function with GUID 0. 947 uint64_t CalleeGUID = 0; 948 for (; NumFrames; --NumFrames) { 949 LinearFrameId Elem = 950 support::endian::read<LinearFrameId, llvm::endianness::little>(Ptr); 951 // Follow a pointer to the parent, if any. See comments below on 952 // CallStackRadixTreeBuilder for the description of the radix tree format. 953 if (static_cast<std::make_signed_t<LinearFrameId>>(Elem) < 0) { 954 Ptr += (-Elem) * sizeof(LinearFrameId); 955 Elem = 956 support::endian::read<LinearFrameId, llvm::endianness::little>(Ptr); 957 } 958 // We shouldn't encounter another pointer. 959 assert(static_cast<std::make_signed_t<LinearFrameId>>(Elem) >= 0); 960 961 // Add a new caller-callee pair. 962 Frame F = FrameIdToFrame(Elem); 963 uint64_t CallerGUID = F.Function; 964 LineLocation Loc(F.LineOffset, F.Column); 965 CallerCalleePairs[CallerGUID].emplace_back(Loc, CalleeGUID); 966 967 // Keep track of the indices we've visited. If we've already visited the 968 // current one, terminate the traversal. We will not discover any new 969 // caller-callee pair by continuing the traversal. 970 unsigned Offset = 971 std::distance(CallStackBase, Ptr) / sizeof(LinearFrameId); 972 if (Visited.test(Offset)) 973 break; 974 Visited.set(Offset); 975 976 Ptr += sizeof(LinearFrameId); 977 CalleeGUID = CallerGUID; 978 } 979 } 980 }; 981 982 struct IndexedMemProfData { 983 // A map to hold memprof data per function. The lower 64 bits obtained from 984 // the md5 hash of the function name is used to index into the map. 985 llvm::MapVector<GlobalValue::GUID, IndexedMemProfRecord> Records; 986 987 // A map to hold frame id to frame mappings. The mappings are used to 988 // convert IndexedMemProfRecord to MemProfRecords with frame information 989 // inline. 990 llvm::MapVector<FrameId, Frame> Frames; 991 992 // A map to hold call stack id to call stacks. 993 llvm::MapVector<CallStackId, llvm::SmallVector<FrameId>> CallStacks; 994 995 FrameId addFrame(const Frame &F) { 996 const FrameId Id = hashFrame(F); 997 Frames.try_emplace(Id, F); 998 return Id; 999 } 1000 1001 CallStackId addCallStack(ArrayRef<FrameId> CS) { 1002 CallStackId CSId = hashCallStack(CS); 1003 CallStacks.try_emplace(CSId, CS); 1004 return CSId; 1005 } 1006 1007 CallStackId addCallStack(SmallVector<FrameId> &&CS) { 1008 CallStackId CSId = hashCallStack(CS); 1009 CallStacks.try_emplace(CSId, std::move(CS)); 1010 return CSId; 1011 } 1012 1013 private: 1014 // Return a hash value based on the contents of the frame. Here we use a 1015 // cryptographic hash function to minimize the chance of hash collisions. We 1016 // do persist FrameIds as part of memprof formats up to Version 2, inclusive. 1017 // However, the deserializer never calls this function; it uses FrameIds 1018 // merely as keys to look up Frames proper. 1019 FrameId hashFrame(const Frame &F) const { 1020 llvm::HashBuilder<llvm::TruncatedBLAKE3<8>, llvm::endianness::little> 1021 HashBuilder; 1022 HashBuilder.add(F.Function, F.LineOffset, F.Column, F.IsInlineFrame); 1023 llvm::BLAKE3Result<8> Hash = HashBuilder.final(); 1024 FrameId Id; 1025 std::memcpy(&Id, Hash.data(), sizeof(Hash)); 1026 return Id; 1027 } 1028 1029 // Compute a CallStackId for a given call stack. 1030 CallStackId hashCallStack(ArrayRef<FrameId> CS) const; 1031 }; 1032 1033 // A convenience wrapper around FrameIdConverter and CallStackIdConverter for 1034 // tests. 1035 struct IndexedCallstackIdConveter { 1036 IndexedCallstackIdConveter() = delete; 1037 IndexedCallstackIdConveter(IndexedMemProfData &MemProfData) 1038 : FrameIdConv(MemProfData.Frames), 1039 CSIdConv(MemProfData.CallStacks, FrameIdConv) {} 1040 1041 // Delete the copy constructor and copy assignment operator to avoid a 1042 // situation where a copy of IndexedCallStackIdConverter gets an error in 1043 // LastUnmappedId while the original instance doesn't. 1044 IndexedCallstackIdConveter(const IndexedCallstackIdConveter &) = delete; 1045 IndexedCallstackIdConveter & 1046 operator=(const IndexedCallstackIdConveter &) = delete; 1047 1048 std::vector<Frame> operator()(CallStackId CSId) { return CSIdConv(CSId); } 1049 1050 FrameIdConverter<decltype(IndexedMemProfData::Frames)> FrameIdConv; 1051 CallStackIdConverter<decltype(IndexedMemProfData::CallStacks)> CSIdConv; 1052 }; 1053 1054 struct FrameStat { 1055 // The number of occurrences of a given FrameId. 1056 uint64_t Count = 0; 1057 // The sum of indexes where a given FrameId shows up. 1058 uint64_t PositionSum = 0; 1059 }; 1060 1061 // Compute a histogram of Frames in call stacks. 1062 template <typename FrameIdTy> 1063 llvm::DenseMap<FrameIdTy, FrameStat> 1064 computeFrameHistogram(llvm::MapVector<CallStackId, llvm::SmallVector<FrameIdTy>> 1065 &MemProfCallStackData); 1066 1067 // Construct a radix tree of call stacks. 1068 // 1069 // A set of call stacks might look like: 1070 // 1071 // CallStackId 1: f1 -> f2 -> f3 1072 // CallStackId 2: f1 -> f2 -> f4 -> f5 1073 // CallStackId 3: f1 -> f2 -> f4 -> f6 1074 // CallStackId 4: f7 -> f8 -> f9 1075 // 1076 // where each fn refers to a stack frame. 1077 // 1078 // Since we expect a lot of common prefixes, we can compress the call stacks 1079 // into a radix tree like: 1080 // 1081 // CallStackId 1: f1 -> f2 -> f3 1082 // | 1083 // CallStackId 2: +---> f4 -> f5 1084 // | 1085 // CallStackId 3: +---> f6 1086 // 1087 // CallStackId 4: f7 -> f8 -> f9 1088 // 1089 // Now, we are interested in retrieving call stacks for a given CallStackId, so 1090 // we just need a pointer from a given call stack to its parent. For example, 1091 // CallStackId 2 would point to CallStackId 1 as a parent. 1092 // 1093 // We serialize the radix tree above into a single array along with the length 1094 // of each call stack and pointers to the parent call stacks. 1095 // 1096 // Index: 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 1097 // Array: L3 f9 f8 f7 L4 f6 J3 L4 f5 f4 J3 L3 f3 f2 f1 1098 // ^ ^ ^ ^ 1099 // | | | | 1100 // CallStackId 4: 0 --+ | | | 1101 // CallStackId 3: 4 --------------+ | | 1102 // CallStackId 2: 7 -----------------------+ | 1103 // CallStackId 1: 11 -----------------------------------+ 1104 // 1105 // - LN indicates the length of a call stack, encoded as ordinary integer N. 1106 // 1107 // - JN indicates a pointer to the parent, encoded as -N. 1108 // 1109 // The radix tree allows us to reconstruct call stacks in the leaf-to-root 1110 // order as we scan the array from left ro right while following pointers to 1111 // parents along the way. 1112 // 1113 // For example, if we are decoding CallStackId 2, we start a forward traversal 1114 // at Index 7, noting the call stack length of 4 and obtaining f5 and f4. When 1115 // we see J3 at Index 10, we resume a forward traversal at Index 13 = 10 + 3, 1116 // picking up f2 and f1. We are done after collecting 4 frames as indicated at 1117 // the beginning of the traversal. 1118 // 1119 // On-disk IndexedMemProfRecord will refer to call stacks by their indexes into 1120 // the radix tree array, so we do not explicitly encode mappings like: 1121 // "CallStackId 1 -> 11". 1122 template <typename FrameIdTy> class CallStackRadixTreeBuilder { 1123 // The radix tree array. 1124 std::vector<LinearFrameId> RadixArray; 1125 1126 // Mapping from CallStackIds to indexes into RadixArray. 1127 llvm::DenseMap<CallStackId, LinearCallStackId> CallStackPos; 1128 1129 // In build, we partition a given call stack into two parts -- the prefix 1130 // that's common with the previously encoded call stack and the frames beyond 1131 // the common prefix -- the unique portion. Then we want to find out where 1132 // the common prefix is stored in RadixArray so that we can link the unique 1133 // portion to the common prefix. Indexes, declared below, helps with our 1134 // needs. Intuitively, Indexes tells us where each of the previously encoded 1135 // call stack is stored in RadixArray. More formally, Indexes satisfies: 1136 // 1137 // RadixArray[Indexes[I]] == Prev[I] 1138 // 1139 // for every I, where Prev is the the call stack in the root-to-leaf order 1140 // previously encoded by build. (Note that Prev, as passed to 1141 // encodeCallStack, is in the leaf-to-root order.) 1142 // 1143 // For example, if the call stack being encoded shares 5 frames at the root of 1144 // the call stack with the previously encoded call stack, 1145 // RadixArray[Indexes[0]] is the root frame of the common prefix. 1146 // RadixArray[Indexes[5 - 1]] is the last frame of the common prefix. 1147 std::vector<LinearCallStackId> Indexes; 1148 1149 using CSIdPair = std::pair<CallStackId, llvm::SmallVector<FrameIdTy>>; 1150 1151 // Encode a call stack into RadixArray. Return the starting index within 1152 // RadixArray. 1153 LinearCallStackId encodeCallStack( 1154 const llvm::SmallVector<FrameIdTy> *CallStack, 1155 const llvm::SmallVector<FrameIdTy> *Prev, 1156 const llvm::DenseMap<FrameIdTy, LinearFrameId> *MemProfFrameIndexes); 1157 1158 public: 1159 CallStackRadixTreeBuilder() = default; 1160 1161 // Build a radix tree array. 1162 void 1163 build(llvm::MapVector<CallStackId, llvm::SmallVector<FrameIdTy>> 1164 &&MemProfCallStackData, 1165 const llvm::DenseMap<FrameIdTy, LinearFrameId> *MemProfFrameIndexes, 1166 llvm::DenseMap<FrameIdTy, FrameStat> &FrameHistogram); 1167 1168 ArrayRef<LinearFrameId> getRadixArray() const { return RadixArray; } 1169 1170 llvm::DenseMap<CallStackId, LinearCallStackId> takeCallStackPos() { 1171 return std::move(CallStackPos); 1172 } 1173 }; 1174 } // namespace memprof 1175 } // namespace llvm 1176 1177 #endif // LLVM_PROFILEDATA_MEMPROF_H_ 1178