1 //===- SampleProf.h - Sampling profiling format support ---------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file contains common definitions used in the reading and writing of 10 // sample profile data. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #ifndef LLVM_PROFILEDATA_SAMPLEPROF_H 15 #define LLVM_PROFILEDATA_SAMPLEPROF_H 16 17 #include "llvm/ADT/DenseSet.h" 18 #include "llvm/ADT/SmallVector.h" 19 #include "llvm/ADT/StringExtras.h" 20 #include "llvm/ADT/StringRef.h" 21 #include "llvm/IR/Function.h" 22 #include "llvm/IR/GlobalValue.h" 23 #include "llvm/ProfileData/FunctionId.h" 24 #include "llvm/Support/Allocator.h" 25 #include "llvm/Support/Debug.h" 26 #include "llvm/Support/ErrorOr.h" 27 #include "llvm/Support/MathExtras.h" 28 #include "llvm/ProfileData/HashKeyMap.h" 29 #include <algorithm> 30 #include <cstdint> 31 #include <list> 32 #include <map> 33 #include <set> 34 #include <sstream> 35 #include <string> 36 #include <system_error> 37 #include <unordered_map> 38 #include <utility> 39 40 namespace llvm { 41 42 class DILocation; 43 class raw_ostream; 44 45 const std::error_category &sampleprof_category(); 46 47 enum class sampleprof_error { 48 success = 0, 49 bad_magic, 50 unsupported_version, 51 too_large, 52 truncated, 53 malformed, 54 unrecognized_format, 55 unsupported_writing_format, 56 truncated_name_table, 57 not_implemented, 58 counter_overflow, 59 ostream_seek_unsupported, 60 uncompress_failed, 61 zlib_unavailable, 62 hash_mismatch 63 }; 64 65 inline std::error_code make_error_code(sampleprof_error E) { 66 return std::error_code(static_cast<int>(E), sampleprof_category()); 67 } 68 69 inline sampleprof_error mergeSampleProfErrors(sampleprof_error &Accumulator, 70 sampleprof_error Result) { 71 // Prefer first error encountered as later errors may be secondary effects of 72 // the initial problem. 73 if (Accumulator == sampleprof_error::success && 74 Result != sampleprof_error::success) 75 Accumulator = Result; 76 return Accumulator; 77 } 78 79 } // end namespace llvm 80 81 namespace std { 82 83 template <> 84 struct is_error_code_enum<llvm::sampleprof_error> : std::true_type {}; 85 86 } // end namespace std 87 88 namespace llvm { 89 namespace sampleprof { 90 91 enum SampleProfileFormat { 92 SPF_None = 0, 93 SPF_Text = 0x1, 94 SPF_Compact_Binary = 0x2, // Deprecated 95 SPF_GCC = 0x3, 96 SPF_Ext_Binary = 0x4, 97 SPF_Binary = 0xff 98 }; 99 100 enum SampleProfileLayout { 101 SPL_None = 0, 102 SPL_Nest = 0x1, 103 SPL_Flat = 0x2, 104 }; 105 106 static inline uint64_t SPMagic(SampleProfileFormat Format = SPF_Binary) { 107 return uint64_t('S') << (64 - 8) | uint64_t('P') << (64 - 16) | 108 uint64_t('R') << (64 - 24) | uint64_t('O') << (64 - 32) | 109 uint64_t('F') << (64 - 40) | uint64_t('4') << (64 - 48) | 110 uint64_t('2') << (64 - 56) | uint64_t(Format); 111 } 112 113 static inline uint64_t SPVersion() { return 103; } 114 115 // Section Type used by SampleProfileExtBinaryBaseReader and 116 // SampleProfileExtBinaryBaseWriter. Never change the existing 117 // value of enum. Only append new ones. 118 enum SecType { 119 SecInValid = 0, 120 SecProfSummary = 1, 121 SecNameTable = 2, 122 SecProfileSymbolList = 3, 123 SecFuncOffsetTable = 4, 124 SecFuncMetadata = 5, 125 SecCSNameTable = 6, 126 // marker for the first type of profile. 127 SecFuncProfileFirst = 32, 128 SecLBRProfile = SecFuncProfileFirst 129 }; 130 131 static inline std::string getSecName(SecType Type) { 132 switch (static_cast<int>(Type)) { // Avoid -Wcovered-switch-default 133 case SecInValid: 134 return "InvalidSection"; 135 case SecProfSummary: 136 return "ProfileSummarySection"; 137 case SecNameTable: 138 return "NameTableSection"; 139 case SecProfileSymbolList: 140 return "ProfileSymbolListSection"; 141 case SecFuncOffsetTable: 142 return "FuncOffsetTableSection"; 143 case SecFuncMetadata: 144 return "FunctionMetadata"; 145 case SecCSNameTable: 146 return "CSNameTableSection"; 147 case SecLBRProfile: 148 return "LBRProfileSection"; 149 default: 150 return "UnknownSection"; 151 } 152 } 153 154 // Entry type of section header table used by SampleProfileExtBinaryBaseReader 155 // and SampleProfileExtBinaryBaseWriter. 156 struct SecHdrTableEntry { 157 SecType Type; 158 uint64_t Flags; 159 uint64_t Offset; 160 uint64_t Size; 161 // The index indicating the location of the current entry in 162 // SectionHdrLayout table. 163 uint64_t LayoutIndex; 164 }; 165 166 // Flags common for all sections are defined here. In SecHdrTableEntry::Flags, 167 // common flags will be saved in the lower 32bits and section specific flags 168 // will be saved in the higher 32 bits. 169 enum class SecCommonFlags : uint32_t { 170 SecFlagInValid = 0, 171 SecFlagCompress = (1 << 0), 172 // Indicate the section contains only profile without context. 173 SecFlagFlat = (1 << 1) 174 }; 175 176 // Section specific flags are defined here. 177 // !!!Note: Everytime a new enum class is created here, please add 178 // a new check in verifySecFlag. 179 enum class SecNameTableFlags : uint32_t { 180 SecFlagInValid = 0, 181 SecFlagMD5Name = (1 << 0), 182 // Store MD5 in fixed length instead of ULEB128 so NameTable can be 183 // accessed like an array. 184 SecFlagFixedLengthMD5 = (1 << 1), 185 // Profile contains ".__uniq." suffix name. Compiler shouldn't strip 186 // the suffix when doing profile matching when seeing the flag. 187 SecFlagUniqSuffix = (1 << 2) 188 }; 189 enum class SecProfSummaryFlags : uint32_t { 190 SecFlagInValid = 0, 191 /// SecFlagPartial means the profile is for common/shared code. 192 /// The common profile is usually merged from profiles collected 193 /// from running other targets. 194 SecFlagPartial = (1 << 0), 195 /// SecFlagContext means this is context-sensitive flat profile for 196 /// CSSPGO 197 SecFlagFullContext = (1 << 1), 198 /// SecFlagFSDiscriminator means this profile uses flow-sensitive 199 /// discriminators. 200 SecFlagFSDiscriminator = (1 << 2), 201 /// SecFlagIsPreInlined means this profile contains ShouldBeInlined 202 /// contexts thus this is CS preinliner computed. 203 SecFlagIsPreInlined = (1 << 4), 204 }; 205 206 enum class SecFuncMetadataFlags : uint32_t { 207 SecFlagInvalid = 0, 208 SecFlagIsProbeBased = (1 << 0), 209 SecFlagHasAttribute = (1 << 1), 210 }; 211 212 enum class SecFuncOffsetFlags : uint32_t { 213 SecFlagInvalid = 0, 214 // Store function offsets in an order of contexts. The order ensures that 215 // callee contexts of a given context laid out next to it. 216 SecFlagOrdered = (1 << 0), 217 }; 218 219 // Verify section specific flag is used for the correct section. 220 template <class SecFlagType> 221 static inline void verifySecFlag(SecType Type, SecFlagType Flag) { 222 // No verification is needed for common flags. 223 if (std::is_same<SecCommonFlags, SecFlagType>()) 224 return; 225 226 // Verification starts here for section specific flag. 227 bool IsFlagLegal = false; 228 switch (Type) { 229 case SecNameTable: 230 IsFlagLegal = std::is_same<SecNameTableFlags, SecFlagType>(); 231 break; 232 case SecProfSummary: 233 IsFlagLegal = std::is_same<SecProfSummaryFlags, SecFlagType>(); 234 break; 235 case SecFuncMetadata: 236 IsFlagLegal = std::is_same<SecFuncMetadataFlags, SecFlagType>(); 237 break; 238 default: 239 case SecFuncOffsetTable: 240 IsFlagLegal = std::is_same<SecFuncOffsetFlags, SecFlagType>(); 241 break; 242 } 243 if (!IsFlagLegal) 244 llvm_unreachable("Misuse of a flag in an incompatible section"); 245 } 246 247 template <class SecFlagType> 248 static inline void addSecFlag(SecHdrTableEntry &Entry, SecFlagType Flag) { 249 verifySecFlag(Entry.Type, Flag); 250 auto FVal = static_cast<uint64_t>(Flag); 251 bool IsCommon = std::is_same<SecCommonFlags, SecFlagType>(); 252 Entry.Flags |= IsCommon ? FVal : (FVal << 32); 253 } 254 255 template <class SecFlagType> 256 static inline void removeSecFlag(SecHdrTableEntry &Entry, SecFlagType Flag) { 257 verifySecFlag(Entry.Type, Flag); 258 auto FVal = static_cast<uint64_t>(Flag); 259 bool IsCommon = std::is_same<SecCommonFlags, SecFlagType>(); 260 Entry.Flags &= ~(IsCommon ? FVal : (FVal << 32)); 261 } 262 263 template <class SecFlagType> 264 static inline bool hasSecFlag(const SecHdrTableEntry &Entry, SecFlagType Flag) { 265 verifySecFlag(Entry.Type, Flag); 266 auto FVal = static_cast<uint64_t>(Flag); 267 bool IsCommon = std::is_same<SecCommonFlags, SecFlagType>(); 268 return Entry.Flags & (IsCommon ? FVal : (FVal << 32)); 269 } 270 271 /// Represents the relative location of an instruction. 272 /// 273 /// Instruction locations are specified by the line offset from the 274 /// beginning of the function (marked by the line where the function 275 /// header is) and the discriminator value within that line. 276 /// 277 /// The discriminator value is useful to distinguish instructions 278 /// that are on the same line but belong to different basic blocks 279 /// (e.g., the two post-increment instructions in "if (p) x++; else y++;"). 280 struct LineLocation { 281 LineLocation(uint32_t L, uint32_t D) : LineOffset(L), Discriminator(D) {} 282 283 void print(raw_ostream &OS) const; 284 void dump() const; 285 286 bool operator<(const LineLocation &O) const { 287 return LineOffset < O.LineOffset || 288 (LineOffset == O.LineOffset && Discriminator < O.Discriminator); 289 } 290 291 bool operator==(const LineLocation &O) const { 292 return LineOffset == O.LineOffset && Discriminator == O.Discriminator; 293 } 294 295 bool operator!=(const LineLocation &O) const { 296 return LineOffset != O.LineOffset || Discriminator != O.Discriminator; 297 } 298 299 uint64_t getHashCode() const { 300 return ((uint64_t) Discriminator << 32) | LineOffset; 301 } 302 303 uint32_t LineOffset; 304 uint32_t Discriminator; 305 }; 306 307 struct LineLocationHash { 308 uint64_t operator()(const LineLocation &Loc) const { 309 return Loc.getHashCode(); 310 } 311 }; 312 313 raw_ostream &operator<<(raw_ostream &OS, const LineLocation &Loc); 314 315 /// Representation of a single sample record. 316 /// 317 /// A sample record is represented by a positive integer value, which 318 /// indicates how frequently was the associated line location executed. 319 /// 320 /// Additionally, if the associated location contains a function call, 321 /// the record will hold a list of all the possible called targets. For 322 /// direct calls, this will be the exact function being invoked. For 323 /// indirect calls (function pointers, virtual table dispatch), this 324 /// will be a list of one or more functions. 325 class SampleRecord { 326 public: 327 using CallTarget = std::pair<FunctionId, uint64_t>; 328 struct CallTargetComparator { 329 bool operator()(const CallTarget &LHS, const CallTarget &RHS) const { 330 if (LHS.second != RHS.second) 331 return LHS.second > RHS.second; 332 333 return LHS.first < RHS.first; 334 } 335 }; 336 337 using SortedCallTargetSet = std::set<CallTarget, CallTargetComparator>; 338 using CallTargetMap = std::unordered_map<FunctionId, uint64_t>; 339 SampleRecord() = default; 340 341 /// Increment the number of samples for this record by \p S. 342 /// Optionally scale sample count \p S by \p Weight. 343 /// 344 /// Sample counts accumulate using saturating arithmetic, to avoid wrapping 345 /// around unsigned integers. 346 sampleprof_error addSamples(uint64_t S, uint64_t Weight = 1) { 347 bool Overflowed; 348 NumSamples = SaturatingMultiplyAdd(S, Weight, NumSamples, &Overflowed); 349 return Overflowed ? sampleprof_error::counter_overflow 350 : sampleprof_error::success; 351 } 352 353 /// Decrease the number of samples for this record by \p S. Return the amout 354 /// of samples actually decreased. 355 uint64_t removeSamples(uint64_t S) { 356 if (S > NumSamples) 357 S = NumSamples; 358 NumSamples -= S; 359 return S; 360 } 361 362 /// Add called function \p F with samples \p S. 363 /// Optionally scale sample count \p S by \p Weight. 364 /// 365 /// Sample counts accumulate using saturating arithmetic, to avoid wrapping 366 /// around unsigned integers. 367 sampleprof_error addCalledTarget(FunctionId F, uint64_t S, 368 uint64_t Weight = 1) { 369 uint64_t &TargetSamples = CallTargets[F]; 370 bool Overflowed; 371 TargetSamples = 372 SaturatingMultiplyAdd(S, Weight, TargetSamples, &Overflowed); 373 return Overflowed ? sampleprof_error::counter_overflow 374 : sampleprof_error::success; 375 } 376 377 /// Remove called function from the call target map. Return the target sample 378 /// count of the called function. 379 uint64_t removeCalledTarget(FunctionId F) { 380 uint64_t Count = 0; 381 auto I = CallTargets.find(F); 382 if (I != CallTargets.end()) { 383 Count = I->second; 384 CallTargets.erase(I); 385 } 386 return Count; 387 } 388 389 /// Return true if this sample record contains function calls. 390 bool hasCalls() const { return !CallTargets.empty(); } 391 392 uint64_t getSamples() const { return NumSamples; } 393 const CallTargetMap &getCallTargets() const { return CallTargets; } 394 const SortedCallTargetSet getSortedCallTargets() const { 395 return sortCallTargets(CallTargets); 396 } 397 398 uint64_t getCallTargetSum() const { 399 uint64_t Sum = 0; 400 for (const auto &I : CallTargets) 401 Sum += I.second; 402 return Sum; 403 } 404 405 /// Sort call targets in descending order of call frequency. 406 static const SortedCallTargetSet 407 sortCallTargets(const CallTargetMap &Targets) { 408 SortedCallTargetSet SortedTargets; 409 for (const auto &[Target, Frequency] : Targets) { 410 SortedTargets.emplace(Target, Frequency); 411 } 412 return SortedTargets; 413 } 414 415 /// Prorate call targets by a distribution factor. 416 static const CallTargetMap adjustCallTargets(const CallTargetMap &Targets, 417 float DistributionFactor) { 418 CallTargetMap AdjustedTargets; 419 for (const auto &[Target, Frequency] : Targets) { 420 AdjustedTargets[Target] = Frequency * DistributionFactor; 421 } 422 return AdjustedTargets; 423 } 424 425 /// Merge the samples in \p Other into this record. 426 /// Optionally scale sample counts by \p Weight. 427 sampleprof_error merge(const SampleRecord &Other, uint64_t Weight = 1); 428 void print(raw_ostream &OS, unsigned Indent) const; 429 void dump() const; 430 431 bool operator==(const SampleRecord &Other) const { 432 return NumSamples == Other.NumSamples && CallTargets == Other.CallTargets; 433 } 434 435 bool operator!=(const SampleRecord &Other) const { 436 return !(*this == Other); 437 } 438 439 private: 440 uint64_t NumSamples = 0; 441 CallTargetMap CallTargets; 442 }; 443 444 raw_ostream &operator<<(raw_ostream &OS, const SampleRecord &Sample); 445 446 // State of context associated with FunctionSamples 447 enum ContextStateMask { 448 UnknownContext = 0x0, // Profile without context 449 RawContext = 0x1, // Full context profile from input profile 450 SyntheticContext = 0x2, // Synthetic context created for context promotion 451 InlinedContext = 0x4, // Profile for context that is inlined into caller 452 MergedContext = 0x8 // Profile for context merged into base profile 453 }; 454 455 // Attribute of context associated with FunctionSamples 456 enum ContextAttributeMask { 457 ContextNone = 0x0, 458 ContextWasInlined = 0x1, // Leaf of context was inlined in previous build 459 ContextShouldBeInlined = 0x2, // Leaf of context should be inlined 460 ContextDuplicatedIntoBase = 461 0x4, // Leaf of context is duplicated into the base profile 462 }; 463 464 // Represents a context frame with profile function and line location 465 struct SampleContextFrame { 466 FunctionId Func; 467 LineLocation Location; 468 469 SampleContextFrame() : Location(0, 0) {} 470 471 SampleContextFrame(FunctionId Func, LineLocation Location) 472 : Func(Func), Location(Location) {} 473 474 bool operator==(const SampleContextFrame &That) const { 475 return Location == That.Location && Func == That.Func; 476 } 477 478 bool operator!=(const SampleContextFrame &That) const { 479 return !(*this == That); 480 } 481 482 std::string toString(bool OutputLineLocation) const { 483 std::ostringstream OContextStr; 484 OContextStr << Func.str(); 485 if (OutputLineLocation) { 486 OContextStr << ":" << Location.LineOffset; 487 if (Location.Discriminator) 488 OContextStr << "." << Location.Discriminator; 489 } 490 return OContextStr.str(); 491 } 492 493 uint64_t getHashCode() const { 494 uint64_t NameHash = Func.getHashCode(); 495 uint64_t LocId = Location.getHashCode(); 496 return NameHash + (LocId << 5) + LocId; 497 } 498 }; 499 500 static inline hash_code hash_value(const SampleContextFrame &arg) { 501 return arg.getHashCode(); 502 } 503 504 using SampleContextFrameVector = SmallVector<SampleContextFrame, 1>; 505 using SampleContextFrames = ArrayRef<SampleContextFrame>; 506 507 struct SampleContextFrameHash { 508 uint64_t operator()(const SampleContextFrameVector &S) const { 509 return hash_combine_range(S.begin(), S.end()); 510 } 511 }; 512 513 // Sample context for FunctionSamples. It consists of the calling context, 514 // the function name and context state. Internally sample context is represented 515 // using ArrayRef, which is also the input for constructing a `SampleContext`. 516 // It can accept and represent both full context string as well as context-less 517 // function name. 518 // For a CS profile, a full context vector can look like: 519 // `main:3 _Z5funcAi:1 _Z8funcLeafi` 520 // For a base CS profile without calling context, the context vector should only 521 // contain the leaf frame name. 522 // For a non-CS profile, the context vector should be empty. 523 class SampleContext { 524 public: 525 SampleContext() : State(UnknownContext), Attributes(ContextNone) {} 526 527 SampleContext(StringRef Name) 528 : Func(Name), State(UnknownContext), Attributes(ContextNone) { 529 assert(!Name.empty() && "Name is empty"); 530 } 531 532 SampleContext(FunctionId Func) 533 : Func(Func), State(UnknownContext), Attributes(ContextNone) {} 534 535 SampleContext(SampleContextFrames Context, 536 ContextStateMask CState = RawContext) 537 : Attributes(ContextNone) { 538 assert(!Context.empty() && "Context is empty"); 539 setContext(Context, CState); 540 } 541 542 // Give a context string, decode and populate internal states like 543 // Function name, Calling context and context state. Example of input 544 // `ContextStr`: `[main:3 @ _Z5funcAi:1 @ _Z8funcLeafi]` 545 SampleContext(StringRef ContextStr, 546 std::list<SampleContextFrameVector> &CSNameTable, 547 ContextStateMask CState = RawContext) 548 : Attributes(ContextNone) { 549 assert(!ContextStr.empty()); 550 // Note that `[]` wrapped input indicates a full context string, otherwise 551 // it's treated as context-less function name only. 552 bool HasContext = ContextStr.starts_with("["); 553 if (!HasContext) { 554 State = UnknownContext; 555 Func = FunctionId(ContextStr); 556 } else { 557 CSNameTable.emplace_back(); 558 SampleContextFrameVector &Context = CSNameTable.back(); 559 createCtxVectorFromStr(ContextStr, Context); 560 setContext(Context, CState); 561 } 562 } 563 564 /// Create a context vector from a given context string and save it in 565 /// `Context`. 566 static void createCtxVectorFromStr(StringRef ContextStr, 567 SampleContextFrameVector &Context) { 568 // Remove encapsulating '[' and ']' if any 569 ContextStr = ContextStr.substr(1, ContextStr.size() - 2); 570 StringRef ContextRemain = ContextStr; 571 StringRef ChildContext; 572 FunctionId Callee; 573 while (!ContextRemain.empty()) { 574 auto ContextSplit = ContextRemain.split(" @ "); 575 ChildContext = ContextSplit.first; 576 ContextRemain = ContextSplit.second; 577 LineLocation CallSiteLoc(0, 0); 578 decodeContextString(ChildContext, Callee, CallSiteLoc); 579 Context.emplace_back(Callee, CallSiteLoc); 580 } 581 } 582 583 // Decode context string for a frame to get function name and location. 584 // `ContextStr` is in the form of `FuncName:StartLine.Discriminator`. 585 static void decodeContextString(StringRef ContextStr, 586 FunctionId &Func, 587 LineLocation &LineLoc) { 588 // Get function name 589 auto EntrySplit = ContextStr.split(':'); 590 Func = FunctionId(EntrySplit.first); 591 592 LineLoc = {0, 0}; 593 if (!EntrySplit.second.empty()) { 594 // Get line offset, use signed int for getAsInteger so string will 595 // be parsed as signed. 596 int LineOffset = 0; 597 auto LocSplit = EntrySplit.second.split('.'); 598 LocSplit.first.getAsInteger(10, LineOffset); 599 LineLoc.LineOffset = LineOffset; 600 601 // Get discriminator 602 if (!LocSplit.second.empty()) 603 LocSplit.second.getAsInteger(10, LineLoc.Discriminator); 604 } 605 } 606 607 operator SampleContextFrames() const { return FullContext; } 608 bool hasAttribute(ContextAttributeMask A) { return Attributes & (uint32_t)A; } 609 void setAttribute(ContextAttributeMask A) { Attributes |= (uint32_t)A; } 610 uint32_t getAllAttributes() { return Attributes; } 611 void setAllAttributes(uint32_t A) { Attributes = A; } 612 bool hasState(ContextStateMask S) { return State & (uint32_t)S; } 613 void setState(ContextStateMask S) { State |= (uint32_t)S; } 614 void clearState(ContextStateMask S) { State &= (uint32_t)~S; } 615 bool hasContext() const { return State != UnknownContext; } 616 bool isBaseContext() const { return FullContext.size() == 1; } 617 FunctionId getFunction() const { return Func; } 618 SampleContextFrames getContextFrames() const { return FullContext; } 619 620 static std::string getContextString(SampleContextFrames Context, 621 bool IncludeLeafLineLocation = false) { 622 std::ostringstream OContextStr; 623 for (uint32_t I = 0; I < Context.size(); I++) { 624 if (OContextStr.str().size()) { 625 OContextStr << " @ "; 626 } 627 OContextStr << Context[I].toString(I != Context.size() - 1 || 628 IncludeLeafLineLocation); 629 } 630 return OContextStr.str(); 631 } 632 633 std::string toString() const { 634 if (!hasContext()) 635 return Func.str(); 636 return getContextString(FullContext, false); 637 } 638 639 uint64_t getHashCode() const { 640 if (hasContext()) 641 return hash_value(getContextFrames()); 642 return getFunction().getHashCode(); 643 } 644 645 /// Set the name of the function and clear the current context. 646 void setFunction(FunctionId NewFunctionID) { 647 Func = NewFunctionID; 648 FullContext = SampleContextFrames(); 649 State = UnknownContext; 650 } 651 652 void setContext(SampleContextFrames Context, 653 ContextStateMask CState = RawContext) { 654 assert(CState != UnknownContext); 655 FullContext = Context; 656 Func = Context.back().Func; 657 State = CState; 658 } 659 660 bool operator==(const SampleContext &That) const { 661 return State == That.State && Func == That.Func && 662 FullContext == That.FullContext; 663 } 664 665 bool operator!=(const SampleContext &That) const { return !(*this == That); } 666 667 bool operator<(const SampleContext &That) const { 668 if (State != That.State) 669 return State < That.State; 670 671 if (!hasContext()) { 672 return Func < That.Func; 673 } 674 675 uint64_t I = 0; 676 while (I < std::min(FullContext.size(), That.FullContext.size())) { 677 auto &Context1 = FullContext[I]; 678 auto &Context2 = That.FullContext[I]; 679 auto V = Context1.Func.compare(Context2.Func); 680 if (V) 681 return V < 0; 682 if (Context1.Location != Context2.Location) 683 return Context1.Location < Context2.Location; 684 I++; 685 } 686 687 return FullContext.size() < That.FullContext.size(); 688 } 689 690 struct Hash { 691 uint64_t operator()(const SampleContext &Context) const { 692 return Context.getHashCode(); 693 } 694 }; 695 696 bool isPrefixOf(const SampleContext &That) const { 697 auto ThisContext = FullContext; 698 auto ThatContext = That.FullContext; 699 if (ThatContext.size() < ThisContext.size()) 700 return false; 701 ThatContext = ThatContext.take_front(ThisContext.size()); 702 // Compare Leaf frame first 703 if (ThisContext.back().Func != ThatContext.back().Func) 704 return false; 705 // Compare leading context 706 return ThisContext.drop_back() == ThatContext.drop_back(); 707 } 708 709 private: 710 // The function associated with this context. If CS profile, this is the leaf 711 // function. 712 FunctionId Func; 713 // Full context including calling context and leaf function name 714 SampleContextFrames FullContext; 715 // State of the associated sample profile 716 uint32_t State; 717 // Attribute of the associated sample profile 718 uint32_t Attributes; 719 }; 720 721 static inline hash_code hash_value(const SampleContext &Context) { 722 return Context.getHashCode(); 723 } 724 725 inline raw_ostream &operator<<(raw_ostream &OS, const SampleContext &Context) { 726 return OS << Context.toString(); 727 } 728 729 class FunctionSamples; 730 class SampleProfileReaderItaniumRemapper; 731 732 using BodySampleMap = std::map<LineLocation, SampleRecord>; 733 // NOTE: Using a StringMap here makes parsed profiles consume around 17% more 734 // memory, which is *very* significant for large profiles. 735 using FunctionSamplesMap = std::map<FunctionId, FunctionSamples>; 736 using CallsiteSampleMap = std::map<LineLocation, FunctionSamplesMap>; 737 using LocToLocMap = 738 std::unordered_map<LineLocation, LineLocation, LineLocationHash>; 739 740 /// Representation of the samples collected for a function. 741 /// 742 /// This data structure contains all the collected samples for the body 743 /// of a function. Each sample corresponds to a LineLocation instance 744 /// within the body of the function. 745 class FunctionSamples { 746 public: 747 FunctionSamples() = default; 748 749 void print(raw_ostream &OS = dbgs(), unsigned Indent = 0) const; 750 void dump() const; 751 752 sampleprof_error addTotalSamples(uint64_t Num, uint64_t Weight = 1) { 753 bool Overflowed; 754 TotalSamples = 755 SaturatingMultiplyAdd(Num, Weight, TotalSamples, &Overflowed); 756 return Overflowed ? sampleprof_error::counter_overflow 757 : sampleprof_error::success; 758 } 759 760 void removeTotalSamples(uint64_t Num) { 761 if (TotalSamples < Num) 762 TotalSamples = 0; 763 else 764 TotalSamples -= Num; 765 } 766 767 void setTotalSamples(uint64_t Num) { TotalSamples = Num; } 768 769 void setHeadSamples(uint64_t Num) { TotalHeadSamples = Num; } 770 771 sampleprof_error addHeadSamples(uint64_t Num, uint64_t Weight = 1) { 772 bool Overflowed; 773 TotalHeadSamples = 774 SaturatingMultiplyAdd(Num, Weight, TotalHeadSamples, &Overflowed); 775 return Overflowed ? sampleprof_error::counter_overflow 776 : sampleprof_error::success; 777 } 778 779 sampleprof_error addBodySamples(uint32_t LineOffset, uint32_t Discriminator, 780 uint64_t Num, uint64_t Weight = 1) { 781 return BodySamples[LineLocation(LineOffset, Discriminator)].addSamples( 782 Num, Weight); 783 } 784 785 sampleprof_error addCalledTargetSamples(uint32_t LineOffset, 786 uint32_t Discriminator, 787 FunctionId Func, 788 uint64_t Num, 789 uint64_t Weight = 1) { 790 return BodySamples[LineLocation(LineOffset, Discriminator)].addCalledTarget( 791 Func, Num, Weight); 792 } 793 794 sampleprof_error addSampleRecord(LineLocation Location, 795 const SampleRecord &SampleRecord, 796 uint64_t Weight = 1) { 797 return BodySamples[Location].merge(SampleRecord, Weight); 798 } 799 800 // Remove a call target and decrease the body sample correspondingly. Return 801 // the number of body samples actually decreased. 802 uint64_t removeCalledTargetAndBodySample(uint32_t LineOffset, 803 uint32_t Discriminator, 804 FunctionId Func) { 805 uint64_t Count = 0; 806 auto I = BodySamples.find(LineLocation(LineOffset, Discriminator)); 807 if (I != BodySamples.end()) { 808 Count = I->second.removeCalledTarget(Func); 809 Count = I->second.removeSamples(Count); 810 if (!I->second.getSamples()) 811 BodySamples.erase(I); 812 } 813 return Count; 814 } 815 816 // Remove all call site samples for inlinees. This is needed when flattening 817 // a nested profile. 818 void removeAllCallsiteSamples() { 819 CallsiteSamples.clear(); 820 } 821 822 // Accumulate all call target samples to update the body samples. 823 void updateCallsiteSamples() { 824 for (auto &I : BodySamples) { 825 uint64_t TargetSamples = I.second.getCallTargetSum(); 826 // It's possible that the body sample count can be greater than the call 827 // target sum. E.g, if some call targets are external targets, they won't 828 // be considered valid call targets, but the body sample count which is 829 // from lbr ranges can actually include them. 830 if (TargetSamples > I.second.getSamples()) 831 I.second.addSamples(TargetSamples - I.second.getSamples()); 832 } 833 } 834 835 // Accumulate all body samples to set total samples. 836 void updateTotalSamples() { 837 setTotalSamples(0); 838 for (const auto &I : BodySamples) 839 addTotalSamples(I.second.getSamples()); 840 841 for (auto &I : CallsiteSamples) { 842 for (auto &CS : I.second) { 843 CS.second.updateTotalSamples(); 844 addTotalSamples(CS.second.getTotalSamples()); 845 } 846 } 847 } 848 849 // Set current context and all callee contexts to be synthetic. 850 void setContextSynthetic() { 851 Context.setState(SyntheticContext); 852 for (auto &I : CallsiteSamples) { 853 for (auto &CS : I.second) { 854 CS.second.setContextSynthetic(); 855 } 856 } 857 } 858 859 // Query the stale profile matching results and remap the location. 860 const LineLocation &mapIRLocToProfileLoc(const LineLocation &IRLoc) const { 861 // There is no remapping if the profile is not stale or the matching gives 862 // the same location. 863 if (!IRToProfileLocationMap) 864 return IRLoc; 865 const auto &ProfileLoc = IRToProfileLocationMap->find(IRLoc); 866 if (ProfileLoc != IRToProfileLocationMap->end()) 867 return ProfileLoc->second; 868 return IRLoc; 869 } 870 871 /// Return the number of samples collected at the given location. 872 /// Each location is specified by \p LineOffset and \p Discriminator. 873 /// If the location is not found in profile, return error. 874 ErrorOr<uint64_t> findSamplesAt(uint32_t LineOffset, 875 uint32_t Discriminator) const { 876 const auto &Ret = BodySamples.find( 877 mapIRLocToProfileLoc(LineLocation(LineOffset, Discriminator))); 878 if (Ret == BodySamples.end()) 879 return std::error_code(); 880 return Ret->second.getSamples(); 881 } 882 883 /// Returns the call target map collected at a given location. 884 /// Each location is specified by \p LineOffset and \p Discriminator. 885 /// If the location is not found in profile, return error. 886 ErrorOr<const SampleRecord::CallTargetMap &> 887 findCallTargetMapAt(uint32_t LineOffset, uint32_t Discriminator) const { 888 const auto &Ret = BodySamples.find( 889 mapIRLocToProfileLoc(LineLocation(LineOffset, Discriminator))); 890 if (Ret == BodySamples.end()) 891 return std::error_code(); 892 return Ret->second.getCallTargets(); 893 } 894 895 /// Returns the call target map collected at a given location specified by \p 896 /// CallSite. If the location is not found in profile, return error. 897 ErrorOr<const SampleRecord::CallTargetMap &> 898 findCallTargetMapAt(const LineLocation &CallSite) const { 899 const auto &Ret = BodySamples.find(mapIRLocToProfileLoc(CallSite)); 900 if (Ret == BodySamples.end()) 901 return std::error_code(); 902 return Ret->second.getCallTargets(); 903 } 904 905 /// Return the function samples at the given callsite location. 906 FunctionSamplesMap &functionSamplesAt(const LineLocation &Loc) { 907 return CallsiteSamples[mapIRLocToProfileLoc(Loc)]; 908 } 909 910 /// Returns the FunctionSamplesMap at the given \p Loc. 911 const FunctionSamplesMap * 912 findFunctionSamplesMapAt(const LineLocation &Loc) const { 913 auto Iter = CallsiteSamples.find(mapIRLocToProfileLoc(Loc)); 914 if (Iter == CallsiteSamples.end()) 915 return nullptr; 916 return &Iter->second; 917 } 918 919 /// Returns a pointer to FunctionSamples at the given callsite location 920 /// \p Loc with callee \p CalleeName. If no callsite can be found, relax 921 /// the restriction to return the FunctionSamples at callsite location 922 /// \p Loc with the maximum total sample count. If \p Remapper or \p 923 /// FuncNameToProfNameMap is not nullptr, use them to find FunctionSamples 924 /// with equivalent name as \p CalleeName. 925 const FunctionSamples *findFunctionSamplesAt( 926 const LineLocation &Loc, StringRef CalleeName, 927 SampleProfileReaderItaniumRemapper *Remapper, 928 const HashKeyMap<std::unordered_map, FunctionId, FunctionId> 929 *FuncNameToProfNameMap = nullptr) const; 930 931 bool empty() const { return TotalSamples == 0; } 932 933 /// Return the total number of samples collected inside the function. 934 uint64_t getTotalSamples() const { return TotalSamples; } 935 936 /// For top-level functions, return the total number of branch samples that 937 /// have the function as the branch target (or 0 otherwise). This is the raw 938 /// data fetched from the profile. This should be equivalent to the sample of 939 /// the first instruction of the symbol. But as we directly get this info for 940 /// raw profile without referring to potentially inaccurate debug info, this 941 /// gives more accurate profile data and is preferred for standalone symbols. 942 uint64_t getHeadSamples() const { return TotalHeadSamples; } 943 944 /// Return an estimate of the sample count of the function entry basic block. 945 /// The function can be either a standalone symbol or an inlined function. 946 /// For Context-Sensitive profiles, this will prefer returning the head 947 /// samples (i.e. getHeadSamples()), if non-zero. Otherwise it estimates from 948 /// the function body's samples or callsite samples. 949 uint64_t getHeadSamplesEstimate() const { 950 if (FunctionSamples::ProfileIsCS && getHeadSamples()) { 951 // For CS profile, if we already have more accurate head samples 952 // counted by branch sample from caller, use them as entry samples. 953 return getHeadSamples(); 954 } 955 uint64_t Count = 0; 956 // Use either BodySamples or CallsiteSamples which ever has the smaller 957 // lineno. 958 if (!BodySamples.empty() && 959 (CallsiteSamples.empty() || 960 BodySamples.begin()->first < CallsiteSamples.begin()->first)) 961 Count = BodySamples.begin()->second.getSamples(); 962 else if (!CallsiteSamples.empty()) { 963 // An indirect callsite may be promoted to several inlined direct calls. 964 // We need to get the sum of them. 965 for (const auto &FuncSamples : CallsiteSamples.begin()->second) 966 Count += FuncSamples.second.getHeadSamplesEstimate(); 967 } 968 // Return at least 1 if total sample is not 0. 969 return Count ? Count : TotalSamples > 0; 970 } 971 972 /// Return all the samples collected in the body of the function. 973 const BodySampleMap &getBodySamples() const { return BodySamples; } 974 975 /// Return all the callsite samples collected in the body of the function. 976 const CallsiteSampleMap &getCallsiteSamples() const { 977 return CallsiteSamples; 978 } 979 980 /// Return the maximum of sample counts in a function body. When SkipCallSite 981 /// is false, which is the default, the return count includes samples in the 982 /// inlined functions. When SkipCallSite is true, the return count only 983 /// considers the body samples. 984 uint64_t getMaxCountInside(bool SkipCallSite = false) const { 985 uint64_t MaxCount = 0; 986 for (const auto &L : getBodySamples()) 987 MaxCount = std::max(MaxCount, L.second.getSamples()); 988 if (SkipCallSite) 989 return MaxCount; 990 for (const auto &C : getCallsiteSamples()) 991 for (const FunctionSamplesMap::value_type &F : C.second) 992 MaxCount = std::max(MaxCount, F.second.getMaxCountInside()); 993 return MaxCount; 994 } 995 996 /// Merge the samples in \p Other into this one. 997 /// Optionally scale samples by \p Weight. 998 sampleprof_error merge(const FunctionSamples &Other, uint64_t Weight = 1) { 999 sampleprof_error Result = sampleprof_error::success; 1000 if (!GUIDToFuncNameMap) 1001 GUIDToFuncNameMap = Other.GUIDToFuncNameMap; 1002 if (Context.getFunction().empty()) 1003 Context = Other.getContext(); 1004 if (FunctionHash == 0) { 1005 // Set the function hash code for the target profile. 1006 FunctionHash = Other.getFunctionHash(); 1007 } else if (FunctionHash != Other.getFunctionHash()) { 1008 // The two profiles coming with different valid hash codes indicates 1009 // either: 1010 // 1. They are same-named static functions from different compilation 1011 // units (without using -unique-internal-linkage-names), or 1012 // 2. They are really the same function but from different compilations. 1013 // Let's bail out in either case for now, which means one profile is 1014 // dropped. 1015 return sampleprof_error::hash_mismatch; 1016 } 1017 1018 mergeSampleProfErrors(Result, 1019 addTotalSamples(Other.getTotalSamples(), Weight)); 1020 mergeSampleProfErrors(Result, 1021 addHeadSamples(Other.getHeadSamples(), Weight)); 1022 for (const auto &I : Other.getBodySamples()) { 1023 const LineLocation &Loc = I.first; 1024 const SampleRecord &Rec = I.second; 1025 mergeSampleProfErrors(Result, BodySamples[Loc].merge(Rec, Weight)); 1026 } 1027 for (const auto &I : Other.getCallsiteSamples()) { 1028 const LineLocation &Loc = I.first; 1029 FunctionSamplesMap &FSMap = functionSamplesAt(Loc); 1030 for (const auto &Rec : I.second) 1031 mergeSampleProfErrors(Result, 1032 FSMap[Rec.first].merge(Rec.second, Weight)); 1033 } 1034 return Result; 1035 } 1036 1037 /// Recursively traverses all children, if the total sample count of the 1038 /// corresponding function is no less than \p Threshold, add its corresponding 1039 /// GUID to \p S. Also traverse the BodySamples to add hot CallTarget's GUID 1040 /// to \p S. 1041 void findInlinedFunctions(DenseSet<GlobalValue::GUID> &S, 1042 const HashKeyMap<std::unordered_map, FunctionId, 1043 Function *> &SymbolMap, 1044 uint64_t Threshold) const { 1045 if (TotalSamples <= Threshold) 1046 return; 1047 auto IsDeclaration = [](const Function *F) { 1048 return !F || F->isDeclaration(); 1049 }; 1050 if (IsDeclaration(SymbolMap.lookup(getFunction()))) { 1051 // Add to the import list only when it's defined out of module. 1052 S.insert(getGUID()); 1053 } 1054 // Import hot CallTargets, which may not be available in IR because full 1055 // profile annotation cannot be done until backend compilation in ThinLTO. 1056 for (const auto &BS : BodySamples) 1057 for (const auto &TS : BS.second.getCallTargets()) 1058 if (TS.second > Threshold) { 1059 const Function *Callee = SymbolMap.lookup(TS.first); 1060 if (IsDeclaration(Callee)) 1061 S.insert(TS.first.getHashCode()); 1062 } 1063 for (const auto &CS : CallsiteSamples) 1064 for (const auto &NameFS : CS.second) 1065 NameFS.second.findInlinedFunctions(S, SymbolMap, Threshold); 1066 } 1067 1068 /// Set the name of the function. 1069 void setFunction(FunctionId NewFunctionID) { 1070 Context.setFunction(NewFunctionID); 1071 } 1072 1073 /// Return the function name. 1074 FunctionId getFunction() const { return Context.getFunction(); } 1075 1076 /// Return the original function name. 1077 StringRef getFuncName() const { return getFuncName(getFunction()); } 1078 1079 void setFunctionHash(uint64_t Hash) { FunctionHash = Hash; } 1080 1081 uint64_t getFunctionHash() const { return FunctionHash; } 1082 1083 void setIRToProfileLocationMap(const LocToLocMap *LTLM) { 1084 assert(IRToProfileLocationMap == nullptr && "this should be set only once"); 1085 IRToProfileLocationMap = LTLM; 1086 } 1087 1088 /// Return the canonical name for a function, taking into account 1089 /// suffix elision policy attributes. 1090 static StringRef getCanonicalFnName(const Function &F) { 1091 const char *AttrName = "sample-profile-suffix-elision-policy"; 1092 auto Attr = F.getFnAttribute(AttrName).getValueAsString(); 1093 return getCanonicalFnName(F.getName(), Attr); 1094 } 1095 1096 /// Name suffixes which canonicalization should handle to avoid 1097 /// profile mismatch. 1098 static constexpr const char *LLVMSuffix = ".llvm."; 1099 static constexpr const char *PartSuffix = ".part."; 1100 static constexpr const char *UniqSuffix = ".__uniq."; 1101 1102 static StringRef getCanonicalFnName(StringRef FnName, 1103 StringRef Attr = "selected") { 1104 // Note the sequence of the suffixes in the knownSuffixes array matters. 1105 // If suffix "A" is appended after the suffix "B", "A" should be in front 1106 // of "B" in knownSuffixes. 1107 const char *KnownSuffixes[] = {LLVMSuffix, PartSuffix, UniqSuffix}; 1108 if (Attr == "" || Attr == "all") 1109 return FnName.split('.').first; 1110 if (Attr == "selected") { 1111 StringRef Cand(FnName); 1112 for (const auto &Suf : KnownSuffixes) { 1113 StringRef Suffix(Suf); 1114 // If the profile contains ".__uniq." suffix, don't strip the 1115 // suffix for names in the IR. 1116 if (Suffix == UniqSuffix && FunctionSamples::HasUniqSuffix) 1117 continue; 1118 auto It = Cand.rfind(Suffix); 1119 if (It == StringRef::npos) 1120 continue; 1121 auto Dit = Cand.rfind('.'); 1122 if (Dit == It + Suffix.size() - 1) 1123 Cand = Cand.substr(0, It); 1124 } 1125 return Cand; 1126 } 1127 if (Attr == "none") 1128 return FnName; 1129 assert(false && "internal error: unknown suffix elision policy"); 1130 return FnName; 1131 } 1132 1133 /// Translate \p Func into its original name. 1134 /// When profile doesn't use MD5, \p Func needs no translation. 1135 /// When profile uses MD5, \p Func in current FunctionSamples 1136 /// is actually GUID of the original function name. getFuncName will 1137 /// translate \p Func in current FunctionSamples into its original name 1138 /// by looking up in the function map GUIDToFuncNameMap. 1139 /// If the original name doesn't exist in the map, return empty StringRef. 1140 StringRef getFuncName(FunctionId Func) const { 1141 if (!UseMD5) 1142 return Func.stringRef(); 1143 1144 assert(GUIDToFuncNameMap && "GUIDToFuncNameMap needs to be populated first"); 1145 return GUIDToFuncNameMap->lookup(Func.getHashCode()); 1146 } 1147 1148 /// Returns the line offset to the start line of the subprogram. 1149 /// We assume that a single function will not exceed 65535 LOC. 1150 static unsigned getOffset(const DILocation *DIL); 1151 1152 /// Returns a unique call site identifier for a given debug location of a call 1153 /// instruction. This is wrapper of two scenarios, the probe-based profile and 1154 /// regular profile, to hide implementation details from the sample loader and 1155 /// the context tracker. 1156 static LineLocation getCallSiteIdentifier(const DILocation *DIL, 1157 bool ProfileIsFS = false); 1158 1159 /// Returns a unique hash code for a combination of a callsite location and 1160 /// the callee function name. 1161 /// Guarantee MD5 and non-MD5 representation of the same function results in 1162 /// the same hash. 1163 static uint64_t getCallSiteHash(FunctionId Callee, 1164 const LineLocation &Callsite) { 1165 return SampleContextFrame(Callee, Callsite).getHashCode(); 1166 } 1167 1168 /// Get the FunctionSamples of the inline instance where DIL originates 1169 /// from. 1170 /// 1171 /// The FunctionSamples of the instruction (Machine or IR) associated to 1172 /// \p DIL is the inlined instance in which that instruction is coming from. 1173 /// We traverse the inline stack of that instruction, and match it with the 1174 /// tree nodes in the profile. 1175 /// 1176 /// \returns the FunctionSamples pointer to the inlined instance. 1177 /// If \p Remapper or \p FuncNameToProfNameMap is not nullptr, it will be used 1178 /// to find matching FunctionSamples with not exactly the same but equivalent 1179 /// name. 1180 const FunctionSamples *findFunctionSamples( 1181 const DILocation *DIL, 1182 SampleProfileReaderItaniumRemapper *Remapper = nullptr, 1183 const HashKeyMap<std::unordered_map, FunctionId, FunctionId> 1184 *FuncNameToProfNameMap = nullptr) const; 1185 1186 static bool ProfileIsProbeBased; 1187 1188 static bool ProfileIsCS; 1189 1190 static bool ProfileIsPreInlined; 1191 1192 SampleContext &getContext() const { return Context; } 1193 1194 void setContext(const SampleContext &FContext) { Context = FContext; } 1195 1196 /// Whether the profile uses MD5 to represent string. 1197 static bool UseMD5; 1198 1199 /// Whether the profile contains any ".__uniq." suffix in a name. 1200 static bool HasUniqSuffix; 1201 1202 /// If this profile uses flow sensitive discriminators. 1203 static bool ProfileIsFS; 1204 1205 /// GUIDToFuncNameMap saves the mapping from GUID to the symbol name, for 1206 /// all the function symbols defined or declared in current module. 1207 DenseMap<uint64_t, StringRef> *GUIDToFuncNameMap = nullptr; 1208 1209 /// Return the GUID of the context's name. If the context is already using 1210 /// MD5, don't hash it again. 1211 uint64_t getGUID() const { 1212 return getFunction().getHashCode(); 1213 } 1214 1215 // Find all the names in the current FunctionSamples including names in 1216 // all the inline instances and names of call targets. 1217 void findAllNames(DenseSet<FunctionId> &NameSet) const; 1218 1219 bool operator==(const FunctionSamples &Other) const { 1220 return (GUIDToFuncNameMap == Other.GUIDToFuncNameMap || 1221 (GUIDToFuncNameMap && Other.GUIDToFuncNameMap && 1222 *GUIDToFuncNameMap == *Other.GUIDToFuncNameMap)) && 1223 FunctionHash == Other.FunctionHash && Context == Other.Context && 1224 TotalSamples == Other.TotalSamples && 1225 TotalHeadSamples == Other.TotalHeadSamples && 1226 BodySamples == Other.BodySamples && 1227 CallsiteSamples == Other.CallsiteSamples; 1228 } 1229 1230 bool operator!=(const FunctionSamples &Other) const { 1231 return !(*this == Other); 1232 } 1233 1234 private: 1235 /// CFG hash value for the function. 1236 uint64_t FunctionHash = 0; 1237 1238 /// Calling context for function profile 1239 mutable SampleContext Context; 1240 1241 /// Total number of samples collected inside this function. 1242 /// 1243 /// Samples are cumulative, they include all the samples collected 1244 /// inside this function and all its inlined callees. 1245 uint64_t TotalSamples = 0; 1246 1247 /// Total number of samples collected at the head of the function. 1248 /// This is an approximation of the number of calls made to this function 1249 /// at runtime. 1250 uint64_t TotalHeadSamples = 0; 1251 1252 /// Map instruction locations to collected samples. 1253 /// 1254 /// Each entry in this map contains the number of samples 1255 /// collected at the corresponding line offset. All line locations 1256 /// are an offset from the start of the function. 1257 BodySampleMap BodySamples; 1258 1259 /// Map call sites to collected samples for the called function. 1260 /// 1261 /// Each entry in this map corresponds to all the samples 1262 /// collected for the inlined function call at the given 1263 /// location. For example, given: 1264 /// 1265 /// void foo() { 1266 /// 1 bar(); 1267 /// ... 1268 /// 8 baz(); 1269 /// } 1270 /// 1271 /// If the bar() and baz() calls were inlined inside foo(), this 1272 /// map will contain two entries. One for all the samples collected 1273 /// in the call to bar() at line offset 1, the other for all the samples 1274 /// collected in the call to baz() at line offset 8. 1275 CallsiteSampleMap CallsiteSamples; 1276 1277 /// IR to profile location map generated by stale profile matching. 1278 /// 1279 /// Each entry is a mapping from the location on current build to the matched 1280 /// location in the "stale" profile. For example: 1281 /// Profiled source code: 1282 /// void foo() { 1283 /// 1 bar(); 1284 /// } 1285 /// 1286 /// Current source code: 1287 /// void foo() { 1288 /// 1 // Code change 1289 /// 2 bar(); 1290 /// } 1291 /// Supposing the stale profile matching algorithm generated the mapping [2 -> 1292 /// 1], the profile query using the location of bar on the IR which is 2 will 1293 /// be remapped to 1 and find the location of bar in the profile. 1294 const LocToLocMap *IRToProfileLocationMap = nullptr; 1295 }; 1296 1297 /// Get the proper representation of a string according to whether the 1298 /// current Format uses MD5 to represent the string. 1299 static inline FunctionId getRepInFormat(StringRef Name) { 1300 if (Name.empty() || !FunctionSamples::UseMD5) 1301 return FunctionId(Name); 1302 return FunctionId(Function::getGUID(Name)); 1303 } 1304 1305 raw_ostream &operator<<(raw_ostream &OS, const FunctionSamples &FS); 1306 1307 /// This class provides operator overloads to the map container using MD5 as the 1308 /// key type, so that existing code can still work in most cases using 1309 /// SampleContext as key. 1310 /// Note: when populating container, make sure to assign the SampleContext to 1311 /// the mapped value immediately because the key no longer holds it. 1312 class SampleProfileMap 1313 : public HashKeyMap<std::unordered_map, SampleContext, FunctionSamples> { 1314 public: 1315 // Convenience method because this is being used in many places. Set the 1316 // FunctionSamples' context if its newly inserted. 1317 mapped_type &create(const SampleContext &Ctx) { 1318 auto Ret = try_emplace(Ctx, FunctionSamples()); 1319 if (Ret.second) 1320 Ret.first->second.setContext(Ctx); 1321 return Ret.first->second; 1322 } 1323 1324 iterator find(const SampleContext &Ctx) { 1325 return HashKeyMap<std::unordered_map, SampleContext, FunctionSamples>::find( 1326 Ctx); 1327 } 1328 1329 const_iterator find(const SampleContext &Ctx) const { 1330 return HashKeyMap<std::unordered_map, SampleContext, FunctionSamples>::find( 1331 Ctx); 1332 } 1333 1334 size_t erase(const SampleContext &Ctx) { 1335 return HashKeyMap<std::unordered_map, SampleContext, FunctionSamples>:: 1336 erase(Ctx); 1337 } 1338 1339 size_t erase(const key_type &Key) { return base_type::erase(Key); } 1340 1341 iterator erase(iterator It) { return base_type::erase(It); } 1342 }; 1343 1344 using NameFunctionSamples = std::pair<hash_code, const FunctionSamples *>; 1345 1346 void sortFuncProfiles(const SampleProfileMap &ProfileMap, 1347 std::vector<NameFunctionSamples> &SortedProfiles); 1348 1349 /// Sort a LocationT->SampleT map by LocationT. 1350 /// 1351 /// It produces a sorted list of <LocationT, SampleT> records by ascending 1352 /// order of LocationT. 1353 template <class LocationT, class SampleT> class SampleSorter { 1354 public: 1355 using SamplesWithLoc = std::pair<const LocationT, SampleT>; 1356 using SamplesWithLocList = SmallVector<const SamplesWithLoc *, 20>; 1357 1358 SampleSorter(const std::map<LocationT, SampleT> &Samples) { 1359 for (const auto &I : Samples) 1360 V.push_back(&I); 1361 llvm::stable_sort(V, [](const SamplesWithLoc *A, const SamplesWithLoc *B) { 1362 return A->first < B->first; 1363 }); 1364 } 1365 1366 const SamplesWithLocList &get() const { return V; } 1367 1368 private: 1369 SamplesWithLocList V; 1370 }; 1371 1372 /// SampleContextTrimmer impelements helper functions to trim, merge cold 1373 /// context profiles. It also supports context profile canonicalization to make 1374 /// sure ProfileMap's key is consistent with FunctionSample's name/context. 1375 class SampleContextTrimmer { 1376 public: 1377 SampleContextTrimmer(SampleProfileMap &Profiles) : ProfileMap(Profiles){}; 1378 // Trim and merge cold context profile when requested. TrimBaseProfileOnly 1379 // should only be effective when TrimColdContext is true. On top of 1380 // TrimColdContext, TrimBaseProfileOnly can be used to specify to trim all 1381 // cold profiles or only cold base profiles. Trimming base profiles only is 1382 // mainly to honor the preinliner decsion. Note that when MergeColdContext is 1383 // true, preinliner decsion is not honored anyway so TrimBaseProfileOnly will 1384 // be ignored. 1385 void trimAndMergeColdContextProfiles(uint64_t ColdCountThreshold, 1386 bool TrimColdContext, 1387 bool MergeColdContext, 1388 uint32_t ColdContextFrameLength, 1389 bool TrimBaseProfileOnly); 1390 1391 private: 1392 SampleProfileMap &ProfileMap; 1393 }; 1394 1395 /// Helper class for profile conversion. 1396 /// 1397 /// It supports full context-sensitive profile to nested profile conversion, 1398 /// nested profile to flatten profile conversion, etc. 1399 class ProfileConverter { 1400 public: 1401 ProfileConverter(SampleProfileMap &Profiles); 1402 // Convert a full context-sensitive flat sample profile into a nested sample 1403 // profile. 1404 void convertCSProfiles(); 1405 struct FrameNode { 1406 FrameNode(FunctionId FName = FunctionId(), 1407 FunctionSamples *FSamples = nullptr, 1408 LineLocation CallLoc = {0, 0}) 1409 : FuncName(FName), FuncSamples(FSamples), CallSiteLoc(CallLoc){}; 1410 1411 // Map line+discriminator location to child frame 1412 std::map<uint64_t, FrameNode> AllChildFrames; 1413 // Function name for current frame 1414 FunctionId FuncName; 1415 // Function Samples for current frame 1416 FunctionSamples *FuncSamples; 1417 // Callsite location in parent context 1418 LineLocation CallSiteLoc; 1419 1420 FrameNode *getOrCreateChildFrame(const LineLocation &CallSite, 1421 FunctionId CalleeName); 1422 }; 1423 1424 static void flattenProfile(SampleProfileMap &ProfileMap, 1425 bool ProfileIsCS = false) { 1426 SampleProfileMap TmpProfiles; 1427 flattenProfile(ProfileMap, TmpProfiles, ProfileIsCS); 1428 ProfileMap = std::move(TmpProfiles); 1429 } 1430 1431 static void flattenProfile(const SampleProfileMap &InputProfiles, 1432 SampleProfileMap &OutputProfiles, 1433 bool ProfileIsCS = false) { 1434 if (ProfileIsCS) { 1435 for (const auto &I : InputProfiles) { 1436 // Retain the profile name and clear the full context for each function 1437 // profile. 1438 FunctionSamples &FS = OutputProfiles.create(I.second.getFunction()); 1439 FS.merge(I.second); 1440 } 1441 } else { 1442 for (const auto &I : InputProfiles) 1443 flattenNestedProfile(OutputProfiles, I.second); 1444 } 1445 } 1446 1447 private: 1448 static void flattenNestedProfile(SampleProfileMap &OutputProfiles, 1449 const FunctionSamples &FS) { 1450 // To retain the context, checksum, attributes of the original profile, make 1451 // a copy of it if no profile is found. 1452 SampleContext &Context = FS.getContext(); 1453 auto Ret = OutputProfiles.try_emplace(Context, FS); 1454 FunctionSamples &Profile = Ret.first->second; 1455 if (Ret.second) { 1456 // Clear nested inlinees' samples for the flattened copy. These inlinees 1457 // will have their own top-level entries after flattening. 1458 Profile.removeAllCallsiteSamples(); 1459 // We recompute TotalSamples later, so here set to zero. 1460 Profile.setTotalSamples(0); 1461 } else { 1462 for (const auto &[LineLocation, SampleRecord] : FS.getBodySamples()) { 1463 Profile.addSampleRecord(LineLocation, SampleRecord); 1464 } 1465 } 1466 1467 assert(Profile.getCallsiteSamples().empty() && 1468 "There should be no inlinees' profiles after flattening."); 1469 1470 // TotalSamples might not be equal to the sum of all samples from 1471 // BodySamples and CallsiteSamples. So here we use "TotalSamples = 1472 // Original_TotalSamples - All_of_Callsite_TotalSamples + 1473 // All_of_Callsite_HeadSamples" to compute the new TotalSamples. 1474 uint64_t TotalSamples = FS.getTotalSamples(); 1475 1476 for (const auto &I : FS.getCallsiteSamples()) { 1477 for (const auto &Callee : I.second) { 1478 const auto &CalleeProfile = Callee.second; 1479 // Add body sample. 1480 Profile.addBodySamples(I.first.LineOffset, I.first.Discriminator, 1481 CalleeProfile.getHeadSamplesEstimate()); 1482 // Add callsite sample. 1483 Profile.addCalledTargetSamples( 1484 I.first.LineOffset, I.first.Discriminator, 1485 CalleeProfile.getFunction(), 1486 CalleeProfile.getHeadSamplesEstimate()); 1487 // Update total samples. 1488 TotalSamples = TotalSamples >= CalleeProfile.getTotalSamples() 1489 ? TotalSamples - CalleeProfile.getTotalSamples() 1490 : 0; 1491 TotalSamples += CalleeProfile.getHeadSamplesEstimate(); 1492 // Recursively convert callee profile. 1493 flattenNestedProfile(OutputProfiles, CalleeProfile); 1494 } 1495 } 1496 Profile.addTotalSamples(TotalSamples); 1497 1498 Profile.setHeadSamples(Profile.getHeadSamplesEstimate()); 1499 } 1500 1501 // Nest all children profiles into the profile of Node. 1502 void convertCSProfiles(FrameNode &Node); 1503 FrameNode *getOrCreateContextPath(const SampleContext &Context); 1504 1505 SampleProfileMap &ProfileMap; 1506 FrameNode RootFrame; 1507 }; 1508 1509 /// ProfileSymbolList records the list of function symbols shown up 1510 /// in the binary used to generate the profile. It is useful to 1511 /// to discriminate a function being so cold as not to shown up 1512 /// in the profile and a function newly added. 1513 class ProfileSymbolList { 1514 public: 1515 /// copy indicates whether we need to copy the underlying memory 1516 /// for the input Name. 1517 void add(StringRef Name, bool Copy = false) { 1518 if (!Copy) { 1519 Syms.insert(Name); 1520 return; 1521 } 1522 Syms.insert(Name.copy(Allocator)); 1523 } 1524 1525 bool contains(StringRef Name) { return Syms.count(Name); } 1526 1527 void merge(const ProfileSymbolList &List) { 1528 for (auto Sym : List.Syms) 1529 add(Sym, true); 1530 } 1531 1532 unsigned size() { return Syms.size(); } 1533 1534 void setToCompress(bool TC) { ToCompress = TC; } 1535 bool toCompress() { return ToCompress; } 1536 1537 std::error_code read(const uint8_t *Data, uint64_t ListSize); 1538 std::error_code write(raw_ostream &OS); 1539 void dump(raw_ostream &OS = dbgs()) const; 1540 1541 private: 1542 // Determine whether or not to compress the symbol list when 1543 // writing it into profile. The variable is unused when the symbol 1544 // list is read from an existing profile. 1545 bool ToCompress = false; 1546 DenseSet<StringRef> Syms; 1547 BumpPtrAllocator Allocator; 1548 }; 1549 1550 } // end namespace sampleprof 1551 1552 using namespace sampleprof; 1553 // Provide DenseMapInfo for SampleContext. 1554 template <> struct DenseMapInfo<SampleContext> { 1555 static inline SampleContext getEmptyKey() { return SampleContext(); } 1556 1557 static inline SampleContext getTombstoneKey() { 1558 return SampleContext(FunctionId(~1ULL)); 1559 } 1560 1561 static unsigned getHashValue(const SampleContext &Val) { 1562 return Val.getHashCode(); 1563 } 1564 1565 static bool isEqual(const SampleContext &LHS, const SampleContext &RHS) { 1566 return LHS == RHS; 1567 } 1568 }; 1569 1570 // Prepend "__uniq" before the hash for tools like profilers to understand 1571 // that this symbol is of internal linkage type. The "__uniq" is the 1572 // pre-determined prefix that is used to tell tools that this symbol was 1573 // created with -funique-internal-linkage-symbols and the tools can strip or 1574 // keep the prefix as needed. 1575 inline std::string getUniqueInternalLinkagePostfix(const StringRef &FName) { 1576 llvm::MD5 Md5; 1577 Md5.update(FName); 1578 llvm::MD5::MD5Result R; 1579 Md5.final(R); 1580 SmallString<32> Str; 1581 llvm::MD5::stringifyResult(R, Str); 1582 // Convert MD5hash to Decimal. Demangler suffixes can either contain 1583 // numbers or characters but not both. 1584 llvm::APInt IntHash(128, Str.str(), 16); 1585 return toString(IntHash, /* Radix = */ 10, /* Signed = */ false) 1586 .insert(0, FunctionSamples::UniqSuffix); 1587 } 1588 1589 } // end namespace llvm 1590 1591 #endif // LLVM_PROFILEDATA_SAMPLEPROF_H 1592