1 //===- SampleProf.h - Sampling profiling format support ---------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file contains common definitions used in the reading and writing of
10 // sample profile data.
11 //
12 //===----------------------------------------------------------------------===//
13
14 #ifndef LLVM_PROFILEDATA_SAMPLEPROF_H
15 #define LLVM_PROFILEDATA_SAMPLEPROF_H
16
17 #include "llvm/ADT/DenseSet.h"
18 #include "llvm/ADT/SmallVector.h"
19 #include "llvm/ADT/StringMap.h"
20 #include "llvm/ADT/StringRef.h"
21 #include "llvm/ADT/StringSet.h"
22 #include "llvm/IR/Function.h"
23 #include "llvm/IR/GlobalValue.h"
24 #include "llvm/IR/Module.h"
25 #include "llvm/Support/Allocator.h"
26 #include "llvm/Support/Debug.h"
27 #include "llvm/Support/ErrorOr.h"
28 #include "llvm/Support/MathExtras.h"
29 #include "llvm/Support/raw_ostream.h"
30 #include <algorithm>
31 #include <cstdint>
32 #include <map>
33 #include <set>
34 #include <string>
35 #include <system_error>
36 #include <utility>
37
38 namespace llvm {
39
40 const std::error_category &sampleprof_category();
41
42 enum class sampleprof_error {
43 success = 0,
44 bad_magic,
45 unsupported_version,
46 too_large,
47 truncated,
48 malformed,
49 unrecognized_format,
50 unsupported_writing_format,
51 truncated_name_table,
52 not_implemented,
53 counter_overflow,
54 ostream_seek_unsupported,
55 compress_failed,
56 uncompress_failed,
57 zlib_unavailable,
58 hash_mismatch
59 };
60
make_error_code(sampleprof_error E)61 inline std::error_code make_error_code(sampleprof_error E) {
62 return std::error_code(static_cast<int>(E), sampleprof_category());
63 }
64
MergeResult(sampleprof_error & Accumulator,sampleprof_error Result)65 inline sampleprof_error MergeResult(sampleprof_error &Accumulator,
66 sampleprof_error Result) {
67 // Prefer first error encountered as later errors may be secondary effects of
68 // the initial problem.
69 if (Accumulator == sampleprof_error::success &&
70 Result != sampleprof_error::success)
71 Accumulator = Result;
72 return Accumulator;
73 }
74
75 } // end namespace llvm
76
77 namespace std {
78
79 template <>
80 struct is_error_code_enum<llvm::sampleprof_error> : std::true_type {};
81
82 } // end namespace std
83
84 namespace llvm {
85 namespace sampleprof {
86
87 enum SampleProfileFormat {
88 SPF_None = 0,
89 SPF_Text = 0x1,
90 SPF_Compact_Binary = 0x2,
91 SPF_GCC = 0x3,
92 SPF_Ext_Binary = 0x4,
93 SPF_Binary = 0xff
94 };
95
96 static inline uint64_t SPMagic(SampleProfileFormat Format = SPF_Binary) {
97 return uint64_t('S') << (64 - 8) | uint64_t('P') << (64 - 16) |
98 uint64_t('R') << (64 - 24) | uint64_t('O') << (64 - 32) |
99 uint64_t('F') << (64 - 40) | uint64_t('4') << (64 - 48) |
100 uint64_t('2') << (64 - 56) | uint64_t(Format);
101 }
102
103 /// Get the proper representation of a string according to whether the
104 /// current Format uses MD5 to represent the string.
105 static inline StringRef getRepInFormat(StringRef Name, bool UseMD5,
106 std::string &GUIDBuf) {
107 if (Name.empty())
108 return Name;
109 GUIDBuf = std::to_string(Function::getGUID(Name));
110 return UseMD5 ? StringRef(GUIDBuf) : Name;
111 }
112
113 static inline uint64_t SPVersion() { return 103; }
114
115 // Section Type used by SampleProfileExtBinaryBaseReader and
116 // SampleProfileExtBinaryBaseWriter. Never change the existing
117 // value of enum. Only append new ones.
118 enum SecType {
119 SecInValid = 0,
120 SecProfSummary = 1,
121 SecNameTable = 2,
122 SecProfileSymbolList = 3,
123 SecFuncOffsetTable = 4,
124 SecFuncMetadata = 5,
125 // marker for the first type of profile.
126 SecFuncProfileFirst = 32,
127 SecLBRProfile = SecFuncProfileFirst
128 };
129
130 static inline std::string getSecName(SecType Type) {
131 switch (Type) {
132 case SecInValid:
133 return "InvalidSection";
134 case SecProfSummary:
135 return "ProfileSummarySection";
136 case SecNameTable:
137 return "NameTableSection";
138 case SecProfileSymbolList:
139 return "ProfileSymbolListSection";
140 case SecFuncOffsetTable:
141 return "FuncOffsetTableSection";
142 case SecFuncMetadata:
143 return "FunctionMetadata";
144 case SecLBRProfile:
145 return "LBRProfileSection";
146 }
147 llvm_unreachable("A SecType has no name for output");
148 }
149
150 // Entry type of section header table used by SampleProfileExtBinaryBaseReader
151 // and SampleProfileExtBinaryBaseWriter.
152 struct SecHdrTableEntry {
153 SecType Type;
154 uint64_t Flags;
155 uint64_t Offset;
156 uint64_t Size;
157 // The index indicating the location of the current entry in
158 // SectionHdrLayout table.
159 uint32_t LayoutIndex;
160 };
161
162 // Flags common for all sections are defined here. In SecHdrTableEntry::Flags,
163 // common flags will be saved in the lower 32bits and section specific flags
164 // will be saved in the higher 32 bits.
165 enum class SecCommonFlags : uint32_t {
166 SecFlagInValid = 0,
167 SecFlagCompress = (1 << 0),
168 // Indicate the section contains only profile without context.
169 SecFlagFlat = (1 << 1)
170 };
171
172 // Section specific flags are defined here.
173 // !!!Note: Everytime a new enum class is created here, please add
174 // a new check in verifySecFlag.
175 enum class SecNameTableFlags : uint32_t {
176 SecFlagInValid = 0,
177 SecFlagMD5Name = (1 << 0),
178 // Store MD5 in fixed length instead of ULEB128 so NameTable can be
179 // accessed like an array.
180 SecFlagFixedLengthMD5 = (1 << 1),
181 // Profile contains ".__uniq." suffix name. Compiler shouldn't strip
182 // the suffix when doing profile matching when seeing the flag.
183 SecFlagUniqSuffix = (1 << 2)
184 };
185 enum class SecProfSummaryFlags : uint32_t {
186 SecFlagInValid = 0,
187 /// SecFlagPartial means the profile is for common/shared code.
188 /// The common profile is usually merged from profiles collected
189 /// from running other targets.
190 SecFlagPartial = (1 << 0),
191 /// SecFlagContext means this is context-sensitive profile for
192 /// CSSPGO
193 SecFlagFullContext = (1 << 1)
194 };
195
196 enum class SecFuncMetadataFlags : uint32_t {
197 SecFlagInvalid = 0,
198 SecFlagIsProbeBased = (1 << 0),
199 SecFlagHasAttribute = (1 << 1)
200 };
201
202 // Verify section specific flag is used for the correct section.
203 template <class SecFlagType>
204 static inline void verifySecFlag(SecType Type, SecFlagType Flag) {
205 // No verification is needed for common flags.
206 if (std::is_same<SecCommonFlags, SecFlagType>())
207 return;
208
209 // Verification starts here for section specific flag.
210 bool IsFlagLegal = false;
211 switch (Type) {
212 case SecNameTable:
213 IsFlagLegal = std::is_same<SecNameTableFlags, SecFlagType>();
214 break;
215 case SecProfSummary:
216 IsFlagLegal = std::is_same<SecProfSummaryFlags, SecFlagType>();
217 break;
218 case SecFuncMetadata:
219 IsFlagLegal = std::is_same<SecFuncMetadataFlags, SecFlagType>();
220 break;
221 default:
222 break;
223 }
224 if (!IsFlagLegal)
225 llvm_unreachable("Misuse of a flag in an incompatible section");
226 }
227
228 template <class SecFlagType>
229 static inline void addSecFlag(SecHdrTableEntry &Entry, SecFlagType Flag) {
230 verifySecFlag(Entry.Type, Flag);
231 auto FVal = static_cast<uint64_t>(Flag);
232 bool IsCommon = std::is_same<SecCommonFlags, SecFlagType>();
233 Entry.Flags |= IsCommon ? FVal : (FVal << 32);
234 }
235
236 template <class SecFlagType>
237 static inline void removeSecFlag(SecHdrTableEntry &Entry, SecFlagType Flag) {
238 verifySecFlag(Entry.Type, Flag);
239 auto FVal = static_cast<uint64_t>(Flag);
240 bool IsCommon = std::is_same<SecCommonFlags, SecFlagType>();
241 Entry.Flags &= ~(IsCommon ? FVal : (FVal << 32));
242 }
243
244 template <class SecFlagType>
245 static inline bool hasSecFlag(const SecHdrTableEntry &Entry, SecFlagType Flag) {
246 verifySecFlag(Entry.Type, Flag);
247 auto FVal = static_cast<uint64_t>(Flag);
248 bool IsCommon = std::is_same<SecCommonFlags, SecFlagType>();
249 return Entry.Flags & (IsCommon ? FVal : (FVal << 32));
250 }
251
252 /// Represents the relative location of an instruction.
253 ///
254 /// Instruction locations are specified by the line offset from the
255 /// beginning of the function (marked by the line where the function
256 /// header is) and the discriminator value within that line.
257 ///
258 /// The discriminator value is useful to distinguish instructions
259 /// that are on the same line but belong to different basic blocks
260 /// (e.g., the two post-increment instructions in "if (p) x++; else y++;").
261 struct LineLocation {
262 LineLocation(uint32_t L, uint32_t D) : LineOffset(L), Discriminator(D) {}
263
264 void print(raw_ostream &OS) const;
265 void dump() const;
266
267 bool operator<(const LineLocation &O) const {
268 return LineOffset < O.LineOffset ||
269 (LineOffset == O.LineOffset && Discriminator < O.Discriminator);
270 }
271
272 bool operator==(const LineLocation &O) const {
273 return LineOffset == O.LineOffset && Discriminator == O.Discriminator;
274 }
275
276 bool operator!=(const LineLocation &O) const {
277 return LineOffset != O.LineOffset || Discriminator != O.Discriminator;
278 }
279
280 uint32_t LineOffset;
281 uint32_t Discriminator;
282 };
283
284 raw_ostream &operator<<(raw_ostream &OS, const LineLocation &Loc);
285
286 /// Representation of a single sample record.
287 ///
288 /// A sample record is represented by a positive integer value, which
289 /// indicates how frequently was the associated line location executed.
290 ///
291 /// Additionally, if the associated location contains a function call,
292 /// the record will hold a list of all the possible called targets. For
293 /// direct calls, this will be the exact function being invoked. For
294 /// indirect calls (function pointers, virtual table dispatch), this
295 /// will be a list of one or more functions.
296 class SampleRecord {
297 public:
298 using CallTarget = std::pair<StringRef, uint64_t>;
299 struct CallTargetComparator {
300 bool operator()(const CallTarget &LHS, const CallTarget &RHS) const {
301 if (LHS.second != RHS.second)
302 return LHS.second > RHS.second;
303
304 return LHS.first < RHS.first;
305 }
306 };
307
308 using SortedCallTargetSet = std::set<CallTarget, CallTargetComparator>;
309 using CallTargetMap = StringMap<uint64_t>;
310 SampleRecord() = default;
311
312 /// Increment the number of samples for this record by \p S.
313 /// Optionally scale sample count \p S by \p Weight.
314 ///
315 /// Sample counts accumulate using saturating arithmetic, to avoid wrapping
316 /// around unsigned integers.
317 sampleprof_error addSamples(uint64_t S, uint64_t Weight = 1) {
318 bool Overflowed;
319 NumSamples = SaturatingMultiplyAdd(S, Weight, NumSamples, &Overflowed);
320 return Overflowed ? sampleprof_error::counter_overflow
321 : sampleprof_error::success;
322 }
323
324 /// Add called function \p F with samples \p S.
325 /// Optionally scale sample count \p S by \p Weight.
326 ///
327 /// Sample counts accumulate using saturating arithmetic, to avoid wrapping
328 /// around unsigned integers.
329 sampleprof_error addCalledTarget(StringRef F, uint64_t S,
330 uint64_t Weight = 1) {
331 uint64_t &TargetSamples = CallTargets[F];
332 bool Overflowed;
333 TargetSamples =
334 SaturatingMultiplyAdd(S, Weight, TargetSamples, &Overflowed);
335 return Overflowed ? sampleprof_error::counter_overflow
336 : sampleprof_error::success;
337 }
338
339 /// Return true if this sample record contains function calls.
340 bool hasCalls() const { return !CallTargets.empty(); }
341
342 uint64_t getSamples() const { return NumSamples; }
343 const CallTargetMap &getCallTargets() const { return CallTargets; }
344 const SortedCallTargetSet getSortedCallTargets() const {
345 return SortCallTargets(CallTargets);
346 }
347
348 /// Sort call targets in descending order of call frequency.
349 static const SortedCallTargetSet SortCallTargets(const CallTargetMap &Targets) {
350 SortedCallTargetSet SortedTargets;
351 for (const auto &I : Targets) {
352 SortedTargets.emplace(I.first(), I.second);
353 }
354 return SortedTargets;
355 }
356
357 /// Prorate call targets by a distribution factor.
358 static const CallTargetMap adjustCallTargets(const CallTargetMap &Targets,
359 float DistributionFactor) {
360 CallTargetMap AdjustedTargets;
361 for (const auto &I : Targets) {
362 AdjustedTargets[I.first()] = I.second * DistributionFactor;
363 }
364 return AdjustedTargets;
365 }
366
367 /// Merge the samples in \p Other into this record.
368 /// Optionally scale sample counts by \p Weight.
369 sampleprof_error merge(const SampleRecord &Other, uint64_t Weight = 1);
370 void print(raw_ostream &OS, unsigned Indent) const;
371 void dump() const;
372
373 private:
374 uint64_t NumSamples = 0;
375 CallTargetMap CallTargets;
376 };
377
378 raw_ostream &operator<<(raw_ostream &OS, const SampleRecord &Sample);
379
380 // State of context associated with FunctionSamples
381 enum ContextStateMask {
382 UnknownContext = 0x0, // Profile without context
383 RawContext = 0x1, // Full context profile from input profile
384 SyntheticContext = 0x2, // Synthetic context created for context promotion
385 InlinedContext = 0x4, // Profile for context that is inlined into caller
386 MergedContext = 0x8 // Profile for context merged into base profile
387 };
388
389 // Attribute of context associated with FunctionSamples
390 enum ContextAttributeMask {
391 ContextNone = 0x0,
392 ContextWasInlined = 0x1, // Leaf of context was inlined in previous build
393 ContextShouldBeInlined = 0x2, // Leaf of context should be inlined
394 };
395
396 // Sample context for FunctionSamples. It consists of the calling context,
397 // the function name and context state. Internally sample context is represented
398 // using StringRef, which is also the input for constructing a `SampleContext`.
399 // It can accept and represent both full context string as well as context-less
400 // function name.
401 // Example of full context string (note the wrapping `[]`):
402 // `[main:3 @ _Z5funcAi:1 @ _Z8funcLeafi]`
403 // Example of context-less function name (same as AutoFDO):
404 // `_Z8funcLeafi`
405 class SampleContext {
406 public:
407 SampleContext() : State(UnknownContext), Attributes(ContextNone) {}
408 SampleContext(StringRef ContextStr, ContextStateMask CState = UnknownContext)
409 : Attributes(ContextNone) {
410 setContext(ContextStr, CState);
411 }
412
413 // Promote context by removing top frames (represented by `ContextStrToRemove`).
414 // Note that with string representation of context, the promotion is effectively
415 // a substr operation with `ContextStrToRemove` removed from left.
416 void promoteOnPath(StringRef ContextStrToRemove) {
417 assert(FullContext.startswith(ContextStrToRemove));
418
419 // Remove leading context and frame separator " @ ".
420 FullContext = FullContext.substr(ContextStrToRemove.size() + 3);
421 CallingContext = CallingContext.substr(ContextStrToRemove.size() + 3);
422 }
423
424 // Split the top context frame (left-most substr) from context.
425 static std::pair<StringRef, StringRef>
426 splitContextString(StringRef ContextStr) {
427 return ContextStr.split(" @ ");
428 }
429
430 // Decode context string for a frame to get function name and location.
431 // `ContextStr` is in the form of `FuncName:StartLine.Discriminator`.
432 static void decodeContextString(StringRef ContextStr, StringRef &FName,
433 LineLocation &LineLoc) {
434 // Get function name
435 auto EntrySplit = ContextStr.split(':');
436 FName = EntrySplit.first;
437
438 LineLoc = {0, 0};
439 if (!EntrySplit.second.empty()) {
440 // Get line offset, use signed int for getAsInteger so string will
441 // be parsed as signed.
442 int LineOffset = 0;
443 auto LocSplit = EntrySplit.second.split('.');
444 LocSplit.first.getAsInteger(10, LineOffset);
445 LineLoc.LineOffset = LineOffset;
446
447 // Get discriminator
448 if (!LocSplit.second.empty())
449 LocSplit.second.getAsInteger(10, LineLoc.Discriminator);
450 }
451 }
452
453 operator StringRef() const { return FullContext; }
454 bool hasAttribute(ContextAttributeMask A) { return Attributes & (uint32_t)A; }
455 void setAttribute(ContextAttributeMask A) { Attributes |= (uint32_t)A; }
456 uint32_t getAllAttributes() { return Attributes; }
457 void setAllAttributes(uint32_t A) { Attributes = A; }
458 bool hasState(ContextStateMask S) { return State & (uint32_t)S; }
459 void setState(ContextStateMask S) { State |= (uint32_t)S; }
460 void clearState(ContextStateMask S) { State &= (uint32_t)~S; }
461 bool hasContext() const { return State != UnknownContext; }
462 bool isBaseContext() const { return CallingContext.empty(); }
463 StringRef getNameWithoutContext() const { return Name; }
464 StringRef getCallingContext() const { return CallingContext; }
465 StringRef getNameWithContext() const { return FullContext; }
466
467 private:
468 // Give a context string, decode and populate internal states like
469 // Function name, Calling context and context state. Example of input
470 // `ContextStr`: `[main:3 @ _Z5funcAi:1 @ _Z8funcLeafi]`
471 void setContext(StringRef ContextStr, ContextStateMask CState) {
472 assert(!ContextStr.empty());
473 // Note that `[]` wrapped input indicates a full context string, otherwise
474 // it's treated as context-less function name only.
475 bool HasContext = ContextStr.startswith("[");
476 if (!HasContext && CState == UnknownContext) {
477 State = UnknownContext;
478 Name = FullContext = ContextStr;
479 } else {
480 // Assume raw context profile if unspecified
481 if (CState == UnknownContext)
482 State = RawContext;
483 else
484 State = CState;
485
486 // Remove encapsulating '[' and ']' if any
487 if (HasContext)
488 FullContext = ContextStr.substr(1, ContextStr.size() - 2);
489 else
490 FullContext = ContextStr;
491
492 // Caller is to the left of callee in context string
493 auto NameContext = FullContext.rsplit(" @ ");
494 if (NameContext.second.empty()) {
495 Name = NameContext.first;
496 CallingContext = NameContext.second;
497 } else {
498 Name = NameContext.second;
499 CallingContext = NameContext.first;
500 }
501 }
502 }
503
504 // Full context string including calling context and leaf function name
505 StringRef FullContext;
506 // Function name for the associated sample profile
507 StringRef Name;
508 // Calling context (leaf function excluded) for the associated sample profile
509 StringRef CallingContext;
510 // State of the associated sample profile
511 uint32_t State;
512 // Attribute of the associated sample profile
513 uint32_t Attributes;
514 };
515
516 class FunctionSamples;
517 class SampleProfileReaderItaniumRemapper;
518
519 using BodySampleMap = std::map<LineLocation, SampleRecord>;
520 // NOTE: Using a StringMap here makes parsed profiles consume around 17% more
521 // memory, which is *very* significant for large profiles.
522 using FunctionSamplesMap = std::map<std::string, FunctionSamples, std::less<>>;
523 using CallsiteSampleMap = std::map<LineLocation, FunctionSamplesMap>;
524
525 /// Representation of the samples collected for a function.
526 ///
527 /// This data structure contains all the collected samples for the body
528 /// of a function. Each sample corresponds to a LineLocation instance
529 /// within the body of the function.
530 class FunctionSamples {
531 public:
532 FunctionSamples() = default;
533
534 void print(raw_ostream &OS = dbgs(), unsigned Indent = 0) const;
535 void dump() const;
536
537 sampleprof_error addTotalSamples(uint64_t Num, uint64_t Weight = 1) {
538 bool Overflowed;
539 TotalSamples =
540 SaturatingMultiplyAdd(Num, Weight, TotalSamples, &Overflowed);
541 return Overflowed ? sampleprof_error::counter_overflow
542 : sampleprof_error::success;
543 }
544
545 void setTotalSamples(uint64_t Num) { TotalSamples = Num; }
546
547 sampleprof_error addHeadSamples(uint64_t Num, uint64_t Weight = 1) {
548 bool Overflowed;
549 TotalHeadSamples =
550 SaturatingMultiplyAdd(Num, Weight, TotalHeadSamples, &Overflowed);
551 return Overflowed ? sampleprof_error::counter_overflow
552 : sampleprof_error::success;
553 }
554
555 sampleprof_error addBodySamples(uint32_t LineOffset, uint32_t Discriminator,
556 uint64_t Num, uint64_t Weight = 1) {
557 return BodySamples[LineLocation(LineOffset, Discriminator)].addSamples(
558 Num, Weight);
559 }
560
561 sampleprof_error addCalledTargetSamples(uint32_t LineOffset,
562 uint32_t Discriminator,
563 StringRef FName, uint64_t Num,
564 uint64_t Weight = 1) {
565 return BodySamples[LineLocation(LineOffset, Discriminator)].addCalledTarget(
566 FName, Num, Weight);
567 }
568
569 sampleprof_error addBodySamplesForProbe(uint32_t Index, uint64_t Num,
570 uint64_t Weight = 1) {
571 SampleRecord S;
572 S.addSamples(Num, Weight);
573 return BodySamples[LineLocation(Index, 0)].merge(S, Weight);
574 }
575
576 /// Return the number of samples collected at the given location.
577 /// Each location is specified by \p LineOffset and \p Discriminator.
578 /// If the location is not found in profile, return error.
579 ErrorOr<uint64_t> findSamplesAt(uint32_t LineOffset,
580 uint32_t Discriminator) const {
581 const auto &ret = BodySamples.find(LineLocation(LineOffset, Discriminator));
582 if (ret == BodySamples.end()) {
583 // For CSSPGO, in order to conserve profile size, we no longer write out
584 // locations profile for those not hit during training, so we need to
585 // treat them as zero instead of error here.
586 if (FunctionSamples::ProfileIsCS || FunctionSamples::ProfileIsProbeBased)
587 return 0;
588 return std::error_code();
589 } else {
590 // Return error for an invalid sample count which is usually assigned to
591 // dangling probe.
592 if (FunctionSamples::ProfileIsProbeBased &&
593 ret->second.getSamples() == FunctionSamples::InvalidProbeCount)
594 return std::error_code();
595 return ret->second.getSamples();
596 }
597 }
598
599 /// Returns the call target map collected at a given location.
600 /// Each location is specified by \p LineOffset and \p Discriminator.
601 /// If the location is not found in profile, return error.
602 ErrorOr<SampleRecord::CallTargetMap>
603 findCallTargetMapAt(uint32_t LineOffset, uint32_t Discriminator) const {
604 const auto &ret = BodySamples.find(LineLocation(LineOffset, Discriminator));
605 if (ret == BodySamples.end())
606 return std::error_code();
607 return ret->second.getCallTargets();
608 }
609
610 /// Returns the call target map collected at a given location specified by \p
611 /// CallSite. If the location is not found in profile, return error.
612 ErrorOr<SampleRecord::CallTargetMap>
613 findCallTargetMapAt(const LineLocation &CallSite) const {
614 const auto &Ret = BodySamples.find(CallSite);
615 if (Ret == BodySamples.end())
616 return std::error_code();
617 return Ret->second.getCallTargets();
618 }
619
620 /// Return the function samples at the given callsite location.
621 FunctionSamplesMap &functionSamplesAt(const LineLocation &Loc) {
622 return CallsiteSamples[Loc];
623 }
624
625 /// Returns the FunctionSamplesMap at the given \p Loc.
626 const FunctionSamplesMap *
627 findFunctionSamplesMapAt(const LineLocation &Loc) const {
628 auto iter = CallsiteSamples.find(Loc);
629 if (iter == CallsiteSamples.end())
630 return nullptr;
631 return &iter->second;
632 }
633
634 /// Returns a pointer to FunctionSamples at the given callsite location
635 /// \p Loc with callee \p CalleeName. If no callsite can be found, relax
636 /// the restriction to return the FunctionSamples at callsite location
637 /// \p Loc with the maximum total sample count. If \p Remapper is not
638 /// nullptr, use \p Remapper to find FunctionSamples with equivalent name
639 /// as \p CalleeName.
640 const FunctionSamples *
641 findFunctionSamplesAt(const LineLocation &Loc, StringRef CalleeName,
642 SampleProfileReaderItaniumRemapper *Remapper) const;
643
644 bool empty() const { return TotalSamples == 0; }
645
646 /// Return the total number of samples collected inside the function.
647 uint64_t getTotalSamples() const { return TotalSamples; }
648
649 /// Return the total number of branch samples that have the function as the
650 /// branch target. This should be equivalent to the sample of the first
651 /// instruction of the symbol. But as we directly get this info for raw
652 /// profile without referring to potentially inaccurate debug info, this
653 /// gives more accurate profile data and is preferred for standalone symbols.
654 uint64_t getHeadSamples() const { return TotalHeadSamples; }
655
656 /// Return the sample count of the first instruction of the function.
657 /// The function can be either a standalone symbol or an inlined function.
658 uint64_t getEntrySamples() const {
659 if (FunctionSamples::ProfileIsCS && getHeadSamples()) {
660 // For CS profile, if we already have more accurate head samples
661 // counted by branch sample from caller, use them as entry samples.
662 return getHeadSamples();
663 }
664 uint64_t Count = 0;
665 // Use either BodySamples or CallsiteSamples which ever has the smaller
666 // lineno.
667 if (!BodySamples.empty() &&
668 (CallsiteSamples.empty() ||
669 BodySamples.begin()->first < CallsiteSamples.begin()->first))
670 Count = BodySamples.begin()->second.getSamples();
671 else if (!CallsiteSamples.empty()) {
672 // An indirect callsite may be promoted to several inlined direct calls.
673 // We need to get the sum of them.
674 for (const auto &N_FS : CallsiteSamples.begin()->second)
675 Count += N_FS.second.getEntrySamples();
676 }
677 // Return at least 1 if total sample is not 0.
678 return Count ? Count : TotalSamples > 0;
679 }
680
681 /// Return all the samples collected in the body of the function.
682 const BodySampleMap &getBodySamples() const { return BodySamples; }
683
684 /// Return all the callsite samples collected in the body of the function.
685 const CallsiteSampleMap &getCallsiteSamples() const {
686 return CallsiteSamples;
687 }
688
689 /// Return the maximum of sample counts in a function body including functions
690 /// inlined in it.
691 uint64_t getMaxCountInside() const {
692 uint64_t MaxCount = 0;
693 for (const auto &L : getBodySamples())
694 MaxCount = std::max(MaxCount, L.second.getSamples());
695 for (const auto &C : getCallsiteSamples())
696 for (const FunctionSamplesMap::value_type &F : C.second)
697 MaxCount = std::max(MaxCount, F.second.getMaxCountInside());
698 return MaxCount;
699 }
700
701 /// Merge the samples in \p Other into this one.
702 /// Optionally scale samples by \p Weight.
703 sampleprof_error merge(const FunctionSamples &Other, uint64_t Weight = 1) {
704 sampleprof_error Result = sampleprof_error::success;
705 Name = Other.getName();
706 if (!GUIDToFuncNameMap)
707 GUIDToFuncNameMap = Other.GUIDToFuncNameMap;
708 if (Context.getNameWithContext().empty())
709 Context = Other.getContext();
710 if (FunctionHash == 0) {
711 // Set the function hash code for the target profile.
712 FunctionHash = Other.getFunctionHash();
713 } else if (FunctionHash != Other.getFunctionHash()) {
714 // The two profiles coming with different valid hash codes indicates
715 // either:
716 // 1. They are same-named static functions from different compilation
717 // units (without using -unique-internal-linkage-names), or
718 // 2. They are really the same function but from different compilations.
719 // Let's bail out in either case for now, which means one profile is
720 // dropped.
721 return sampleprof_error::hash_mismatch;
722 }
723
724 MergeResult(Result, addTotalSamples(Other.getTotalSamples(), Weight));
725 MergeResult(Result, addHeadSamples(Other.getHeadSamples(), Weight));
726 for (const auto &I : Other.getBodySamples()) {
727 const LineLocation &Loc = I.first;
728 const SampleRecord &Rec = I.second;
729 MergeResult(Result, BodySamples[Loc].merge(Rec, Weight));
730 }
731 for (const auto &I : Other.getCallsiteSamples()) {
732 const LineLocation &Loc = I.first;
733 FunctionSamplesMap &FSMap = functionSamplesAt(Loc);
734 for (const auto &Rec : I.second)
735 MergeResult(Result, FSMap[Rec.first].merge(Rec.second, Weight));
736 }
737 return Result;
738 }
739
740 /// Recursively traverses all children, if the total sample count of the
741 /// corresponding function is no less than \p Threshold, add its corresponding
742 /// GUID to \p S. Also traverse the BodySamples to add hot CallTarget's GUID
743 /// to \p S.
744 void findInlinedFunctions(DenseSet<GlobalValue::GUID> &S,
745 const StringMap<Function *> &SymbolMap,
746 uint64_t Threshold) const {
747 if (TotalSamples <= Threshold)
748 return;
749 auto isDeclaration = [](const Function *F) {
750 return !F || F->isDeclaration();
751 };
752 if (isDeclaration(SymbolMap.lookup(getFuncName()))) {
753 // Add to the import list only when it's defined out of module.
754 S.insert(getGUID(Name));
755 }
756 // Import hot CallTargets, which may not be available in IR because full
757 // profile annotation cannot be done until backend compilation in ThinLTO.
758 for (const auto &BS : BodySamples)
759 for (const auto &TS : BS.second.getCallTargets())
760 if (TS.getValue() > Threshold) {
761 const Function *Callee = SymbolMap.lookup(getFuncName(TS.getKey()));
762 if (isDeclaration(Callee))
763 S.insert(getGUID(TS.getKey()));
764 }
765 for (const auto &CS : CallsiteSamples)
766 for (const auto &NameFS : CS.second)
767 NameFS.second.findInlinedFunctions(S, SymbolMap, Threshold);
768 }
769
770 /// Set the name of the function.
771 void setName(StringRef FunctionName) { Name = FunctionName; }
772
773 /// Return the function name.
774 StringRef getName() const { return Name; }
775
776 /// Return function name with context.
777 StringRef getNameWithContext() const {
778 return FunctionSamples::ProfileIsCS ? Context.getNameWithContext() : Name;
779 }
780
781 /// Return the original function name.
782 StringRef getFuncName() const { return getFuncName(Name); }
783
784 void setFunctionHash(uint64_t Hash) { FunctionHash = Hash; }
785
786 uint64_t getFunctionHash() const { return FunctionHash; }
787
788 /// Return the canonical name for a function, taking into account
789 /// suffix elision policy attributes.
790 static StringRef getCanonicalFnName(const Function &F) {
791 auto AttrName = "sample-profile-suffix-elision-policy";
792 auto Attr = F.getFnAttribute(AttrName).getValueAsString();
793 return getCanonicalFnName(F.getName(), Attr);
794 }
795
796 /// Name suffixes which canonicalization should handle to avoid
797 /// profile mismatch.
798 static constexpr const char *LLVMSuffix = ".llvm.";
799 static constexpr const char *PartSuffix = ".part.";
800 static constexpr const char *UniqSuffix = ".__uniq.";
801
802 static StringRef getCanonicalFnName(StringRef FnName,
803 StringRef Attr = "selected") {
804 // Note the sequence of the suffixes in the knownSuffixes array matters.
805 // If suffix "A" is appended after the suffix "B", "A" should be in front
806 // of "B" in knownSuffixes.
807 const char *knownSuffixes[] = {LLVMSuffix, PartSuffix, UniqSuffix};
808 if (Attr == "" || Attr == "all") {
809 return FnName.split('.').first;
810 } else if (Attr == "selected") {
811 StringRef Cand(FnName);
812 for (const auto &Suf : knownSuffixes) {
813 StringRef Suffix(Suf);
814 // If the profile contains ".__uniq." suffix, don't strip the
815 // suffix for names in the IR.
816 if (Suffix == UniqSuffix && FunctionSamples::HasUniqSuffix)
817 continue;
818 auto It = Cand.rfind(Suffix);
819 if (It == StringRef::npos)
820 continue;
821 auto Dit = Cand.rfind('.');
822 if (Dit == It + Suffix.size() - 1)
823 Cand = Cand.substr(0, It);
824 }
825 return Cand;
826 } else if (Attr == "none") {
827 return FnName;
828 } else {
829 assert(false && "internal error: unknown suffix elision policy");
830 }
831 return FnName;
832 }
833
834 /// Translate \p Name into its original name.
835 /// When profile doesn't use MD5, \p Name needs no translation.
836 /// When profile uses MD5, \p Name in current FunctionSamples
837 /// is actually GUID of the original function name. getFuncName will
838 /// translate \p Name in current FunctionSamples into its original name
839 /// by looking up in the function map GUIDToFuncNameMap.
840 /// If the original name doesn't exist in the map, return empty StringRef.
841 StringRef getFuncName(StringRef Name) const {
842 if (!UseMD5)
843 return Name;
844
845 assert(GUIDToFuncNameMap && "GUIDToFuncNameMap needs to be popluated first");
846 return GUIDToFuncNameMap->lookup(std::stoull(Name.data()));
847 }
848
849 /// Returns the line offset to the start line of the subprogram.
850 /// We assume that a single function will not exceed 65535 LOC.
851 static unsigned getOffset(const DILocation *DIL);
852
853 /// Returns a unique call site identifier for a given debug location of a call
854 /// instruction. This is wrapper of two scenarios, the probe-based profile and
855 /// regular profile, to hide implementation details from the sample loader and
856 /// the context tracker.
857 static LineLocation getCallSiteIdentifier(const DILocation *DIL);
858
859 /// Get the FunctionSamples of the inline instance where DIL originates
860 /// from.
861 ///
862 /// The FunctionSamples of the instruction (Machine or IR) associated to
863 /// \p DIL is the inlined instance in which that instruction is coming from.
864 /// We traverse the inline stack of that instruction, and match it with the
865 /// tree nodes in the profile.
866 ///
867 /// \returns the FunctionSamples pointer to the inlined instance.
868 /// If \p Remapper is not nullptr, it will be used to find matching
869 /// FunctionSamples with not exactly the same but equivalent name.
870 const FunctionSamples *findFunctionSamples(
871 const DILocation *DIL,
872 SampleProfileReaderItaniumRemapper *Remapper = nullptr) const;
873
874 // The invalid sample count is used to represent samples collected for a
875 // dangling probe.
876 static constexpr uint64_t InvalidProbeCount = UINT64_MAX;
877
878 static bool ProfileIsProbeBased;
879
880 static bool ProfileIsCS;
881
882 SampleContext &getContext() const { return Context; }
883
884 void setContext(const SampleContext &FContext) { Context = FContext; }
885
886 static SampleProfileFormat Format;
887
888 /// Whether the profile uses MD5 to represent string.
889 static bool UseMD5;
890
891 /// Whether the profile contains any ".__uniq." suffix in a name.
892 static bool HasUniqSuffix;
893
894 /// GUIDToFuncNameMap saves the mapping from GUID to the symbol name, for
895 /// all the function symbols defined or declared in current module.
896 DenseMap<uint64_t, StringRef> *GUIDToFuncNameMap = nullptr;
897
898 // Assume the input \p Name is a name coming from FunctionSamples itself.
899 // If UseMD5 is true, the name is already a GUID and we
900 // don't want to return the GUID of GUID.
901 static uint64_t getGUID(StringRef Name) {
902 return UseMD5 ? std::stoull(Name.data()) : Function::getGUID(Name);
903 }
904
905 // Find all the names in the current FunctionSamples including names in
906 // all the inline instances and names of call targets.
907 void findAllNames(DenseSet<StringRef> &NameSet) const;
908
909 private:
910 /// Mangled name of the function.
911 StringRef Name;
912
913 /// CFG hash value for the function.
914 uint64_t FunctionHash = 0;
915
916 /// Calling context for function profile
917 mutable SampleContext Context;
918
919 /// Total number of samples collected inside this function.
920 ///
921 /// Samples are cumulative, they include all the samples collected
922 /// inside this function and all its inlined callees.
923 uint64_t TotalSamples = 0;
924
925 /// Total number of samples collected at the head of the function.
926 /// This is an approximation of the number of calls made to this function
927 /// at runtime.
928 uint64_t TotalHeadSamples = 0;
929
930 /// Map instruction locations to collected samples.
931 ///
932 /// Each entry in this map contains the number of samples
933 /// collected at the corresponding line offset. All line locations
934 /// are an offset from the start of the function.
935 BodySampleMap BodySamples;
936
937 /// Map call sites to collected samples for the called function.
938 ///
939 /// Each entry in this map corresponds to all the samples
940 /// collected for the inlined function call at the given
941 /// location. For example, given:
942 ///
943 /// void foo() {
944 /// 1 bar();
945 /// ...
946 /// 8 baz();
947 /// }
948 ///
949 /// If the bar() and baz() calls were inlined inside foo(), this
950 /// map will contain two entries. One for all the samples collected
951 /// in the call to bar() at line offset 1, the other for all the samples
952 /// collected in the call to baz() at line offset 8.
953 CallsiteSampleMap CallsiteSamples;
954 };
955
956 raw_ostream &operator<<(raw_ostream &OS, const FunctionSamples &FS);
957
958 /// Sort a LocationT->SampleT map by LocationT.
959 ///
960 /// It produces a sorted list of <LocationT, SampleT> records by ascending
961 /// order of LocationT.
962 template <class LocationT, class SampleT> class SampleSorter {
963 public:
964 using SamplesWithLoc = std::pair<const LocationT, SampleT>;
965 using SamplesWithLocList = SmallVector<const SamplesWithLoc *, 20>;
966
967 SampleSorter(const std::map<LocationT, SampleT> &Samples) {
968 for (const auto &I : Samples)
969 V.push_back(&I);
970 llvm::stable_sort(V, [](const SamplesWithLoc *A, const SamplesWithLoc *B) {
971 return A->first < B->first;
972 });
973 }
974
975 const SamplesWithLocList &get() const { return V; }
976
977 private:
978 SamplesWithLocList V;
979 };
980
981 /// SampleContextTrimmer impelements helper functions to trim, merge cold
982 /// context profiles. It also supports context profile canonicalization to make
983 /// sure ProfileMap's key is consistent with FunctionSample's name/context.
984 class SampleContextTrimmer {
985 public:
986 SampleContextTrimmer(StringMap<FunctionSamples> &Profiles)
987 : ProfileMap(Profiles){};
988 // Trim and merge cold context profile when requested.
989 void trimAndMergeColdContextProfiles(uint64_t ColdCountThreshold,
990 bool TrimColdContext = true,
991 bool MergeColdContext = true);
992 // Canonicalize context profile name and attributes.
993 void canonicalizeContextProfiles();
994
995 private:
996 StringMap<FunctionSamples> &ProfileMap;
997 };
998
999 /// ProfileSymbolList records the list of function symbols shown up
1000 /// in the binary used to generate the profile. It is useful to
1001 /// to discriminate a function being so cold as not to shown up
1002 /// in the profile and a function newly added.
1003 class ProfileSymbolList {
1004 public:
1005 /// copy indicates whether we need to copy the underlying memory
1006 /// for the input Name.
1007 void add(StringRef Name, bool copy = false) {
1008 if (!copy) {
1009 Syms.insert(Name);
1010 return;
1011 }
1012 Syms.insert(Name.copy(Allocator));
1013 }
1014
1015 bool contains(StringRef Name) { return Syms.count(Name); }
1016
1017 void merge(const ProfileSymbolList &List) {
1018 for (auto Sym : List.Syms)
1019 add(Sym, true);
1020 }
1021
1022 unsigned size() { return Syms.size(); }
1023
1024 void setToCompress(bool TC) { ToCompress = TC; }
1025 bool toCompress() { return ToCompress; }
1026
1027 std::error_code read(const uint8_t *Data, uint64_t ListSize);
1028 std::error_code write(raw_ostream &OS);
1029 void dump(raw_ostream &OS = dbgs()) const;
1030
1031 private:
1032 // Determine whether or not to compress the symbol list when
1033 // writing it into profile. The variable is unused when the symbol
1034 // list is read from an existing profile.
1035 bool ToCompress = false;
1036 DenseSet<StringRef> Syms;
1037 BumpPtrAllocator Allocator;
1038 };
1039
1040 } // end namespace sampleprof
1041 } // end namespace llvm
1042
1043 #endif // LLVM_PROFILEDATA_SAMPLEPROF_H
1044