xref: /netbsd-src/external/apache2/llvm/dist/llvm/tools/llvm-profgen/PerfReader.h (revision 82d56013d7b633d116a93943de88e08335357a7c)
1 //===-- PerfReader.h - perfscript reader -----------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #ifndef LLVM_TOOLS_LLVM_PROFGEN_PERFREADER_H
10 #define LLVM_TOOLS_LLVM_PROFGEN_PERFREADER_H
#include "ErrorHandling.h"
#include "ProfiledBinary.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Regex.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <fstream>
#include <list>
#include <map>
#include <memory>
#include <string>
#include <tuple>
#include <unordered_map>
#include <vector>
20 
21 using namespace llvm;
22 using namespace sampleprof;
23 
24 namespace llvm {
25 namespace sampleprof {
26 
27 // Stream based trace line iterator
28 class TraceStream {
29   std::string CurrentLine;
30   std::ifstream Fin;
31   bool IsAtEoF = false;
32   uint64_t LineNumber = 0;
33 
34 public:
TraceStream(StringRef Filename)35   TraceStream(StringRef Filename) : Fin(Filename.str()) {
36     if (!Fin.good())
37       exitWithError("Error read input perf script file", Filename);
38     advance();
39   }
40 
getCurrentLine()41   StringRef getCurrentLine() {
42     assert(!IsAtEoF && "Line iterator reaches the End-of-File!");
43     return CurrentLine;
44   }
45 
getLineNumber()46   uint64_t getLineNumber() { return LineNumber; }
47 
isAtEoF()48   bool isAtEoF() { return IsAtEoF; }
49 
50   // Read the next line
advance()51   void advance() {
52     if (!std::getline(Fin, CurrentLine)) {
53       IsAtEoF = true;
54       return;
55     }
56     LineNumber++;
57   }
58 };
59 
// The type of perfscript
enum PerfScriptType {
  PERF_UNKNOWN = 0,   // Type not determined yet.
  PERF_INVALID = 1,   // Malformed or unrecognized perf script.
  PERF_LBR = 2,       // Only LBR sample
  PERF_LBR_STACK = 3, // Hybrid sample including call stack and LBR stack.
};
67 
// One parsed LBR record: a taken branch from Source to Target.
struct LBREntry {
  uint64_t Source = 0;
  uint64_t Target = 0;
  // An artificial branch stands for a series of consecutive branches starting
  // from the current binary with a transition through external code and
  // eventually landing back in the current binary.
  bool IsArtificial = false;
  LBREntry(uint64_t Src, uint64_t Tgt, bool Artificial)
      : Source(Src), Target(Tgt), IsArtificial(Artificial) {}
};
79 
// Hash interface for generic data of type T.
// T must provide \fn getHashCode and \fn isEqual. getHashCode is deliberately
// non-virtual so that reading the cached hash avoids a vtable dispatch: the
// derived type computes its own hash and stores it in the base object, which
// also permits incremental (rolling) hash updates during frame-stack
// unwinding, where only the leaf of the stack changes. \fn isEqual stays
// virtual (with the associated dispatch cost) so hash collisions can be
// disambiguated precisely; a stronger hash function could make it
// unnecessary in the future.
template <class T> class Hashable {
public:
  std::shared_ptr<T> Data;
  Hashable(const std::shared_ptr<T> &D) : Data(D) {}

  // Hashing functor: returns the hash the payload has already cached.
  struct Hash {
    uint64_t operator()(const Hashable<T> &Key) const {
      // Non-virtual call; the payload must have generated its hash already.
      uint64_t HashCode = Key.Data->getHashCode();
      assert(HashCode && "Should generate HashCode for it!");
      return HashCode;
    }
  };

  // Equality functor: precise payload comparison (virtual, has overhead).
  struct Equal {
    bool operator()(const Hashable<T> &LHS, const Hashable<T> &RHS) const {
      return LHS.Data->isEqual(RHS.Data.get());
    }
  };

  T *getPtr() const { return Data.get(); }
};
114 
// Base class to extend for all types of perf sample
struct PerfSample {
  // Cached hash of the derived sample's payload; filled in by the derived
  // class (see HybridSample::genHashCode) before the sample is used as a
  // hash-map key.
  uint64_t HashCode = 0;

  virtual ~PerfSample() = default;
  // Deliberately non-virtual: reads the cached hash without a vtable hop.
  uint64_t getHashCode() const { return HashCode; }
  // Fallback equality on the cached hash; derived classes override this with
  // a precise payload comparison.
  virtual bool isEqual(const PerfSample *K) const {
    return HashCode == K->HashCode;
  };

  // Utilities for LLVM-style RTTI
  enum PerfKind { PK_HybridSample };
  const PerfKind Kind;
  PerfKind getKind() const { return Kind; }
  PerfSample(PerfKind K) : Kind(K){};
};
131 
132 // The parsed hybrid sample including call stack and LBR stack.
133 struct HybridSample : public PerfSample {
134   // Profiled binary that current frame address belongs to
135   ProfiledBinary *Binary;
136   // Call stack recorded in FILO(leaf to root) order
137   SmallVector<uint64_t, 16> CallStack;
138   // LBR stack recorded in FIFO order
139   SmallVector<LBREntry, 16> LBRStack;
140 
HybridSampleHybridSample141   HybridSample() : PerfSample(PK_HybridSample){};
classofHybridSample142   static bool classof(const PerfSample *K) {
143     return K->getKind() == PK_HybridSample;
144   }
145 
146   // Used for sample aggregation
isEqualHybridSample147   bool isEqual(const PerfSample *K) const override {
148     const HybridSample *Other = dyn_cast<HybridSample>(K);
149     if (Other->Binary != Binary)
150       return false;
151     const SmallVector<uint64_t, 16> &OtherCallStack = Other->CallStack;
152     const SmallVector<LBREntry, 16> &OtherLBRStack = Other->LBRStack;
153 
154     if (CallStack.size() != OtherCallStack.size() ||
155         LBRStack.size() != OtherLBRStack.size())
156       return false;
157 
158     auto Iter = CallStack.begin();
159     for (auto Address : OtherCallStack) {
160       if (Address != *Iter++)
161         return false;
162     }
163 
164     for (size_t I = 0; I < OtherLBRStack.size(); I++) {
165       if (LBRStack[I].Source != OtherLBRStack[I].Source ||
166           LBRStack[I].Target != OtherLBRStack[I].Target)
167         return false;
168     }
169     return true;
170   }
171 
genHashCodeHybridSample172   void genHashCode() {
173     // Use simple DJB2 hash
174     auto HashCombine = [](uint64_t H, uint64_t V) {
175       return ((H << 5) + H) + V;
176     };
177     uint64_t Hash = 5381;
178     Hash = HashCombine(Hash, reinterpret_cast<uint64_t>(Binary));
179     for (const auto &Value : CallStack) {
180       Hash = HashCombine(Hash, Value);
181     }
182     for (const auto &Entry : LBRStack) {
183       Hash = HashCombine(Hash, Entry.Source);
184       Hash = HashCombine(Hash, Entry.Target);
185     }
186     HashCode = Hash;
187   }
188 };
189 
// After parsing the sample, we record the samples by aggregating them
// into this counter. The key stores the sample data and the value is
// the sample repeat times.
using AggregatedCounter =
    std::unordered_map<Hashable<PerfSample>, uint64_t,
                       Hashable<PerfSample>::Hash, Hashable<PerfSample>::Equal>;

// (begin, end-or-target, count) triples collected per trie frame while unwinding.
using SampleVector = SmallVector<std::tuple<uint64_t, uint64_t, uint64_t>, 16>;
// The state for the unwinder. It doesn't hold the data but only keeps
// pointers/indices into the data. While unwinding, the call stack is changed
// dynamically and will be recorded as the context of the sample.
201 struct UnwindState {
202   // Profiled binary that current frame address belongs to
203   const ProfiledBinary *Binary;
204   // Call stack trie node
205   struct ProfiledFrame {
206     const uint64_t Address = 0;
207     ProfiledFrame *Parent;
208     SampleVector RangeSamples;
209     SampleVector BranchSamples;
210     std::unordered_map<uint64_t, std::unique_ptr<ProfiledFrame>> Children;
211 
212     ProfiledFrame(uint64_t Addr = 0, ProfiledFrame *P = nullptr)
AddressUnwindState::ProfiledFrame213         : Address(Addr), Parent(P) {}
getOrCreateChildFrameUnwindState::ProfiledFrame214     ProfiledFrame *getOrCreateChildFrame(uint64_t Address) {
215       assert(Address && "Address can't be zero!");
216       auto Ret = Children.emplace(
217           Address, std::make_unique<ProfiledFrame>(Address, this));
218       return Ret.first->second.get();
219     }
recordRangeCountUnwindState::ProfiledFrame220     void recordRangeCount(uint64_t Start, uint64_t End, uint64_t Count) {
221       RangeSamples.emplace_back(std::make_tuple(Start, End, Count));
222     }
recordBranchCountUnwindState::ProfiledFrame223     void recordBranchCount(uint64_t Source, uint64_t Target, uint64_t Count) {
224       BranchSamples.emplace_back(std::make_tuple(Source, Target, Count));
225     }
isDummyRootUnwindState::ProfiledFrame226     bool isDummyRoot() { return Address == 0; }
227   };
228 
229   ProfiledFrame DummyTrieRoot;
230   ProfiledFrame *CurrentLeafFrame;
231   // Used to fall through the LBR stack
232   uint32_t LBRIndex = 0;
233   // Reference to HybridSample.LBRStack
234   const SmallVector<LBREntry, 16> &LBRStack;
235   // Used to iterate the address range
236   InstructionPointer InstPtr;
UnwindStateUnwindState237   UnwindState(const HybridSample *Sample)
238       : Binary(Sample->Binary), LBRStack(Sample->LBRStack),
239         InstPtr(Sample->Binary, Sample->CallStack.front()) {
240     initFrameTrie(Sample->CallStack);
241   }
242 
validateInitialStateUnwindState243   bool validateInitialState() {
244     uint64_t LBRLeaf = LBRStack[LBRIndex].Target;
245     uint64_t LeafAddr = CurrentLeafFrame->Address;
246     // When we take a stack sample, ideally the sampling distance between the
247     // leaf IP of stack and the last LBR target shouldn't be very large.
248     // Use a heuristic size (0x100) to filter out broken records.
249     if (LeafAddr < LBRLeaf || LeafAddr >= LBRLeaf + 0x100) {
250       WithColor::warning() << "Bogus trace: stack tip = "
251                            << format("%#010x", LeafAddr)
252                            << ", LBR tip = " << format("%#010x\n", LBRLeaf);
253       return false;
254     }
255     return true;
256   }
257 
checkStateConsistencyUnwindState258   void checkStateConsistency() {
259     assert(InstPtr.Address == CurrentLeafFrame->Address &&
260            "IP should align with context leaf");
261   }
262 
getBinaryUnwindState263   const ProfiledBinary *getBinary() const { return Binary; }
hasNextLBRUnwindState264   bool hasNextLBR() const { return LBRIndex < LBRStack.size(); }
getCurrentLBRSourceUnwindState265   uint64_t getCurrentLBRSource() const { return LBRStack[LBRIndex].Source; }
getCurrentLBRTargetUnwindState266   uint64_t getCurrentLBRTarget() const { return LBRStack[LBRIndex].Target; }
getCurrentLBRUnwindState267   const LBREntry &getCurrentLBR() const { return LBRStack[LBRIndex]; }
advanceLBRUnwindState268   void advanceLBR() { LBRIndex++; }
269 
getParentFrameUnwindState270   ProfiledFrame *getParentFrame() { return CurrentLeafFrame->Parent; }
271 
pushFrameUnwindState272   void pushFrame(uint64_t Address) {
273     CurrentLeafFrame = CurrentLeafFrame->getOrCreateChildFrame(Address);
274   }
275 
switchToFrameUnwindState276   void switchToFrame(uint64_t Address) {
277     if (CurrentLeafFrame->Address == Address)
278       return;
279     CurrentLeafFrame = CurrentLeafFrame->Parent->getOrCreateChildFrame(Address);
280   }
281 
popFrameUnwindState282   void popFrame() { CurrentLeafFrame = CurrentLeafFrame->Parent; }
283 
initFrameTrieUnwindState284   void initFrameTrie(const SmallVectorImpl<uint64_t> &CallStack) {
285     ProfiledFrame *Cur = &DummyTrieRoot;
286     for (auto Address : reverse(CallStack)) {
287       Cur = Cur->getOrCreateChildFrame(Address);
288     }
289     CurrentLeafFrame = Cur;
290   }
291 
getDummyRootPtrUnwindState292   ProfiledFrame *getDummyRootPtr() { return &DummyTrieRoot; }
293 };
294 
// Base class for sample counter key with context
struct ContextKey {
  // Cached hash of the derived key; filled in by the derived genHashCode()
  // before the key is inserted into a hash map.
  uint64_t HashCode = 0;
  virtual ~ContextKey() = default;
  // Deliberately non-virtual read of the cached hash (see Hashable).
  uint64_t getHashCode() const { return HashCode; }
  // Fallback equality on the cached hash; derived classes override this with
  // a precise comparison.
  virtual bool isEqual(const ContextKey *K) const {
    return HashCode == K->HashCode;
  };

  // Utilities for LLVM-style RTTI
  enum ContextKind { CK_StringBased, CK_ProbeBased };
  const ContextKind Kind;
  ContextKind getKind() const { return Kind; }
  ContextKey(ContextKind K) : Kind(K){};
};
310 
311 // String based context id
312 struct StringBasedCtxKey : public ContextKey {
313   std::string Context;
314   bool WasLeafInlined;
StringBasedCtxKeyStringBasedCtxKey315   StringBasedCtxKey() : ContextKey(CK_StringBased), WasLeafInlined(false){};
classofStringBasedCtxKey316   static bool classof(const ContextKey *K) {
317     return K->getKind() == CK_StringBased;
318   }
319 
isEqualStringBasedCtxKey320   bool isEqual(const ContextKey *K) const override {
321     const StringBasedCtxKey *Other = dyn_cast<StringBasedCtxKey>(K);
322     return Context == Other->Context;
323   }
324 
genHashCodeStringBasedCtxKey325   void genHashCode() { HashCode = hash_value(Context); }
326 };
327 
328 // Probe based context key as the intermediate key of context
329 // String based context key will introduce redundant string handling
330 // since the callee context is inferred from the context string which
331 // need to be splitted by '@' to get the last location frame, so we
332 // can just use probe instead and generate the string in the end.
333 struct ProbeBasedCtxKey : public ContextKey {
334   SmallVector<const PseudoProbe *, 16> Probes;
335 
ProbeBasedCtxKeyProbeBasedCtxKey336   ProbeBasedCtxKey() : ContextKey(CK_ProbeBased) {}
classofProbeBasedCtxKey337   static bool classof(const ContextKey *K) {
338     return K->getKind() == CK_ProbeBased;
339   }
340 
isEqualProbeBasedCtxKey341   bool isEqual(const ContextKey *K) const override {
342     const ProbeBasedCtxKey *O = dyn_cast<ProbeBasedCtxKey>(K);
343     assert(O != nullptr && "Probe based key shouldn't be null in isEqual");
344     return std::equal(Probes.begin(), Probes.end(), O->Probes.begin(),
345                       O->Probes.end());
346   }
347 
genHashCodeProbeBasedCtxKey348   void genHashCode() {
349     for (const auto *P : Probes) {
350       HashCode = hash_combine(HashCode, P);
351     }
352     if (HashCode == 0) {
353       // Avoid zero value of HashCode when it's an empty list
354       HashCode = 1;
355     }
356   }
357 };
358 
359 // The counter of branch samples for one function indexed by the branch,
360 // which is represented as the source and target offset pair.
361 using BranchSample = std::map<std::pair<uint64_t, uint64_t>, uint64_t>;
362 // The counter of range samples for one function indexed by the range,
363 // which is represented as the start and end offset pair.
364 using RangeSample = std::map<std::pair<uint64_t, uint64_t>, uint64_t>;
365 // Wrapper for sample counters including range counter and branch counter
366 struct SampleCounter {
367   RangeSample RangeCounter;
368   BranchSample BranchCounter;
369 
recordRangeCountSampleCounter370   void recordRangeCount(uint64_t Start, uint64_t End, uint64_t Repeat) {
371     RangeCounter[{Start, End}] += Repeat;
372   }
recordBranchCountSampleCounter373   void recordBranchCount(uint64_t Source, uint64_t Target, uint64_t Repeat) {
374     BranchCounter[{Source, Target}] += Repeat;
375   }
376 };
377 
// Sample counter with context to support context-sensitive profile
// Keyed by a shared ContextKey; hashing/equality come from Hashable.
using ContextSampleCounterMap =
    std::unordered_map<Hashable<ContextKey>, SampleCounter,
                       Hashable<ContextKey>::Hash, Hashable<ContextKey>::Equal>;
382 
383 struct FrameStack {
384   SmallVector<uint64_t, 16> Stack;
385   const ProfiledBinary *Binary;
FrameStackFrameStack386   FrameStack(const ProfiledBinary *B) : Binary(B) {}
pushFrameFrameStack387   bool pushFrame(UnwindState::ProfiledFrame *Cur) {
388     Stack.push_back(Cur->Address);
389     return true;
390   }
391 
popFrameFrameStack392   void popFrame() {
393     if (!Stack.empty())
394       Stack.pop_back();
395   }
396   std::shared_ptr<StringBasedCtxKey> getContextKey();
397 };
398 
399 struct ProbeStack {
400   SmallVector<const PseudoProbe *, 16> Stack;
401   const ProfiledBinary *Binary;
ProbeStackProbeStack402   ProbeStack(const ProfiledBinary *B) : Binary(B) {}
pushFrameProbeStack403   bool pushFrame(UnwindState::ProfiledFrame *Cur) {
404     const PseudoProbe *CallProbe = Binary->getCallProbeForAddr(Cur->Address);
405     // We may not find a probe for a merged or external callsite.
406     // Callsite merging may cause the loss of original probe IDs.
407     // Cutting off the context from here since the inliner will
408     // not know how to consume a context with unknown callsites.
409     if (!CallProbe)
410       return false;
411     Stack.push_back(CallProbe);
412     return true;
413   }
414 
popFrameProbeStack415   void popFrame() {
416     if (!Stack.empty())
417       Stack.pop_back();
418   }
419   // Use pseudo probe based context key to get the sample counter
420   // A context stands for a call path from 'main' to an uninlined
421   // callee with all inline frames recovered on that path. The probes
422   // belonging to that call path is the probes either originated from
423   // the callee or from any functions inlined into the callee. Since
424   // pseudo probes are organized in a tri-tree style after decoded,
425   // the tree path from the tri-tree root (which is the uninlined
426   // callee) to the probe node forms an inline context.
427   // Here we use a list of probe(pointer) as the context key to speed up
428   // aggregation and the final context string will be generate in
429   // ProfileGenerator
430   std::shared_ptr<ProbeBasedCtxKey> getContextKey();
431 };
432 
433 /*
434 As in hybrid sample we have a group of LBRs and the most recent sampling call
435 stack, we can walk through those LBRs to infer more call stacks which would be
436 used as context for profile. VirtualUnwinder is the class to do the call stack
unwinding based on LBR state. Two types of unwinding are processed here:
438 1) LBR unwinding and 2) linear range unwinding.
439 Specifically, for each LBR entry(can be classified into call, return, regular
440 branch), LBR unwinding will replay the operation by pushing, popping or
441 switching leaf frame towards the call stack and since the initial call stack
442 is most recently sampled, the replay should be in anti-execution order, i.e. for
443 the regular case, pop the call stack when LBR is call, push frame on call stack
444 when LBR is return. After each LBR processed, it also needs to align with the
445 next LBR by going through instructions from previous LBR's target to current
446 LBR's source, which is the linear unwinding. As instruction from linear range
447 can come from different function by inlining, linear unwinding will do the range
448 splitting and record counters by the range with same inline context. Over those
449 unwinding process we will record each call stack as context id and LBR/linear
450 range as sample counter for further CS profile generation.
451 */
class VirtualUnwinder {
public:
  VirtualUnwinder(ContextSampleCounterMap *Counter, const ProfiledBinary *B)
      : CtxCounterMap(Counter), Binary(B) {}
  // Entry point: replay the sample's LBR stack against its call stack,
  // recording Repeat-weighted counters into CtxCounterMap.
  bool unwind(const HybridSample *Sample, uint64_t Repeat);

private:
  bool isCallState(UnwindState &State) const {
    // The tail call frame is always missing here in stack sample, we will
    // use a specific tail call tracker to infer it.
    return Binary->addressIsCall(State.getCurrentLBRSource());
  }

  bool isReturnState(UnwindState &State) const {
    // Simply check addressIsReturn, as ret is always reliable, both for
    // regular call and tail call.
    return Binary->addressIsReturn(State.getCurrentLBRSource());
  }

  // Replay one LBR entry against the frame trie (defined out-of-line).
  void unwindCall(UnwindState &State);
  void unwindLinear(UnwindState &State, uint64_t Repeat);
  void unwindReturn(UnwindState &State);
  void unwindBranchWithinFrame(UnwindState &State);

  // Move the samples recorded on one trie node into the context counter
  // keyed by the current Stack (FrameStack or ProbeStack).
  template <typename T>
  void collectSamplesFromFrame(UnwindState::ProfiledFrame *Cur, T &Stack);
  // Collect each samples on trie node by DFS traversal
  template <typename T>
  void collectSamplesFromFrameTrie(UnwindState::ProfiledFrame *Cur, T &Stack);
  void collectSamplesFromFrameTrie(UnwindState::ProfiledFrame *Cur);

  void recordRangeCount(uint64_t Start, uint64_t End, UnwindState &State,
                        uint64_t Repeat);
  void recordBranchCount(const LBREntry &Branch, UnwindState &State,
                         uint64_t Repeat);

  ContextSampleCounterMap *CtxCounterMap;
  // Profiled binary that current frame address belongs to
  const ProfiledBinary *Binary;
};
492 
// Filename to binary map
using BinaryMap = StringMap<ProfiledBinary>;
// Address to binary map for fast look-up
using AddressBinaryMap = std::map<uint64_t, ProfiledBinary *>;
// Binary to ContextSampleCounters map, to support multiple binaries. We may
// have the same binary loaded at different addresses, and those loads should
// share the same sample counter.
using BinarySampleCounterMap =
    std::unordered_map<ProfiledBinary *, ContextSampleCounterMap>;
502 
// Load binaries and read perf trace to parse the events and samples
class PerfReader {

public:
  PerfReader(cl::list<std::string> &BinaryFilenames,
             cl::list<std::string> &PerfTraceFilenames);

  // A LBR sample is like:
  // 0x5c6313f/0x5c63170/P/-/-/0  0x5c630e7/0x5c63130/P/-/-/0 ...
  // A heuristic for fast detection by checking whether a
  // leading "  0x" and the '/' exist.
  static bool isLBRSample(StringRef Line) {
    if (!Line.startswith(" 0x"))
      return false;
    if (Line.find('/') != StringRef::npos)
      return true;
    return false;
  }

  // The raw hybird sample is like
  // e.g.
  // 	          4005dc    # call stack leaf
  //	          400634
  //	          400684    # call stack root
  // 0x4005c8/0x4005dc/P/-/-/0   0x40062f/0x4005b0/P/-/-/0 ...
  //          ... 0x4005c8/0x4005dc/P/-/-/0    # LBR Entries
  // Determine the perfscript contains hybrid samples(call stack + LBRs) by
  // checking whether there is a non-empty call stack immediately followed by
  // a LBR sample
  static PerfScriptType checkPerfScriptType(StringRef FileName) {
    TraceStream TraceIt(FileName);
    uint64_t FrameAddr = 0;
    while (!TraceIt.isAtEoF()) {
      // Count consecutive lines that parse as bare hex frame addresses; these
      // form the call-stack portion of a hybrid sample.
      int32_t Count = 0;
      while (!TraceIt.isAtEoF() &&
             !TraceIt.getCurrentLine().ltrim().getAsInteger(16, FrameAddr)) {
        Count++;
        TraceIt.advance();
      }
      if (!TraceIt.isAtEoF()) {
        if (isLBRSample(TraceIt.getCurrentLine())) {
          // An LBR line preceded by stack frames means hybrid samples.
          if (Count > 0)
            return PERF_LBR_STACK;
          else
            return PERF_LBR;
        }
        TraceIt.advance();
      }
    }
    // No recognizable sample found in the whole file.
    return PERF_INVALID;
  }

  // The parsed MMap event
  struct MMapEvent {
    uint64_t PID = 0;
    uint64_t BaseAddress = 0;
    uint64_t Size = 0;
    uint64_t Offset = 0;
    StringRef BinaryPath;
  };

  /// Load symbols and disassemble the code of a given binary.
  /// Also register the binary in the binary table.
  ///
  ProfiledBinary &loadBinary(const StringRef BinaryPath,
                             bool AllowNameConflict = true);
  // Update the binary's load address from an MMAP event.
  void updateBinaryAddress(const MMapEvent &Event);
  PerfScriptType getPerfScriptType() const { return PerfType; }
  // Entry of the reader to parse multiple perf traces
  void parsePerfTraces(cl::list<std::string> &PerfTraceFilenames);
  const BinarySampleCounterMap &getBinarySampleCounters() const {
    return BinarySampleCounters;
  }

private:
  /// Validate the command line input
  void validateCommandLine(cl::list<std::string> &BinaryFilenames,
                           cl::list<std::string> &PerfTraceFilenames);
  /// Parse a single line of a PERF_RECORD_MMAP2 event looking for a
  /// mapping between the binary name and its memory layout.
  ///
  void parseMMap2Event(TraceStream &TraceIt);
  // Parse perf events/samples and do aggregation
  void parseAndAggregateTrace(StringRef Filename);
  // Parse either an MMAP event or a perf sample
  void parseEventOrSample(TraceStream &TraceIt);
  // Parse the hybrid sample including the call and LBR line
  void parseHybridSample(TraceStream &TraceIt);
  // Extract call stack from the perf trace lines
  bool extractCallstack(TraceStream &TraceIt,
                        SmallVectorImpl<uint64_t> &CallStack);
  // Extract LBR stack from one perf trace line
  bool extractLBRStack(TraceStream &TraceIt,
                       SmallVectorImpl<LBREntry> &LBRStack,
                       ProfiledBinary *Binary);
  void checkAndSetPerfType(cl::list<std::string> &PerfTraceFilenames);
  // Post process the profile after trace aggregation, we will do simple range
  // overlap computation for AutoFDO, or unwind for CSSPGO(hybrid sample).
  void generateRawProfile();
  // Unwind the hybrid samples after aggregation
  void unwindSamples();
  void printUnwinderOutput();
  // Helper function for looking up binary in AddressBinaryMap
  ProfiledBinary *getBinary(uint64_t Address);

  BinaryMap BinaryTable;
  AddressBinaryMap AddrToBinaryMap; // Used by address-based lookup.

private:
  BinarySampleCounterMap BinarySampleCounters;
  // Samples with the repeating time generated by the perf reader
  AggregatedCounter AggregatedSamples;
  PerfScriptType PerfType = PERF_UNKNOWN;
};
617 
618 } // end namespace sampleprof
619 } // end namespace llvm
620 
621 #endif
622