xref: /llvm-project/llvm/tools/llvm-profgen/PerfReader.h (revision 2fa6eaf93bfe5b638b6824f25ad1ebde686bd7d4)
1 //===-- PerfReader.h - perfscript reader -----------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #ifndef LLVM_TOOLS_LLVM_PROFGEN_PERFREADER_H
10 #define LLVM_TOOLS_LLVM_PROFGEN_PERFREADER_H
11 #include "ErrorHandling.h"
12 #include "ProfiledBinary.h"
13 #include "llvm/Support/Casting.h"
14 #include "llvm/Support/CommandLine.h"
15 #include "llvm/Support/Regex.h"
16 #include <cstdint>
17 #include <fstream>
18 #include <map>
19 
20 using namespace llvm;
21 using namespace sampleprof;
22 
23 namespace llvm {
24 
25 class CleanupInstaller;
26 
27 namespace sampleprof {
28 
29 // Stream based trace line iterator
30 class TraceStream {
31   std::string CurrentLine;
32   std::ifstream Fin;
33   bool IsAtEoF = false;
34   uint64_t LineNumber = 0;
35 
36 public:
TraceStream(StringRef Filename)37   TraceStream(StringRef Filename) : Fin(Filename.str()) {
38     if (!Fin.good())
39       exitWithError("Error read input perf script file", Filename);
40     advance();
41   }
42 
getCurrentLine()43   StringRef getCurrentLine() {
44     assert(!IsAtEoF && "Line iterator reaches the End-of-File!");
45     return CurrentLine;
46   }
47 
getLineNumber()48   uint64_t getLineNumber() { return LineNumber; }
49 
isAtEoF()50   bool isAtEoF() { return IsAtEoF; }
51 
52   // Read the next line
advance()53   void advance() {
54     if (!std::getline(Fin, CurrentLine)) {
55       IsAtEoF = true;
56       return;
57     }
58     LineNumber++;
59   }
60 };
61 
// Kind of file handed to the tool as profiling input.
enum PerfFormat {
  // Format has not been determined.
  UnknownFormat = 0,
  // Raw linux perf.data.
  PerfData = 1,
  // Perf script created by the `perf script` command.
  PerfScript = 2,
  // Unsymbolized profile generated by llvm-profgen.
  UnsymbolizedProfile = 3,
};
70 
// What a perf script contains.
enum PerfContent {
  // Content has not been determined.
  UnknownContent = 0,
  // Only LBR sample.
  LBR = 1,
  // Hybrid sample including call stack and LBR stack.
  LBRStack = 2,
};
77 
78 struct PerfInputFile {
79   std::string InputFile;
80   PerfFormat Format = PerfFormat::UnknownFormat;
81   PerfContent Content = PerfContent::UnknownContent;
82 };
83 
84 // The parsed LBR sample entry.
85 struct LBREntry {
86   uint64_t Source = 0;
87   uint64_t Target = 0;
LBREntryLBREntry88   LBREntry(uint64_t S, uint64_t T) : Source(S), Target(T) {}
89 
90 #ifndef NDEBUG
printLBREntry91   void print() const {
92     dbgs() << "from " << format("%#010x", Source) << " to "
93            << format("%#010x", Target);
94   }
95 #endif
96 };
97 
98 #ifndef NDEBUG
printLBRStack(const SmallVectorImpl<LBREntry> & LBRStack)99 static inline void printLBRStack(const SmallVectorImpl<LBREntry> &LBRStack) {
100   for (size_t I = 0; I < LBRStack.size(); I++) {
101     dbgs() << "[" << I << "] ";
102     LBRStack[I].print();
103     dbgs() << "\n";
104   }
105 }
106 
printCallStack(const SmallVectorImpl<uint64_t> & CallStack)107 static inline void printCallStack(const SmallVectorImpl<uint64_t> &CallStack) {
108   for (size_t I = 0; I < CallStack.size(); I++) {
109     dbgs() << "[" << I << "] " << format("%#010x", CallStack[I]) << "\n";
110   }
111 }
112 #endif
113 
// Adapter that lets shared data of type T be used as a key of a hashed
// container. T has to provide a \fn getHashCode and a \fn isEqual.
//
// getHashCode is deliberately called non-virtually to avoid the vtable
// overhead: the hash of a derived class is calculated explicitly and stored
// in the base class's HashCode. That also leaves room for computing the hash
// incrementally (like a rolling hash) during frame stack unwinding, since
// unwinding only changes the leaf of the frame stack.
//
// \fn isEqual is a virtual function and therefore has perf overhead. If a
// better hash function is designed in the future, the comparison could be
// skipped or switched to a non-virtual function (e.g. ignore the comparison
// when the probability of hash conflicts is low).
template <class T> class Hashable {
public:
  // The wrapped data; ownership is shared with whoever else holds it.
  std::shared_ptr<T> Data;

  Hashable(const std::shared_ptr<T> &D) : Data(D) {}

  // Hash functor: forwards to the wrapped object's getHashCode.
  struct Hash {
    uint64_t operator()(const Hashable<T> &Key) const {
      // Don't make it virtual for getHashCode
      const uint64_t Code = Key.Data->getHashCode();
      // A zero hash is treated as "hash was never generated".
      assert(Code && "Should generate HashCode for it!");
      return Code;
    }
  };

  // Equality functor: forwards to the wrapped object's isEqual.
  struct Equal {
    bool operator()(const Hashable<T> &LHS, const Hashable<T> &RHS) const {
      // Precisely compare the data, vtable will have overhead.
      T *Right = RHS.Data.get();
      return LHS.Data->isEqual(Right);
    }
  };

  // Returns the raw pointer to the wrapped data without transferring
  // ownership.
  T *getPtr() const { return Data.get(); }
};
149 
150 struct PerfSample {
151   // LBR stack recorded in FIFO order.
152   SmallVector<LBREntry, 16> LBRStack;
153   // Call stack recorded in FILO(leaf to root) order, it's used for CS-profile
154   // generation
155   SmallVector<uint64_t, 16> CallStack;
156 
157   virtual ~PerfSample() = default;
getHashCodePerfSample158   uint64_t getHashCode() const {
159     // Use simple DJB2 hash
160     auto HashCombine = [](uint64_t H, uint64_t V) {
161       return ((H << 5) + H) + V;
162     };
163     uint64_t Hash = 5381;
164     for (const auto &Value : CallStack) {
165       Hash = HashCombine(Hash, Value);
166     }
167     for (const auto &Entry : LBRStack) {
168       Hash = HashCombine(Hash, Entry.Source);
169       Hash = HashCombine(Hash, Entry.Target);
170     }
171     return Hash;
172   }
173 
isEqualPerfSample174   bool isEqual(const PerfSample *Other) const {
175     const SmallVector<uint64_t, 16> &OtherCallStack = Other->CallStack;
176     const SmallVector<LBREntry, 16> &OtherLBRStack = Other->LBRStack;
177 
178     if (CallStack.size() != OtherCallStack.size() ||
179         LBRStack.size() != OtherLBRStack.size())
180       return false;
181 
182     if (!std::equal(CallStack.begin(), CallStack.end(), OtherCallStack.begin()))
183       return false;
184 
185     for (size_t I = 0; I < OtherLBRStack.size(); I++) {
186       if (LBRStack[I].Source != OtherLBRStack[I].Source ||
187           LBRStack[I].Target != OtherLBRStack[I].Target)
188         return false;
189     }
190     return true;
191   }
192 
193 #ifndef NDEBUG
194   uint64_t Linenum = 0;
195 
printPerfSample196   void print() const {
197     dbgs() << "Line " << Linenum << "\n";
198     dbgs() << "LBR stack\n";
199     printLBRStack(LBRStack);
200     dbgs() << "Call stack\n";
201     printCallStack(CallStack);
202   }
203 #endif
204 };
205 // After parsing the sample, we record the samples by aggregating them
206 // into this counter. The key stores the sample data and the value is
207 // the sample repeat times.
208 using AggregatedCounter =
209     std::unordered_map<Hashable<PerfSample>, uint64_t,
210                        Hashable<PerfSample>::Hash, Hashable<PerfSample>::Equal>;
211 
212 using SampleVector = SmallVector<std::tuple<uint64_t, uint64_t, uint64_t>, 16>;
213 
isValidFallThroughRange(uint64_t Start,uint64_t End,ProfiledBinary * Binary)214 inline bool isValidFallThroughRange(uint64_t Start, uint64_t End,
215                                     ProfiledBinary *Binary) {
216   // Start bigger than End is considered invalid.
217   // LBR ranges cross the unconditional jmp are also assumed invalid.
218   // It's found that perf data may contain duplicate LBR entries that could form
219   // a range that does not reflect real execution flow on some Intel targets,
220   // e.g. Skylake. Such ranges are ususally very long. Exclude them since there
221   // cannot be a linear execution range that spans over unconditional jmp.
222   return Start <= End && !Binary->rangeCrossUncondBranch(Start, End);
223 }
224 
225 // The state for the unwinder, it doesn't hold the data but only keep the
226 // pointer/index of the data, While unwinding, the CallStack is changed
227 // dynamicially and will be recorded as the context of the sample
228 struct UnwindState {
229   // Profiled binary that current frame address belongs to
230   const ProfiledBinary *Binary;
231   // Call stack trie node
232   struct ProfiledFrame {
233     const uint64_t Address = DummyRoot;
234     ProfiledFrame *Parent;
235     SampleVector RangeSamples;
236     SampleVector BranchSamples;
237     std::unordered_map<uint64_t, std::unique_ptr<ProfiledFrame>> Children;
238 
239     ProfiledFrame(uint64_t Addr = 0, ProfiledFrame *P = nullptr)
AddressUnwindState::ProfiledFrame240         : Address(Addr), Parent(P) {}
getOrCreateChildFrameUnwindState::ProfiledFrame241     ProfiledFrame *getOrCreateChildFrame(uint64_t Address) {
242       assert(Address && "Address can't be zero!");
243       auto Ret = Children.emplace(
244           Address, std::make_unique<ProfiledFrame>(Address, this));
245       return Ret.first->second.get();
246     }
recordRangeCountUnwindState::ProfiledFrame247     void recordRangeCount(uint64_t Start, uint64_t End, uint64_t Count) {
248       RangeSamples.emplace_back(std::make_tuple(Start, End, Count));
249     }
recordBranchCountUnwindState::ProfiledFrame250     void recordBranchCount(uint64_t Source, uint64_t Target, uint64_t Count) {
251       BranchSamples.emplace_back(std::make_tuple(Source, Target, Count));
252     }
isDummyRootUnwindState::ProfiledFrame253     bool isDummyRoot() { return Address == DummyRoot; }
isExternalFrameUnwindState::ProfiledFrame254     bool isExternalFrame() { return Address == ExternalAddr; }
isLeafFrameUnwindState::ProfiledFrame255     bool isLeafFrame() { return Children.empty(); }
256   };
257 
258   ProfiledFrame DummyTrieRoot;
259   ProfiledFrame *CurrentLeafFrame;
260   // Used to fall through the LBR stack
261   uint32_t LBRIndex = 0;
262   // Reference to PerfSample.LBRStack
263   const SmallVector<LBREntry, 16> &LBRStack;
264   // Used to iterate the address range
265   InstructionPointer InstPtr;
266   // Indicate whether unwinding is currently in a bad state which requires to
267   // skip all subsequent unwinding.
268   bool Invalid = false;
UnwindStateUnwindState269   UnwindState(const PerfSample *Sample, const ProfiledBinary *Binary)
270       : Binary(Binary), LBRStack(Sample->LBRStack),
271         InstPtr(Binary, Sample->CallStack.front()) {
272     initFrameTrie(Sample->CallStack);
273   }
274 
validateInitialStateUnwindState275   bool validateInitialState() {
276     uint64_t LBRLeaf = LBRStack[LBRIndex].Target;
277     uint64_t LeafAddr = CurrentLeafFrame->Address;
278     assert((LBRLeaf != ExternalAddr || LBRLeaf == LeafAddr) &&
279            "External leading LBR should match the leaf frame.");
280 
281     // When we take a stack sample, ideally the sampling distance between the
282     // leaf IP of stack and the last LBR target shouldn't be very large.
283     // Use a heuristic size (0x100) to filter out broken records.
284     if (LeafAddr < LBRLeaf || LeafAddr - LBRLeaf >= 0x100) {
285       WithColor::warning() << "Bogus trace: stack tip = "
286                            << format("%#010x", LeafAddr)
287                            << ", LBR tip = " << format("%#010x\n", LBRLeaf);
288       return false;
289     }
290     return true;
291   }
292 
checkStateConsistencyUnwindState293   void checkStateConsistency() {
294     assert(InstPtr.Address == CurrentLeafFrame->Address &&
295            "IP should align with context leaf");
296   }
297 
setInvalidUnwindState298   void setInvalid() { Invalid = true; }
hasNextLBRUnwindState299   bool hasNextLBR() const { return LBRIndex < LBRStack.size(); }
getCurrentLBRSourceUnwindState300   uint64_t getCurrentLBRSource() const { return LBRStack[LBRIndex].Source; }
getCurrentLBRTargetUnwindState301   uint64_t getCurrentLBRTarget() const { return LBRStack[LBRIndex].Target; }
getCurrentLBRUnwindState302   const LBREntry &getCurrentLBR() const { return LBRStack[LBRIndex]; }
IsLastLBRUnwindState303   bool IsLastLBR() const { return LBRIndex == 0; }
getLBRStackSizeUnwindState304   bool getLBRStackSize() const { return LBRStack.size(); }
advanceLBRUnwindState305   void advanceLBR() { LBRIndex++; }
getParentFrameUnwindState306   ProfiledFrame *getParentFrame() { return CurrentLeafFrame->Parent; }
307 
pushFrameUnwindState308   void pushFrame(uint64_t Address) {
309     CurrentLeafFrame = CurrentLeafFrame->getOrCreateChildFrame(Address);
310   }
311 
switchToFrameUnwindState312   void switchToFrame(uint64_t Address) {
313     if (CurrentLeafFrame->Address == Address)
314       return;
315     CurrentLeafFrame = CurrentLeafFrame->Parent->getOrCreateChildFrame(Address);
316   }
317 
popFrameUnwindState318   void popFrame() { CurrentLeafFrame = CurrentLeafFrame->Parent; }
319 
clearCallStackUnwindState320   void clearCallStack() { CurrentLeafFrame = &DummyTrieRoot; }
321 
initFrameTrieUnwindState322   void initFrameTrie(const SmallVectorImpl<uint64_t> &CallStack) {
323     ProfiledFrame *Cur = &DummyTrieRoot;
324     for (auto Address : reverse(CallStack)) {
325       Cur = Cur->getOrCreateChildFrame(Address);
326     }
327     CurrentLeafFrame = Cur;
328   }
329 
getDummyRootPtrUnwindState330   ProfiledFrame *getDummyRootPtr() { return &DummyTrieRoot; }
331 };
332 
// Base class for sample counter key with context
struct ContextKey {
  // Utilities for LLVM-style RTTI
  enum ContextKind { CK_StringBased, CK_AddrBased };

  // Cached hash; zero means it has not been generated yet.
  uint64_t HashCode = 0;

  virtual ~ContextKey() = default;

  // Returns the cached hash, asking the derived class to generate it first
  // whenever the cache still holds zero.
  uint64_t getHashCode() {
    if (!HashCode)
      genHashCode();
    return HashCode;
  }

  // Implemented by derived classes to fill in HashCode.
  virtual void genHashCode() = 0;

  // Default comparison: two keys are equal when their cached hashes match.
  virtual bool isEqual(const ContextKey *K) const {
    return K->HashCode == HashCode;
  }

  const ContextKind Kind;
  ContextKind getKind() const { return Kind; }
  ContextKey(ContextKind K) : Kind(K) {}
};
353 
354 // String based context id
355 struct StringBasedCtxKey : public ContextKey {
356   SampleContextFrameVector Context;
357 
358   bool WasLeafInlined;
StringBasedCtxKeyStringBasedCtxKey359   StringBasedCtxKey() : ContextKey(CK_StringBased), WasLeafInlined(false){};
classofStringBasedCtxKey360   static bool classof(const ContextKey *K) {
361     return K->getKind() == CK_StringBased;
362   }
363 
isEqualStringBasedCtxKey364   bool isEqual(const ContextKey *K) const override {
365     const StringBasedCtxKey *Other = dyn_cast<StringBasedCtxKey>(K);
366     return Context == Other->Context;
367   }
368 
genHashCodeStringBasedCtxKey369   void genHashCode() override {
370     HashCode = hash_value(SampleContextFrames(Context));
371   }
372 };
373 
374 // Address-based context id
375 struct AddrBasedCtxKey : public ContextKey {
376   SmallVector<uint64_t, 16> Context;
377 
378   bool WasLeafInlined;
AddrBasedCtxKeyAddrBasedCtxKey379   AddrBasedCtxKey() : ContextKey(CK_AddrBased), WasLeafInlined(false){};
classofAddrBasedCtxKey380   static bool classof(const ContextKey *K) {
381     return K->getKind() == CK_AddrBased;
382   }
383 
isEqualAddrBasedCtxKey384   bool isEqual(const ContextKey *K) const override {
385     const AddrBasedCtxKey *Other = dyn_cast<AddrBasedCtxKey>(K);
386     return Context == Other->Context;
387   }
388 
genHashCodeAddrBasedCtxKey389   void genHashCode() override {
390     HashCode = hash_combine_range(Context.begin(), Context.end());
391   }
392 };
393 
// Branch sample counter for one function. Each branch is identified by its
// (source offset, target offset) pair and maps to its count.
using BranchSample =
    std::map<std::pair<uint64_t, uint64_t>, uint64_t>;

// Range sample counter for one function. Each range is identified by its
// (start offset, end offset) pair and maps to its count.
using RangeSample =
    std::map<std::pair<uint64_t, uint64_t>, uint64_t>;
400 // Wrapper for sample counters including range counter and branch counter
401 struct SampleCounter {
402   RangeSample RangeCounter;
403   BranchSample BranchCounter;
404 
recordRangeCountSampleCounter405   void recordRangeCount(uint64_t Start, uint64_t End, uint64_t Repeat) {
406     assert(Start <= End && "Invalid instruction range");
407     RangeCounter[{Start, End}] += Repeat;
408   }
recordBranchCountSampleCounter409   void recordBranchCount(uint64_t Source, uint64_t Target, uint64_t Repeat) {
410     BranchCounter[{Source, Target}] += Repeat;
411   }
412 };
413 
414 // Sample counter with context to support context-sensitive profile
415 using ContextSampleCounterMap =
416     std::unordered_map<Hashable<ContextKey>, SampleCounter,
417                        Hashable<ContextKey>::Hash, Hashable<ContextKey>::Equal>;
418 
419 struct FrameStack {
420   SmallVector<uint64_t, 16> Stack;
421   ProfiledBinary *Binary;
FrameStackFrameStack422   FrameStack(ProfiledBinary *B) : Binary(B) {}
pushFrameFrameStack423   bool pushFrame(UnwindState::ProfiledFrame *Cur) {
424     assert(!Cur->isExternalFrame() &&
425            "External frame's not expected for context stack.");
426     Stack.push_back(Cur->Address);
427     return true;
428   }
429 
popFrameFrameStack430   void popFrame() {
431     if (!Stack.empty())
432       Stack.pop_back();
433   }
434   std::shared_ptr<StringBasedCtxKey> getContextKey();
435 };
436 
437 struct AddressStack {
438   SmallVector<uint64_t, 16> Stack;
439   ProfiledBinary *Binary;
AddressStackAddressStack440   AddressStack(ProfiledBinary *B) : Binary(B) {}
pushFrameAddressStack441   bool pushFrame(UnwindState::ProfiledFrame *Cur) {
442     assert(!Cur->isExternalFrame() &&
443            "External frame's not expected for context stack.");
444     Stack.push_back(Cur->Address);
445     return true;
446   }
447 
popFrameAddressStack448   void popFrame() {
449     if (!Stack.empty())
450       Stack.pop_back();
451   }
452   std::shared_ptr<AddrBasedCtxKey> getContextKey();
453 };
454 
455 /*
456 As in hybrid sample we have a group of LBRs and the most recent sampling call
457 stack, we can walk through those LBRs to infer more call stacks which would be
458 used as context for profile. VirtualUnwinder is the class to do the call stack
459 unwinding based on LBR state. Two types of unwinding are processd here:
460 1) LBR unwinding and 2) linear range unwinding.
461 Specifically, for each LBR entry(can be classified into call, return, regular
462 branch), LBR unwinding will replay the operation by pushing, popping or
463 switching leaf frame towards the call stack and since the initial call stack
464 is most recently sampled, the replay should be in anti-execution order, i.e. for
465 the regular case, pop the call stack when LBR is call, push frame on call stack
466 when LBR is return. After each LBR processed, it also needs to align with the
467 next LBR by going through instructions from previous LBR's target to current
468 LBR's source, which is the linear unwinding. As instruction from linear range
469 can come from different function by inlining, linear unwinding will do the range
470 splitting and record counters by the range with same inline context. Over those
471 unwinding process we will record each call stack as context id and LBR/linear
472 range as sample counter for further CS profile generation.
473 */
474 class VirtualUnwinder {
475 public:
VirtualUnwinder(ContextSampleCounterMap * Counter,ProfiledBinary * B)476   VirtualUnwinder(ContextSampleCounterMap *Counter, ProfiledBinary *B)
477       : CtxCounterMap(Counter), Binary(B) {}
478   bool unwind(const PerfSample *Sample, uint64_t Repeat);
getUntrackedCallsites()479   std::set<uint64_t> &getUntrackedCallsites() { return UntrackedCallsites; }
480 
481   uint64_t NumTotalBranches = 0;
482   uint64_t NumExtCallBranch = 0;
483   uint64_t NumMissingExternalFrame = 0;
484   uint64_t NumMismatchedProEpiBranch = 0;
485   uint64_t NumMismatchedExtCallBranch = 0;
486   uint64_t NumUnpairedExtAddr = 0;
487   uint64_t NumPairedExtAddr = 0;
488 
489 private:
isSourceExternal(UnwindState & State)490   bool isSourceExternal(UnwindState &State) const {
491     return State.getCurrentLBRSource() == ExternalAddr;
492   }
493 
isTargetExternal(UnwindState & State)494   bool isTargetExternal(UnwindState &State) const {
495     return State.getCurrentLBRTarget() == ExternalAddr;
496   }
497 
498   // Determine whether the return source is from external code by checking if
499   // the target's the next inst is a call inst.
isReturnFromExternal(UnwindState & State)500   bool isReturnFromExternal(UnwindState &State) const {
501     return isSourceExternal(State) &&
502            (Binary->getCallAddrFromFrameAddr(State.getCurrentLBRTarget()) != 0);
503   }
504 
505   // If the source is external address but it's not the `return` case, treat it
506   // as a call from external.
isCallFromExternal(UnwindState & State)507   bool isCallFromExternal(UnwindState &State) const {
508     return isSourceExternal(State) &&
509            Binary->getCallAddrFromFrameAddr(State.getCurrentLBRTarget()) == 0;
510   }
511 
isCallState(UnwindState & State)512   bool isCallState(UnwindState &State) const {
513     // The tail call frame is always missing here in stack sample, we will
514     // use a specific tail call tracker to infer it.
515     if (!isValidState(State))
516       return false;
517 
518     if (Binary->addressIsCall(State.getCurrentLBRSource()))
519       return true;
520 
521     return isCallFromExternal(State);
522   }
523 
isReturnState(UnwindState & State)524   bool isReturnState(UnwindState &State) const {
525     if (!isValidState(State))
526       return false;
527 
528     // Simply check addressIsReturn, as ret is always reliable, both for
529     // regular call and tail call.
530     if (Binary->addressIsReturn(State.getCurrentLBRSource()))
531       return true;
532 
533     return isReturnFromExternal(State);
534   }
535 
isValidState(UnwindState & State)536   bool isValidState(UnwindState &State) const { return !State.Invalid; }
537 
538   void unwindCall(UnwindState &State);
539   void unwindLinear(UnwindState &State, uint64_t Repeat);
540   void unwindReturn(UnwindState &State);
541   void unwindBranch(UnwindState &State);
542 
543   template <typename T>
544   void collectSamplesFromFrame(UnwindState::ProfiledFrame *Cur, T &Stack);
545   // Collect each samples on trie node by DFS traversal
546   template <typename T>
547   void collectSamplesFromFrameTrie(UnwindState::ProfiledFrame *Cur, T &Stack);
548   void collectSamplesFromFrameTrie(UnwindState::ProfiledFrame *Cur);
549 
550   void recordRangeCount(uint64_t Start, uint64_t End, UnwindState &State,
551                         uint64_t Repeat);
552   void recordBranchCount(const LBREntry &Branch, UnwindState &State,
553                          uint64_t Repeat);
554 
555   ContextSampleCounterMap *CtxCounterMap;
556   // Profiled binary that current frame address belongs to
557   ProfiledBinary *Binary;
558   // Keep track of all untracked callsites
559   std::set<uint64_t> UntrackedCallsites;
560 };
561 
562 // Read perf trace to parse the events and samples.
563 class PerfReaderBase {
564 public:
PerfReaderBase(ProfiledBinary * B,StringRef PerfTrace)565   PerfReaderBase(ProfiledBinary *B, StringRef PerfTrace)
566       : Binary(B), PerfTraceFile(PerfTrace) {
567     // Initialize the base address to preferred address.
568     Binary->setBaseAddress(Binary->getPreferredBaseAddress());
569   };
570   virtual ~PerfReaderBase() = default;
571   static std::unique_ptr<PerfReaderBase>
572   create(ProfiledBinary *Binary, PerfInputFile &PerfInput,
573          std::optional<int32_t> PIDFilter);
574 
575   // Entry of the reader to parse multiple perf traces
576   virtual void parsePerfTraces() = 0;
getSampleCounters()577   const ContextSampleCounterMap &getSampleCounters() const {
578     return SampleCounters;
579   }
profileIsCS()580   bool profileIsCS() { return ProfileIsCS; }
581 
582 protected:
583   ProfiledBinary *Binary = nullptr;
584   StringRef PerfTraceFile;
585 
586   ContextSampleCounterMap SampleCounters;
587   bool ProfileIsCS = false;
588 
589   uint64_t NumTotalSample = 0;
590   uint64_t NumLeafExternalFrame = 0;
591   uint64_t NumLeadingOutgoingLBR = 0;
592 };
593 
594 // Read perf script to parse the events and samples.
595 class PerfScriptReader : public PerfReaderBase {
596 public:
PerfScriptReader(ProfiledBinary * B,StringRef PerfTrace,std::optional<int32_t> PID)597   PerfScriptReader(ProfiledBinary *B, StringRef PerfTrace,
598                    std::optional<int32_t> PID)
599       : PerfReaderBase(B, PerfTrace), PIDFilter(PID) {};
600 
601   // Entry of the reader to parse multiple perf traces
602   void parsePerfTraces() override;
603   // Generate perf script from perf data
604   static PerfInputFile convertPerfDataToTrace(ProfiledBinary *Binary,
605                                               bool SkipPID, PerfInputFile &File,
606                                               std::optional<int32_t> PIDFilter);
607   // Extract perf script type by peaking at the input
608   static PerfContent checkPerfScriptType(StringRef FileName);
609 
610   // Cleanup installers for temporary files created by perf script command.
611   // Those files will be automatically removed when running destructor or
612   // receiving signals.
613   static SmallVector<CleanupInstaller, 2> TempFileCleanups;
614 
615 protected:
616   // The parsed MMap event
617   struct MMapEvent {
618     int64_t PID = 0;
619     uint64_t Address = 0;
620     uint64_t Size = 0;
621     uint64_t Offset = 0;
622     StringRef BinaryPath;
623   };
624 
625   // Check whether a given line is LBR sample
626   static bool isLBRSample(StringRef Line);
627   // Check whether a given line is MMAP event
628   static bool isMMapEvent(StringRef Line);
629   // Parse a single line of a PERF_RECORD_MMAP event looking for a
630   // mapping between the binary name and its memory layout.
631   static bool extractMMapEventForBinary(ProfiledBinary *Binary, StringRef Line,
632                                         MMapEvent &MMap);
633   // Update base address based on mmap events
634   void updateBinaryAddress(const MMapEvent &Event);
635   // Parse mmap event and update binary address
636   void parseMMapEvent(TraceStream &TraceIt);
637   // Parse perf events/samples and do aggregation
638   void parseAndAggregateTrace();
639   // Parse either an MMAP event or a perf sample
640   void parseEventOrSample(TraceStream &TraceIt);
641   // Warn if the relevant mmap event is missing.
642   void warnIfMissingMMap();
643   // Emit accumulate warnings.
644   void warnTruncatedStack();
645   // Warn if range is invalid.
646   void warnInvalidRange();
647   // Extract call stack from the perf trace lines
648   bool extractCallstack(TraceStream &TraceIt,
649                         SmallVectorImpl<uint64_t> &CallStack);
650   // Extract LBR stack from one perf trace line
651   bool extractLBRStack(TraceStream &TraceIt,
652                        SmallVectorImpl<LBREntry> &LBRStack);
653   uint64_t parseAggregatedCount(TraceStream &TraceIt);
654   // Parse one sample from multiple perf lines, override this for different
655   // sample type
656   void parseSample(TraceStream &TraceIt);
657   // An aggregated count is given to indicate how many times the sample is
658   // repeated.
parseSample(TraceStream & TraceIt,uint64_t Count)659   virtual void parseSample(TraceStream &TraceIt, uint64_t Count){};
660   void computeCounterFromLBR(const PerfSample *Sample, uint64_t Repeat);
661   // Post process the profile after trace aggregation, we will do simple range
662   // overlap computation for AutoFDO, or unwind for CSSPGO(hybrid sample).
663   virtual void generateUnsymbolizedProfile();
664   void writeUnsymbolizedProfile(StringRef Filename);
665   void writeUnsymbolizedProfile(raw_fd_ostream &OS);
666 
667   // Samples with the repeating time generated by the perf reader
668   AggregatedCounter AggregatedSamples;
669   // Keep track of all invalid return addresses
670   std::set<uint64_t> InvalidReturnAddresses;
671   // PID for the process of interest
672   std::optional<int32_t> PIDFilter;
673 };
674 
675 /*
676   The reader of LBR only perf script.
677   A typical LBR sample is like:
678     40062f 0x4005c8/0x4005dc/P/-/-/0   0x40062f/0x4005b0/P/-/-/0 ...
679           ... 0x4005c8/0x4005dc/P/-/-/0
680 */
681 class LBRPerfReader : public PerfScriptReader {
682 public:
LBRPerfReader(ProfiledBinary * Binary,StringRef PerfTrace,std::optional<int32_t> PID)683   LBRPerfReader(ProfiledBinary *Binary, StringRef PerfTrace,
684                 std::optional<int32_t> PID)
685       : PerfScriptReader(Binary, PerfTrace, PID) {};
686   // Parse the LBR only sample.
687   void parseSample(TraceStream &TraceIt, uint64_t Count) override;
688 };
689 
690 /*
691   Hybrid perf script includes a group of hybrid samples(LBRs + call stack),
692   which is used to generate CS profile. An example of hybrid sample:
693     4005dc    # call stack leaf
694     400634
695     400684    # call stack root
696     0x4005c8/0x4005dc/P/-/-/0   0x40062f/0x4005b0/P/-/-/0 ...
697           ... 0x4005c8/0x4005dc/P/-/-/0    # LBR Entries
698 */
699 class HybridPerfReader : public PerfScriptReader {
700 public:
HybridPerfReader(ProfiledBinary * Binary,StringRef PerfTrace,std::optional<int32_t> PID)701   HybridPerfReader(ProfiledBinary *Binary, StringRef PerfTrace,
702                    std::optional<int32_t> PID)
703       : PerfScriptReader(Binary, PerfTrace, PID) {};
704   // Parse the hybrid sample including the call and LBR line
705   void parseSample(TraceStream &TraceIt, uint64_t Count) override;
706   void generateUnsymbolizedProfile() override;
707 
708 private:
709   // Unwind the hybrid samples after aggregration
710   void unwindSamples();
711 };
712 
713 /*
714    Format of unsymbolized profile:
715 
716     [frame1 @ frame2 @ ...]  # If it's a CS profile
717       number of entries in RangeCounter
718       from_1-to_1:count_1
719       from_2-to_2:count_2
720       ......
721       from_n-to_n:count_n
722       number of entries in BranchCounter
723       src_1->dst_1:count_1
724       src_2->dst_2:count_2
725       ......
726       src_n->dst_n:count_n
727     [frame1 @ frame2 @ ...]  # Next context
728       ......
729 
730 Note that non-CS profile doesn't have the empty `[]` context.
731 */
732 class UnsymbolizedProfileReader : public PerfReaderBase {
733 public:
UnsymbolizedProfileReader(ProfiledBinary * Binary,StringRef PerfTrace)734   UnsymbolizedProfileReader(ProfiledBinary *Binary, StringRef PerfTrace)
735       : PerfReaderBase(Binary, PerfTrace){};
736   void parsePerfTraces() override;
737 
738 private:
739   void readSampleCounters(TraceStream &TraceIt, SampleCounter &SCounters);
740   void readUnsymbolizedProfile(StringRef Filename);
741 
742   std::unordered_set<std::string> ContextStrSet;
743 };
744 
745 } // end namespace sampleprof
746 } // end namespace llvm
747 
748 #endif
749