1a94fa862Swlei //===-- PerfReader.h - perfscript reader -----------------------*- C++ -*-===//
2a94fa862Swlei //
3a94fa862Swlei // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4a94fa862Swlei // See https://llvm.org/LICENSE.txt for license information.
5a94fa862Swlei // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6a94fa862Swlei //
7a94fa862Swlei //===----------------------------------------------------------------------===//
8a94fa862Swlei
9a94fa862Swlei #ifndef LLVM_TOOLS_LLVM_PROFGEN_PERFREADER_H
10a94fa862Swlei #define LLVM_TOOLS_LLVM_PROFGEN_PERFREADER_H
11a94fa862Swlei #include "ErrorHandling.h"
12a94fa862Swlei #include "ProfiledBinary.h"
13414930b9Swlei #include "llvm/Support/Casting.h"
14a94fa862Swlei #include "llvm/Support/CommandLine.h"
15a94fa862Swlei #include "llvm/Support/Regex.h"
166eca242eSWenlei He #include <cstdint>
17a94fa862Swlei #include <fstream>
18a94fa862Swlei #include <map>
19a94fa862Swlei
20a94fa862Swlei using namespace llvm;
21a94fa862Swlei using namespace sampleprof;
22a94fa862Swlei
23a94fa862Swlei namespace llvm {
243f7f446dSHaohai Wen
253f7f446dSHaohai Wen class CleanupInstaller;
263f7f446dSHaohai Wen
27a94fa862Swlei namespace sampleprof {
28a94fa862Swlei
29a94fa862Swlei // Stream based trace line iterator
30a94fa862Swlei class TraceStream {
31a94fa862Swlei std::string CurrentLine;
32a94fa862Swlei std::ifstream Fin;
33a94fa862Swlei bool IsAtEoF = false;
34a94fa862Swlei uint64_t LineNumber = 0;
35a94fa862Swlei
36a94fa862Swlei public:
TraceStream(StringRef Filename)37a94fa862Swlei TraceStream(StringRef Filename) : Fin(Filename.str()) {
38a94fa862Swlei if (!Fin.good())
39a94fa862Swlei exitWithError("Error read input perf script file", Filename);
40a94fa862Swlei advance();
41a94fa862Swlei }
42a94fa862Swlei
getCurrentLine()43a94fa862Swlei StringRef getCurrentLine() {
44a94fa862Swlei assert(!IsAtEoF && "Line iterator reaches the End-of-File!");
45a94fa862Swlei return CurrentLine;
46a94fa862Swlei }
47a94fa862Swlei
getLineNumber()48a94fa862Swlei uint64_t getLineNumber() { return LineNumber; }
49a94fa862Swlei
isAtEoF()50a94fa862Swlei bool isAtEoF() { return IsAtEoF; }
51a94fa862Swlei
52a94fa862Swlei // Read the next line
advance()53a94fa862Swlei void advance() {
54a94fa862Swlei if (!std::getline(Fin, CurrentLine)) {
55a94fa862Swlei IsAtEoF = true;
56a94fa862Swlei return;
57a94fa862Swlei }
58a94fa862Swlei LineNumber++;
59a94fa862Swlei }
60a94fa862Swlei };
61a94fa862Swlei
62a5f411b7Swlei // The type of input format.
63a5f411b7Swlei enum PerfFormat {
64a5f411b7Swlei UnknownFormat = 0,
65a5f411b7Swlei PerfData = 1, // Raw linux perf.data.
66a5f411b7Swlei PerfScript = 2, // Perf script create by `perf script` command.
67a5f411b7Swlei UnsymbolizedProfile = 3, // Unsymbolized profile generated by llvm-profgen.
68a5f411b7Swlei
69a5f411b7Swlei };
70a5f411b7Swlei
71a5f411b7Swlei // The type of perfscript content.
72a5f411b7Swlei enum PerfContent {
73a5f411b7Swlei UnknownContent = 0,
74a5f411b7Swlei LBR = 1, // Only LBR sample.
75a5f411b7Swlei LBRStack = 2, // Hybrid sample including call stack and LBR stack.
76a5f411b7Swlei };
77a5f411b7Swlei
78a5f411b7Swlei struct PerfInputFile {
79a5f411b7Swlei std::string InputFile;
80a5f411b7Swlei PerfFormat Format = PerfFormat::UnknownFormat;
81a5f411b7Swlei PerfContent Content = PerfContent::UnknownContent;
821f05b1a9Swlei };
831f05b1a9Swlei
841f05b1a9Swlei // The parsed LBR sample entry.
851f05b1a9Swlei struct LBREntry {
861f05b1a9Swlei uint64_t Source = 0;
871f05b1a9Swlei uint64_t Target = 0;
LBREntryLBREntry88bfcb2c11Swlei LBREntry(uint64_t S, uint64_t T) : Source(S), Target(T) {}
893b51b518SHongtao Yu
903b51b518SHongtao Yu #ifndef NDEBUG
printLBREntry913b51b518SHongtao Yu void print() const {
923b51b518SHongtao Yu dbgs() << "from " << format("%#010x", Source) << " to "
933b51b518SHongtao Yu << format("%#010x", Target);
943b51b518SHongtao Yu }
953b51b518SHongtao Yu #endif
961f05b1a9Swlei };
971f05b1a9Swlei
983b51b518SHongtao Yu #ifndef NDEBUG
printLBRStack(const SmallVectorImpl<LBREntry> & LBRStack)993b51b518SHongtao Yu static inline void printLBRStack(const SmallVectorImpl<LBREntry> &LBRStack) {
1003b51b518SHongtao Yu for (size_t I = 0; I < LBRStack.size(); I++) {
1013b51b518SHongtao Yu dbgs() << "[" << I << "] ";
1023b51b518SHongtao Yu LBRStack[I].print();
1033b51b518SHongtao Yu dbgs() << "\n";
1043b51b518SHongtao Yu }
1053b51b518SHongtao Yu }
1063b51b518SHongtao Yu
printCallStack(const SmallVectorImpl<uint64_t> & CallStack)1073b51b518SHongtao Yu static inline void printCallStack(const SmallVectorImpl<uint64_t> &CallStack) {
1083b51b518SHongtao Yu for (size_t I = 0; I < CallStack.size(); I++) {
1093b51b518SHongtao Yu dbgs() << "[" << I << "] " << format("%#010x", CallStack[I]) << "\n";
1103b51b518SHongtao Yu }
1113b51b518SHongtao Yu }
1123b51b518SHongtao Yu #endif
1133b51b518SHongtao Yu
114414930b9Swlei // Hash interface for generic data of type T
115414930b9Swlei // Data should implement a \fn getHashCode and a \fn isEqual
116414930b9Swlei // Currently getHashCode is non-virtual to avoid the overhead of calling vtable,
117414930b9Swlei // i.e we explicitly calculate hash of derived class, assign to base class's
118414930b9Swlei // HashCode. This also provides the flexibility for calculating the hash code
119414930b9Swlei // incrementally(like rolling hash) during frame stack unwinding since unwinding
120414930b9Swlei // only changes the leaf of frame stack. \fn isEqual is a virtual function,
121414930b9Swlei // which will have perf overhead. In the future, if we redesign a better hash
122414930b9Swlei // function, then we can just skip this or switch to non-virtual function(like
123a2d45017SKazu Hirata // just ignore comparison if hash conflicts probabilities is low)
124414930b9Swlei template <class T> class Hashable {
125414930b9Swlei public:
126414930b9Swlei std::shared_ptr<T> Data;
Hashable(const std::shared_ptr<T> & D)127414930b9Swlei Hashable(const std::shared_ptr<T> &D) : Data(D) {}
128414930b9Swlei
129414930b9Swlei // Hash code generation
130414930b9Swlei struct Hash {
operatorHash131414930b9Swlei uint64_t operator()(const Hashable<T> &Key) const {
132414930b9Swlei // Don't make it virtual for getHashCode
133964053d5Swlei uint64_t Hash = Key.Data->getHashCode();
134964053d5Swlei assert(Hash && "Should generate HashCode for it!");
135964053d5Swlei return Hash;
136414930b9Swlei }
137414930b9Swlei };
138414930b9Swlei
139414930b9Swlei // Hash equal
140414930b9Swlei struct Equal {
operatorEqual141414930b9Swlei bool operator()(const Hashable<T> &LHS, const Hashable<T> &RHS) const {
142414930b9Swlei // Precisely compare the data, vtable will have overhead.
143414930b9Swlei return LHS.Data->isEqual(RHS.Data.get());
144414930b9Swlei }
145414930b9Swlei };
146414930b9Swlei
getPtr()147414930b9Swlei T *getPtr() const { return Data.get(); }
148414930b9Swlei };
149414930b9Swlei
150414930b9Swlei struct PerfSample {
151964053d5Swlei // LBR stack recorded in FIFO order.
152964053d5Swlei SmallVector<LBREntry, 16> LBRStack;
153964053d5Swlei // Call stack recorded in FILO(leaf to root) order, it's used for CS-profile
154964053d5Swlei // generation
155964053d5Swlei SmallVector<uint64_t, 16> CallStack;
156414930b9Swlei
157414930b9Swlei virtual ~PerfSample() = default;
getHashCodePerfSample158964053d5Swlei uint64_t getHashCode() const {
159414930b9Swlei // Use simple DJB2 hash
160414930b9Swlei auto HashCombine = [](uint64_t H, uint64_t V) {
161414930b9Swlei return ((H << 5) + H) + V;
1621f05b1a9Swlei };
163414930b9Swlei uint64_t Hash = 5381;
164414930b9Swlei for (const auto &Value : CallStack) {
165414930b9Swlei Hash = HashCombine(Hash, Value);
166414930b9Swlei }
167414930b9Swlei for (const auto &Entry : LBRStack) {
168414930b9Swlei Hash = HashCombine(Hash, Entry.Source);
169414930b9Swlei Hash = HashCombine(Hash, Entry.Target);
170414930b9Swlei }
171964053d5Swlei return Hash;
172964053d5Swlei }
173964053d5Swlei
isEqualPerfSample174964053d5Swlei bool isEqual(const PerfSample *Other) const {
175964053d5Swlei const SmallVector<uint64_t, 16> &OtherCallStack = Other->CallStack;
176964053d5Swlei const SmallVector<LBREntry, 16> &OtherLBRStack = Other->LBRStack;
177964053d5Swlei
178964053d5Swlei if (CallStack.size() != OtherCallStack.size() ||
179964053d5Swlei LBRStack.size() != OtherLBRStack.size())
180964053d5Swlei return false;
181964053d5Swlei
182964053d5Swlei if (!std::equal(CallStack.begin(), CallStack.end(), OtherCallStack.begin()))
183964053d5Swlei return false;
184964053d5Swlei
185964053d5Swlei for (size_t I = 0; I < OtherLBRStack.size(); I++) {
186964053d5Swlei if (LBRStack[I].Source != OtherLBRStack[I].Source ||
187964053d5Swlei LBRStack[I].Target != OtherLBRStack[I].Target)
188964053d5Swlei return false;
189964053d5Swlei }
190964053d5Swlei return true;
191414930b9Swlei }
1923b51b518SHongtao Yu
1933b51b518SHongtao Yu #ifndef NDEBUG
1948a0406dcSHongtao Yu uint64_t Linenum = 0;
1958a0406dcSHongtao Yu
printPerfSample19600bfde72SHongtao Yu void print() const {
1978a0406dcSHongtao Yu dbgs() << "Line " << Linenum << "\n";
1983b51b518SHongtao Yu dbgs() << "LBR stack\n";
1993b51b518SHongtao Yu printLBRStack(LBRStack);
2003b51b518SHongtao Yu dbgs() << "Call stack\n";
2013b51b518SHongtao Yu printCallStack(CallStack);
2023b51b518SHongtao Yu }
2033b51b518SHongtao Yu #endif
204414930b9Swlei };
205414930b9Swlei // After parsing the sample, we record the samples by aggregating them
206414930b9Swlei // into this counter. The key stores the sample data and the value is
207414930b9Swlei // the sample repeat times.
208414930b9Swlei using AggregatedCounter =
209414930b9Swlei std::unordered_map<Hashable<PerfSample>, uint64_t,
210414930b9Swlei Hashable<PerfSample>::Hash, Hashable<PerfSample>::Equal>;
2111f05b1a9Swlei
2123869309aSwlei using SampleVector = SmallVector<std::tuple<uint64_t, uint64_t, uint64_t>, 16>;
2133dcb60dbSwlei
isValidFallThroughRange(uint64_t Start,uint64_t End,ProfiledBinary * Binary)2149f732af5SHongtao Yu inline bool isValidFallThroughRange(uint64_t Start, uint64_t End,
2159f732af5SHongtao Yu ProfiledBinary *Binary) {
2169f732af5SHongtao Yu // Start bigger than End is considered invalid.
2179f732af5SHongtao Yu // LBR ranges cross the unconditional jmp are also assumed invalid.
2189f732af5SHongtao Yu // It's found that perf data may contain duplicate LBR entries that could form
2199f732af5SHongtao Yu // a range that does not reflect real execution flow on some Intel targets,
2209f732af5SHongtao Yu // e.g. Skylake. Such ranges are ususally very long. Exclude them since there
2219f732af5SHongtao Yu // cannot be a linear execution range that spans over unconditional jmp.
2229f732af5SHongtao Yu return Start <= End && !Binary->rangeCrossUncondBranch(Start, End);
2239f732af5SHongtao Yu }
2249f732af5SHongtao Yu
2251f05b1a9Swlei // The state for the unwinder, it doesn't hold the data but only keep the
2261f05b1a9Swlei // pointer/index of the data, While unwinding, the CallStack is changed
2271f05b1a9Swlei // dynamicially and will be recorded as the context of the sample
2281f05b1a9Swlei struct UnwindState {
2291f05b1a9Swlei // Profiled binary that current frame address belongs to
2301f05b1a9Swlei const ProfiledBinary *Binary;
2313869309aSwlei // Call stack trie node
2323869309aSwlei struct ProfiledFrame {
2333dcb60dbSwlei const uint64_t Address = DummyRoot;
2343869309aSwlei ProfiledFrame *Parent;
2353869309aSwlei SampleVector RangeSamples;
2363869309aSwlei SampleVector BranchSamples;
2373869309aSwlei std::unordered_map<uint64_t, std::unique_ptr<ProfiledFrame>> Children;
2383869309aSwlei
2393869309aSwlei ProfiledFrame(uint64_t Addr = 0, ProfiledFrame *P = nullptr)
AddressUnwindState::ProfiledFrame2403869309aSwlei : Address(Addr), Parent(P) {}
getOrCreateChildFrameUnwindState::ProfiledFrame2413869309aSwlei ProfiledFrame *getOrCreateChildFrame(uint64_t Address) {
2423869309aSwlei assert(Address && "Address can't be zero!");
2433869309aSwlei auto Ret = Children.emplace(
2443869309aSwlei Address, std::make_unique<ProfiledFrame>(Address, this));
2453869309aSwlei return Ret.first->second.get();
2463869309aSwlei }
recordRangeCountUnwindState::ProfiledFrame2473869309aSwlei void recordRangeCount(uint64_t Start, uint64_t End, uint64_t Count) {
2483869309aSwlei RangeSamples.emplace_back(std::make_tuple(Start, End, Count));
2493869309aSwlei }
recordBranchCountUnwindState::ProfiledFrame2503869309aSwlei void recordBranchCount(uint64_t Source, uint64_t Target, uint64_t Count) {
2513869309aSwlei BranchSamples.emplace_back(std::make_tuple(Source, Target, Count));
2523869309aSwlei }
isDummyRootUnwindState::ProfiledFrame2533dcb60dbSwlei bool isDummyRoot() { return Address == DummyRoot; }
isExternalFrameUnwindState::ProfiledFrame2543dcb60dbSwlei bool isExternalFrame() { return Address == ExternalAddr; }
isLeafFrameUnwindState::ProfiledFrame2553b51b518SHongtao Yu bool isLeafFrame() { return Children.empty(); }
2563869309aSwlei };
2573869309aSwlei
2583869309aSwlei ProfiledFrame DummyTrieRoot;
2593869309aSwlei ProfiledFrame *CurrentLeafFrame;
2601f05b1a9Swlei // Used to fall through the LBR stack
2611f05b1a9Swlei uint32_t LBRIndex = 0;
262964053d5Swlei // Reference to PerfSample.LBRStack
2631f05b1a9Swlei const SmallVector<LBREntry, 16> &LBRStack;
2641f05b1a9Swlei // Used to iterate the address range
2651f05b1a9Swlei InstructionPointer InstPtr;
2668a0406dcSHongtao Yu // Indicate whether unwinding is currently in a bad state which requires to
2678a0406dcSHongtao Yu // skip all subsequent unwinding.
2688a0406dcSHongtao Yu bool Invalid = false;
UnwindStateUnwindState269964053d5Swlei UnwindState(const PerfSample *Sample, const ProfiledBinary *Binary)
270964053d5Swlei : Binary(Binary), LBRStack(Sample->LBRStack),
271964053d5Swlei InstPtr(Binary, Sample->CallStack.front()) {
2723869309aSwlei initFrameTrie(Sample->CallStack);
2733869309aSwlei }
2741f05b1a9Swlei
validateInitialStateUnwindState2751f05b1a9Swlei bool validateInitialState() {
2761f05b1a9Swlei uint64_t LBRLeaf = LBRStack[LBRIndex].Target;
2773869309aSwlei uint64_t LeafAddr = CurrentLeafFrame->Address;
2783dcb60dbSwlei assert((LBRLeaf != ExternalAddr || LBRLeaf == LeafAddr) &&
2793dcb60dbSwlei "External leading LBR should match the leaf frame.");
2803dcb60dbSwlei
2811f05b1a9Swlei // When we take a stack sample, ideally the sampling distance between the
2821f05b1a9Swlei // leaf IP of stack and the last LBR target shouldn't be very large.
2831f05b1a9Swlei // Use a heuristic size (0x100) to filter out broken records.
284bfcb2c11Swlei if (LeafAddr < LBRLeaf || LeafAddr - LBRLeaf >= 0x100) {
2851f05b1a9Swlei WithColor::warning() << "Bogus trace: stack tip = "
2863869309aSwlei << format("%#010x", LeafAddr)
2871f05b1a9Swlei << ", LBR tip = " << format("%#010x\n", LBRLeaf);
2881f05b1a9Swlei return false;
2891f05b1a9Swlei }
2901f05b1a9Swlei return true;
2911f05b1a9Swlei }
2921f05b1a9Swlei
checkStateConsistencyUnwindState2931f05b1a9Swlei void checkStateConsistency() {
2943869309aSwlei assert(InstPtr.Address == CurrentLeafFrame->Address &&
2951f05b1a9Swlei "IP should align with context leaf");
2961f05b1a9Swlei }
2971f05b1a9Swlei
setInvalidUnwindState2988a0406dcSHongtao Yu void setInvalid() { Invalid = true; }
hasNextLBRUnwindState2991f05b1a9Swlei bool hasNextLBR() const { return LBRIndex < LBRStack.size(); }
getCurrentLBRSourceUnwindState3001f05b1a9Swlei uint64_t getCurrentLBRSource() const { return LBRStack[LBRIndex].Source; }
getCurrentLBRTargetUnwindState3011f05b1a9Swlei uint64_t getCurrentLBRTarget() const { return LBRStack[LBRIndex].Target; }
getCurrentLBRUnwindState3021f05b1a9Swlei const LBREntry &getCurrentLBR() const { return LBRStack[LBRIndex]; }
IsLastLBRUnwindState3033dcb60dbSwlei bool IsLastLBR() const { return LBRIndex == 0; }
getLBRStackSizeUnwindState3043dcb60dbSwlei bool getLBRStackSize() const { return LBRStack.size(); }
advanceLBRUnwindState3051f05b1a9Swlei void advanceLBR() { LBRIndex++; }
getParentFrameUnwindState3063869309aSwlei ProfiledFrame *getParentFrame() { return CurrentLeafFrame->Parent; }
3073869309aSwlei
pushFrameUnwindState3083869309aSwlei void pushFrame(uint64_t Address) {
3093869309aSwlei CurrentLeafFrame = CurrentLeafFrame->getOrCreateChildFrame(Address);
3103869309aSwlei }
3113869309aSwlei
switchToFrameUnwindState3123869309aSwlei void switchToFrame(uint64_t Address) {
3133869309aSwlei if (CurrentLeafFrame->Address == Address)
3143869309aSwlei return;
3153869309aSwlei CurrentLeafFrame = CurrentLeafFrame->Parent->getOrCreateChildFrame(Address);
3163869309aSwlei }
3173869309aSwlei
popFrameUnwindState3183869309aSwlei void popFrame() { CurrentLeafFrame = CurrentLeafFrame->Parent; }
3193869309aSwlei
clearCallStackUnwindState3200f53df86Swlei void clearCallStack() { CurrentLeafFrame = &DummyTrieRoot; }
3210f53df86Swlei
initFrameTrieUnwindState3223869309aSwlei void initFrameTrie(const SmallVectorImpl<uint64_t> &CallStack) {
3233869309aSwlei ProfiledFrame *Cur = &DummyTrieRoot;
3243869309aSwlei for (auto Address : reverse(CallStack)) {
3253869309aSwlei Cur = Cur->getOrCreateChildFrame(Address);
3263869309aSwlei }
3273869309aSwlei CurrentLeafFrame = Cur;
3283869309aSwlei }
3293869309aSwlei
getDummyRootPtrUnwindState3303869309aSwlei ProfiledFrame *getDummyRootPtr() { return &DummyTrieRoot; }
3311f05b1a9Swlei };
3321f05b1a9Swlei
333414930b9Swlei // Base class for sample counter key with context
334414930b9Swlei struct ContextKey {
335414930b9Swlei uint64_t HashCode = 0;
336414930b9Swlei virtual ~ContextKey() = default;
getHashCodeContextKey337dc9f0379Swlei uint64_t getHashCode() {
338dc9f0379Swlei if (HashCode == 0)
339dc9f0379Swlei genHashCode();
340dc9f0379Swlei return HashCode;
341dc9f0379Swlei }
342dc9f0379Swlei virtual void genHashCode() = 0;
isEqualContextKey343414930b9Swlei virtual bool isEqual(const ContextKey *K) const {
344414930b9Swlei return HashCode == K->HashCode;
345414930b9Swlei };
346414930b9Swlei
347414930b9Swlei // Utilities for LLVM-style RTTI
3483f970168SHongtao Yu enum ContextKind { CK_StringBased, CK_AddrBased };
349414930b9Swlei const ContextKind Kind;
getKindContextKey350414930b9Swlei ContextKind getKind() const { return Kind; }
ContextKeyContextKey351414930b9Swlei ContextKey(ContextKind K) : Kind(K){};
352414930b9Swlei };
353414930b9Swlei
354414930b9Swlei // String based context id
355414930b9Swlei struct StringBasedCtxKey : public ContextKey {
356b9db7036SHongtao Yu SampleContextFrameVector Context;
357b9db7036SHongtao Yu
3581410db70SWenlei He bool WasLeafInlined;
StringBasedCtxKeyStringBasedCtxKey3591410db70SWenlei He StringBasedCtxKey() : ContextKey(CK_StringBased), WasLeafInlined(false){};
classofStringBasedCtxKey360414930b9Swlei static bool classof(const ContextKey *K) {
361414930b9Swlei return K->getKind() == CK_StringBased;
362414930b9Swlei }
363414930b9Swlei
isEqualStringBasedCtxKey364414930b9Swlei bool isEqual(const ContextKey *K) const override {
365414930b9Swlei const StringBasedCtxKey *Other = dyn_cast<StringBasedCtxKey>(K);
366414930b9Swlei return Context == Other->Context;
367414930b9Swlei }
368414930b9Swlei
genHashCodeStringBasedCtxKey369dc9f0379Swlei void genHashCode() override {
370dc9f0379Swlei HashCode = hash_value(SampleContextFrames(Context));
371dc9f0379Swlei }
372414930b9Swlei };
373414930b9Swlei
3743f970168SHongtao Yu // Address-based context id
3753f970168SHongtao Yu struct AddrBasedCtxKey : public ContextKey {
3763f970168SHongtao Yu SmallVector<uint64_t, 16> Context;
377c681400bSwlei
3783f970168SHongtao Yu bool WasLeafInlined;
AddrBasedCtxKeyAddrBasedCtxKey3793f970168SHongtao Yu AddrBasedCtxKey() : ContextKey(CK_AddrBased), WasLeafInlined(false){};
classofAddrBasedCtxKey380c681400bSwlei static bool classof(const ContextKey *K) {
3813f970168SHongtao Yu return K->getKind() == CK_AddrBased;
382c681400bSwlei }
383c681400bSwlei
isEqualAddrBasedCtxKey384c681400bSwlei bool isEqual(const ContextKey *K) const override {
3853f970168SHongtao Yu const AddrBasedCtxKey *Other = dyn_cast<AddrBasedCtxKey>(K);
3863f970168SHongtao Yu return Context == Other->Context;
387c681400bSwlei }
388c681400bSwlei
genHashCodeAddrBasedCtxKey389dc9f0379Swlei void genHashCode() override {
3903f970168SHongtao Yu HashCode = hash_combine_range(Context.begin(), Context.end());
391c681400bSwlei }
392c681400bSwlei };
393c681400bSwlei
3941f05b1a9Swlei // The counter of branch samples for one function indexed by the branch,
3951f05b1a9Swlei // which is represented as the source and target offset pair.
3961f05b1a9Swlei using BranchSample = std::map<std::pair<uint64_t, uint64_t>, uint64_t>;
3971f05b1a9Swlei // The counter of range samples for one function indexed by the range,
3981f05b1a9Swlei // which is represented as the start and end offset pair.
3991f05b1a9Swlei using RangeSample = std::map<std::pair<uint64_t, uint64_t>, uint64_t>;
400414930b9Swlei // Wrapper for sample counters including range counter and branch counter
401414930b9Swlei struct SampleCounter {
402414930b9Swlei RangeSample RangeCounter;
403414930b9Swlei BranchSample BranchCounter;
4041f05b1a9Swlei
recordRangeCountSampleCounter405414930b9Swlei void recordRangeCount(uint64_t Start, uint64_t End, uint64_t Repeat) {
4068cbbd7e0SHongtao Yu assert(Start <= End && "Invalid instruction range");
407414930b9Swlei RangeCounter[{Start, End}] += Repeat;
4081f05b1a9Swlei }
recordBranchCountSampleCounter409414930b9Swlei void recordBranchCount(uint64_t Source, uint64_t Target, uint64_t Repeat) {
410414930b9Swlei BranchCounter[{Source, Target}] += Repeat;
4111f05b1a9Swlei }
4121f05b1a9Swlei };
4131f05b1a9Swlei
414414930b9Swlei // Sample counter with context to support context-sensitive profile
415414930b9Swlei using ContextSampleCounterMap =
416414930b9Swlei std::unordered_map<Hashable<ContextKey>, SampleCounter,
417414930b9Swlei Hashable<ContextKey>::Hash, Hashable<ContextKey>::Equal>;
4181f05b1a9Swlei
4193869309aSwlei struct FrameStack {
4203869309aSwlei SmallVector<uint64_t, 16> Stack;
421091c16f7Swlei ProfiledBinary *Binary;
FrameStackFrameStack422091c16f7Swlei FrameStack(ProfiledBinary *B) : Binary(B) {}
pushFrameFrameStack4233869309aSwlei bool pushFrame(UnwindState::ProfiledFrame *Cur) {
4240f53df86Swlei assert(!Cur->isExternalFrame() &&
4250f53df86Swlei "External frame's not expected for context stack.");
4263869309aSwlei Stack.push_back(Cur->Address);
4273869309aSwlei return true;
4283869309aSwlei }
4293869309aSwlei
popFrameFrameStack4303869309aSwlei void popFrame() {
4313869309aSwlei if (!Stack.empty())
4323869309aSwlei Stack.pop_back();
4333869309aSwlei }
4343869309aSwlei std::shared_ptr<StringBasedCtxKey> getContextKey();
4353869309aSwlei };
4363869309aSwlei
4373f970168SHongtao Yu struct AddressStack {
4383f970168SHongtao Yu SmallVector<uint64_t, 16> Stack;
439091c16f7Swlei ProfiledBinary *Binary;
AddressStackAddressStack4403f970168SHongtao Yu AddressStack(ProfiledBinary *B) : Binary(B) {}
pushFrameAddressStack4413869309aSwlei bool pushFrame(UnwindState::ProfiledFrame *Cur) {
4420f53df86Swlei assert(!Cur->isExternalFrame() &&
4430f53df86Swlei "External frame's not expected for context stack.");
4443f970168SHongtao Yu Stack.push_back(Cur->Address);
4453869309aSwlei return true;
4463869309aSwlei }
4473869309aSwlei
popFrameAddressStack4483869309aSwlei void popFrame() {
4493869309aSwlei if (!Stack.empty())
4503869309aSwlei Stack.pop_back();
4513869309aSwlei }
4523f970168SHongtao Yu std::shared_ptr<AddrBasedCtxKey> getContextKey();
4533869309aSwlei };
4543869309aSwlei
4551f05b1a9Swlei /*
4561f05b1a9Swlei As in hybrid sample we have a group of LBRs and the most recent sampling call
4571f05b1a9Swlei stack, we can walk through those LBRs to infer more call stacks which would be
4581f05b1a9Swlei used as context for profile. VirtualUnwinder is the class to do the call stack
4591f05b1a9Swlei unwinding based on LBR state. Two types of unwinding are processd here:
4601f05b1a9Swlei 1) LBR unwinding and 2) linear range unwinding.
4611f05b1a9Swlei Specifically, for each LBR entry(can be classified into call, return, regular
4621f05b1a9Swlei branch), LBR unwinding will replay the operation by pushing, popping or
4631f05b1a9Swlei switching leaf frame towards the call stack and since the initial call stack
4641f05b1a9Swlei is most recently sampled, the replay should be in anti-execution order, i.e. for
4651f05b1a9Swlei the regular case, pop the call stack when LBR is call, push frame on call stack
4661f05b1a9Swlei when LBR is return. After each LBR processed, it also needs to align with the
4671f05b1a9Swlei next LBR by going through instructions from previous LBR's target to current
4681f05b1a9Swlei LBR's source, which is the linear unwinding. As instruction from linear range
4691f05b1a9Swlei can come from different function by inlining, linear unwinding will do the range
4701f05b1a9Swlei splitting and record counters by the range with same inline context. Over those
4711f05b1a9Swlei unwinding process we will record each call stack as context id and LBR/linear
4721f05b1a9Swlei range as sample counter for further CS profile generation.
4731f05b1a9Swlei */
4741f05b1a9Swlei class VirtualUnwinder {
4751f05b1a9Swlei public:
VirtualUnwinder(ContextSampleCounterMap * Counter,ProfiledBinary * B)476091c16f7Swlei VirtualUnwinder(ContextSampleCounterMap *Counter, ProfiledBinary *B)
4773869309aSwlei : CtxCounterMap(Counter), Binary(B) {}
478964053d5Swlei bool unwind(const PerfSample *Sample, uint64_t Repeat);
getUntrackedCallsites()4796eca242eSWenlei He std::set<uint64_t> &getUntrackedCallsites() { return UntrackedCallsites; }
4801f05b1a9Swlei
4810f53df86Swlei uint64_t NumTotalBranches = 0;
4820f53df86Swlei uint64_t NumExtCallBranch = 0;
4830f53df86Swlei uint64_t NumMissingExternalFrame = 0;
4840f53df86Swlei uint64_t NumMismatchedProEpiBranch = 0;
4850f53df86Swlei uint64_t NumMismatchedExtCallBranch = 0;
486bfcb2c11Swlei uint64_t NumUnpairedExtAddr = 0;
487bfcb2c11Swlei uint64_t NumPairedExtAddr = 0;
4880f53df86Swlei
4893869309aSwlei private:
isSourceExternal(UnwindState & State)490bfcb2c11Swlei bool isSourceExternal(UnwindState &State) const {
491bfcb2c11Swlei return State.getCurrentLBRSource() == ExternalAddr;
492bfcb2c11Swlei }
493bfcb2c11Swlei
isTargetExternal(UnwindState & State)494bfcb2c11Swlei bool isTargetExternal(UnwindState &State) const {
495bfcb2c11Swlei return State.getCurrentLBRTarget() == ExternalAddr;
496bfcb2c11Swlei }
497bfcb2c11Swlei
498bfcb2c11Swlei // Determine whether the return source is from external code by checking if
499bfcb2c11Swlei // the target's the next inst is a call inst.
isReturnFromExternal(UnwindState & State)500bfcb2c11Swlei bool isReturnFromExternal(UnwindState &State) const {
501bfcb2c11Swlei return isSourceExternal(State) &&
502bfcb2c11Swlei (Binary->getCallAddrFromFrameAddr(State.getCurrentLBRTarget()) != 0);
503bfcb2c11Swlei }
504bfcb2c11Swlei
505bfcb2c11Swlei // If the source is external address but it's not the `return` case, treat it
506bfcb2c11Swlei // as a call from external.
isCallFromExternal(UnwindState & State)507bfcb2c11Swlei bool isCallFromExternal(UnwindState &State) const {
508bfcb2c11Swlei return isSourceExternal(State) &&
509bfcb2c11Swlei Binary->getCallAddrFromFrameAddr(State.getCurrentLBRTarget()) == 0;
510bfcb2c11Swlei }
511bfcb2c11Swlei
isCallState(UnwindState & State)5121f05b1a9Swlei bool isCallState(UnwindState &State) const {
5131f05b1a9Swlei // The tail call frame is always missing here in stack sample, we will
5141f05b1a9Swlei // use a specific tail call tracker to infer it.
515bfcb2c11Swlei if (!isValidState(State))
516bfcb2c11Swlei return false;
517bfcb2c11Swlei
518bfcb2c11Swlei if (Binary->addressIsCall(State.getCurrentLBRSource()))
519bfcb2c11Swlei return true;
520bfcb2c11Swlei
521bfcb2c11Swlei return isCallFromExternal(State);
5221f05b1a9Swlei }
5231f05b1a9Swlei
isReturnState(UnwindState & State)5241f05b1a9Swlei bool isReturnState(UnwindState &State) const {
5258a0406dcSHongtao Yu if (!isValidState(State))
5268a0406dcSHongtao Yu return false;
5278a0406dcSHongtao Yu
5281f05b1a9Swlei // Simply check addressIsReturn, as ret is always reliable, both for
5291f05b1a9Swlei // regular call and tail call.
530bfcb2c11Swlei if (Binary->addressIsReturn(State.getCurrentLBRSource()))
531bfcb2c11Swlei return true;
5320f53df86Swlei
533bfcb2c11Swlei return isReturnFromExternal(State);
5341f05b1a9Swlei }
5351f05b1a9Swlei
isValidState(UnwindState & State)5368a0406dcSHongtao Yu bool isValidState(UnwindState &State) const { return !State.Invalid; }
5378a0406dcSHongtao Yu
5381f05b1a9Swlei void unwindCall(UnwindState &State);
5391f05b1a9Swlei void unwindLinear(UnwindState &State, uint64_t Repeat);
5401f05b1a9Swlei void unwindReturn(UnwindState &State);
5413dcb60dbSwlei void unwindBranch(UnwindState &State);
5423869309aSwlei
5433869309aSwlei template <typename T>
5443869309aSwlei void collectSamplesFromFrame(UnwindState::ProfiledFrame *Cur, T &Stack);
5453869309aSwlei // Collect each samples on trie node by DFS traversal
5463869309aSwlei template <typename T>
5473869309aSwlei void collectSamplesFromFrameTrie(UnwindState::ProfiledFrame *Cur, T &Stack);
5483869309aSwlei void collectSamplesFromFrameTrie(UnwindState::ProfiledFrame *Cur);
5493869309aSwlei
5501f05b1a9Swlei void recordRangeCount(uint64_t Start, uint64_t End, UnwindState &State,
5511f05b1a9Swlei uint64_t Repeat);
5521f05b1a9Swlei void recordBranchCount(const LBREntry &Branch, UnwindState &State,
5531f05b1a9Swlei uint64_t Repeat);
5541f05b1a9Swlei
555414930b9Swlei ContextSampleCounterMap *CtxCounterMap;
5563869309aSwlei // Profiled binary that current frame address belongs to
557091c16f7Swlei ProfiledBinary *Binary;
5586eca242eSWenlei He // Keep track of all untracked callsites
5596eca242eSWenlei He std::set<uint64_t> UntrackedCallsites;
5601f05b1a9Swlei };
5611f05b1a9Swlei
5629af46710Swlei // Read perf trace to parse the events and samples.
5636da9241aSwlei class PerfReaderBase {
5641f05b1a9Swlei public:
PerfReaderBase(ProfiledBinary * B,StringRef PerfTrace)565a5f411b7Swlei PerfReaderBase(ProfiledBinary *B, StringRef PerfTrace)
566a5f411b7Swlei : Binary(B), PerfTraceFile(PerfTrace) {
5679af46710Swlei // Initialize the base address to preferred address.
5689af46710Swlei Binary->setBaseAddress(Binary->getPreferredBaseAddress());
5699af46710Swlei };
5706da9241aSwlei virtual ~PerfReaderBase() = default;
571da2f5d0aSFangrui Song static std::unique_ptr<PerfReaderBase>
572da2f5d0aSFangrui Song create(ProfiledBinary *Binary, PerfInputFile &PerfInput,
573*2fa6eaf9Sxur-llvm std::optional<int32_t> PIDFilter);
5741f05b1a9Swlei
575941191aaSWenlei He // Entry of the reader to parse multiple perf traces
576a5f411b7Swlei virtual void parsePerfTraces() = 0;
getSampleCounters()577941191aaSWenlei He const ContextSampleCounterMap &getSampleCounters() const {
578941191aaSWenlei He return SampleCounters;
579426e326aSwlei }
profileIsCS()580e36786d1SHongtao Yu bool profileIsCS() { return ProfileIsCS; }
581426e326aSwlei
582941191aaSWenlei He protected:
583a5f411b7Swlei ProfiledBinary *Binary = nullptr;
584a5f411b7Swlei StringRef PerfTraceFile;
585a5f411b7Swlei
586a5f411b7Swlei ContextSampleCounterMap SampleCounters;
587e36786d1SHongtao Yu bool ProfileIsCS = false;
5883dcb60dbSwlei
5893dcb60dbSwlei uint64_t NumTotalSample = 0;
5903dcb60dbSwlei uint64_t NumLeafExternalFrame = 0;
5913dcb60dbSwlei uint64_t NumLeadingOutgoingLBR = 0;
592a5f411b7Swlei };
593a5f411b7Swlei
594a5f411b7Swlei // Read perf script to parse the events and samples.
595a5f411b7Swlei class PerfScriptReader : public PerfReaderBase {
596a5f411b7Swlei public:
PerfScriptReader(ProfiledBinary * B,StringRef PerfTrace,std::optional<int32_t> PID)59717f6cba3SWenlei He PerfScriptReader(ProfiledBinary *B, StringRef PerfTrace,
598*2fa6eaf9Sxur-llvm std::optional<int32_t> PID)
59917f6cba3SWenlei He : PerfReaderBase(B, PerfTrace), PIDFilter(PID) {};
600a5f411b7Swlei
601a5f411b7Swlei // Entry of the reader to parse multiple perf traces
602b5188591SKazu Hirata void parsePerfTraces() override;
603a5f411b7Swlei // Generate perf script from perf data
604*2fa6eaf9Sxur-llvm static PerfInputFile convertPerfDataToTrace(ProfiledBinary *Binary,
605*2fa6eaf9Sxur-llvm bool SkipPID, PerfInputFile &File,
606*2fa6eaf9Sxur-llvm std::optional<int32_t> PIDFilter);
607a5f411b7Swlei // Extract perf script type by peaking at the input
608a5f411b7Swlei static PerfContent checkPerfScriptType(StringRef FileName);
609a5f411b7Swlei
6103f7f446dSHaohai Wen // Cleanup installers for temporary files created by perf script command.
6113f7f446dSHaohai Wen // Those files will be automatically removed when running destructor or
6123f7f446dSHaohai Wen // receiving signals.
6133f7f446dSHaohai Wen static SmallVector<CleanupInstaller, 2> TempFileCleanups;
6143f7f446dSHaohai Wen
615a5f411b7Swlei protected:
616a94fa862Swlei // The parsed MMap event
617a94fa862Swlei struct MMapEvent {
618*2fa6eaf9Sxur-llvm int64_t PID = 0;
61907120384SHongtao Yu uint64_t Address = 0;
620a94fa862Swlei uint64_t Size = 0;
621a94fa862Swlei uint64_t Offset = 0;
622a94fa862Swlei StringRef BinaryPath;
623a94fa862Swlei };
624a5f411b7Swlei
6251f0bc617SWenlei He // Check whether a given line is LBR sample
626941191aaSWenlei He static bool isLBRSample(StringRef Line);
6271f0bc617SWenlei He // Check whether a given line is MMAP event
628*2fa6eaf9Sxur-llvm static bool isMMapEvent(StringRef Line);
629*2fa6eaf9Sxur-llvm // Parse a single line of a PERF_RECORD_MMAP event looking for a
630941191aaSWenlei He // mapping between the binary name and its memory layout.
631*2fa6eaf9Sxur-llvm static bool extractMMapEventForBinary(ProfiledBinary *Binary, StringRef Line,
6321f0bc617SWenlei He MMapEvent &MMap);
6331f0bc617SWenlei He // Update base address based on mmap events
6341f0bc617SWenlei He void updateBinaryAddress(const MMapEvent &Event);
6351f0bc617SWenlei He // Parse mmap event and update binary address
636*2fa6eaf9Sxur-llvm void parseMMapEvent(TraceStream &TraceIt);
6371f05b1a9Swlei // Parse perf events/samples and do aggregation
638941191aaSWenlei He void parseAndAggregateTrace();
6391f05b1a9Swlei // Parse either an MMAP event or a perf sample
6401f05b1a9Swlei void parseEventOrSample(TraceStream &TraceIt);
641964053d5Swlei // Warn if the relevant mmap event is missing.
642964053d5Swlei void warnIfMissingMMap();
6430057c718SHongtao Yu // Emit accumulate warnings.
6440057c718SHongtao Yu void warnTruncatedStack();
645138202a8Swlei // Warn if range is invalid.
646138202a8Swlei void warnInvalidRange();
6471f05b1a9Swlei // Extract call stack from the perf trace lines
6483869309aSwlei bool extractCallstack(TraceStream &TraceIt,
6493869309aSwlei SmallVectorImpl<uint64_t> &CallStack);
6501f05b1a9Swlei // Extract LBR stack from one perf trace line
6511f05b1a9Swlei bool extractLBRStack(TraceStream &TraceIt,
652f812c192Swlei SmallVectorImpl<LBREntry> &LBRStack);
653f1affe8dSwlei uint64_t parseAggregatedCount(TraceStream &TraceIt);
6546da9241aSwlei // Parse one sample from multiple perf lines, override this for different
6556da9241aSwlei // sample type
656f1affe8dSwlei void parseSample(TraceStream &TraceIt);
657f1affe8dSwlei // An aggregated count is given to indicate how many times the sample is
658f1affe8dSwlei // repeated.
parseSample(TraceStream & TraceIt,uint64_t Count)659a5f411b7Swlei virtual void parseSample(TraceStream &TraceIt, uint64_t Count){};
660a5f411b7Swlei void computeCounterFromLBR(const PerfSample *Sample, uint64_t Repeat);
6611f05b1a9Swlei // Post process the profile after trace aggregation, we will do simple range
6621f05b1a9Swlei // overlap computation for AutoFDO, or unwind for CSSPGO(hybrid sample).
663a5f411b7Swlei virtual void generateUnsymbolizedProfile();
664a5f411b7Swlei void writeUnsymbolizedProfile(StringRef Filename);
665a5f411b7Swlei void writeUnsymbolizedProfile(raw_fd_ostream &OS);
6661f05b1a9Swlei
6671f05b1a9Swlei // Samples with the repeating time generated by the perf reader
668414930b9Swlei AggregatedCounter AggregatedSamples;
6690057c718SHongtao Yu // Keep track of all invalid return addresses
6700057c718SHongtao Yu std::set<uint64_t> InvalidReturnAddresses;
67117f6cba3SWenlei He // PID for the process of interest
672*2fa6eaf9Sxur-llvm std::optional<int32_t> PIDFilter;
673964053d5Swlei };
674964053d5Swlei
675964053d5Swlei /*
676964053d5Swlei The reader of LBR only perf script.
677964053d5Swlei A typical LBR sample is like:
678964053d5Swlei 40062f 0x4005c8/0x4005dc/P/-/-/0 0x40062f/0x4005b0/P/-/-/0 ...
679964053d5Swlei ... 0x4005c8/0x4005dc/P/-/-/0
680964053d5Swlei */
681a5f411b7Swlei class LBRPerfReader : public PerfScriptReader {
682964053d5Swlei public:
LBRPerfReader(ProfiledBinary * Binary,StringRef PerfTrace,std::optional<int32_t> PID)68317f6cba3SWenlei He LBRPerfReader(ProfiledBinary *Binary, StringRef PerfTrace,
684*2fa6eaf9Sxur-llvm std::optional<int32_t> PID)
68517f6cba3SWenlei He : PerfScriptReader(Binary, PerfTrace, PID) {};
686964053d5Swlei // Parse the LBR only sample.
687b5188591SKazu Hirata void parseSample(TraceStream &TraceIt, uint64_t Count) override;
688a03cf331Swlei };
689a03cf331Swlei
690a03cf331Swlei /*
691a03cf331Swlei Hybrid perf script includes a group of hybrid samples(LBRs + call stack),
692a03cf331Swlei which is used to generate CS profile. An example of hybrid sample:
693a03cf331Swlei 4005dc # call stack leaf
694a03cf331Swlei 400634
695a03cf331Swlei 400684 # call stack root
696a03cf331Swlei 0x4005c8/0x4005dc/P/-/-/0 0x40062f/0x4005b0/P/-/-/0 ...
697a03cf331Swlei ... 0x4005c8/0x4005dc/P/-/-/0 # LBR Entries
698a03cf331Swlei */
699a5f411b7Swlei class HybridPerfReader : public PerfScriptReader {
700a03cf331Swlei public:
HybridPerfReader(ProfiledBinary * Binary,StringRef PerfTrace,std::optional<int32_t> PID)70117f6cba3SWenlei He HybridPerfReader(ProfiledBinary *Binary, StringRef PerfTrace,
702*2fa6eaf9Sxur-llvm std::optional<int32_t> PID)
70317f6cba3SWenlei He : PerfScriptReader(Binary, PerfTrace, PID) {};
704a03cf331Swlei // Parse the hybrid sample including the call and LBR line
705964053d5Swlei void parseSample(TraceStream &TraceIt, uint64_t Count) override;
706a5f411b7Swlei void generateUnsymbolizedProfile() override;
707964053d5Swlei
708964053d5Swlei private:
709a03cf331Swlei // Unwind the hybrid samples after aggregration
710a03cf331Swlei void unwindSamples();
7116da9241aSwlei };
7126da9241aSwlei
713a5f411b7Swlei /*
714a5f411b7Swlei Format of unsymbolized profile:
715a5f411b7Swlei
716a5f411b7Swlei [frame1 @ frame2 @ ...] # If it's a CS profile
717a5f411b7Swlei number of entries in RangeCounter
718a5f411b7Swlei from_1-to_1:count_1
719a5f411b7Swlei from_2-to_2:count_2
720a5f411b7Swlei ......
721a5f411b7Swlei from_n-to_n:count_n
722a5f411b7Swlei number of entries in BranchCounter
723a5f411b7Swlei src_1->dst_1:count_1
724a5f411b7Swlei src_2->dst_2:count_2
725a5f411b7Swlei ......
726a5f411b7Swlei src_n->dst_n:count_n
727a5f411b7Swlei [frame1 @ frame2 @ ...] # Next context
728a5f411b7Swlei ......
729a5f411b7Swlei
730a5f411b7Swlei Note that non-CS profile doesn't have the empty `[]` context.
731a5f411b7Swlei */
732a5f411b7Swlei class UnsymbolizedProfileReader : public PerfReaderBase {
733a5f411b7Swlei public:
UnsymbolizedProfileReader(ProfiledBinary * Binary,StringRef PerfTrace)734a5f411b7Swlei UnsymbolizedProfileReader(ProfiledBinary *Binary, StringRef PerfTrace)
735a5f411b7Swlei : PerfReaderBase(Binary, PerfTrace){};
736a5f411b7Swlei void parsePerfTraces() override;
737a5f411b7Swlei
738a5f411b7Swlei private:
739a5f411b7Swlei void readSampleCounters(TraceStream &TraceIt, SampleCounter &SCounters);
740a5f411b7Swlei void readUnsymbolizedProfile(StringRef Filename);
741a5f411b7Swlei
742a5f411b7Swlei std::unordered_set<std::string> ContextStrSet;
743a5f411b7Swlei };
744a5f411b7Swlei
745a94fa862Swlei } // end namespace sampleprof
746a94fa862Swlei } // end namespace llvm
747a94fa862Swlei
748a94fa862Swlei #endif
749