1 //===-- PerfReader.h - perfscript reader -----------------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #ifndef LLVM_TOOLS_LLVM_PROFGEN_PERFREADER_H 10 #define LLVM_TOOLS_LLVM_PROFGEN_PERFREADER_H 11 #include "ErrorHandling.h" 12 #include "ProfiledBinary.h" 13 #include "llvm/Support/Casting.h" 14 #include "llvm/Support/CommandLine.h" 15 #include "llvm/Support/Regex.h" 16 #include <fstream> 17 #include <list> 18 #include <map> 19 #include <vector> 20 21 using namespace llvm; 22 using namespace sampleprof; 23 24 namespace llvm { 25 namespace sampleprof { 26 27 // Stream based trace line iterator 28 class TraceStream { 29 std::string CurrentLine; 30 std::ifstream Fin; 31 bool IsAtEoF = false; 32 uint64_t LineNumber = 0; 33 34 public: TraceStream(StringRef Filename)35 TraceStream(StringRef Filename) : Fin(Filename.str()) { 36 if (!Fin.good()) 37 exitWithError("Error read input perf script file", Filename); 38 advance(); 39 } 40 getCurrentLine()41 StringRef getCurrentLine() { 42 assert(!IsAtEoF && "Line iterator reaches the End-of-File!"); 43 return CurrentLine; 44 } 45 getLineNumber()46 uint64_t getLineNumber() { return LineNumber; } 47 isAtEoF()48 bool isAtEoF() { return IsAtEoF; } 49 50 // Read the next line advance()51 void advance() { 52 if (!std::getline(Fin, CurrentLine)) { 53 IsAtEoF = true; 54 return; 55 } 56 LineNumber++; 57 } 58 }; 59 60 // The type of perfscript 61 enum PerfScriptType { 62 PERF_UNKNOWN = 0, 63 PERF_INVALID = 1, 64 PERF_LBR = 2, // Only LBR sample 65 PERF_LBR_STACK = 3, // Hybrid sample including call stack and LBR stack. 66 }; 67 68 // The parsed LBR sample entry. 69 struct LBREntry { 70 uint64_t Source = 0; 71 uint64_t Target = 0; 72 // An artificial branch stands for a series of consecutive branches starting 73 // from the current binary with a transition through external code and 74 // eventually landing back in the current binary. 75 bool IsArtificial = false; LBREntryLBREntry76 LBREntry(uint64_t S, uint64_t T, bool I) 77 : Source(S), Target(T), IsArtificial(I) {} 78 }; 79 80 // Hash interface for generic data of type T 81 // Data should implement a \fn getHashCode and a \fn isEqual 82 // Currently getHashCode is non-virtual to avoid the overhead of calling vtable, 83 // i.e we explicitly calculate hash of derived class, assign to base class's 84 // HashCode. This also provides the flexibility for calculating the hash code 85 // incrementally(like rolling hash) during frame stack unwinding since unwinding 86 // only changes the leaf of frame stack. \fn isEqual is a virtual function, 87 // which will have perf overhead. In the future, if we redesign a better hash 88 // function, then we can just skip this or switch to non-virtual function(like 89 // just ignore comparision if hash conflicts probabilities is low) 90 template <class T> class Hashable { 91 public: 92 std::shared_ptr<T> Data; Hashable(const std::shared_ptr<T> & D)93 Hashable(const std::shared_ptr<T> &D) : Data(D) {} 94 95 // Hash code generation 96 struct Hash { operatorHash97 uint64_t operator()(const Hashable<T> &Key) const { 98 // Don't make it virtual for getHashCode 99 assert(Key.Data->getHashCode() && "Should generate HashCode for it!"); 100 return Key.Data->getHashCode(); 101 } 102 }; 103 104 // Hash equal 105 struct Equal { operatorEqual106 bool operator()(const Hashable<T> &LHS, const Hashable<T> &RHS) const { 107 // Precisely compare the data, vtable will have overhead. 108 return LHS.Data->isEqual(RHS.Data.get()); 109 } 110 }; 111 getPtr()112 T *getPtr() const { return Data.get(); } 113 }; 114 115 // Base class to extend for all types of perf sample 116 struct PerfSample { 117 uint64_t HashCode = 0; 118 119 virtual ~PerfSample() = default; getHashCodePerfSample120 uint64_t getHashCode() const { return HashCode; } isEqualPerfSample121 virtual bool isEqual(const PerfSample *K) const { 122 return HashCode == K->HashCode; 123 }; 124 125 // Utilities for LLVM-style RTTI 126 enum PerfKind { PK_HybridSample }; 127 const PerfKind Kind; getKindPerfSample128 PerfKind getKind() const { return Kind; } PerfSamplePerfSample129 PerfSample(PerfKind K) : Kind(K){}; 130 }; 131 132 // The parsed hybrid sample including call stack and LBR stack. 133 struct HybridSample : public PerfSample { 134 // Profiled binary that current frame address belongs to 135 ProfiledBinary *Binary; 136 // Call stack recorded in FILO(leaf to root) order 137 SmallVector<uint64_t, 16> CallStack; 138 // LBR stack recorded in FIFO order 139 SmallVector<LBREntry, 16> LBRStack; 140 HybridSampleHybridSample141 HybridSample() : PerfSample(PK_HybridSample){}; classofHybridSample142 static bool classof(const PerfSample *K) { 143 return K->getKind() == PK_HybridSample; 144 } 145 146 // Used for sample aggregation isEqualHybridSample147 bool isEqual(const PerfSample *K) const override { 148 const HybridSample *Other = dyn_cast<HybridSample>(K); 149 if (Other->Binary != Binary) 150 return false; 151 const SmallVector<uint64_t, 16> &OtherCallStack = Other->CallStack; 152 const SmallVector<LBREntry, 16> &OtherLBRStack = Other->LBRStack; 153 154 if (CallStack.size() != OtherCallStack.size() || 155 LBRStack.size() != OtherLBRStack.size()) 156 return false; 157 158 auto Iter = CallStack.begin(); 159 for (auto Address : OtherCallStack) { 160 if (Address != *Iter++) 161 return false; 162 } 163 164 for (size_t I = 0; I < OtherLBRStack.size(); I++) { 165 if (LBRStack[I].Source != OtherLBRStack[I].Source || 166 LBRStack[I].Target != OtherLBRStack[I].Target) 167 return false; 168 } 169 return true; 170 } 171 genHashCodeHybridSample172 void genHashCode() { 173 // Use simple DJB2 hash 174 auto HashCombine = [](uint64_t H, uint64_t V) { 175 return ((H << 5) + H) + V; 176 }; 177 uint64_t Hash = 5381; 178 Hash = HashCombine(Hash, reinterpret_cast<uint64_t>(Binary)); 179 for (const auto &Value : CallStack) { 180 Hash = HashCombine(Hash, Value); 181 } 182 for (const auto &Entry : LBRStack) { 183 Hash = HashCombine(Hash, Entry.Source); 184 Hash = HashCombine(Hash, Entry.Target); 185 } 186 HashCode = Hash; 187 } 188 }; 189 190 // After parsing the sample, we record the samples by aggregating them 191 // into this counter. The key stores the sample data and the value is 192 // the sample repeat times. 193 using AggregatedCounter = 194 std::unordered_map<Hashable<PerfSample>, uint64_t, 195 Hashable<PerfSample>::Hash, Hashable<PerfSample>::Equal>; 196 197 using SampleVector = SmallVector<std::tuple<uint64_t, uint64_t, uint64_t>, 16>; 198 // The state for the unwinder, it doesn't hold the data but only keep the 199 // pointer/index of the data, While unwinding, the CallStack is changed 200 // dynamicially and will be recorded as the context of the sample 201 struct UnwindState { 202 // Profiled binary that current frame address belongs to 203 const ProfiledBinary *Binary; 204 // Call stack trie node 205 struct ProfiledFrame { 206 const uint64_t Address = 0; 207 ProfiledFrame *Parent; 208 SampleVector RangeSamples; 209 SampleVector BranchSamples; 210 std::unordered_map<uint64_t, std::unique_ptr<ProfiledFrame>> Children; 211 212 ProfiledFrame(uint64_t Addr = 0, ProfiledFrame *P = nullptr) AddressUnwindState::ProfiledFrame213 : Address(Addr), Parent(P) {} getOrCreateChildFrameUnwindState::ProfiledFrame214 ProfiledFrame *getOrCreateChildFrame(uint64_t Address) { 215 assert(Address && "Address can't be zero!"); 216 auto Ret = Children.emplace( 217 Address, std::make_unique<ProfiledFrame>(Address, this)); 218 return Ret.first->second.get(); 219 } recordRangeCountUnwindState::ProfiledFrame220 void recordRangeCount(uint64_t Start, uint64_t End, uint64_t Count) { 221 RangeSamples.emplace_back(std::make_tuple(Start, End, Count)); 222 } recordBranchCountUnwindState::ProfiledFrame223 void recordBranchCount(uint64_t Source, uint64_t Target, uint64_t Count) { 224 BranchSamples.emplace_back(std::make_tuple(Source, Target, Count)); 225 } isDummyRootUnwindState::ProfiledFrame226 bool isDummyRoot() { return Address == 0; } 227 }; 228 229 ProfiledFrame DummyTrieRoot; 230 ProfiledFrame *CurrentLeafFrame; 231 // Used to fall through the LBR stack 232 uint32_t LBRIndex = 0; 233 // Reference to HybridSample.LBRStack 234 const SmallVector<LBREntry, 16> &LBRStack; 235 // Used to iterate the address range 236 InstructionPointer InstPtr; UnwindStateUnwindState237 UnwindState(const HybridSample *Sample) 238 : Binary(Sample->Binary), LBRStack(Sample->LBRStack), 239 InstPtr(Sample->Binary, Sample->CallStack.front()) { 240 initFrameTrie(Sample->CallStack); 241 } 242 validateInitialStateUnwindState243 bool validateInitialState() { 244 uint64_t LBRLeaf = LBRStack[LBRIndex].Target; 245 uint64_t LeafAddr = CurrentLeafFrame->Address; 246 // When we take a stack sample, ideally the sampling distance between the 247 // leaf IP of stack and the last LBR target shouldn't be very large. 248 // Use a heuristic size (0x100) to filter out broken records. 249 if (LeafAddr < LBRLeaf || LeafAddr >= LBRLeaf + 0x100) { 250 WithColor::warning() << "Bogus trace: stack tip = " 251 << format("%#010x", LeafAddr) 252 << ", LBR tip = " << format("%#010x\n", LBRLeaf); 253 return false; 254 } 255 return true; 256 } 257 checkStateConsistencyUnwindState258 void checkStateConsistency() { 259 assert(InstPtr.Address == CurrentLeafFrame->Address && 260 "IP should align with context leaf"); 261 } 262 getBinaryUnwindState263 const ProfiledBinary *getBinary() const { return Binary; } hasNextLBRUnwindState264 bool hasNextLBR() const { return LBRIndex < LBRStack.size(); } getCurrentLBRSourceUnwindState265 uint64_t getCurrentLBRSource() const { return LBRStack[LBRIndex].Source; } getCurrentLBRTargetUnwindState266 uint64_t getCurrentLBRTarget() const { return LBRStack[LBRIndex].Target; } getCurrentLBRUnwindState267 const LBREntry &getCurrentLBR() const { return LBRStack[LBRIndex]; } advanceLBRUnwindState268 void advanceLBR() { LBRIndex++; } 269 getParentFrameUnwindState270 ProfiledFrame *getParentFrame() { return CurrentLeafFrame->Parent; } 271 pushFrameUnwindState272 void pushFrame(uint64_t Address) { 273 CurrentLeafFrame = CurrentLeafFrame->getOrCreateChildFrame(Address); 274 } 275 switchToFrameUnwindState276 void switchToFrame(uint64_t Address) { 277 if (CurrentLeafFrame->Address == Address) 278 return; 279 CurrentLeafFrame = CurrentLeafFrame->Parent->getOrCreateChildFrame(Address); 280 } 281 popFrameUnwindState282 void popFrame() { CurrentLeafFrame = CurrentLeafFrame->Parent; } 283 initFrameTrieUnwindState284 void initFrameTrie(const SmallVectorImpl<uint64_t> &CallStack) { 285 ProfiledFrame *Cur = &DummyTrieRoot; 286 for (auto Address : reverse(CallStack)) { 287 Cur = Cur->getOrCreateChildFrame(Address); 288 } 289 CurrentLeafFrame = Cur; 290 } 291 getDummyRootPtrUnwindState292 ProfiledFrame *getDummyRootPtr() { return &DummyTrieRoot; } 293 }; 294 295 // Base class for sample counter key with context 296 struct ContextKey { 297 uint64_t HashCode = 0; 298 virtual ~ContextKey() = default; getHashCodeContextKey299 uint64_t getHashCode() const { return HashCode; } isEqualContextKey300 virtual bool isEqual(const ContextKey *K) const { 301 return HashCode == K->HashCode; 302 }; 303 304 // Utilities for LLVM-style RTTI 305 enum ContextKind { CK_StringBased, CK_ProbeBased }; 306 const ContextKind Kind; getKindContextKey307 ContextKind getKind() const { return Kind; } ContextKeyContextKey308 ContextKey(ContextKind K) : Kind(K){}; 309 }; 310 311 // String based context id 312 struct StringBasedCtxKey : public ContextKey { 313 std::string Context; 314 bool WasLeafInlined; StringBasedCtxKeyStringBasedCtxKey315 StringBasedCtxKey() : ContextKey(CK_StringBased), WasLeafInlined(false){}; classofStringBasedCtxKey316 static bool classof(const ContextKey *K) { 317 return K->getKind() == CK_StringBased; 318 } 319 isEqualStringBasedCtxKey320 bool isEqual(const ContextKey *K) const override { 321 const StringBasedCtxKey *Other = dyn_cast<StringBasedCtxKey>(K); 322 return Context == Other->Context; 323 } 324 genHashCodeStringBasedCtxKey325 void genHashCode() { HashCode = hash_value(Context); } 326 }; 327 328 // Probe based context key as the intermediate key of context 329 // String based context key will introduce redundant string handling 330 // since the callee context is inferred from the context string which 331 // need to be splitted by '@' to get the last location frame, so we 332 // can just use probe instead and generate the string in the end. 333 struct ProbeBasedCtxKey : public ContextKey { 334 SmallVector<const PseudoProbe *, 16> Probes; 335 ProbeBasedCtxKeyProbeBasedCtxKey336 ProbeBasedCtxKey() : ContextKey(CK_ProbeBased) {} classofProbeBasedCtxKey337 static bool classof(const ContextKey *K) { 338 return K->getKind() == CK_ProbeBased; 339 } 340 isEqualProbeBasedCtxKey341 bool isEqual(const ContextKey *K) const override { 342 const ProbeBasedCtxKey *O = dyn_cast<ProbeBasedCtxKey>(K); 343 assert(O != nullptr && "Probe based key shouldn't be null in isEqual"); 344 return std::equal(Probes.begin(), Probes.end(), O->Probes.begin(), 345 O->Probes.end()); 346 } 347 genHashCodeProbeBasedCtxKey348 void genHashCode() { 349 for (const auto *P : Probes) { 350 HashCode = hash_combine(HashCode, P); 351 } 352 if (HashCode == 0) { 353 // Avoid zero value of HashCode when it's an empty list 354 HashCode = 1; 355 } 356 } 357 }; 358 359 // The counter of branch samples for one function indexed by the branch, 360 // which is represented as the source and target offset pair. 361 using BranchSample = std::map<std::pair<uint64_t, uint64_t>, uint64_t>; 362 // The counter of range samples for one function indexed by the range, 363 // which is represented as the start and end offset pair. 364 using RangeSample = std::map<std::pair<uint64_t, uint64_t>, uint64_t>; 365 // Wrapper for sample counters including range counter and branch counter 366 struct SampleCounter { 367 RangeSample RangeCounter; 368 BranchSample BranchCounter; 369 recordRangeCountSampleCounter370 void recordRangeCount(uint64_t Start, uint64_t End, uint64_t Repeat) { 371 RangeCounter[{Start, End}] += Repeat; 372 } recordBranchCountSampleCounter373 void recordBranchCount(uint64_t Source, uint64_t Target, uint64_t Repeat) { 374 BranchCounter[{Source, Target}] += Repeat; 375 } 376 }; 377 378 // Sample counter with context to support context-sensitive profile 379 using ContextSampleCounterMap = 380 std::unordered_map<Hashable<ContextKey>, SampleCounter, 381 Hashable<ContextKey>::Hash, Hashable<ContextKey>::Equal>; 382 383 struct FrameStack { 384 SmallVector<uint64_t, 16> Stack; 385 const ProfiledBinary *Binary; FrameStackFrameStack386 FrameStack(const ProfiledBinary *B) : Binary(B) {} pushFrameFrameStack387 bool pushFrame(UnwindState::ProfiledFrame *Cur) { 388 Stack.push_back(Cur->Address); 389 return true; 390 } 391 popFrameFrameStack392 void popFrame() { 393 if (!Stack.empty()) 394 Stack.pop_back(); 395 } 396 std::shared_ptr<StringBasedCtxKey> getContextKey(); 397 }; 398 399 struct ProbeStack { 400 SmallVector<const PseudoProbe *, 16> Stack; 401 const ProfiledBinary *Binary; ProbeStackProbeStack402 ProbeStack(const ProfiledBinary *B) : Binary(B) {} pushFrameProbeStack403 bool pushFrame(UnwindState::ProfiledFrame *Cur) { 404 const PseudoProbe *CallProbe = Binary->getCallProbeForAddr(Cur->Address); 405 // We may not find a probe for a merged or external callsite. 406 // Callsite merging may cause the loss of original probe IDs. 407 // Cutting off the context from here since the inliner will 408 // not know how to consume a context with unknown callsites. 409 if (!CallProbe) 410 return false; 411 Stack.push_back(CallProbe); 412 return true; 413 } 414 popFrameProbeStack415 void popFrame() { 416 if (!Stack.empty()) 417 Stack.pop_back(); 418 } 419 // Use pseudo probe based context key to get the sample counter 420 // A context stands for a call path from 'main' to an uninlined 421 // callee with all inline frames recovered on that path. The probes 422 // belonging to that call path is the probes either originated from 423 // the callee or from any functions inlined into the callee. Since 424 // pseudo probes are organized in a tri-tree style after decoded, 425 // the tree path from the tri-tree root (which is the uninlined 426 // callee) to the probe node forms an inline context. 427 // Here we use a list of probe(pointer) as the context key to speed up 428 // aggregation and the final context string will be generate in 429 // ProfileGenerator 430 std::shared_ptr<ProbeBasedCtxKey> getContextKey(); 431 }; 432 433 /* 434 As in hybrid sample we have a group of LBRs and the most recent sampling call 435 stack, we can walk through those LBRs to infer more call stacks which would be 436 used as context for profile. VirtualUnwinder is the class to do the call stack 437 unwinding based on LBR state. Two types of unwinding are processd here: 438 1) LBR unwinding and 2) linear range unwinding. 439 Specifically, for each LBR entry(can be classified into call, return, regular 440 branch), LBR unwinding will replay the operation by pushing, popping or 441 switching leaf frame towards the call stack and since the initial call stack 442 is most recently sampled, the replay should be in anti-execution order, i.e. for 443 the regular case, pop the call stack when LBR is call, push frame on call stack 444 when LBR is return. After each LBR processed, it also needs to align with the 445 next LBR by going through instructions from previous LBR's target to current 446 LBR's source, which is the linear unwinding. As instruction from linear range 447 can come from different function by inlining, linear unwinding will do the range 448 splitting and record counters by the range with same inline context. Over those 449 unwinding process we will record each call stack as context id and LBR/linear 450 range as sample counter for further CS profile generation. 451 */ 452 class VirtualUnwinder { 453 public: VirtualUnwinder(ContextSampleCounterMap * Counter,const ProfiledBinary * B)454 VirtualUnwinder(ContextSampleCounterMap *Counter, const ProfiledBinary *B) 455 : CtxCounterMap(Counter), Binary(B) {} 456 bool unwind(const HybridSample *Sample, uint64_t Repeat); 457 458 private: isCallState(UnwindState & State)459 bool isCallState(UnwindState &State) const { 460 // The tail call frame is always missing here in stack sample, we will 461 // use a specific tail call tracker to infer it. 462 return Binary->addressIsCall(State.getCurrentLBRSource()); 463 } 464 isReturnState(UnwindState & State)465 bool isReturnState(UnwindState &State) const { 466 // Simply check addressIsReturn, as ret is always reliable, both for 467 // regular call and tail call. 468 return Binary->addressIsReturn(State.getCurrentLBRSource()); 469 } 470 471 void unwindCall(UnwindState &State); 472 void unwindLinear(UnwindState &State, uint64_t Repeat); 473 void unwindReturn(UnwindState &State); 474 void unwindBranchWithinFrame(UnwindState &State); 475 476 template <typename T> 477 void collectSamplesFromFrame(UnwindState::ProfiledFrame *Cur, T &Stack); 478 // Collect each samples on trie node by DFS traversal 479 template <typename T> 480 void collectSamplesFromFrameTrie(UnwindState::ProfiledFrame *Cur, T &Stack); 481 void collectSamplesFromFrameTrie(UnwindState::ProfiledFrame *Cur); 482 483 void recordRangeCount(uint64_t Start, uint64_t End, UnwindState &State, 484 uint64_t Repeat); 485 void recordBranchCount(const LBREntry &Branch, UnwindState &State, 486 uint64_t Repeat); 487 488 ContextSampleCounterMap *CtxCounterMap; 489 // Profiled binary that current frame address belongs to 490 const ProfiledBinary *Binary; 491 }; 492 493 // Filename to binary map 494 using BinaryMap = StringMap<ProfiledBinary>; 495 // Address to binary map for fast look-up 496 using AddressBinaryMap = std::map<uint64_t, ProfiledBinary *>; 497 // Binary to ContextSampleCounters Map to support multiple binary, we may have 498 // same binary loaded at different addresses, they should share the same sample 499 // counter 500 using BinarySampleCounterMap = 501 std::unordered_map<ProfiledBinary *, ContextSampleCounterMap>; 502 503 // Load binaries and read perf trace to parse the events and samples 504 class PerfReader { 505 506 public: 507 PerfReader(cl::list<std::string> &BinaryFilenames, 508 cl::list<std::string> &PerfTraceFilenames); 509 510 // A LBR sample is like: 511 // 0x5c6313f/0x5c63170/P/-/-/0 0x5c630e7/0x5c63130/P/-/-/0 ... 512 // A heuristic for fast detection by checking whether a 513 // leading " 0x" and the '/' exist. isLBRSample(StringRef Line)514 static bool isLBRSample(StringRef Line) { 515 if (!Line.startswith(" 0x")) 516 return false; 517 if (Line.find('/') != StringRef::npos) 518 return true; 519 return false; 520 } 521 522 // The raw hybird sample is like 523 // e.g. 524 // 4005dc # call stack leaf 525 // 400634 526 // 400684 # call stack root 527 // 0x4005c8/0x4005dc/P/-/-/0 0x40062f/0x4005b0/P/-/-/0 ... 528 // ... 0x4005c8/0x4005dc/P/-/-/0 # LBR Entries 529 // Determine the perfscript contains hybrid samples(call stack + LBRs) by 530 // checking whether there is a non-empty call stack immediately followed by 531 // a LBR sample checkPerfScriptType(StringRef FileName)532 static PerfScriptType checkPerfScriptType(StringRef FileName) { 533 TraceStream TraceIt(FileName); 534 uint64_t FrameAddr = 0; 535 while (!TraceIt.isAtEoF()) { 536 int32_t Count = 0; 537 while (!TraceIt.isAtEoF() && 538 !TraceIt.getCurrentLine().ltrim().getAsInteger(16, FrameAddr)) { 539 Count++; 540 TraceIt.advance(); 541 } 542 if (!TraceIt.isAtEoF()) { 543 if (isLBRSample(TraceIt.getCurrentLine())) { 544 if (Count > 0) 545 return PERF_LBR_STACK; 546 else 547 return PERF_LBR; 548 } 549 TraceIt.advance(); 550 } 551 } 552 return PERF_INVALID; 553 } 554 555 // The parsed MMap event 556 struct MMapEvent { 557 uint64_t PID = 0; 558 uint64_t BaseAddress = 0; 559 uint64_t Size = 0; 560 uint64_t Offset = 0; 561 StringRef BinaryPath; 562 }; 563 564 /// Load symbols and disassemble the code of a give binary. 565 /// Also register the binary in the binary table. 566 /// 567 ProfiledBinary &loadBinary(const StringRef BinaryPath, 568 bool AllowNameConflict = true); 569 void updateBinaryAddress(const MMapEvent &Event); getPerfScriptType()570 PerfScriptType getPerfScriptType() const { return PerfType; } 571 // Entry of the reader to parse multiple perf traces 572 void parsePerfTraces(cl::list<std::string> &PerfTraceFilenames); getBinarySampleCounters()573 const BinarySampleCounterMap &getBinarySampleCounters() const { 574 return BinarySampleCounters; 575 } 576 577 private: 578 /// Validate the command line input 579 void validateCommandLine(cl::list<std::string> &BinaryFilenames, 580 cl::list<std::string> &PerfTraceFilenames); 581 /// Parse a single line of a PERF_RECORD_MMAP2 event looking for a 582 /// mapping between the binary name and its memory layout. 583 /// 584 void parseMMap2Event(TraceStream &TraceIt); 585 // Parse perf events/samples and do aggregation 586 void parseAndAggregateTrace(StringRef Filename); 587 // Parse either an MMAP event or a perf sample 588 void parseEventOrSample(TraceStream &TraceIt); 589 // Parse the hybrid sample including the call and LBR line 590 void parseHybridSample(TraceStream &TraceIt); 591 // Extract call stack from the perf trace lines 592 bool extractCallstack(TraceStream &TraceIt, 593 SmallVectorImpl<uint64_t> &CallStack); 594 // Extract LBR stack from one perf trace line 595 bool extractLBRStack(TraceStream &TraceIt, 596 SmallVectorImpl<LBREntry> &LBRStack, 597 ProfiledBinary *Binary); 598 void checkAndSetPerfType(cl::list<std::string> &PerfTraceFilenames); 599 // Post process the profile after trace aggregation, we will do simple range 600 // overlap computation for AutoFDO, or unwind for CSSPGO(hybrid sample). 601 void generateRawProfile(); 602 // Unwind the hybrid samples after aggregration 603 void unwindSamples(); 604 void printUnwinderOutput(); 605 // Helper function for looking up binary in AddressBinaryMap 606 ProfiledBinary *getBinary(uint64_t Address); 607 608 BinaryMap BinaryTable; 609 AddressBinaryMap AddrToBinaryMap; // Used by address-based lookup. 610 611 private: 612 BinarySampleCounterMap BinarySampleCounters; 613 // Samples with the repeating time generated by the perf reader 614 AggregatedCounter AggregatedSamples; 615 PerfScriptType PerfType = PERF_UNKNOWN; 616 }; 617 618 } // end namespace sampleprof 619 } // end namespace llvm 620 621 #endif 622