xref: /llvm-project/bolt/lib/Profile/DataAggregator.cpp (revision e6c9cd9c060c1fa8343398b9556a5a6c0f35d515)
1 //===- bolt/Profile/DataAggregator.cpp - Perf data aggregator -------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
// This family of functions reads profile data written by perf record,
// aggregates it and then writes it back to an output file.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "bolt/Profile/DataAggregator.h"
15 #include "bolt/Core/BinaryContext.h"
16 #include "bolt/Core/BinaryFunction.h"
17 #include "bolt/Passes/BinaryPasses.h"
18 #include "bolt/Profile/BoltAddressTranslation.h"
19 #include "bolt/Profile/Heatmap.h"
20 #include "bolt/Profile/YAMLProfileWriter.h"
21 #include "bolt/Utils/CommandLineOpts.h"
22 #include "bolt/Utils/Utils.h"
23 #include "llvm/ADT/STLExtras.h"
24 #include "llvm/ADT/ScopeExit.h"
25 #include "llvm/Support/CommandLine.h"
26 #include "llvm/Support/Compiler.h"
27 #include "llvm/Support/Debug.h"
28 #include "llvm/Support/Errc.h"
29 #include "llvm/Support/FileSystem.h"
30 #include "llvm/Support/Process.h"
31 #include "llvm/Support/Program.h"
32 #include "llvm/Support/Regex.h"
33 #include "llvm/Support/Timer.h"
34 #include "llvm/Support/raw_ostream.h"
35 #include <map>
36 #include <optional>
37 #include <unordered_map>
38 #include <utility>
39 
40 #define DEBUG_TYPE "aggregator"
41 
42 using namespace llvm;
43 using namespace bolt;
44 
45 namespace opts {
46 
47 static cl::opt<bool>
48     BasicAggregation("nl",
49                      cl::desc("aggregate basic samples (without LBR info)"),
50                      cl::cat(AggregatorCategory));
51 
52 static cl::opt<std::string>
53     ITraceAggregation("itrace",
54                       cl::desc("Generate LBR info with perf itrace argument"),
55                       cl::cat(AggregatorCategory));
56 
57 static cl::opt<bool>
58 FilterMemProfile("filter-mem-profile",
59   cl::desc("if processing a memory profile, filter out stack or heap accesses "
60            "that won't be useful for BOLT to reduce profile file size"),
61   cl::init(true),
62   cl::cat(AggregatorCategory));
63 
64 static cl::opt<unsigned long long>
65 FilterPID("pid",
66   cl::desc("only use samples from process with specified PID"),
67   cl::init(0),
68   cl::Optional,
69   cl::cat(AggregatorCategory));
70 
71 static cl::opt<bool>
72 IgnoreBuildID("ignore-build-id",
73   cl::desc("continue even if build-ids in input binary and perf.data mismatch"),
74   cl::init(false),
75   cl::cat(AggregatorCategory));
76 
77 static cl::opt<bool> IgnoreInterruptLBR(
78     "ignore-interrupt-lbr",
79     cl::desc("ignore kernel interrupt LBR that happens asynchronously"),
80     cl::init(true), cl::cat(AggregatorCategory));
81 
82 static cl::opt<unsigned long long>
83 MaxSamples("max-samples",
84   cl::init(-1ULL),
85   cl::desc("maximum number of samples to read from LBR profile"),
86   cl::Optional,
87   cl::Hidden,
88   cl::cat(AggregatorCategory));
89 
90 extern cl::opt<opts::ProfileFormatKind> ProfileFormat;
91 extern cl::opt<bool> ProfileWritePseudoProbes;
92 extern cl::opt<std::string> SaveProfile;
93 
94 cl::opt<bool> ReadPreAggregated(
95     "pa", cl::desc("skip perf and read data from a pre-aggregated file format"),
96     cl::cat(AggregatorCategory));
97 
98 cl::opt<std::string>
99     ReadPerfEvents("perf-script-events",
100                    cl::desc("skip perf event collection by supplying a "
101                             "perf-script output in a textual format"),
102                    cl::ReallyHidden, cl::init(""), cl::cat(AggregatorCategory));
103 
104 static cl::opt<bool>
105 TimeAggregator("time-aggr",
106   cl::desc("time BOLT aggregator"),
107   cl::init(false),
108   cl::ZeroOrMore,
109   cl::cat(AggregatorCategory));
110 
111 } // namespace opts
112 
113 namespace {
114 
115 const char TimerGroupName[] = "aggregator";
116 const char TimerGroupDesc[] = "Aggregator";
117 
118 std::vector<SectionNameAndRange> getTextSections(const BinaryContext *BC) {
119   std::vector<SectionNameAndRange> sections;
120   for (BinarySection &Section : BC->sections()) {
121     if (!Section.isText())
122       continue;
123     if (Section.getSize() == 0)
124       continue;
125     sections.push_back(
126         {Section.getName(), Section.getAddress(), Section.getEndAddress()});
127   }
128   llvm::sort(sections,
129              [](const SectionNameAndRange &A, const SectionNameAndRange &B) {
130                return A.BeginAddress < B.BeginAddress;
131              });
132   return sections;
133 }
134 }
135 
136 constexpr uint64_t DataAggregator::KernelBaseAddr;
137 
138 DataAggregator::~DataAggregator() { deleteTempFiles(); }
139 
140 namespace {
141 void deleteTempFile(const std::string &FileName) {
142   if (std::error_code Errc = sys::fs::remove(FileName.c_str()))
143     errs() << "PERF2BOLT: failed to delete temporary file " << FileName
144            << " with error " << Errc.message() << "\n";
145 }
146 }
147 
148 void DataAggregator::deleteTempFiles() {
149   for (std::string &FileName : TempFiles)
150     deleteTempFile(FileName);
151   TempFiles.clear();
152 }
153 
154 void DataAggregator::findPerfExecutable() {
155   std::optional<std::string> PerfExecutable =
156       sys::Process::FindInEnvPath("PATH", "perf");
157   if (!PerfExecutable) {
158     outs() << "PERF2BOLT: No perf executable found!\n";
159     exit(1);
160   }
161   PerfPath = *PerfExecutable;
162 }
163 
164 void DataAggregator::start() {
165   outs() << "PERF2BOLT: Starting data aggregation job for " << Filename << "\n";
166 
167   // Don't launch perf for pre-aggregated files or when perf input is specified
168   // by the user.
169   if (opts::ReadPreAggregated || !opts::ReadPerfEvents.empty())
170     return;
171 
172   findPerfExecutable();
173 
174   if (opts::BasicAggregation) {
175     launchPerfProcess("events without LBR",
176                       MainEventsPPI,
177                       "script -F pid,event,ip",
178                       /*Wait = */false);
179   } else if (!opts::ITraceAggregation.empty()) {
180     std::string ItracePerfScriptArgs = llvm::formatv(
181         "script -F pid,brstack --itrace={0}", opts::ITraceAggregation);
182     launchPerfProcess("branch events with itrace", MainEventsPPI,
183                       ItracePerfScriptArgs.c_str(),
184                       /*Wait = */ false);
185   } else {
186     launchPerfProcess("branch events", MainEventsPPI, "script -F pid,brstack",
187                       /*Wait = */ false);
188   }
189 
190   // Note: we launch script for mem events regardless of the option, as the
191   //       command fails fairly fast if mem events were not collected.
192   launchPerfProcess("mem events",
193                     MemEventsPPI,
194                     "script -F pid,event,addr,ip",
195                     /*Wait = */false);
196 
197   launchPerfProcess("process events", MMapEventsPPI,
198                     "script --show-mmap-events --no-itrace",
199                     /*Wait = */ false);
200 
201   launchPerfProcess("task events", TaskEventsPPI,
202                     "script --show-task-events --no-itrace",
203                     /*Wait = */ false);
204 }
205 
206 void DataAggregator::abort() {
207   if (opts::ReadPreAggregated)
208     return;
209 
210   std::string Error;
211 
212   // Kill subprocesses in case they are not finished
213   sys::Wait(TaskEventsPPI.PI, 1, &Error);
214   sys::Wait(MMapEventsPPI.PI, 1, &Error);
215   sys::Wait(MainEventsPPI.PI, 1, &Error);
216   sys::Wait(MemEventsPPI.PI, 1, &Error);
217 
218   deleteTempFiles();
219 
220   exit(1);
221 }
222 
223 void DataAggregator::launchPerfProcess(StringRef Name, PerfProcessInfo &PPI,
224                                        const char *ArgsString, bool Wait) {
225   SmallVector<StringRef, 4> Argv;
226 
227   outs() << "PERF2BOLT: spawning perf job to read " << Name << '\n';
228   Argv.push_back(PerfPath.data());
229 
230   StringRef(ArgsString).split(Argv, ' ');
231   Argv.push_back("-f");
232   Argv.push_back("-i");
233   Argv.push_back(Filename.c_str());
234 
235   if (std::error_code Errc =
236           sys::fs::createTemporaryFile("perf.script", "out", PPI.StdoutPath)) {
237     errs() << "PERF2BOLT: failed to create temporary file " << PPI.StdoutPath
238            << " with error " << Errc.message() << "\n";
239     exit(1);
240   }
241   TempFiles.push_back(PPI.StdoutPath.data());
242 
243   if (std::error_code Errc =
244           sys::fs::createTemporaryFile("perf.script", "err", PPI.StderrPath)) {
245     errs() << "PERF2BOLT: failed to create temporary file " << PPI.StderrPath
246            << " with error " << Errc.message() << "\n";
247     exit(1);
248   }
249   TempFiles.push_back(PPI.StderrPath.data());
250 
251   std::optional<StringRef> Redirects[] = {
252       std::nullopt,                      // Stdin
253       StringRef(PPI.StdoutPath.data()),  // Stdout
254       StringRef(PPI.StderrPath.data())}; // Stderr
255 
256   LLVM_DEBUG({
257     dbgs() << "Launching perf: ";
258     for (StringRef Arg : Argv)
259       dbgs() << Arg << " ";
260     dbgs() << " 1> " << PPI.StdoutPath.data() << " 2> " << PPI.StderrPath.data()
261            << "\n";
262   });
263 
264   if (Wait)
265     PPI.PI.ReturnCode = sys::ExecuteAndWait(PerfPath.data(), Argv,
266                                             /*envp*/ std::nullopt, Redirects);
267   else
268     PPI.PI = sys::ExecuteNoWait(PerfPath.data(), Argv, /*envp*/ std::nullopt,
269                                 Redirects);
270 }
271 
272 void DataAggregator::processFileBuildID(StringRef FileBuildID) {
273   PerfProcessInfo BuildIDProcessInfo;
274   launchPerfProcess("buildid list",
275                     BuildIDProcessInfo,
276                     "buildid-list",
277                     /*Wait = */true);
278 
279   if (BuildIDProcessInfo.PI.ReturnCode != 0) {
280     ErrorOr<std::unique_ptr<MemoryBuffer>> MB =
281         MemoryBuffer::getFileOrSTDIN(BuildIDProcessInfo.StderrPath.data());
282     StringRef ErrBuf = (*MB)->getBuffer();
283 
284     errs() << "PERF-ERROR: return code " << BuildIDProcessInfo.PI.ReturnCode
285            << '\n';
286     errs() << ErrBuf;
287     return;
288   }
289 
290   ErrorOr<std::unique_ptr<MemoryBuffer>> MB =
291       MemoryBuffer::getFileOrSTDIN(BuildIDProcessInfo.StdoutPath.data());
292   if (std::error_code EC = MB.getError()) {
293     errs() << "Cannot open " << BuildIDProcessInfo.StdoutPath.data() << ": "
294            << EC.message() << "\n";
295     return;
296   }
297 
298   FileBuf = std::move(*MB);
299   ParsingBuf = FileBuf->getBuffer();
300 
301   std::optional<StringRef> FileName = getFileNameForBuildID(FileBuildID);
302   if (!FileName) {
303     if (hasAllBuildIDs()) {
304       errs() << "PERF2BOLT-ERROR: failed to match build-id from perf output. "
305                 "This indicates the input binary supplied for data aggregation "
306                 "is not the same recorded by perf when collecting profiling "
307                 "data, or there were no samples recorded for the binary. "
308                 "Use -ignore-build-id option to override.\n";
309       if (!opts::IgnoreBuildID)
310         abort();
311     } else {
312       errs() << "PERF2BOLT-WARNING: build-id will not be checked because perf "
313                 "data was recorded without it\n";
314       return;
315     }
316   } else if (*FileName != llvm::sys::path::filename(BC->getFilename())) {
317     errs() << "PERF2BOLT-WARNING: build-id matched a different file name\n";
318     BuildIDBinaryName = std::string(*FileName);
319   } else {
320     outs() << "PERF2BOLT: matched build-id and file name\n";
321   }
322 }
323 
324 bool DataAggregator::checkPerfDataMagic(StringRef FileName) {
325   if (opts::ReadPreAggregated)
326     return true;
327 
328   Expected<sys::fs::file_t> FD = sys::fs::openNativeFileForRead(FileName);
329   if (!FD) {
330     consumeError(FD.takeError());
331     return false;
332   }
333 
334   char Buf[7] = {0, 0, 0, 0, 0, 0, 0};
335 
336   auto Close = make_scope_exit([&] { sys::fs::closeFile(*FD); });
337   Expected<size_t> BytesRead = sys::fs::readNativeFileSlice(
338       *FD, MutableArrayRef(Buf, sizeof(Buf)), 0);
339   if (!BytesRead) {
340     consumeError(BytesRead.takeError());
341     return false;
342   }
343 
344   if (*BytesRead != 7)
345     return false;
346 
347   if (strncmp(Buf, "PERFILE", 7) == 0)
348     return true;
349   return false;
350 }
351 
352 void DataAggregator::parsePreAggregated() {
353   std::string Error;
354 
355   ErrorOr<std::unique_ptr<MemoryBuffer>> MB =
356       MemoryBuffer::getFileOrSTDIN(Filename);
357   if (std::error_code EC = MB.getError()) {
358     errs() << "PERF2BOLT-ERROR: cannot open " << Filename << ": "
359            << EC.message() << "\n";
360     exit(1);
361   }
362 
363   FileBuf = std::move(*MB);
364   ParsingBuf = FileBuf->getBuffer();
365   Col = 0;
366   Line = 1;
367   if (parsePreAggregatedLBRSamples()) {
368     errs() << "PERF2BOLT: failed to parse samples\n";
369     exit(1);
370   }
371 }
372 
373 void DataAggregator::filterBinaryMMapInfo() {
374   if (opts::FilterPID) {
375     auto MMapInfoIter = BinaryMMapInfo.find(opts::FilterPID);
376     if (MMapInfoIter != BinaryMMapInfo.end()) {
377       MMapInfo MMap = MMapInfoIter->second;
378       BinaryMMapInfo.clear();
379       BinaryMMapInfo.insert(std::make_pair(MMap.PID, MMap));
380     } else {
381       if (errs().has_colors())
382         errs().changeColor(raw_ostream::RED);
383       errs() << "PERF2BOLT-ERROR: could not find a profile matching PID \""
384              << opts::FilterPID << "\""
385              << " for binary \"" << BC->getFilename() << "\".";
386       assert(!BinaryMMapInfo.empty() && "No memory map for matching binary");
387       errs() << " Profile for the following process is available:\n";
388       for (std::pair<const uint64_t, MMapInfo> &MMI : BinaryMMapInfo)
389         outs() << "  " << MMI.second.PID
390                << (MMI.second.Forked ? " (forked)\n" : "\n");
391 
392       if (errs().has_colors())
393         errs().resetColor();
394 
395       exit(1);
396     }
397   }
398 }
399 
400 int DataAggregator::prepareToParse(StringRef Name, PerfProcessInfo &Process,
401                                    PerfProcessErrorCallbackTy Callback) {
402   if (!opts::ReadPerfEvents.empty()) {
403     outs() << "PERF2BOLT: using pre-processed perf events for '" << Name
404            << "' (perf-script-events)\n";
405     ParsingBuf = opts::ReadPerfEvents;
406     return 0;
407   }
408 
409   std::string Error;
410   outs() << "PERF2BOLT: waiting for perf " << Name
411          << " collection to finish...\n";
412   sys::ProcessInfo PI = sys::Wait(Process.PI, std::nullopt, &Error);
413 
414   if (!Error.empty()) {
415     errs() << "PERF-ERROR: " << PerfPath << ": " << Error << "\n";
416     deleteTempFiles();
417     exit(1);
418   }
419 
420   if (PI.ReturnCode != 0) {
421     ErrorOr<std::unique_ptr<MemoryBuffer>> ErrorMB =
422         MemoryBuffer::getFileOrSTDIN(Process.StderrPath.data());
423     StringRef ErrBuf = (*ErrorMB)->getBuffer();
424 
425     deleteTempFiles();
426     Callback(PI.ReturnCode, ErrBuf);
427     return PI.ReturnCode;
428   }
429 
430   ErrorOr<std::unique_ptr<MemoryBuffer>> MB =
431       MemoryBuffer::getFileOrSTDIN(Process.StdoutPath.data());
432   if (std::error_code EC = MB.getError()) {
433     errs() << "Cannot open " << Process.StdoutPath.data() << ": "
434            << EC.message() << "\n";
435     deleteTempFiles();
436     exit(1);
437   }
438 
439   FileBuf = std::move(*MB);
440   ParsingBuf = FileBuf->getBuffer();
441   Col = 0;
442   Line = 1;
443   return PI.ReturnCode;
444 }
445 
446 Error DataAggregator::preprocessProfile(BinaryContext &BC) {
447   this->BC = &BC;
448 
449   if (opts::ReadPreAggregated) {
450     parsePreAggregated();
451     return Error::success();
452   }
453 
454   if (std::optional<StringRef> FileBuildID = BC.getFileBuildID()) {
455     outs() << "BOLT-INFO: binary build-id is:     " << *FileBuildID << "\n";
456     processFileBuildID(*FileBuildID);
457   } else {
458     errs() << "BOLT-WARNING: build-id will not be checked because we could "
459               "not read one from input binary\n";
460   }
461 
462   auto ErrorCallback = [](int ReturnCode, StringRef ErrBuf) {
463     errs() << "PERF-ERROR: return code " << ReturnCode << "\n" << ErrBuf;
464     exit(1);
465   };
466 
467   auto MemEventsErrorCallback = [&](int ReturnCode, StringRef ErrBuf) {
468     Regex NoData("Samples for '.*' event do not have ADDR attribute set. "
469                  "Cannot print 'addr' field.");
470     if (!NoData.match(ErrBuf))
471       ErrorCallback(ReturnCode, ErrBuf);
472   };
473 
474   if (BC.IsLinuxKernel) {
475     // Current MMap parsing logic does not work with linux kernel.
476     // MMap entries for linux kernel uses PERF_RECORD_MMAP
477     // format instead of typical PERF_RECORD_MMAP2 format.
478     // Since linux kernel address mapping is absolute (same as
479     // in the ELF file), we avoid parsing MMap in linux kernel mode.
480     // While generating optimized linux kernel binary, we may need
481     // to parse MMap entries.
482 
483     // In linux kernel mode, we analyze and optimize
484     // all linux kernel binary instructions, irrespective
485     // of whether they are due to system calls or due to
486     // interrupts. Therefore, we cannot ignore interrupt
487     // in Linux kernel mode.
488     opts::IgnoreInterruptLBR = false;
489   } else {
490     prepareToParse("mmap events", MMapEventsPPI, ErrorCallback);
491     if (parseMMapEvents())
492       errs() << "PERF2BOLT: failed to parse mmap events\n";
493   }
494 
495   prepareToParse("task events", TaskEventsPPI, ErrorCallback);
496   if (parseTaskEvents())
497     errs() << "PERF2BOLT: failed to parse task events\n";
498 
499   filterBinaryMMapInfo();
500   prepareToParse("events", MainEventsPPI, ErrorCallback);
501 
502   if (opts::HeatmapMode) {
503     if (std::error_code EC = printLBRHeatMap()) {
504       errs() << "ERROR: failed to print heat map: " << EC.message() << '\n';
505       exit(1);
506     }
507     exit(0);
508   }
509 
510   if ((!opts::BasicAggregation && parseBranchEvents()) ||
511       (opts::BasicAggregation && parseBasicEvents()))
512     errs() << "PERF2BOLT: failed to parse samples\n";
513 
514   // Special handling for memory events
515   if (prepareToParse("mem events", MemEventsPPI, MemEventsErrorCallback))
516     return Error::success();
517 
518   if (const std::error_code EC = parseMemEvents())
519     errs() << "PERF2BOLT: failed to parse memory events: " << EC.message()
520            << '\n';
521 
522   deleteTempFiles();
523 
524   return Error::success();
525 }
526 
527 Error DataAggregator::readProfile(BinaryContext &BC) {
528   processProfile(BC);
529 
530   for (auto &BFI : BC.getBinaryFunctions()) {
531     BinaryFunction &Function = BFI.second;
532     convertBranchData(Function);
533   }
534 
535   if (opts::AggregateOnly) {
536     if (opts::ProfileFormat == opts::ProfileFormatKind::PF_Fdata)
537       if (std::error_code EC = writeAggregatedFile(opts::OutputFilename))
538         report_error("cannot create output data file", EC);
539 
540     // BAT YAML is handled by DataAggregator since normal YAML output requires
541     // CFG which is not available in BAT mode.
542     if (usesBAT()) {
543       if (opts::ProfileFormat == opts::ProfileFormatKind::PF_YAML)
544         if (std::error_code EC = writeBATYAML(BC, opts::OutputFilename))
545           report_error("cannot create output data file", EC);
546       if (!opts::SaveProfile.empty())
547         if (std::error_code EC = writeBATYAML(BC, opts::SaveProfile))
548           report_error("cannot create output data file", EC);
549     }
550   }
551 
552   return Error::success();
553 }
554 
555 bool DataAggregator::mayHaveProfileData(const BinaryFunction &Function) {
556   return Function.hasProfileAvailable();
557 }
558 
559 void DataAggregator::processProfile(BinaryContext &BC) {
560   if (opts::ReadPreAggregated)
561     processPreAggregated();
562   else if (opts::BasicAggregation)
563     processBasicEvents();
564   else
565     processBranchEvents();
566 
567   processMemEvents();
568 
569   // Mark all functions with registered events as having a valid profile.
570   const auto Flags = opts::BasicAggregation ? BinaryFunction::PF_SAMPLE
571                                             : BinaryFunction::PF_LBR;
572   for (auto &BFI : BC.getBinaryFunctions()) {
573     BinaryFunction &BF = BFI.second;
574     FuncBranchData *FBD = getBranchData(BF);
575     if (FBD || getFuncSampleData(BF.getNames())) {
576       BF.markProfiled(Flags);
577       if (FBD)
578         BF.RawBranchCount = FBD->getNumExecutedBranches();
579     }
580   }
581 
582   for (auto &FuncBranches : NamesToBranches)
583     llvm::stable_sort(FuncBranches.second.Data);
584 
585   for (auto &MemEvents : NamesToMemEvents)
586     llvm::stable_sort(MemEvents.second.Data);
587 
588   // Release intermediate storage.
589   clear(BranchLBRs);
590   clear(FallthroughLBRs);
591   clear(AggregatedLBRs);
592   clear(BasicSamples);
593   clear(MemSamples);
594 }
595 
596 BinaryFunction *
597 DataAggregator::getBinaryFunctionContainingAddress(uint64_t Address) const {
598   if (!BC->containsAddress(Address))
599     return nullptr;
600 
601   return BC->getBinaryFunctionContainingAddress(Address, /*CheckPastEnd=*/false,
602                                                 /*UseMaxSize=*/true);
603 }
604 
605 BinaryFunction *
606 DataAggregator::getBATParentFunction(const BinaryFunction &Func) const {
607   if (BAT)
608     if (const uint64_t HotAddr = BAT->fetchParentAddress(Func.getAddress()))
609       return getBinaryFunctionContainingAddress(HotAddr);
610   return nullptr;
611 }
612 
613 StringRef DataAggregator::getLocationName(const BinaryFunction &Func,
614                                           bool BAT) {
615   if (!BAT)
616     return Func.getOneName();
617 
618   const BinaryFunction *OrigFunc = &Func;
619   // If it is a local function, prefer the name containing the file name where
620   // the local function was declared
621   for (StringRef AlternativeName : OrigFunc->getNames()) {
622     size_t FileNameIdx = AlternativeName.find('/');
623     // Confirm the alternative name has the pattern Symbol/FileName/1 before
624     // using it
625     if (FileNameIdx == StringRef::npos ||
626         AlternativeName.find('/', FileNameIdx + 1) == StringRef::npos)
627       continue;
628     return AlternativeName;
629   }
630   return OrigFunc->getOneName();
631 }
632 
633 bool DataAggregator::doSample(BinaryFunction &OrigFunc, uint64_t Address,
634                               uint64_t Count) {
635   BinaryFunction *ParentFunc = getBATParentFunction(OrigFunc);
636   BinaryFunction &Func = ParentFunc ? *ParentFunc : OrigFunc;
637   if (ParentFunc)
638     NumColdSamples += Count;
639 
640   auto I = NamesToSamples.find(Func.getOneName());
641   if (I == NamesToSamples.end()) {
642     bool Success;
643     StringRef LocName = getLocationName(Func, BAT);
644     std::tie(I, Success) = NamesToSamples.insert(
645         std::make_pair(Func.getOneName(),
646                        FuncSampleData(LocName, FuncSampleData::ContainerTy())));
647   }
648 
649   Address -= Func.getAddress();
650   if (BAT)
651     Address = BAT->translate(Func.getAddress(), Address, /*IsBranchSrc=*/false);
652 
653   I->second.bumpCount(Address, Count);
654   return true;
655 }
656 
657 bool DataAggregator::doIntraBranch(BinaryFunction &Func, uint64_t From,
658                                    uint64_t To, uint64_t Count,
659                                    uint64_t Mispreds) {
660   FuncBranchData *AggrData = getBranchData(Func);
661   if (!AggrData) {
662     AggrData = &NamesToBranches[Func.getOneName()];
663     AggrData->Name = getLocationName(Func, BAT);
664     setBranchData(Func, AggrData);
665   }
666 
667   LLVM_DEBUG(dbgs() << "BOLT-DEBUG: bumpBranchCount: "
668                     << formatv("{0} @ {1:x} -> {0} @ {2:x}\n", Func, From, To));
669   AggrData->bumpBranchCount(From, To, Count, Mispreds);
670   return true;
671 }
672 
673 bool DataAggregator::doInterBranch(BinaryFunction *FromFunc,
674                                    BinaryFunction *ToFunc, uint64_t From,
675                                    uint64_t To, uint64_t Count,
676                                    uint64_t Mispreds) {
677   FuncBranchData *FromAggrData = nullptr;
678   FuncBranchData *ToAggrData = nullptr;
679   StringRef SrcFunc;
680   StringRef DstFunc;
681   if (FromFunc) {
682     SrcFunc = getLocationName(*FromFunc, BAT);
683     FromAggrData = getBranchData(*FromFunc);
684     if (!FromAggrData) {
685       FromAggrData = &NamesToBranches[FromFunc->getOneName()];
686       FromAggrData->Name = SrcFunc;
687       setBranchData(*FromFunc, FromAggrData);
688     }
689 
690     recordExit(*FromFunc, From, Mispreds, Count);
691   }
692   if (ToFunc) {
693     DstFunc = getLocationName(*ToFunc, BAT);
694     ToAggrData = getBranchData(*ToFunc);
695     if (!ToAggrData) {
696       ToAggrData = &NamesToBranches[ToFunc->getOneName()];
697       ToAggrData->Name = DstFunc;
698       setBranchData(*ToFunc, ToAggrData);
699     }
700 
701     recordEntry(*ToFunc, To, Mispreds, Count);
702   }
703 
704   if (FromAggrData)
705     FromAggrData->bumpCallCount(From, Location(!DstFunc.empty(), DstFunc, To),
706                                 Count, Mispreds);
707   if (ToAggrData)
708     ToAggrData->bumpEntryCount(Location(!SrcFunc.empty(), SrcFunc, From), To,
709                                Count, Mispreds);
710   return true;
711 }
712 
713 bool DataAggregator::doBranch(uint64_t From, uint64_t To, uint64_t Count,
714                               uint64_t Mispreds, bool IsPreagg) {
715   // Returns whether \p Offset in \p Func contains a return instruction.
716   auto checkReturn = [&](const BinaryFunction &Func, const uint64_t Offset) {
717     auto isReturn = [&](auto MI) { return MI && BC->MIB->isReturn(*MI); };
718     return Func.hasInstructions()
719                ? isReturn(Func.getInstructionAtOffset(Offset))
720                : isReturn(Func.disassembleInstructionAtOffset(Offset));
721   };
722 
723   // Returns whether \p Offset in \p Func may be a call continuation excluding
724   // entry points and landing pads.
725   auto checkCallCont = [&](const BinaryFunction &Func, const uint64_t Offset) {
726     // No call continuation at a function start.
727     if (!Offset)
728       return false;
729 
730     // FIXME: support BAT case where the function might be in empty state
731     // (split fragments declared non-simple).
732     if (!Func.hasCFG())
733       return false;
734 
735     // The offset should not be an entry point or a landing pad.
736     const BinaryBasicBlock *ContBB = Func.getBasicBlockAtOffset(Offset);
737     return ContBB && !ContBB->isEntryPoint() && !ContBB->isLandingPad();
738   };
739 
740   // Mutates \p Addr to an offset into the containing function, performing BAT
741   // offset translation and parent lookup.
742   //
743   // Returns the containing function (or BAT parent) and whether the address
744   // corresponds to a return (if \p IsFrom) or a call continuation (otherwise).
745   auto handleAddress = [&](uint64_t &Addr, bool IsFrom) {
746     BinaryFunction *Func = getBinaryFunctionContainingAddress(Addr);
747     if (!Func)
748       return std::pair{Func, false};
749 
750     Addr -= Func->getAddress();
751 
752     bool IsRetOrCallCont =
753         IsFrom ? checkReturn(*Func, Addr) : checkCallCont(*Func, Addr);
754 
755     if (BAT)
756       Addr = BAT->translate(Func->getAddress(), Addr, IsFrom);
757 
758     BinaryFunction *ParentFunc = getBATParentFunction(*Func);
759     if (!ParentFunc)
760       return std::pair{Func, IsRetOrCallCont};
761 
762     if (IsFrom)
763       NumColdSamples += Count;
764 
765     return std::pair{ParentFunc, IsRetOrCallCont};
766   };
767 
768   uint64_t ToOrig = To;
769   auto [FromFunc, IsReturn] = handleAddress(From, /*IsFrom*/ true);
770   auto [ToFunc, IsCallCont] = handleAddress(To, /*IsFrom*/ false);
771   if (!FromFunc && !ToFunc)
772     return false;
773 
774   // Record call to continuation trace.
775   if (IsPreagg && FromFunc != ToFunc && (IsReturn || IsCallCont)) {
776     LBREntry First{ToOrig - 1, ToOrig - 1, false};
777     LBREntry Second{ToOrig, ToOrig, false};
778     return doTrace(First, Second, Count);
779   }
780   // Ignore returns.
781   if (IsReturn)
782     return true;
783 
784   // Treat recursive control transfers as inter-branches.
785   if (FromFunc == ToFunc && To != 0) {
786     recordBranch(*FromFunc, From, To, Count, Mispreds);
787     return doIntraBranch(*FromFunc, From, To, Count, Mispreds);
788   }
789 
790   return doInterBranch(FromFunc, ToFunc, From, To, Count, Mispreds);
791 }
792 
793 bool DataAggregator::doTrace(const LBREntry &First, const LBREntry &Second,
794                              uint64_t Count) {
795   BinaryFunction *FromFunc = getBinaryFunctionContainingAddress(First.To);
796   BinaryFunction *ToFunc = getBinaryFunctionContainingAddress(Second.From);
797   if (!FromFunc || !ToFunc) {
798     LLVM_DEBUG({
799       dbgs() << "Out of range trace starting in ";
800       if (FromFunc)
801         dbgs() << formatv("{0} @ {1:x}", *FromFunc,
802                           First.To - FromFunc->getAddress());
803       else
804         dbgs() << Twine::utohexstr(First.To);
805       dbgs() << " and ending in ";
806       if (ToFunc)
807         dbgs() << formatv("{0} @ {1:x}", *ToFunc,
808                           Second.From - ToFunc->getAddress());
809       else
810         dbgs() << Twine::utohexstr(Second.From);
811       dbgs() << '\n';
812     });
813     NumLongRangeTraces += Count;
814     return false;
815   }
816   if (FromFunc != ToFunc) {
817     NumInvalidTraces += Count;
818     LLVM_DEBUG({
819       dbgs() << "Invalid trace starting in " << FromFunc->getPrintName()
820              << formatv(" @ {0:x}", First.To - FromFunc->getAddress())
821              << " and ending in " << ToFunc->getPrintName()
822              << formatv(" @ {0:x}\n", Second.From - ToFunc->getAddress());
823     });
824     return false;
825   }
826 
827   // Set ParentFunc to BAT parent function or FromFunc itself.
828   BinaryFunction *ParentFunc = getBATParentFunction(*FromFunc);
829   if (!ParentFunc)
830     ParentFunc = FromFunc;
831   ParentFunc->SampleCountInBytes += Count * (Second.From - First.To);
832 
833   std::optional<BoltAddressTranslation::FallthroughListTy> FTs =
834       BAT ? BAT->getFallthroughsInTrace(FromFunc->getAddress(), First.To,
835                                         Second.From)
836           : getFallthroughsInTrace(*FromFunc, First, Second, Count);
837   if (!FTs) {
838     LLVM_DEBUG(
839         dbgs() << "Invalid trace starting in " << FromFunc->getPrintName()
840                << " @ " << Twine::utohexstr(First.To - FromFunc->getAddress())
841                << " and ending in " << ToFunc->getPrintName() << " @ "
842                << ToFunc->getPrintName() << " @ "
843                << Twine::utohexstr(Second.From - ToFunc->getAddress()) << '\n');
844     NumInvalidTraces += Count;
845     return false;
846   }
847 
848   LLVM_DEBUG(dbgs() << "Processing " << FTs->size() << " fallthroughs for "
849                     << FromFunc->getPrintName() << ":"
850                     << Twine::utohexstr(First.To) << " to "
851                     << Twine::utohexstr(Second.From) << ".\n");
852   for (auto [From, To] : *FTs) {
853     if (BAT) {
854       From = BAT->translate(FromFunc->getAddress(), From, /*IsBranchSrc=*/true);
855       To = BAT->translate(FromFunc->getAddress(), To, /*IsBranchSrc=*/false);
856     }
857     doIntraBranch(*ParentFunc, From, To, Count, false);
858   }
859 
860   return true;
861 }
862 
863 std::optional<SmallVector<std::pair<uint64_t, uint64_t>, 16>>
864 DataAggregator::getFallthroughsInTrace(BinaryFunction &BF,
865                                        const LBREntry &FirstLBR,
866                                        const LBREntry &SecondLBR,
867                                        uint64_t Count) const {
868   SmallVector<std::pair<uint64_t, uint64_t>, 16> Branches;
869 
870   BinaryContext &BC = BF.getBinaryContext();
871 
872   if (!BF.isSimple())
873     return std::nullopt;
874 
875   assert(BF.hasCFG() && "can only record traces in CFG state");
876 
877   // Offsets of the trace within this function.
878   const uint64_t From = FirstLBR.To - BF.getAddress();
879   const uint64_t To = SecondLBR.From - BF.getAddress();
880 
881   if (From > To)
882     return std::nullopt;
883 
884   const BinaryBasicBlock *FromBB = BF.getBasicBlockContainingOffset(From);
885   const BinaryBasicBlock *ToBB = BF.getBasicBlockContainingOffset(To);
886 
887   if (!FromBB || !ToBB)
888     return std::nullopt;
889 
890   // Adjust FromBB if the first LBR is a return from the last instruction in
891   // the previous block (that instruction should be a call).
892   if (From == FromBB->getOffset() && !BF.containsAddress(FirstLBR.From) &&
893       !FromBB->isEntryPoint() && !FromBB->isLandingPad()) {
894     const BinaryBasicBlock *PrevBB =
895         BF.getLayout().getBlock(FromBB->getIndex() - 1);
896     if (PrevBB->getSuccessor(FromBB->getLabel())) {
897       const MCInst *Instr = PrevBB->getLastNonPseudoInstr();
898       if (Instr && BC.MIB->isCall(*Instr))
899         FromBB = PrevBB;
900       else
901         LLVM_DEBUG(dbgs() << "invalid incoming LBR (no call): " << FirstLBR
902                           << '\n');
903     } else {
904       LLVM_DEBUG(dbgs() << "invalid incoming LBR: " << FirstLBR << '\n');
905     }
906   }
907 
908   // Fill out information for fall-through edges. The From and To could be
909   // within the same basic block, e.g. when two call instructions are in the
910   // same block. In this case we skip the processing.
911   if (FromBB == ToBB)
912     return Branches;
913 
914   // Process blocks in the original layout order.
915   BinaryBasicBlock *BB = BF.getLayout().getBlock(FromBB->getIndex());
916   assert(BB == FromBB && "index mismatch");
917   while (BB != ToBB) {
918     BinaryBasicBlock *NextBB = BF.getLayout().getBlock(BB->getIndex() + 1);
919     assert((NextBB && NextBB->getOffset() > BB->getOffset()) && "bad layout");
920 
921     // Check for bad LBRs.
922     if (!BB->getSuccessor(NextBB->getLabel())) {
923       LLVM_DEBUG(dbgs() << "no fall-through for the trace:\n"
924                         << "  " << FirstLBR << '\n'
925                         << "  " << SecondLBR << '\n');
926       return std::nullopt;
927     }
928 
929     const MCInst *Instr = BB->getLastNonPseudoInstr();
930     uint64_t Offset = 0;
931     if (Instr)
932       Offset = BC.MIB->getOffsetWithDefault(*Instr, 0);
933     else
934       Offset = BB->getOffset();
935 
936     Branches.emplace_back(Offset, NextBB->getOffset());
937 
938     BB = NextBB;
939   }
940 
941   // Record fall-through jumps
942   for (const auto &[FromOffset, ToOffset] : Branches) {
943     BinaryBasicBlock *FromBB = BF.getBasicBlockContainingOffset(FromOffset);
944     BinaryBasicBlock *ToBB = BF.getBasicBlockAtOffset(ToOffset);
945     assert(FromBB && ToBB);
946     BinaryBasicBlock::BinaryBranchInfo &BI = FromBB->getBranchInfo(*ToBB);
947     BI.Count += Count;
948   }
949 
950   return Branches;
951 }
952 
953 bool DataAggregator::recordEntry(BinaryFunction &BF, uint64_t To, bool Mispred,
954                                  uint64_t Count) const {
955   if (To > BF.getSize())
956     return false;
957 
958   if (!BF.hasProfile())
959     BF.ExecutionCount = 0;
960 
961   BinaryBasicBlock *EntryBB = nullptr;
962   if (To == 0) {
963     BF.ExecutionCount += Count;
964     if (!BF.empty())
965       EntryBB = &BF.front();
966   } else if (BinaryBasicBlock *BB = BF.getBasicBlockAtOffset(To)) {
967     if (BB->isEntryPoint())
968       EntryBB = BB;
969   }
970 
971   if (EntryBB)
972     EntryBB->setExecutionCount(EntryBB->getKnownExecutionCount() + Count);
973 
974   return true;
975 }
976 
977 bool DataAggregator::recordExit(BinaryFunction &BF, uint64_t From, bool Mispred,
978                                 uint64_t Count) const {
979   if (!BF.isSimple() || From > BF.getSize())
980     return false;
981 
982   if (!BF.hasProfile())
983     BF.ExecutionCount = 0;
984 
985   return true;
986 }
987 
988 ErrorOr<LBREntry> DataAggregator::parseLBREntry() {
989   LBREntry Res;
990   ErrorOr<StringRef> FromStrRes = parseString('/');
991   if (std::error_code EC = FromStrRes.getError())
992     return EC;
993   StringRef OffsetStr = FromStrRes.get();
994   if (OffsetStr.getAsInteger(0, Res.From)) {
995     reportError("expected hexadecimal number with From address");
996     Diag << "Found: " << OffsetStr << "\n";
997     return make_error_code(llvm::errc::io_error);
998   }
999 
1000   ErrorOr<StringRef> ToStrRes = parseString('/');
1001   if (std::error_code EC = ToStrRes.getError())
1002     return EC;
1003   OffsetStr = ToStrRes.get();
1004   if (OffsetStr.getAsInteger(0, Res.To)) {
1005     reportError("expected hexadecimal number with To address");
1006     Diag << "Found: " << OffsetStr << "\n";
1007     return make_error_code(llvm::errc::io_error);
1008   }
1009 
1010   ErrorOr<StringRef> MispredStrRes = parseString('/');
1011   if (std::error_code EC = MispredStrRes.getError())
1012     return EC;
1013   StringRef MispredStr = MispredStrRes.get();
1014   if (MispredStr.size() != 1 ||
1015       (MispredStr[0] != 'P' && MispredStr[0] != 'M' && MispredStr[0] != '-')) {
1016     reportError("expected single char for mispred bit");
1017     Diag << "Found: " << MispredStr << "\n";
1018     return make_error_code(llvm::errc::io_error);
1019   }
1020   Res.Mispred = MispredStr[0] == 'M';
1021 
1022   static bool MispredWarning = true;
1023   if (MispredStr[0] == '-' && MispredWarning) {
1024     errs() << "PERF2BOLT-WARNING: misprediction bit is missing in profile\n";
1025     MispredWarning = false;
1026   }
1027 
1028   ErrorOr<StringRef> Rest = parseString(FieldSeparator, true);
1029   if (std::error_code EC = Rest.getError())
1030     return EC;
1031   if (Rest.get().size() < 5) {
1032     reportError("expected rest of LBR entry");
1033     Diag << "Found: " << Rest.get() << "\n";
1034     return make_error_code(llvm::errc::io_error);
1035   }
1036   return Res;
1037 }
1038 
1039 bool DataAggregator::checkAndConsumeFS() {
1040   if (ParsingBuf[0] != FieldSeparator)
1041     return false;
1042 
1043   ParsingBuf = ParsingBuf.drop_front(1);
1044   Col += 1;
1045   return true;
1046 }
1047 
1048 void DataAggregator::consumeRestOfLine() {
1049   size_t LineEnd = ParsingBuf.find_first_of('\n');
1050   if (LineEnd == StringRef::npos) {
1051     ParsingBuf = StringRef();
1052     Col = 0;
1053     Line += 1;
1054     return;
1055   }
1056   ParsingBuf = ParsingBuf.drop_front(LineEnd + 1);
1057   Col = 0;
1058   Line += 1;
1059 }
1060 
1061 bool DataAggregator::checkNewLine() {
1062   return ParsingBuf[0] == '\n';
1063 }
1064 
1065 ErrorOr<DataAggregator::PerfBranchSample> DataAggregator::parseBranchSample() {
1066   PerfBranchSample Res;
1067 
1068   while (checkAndConsumeFS()) {
1069   }
1070 
1071   ErrorOr<int64_t> PIDRes = parseNumberField(FieldSeparator, true);
1072   if (std::error_code EC = PIDRes.getError())
1073     return EC;
1074   auto MMapInfoIter = BinaryMMapInfo.find(*PIDRes);
1075   if (!BC->IsLinuxKernel && MMapInfoIter == BinaryMMapInfo.end()) {
1076     consumeRestOfLine();
1077     return make_error_code(errc::no_such_process);
1078   }
1079 
1080   if (checkAndConsumeNewLine())
1081     return Res;
1082 
1083   while (!checkAndConsumeNewLine()) {
1084     checkAndConsumeFS();
1085 
1086     ErrorOr<LBREntry> LBRRes = parseLBREntry();
1087     if (std::error_code EC = LBRRes.getError())
1088       return EC;
1089     LBREntry LBR = LBRRes.get();
1090     if (ignoreKernelInterrupt(LBR))
1091       continue;
1092     if (!BC->HasFixedLoadAddress)
1093       adjustLBR(LBR, MMapInfoIter->second);
1094     Res.LBR.push_back(LBR);
1095   }
1096 
1097   return Res;
1098 }
1099 
1100 ErrorOr<DataAggregator::PerfBasicSample> DataAggregator::parseBasicSample() {
1101   while (checkAndConsumeFS()) {
1102   }
1103 
1104   ErrorOr<int64_t> PIDRes = parseNumberField(FieldSeparator, true);
1105   if (std::error_code EC = PIDRes.getError())
1106     return EC;
1107 
1108   auto MMapInfoIter = BinaryMMapInfo.find(*PIDRes);
1109   if (MMapInfoIter == BinaryMMapInfo.end()) {
1110     consumeRestOfLine();
1111     return PerfBasicSample{StringRef(), 0};
1112   }
1113 
1114   while (checkAndConsumeFS()) {
1115   }
1116 
1117   ErrorOr<StringRef> Event = parseString(FieldSeparator);
1118   if (std::error_code EC = Event.getError())
1119     return EC;
1120 
1121   while (checkAndConsumeFS()) {
1122   }
1123 
1124   ErrorOr<uint64_t> AddrRes = parseHexField(FieldSeparator, true);
1125   if (std::error_code EC = AddrRes.getError())
1126     return EC;
1127 
1128   if (!checkAndConsumeNewLine()) {
1129     reportError("expected end of line");
1130     return make_error_code(llvm::errc::io_error);
1131   }
1132 
1133   uint64_t Address = *AddrRes;
1134   if (!BC->HasFixedLoadAddress)
1135     adjustAddress(Address, MMapInfoIter->second);
1136 
1137   return PerfBasicSample{Event.get(), Address};
1138 }
1139 
1140 ErrorOr<DataAggregator::PerfMemSample> DataAggregator::parseMemSample() {
1141   PerfMemSample Res{0, 0};
1142 
1143   while (checkAndConsumeFS()) {
1144   }
1145 
1146   ErrorOr<int64_t> PIDRes = parseNumberField(FieldSeparator, true);
1147   if (std::error_code EC = PIDRes.getError())
1148     return EC;
1149 
1150   auto MMapInfoIter = BinaryMMapInfo.find(*PIDRes);
1151   if (MMapInfoIter == BinaryMMapInfo.end()) {
1152     consumeRestOfLine();
1153     return Res;
1154   }
1155 
1156   while (checkAndConsumeFS()) {
1157   }
1158 
1159   ErrorOr<StringRef> Event = parseString(FieldSeparator);
1160   if (std::error_code EC = Event.getError())
1161     return EC;
1162   if (!Event.get().contains("mem-loads")) {
1163     consumeRestOfLine();
1164     return Res;
1165   }
1166 
1167   while (checkAndConsumeFS()) {
1168   }
1169 
1170   ErrorOr<uint64_t> AddrRes = parseHexField(FieldSeparator);
1171   if (std::error_code EC = AddrRes.getError())
1172     return EC;
1173 
1174   while (checkAndConsumeFS()) {
1175   }
1176 
1177   ErrorOr<uint64_t> PCRes = parseHexField(FieldSeparator, true);
1178   if (std::error_code EC = PCRes.getError()) {
1179     consumeRestOfLine();
1180     return EC;
1181   }
1182 
1183   if (!checkAndConsumeNewLine()) {
1184     reportError("expected end of line");
1185     return make_error_code(llvm::errc::io_error);
1186   }
1187 
1188   uint64_t Address = *AddrRes;
1189   if (!BC->HasFixedLoadAddress)
1190     adjustAddress(Address, MMapInfoIter->second);
1191 
1192   return PerfMemSample{PCRes.get(), Address};
1193 }
1194 
1195 ErrorOr<Location> DataAggregator::parseLocationOrOffset() {
1196   auto parseOffset = [this]() -> ErrorOr<Location> {
1197     ErrorOr<uint64_t> Res = parseHexField(FieldSeparator);
1198     if (std::error_code EC = Res.getError())
1199       return EC;
1200     return Location(Res.get());
1201   };
1202 
1203   size_t Sep = ParsingBuf.find_first_of(" \n");
1204   if (Sep == StringRef::npos)
1205     return parseOffset();
1206   StringRef LookAhead = ParsingBuf.substr(0, Sep);
1207   if (!LookAhead.contains(':'))
1208     return parseOffset();
1209 
1210   ErrorOr<StringRef> BuildID = parseString(':');
1211   if (std::error_code EC = BuildID.getError())
1212     return EC;
1213   ErrorOr<uint64_t> Offset = parseHexField(FieldSeparator);
1214   if (std::error_code EC = Offset.getError())
1215     return EC;
1216   return Location(true, BuildID.get(), Offset.get());
1217 }
1218 
1219 ErrorOr<DataAggregator::AggregatedLBREntry>
1220 DataAggregator::parseAggregatedLBREntry() {
1221   while (checkAndConsumeFS()) {
1222   }
1223 
1224   ErrorOr<StringRef> TypeOrErr = parseString(FieldSeparator);
1225   if (std::error_code EC = TypeOrErr.getError())
1226     return EC;
1227   auto Type = AggregatedLBREntry::BRANCH;
1228   if (TypeOrErr.get() == "B") {
1229     Type = AggregatedLBREntry::BRANCH;
1230   } else if (TypeOrErr.get() == "F") {
1231     Type = AggregatedLBREntry::FT;
1232   } else if (TypeOrErr.get() == "f") {
1233     Type = AggregatedLBREntry::FT_EXTERNAL_ORIGIN;
1234   } else {
1235     reportError("expected B, F or f");
1236     return make_error_code(llvm::errc::io_error);
1237   }
1238 
1239   while (checkAndConsumeFS()) {
1240   }
1241   ErrorOr<Location> From = parseLocationOrOffset();
1242   if (std::error_code EC = From.getError())
1243     return EC;
1244 
1245   while (checkAndConsumeFS()) {
1246   }
1247   ErrorOr<Location> To = parseLocationOrOffset();
1248   if (std::error_code EC = To.getError())
1249     return EC;
1250 
1251   while (checkAndConsumeFS()) {
1252   }
1253   ErrorOr<int64_t> Frequency =
1254       parseNumberField(FieldSeparator, Type != AggregatedLBREntry::BRANCH);
1255   if (std::error_code EC = Frequency.getError())
1256     return EC;
1257 
1258   uint64_t Mispreds = 0;
1259   if (Type == AggregatedLBREntry::BRANCH) {
1260     while (checkAndConsumeFS()) {
1261     }
1262     ErrorOr<int64_t> MispredsOrErr = parseNumberField(FieldSeparator, true);
1263     if (std::error_code EC = MispredsOrErr.getError())
1264       return EC;
1265     Mispreds = static_cast<uint64_t>(MispredsOrErr.get());
1266   }
1267 
1268   if (!checkAndConsumeNewLine()) {
1269     reportError("expected end of line");
1270     return make_error_code(llvm::errc::io_error);
1271   }
1272 
1273   return AggregatedLBREntry{From.get(), To.get(),
1274                             static_cast<uint64_t>(Frequency.get()), Mispreds,
1275                             Type};
1276 }
1277 
1278 bool DataAggregator::ignoreKernelInterrupt(LBREntry &LBR) const {
1279   return opts::IgnoreInterruptLBR &&
1280          (LBR.From >= KernelBaseAddr || LBR.To >= KernelBaseAddr);
1281 }
1282 
1283 std::error_code DataAggregator::printLBRHeatMap() {
1284   outs() << "PERF2BOLT: parse branch events...\n";
1285   NamedRegionTimer T("parseBranch", "Parsing branch events", TimerGroupName,
1286                      TimerGroupDesc, opts::TimeAggregator);
1287 
1288   if (BC->IsLinuxKernel) {
1289     opts::HeatmapMaxAddress = 0xffffffffffffffff;
1290     opts::HeatmapMinAddress = KernelBaseAddr;
1291   }
1292   Heatmap HM(opts::HeatmapBlock, opts::HeatmapMinAddress,
1293              opts::HeatmapMaxAddress, getTextSections(BC));
1294   uint64_t NumTotalSamples = 0;
1295 
1296   if (opts::BasicAggregation) {
1297     while (hasData()) {
1298       ErrorOr<PerfBasicSample> SampleRes = parseBasicSample();
1299       if (std::error_code EC = SampleRes.getError()) {
1300         if (EC == errc::no_such_process)
1301           continue;
1302         return EC;
1303       }
1304       PerfBasicSample &Sample = SampleRes.get();
1305       HM.registerAddress(Sample.PC);
1306       NumTotalSamples++;
1307     }
1308     outs() << "HEATMAP: read " << NumTotalSamples << " basic samples\n";
1309   } else {
1310     while (hasData()) {
1311       ErrorOr<PerfBranchSample> SampleRes = parseBranchSample();
1312       if (std::error_code EC = SampleRes.getError()) {
1313         if (EC == errc::no_such_process)
1314           continue;
1315         return EC;
1316       }
1317 
1318       PerfBranchSample &Sample = SampleRes.get();
1319 
1320       // LBRs are stored in reverse execution order. NextLBR refers to the next
1321       // executed branch record.
1322       const LBREntry *NextLBR = nullptr;
1323       for (const LBREntry &LBR : Sample.LBR) {
1324         if (NextLBR) {
1325           // Record fall-through trace.
1326           const uint64_t TraceFrom = LBR.To;
1327           const uint64_t TraceTo = NextLBR->From;
1328           ++FallthroughLBRs[Trace(TraceFrom, TraceTo)].InternCount;
1329         }
1330         NextLBR = &LBR;
1331       }
1332       if (!Sample.LBR.empty()) {
1333         HM.registerAddress(Sample.LBR.front().To);
1334         HM.registerAddress(Sample.LBR.back().From);
1335       }
1336       NumTotalSamples += Sample.LBR.size();
1337     }
1338     outs() << "HEATMAP: read " << NumTotalSamples << " LBR samples\n";
1339     outs() << "HEATMAP: " << FallthroughLBRs.size() << " unique traces\n";
1340   }
1341 
1342   if (!NumTotalSamples) {
1343     if (opts::BasicAggregation) {
1344       errs() << "HEATMAP-ERROR: no basic event samples detected in profile. "
1345                 "Cannot build heatmap.";
1346     } else {
1347       errs() << "HEATMAP-ERROR: no LBR traces detected in profile. "
1348                 "Cannot build heatmap. Use -nl for building heatmap from "
1349                 "basic events.\n";
1350     }
1351     exit(1);
1352   }
1353 
1354   outs() << "HEATMAP: building heat map...\n";
1355 
1356   for (const auto &LBR : FallthroughLBRs) {
1357     const Trace &Trace = LBR.first;
1358     const FTInfo &Info = LBR.second;
1359     HM.registerAddressRange(Trace.From, Trace.To, Info.InternCount);
1360   }
1361 
1362   if (HM.getNumInvalidRanges())
1363     outs() << "HEATMAP: invalid traces: " << HM.getNumInvalidRanges() << '\n';
1364 
1365   if (!HM.size()) {
1366     errs() << "HEATMAP-ERROR: no valid traces registered\n";
1367     exit(1);
1368   }
1369 
1370   HM.print(opts::OutputFilename);
1371   if (opts::OutputFilename == "-")
1372     HM.printCDF(opts::OutputFilename);
1373   else
1374     HM.printCDF(opts::OutputFilename + ".csv");
1375   if (opts::OutputFilename == "-")
1376     HM.printSectionHotness(opts::OutputFilename);
1377   else
1378     HM.printSectionHotness(opts::OutputFilename + "-section-hotness.csv");
1379 
1380   return std::error_code();
1381 }
1382 
1383 uint64_t DataAggregator::parseLBRSample(const PerfBranchSample &Sample,
1384                                         bool NeedsSkylakeFix) {
1385   uint64_t NumTraces{0};
1386   // LBRs are stored in reverse execution order. NextLBR refers to the next
1387   // executed branch record.
1388   const LBREntry *NextLBR = nullptr;
1389   uint32_t NumEntry = 0;
1390   for (const LBREntry &LBR : Sample.LBR) {
1391     ++NumEntry;
1392     // Hardware bug workaround: Intel Skylake (which has 32 LBR entries)
1393     // sometimes record entry 32 as an exact copy of entry 31. This will cause
1394     // us to likely record an invalid trace and generate a stale function for
1395     // BAT mode (non BAT disassembles the function and is able to ignore this
1396     // trace at aggregation time). Drop first 2 entries (last two, in
1397     // chronological order)
1398     if (NeedsSkylakeFix && NumEntry <= 2)
1399       continue;
1400     if (NextLBR) {
1401       // Record fall-through trace.
1402       const uint64_t TraceFrom = LBR.To;
1403       const uint64_t TraceTo = NextLBR->From;
1404       const BinaryFunction *TraceBF =
1405           getBinaryFunctionContainingAddress(TraceFrom);
1406       if (TraceBF && TraceBF->containsAddress(TraceTo)) {
1407         FTInfo &Info = FallthroughLBRs[Trace(TraceFrom, TraceTo)];
1408         if (TraceBF->containsAddress(LBR.From))
1409           ++Info.InternCount;
1410         else
1411           ++Info.ExternCount;
1412       } else {
1413         const BinaryFunction *ToFunc =
1414             getBinaryFunctionContainingAddress(TraceTo);
1415         if (TraceBF && ToFunc) {
1416           LLVM_DEBUG({
1417             dbgs() << "Invalid trace starting in " << TraceBF->getPrintName()
1418                    << formatv(" @ {0:x}", TraceFrom - TraceBF->getAddress())
1419                    << formatv(" and ending @ {0:x}\n", TraceTo);
1420           });
1421           ++NumInvalidTraces;
1422         } else {
1423           LLVM_DEBUG({
1424             dbgs() << "Out of range trace starting in "
1425                    << (TraceBF ? TraceBF->getPrintName() : "None")
1426                    << formatv(" @ {0:x}",
1427                               TraceFrom - (TraceBF ? TraceBF->getAddress() : 0))
1428                    << " and ending in "
1429                    << (ToFunc ? ToFunc->getPrintName() : "None")
1430                    << formatv(" @ {0:x}\n",
1431                               TraceTo - (ToFunc ? ToFunc->getAddress() : 0));
1432           });
1433           ++NumLongRangeTraces;
1434         }
1435       }
1436       ++NumTraces;
1437     }
1438     NextLBR = &LBR;
1439 
1440     uint64_t From = getBinaryFunctionContainingAddress(LBR.From) ? LBR.From : 0;
1441     uint64_t To = getBinaryFunctionContainingAddress(LBR.To) ? LBR.To : 0;
1442     if (!From && !To)
1443       continue;
1444     TakenBranchInfo &Info = BranchLBRs[Trace(From, To)];
1445     ++Info.TakenCount;
1446     Info.MispredCount += LBR.Mispred;
1447   }
1448   return NumTraces;
1449 }
1450 
1451 std::error_code DataAggregator::parseBranchEvents() {
1452   outs() << "PERF2BOLT: parse branch events...\n";
1453   NamedRegionTimer T("parseBranch", "Parsing branch events", TimerGroupName,
1454                      TimerGroupDesc, opts::TimeAggregator);
1455 
1456   uint64_t NumTotalSamples = 0;
1457   uint64_t NumEntries = 0;
1458   uint64_t NumSamples = 0;
1459   uint64_t NumSamplesNoLBR = 0;
1460   uint64_t NumTraces = 0;
1461   bool NeedsSkylakeFix = false;
1462 
1463   while (hasData() && NumTotalSamples < opts::MaxSamples) {
1464     ++NumTotalSamples;
1465 
1466     ErrorOr<PerfBranchSample> SampleRes = parseBranchSample();
1467     if (std::error_code EC = SampleRes.getError()) {
1468       if (EC == errc::no_such_process)
1469         continue;
1470       return EC;
1471     }
1472     ++NumSamples;
1473 
1474     PerfBranchSample &Sample = SampleRes.get();
1475 
1476     if (Sample.LBR.empty()) {
1477       ++NumSamplesNoLBR;
1478       continue;
1479     }
1480 
1481     NumEntries += Sample.LBR.size();
1482     if (BAT && Sample.LBR.size() == 32 && !NeedsSkylakeFix) {
1483       errs() << "PERF2BOLT-WARNING: using Intel Skylake bug workaround\n";
1484       NeedsSkylakeFix = true;
1485     }
1486 
1487     NumTraces += parseLBRSample(Sample, NeedsSkylakeFix);
1488   }
1489 
1490   for (const Trace &Trace : llvm::make_first_range(BranchLBRs))
1491     for (const uint64_t Addr : {Trace.From, Trace.To})
1492       if (BinaryFunction *BF = getBinaryFunctionContainingAddress(Addr))
1493         BF->setHasProfileAvailable();
1494 
1495   auto printColored = [](raw_ostream &OS, float Percent, float T1, float T2) {
1496     OS << " (";
1497     if (OS.has_colors()) {
1498       if (Percent > T2)
1499         OS.changeColor(raw_ostream::RED);
1500       else if (Percent > T1)
1501         OS.changeColor(raw_ostream::YELLOW);
1502       else
1503         OS.changeColor(raw_ostream::GREEN);
1504     }
1505     OS << format("%.1f%%", Percent);
1506     if (OS.has_colors())
1507       OS.resetColor();
1508     OS << ")";
1509   };
1510 
1511   outs() << "PERF2BOLT: read " << NumSamples << " samples and " << NumEntries
1512          << " LBR entries\n";
1513   if (NumTotalSamples) {
1514     if (NumSamples && NumSamplesNoLBR == NumSamples) {
1515       // Note: we don't know if perf2bolt is being used to parse memory samples
1516       // at this point. In this case, it is OK to parse zero LBRs.
1517       errs() << "PERF2BOLT-WARNING: all recorded samples for this binary lack "
1518                 "LBR. Record profile with perf record -j any or run perf2bolt "
1519                 "in no-LBR mode with -nl (the performance improvement in -nl "
1520                 "mode may be limited)\n";
1521     } else {
1522       const uint64_t IgnoredSamples = NumTotalSamples - NumSamples;
1523       const float PercentIgnored = 100.0f * IgnoredSamples / NumTotalSamples;
1524       outs() << "PERF2BOLT: " << IgnoredSamples << " samples";
1525       printColored(outs(), PercentIgnored, 20, 50);
1526       outs() << " were ignored\n";
1527       if (PercentIgnored > 50.0f)
1528         errs() << "PERF2BOLT-WARNING: less than 50% of all recorded samples "
1529                   "were attributed to the input binary\n";
1530     }
1531   }
1532   outs() << "PERF2BOLT: traces mismatching disassembled function contents: "
1533          << NumInvalidTraces;
1534   float Perc = 0.0f;
1535   if (NumTraces > 0) {
1536     Perc = NumInvalidTraces * 100.0f / NumTraces;
1537     printColored(outs(), Perc, 5, 10);
1538   }
1539   outs() << "\n";
1540   if (Perc > 10.0f)
1541     outs() << "\n !! WARNING !! This high mismatch ratio indicates the input "
1542               "binary is probably not the same binary used during profiling "
1543               "collection. The generated data may be ineffective for improving "
1544               "performance.\n\n";
1545 
1546   outs() << "PERF2BOLT: out of range traces involving unknown regions: "
1547          << NumLongRangeTraces;
1548   if (NumTraces > 0)
1549     outs() << format(" (%.1f%%)", NumLongRangeTraces * 100.0f / NumTraces);
1550   outs() << "\n";
1551 
1552   if (NumColdSamples > 0) {
1553     const float ColdSamples = NumColdSamples * 100.0f / NumTotalSamples;
1554     outs() << "PERF2BOLT: " << NumColdSamples
1555            << format(" (%.1f%%)", ColdSamples)
1556            << " samples recorded in cold regions of split functions.\n";
1557     if (ColdSamples > 5.0f)
1558       outs()
1559           << "WARNING: The BOLT-processed binary where samples were collected "
1560              "likely used bad data or your service observed a large shift in "
1561              "profile. You may want to audit this.\n";
1562   }
1563 
1564   return std::error_code();
1565 }
1566 
1567 void DataAggregator::processBranchEvents() {
1568   outs() << "PERF2BOLT: processing branch events...\n";
1569   NamedRegionTimer T("processBranch", "Processing branch events",
1570                      TimerGroupName, TimerGroupDesc, opts::TimeAggregator);
1571 
1572   for (const auto &AggrLBR : FallthroughLBRs) {
1573     const Trace &Loc = AggrLBR.first;
1574     const FTInfo &Info = AggrLBR.second;
1575     LBREntry First{Loc.From, Loc.From, false};
1576     LBREntry Second{Loc.To, Loc.To, false};
1577     if (Info.InternCount)
1578       doTrace(First, Second, Info.InternCount);
1579     if (Info.ExternCount) {
1580       First.From = 0;
1581       doTrace(First, Second, Info.ExternCount);
1582     }
1583   }
1584 
1585   for (const auto &AggrLBR : BranchLBRs) {
1586     const Trace &Loc = AggrLBR.first;
1587     const TakenBranchInfo &Info = AggrLBR.second;
1588     doBranch(Loc.From, Loc.To, Info.TakenCount, Info.MispredCount,
1589              /*IsPreagg*/ false);
1590   }
1591 }
1592 
1593 std::error_code DataAggregator::parseBasicEvents() {
1594   outs() << "PERF2BOLT: parsing basic events (without LBR)...\n";
1595   NamedRegionTimer T("parseBasic", "Parsing basic events", TimerGroupName,
1596                      TimerGroupDesc, opts::TimeAggregator);
1597   while (hasData()) {
1598     ErrorOr<PerfBasicSample> Sample = parseBasicSample();
1599     if (std::error_code EC = Sample.getError())
1600       return EC;
1601 
1602     if (!Sample->PC)
1603       continue;
1604 
1605     if (BinaryFunction *BF = getBinaryFunctionContainingAddress(Sample->PC))
1606       BF->setHasProfileAvailable();
1607 
1608     ++BasicSamples[Sample->PC];
1609     EventNames.insert(Sample->EventName);
1610   }
1611 
1612   return std::error_code();
1613 }
1614 
1615 void DataAggregator::processBasicEvents() {
1616   outs() << "PERF2BOLT: processing basic events (without LBR)...\n";
1617   NamedRegionTimer T("processBasic", "Processing basic events", TimerGroupName,
1618                      TimerGroupDesc, opts::TimeAggregator);
1619   uint64_t OutOfRangeSamples = 0;
1620   uint64_t NumSamples = 0;
1621   for (auto &Sample : BasicSamples) {
1622     const uint64_t PC = Sample.first;
1623     const uint64_t HitCount = Sample.second;
1624     NumSamples += HitCount;
1625     BinaryFunction *Func = getBinaryFunctionContainingAddress(PC);
1626     if (!Func) {
1627       OutOfRangeSamples += HitCount;
1628       continue;
1629     }
1630 
1631     doSample(*Func, PC, HitCount);
1632   }
1633   outs() << "PERF2BOLT: read " << NumSamples << " samples\n";
1634 
1635   outs() << "PERF2BOLT: out of range samples recorded in unknown regions: "
1636          << OutOfRangeSamples;
1637   float Perc = 0.0f;
1638   if (NumSamples > 0) {
1639     outs() << " (";
1640     Perc = OutOfRangeSamples * 100.0f / NumSamples;
1641     if (outs().has_colors()) {
1642       if (Perc > 60.0f)
1643         outs().changeColor(raw_ostream::RED);
1644       else if (Perc > 40.0f)
1645         outs().changeColor(raw_ostream::YELLOW);
1646       else
1647         outs().changeColor(raw_ostream::GREEN);
1648     }
1649     outs() << format("%.1f%%", Perc);
1650     if (outs().has_colors())
1651       outs().resetColor();
1652     outs() << ")";
1653   }
1654   outs() << "\n";
1655   if (Perc > 80.0f)
1656     outs() << "\n !! WARNING !! This high mismatch ratio indicates the input "
1657               "binary is probably not the same binary used during profiling "
1658               "collection. The generated data may be ineffective for improving "
1659               "performance.\n\n";
1660 }
1661 
1662 std::error_code DataAggregator::parseMemEvents() {
1663   outs() << "PERF2BOLT: parsing memory events...\n";
1664   NamedRegionTimer T("parseMemEvents", "Parsing mem events", TimerGroupName,
1665                      TimerGroupDesc, opts::TimeAggregator);
1666   while (hasData()) {
1667     ErrorOr<PerfMemSample> Sample = parseMemSample();
1668     if (std::error_code EC = Sample.getError())
1669       return EC;
1670 
1671     if (BinaryFunction *BF = getBinaryFunctionContainingAddress(Sample->PC))
1672       BF->setHasProfileAvailable();
1673 
1674     MemSamples.emplace_back(std::move(Sample.get()));
1675   }
1676 
1677   return std::error_code();
1678 }
1679 
1680 void DataAggregator::processMemEvents() {
1681   NamedRegionTimer T("ProcessMemEvents", "Processing mem events",
1682                      TimerGroupName, TimerGroupDesc, opts::TimeAggregator);
1683   for (const PerfMemSample &Sample : MemSamples) {
1684     uint64_t PC = Sample.PC;
1685     uint64_t Addr = Sample.Addr;
1686     StringRef FuncName;
1687     StringRef MemName;
1688 
1689     // Try to resolve symbol for PC
1690     BinaryFunction *Func = getBinaryFunctionContainingAddress(PC);
1691     if (!Func) {
1692       LLVM_DEBUG(if (PC != 0) {
1693         dbgs() << formatv("Skipped mem event: {0:x} => {1:x}\n", PC, Addr);
1694       });
1695       continue;
1696     }
1697 
1698     FuncName = Func->getOneName();
1699     PC -= Func->getAddress();
1700 
1701     // Try to resolve symbol for memory load
1702     if (BinaryData *BD = BC->getBinaryDataContainingAddress(Addr)) {
1703       MemName = BD->getName();
1704       Addr -= BD->getAddress();
1705     } else if (opts::FilterMemProfile) {
1706       // Filter out heap/stack accesses
1707       continue;
1708     }
1709 
1710     const Location FuncLoc(!FuncName.empty(), FuncName, PC);
1711     const Location AddrLoc(!MemName.empty(), MemName, Addr);
1712 
1713     FuncMemData *MemData = &NamesToMemEvents[FuncName];
1714     MemData->Name = FuncName;
1715     setMemData(*Func, MemData);
1716     MemData->update(FuncLoc, AddrLoc);
1717     LLVM_DEBUG(dbgs() << "Mem event: " << FuncLoc << " = " << AddrLoc << "\n");
1718   }
1719 }
1720 
1721 std::error_code DataAggregator::parsePreAggregatedLBRSamples() {
1722   outs() << "PERF2BOLT: parsing pre-aggregated profile...\n";
1723   NamedRegionTimer T("parseAggregated", "Parsing aggregated branch events",
1724                      TimerGroupName, TimerGroupDesc, opts::TimeAggregator);
1725   while (hasData()) {
1726     ErrorOr<AggregatedLBREntry> AggrEntry = parseAggregatedLBREntry();
1727     if (std::error_code EC = AggrEntry.getError())
1728       return EC;
1729 
1730     for (const uint64_t Addr : {AggrEntry->From.Offset, AggrEntry->To.Offset})
1731       if (BinaryFunction *BF = getBinaryFunctionContainingAddress(Addr))
1732         BF->setHasProfileAvailable();
1733 
1734     AggregatedLBRs.emplace_back(std::move(AggrEntry.get()));
1735   }
1736 
1737   return std::error_code();
1738 }
1739 
1740 void DataAggregator::processPreAggregated() {
1741   outs() << "PERF2BOLT: processing pre-aggregated profile...\n";
1742   NamedRegionTimer T("processAggregated", "Processing aggregated branch events",
1743                      TimerGroupName, TimerGroupDesc, opts::TimeAggregator);
1744 
1745   uint64_t NumTraces = 0;
1746   for (const AggregatedLBREntry &AggrEntry : AggregatedLBRs) {
1747     switch (AggrEntry.EntryType) {
1748     case AggregatedLBREntry::BRANCH:
1749       doBranch(AggrEntry.From.Offset, AggrEntry.To.Offset, AggrEntry.Count,
1750                AggrEntry.Mispreds, /*IsPreagg*/ true);
1751       break;
1752     case AggregatedLBREntry::FT:
1753     case AggregatedLBREntry::FT_EXTERNAL_ORIGIN: {
1754       LBREntry First{AggrEntry.EntryType == AggregatedLBREntry::FT
1755                          ? AggrEntry.From.Offset
1756                          : 0,
1757                      AggrEntry.From.Offset, false};
1758       LBREntry Second{AggrEntry.To.Offset, AggrEntry.To.Offset, false};
1759       doTrace(First, Second, AggrEntry.Count);
1760       NumTraces += AggrEntry.Count;
1761       break;
1762     }
1763     }
1764   }
1765 
1766   outs() << "PERF2BOLT: read " << AggregatedLBRs.size()
1767          << " aggregated LBR entries\n";
1768   outs() << "PERF2BOLT: traces mismatching disassembled function contents: "
1769          << NumInvalidTraces;
1770   float Perc = 0.0f;
1771   if (NumTraces > 0) {
1772     outs() << " (";
1773     Perc = NumInvalidTraces * 100.0f / NumTraces;
1774     if (outs().has_colors()) {
1775       if (Perc > 10.0f)
1776         outs().changeColor(raw_ostream::RED);
1777       else if (Perc > 5.0f)
1778         outs().changeColor(raw_ostream::YELLOW);
1779       else
1780         outs().changeColor(raw_ostream::GREEN);
1781     }
1782     outs() << format("%.1f%%", Perc);
1783     if (outs().has_colors())
1784       outs().resetColor();
1785     outs() << ")";
1786   }
1787   outs() << "\n";
1788   if (Perc > 10.0f)
1789     outs() << "\n !! WARNING !! This high mismatch ratio indicates the input "
1790               "binary is probably not the same binary used during profiling "
1791               "collection. The generated data may be ineffective for improving "
1792               "performance.\n\n";
1793 
1794   outs() << "PERF2BOLT: Out of range traces involving unknown regions: "
1795          << NumLongRangeTraces;
1796   if (NumTraces > 0)
1797     outs() << format(" (%.1f%%)", NumLongRangeTraces * 100.0f / NumTraces);
1798   outs() << "\n";
1799 }
1800 
1801 std::optional<int32_t> DataAggregator::parseCommExecEvent() {
1802   size_t LineEnd = ParsingBuf.find_first_of("\n");
1803   if (LineEnd == StringRef::npos) {
1804     reportError("expected rest of line");
1805     Diag << "Found: " << ParsingBuf << "\n";
1806     return std::nullopt;
1807   }
1808   StringRef Line = ParsingBuf.substr(0, LineEnd);
1809 
1810   size_t Pos = Line.find("PERF_RECORD_COMM exec");
1811   if (Pos == StringRef::npos)
1812     return std::nullopt;
1813   Line = Line.drop_front(Pos);
1814 
1815   // Line:
1816   //  PERF_RECORD_COMM exec: <name>:<pid>/<tid>"
1817   StringRef PIDStr = Line.rsplit(':').second.split('/').first;
1818   int32_t PID;
1819   if (PIDStr.getAsInteger(10, PID)) {
1820     reportError("expected PID");
1821     Diag << "Found: " << PIDStr << "in '" << Line << "'\n";
1822     return std::nullopt;
1823   }
1824 
1825   return PID;
1826 }
1827 
1828 namespace {
1829 std::optional<uint64_t> parsePerfTime(const StringRef TimeStr) {
1830   const StringRef SecTimeStr = TimeStr.split('.').first;
1831   const StringRef USecTimeStr = TimeStr.split('.').second;
1832   uint64_t SecTime;
1833   uint64_t USecTime;
1834   if (SecTimeStr.getAsInteger(10, SecTime) ||
1835       USecTimeStr.getAsInteger(10, USecTime))
1836     return std::nullopt;
1837   return SecTime * 1000000ULL + USecTime;
1838 }
1839 }
1840 
1841 std::optional<DataAggregator::ForkInfo> DataAggregator::parseForkEvent() {
1842   while (checkAndConsumeFS()) {
1843   }
1844 
1845   size_t LineEnd = ParsingBuf.find_first_of("\n");
1846   if (LineEnd == StringRef::npos) {
1847     reportError("expected rest of line");
1848     Diag << "Found: " << ParsingBuf << "\n";
1849     return std::nullopt;
1850   }
1851   StringRef Line = ParsingBuf.substr(0, LineEnd);
1852 
1853   size_t Pos = Line.find("PERF_RECORD_FORK");
1854   if (Pos == StringRef::npos) {
1855     consumeRestOfLine();
1856     return std::nullopt;
1857   }
1858 
1859   ForkInfo FI;
1860 
1861   const StringRef TimeStr =
1862       Line.substr(0, Pos).rsplit(':').first.rsplit(FieldSeparator).second;
1863   if (std::optional<uint64_t> TimeRes = parsePerfTime(TimeStr)) {
1864     FI.Time = *TimeRes;
1865   }
1866 
1867   Line = Line.drop_front(Pos);
1868 
1869   // Line:
1870   //  PERF_RECORD_FORK(<child_pid>:<child_tid>):(<parent_pid>:<parent_tid>)
1871   const StringRef ChildPIDStr = Line.split('(').second.split(':').first;
1872   if (ChildPIDStr.getAsInteger(10, FI.ChildPID)) {
1873     reportError("expected PID");
1874     Diag << "Found: " << ChildPIDStr << "in '" << Line << "'\n";
1875     return std::nullopt;
1876   }
1877 
1878   const StringRef ParentPIDStr = Line.rsplit('(').second.split(':').first;
1879   if (ParentPIDStr.getAsInteger(10, FI.ParentPID)) {
1880     reportError("expected PID");
1881     Diag << "Found: " << ParentPIDStr << "in '" << Line << "'\n";
1882     return std::nullopt;
1883   }
1884 
1885   consumeRestOfLine();
1886 
1887   return FI;
1888 }
1889 
1890 ErrorOr<std::pair<StringRef, DataAggregator::MMapInfo>>
1891 DataAggregator::parseMMapEvent() {
1892   while (checkAndConsumeFS()) {
1893   }
1894 
1895   MMapInfo ParsedInfo;
1896 
1897   size_t LineEnd = ParsingBuf.find_first_of("\n");
1898   if (LineEnd == StringRef::npos) {
1899     reportError("expected rest of line");
1900     Diag << "Found: " << ParsingBuf << "\n";
1901     return make_error_code(llvm::errc::io_error);
1902   }
1903   StringRef Line = ParsingBuf.substr(0, LineEnd);
1904 
1905   size_t Pos = Line.find("PERF_RECORD_MMAP2");
1906   if (Pos == StringRef::npos) {
1907     consumeRestOfLine();
1908     return std::make_pair(StringRef(), ParsedInfo);
1909   }
1910 
1911   // Line:
1912   //   {<name> .* <sec>.<usec>: }PERF_RECORD_MMAP2 <pid>/<tid>: .* <file_name>
1913 
1914   const StringRef TimeStr =
1915       Line.substr(0, Pos).rsplit(':').first.rsplit(FieldSeparator).second;
1916   if (std::optional<uint64_t> TimeRes = parsePerfTime(TimeStr))
1917     ParsedInfo.Time = *TimeRes;
1918 
1919   Line = Line.drop_front(Pos);
1920 
1921   // Line:
1922   //   PERF_RECORD_MMAP2 <pid>/<tid>: [<hexbase>(<hexsize>) .*]: .* <file_name>
1923 
1924   StringRef FileName = Line.rsplit(FieldSeparator).second;
1925   if (FileName.starts_with("//") || FileName.starts_with("[")) {
1926     consumeRestOfLine();
1927     return std::make_pair(StringRef(), ParsedInfo);
1928   }
1929   FileName = sys::path::filename(FileName);
1930 
1931   const StringRef PIDStr = Line.split(FieldSeparator).second.split('/').first;
1932   if (PIDStr.getAsInteger(10, ParsedInfo.PID)) {
1933     reportError("expected PID");
1934     Diag << "Found: " << PIDStr << "in '" << Line << "'\n";
1935     return make_error_code(llvm::errc::io_error);
1936   }
1937 
1938   const StringRef BaseAddressStr = Line.split('[').second.split('(').first;
1939   if (BaseAddressStr.getAsInteger(0, ParsedInfo.MMapAddress)) {
1940     reportError("expected base address");
1941     Diag << "Found: " << BaseAddressStr << "in '" << Line << "'\n";
1942     return make_error_code(llvm::errc::io_error);
1943   }
1944 
1945   const StringRef SizeStr = Line.split('(').second.split(')').first;
1946   if (SizeStr.getAsInteger(0, ParsedInfo.Size)) {
1947     reportError("expected mmaped size");
1948     Diag << "Found: " << SizeStr << "in '" << Line << "'\n";
1949     return make_error_code(llvm::errc::io_error);
1950   }
1951 
1952   const StringRef OffsetStr =
1953       Line.split('@').second.ltrim().split(FieldSeparator).first;
1954   if (OffsetStr.getAsInteger(0, ParsedInfo.Offset)) {
1955     reportError("expected mmaped page-aligned offset");
1956     Diag << "Found: " << OffsetStr << "in '" << Line << "'\n";
1957     return make_error_code(llvm::errc::io_error);
1958   }
1959 
1960   consumeRestOfLine();
1961 
1962   return std::make_pair(FileName, ParsedInfo);
1963 }
1964 
1965 std::error_code DataAggregator::parseMMapEvents() {
1966   outs() << "PERF2BOLT: parsing perf-script mmap events output\n";
1967   NamedRegionTimer T("parseMMapEvents", "Parsing mmap events", TimerGroupName,
1968                      TimerGroupDesc, opts::TimeAggregator);
1969 
1970   std::multimap<StringRef, MMapInfo> GlobalMMapInfo;
1971   while (hasData()) {
1972     ErrorOr<std::pair<StringRef, MMapInfo>> FileMMapInfoRes = parseMMapEvent();
1973     if (std::error_code EC = FileMMapInfoRes.getError())
1974       return EC;
1975 
1976     std::pair<StringRef, MMapInfo> FileMMapInfo = FileMMapInfoRes.get();
1977     if (FileMMapInfo.second.PID == -1)
1978       continue;
1979     if (FileMMapInfo.first == "(deleted)")
1980       continue;
1981 
1982     GlobalMMapInfo.insert(FileMMapInfo);
1983   }
1984 
1985   LLVM_DEBUG({
1986     dbgs() << "FileName -> mmap info:\n"
1987            << "  Filename : PID [MMapAddr, Size, Offset]\n";
1988     for (const auto &[Name, MMap] : GlobalMMapInfo)
1989       dbgs() << formatv("  {0} : {1} [{2:x}, {3:x} @ {4:x}]\n", Name, MMap.PID,
1990                         MMap.MMapAddress, MMap.Size, MMap.Offset);
1991   });
1992 
1993   StringRef NameToUse = llvm::sys::path::filename(BC->getFilename());
1994   if (GlobalMMapInfo.count(NameToUse) == 0 && !BuildIDBinaryName.empty()) {
1995     errs() << "PERF2BOLT-WARNING: using \"" << BuildIDBinaryName
1996            << "\" for profile matching\n";
1997     NameToUse = BuildIDBinaryName;
1998   }
1999 
2000   auto Range = GlobalMMapInfo.equal_range(NameToUse);
2001   for (MMapInfo &MMapInfo : llvm::make_second_range(make_range(Range))) {
2002     if (BC->HasFixedLoadAddress && MMapInfo.MMapAddress) {
2003       // Check that the binary mapping matches one of the segments.
2004       bool MatchFound = llvm::any_of(
2005           llvm::make_second_range(BC->SegmentMapInfo),
2006           [&](SegmentInfo &SegInfo) {
2007             // The mapping is page-aligned and hence the MMapAddress could be
2008             // different from the segment start address. We cannot know the page
2009             // size of the mapping, but we know it should not exceed the segment
2010             // alignment value. Hence we are performing an approximate check.
2011             return SegInfo.Address >= MMapInfo.MMapAddress &&
2012                    SegInfo.Address - MMapInfo.MMapAddress < SegInfo.Alignment &&
2013                    SegInfo.IsExecutable;
2014           });
2015       if (!MatchFound) {
2016         errs() << "PERF2BOLT-WARNING: ignoring mapping of " << NameToUse
2017                << " at 0x" << Twine::utohexstr(MMapInfo.MMapAddress) << '\n';
2018         continue;
2019       }
2020     }
2021 
2022     // Set base address for shared objects.
2023     if (!BC->HasFixedLoadAddress) {
2024       std::optional<uint64_t> BaseAddress =
2025           BC->getBaseAddressForMapping(MMapInfo.MMapAddress, MMapInfo.Offset);
2026       if (!BaseAddress) {
2027         errs() << "PERF2BOLT-WARNING: unable to find base address of the "
2028                   "binary when memory mapped at 0x"
2029                << Twine::utohexstr(MMapInfo.MMapAddress)
2030                << " using file offset 0x" << Twine::utohexstr(MMapInfo.Offset)
2031                << ". Ignoring profile data for this mapping\n";
2032         continue;
2033       }
2034       MMapInfo.BaseAddress = *BaseAddress;
2035     }
2036 
2037     // Try to add MMapInfo to the map and update its size. Large binaries may
2038     // span to multiple text segments, so the mapping is inserted only on the
2039     // first occurrence.
2040     if (!BinaryMMapInfo.insert(std::make_pair(MMapInfo.PID, MMapInfo)).second)
2041       assert(MMapInfo.BaseAddress == BinaryMMapInfo[MMapInfo.PID].BaseAddress &&
2042              "Base address on multiple segment mappings should match");
2043 
2044     // Update mapping size.
2045     const uint64_t EndAddress = MMapInfo.MMapAddress + MMapInfo.Size;
2046     const uint64_t Size = EndAddress - BinaryMMapInfo[MMapInfo.PID].BaseAddress;
2047     if (Size > BinaryMMapInfo[MMapInfo.PID].Size)
2048       BinaryMMapInfo[MMapInfo.PID].Size = Size;
2049   }
2050 
2051   if (BinaryMMapInfo.empty()) {
2052     if (errs().has_colors())
2053       errs().changeColor(raw_ostream::RED);
2054     errs() << "PERF2BOLT-ERROR: could not find a profile matching binary \""
2055            << BC->getFilename() << "\".";
2056     if (!GlobalMMapInfo.empty()) {
2057       errs() << " Profile for the following binary name(s) is available:\n";
2058       for (auto I = GlobalMMapInfo.begin(), IE = GlobalMMapInfo.end(); I != IE;
2059            I = GlobalMMapInfo.upper_bound(I->first))
2060         errs() << "  " << I->first << '\n';
2061       errs() << "Please rename the input binary.\n";
2062     } else {
2063       errs() << " Failed to extract any binary name from a profile.\n";
2064     }
2065     if (errs().has_colors())
2066       errs().resetColor();
2067 
2068     exit(1);
2069   }
2070 
2071   return std::error_code();
2072 }
2073 
2074 std::error_code DataAggregator::parseTaskEvents() {
2075   outs() << "PERF2BOLT: parsing perf-script task events output\n";
2076   NamedRegionTimer T("parseTaskEvents", "Parsing task events", TimerGroupName,
2077                      TimerGroupDesc, opts::TimeAggregator);
2078 
2079   while (hasData()) {
2080     if (std::optional<int32_t> CommInfo = parseCommExecEvent()) {
2081       // Remove forked child that ran execve
2082       auto MMapInfoIter = BinaryMMapInfo.find(*CommInfo);
2083       if (MMapInfoIter != BinaryMMapInfo.end() && MMapInfoIter->second.Forked)
2084         BinaryMMapInfo.erase(MMapInfoIter);
2085       consumeRestOfLine();
2086       continue;
2087     }
2088 
2089     std::optional<ForkInfo> ForkInfo = parseForkEvent();
2090     if (!ForkInfo)
2091       continue;
2092 
2093     if (ForkInfo->ParentPID == ForkInfo->ChildPID)
2094       continue;
2095 
2096     if (ForkInfo->Time == 0) {
2097       // Process was forked and mmaped before perf ran. In this case the child
2098       // should have its own mmap entry unless it was execve'd.
2099       continue;
2100     }
2101 
2102     auto MMapInfoIter = BinaryMMapInfo.find(ForkInfo->ParentPID);
2103     if (MMapInfoIter == BinaryMMapInfo.end())
2104       continue;
2105 
2106     MMapInfo MMapInfo = MMapInfoIter->second;
2107     MMapInfo.PID = ForkInfo->ChildPID;
2108     MMapInfo.Forked = true;
2109     BinaryMMapInfo.insert(std::make_pair(MMapInfo.PID, MMapInfo));
2110   }
2111 
2112   outs() << "PERF2BOLT: input binary is associated with "
2113          << BinaryMMapInfo.size() << " PID(s)\n";
2114 
2115   LLVM_DEBUG({
2116     for (const MMapInfo &MMI : llvm::make_second_range(BinaryMMapInfo))
2117       outs() << formatv("  {0}{1}: ({2:x}: {3:x})\n", MMI.PID,
2118                         (MMI.Forked ? " (forked)" : ""), MMI.MMapAddress,
2119                         MMI.Size);
2120   });
2121 
2122   return std::error_code();
2123 }
2124 
2125 std::optional<std::pair<StringRef, StringRef>>
2126 DataAggregator::parseNameBuildIDPair() {
2127   while (checkAndConsumeFS()) {
2128   }
2129 
2130   ErrorOr<StringRef> BuildIDStr = parseString(FieldSeparator, true);
2131   if (std::error_code EC = BuildIDStr.getError())
2132     return std::nullopt;
2133 
2134   // If one of the strings is missing, don't issue a parsing error, but still
2135   // do not return a value.
2136   consumeAllRemainingFS();
2137   if (checkNewLine())
2138     return std::nullopt;
2139 
2140   ErrorOr<StringRef> NameStr = parseString(FieldSeparator, true);
2141   if (std::error_code EC = NameStr.getError())
2142     return std::nullopt;
2143 
2144   consumeRestOfLine();
2145   return std::make_pair(NameStr.get(), BuildIDStr.get());
2146 }
2147 
2148 bool DataAggregator::hasAllBuildIDs() {
2149   const StringRef SavedParsingBuf = ParsingBuf;
2150 
2151   if (!hasData())
2152     return false;
2153 
2154   bool HasInvalidEntries = false;
2155   while (hasData()) {
2156     if (!parseNameBuildIDPair()) {
2157       HasInvalidEntries = true;
2158       break;
2159     }
2160   }
2161 
2162   ParsingBuf = SavedParsingBuf;
2163 
2164   return !HasInvalidEntries;
2165 }
2166 
2167 std::optional<StringRef>
2168 DataAggregator::getFileNameForBuildID(StringRef FileBuildID) {
2169   const StringRef SavedParsingBuf = ParsingBuf;
2170 
2171   StringRef FileName;
2172   while (hasData()) {
2173     std::optional<std::pair<StringRef, StringRef>> IDPair =
2174         parseNameBuildIDPair();
2175     if (!IDPair) {
2176       consumeRestOfLine();
2177       continue;
2178     }
2179 
2180     if (IDPair->second.starts_with(FileBuildID)) {
2181       FileName = sys::path::filename(IDPair->first);
2182       break;
2183     }
2184   }
2185 
2186   ParsingBuf = SavedParsingBuf;
2187 
2188   if (!FileName.empty())
2189     return FileName;
2190 
2191   return std::nullopt;
2192 }
2193 
2194 std::error_code
2195 DataAggregator::writeAggregatedFile(StringRef OutputFilename) const {
2196   std::error_code EC;
2197   raw_fd_ostream OutFile(OutputFilename, EC, sys::fs::OpenFlags::OF_None);
2198   if (EC)
2199     return EC;
2200 
2201   bool WriteMemLocs = false;
2202 
2203   auto writeLocation = [&OutFile, &WriteMemLocs](const Location &Loc) {
2204     if (WriteMemLocs)
2205       OutFile << (Loc.IsSymbol ? "4 " : "3 ");
2206     else
2207       OutFile << (Loc.IsSymbol ? "1 " : "0 ");
2208     OutFile << (Loc.Name.empty() ? "[unknown]" : getEscapedName(Loc.Name))
2209             << " " << Twine::utohexstr(Loc.Offset) << FieldSeparator;
2210   };
2211 
2212   uint64_t BranchValues = 0;
2213   uint64_t MemValues = 0;
2214 
2215   if (BAT)
2216     OutFile << "boltedcollection\n";
2217   if (opts::BasicAggregation) {
2218     OutFile << "no_lbr";
2219     for (const StringMapEntry<std::nullopt_t> &Entry : EventNames)
2220       OutFile << " " << Entry.getKey();
2221     OutFile << "\n";
2222 
2223     for (const auto &KV : NamesToSamples) {
2224       const FuncSampleData &FSD = KV.second;
2225       for (const SampleInfo &SI : FSD.Data) {
2226         writeLocation(SI.Loc);
2227         OutFile << SI.Hits << "\n";
2228         ++BranchValues;
2229       }
2230     }
2231   } else {
2232     for (const auto &KV : NamesToBranches) {
2233       const FuncBranchData &FBD = KV.second;
2234       for (const BranchInfo &BI : FBD.Data) {
2235         writeLocation(BI.From);
2236         writeLocation(BI.To);
2237         OutFile << BI.Mispreds << " " << BI.Branches << "\n";
2238         ++BranchValues;
2239       }
2240       for (const BranchInfo &BI : FBD.EntryData) {
2241         // Do not output if source is a known symbol, since this was already
2242         // accounted for in the source function
2243         if (BI.From.IsSymbol)
2244           continue;
2245         writeLocation(BI.From);
2246         writeLocation(BI.To);
2247         OutFile << BI.Mispreds << " " << BI.Branches << "\n";
2248         ++BranchValues;
2249       }
2250     }
2251 
2252     WriteMemLocs = true;
2253     for (const auto &KV : NamesToMemEvents) {
2254       const FuncMemData &FMD = KV.second;
2255       for (const MemInfo &MemEvent : FMD.Data) {
2256         writeLocation(MemEvent.Offset);
2257         writeLocation(MemEvent.Addr);
2258         OutFile << MemEvent.Count << "\n";
2259         ++MemValues;
2260       }
2261     }
2262   }
2263 
2264   outs() << "PERF2BOLT: wrote " << BranchValues << " objects and " << MemValues
2265          << " memory objects to " << OutputFilename << "\n";
2266 
2267   return std::error_code();
2268 }
2269 
2270 std::error_code DataAggregator::writeBATYAML(BinaryContext &BC,
2271                                              StringRef OutputFilename) const {
2272   std::error_code EC;
2273   raw_fd_ostream OutFile(OutputFilename, EC, sys::fs::OpenFlags::OF_None);
2274   if (EC)
2275     return EC;
2276 
2277   yaml::bolt::BinaryProfile BP;
2278 
2279   const MCPseudoProbeDecoder *PseudoProbeDecoder =
2280       opts::ProfileWritePseudoProbes ? BC.getPseudoProbeDecoder() : nullptr;
2281 
2282   // Fill out the header info.
2283   BP.Header.Version = 1;
2284   BP.Header.FileName = std::string(BC.getFilename());
2285   std::optional<StringRef> BuildID = BC.getFileBuildID();
2286   BP.Header.Id = BuildID ? std::string(*BuildID) : "<unknown>";
2287   BP.Header.Origin = std::string(getReaderName());
2288   // Only the input binary layout order is supported.
2289   BP.Header.IsDFSOrder = false;
2290   // FIXME: Need to match hash function used to produce BAT hashes.
2291   BP.Header.HashFunction = HashFunction::Default;
2292 
2293   ListSeparator LS(",");
2294   raw_string_ostream EventNamesOS(BP.Header.EventNames);
2295   for (const StringMapEntry<std::nullopt_t> &EventEntry : EventNames)
2296     EventNamesOS << LS << EventEntry.first().str();
2297 
2298   BP.Header.Flags = opts::BasicAggregation ? BinaryFunction::PF_SAMPLE
2299                                            : BinaryFunction::PF_LBR;
2300 
2301   // Add probe inline tree nodes.
2302   YAMLProfileWriter::InlineTreeDesc InlineTree;
2303   if (PseudoProbeDecoder)
2304     std::tie(BP.PseudoProbeDesc, InlineTree) =
2305         YAMLProfileWriter::convertPseudoProbeDesc(*PseudoProbeDecoder);
2306 
2307   if (!opts::BasicAggregation) {
2308     // Convert profile for functions not covered by BAT
2309     for (auto &BFI : BC.getBinaryFunctions()) {
2310       BinaryFunction &Function = BFI.second;
2311       if (!Function.hasProfile())
2312         continue;
2313       if (BAT->isBATFunction(Function.getAddress()))
2314         continue;
2315       BP.Functions.emplace_back(YAMLProfileWriter::convert(
2316           Function, /*UseDFS=*/false, InlineTree, BAT));
2317     }
2318 
2319     for (const auto &KV : NamesToBranches) {
2320       const StringRef FuncName = KV.first;
2321       const FuncBranchData &Branches = KV.second;
2322       yaml::bolt::BinaryFunctionProfile YamlBF;
2323       BinaryData *BD = BC.getBinaryDataByName(FuncName);
2324       assert(BD);
2325       uint64_t FuncAddress = BD->getAddress();
2326       if (!BAT->isBATFunction(FuncAddress))
2327         continue;
2328       BinaryFunction *BF = BC.getBinaryFunctionAtAddress(FuncAddress);
2329       assert(BF);
2330       YamlBF.Name = getLocationName(*BF, BAT);
2331       YamlBF.Id = BF->getFunctionNumber();
2332       YamlBF.Hash = BAT->getBFHash(FuncAddress);
2333       YamlBF.ExecCount = BF->getKnownExecutionCount();
2334       YamlBF.NumBasicBlocks = BAT->getNumBasicBlocks(FuncAddress);
2335       const BoltAddressTranslation::BBHashMapTy &BlockMap =
2336           BAT->getBBHashMap(FuncAddress);
2337       YamlBF.Blocks.resize(YamlBF.NumBasicBlocks);
2338 
2339       for (auto &&[Entry, YamlBB] : llvm::zip(BlockMap, YamlBF.Blocks)) {
2340         const auto &Block = Entry.second;
2341         YamlBB.Hash = Block.Hash;
2342         YamlBB.Index = Block.Index;
2343       }
2344 
2345       // Lookup containing basic block offset and index
2346       auto getBlock = [&BlockMap](uint32_t Offset) {
2347         auto BlockIt = BlockMap.upper_bound(Offset);
2348         if (LLVM_UNLIKELY(BlockIt == BlockMap.begin())) {
2349           errs() << "BOLT-ERROR: invalid BAT section\n";
2350           exit(1);
2351         }
2352         --BlockIt;
2353         return std::pair(BlockIt->first, BlockIt->second.Index);
2354       };
2355 
2356       for (const BranchInfo &BI : Branches.Data) {
2357         using namespace yaml::bolt;
2358         const auto &[BlockOffset, BlockIndex] = getBlock(BI.From.Offset);
2359         BinaryBasicBlockProfile &YamlBB = YamlBF.Blocks[BlockIndex];
2360         if (BI.To.IsSymbol && BI.To.Name == BI.From.Name && BI.To.Offset != 0) {
2361           // Internal branch
2362           const unsigned SuccIndex = getBlock(BI.To.Offset).second;
2363           auto &SI = YamlBB.Successors.emplace_back(SuccessorInfo{SuccIndex});
2364           SI.Count = BI.Branches;
2365           SI.Mispreds = BI.Mispreds;
2366         } else {
2367           // Call
2368           const uint32_t Offset = BI.From.Offset - BlockOffset;
2369           auto &CSI = YamlBB.CallSites.emplace_back(CallSiteInfo{Offset});
2370           CSI.Count = BI.Branches;
2371           CSI.Mispreds = BI.Mispreds;
2372           if (const BinaryData *BD = BC.getBinaryDataByName(BI.To.Name))
2373             YAMLProfileWriter::setCSIDestination(BC, CSI, BD->getSymbol(), BAT,
2374                                                  BI.To.Offset);
2375         }
2376       }
2377       // Set entry counts, similar to DataReader::readProfile.
2378       for (const BranchInfo &BI : Branches.EntryData) {
2379         if (!BlockMap.isInputBlock(BI.To.Offset)) {
2380           if (opts::Verbosity >= 1)
2381             errs() << "BOLT-WARNING: Unexpected EntryData in " << FuncName
2382                    << " at 0x" << Twine::utohexstr(BI.To.Offset) << '\n';
2383           continue;
2384         }
2385         const unsigned BlockIndex = BlockMap.getBBIndex(BI.To.Offset);
2386         YamlBF.Blocks[BlockIndex].ExecCount += BI.Branches;
2387       }
2388       if (PseudoProbeDecoder) {
2389         DenseMap<const MCDecodedPseudoProbeInlineTree *, uint32_t>
2390             InlineTreeNodeId;
2391         if (BF->getGUID()) {
2392           std::tie(YamlBF.InlineTree, InlineTreeNodeId) =
2393               YAMLProfileWriter::convertBFInlineTree(*PseudoProbeDecoder,
2394                                                      InlineTree, BF->getGUID());
2395         }
2396         // Fetch probes belonging to all fragments
2397         const AddressProbesMap &ProbeMap =
2398             PseudoProbeDecoder->getAddress2ProbesMap();
2399         BinaryFunction::FragmentsSetTy Fragments(BF->Fragments);
2400         Fragments.insert(BF);
2401         DenseMap<
2402             uint32_t,
2403             std::vector<std::reference_wrapper<const MCDecodedPseudoProbe>>>
2404             BlockProbes;
2405         for (const BinaryFunction *F : Fragments) {
2406           const uint64_t FuncAddr = F->getAddress();
2407           for (const MCDecodedPseudoProbe &Probe :
2408                ProbeMap.find(FuncAddr, FuncAddr + F->getSize())) {
2409             const uint32_t OutputAddress = Probe.getAddress();
2410             const uint32_t InputOffset = BAT->translate(
2411                 FuncAddr, OutputAddress - FuncAddr, /*IsBranchSrc=*/true);
2412             const unsigned BlockIndex = getBlock(InputOffset).second;
2413             BlockProbes[BlockIndex].emplace_back(Probe);
2414           }
2415         }
2416 
2417         for (auto &[Block, Probes] : BlockProbes) {
2418           YamlBF.Blocks[Block].PseudoProbes =
2419               YAMLProfileWriter::writeBlockProbes(Probes, InlineTreeNodeId);
2420         }
2421       }
2422       // Skip printing if there's no profile data
2423       llvm::erase_if(
2424           YamlBF.Blocks, [](const yaml::bolt::BinaryBasicBlockProfile &YamlBB) {
2425             auto HasCount = [](const auto &SI) { return SI.Count; };
2426             bool HasAnyCount = YamlBB.ExecCount ||
2427                                llvm::any_of(YamlBB.Successors, HasCount) ||
2428                                llvm::any_of(YamlBB.CallSites, HasCount);
2429             return !HasAnyCount;
2430           });
2431       BP.Functions.emplace_back(YamlBF);
2432     }
2433   }
2434 
2435   // Write the profile.
2436   yaml::Output Out(OutFile, nullptr, 0);
2437   Out << BP;
2438   return std::error_code();
2439 }
2440 
2441 void DataAggregator::dump() const { DataReader::dump(); }
2442 
2443 void DataAggregator::dump(const LBREntry &LBR) const {
2444   Diag << "From: " << Twine::utohexstr(LBR.From)
2445        << " To: " << Twine::utohexstr(LBR.To) << " Mispred? " << LBR.Mispred
2446        << "\n";
2447 }
2448 
2449 void DataAggregator::dump(const PerfBranchSample &Sample) const {
2450   Diag << "Sample LBR entries: " << Sample.LBR.size() << "\n";
2451   for (const LBREntry &LBR : Sample.LBR)
2452     dump(LBR);
2453 }
2454 
2455 void DataAggregator::dump(const PerfMemSample &Sample) const {
2456   Diag << "Sample mem entries: " << Sample.PC << ": " << Sample.Addr << "\n";
2457 }
2458