1 //===- bolt/Profile/DataAggregator.cpp - Perf data aggregator -------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This family of functions reads profile data written by perf record, 10 // aggregate it and then write it back to an output file. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "bolt/Profile/DataAggregator.h" 15 #include "bolt/Core/BinaryContext.h" 16 #include "bolt/Core/BinaryFunction.h" 17 #include "bolt/Passes/BinaryPasses.h" 18 #include "bolt/Profile/BoltAddressTranslation.h" 19 #include "bolt/Profile/Heatmap.h" 20 #include "bolt/Profile/YAMLProfileWriter.h" 21 #include "bolt/Utils/CommandLineOpts.h" 22 #include "bolt/Utils/Utils.h" 23 #include "llvm/ADT/STLExtras.h" 24 #include "llvm/ADT/ScopeExit.h" 25 #include "llvm/Support/CommandLine.h" 26 #include "llvm/Support/Compiler.h" 27 #include "llvm/Support/Debug.h" 28 #include "llvm/Support/Errc.h" 29 #include "llvm/Support/FileSystem.h" 30 #include "llvm/Support/Process.h" 31 #include "llvm/Support/Program.h" 32 #include "llvm/Support/Regex.h" 33 #include "llvm/Support/Timer.h" 34 #include "llvm/Support/raw_ostream.h" 35 #include <map> 36 #include <optional> 37 #include <unordered_map> 38 #include <utility> 39 40 #define DEBUG_TYPE "aggregator" 41 42 using namespace llvm; 43 using namespace bolt; 44 45 namespace opts { 46 47 static cl::opt<bool> 48 BasicAggregation("nl", 49 cl::desc("aggregate basic samples (without LBR info)"), 50 cl::cat(AggregatorCategory)); 51 52 static cl::opt<std::string> 53 ITraceAggregation("itrace", 54 cl::desc("Generate LBR info with perf itrace argument"), 55 cl::cat(AggregatorCategory)); 56 57 static cl::opt<bool> 58 FilterMemProfile("filter-mem-profile", 59 cl::desc("if processing a memory profile, filter out stack or heap accesses " 60 "that won't be useful for BOLT to reduce profile file size"), 61 cl::init(true), 62 cl::cat(AggregatorCategory)); 63 64 static cl::opt<unsigned long long> 65 FilterPID("pid", 66 cl::desc("only use samples from process with specified PID"), 67 cl::init(0), 68 cl::Optional, 69 cl::cat(AggregatorCategory)); 70 71 static cl::opt<bool> 72 IgnoreBuildID("ignore-build-id", 73 cl::desc("continue even if build-ids in input binary and perf.data mismatch"), 74 cl::init(false), 75 cl::cat(AggregatorCategory)); 76 77 static cl::opt<bool> IgnoreInterruptLBR( 78 "ignore-interrupt-lbr", 79 cl::desc("ignore kernel interrupt LBR that happens asynchronously"), 80 cl::init(true), cl::cat(AggregatorCategory)); 81 82 static cl::opt<unsigned long long> 83 MaxSamples("max-samples", 84 cl::init(-1ULL), 85 cl::desc("maximum number of samples to read from LBR profile"), 86 cl::Optional, 87 cl::Hidden, 88 cl::cat(AggregatorCategory)); 89 90 extern cl::opt<opts::ProfileFormatKind> ProfileFormat; 91 extern cl::opt<bool> ProfileWritePseudoProbes; 92 extern cl::opt<std::string> SaveProfile; 93 94 cl::opt<bool> ReadPreAggregated( 95 "pa", cl::desc("skip perf and read data from a pre-aggregated file format"), 96 cl::cat(AggregatorCategory)); 97 98 cl::opt<std::string> 99 ReadPerfEvents("perf-script-events", 100 cl::desc("skip perf event collection by supplying a " 101 "perf-script output in a textual format"), 102 cl::ReallyHidden, cl::init(""), cl::cat(AggregatorCategory)); 103 104 static cl::opt<bool> 105 TimeAggregator("time-aggr", 106 cl::desc("time BOLT aggregator"), 107 cl::init(false), 108 cl::ZeroOrMore, 109 cl::cat(AggregatorCategory)); 110 111 } // namespace opts 112 113 namespace { 114 115 const char TimerGroupName[] = "aggregator"; 116 const char TimerGroupDesc[] = "Aggregator"; 117 118 std::vector<SectionNameAndRange> getTextSections(const BinaryContext *BC) { 119 std::vector<SectionNameAndRange> sections; 120 for (BinarySection &Section : BC->sections()) { 121 if (!Section.isText()) 122 continue; 123 if (Section.getSize() == 0) 124 continue; 125 sections.push_back( 126 {Section.getName(), Section.getAddress(), Section.getEndAddress()}); 127 } 128 llvm::sort(sections, 129 [](const SectionNameAndRange &A, const SectionNameAndRange &B) { 130 return A.BeginAddress < B.BeginAddress; 131 }); 132 return sections; 133 } 134 } 135 136 constexpr uint64_t DataAggregator::KernelBaseAddr; 137 138 DataAggregator::~DataAggregator() { deleteTempFiles(); } 139 140 namespace { 141 void deleteTempFile(const std::string &FileName) { 142 if (std::error_code Errc = sys::fs::remove(FileName.c_str())) 143 errs() << "PERF2BOLT: failed to delete temporary file " << FileName 144 << " with error " << Errc.message() << "\n"; 145 } 146 } 147 148 void DataAggregator::deleteTempFiles() { 149 for (std::string &FileName : TempFiles) 150 deleteTempFile(FileName); 151 TempFiles.clear(); 152 } 153 154 void DataAggregator::findPerfExecutable() { 155 std::optional<std::string> PerfExecutable = 156 sys::Process::FindInEnvPath("PATH", "perf"); 157 if (!PerfExecutable) { 158 outs() << "PERF2BOLT: No perf executable found!\n"; 159 exit(1); 160 } 161 PerfPath = *PerfExecutable; 162 } 163 164 void DataAggregator::start() { 165 outs() << "PERF2BOLT: Starting data aggregation job for " << Filename << "\n"; 166 167 // Don't launch perf for pre-aggregated files or when perf input is specified 168 // by the user. 169 if (opts::ReadPreAggregated || !opts::ReadPerfEvents.empty()) 170 return; 171 172 findPerfExecutable(); 173 174 if (opts::BasicAggregation) { 175 launchPerfProcess("events without LBR", 176 MainEventsPPI, 177 "script -F pid,event,ip", 178 /*Wait = */false); 179 } else if (!opts::ITraceAggregation.empty()) { 180 std::string ItracePerfScriptArgs = llvm::formatv( 181 "script -F pid,brstack --itrace={0}", opts::ITraceAggregation); 182 launchPerfProcess("branch events with itrace", MainEventsPPI, 183 ItracePerfScriptArgs.c_str(), 184 /*Wait = */ false); 185 } else { 186 launchPerfProcess("branch events", MainEventsPPI, "script -F pid,brstack", 187 /*Wait = */ false); 188 } 189 190 // Note: we launch script for mem events regardless of the option, as the 191 // command fails fairly fast if mem events were not collected. 192 launchPerfProcess("mem events", 193 MemEventsPPI, 194 "script -F pid,event,addr,ip", 195 /*Wait = */false); 196 197 launchPerfProcess("process events", MMapEventsPPI, 198 "script --show-mmap-events --no-itrace", 199 /*Wait = */ false); 200 201 launchPerfProcess("task events", TaskEventsPPI, 202 "script --show-task-events --no-itrace", 203 /*Wait = */ false); 204 } 205 206 void DataAggregator::abort() { 207 if (opts::ReadPreAggregated) 208 return; 209 210 std::string Error; 211 212 // Kill subprocesses in case they are not finished 213 sys::Wait(TaskEventsPPI.PI, 1, &Error); 214 sys::Wait(MMapEventsPPI.PI, 1, &Error); 215 sys::Wait(MainEventsPPI.PI, 1, &Error); 216 sys::Wait(MemEventsPPI.PI, 1, &Error); 217 218 deleteTempFiles(); 219 220 exit(1); 221 } 222 223 void DataAggregator::launchPerfProcess(StringRef Name, PerfProcessInfo &PPI, 224 const char *ArgsString, bool Wait) { 225 SmallVector<StringRef, 4> Argv; 226 227 outs() << "PERF2BOLT: spawning perf job to read " << Name << '\n'; 228 Argv.push_back(PerfPath.data()); 229 230 StringRef(ArgsString).split(Argv, ' '); 231 Argv.push_back("-f"); 232 Argv.push_back("-i"); 233 Argv.push_back(Filename.c_str()); 234 235 if (std::error_code Errc = 236 sys::fs::createTemporaryFile("perf.script", "out", PPI.StdoutPath)) { 237 errs() << "PERF2BOLT: failed to create temporary file " << PPI.StdoutPath 238 << " with error " << Errc.message() << "\n"; 239 exit(1); 240 } 241 TempFiles.push_back(PPI.StdoutPath.data()); 242 243 if (std::error_code Errc = 244 sys::fs::createTemporaryFile("perf.script", "err", PPI.StderrPath)) { 245 errs() << "PERF2BOLT: failed to create temporary file " << PPI.StderrPath 246 << " with error " << Errc.message() << "\n"; 247 exit(1); 248 } 249 TempFiles.push_back(PPI.StderrPath.data()); 250 251 std::optional<StringRef> Redirects[] = { 252 std::nullopt, // Stdin 253 StringRef(PPI.StdoutPath.data()), // Stdout 254 StringRef(PPI.StderrPath.data())}; // Stderr 255 256 LLVM_DEBUG({ 257 dbgs() << "Launching perf: "; 258 for (StringRef Arg : Argv) 259 dbgs() << Arg << " "; 260 dbgs() << " 1> " << PPI.StdoutPath.data() << " 2> " << PPI.StderrPath.data() 261 << "\n"; 262 }); 263 264 if (Wait) 265 PPI.PI.ReturnCode = sys::ExecuteAndWait(PerfPath.data(), Argv, 266 /*envp*/ std::nullopt, Redirects); 267 else 268 PPI.PI = sys::ExecuteNoWait(PerfPath.data(), Argv, /*envp*/ std::nullopt, 269 Redirects); 270 } 271 272 void DataAggregator::processFileBuildID(StringRef FileBuildID) { 273 PerfProcessInfo BuildIDProcessInfo; 274 launchPerfProcess("buildid list", 275 BuildIDProcessInfo, 276 "buildid-list", 277 /*Wait = */true); 278 279 if (BuildIDProcessInfo.PI.ReturnCode != 0) { 280 ErrorOr<std::unique_ptr<MemoryBuffer>> MB = 281 MemoryBuffer::getFileOrSTDIN(BuildIDProcessInfo.StderrPath.data()); 282 StringRef ErrBuf = (*MB)->getBuffer(); 283 284 errs() << "PERF-ERROR: return code " << BuildIDProcessInfo.PI.ReturnCode 285 << '\n'; 286 errs() << ErrBuf; 287 return; 288 } 289 290 ErrorOr<std::unique_ptr<MemoryBuffer>> MB = 291 MemoryBuffer::getFileOrSTDIN(BuildIDProcessInfo.StdoutPath.data()); 292 if (std::error_code EC = MB.getError()) { 293 errs() << "Cannot open " << BuildIDProcessInfo.StdoutPath.data() << ": " 294 << EC.message() << "\n"; 295 return; 296 } 297 298 FileBuf = std::move(*MB); 299 ParsingBuf = FileBuf->getBuffer(); 300 301 std::optional<StringRef> FileName = getFileNameForBuildID(FileBuildID); 302 if (!FileName) { 303 if (hasAllBuildIDs()) { 304 errs() << "PERF2BOLT-ERROR: failed to match build-id from perf output. " 305 "This indicates the input binary supplied for data aggregation " 306 "is not the same recorded by perf when collecting profiling " 307 "data, or there were no samples recorded for the binary. " 308 "Use -ignore-build-id option to override.\n"; 309 if (!opts::IgnoreBuildID) 310 abort(); 311 } else { 312 errs() << "PERF2BOLT-WARNING: build-id will not be checked because perf " 313 "data was recorded without it\n"; 314 return; 315 } 316 } else if (*FileName != llvm::sys::path::filename(BC->getFilename())) { 317 errs() << "PERF2BOLT-WARNING: build-id matched a different file name\n"; 318 BuildIDBinaryName = std::string(*FileName); 319 } else { 320 outs() << "PERF2BOLT: matched build-id and file name\n"; 321 } 322 } 323 324 bool DataAggregator::checkPerfDataMagic(StringRef FileName) { 325 if (opts::ReadPreAggregated) 326 return true; 327 328 Expected<sys::fs::file_t> FD = sys::fs::openNativeFileForRead(FileName); 329 if (!FD) { 330 consumeError(FD.takeError()); 331 return false; 332 } 333 334 char Buf[7] = {0, 0, 0, 0, 0, 0, 0}; 335 336 auto Close = make_scope_exit([&] { sys::fs::closeFile(*FD); }); 337 Expected<size_t> BytesRead = sys::fs::readNativeFileSlice( 338 *FD, MutableArrayRef(Buf, sizeof(Buf)), 0); 339 if (!BytesRead) { 340 consumeError(BytesRead.takeError()); 341 return false; 342 } 343 344 if (*BytesRead != 7) 345 return false; 346 347 if (strncmp(Buf, "PERFILE", 7) == 0) 348 return true; 349 return false; 350 } 351 352 void DataAggregator::parsePreAggregated() { 353 std::string Error; 354 355 ErrorOr<std::unique_ptr<MemoryBuffer>> MB = 356 MemoryBuffer::getFileOrSTDIN(Filename); 357 if (std::error_code EC = MB.getError()) { 358 errs() << "PERF2BOLT-ERROR: cannot open " << Filename << ": " 359 << EC.message() << "\n"; 360 exit(1); 361 } 362 363 FileBuf = std::move(*MB); 364 ParsingBuf = FileBuf->getBuffer(); 365 Col = 0; 366 Line = 1; 367 if (parsePreAggregatedLBRSamples()) { 368 errs() << "PERF2BOLT: failed to parse samples\n"; 369 exit(1); 370 } 371 } 372 373 void DataAggregator::filterBinaryMMapInfo() { 374 if (opts::FilterPID) { 375 auto MMapInfoIter = BinaryMMapInfo.find(opts::FilterPID); 376 if (MMapInfoIter != BinaryMMapInfo.end()) { 377 MMapInfo MMap = MMapInfoIter->second; 378 BinaryMMapInfo.clear(); 379 BinaryMMapInfo.insert(std::make_pair(MMap.PID, MMap)); 380 } else { 381 if (errs().has_colors()) 382 errs().changeColor(raw_ostream::RED); 383 errs() << "PERF2BOLT-ERROR: could not find a profile matching PID \"" 384 << opts::FilterPID << "\"" 385 << " for binary \"" << BC->getFilename() << "\"."; 386 assert(!BinaryMMapInfo.empty() && "No memory map for matching binary"); 387 errs() << " Profile for the following process is available:\n"; 388 for (std::pair<const uint64_t, MMapInfo> &MMI : BinaryMMapInfo) 389 outs() << " " << MMI.second.PID 390 << (MMI.second.Forked ? " (forked)\n" : "\n"); 391 392 if (errs().has_colors()) 393 errs().resetColor(); 394 395 exit(1); 396 } 397 } 398 } 399 400 int DataAggregator::prepareToParse(StringRef Name, PerfProcessInfo &Process, 401 PerfProcessErrorCallbackTy Callback) { 402 if (!opts::ReadPerfEvents.empty()) { 403 outs() << "PERF2BOLT: using pre-processed perf events for '" << Name 404 << "' (perf-script-events)\n"; 405 ParsingBuf = opts::ReadPerfEvents; 406 return 0; 407 } 408 409 std::string Error; 410 outs() << "PERF2BOLT: waiting for perf " << Name 411 << " collection to finish...\n"; 412 sys::ProcessInfo PI = sys::Wait(Process.PI, std::nullopt, &Error); 413 414 if (!Error.empty()) { 415 errs() << "PERF-ERROR: " << PerfPath << ": " << Error << "\n"; 416 deleteTempFiles(); 417 exit(1); 418 } 419 420 if (PI.ReturnCode != 0) { 421 ErrorOr<std::unique_ptr<MemoryBuffer>> ErrorMB = 422 MemoryBuffer::getFileOrSTDIN(Process.StderrPath.data()); 423 StringRef ErrBuf = (*ErrorMB)->getBuffer(); 424 425 deleteTempFiles(); 426 Callback(PI.ReturnCode, ErrBuf); 427 return PI.ReturnCode; 428 } 429 430 ErrorOr<std::unique_ptr<MemoryBuffer>> MB = 431 MemoryBuffer::getFileOrSTDIN(Process.StdoutPath.data()); 432 if (std::error_code EC = MB.getError()) { 433 errs() << "Cannot open " << Process.StdoutPath.data() << ": " 434 << EC.message() << "\n"; 435 deleteTempFiles(); 436 exit(1); 437 } 438 439 FileBuf = std::move(*MB); 440 ParsingBuf = FileBuf->getBuffer(); 441 Col = 0; 442 Line = 1; 443 return PI.ReturnCode; 444 } 445 446 Error DataAggregator::preprocessProfile(BinaryContext &BC) { 447 this->BC = &BC; 448 449 if (opts::ReadPreAggregated) { 450 parsePreAggregated(); 451 return Error::success(); 452 } 453 454 if (std::optional<StringRef> FileBuildID = BC.getFileBuildID()) { 455 outs() << "BOLT-INFO: binary build-id is: " << *FileBuildID << "\n"; 456 processFileBuildID(*FileBuildID); 457 } else { 458 errs() << "BOLT-WARNING: build-id will not be checked because we could " 459 "not read one from input binary\n"; 460 } 461 462 auto ErrorCallback = [](int ReturnCode, StringRef ErrBuf) { 463 errs() << "PERF-ERROR: return code " << ReturnCode << "\n" << ErrBuf; 464 exit(1); 465 }; 466 467 auto MemEventsErrorCallback = [&](int ReturnCode, StringRef ErrBuf) { 468 Regex NoData("Samples for '.*' event do not have ADDR attribute set. " 469 "Cannot print 'addr' field."); 470 if (!NoData.match(ErrBuf)) 471 ErrorCallback(ReturnCode, ErrBuf); 472 }; 473 474 if (BC.IsLinuxKernel) { 475 // Current MMap parsing logic does not work with linux kernel. 476 // MMap entries for linux kernel uses PERF_RECORD_MMAP 477 // format instead of typical PERF_RECORD_MMAP2 format. 478 // Since linux kernel address mapping is absolute (same as 479 // in the ELF file), we avoid parsing MMap in linux kernel mode. 480 // While generating optimized linux kernel binary, we may need 481 // to parse MMap entries. 482 483 // In linux kernel mode, we analyze and optimize 484 // all linux kernel binary instructions, irrespective 485 // of whether they are due to system calls or due to 486 // interrupts. Therefore, we cannot ignore interrupt 487 // in Linux kernel mode. 488 opts::IgnoreInterruptLBR = false; 489 } else { 490 prepareToParse("mmap events", MMapEventsPPI, ErrorCallback); 491 if (parseMMapEvents()) 492 errs() << "PERF2BOLT: failed to parse mmap events\n"; 493 } 494 495 prepareToParse("task events", TaskEventsPPI, ErrorCallback); 496 if (parseTaskEvents()) 497 errs() << "PERF2BOLT: failed to parse task events\n"; 498 499 filterBinaryMMapInfo(); 500 prepareToParse("events", MainEventsPPI, ErrorCallback); 501 502 if (opts::HeatmapMode) { 503 if (std::error_code EC = printLBRHeatMap()) { 504 errs() << "ERROR: failed to print heat map: " << EC.message() << '\n'; 505 exit(1); 506 } 507 exit(0); 508 } 509 510 if ((!opts::BasicAggregation && parseBranchEvents()) || 511 (opts::BasicAggregation && parseBasicEvents())) 512 errs() << "PERF2BOLT: failed to parse samples\n"; 513 514 // Special handling for memory events 515 if (prepareToParse("mem events", MemEventsPPI, MemEventsErrorCallback)) 516 return Error::success(); 517 518 if (const std::error_code EC = parseMemEvents()) 519 errs() << "PERF2BOLT: failed to parse memory events: " << EC.message() 520 << '\n'; 521 522 deleteTempFiles(); 523 524 return Error::success(); 525 } 526 527 Error DataAggregator::readProfile(BinaryContext &BC) { 528 processProfile(BC); 529 530 for (auto &BFI : BC.getBinaryFunctions()) { 531 BinaryFunction &Function = BFI.second; 532 convertBranchData(Function); 533 } 534 535 if (opts::AggregateOnly) { 536 if (opts::ProfileFormat == opts::ProfileFormatKind::PF_Fdata) 537 if (std::error_code EC = writeAggregatedFile(opts::OutputFilename)) 538 report_error("cannot create output data file", EC); 539 540 // BAT YAML is handled by DataAggregator since normal YAML output requires 541 // CFG which is not available in BAT mode. 542 if (usesBAT()) { 543 if (opts::ProfileFormat == opts::ProfileFormatKind::PF_YAML) 544 if (std::error_code EC = writeBATYAML(BC, opts::OutputFilename)) 545 report_error("cannot create output data file", EC); 546 if (!opts::SaveProfile.empty()) 547 if (std::error_code EC = writeBATYAML(BC, opts::SaveProfile)) 548 report_error("cannot create output data file", EC); 549 } 550 } 551 552 return Error::success(); 553 } 554 555 bool DataAggregator::mayHaveProfileData(const BinaryFunction &Function) { 556 return Function.hasProfileAvailable(); 557 } 558 559 void DataAggregator::processProfile(BinaryContext &BC) { 560 if (opts::ReadPreAggregated) 561 processPreAggregated(); 562 else if (opts::BasicAggregation) 563 processBasicEvents(); 564 else 565 processBranchEvents(); 566 567 processMemEvents(); 568 569 // Mark all functions with registered events as having a valid profile. 570 const auto Flags = opts::BasicAggregation ? BinaryFunction::PF_SAMPLE 571 : BinaryFunction::PF_LBR; 572 for (auto &BFI : BC.getBinaryFunctions()) { 573 BinaryFunction &BF = BFI.second; 574 FuncBranchData *FBD = getBranchData(BF); 575 if (FBD || getFuncSampleData(BF.getNames())) { 576 BF.markProfiled(Flags); 577 if (FBD) 578 BF.RawBranchCount = FBD->getNumExecutedBranches(); 579 } 580 } 581 582 for (auto &FuncBranches : NamesToBranches) 583 llvm::stable_sort(FuncBranches.second.Data); 584 585 for (auto &MemEvents : NamesToMemEvents) 586 llvm::stable_sort(MemEvents.second.Data); 587 588 // Release intermediate storage. 589 clear(BranchLBRs); 590 clear(FallthroughLBRs); 591 clear(AggregatedLBRs); 592 clear(BasicSamples); 593 clear(MemSamples); 594 } 595 596 BinaryFunction * 597 DataAggregator::getBinaryFunctionContainingAddress(uint64_t Address) const { 598 if (!BC->containsAddress(Address)) 599 return nullptr; 600 601 return BC->getBinaryFunctionContainingAddress(Address, /*CheckPastEnd=*/false, 602 /*UseMaxSize=*/true); 603 } 604 605 BinaryFunction * 606 DataAggregator::getBATParentFunction(const BinaryFunction &Func) const { 607 if (BAT) 608 if (const uint64_t HotAddr = BAT->fetchParentAddress(Func.getAddress())) 609 return getBinaryFunctionContainingAddress(HotAddr); 610 return nullptr; 611 } 612 613 StringRef DataAggregator::getLocationName(const BinaryFunction &Func, 614 bool BAT) { 615 if (!BAT) 616 return Func.getOneName(); 617 618 const BinaryFunction *OrigFunc = &Func; 619 // If it is a local function, prefer the name containing the file name where 620 // the local function was declared 621 for (StringRef AlternativeName : OrigFunc->getNames()) { 622 size_t FileNameIdx = AlternativeName.find('/'); 623 // Confirm the alternative name has the pattern Symbol/FileName/1 before 624 // using it 625 if (FileNameIdx == StringRef::npos || 626 AlternativeName.find('/', FileNameIdx + 1) == StringRef::npos) 627 continue; 628 return AlternativeName; 629 } 630 return OrigFunc->getOneName(); 631 } 632 633 bool DataAggregator::doSample(BinaryFunction &OrigFunc, uint64_t Address, 634 uint64_t Count) { 635 BinaryFunction *ParentFunc = getBATParentFunction(OrigFunc); 636 BinaryFunction &Func = ParentFunc ? *ParentFunc : OrigFunc; 637 if (ParentFunc) 638 NumColdSamples += Count; 639 640 auto I = NamesToSamples.find(Func.getOneName()); 641 if (I == NamesToSamples.end()) { 642 bool Success; 643 StringRef LocName = getLocationName(Func, BAT); 644 std::tie(I, Success) = NamesToSamples.insert( 645 std::make_pair(Func.getOneName(), 646 FuncSampleData(LocName, FuncSampleData::ContainerTy()))); 647 } 648 649 Address -= Func.getAddress(); 650 if (BAT) 651 Address = BAT->translate(Func.getAddress(), Address, /*IsBranchSrc=*/false); 652 653 I->second.bumpCount(Address, Count); 654 return true; 655 } 656 657 bool DataAggregator::doIntraBranch(BinaryFunction &Func, uint64_t From, 658 uint64_t To, uint64_t Count, 659 uint64_t Mispreds) { 660 FuncBranchData *AggrData = getBranchData(Func); 661 if (!AggrData) { 662 AggrData = &NamesToBranches[Func.getOneName()]; 663 AggrData->Name = getLocationName(Func, BAT); 664 setBranchData(Func, AggrData); 665 } 666 667 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: bumpBranchCount: " 668 << formatv("{0} @ {1:x} -> {0} @ {2:x}\n", Func, From, To)); 669 AggrData->bumpBranchCount(From, To, Count, Mispreds); 670 return true; 671 } 672 673 bool DataAggregator::doInterBranch(BinaryFunction *FromFunc, 674 BinaryFunction *ToFunc, uint64_t From, 675 uint64_t To, uint64_t Count, 676 uint64_t Mispreds) { 677 FuncBranchData *FromAggrData = nullptr; 678 FuncBranchData *ToAggrData = nullptr; 679 StringRef SrcFunc; 680 StringRef DstFunc; 681 if (FromFunc) { 682 SrcFunc = getLocationName(*FromFunc, BAT); 683 FromAggrData = getBranchData(*FromFunc); 684 if (!FromAggrData) { 685 FromAggrData = &NamesToBranches[FromFunc->getOneName()]; 686 FromAggrData->Name = SrcFunc; 687 setBranchData(*FromFunc, FromAggrData); 688 } 689 690 recordExit(*FromFunc, From, Mispreds, Count); 691 } 692 if (ToFunc) { 693 DstFunc = getLocationName(*ToFunc, BAT); 694 ToAggrData = getBranchData(*ToFunc); 695 if (!ToAggrData) { 696 ToAggrData = &NamesToBranches[ToFunc->getOneName()]; 697 ToAggrData->Name = DstFunc; 698 setBranchData(*ToFunc, ToAggrData); 699 } 700 701 recordEntry(*ToFunc, To, Mispreds, Count); 702 } 703 704 if (FromAggrData) 705 FromAggrData->bumpCallCount(From, Location(!DstFunc.empty(), DstFunc, To), 706 Count, Mispreds); 707 if (ToAggrData) 708 ToAggrData->bumpEntryCount(Location(!SrcFunc.empty(), SrcFunc, From), To, 709 Count, Mispreds); 710 return true; 711 } 712 713 bool DataAggregator::doBranch(uint64_t From, uint64_t To, uint64_t Count, 714 uint64_t Mispreds, bool IsPreagg) { 715 // Returns whether \p Offset in \p Func contains a return instruction. 716 auto checkReturn = [&](const BinaryFunction &Func, const uint64_t Offset) { 717 auto isReturn = [&](auto MI) { return MI && BC->MIB->isReturn(*MI); }; 718 return Func.hasInstructions() 719 ? isReturn(Func.getInstructionAtOffset(Offset)) 720 : isReturn(Func.disassembleInstructionAtOffset(Offset)); 721 }; 722 723 // Returns whether \p Offset in \p Func may be a call continuation excluding 724 // entry points and landing pads. 725 auto checkCallCont = [&](const BinaryFunction &Func, const uint64_t Offset) { 726 // No call continuation at a function start. 727 if (!Offset) 728 return false; 729 730 // FIXME: support BAT case where the function might be in empty state 731 // (split fragments declared non-simple). 732 if (!Func.hasCFG()) 733 return false; 734 735 // The offset should not be an entry point or a landing pad. 736 const BinaryBasicBlock *ContBB = Func.getBasicBlockAtOffset(Offset); 737 return ContBB && !ContBB->isEntryPoint() && !ContBB->isLandingPad(); 738 }; 739 740 // Mutates \p Addr to an offset into the containing function, performing BAT 741 // offset translation and parent lookup. 742 // 743 // Returns the containing function (or BAT parent) and whether the address 744 // corresponds to a return (if \p IsFrom) or a call continuation (otherwise). 745 auto handleAddress = [&](uint64_t &Addr, bool IsFrom) { 746 BinaryFunction *Func = getBinaryFunctionContainingAddress(Addr); 747 if (!Func) 748 return std::pair{Func, false}; 749 750 Addr -= Func->getAddress(); 751 752 bool IsRetOrCallCont = 753 IsFrom ? checkReturn(*Func, Addr) : checkCallCont(*Func, Addr); 754 755 if (BAT) 756 Addr = BAT->translate(Func->getAddress(), Addr, IsFrom); 757 758 BinaryFunction *ParentFunc = getBATParentFunction(*Func); 759 if (!ParentFunc) 760 return std::pair{Func, IsRetOrCallCont}; 761 762 if (IsFrom) 763 NumColdSamples += Count; 764 765 return std::pair{ParentFunc, IsRetOrCallCont}; 766 }; 767 768 uint64_t ToOrig = To; 769 auto [FromFunc, IsReturn] = handleAddress(From, /*IsFrom*/ true); 770 auto [ToFunc, IsCallCont] = handleAddress(To, /*IsFrom*/ false); 771 if (!FromFunc && !ToFunc) 772 return false; 773 774 // Record call to continuation trace. 775 if (IsPreagg && FromFunc != ToFunc && (IsReturn || IsCallCont)) { 776 LBREntry First{ToOrig - 1, ToOrig - 1, false}; 777 LBREntry Second{ToOrig, ToOrig, false}; 778 return doTrace(First, Second, Count); 779 } 780 // Ignore returns. 781 if (IsReturn) 782 return true; 783 784 // Treat recursive control transfers as inter-branches. 785 if (FromFunc == ToFunc && To != 0) { 786 recordBranch(*FromFunc, From, To, Count, Mispreds); 787 return doIntraBranch(*FromFunc, From, To, Count, Mispreds); 788 } 789 790 return doInterBranch(FromFunc, ToFunc, From, To, Count, Mispreds); 791 } 792 793 bool DataAggregator::doTrace(const LBREntry &First, const LBREntry &Second, 794 uint64_t Count) { 795 BinaryFunction *FromFunc = getBinaryFunctionContainingAddress(First.To); 796 BinaryFunction *ToFunc = getBinaryFunctionContainingAddress(Second.From); 797 if (!FromFunc || !ToFunc) { 798 LLVM_DEBUG({ 799 dbgs() << "Out of range trace starting in "; 800 if (FromFunc) 801 dbgs() << formatv("{0} @ {1:x}", *FromFunc, 802 First.To - FromFunc->getAddress()); 803 else 804 dbgs() << Twine::utohexstr(First.To); 805 dbgs() << " and ending in "; 806 if (ToFunc) 807 dbgs() << formatv("{0} @ {1:x}", *ToFunc, 808 Second.From - ToFunc->getAddress()); 809 else 810 dbgs() << Twine::utohexstr(Second.From); 811 dbgs() << '\n'; 812 }); 813 NumLongRangeTraces += Count; 814 return false; 815 } 816 if (FromFunc != ToFunc) { 817 NumInvalidTraces += Count; 818 LLVM_DEBUG({ 819 dbgs() << "Invalid trace starting in " << FromFunc->getPrintName() 820 << formatv(" @ {0:x}", First.To - FromFunc->getAddress()) 821 << " and ending in " << ToFunc->getPrintName() 822 << formatv(" @ {0:x}\n", Second.From - ToFunc->getAddress()); 823 }); 824 return false; 825 } 826 827 // Set ParentFunc to BAT parent function or FromFunc itself. 828 BinaryFunction *ParentFunc = getBATParentFunction(*FromFunc); 829 if (!ParentFunc) 830 ParentFunc = FromFunc; 831 ParentFunc->SampleCountInBytes += Count * (Second.From - First.To); 832 833 std::optional<BoltAddressTranslation::FallthroughListTy> FTs = 834 BAT ? BAT->getFallthroughsInTrace(FromFunc->getAddress(), First.To, 835 Second.From) 836 : getFallthroughsInTrace(*FromFunc, First, Second, Count); 837 if (!FTs) { 838 LLVM_DEBUG( 839 dbgs() << "Invalid trace starting in " << FromFunc->getPrintName() 840 << " @ " << Twine::utohexstr(First.To - FromFunc->getAddress()) 841 << " and ending in " << ToFunc->getPrintName() << " @ " 842 << ToFunc->getPrintName() << " @ " 843 << Twine::utohexstr(Second.From - ToFunc->getAddress()) << '\n'); 844 NumInvalidTraces += Count; 845 return false; 846 } 847 848 LLVM_DEBUG(dbgs() << "Processing " << FTs->size() << " fallthroughs for " 849 << FromFunc->getPrintName() << ":" 850 << Twine::utohexstr(First.To) << " to " 851 << Twine::utohexstr(Second.From) << ".\n"); 852 for (auto [From, To] : *FTs) { 853 if (BAT) { 854 From = BAT->translate(FromFunc->getAddress(), From, /*IsBranchSrc=*/true); 855 To = BAT->translate(FromFunc->getAddress(), To, /*IsBranchSrc=*/false); 856 } 857 doIntraBranch(*ParentFunc, From, To, Count, false); 858 } 859 860 return true; 861 } 862 863 std::optional<SmallVector<std::pair<uint64_t, uint64_t>, 16>> 864 DataAggregator::getFallthroughsInTrace(BinaryFunction &BF, 865 const LBREntry &FirstLBR, 866 const LBREntry &SecondLBR, 867 uint64_t Count) const { 868 SmallVector<std::pair<uint64_t, uint64_t>, 16> Branches; 869 870 BinaryContext &BC = BF.getBinaryContext(); 871 872 if (!BF.isSimple()) 873 return std::nullopt; 874 875 assert(BF.hasCFG() && "can only record traces in CFG state"); 876 877 // Offsets of the trace within this function. 878 const uint64_t From = FirstLBR.To - BF.getAddress(); 879 const uint64_t To = SecondLBR.From - BF.getAddress(); 880 881 if (From > To) 882 return std::nullopt; 883 884 const BinaryBasicBlock *FromBB = BF.getBasicBlockContainingOffset(From); 885 const BinaryBasicBlock *ToBB = BF.getBasicBlockContainingOffset(To); 886 887 if (!FromBB || !ToBB) 888 return std::nullopt; 889 890 // Adjust FromBB if the first LBR is a return from the last instruction in 891 // the previous block (that instruction should be a call). 892 if (From == FromBB->getOffset() && !BF.containsAddress(FirstLBR.From) && 893 !FromBB->isEntryPoint() && !FromBB->isLandingPad()) { 894 const BinaryBasicBlock *PrevBB = 895 BF.getLayout().getBlock(FromBB->getIndex() - 1); 896 if (PrevBB->getSuccessor(FromBB->getLabel())) { 897 const MCInst *Instr = PrevBB->getLastNonPseudoInstr(); 898 if (Instr && BC.MIB->isCall(*Instr)) 899 FromBB = PrevBB; 900 else 901 LLVM_DEBUG(dbgs() << "invalid incoming LBR (no call): " << FirstLBR 902 << '\n'); 903 } else { 904 LLVM_DEBUG(dbgs() << "invalid incoming LBR: " << FirstLBR << '\n'); 905 } 906 } 907 908 // Fill out information for fall-through edges. The From and To could be 909 // within the same basic block, e.g. when two call instructions are in the 910 // same block. In this case we skip the processing. 911 if (FromBB == ToBB) 912 return Branches; 913 914 // Process blocks in the original layout order. 915 BinaryBasicBlock *BB = BF.getLayout().getBlock(FromBB->getIndex()); 916 assert(BB == FromBB && "index mismatch"); 917 while (BB != ToBB) { 918 BinaryBasicBlock *NextBB = BF.getLayout().getBlock(BB->getIndex() + 1); 919 assert((NextBB && NextBB->getOffset() > BB->getOffset()) && "bad layout"); 920 921 // Check for bad LBRs. 922 if (!BB->getSuccessor(NextBB->getLabel())) { 923 LLVM_DEBUG(dbgs() << "no fall-through for the trace:\n" 924 << " " << FirstLBR << '\n' 925 << " " << SecondLBR << '\n'); 926 return std::nullopt; 927 } 928 929 const MCInst *Instr = BB->getLastNonPseudoInstr(); 930 uint64_t Offset = 0; 931 if (Instr) 932 Offset = BC.MIB->getOffsetWithDefault(*Instr, 0); 933 else 934 Offset = BB->getOffset(); 935 936 Branches.emplace_back(Offset, NextBB->getOffset()); 937 938 BB = NextBB; 939 } 940 941 // Record fall-through jumps 942 for (const auto &[FromOffset, ToOffset] : Branches) { 943 BinaryBasicBlock *FromBB = BF.getBasicBlockContainingOffset(FromOffset); 944 BinaryBasicBlock *ToBB = BF.getBasicBlockAtOffset(ToOffset); 945 assert(FromBB && ToBB); 946 BinaryBasicBlock::BinaryBranchInfo &BI = FromBB->getBranchInfo(*ToBB); 947 BI.Count += Count; 948 } 949 950 return Branches; 951 } 952 953 bool DataAggregator::recordEntry(BinaryFunction &BF, uint64_t To, bool Mispred, 954 uint64_t Count) const { 955 if (To > BF.getSize()) 956 return false; 957 958 if (!BF.hasProfile()) 959 BF.ExecutionCount = 0; 960 961 BinaryBasicBlock *EntryBB = nullptr; 962 if (To == 0) { 963 BF.ExecutionCount += Count; 964 if (!BF.empty()) 965 EntryBB = &BF.front(); 966 } else if (BinaryBasicBlock *BB = BF.getBasicBlockAtOffset(To)) { 967 if (BB->isEntryPoint()) 968 EntryBB = BB; 969 } 970 971 if (EntryBB) 972 EntryBB->setExecutionCount(EntryBB->getKnownExecutionCount() + Count); 973 974 return true; 975 } 976 977 bool DataAggregator::recordExit(BinaryFunction &BF, uint64_t From, bool Mispred, 978 uint64_t Count) const { 979 if (!BF.isSimple() || From > BF.getSize()) 980 return false; 981 982 if (!BF.hasProfile()) 983 BF.ExecutionCount = 0; 984 985 return true; 986 } 987 988 ErrorOr<LBREntry> DataAggregator::parseLBREntry() { 989 LBREntry Res; 990 ErrorOr<StringRef> FromStrRes = parseString('/'); 991 if (std::error_code EC = FromStrRes.getError()) 992 return EC; 993 StringRef OffsetStr = FromStrRes.get(); 994 if (OffsetStr.getAsInteger(0, Res.From)) { 995 reportError("expected hexadecimal number with From address"); 996 Diag << "Found: " << OffsetStr << "\n"; 997 return make_error_code(llvm::errc::io_error); 998 } 999 1000 ErrorOr<StringRef> ToStrRes = parseString('/'); 1001 if (std::error_code EC = ToStrRes.getError()) 1002 return EC; 1003 OffsetStr = ToStrRes.get(); 1004 if (OffsetStr.getAsInteger(0, Res.To)) { 1005 reportError("expected hexadecimal number with To address"); 1006 Diag << "Found: " << OffsetStr << "\n"; 1007 return make_error_code(llvm::errc::io_error); 1008 } 1009 1010 ErrorOr<StringRef> MispredStrRes = parseString('/'); 1011 if (std::error_code EC = MispredStrRes.getError()) 1012 return EC; 1013 StringRef MispredStr = MispredStrRes.get(); 1014 if (MispredStr.size() != 1 || 1015 (MispredStr[0] != 'P' && MispredStr[0] != 'M' && MispredStr[0] != '-')) { 1016 reportError("expected single char for mispred bit"); 1017 Diag << "Found: " << MispredStr << "\n"; 1018 return make_error_code(llvm::errc::io_error); 1019 } 1020 Res.Mispred = MispredStr[0] == 'M'; 1021 1022 static bool MispredWarning = true; 1023 if (MispredStr[0] == '-' && MispredWarning) { 1024 errs() << "PERF2BOLT-WARNING: misprediction bit is missing in profile\n"; 1025 MispredWarning = false; 1026 } 1027 1028 ErrorOr<StringRef> Rest = parseString(FieldSeparator, true); 1029 if (std::error_code EC = Rest.getError()) 1030 return EC; 1031 if (Rest.get().size() < 5) { 1032 reportError("expected rest of LBR entry"); 1033 Diag << "Found: " << Rest.get() << "\n"; 1034 return make_error_code(llvm::errc::io_error); 1035 } 1036 return Res; 1037 } 1038 1039 bool DataAggregator::checkAndConsumeFS() { 1040 if (ParsingBuf[0] != FieldSeparator) 1041 return false; 1042 1043 ParsingBuf = ParsingBuf.drop_front(1); 1044 Col += 1; 1045 return true; 1046 } 1047 1048 void DataAggregator::consumeRestOfLine() { 1049 size_t LineEnd = ParsingBuf.find_first_of('\n'); 1050 if (LineEnd == StringRef::npos) { 1051 ParsingBuf = StringRef(); 1052 Col = 0; 1053 Line += 1; 1054 return; 1055 } 1056 ParsingBuf = ParsingBuf.drop_front(LineEnd + 1); 1057 Col = 0; 1058 Line += 1; 1059 } 1060 1061 bool DataAggregator::checkNewLine() { 1062 return ParsingBuf[0] == '\n'; 1063 } 1064 1065 ErrorOr<DataAggregator::PerfBranchSample> DataAggregator::parseBranchSample() { 1066 PerfBranchSample Res; 1067 1068 while (checkAndConsumeFS()) { 1069 } 1070 1071 ErrorOr<int64_t> PIDRes = parseNumberField(FieldSeparator, true); 1072 if (std::error_code EC = PIDRes.getError()) 1073 return EC; 1074 auto MMapInfoIter = BinaryMMapInfo.find(*PIDRes); 1075 if (!BC->IsLinuxKernel && MMapInfoIter == BinaryMMapInfo.end()) { 1076 consumeRestOfLine(); 1077 return make_error_code(errc::no_such_process); 1078 } 1079 1080 if (checkAndConsumeNewLine()) 1081 return Res; 1082 1083 while (!checkAndConsumeNewLine()) { 1084 checkAndConsumeFS(); 1085 1086 ErrorOr<LBREntry> LBRRes = parseLBREntry(); 1087 if (std::error_code EC = LBRRes.getError()) 1088 return EC; 1089 LBREntry LBR = LBRRes.get(); 1090 if (ignoreKernelInterrupt(LBR)) 1091 continue; 1092 if (!BC->HasFixedLoadAddress) 1093 adjustLBR(LBR, MMapInfoIter->second); 1094 Res.LBR.push_back(LBR); 1095 } 1096 1097 return Res; 1098 } 1099 1100 ErrorOr<DataAggregator::PerfBasicSample> DataAggregator::parseBasicSample() { 1101 while (checkAndConsumeFS()) { 1102 } 1103 1104 ErrorOr<int64_t> PIDRes = parseNumberField(FieldSeparator, true); 1105 if (std::error_code EC = PIDRes.getError()) 1106 return EC; 1107 1108 auto MMapInfoIter = BinaryMMapInfo.find(*PIDRes); 1109 if (MMapInfoIter == BinaryMMapInfo.end()) { 1110 consumeRestOfLine(); 1111 return PerfBasicSample{StringRef(), 0}; 1112 } 1113 1114 while (checkAndConsumeFS()) { 1115 } 1116 1117 ErrorOr<StringRef> Event = parseString(FieldSeparator); 1118 if (std::error_code EC = Event.getError()) 1119 return EC; 1120 1121 while (checkAndConsumeFS()) { 1122 } 1123 1124 ErrorOr<uint64_t> AddrRes = parseHexField(FieldSeparator, true); 1125 if (std::error_code EC = AddrRes.getError()) 1126 return EC; 1127 1128 if (!checkAndConsumeNewLine()) { 1129 reportError("expected end of line"); 1130 return make_error_code(llvm::errc::io_error); 1131 } 1132 1133 uint64_t Address = *AddrRes; 1134 if (!BC->HasFixedLoadAddress) 1135 adjustAddress(Address, MMapInfoIter->second); 1136 1137 return PerfBasicSample{Event.get(), Address}; 1138 } 1139 1140 ErrorOr<DataAggregator::PerfMemSample> DataAggregator::parseMemSample() { 1141 PerfMemSample Res{0, 0}; 1142 1143 while (checkAndConsumeFS()) { 1144 } 1145 1146 ErrorOr<int64_t> PIDRes = parseNumberField(FieldSeparator, true); 1147 if (std::error_code EC = PIDRes.getError()) 1148 return EC; 1149 1150 auto MMapInfoIter = BinaryMMapInfo.find(*PIDRes); 1151 if (MMapInfoIter == BinaryMMapInfo.end()) { 1152 consumeRestOfLine(); 1153 return Res; 1154 } 1155 1156 while (checkAndConsumeFS()) { 1157 } 1158 1159 ErrorOr<StringRef> Event = parseString(FieldSeparator); 1160 if (std::error_code EC = Event.getError()) 1161 return EC; 1162 if (!Event.get().contains("mem-loads")) { 1163 consumeRestOfLine(); 1164 return Res; 1165 } 1166 1167 while (checkAndConsumeFS()) { 1168 } 1169 1170 ErrorOr<uint64_t> AddrRes = parseHexField(FieldSeparator); 1171 if (std::error_code EC = AddrRes.getError()) 1172 return EC; 1173 1174 while (checkAndConsumeFS()) { 1175 } 1176 1177 ErrorOr<uint64_t> PCRes = parseHexField(FieldSeparator, true); 1178 if (std::error_code EC = PCRes.getError()) { 1179 consumeRestOfLine(); 1180 return EC; 1181 } 1182 1183 if (!checkAndConsumeNewLine()) { 1184 reportError("expected end of line"); 1185 return make_error_code(llvm::errc::io_error); 1186 } 1187 1188 uint64_t Address = *AddrRes; 1189 if (!BC->HasFixedLoadAddress) 1190 adjustAddress(Address, MMapInfoIter->second); 1191 1192 return PerfMemSample{PCRes.get(), Address}; 1193 } 1194 1195 ErrorOr<Location> DataAggregator::parseLocationOrOffset() { 1196 auto parseOffset = [this]() -> ErrorOr<Location> { 1197 ErrorOr<uint64_t> Res = parseHexField(FieldSeparator); 1198 if (std::error_code EC = Res.getError()) 1199 return EC; 1200 return Location(Res.get()); 1201 }; 1202 1203 size_t Sep = ParsingBuf.find_first_of(" \n"); 1204 if (Sep == StringRef::npos) 1205 return parseOffset(); 1206 StringRef LookAhead = ParsingBuf.substr(0, Sep); 1207 if (!LookAhead.contains(':')) 1208 return parseOffset(); 1209 1210 ErrorOr<StringRef> BuildID = parseString(':'); 1211 if (std::error_code EC = BuildID.getError()) 1212 return EC; 1213 ErrorOr<uint64_t> Offset = parseHexField(FieldSeparator); 1214 if (std::error_code EC = Offset.getError()) 1215 return EC; 1216 return Location(true, BuildID.get(), Offset.get()); 1217 } 1218 1219 ErrorOr<DataAggregator::AggregatedLBREntry> 1220 DataAggregator::parseAggregatedLBREntry() { 1221 while (checkAndConsumeFS()) { 1222 } 1223 1224 ErrorOr<StringRef> TypeOrErr = parseString(FieldSeparator); 1225 if (std::error_code EC = TypeOrErr.getError()) 1226 return EC; 1227 auto Type = AggregatedLBREntry::BRANCH; 1228 if (TypeOrErr.get() == "B") { 1229 Type = AggregatedLBREntry::BRANCH; 1230 } else if (TypeOrErr.get() == "F") { 1231 Type = AggregatedLBREntry::FT; 1232 } else if (TypeOrErr.get() == "f") { 1233 Type = AggregatedLBREntry::FT_EXTERNAL_ORIGIN; 1234 } else { 1235 reportError("expected B, F or f"); 1236 return make_error_code(llvm::errc::io_error); 1237 } 1238 1239 while (checkAndConsumeFS()) { 1240 } 1241 ErrorOr<Location> From = parseLocationOrOffset(); 1242 if (std::error_code EC = From.getError()) 1243 return EC; 1244 1245 while (checkAndConsumeFS()) { 1246 } 1247 ErrorOr<Location> To = parseLocationOrOffset(); 1248 if (std::error_code EC = To.getError()) 1249 return EC; 1250 1251 while (checkAndConsumeFS()) { 1252 } 1253 ErrorOr<int64_t> Frequency = 1254 parseNumberField(FieldSeparator, Type != AggregatedLBREntry::BRANCH); 1255 if (std::error_code EC = Frequency.getError()) 1256 return EC; 1257 1258 uint64_t Mispreds = 0; 1259 if (Type == AggregatedLBREntry::BRANCH) { 1260 while (checkAndConsumeFS()) { 1261 } 1262 ErrorOr<int64_t> MispredsOrErr = parseNumberField(FieldSeparator, true); 1263 if (std::error_code EC = MispredsOrErr.getError()) 1264 return EC; 1265 Mispreds = static_cast<uint64_t>(MispredsOrErr.get()); 1266 } 1267 1268 if (!checkAndConsumeNewLine()) { 1269 reportError("expected end of line"); 1270 return make_error_code(llvm::errc::io_error); 1271 } 1272 1273 return AggregatedLBREntry{From.get(), To.get(), 1274 static_cast<uint64_t>(Frequency.get()), Mispreds, 1275 Type}; 1276 } 1277 1278 bool DataAggregator::ignoreKernelInterrupt(LBREntry &LBR) const { 1279 return opts::IgnoreInterruptLBR && 1280 (LBR.From >= KernelBaseAddr || LBR.To >= KernelBaseAddr); 1281 } 1282 1283 std::error_code DataAggregator::printLBRHeatMap() { 1284 outs() << "PERF2BOLT: parse branch events...\n"; 1285 NamedRegionTimer T("parseBranch", "Parsing branch events", TimerGroupName, 1286 TimerGroupDesc, opts::TimeAggregator); 1287 1288 if (BC->IsLinuxKernel) { 1289 opts::HeatmapMaxAddress = 0xffffffffffffffff; 1290 opts::HeatmapMinAddress = KernelBaseAddr; 1291 } 1292 Heatmap HM(opts::HeatmapBlock, opts::HeatmapMinAddress, 1293 opts::HeatmapMaxAddress, getTextSections(BC)); 1294 uint64_t NumTotalSamples = 0; 1295 1296 if (opts::BasicAggregation) { 1297 while (hasData()) { 1298 ErrorOr<PerfBasicSample> SampleRes = parseBasicSample(); 1299 if (std::error_code EC = SampleRes.getError()) { 1300 if (EC == errc::no_such_process) 1301 continue; 1302 return EC; 1303 } 1304 PerfBasicSample &Sample = SampleRes.get(); 1305 HM.registerAddress(Sample.PC); 1306 NumTotalSamples++; 1307 } 1308 outs() << "HEATMAP: read " << NumTotalSamples << " basic samples\n"; 1309 } else { 1310 while (hasData()) { 1311 ErrorOr<PerfBranchSample> SampleRes = parseBranchSample(); 1312 if (std::error_code EC = SampleRes.getError()) { 1313 if (EC == errc::no_such_process) 1314 continue; 1315 return EC; 1316 } 1317 1318 PerfBranchSample &Sample = SampleRes.get(); 1319 1320 // LBRs are stored in reverse execution order. NextLBR refers to the next 1321 // executed branch record. 1322 const LBREntry *NextLBR = nullptr; 1323 for (const LBREntry &LBR : Sample.LBR) { 1324 if (NextLBR) { 1325 // Record fall-through trace. 1326 const uint64_t TraceFrom = LBR.To; 1327 const uint64_t TraceTo = NextLBR->From; 1328 ++FallthroughLBRs[Trace(TraceFrom, TraceTo)].InternCount; 1329 } 1330 NextLBR = &LBR; 1331 } 1332 if (!Sample.LBR.empty()) { 1333 HM.registerAddress(Sample.LBR.front().To); 1334 HM.registerAddress(Sample.LBR.back().From); 1335 } 1336 NumTotalSamples += Sample.LBR.size(); 1337 } 1338 outs() << "HEATMAP: read " << NumTotalSamples << " LBR samples\n"; 1339 outs() << "HEATMAP: " << FallthroughLBRs.size() << " unique traces\n"; 1340 } 1341 1342 if (!NumTotalSamples) { 1343 if (opts::BasicAggregation) { 1344 errs() << "HEATMAP-ERROR: no basic event samples detected in profile. " 1345 "Cannot build heatmap."; 1346 } else { 1347 errs() << "HEATMAP-ERROR: no LBR traces detected in profile. " 1348 "Cannot build heatmap. Use -nl for building heatmap from " 1349 "basic events.\n"; 1350 } 1351 exit(1); 1352 } 1353 1354 outs() << "HEATMAP: building heat map...\n"; 1355 1356 for (const auto &LBR : FallthroughLBRs) { 1357 const Trace &Trace = LBR.first; 1358 const FTInfo &Info = LBR.second; 1359 HM.registerAddressRange(Trace.From, Trace.To, Info.InternCount); 1360 } 1361 1362 if (HM.getNumInvalidRanges()) 1363 outs() << "HEATMAP: invalid traces: " << HM.getNumInvalidRanges() << '\n'; 1364 1365 if (!HM.size()) { 1366 errs() << "HEATMAP-ERROR: no valid traces registered\n"; 1367 exit(1); 1368 } 1369 1370 HM.print(opts::OutputFilename); 1371 if (opts::OutputFilename == "-") 1372 HM.printCDF(opts::OutputFilename); 1373 else 1374 HM.printCDF(opts::OutputFilename + ".csv"); 1375 if (opts::OutputFilename == "-") 1376 HM.printSectionHotness(opts::OutputFilename); 1377 else 1378 HM.printSectionHotness(opts::OutputFilename + "-section-hotness.csv"); 1379 1380 return std::error_code(); 1381 } 1382 1383 uint64_t DataAggregator::parseLBRSample(const PerfBranchSample &Sample, 1384 bool NeedsSkylakeFix) { 1385 uint64_t NumTraces{0}; 1386 // LBRs are stored in reverse execution order. NextLBR refers to the next 1387 // executed branch record. 1388 const LBREntry *NextLBR = nullptr; 1389 uint32_t NumEntry = 0; 1390 for (const LBREntry &LBR : Sample.LBR) { 1391 ++NumEntry; 1392 // Hardware bug workaround: Intel Skylake (which has 32 LBR entries) 1393 // sometimes record entry 32 as an exact copy of entry 31. This will cause 1394 // us to likely record an invalid trace and generate a stale function for 1395 // BAT mode (non BAT disassembles the function and is able to ignore this 1396 // trace at aggregation time). Drop first 2 entries (last two, in 1397 // chronological order) 1398 if (NeedsSkylakeFix && NumEntry <= 2) 1399 continue; 1400 if (NextLBR) { 1401 // Record fall-through trace. 1402 const uint64_t TraceFrom = LBR.To; 1403 const uint64_t TraceTo = NextLBR->From; 1404 const BinaryFunction *TraceBF = 1405 getBinaryFunctionContainingAddress(TraceFrom); 1406 if (TraceBF && TraceBF->containsAddress(TraceTo)) { 1407 FTInfo &Info = FallthroughLBRs[Trace(TraceFrom, TraceTo)]; 1408 if (TraceBF->containsAddress(LBR.From)) 1409 ++Info.InternCount; 1410 else 1411 ++Info.ExternCount; 1412 } else { 1413 const BinaryFunction *ToFunc = 1414 getBinaryFunctionContainingAddress(TraceTo); 1415 if (TraceBF && ToFunc) { 1416 LLVM_DEBUG({ 1417 dbgs() << "Invalid trace starting in " << TraceBF->getPrintName() 1418 << formatv(" @ {0:x}", TraceFrom - TraceBF->getAddress()) 1419 << formatv(" and ending @ {0:x}\n", TraceTo); 1420 }); 1421 ++NumInvalidTraces; 1422 } else { 1423 LLVM_DEBUG({ 1424 dbgs() << "Out of range trace starting in " 1425 << (TraceBF ? TraceBF->getPrintName() : "None") 1426 << formatv(" @ {0:x}", 1427 TraceFrom - (TraceBF ? TraceBF->getAddress() : 0)) 1428 << " and ending in " 1429 << (ToFunc ? ToFunc->getPrintName() : "None") 1430 << formatv(" @ {0:x}\n", 1431 TraceTo - (ToFunc ? ToFunc->getAddress() : 0)); 1432 }); 1433 ++NumLongRangeTraces; 1434 } 1435 } 1436 ++NumTraces; 1437 } 1438 NextLBR = &LBR; 1439 1440 uint64_t From = getBinaryFunctionContainingAddress(LBR.From) ? LBR.From : 0; 1441 uint64_t To = getBinaryFunctionContainingAddress(LBR.To) ? LBR.To : 0; 1442 if (!From && !To) 1443 continue; 1444 TakenBranchInfo &Info = BranchLBRs[Trace(From, To)]; 1445 ++Info.TakenCount; 1446 Info.MispredCount += LBR.Mispred; 1447 } 1448 return NumTraces; 1449 } 1450 1451 std::error_code DataAggregator::parseBranchEvents() { 1452 outs() << "PERF2BOLT: parse branch events...\n"; 1453 NamedRegionTimer T("parseBranch", "Parsing branch events", TimerGroupName, 1454 TimerGroupDesc, opts::TimeAggregator); 1455 1456 uint64_t NumTotalSamples = 0; 1457 uint64_t NumEntries = 0; 1458 uint64_t NumSamples = 0; 1459 uint64_t NumSamplesNoLBR = 0; 1460 uint64_t NumTraces = 0; 1461 bool NeedsSkylakeFix = false; 1462 1463 while (hasData() && NumTotalSamples < opts::MaxSamples) { 1464 ++NumTotalSamples; 1465 1466 ErrorOr<PerfBranchSample> SampleRes = parseBranchSample(); 1467 if (std::error_code EC = SampleRes.getError()) { 1468 if (EC == errc::no_such_process) 1469 continue; 1470 return EC; 1471 } 1472 ++NumSamples; 1473 1474 PerfBranchSample &Sample = SampleRes.get(); 1475 1476 if (Sample.LBR.empty()) { 1477 ++NumSamplesNoLBR; 1478 continue; 1479 } 1480 1481 NumEntries += Sample.LBR.size(); 1482 if (BAT && Sample.LBR.size() == 32 && !NeedsSkylakeFix) { 1483 errs() << "PERF2BOLT-WARNING: using Intel Skylake bug workaround\n"; 1484 NeedsSkylakeFix = true; 1485 } 1486 1487 NumTraces += parseLBRSample(Sample, NeedsSkylakeFix); 1488 } 1489 1490 for (const Trace &Trace : llvm::make_first_range(BranchLBRs)) 1491 for (const uint64_t Addr : {Trace.From, Trace.To}) 1492 if (BinaryFunction *BF = getBinaryFunctionContainingAddress(Addr)) 1493 BF->setHasProfileAvailable(); 1494 1495 auto printColored = [](raw_ostream &OS, float Percent, float T1, float T2) { 1496 OS << " ("; 1497 if (OS.has_colors()) { 1498 if (Percent > T2) 1499 OS.changeColor(raw_ostream::RED); 1500 else if (Percent > T1) 1501 OS.changeColor(raw_ostream::YELLOW); 1502 else 1503 OS.changeColor(raw_ostream::GREEN); 1504 } 1505 OS << format("%.1f%%", Percent); 1506 if (OS.has_colors()) 1507 OS.resetColor(); 1508 OS << ")"; 1509 }; 1510 1511 outs() << "PERF2BOLT: read " << NumSamples << " samples and " << NumEntries 1512 << " LBR entries\n"; 1513 if (NumTotalSamples) { 1514 if (NumSamples && NumSamplesNoLBR == NumSamples) { 1515 // Note: we don't know if perf2bolt is being used to parse memory samples 1516 // at this point. In this case, it is OK to parse zero LBRs. 1517 errs() << "PERF2BOLT-WARNING: all recorded samples for this binary lack " 1518 "LBR. Record profile with perf record -j any or run perf2bolt " 1519 "in no-LBR mode with -nl (the performance improvement in -nl " 1520 "mode may be limited)\n"; 1521 } else { 1522 const uint64_t IgnoredSamples = NumTotalSamples - NumSamples; 1523 const float PercentIgnored = 100.0f * IgnoredSamples / NumTotalSamples; 1524 outs() << "PERF2BOLT: " << IgnoredSamples << " samples"; 1525 printColored(outs(), PercentIgnored, 20, 50); 1526 outs() << " were ignored\n"; 1527 if (PercentIgnored > 50.0f) 1528 errs() << "PERF2BOLT-WARNING: less than 50% of all recorded samples " 1529 "were attributed to the input binary\n"; 1530 } 1531 } 1532 outs() << "PERF2BOLT: traces mismatching disassembled function contents: " 1533 << NumInvalidTraces; 1534 float Perc = 0.0f; 1535 if (NumTraces > 0) { 1536 Perc = NumInvalidTraces * 100.0f / NumTraces; 1537 printColored(outs(), Perc, 5, 10); 1538 } 1539 outs() << "\n"; 1540 if (Perc > 10.0f) 1541 outs() << "\n !! WARNING !! This high mismatch ratio indicates the input " 1542 "binary is probably not the same binary used during profiling " 1543 "collection. The generated data may be ineffective for improving " 1544 "performance.\n\n"; 1545 1546 outs() << "PERF2BOLT: out of range traces involving unknown regions: " 1547 << NumLongRangeTraces; 1548 if (NumTraces > 0) 1549 outs() << format(" (%.1f%%)", NumLongRangeTraces * 100.0f / NumTraces); 1550 outs() << "\n"; 1551 1552 if (NumColdSamples > 0) { 1553 const float ColdSamples = NumColdSamples * 100.0f / NumTotalSamples; 1554 outs() << "PERF2BOLT: " << NumColdSamples 1555 << format(" (%.1f%%)", ColdSamples) 1556 << " samples recorded in cold regions of split functions.\n"; 1557 if (ColdSamples > 5.0f) 1558 outs() 1559 << "WARNING: The BOLT-processed binary where samples were collected " 1560 "likely used bad data or your service observed a large shift in " 1561 "profile. You may want to audit this.\n"; 1562 } 1563 1564 return std::error_code(); 1565 } 1566 1567 void DataAggregator::processBranchEvents() { 1568 outs() << "PERF2BOLT: processing branch events...\n"; 1569 NamedRegionTimer T("processBranch", "Processing branch events", 1570 TimerGroupName, TimerGroupDesc, opts::TimeAggregator); 1571 1572 for (const auto &AggrLBR : FallthroughLBRs) { 1573 const Trace &Loc = AggrLBR.first; 1574 const FTInfo &Info = AggrLBR.second; 1575 LBREntry First{Loc.From, Loc.From, false}; 1576 LBREntry Second{Loc.To, Loc.To, false}; 1577 if (Info.InternCount) 1578 doTrace(First, Second, Info.InternCount); 1579 if (Info.ExternCount) { 1580 First.From = 0; 1581 doTrace(First, Second, Info.ExternCount); 1582 } 1583 } 1584 1585 for (const auto &AggrLBR : BranchLBRs) { 1586 const Trace &Loc = AggrLBR.first; 1587 const TakenBranchInfo &Info = AggrLBR.second; 1588 doBranch(Loc.From, Loc.To, Info.TakenCount, Info.MispredCount, 1589 /*IsPreagg*/ false); 1590 } 1591 } 1592 1593 std::error_code DataAggregator::parseBasicEvents() { 1594 outs() << "PERF2BOLT: parsing basic events (without LBR)...\n"; 1595 NamedRegionTimer T("parseBasic", "Parsing basic events", TimerGroupName, 1596 TimerGroupDesc, opts::TimeAggregator); 1597 while (hasData()) { 1598 ErrorOr<PerfBasicSample> Sample = parseBasicSample(); 1599 if (std::error_code EC = Sample.getError()) 1600 return EC; 1601 1602 if (!Sample->PC) 1603 continue; 1604 1605 if (BinaryFunction *BF = getBinaryFunctionContainingAddress(Sample->PC)) 1606 BF->setHasProfileAvailable(); 1607 1608 ++BasicSamples[Sample->PC]; 1609 EventNames.insert(Sample->EventName); 1610 } 1611 1612 return std::error_code(); 1613 } 1614 1615 void DataAggregator::processBasicEvents() { 1616 outs() << "PERF2BOLT: processing basic events (without LBR)...\n"; 1617 NamedRegionTimer T("processBasic", "Processing basic events", TimerGroupName, 1618 TimerGroupDesc, opts::TimeAggregator); 1619 uint64_t OutOfRangeSamples = 0; 1620 uint64_t NumSamples = 0; 1621 for (auto &Sample : BasicSamples) { 1622 const uint64_t PC = Sample.first; 1623 const uint64_t HitCount = Sample.second; 1624 NumSamples += HitCount; 1625 BinaryFunction *Func = getBinaryFunctionContainingAddress(PC); 1626 if (!Func) { 1627 OutOfRangeSamples += HitCount; 1628 continue; 1629 } 1630 1631 doSample(*Func, PC, HitCount); 1632 } 1633 outs() << "PERF2BOLT: read " << NumSamples << " samples\n"; 1634 1635 outs() << "PERF2BOLT: out of range samples recorded in unknown regions: " 1636 << OutOfRangeSamples; 1637 float Perc = 0.0f; 1638 if (NumSamples > 0) { 1639 outs() << " ("; 1640 Perc = OutOfRangeSamples * 100.0f / NumSamples; 1641 if (outs().has_colors()) { 1642 if (Perc > 60.0f) 1643 outs().changeColor(raw_ostream::RED); 1644 else if (Perc > 40.0f) 1645 outs().changeColor(raw_ostream::YELLOW); 1646 else 1647 outs().changeColor(raw_ostream::GREEN); 1648 } 1649 outs() << format("%.1f%%", Perc); 1650 if (outs().has_colors()) 1651 outs().resetColor(); 1652 outs() << ")"; 1653 } 1654 outs() << "\n"; 1655 if (Perc > 80.0f) 1656 outs() << "\n !! WARNING !! This high mismatch ratio indicates the input " 1657 "binary is probably not the same binary used during profiling " 1658 "collection. The generated data may be ineffective for improving " 1659 "performance.\n\n"; 1660 } 1661 1662 std::error_code DataAggregator::parseMemEvents() { 1663 outs() << "PERF2BOLT: parsing memory events...\n"; 1664 NamedRegionTimer T("parseMemEvents", "Parsing mem events", TimerGroupName, 1665 TimerGroupDesc, opts::TimeAggregator); 1666 while (hasData()) { 1667 ErrorOr<PerfMemSample> Sample = parseMemSample(); 1668 if (std::error_code EC = Sample.getError()) 1669 return EC; 1670 1671 if (BinaryFunction *BF = getBinaryFunctionContainingAddress(Sample->PC)) 1672 BF->setHasProfileAvailable(); 1673 1674 MemSamples.emplace_back(std::move(Sample.get())); 1675 } 1676 1677 return std::error_code(); 1678 } 1679 1680 void DataAggregator::processMemEvents() { 1681 NamedRegionTimer T("ProcessMemEvents", "Processing mem events", 1682 TimerGroupName, TimerGroupDesc, opts::TimeAggregator); 1683 for (const PerfMemSample &Sample : MemSamples) { 1684 uint64_t PC = Sample.PC; 1685 uint64_t Addr = Sample.Addr; 1686 StringRef FuncName; 1687 StringRef MemName; 1688 1689 // Try to resolve symbol for PC 1690 BinaryFunction *Func = getBinaryFunctionContainingAddress(PC); 1691 if (!Func) { 1692 LLVM_DEBUG(if (PC != 0) { 1693 dbgs() << formatv("Skipped mem event: {0:x} => {1:x}\n", PC, Addr); 1694 }); 1695 continue; 1696 } 1697 1698 FuncName = Func->getOneName(); 1699 PC -= Func->getAddress(); 1700 1701 // Try to resolve symbol for memory load 1702 if (BinaryData *BD = BC->getBinaryDataContainingAddress(Addr)) { 1703 MemName = BD->getName(); 1704 Addr -= BD->getAddress(); 1705 } else if (opts::FilterMemProfile) { 1706 // Filter out heap/stack accesses 1707 continue; 1708 } 1709 1710 const Location FuncLoc(!FuncName.empty(), FuncName, PC); 1711 const Location AddrLoc(!MemName.empty(), MemName, Addr); 1712 1713 FuncMemData *MemData = &NamesToMemEvents[FuncName]; 1714 MemData->Name = FuncName; 1715 setMemData(*Func, MemData); 1716 MemData->update(FuncLoc, AddrLoc); 1717 LLVM_DEBUG(dbgs() << "Mem event: " << FuncLoc << " = " << AddrLoc << "\n"); 1718 } 1719 } 1720 1721 std::error_code DataAggregator::parsePreAggregatedLBRSamples() { 1722 outs() << "PERF2BOLT: parsing pre-aggregated profile...\n"; 1723 NamedRegionTimer T("parseAggregated", "Parsing aggregated branch events", 1724 TimerGroupName, TimerGroupDesc, opts::TimeAggregator); 1725 while (hasData()) { 1726 ErrorOr<AggregatedLBREntry> AggrEntry = parseAggregatedLBREntry(); 1727 if (std::error_code EC = AggrEntry.getError()) 1728 return EC; 1729 1730 for (const uint64_t Addr : {AggrEntry->From.Offset, AggrEntry->To.Offset}) 1731 if (BinaryFunction *BF = getBinaryFunctionContainingAddress(Addr)) 1732 BF->setHasProfileAvailable(); 1733 1734 AggregatedLBRs.emplace_back(std::move(AggrEntry.get())); 1735 } 1736 1737 return std::error_code(); 1738 } 1739 1740 void DataAggregator::processPreAggregated() { 1741 outs() << "PERF2BOLT: processing pre-aggregated profile...\n"; 1742 NamedRegionTimer T("processAggregated", "Processing aggregated branch events", 1743 TimerGroupName, TimerGroupDesc, opts::TimeAggregator); 1744 1745 uint64_t NumTraces = 0; 1746 for (const AggregatedLBREntry &AggrEntry : AggregatedLBRs) { 1747 switch (AggrEntry.EntryType) { 1748 case AggregatedLBREntry::BRANCH: 1749 doBranch(AggrEntry.From.Offset, AggrEntry.To.Offset, AggrEntry.Count, 1750 AggrEntry.Mispreds, /*IsPreagg*/ true); 1751 break; 1752 case AggregatedLBREntry::FT: 1753 case AggregatedLBREntry::FT_EXTERNAL_ORIGIN: { 1754 LBREntry First{AggrEntry.EntryType == AggregatedLBREntry::FT 1755 ? AggrEntry.From.Offset 1756 : 0, 1757 AggrEntry.From.Offset, false}; 1758 LBREntry Second{AggrEntry.To.Offset, AggrEntry.To.Offset, false}; 1759 doTrace(First, Second, AggrEntry.Count); 1760 NumTraces += AggrEntry.Count; 1761 break; 1762 } 1763 } 1764 } 1765 1766 outs() << "PERF2BOLT: read " << AggregatedLBRs.size() 1767 << " aggregated LBR entries\n"; 1768 outs() << "PERF2BOLT: traces mismatching disassembled function contents: " 1769 << NumInvalidTraces; 1770 float Perc = 0.0f; 1771 if (NumTraces > 0) { 1772 outs() << " ("; 1773 Perc = NumInvalidTraces * 100.0f / NumTraces; 1774 if (outs().has_colors()) { 1775 if (Perc > 10.0f) 1776 outs().changeColor(raw_ostream::RED); 1777 else if (Perc > 5.0f) 1778 outs().changeColor(raw_ostream::YELLOW); 1779 else 1780 outs().changeColor(raw_ostream::GREEN); 1781 } 1782 outs() << format("%.1f%%", Perc); 1783 if (outs().has_colors()) 1784 outs().resetColor(); 1785 outs() << ")"; 1786 } 1787 outs() << "\n"; 1788 if (Perc > 10.0f) 1789 outs() << "\n !! WARNING !! This high mismatch ratio indicates the input " 1790 "binary is probably not the same binary used during profiling " 1791 "collection. The generated data may be ineffective for improving " 1792 "performance.\n\n"; 1793 1794 outs() << "PERF2BOLT: Out of range traces involving unknown regions: " 1795 << NumLongRangeTraces; 1796 if (NumTraces > 0) 1797 outs() << format(" (%.1f%%)", NumLongRangeTraces * 100.0f / NumTraces); 1798 outs() << "\n"; 1799 } 1800 1801 std::optional<int32_t> DataAggregator::parseCommExecEvent() { 1802 size_t LineEnd = ParsingBuf.find_first_of("\n"); 1803 if (LineEnd == StringRef::npos) { 1804 reportError("expected rest of line"); 1805 Diag << "Found: " << ParsingBuf << "\n"; 1806 return std::nullopt; 1807 } 1808 StringRef Line = ParsingBuf.substr(0, LineEnd); 1809 1810 size_t Pos = Line.find("PERF_RECORD_COMM exec"); 1811 if (Pos == StringRef::npos) 1812 return std::nullopt; 1813 Line = Line.drop_front(Pos); 1814 1815 // Line: 1816 // PERF_RECORD_COMM exec: <name>:<pid>/<tid>" 1817 StringRef PIDStr = Line.rsplit(':').second.split('/').first; 1818 int32_t PID; 1819 if (PIDStr.getAsInteger(10, PID)) { 1820 reportError("expected PID"); 1821 Diag << "Found: " << PIDStr << "in '" << Line << "'\n"; 1822 return std::nullopt; 1823 } 1824 1825 return PID; 1826 } 1827 1828 namespace { 1829 std::optional<uint64_t> parsePerfTime(const StringRef TimeStr) { 1830 const StringRef SecTimeStr = TimeStr.split('.').first; 1831 const StringRef USecTimeStr = TimeStr.split('.').second; 1832 uint64_t SecTime; 1833 uint64_t USecTime; 1834 if (SecTimeStr.getAsInteger(10, SecTime) || 1835 USecTimeStr.getAsInteger(10, USecTime)) 1836 return std::nullopt; 1837 return SecTime * 1000000ULL + USecTime; 1838 } 1839 } 1840 1841 std::optional<DataAggregator::ForkInfo> DataAggregator::parseForkEvent() { 1842 while (checkAndConsumeFS()) { 1843 } 1844 1845 size_t LineEnd = ParsingBuf.find_first_of("\n"); 1846 if (LineEnd == StringRef::npos) { 1847 reportError("expected rest of line"); 1848 Diag << "Found: " << ParsingBuf << "\n"; 1849 return std::nullopt; 1850 } 1851 StringRef Line = ParsingBuf.substr(0, LineEnd); 1852 1853 size_t Pos = Line.find("PERF_RECORD_FORK"); 1854 if (Pos == StringRef::npos) { 1855 consumeRestOfLine(); 1856 return std::nullopt; 1857 } 1858 1859 ForkInfo FI; 1860 1861 const StringRef TimeStr = 1862 Line.substr(0, Pos).rsplit(':').first.rsplit(FieldSeparator).second; 1863 if (std::optional<uint64_t> TimeRes = parsePerfTime(TimeStr)) { 1864 FI.Time = *TimeRes; 1865 } 1866 1867 Line = Line.drop_front(Pos); 1868 1869 // Line: 1870 // PERF_RECORD_FORK(<child_pid>:<child_tid>):(<parent_pid>:<parent_tid>) 1871 const StringRef ChildPIDStr = Line.split('(').second.split(':').first; 1872 if (ChildPIDStr.getAsInteger(10, FI.ChildPID)) { 1873 reportError("expected PID"); 1874 Diag << "Found: " << ChildPIDStr << "in '" << Line << "'\n"; 1875 return std::nullopt; 1876 } 1877 1878 const StringRef ParentPIDStr = Line.rsplit('(').second.split(':').first; 1879 if (ParentPIDStr.getAsInteger(10, FI.ParentPID)) { 1880 reportError("expected PID"); 1881 Diag << "Found: " << ParentPIDStr << "in '" << Line << "'\n"; 1882 return std::nullopt; 1883 } 1884 1885 consumeRestOfLine(); 1886 1887 return FI; 1888 } 1889 1890 ErrorOr<std::pair<StringRef, DataAggregator::MMapInfo>> 1891 DataAggregator::parseMMapEvent() { 1892 while (checkAndConsumeFS()) { 1893 } 1894 1895 MMapInfo ParsedInfo; 1896 1897 size_t LineEnd = ParsingBuf.find_first_of("\n"); 1898 if (LineEnd == StringRef::npos) { 1899 reportError("expected rest of line"); 1900 Diag << "Found: " << ParsingBuf << "\n"; 1901 return make_error_code(llvm::errc::io_error); 1902 } 1903 StringRef Line = ParsingBuf.substr(0, LineEnd); 1904 1905 size_t Pos = Line.find("PERF_RECORD_MMAP2"); 1906 if (Pos == StringRef::npos) { 1907 consumeRestOfLine(); 1908 return std::make_pair(StringRef(), ParsedInfo); 1909 } 1910 1911 // Line: 1912 // {<name> .* <sec>.<usec>: }PERF_RECORD_MMAP2 <pid>/<tid>: .* <file_name> 1913 1914 const StringRef TimeStr = 1915 Line.substr(0, Pos).rsplit(':').first.rsplit(FieldSeparator).second; 1916 if (std::optional<uint64_t> TimeRes = parsePerfTime(TimeStr)) 1917 ParsedInfo.Time = *TimeRes; 1918 1919 Line = Line.drop_front(Pos); 1920 1921 // Line: 1922 // PERF_RECORD_MMAP2 <pid>/<tid>: [<hexbase>(<hexsize>) .*]: .* <file_name> 1923 1924 StringRef FileName = Line.rsplit(FieldSeparator).second; 1925 if (FileName.starts_with("//") || FileName.starts_with("[")) { 1926 consumeRestOfLine(); 1927 return std::make_pair(StringRef(), ParsedInfo); 1928 } 1929 FileName = sys::path::filename(FileName); 1930 1931 const StringRef PIDStr = Line.split(FieldSeparator).second.split('/').first; 1932 if (PIDStr.getAsInteger(10, ParsedInfo.PID)) { 1933 reportError("expected PID"); 1934 Diag << "Found: " << PIDStr << "in '" << Line << "'\n"; 1935 return make_error_code(llvm::errc::io_error); 1936 } 1937 1938 const StringRef BaseAddressStr = Line.split('[').second.split('(').first; 1939 if (BaseAddressStr.getAsInteger(0, ParsedInfo.MMapAddress)) { 1940 reportError("expected base address"); 1941 Diag << "Found: " << BaseAddressStr << "in '" << Line << "'\n"; 1942 return make_error_code(llvm::errc::io_error); 1943 } 1944 1945 const StringRef SizeStr = Line.split('(').second.split(')').first; 1946 if (SizeStr.getAsInteger(0, ParsedInfo.Size)) { 1947 reportError("expected mmaped size"); 1948 Diag << "Found: " << SizeStr << "in '" << Line << "'\n"; 1949 return make_error_code(llvm::errc::io_error); 1950 } 1951 1952 const StringRef OffsetStr = 1953 Line.split('@').second.ltrim().split(FieldSeparator).first; 1954 if (OffsetStr.getAsInteger(0, ParsedInfo.Offset)) { 1955 reportError("expected mmaped page-aligned offset"); 1956 Diag << "Found: " << OffsetStr << "in '" << Line << "'\n"; 1957 return make_error_code(llvm::errc::io_error); 1958 } 1959 1960 consumeRestOfLine(); 1961 1962 return std::make_pair(FileName, ParsedInfo); 1963 } 1964 1965 std::error_code DataAggregator::parseMMapEvents() { 1966 outs() << "PERF2BOLT: parsing perf-script mmap events output\n"; 1967 NamedRegionTimer T("parseMMapEvents", "Parsing mmap events", TimerGroupName, 1968 TimerGroupDesc, opts::TimeAggregator); 1969 1970 std::multimap<StringRef, MMapInfo> GlobalMMapInfo; 1971 while (hasData()) { 1972 ErrorOr<std::pair<StringRef, MMapInfo>> FileMMapInfoRes = parseMMapEvent(); 1973 if (std::error_code EC = FileMMapInfoRes.getError()) 1974 return EC; 1975 1976 std::pair<StringRef, MMapInfo> FileMMapInfo = FileMMapInfoRes.get(); 1977 if (FileMMapInfo.second.PID == -1) 1978 continue; 1979 if (FileMMapInfo.first == "(deleted)") 1980 continue; 1981 1982 GlobalMMapInfo.insert(FileMMapInfo); 1983 } 1984 1985 LLVM_DEBUG({ 1986 dbgs() << "FileName -> mmap info:\n" 1987 << " Filename : PID [MMapAddr, Size, Offset]\n"; 1988 for (const auto &[Name, MMap] : GlobalMMapInfo) 1989 dbgs() << formatv(" {0} : {1} [{2:x}, {3:x} @ {4:x}]\n", Name, MMap.PID, 1990 MMap.MMapAddress, MMap.Size, MMap.Offset); 1991 }); 1992 1993 StringRef NameToUse = llvm::sys::path::filename(BC->getFilename()); 1994 if (GlobalMMapInfo.count(NameToUse) == 0 && !BuildIDBinaryName.empty()) { 1995 errs() << "PERF2BOLT-WARNING: using \"" << BuildIDBinaryName 1996 << "\" for profile matching\n"; 1997 NameToUse = BuildIDBinaryName; 1998 } 1999 2000 auto Range = GlobalMMapInfo.equal_range(NameToUse); 2001 for (MMapInfo &MMapInfo : llvm::make_second_range(make_range(Range))) { 2002 if (BC->HasFixedLoadAddress && MMapInfo.MMapAddress) { 2003 // Check that the binary mapping matches one of the segments. 2004 bool MatchFound = llvm::any_of( 2005 llvm::make_second_range(BC->SegmentMapInfo), 2006 [&](SegmentInfo &SegInfo) { 2007 // The mapping is page-aligned and hence the MMapAddress could be 2008 // different from the segment start address. We cannot know the page 2009 // size of the mapping, but we know it should not exceed the segment 2010 // alignment value. Hence we are performing an approximate check. 2011 return SegInfo.Address >= MMapInfo.MMapAddress && 2012 SegInfo.Address - MMapInfo.MMapAddress < SegInfo.Alignment && 2013 SegInfo.IsExecutable; 2014 }); 2015 if (!MatchFound) { 2016 errs() << "PERF2BOLT-WARNING: ignoring mapping of " << NameToUse 2017 << " at 0x" << Twine::utohexstr(MMapInfo.MMapAddress) << '\n'; 2018 continue; 2019 } 2020 } 2021 2022 // Set base address for shared objects. 2023 if (!BC->HasFixedLoadAddress) { 2024 std::optional<uint64_t> BaseAddress = 2025 BC->getBaseAddressForMapping(MMapInfo.MMapAddress, MMapInfo.Offset); 2026 if (!BaseAddress) { 2027 errs() << "PERF2BOLT-WARNING: unable to find base address of the " 2028 "binary when memory mapped at 0x" 2029 << Twine::utohexstr(MMapInfo.MMapAddress) 2030 << " using file offset 0x" << Twine::utohexstr(MMapInfo.Offset) 2031 << ". Ignoring profile data for this mapping\n"; 2032 continue; 2033 } 2034 MMapInfo.BaseAddress = *BaseAddress; 2035 } 2036 2037 // Try to add MMapInfo to the map and update its size. Large binaries may 2038 // span to multiple text segments, so the mapping is inserted only on the 2039 // first occurrence. 2040 if (!BinaryMMapInfo.insert(std::make_pair(MMapInfo.PID, MMapInfo)).second) 2041 assert(MMapInfo.BaseAddress == BinaryMMapInfo[MMapInfo.PID].BaseAddress && 2042 "Base address on multiple segment mappings should match"); 2043 2044 // Update mapping size. 2045 const uint64_t EndAddress = MMapInfo.MMapAddress + MMapInfo.Size; 2046 const uint64_t Size = EndAddress - BinaryMMapInfo[MMapInfo.PID].BaseAddress; 2047 if (Size > BinaryMMapInfo[MMapInfo.PID].Size) 2048 BinaryMMapInfo[MMapInfo.PID].Size = Size; 2049 } 2050 2051 if (BinaryMMapInfo.empty()) { 2052 if (errs().has_colors()) 2053 errs().changeColor(raw_ostream::RED); 2054 errs() << "PERF2BOLT-ERROR: could not find a profile matching binary \"" 2055 << BC->getFilename() << "\"."; 2056 if (!GlobalMMapInfo.empty()) { 2057 errs() << " Profile for the following binary name(s) is available:\n"; 2058 for (auto I = GlobalMMapInfo.begin(), IE = GlobalMMapInfo.end(); I != IE; 2059 I = GlobalMMapInfo.upper_bound(I->first)) 2060 errs() << " " << I->first << '\n'; 2061 errs() << "Please rename the input binary.\n"; 2062 } else { 2063 errs() << " Failed to extract any binary name from a profile.\n"; 2064 } 2065 if (errs().has_colors()) 2066 errs().resetColor(); 2067 2068 exit(1); 2069 } 2070 2071 return std::error_code(); 2072 } 2073 2074 std::error_code DataAggregator::parseTaskEvents() { 2075 outs() << "PERF2BOLT: parsing perf-script task events output\n"; 2076 NamedRegionTimer T("parseTaskEvents", "Parsing task events", TimerGroupName, 2077 TimerGroupDesc, opts::TimeAggregator); 2078 2079 while (hasData()) { 2080 if (std::optional<int32_t> CommInfo = parseCommExecEvent()) { 2081 // Remove forked child that ran execve 2082 auto MMapInfoIter = BinaryMMapInfo.find(*CommInfo); 2083 if (MMapInfoIter != BinaryMMapInfo.end() && MMapInfoIter->second.Forked) 2084 BinaryMMapInfo.erase(MMapInfoIter); 2085 consumeRestOfLine(); 2086 continue; 2087 } 2088 2089 std::optional<ForkInfo> ForkInfo = parseForkEvent(); 2090 if (!ForkInfo) 2091 continue; 2092 2093 if (ForkInfo->ParentPID == ForkInfo->ChildPID) 2094 continue; 2095 2096 if (ForkInfo->Time == 0) { 2097 // Process was forked and mmaped before perf ran. In this case the child 2098 // should have its own mmap entry unless it was execve'd. 2099 continue; 2100 } 2101 2102 auto MMapInfoIter = BinaryMMapInfo.find(ForkInfo->ParentPID); 2103 if (MMapInfoIter == BinaryMMapInfo.end()) 2104 continue; 2105 2106 MMapInfo MMapInfo = MMapInfoIter->second; 2107 MMapInfo.PID = ForkInfo->ChildPID; 2108 MMapInfo.Forked = true; 2109 BinaryMMapInfo.insert(std::make_pair(MMapInfo.PID, MMapInfo)); 2110 } 2111 2112 outs() << "PERF2BOLT: input binary is associated with " 2113 << BinaryMMapInfo.size() << " PID(s)\n"; 2114 2115 LLVM_DEBUG({ 2116 for (const MMapInfo &MMI : llvm::make_second_range(BinaryMMapInfo)) 2117 outs() << formatv(" {0}{1}: ({2:x}: {3:x})\n", MMI.PID, 2118 (MMI.Forked ? " (forked)" : ""), MMI.MMapAddress, 2119 MMI.Size); 2120 }); 2121 2122 return std::error_code(); 2123 } 2124 2125 std::optional<std::pair<StringRef, StringRef>> 2126 DataAggregator::parseNameBuildIDPair() { 2127 while (checkAndConsumeFS()) { 2128 } 2129 2130 ErrorOr<StringRef> BuildIDStr = parseString(FieldSeparator, true); 2131 if (std::error_code EC = BuildIDStr.getError()) 2132 return std::nullopt; 2133 2134 // If one of the strings is missing, don't issue a parsing error, but still 2135 // do not return a value. 2136 consumeAllRemainingFS(); 2137 if (checkNewLine()) 2138 return std::nullopt; 2139 2140 ErrorOr<StringRef> NameStr = parseString(FieldSeparator, true); 2141 if (std::error_code EC = NameStr.getError()) 2142 return std::nullopt; 2143 2144 consumeRestOfLine(); 2145 return std::make_pair(NameStr.get(), BuildIDStr.get()); 2146 } 2147 2148 bool DataAggregator::hasAllBuildIDs() { 2149 const StringRef SavedParsingBuf = ParsingBuf; 2150 2151 if (!hasData()) 2152 return false; 2153 2154 bool HasInvalidEntries = false; 2155 while (hasData()) { 2156 if (!parseNameBuildIDPair()) { 2157 HasInvalidEntries = true; 2158 break; 2159 } 2160 } 2161 2162 ParsingBuf = SavedParsingBuf; 2163 2164 return !HasInvalidEntries; 2165 } 2166 2167 std::optional<StringRef> 2168 DataAggregator::getFileNameForBuildID(StringRef FileBuildID) { 2169 const StringRef SavedParsingBuf = ParsingBuf; 2170 2171 StringRef FileName; 2172 while (hasData()) { 2173 std::optional<std::pair<StringRef, StringRef>> IDPair = 2174 parseNameBuildIDPair(); 2175 if (!IDPair) { 2176 consumeRestOfLine(); 2177 continue; 2178 } 2179 2180 if (IDPair->second.starts_with(FileBuildID)) { 2181 FileName = sys::path::filename(IDPair->first); 2182 break; 2183 } 2184 } 2185 2186 ParsingBuf = SavedParsingBuf; 2187 2188 if (!FileName.empty()) 2189 return FileName; 2190 2191 return std::nullopt; 2192 } 2193 2194 std::error_code 2195 DataAggregator::writeAggregatedFile(StringRef OutputFilename) const { 2196 std::error_code EC; 2197 raw_fd_ostream OutFile(OutputFilename, EC, sys::fs::OpenFlags::OF_None); 2198 if (EC) 2199 return EC; 2200 2201 bool WriteMemLocs = false; 2202 2203 auto writeLocation = [&OutFile, &WriteMemLocs](const Location &Loc) { 2204 if (WriteMemLocs) 2205 OutFile << (Loc.IsSymbol ? "4 " : "3 "); 2206 else 2207 OutFile << (Loc.IsSymbol ? "1 " : "0 "); 2208 OutFile << (Loc.Name.empty() ? "[unknown]" : getEscapedName(Loc.Name)) 2209 << " " << Twine::utohexstr(Loc.Offset) << FieldSeparator; 2210 }; 2211 2212 uint64_t BranchValues = 0; 2213 uint64_t MemValues = 0; 2214 2215 if (BAT) 2216 OutFile << "boltedcollection\n"; 2217 if (opts::BasicAggregation) { 2218 OutFile << "no_lbr"; 2219 for (const StringMapEntry<std::nullopt_t> &Entry : EventNames) 2220 OutFile << " " << Entry.getKey(); 2221 OutFile << "\n"; 2222 2223 for (const auto &KV : NamesToSamples) { 2224 const FuncSampleData &FSD = KV.second; 2225 for (const SampleInfo &SI : FSD.Data) { 2226 writeLocation(SI.Loc); 2227 OutFile << SI.Hits << "\n"; 2228 ++BranchValues; 2229 } 2230 } 2231 } else { 2232 for (const auto &KV : NamesToBranches) { 2233 const FuncBranchData &FBD = KV.second; 2234 for (const BranchInfo &BI : FBD.Data) { 2235 writeLocation(BI.From); 2236 writeLocation(BI.To); 2237 OutFile << BI.Mispreds << " " << BI.Branches << "\n"; 2238 ++BranchValues; 2239 } 2240 for (const BranchInfo &BI : FBD.EntryData) { 2241 // Do not output if source is a known symbol, since this was already 2242 // accounted for in the source function 2243 if (BI.From.IsSymbol) 2244 continue; 2245 writeLocation(BI.From); 2246 writeLocation(BI.To); 2247 OutFile << BI.Mispreds << " " << BI.Branches << "\n"; 2248 ++BranchValues; 2249 } 2250 } 2251 2252 WriteMemLocs = true; 2253 for (const auto &KV : NamesToMemEvents) { 2254 const FuncMemData &FMD = KV.second; 2255 for (const MemInfo &MemEvent : FMD.Data) { 2256 writeLocation(MemEvent.Offset); 2257 writeLocation(MemEvent.Addr); 2258 OutFile << MemEvent.Count << "\n"; 2259 ++MemValues; 2260 } 2261 } 2262 } 2263 2264 outs() << "PERF2BOLT: wrote " << BranchValues << " objects and " << MemValues 2265 << " memory objects to " << OutputFilename << "\n"; 2266 2267 return std::error_code(); 2268 } 2269 2270 std::error_code DataAggregator::writeBATYAML(BinaryContext &BC, 2271 StringRef OutputFilename) const { 2272 std::error_code EC; 2273 raw_fd_ostream OutFile(OutputFilename, EC, sys::fs::OpenFlags::OF_None); 2274 if (EC) 2275 return EC; 2276 2277 yaml::bolt::BinaryProfile BP; 2278 2279 const MCPseudoProbeDecoder *PseudoProbeDecoder = 2280 opts::ProfileWritePseudoProbes ? BC.getPseudoProbeDecoder() : nullptr; 2281 2282 // Fill out the header info. 2283 BP.Header.Version = 1; 2284 BP.Header.FileName = std::string(BC.getFilename()); 2285 std::optional<StringRef> BuildID = BC.getFileBuildID(); 2286 BP.Header.Id = BuildID ? std::string(*BuildID) : "<unknown>"; 2287 BP.Header.Origin = std::string(getReaderName()); 2288 // Only the input binary layout order is supported. 2289 BP.Header.IsDFSOrder = false; 2290 // FIXME: Need to match hash function used to produce BAT hashes. 2291 BP.Header.HashFunction = HashFunction::Default; 2292 2293 ListSeparator LS(","); 2294 raw_string_ostream EventNamesOS(BP.Header.EventNames); 2295 for (const StringMapEntry<std::nullopt_t> &EventEntry : EventNames) 2296 EventNamesOS << LS << EventEntry.first().str(); 2297 2298 BP.Header.Flags = opts::BasicAggregation ? BinaryFunction::PF_SAMPLE 2299 : BinaryFunction::PF_LBR; 2300 2301 // Add probe inline tree nodes. 2302 YAMLProfileWriter::InlineTreeDesc InlineTree; 2303 if (PseudoProbeDecoder) 2304 std::tie(BP.PseudoProbeDesc, InlineTree) = 2305 YAMLProfileWriter::convertPseudoProbeDesc(*PseudoProbeDecoder); 2306 2307 if (!opts::BasicAggregation) { 2308 // Convert profile for functions not covered by BAT 2309 for (auto &BFI : BC.getBinaryFunctions()) { 2310 BinaryFunction &Function = BFI.second; 2311 if (!Function.hasProfile()) 2312 continue; 2313 if (BAT->isBATFunction(Function.getAddress())) 2314 continue; 2315 BP.Functions.emplace_back(YAMLProfileWriter::convert( 2316 Function, /*UseDFS=*/false, InlineTree, BAT)); 2317 } 2318 2319 for (const auto &KV : NamesToBranches) { 2320 const StringRef FuncName = KV.first; 2321 const FuncBranchData &Branches = KV.second; 2322 yaml::bolt::BinaryFunctionProfile YamlBF; 2323 BinaryData *BD = BC.getBinaryDataByName(FuncName); 2324 assert(BD); 2325 uint64_t FuncAddress = BD->getAddress(); 2326 if (!BAT->isBATFunction(FuncAddress)) 2327 continue; 2328 BinaryFunction *BF = BC.getBinaryFunctionAtAddress(FuncAddress); 2329 assert(BF); 2330 YamlBF.Name = getLocationName(*BF, BAT); 2331 YamlBF.Id = BF->getFunctionNumber(); 2332 YamlBF.Hash = BAT->getBFHash(FuncAddress); 2333 YamlBF.ExecCount = BF->getKnownExecutionCount(); 2334 YamlBF.NumBasicBlocks = BAT->getNumBasicBlocks(FuncAddress); 2335 const BoltAddressTranslation::BBHashMapTy &BlockMap = 2336 BAT->getBBHashMap(FuncAddress); 2337 YamlBF.Blocks.resize(YamlBF.NumBasicBlocks); 2338 2339 for (auto &&[Entry, YamlBB] : llvm::zip(BlockMap, YamlBF.Blocks)) { 2340 const auto &Block = Entry.second; 2341 YamlBB.Hash = Block.Hash; 2342 YamlBB.Index = Block.Index; 2343 } 2344 2345 // Lookup containing basic block offset and index 2346 auto getBlock = [&BlockMap](uint32_t Offset) { 2347 auto BlockIt = BlockMap.upper_bound(Offset); 2348 if (LLVM_UNLIKELY(BlockIt == BlockMap.begin())) { 2349 errs() << "BOLT-ERROR: invalid BAT section\n"; 2350 exit(1); 2351 } 2352 --BlockIt; 2353 return std::pair(BlockIt->first, BlockIt->second.Index); 2354 }; 2355 2356 for (const BranchInfo &BI : Branches.Data) { 2357 using namespace yaml::bolt; 2358 const auto &[BlockOffset, BlockIndex] = getBlock(BI.From.Offset); 2359 BinaryBasicBlockProfile &YamlBB = YamlBF.Blocks[BlockIndex]; 2360 if (BI.To.IsSymbol && BI.To.Name == BI.From.Name && BI.To.Offset != 0) { 2361 // Internal branch 2362 const unsigned SuccIndex = getBlock(BI.To.Offset).second; 2363 auto &SI = YamlBB.Successors.emplace_back(SuccessorInfo{SuccIndex}); 2364 SI.Count = BI.Branches; 2365 SI.Mispreds = BI.Mispreds; 2366 } else { 2367 // Call 2368 const uint32_t Offset = BI.From.Offset - BlockOffset; 2369 auto &CSI = YamlBB.CallSites.emplace_back(CallSiteInfo{Offset}); 2370 CSI.Count = BI.Branches; 2371 CSI.Mispreds = BI.Mispreds; 2372 if (const BinaryData *BD = BC.getBinaryDataByName(BI.To.Name)) 2373 YAMLProfileWriter::setCSIDestination(BC, CSI, BD->getSymbol(), BAT, 2374 BI.To.Offset); 2375 } 2376 } 2377 // Set entry counts, similar to DataReader::readProfile. 2378 for (const BranchInfo &BI : Branches.EntryData) { 2379 if (!BlockMap.isInputBlock(BI.To.Offset)) { 2380 if (opts::Verbosity >= 1) 2381 errs() << "BOLT-WARNING: Unexpected EntryData in " << FuncName 2382 << " at 0x" << Twine::utohexstr(BI.To.Offset) << '\n'; 2383 continue; 2384 } 2385 const unsigned BlockIndex = BlockMap.getBBIndex(BI.To.Offset); 2386 YamlBF.Blocks[BlockIndex].ExecCount += BI.Branches; 2387 } 2388 if (PseudoProbeDecoder) { 2389 DenseMap<const MCDecodedPseudoProbeInlineTree *, uint32_t> 2390 InlineTreeNodeId; 2391 if (BF->getGUID()) { 2392 std::tie(YamlBF.InlineTree, InlineTreeNodeId) = 2393 YAMLProfileWriter::convertBFInlineTree(*PseudoProbeDecoder, 2394 InlineTree, BF->getGUID()); 2395 } 2396 // Fetch probes belonging to all fragments 2397 const AddressProbesMap &ProbeMap = 2398 PseudoProbeDecoder->getAddress2ProbesMap(); 2399 BinaryFunction::FragmentsSetTy Fragments(BF->Fragments); 2400 Fragments.insert(BF); 2401 DenseMap< 2402 uint32_t, 2403 std::vector<std::reference_wrapper<const MCDecodedPseudoProbe>>> 2404 BlockProbes; 2405 for (const BinaryFunction *F : Fragments) { 2406 const uint64_t FuncAddr = F->getAddress(); 2407 for (const MCDecodedPseudoProbe &Probe : 2408 ProbeMap.find(FuncAddr, FuncAddr + F->getSize())) { 2409 const uint32_t OutputAddress = Probe.getAddress(); 2410 const uint32_t InputOffset = BAT->translate( 2411 FuncAddr, OutputAddress - FuncAddr, /*IsBranchSrc=*/true); 2412 const unsigned BlockIndex = getBlock(InputOffset).second; 2413 BlockProbes[BlockIndex].emplace_back(Probe); 2414 } 2415 } 2416 2417 for (auto &[Block, Probes] : BlockProbes) { 2418 YamlBF.Blocks[Block].PseudoProbes = 2419 YAMLProfileWriter::writeBlockProbes(Probes, InlineTreeNodeId); 2420 } 2421 } 2422 // Skip printing if there's no profile data 2423 llvm::erase_if( 2424 YamlBF.Blocks, [](const yaml::bolt::BinaryBasicBlockProfile &YamlBB) { 2425 auto HasCount = [](const auto &SI) { return SI.Count; }; 2426 bool HasAnyCount = YamlBB.ExecCount || 2427 llvm::any_of(YamlBB.Successors, HasCount) || 2428 llvm::any_of(YamlBB.CallSites, HasCount); 2429 return !HasAnyCount; 2430 }); 2431 BP.Functions.emplace_back(YamlBF); 2432 } 2433 } 2434 2435 // Write the profile. 2436 yaml::Output Out(OutFile, nullptr, 0); 2437 Out << BP; 2438 return std::error_code(); 2439 } 2440 2441 void DataAggregator::dump() const { DataReader::dump(); } 2442 2443 void DataAggregator::dump(const LBREntry &LBR) const { 2444 Diag << "From: " << Twine::utohexstr(LBR.From) 2445 << " To: " << Twine::utohexstr(LBR.To) << " Mispred? " << LBR.Mispred 2446 << "\n"; 2447 } 2448 2449 void DataAggregator::dump(const PerfBranchSample &Sample) const { 2450 Diag << "Sample LBR entries: " << Sample.LBR.size() << "\n"; 2451 for (const LBREntry &LBR : Sample.LBR) 2452 dump(LBR); 2453 } 2454 2455 void DataAggregator::dump(const PerfMemSample &Sample) const { 2456 Diag << "Sample mem entries: " << Sample.PC << ": " << Sample.Addr << "\n"; 2457 } 2458