1 //===-- ProfiledBinary.cpp - Binary decoder ---------------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "ProfiledBinary.h" 10 #include "ErrorHandling.h" 11 #include "MissingFrameInferrer.h" 12 #include "ProfileGenerator.h" 13 #include "llvm/ADT/Triple.h" 14 #include "llvm/DebugInfo/Symbolize/SymbolizableModule.h" 15 #include "llvm/Demangle/Demangle.h" 16 #include "llvm/IR/DebugInfoMetadata.h" 17 #include "llvm/MC/TargetRegistry.h" 18 #include "llvm/Support/CommandLine.h" 19 #include "llvm/Support/Debug.h" 20 #include "llvm/Support/Format.h" 21 #include "llvm/Support/TargetSelect.h" 22 #include <optional> 23 24 #define DEBUG_TYPE "load-binary" 25 26 using namespace llvm; 27 using namespace sampleprof; 28 29 cl::opt<bool> ShowDisassemblyOnly("show-disassembly-only", 30 cl::desc("Print disassembled code.")); 31 32 cl::opt<bool> ShowSourceLocations("show-source-locations", 33 cl::desc("Print source locations.")); 34 35 static cl::opt<bool> 36 ShowCanonicalFnName("show-canonical-fname", 37 cl::desc("Print canonical function name.")); 38 39 static cl::opt<bool> ShowPseudoProbe( 40 "show-pseudo-probe", 41 cl::desc("Print pseudo probe section and disassembled info.")); 42 43 static cl::opt<bool> UseDwarfCorrelation( 44 "use-dwarf-correlation", 45 cl::desc("Use dwarf for profile correlation even when binary contains " 46 "pseudo probe.")); 47 48 static cl::opt<std::string> 49 DWPPath("dwp", cl::init(""), 50 cl::desc("Path of .dwp file. When not specified, it will be " 51 "<binary>.dwp in the same directory as the main binary.")); 52 53 static cl::list<std::string> DisassembleFunctions( 54 "disassemble-functions", cl::CommaSeparated, 55 cl::desc("List of functions to print disassembly for. Accept demangled " 56 "names only. Only work with show-disassembly-only")); 57 58 extern cl::opt<bool> ShowDetailedWarning; 59 extern cl::opt<bool> InferMissingFrames; 60 61 namespace llvm { 62 namespace sampleprof { 63 64 static const Target *getTarget(const ObjectFile *Obj) { 65 Triple TheTriple = Obj->makeTriple(); 66 std::string Error; 67 std::string ArchName; 68 const Target *TheTarget = 69 TargetRegistry::lookupTarget(ArchName, TheTriple, Error); 70 if (!TheTarget) 71 exitWithError(Error, Obj->getFileName()); 72 return TheTarget; 73 } 74 75 void BinarySizeContextTracker::addInstructionForContext( 76 const SampleContextFrameVector &Context, uint32_t InstrSize) { 77 ContextTrieNode *CurNode = &RootContext; 78 bool IsLeaf = true; 79 for (const auto &Callsite : reverse(Context)) { 80 StringRef CallerName = Callsite.FuncName; 81 LineLocation CallsiteLoc = IsLeaf ? LineLocation(0, 0) : Callsite.Location; 82 CurNode = CurNode->getOrCreateChildContext(CallsiteLoc, CallerName); 83 IsLeaf = false; 84 } 85 86 CurNode->addFunctionSize(InstrSize); 87 } 88 89 uint32_t 90 BinarySizeContextTracker::getFuncSizeForContext(const ContextTrieNode *Node) { 91 ContextTrieNode *CurrNode = &RootContext; 92 ContextTrieNode *PrevNode = nullptr; 93 94 std::optional<uint32_t> Size; 95 96 // Start from top-level context-less function, traverse down the reverse 97 // context trie to find the best/longest match for given context, then 98 // retrieve the size. 99 LineLocation CallSiteLoc(0, 0); 100 while (CurrNode && Node->getParentContext() != nullptr) { 101 PrevNode = CurrNode; 102 CurrNode = CurrNode->getChildContext(CallSiteLoc, Node->getFuncName()); 103 if (CurrNode && CurrNode->getFunctionSize()) 104 Size = *CurrNode->getFunctionSize(); 105 CallSiteLoc = Node->getCallSiteLoc(); 106 Node = Node->getParentContext(); 107 } 108 109 // If we traversed all nodes along the path of the context and haven't 110 // found a size yet, pivot to look for size from sibling nodes, i.e size 111 // of inlinee under different context. 112 if (!Size) { 113 if (!CurrNode) 114 CurrNode = PrevNode; 115 while (!Size && CurrNode && !CurrNode->getAllChildContext().empty()) { 116 CurrNode = &CurrNode->getAllChildContext().begin()->second; 117 if (CurrNode->getFunctionSize()) 118 Size = *CurrNode->getFunctionSize(); 119 } 120 } 121 122 assert(Size && "We should at least find one context size."); 123 return *Size; 124 } 125 126 void BinarySizeContextTracker::trackInlineesOptimizedAway( 127 MCPseudoProbeDecoder &ProbeDecoder) { 128 ProbeFrameStack ProbeContext; 129 for (const auto &Child : ProbeDecoder.getDummyInlineRoot().getChildren()) 130 trackInlineesOptimizedAway(ProbeDecoder, *Child.second.get(), ProbeContext); 131 } 132 133 void BinarySizeContextTracker::trackInlineesOptimizedAway( 134 MCPseudoProbeDecoder &ProbeDecoder, 135 MCDecodedPseudoProbeInlineTree &ProbeNode, ProbeFrameStack &ProbeContext) { 136 StringRef FuncName = 137 ProbeDecoder.getFuncDescForGUID(ProbeNode.Guid)->FuncName; 138 ProbeContext.emplace_back(FuncName, 0); 139 140 // This ProbeContext has a probe, so it has code before inlining and 141 // optimization. Make sure we mark its size as known. 142 if (!ProbeNode.getProbes().empty()) { 143 ContextTrieNode *SizeContext = &RootContext; 144 for (auto &ProbeFrame : reverse(ProbeContext)) { 145 StringRef CallerName = ProbeFrame.first; 146 LineLocation CallsiteLoc(ProbeFrame.second, 0); 147 SizeContext = 148 SizeContext->getOrCreateChildContext(CallsiteLoc, CallerName); 149 } 150 // Add 0 size to make known. 151 SizeContext->addFunctionSize(0); 152 } 153 154 // DFS down the probe inline tree 155 for (const auto &ChildNode : ProbeNode.getChildren()) { 156 InlineSite Location = ChildNode.first; 157 ProbeContext.back().second = std::get<1>(Location); 158 trackInlineesOptimizedAway(ProbeDecoder, *ChildNode.second.get(), 159 ProbeContext); 160 } 161 162 ProbeContext.pop_back(); 163 } 164 165 ProfiledBinary::ProfiledBinary(const StringRef ExeBinPath, 166 const StringRef DebugBinPath) 167 : Path(ExeBinPath), DebugBinaryPath(DebugBinPath), ProEpilogTracker(this), 168 TrackFuncContextSize(EnableCSPreInliner && UseContextCostForPreInliner) { 169 // Point to executable binary if debug info binary is not specified. 170 SymbolizerPath = DebugBinPath.empty() ? ExeBinPath : DebugBinPath; 171 setupSymbolizer(); 172 if (InferMissingFrames) 173 MissingContextInferrer = std::make_unique<MissingFrameInferrer>(this); 174 load(); 175 } 176 177 ProfiledBinary::~ProfiledBinary() {} 178 179 void ProfiledBinary::warnNoFuncEntry() { 180 uint64_t NoFuncEntryNum = 0; 181 for (auto &F : BinaryFunctions) { 182 if (F.second.Ranges.empty()) 183 continue; 184 bool hasFuncEntry = false; 185 for (auto &R : F.second.Ranges) { 186 if (FuncRange *FR = findFuncRangeForStartAddr(R.first)) { 187 if (FR->IsFuncEntry) { 188 hasFuncEntry = true; 189 break; 190 } 191 } 192 } 193 194 if (!hasFuncEntry) { 195 NoFuncEntryNum++; 196 if (ShowDetailedWarning) 197 WithColor::warning() 198 << "Failed to determine function entry for " << F.first 199 << " due to inconsistent name from symbol table and dwarf info.\n"; 200 } 201 } 202 emitWarningSummary(NoFuncEntryNum, BinaryFunctions.size(), 203 "of functions failed to determine function entry due to " 204 "inconsistent name from symbol table and dwarf info."); 205 } 206 207 void ProfiledBinary::load() { 208 // Attempt to open the binary. 209 OwningBinary<Binary> OBinary = unwrapOrError(createBinary(Path), Path); 210 Binary &ExeBinary = *OBinary.getBinary(); 211 212 auto *Obj = dyn_cast<ELFObjectFileBase>(&ExeBinary); 213 if (!Obj) 214 exitWithError("not a valid Elf image", Path); 215 216 TheTriple = Obj->makeTriple(); 217 // Current only support X86 218 if (!TheTriple.isX86()) 219 exitWithError("unsupported target", TheTriple.getTriple()); 220 LLVM_DEBUG(dbgs() << "Loading " << Path << "\n"); 221 222 // Find the preferred load address for text sections. 223 setPreferredTextSegmentAddresses(Obj); 224 225 // Load debug info of subprograms from DWARF section. 226 // If path of debug info binary is specified, use the debug info from it, 227 // otherwise use the debug info from the executable binary. 228 if (!DebugBinaryPath.empty()) { 229 OwningBinary<Binary> DebugPath = 230 unwrapOrError(createBinary(DebugBinaryPath), DebugBinaryPath); 231 loadSymbolsFromDWARF(*cast<ObjectFile>(DebugPath.getBinary())); 232 } else { 233 loadSymbolsFromDWARF(*cast<ObjectFile>(&ExeBinary)); 234 } 235 236 DisassembleFunctionSet.insert(DisassembleFunctions.begin(), 237 DisassembleFunctions.end()); 238 239 checkPseudoProbe(Obj); 240 241 if (UsePseudoProbes) 242 populateElfSymbolAddressList(Obj); 243 244 if (ShowDisassemblyOnly) 245 decodePseudoProbe(Obj); 246 247 // Disassemble the text sections. 248 disassemble(Obj); 249 250 // Use function start and return address to infer prolog and epilog 251 ProEpilogTracker.inferPrologAddresses(StartAddrToFuncRangeMap); 252 ProEpilogTracker.inferEpilogAddresses(RetAddressSet); 253 254 warnNoFuncEntry(); 255 256 // TODO: decode other sections. 257 } 258 259 bool ProfiledBinary::inlineContextEqual(uint64_t Address1, uint64_t Address2) { 260 const SampleContextFrameVector &Context1 = 261 getCachedFrameLocationStack(Address1); 262 const SampleContextFrameVector &Context2 = 263 getCachedFrameLocationStack(Address2); 264 if (Context1.size() != Context2.size()) 265 return false; 266 if (Context1.empty()) 267 return false; 268 // The leaf frame contains location within the leaf, and it 269 // needs to be remove that as it's not part of the calling context 270 return std::equal(Context1.begin(), Context1.begin() + Context1.size() - 1, 271 Context2.begin(), Context2.begin() + Context2.size() - 1); 272 } 273 274 SampleContextFrameVector 275 ProfiledBinary::getExpandedContext(const SmallVectorImpl<uint64_t> &Stack, 276 bool &WasLeafInlined) { 277 SampleContextFrameVector ContextVec; 278 if (Stack.empty()) 279 return ContextVec; 280 // Process from frame root to leaf 281 for (auto Address : Stack) { 282 const SampleContextFrameVector &ExpandedContext = 283 getCachedFrameLocationStack(Address); 284 // An instruction without a valid debug line will be ignored by sample 285 // processing 286 if (ExpandedContext.empty()) 287 return SampleContextFrameVector(); 288 // Set WasLeafInlined to the size of inlined frame count for the last 289 // address which is leaf 290 WasLeafInlined = (ExpandedContext.size() > 1); 291 ContextVec.append(ExpandedContext); 292 } 293 294 // Replace with decoded base discriminator 295 for (auto &Frame : ContextVec) { 296 Frame.Location.Discriminator = ProfileGeneratorBase::getBaseDiscriminator( 297 Frame.Location.Discriminator, UseFSDiscriminator); 298 } 299 300 assert(ContextVec.size() && "Context length should be at least 1"); 301 302 // Compress the context string except for the leaf frame 303 auto LeafFrame = ContextVec.back(); 304 LeafFrame.Location = LineLocation(0, 0); 305 ContextVec.pop_back(); 306 CSProfileGenerator::compressRecursionContext(ContextVec); 307 CSProfileGenerator::trimContext(ContextVec); 308 ContextVec.push_back(LeafFrame); 309 return ContextVec; 310 } 311 312 template <class ELFT> 313 void ProfiledBinary::setPreferredTextSegmentAddresses(const ELFFile<ELFT> &Obj, 314 StringRef FileName) { 315 const auto &PhdrRange = unwrapOrError(Obj.program_headers(), FileName); 316 // FIXME: This should be the page size of the system running profiling. 317 // However such info isn't available at post-processing time, assuming 318 // 4K page now. Note that we don't use EXEC_PAGESIZE from <linux/param.h> 319 // because we may build the tools on non-linux. 320 uint32_t PageSize = 0x1000; 321 for (const typename ELFT::Phdr &Phdr : PhdrRange) { 322 if (Phdr.p_type == ELF::PT_LOAD) { 323 if (!FirstLoadableAddress) 324 FirstLoadableAddress = Phdr.p_vaddr & ~(PageSize - 1U); 325 if (Phdr.p_flags & ELF::PF_X) { 326 // Segments will always be loaded at a page boundary. 327 PreferredTextSegmentAddresses.push_back(Phdr.p_vaddr & 328 ~(PageSize - 1U)); 329 TextSegmentOffsets.push_back(Phdr.p_offset & ~(PageSize - 1U)); 330 } 331 } 332 } 333 334 if (PreferredTextSegmentAddresses.empty()) 335 exitWithError("no executable segment found", FileName); 336 } 337 338 void ProfiledBinary::setPreferredTextSegmentAddresses( 339 const ELFObjectFileBase *Obj) { 340 if (const auto *ELFObj = dyn_cast<ELF32LEObjectFile>(Obj)) 341 setPreferredTextSegmentAddresses(ELFObj->getELFFile(), Obj->getFileName()); 342 else if (const auto *ELFObj = dyn_cast<ELF32BEObjectFile>(Obj)) 343 setPreferredTextSegmentAddresses(ELFObj->getELFFile(), Obj->getFileName()); 344 else if (const auto *ELFObj = dyn_cast<ELF64LEObjectFile>(Obj)) 345 setPreferredTextSegmentAddresses(ELFObj->getELFFile(), Obj->getFileName()); 346 else if (const auto *ELFObj = cast<ELF64BEObjectFile>(Obj)) 347 setPreferredTextSegmentAddresses(ELFObj->getELFFile(), Obj->getFileName()); 348 else 349 llvm_unreachable("invalid ELF object format"); 350 } 351 352 void ProfiledBinary::checkPseudoProbe(const ELFObjectFileBase *Obj) { 353 if (UseDwarfCorrelation) 354 return; 355 356 bool HasProbeDescSection = false; 357 bool HasPseudoProbeSection = false; 358 359 StringRef FileName = Obj->getFileName(); 360 for (section_iterator SI = Obj->section_begin(), SE = Obj->section_end(); 361 SI != SE; ++SI) { 362 const SectionRef &Section = *SI; 363 StringRef SectionName = unwrapOrError(Section.getName(), FileName); 364 if (SectionName == ".pseudo_probe_desc") { 365 HasProbeDescSection = true; 366 } else if (SectionName == ".pseudo_probe") { 367 HasPseudoProbeSection = true; 368 } 369 } 370 371 // set UsePseudoProbes flag, used for PerfReader 372 UsePseudoProbes = HasProbeDescSection && HasPseudoProbeSection; 373 } 374 375 void ProfiledBinary::decodePseudoProbe(const ELFObjectFileBase *Obj) { 376 if (!UsePseudoProbes) 377 return; 378 379 MCPseudoProbeDecoder::Uint64Set GuidFilter; 380 MCPseudoProbeDecoder::Uint64Map FuncStartAddresses; 381 if (ShowDisassemblyOnly) { 382 if (DisassembleFunctionSet.empty()) { 383 FuncStartAddresses = SymbolStartAddrs; 384 } else { 385 for (auto &F : DisassembleFunctionSet) { 386 auto GUID = Function::getGUID(F.first()); 387 if (auto StartAddr = SymbolStartAddrs.lookup(GUID)) { 388 FuncStartAddresses[GUID] = StartAddr; 389 FuncRange &Range = StartAddrToFuncRangeMap[StartAddr]; 390 GuidFilter.insert(Function::getGUID(Range.getFuncName())); 391 } 392 } 393 } 394 } else { 395 for (auto *F : ProfiledFunctions) { 396 GuidFilter.insert(Function::getGUID(F->FuncName)); 397 for (auto &Range : F->Ranges) { 398 auto GUIDs = StartAddrToSymMap.equal_range(Range.first); 399 for (auto I = GUIDs.first; I != GUIDs.second; ++I) 400 FuncStartAddresses[I->second] = I->first; 401 } 402 } 403 } 404 405 StringRef FileName = Obj->getFileName(); 406 for (section_iterator SI = Obj->section_begin(), SE = Obj->section_end(); 407 SI != SE; ++SI) { 408 const SectionRef &Section = *SI; 409 StringRef SectionName = unwrapOrError(Section.getName(), FileName); 410 411 if (SectionName == ".pseudo_probe_desc") { 412 StringRef Contents = unwrapOrError(Section.getContents(), FileName); 413 if (!ProbeDecoder.buildGUID2FuncDescMap( 414 reinterpret_cast<const uint8_t *>(Contents.data()), 415 Contents.size())) 416 exitWithError( 417 "Pseudo Probe decoder fail in .pseudo_probe_desc section"); 418 } else if (SectionName == ".pseudo_probe") { 419 StringRef Contents = unwrapOrError(Section.getContents(), FileName); 420 if (!ProbeDecoder.buildAddress2ProbeMap( 421 reinterpret_cast<const uint8_t *>(Contents.data()), 422 Contents.size(), GuidFilter, FuncStartAddresses)) 423 exitWithError("Pseudo Probe decoder fail in .pseudo_probe section"); 424 } 425 } 426 427 // Build TopLevelProbeFrameMap to track size for optimized inlinees when probe 428 // is available 429 if (TrackFuncContextSize) { 430 for (const auto &Child : ProbeDecoder.getDummyInlineRoot().getChildren()) { 431 auto *Frame = Child.second.get(); 432 StringRef FuncName = 433 ProbeDecoder.getFuncDescForGUID(Frame->Guid)->FuncName; 434 TopLevelProbeFrameMap[FuncName] = Frame; 435 } 436 } 437 438 if (ShowPseudoProbe) 439 ProbeDecoder.printGUID2FuncDescMap(outs()); 440 } 441 442 void ProfiledBinary::decodePseudoProbe() { 443 OwningBinary<Binary> OBinary = unwrapOrError(createBinary(Path), Path); 444 Binary &ExeBinary = *OBinary.getBinary(); 445 auto *Obj = dyn_cast<ELFObjectFileBase>(&ExeBinary); 446 decodePseudoProbe(Obj); 447 } 448 449 void ProfiledBinary::setIsFuncEntry(FuncRange *FuncRange, 450 StringRef RangeSymName) { 451 // Skip external function symbol. 452 if (!FuncRange) 453 return; 454 455 // Set IsFuncEntry to ture if there is only one range in the function or the 456 // RangeSymName from ELF is equal to its DWARF-based function name. 457 if (FuncRange->Func->Ranges.size() == 1 || 458 (!FuncRange->IsFuncEntry && FuncRange->getFuncName() == RangeSymName)) 459 FuncRange->IsFuncEntry = true; 460 } 461 462 bool ProfiledBinary::dissassembleSymbol(std::size_t SI, ArrayRef<uint8_t> Bytes, 463 SectionSymbolsTy &Symbols, 464 const SectionRef &Section) { 465 std::size_t SE = Symbols.size(); 466 uint64_t SectionAddress = Section.getAddress(); 467 uint64_t SectSize = Section.getSize(); 468 uint64_t StartAddress = Symbols[SI].Addr; 469 uint64_t NextStartAddress = 470 (SI + 1 < SE) ? Symbols[SI + 1].Addr : SectionAddress + SectSize; 471 FuncRange *FRange = findFuncRange(StartAddress); 472 setIsFuncEntry(FRange, FunctionSamples::getCanonicalFnName(Symbols[SI].Name)); 473 StringRef SymbolName = 474 ShowCanonicalFnName 475 ? FunctionSamples::getCanonicalFnName(Symbols[SI].Name) 476 : Symbols[SI].Name; 477 bool ShowDisassembly = 478 ShowDisassemblyOnly && (DisassembleFunctionSet.empty() || 479 DisassembleFunctionSet.count(SymbolName)); 480 if (ShowDisassembly) 481 outs() << '<' << SymbolName << ">:\n"; 482 483 auto WarnInvalidInsts = [](uint64_t Start, uint64_t End) { 484 WithColor::warning() << "Invalid instructions at " 485 << format("%8" PRIx64, Start) << " - " 486 << format("%8" PRIx64, End) << "\n"; 487 }; 488 489 uint64_t Address = StartAddress; 490 // Size of a consecutive invalid instruction range starting from Address -1 491 // backwards. 492 uint64_t InvalidInstLength = 0; 493 while (Address < NextStartAddress) { 494 MCInst Inst; 495 uint64_t Size; 496 // Disassemble an instruction. 497 bool Disassembled = DisAsm->getInstruction( 498 Inst, Size, Bytes.slice(Address - SectionAddress), Address, nulls()); 499 if (Size == 0) 500 Size = 1; 501 502 if (ShowDisassembly) { 503 if (ShowPseudoProbe) { 504 ProbeDecoder.printProbeForAddress(outs(), Address); 505 } 506 outs() << format("%8" PRIx64 ":", Address); 507 size_t Start = outs().tell(); 508 if (Disassembled) 509 IPrinter->printInst(&Inst, Address + Size, "", *STI.get(), outs()); 510 else 511 outs() << "\t<unknown>"; 512 if (ShowSourceLocations) { 513 unsigned Cur = outs().tell() - Start; 514 if (Cur < 40) 515 outs().indent(40 - Cur); 516 InstructionPointer IP(this, Address); 517 outs() << getReversedLocWithContext( 518 symbolize(IP, ShowCanonicalFnName, ShowPseudoProbe)); 519 } 520 outs() << "\n"; 521 } 522 523 if (Disassembled) { 524 const MCInstrDesc &MCDesc = MII->get(Inst.getOpcode()); 525 526 // Record instruction size. 527 AddressToInstSizeMap[Address] = Size; 528 529 // Populate address maps. 530 CodeAddressVec.push_back(Address); 531 if (MCDesc.isCall()) { 532 CallAddressSet.insert(Address); 533 UncondBranchAddrSet.insert(Address); 534 } else if (MCDesc.isReturn()) { 535 RetAddressSet.insert(Address); 536 UncondBranchAddrSet.insert(Address); 537 } else if (MCDesc.isBranch()) { 538 if (MCDesc.isUnconditionalBranch()) 539 UncondBranchAddrSet.insert(Address); 540 BranchAddressSet.insert(Address); 541 } 542 543 // Record potential call targets for tail frame inference later-on. 544 if (InferMissingFrames && FRange) { 545 uint64_t Target = 0; 546 MIA->evaluateBranch(Inst, Address, Size, Target); 547 if (MCDesc.isCall()) { 548 // Indirect call targets are unknown at this point. Recording the 549 // unknown target (zero) for further LBR-based refinement. 550 MissingContextInferrer->CallEdges[Address].insert(Target); 551 } else if (MCDesc.isUnconditionalBranch()) { 552 assert(Target && 553 "target should be known for unconditional direct branch"); 554 // Any inter-function unconditional jump is considered tail call at 555 // this point. This is not 100% accurate and could further be 556 // optimized based on some source annotation. 557 FuncRange *ToFRange = findFuncRange(Target); 558 if (ToFRange && ToFRange->Func != FRange->Func) 559 MissingContextInferrer->TailCallEdges[Address].insert(Target); 560 LLVM_DEBUG({ 561 dbgs() << "Direct Tail call: " << format("%8" PRIx64 ":", Address); 562 IPrinter->printInst(&Inst, Address + Size, "", *STI.get(), dbgs()); 563 dbgs() << "\n"; 564 }); 565 } else if (MCDesc.isIndirectBranch() && MCDesc.isBarrier()) { 566 // This is an indirect branch but not necessarily an indirect tail 567 // call. The isBarrier check is to filter out conditional branch. 568 // Similar with indirect call targets, recording the unknown target 569 // (zero) for further LBR-based refinement. 570 MissingContextInferrer->TailCallEdges[Address].insert(Target); 571 LLVM_DEBUG({ 572 dbgs() << "Indirect Tail call: " 573 << format("%8" PRIx64 ":", Address); 574 IPrinter->printInst(&Inst, Address + Size, "", *STI.get(), dbgs()); 575 dbgs() << "\n"; 576 }); 577 } 578 } 579 580 if (InvalidInstLength) { 581 WarnInvalidInsts(Address - InvalidInstLength, Address - 1); 582 InvalidInstLength = 0; 583 } 584 } else { 585 InvalidInstLength += Size; 586 } 587 588 Address += Size; 589 } 590 591 if (InvalidInstLength) 592 WarnInvalidInsts(Address - InvalidInstLength, Address - 1); 593 594 if (ShowDisassembly) 595 outs() << "\n"; 596 597 return true; 598 } 599 600 void ProfiledBinary::setUpDisassembler(const ELFObjectFileBase *Obj) { 601 const Target *TheTarget = getTarget(Obj); 602 std::string TripleName = TheTriple.getTriple(); 603 StringRef FileName = Obj->getFileName(); 604 605 MRI.reset(TheTarget->createMCRegInfo(TripleName)); 606 if (!MRI) 607 exitWithError("no register info for target " + TripleName, FileName); 608 609 MCTargetOptions MCOptions; 610 AsmInfo.reset(TheTarget->createMCAsmInfo(*MRI, TripleName, MCOptions)); 611 if (!AsmInfo) 612 exitWithError("no assembly info for target " + TripleName, FileName); 613 614 Expected<SubtargetFeatures> Features = Obj->getFeatures(); 615 if (!Features) 616 exitWithError(Features.takeError(), FileName); 617 STI.reset( 618 TheTarget->createMCSubtargetInfo(TripleName, "", Features->getString())); 619 if (!STI) 620 exitWithError("no subtarget info for target " + TripleName, FileName); 621 622 MII.reset(TheTarget->createMCInstrInfo()); 623 if (!MII) 624 exitWithError("no instruction info for target " + TripleName, FileName); 625 626 MCContext Ctx(Triple(TripleName), AsmInfo.get(), MRI.get(), STI.get()); 627 std::unique_ptr<MCObjectFileInfo> MOFI( 628 TheTarget->createMCObjectFileInfo(Ctx, /*PIC=*/false)); 629 Ctx.setObjectFileInfo(MOFI.get()); 630 DisAsm.reset(TheTarget->createMCDisassembler(*STI, Ctx)); 631 if (!DisAsm) 632 exitWithError("no disassembler for target " + TripleName, FileName); 633 634 MIA.reset(TheTarget->createMCInstrAnalysis(MII.get())); 635 636 int AsmPrinterVariant = AsmInfo->getAssemblerDialect(); 637 IPrinter.reset(TheTarget->createMCInstPrinter( 638 Triple(TripleName), AsmPrinterVariant, *AsmInfo, *MII, *MRI)); 639 IPrinter->setPrintBranchImmAsAddress(true); 640 } 641 642 void ProfiledBinary::disassemble(const ELFObjectFileBase *Obj) { 643 // Set up disassembler and related components. 644 setUpDisassembler(Obj); 645 646 // Create a mapping from virtual address to symbol name. The symbols in text 647 // sections are the candidates to dissassemble. 648 std::map<SectionRef, SectionSymbolsTy> AllSymbols; 649 StringRef FileName = Obj->getFileName(); 650 for (const SymbolRef &Symbol : Obj->symbols()) { 651 const uint64_t Addr = unwrapOrError(Symbol.getAddress(), FileName); 652 const StringRef Name = unwrapOrError(Symbol.getName(), FileName); 653 section_iterator SecI = unwrapOrError(Symbol.getSection(), FileName); 654 if (SecI != Obj->section_end()) 655 AllSymbols[*SecI].push_back(SymbolInfoTy(Addr, Name, ELF::STT_NOTYPE)); 656 } 657 658 // Sort all the symbols. Use a stable sort to stabilize the output. 659 for (std::pair<const SectionRef, SectionSymbolsTy> &SecSyms : AllSymbols) 660 stable_sort(SecSyms.second); 661 662 assert((DisassembleFunctionSet.empty() || ShowDisassemblyOnly) && 663 "Functions to disassemble should be only specified together with " 664 "--show-disassembly-only"); 665 666 if (ShowDisassemblyOnly) 667 outs() << "\nDisassembly of " << FileName << ":\n"; 668 669 // Dissassemble a text section. 670 for (section_iterator SI = Obj->section_begin(), SE = Obj->section_end(); 671 SI != SE; ++SI) { 672 const SectionRef &Section = *SI; 673 if (!Section.isText()) 674 continue; 675 676 uint64_t ImageLoadAddr = getPreferredBaseAddress(); 677 uint64_t SectionAddress = Section.getAddress() - ImageLoadAddr; 678 uint64_t SectSize = Section.getSize(); 679 if (!SectSize) 680 continue; 681 682 // Register the text section. 683 TextSections.insert({SectionAddress, SectSize}); 684 685 StringRef SectionName = unwrapOrError(Section.getName(), FileName); 686 687 if (ShowDisassemblyOnly) { 688 outs() << "\nDisassembly of section " << SectionName; 689 outs() << " [" << format("0x%" PRIx64, Section.getAddress()) << ", " 690 << format("0x%" PRIx64, Section.getAddress() + SectSize) 691 << "]:\n\n"; 692 } 693 694 if (SectionName == ".plt") 695 continue; 696 697 // Get the section data. 698 ArrayRef<uint8_t> Bytes = 699 arrayRefFromStringRef(unwrapOrError(Section.getContents(), FileName)); 700 701 // Get the list of all the symbols in this section. 702 SectionSymbolsTy &Symbols = AllSymbols[Section]; 703 704 // Disassemble symbol by symbol. 705 for (std::size_t SI = 0, SE = Symbols.size(); SI != SE; ++SI) { 706 if (!dissassembleSymbol(SI, Bytes, Symbols, Section)) 707 exitWithError("disassembling error", FileName); 708 } 709 } 710 711 // Dissassemble rodata section to check if FS discriminator symbol exists. 712 checkUseFSDiscriminator(Obj, AllSymbols); 713 } 714 715 void ProfiledBinary::checkUseFSDiscriminator( 716 const ELFObjectFileBase *Obj, 717 std::map<SectionRef, SectionSymbolsTy> &AllSymbols) { 718 const char *FSDiscriminatorVar = "__llvm_fs_discriminator__"; 719 for (section_iterator SI = Obj->section_begin(), SE = Obj->section_end(); 720 SI != SE; ++SI) { 721 const SectionRef &Section = *SI; 722 if (!Section.isData() || Section.getSize() == 0) 723 continue; 724 SectionSymbolsTy &Symbols = AllSymbols[Section]; 725 726 for (std::size_t SI = 0, SE = Symbols.size(); SI != SE; ++SI) { 727 if (Symbols[SI].Name == FSDiscriminatorVar) { 728 UseFSDiscriminator = true; 729 return; 730 } 731 } 732 } 733 } 734 735 void ProfiledBinary::populateElfSymbolAddressList( 736 const ELFObjectFileBase *Obj) { 737 // Create a mapping from virtual address to symbol GUID and the other way 738 // around. 739 StringRef FileName = Obj->getFileName(); 740 for (const SymbolRef &Symbol : Obj->symbols()) { 741 const uint64_t Addr = unwrapOrError(Symbol.getAddress(), FileName); 742 const StringRef Name = unwrapOrError(Symbol.getName(), FileName); 743 uint64_t GUID = Function::getGUID(Name); 744 SymbolStartAddrs[GUID] = Addr; 745 StartAddrToSymMap.emplace(Addr, GUID); 746 } 747 } 748 749 void ProfiledBinary::loadSymbolsFromDWARFUnit(DWARFUnit &CompilationUnit) { 750 for (const auto &DieInfo : CompilationUnit.dies()) { 751 llvm::DWARFDie Die(&CompilationUnit, &DieInfo); 752 753 if (!Die.isSubprogramDIE()) 754 continue; 755 auto Name = Die.getName(llvm::DINameKind::LinkageName); 756 if (!Name) 757 Name = Die.getName(llvm::DINameKind::ShortName); 758 if (!Name) 759 continue; 760 761 auto RangesOrError = Die.getAddressRanges(); 762 if (!RangesOrError) 763 continue; 764 const DWARFAddressRangesVector &Ranges = RangesOrError.get(); 765 766 if (Ranges.empty()) 767 continue; 768 769 // Different DWARF symbols can have same function name, search or create 770 // BinaryFunction indexed by the name. 771 auto Ret = BinaryFunctions.emplace(Name, BinaryFunction()); 772 auto &Func = Ret.first->second; 773 if (Ret.second) 774 Func.FuncName = Ret.first->first; 775 776 for (const auto &Range : Ranges) { 777 uint64_t StartAddress = Range.LowPC; 778 uint64_t EndAddress = Range.HighPC; 779 780 if (EndAddress <= StartAddress || 781 StartAddress < getPreferredBaseAddress()) 782 continue; 783 784 // We may want to know all ranges for one function. Here group the 785 // ranges and store them into BinaryFunction. 786 Func.Ranges.emplace_back(StartAddress, EndAddress); 787 788 auto R = StartAddrToFuncRangeMap.emplace(StartAddress, FuncRange()); 789 if (R.second) { 790 FuncRange &FRange = R.first->second; 791 FRange.Func = &Func; 792 FRange.StartAddress = StartAddress; 793 FRange.EndAddress = EndAddress; 794 } else { 795 WithColor::warning() 796 << "Duplicated symbol start address at " 797 << format("%8" PRIx64, StartAddress) << " " 798 << R.first->second.getFuncName() << " and " << Name << "\n"; 799 } 800 } 801 } 802 } 803 804 void ProfiledBinary::loadSymbolsFromDWARF(ObjectFile &Obj) { 805 auto DebugContext = llvm::DWARFContext::create( 806 Obj, DWARFContext::ProcessDebugRelocations::Process, nullptr, DWPPath); 807 if (!DebugContext) 808 exitWithError("Error creating the debug info context", Path); 809 810 for (const auto &CompilationUnit : DebugContext->compile_units()) 811 loadSymbolsFromDWARFUnit(*CompilationUnit.get()); 812 813 // Handles DWO sections that can either be in .o, .dwo or .dwp files. 814 for (const auto &CompilationUnit : DebugContext->compile_units()) { 815 DWARFUnit *const DwarfUnit = CompilationUnit.get(); 816 if (std::optional<uint64_t> DWOId = DwarfUnit->getDWOId()) { 817 DWARFUnit *DWOCU = DwarfUnit->getNonSkeletonUnitDIE(false).getDwarfUnit(); 818 if (!DWOCU->isDWOUnit()) { 819 std::string DWOName = dwarf::toString( 820 DwarfUnit->getUnitDIE().find( 821 {dwarf::DW_AT_dwo_name, dwarf::DW_AT_GNU_dwo_name}), 822 ""); 823 WithColor::warning() 824 << "DWO debug information for " << DWOName 825 << " was not loaded. Please check the .o, .dwo or .dwp path.\n"; 826 continue; 827 } 828 loadSymbolsFromDWARFUnit(*DWOCU); 829 } 830 } 831 832 if (BinaryFunctions.empty()) 833 WithColor::warning() << "Loading of DWARF info completed, but no binary " 834 "functions have been retrieved.\n"; 835 } 836 837 void ProfiledBinary::populateSymbolListFromDWARF( 838 ProfileSymbolList &SymbolList) { 839 for (auto &I : StartAddrToFuncRangeMap) 840 SymbolList.add(I.second.getFuncName()); 841 } 842 843 void ProfiledBinary::setupSymbolizer() { 844 symbolize::LLVMSymbolizer::Options SymbolizerOpts; 845 SymbolizerOpts.PrintFunctions = 846 DILineInfoSpecifier::FunctionNameKind::LinkageName; 847 SymbolizerOpts.Demangle = false; 848 SymbolizerOpts.DefaultArch = TheTriple.getArchName().str(); 849 SymbolizerOpts.UseSymbolTable = false; 850 SymbolizerOpts.RelativeAddresses = false; 851 SymbolizerOpts.DWPName = DWPPath; 852 Symbolizer = std::make_unique<symbolize::LLVMSymbolizer>(SymbolizerOpts); 853 } 854 855 SampleContextFrameVector ProfiledBinary::symbolize(const InstructionPointer &IP, 856 bool UseCanonicalFnName, 857 bool UseProbeDiscriminator) { 858 assert(this == IP.Binary && 859 "Binary should only symbolize its own instruction"); 860 auto Addr = object::SectionedAddress{IP.Address, 861 object::SectionedAddress::UndefSection}; 862 DIInliningInfo InlineStack = unwrapOrError( 863 Symbolizer->symbolizeInlinedCode(SymbolizerPath.str(), Addr), 864 SymbolizerPath); 865 866 SampleContextFrameVector CallStack; 867 for (int32_t I = InlineStack.getNumberOfFrames() - 1; I >= 0; I--) { 868 const auto &CallerFrame = InlineStack.getFrame(I); 869 if (CallerFrame.FunctionName == "<invalid>") 870 break; 871 872 StringRef FunctionName(CallerFrame.FunctionName); 873 if (UseCanonicalFnName) 874 FunctionName = FunctionSamples::getCanonicalFnName(FunctionName); 875 876 uint32_t Discriminator = CallerFrame.Discriminator; 877 uint32_t LineOffset = (CallerFrame.Line - CallerFrame.StartLine) & 0xffff; 878 if (UseProbeDiscriminator) { 879 LineOffset = 880 PseudoProbeDwarfDiscriminator::extractProbeIndex(Discriminator); 881 Discriminator = 0; 882 } 883 884 LineLocation Line(LineOffset, Discriminator); 885 auto It = NameStrings.insert(FunctionName.str()); 886 CallStack.emplace_back(*It.first, Line); 887 } 888 889 return CallStack; 890 } 891 892 void ProfiledBinary::computeInlinedContextSizeForRange(uint64_t RangeBegin, 893 uint64_t RangeEnd) { 894 InstructionPointer IP(this, RangeBegin, true); 895 896 if (IP.Address != RangeBegin) 897 WithColor::warning() << "Invalid start instruction at " 898 << format("%8" PRIx64, RangeBegin) << "\n"; 899 900 if (IP.Address >= RangeEnd) 901 return; 902 903 do { 904 const SampleContextFrameVector SymbolizedCallStack = 905 getFrameLocationStack(IP.Address, UsePseudoProbes); 906 uint64_t Size = AddressToInstSizeMap[IP.Address]; 907 // Record instruction size for the corresponding context 908 FuncSizeTracker.addInstructionForContext(SymbolizedCallStack, Size); 909 910 } while (IP.advance() && IP.Address < RangeEnd); 911 } 912 913 void ProfiledBinary::computeInlinedContextSizeForFunc( 914 const BinaryFunction *Func) { 915 // Note that a function can be spilt into multiple ranges, so compute for all 916 // ranges of the function. 917 for (const auto &Range : Func->Ranges) 918 computeInlinedContextSizeForRange(Range.first, Range.second); 919 920 // Track optimized-away inlinee for probed binary. A function inlined and then 921 // optimized away should still have their probes left over in places. 922 if (usePseudoProbes()) { 923 auto I = TopLevelProbeFrameMap.find(Func->FuncName); 924 if (I != TopLevelProbeFrameMap.end()) { 925 BinarySizeContextTracker::ProbeFrameStack ProbeContext; 926 FuncSizeTracker.trackInlineesOptimizedAway(ProbeDecoder, *I->second, 927 ProbeContext); 928 } 929 } 930 } 931 932 void ProfiledBinary::inferMissingFrames( 933 const SmallVectorImpl<uint64_t> &Context, 934 SmallVectorImpl<uint64_t> &NewContext) { 935 MissingContextInferrer->inferMissingFrames(Context, NewContext); 936 } 937 938 InstructionPointer::InstructionPointer(const ProfiledBinary *Binary, 939 uint64_t Address, bool RoundToNext) 940 : Binary(Binary), Address(Address) { 941 Index = Binary->getIndexForAddr(Address); 942 if (RoundToNext) { 943 // we might get address which is not the code 944 // it should round to the next valid address 945 if (Index >= Binary->getCodeAddrVecSize()) 946 this->Address = UINT64_MAX; 947 else 948 this->Address = Binary->getAddressforIndex(Index); 949 } 950 } 951 952 bool InstructionPointer::advance() { 953 Index++; 954 if (Index >= Binary->getCodeAddrVecSize()) { 955 Address = UINT64_MAX; 956 return false; 957 } 958 Address = Binary->getAddressforIndex(Index); 959 return true; 960 } 961 962 bool InstructionPointer::backward() { 963 if (Index == 0) { 964 Address = 0; 965 return false; 966 } 967 Index--; 968 Address = Binary->getAddressforIndex(Index); 969 return true; 970 } 971 972 void InstructionPointer::update(uint64_t Addr) { 973 Address = Addr; 974 Index = Binary->getIndexForAddr(Address); 975 } 976 977 } // end namespace sampleprof 978 } // end namespace llvm 979