1 //===-- ProfiledBinary.cpp - Binary decoder ---------------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "ProfiledBinary.h" 10 #include "ErrorHandling.h" 11 #include "MissingFrameInferrer.h" 12 #include "ProfileGenerator.h" 13 #include "llvm/ADT/Triple.h" 14 #include "llvm/DebugInfo/Symbolize/SymbolizableModule.h" 15 #include "llvm/Demangle/Demangle.h" 16 #include "llvm/IR/DebugInfoMetadata.h" 17 #include "llvm/MC/TargetRegistry.h" 18 #include "llvm/Support/CommandLine.h" 19 #include "llvm/Support/Debug.h" 20 #include "llvm/Support/Format.h" 21 #include "llvm/Support/TargetSelect.h" 22 #include <optional> 23 24 #define DEBUG_TYPE "load-binary" 25 26 using namespace llvm; 27 using namespace sampleprof; 28 29 cl::opt<bool> ShowDisassemblyOnly("show-disassembly-only", 30 cl::desc("Print disassembled code.")); 31 32 cl::opt<bool> ShowSourceLocations("show-source-locations", 33 cl::desc("Print source locations.")); 34 35 static cl::opt<bool> 36 ShowCanonicalFnName("show-canonical-fname", 37 cl::desc("Print canonical function name.")); 38 39 static cl::opt<bool> ShowPseudoProbe( 40 "show-pseudo-probe", 41 cl::desc("Print pseudo probe section and disassembled info.")); 42 43 static cl::opt<bool> UseDwarfCorrelation( 44 "use-dwarf-correlation", 45 cl::desc("Use dwarf for profile correlation even when binary contains " 46 "pseudo probe.")); 47 48 static cl::opt<std::string> 49 DWPPath("dwp", cl::init(""), 50 cl::desc("Path of .dwp file. When not specified, it will be " 51 "<binary>.dwp in the same directory as the main binary.")); 52 53 static cl::list<std::string> DisassembleFunctions( 54 "disassemble-functions", cl::CommaSeparated, 55 cl::desc("List of functions to print disassembly for. Accept demangled " 56 "names only. Only work with show-disassembly-only")); 57 58 extern cl::opt<bool> ShowDetailedWarning; 59 extern cl::opt<bool> InferMissingFrames; 60 61 namespace llvm { 62 namespace sampleprof { 63 64 static const Target *getTarget(const ObjectFile *Obj) { 65 Triple TheTriple = Obj->makeTriple(); 66 std::string Error; 67 std::string ArchName; 68 const Target *TheTarget = 69 TargetRegistry::lookupTarget(ArchName, TheTriple, Error); 70 if (!TheTarget) 71 exitWithError(Error, Obj->getFileName()); 72 return TheTarget; 73 } 74 75 void BinarySizeContextTracker::addInstructionForContext( 76 const SampleContextFrameVector &Context, uint32_t InstrSize) { 77 ContextTrieNode *CurNode = &RootContext; 78 bool IsLeaf = true; 79 for (const auto &Callsite : reverse(Context)) { 80 StringRef CallerName = Callsite.FuncName; 81 LineLocation CallsiteLoc = IsLeaf ? LineLocation(0, 0) : Callsite.Location; 82 CurNode = CurNode->getOrCreateChildContext(CallsiteLoc, CallerName); 83 IsLeaf = false; 84 } 85 86 CurNode->addFunctionSize(InstrSize); 87 } 88 89 uint32_t 90 BinarySizeContextTracker::getFuncSizeForContext(const ContextTrieNode *Node) { 91 ContextTrieNode *CurrNode = &RootContext; 92 ContextTrieNode *PrevNode = nullptr; 93 94 std::optional<uint32_t> Size; 95 96 // Start from top-level context-less function, traverse down the reverse 97 // context trie to find the best/longest match for given context, then 98 // retrieve the size. 99 LineLocation CallSiteLoc(0, 0); 100 while (CurrNode && Node->getParentContext() != nullptr) { 101 PrevNode = CurrNode; 102 CurrNode = CurrNode->getChildContext(CallSiteLoc, Node->getFuncName()); 103 if (CurrNode && CurrNode->getFunctionSize()) 104 Size = *CurrNode->getFunctionSize(); 105 CallSiteLoc = Node->getCallSiteLoc(); 106 Node = Node->getParentContext(); 107 } 108 109 // If we traversed all nodes along the path of the context and haven't 110 // found a size yet, pivot to look for size from sibling nodes, i.e size 111 // of inlinee under different context. 112 if (!Size) { 113 if (!CurrNode) 114 CurrNode = PrevNode; 115 while (!Size && CurrNode && !CurrNode->getAllChildContext().empty()) { 116 CurrNode = &CurrNode->getAllChildContext().begin()->second; 117 if (CurrNode->getFunctionSize()) 118 Size = *CurrNode->getFunctionSize(); 119 } 120 } 121 122 assert(Size && "We should at least find one context size."); 123 return *Size; 124 } 125 126 void BinarySizeContextTracker::trackInlineesOptimizedAway( 127 MCPseudoProbeDecoder &ProbeDecoder) { 128 ProbeFrameStack ProbeContext; 129 for (const auto &Child : ProbeDecoder.getDummyInlineRoot().getChildren()) 130 trackInlineesOptimizedAway(ProbeDecoder, *Child.second.get(), ProbeContext); 131 } 132 133 void BinarySizeContextTracker::trackInlineesOptimizedAway( 134 MCPseudoProbeDecoder &ProbeDecoder, 135 MCDecodedPseudoProbeInlineTree &ProbeNode, ProbeFrameStack &ProbeContext) { 136 StringRef FuncName = 137 ProbeDecoder.getFuncDescForGUID(ProbeNode.Guid)->FuncName; 138 ProbeContext.emplace_back(FuncName, 0); 139 140 // This ProbeContext has a probe, so it has code before inlining and 141 // optimization. Make sure we mark its size as known. 142 if (!ProbeNode.getProbes().empty()) { 143 ContextTrieNode *SizeContext = &RootContext; 144 for (auto &ProbeFrame : reverse(ProbeContext)) { 145 StringRef CallerName = ProbeFrame.first; 146 LineLocation CallsiteLoc(ProbeFrame.second, 0); 147 SizeContext = 148 SizeContext->getOrCreateChildContext(CallsiteLoc, CallerName); 149 } 150 // Add 0 size to make known. 151 SizeContext->addFunctionSize(0); 152 } 153 154 // DFS down the probe inline tree 155 for (const auto &ChildNode : ProbeNode.getChildren()) { 156 InlineSite Location = ChildNode.first; 157 ProbeContext.back().second = std::get<1>(Location); 158 trackInlineesOptimizedAway(ProbeDecoder, *ChildNode.second.get(), 159 ProbeContext); 160 } 161 162 ProbeContext.pop_back(); 163 } 164 165 ProfiledBinary::ProfiledBinary(const StringRef ExeBinPath, 166 const StringRef DebugBinPath) 167 : Path(ExeBinPath), DebugBinaryPath(DebugBinPath), ProEpilogTracker(this), 168 TrackFuncContextSize(EnableCSPreInliner && UseContextCostForPreInliner) { 169 // Point to executable binary if debug info binary is not specified. 170 SymbolizerPath = DebugBinPath.empty() ? ExeBinPath : DebugBinPath; 171 setupSymbolizer(); 172 if (InferMissingFrames) 173 MissingContextInferrer = std::make_unique<MissingFrameInferrer>(this); 174 load(); 175 } 176 177 ProfiledBinary::~ProfiledBinary() {} 178 179 void ProfiledBinary::warnNoFuncEntry() { 180 uint64_t NoFuncEntryNum = 0; 181 for (auto &F : BinaryFunctions) { 182 if (F.second.Ranges.empty()) 183 continue; 184 bool hasFuncEntry = false; 185 for (auto &R : F.second.Ranges) { 186 if (FuncRange *FR = findFuncRangeForStartAddr(R.first)) { 187 if (FR->IsFuncEntry) { 188 hasFuncEntry = true; 189 break; 190 } 191 } 192 } 193 194 if (!hasFuncEntry) { 195 NoFuncEntryNum++; 196 if (ShowDetailedWarning) 197 WithColor::warning() 198 << "Failed to determine function entry for " << F.first 199 << " due to inconsistent name from symbol table and dwarf info.\n"; 200 } 201 } 202 emitWarningSummary(NoFuncEntryNum, BinaryFunctions.size(), 203 "of functions failed to determine function entry due to " 204 "inconsistent name from symbol table and dwarf info."); 205 } 206 207 void ProfiledBinary::load() { 208 // Attempt to open the binary. 209 OwningBinary<Binary> OBinary = unwrapOrError(createBinary(Path), Path); 210 Binary &ExeBinary = *OBinary.getBinary(); 211 212 auto *Obj = dyn_cast<ELFObjectFileBase>(&ExeBinary); 213 if (!Obj) 214 exitWithError("not a valid Elf image", Path); 215 216 TheTriple = Obj->makeTriple(); 217 // Current only support X86 218 if (!TheTriple.isX86()) 219 exitWithError("unsupported target", TheTriple.getTriple()); 220 LLVM_DEBUG(dbgs() << "Loading " << Path << "\n"); 221 222 // Find the preferred load address for text sections. 223 setPreferredTextSegmentAddresses(Obj); 224 225 // Load debug info of subprograms from DWARF section. 226 // If path of debug info binary is specified, use the debug info from it, 227 // otherwise use the debug info from the executable binary. 228 if (!DebugBinaryPath.empty()) { 229 OwningBinary<Binary> DebugPath = 230 unwrapOrError(createBinary(DebugBinaryPath), DebugBinaryPath); 231 loadSymbolsFromDWARF(*cast<ObjectFile>(DebugPath.getBinary())); 232 } else { 233 loadSymbolsFromDWARF(*cast<ObjectFile>(&ExeBinary)); 234 } 235 236 DisassembleFunctionSet.insert(DisassembleFunctions.begin(), 237 DisassembleFunctions.end()); 238 239 checkPseudoProbe(Obj); 240 241 if (UsePseudoProbes) 242 populateElfSymbolAddressList(Obj); 243 244 if (ShowDisassemblyOnly) 245 decodePseudoProbe(Obj); 246 247 // Disassemble the text sections. 248 disassemble(Obj); 249 250 // Use function start and return address to infer prolog and epilog 251 ProEpilogTracker.inferPrologAddresses(StartAddrToFuncRangeMap); 252 ProEpilogTracker.inferEpilogAddresses(RetAddressSet); 253 254 warnNoFuncEntry(); 255 256 // TODO: decode other sections. 257 } 258 259 bool ProfiledBinary::inlineContextEqual(uint64_t Address1, uint64_t Address2) { 260 const SampleContextFrameVector &Context1 = 261 getCachedFrameLocationStack(Address1); 262 const SampleContextFrameVector &Context2 = 263 getCachedFrameLocationStack(Address2); 264 if (Context1.size() != Context2.size()) 265 return false; 266 if (Context1.empty()) 267 return false; 268 // The leaf frame contains location within the leaf, and it 269 // needs to be remove that as it's not part of the calling context 270 return std::equal(Context1.begin(), Context1.begin() + Context1.size() - 1, 271 Context2.begin(), Context2.begin() + Context2.size() - 1); 272 } 273 274 SampleContextFrameVector 275 ProfiledBinary::getExpandedContext(const SmallVectorImpl<uint64_t> &Stack, 276 bool &WasLeafInlined) { 277 SampleContextFrameVector ContextVec; 278 if (Stack.empty()) 279 return ContextVec; 280 // Process from frame root to leaf 281 for (auto Address : Stack) { 282 const SampleContextFrameVector &ExpandedContext = 283 getCachedFrameLocationStack(Address); 284 // An instruction without a valid debug line will be ignored by sample 285 // processing 286 if (ExpandedContext.empty()) 287 return SampleContextFrameVector(); 288 // Set WasLeafInlined to the size of inlined frame count for the last 289 // address which is leaf 290 WasLeafInlined = (ExpandedContext.size() > 1); 291 ContextVec.append(ExpandedContext); 292 } 293 294 // Replace with decoded base discriminator 295 for (auto &Frame : ContextVec) { 296 Frame.Location.Discriminator = ProfileGeneratorBase::getBaseDiscriminator( 297 Frame.Location.Discriminator, UseFSDiscriminator); 298 } 299 300 assert(ContextVec.size() && "Context length should be at least 1"); 301 302 // Compress the context string except for the leaf frame 303 auto LeafFrame = ContextVec.back(); 304 LeafFrame.Location = LineLocation(0, 0); 305 ContextVec.pop_back(); 306 CSProfileGenerator::compressRecursionContext(ContextVec); 307 CSProfileGenerator::trimContext(ContextVec); 308 ContextVec.push_back(LeafFrame); 309 return ContextVec; 310 } 311 312 template <class ELFT> 313 void ProfiledBinary::setPreferredTextSegmentAddresses(const ELFFile<ELFT> &Obj, 314 StringRef FileName) { 315 const auto &PhdrRange = unwrapOrError(Obj.program_headers(), FileName); 316 // FIXME: This should be the page size of the system running profiling. 317 // However such info isn't available at post-processing time, assuming 318 // 4K page now. Note that we don't use EXEC_PAGESIZE from <linux/param.h> 319 // because we may build the tools on non-linux. 320 uint32_t PageSize = 0x1000; 321 for (const typename ELFT::Phdr &Phdr : PhdrRange) { 322 if (Phdr.p_type == ELF::PT_LOAD) { 323 if (!FirstLoadableAddress) 324 FirstLoadableAddress = Phdr.p_vaddr & ~(PageSize - 1U); 325 if (Phdr.p_flags & ELF::PF_X) { 326 // Segments will always be loaded at a page boundary. 327 PreferredTextSegmentAddresses.push_back(Phdr.p_vaddr & 328 ~(PageSize - 1U)); 329 TextSegmentOffsets.push_back(Phdr.p_offset & ~(PageSize - 1U)); 330 } 331 } 332 } 333 334 if (PreferredTextSegmentAddresses.empty()) 335 exitWithError("no executable segment found", FileName); 336 } 337 338 void ProfiledBinary::setPreferredTextSegmentAddresses( 339 const ELFObjectFileBase *Obj) { 340 if (const auto *ELFObj = dyn_cast<ELF32LEObjectFile>(Obj)) 341 setPreferredTextSegmentAddresses(ELFObj->getELFFile(), Obj->getFileName()); 342 else if (const auto *ELFObj = dyn_cast<ELF32BEObjectFile>(Obj)) 343 setPreferredTextSegmentAddresses(ELFObj->getELFFile(), Obj->getFileName()); 344 else if (const auto *ELFObj = dyn_cast<ELF64LEObjectFile>(Obj)) 345 setPreferredTextSegmentAddresses(ELFObj->getELFFile(), Obj->getFileName()); 346 else if (const auto *ELFObj = cast<ELF64BEObjectFile>(Obj)) 347 setPreferredTextSegmentAddresses(ELFObj->getELFFile(), Obj->getFileName()); 348 else 349 llvm_unreachable("invalid ELF object format"); 350 } 351 352 void ProfiledBinary::checkPseudoProbe(const ELFObjectFileBase *Obj) { 353 if (UseDwarfCorrelation) 354 return; 355 356 bool HasProbeDescSection = false; 357 bool HasPseudoProbeSection = false; 358 359 StringRef FileName = Obj->getFileName(); 360 for (section_iterator SI = Obj->section_begin(), SE = Obj->section_end(); 361 SI != SE; ++SI) { 362 const SectionRef &Section = *SI; 363 StringRef SectionName = unwrapOrError(Section.getName(), FileName); 364 if (SectionName == ".pseudo_probe_desc") { 365 HasProbeDescSection = true; 366 } else if (SectionName == ".pseudo_probe") { 367 HasPseudoProbeSection = true; 368 } 369 } 370 371 // set UsePseudoProbes flag, used for PerfReader 372 UsePseudoProbes = HasProbeDescSection && HasPseudoProbeSection; 373 } 374 375 void ProfiledBinary::decodePseudoProbe(const ELFObjectFileBase *Obj) { 376 if (!UsePseudoProbes) 377 return; 378 379 MCPseudoProbeDecoder::Uint64Set GuidFilter; 380 MCPseudoProbeDecoder::Uint64Map FuncStartAddresses; 381 if (ShowDisassemblyOnly) { 382 if (DisassembleFunctionSet.empty()) { 383 FuncStartAddresses = SymbolStartAddrs; 384 } else { 385 for (auto &F : DisassembleFunctionSet) { 386 auto GUID = Function::getGUID(F.first()); 387 if (auto StartAddr = SymbolStartAddrs.lookup(GUID)) { 388 FuncStartAddresses[GUID] = StartAddr; 389 FuncRange &Range = StartAddrToFuncRangeMap[StartAddr]; 390 GuidFilter.insert(Function::getGUID(Range.getFuncName())); 391 } 392 } 393 } 394 } else { 395 for (auto *F : ProfiledFunctions) { 396 GuidFilter.insert(Function::getGUID(F->FuncName)); 397 for (auto &Range : F->Ranges) { 398 auto GUIDs = StartAddrToSymMap.equal_range(Range.first); 399 for (auto I = GUIDs.first; I != GUIDs.second; ++I) 400 FuncStartAddresses[I->second] = I->first; 401 } 402 } 403 } 404 405 StringRef FileName = Obj->getFileName(); 406 for (section_iterator SI = Obj->section_begin(), SE = Obj->section_end(); 407 SI != SE; ++SI) { 408 const SectionRef &Section = *SI; 409 StringRef SectionName = unwrapOrError(Section.getName(), FileName); 410 411 if (SectionName == ".pseudo_probe_desc") { 412 StringRef Contents = unwrapOrError(Section.getContents(), FileName); 413 if (!ProbeDecoder.buildGUID2FuncDescMap( 414 reinterpret_cast<const uint8_t *>(Contents.data()), 415 Contents.size())) 416 exitWithError( 417 "Pseudo Probe decoder fail in .pseudo_probe_desc section"); 418 } else if (SectionName == ".pseudo_probe") { 419 StringRef Contents = unwrapOrError(Section.getContents(), FileName); 420 if (!ProbeDecoder.buildAddress2ProbeMap( 421 reinterpret_cast<const uint8_t *>(Contents.data()), 422 Contents.size(), GuidFilter, FuncStartAddresses)) 423 exitWithError("Pseudo Probe decoder fail in .pseudo_probe section"); 424 } 425 } 426 427 // Build TopLevelProbeFrameMap to track size for optimized inlinees when probe 428 // is available 429 if (TrackFuncContextSize) { 430 for (const auto &Child : ProbeDecoder.getDummyInlineRoot().getChildren()) { 431 auto *Frame = Child.second.get(); 432 StringRef FuncName = 433 ProbeDecoder.getFuncDescForGUID(Frame->Guid)->FuncName; 434 TopLevelProbeFrameMap[FuncName] = Frame; 435 } 436 } 437 438 if (ShowPseudoProbe) 439 ProbeDecoder.printGUID2FuncDescMap(outs()); 440 } 441 442 void ProfiledBinary::decodePseudoProbe() { 443 OwningBinary<Binary> OBinary = unwrapOrError(createBinary(Path), Path); 444 Binary &ExeBinary = *OBinary.getBinary(); 445 auto *Obj = dyn_cast<ELFObjectFileBase>(&ExeBinary); 446 decodePseudoProbe(Obj); 447 } 448 449 void ProfiledBinary::setIsFuncEntry(FuncRange *FuncRange, 450 StringRef RangeSymName) { 451 // Skip external function symbol. 452 if (!FuncRange) 453 return; 454 455 // Set IsFuncEntry to ture if there is only one range in the function or the 456 // RangeSymName from ELF is equal to its DWARF-based function name. 457 if (FuncRange->Func->Ranges.size() == 1 || 458 (!FuncRange->IsFuncEntry && FuncRange->getFuncName() == RangeSymName)) 459 FuncRange->IsFuncEntry = true; 460 } 461 462 bool ProfiledBinary::dissassembleSymbol(std::size_t SI, ArrayRef<uint8_t> Bytes, 463 SectionSymbolsTy &Symbols, 464 const SectionRef &Section) { 465 std::size_t SE = Symbols.size(); 466 uint64_t SectionAddress = Section.getAddress(); 467 uint64_t SectSize = Section.getSize(); 468 uint64_t StartAddress = Symbols[SI].Addr; 469 uint64_t NextStartAddress = 470 (SI + 1 < SE) ? Symbols[SI + 1].Addr : SectionAddress + SectSize; 471 FuncRange *FRange = findFuncRange(StartAddress); 472 setIsFuncEntry(FRange, FunctionSamples::getCanonicalFnName(Symbols[SI].Name)); 473 StringRef SymbolName = 474 ShowCanonicalFnName 475 ? FunctionSamples::getCanonicalFnName(Symbols[SI].Name) 476 : Symbols[SI].Name; 477 bool ShowDisassembly = 478 ShowDisassemblyOnly && (DisassembleFunctionSet.empty() || 479 DisassembleFunctionSet.count(SymbolName)); 480 if (ShowDisassembly) 481 outs() << '<' << SymbolName << ">:\n"; 482 483 auto WarnInvalidInsts = [](uint64_t Start, uint64_t End) { 484 WithColor::warning() << "Invalid instructions at " 485 << format("%8" PRIx64, Start) << " - " 486 << format("%8" PRIx64, End) << "\n"; 487 }; 488 489 uint64_t Address = StartAddress; 490 // Size of a consecutive invalid instruction range starting from Address -1 491 // backwards. 492 uint64_t InvalidInstLength = 0; 493 while (Address < NextStartAddress) { 494 MCInst Inst; 495 uint64_t Size; 496 // Disassemble an instruction. 497 bool Disassembled = DisAsm->getInstruction( 498 Inst, Size, Bytes.slice(Address - SectionAddress), Address, nulls()); 499 if (Size == 0) 500 Size = 1; 501 502 if (ShowDisassembly) { 503 if (ShowPseudoProbe) { 504 ProbeDecoder.printProbeForAddress(outs(), Address); 505 } 506 outs() << format("%8" PRIx64 ":", Address); 507 size_t Start = outs().tell(); 508 if (Disassembled) 509 IPrinter->printInst(&Inst, Address + Size, "", *STI.get(), outs()); 510 else 511 outs() << "\t<unknown>"; 512 if (ShowSourceLocations) { 513 unsigned Cur = outs().tell() - Start; 514 if (Cur < 40) 515 outs().indent(40 - Cur); 516 InstructionPointer IP(this, Address); 517 outs() << getReversedLocWithContext( 518 symbolize(IP, ShowCanonicalFnName, ShowPseudoProbe)); 519 } 520 outs() << "\n"; 521 } 522 523 if (Disassembled) { 524 const MCInstrDesc &MCDesc = MII->get(Inst.getOpcode()); 525 526 // Record instruction size. 527 AddressToInstSizeMap[Address] = Size; 528 529 // Populate address maps. 530 CodeAddressVec.push_back(Address); 531 if (MCDesc.isCall()) { 532 CallAddressSet.insert(Address); 533 UncondBranchAddrSet.insert(Address); 534 } else if (MCDesc.isReturn()) { 535 RetAddressSet.insert(Address); 536 UncondBranchAddrSet.insert(Address); 537 } else if (MCDesc.isBranch()) { 538 if (MCDesc.isUnconditionalBranch()) 539 UncondBranchAddrSet.insert(Address); 540 BranchAddressSet.insert(Address); 541 } 542 543 // Record potential call targets for tail frame inference later-on. 544 if (InferMissingFrames && FRange) { 545 uint64_t Target = 0; 546 MIA->evaluateBranch(Inst, Address, Size, Target); 547 if (MCDesc.isCall()) { 548 // Indirect call targets are unknown at this point. Recording the 549 // unknown target (zero) for further LBR-based refinement. 550 MissingContextInferrer->CallEdges[Address].insert(Target); 551 } else if (MCDesc.isUnconditionalBranch()) { 552 assert(Target && 553 "target should be known for unconditional direct branch"); 554 // Any inter-function unconditional jump is considered tail call at 555 // this point. This is not 100% accurate and could further be 556 // optimized based on some source annotation. 557 FuncRange *ToFRange = findFuncRange(Target); 558 if (ToFRange && ToFRange->Func != FRange->Func) 559 MissingContextInferrer->TailCallEdges[Address].insert(Target); 560 LLVM_DEBUG({ 561 dbgs() << "Direct Tail call: " << format("%8" PRIx64 ":", Address); 562 IPrinter->printInst(&Inst, Address + Size, "", *STI.get(), dbgs()); 563 dbgs() << "\n"; 564 }); 565 } else if (MCDesc.isIndirectBranch() && MCDesc.isBarrier()) { 566 // This is an indirect branch but not necessarily an indirect tail 567 // call. The isBarrier check is to filter out conditional branch. 568 // Similar with indirect call targets, recording the unknown target 569 // (zero) for further LBR-based refinement. 570 MissingContextInferrer->TailCallEdges[Address].insert(Target); 571 LLVM_DEBUG({ 572 dbgs() << "Indirect Tail call: " 573 << format("%8" PRIx64 ":", Address); 574 IPrinter->printInst(&Inst, Address + Size, "", *STI.get(), dbgs()); 575 dbgs() << "\n"; 576 }); 577 } 578 } 579 580 if (InvalidInstLength) { 581 WarnInvalidInsts(Address - InvalidInstLength, Address - 1); 582 InvalidInstLength = 0; 583 } 584 } else { 585 InvalidInstLength += Size; 586 } 587 588 Address += Size; 589 } 590 591 if (InvalidInstLength) 592 WarnInvalidInsts(Address - InvalidInstLength, Address - 1); 593 594 if (ShowDisassembly) 595 outs() << "\n"; 596 597 return true; 598 } 599 600 void ProfiledBinary::setUpDisassembler(const ELFObjectFileBase *Obj) { 601 const Target *TheTarget = getTarget(Obj); 602 std::string TripleName = TheTriple.getTriple(); 603 StringRef FileName = Obj->getFileName(); 604 605 MRI.reset(TheTarget->createMCRegInfo(TripleName)); 606 if (!MRI) 607 exitWithError("no register info for target " + TripleName, FileName); 608 609 MCTargetOptions MCOptions; 610 AsmInfo.reset(TheTarget->createMCAsmInfo(*MRI, TripleName, MCOptions)); 611 if (!AsmInfo) 612 exitWithError("no assembly info for target " + TripleName, FileName); 613 614 SubtargetFeatures Features = Obj->getFeatures(); 615 STI.reset( 616 TheTarget->createMCSubtargetInfo(TripleName, "", Features.getString())); 617 if (!STI) 618 exitWithError("no subtarget info for target " + TripleName, FileName); 619 620 MII.reset(TheTarget->createMCInstrInfo()); 621 if (!MII) 622 exitWithError("no instruction info for target " + TripleName, FileName); 623 624 MCContext Ctx(Triple(TripleName), AsmInfo.get(), MRI.get(), STI.get()); 625 std::unique_ptr<MCObjectFileInfo> MOFI( 626 TheTarget->createMCObjectFileInfo(Ctx, /*PIC=*/false)); 627 Ctx.setObjectFileInfo(MOFI.get()); 628 DisAsm.reset(TheTarget->createMCDisassembler(*STI, Ctx)); 629 if (!DisAsm) 630 exitWithError("no disassembler for target " + TripleName, FileName); 631 632 MIA.reset(TheTarget->createMCInstrAnalysis(MII.get())); 633 634 int AsmPrinterVariant = AsmInfo->getAssemblerDialect(); 635 IPrinter.reset(TheTarget->createMCInstPrinter( 636 Triple(TripleName), AsmPrinterVariant, *AsmInfo, *MII, *MRI)); 637 IPrinter->setPrintBranchImmAsAddress(true); 638 } 639 640 void ProfiledBinary::disassemble(const ELFObjectFileBase *Obj) { 641 // Set up disassembler and related components. 642 setUpDisassembler(Obj); 643 644 // Create a mapping from virtual address to symbol name. The symbols in text 645 // sections are the candidates to dissassemble. 646 std::map<SectionRef, SectionSymbolsTy> AllSymbols; 647 StringRef FileName = Obj->getFileName(); 648 for (const SymbolRef &Symbol : Obj->symbols()) { 649 const uint64_t Addr = unwrapOrError(Symbol.getAddress(), FileName); 650 const StringRef Name = unwrapOrError(Symbol.getName(), FileName); 651 section_iterator SecI = unwrapOrError(Symbol.getSection(), FileName); 652 if (SecI != Obj->section_end()) 653 AllSymbols[*SecI].push_back(SymbolInfoTy(Addr, Name, ELF::STT_NOTYPE)); 654 } 655 656 // Sort all the symbols. Use a stable sort to stabilize the output. 657 for (std::pair<const SectionRef, SectionSymbolsTy> &SecSyms : AllSymbols) 658 stable_sort(SecSyms.second); 659 660 assert((DisassembleFunctionSet.empty() || ShowDisassemblyOnly) && 661 "Functions to disassemble should be only specified together with " 662 "--show-disassembly-only"); 663 664 if (ShowDisassemblyOnly) 665 outs() << "\nDisassembly of " << FileName << ":\n"; 666 667 // Dissassemble a text section. 668 for (section_iterator SI = Obj->section_begin(), SE = Obj->section_end(); 669 SI != SE; ++SI) { 670 const SectionRef &Section = *SI; 671 if (!Section.isText()) 672 continue; 673 674 uint64_t ImageLoadAddr = getPreferredBaseAddress(); 675 uint64_t SectionAddress = Section.getAddress() - ImageLoadAddr; 676 uint64_t SectSize = Section.getSize(); 677 if (!SectSize) 678 continue; 679 680 // Register the text section. 681 TextSections.insert({SectionAddress, SectSize}); 682 683 StringRef SectionName = unwrapOrError(Section.getName(), FileName); 684 685 if (ShowDisassemblyOnly) { 686 outs() << "\nDisassembly of section " << SectionName; 687 outs() << " [" << format("0x%" PRIx64, Section.getAddress()) << ", " 688 << format("0x%" PRIx64, Section.getAddress() + SectSize) 689 << "]:\n\n"; 690 } 691 692 if (SectionName == ".plt") 693 continue; 694 695 // Get the section data. 696 ArrayRef<uint8_t> Bytes = 697 arrayRefFromStringRef(unwrapOrError(Section.getContents(), FileName)); 698 699 // Get the list of all the symbols in this section. 700 SectionSymbolsTy &Symbols = AllSymbols[Section]; 701 702 // Disassemble symbol by symbol. 703 for (std::size_t SI = 0, SE = Symbols.size(); SI != SE; ++SI) { 704 if (!dissassembleSymbol(SI, Bytes, Symbols, Section)) 705 exitWithError("disassembling error", FileName); 706 } 707 } 708 709 // Dissassemble rodata section to check if FS discriminator symbol exists. 710 checkUseFSDiscriminator(Obj, AllSymbols); 711 } 712 713 void ProfiledBinary::checkUseFSDiscriminator( 714 const ELFObjectFileBase *Obj, 715 std::map<SectionRef, SectionSymbolsTy> &AllSymbols) { 716 const char *FSDiscriminatorVar = "__llvm_fs_discriminator__"; 717 for (section_iterator SI = Obj->section_begin(), SE = Obj->section_end(); 718 SI != SE; ++SI) { 719 const SectionRef &Section = *SI; 720 if (!Section.isData() || Section.getSize() == 0) 721 continue; 722 SectionSymbolsTy &Symbols = AllSymbols[Section]; 723 724 for (std::size_t SI = 0, SE = Symbols.size(); SI != SE; ++SI) { 725 if (Symbols[SI].Name == FSDiscriminatorVar) { 726 UseFSDiscriminator = true; 727 return; 728 } 729 } 730 } 731 } 732 733 void ProfiledBinary::populateElfSymbolAddressList( 734 const ELFObjectFileBase *Obj) { 735 // Create a mapping from virtual address to symbol GUID and the other way 736 // around. 737 StringRef FileName = Obj->getFileName(); 738 for (const SymbolRef &Symbol : Obj->symbols()) { 739 const uint64_t Addr = unwrapOrError(Symbol.getAddress(), FileName); 740 const StringRef Name = unwrapOrError(Symbol.getName(), FileName); 741 uint64_t GUID = Function::getGUID(Name); 742 SymbolStartAddrs[GUID] = Addr; 743 StartAddrToSymMap.emplace(Addr, GUID); 744 } 745 } 746 747 void ProfiledBinary::loadSymbolsFromDWARFUnit(DWARFUnit &CompilationUnit) { 748 for (const auto &DieInfo : CompilationUnit.dies()) { 749 llvm::DWARFDie Die(&CompilationUnit, &DieInfo); 750 751 if (!Die.isSubprogramDIE()) 752 continue; 753 auto Name = Die.getName(llvm::DINameKind::LinkageName); 754 if (!Name) 755 Name = Die.getName(llvm::DINameKind::ShortName); 756 if (!Name) 757 continue; 758 759 auto RangesOrError = Die.getAddressRanges(); 760 if (!RangesOrError) 761 continue; 762 const DWARFAddressRangesVector &Ranges = RangesOrError.get(); 763 764 if (Ranges.empty()) 765 continue; 766 767 // Different DWARF symbols can have same function name, search or create 768 // BinaryFunction indexed by the name. 769 auto Ret = BinaryFunctions.emplace(Name, BinaryFunction()); 770 auto &Func = Ret.first->second; 771 if (Ret.second) 772 Func.FuncName = Ret.first->first; 773 774 for (const auto &Range : Ranges) { 775 uint64_t StartAddress = Range.LowPC; 776 uint64_t EndAddress = Range.HighPC; 777 778 if (EndAddress <= StartAddress || 779 StartAddress < getPreferredBaseAddress()) 780 continue; 781 782 // We may want to know all ranges for one function. Here group the 783 // ranges and store them into BinaryFunction. 784 Func.Ranges.emplace_back(StartAddress, EndAddress); 785 786 auto R = StartAddrToFuncRangeMap.emplace(StartAddress, FuncRange()); 787 if (R.second) { 788 FuncRange &FRange = R.first->second; 789 FRange.Func = &Func; 790 FRange.StartAddress = StartAddress; 791 FRange.EndAddress = EndAddress; 792 } else { 793 WithColor::warning() 794 << "Duplicated symbol start address at " 795 << format("%8" PRIx64, StartAddress) << " " 796 << R.first->second.getFuncName() << " and " << Name << "\n"; 797 } 798 } 799 } 800 } 801 802 void ProfiledBinary::loadSymbolsFromDWARF(ObjectFile &Obj) { 803 auto DebugContext = llvm::DWARFContext::create( 804 Obj, DWARFContext::ProcessDebugRelocations::Process, nullptr, DWPPath); 805 if (!DebugContext) 806 exitWithError("Error creating the debug info context", Path); 807 808 for (const auto &CompilationUnit : DebugContext->compile_units()) 809 loadSymbolsFromDWARFUnit(*CompilationUnit.get()); 810 811 // Handles DWO sections that can either be in .o, .dwo or .dwp files. 812 for (const auto &CompilationUnit : DebugContext->compile_units()) { 813 DWARFUnit *const DwarfUnit = CompilationUnit.get(); 814 if (std::optional<uint64_t> DWOId = DwarfUnit->getDWOId()) { 815 DWARFUnit *DWOCU = DwarfUnit->getNonSkeletonUnitDIE(false).getDwarfUnit(); 816 if (!DWOCU->isDWOUnit()) { 817 std::string DWOName = dwarf::toString( 818 DwarfUnit->getUnitDIE().find( 819 {dwarf::DW_AT_dwo_name, dwarf::DW_AT_GNU_dwo_name}), 820 ""); 821 WithColor::warning() 822 << "DWO debug information for " << DWOName 823 << " was not loaded. Please check the .o, .dwo or .dwp path.\n"; 824 continue; 825 } 826 loadSymbolsFromDWARFUnit(*DWOCU); 827 } 828 } 829 830 if (BinaryFunctions.empty()) 831 WithColor::warning() << "Loading of DWARF info completed, but no binary " 832 "functions have been retrieved.\n"; 833 } 834 835 void ProfiledBinary::populateSymbolListFromDWARF( 836 ProfileSymbolList &SymbolList) { 837 for (auto &I : StartAddrToFuncRangeMap) 838 SymbolList.add(I.second.getFuncName()); 839 } 840 841 void ProfiledBinary::setupSymbolizer() { 842 symbolize::LLVMSymbolizer::Options SymbolizerOpts; 843 SymbolizerOpts.PrintFunctions = 844 DILineInfoSpecifier::FunctionNameKind::LinkageName; 845 SymbolizerOpts.Demangle = false; 846 SymbolizerOpts.DefaultArch = TheTriple.getArchName().str(); 847 SymbolizerOpts.UseSymbolTable = false; 848 SymbolizerOpts.RelativeAddresses = false; 849 SymbolizerOpts.DWPName = DWPPath; 850 Symbolizer = std::make_unique<symbolize::LLVMSymbolizer>(SymbolizerOpts); 851 } 852 853 SampleContextFrameVector ProfiledBinary::symbolize(const InstructionPointer &IP, 854 bool UseCanonicalFnName, 855 bool UseProbeDiscriminator) { 856 assert(this == IP.Binary && 857 "Binary should only symbolize its own instruction"); 858 auto Addr = object::SectionedAddress{IP.Address, 859 object::SectionedAddress::UndefSection}; 860 DIInliningInfo InlineStack = unwrapOrError( 861 Symbolizer->symbolizeInlinedCode(SymbolizerPath.str(), Addr), 862 SymbolizerPath); 863 864 SampleContextFrameVector CallStack; 865 for (int32_t I = InlineStack.getNumberOfFrames() - 1; I >= 0; I--) { 866 const auto &CallerFrame = InlineStack.getFrame(I); 867 if (CallerFrame.FunctionName == "<invalid>") 868 break; 869 870 StringRef FunctionName(CallerFrame.FunctionName); 871 if (UseCanonicalFnName) 872 FunctionName = FunctionSamples::getCanonicalFnName(FunctionName); 873 874 uint32_t Discriminator = CallerFrame.Discriminator; 875 uint32_t LineOffset = (CallerFrame.Line - CallerFrame.StartLine) & 0xffff; 876 if (UseProbeDiscriminator) { 877 LineOffset = 878 PseudoProbeDwarfDiscriminator::extractProbeIndex(Discriminator); 879 Discriminator = 0; 880 } 881 882 LineLocation Line(LineOffset, Discriminator); 883 auto It = NameStrings.insert(FunctionName.str()); 884 CallStack.emplace_back(*It.first, Line); 885 } 886 887 return CallStack; 888 } 889 890 void ProfiledBinary::computeInlinedContextSizeForRange(uint64_t RangeBegin, 891 uint64_t RangeEnd) { 892 InstructionPointer IP(this, RangeBegin, true); 893 894 if (IP.Address != RangeBegin) 895 WithColor::warning() << "Invalid start instruction at " 896 << format("%8" PRIx64, RangeBegin) << "\n"; 897 898 if (IP.Address >= RangeEnd) 899 return; 900 901 do { 902 const SampleContextFrameVector SymbolizedCallStack = 903 getFrameLocationStack(IP.Address, UsePseudoProbes); 904 uint64_t Size = AddressToInstSizeMap[IP.Address]; 905 // Record instruction size for the corresponding context 906 FuncSizeTracker.addInstructionForContext(SymbolizedCallStack, Size); 907 908 } while (IP.advance() && IP.Address < RangeEnd); 909 } 910 911 void ProfiledBinary::computeInlinedContextSizeForFunc( 912 const BinaryFunction *Func) { 913 // Note that a function can be spilt into multiple ranges, so compute for all 914 // ranges of the function. 915 for (const auto &Range : Func->Ranges) 916 computeInlinedContextSizeForRange(Range.first, Range.second); 917 918 // Track optimized-away inlinee for probed binary. A function inlined and then 919 // optimized away should still have their probes left over in places. 920 if (usePseudoProbes()) { 921 auto I = TopLevelProbeFrameMap.find(Func->FuncName); 922 if (I != TopLevelProbeFrameMap.end()) { 923 BinarySizeContextTracker::ProbeFrameStack ProbeContext; 924 FuncSizeTracker.trackInlineesOptimizedAway(ProbeDecoder, *I->second, 925 ProbeContext); 926 } 927 } 928 } 929 930 void ProfiledBinary::inferMissingFrames( 931 const SmallVectorImpl<uint64_t> &Context, 932 SmallVectorImpl<uint64_t> &NewContext) { 933 MissingContextInferrer->inferMissingFrames(Context, NewContext); 934 } 935 936 InstructionPointer::InstructionPointer(const ProfiledBinary *Binary, 937 uint64_t Address, bool RoundToNext) 938 : Binary(Binary), Address(Address) { 939 Index = Binary->getIndexForAddr(Address); 940 if (RoundToNext) { 941 // we might get address which is not the code 942 // it should round to the next valid address 943 if (Index >= Binary->getCodeAddrVecSize()) 944 this->Address = UINT64_MAX; 945 else 946 this->Address = Binary->getAddressforIndex(Index); 947 } 948 } 949 950 bool InstructionPointer::advance() { 951 Index++; 952 if (Index >= Binary->getCodeAddrVecSize()) { 953 Address = UINT64_MAX; 954 return false; 955 } 956 Address = Binary->getAddressforIndex(Index); 957 return true; 958 } 959 960 bool InstructionPointer::backward() { 961 if (Index == 0) { 962 Address = 0; 963 return false; 964 } 965 Index--; 966 Address = Binary->getAddressforIndex(Index); 967 return true; 968 } 969 970 void InstructionPointer::update(uint64_t Addr) { 971 Address = Addr; 972 Index = Binary->getIndexForAddr(Address); 973 } 974 975 } // end namespace sampleprof 976 } // end namespace llvm 977