xref: /llvm-project/llvm/tools/llvm-profgen/ProfiledBinary.cpp (revision 04ebd1907c0561831e4fcf2658e1f3614f8cdd77)
1 //===-- ProfiledBinary.cpp - Binary decoder ---------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "ProfiledBinary.h"
10 #include "ErrorHandling.h"
11 #include "MissingFrameInferrer.h"
12 #include "ProfileGenerator.h"
13 #include "llvm/DebugInfo/Symbolize/SymbolizableModule.h"
14 #include "llvm/Demangle/Demangle.h"
15 #include "llvm/IR/DebugInfoMetadata.h"
16 #include "llvm/MC/TargetRegistry.h"
17 #include "llvm/Object/COFF.h"
18 #include "llvm/Support/CommandLine.h"
19 #include "llvm/Support/Debug.h"
20 #include "llvm/Support/Format.h"
21 #include "llvm/Support/TargetSelect.h"
22 #include "llvm/TargetParser/Triple.h"
23 #include <optional>
24 
25 #define DEBUG_TYPE "load-binary"
26 
27 using namespace llvm;
28 using namespace sampleprof;
29 
30 cl::opt<bool> ShowDisassemblyOnly("show-disassembly-only",
31                                   cl::desc("Print disassembled code."));
32 
33 cl::opt<bool> ShowSourceLocations("show-source-locations",
34                                   cl::desc("Print source locations."));
35 
36 static cl::opt<bool>
37     ShowCanonicalFnName("show-canonical-fname",
38                         cl::desc("Print canonical function name."));
39 
40 static cl::opt<bool> ShowPseudoProbe(
41     "show-pseudo-probe",
42     cl::desc("Print pseudo probe section and disassembled info."));
43 
44 static cl::opt<bool> UseDwarfCorrelation(
45     "use-dwarf-correlation",
46     cl::desc("Use dwarf for profile correlation even when binary contains "
47              "pseudo probe."));
48 
49 static cl::opt<std::string>
50     DWPPath("dwp", cl::init(""),
51             cl::desc("Path of .dwp file. When not specified, it will be "
52                      "<binary>.dwp in the same directory as the main binary."));
53 
54 static cl::list<std::string> DisassembleFunctions(
55     "disassemble-functions", cl::CommaSeparated,
56     cl::desc("List of functions to print disassembly for. Accept demangled "
57              "names only. Only work with show-disassembly-only"));
58 
59 static cl::opt<bool>
60     KernelBinary("kernel",
61                  cl::desc("Generate the profile for Linux kernel binary."));
62 
63 extern cl::opt<bool> ShowDetailedWarning;
64 extern cl::opt<bool> InferMissingFrames;
65 
66 namespace llvm {
67 namespace sampleprof {
68 
69 static const Target *getTarget(const ObjectFile *Obj) {
70   Triple TheTriple = Obj->makeTriple();
71   std::string Error;
72   std::string ArchName;
73   const Target *TheTarget =
74       TargetRegistry::lookupTarget(ArchName, TheTriple, Error);
75   if (!TheTarget)
76     exitWithError(Error, Obj->getFileName());
77   return TheTarget;
78 }
79 
80 void BinarySizeContextTracker::addInstructionForContext(
81     const SampleContextFrameVector &Context, uint32_t InstrSize) {
82   ContextTrieNode *CurNode = &RootContext;
83   bool IsLeaf = true;
84   for (const auto &Callsite : reverse(Context)) {
85     FunctionId CallerName = Callsite.Func;
86     LineLocation CallsiteLoc = IsLeaf ? LineLocation(0, 0) : Callsite.Location;
87     CurNode = CurNode->getOrCreateChildContext(CallsiteLoc, CallerName);
88     IsLeaf = false;
89   }
90 
91   CurNode->addFunctionSize(InstrSize);
92 }
93 
94 uint32_t
95 BinarySizeContextTracker::getFuncSizeForContext(const ContextTrieNode *Node) {
96   ContextTrieNode *CurrNode = &RootContext;
97   ContextTrieNode *PrevNode = nullptr;
98 
99   std::optional<uint32_t> Size;
100 
101   // Start from top-level context-less function, traverse down the reverse
102   // context trie to find the best/longest match for given context, then
103   // retrieve the size.
104   LineLocation CallSiteLoc(0, 0);
105   while (CurrNode && Node->getParentContext() != nullptr) {
106     PrevNode = CurrNode;
107     CurrNode = CurrNode->getChildContext(CallSiteLoc, Node->getFuncName());
108     if (CurrNode && CurrNode->getFunctionSize())
109       Size = *CurrNode->getFunctionSize();
110     CallSiteLoc = Node->getCallSiteLoc();
111     Node = Node->getParentContext();
112   }
113 
114   // If we traversed all nodes along the path of the context and haven't
115   // found a size yet, pivot to look for size from sibling nodes, i.e size
116   // of inlinee under different context.
117   if (!Size) {
118     if (!CurrNode)
119       CurrNode = PrevNode;
120     while (!Size && CurrNode && !CurrNode->getAllChildContext().empty()) {
121       CurrNode = &CurrNode->getAllChildContext().begin()->second;
122       if (CurrNode->getFunctionSize())
123         Size = *CurrNode->getFunctionSize();
124     }
125   }
126 
127   assert(Size && "We should at least find one context size.");
128   return *Size;
129 }
130 
131 void BinarySizeContextTracker::trackInlineesOptimizedAway(
132     MCPseudoProbeDecoder &ProbeDecoder) {
133   ProbeFrameStack ProbeContext;
134   for (const auto &Child : ProbeDecoder.getDummyInlineRoot().getChildren())
135     trackInlineesOptimizedAway(ProbeDecoder, Child, ProbeContext);
136 }
137 
138 void BinarySizeContextTracker::trackInlineesOptimizedAway(
139     MCPseudoProbeDecoder &ProbeDecoder,
140     const MCDecodedPseudoProbeInlineTree &ProbeNode,
141     ProbeFrameStack &ProbeContext) {
142   StringRef FuncName =
143       ProbeDecoder.getFuncDescForGUID(ProbeNode.Guid)->FuncName;
144   ProbeContext.emplace_back(FuncName, 0);
145 
146   // This ProbeContext has a probe, so it has code before inlining and
147   // optimization. Make sure we mark its size as known.
148   if (!ProbeNode.getProbes().empty()) {
149     ContextTrieNode *SizeContext = &RootContext;
150     for (auto &ProbeFrame : reverse(ProbeContext)) {
151       StringRef CallerName = ProbeFrame.first;
152       LineLocation CallsiteLoc(ProbeFrame.second, 0);
153       SizeContext =
154           SizeContext->getOrCreateChildContext(CallsiteLoc,
155                                                FunctionId(CallerName));
156     }
157     // Add 0 size to make known.
158     SizeContext->addFunctionSize(0);
159   }
160 
161   // DFS down the probe inline tree
162   for (const auto &ChildNode : ProbeNode.getChildren()) {
163     InlineSite Location = ChildNode.getInlineSite();
164     ProbeContext.back().second = std::get<1>(Location);
165     trackInlineesOptimizedAway(ProbeDecoder, ChildNode, ProbeContext);
166   }
167 
168   ProbeContext.pop_back();
169 }
170 
171 ProfiledBinary::ProfiledBinary(const StringRef ExeBinPath,
172                                const StringRef DebugBinPath)
173     : Path(ExeBinPath), DebugBinaryPath(DebugBinPath),
174       SymbolizerOpts(getSymbolizerOpts()), ProEpilogTracker(this),
175       Symbolizer(std::make_unique<symbolize::LLVMSymbolizer>(SymbolizerOpts)),
176       TrackFuncContextSize(EnableCSPreInliner && UseContextCostForPreInliner) {
177   // Point to executable binary if debug info binary is not specified.
178   SymbolizerPath = DebugBinPath.empty() ? ExeBinPath : DebugBinPath;
179   if (InferMissingFrames)
180     MissingContextInferrer = std::make_unique<MissingFrameInferrer>(this);
181   load();
182 }
183 
184 ProfiledBinary::~ProfiledBinary() {}
185 
186 void ProfiledBinary::warnNoFuncEntry() {
187   uint64_t NoFuncEntryNum = 0;
188   for (auto &F : BinaryFunctions) {
189     if (F.second.Ranges.empty())
190       continue;
191     bool hasFuncEntry = false;
192     for (auto &R : F.second.Ranges) {
193       if (FuncRange *FR = findFuncRangeForStartAddr(R.first)) {
194         if (FR->IsFuncEntry) {
195           hasFuncEntry = true;
196           break;
197         }
198       }
199     }
200 
201     if (!hasFuncEntry) {
202       NoFuncEntryNum++;
203       if (ShowDetailedWarning)
204         WithColor::warning()
205             << "Failed to determine function entry for " << F.first
206             << " due to inconsistent name from symbol table and dwarf info.\n";
207     }
208   }
209   emitWarningSummary(NoFuncEntryNum, BinaryFunctions.size(),
210                      "of functions failed to determine function entry due to "
211                      "inconsistent name from symbol table and dwarf info.");
212 }
213 
214 void ProfiledBinary::load() {
215   // Attempt to open the binary.
216   OwningBinary<Binary> OBinary = unwrapOrError(createBinary(Path), Path);
217   Binary &ExeBinary = *OBinary.getBinary();
218 
219   IsCOFF = isa<COFFObjectFile>(&ExeBinary);
220   if (!isa<ELFObjectFileBase>(&ExeBinary) && !IsCOFF)
221     exitWithError("not a valid ELF/COFF image", Path);
222 
223   auto *Obj = cast<ObjectFile>(&ExeBinary);
224   TheTriple = Obj->makeTriple();
225 
226   LLVM_DEBUG(dbgs() << "Loading " << Path << "\n");
227 
228   // Mark the binary as a kernel image;
229   IsKernel = KernelBinary;
230 
231   // Find the preferred load address for text sections.
232   setPreferredTextSegmentAddresses(Obj);
233 
234   // Load debug info of subprograms from DWARF section.
235   // If path of debug info binary is specified, use the debug info from it,
236   // otherwise use the debug info from the executable binary.
237   if (!DebugBinaryPath.empty()) {
238     OwningBinary<Binary> DebugPath =
239         unwrapOrError(createBinary(DebugBinaryPath), DebugBinaryPath);
240     loadSymbolsFromDWARF(*cast<ObjectFile>(DebugPath.getBinary()));
241   } else {
242     loadSymbolsFromDWARF(*cast<ObjectFile>(&ExeBinary));
243   }
244 
245   DisassembleFunctionSet.insert(DisassembleFunctions.begin(),
246                                 DisassembleFunctions.end());
247 
248   if (auto *ELFObj = dyn_cast<ELFObjectFileBase>(Obj)) {
249     checkPseudoProbe(ELFObj);
250     if (UsePseudoProbes)
251       populateElfSymbolAddressList(ELFObj);
252 
253     if (ShowDisassemblyOnly)
254       decodePseudoProbe(ELFObj);
255   }
256 
257   // Disassemble the text sections.
258   disassemble(Obj);
259 
260   // Use function start and return address to infer prolog and epilog
261   ProEpilogTracker.inferPrologAddresses(StartAddrToFuncRangeMap);
262   ProEpilogTracker.inferEpilogAddresses(RetAddressSet);
263 
264   warnNoFuncEntry();
265 
266   // TODO: decode other sections.
267 }
268 
269 bool ProfiledBinary::inlineContextEqual(uint64_t Address1, uint64_t Address2) {
270   const SampleContextFrameVector &Context1 =
271       getCachedFrameLocationStack(Address1);
272   const SampleContextFrameVector &Context2 =
273       getCachedFrameLocationStack(Address2);
274   if (Context1.size() != Context2.size())
275     return false;
276   if (Context1.empty())
277     return false;
278   // The leaf frame contains location within the leaf, and it
279   // needs to be remove that as it's not part of the calling context
280   return std::equal(Context1.begin(), Context1.begin() + Context1.size() - 1,
281                     Context2.begin(), Context2.begin() + Context2.size() - 1);
282 }
283 
284 SampleContextFrameVector
285 ProfiledBinary::getExpandedContext(const SmallVectorImpl<uint64_t> &Stack,
286                                    bool &WasLeafInlined) {
287   SampleContextFrameVector ContextVec;
288   if (Stack.empty())
289     return ContextVec;
290   // Process from frame root to leaf
291   for (auto Address : Stack) {
292     const SampleContextFrameVector &ExpandedContext =
293         getCachedFrameLocationStack(Address);
294     // An instruction without a valid debug line will be ignored by sample
295     // processing
296     if (ExpandedContext.empty())
297       return SampleContextFrameVector();
298     // Set WasLeafInlined to the size of inlined frame count for the last
299     // address which is leaf
300     WasLeafInlined = (ExpandedContext.size() > 1);
301     ContextVec.append(ExpandedContext);
302   }
303 
304   // Replace with decoded base discriminator
305   for (auto &Frame : ContextVec) {
306     Frame.Location.Discriminator = ProfileGeneratorBase::getBaseDiscriminator(
307         Frame.Location.Discriminator, UseFSDiscriminator);
308   }
309 
310   assert(ContextVec.size() && "Context length should be at least 1");
311 
312   // Compress the context string except for the leaf frame
313   auto LeafFrame = ContextVec.back();
314   LeafFrame.Location = LineLocation(0, 0);
315   ContextVec.pop_back();
316   CSProfileGenerator::compressRecursionContext(ContextVec);
317   CSProfileGenerator::trimContext(ContextVec);
318   ContextVec.push_back(LeafFrame);
319   return ContextVec;
320 }
321 
322 template <class ELFT>
323 void ProfiledBinary::setPreferredTextSegmentAddresses(const ELFFile<ELFT> &Obj,
324                                                       StringRef FileName) {
325   const auto &PhdrRange = unwrapOrError(Obj.program_headers(), FileName);
326   // FIXME: This should be the page size of the system running profiling.
327   // However such info isn't available at post-processing time, assuming
328   // 4K page now. Note that we don't use EXEC_PAGESIZE from <linux/param.h>
329   // because we may build the tools on non-linux.
330   uint64_t PageSize = 0x1000;
331   for (const typename ELFT::Phdr &Phdr : PhdrRange) {
332     if (Phdr.p_type == ELF::PT_LOAD) {
333       if (!FirstLoadableAddress)
334         FirstLoadableAddress = Phdr.p_vaddr & ~(PageSize - 1U);
335       if (Phdr.p_flags & ELF::PF_X) {
336         // Segments will always be loaded at a page boundary.
337         PreferredTextSegmentAddresses.push_back(Phdr.p_vaddr &
338                                                 ~(PageSize - 1U));
339         TextSegmentOffsets.push_back(Phdr.p_offset & ~(PageSize - 1U));
340       }
341     }
342   }
343 
344   if (PreferredTextSegmentAddresses.empty())
345     exitWithError("no executable segment found", FileName);
346 }
347 
348 void ProfiledBinary::setPreferredTextSegmentAddresses(const COFFObjectFile *Obj,
349                                                       StringRef FileName) {
350   uint64_t ImageBase = Obj->getImageBase();
351   if (!ImageBase)
352     exitWithError("Not a COFF image", FileName);
353 
354   PreferredTextSegmentAddresses.push_back(ImageBase);
355   FirstLoadableAddress = ImageBase;
356 
357   for (SectionRef Section : Obj->sections()) {
358     const coff_section *Sec = Obj->getCOFFSection(Section);
359     if (Sec->Characteristics & COFF::IMAGE_SCN_CNT_CODE)
360       TextSegmentOffsets.push_back(Sec->VirtualAddress);
361   }
362 }
363 
364 void ProfiledBinary::setPreferredTextSegmentAddresses(const ObjectFile *Obj) {
365   if (const auto *ELFObj = dyn_cast<ELF32LEObjectFile>(Obj))
366     setPreferredTextSegmentAddresses(ELFObj->getELFFile(), Obj->getFileName());
367   else if (const auto *ELFObj = dyn_cast<ELF32BEObjectFile>(Obj))
368     setPreferredTextSegmentAddresses(ELFObj->getELFFile(), Obj->getFileName());
369   else if (const auto *ELFObj = dyn_cast<ELF64LEObjectFile>(Obj))
370     setPreferredTextSegmentAddresses(ELFObj->getELFFile(), Obj->getFileName());
371   else if (const auto *ELFObj = dyn_cast<ELF64BEObjectFile>(Obj))
372     setPreferredTextSegmentAddresses(ELFObj->getELFFile(), Obj->getFileName());
373   else if (const auto *COFFObj = dyn_cast<COFFObjectFile>(Obj))
374     setPreferredTextSegmentAddresses(COFFObj, Obj->getFileName());
375   else
376     llvm_unreachable("invalid object format");
377 }
378 
379 void ProfiledBinary::checkPseudoProbe(const ELFObjectFileBase *Obj) {
380   if (UseDwarfCorrelation)
381     return;
382 
383   bool HasProbeDescSection = false;
384   bool HasPseudoProbeSection = false;
385 
386   StringRef FileName = Obj->getFileName();
387   for (section_iterator SI = Obj->section_begin(), SE = Obj->section_end();
388        SI != SE; ++SI) {
389     const SectionRef &Section = *SI;
390     StringRef SectionName = unwrapOrError(Section.getName(), FileName);
391     if (SectionName == ".pseudo_probe_desc") {
392       HasProbeDescSection = true;
393     } else if (SectionName == ".pseudo_probe") {
394       HasPseudoProbeSection = true;
395     }
396   }
397 
398   // set UsePseudoProbes flag, used for PerfReader
399   UsePseudoProbes = HasProbeDescSection && HasPseudoProbeSection;
400 }
401 
402 void ProfiledBinary::decodePseudoProbe(const ELFObjectFileBase *Obj) {
403   if (!UsePseudoProbes)
404     return;
405 
406   MCPseudoProbeDecoder::Uint64Set GuidFilter;
407   MCPseudoProbeDecoder::Uint64Map FuncStartAddresses;
408   if (ShowDisassemblyOnly) {
409     if (DisassembleFunctionSet.empty()) {
410       FuncStartAddresses = SymbolStartAddrs;
411     } else {
412       for (auto &F : DisassembleFunctionSet) {
413         auto GUID = Function::getGUID(F.first());
414         if (auto StartAddr = SymbolStartAddrs.lookup(GUID)) {
415           FuncStartAddresses[GUID] = StartAddr;
416           FuncRange &Range = StartAddrToFuncRangeMap[StartAddr];
417           GuidFilter.insert(Function::getGUID(Range.getFuncName()));
418         }
419       }
420     }
421   } else {
422     for (auto *F : ProfiledFunctions) {
423       GuidFilter.insert(Function::getGUID(F->FuncName));
424       for (auto &Range : F->Ranges) {
425         auto GUIDs = StartAddrToSymMap.equal_range(Range.first);
426         for (const auto &[StartAddr, Func] : make_range(GUIDs))
427           FuncStartAddresses[Func] = StartAddr;
428       }
429     }
430   }
431 
432   StringRef FileName = Obj->getFileName();
433   for (section_iterator SI = Obj->section_begin(), SE = Obj->section_end();
434        SI != SE; ++SI) {
435     const SectionRef &Section = *SI;
436     StringRef SectionName = unwrapOrError(Section.getName(), FileName);
437 
438     if (SectionName == ".pseudo_probe_desc") {
439       StringRef Contents = unwrapOrError(Section.getContents(), FileName);
440       if (!ProbeDecoder.buildGUID2FuncDescMap(
441               reinterpret_cast<const uint8_t *>(Contents.data()),
442               Contents.size()))
443         exitWithError(
444             "Pseudo Probe decoder fail in .pseudo_probe_desc section");
445     } else if (SectionName == ".pseudo_probe") {
446       StringRef Contents = unwrapOrError(Section.getContents(), FileName);
447       if (!ProbeDecoder.buildAddress2ProbeMap(
448               reinterpret_cast<const uint8_t *>(Contents.data()),
449               Contents.size(), GuidFilter, FuncStartAddresses))
450         exitWithError("Pseudo Probe decoder fail in .pseudo_probe section");
451     }
452   }
453 
454   // Build TopLevelProbeFrameMap to track size for optimized inlinees when probe
455   // is available
456   if (TrackFuncContextSize) {
457     for (auto &Child : ProbeDecoder.getDummyInlineRoot().getChildren()) {
458       auto *Frame = &Child;
459       StringRef FuncName =
460           ProbeDecoder.getFuncDescForGUID(Frame->Guid)->FuncName;
461       TopLevelProbeFrameMap[FuncName] = Frame;
462     }
463   }
464 
465   if (ShowPseudoProbe)
466     ProbeDecoder.printGUID2FuncDescMap(outs());
467 }
468 
469 void ProfiledBinary::decodePseudoProbe() {
470   OwningBinary<Binary> OBinary = unwrapOrError(createBinary(Path), Path);
471   Binary &ExeBinary = *OBinary.getBinary();
472   auto *Obj = cast<ELFObjectFileBase>(&ExeBinary);
473   decodePseudoProbe(Obj);
474 }
475 
476 void ProfiledBinary::setIsFuncEntry(FuncRange *FuncRange,
477                                     StringRef RangeSymName) {
478   // Skip external function symbol.
479   if (!FuncRange)
480     return;
481 
482   // Set IsFuncEntry to ture if there is only one range in the function or the
483   // RangeSymName from ELF is equal to its DWARF-based function name.
484   if (FuncRange->Func->Ranges.size() == 1 ||
485       (!FuncRange->IsFuncEntry && FuncRange->getFuncName() == RangeSymName))
486     FuncRange->IsFuncEntry = true;
487 }
488 
489 bool ProfiledBinary::dissassembleSymbol(std::size_t SI, ArrayRef<uint8_t> Bytes,
490                                         SectionSymbolsTy &Symbols,
491                                         const SectionRef &Section) {
492   std::size_t SE = Symbols.size();
493   uint64_t SectionAddress = Section.getAddress();
494   uint64_t SectSize = Section.getSize();
495   uint64_t StartAddress = Symbols[SI].Addr;
496   uint64_t NextStartAddress =
497       (SI + 1 < SE) ? Symbols[SI + 1].Addr : SectionAddress + SectSize;
498   FuncRange *FRange = findFuncRange(StartAddress);
499   setIsFuncEntry(FRange, FunctionSamples::getCanonicalFnName(Symbols[SI].Name));
500   StringRef SymbolName =
501       ShowCanonicalFnName
502           ? FunctionSamples::getCanonicalFnName(Symbols[SI].Name)
503           : Symbols[SI].Name;
504   bool ShowDisassembly =
505       ShowDisassemblyOnly && (DisassembleFunctionSet.empty() ||
506                               DisassembleFunctionSet.count(SymbolName));
507   if (ShowDisassembly)
508     outs() << '<' << SymbolName << ">:\n";
509 
510   uint64_t Address = StartAddress;
511   // Size of a consecutive invalid instruction range starting from Address -1
512   // backwards.
513   uint64_t InvalidInstLength = 0;
514   while (Address < NextStartAddress) {
515     MCInst Inst;
516     uint64_t Size;
517     // Disassemble an instruction.
518     bool Disassembled = DisAsm->getInstruction(
519         Inst, Size, Bytes.slice(Address - SectionAddress), Address, nulls());
520     if (Size == 0)
521       Size = 1;
522 
523     if (ShowDisassembly) {
524       if (ShowPseudoProbe) {
525         ProbeDecoder.printProbeForAddress(outs(), Address);
526       }
527       outs() << format("%8" PRIx64 ":", Address);
528       size_t Start = outs().tell();
529       if (Disassembled)
530         IPrinter->printInst(&Inst, Address + Size, "", *STI, outs());
531       else
532         outs() << "\t<unknown>";
533       if (ShowSourceLocations) {
534         unsigned Cur = outs().tell() - Start;
535         if (Cur < 40)
536           outs().indent(40 - Cur);
537         InstructionPointer IP(this, Address);
538         outs() << getReversedLocWithContext(
539             symbolize(IP, ShowCanonicalFnName, ShowPseudoProbe));
540       }
541       outs() << "\n";
542     }
543 
544     if (Disassembled) {
545       const MCInstrDesc &MCDesc = MII->get(Inst.getOpcode());
546 
547       // Record instruction size.
548       AddressToInstSizeMap[Address] = Size;
549 
550       // Populate address maps.
551       CodeAddressVec.push_back(Address);
552       if (MCDesc.isCall()) {
553         CallAddressSet.insert(Address);
554         UncondBranchAddrSet.insert(Address);
555       } else if (MCDesc.isReturn()) {
556         RetAddressSet.insert(Address);
557         UncondBranchAddrSet.insert(Address);
558       } else if (MCDesc.isBranch()) {
559         if (MCDesc.isUnconditionalBranch())
560           UncondBranchAddrSet.insert(Address);
561         BranchAddressSet.insert(Address);
562       }
563 
564       // Record potential call targets for tail frame inference later-on.
565       if (InferMissingFrames && FRange) {
566         uint64_t Target = 0;
567         MIA->evaluateBranch(Inst, Address, Size, Target);
568         if (MCDesc.isCall()) {
569           // Indirect call targets are unknown at this point. Recording the
570           // unknown target (zero) for further LBR-based refinement.
571           MissingContextInferrer->CallEdges[Address].insert(Target);
572         } else if (MCDesc.isUnconditionalBranch()) {
573           assert(Target &&
574                  "target should be known for unconditional direct branch");
575           // Any inter-function unconditional jump is considered tail call at
576           // this point. This is not 100% accurate and could further be
577           // optimized based on some source annotation.
578           FuncRange *ToFRange = findFuncRange(Target);
579           if (ToFRange && ToFRange->Func != FRange->Func)
580             MissingContextInferrer->TailCallEdges[Address].insert(Target);
581           LLVM_DEBUG({
582             dbgs() << "Direct Tail call: " << format("%8" PRIx64 ":", Address);
583             IPrinter->printInst(&Inst, Address + Size, "", *STI.get(), dbgs());
584             dbgs() << "\n";
585           });
586         } else if (MCDesc.isIndirectBranch() && MCDesc.isBarrier()) {
587           // This is an indirect branch but not necessarily an indirect tail
588           // call. The isBarrier check is to filter out conditional branch.
589           // Similar with indirect call targets, recording the unknown target
590           // (zero) for further LBR-based refinement.
591           MissingContextInferrer->TailCallEdges[Address].insert(Target);
592           LLVM_DEBUG({
593             dbgs() << "Indirect Tail call: "
594                    << format("%8" PRIx64 ":", Address);
595             IPrinter->printInst(&Inst, Address + Size, "", *STI.get(), dbgs());
596             dbgs() << "\n";
597           });
598         }
599       }
600 
601       if (InvalidInstLength) {
602         AddrsWithInvalidInstruction.insert(
603             {Address - InvalidInstLength, Address - 1});
604         InvalidInstLength = 0;
605       }
606     } else {
607       InvalidInstLength += Size;
608     }
609 
610     Address += Size;
611   }
612 
613   if (InvalidInstLength)
614     AddrsWithInvalidInstruction.insert(
615         {Address - InvalidInstLength, Address - 1});
616 
617   if (ShowDisassembly)
618     outs() << "\n";
619 
620   return true;
621 }
622 
623 void ProfiledBinary::setUpDisassembler(const ObjectFile *Obj) {
624   const Target *TheTarget = getTarget(Obj);
625   std::string TripleName = TheTriple.getTriple();
626   StringRef FileName = Obj->getFileName();
627 
628   MRI.reset(TheTarget->createMCRegInfo(TripleName));
629   if (!MRI)
630     exitWithError("no register info for target " + TripleName, FileName);
631 
632   MCTargetOptions MCOptions;
633   AsmInfo.reset(TheTarget->createMCAsmInfo(*MRI, TripleName, MCOptions));
634   if (!AsmInfo)
635     exitWithError("no assembly info for target " + TripleName, FileName);
636 
637   Expected<SubtargetFeatures> Features = Obj->getFeatures();
638   if (!Features)
639     exitWithError(Features.takeError(), FileName);
640   STI.reset(
641       TheTarget->createMCSubtargetInfo(TripleName, "", Features->getString()));
642   if (!STI)
643     exitWithError("no subtarget info for target " + TripleName, FileName);
644 
645   MII.reset(TheTarget->createMCInstrInfo());
646   if (!MII)
647     exitWithError("no instruction info for target " + TripleName, FileName);
648 
649   MCContext Ctx(Triple(TripleName), AsmInfo.get(), MRI.get(), STI.get());
650   std::unique_ptr<MCObjectFileInfo> MOFI(
651       TheTarget->createMCObjectFileInfo(Ctx, /*PIC=*/false));
652   Ctx.setObjectFileInfo(MOFI.get());
653   DisAsm.reset(TheTarget->createMCDisassembler(*STI, Ctx));
654   if (!DisAsm)
655     exitWithError("no disassembler for target " + TripleName, FileName);
656 
657   MIA.reset(TheTarget->createMCInstrAnalysis(MII.get()));
658 
659   int AsmPrinterVariant = AsmInfo->getAssemblerDialect();
660   IPrinter.reset(TheTarget->createMCInstPrinter(
661       Triple(TripleName), AsmPrinterVariant, *AsmInfo, *MII, *MRI));
662   IPrinter->setPrintBranchImmAsAddress(true);
663 }
664 
665 void ProfiledBinary::disassemble(const ObjectFile *Obj) {
666   // Set up disassembler and related components.
667   setUpDisassembler(Obj);
668 
669   // Create a mapping from virtual address to symbol name. The symbols in text
670   // sections are the candidates to dissassemble.
671   std::map<SectionRef, SectionSymbolsTy> AllSymbols;
672   StringRef FileName = Obj->getFileName();
673   for (const SymbolRef &Symbol : Obj->symbols()) {
674     const uint64_t Addr = unwrapOrError(Symbol.getAddress(), FileName);
675     const StringRef Name = unwrapOrError(Symbol.getName(), FileName);
676     section_iterator SecI = unwrapOrError(Symbol.getSection(), FileName);
677     if (SecI != Obj->section_end())
678       AllSymbols[*SecI].push_back(SymbolInfoTy(Addr, Name, ELF::STT_NOTYPE));
679   }
680 
681   // Sort all the symbols. Use a stable sort to stabilize the output.
682   for (std::pair<const SectionRef, SectionSymbolsTy> &SecSyms : AllSymbols)
683     stable_sort(SecSyms.second);
684 
685   assert((DisassembleFunctionSet.empty() || ShowDisassemblyOnly) &&
686          "Functions to disassemble should be only specified together with "
687          "--show-disassembly-only");
688 
689   if (ShowDisassemblyOnly)
690     outs() << "\nDisassembly of " << FileName << ":\n";
691 
692   // Dissassemble a text section.
693   for (section_iterator SI = Obj->section_begin(), SE = Obj->section_end();
694        SI != SE; ++SI) {
695     const SectionRef &Section = *SI;
696     if (!Section.isText())
697       continue;
698 
699     uint64_t ImageLoadAddr = getPreferredBaseAddress();
700     uint64_t SectionAddress = Section.getAddress() - ImageLoadAddr;
701     uint64_t SectSize = Section.getSize();
702     if (!SectSize)
703       continue;
704 
705     // Register the text section.
706     TextSections.insert({SectionAddress, SectSize});
707 
708     StringRef SectionName = unwrapOrError(Section.getName(), FileName);
709 
710     if (ShowDisassemblyOnly) {
711       outs() << "\nDisassembly of section " << SectionName;
712       outs() << " [" << format("0x%" PRIx64, Section.getAddress()) << ", "
713              << format("0x%" PRIx64, Section.getAddress() + SectSize)
714              << "]:\n\n";
715     }
716 
717     if (isa<ELFObjectFileBase>(Obj) && SectionName == ".plt")
718       continue;
719 
720     // Get the section data.
721     ArrayRef<uint8_t> Bytes =
722         arrayRefFromStringRef(unwrapOrError(Section.getContents(), FileName));
723 
724     // Get the list of all the symbols in this section.
725     SectionSymbolsTy &Symbols = AllSymbols[Section];
726 
727     // Disassemble symbol by symbol.
728     for (std::size_t SI = 0, SE = Symbols.size(); SI != SE; ++SI) {
729       if (!dissassembleSymbol(SI, Bytes, Symbols, Section))
730         exitWithError("disassembling error", FileName);
731     }
732   }
733 
734   if (!AddrsWithInvalidInstruction.empty()) {
735     if (ShowDetailedWarning) {
736       for (auto &Addr : AddrsWithInvalidInstruction) {
737         WithColor::warning()
738             << "Invalid instructions at " << format("%8" PRIx64, Addr.first)
739             << " - " << format("%8" PRIx64, Addr.second) << "\n";
740       }
741     }
742     WithColor::warning() << "Found " << AddrsWithInvalidInstruction.size()
743                          << " invalid instructions\n";
744     AddrsWithInvalidInstruction.clear();
745   }
746 
747   // Dissassemble rodata section to check if FS discriminator symbol exists.
748   checkUseFSDiscriminator(Obj, AllSymbols);
749 }
750 
751 void ProfiledBinary::checkUseFSDiscriminator(
752     const ObjectFile *Obj, std::map<SectionRef, SectionSymbolsTy> &AllSymbols) {
753   const char *FSDiscriminatorVar = "__llvm_fs_discriminator__";
754   for (section_iterator SI = Obj->section_begin(), SE = Obj->section_end();
755        SI != SE; ++SI) {
756     const SectionRef &Section = *SI;
757     if (!Section.isData() || Section.getSize() == 0)
758       continue;
759     SectionSymbolsTy &Symbols = AllSymbols[Section];
760 
761     for (std::size_t SI = 0, SE = Symbols.size(); SI != SE; ++SI) {
762       if (Symbols[SI].Name == FSDiscriminatorVar) {
763         UseFSDiscriminator = true;
764         return;
765       }
766     }
767   }
768 }
769 
770 void ProfiledBinary::populateElfSymbolAddressList(
771     const ELFObjectFileBase *Obj) {
772   // Create a mapping from virtual address to symbol GUID and the other way
773   // around.
774   StringRef FileName = Obj->getFileName();
775   for (const SymbolRef &Symbol : Obj->symbols()) {
776     const uint64_t Addr = unwrapOrError(Symbol.getAddress(), FileName);
777     const StringRef Name = unwrapOrError(Symbol.getName(), FileName);
778     uint64_t GUID = Function::getGUID(Name);
779     SymbolStartAddrs[GUID] = Addr;
780     StartAddrToSymMap.emplace(Addr, GUID);
781   }
782 }
783 
784 void ProfiledBinary::loadSymbolsFromDWARFUnit(DWARFUnit &CompilationUnit) {
785   for (const auto &DieInfo : CompilationUnit.dies()) {
786     llvm::DWARFDie Die(&CompilationUnit, &DieInfo);
787 
788     if (!Die.isSubprogramDIE())
789       continue;
790     auto Name = Die.getName(llvm::DINameKind::LinkageName);
791     if (!Name)
792       Name = Die.getName(llvm::DINameKind::ShortName);
793     if (!Name)
794       continue;
795 
796     auto RangesOrError = Die.getAddressRanges();
797     if (!RangesOrError)
798       continue;
799     const DWARFAddressRangesVector &Ranges = RangesOrError.get();
800 
801     if (Ranges.empty())
802       continue;
803 
804     // Different DWARF symbols can have same function name, search or create
805     // BinaryFunction indexed by the name.
806     auto Ret = BinaryFunctions.emplace(Name, BinaryFunction());
807     auto &Func = Ret.first->second;
808     if (Ret.second)
809       Func.FuncName = Ret.first->first;
810 
811     for (const auto &Range : Ranges) {
812       uint64_t StartAddress = Range.LowPC;
813       uint64_t EndAddress = Range.HighPC;
814 
815       if (EndAddress <= StartAddress ||
816           StartAddress < getPreferredBaseAddress())
817         continue;
818 
819       // We may want to know all ranges for one function. Here group the
820       // ranges and store them into BinaryFunction.
821       Func.Ranges.emplace_back(StartAddress, EndAddress);
822 
823       auto R = StartAddrToFuncRangeMap.emplace(StartAddress, FuncRange());
824       if (R.second) {
825         FuncRange &FRange = R.first->second;
826         FRange.Func = &Func;
827         FRange.StartAddress = StartAddress;
828         FRange.EndAddress = EndAddress;
829       } else {
830         AddrsWithMultipleSymbols.insert(StartAddress);
831         if (ShowDetailedWarning)
832           WithColor::warning()
833               << "Duplicated symbol start address at "
834               << format("%8" PRIx64, StartAddress) << " "
835               << R.first->second.getFuncName() << " and " << Name << "\n";
836       }
837     }
838   }
839 }
840 
841 void ProfiledBinary::loadSymbolsFromDWARF(ObjectFile &Obj) {
842   auto DebugContext = llvm::DWARFContext::create(
843       Obj, DWARFContext::ProcessDebugRelocations::Process, nullptr, DWPPath);
844   if (!DebugContext)
845     exitWithError("Error creating the debug info context", Path);
846 
847   for (const auto &CompilationUnit : DebugContext->compile_units())
848     loadSymbolsFromDWARFUnit(*CompilationUnit);
849 
850   // Handles DWO sections that can either be in .o, .dwo or .dwp files.
851   uint32_t NumOfDWOMissing = 0;
852   for (const auto &CompilationUnit : DebugContext->compile_units()) {
853     DWARFUnit *const DwarfUnit = CompilationUnit.get();
854     if (DwarfUnit->getDWOId()) {
855       DWARFUnit *DWOCU = DwarfUnit->getNonSkeletonUnitDIE(false).getDwarfUnit();
856       if (!DWOCU->isDWOUnit()) {
857         NumOfDWOMissing++;
858         if (ShowDetailedWarning) {
859           std::string DWOName = dwarf::toString(
860               DwarfUnit->getUnitDIE().find(
861                   {dwarf::DW_AT_dwo_name, dwarf::DW_AT_GNU_dwo_name}),
862               "");
863           WithColor::warning() << "DWO debug information for " << DWOName
864                                << " was not loaded.\n";
865         }
866         continue;
867       }
868       loadSymbolsFromDWARFUnit(*DWOCU);
869     }
870   }
871 
872   if (NumOfDWOMissing)
873     WithColor::warning()
874         << " DWO debug information was not loaded for " << NumOfDWOMissing
875         << " modules. Please check the .o, .dwo or .dwp path.\n";
876   if (BinaryFunctions.empty())
877     WithColor::warning() << "Loading of DWARF info completed, but no binary "
878                             "functions have been retrieved.\n";
879   // Populate the hash binary function map for MD5 function name lookup. This
880   // is done after BinaryFunctions are finalized.
881   for (auto &BinaryFunction : BinaryFunctions) {
882     HashBinaryFunctions[MD5Hash(StringRef(BinaryFunction.first))] =
883         &BinaryFunction.second;
884   }
885 
886   if (!AddrsWithMultipleSymbols.empty()) {
887     WithColor::warning() << "Found " << AddrsWithMultipleSymbols.size()
888                          << " start addresses with multiple symbols\n";
889     AddrsWithMultipleSymbols.clear();
890   }
891 }
892 
893 void ProfiledBinary::populateSymbolListFromDWARF(
894     ProfileSymbolList &SymbolList) {
895   for (auto &I : StartAddrToFuncRangeMap)
896     SymbolList.add(I.second.getFuncName());
897 }
898 
899 symbolize::LLVMSymbolizer::Options ProfiledBinary::getSymbolizerOpts() const {
900   symbolize::LLVMSymbolizer::Options SymbolizerOpts;
901   SymbolizerOpts.PrintFunctions =
902       DILineInfoSpecifier::FunctionNameKind::LinkageName;
903   SymbolizerOpts.Demangle = false;
904   SymbolizerOpts.DefaultArch = TheTriple.getArchName().str();
905   SymbolizerOpts.UseSymbolTable = false;
906   SymbolizerOpts.RelativeAddresses = false;
907   SymbolizerOpts.DWPName = DWPPath;
908   return SymbolizerOpts;
909 }
910 
911 SampleContextFrameVector ProfiledBinary::symbolize(const InstructionPointer &IP,
912                                                    bool UseCanonicalFnName,
913                                                    bool UseProbeDiscriminator) {
914   assert(this == IP.Binary &&
915          "Binary should only symbolize its own instruction");
916   auto Addr = object::SectionedAddress{IP.Address,
917                                        object::SectionedAddress::UndefSection};
918   DIInliningInfo InlineStack = unwrapOrError(
919       Symbolizer->symbolizeInlinedCode(SymbolizerPath.str(), Addr),
920       SymbolizerPath);
921 
922   SampleContextFrameVector CallStack;
923   for (int32_t I = InlineStack.getNumberOfFrames() - 1; I >= 0; I--) {
924     const auto &CallerFrame = InlineStack.getFrame(I);
925     if (CallerFrame.FunctionName.empty() ||
926         (CallerFrame.FunctionName == "<invalid>"))
927       break;
928 
929     StringRef FunctionName(CallerFrame.FunctionName);
930     if (UseCanonicalFnName)
931       FunctionName = FunctionSamples::getCanonicalFnName(FunctionName);
932 
933     uint32_t Discriminator = CallerFrame.Discriminator;
934     uint32_t LineOffset = (CallerFrame.Line - CallerFrame.StartLine) & 0xffff;
935     if (UseProbeDiscriminator) {
936       LineOffset =
937           PseudoProbeDwarfDiscriminator::extractProbeIndex(Discriminator);
938       Discriminator = 0;
939     }
940 
941     LineLocation Line(LineOffset, Discriminator);
942     auto It = NameStrings.insert(FunctionName.str());
943     CallStack.emplace_back(FunctionId(StringRef(*It.first)), Line);
944   }
945 
946   return CallStack;
947 }
948 
949 void ProfiledBinary::computeInlinedContextSizeForRange(uint64_t RangeBegin,
950                                                        uint64_t RangeEnd) {
951   InstructionPointer IP(this, RangeBegin, true);
952 
953   if (IP.Address != RangeBegin)
954     WithColor::warning() << "Invalid start instruction at "
955                          << format("%8" PRIx64, RangeBegin) << "\n";
956 
957   if (IP.Address >= RangeEnd)
958     return;
959 
960   do {
961     const SampleContextFrameVector SymbolizedCallStack =
962         getFrameLocationStack(IP.Address, UsePseudoProbes);
963     uint64_t Size = AddressToInstSizeMap[IP.Address];
964     // Record instruction size for the corresponding context
965     FuncSizeTracker.addInstructionForContext(SymbolizedCallStack, Size);
966 
967   } while (IP.advance() && IP.Address < RangeEnd);
968 }
969 
970 void ProfiledBinary::computeInlinedContextSizeForFunc(
971     const BinaryFunction *Func) {
972   // Note that a function can be spilt into multiple ranges, so compute for all
973   // ranges of the function.
974   for (const auto &Range : Func->Ranges)
975     computeInlinedContextSizeForRange(Range.first, Range.second);
976 
977   // Track optimized-away inlinee for probed binary. A function inlined and then
978   // optimized away should still have their probes left over in places.
979   if (usePseudoProbes()) {
980     auto I = TopLevelProbeFrameMap.find(Func->FuncName);
981     if (I != TopLevelProbeFrameMap.end()) {
982       BinarySizeContextTracker::ProbeFrameStack ProbeContext;
983       FuncSizeTracker.trackInlineesOptimizedAway(ProbeDecoder, *I->second,
984                                                  ProbeContext);
985     }
986   }
987 }
988 
989 void ProfiledBinary::inferMissingFrames(
990     const SmallVectorImpl<uint64_t> &Context,
991     SmallVectorImpl<uint64_t> &NewContext) {
992   MissingContextInferrer->inferMissingFrames(Context, NewContext);
993 }
994 
995 InstructionPointer::InstructionPointer(const ProfiledBinary *Binary,
996                                        uint64_t Address, bool RoundToNext)
997     : Binary(Binary), Address(Address) {
998   Index = Binary->getIndexForAddr(Address);
999   if (RoundToNext) {
1000     // we might get address which is not the code
1001     // it should round to the next valid address
1002     if (Index >= Binary->getCodeAddrVecSize())
1003       this->Address = UINT64_MAX;
1004     else
1005       this->Address = Binary->getAddressforIndex(Index);
1006   }
1007 }
1008 
1009 bool InstructionPointer::advance() {
1010   Index++;
1011   if (Index >= Binary->getCodeAddrVecSize()) {
1012     Address = UINT64_MAX;
1013     return false;
1014   }
1015   Address = Binary->getAddressforIndex(Index);
1016   return true;
1017 }
1018 
1019 bool InstructionPointer::backward() {
1020   if (Index == 0) {
1021     Address = 0;
1022     return false;
1023   }
1024   Index--;
1025   Address = Binary->getAddressforIndex(Index);
1026   return true;
1027 }
1028 
1029 void InstructionPointer::update(uint64_t Addr) {
1030   Address = Addr;
1031   Index = Binary->getIndexForAddr(Address);
1032 }
1033 
1034 } // end namespace sampleprof
1035 } // end namespace llvm
1036