xref: /llvm-project/llvm/tools/llvm-profgen/ProfiledBinary.cpp (revision 967767830d7b4947bc0d72e7160d9023aab9eddc)
1 //===-- ProfiledBinary.cpp - Binary decoder ---------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "ProfiledBinary.h"
10 #include "ErrorHandling.h"
11 #include "MissingFrameInferrer.h"
12 #include "ProfileGenerator.h"
13 #include "llvm/DebugInfo/Symbolize/SymbolizableModule.h"
14 #include "llvm/Demangle/Demangle.h"
15 #include "llvm/IR/DebugInfoMetadata.h"
16 #include "llvm/MC/TargetRegistry.h"
17 #include "llvm/Support/CommandLine.h"
18 #include "llvm/Support/Debug.h"
19 #include "llvm/Support/Format.h"
20 #include "llvm/Support/TargetSelect.h"
21 #include "llvm/TargetParser/Triple.h"
22 #include <optional>
23 
24 #define DEBUG_TYPE "load-binary"
25 
26 using namespace llvm;
27 using namespace sampleprof;
28 
29 cl::opt<bool> ShowDisassemblyOnly("show-disassembly-only",
30                                   cl::desc("Print disassembled code."));
31 
32 cl::opt<bool> ShowSourceLocations("show-source-locations",
33                                   cl::desc("Print source locations."));
34 
35 static cl::opt<bool>
36     ShowCanonicalFnName("show-canonical-fname",
37                         cl::desc("Print canonical function name."));
38 
39 static cl::opt<bool> ShowPseudoProbe(
40     "show-pseudo-probe",
41     cl::desc("Print pseudo probe section and disassembled info."));
42 
43 static cl::opt<bool> UseDwarfCorrelation(
44     "use-dwarf-correlation",
45     cl::desc("Use dwarf for profile correlation even when binary contains "
46              "pseudo probe."));
47 
48 static cl::opt<std::string>
49     DWPPath("dwp", cl::init(""),
50             cl::desc("Path of .dwp file. When not specified, it will be "
51                      "<binary>.dwp in the same directory as the main binary."));
52 
53 static cl::list<std::string> DisassembleFunctions(
54     "disassemble-functions", cl::CommaSeparated,
55     cl::desc("List of functions to print disassembly for. Accept demangled "
56              "names only. Only work with show-disassembly-only"));
57 
58 extern cl::opt<bool> ShowDetailedWarning;
59 extern cl::opt<bool> InferMissingFrames;
60 
61 namespace llvm {
62 namespace sampleprof {
63 
64 static const Target *getTarget(const ObjectFile *Obj) {
65   Triple TheTriple = Obj->makeTriple();
66   std::string Error;
67   std::string ArchName;
68   const Target *TheTarget =
69       TargetRegistry::lookupTarget(ArchName, TheTriple, Error);
70   if (!TheTarget)
71     exitWithError(Error, Obj->getFileName());
72   return TheTarget;
73 }
74 
75 void BinarySizeContextTracker::addInstructionForContext(
76     const SampleContextFrameVector &Context, uint32_t InstrSize) {
77   ContextTrieNode *CurNode = &RootContext;
78   bool IsLeaf = true;
79   for (const auto &Callsite : reverse(Context)) {
80     StringRef CallerName = Callsite.FuncName;
81     LineLocation CallsiteLoc = IsLeaf ? LineLocation(0, 0) : Callsite.Location;
82     CurNode = CurNode->getOrCreateChildContext(CallsiteLoc, CallerName);
83     IsLeaf = false;
84   }
85 
86   CurNode->addFunctionSize(InstrSize);
87 }
88 
89 uint32_t
90 BinarySizeContextTracker::getFuncSizeForContext(const ContextTrieNode *Node) {
91   ContextTrieNode *CurrNode = &RootContext;
92   ContextTrieNode *PrevNode = nullptr;
93 
94   std::optional<uint32_t> Size;
95 
96   // Start from top-level context-less function, traverse down the reverse
97   // context trie to find the best/longest match for given context, then
98   // retrieve the size.
99   LineLocation CallSiteLoc(0, 0);
100   while (CurrNode && Node->getParentContext() != nullptr) {
101     PrevNode = CurrNode;
102     CurrNode = CurrNode->getChildContext(CallSiteLoc, Node->getFuncName());
103     if (CurrNode && CurrNode->getFunctionSize())
104       Size = *CurrNode->getFunctionSize();
105     CallSiteLoc = Node->getCallSiteLoc();
106     Node = Node->getParentContext();
107   }
108 
109   // If we traversed all nodes along the path of the context and haven't
110   // found a size yet, pivot to look for size from sibling nodes, i.e size
111   // of inlinee under different context.
112   if (!Size) {
113     if (!CurrNode)
114       CurrNode = PrevNode;
115     while (!Size && CurrNode && !CurrNode->getAllChildContext().empty()) {
116       CurrNode = &CurrNode->getAllChildContext().begin()->second;
117       if (CurrNode->getFunctionSize())
118         Size = *CurrNode->getFunctionSize();
119     }
120   }
121 
122   assert(Size && "We should at least find one context size.");
123   return *Size;
124 }
125 
126 void BinarySizeContextTracker::trackInlineesOptimizedAway(
127     MCPseudoProbeDecoder &ProbeDecoder) {
128   ProbeFrameStack ProbeContext;
129   for (const auto &Child : ProbeDecoder.getDummyInlineRoot().getChildren())
130     trackInlineesOptimizedAway(ProbeDecoder, *Child.second.get(), ProbeContext);
131 }
132 
133 void BinarySizeContextTracker::trackInlineesOptimizedAway(
134     MCPseudoProbeDecoder &ProbeDecoder,
135     MCDecodedPseudoProbeInlineTree &ProbeNode, ProbeFrameStack &ProbeContext) {
136   StringRef FuncName =
137       ProbeDecoder.getFuncDescForGUID(ProbeNode.Guid)->FuncName;
138   ProbeContext.emplace_back(FuncName, 0);
139 
140   // This ProbeContext has a probe, so it has code before inlining and
141   // optimization. Make sure we mark its size as known.
142   if (!ProbeNode.getProbes().empty()) {
143     ContextTrieNode *SizeContext = &RootContext;
144     for (auto &ProbeFrame : reverse(ProbeContext)) {
145       StringRef CallerName = ProbeFrame.first;
146       LineLocation CallsiteLoc(ProbeFrame.second, 0);
147       SizeContext =
148           SizeContext->getOrCreateChildContext(CallsiteLoc, CallerName);
149     }
150     // Add 0 size to make known.
151     SizeContext->addFunctionSize(0);
152   }
153 
154   // DFS down the probe inline tree
155   for (const auto &ChildNode : ProbeNode.getChildren()) {
156     InlineSite Location = ChildNode.first;
157     ProbeContext.back().second = std::get<1>(Location);
158     trackInlineesOptimizedAway(ProbeDecoder, *ChildNode.second.get(),
159                                ProbeContext);
160   }
161 
162   ProbeContext.pop_back();
163 }
164 
165 ProfiledBinary::ProfiledBinary(const StringRef ExeBinPath,
166                                const StringRef DebugBinPath)
167     : Path(ExeBinPath), DebugBinaryPath(DebugBinPath),
168       SymbolizerOpts(getSymbolizerOpts()), ProEpilogTracker(this),
169       Symbolizer(std::make_unique<symbolize::LLVMSymbolizer>(SymbolizerOpts)),
170       TrackFuncContextSize(EnableCSPreInliner && UseContextCostForPreInliner) {
171   // Point to executable binary if debug info binary is not specified.
172   SymbolizerPath = DebugBinPath.empty() ? ExeBinPath : DebugBinPath;
173   if (InferMissingFrames)
174     MissingContextInferrer = std::make_unique<MissingFrameInferrer>(this);
175   load();
176 }
177 
178 ProfiledBinary::~ProfiledBinary() {}
179 
180 void ProfiledBinary::warnNoFuncEntry() {
181   uint64_t NoFuncEntryNum = 0;
182   for (auto &F : BinaryFunctions) {
183     if (F.second.Ranges.empty())
184       continue;
185     bool hasFuncEntry = false;
186     for (auto &R : F.second.Ranges) {
187       if (FuncRange *FR = findFuncRangeForStartAddr(R.first)) {
188         if (FR->IsFuncEntry) {
189           hasFuncEntry = true;
190           break;
191         }
192       }
193     }
194 
195     if (!hasFuncEntry) {
196       NoFuncEntryNum++;
197       if (ShowDetailedWarning)
198         WithColor::warning()
199             << "Failed to determine function entry for " << F.first
200             << " due to inconsistent name from symbol table and dwarf info.\n";
201     }
202   }
203   emitWarningSummary(NoFuncEntryNum, BinaryFunctions.size(),
204                      "of functions failed to determine function entry due to "
205                      "inconsistent name from symbol table and dwarf info.");
206 }
207 
208 void ProfiledBinary::load() {
209   // Attempt to open the binary.
210   OwningBinary<Binary> OBinary = unwrapOrError(createBinary(Path), Path);
211   Binary &ExeBinary = *OBinary.getBinary();
212 
213   auto *Obj = dyn_cast<ELFObjectFileBase>(&ExeBinary);
214   if (!Obj)
215     exitWithError("not a valid Elf image", Path);
216 
217   TheTriple = Obj->makeTriple();
218 
219   LLVM_DEBUG(dbgs() << "Loading " << Path << "\n");
220 
221   // Find the preferred load address for text sections.
222   setPreferredTextSegmentAddresses(Obj);
223 
224   // Load debug info of subprograms from DWARF section.
225   // If path of debug info binary is specified, use the debug info from it,
226   // otherwise use the debug info from the executable binary.
227   if (!DebugBinaryPath.empty()) {
228     OwningBinary<Binary> DebugPath =
229         unwrapOrError(createBinary(DebugBinaryPath), DebugBinaryPath);
230     loadSymbolsFromDWARF(*cast<ObjectFile>(DebugPath.getBinary()));
231   } else {
232     loadSymbolsFromDWARF(*cast<ObjectFile>(&ExeBinary));
233   }
234 
235   DisassembleFunctionSet.insert(DisassembleFunctions.begin(),
236                                 DisassembleFunctions.end());
237 
238   checkPseudoProbe(Obj);
239 
240   if (UsePseudoProbes)
241     populateElfSymbolAddressList(Obj);
242 
243   if (ShowDisassemblyOnly)
244     decodePseudoProbe(Obj);
245 
246   // Disassemble the text sections.
247   disassemble(Obj);
248 
249   // Use function start and return address to infer prolog and epilog
250   ProEpilogTracker.inferPrologAddresses(StartAddrToFuncRangeMap);
251   ProEpilogTracker.inferEpilogAddresses(RetAddressSet);
252 
253   warnNoFuncEntry();
254 
255   // TODO: decode other sections.
256 }
257 
258 bool ProfiledBinary::inlineContextEqual(uint64_t Address1, uint64_t Address2) {
259   const SampleContextFrameVector &Context1 =
260       getCachedFrameLocationStack(Address1);
261   const SampleContextFrameVector &Context2 =
262       getCachedFrameLocationStack(Address2);
263   if (Context1.size() != Context2.size())
264     return false;
265   if (Context1.empty())
266     return false;
267   // The leaf frame contains location within the leaf, and it
268   // needs to be remove that as it's not part of the calling context
269   return std::equal(Context1.begin(), Context1.begin() + Context1.size() - 1,
270                     Context2.begin(), Context2.begin() + Context2.size() - 1);
271 }
272 
273 SampleContextFrameVector
274 ProfiledBinary::getExpandedContext(const SmallVectorImpl<uint64_t> &Stack,
275                                    bool &WasLeafInlined) {
276   SampleContextFrameVector ContextVec;
277   if (Stack.empty())
278     return ContextVec;
279   // Process from frame root to leaf
280   for (auto Address : Stack) {
281     const SampleContextFrameVector &ExpandedContext =
282         getCachedFrameLocationStack(Address);
283     // An instruction without a valid debug line will be ignored by sample
284     // processing
285     if (ExpandedContext.empty())
286       return SampleContextFrameVector();
287     // Set WasLeafInlined to the size of inlined frame count for the last
288     // address which is leaf
289     WasLeafInlined = (ExpandedContext.size() > 1);
290     ContextVec.append(ExpandedContext);
291   }
292 
293   // Replace with decoded base discriminator
294   for (auto &Frame : ContextVec) {
295     Frame.Location.Discriminator = ProfileGeneratorBase::getBaseDiscriminator(
296         Frame.Location.Discriminator, UseFSDiscriminator);
297   }
298 
299   assert(ContextVec.size() && "Context length should be at least 1");
300 
301   // Compress the context string except for the leaf frame
302   auto LeafFrame = ContextVec.back();
303   LeafFrame.Location = LineLocation(0, 0);
304   ContextVec.pop_back();
305   CSProfileGenerator::compressRecursionContext(ContextVec);
306   CSProfileGenerator::trimContext(ContextVec);
307   ContextVec.push_back(LeafFrame);
308   return ContextVec;
309 }
310 
311 template <class ELFT>
312 void ProfiledBinary::setPreferredTextSegmentAddresses(const ELFFile<ELFT> &Obj,
313                                                       StringRef FileName) {
314   const auto &PhdrRange = unwrapOrError(Obj.program_headers(), FileName);
315   // FIXME: This should be the page size of the system running profiling.
316   // However such info isn't available at post-processing time, assuming
317   // 4K page now. Note that we don't use EXEC_PAGESIZE from <linux/param.h>
318   // because we may build the tools on non-linux.
319   uint32_t PageSize = 0x1000;
320   for (const typename ELFT::Phdr &Phdr : PhdrRange) {
321     if (Phdr.p_type == ELF::PT_LOAD) {
322       if (!FirstLoadableAddress)
323         FirstLoadableAddress = Phdr.p_vaddr & ~(PageSize - 1U);
324       if (Phdr.p_flags & ELF::PF_X) {
325         // Segments will always be loaded at a page boundary.
326         PreferredTextSegmentAddresses.push_back(Phdr.p_vaddr &
327                                                 ~(PageSize - 1U));
328         TextSegmentOffsets.push_back(Phdr.p_offset & ~(PageSize - 1U));
329       }
330     }
331   }
332 
333   if (PreferredTextSegmentAddresses.empty())
334     exitWithError("no executable segment found", FileName);
335 }
336 
337 void ProfiledBinary::setPreferredTextSegmentAddresses(
338     const ELFObjectFileBase *Obj) {
339   if (const auto *ELFObj = dyn_cast<ELF32LEObjectFile>(Obj))
340     setPreferredTextSegmentAddresses(ELFObj->getELFFile(), Obj->getFileName());
341   else if (const auto *ELFObj = dyn_cast<ELF32BEObjectFile>(Obj))
342     setPreferredTextSegmentAddresses(ELFObj->getELFFile(), Obj->getFileName());
343   else if (const auto *ELFObj = dyn_cast<ELF64LEObjectFile>(Obj))
344     setPreferredTextSegmentAddresses(ELFObj->getELFFile(), Obj->getFileName());
345   else if (const auto *ELFObj = cast<ELF64BEObjectFile>(Obj))
346     setPreferredTextSegmentAddresses(ELFObj->getELFFile(), Obj->getFileName());
347   else
348     llvm_unreachable("invalid ELF object format");
349 }
350 
351 void ProfiledBinary::checkPseudoProbe(const ELFObjectFileBase *Obj) {
352   if (UseDwarfCorrelation)
353     return;
354 
355   bool HasProbeDescSection = false;
356   bool HasPseudoProbeSection = false;
357 
358   StringRef FileName = Obj->getFileName();
359   for (section_iterator SI = Obj->section_begin(), SE = Obj->section_end();
360        SI != SE; ++SI) {
361     const SectionRef &Section = *SI;
362     StringRef SectionName = unwrapOrError(Section.getName(), FileName);
363     if (SectionName == ".pseudo_probe_desc") {
364       HasProbeDescSection = true;
365     } else if (SectionName == ".pseudo_probe") {
366       HasPseudoProbeSection = true;
367     }
368   }
369 
370   // set UsePseudoProbes flag, used for PerfReader
371   UsePseudoProbes = HasProbeDescSection && HasPseudoProbeSection;
372 }
373 
374 void ProfiledBinary::decodePseudoProbe(const ELFObjectFileBase *Obj) {
375   if (!UsePseudoProbes)
376     return;
377 
378   MCPseudoProbeDecoder::Uint64Set GuidFilter;
379   MCPseudoProbeDecoder::Uint64Map FuncStartAddresses;
380   if (ShowDisassemblyOnly) {
381     if (DisassembleFunctionSet.empty()) {
382       FuncStartAddresses = SymbolStartAddrs;
383     } else {
384       for (auto &F : DisassembleFunctionSet) {
385         auto GUID = Function::getGUID(F.first());
386         if (auto StartAddr = SymbolStartAddrs.lookup(GUID)) {
387           FuncStartAddresses[GUID] = StartAddr;
388           FuncRange &Range = StartAddrToFuncRangeMap[StartAddr];
389           GuidFilter.insert(Function::getGUID(Range.getFuncName()));
390         }
391       }
392     }
393   } else {
394     for (auto *F : ProfiledFunctions) {
395       GuidFilter.insert(Function::getGUID(F->FuncName));
396       for (auto &Range : F->Ranges) {
397         auto GUIDs = StartAddrToSymMap.equal_range(Range.first);
398         for (auto I = GUIDs.first; I != GUIDs.second; ++I)
399           FuncStartAddresses[I->second] = I->first;
400       }
401     }
402   }
403 
404   StringRef FileName = Obj->getFileName();
405   for (section_iterator SI = Obj->section_begin(), SE = Obj->section_end();
406        SI != SE; ++SI) {
407     const SectionRef &Section = *SI;
408     StringRef SectionName = unwrapOrError(Section.getName(), FileName);
409 
410     if (SectionName == ".pseudo_probe_desc") {
411       StringRef Contents = unwrapOrError(Section.getContents(), FileName);
412       if (!ProbeDecoder.buildGUID2FuncDescMap(
413               reinterpret_cast<const uint8_t *>(Contents.data()),
414               Contents.size()))
415         exitWithError(
416             "Pseudo Probe decoder fail in .pseudo_probe_desc section");
417     } else if (SectionName == ".pseudo_probe") {
418       StringRef Contents = unwrapOrError(Section.getContents(), FileName);
419       if (!ProbeDecoder.buildAddress2ProbeMap(
420               reinterpret_cast<const uint8_t *>(Contents.data()),
421               Contents.size(), GuidFilter, FuncStartAddresses))
422         exitWithError("Pseudo Probe decoder fail in .pseudo_probe section");
423     }
424   }
425 
426   // Build TopLevelProbeFrameMap to track size for optimized inlinees when probe
427   // is available
428   if (TrackFuncContextSize) {
429     for (const auto &Child : ProbeDecoder.getDummyInlineRoot().getChildren()) {
430       auto *Frame = Child.second.get();
431       StringRef FuncName =
432           ProbeDecoder.getFuncDescForGUID(Frame->Guid)->FuncName;
433       TopLevelProbeFrameMap[FuncName] = Frame;
434     }
435   }
436 
437   if (ShowPseudoProbe)
438     ProbeDecoder.printGUID2FuncDescMap(outs());
439 }
440 
441 void ProfiledBinary::decodePseudoProbe() {
442   OwningBinary<Binary> OBinary = unwrapOrError(createBinary(Path), Path);
443   Binary &ExeBinary = *OBinary.getBinary();
444   auto *Obj = dyn_cast<ELFObjectFileBase>(&ExeBinary);
445   decodePseudoProbe(Obj);
446 }
447 
448 void ProfiledBinary::setIsFuncEntry(FuncRange *FuncRange,
449                                     StringRef RangeSymName) {
450   // Skip external function symbol.
451   if (!FuncRange)
452     return;
453 
454   // Set IsFuncEntry to ture if there is only one range in the function or the
455   // RangeSymName from ELF is equal to its DWARF-based function name.
456   if (FuncRange->Func->Ranges.size() == 1 ||
457       (!FuncRange->IsFuncEntry && FuncRange->getFuncName() == RangeSymName))
458     FuncRange->IsFuncEntry = true;
459 }
460 
461 bool ProfiledBinary::dissassembleSymbol(std::size_t SI, ArrayRef<uint8_t> Bytes,
462                                         SectionSymbolsTy &Symbols,
463                                         const SectionRef &Section) {
464   std::size_t SE = Symbols.size();
465   uint64_t SectionAddress = Section.getAddress();
466   uint64_t SectSize = Section.getSize();
467   uint64_t StartAddress = Symbols[SI].Addr;
468   uint64_t NextStartAddress =
469       (SI + 1 < SE) ? Symbols[SI + 1].Addr : SectionAddress + SectSize;
470   FuncRange *FRange = findFuncRange(StartAddress);
471   setIsFuncEntry(FRange, FunctionSamples::getCanonicalFnName(Symbols[SI].Name));
472   StringRef SymbolName =
473       ShowCanonicalFnName
474           ? FunctionSamples::getCanonicalFnName(Symbols[SI].Name)
475           : Symbols[SI].Name;
476   bool ShowDisassembly =
477       ShowDisassemblyOnly && (DisassembleFunctionSet.empty() ||
478                               DisassembleFunctionSet.count(SymbolName));
479   if (ShowDisassembly)
480     outs() << '<' << SymbolName << ">:\n";
481 
482   auto WarnInvalidInsts = [](uint64_t Start, uint64_t End) {
483     WithColor::warning() << "Invalid instructions at "
484                          << format("%8" PRIx64, Start) << " - "
485                          << format("%8" PRIx64, End) << "\n";
486   };
487 
488   uint64_t Address = StartAddress;
489   // Size of a consecutive invalid instruction range starting from Address -1
490   // backwards.
491   uint64_t InvalidInstLength = 0;
492   while (Address < NextStartAddress) {
493     MCInst Inst;
494     uint64_t Size;
495     // Disassemble an instruction.
496     bool Disassembled = DisAsm->getInstruction(
497         Inst, Size, Bytes.slice(Address - SectionAddress), Address, nulls());
498     if (Size == 0)
499       Size = 1;
500 
501     if (ShowDisassembly) {
502       if (ShowPseudoProbe) {
503         ProbeDecoder.printProbeForAddress(outs(), Address);
504       }
505       outs() << format("%8" PRIx64 ":", Address);
506       size_t Start = outs().tell();
507       if (Disassembled)
508         IPrinter->printInst(&Inst, Address + Size, "", *STI.get(), outs());
509       else
510         outs() << "\t<unknown>";
511       if (ShowSourceLocations) {
512         unsigned Cur = outs().tell() - Start;
513         if (Cur < 40)
514           outs().indent(40 - Cur);
515         InstructionPointer IP(this, Address);
516         outs() << getReversedLocWithContext(
517             symbolize(IP, ShowCanonicalFnName, ShowPseudoProbe));
518       }
519       outs() << "\n";
520     }
521 
522     if (Disassembled) {
523       const MCInstrDesc &MCDesc = MII->get(Inst.getOpcode());
524 
525       // Record instruction size.
526       AddressToInstSizeMap[Address] = Size;
527 
528       // Populate address maps.
529       CodeAddressVec.push_back(Address);
530       if (MCDesc.isCall()) {
531         CallAddressSet.insert(Address);
532         UncondBranchAddrSet.insert(Address);
533       } else if (MCDesc.isReturn()) {
534         RetAddressSet.insert(Address);
535         UncondBranchAddrSet.insert(Address);
536       } else if (MCDesc.isBranch()) {
537         if (MCDesc.isUnconditionalBranch())
538           UncondBranchAddrSet.insert(Address);
539         BranchAddressSet.insert(Address);
540       }
541 
542       // Record potential call targets for tail frame inference later-on.
543       if (InferMissingFrames && FRange) {
544         uint64_t Target = 0;
545         MIA->evaluateBranch(Inst, Address, Size, Target);
546         if (MCDesc.isCall()) {
547           // Indirect call targets are unknown at this point. Recording the
548           // unknown target (zero) for further LBR-based refinement.
549           MissingContextInferrer->CallEdges[Address].insert(Target);
550         } else if (MCDesc.isUnconditionalBranch()) {
551           assert(Target &&
552                  "target should be known for unconditional direct branch");
553           // Any inter-function unconditional jump is considered tail call at
554           // this point. This is not 100% accurate and could further be
555           // optimized based on some source annotation.
556           FuncRange *ToFRange = findFuncRange(Target);
557           if (ToFRange && ToFRange->Func != FRange->Func)
558             MissingContextInferrer->TailCallEdges[Address].insert(Target);
559           LLVM_DEBUG({
560             dbgs() << "Direct Tail call: " << format("%8" PRIx64 ":", Address);
561             IPrinter->printInst(&Inst, Address + Size, "", *STI.get(), dbgs());
562             dbgs() << "\n";
563           });
564         } else if (MCDesc.isIndirectBranch() && MCDesc.isBarrier()) {
565           // This is an indirect branch but not necessarily an indirect tail
566           // call. The isBarrier check is to filter out conditional branch.
567           // Similar with indirect call targets, recording the unknown target
568           // (zero) for further LBR-based refinement.
569           MissingContextInferrer->TailCallEdges[Address].insert(Target);
570           LLVM_DEBUG({
571             dbgs() << "Indirect Tail call: "
572                    << format("%8" PRIx64 ":", Address);
573             IPrinter->printInst(&Inst, Address + Size, "", *STI.get(), dbgs());
574             dbgs() << "\n";
575           });
576         }
577       }
578 
579       if (InvalidInstLength) {
580         WarnInvalidInsts(Address - InvalidInstLength, Address - 1);
581         InvalidInstLength = 0;
582       }
583     } else {
584       InvalidInstLength += Size;
585     }
586 
587     Address += Size;
588   }
589 
590   if (InvalidInstLength)
591     WarnInvalidInsts(Address - InvalidInstLength, Address - 1);
592 
593   if (ShowDisassembly)
594     outs() << "\n";
595 
596   return true;
597 }
598 
599 void ProfiledBinary::setUpDisassembler(const ELFObjectFileBase *Obj) {
600   const Target *TheTarget = getTarget(Obj);
601   std::string TripleName = TheTriple.getTriple();
602   StringRef FileName = Obj->getFileName();
603 
604   MRI.reset(TheTarget->createMCRegInfo(TripleName));
605   if (!MRI)
606     exitWithError("no register info for target " + TripleName, FileName);
607 
608   MCTargetOptions MCOptions;
609   AsmInfo.reset(TheTarget->createMCAsmInfo(*MRI, TripleName, MCOptions));
610   if (!AsmInfo)
611     exitWithError("no assembly info for target " + TripleName, FileName);
612 
613   Expected<SubtargetFeatures> Features = Obj->getFeatures();
614   if (!Features)
615     exitWithError(Features.takeError(), FileName);
616   STI.reset(
617       TheTarget->createMCSubtargetInfo(TripleName, "", Features->getString()));
618   if (!STI)
619     exitWithError("no subtarget info for target " + TripleName, FileName);
620 
621   MII.reset(TheTarget->createMCInstrInfo());
622   if (!MII)
623     exitWithError("no instruction info for target " + TripleName, FileName);
624 
625   MCContext Ctx(Triple(TripleName), AsmInfo.get(), MRI.get(), STI.get());
626   std::unique_ptr<MCObjectFileInfo> MOFI(
627       TheTarget->createMCObjectFileInfo(Ctx, /*PIC=*/false));
628   Ctx.setObjectFileInfo(MOFI.get());
629   DisAsm.reset(TheTarget->createMCDisassembler(*STI, Ctx));
630   if (!DisAsm)
631     exitWithError("no disassembler for target " + TripleName, FileName);
632 
633   MIA.reset(TheTarget->createMCInstrAnalysis(MII.get()));
634 
635   int AsmPrinterVariant = AsmInfo->getAssemblerDialect();
636   IPrinter.reset(TheTarget->createMCInstPrinter(
637       Triple(TripleName), AsmPrinterVariant, *AsmInfo, *MII, *MRI));
638   IPrinter->setPrintBranchImmAsAddress(true);
639 }
640 
641 void ProfiledBinary::disassemble(const ELFObjectFileBase *Obj) {
642   // Set up disassembler and related components.
643   setUpDisassembler(Obj);
644 
645   // Create a mapping from virtual address to symbol name. The symbols in text
646   // sections are the candidates to dissassemble.
647   std::map<SectionRef, SectionSymbolsTy> AllSymbols;
648   StringRef FileName = Obj->getFileName();
649   for (const SymbolRef &Symbol : Obj->symbols()) {
650     const uint64_t Addr = unwrapOrError(Symbol.getAddress(), FileName);
651     const StringRef Name = unwrapOrError(Symbol.getName(), FileName);
652     section_iterator SecI = unwrapOrError(Symbol.getSection(), FileName);
653     if (SecI != Obj->section_end())
654       AllSymbols[*SecI].push_back(SymbolInfoTy(Addr, Name, ELF::STT_NOTYPE));
655   }
656 
657   // Sort all the symbols. Use a stable sort to stabilize the output.
658   for (std::pair<const SectionRef, SectionSymbolsTy> &SecSyms : AllSymbols)
659     stable_sort(SecSyms.second);
660 
661   assert((DisassembleFunctionSet.empty() || ShowDisassemblyOnly) &&
662          "Functions to disassemble should be only specified together with "
663          "--show-disassembly-only");
664 
665   if (ShowDisassemblyOnly)
666     outs() << "\nDisassembly of " << FileName << ":\n";
667 
668   // Dissassemble a text section.
669   for (section_iterator SI = Obj->section_begin(), SE = Obj->section_end();
670        SI != SE; ++SI) {
671     const SectionRef &Section = *SI;
672     if (!Section.isText())
673       continue;
674 
675     uint64_t ImageLoadAddr = getPreferredBaseAddress();
676     uint64_t SectionAddress = Section.getAddress() - ImageLoadAddr;
677     uint64_t SectSize = Section.getSize();
678     if (!SectSize)
679       continue;
680 
681     // Register the text section.
682     TextSections.insert({SectionAddress, SectSize});
683 
684     StringRef SectionName = unwrapOrError(Section.getName(), FileName);
685 
686     if (ShowDisassemblyOnly) {
687       outs() << "\nDisassembly of section " << SectionName;
688       outs() << " [" << format("0x%" PRIx64, Section.getAddress()) << ", "
689              << format("0x%" PRIx64, Section.getAddress() + SectSize)
690              << "]:\n\n";
691     }
692 
693     if (SectionName == ".plt")
694       continue;
695 
696     // Get the section data.
697     ArrayRef<uint8_t> Bytes =
698         arrayRefFromStringRef(unwrapOrError(Section.getContents(), FileName));
699 
700     // Get the list of all the symbols in this section.
701     SectionSymbolsTy &Symbols = AllSymbols[Section];
702 
703     // Disassemble symbol by symbol.
704     for (std::size_t SI = 0, SE = Symbols.size(); SI != SE; ++SI) {
705       if (!dissassembleSymbol(SI, Bytes, Symbols, Section))
706         exitWithError("disassembling error", FileName);
707     }
708   }
709 
710   // Dissassemble rodata section to check if FS discriminator symbol exists.
711   checkUseFSDiscriminator(Obj, AllSymbols);
712 }
713 
714 void ProfiledBinary::checkUseFSDiscriminator(
715     const ELFObjectFileBase *Obj,
716     std::map<SectionRef, SectionSymbolsTy> &AllSymbols) {
717   const char *FSDiscriminatorVar = "__llvm_fs_discriminator__";
718   for (section_iterator SI = Obj->section_begin(), SE = Obj->section_end();
719        SI != SE; ++SI) {
720     const SectionRef &Section = *SI;
721     if (!Section.isData() || Section.getSize() == 0)
722       continue;
723     SectionSymbolsTy &Symbols = AllSymbols[Section];
724 
725     for (std::size_t SI = 0, SE = Symbols.size(); SI != SE; ++SI) {
726       if (Symbols[SI].Name == FSDiscriminatorVar) {
727         UseFSDiscriminator = true;
728         return;
729       }
730     }
731   }
732 }
733 
734 void ProfiledBinary::populateElfSymbolAddressList(
735     const ELFObjectFileBase *Obj) {
736   // Create a mapping from virtual address to symbol GUID and the other way
737   // around.
738   StringRef FileName = Obj->getFileName();
739   for (const SymbolRef &Symbol : Obj->symbols()) {
740     const uint64_t Addr = unwrapOrError(Symbol.getAddress(), FileName);
741     const StringRef Name = unwrapOrError(Symbol.getName(), FileName);
742     uint64_t GUID = Function::getGUID(Name);
743     SymbolStartAddrs[GUID] = Addr;
744     StartAddrToSymMap.emplace(Addr, GUID);
745   }
746 }
747 
748 void ProfiledBinary::loadSymbolsFromDWARFUnit(DWARFUnit &CompilationUnit) {
749   for (const auto &DieInfo : CompilationUnit.dies()) {
750     llvm::DWARFDie Die(&CompilationUnit, &DieInfo);
751 
752     if (!Die.isSubprogramDIE())
753       continue;
754     auto Name = Die.getName(llvm::DINameKind::LinkageName);
755     if (!Name)
756       Name = Die.getName(llvm::DINameKind::ShortName);
757     if (!Name)
758       continue;
759 
760     auto RangesOrError = Die.getAddressRanges();
761     if (!RangesOrError)
762       continue;
763     const DWARFAddressRangesVector &Ranges = RangesOrError.get();
764 
765     if (Ranges.empty())
766       continue;
767 
768     // Different DWARF symbols can have same function name, search or create
769     // BinaryFunction indexed by the name.
770     auto Ret = BinaryFunctions.emplace(Name, BinaryFunction());
771     auto &Func = Ret.first->second;
772     if (Ret.second)
773       Func.FuncName = Ret.first->first;
774 
775     for (const auto &Range : Ranges) {
776       uint64_t StartAddress = Range.LowPC;
777       uint64_t EndAddress = Range.HighPC;
778 
779       if (EndAddress <= StartAddress ||
780           StartAddress < getPreferredBaseAddress())
781         continue;
782 
783       // We may want to know all ranges for one function. Here group the
784       // ranges and store them into BinaryFunction.
785       Func.Ranges.emplace_back(StartAddress, EndAddress);
786 
787       auto R = StartAddrToFuncRangeMap.emplace(StartAddress, FuncRange());
788       if (R.second) {
789         FuncRange &FRange = R.first->second;
790         FRange.Func = &Func;
791         FRange.StartAddress = StartAddress;
792         FRange.EndAddress = EndAddress;
793       } else {
794         WithColor::warning()
795             << "Duplicated symbol start address at "
796             << format("%8" PRIx64, StartAddress) << " "
797             << R.first->second.getFuncName() << " and " << Name << "\n";
798       }
799     }
800   }
801 }
802 
803 void ProfiledBinary::loadSymbolsFromDWARF(ObjectFile &Obj) {
804   auto DebugContext = llvm::DWARFContext::create(
805       Obj, DWARFContext::ProcessDebugRelocations::Process, nullptr, DWPPath);
806   if (!DebugContext)
807     exitWithError("Error creating the debug info context", Path);
808 
809   for (const auto &CompilationUnit : DebugContext->compile_units())
810     loadSymbolsFromDWARFUnit(*CompilationUnit.get());
811 
812   // Handles DWO sections that can either be in .o, .dwo or .dwp files.
813   uint32_t NumOfDWOMissing = 0;
814   for (const auto &CompilationUnit : DebugContext->compile_units()) {
815     DWARFUnit *const DwarfUnit = CompilationUnit.get();
816     if (DwarfUnit->getDWOId()) {
817       DWARFUnit *DWOCU = DwarfUnit->getNonSkeletonUnitDIE(false).getDwarfUnit();
818       if (!DWOCU->isDWOUnit()) {
819         NumOfDWOMissing++;
820         if (ShowDetailedWarning) {
821           std::string DWOName = dwarf::toString(
822               DwarfUnit->getUnitDIE().find(
823                   {dwarf::DW_AT_dwo_name, dwarf::DW_AT_GNU_dwo_name}),
824               "");
825           WithColor::warning() << "DWO debug information for " << DWOName
826                                << " was not loaded.\n";
827         }
828         continue;
829       }
830       loadSymbolsFromDWARFUnit(*DWOCU);
831     }
832   }
833 
834   if (NumOfDWOMissing)
835     WithColor::warning()
836         << " DWO debug information was not loaded for " << NumOfDWOMissing
837         << " modules. Please check the .o, .dwo or .dwp path.\n";
838   if (BinaryFunctions.empty())
839     WithColor::warning() << "Loading of DWARF info completed, but no binary "
840                             "functions have been retrieved.\n";
841 }
842 
843 void ProfiledBinary::populateSymbolListFromDWARF(
844     ProfileSymbolList &SymbolList) {
845   for (auto &I : StartAddrToFuncRangeMap)
846     SymbolList.add(I.second.getFuncName());
847 }
848 
849 symbolize::LLVMSymbolizer::Options ProfiledBinary::getSymbolizerOpts() const {
850   symbolize::LLVMSymbolizer::Options SymbolizerOpts;
851   SymbolizerOpts.PrintFunctions =
852       DILineInfoSpecifier::FunctionNameKind::LinkageName;
853   SymbolizerOpts.Demangle = false;
854   SymbolizerOpts.DefaultArch = TheTriple.getArchName().str();
855   SymbolizerOpts.UseSymbolTable = false;
856   SymbolizerOpts.RelativeAddresses = false;
857   SymbolizerOpts.DWPName = DWPPath;
858   return SymbolizerOpts;
859 }
860 
861 SampleContextFrameVector ProfiledBinary::symbolize(const InstructionPointer &IP,
862                                                    bool UseCanonicalFnName,
863                                                    bool UseProbeDiscriminator) {
864   assert(this == IP.Binary &&
865          "Binary should only symbolize its own instruction");
866   auto Addr = object::SectionedAddress{IP.Address,
867                                        object::SectionedAddress::UndefSection};
868   DIInliningInfo InlineStack = unwrapOrError(
869       Symbolizer->symbolizeInlinedCode(SymbolizerPath.str(), Addr),
870       SymbolizerPath);
871 
872   SampleContextFrameVector CallStack;
873   for (int32_t I = InlineStack.getNumberOfFrames() - 1; I >= 0; I--) {
874     const auto &CallerFrame = InlineStack.getFrame(I);
875     if (CallerFrame.FunctionName.empty() || (CallerFrame.FunctionName == "<invalid>"))
876       break;
877 
878     StringRef FunctionName(CallerFrame.FunctionName);
879     if (UseCanonicalFnName)
880       FunctionName = FunctionSamples::getCanonicalFnName(FunctionName);
881 
882     uint32_t Discriminator = CallerFrame.Discriminator;
883     uint32_t LineOffset = (CallerFrame.Line - CallerFrame.StartLine) & 0xffff;
884     if (UseProbeDiscriminator) {
885       LineOffset =
886           PseudoProbeDwarfDiscriminator::extractProbeIndex(Discriminator);
887       Discriminator = 0;
888     }
889 
890     LineLocation Line(LineOffset, Discriminator);
891     auto It = NameStrings.insert(FunctionName.str());
892     CallStack.emplace_back(*It.first, Line);
893   }
894 
895   return CallStack;
896 }
897 
898 void ProfiledBinary::computeInlinedContextSizeForRange(uint64_t RangeBegin,
899                                                        uint64_t RangeEnd) {
900   InstructionPointer IP(this, RangeBegin, true);
901 
902   if (IP.Address != RangeBegin)
903     WithColor::warning() << "Invalid start instruction at "
904                          << format("%8" PRIx64, RangeBegin) << "\n";
905 
906   if (IP.Address >= RangeEnd)
907     return;
908 
909   do {
910     const SampleContextFrameVector SymbolizedCallStack =
911         getFrameLocationStack(IP.Address, UsePseudoProbes);
912     uint64_t Size = AddressToInstSizeMap[IP.Address];
913     // Record instruction size for the corresponding context
914     FuncSizeTracker.addInstructionForContext(SymbolizedCallStack, Size);
915 
916   } while (IP.advance() && IP.Address < RangeEnd);
917 }
918 
919 void ProfiledBinary::computeInlinedContextSizeForFunc(
920     const BinaryFunction *Func) {
921   // Note that a function can be spilt into multiple ranges, so compute for all
922   // ranges of the function.
923   for (const auto &Range : Func->Ranges)
924     computeInlinedContextSizeForRange(Range.first, Range.second);
925 
926   // Track optimized-away inlinee for probed binary. A function inlined and then
927   // optimized away should still have their probes left over in places.
928   if (usePseudoProbes()) {
929     auto I = TopLevelProbeFrameMap.find(Func->FuncName);
930     if (I != TopLevelProbeFrameMap.end()) {
931       BinarySizeContextTracker::ProbeFrameStack ProbeContext;
932       FuncSizeTracker.trackInlineesOptimizedAway(ProbeDecoder, *I->second,
933                                                  ProbeContext);
934     }
935   }
936 }
937 
938 void ProfiledBinary::inferMissingFrames(
939     const SmallVectorImpl<uint64_t> &Context,
940     SmallVectorImpl<uint64_t> &NewContext) {
941   MissingContextInferrer->inferMissingFrames(Context, NewContext);
942 }
943 
944 InstructionPointer::InstructionPointer(const ProfiledBinary *Binary,
945                                        uint64_t Address, bool RoundToNext)
946     : Binary(Binary), Address(Address) {
947   Index = Binary->getIndexForAddr(Address);
948   if (RoundToNext) {
949     // we might get address which is not the code
950     // it should round to the next valid address
951     if (Index >= Binary->getCodeAddrVecSize())
952       this->Address = UINT64_MAX;
953     else
954       this->Address = Binary->getAddressforIndex(Index);
955   }
956 }
957 
958 bool InstructionPointer::advance() {
959   Index++;
960   if (Index >= Binary->getCodeAddrVecSize()) {
961     Address = UINT64_MAX;
962     return false;
963   }
964   Address = Binary->getAddressforIndex(Index);
965   return true;
966 }
967 
968 bool InstructionPointer::backward() {
969   if (Index == 0) {
970     Address = 0;
971     return false;
972   }
973   Index--;
974   Address = Binary->getAddressforIndex(Index);
975   return true;
976 }
977 
978 void InstructionPointer::update(uint64_t Addr) {
979   Address = Addr;
980   Index = Binary->getIndexForAddr(Address);
981 }
982 
983 } // end namespace sampleprof
984 } // end namespace llvm
985