xref: /llvm-project/llvm/tools/llvm-profgen/ProfiledBinary.cpp (revision 04ebd1907c0561831e4fcf2658e1f3614f8cdd77)
132221694Swlei //===-- ProfiledBinary.cpp - Binary decoder ---------------------*- C++ -*-===//
232221694Swlei //
332221694Swlei // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
432221694Swlei // See https://llvm.org/LICENSE.txt for license information.
532221694Swlei // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
632221694Swlei //
732221694Swlei //===----------------------------------------------------------------------===//
832221694Swlei 
932221694Swlei #include "ProfiledBinary.h"
1032221694Swlei #include "ErrorHandling.h"
115d7950a4SHongtao Yu #include "MissingFrameInferrer.h"
12ac14bb14Swlei #include "ProfileGenerator.h"
13db29f437Sserge-sans-paille #include "llvm/DebugInfo/Symbolize/SymbolizableModule.h"
1432221694Swlei #include "llvm/Demangle/Demangle.h"
15c3aeabaeSwlei #include "llvm/IR/DebugInfoMetadata.h"
1689b57061SReid Kleckner #include "llvm/MC/TargetRegistry.h"
178c03f400SHaohai Wen #include "llvm/Object/COFF.h"
1832221694Swlei #include "llvm/Support/CommandLine.h"
195d7950a4SHongtao Yu #include "llvm/Support/Debug.h"
200196b45cSwlei #include "llvm/Support/Format.h"
2132221694Swlei #include "llvm/Support/TargetSelect.h"
2262c7f035SArchibald Elliott #include "llvm/TargetParser/Triple.h"
23286223edSKazu Hirata #include <optional>
2432221694Swlei 
2532221694Swlei #define DEBUG_TYPE "load-binary"
2632221694Swlei 
2732221694Swlei using namespace llvm;
281f05b1a9Swlei using namespace sampleprof;
2932221694Swlei 
30d86a206fSFangrui Song cl::opt<bool> ShowDisassemblyOnly("show-disassembly-only",
3132221694Swlei                                   cl::desc("Print disassembled code."));
3232221694Swlei 
33d86a206fSFangrui Song cl::opt<bool> ShowSourceLocations("show-source-locations",
340196b45cSwlei                                   cl::desc("Print source locations."));
350196b45cSwlei 
36a316343eSWenlei He static cl::opt<bool>
37557efc9aSFangrui Song     ShowCanonicalFnName("show-canonical-fname",
38dddd590fSwlei                         cl::desc("Print canonical function name."));
39dddd590fSwlei 
40a316343eSWenlei He static cl::opt<bool> ShowPseudoProbe(
41557efc9aSFangrui Song     "show-pseudo-probe",
42b3154d11Swlei     cl::desc("Print pseudo probe section and disassembled info."));
43b3154d11Swlei 
44a316343eSWenlei He static cl::opt<bool> UseDwarfCorrelation(
45557efc9aSFangrui Song     "use-dwarf-correlation",
46a316343eSWenlei He     cl::desc("Use dwarf for profile correlation even when binary contains "
47a316343eSWenlei He              "pseudo probe."));
48a316343eSWenlei He 
49b3a778fbSwlei static cl::opt<std::string>
50557efc9aSFangrui Song     DWPPath("dwp", cl::init(""),
51b3a778fbSwlei             cl::desc("Path of .dwp file. When not specified, it will be "
52b3a778fbSwlei                      "<binary>.dwp in the same directory as the main binary."));
53b3a778fbSwlei 
54734f4d83SHongtao Yu static cl::list<std::string> DisassembleFunctions(
55734f4d83SHongtao Yu     "disassemble-functions", cl::CommaSeparated,
56734f4d83SHongtao Yu     cl::desc("List of functions to print disassembly for. Accept demangled "
57734f4d83SHongtao Yu              "names only. Only work with show-disassembly-only"));
58734f4d83SHongtao Yu 
592fa6eaf9Sxur-llvm static cl::opt<bool>
602fa6eaf9Sxur-llvm     KernelBinary("kernel",
612fa6eaf9Sxur-llvm                  cl::desc("Generate the profile for Linux kernel binary."));
622fa6eaf9Sxur-llvm 
63aab18100Swlei extern cl::opt<bool> ShowDetailedWarning;
645d7950a4SHongtao Yu extern cl::opt<bool> InferMissingFrames;
65aab18100Swlei 
6632221694Swlei namespace llvm {
6732221694Swlei namespace sampleprof {
6832221694Swlei 
6932221694Swlei static const Target *getTarget(const ObjectFile *Obj) {
7032221694Swlei   Triple TheTriple = Obj->makeTriple();
7132221694Swlei   std::string Error;
7232221694Swlei   std::string ArchName;
7332221694Swlei   const Target *TheTarget =
7432221694Swlei       TargetRegistry::lookupTarget(ArchName, TheTriple, Error);
7532221694Swlei   if (!TheTarget)
7632221694Swlei     exitWithError(Error, Obj->getFileName());
7732221694Swlei   return TheTarget;
7832221694Swlei }
7932221694Swlei 
80eca03d27SWenlei He void BinarySizeContextTracker::addInstructionForContext(
81b9db7036SHongtao Yu     const SampleContextFrameVector &Context, uint32_t InstrSize) {
82eca03d27SWenlei He   ContextTrieNode *CurNode = &RootContext;
83eca03d27SWenlei He   bool IsLeaf = true;
84eca03d27SWenlei He   for (const auto &Callsite : reverse(Context)) {
85ef0e0adcSWilliam Junda Huang     FunctionId CallerName = Callsite.Func;
86fb29d812Swlei     LineLocation CallsiteLoc = IsLeaf ? LineLocation(0, 0) : Callsite.Location;
87eca03d27SWenlei He     CurNode = CurNode->getOrCreateChildContext(CallsiteLoc, CallerName);
88eca03d27SWenlei He     IsLeaf = false;
89eca03d27SWenlei He   }
90eca03d27SWenlei He 
91a6f15e9aSWenlei He   CurNode->addFunctionSize(InstrSize);
92eca03d27SWenlei He }
93eca03d27SWenlei He 
94eca03d27SWenlei He uint32_t
957e86b13cSwlei BinarySizeContextTracker::getFuncSizeForContext(const ContextTrieNode *Node) {
96eca03d27SWenlei He   ContextTrieNode *CurrNode = &RootContext;
97eca03d27SWenlei He   ContextTrieNode *PrevNode = nullptr;
987e86b13cSwlei 
99286223edSKazu Hirata   std::optional<uint32_t> Size;
100eca03d27SWenlei He 
101b9db7036SHongtao Yu   // Start from top-level context-less function, traverse down the reverse
102eca03d27SWenlei He   // context trie to find the best/longest match for given context, then
103eca03d27SWenlei He   // retrieve the size.
1047e86b13cSwlei   LineLocation CallSiteLoc(0, 0);
1057e86b13cSwlei   while (CurrNode && Node->getParentContext() != nullptr) {
106eca03d27SWenlei He     PrevNode = CurrNode;
1077e86b13cSwlei     CurrNode = CurrNode->getChildContext(CallSiteLoc, Node->getFuncName());
108a7938c74SKazu Hirata     if (CurrNode && CurrNode->getFunctionSize())
10921c4dc79SFangrui Song       Size = *CurrNode->getFunctionSize();
1107e86b13cSwlei     CallSiteLoc = Node->getCallSiteLoc();
1117e86b13cSwlei     Node = Node->getParentContext();
112eca03d27SWenlei He   }
113eca03d27SWenlei He 
114eca03d27SWenlei He   // If we traversed all nodes along the path of the context and haven't
115eca03d27SWenlei He   // found a size yet, pivot to look for size from sibling nodes, i.e size
116eca03d27SWenlei He   // of inlinee under different context.
117a7938c74SKazu Hirata   if (!Size) {
118eca03d27SWenlei He     if (!CurrNode)
119eca03d27SWenlei He       CurrNode = PrevNode;
120a7938c74SKazu Hirata     while (!Size && CurrNode && !CurrNode->getAllChildContext().empty()) {
121eca03d27SWenlei He       CurrNode = &CurrNode->getAllChildContext().begin()->second;
122a7938c74SKazu Hirata       if (CurrNode->getFunctionSize())
12321c4dc79SFangrui Song         Size = *CurrNode->getFunctionSize();
124eca03d27SWenlei He     }
125eca03d27SWenlei He   }
126eca03d27SWenlei He 
127a7938c74SKazu Hirata   assert(Size && "We should at least find one context size.");
12821c4dc79SFangrui Song   return *Size;
129a6f15e9aSWenlei He }
130a6f15e9aSWenlei He 
131a6f15e9aSWenlei He void BinarySizeContextTracker::trackInlineesOptimizedAway(
132a6f15e9aSWenlei He     MCPseudoProbeDecoder &ProbeDecoder) {
133a6f15e9aSWenlei He   ProbeFrameStack ProbeContext;
134a6f15e9aSWenlei He   for (const auto &Child : ProbeDecoder.getDummyInlineRoot().getChildren())
135*04ebd190SAmir Ayupov     trackInlineesOptimizedAway(ProbeDecoder, Child, ProbeContext);
136a6f15e9aSWenlei He }
137a6f15e9aSWenlei He 
138a6f15e9aSWenlei He void BinarySizeContextTracker::trackInlineesOptimizedAway(
139a6f15e9aSWenlei He     MCPseudoProbeDecoder &ProbeDecoder,
140242f4e85SAmir Ayupov     const MCDecodedPseudoProbeInlineTree &ProbeNode,
141242f4e85SAmir Ayupov     ProbeFrameStack &ProbeContext) {
142a6f15e9aSWenlei He   StringRef FuncName =
143a6f15e9aSWenlei He       ProbeDecoder.getFuncDescForGUID(ProbeNode.Guid)->FuncName;
144a6f15e9aSWenlei He   ProbeContext.emplace_back(FuncName, 0);
145a6f15e9aSWenlei He 
146a6f15e9aSWenlei He   // This ProbeContext has a probe, so it has code before inlining and
147a6f15e9aSWenlei He   // optimization. Make sure we mark its size as known.
148a6f15e9aSWenlei He   if (!ProbeNode.getProbes().empty()) {
149a6f15e9aSWenlei He     ContextTrieNode *SizeContext = &RootContext;
150a6f15e9aSWenlei He     for (auto &ProbeFrame : reverse(ProbeContext)) {
151a6f15e9aSWenlei He       StringRef CallerName = ProbeFrame.first;
152a6f15e9aSWenlei He       LineLocation CallsiteLoc(ProbeFrame.second, 0);
153a6f15e9aSWenlei He       SizeContext =
154ef0e0adcSWilliam Junda Huang           SizeContext->getOrCreateChildContext(CallsiteLoc,
155ef0e0adcSWilliam Junda Huang                                                FunctionId(CallerName));
156a6f15e9aSWenlei He     }
157a6f15e9aSWenlei He     // Add 0 size to make known.
158a6f15e9aSWenlei He     SizeContext->addFunctionSize(0);
159a6f15e9aSWenlei He   }
160a6f15e9aSWenlei He 
161a6f15e9aSWenlei He   // DFS down the probe inline tree
162a6f15e9aSWenlei He   for (const auto &ChildNode : ProbeNode.getChildren()) {
163*04ebd190SAmir Ayupov     InlineSite Location = ChildNode.getInlineSite();
164a6f15e9aSWenlei He     ProbeContext.back().second = std::get<1>(Location);
165*04ebd190SAmir Ayupov     trackInlineesOptimizedAway(ProbeDecoder, ChildNode, ProbeContext);
166a6f15e9aSWenlei He   }
167a6f15e9aSWenlei He 
168a6f15e9aSWenlei He   ProbeContext.pop_back();
169eca03d27SWenlei He }
170eca03d27SWenlei He 
1715d7950a4SHongtao Yu ProfiledBinary::ProfiledBinary(const StringRef ExeBinPath,
1725d7950a4SHongtao Yu                                const StringRef DebugBinPath)
17327c37327SMark Santaniello     : Path(ExeBinPath), DebugBinaryPath(DebugBinPath),
17427c37327SMark Santaniello       SymbolizerOpts(getSymbolizerOpts()), ProEpilogTracker(this),
17527c37327SMark Santaniello       Symbolizer(std::make_unique<symbolize::LLVMSymbolizer>(SymbolizerOpts)),
1765d7950a4SHongtao Yu       TrackFuncContextSize(EnableCSPreInliner && UseContextCostForPreInliner) {
1775d7950a4SHongtao Yu   // Point to executable binary if debug info binary is not specified.
1785d7950a4SHongtao Yu   SymbolizerPath = DebugBinPath.empty() ? ExeBinPath : DebugBinPath;
1795d7950a4SHongtao Yu   if (InferMissingFrames)
1805d7950a4SHongtao Yu     MissingContextInferrer = std::make_unique<MissingFrameInferrer>(this);
1815d7950a4SHongtao Yu   load();
1825d7950a4SHongtao Yu }
1835d7950a4SHongtao Yu 
1845d7950a4SHongtao Yu ProfiledBinary::~ProfiledBinary() {}
1855d7950a4SHongtao Yu 
186aab18100Swlei void ProfiledBinary::warnNoFuncEntry() {
187aab18100Swlei   uint64_t NoFuncEntryNum = 0;
188aab18100Swlei   for (auto &F : BinaryFunctions) {
189aab18100Swlei     if (F.second.Ranges.empty())
190aab18100Swlei       continue;
191aab18100Swlei     bool hasFuncEntry = false;
192aab18100Swlei     for (auto &R : F.second.Ranges) {
19346765248Swlei       if (FuncRange *FR = findFuncRangeForStartAddr(R.first)) {
194aab18100Swlei         if (FR->IsFuncEntry) {
195aab18100Swlei           hasFuncEntry = true;
196aab18100Swlei           break;
197aab18100Swlei         }
198aab18100Swlei       }
199aab18100Swlei     }
200aab18100Swlei 
201aab18100Swlei     if (!hasFuncEntry) {
202aab18100Swlei       NoFuncEntryNum++;
203aab18100Swlei       if (ShowDetailedWarning)
204aab18100Swlei         WithColor::warning()
205aab18100Swlei             << "Failed to determine function entry for " << F.first
206aab18100Swlei             << " due to inconsistent name from symbol table and dwarf info.\n";
207aab18100Swlei     }
208aab18100Swlei   }
209aab18100Swlei   emitWarningSummary(NoFuncEntryNum, BinaryFunctions.size(),
210aab18100Swlei                      "of functions failed to determine function entry due to "
211aab18100Swlei                      "inconsistent name from symbol table and dwarf info.");
212aab18100Swlei }
213aab18100Swlei 
21432221694Swlei void ProfiledBinary::load() {
21532221694Swlei   // Attempt to open the binary.
21632221694Swlei   OwningBinary<Binary> OBinary = unwrapOrError(createBinary(Path), Path);
2176693c562Swlei   Binary &ExeBinary = *OBinary.getBinary();
21832221694Swlei 
2198c03f400SHaohai Wen   IsCOFF = isa<COFFObjectFile>(&ExeBinary);
2208c03f400SHaohai Wen   if (!isa<ELFObjectFileBase>(&ExeBinary) && !IsCOFF)
2218c03f400SHaohai Wen     exitWithError("not a valid ELF/COFF image", Path);
22232221694Swlei 
2238c03f400SHaohai Wen   auto *Obj = cast<ObjectFile>(&ExeBinary);
22432221694Swlei   TheTriple = Obj->makeTriple();
22509742be8SHongtao Yu 
22632221694Swlei   LLVM_DEBUG(dbgs() << "Loading " << Path << "\n");
22732221694Swlei 
2282fa6eaf9Sxur-llvm   // Mark the binary as a kernel image;
2292fa6eaf9Sxur-llvm   IsKernel = KernelBinary;
2302fa6eaf9Sxur-llvm 
23107120384SHongtao Yu   // Find the preferred load address for text sections.
23207120384SHongtao Yu   setPreferredTextSegmentAddresses(Obj);
23332221694Swlei 
23440ca4112Swlei   // Load debug info of subprograms from DWARF section.
2356693c562Swlei   // If path of debug info binary is specified, use the debug info from it,
2366693c562Swlei   // otherwise use the debug info from the executable binary.
2376693c562Swlei   if (!DebugBinaryPath.empty()) {
2386693c562Swlei     OwningBinary<Binary> DebugPath =
2396693c562Swlei         unwrapOrError(createBinary(DebugBinaryPath), DebugBinaryPath);
240c56a85fdSSimon Pilgrim     loadSymbolsFromDWARF(*cast<ObjectFile>(DebugPath.getBinary()));
2416693c562Swlei   } else {
242c56a85fdSSimon Pilgrim     loadSymbolsFromDWARF(*cast<ObjectFile>(&ExeBinary));
2436693c562Swlei   }
24440ca4112Swlei 
245d5a963abSHongtao Yu   DisassembleFunctionSet.insert(DisassembleFunctions.begin(),
246d5a963abSHongtao Yu                                 DisassembleFunctions.end());
247d5a963abSHongtao Yu 
2488c03f400SHaohai Wen   if (auto *ELFObj = dyn_cast<ELFObjectFileBase>(Obj)) {
2498c03f400SHaohai Wen     checkPseudoProbe(ELFObj);
250d5a963abSHongtao Yu     if (UsePseudoProbes)
2518c03f400SHaohai Wen       populateElfSymbolAddressList(ELFObj);
252d5a963abSHongtao Yu 
253d5a963abSHongtao Yu     if (ShowDisassemblyOnly)
2548c03f400SHaohai Wen       decodePseudoProbe(ELFObj);
2558c03f400SHaohai Wen   }
256d5a963abSHongtao Yu 
25732221694Swlei   // Disassemble the text sections.
25832221694Swlei   disassemble(Obj);
25932221694Swlei 
2601f05b1a9Swlei   // Use function start and return address to infer prolog and epilog
26146765248Swlei   ProEpilogTracker.inferPrologAddresses(StartAddrToFuncRangeMap);
26246765248Swlei   ProEpilogTracker.inferEpilogAddresses(RetAddressSet);
2631f05b1a9Swlei 
264aab18100Swlei   warnNoFuncEntry();
265aab18100Swlei 
26632221694Swlei   // TODO: decode other sections.
26732221694Swlei }
26832221694Swlei 
269091c16f7Swlei bool ProfiledBinary::inlineContextEqual(uint64_t Address1, uint64_t Address2) {
27091cc53d5Swlei   const SampleContextFrameVector &Context1 =
27191cc53d5Swlei       getCachedFrameLocationStack(Address1);
27291cc53d5Swlei   const SampleContextFrameVector &Context2 =
27391cc53d5Swlei       getCachedFrameLocationStack(Address2);
2741f05b1a9Swlei   if (Context1.size() != Context2.size())
2751f05b1a9Swlei     return false;
276afd8bd60Swlei   if (Context1.empty())
277afd8bd60Swlei     return false;
2781f05b1a9Swlei   // The leaf frame contains location within the leaf, and it
2791f05b1a9Swlei   // needs to be remove that as it's not part of the calling context
2801f05b1a9Swlei   return std::equal(Context1.begin(), Context1.begin() + Context1.size() - 1,
2811f05b1a9Swlei                     Context2.begin(), Context2.begin() + Context2.size() - 1);
2821f05b1a9Swlei }
2831f05b1a9Swlei 
284b9db7036SHongtao Yu SampleContextFrameVector
285b9db7036SHongtao Yu ProfiledBinary::getExpandedContext(const SmallVectorImpl<uint64_t> &Stack,
286091c16f7Swlei                                    bool &WasLeafInlined) {
287b9db7036SHongtao Yu   SampleContextFrameVector ContextVec;
288bfcb2c11Swlei   if (Stack.empty())
289bfcb2c11Swlei     return ContextVec;
2901f05b1a9Swlei   // Process from frame root to leaf
2913869309aSwlei   for (auto Address : Stack) {
292b9db7036SHongtao Yu     const SampleContextFrameVector &ExpandedContext =
29391cc53d5Swlei         getCachedFrameLocationStack(Address);
294afd8bd60Swlei     // An instruction without a valid debug line will be ignored by sample
295afd8bd60Swlei     // processing
296afd8bd60Swlei     if (ExpandedContext.empty())
297b9db7036SHongtao Yu       return SampleContextFrameVector();
2981410db70SWenlei He     // Set WasLeafInlined to the size of inlined frame count for the last
2991410db70SWenlei He     // address which is leaf
3001410db70SWenlei He     WasLeafInlined = (ExpandedContext.size() > 1);
301b9db7036SHongtao Yu     ContextVec.append(ExpandedContext);
3021f05b1a9Swlei   }
3031f05b1a9Swlei 
30446cf7d75Swlei   // Replace with decoded base discriminator
30546cf7d75Swlei   for (auto &Frame : ContextVec) {
30646cf7d75Swlei     Frame.Location.Discriminator = ProfileGeneratorBase::getBaseDiscriminator(
30741a681ceSwlei         Frame.Location.Discriminator, UseFSDiscriminator);
30846cf7d75Swlei   }
30946cf7d75Swlei 
310c2be2d32Swlei   assert(ContextVec.size() && "Context length should be at least 1");
311c2be2d32Swlei 
312ac14bb14Swlei   // Compress the context string except for the leaf frame
313b9db7036SHongtao Yu   auto LeafFrame = ContextVec.back();
314fb29d812Swlei   LeafFrame.Location = LineLocation(0, 0);
315ac14bb14Swlei   ContextVec.pop_back();
316b9db7036SHongtao Yu   CSProfileGenerator::compressRecursionContext(ContextVec);
317b9db7036SHongtao Yu   CSProfileGenerator::trimContext(ContextVec);
318b9db7036SHongtao Yu   ContextVec.push_back(LeafFrame);
319b9db7036SHongtao Yu   return ContextVec;
3201f05b1a9Swlei }
3211f05b1a9Swlei 
32207120384SHongtao Yu template <class ELFT>
3233f970168SHongtao Yu void ProfiledBinary::setPreferredTextSegmentAddresses(const ELFFile<ELFT> &Obj,
3243f970168SHongtao Yu                                                       StringRef FileName) {
32507120384SHongtao Yu   const auto &PhdrRange = unwrapOrError(Obj.program_headers(), FileName);
32647d66355SWenlei He   // FIXME: This should be the page size of the system running profiling.
32747d66355SWenlei He   // However such info isn't available at post-processing time, assuming
32847d66355SWenlei He   // 4K page now. Note that we don't use EXEC_PAGESIZE from <linux/param.h>
32947d66355SWenlei He   // because we may build the tools on non-linux.
3307ff2dc3bSNathan Lanza   uint64_t PageSize = 0x1000;
33107120384SHongtao Yu   for (const typename ELFT::Phdr &Phdr : PhdrRange) {
332f7976edcSWenlei He     if (Phdr.p_type == ELF::PT_LOAD) {
333f7976edcSWenlei He       if (!FirstLoadableAddress)
334f7976edcSWenlei He         FirstLoadableAddress = Phdr.p_vaddr & ~(PageSize - 1U);
335f7976edcSWenlei He       if (Phdr.p_flags & ELF::PF_X) {
33607120384SHongtao Yu         // Segments will always be loaded at a page boundary.
33747d66355SWenlei He         PreferredTextSegmentAddresses.push_back(Phdr.p_vaddr &
33847d66355SWenlei He                                                 ~(PageSize - 1U));
33947d66355SWenlei He         TextSegmentOffsets.push_back(Phdr.p_offset & ~(PageSize - 1U));
34032221694Swlei       }
34132221694Swlei     }
342f7976edcSWenlei He   }
34307120384SHongtao Yu 
34407120384SHongtao Yu   if (PreferredTextSegmentAddresses.empty())
34507120384SHongtao Yu     exitWithError("no executable segment found", FileName);
34607120384SHongtao Yu }
34707120384SHongtao Yu 
3488c03f400SHaohai Wen void ProfiledBinary::setPreferredTextSegmentAddresses(const COFFObjectFile *Obj,
3498c03f400SHaohai Wen                                                       StringRef FileName) {
3508c03f400SHaohai Wen   uint64_t ImageBase = Obj->getImageBase();
3518c03f400SHaohai Wen   if (!ImageBase)
3528c03f400SHaohai Wen     exitWithError("Not a COFF image", FileName);
3538c03f400SHaohai Wen 
3548c03f400SHaohai Wen   PreferredTextSegmentAddresses.push_back(ImageBase);
3558c03f400SHaohai Wen   FirstLoadableAddress = ImageBase;
3568c03f400SHaohai Wen 
3578c03f400SHaohai Wen   for (SectionRef Section : Obj->sections()) {
3588c03f400SHaohai Wen     const coff_section *Sec = Obj->getCOFFSection(Section);
3598c03f400SHaohai Wen     if (Sec->Characteristics & COFF::IMAGE_SCN_CNT_CODE)
3608c03f400SHaohai Wen       TextSegmentOffsets.push_back(Sec->VirtualAddress);
3618c03f400SHaohai Wen   }
3628c03f400SHaohai Wen }
3638c03f400SHaohai Wen 
3648c03f400SHaohai Wen void ProfiledBinary::setPreferredTextSegmentAddresses(const ObjectFile *Obj) {
36507120384SHongtao Yu   if (const auto *ELFObj = dyn_cast<ELF32LEObjectFile>(Obj))
36607120384SHongtao Yu     setPreferredTextSegmentAddresses(ELFObj->getELFFile(), Obj->getFileName());
36707120384SHongtao Yu   else if (const auto *ELFObj = dyn_cast<ELF32BEObjectFile>(Obj))
36807120384SHongtao Yu     setPreferredTextSegmentAddresses(ELFObj->getELFFile(), Obj->getFileName());
36907120384SHongtao Yu   else if (const auto *ELFObj = dyn_cast<ELF64LEObjectFile>(Obj))
37007120384SHongtao Yu     setPreferredTextSegmentAddresses(ELFObj->getELFFile(), Obj->getFileName());
3718c03f400SHaohai Wen   else if (const auto *ELFObj = dyn_cast<ELF64BEObjectFile>(Obj))
37207120384SHongtao Yu     setPreferredTextSegmentAddresses(ELFObj->getELFFile(), Obj->getFileName());
3738c03f400SHaohai Wen   else if (const auto *COFFObj = dyn_cast<COFFObjectFile>(Obj))
3748c03f400SHaohai Wen     setPreferredTextSegmentAddresses(COFFObj, Obj->getFileName());
37507120384SHongtao Yu   else
3768c03f400SHaohai Wen     llvm_unreachable("invalid object format");
37732221694Swlei }
37832221694Swlei 
3793f970168SHongtao Yu void ProfiledBinary::checkPseudoProbe(const ELFObjectFileBase *Obj) {
380a316343eSWenlei He   if (UseDwarfCorrelation)
381a316343eSWenlei He     return;
382a316343eSWenlei He 
3833f970168SHongtao Yu   bool HasProbeDescSection = false;
3843f970168SHongtao Yu   bool HasPseudoProbeSection = false;
3853f970168SHongtao Yu 
3863f970168SHongtao Yu   StringRef FileName = Obj->getFileName();
3873f970168SHongtao Yu   for (section_iterator SI = Obj->section_begin(), SE = Obj->section_end();
3883f970168SHongtao Yu        SI != SE; ++SI) {
3893f970168SHongtao Yu     const SectionRef &Section = *SI;
3903f970168SHongtao Yu     StringRef SectionName = unwrapOrError(Section.getName(), FileName);
3913f970168SHongtao Yu     if (SectionName == ".pseudo_probe_desc") {
3923f970168SHongtao Yu       HasProbeDescSection = true;
3933f970168SHongtao Yu     } else if (SectionName == ".pseudo_probe") {
3943f970168SHongtao Yu       HasPseudoProbeSection = true;
3953f970168SHongtao Yu     }
3963f970168SHongtao Yu   }
3973f970168SHongtao Yu 
3983f970168SHongtao Yu   // set UsePseudoProbes flag, used for PerfReader
3993f970168SHongtao Yu   UsePseudoProbes = HasProbeDescSection && HasPseudoProbeSection;
4003f970168SHongtao Yu }
4013f970168SHongtao Yu 
4023f970168SHongtao Yu void ProfiledBinary::decodePseudoProbe(const ELFObjectFileBase *Obj) {
4033f970168SHongtao Yu   if (!UsePseudoProbes)
4043f970168SHongtao Yu     return;
4053f970168SHongtao Yu 
406d5a963abSHongtao Yu   MCPseudoProbeDecoder::Uint64Set GuidFilter;
407d5a963abSHongtao Yu   MCPseudoProbeDecoder::Uint64Map FuncStartAddresses;
408d5a963abSHongtao Yu   if (ShowDisassemblyOnly) {
409d5a963abSHongtao Yu     if (DisassembleFunctionSet.empty()) {
410d5a963abSHongtao Yu       FuncStartAddresses = SymbolStartAddrs;
411d5a963abSHongtao Yu     } else {
412d5a963abSHongtao Yu       for (auto &F : DisassembleFunctionSet) {
413d5a963abSHongtao Yu         auto GUID = Function::getGUID(F.first());
414d5a963abSHongtao Yu         if (auto StartAddr = SymbolStartAddrs.lookup(GUID)) {
415d5a963abSHongtao Yu           FuncStartAddresses[GUID] = StartAddr;
416d5a963abSHongtao Yu           FuncRange &Range = StartAddrToFuncRangeMap[StartAddr];
417d5a963abSHongtao Yu           GuidFilter.insert(Function::getGUID(Range.getFuncName()));
418d5a963abSHongtao Yu         }
419d5a963abSHongtao Yu       }
420d5a963abSHongtao Yu     }
421d5a963abSHongtao Yu   } else {
422d5a963abSHongtao Yu     for (auto *F : ProfiledFunctions) {
423d5a963abSHongtao Yu       GuidFilter.insert(Function::getGUID(F->FuncName));
424d5a963abSHongtao Yu       for (auto &Range : F->Ranges) {
425d5a963abSHongtao Yu         auto GUIDs = StartAddrToSymMap.equal_range(Range.first);
426ca53611cSKazu Hirata         for (const auto &[StartAddr, Func] : make_range(GUIDs))
427ca53611cSKazu Hirata           FuncStartAddresses[Func] = StartAddr;
428d5a963abSHongtao Yu       }
429d5a963abSHongtao Yu     }
430d5a963abSHongtao Yu   }
4313f970168SHongtao Yu 
432b3154d11Swlei   StringRef FileName = Obj->getFileName();
433b3154d11Swlei   for (section_iterator SI = Obj->section_begin(), SE = Obj->section_end();
434b3154d11Swlei        SI != SE; ++SI) {
435b3154d11Swlei     const SectionRef &Section = *SI;
436b3154d11Swlei     StringRef SectionName = unwrapOrError(Section.getName(), FileName);
437b3154d11Swlei 
438b3154d11Swlei     if (SectionName == ".pseudo_probe_desc") {
439b3154d11Swlei       StringRef Contents = unwrapOrError(Section.getContents(), FileName);
440ee7d20e8Sjamesluox       if (!ProbeDecoder.buildGUID2FuncDescMap(
441ee7d20e8Sjamesluox               reinterpret_cast<const uint8_t *>(Contents.data()),
442ee7d20e8Sjamesluox               Contents.size()))
4433f970168SHongtao Yu         exitWithError(
4443f970168SHongtao Yu             "Pseudo Probe decoder fail in .pseudo_probe_desc section");
445b3154d11Swlei     } else if (SectionName == ".pseudo_probe") {
446b3154d11Swlei       StringRef Contents = unwrapOrError(Section.getContents(), FileName);
447ee7d20e8Sjamesluox       if (!ProbeDecoder.buildAddress2ProbeMap(
448ee7d20e8Sjamesluox               reinterpret_cast<const uint8_t *>(Contents.data()),
449d5a963abSHongtao Yu               Contents.size(), GuidFilter, FuncStartAddresses))
450ee7d20e8Sjamesluox         exitWithError("Pseudo Probe decoder fail in .pseudo_probe section");
451b3154d11Swlei     }
452b3154d11Swlei   }
453b3154d11Swlei 
45434e131b0SHongtao Yu   // Build TopLevelProbeFrameMap to track size for optimized inlinees when probe
45534e131b0SHongtao Yu   // is available
4563f970168SHongtao Yu   if (TrackFuncContextSize) {
457*04ebd190SAmir Ayupov     for (auto &Child : ProbeDecoder.getDummyInlineRoot().getChildren()) {
458*04ebd190SAmir Ayupov       auto *Frame = &Child;
45934e131b0SHongtao Yu       StringRef FuncName =
46034e131b0SHongtao Yu           ProbeDecoder.getFuncDescForGUID(Frame->Guid)->FuncName;
46134e131b0SHongtao Yu       TopLevelProbeFrameMap[FuncName] = Frame;
46234e131b0SHongtao Yu     }
46334e131b0SHongtao Yu   }
46434e131b0SHongtao Yu 
465b3154d11Swlei   if (ShowPseudoProbe)
466b3154d11Swlei     ProbeDecoder.printGUID2FuncDescMap(outs());
467b3154d11Swlei }
468b3154d11Swlei 
4693f970168SHongtao Yu void ProfiledBinary::decodePseudoProbe() {
4703f970168SHongtao Yu   OwningBinary<Binary> OBinary = unwrapOrError(createBinary(Path), Path);
4713f970168SHongtao Yu   Binary &ExeBinary = *OBinary.getBinary();
4728c03f400SHaohai Wen   auto *Obj = cast<ELFObjectFileBase>(&ExeBinary);
4733f970168SHongtao Yu   decodePseudoProbe(Obj);
4743f970168SHongtao Yu }
4753f970168SHongtao Yu 
4765d7950a4SHongtao Yu void ProfiledBinary::setIsFuncEntry(FuncRange *FuncRange,
4775d7950a4SHongtao Yu                                     StringRef RangeSymName) {
47840ca4112Swlei   // Skip external function symbol.
47940ca4112Swlei   if (!FuncRange)
48040ca4112Swlei     return;
48140ca4112Swlei 
482aab18100Swlei   // Set IsFuncEntry to ture if there is only one range in the function or the
483aab18100Swlei   // RangeSymName from ELF is equal to its DWARF-based function name.
484aab18100Swlei   if (FuncRange->Func->Ranges.size() == 1 ||
485aab18100Swlei       (!FuncRange->IsFuncEntry && FuncRange->getFuncName() == RangeSymName))
48640ca4112Swlei     FuncRange->IsFuncEntry = true;
48740ca4112Swlei }
48840ca4112Swlei 
48932221694Swlei bool ProfiledBinary::dissassembleSymbol(std::size_t SI, ArrayRef<uint8_t> Bytes,
49032221694Swlei                                         SectionSymbolsTy &Symbols,
49132221694Swlei                                         const SectionRef &Section) {
49232221694Swlei   std::size_t SE = Symbols.size();
49346765248Swlei   uint64_t SectionAddress = Section.getAddress();
49432221694Swlei   uint64_t SectSize = Section.getSize();
49546765248Swlei   uint64_t StartAddress = Symbols[SI].Addr;
49646765248Swlei   uint64_t NextStartAddress =
49746765248Swlei       (SI + 1 < SE) ? Symbols[SI + 1].Addr : SectionAddress + SectSize;
4985d7950a4SHongtao Yu   FuncRange *FRange = findFuncRange(StartAddress);
4995d7950a4SHongtao Yu   setIsFuncEntry(FRange, FunctionSamples::getCanonicalFnName(Symbols[SI].Name));
500dddd590fSwlei   StringRef SymbolName =
501dddd590fSwlei       ShowCanonicalFnName
502dddd590fSwlei           ? FunctionSamples::getCanonicalFnName(Symbols[SI].Name)
503dddd590fSwlei           : Symbols[SI].Name;
504734f4d83SHongtao Yu   bool ShowDisassembly =
505734f4d83SHongtao Yu       ShowDisassemblyOnly && (DisassembleFunctionSet.empty() ||
506734f4d83SHongtao Yu                               DisassembleFunctionSet.count(SymbolName));
507734f4d83SHongtao Yu   if (ShowDisassembly)
50832221694Swlei     outs() << '<' << SymbolName << ">:\n";
50932221694Swlei 
51046765248Swlei   uint64_t Address = StartAddress;
51146765248Swlei   // Size of a consecutive invalid instruction range starting from Address -1
51255356c01SHongtao Yu   // backwards.
51355356c01SHongtao Yu   uint64_t InvalidInstLength = 0;
51446765248Swlei   while (Address < NextStartAddress) {
51532221694Swlei     MCInst Inst;
51632221694Swlei     uint64_t Size;
51732221694Swlei     // Disassemble an instruction.
51846765248Swlei     bool Disassembled = DisAsm->getInstruction(
51946765248Swlei         Inst, Size, Bytes.slice(Address - SectionAddress), Address, nulls());
52055356c01SHongtao Yu     if (Size == 0)
52155356c01SHongtao Yu       Size = 1;
52232221694Swlei 
523734f4d83SHongtao Yu     if (ShowDisassembly) {
524b3154d11Swlei       if (ShowPseudoProbe) {
52546765248Swlei         ProbeDecoder.printProbeForAddress(outs(), Address);
526b3154d11Swlei       }
52746765248Swlei       outs() << format("%8" PRIx64 ":", Address);
5280196b45cSwlei       size_t Start = outs().tell();
52955356c01SHongtao Yu       if (Disassembled)
53075bc20ffSKazu Hirata         IPrinter->printInst(&Inst, Address + Size, "", *STI, outs());
53155356c01SHongtao Yu       else
53255356c01SHongtao Yu         outs() << "\t<unknown>";
5330196b45cSwlei       if (ShowSourceLocations) {
5340196b45cSwlei         unsigned Cur = outs().tell() - Start;
5350196b45cSwlei         if (Cur < 40)
5360196b45cSwlei           outs().indent(40 - Cur);
53746765248Swlei         InstructionPointer IP(this, Address);
538eca03d27SWenlei He         outs() << getReversedLocWithContext(
539eca03d27SWenlei He             symbolize(IP, ShowCanonicalFnName, ShowPseudoProbe));
5400196b45cSwlei       }
54132221694Swlei       outs() << "\n";
54232221694Swlei     }
54332221694Swlei 
54455356c01SHongtao Yu     if (Disassembled) {
54532221694Swlei       const MCInstrDesc &MCDesc = MII->get(Inst.getOpcode());
546ce40843aSwlei 
547091c16f7Swlei       // Record instruction size.
54846765248Swlei       AddressToInstSizeMap[Address] = Size;
549eca03d27SWenlei He 
55032221694Swlei       // Populate address maps.
55146765248Swlei       CodeAddressVec.push_back(Address);
5529f732af5SHongtao Yu       if (MCDesc.isCall()) {
55346765248Swlei         CallAddressSet.insert(Address);
55446765248Swlei         UncondBranchAddrSet.insert(Address);
5559f732af5SHongtao Yu       } else if (MCDesc.isReturn()) {
55646765248Swlei         RetAddressSet.insert(Address);
55746765248Swlei         UncondBranchAddrSet.insert(Address);
5589f732af5SHongtao Yu       } else if (MCDesc.isBranch()) {
5599f732af5SHongtao Yu         if (MCDesc.isUnconditionalBranch())
56046765248Swlei           UncondBranchAddrSet.insert(Address);
56146765248Swlei         BranchAddressSet.insert(Address);
5629f732af5SHongtao Yu       }
56332221694Swlei 
5645d7950a4SHongtao Yu       // Record potential call targets for tail frame inference later-on.
5655d7950a4SHongtao Yu       if (InferMissingFrames && FRange) {
5665d7950a4SHongtao Yu         uint64_t Target = 0;
5675d7950a4SHongtao Yu         MIA->evaluateBranch(Inst, Address, Size, Target);
5685d7950a4SHongtao Yu         if (MCDesc.isCall()) {
5695d7950a4SHongtao Yu           // Indirect call targets are unknown at this point. Recording the
5705d7950a4SHongtao Yu           // unknown target (zero) for further LBR-based refinement.
5715d7950a4SHongtao Yu           MissingContextInferrer->CallEdges[Address].insert(Target);
5725d7950a4SHongtao Yu         } else if (MCDesc.isUnconditionalBranch()) {
5735d7950a4SHongtao Yu           assert(Target &&
5745d7950a4SHongtao Yu                  "target should be known for unconditional direct branch");
5755d7950a4SHongtao Yu           // Any inter-function unconditional jump is considered tail call at
5765d7950a4SHongtao Yu           // this point. This is not 100% accurate and could further be
5775d7950a4SHongtao Yu           // optimized based on some source annotation.
5785d7950a4SHongtao Yu           FuncRange *ToFRange = findFuncRange(Target);
5795d7950a4SHongtao Yu           if (ToFRange && ToFRange->Func != FRange->Func)
5805d7950a4SHongtao Yu             MissingContextInferrer->TailCallEdges[Address].insert(Target);
5815d7950a4SHongtao Yu           LLVM_DEBUG({
5825d7950a4SHongtao Yu             dbgs() << "Direct Tail call: " << format("%8" PRIx64 ":", Address);
5835d7950a4SHongtao Yu             IPrinter->printInst(&Inst, Address + Size, "", *STI.get(), dbgs());
5845d7950a4SHongtao Yu             dbgs() << "\n";
5855d7950a4SHongtao Yu           });
5865d7950a4SHongtao Yu         } else if (MCDesc.isIndirectBranch() && MCDesc.isBarrier()) {
5875d7950a4SHongtao Yu           // This is an indirect branch but not necessarily an indirect tail
5885d7950a4SHongtao Yu           // call. The isBarrier check is to filter out conditional branch.
5895d7950a4SHongtao Yu           // Similar with indirect call targets, recording the unknown target
5905d7950a4SHongtao Yu           // (zero) for further LBR-based refinement.
5915d7950a4SHongtao Yu           MissingContextInferrer->TailCallEdges[Address].insert(Target);
5925d7950a4SHongtao Yu           LLVM_DEBUG({
5935d7950a4SHongtao Yu             dbgs() << "Indirect Tail call: "
5945d7950a4SHongtao Yu                    << format("%8" PRIx64 ":", Address);
5955d7950a4SHongtao Yu             IPrinter->printInst(&Inst, Address + Size, "", *STI.get(), dbgs());
5965d7950a4SHongtao Yu             dbgs() << "\n";
5975d7950a4SHongtao Yu           });
5985d7950a4SHongtao Yu         }
5995d7950a4SHongtao Yu       }
6005d7950a4SHongtao Yu 
60155356c01SHongtao Yu       if (InvalidInstLength) {
6027a3db658SHongtao Yu         AddrsWithInvalidInstruction.insert(
6037a3db658SHongtao Yu             {Address - InvalidInstLength, Address - 1});
60455356c01SHongtao Yu         InvalidInstLength = 0;
60555356c01SHongtao Yu       }
60655356c01SHongtao Yu     } else {
60755356c01SHongtao Yu       InvalidInstLength += Size;
60855356c01SHongtao Yu     }
60955356c01SHongtao Yu 
61046765248Swlei     Address += Size;
61132221694Swlei   }
61232221694Swlei 
61355356c01SHongtao Yu   if (InvalidInstLength)
6147a3db658SHongtao Yu     AddrsWithInvalidInstruction.insert(
6157a3db658SHongtao Yu         {Address - InvalidInstLength, Address - 1});
61655356c01SHongtao Yu 
617734f4d83SHongtao Yu   if (ShowDisassembly)
61832221694Swlei     outs() << "\n";
61932221694Swlei 
62032221694Swlei   return true;
62132221694Swlei }
62232221694Swlei 
6238c03f400SHaohai Wen void ProfiledBinary::setUpDisassembler(const ObjectFile *Obj) {
62432221694Swlei   const Target *TheTarget = getTarget(Obj);
62532221694Swlei   std::string TripleName = TheTriple.getTriple();
62632221694Swlei   StringRef FileName = Obj->getFileName();
62732221694Swlei 
62832221694Swlei   MRI.reset(TheTarget->createMCRegInfo(TripleName));
62932221694Swlei   if (!MRI)
63032221694Swlei     exitWithError("no register info for target " + TripleName, FileName);
63132221694Swlei 
63232221694Swlei   MCTargetOptions MCOptions;
63332221694Swlei   AsmInfo.reset(TheTarget->createMCAsmInfo(*MRI, TripleName, MCOptions));
63432221694Swlei   if (!AsmInfo)
63532221694Swlei     exitWithError("no assembly info for target " + TripleName, FileName);
63632221694Swlei 
637537cdf92SElena Lepilkina   Expected<SubtargetFeatures> Features = Obj->getFeatures();
638537cdf92SElena Lepilkina   if (!Features)
639537cdf92SElena Lepilkina     exitWithError(Features.takeError(), FileName);
64032221694Swlei   STI.reset(
641537cdf92SElena Lepilkina       TheTarget->createMCSubtargetInfo(TripleName, "", Features->getString()));
64232221694Swlei   if (!STI)
64332221694Swlei     exitWithError("no subtarget info for target " + TripleName, FileName);
64432221694Swlei 
64532221694Swlei   MII.reset(TheTarget->createMCInstrInfo());
64632221694Swlei   if (!MII)
64732221694Swlei     exitWithError("no instruction info for target " + TripleName, FileName);
64832221694Swlei 
649c2f819afSPhilipp Krones   MCContext Ctx(Triple(TripleName), AsmInfo.get(), MRI.get(), STI.get());
650c2f819afSPhilipp Krones   std::unique_ptr<MCObjectFileInfo> MOFI(
651c2f819afSPhilipp Krones       TheTarget->createMCObjectFileInfo(Ctx, /*PIC=*/false));
652c2f819afSPhilipp Krones   Ctx.setObjectFileInfo(MOFI.get());
65332221694Swlei   DisAsm.reset(TheTarget->createMCDisassembler(*STI, Ctx));
65432221694Swlei   if (!DisAsm)
65532221694Swlei     exitWithError("no disassembler for target " + TripleName, FileName);
65632221694Swlei 
65732221694Swlei   MIA.reset(TheTarget->createMCInstrAnalysis(MII.get()));
65832221694Swlei 
65932221694Swlei   int AsmPrinterVariant = AsmInfo->getAssemblerDialect();
6601f05b1a9Swlei   IPrinter.reset(TheTarget->createMCInstPrinter(
6611f05b1a9Swlei       Triple(TripleName), AsmPrinterVariant, *AsmInfo, *MII, *MRI));
6621f05b1a9Swlei   IPrinter->setPrintBranchImmAsAddress(true);
66332221694Swlei }
66432221694Swlei 
6658c03f400SHaohai Wen void ProfiledBinary::disassemble(const ObjectFile *Obj) {
66632221694Swlei   // Set up disassembler and related components.
66732221694Swlei   setUpDisassembler(Obj);
66832221694Swlei 
66932221694Swlei   // Create a mapping from virtual address to symbol name. The symbols in text
67032221694Swlei   // sections are the candidates to dissassemble.
67132221694Swlei   std::map<SectionRef, SectionSymbolsTy> AllSymbols;
67232221694Swlei   StringRef FileName = Obj->getFileName();
67332221694Swlei   for (const SymbolRef &Symbol : Obj->symbols()) {
67432221694Swlei     const uint64_t Addr = unwrapOrError(Symbol.getAddress(), FileName);
67532221694Swlei     const StringRef Name = unwrapOrError(Symbol.getName(), FileName);
67632221694Swlei     section_iterator SecI = unwrapOrError(Symbol.getSection(), FileName);
67732221694Swlei     if (SecI != Obj->section_end())
67832221694Swlei       AllSymbols[*SecI].push_back(SymbolInfoTy(Addr, Name, ELF::STT_NOTYPE));
67932221694Swlei   }
68032221694Swlei 
68132221694Swlei   // Sort all the symbols. Use a stable sort to stabilize the output.
68232221694Swlei   for (std::pair<const SectionRef, SectionSymbolsTy> &SecSyms : AllSymbols)
68332221694Swlei     stable_sort(SecSyms.second);
68432221694Swlei 
685734f4d83SHongtao Yu   assert((DisassembleFunctionSet.empty() || ShowDisassemblyOnly) &&
686734f4d83SHongtao Yu          "Functions to disassemble should be only specified together with "
687734f4d83SHongtao Yu          "--show-disassembly-only");
688734f4d83SHongtao Yu 
689426e326aSwlei   if (ShowDisassemblyOnly)
69032221694Swlei     outs() << "\nDisassembly of " << FileName << ":\n";
69132221694Swlei 
69232221694Swlei   // Dissassemble a text section.
69332221694Swlei   for (section_iterator SI = Obj->section_begin(), SE = Obj->section_end();
69432221694Swlei        SI != SE; ++SI) {
69532221694Swlei     const SectionRef &Section = *SI;
69632221694Swlei     if (!Section.isText())
69732221694Swlei       continue;
69832221694Swlei 
69907120384SHongtao Yu     uint64_t ImageLoadAddr = getPreferredBaseAddress();
70046765248Swlei     uint64_t SectionAddress = Section.getAddress() - ImageLoadAddr;
70132221694Swlei     uint64_t SectSize = Section.getSize();
70232221694Swlei     if (!SectSize)
70332221694Swlei       continue;
70432221694Swlei 
70532221694Swlei     // Register the text section.
70646765248Swlei     TextSections.insert({SectionAddress, SectSize});
70732221694Swlei 
70832221694Swlei     StringRef SectionName = unwrapOrError(Section.getName(), FileName);
70932205717Swlei 
71032205717Swlei     if (ShowDisassemblyOnly) {
71132221694Swlei       outs() << "\nDisassembly of section " << SectionName;
71207120384SHongtao Yu       outs() << " [" << format("0x%" PRIx64, Section.getAddress()) << ", "
71307120384SHongtao Yu              << format("0x%" PRIx64, Section.getAddress() + SectSize)
71407120384SHongtao Yu              << "]:\n\n";
71532221694Swlei     }
71632221694Swlei 
7178c03f400SHaohai Wen     if (isa<ELFObjectFileBase>(Obj) && SectionName == ".plt")
71832205717Swlei       continue;
71932205717Swlei 
72032221694Swlei     // Get the section data.
72132221694Swlei     ArrayRef<uint8_t> Bytes =
72232221694Swlei         arrayRefFromStringRef(unwrapOrError(Section.getContents(), FileName));
72332221694Swlei 
72432221694Swlei     // Get the list of all the symbols in this section.
72532221694Swlei     SectionSymbolsTy &Symbols = AllSymbols[Section];
72632221694Swlei 
72732221694Swlei     // Disassemble symbol by symbol.
72832221694Swlei     for (std::size_t SI = 0, SE = Symbols.size(); SI != SE; ++SI) {
72932221694Swlei       if (!dissassembleSymbol(SI, Bytes, Symbols, Section))
73032221694Swlei         exitWithError("disassembling error", FileName);
73132221694Swlei     }
73232221694Swlei   }
73341a681ceSwlei 
7347a3db658SHongtao Yu   if (!AddrsWithInvalidInstruction.empty()) {
7357a3db658SHongtao Yu     if (ShowDetailedWarning) {
7367a3db658SHongtao Yu       for (auto &Addr : AddrsWithInvalidInstruction) {
7377a3db658SHongtao Yu         WithColor::warning()
7387a3db658SHongtao Yu             << "Invalid instructions at " << format("%8" PRIx64, Addr.first)
7397a3db658SHongtao Yu             << " - " << format("%8" PRIx64, Addr.second) << "\n";
7407a3db658SHongtao Yu       }
7417a3db658SHongtao Yu     }
7427a3db658SHongtao Yu     WithColor::warning() << "Found " << AddrsWithInvalidInstruction.size()
7437a3db658SHongtao Yu                          << " invalid instructions\n";
7447a3db658SHongtao Yu     AddrsWithInvalidInstruction.clear();
7457a3db658SHongtao Yu   }
7467a3db658SHongtao Yu 
74741a681ceSwlei   // Dissassemble rodata section to check if FS discriminator symbol exists.
74841a681ceSwlei   checkUseFSDiscriminator(Obj, AllSymbols);
74941a681ceSwlei }
75041a681ceSwlei 
75141a681ceSwlei void ProfiledBinary::checkUseFSDiscriminator(
7528c03f400SHaohai Wen     const ObjectFile *Obj, std::map<SectionRef, SectionSymbolsTy> &AllSymbols) {
75341a681ceSwlei   const char *FSDiscriminatorVar = "__llvm_fs_discriminator__";
75441a681ceSwlei   for (section_iterator SI = Obj->section_begin(), SE = Obj->section_end();
75541a681ceSwlei        SI != SE; ++SI) {
75641a681ceSwlei     const SectionRef &Section = *SI;
75741a681ceSwlei     if (!Section.isData() || Section.getSize() == 0)
75841a681ceSwlei       continue;
75941a681ceSwlei     SectionSymbolsTy &Symbols = AllSymbols[Section];
76041a681ceSwlei 
76141a681ceSwlei     for (std::size_t SI = 0, SE = Symbols.size(); SI != SE; ++SI) {
76241a681ceSwlei       if (Symbols[SI].Name == FSDiscriminatorVar) {
76341a681ceSwlei         UseFSDiscriminator = true;
76441a681ceSwlei         return;
76541a681ceSwlei       }
76641a681ceSwlei     }
76741a681ceSwlei   }
76832221694Swlei }
7690196b45cSwlei 
770d5a963abSHongtao Yu void ProfiledBinary::populateElfSymbolAddressList(
771d5a963abSHongtao Yu     const ELFObjectFileBase *Obj) {
772d5a963abSHongtao Yu   // Create a mapping from virtual address to symbol GUID and the other way
773d5a963abSHongtao Yu   // around.
774d5a963abSHongtao Yu   StringRef FileName = Obj->getFileName();
775d5a963abSHongtao Yu   for (const SymbolRef &Symbol : Obj->symbols()) {
776d5a963abSHongtao Yu     const uint64_t Addr = unwrapOrError(Symbol.getAddress(), FileName);
777d5a963abSHongtao Yu     const StringRef Name = unwrapOrError(Symbol.getName(), FileName);
778d5a963abSHongtao Yu     uint64_t GUID = Function::getGUID(Name);
779d5a963abSHongtao Yu     SymbolStartAddrs[GUID] = Addr;
780d5a963abSHongtao Yu     StartAddrToSymMap.emplace(Addr, GUID);
781d5a963abSHongtao Yu   }
782d5a963abSHongtao Yu }
783d5a963abSHongtao Yu 
784b3a778fbSwlei void ProfiledBinary::loadSymbolsFromDWARFUnit(DWARFUnit &CompilationUnit) {
785b3a778fbSwlei   for (const auto &DieInfo : CompilationUnit.dies()) {
786b3a778fbSwlei     llvm::DWARFDie Die(&CompilationUnit, &DieInfo);
78740ca4112Swlei 
78840ca4112Swlei     if (!Die.isSubprogramDIE())
78940ca4112Swlei       continue;
79040ca4112Swlei     auto Name = Die.getName(llvm::DINameKind::LinkageName);
79140ca4112Swlei     if (!Name)
79240ca4112Swlei       Name = Die.getName(llvm::DINameKind::ShortName);
79340ca4112Swlei     if (!Name)
79440ca4112Swlei       continue;
79540ca4112Swlei 
79640ca4112Swlei     auto RangesOrError = Die.getAddressRanges();
79740ca4112Swlei     if (!RangesOrError)
79840ca4112Swlei       continue;
79940ca4112Swlei     const DWARFAddressRangesVector &Ranges = RangesOrError.get();
80040ca4112Swlei 
80140ca4112Swlei     if (Ranges.empty())
80240ca4112Swlei       continue;
80340ca4112Swlei 
80440ca4112Swlei     // Different DWARF symbols can have same function name, search or create
80540ca4112Swlei     // BinaryFunction indexed by the name.
80640ca4112Swlei     auto Ret = BinaryFunctions.emplace(Name, BinaryFunction());
80740ca4112Swlei     auto &Func = Ret.first->second;
80840ca4112Swlei     if (Ret.second)
80940ca4112Swlei       Func.FuncName = Ret.first->first;
81040ca4112Swlei 
81140ca4112Swlei     for (const auto &Range : Ranges) {
81246765248Swlei       uint64_t StartAddress = Range.LowPC;
81346765248Swlei       uint64_t EndAddress = Range.HighPC;
81440ca4112Swlei 
81546765248Swlei       if (EndAddress <= StartAddress ||
81646765248Swlei           StartAddress < getPreferredBaseAddress())
81740ca4112Swlei         continue;
81840ca4112Swlei 
81940ca4112Swlei       // We may want to know all ranges for one function. Here group the
82040ca4112Swlei       // ranges and store them into BinaryFunction.
82146765248Swlei       Func.Ranges.emplace_back(StartAddress, EndAddress);
82240ca4112Swlei 
82346765248Swlei       auto R = StartAddrToFuncRangeMap.emplace(StartAddress, FuncRange());
82440ca4112Swlei       if (R.second) {
82540ca4112Swlei         FuncRange &FRange = R.first->second;
82640ca4112Swlei         FRange.Func = &Func;
82746765248Swlei         FRange.StartAddress = StartAddress;
82846765248Swlei         FRange.EndAddress = EndAddress;
82940ca4112Swlei       } else {
8307a3db658SHongtao Yu         AddrsWithMultipleSymbols.insert(StartAddress);
8317a3db658SHongtao Yu         if (ShowDetailedWarning)
83240ca4112Swlei           WithColor::warning()
83340ca4112Swlei               << "Duplicated symbol start address at "
83446765248Swlei               << format("%8" PRIx64, StartAddress) << " "
83546765248Swlei               << R.first->second.getFuncName() << " and " << Name << "\n";
83640ca4112Swlei       }
83740ca4112Swlei     }
83840ca4112Swlei   }
83940ca4112Swlei }
840b3a778fbSwlei 
841b3a778fbSwlei void ProfiledBinary::loadSymbolsFromDWARF(ObjectFile &Obj) {
842b3a778fbSwlei   auto DebugContext = llvm::DWARFContext::create(
843b3a778fbSwlei       Obj, DWARFContext::ProcessDebugRelocations::Process, nullptr, DWPPath);
844b3a778fbSwlei   if (!DebugContext)
845b3a778fbSwlei     exitWithError("Error creating the debug info context", Path);
846b3a778fbSwlei 
847b3a778fbSwlei   for (const auto &CompilationUnit : DebugContext->compile_units())
84875bc20ffSKazu Hirata     loadSymbolsFromDWARFUnit(*CompilationUnit);
849b3a778fbSwlei 
850b3a778fbSwlei   // Handles DWO sections that can either be in .o, .dwo or .dwp files.
85196776783SHongtao Yu   uint32_t NumOfDWOMissing = 0;
852b3a778fbSwlei   for (const auto &CompilationUnit : DebugContext->compile_units()) {
853b3a778fbSwlei     DWARFUnit *const DwarfUnit = CompilationUnit.get();
85401b88dd6STakuya Shimizu     if (DwarfUnit->getDWOId()) {
855b3a778fbSwlei       DWARFUnit *DWOCU = DwarfUnit->getNonSkeletonUnitDIE(false).getDwarfUnit();
856b3a778fbSwlei       if (!DWOCU->isDWOUnit()) {
85796776783SHongtao Yu         NumOfDWOMissing++;
85896776783SHongtao Yu         if (ShowDetailedWarning) {
859b3a778fbSwlei           std::string DWOName = dwarf::toString(
860b3a778fbSwlei               DwarfUnit->getUnitDIE().find(
861b3a778fbSwlei                   {dwarf::DW_AT_dwo_name, dwarf::DW_AT_GNU_dwo_name}),
862b3a778fbSwlei               "");
86396776783SHongtao Yu           WithColor::warning() << "DWO debug information for " << DWOName
86496776783SHongtao Yu                                << " was not loaded.\n";
86596776783SHongtao Yu         }
866b3a778fbSwlei         continue;
867b3a778fbSwlei       }
868b3a778fbSwlei       loadSymbolsFromDWARFUnit(*DWOCU);
869b3a778fbSwlei     }
870b3a778fbSwlei   }
871b3a778fbSwlei 
87296776783SHongtao Yu   if (NumOfDWOMissing)
87396776783SHongtao Yu     WithColor::warning()
87496776783SHongtao Yu         << " DWO debug information was not loaded for " << NumOfDWOMissing
87596776783SHongtao Yu         << " modules. Please check the .o, .dwo or .dwp path.\n";
876b3a778fbSwlei   if (BinaryFunctions.empty())
877b3a778fbSwlei     WithColor::warning() << "Loading of DWARF info completed, but no binary "
878b3a778fbSwlei                             "functions have been retrieved.\n";
879ef0e0adcSWilliam Junda Huang   // Populate the hash binary function map for MD5 function name lookup. This
880ef0e0adcSWilliam Junda Huang   // is done after BinaryFunctions are finalized.
881ef0e0adcSWilliam Junda Huang   for (auto &BinaryFunction : BinaryFunctions) {
882ef0e0adcSWilliam Junda Huang     HashBinaryFunctions[MD5Hash(StringRef(BinaryFunction.first))] =
883ef0e0adcSWilliam Junda Huang         &BinaryFunction.second;
884ef0e0adcSWilliam Junda Huang   }
8857a3db658SHongtao Yu 
8867a3db658SHongtao Yu   if (!AddrsWithMultipleSymbols.empty()) {
8877a3db658SHongtao Yu     WithColor::warning() << "Found " << AddrsWithMultipleSymbols.size()
8887a3db658SHongtao Yu                          << " start addresses with multiple symbols\n";
8897a3db658SHongtao Yu     AddrsWithMultipleSymbols.clear();
8907a3db658SHongtao Yu   }
89140ca4112Swlei }
89240ca4112Swlei 
8932f8196dbSwlei void ProfiledBinary::populateSymbolListFromDWARF(
8942f8196dbSwlei     ProfileSymbolList &SymbolList) {
89546765248Swlei   for (auto &I : StartAddrToFuncRangeMap)
8962f8196dbSwlei     SymbolList.add(I.second.getFuncName());
8972f8196dbSwlei }
8982f8196dbSwlei 
89927c37327SMark Santaniello symbolize::LLVMSymbolizer::Options ProfiledBinary::getSymbolizerOpts() const {
9000196b45cSwlei   symbolize::LLVMSymbolizer::Options SymbolizerOpts;
9010196b45cSwlei   SymbolizerOpts.PrintFunctions =
9020196b45cSwlei       DILineInfoSpecifier::FunctionNameKind::LinkageName;
9030196b45cSwlei   SymbolizerOpts.Demangle = false;
9040196b45cSwlei   SymbolizerOpts.DefaultArch = TheTriple.getArchName().str();
9050196b45cSwlei   SymbolizerOpts.UseSymbolTable = false;
9060196b45cSwlei   SymbolizerOpts.RelativeAddresses = false;
907b3a778fbSwlei   SymbolizerOpts.DWPName = DWPPath;
90827c37327SMark Santaniello   return SymbolizerOpts;
9090196b45cSwlei }
9100196b45cSwlei 
911b9db7036SHongtao Yu SampleContextFrameVector ProfiledBinary::symbolize(const InstructionPointer &IP,
912eca03d27SWenlei He                                                    bool UseCanonicalFnName,
913eca03d27SWenlei He                                                    bool UseProbeDiscriminator) {
9140196b45cSwlei   assert(this == IP.Binary &&
9150196b45cSwlei          "Binary should only symbolize its own instruction");
91646765248Swlei   auto Addr = object::SectionedAddress{IP.Address,
9170196b45cSwlei                                        object::SectionedAddress::UndefSection};
9186693c562Swlei   DIInliningInfo InlineStack = unwrapOrError(
9196693c562Swlei       Symbolizer->symbolizeInlinedCode(SymbolizerPath.str(), Addr),
9206693c562Swlei       SymbolizerPath);
9210196b45cSwlei 
922b9db7036SHongtao Yu   SampleContextFrameVector CallStack;
9230196b45cSwlei   for (int32_t I = InlineStack.getNumberOfFrames() - 1; I >= 0; I--) {
9240196b45cSwlei     const auto &CallerFrame = InlineStack.getFrame(I);
9257a3db658SHongtao Yu     if (CallerFrame.FunctionName.empty() ||
9267a3db658SHongtao Yu         (CallerFrame.FunctionName == "<invalid>"))
9270196b45cSwlei       break;
928eca03d27SWenlei He 
9291f05b1a9Swlei     StringRef FunctionName(CallerFrame.FunctionName);
9301f05b1a9Swlei     if (UseCanonicalFnName)
9311f05b1a9Swlei       FunctionName = FunctionSamples::getCanonicalFnName(FunctionName);
932eca03d27SWenlei He 
933eca03d27SWenlei He     uint32_t Discriminator = CallerFrame.Discriminator;
934484a569eSwlei     uint32_t LineOffset = (CallerFrame.Line - CallerFrame.StartLine) & 0xffff;
935eca03d27SWenlei He     if (UseProbeDiscriminator) {
936eca03d27SWenlei He       LineOffset =
937eca03d27SWenlei He           PseudoProbeDwarfDiscriminator::extractProbeIndex(Discriminator);
938eca03d27SWenlei He       Discriminator = 0;
939eca03d27SWenlei He     }
940eca03d27SWenlei He 
941eca03d27SWenlei He     LineLocation Line(LineOffset, Discriminator);
942b9db7036SHongtao Yu     auto It = NameStrings.insert(FunctionName.str());
943ef0e0adcSWilliam Junda Huang     CallStack.emplace_back(FunctionId(StringRef(*It.first)), Line);
9440196b45cSwlei   }
9450196b45cSwlei 
9460196b45cSwlei   return CallStack;
9470196b45cSwlei }
9480196b45cSwlei 
94946765248Swlei void ProfiledBinary::computeInlinedContextSizeForRange(uint64_t RangeBegin,
95046765248Swlei                                                        uint64_t RangeEnd) {
9515bf191a3Swlei   InstructionPointer IP(this, RangeBegin, true);
952ce40843aSwlei 
9535bf191a3Swlei   if (IP.Address != RangeBegin)
9545bf191a3Swlei     WithColor::warning() << "Invalid start instruction at "
9555bf191a3Swlei                          << format("%8" PRIx64, RangeBegin) << "\n";
9565bf191a3Swlei 
9575bf191a3Swlei   if (IP.Address >= RangeEnd)
9585bf191a3Swlei     return;
9595bf191a3Swlei 
9605bf191a3Swlei   do {
96191cc53d5Swlei     const SampleContextFrameVector SymbolizedCallStack =
96246765248Swlei         getFrameLocationStack(IP.Address, UsePseudoProbes);
96346765248Swlei     uint64_t Size = AddressToInstSizeMap[IP.Address];
964ce40843aSwlei     // Record instruction size for the corresponding context
965ce40843aSwlei     FuncSizeTracker.addInstructionForContext(SymbolizedCallStack, Size);
966ce40843aSwlei 
9675bf191a3Swlei   } while (IP.advance() && IP.Address < RangeEnd);
968ce40843aSwlei }
969ce40843aSwlei 
97034e131b0SHongtao Yu void ProfiledBinary::computeInlinedContextSizeForFunc(
97134e131b0SHongtao Yu     const BinaryFunction *Func) {
97234e131b0SHongtao Yu   // Note that a function can be spilt into multiple ranges, so compute for all
97334e131b0SHongtao Yu   // ranges of the function.
97434e131b0SHongtao Yu   for (const auto &Range : Func->Ranges)
97534e131b0SHongtao Yu     computeInlinedContextSizeForRange(Range.first, Range.second);
97634e131b0SHongtao Yu 
97734e131b0SHongtao Yu   // Track optimized-away inlinee for probed binary. A function inlined and then
97834e131b0SHongtao Yu   // optimized away should still have their probes left over in places.
97934e131b0SHongtao Yu   if (usePseudoProbes()) {
98034e131b0SHongtao Yu     auto I = TopLevelProbeFrameMap.find(Func->FuncName);
98134e131b0SHongtao Yu     if (I != TopLevelProbeFrameMap.end()) {
98234e131b0SHongtao Yu       BinarySizeContextTracker::ProbeFrameStack ProbeContext;
98334e131b0SHongtao Yu       FuncSizeTracker.trackInlineesOptimizedAway(ProbeDecoder, *I->second,
98434e131b0SHongtao Yu                                                  ProbeContext);
98534e131b0SHongtao Yu     }
98634e131b0SHongtao Yu   }
98734e131b0SHongtao Yu }
98834e131b0SHongtao Yu 
9895d7950a4SHongtao Yu void ProfiledBinary::inferMissingFrames(
9905d7950a4SHongtao Yu     const SmallVectorImpl<uint64_t> &Context,
9915d7950a4SHongtao Yu     SmallVectorImpl<uint64_t> &NewContext) {
9925d7950a4SHongtao Yu   MissingContextInferrer->inferMissingFrames(Context, NewContext);
9935d7950a4SHongtao Yu }
9945d7950a4SHongtao Yu 
995964053d5Swlei InstructionPointer::InstructionPointer(const ProfiledBinary *Binary,
996964053d5Swlei                                        uint64_t Address, bool RoundToNext)
9971f05b1a9Swlei     : Binary(Binary), Address(Address) {
9981f05b1a9Swlei   Index = Binary->getIndexForAddr(Address);
9991f05b1a9Swlei   if (RoundToNext) {
10001f05b1a9Swlei     // we might get address which is not the code
10011f05b1a9Swlei     // it should round to the next valid address
100246765248Swlei     if (Index >= Binary->getCodeAddrVecSize())
10035bf191a3Swlei       this->Address = UINT64_MAX;
10045bf191a3Swlei     else
10051f05b1a9Swlei       this->Address = Binary->getAddressforIndex(Index);
10061f05b1a9Swlei   }
10071f05b1a9Swlei }
10081f05b1a9Swlei 
10095bf191a3Swlei bool InstructionPointer::advance() {
10101f05b1a9Swlei   Index++;
101146765248Swlei   if (Index >= Binary->getCodeAddrVecSize()) {
10125bf191a3Swlei     Address = UINT64_MAX;
10135bf191a3Swlei     return false;
10145bf191a3Swlei   }
10151f05b1a9Swlei   Address = Binary->getAddressforIndex(Index);
10165bf191a3Swlei   return true;
10171f05b1a9Swlei }
10181f05b1a9Swlei 
10195bf191a3Swlei bool InstructionPointer::backward() {
10205bf191a3Swlei   if (Index == 0) {
10215bf191a3Swlei     Address = 0;
10225bf191a3Swlei     return false;
10235bf191a3Swlei   }
10241f05b1a9Swlei   Index--;
10251f05b1a9Swlei   Address = Binary->getAddressforIndex(Index);
10265bf191a3Swlei   return true;
10271f05b1a9Swlei }
10281f05b1a9Swlei 
10291f05b1a9Swlei void InstructionPointer::update(uint64_t Addr) {
10301f05b1a9Swlei   Address = Addr;
10311f05b1a9Swlei   Index = Binary->getIndexForAddr(Address);
10321f05b1a9Swlei }
10331f05b1a9Swlei 
103432221694Swlei } // end namespace sampleprof
103532221694Swlei } // end namespace llvm
1036