xref: /llvm-project/bolt/lib/Core/BinaryContext.cpp (revision 6e8a1a45a783c13e4cd19bfd20b7a56cab6f7d81)
12f09f445SMaksim Panchenko //===- bolt/Core/BinaryContext.cpp - Low-level context --------------------===//
2a34c753fSRafael Auler //
3a34c753fSRafael Auler // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4a34c753fSRafael Auler // See https://llvm.org/LICENSE.txt for license information.
5a34c753fSRafael Auler // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6a34c753fSRafael Auler //
7a34c753fSRafael Auler //===----------------------------------------------------------------------===//
8a34c753fSRafael Auler //
92f09f445SMaksim Panchenko // This file implements the BinaryContext class.
102f09f445SMaksim Panchenko //
11a34c753fSRafael Auler //===----------------------------------------------------------------------===//
12a34c753fSRafael Auler 
13a34c753fSRafael Auler #include "bolt/Core/BinaryContext.h"
14a34c753fSRafael Auler #include "bolt/Core/BinaryEmitter.h"
15a34c753fSRafael Auler #include "bolt/Core/BinaryFunction.h"
16a34c753fSRafael Auler #include "bolt/Utils/CommandLineOpts.h"
17a34c753fSRafael Auler #include "bolt/Utils/Utils.h"
1872e5b14fSAmir Ayupov #include "llvm/ADT/STLExtras.h"
19a34c753fSRafael Auler #include "llvm/ADT/Twine.h"
20290e4823Sserge-sans-paille #include "llvm/DebugInfo/DWARF/DWARFCompileUnit.h"
21a34c753fSRafael Auler #include "llvm/DebugInfo/DWARF/DWARFFormValue.h"
22a34c753fSRafael Auler #include "llvm/DebugInfo/DWARF/DWARFUnit.h"
23a34c753fSRafael Auler #include "llvm/MC/MCAssembler.h"
24a34c753fSRafael Auler #include "llvm/MC/MCContext.h"
25a34c753fSRafael Auler #include "llvm/MC/MCDisassembler/MCDisassembler.h"
26a34c753fSRafael Auler #include "llvm/MC/MCInstPrinter.h"
27a34c753fSRafael Auler #include "llvm/MC/MCObjectStreamer.h"
28a34c753fSRafael Auler #include "llvm/MC/MCObjectWriter.h"
2957f7c7d9Sserge-sans-paille #include "llvm/MC/MCRegisterInfo.h"
30a34c753fSRafael Auler #include "llvm/MC/MCSectionELF.h"
31a34c753fSRafael Auler #include "llvm/MC/MCStreamer.h"
3257f7c7d9Sserge-sans-paille #include "llvm/MC/MCSubtargetInfo.h"
33a34c753fSRafael Auler #include "llvm/MC/MCSymbol.h"
34a34c753fSRafael Auler #include "llvm/Support/CommandLine.h"
3532d2473aSAmir Ayupov #include "llvm/Support/Error.h"
36a34c753fSRafael Auler #include "llvm/Support/Regex.h"
376aa735ceSAmir Ayupov #include <algorithm>
38a34c753fSRafael Auler #include <functional>
39a34c753fSRafael Auler #include <iterator>
406aa735ceSAmir Ayupov #include <unordered_set>
41a34c753fSRafael Auler 
42a34c753fSRafael Auler using namespace llvm;
43a34c753fSRafael Auler 
44a34c753fSRafael Auler #undef  DEBUG_TYPE
45a34c753fSRafael Auler #define DEBUG_TYPE "bolt"
46a34c753fSRafael Auler 
47a34c753fSRafael Auler namespace opts {
48a34c753fSRafael Auler 
49b92436efSFangrui Song cl::opt<bool> NoHugePages("no-huge-pages",
50a34c753fSRafael Auler                           cl::desc("use regular size pages for code alignment"),
51b92436efSFangrui Song                           cl::Hidden, cl::cat(BoltCategory));
52a34c753fSRafael Auler 
53a34c753fSRafael Auler static cl::opt<bool>
54a34c753fSRafael Auler PrintDebugInfo("print-debug-info",
55a34c753fSRafael Auler   cl::desc("print debug info when printing functions"),
56a34c753fSRafael Auler   cl::Hidden,
57a34c753fSRafael Auler   cl::ZeroOrMore,
58a34c753fSRafael Auler   cl::cat(BoltCategory));
59a34c753fSRafael Auler 
60b92436efSFangrui Song cl::opt<bool> PrintRelocations(
61b92436efSFangrui Song     "print-relocations",
62b92436efSFangrui Song     cl::desc("print relocations when printing functions/objects"), cl::Hidden,
63a34c753fSRafael Auler     cl::cat(BoltCategory));
64a34c753fSRafael Auler 
65a34c753fSRafael Auler static cl::opt<bool>
66a34c753fSRafael Auler PrintMemData("print-mem-data",
67a34c753fSRafael Auler   cl::desc("print memory data annotations when printing functions"),
68a34c753fSRafael Auler   cl::Hidden,
69a34c753fSRafael Auler   cl::ZeroOrMore,
70a34c753fSRafael Auler   cl::cat(BoltCategory));
71a34c753fSRafael Auler 
727d272722SAlexander Yermolovich cl::opt<std::string> CompDirOverride(
737d272722SAlexander Yermolovich     "comp-dir-override",
74d251a328SJordan Brantner     cl::desc("overrides DW_AT_comp_dir, and provides an alternative base "
757d272722SAlexander Yermolovich              "location, which is used with DW_AT_dwo_name to construct a path "
767d272722SAlexander Yermolovich              "to *.dwo files."),
777d272722SAlexander Yermolovich     cl::Hidden, cl::init(""), cl::cat(BoltCategory));
78a34c753fSRafael Auler } // namespace opts
79a34c753fSRafael Auler 
80a34c753fSRafael Auler namespace llvm {
81a34c753fSRafael Auler namespace bolt {
82a34c753fSRafael Auler 
83fa7dd491SAmir Ayupov char BOLTError::ID = 0;
84fa7dd491SAmir Ayupov 
85fa7dd491SAmir Ayupov BOLTError::BOLTError(bool IsFatal, const Twine &S)
86fa7dd491SAmir Ayupov     : IsFatal(IsFatal), Msg(S.str()) {}
87fa7dd491SAmir Ayupov 
88fa7dd491SAmir Ayupov void BOLTError::log(raw_ostream &OS) const {
89fa7dd491SAmir Ayupov   if (IsFatal)
90fa7dd491SAmir Ayupov     OS << "FATAL ";
91fa7dd491SAmir Ayupov   StringRef ErrMsg = StringRef(Msg);
92fa7dd491SAmir Ayupov   // Prepend our error prefix if it is missing
93fa7dd491SAmir Ayupov   if (ErrMsg.empty()) {
94fa7dd491SAmir Ayupov     OS << "BOLT-ERROR\n";
95fa7dd491SAmir Ayupov   } else {
96fa7dd491SAmir Ayupov     if (!ErrMsg.starts_with("BOLT-ERROR"))
97fa7dd491SAmir Ayupov       OS << "BOLT-ERROR: ";
98fa7dd491SAmir Ayupov     OS << ErrMsg << "\n";
99fa7dd491SAmir Ayupov   }
100fa7dd491SAmir Ayupov }
101fa7dd491SAmir Ayupov 
102fa7dd491SAmir Ayupov std::error_code BOLTError::convertToErrorCode() const {
103fa7dd491SAmir Ayupov   return inconvertibleErrorCode();
104fa7dd491SAmir Ayupov }
105fa7dd491SAmir Ayupov 
106fa7dd491SAmir Ayupov Error createNonFatalBOLTError(const Twine &S) {
107fa7dd491SAmir Ayupov   return make_error<BOLTError>(/*IsFatal*/ false, S);
108fa7dd491SAmir Ayupov }
109fa7dd491SAmir Ayupov 
110fa7dd491SAmir Ayupov Error createFatalBOLTError(const Twine &S) {
111fa7dd491SAmir Ayupov   return make_error<BOLTError>(/*IsFatal*/ true, S);
112fa7dd491SAmir Ayupov }
113fa7dd491SAmir Ayupov 
11452cf0711SAmir Ayupov void BinaryContext::logBOLTErrorsAndQuitOnFatal(Error E) {
11552cf0711SAmir Ayupov   handleAllErrors(Error(std::move(E)), [&](const BOLTError &E) {
11652cf0711SAmir Ayupov     if (!E.getMessage().empty())
11752cf0711SAmir Ayupov       E.log(this->errs());
11852cf0711SAmir Ayupov     if (E.isFatal())
11952cf0711SAmir Ayupov       exit(1);
12052cf0711SAmir Ayupov   });
12152cf0711SAmir Ayupov }
12252cf0711SAmir Ayupov 
123a34c753fSRafael Auler BinaryContext::BinaryContext(std::unique_ptr<MCContext> Ctx,
124a34c753fSRafael Auler                              std::unique_ptr<DWARFContext> DwCtx,
125a34c753fSRafael Auler                              std::unique_ptr<Triple> TheTriple,
1262ccf7ed2SJared Wyles                              std::shared_ptr<orc::SymbolStringPool> SSP,
12740c2e0faSMaksim Panchenko                              const Target *TheTarget, std::string TripleName,
128a34c753fSRafael Auler                              std::unique_ptr<MCCodeEmitter> MCE,
129a34c753fSRafael Auler                              std::unique_ptr<MCObjectFileInfo> MOFI,
130a34c753fSRafael Auler                              std::unique_ptr<const MCAsmInfo> AsmInfo,
131a34c753fSRafael Auler                              std::unique_ptr<const MCInstrInfo> MII,
132a34c753fSRafael Auler                              std::unique_ptr<const MCSubtargetInfo> STI,
133a34c753fSRafael Auler                              std::unique_ptr<MCInstPrinter> InstPrinter,
134a34c753fSRafael Auler                              std::unique_ptr<const MCInstrAnalysis> MIA,
135a34c753fSRafael Auler                              std::unique_ptr<MCPlusBuilder> MIB,
136a34c753fSRafael Auler                              std::unique_ptr<const MCRegisterInfo> MRI,
13752cf0711SAmir Ayupov                              std::unique_ptr<MCDisassembler> DisAsm,
13852cf0711SAmir Ayupov                              JournalingStreams Logger)
13940c2e0faSMaksim Panchenko     : Ctx(std::move(Ctx)), DwCtx(std::move(DwCtx)),
1402ccf7ed2SJared Wyles       TheTriple(std::move(TheTriple)), SSP(std::move(SSP)),
1412ccf7ed2SJared Wyles       TheTarget(TheTarget), TripleName(TripleName), MCE(std::move(MCE)),
1422ccf7ed2SJared Wyles       MOFI(std::move(MOFI)), AsmInfo(std::move(AsmInfo)), MII(std::move(MII)),
1432ccf7ed2SJared Wyles       STI(std::move(STI)), InstPrinter(std::move(InstPrinter)),
1442ccf7ed2SJared Wyles       MIA(std::move(MIA)), MIB(std::move(MIB)), MRI(std::move(MRI)),
1452ccf7ed2SJared Wyles       DisAsm(std::move(DisAsm)), Logger(Logger), InitialDynoStats(isAArch64()) {
146db65429dSElvina Yakubova   RegularPageSize = isAArch64() ? RegularPageSizeAArch64 : RegularPageSizeX86;
147a34c753fSRafael Auler   PageAlign = opts::NoHugePages ? RegularPageSize : HugePageSize;
148a34c753fSRafael Auler }
149a34c753fSRafael Auler 
150a34c753fSRafael Auler BinaryContext::~BinaryContext() {
1513652483cSRafael Auler   for (BinarySection *Section : Sections)
152a34c753fSRafael Auler     delete Section;
1533652483cSRafael Auler   for (BinaryFunction *InjectedFunction : InjectedBinaryFunctions)
154a34c753fSRafael Auler     delete InjectedFunction;
1553652483cSRafael Auler   for (std::pair<const uint64_t, JumpTable *> JTI : JumpTables)
156a34c753fSRafael Auler     delete JTI.second;
157a34c753fSRafael Auler   clearBinaryData();
158a34c753fSRafael Auler }
159a34c753fSRafael Auler 
160a34c753fSRafael Auler /// Create BinaryContext for a given architecture \p ArchName and
161a34c753fSRafael Auler /// triple \p TripleName.
162c0febca3SAmir Ayupov Expected<std::unique_ptr<BinaryContext>> BinaryContext::createBinaryContext(
1632ccf7ed2SJared Wyles     Triple TheTriple, std::shared_ptr<orc::SymbolStringPool> SSP,
1642ccf7ed2SJared Wyles     StringRef InputFileName, SubtargetFeatures *Features, bool IsPIC,
1652ccf7ed2SJared Wyles     std::unique_ptr<DWARFContext> DwCtx, JournalingStreams Logger) {
166a34c753fSRafael Auler   StringRef ArchName = "";
16786bc4867SJob Noorman   std::string FeaturesStr = "";
168c0febca3SAmir Ayupov   switch (TheTriple.getArch()) {
169a34c753fSRafael Auler   case llvm::Triple::x86_64:
170c0febca3SAmir Ayupov     if (Features)
171c0febca3SAmir Ayupov       return createFatalBOLTError(
172c0febca3SAmir Ayupov           "x86_64 target does not use SubtargetFeatures");
173a34c753fSRafael Auler     ArchName = "x86-64";
174a34c753fSRafael Auler     FeaturesStr = "+nopl";
175a34c753fSRafael Auler     break;
176a34c753fSRafael Auler   case llvm::Triple::aarch64:
177c0febca3SAmir Ayupov     if (Features)
178c0febca3SAmir Ayupov       return createFatalBOLTError(
179c0febca3SAmir Ayupov           "AArch64 target does not use SubtargetFeatures");
180a34c753fSRafael Auler     ArchName = "aarch64";
18175641678SDenis Revunov     FeaturesStr = "+all";
182a34c753fSRafael Auler     break;
18386bc4867SJob Noorman   case llvm::Triple::riscv64: {
184f8730293SJob Noorman     ArchName = "riscv64";
185c0febca3SAmir Ayupov     if (!Features)
186c0febca3SAmir Ayupov       return createFatalBOLTError("RISCV target needs SubtargetFeatures");
18786bc4867SJob Noorman     // We rely on relaxation for some transformations (e.g., promoting all calls
18886bc4867SJob Noorman     // to PseudoCALL and then making JITLink relax them). Since the relax
18986bc4867SJob Noorman     // feature is not stored in the object file, we manually enable it.
19086bc4867SJob Noorman     Features->AddFeature("relax");
19186bc4867SJob Noorman     FeaturesStr = Features->getString();
192f8730293SJob Noorman     break;
19386bc4867SJob Noorman   }
194a34c753fSRafael Auler   default:
19532d2473aSAmir Ayupov     return createStringError(std::errc::not_supported,
19632d2473aSAmir Ayupov                              "BOLT-ERROR: Unrecognized machine in ELF file");
197a34c753fSRafael Auler   }
198a34c753fSRafael Auler 
199c0febca3SAmir Ayupov   const std::string TripleName = TheTriple.str();
200a34c753fSRafael Auler 
201a34c753fSRafael Auler   std::string Error;
202a34c753fSRafael Auler   const Target *TheTarget =
203c0febca3SAmir Ayupov       TargetRegistry::lookupTarget(std::string(ArchName), TheTriple, Error);
20432d2473aSAmir Ayupov   if (!TheTarget)
20532d2473aSAmir Ayupov     return createStringError(make_error_code(std::errc::not_supported),
20632d2473aSAmir Ayupov                              Twine("BOLT-ERROR: ", Error));
207a34c753fSRafael Auler 
208a34c753fSRafael Auler   std::unique_ptr<const MCRegisterInfo> MRI(
209a34c753fSRafael Auler       TheTarget->createMCRegInfo(TripleName));
21032d2473aSAmir Ayupov   if (!MRI)
21132d2473aSAmir Ayupov     return createStringError(
21232d2473aSAmir Ayupov         make_error_code(std::errc::not_supported),
21332d2473aSAmir Ayupov         Twine("BOLT-ERROR: no register info for target ", TripleName));
214a34c753fSRafael Auler 
215a34c753fSRafael Auler   // Set up disassembler.
216c31af7cfSAmir Ayupov   std::unique_ptr<MCAsmInfo> AsmInfo(
217a34c753fSRafael Auler       TheTarget->createMCAsmInfo(*MRI, TripleName, MCTargetOptions()));
21832d2473aSAmir Ayupov   if (!AsmInfo)
21932d2473aSAmir Ayupov     return createStringError(
22032d2473aSAmir Ayupov         make_error_code(std::errc::not_supported),
22132d2473aSAmir Ayupov         Twine("BOLT-ERROR: no assembly info for target ", TripleName));
222c31af7cfSAmir Ayupov   // BOLT creates "func@PLT" symbols for PLT entries. In function assembly dump
223c31af7cfSAmir Ayupov   // we want to emit such names as using @PLT without double quotes to convey
224c31af7cfSAmir Ayupov   // variant kind to the assembler. BOLT doesn't rely on the linker so we can
225c31af7cfSAmir Ayupov   // override the default AsmInfo behavior to emit names the way we want.
226c31af7cfSAmir Ayupov   AsmInfo->setAllowAtInName(true);
227a34c753fSRafael Auler 
228a34c753fSRafael Auler   std::unique_ptr<const MCSubtargetInfo> STI(
229a34c753fSRafael Auler       TheTarget->createMCSubtargetInfo(TripleName, "", FeaturesStr));
23032d2473aSAmir Ayupov   if (!STI)
23132d2473aSAmir Ayupov     return createStringError(
23232d2473aSAmir Ayupov         make_error_code(std::errc::not_supported),
23332d2473aSAmir Ayupov         Twine("BOLT-ERROR: no subtarget info for target ", TripleName));
234a34c753fSRafael Auler 
235a34c753fSRafael Auler   std::unique_ptr<const MCInstrInfo> MII(TheTarget->createMCInstrInfo());
23632d2473aSAmir Ayupov   if (!MII)
23732d2473aSAmir Ayupov     return createStringError(
23832d2473aSAmir Ayupov         make_error_code(std::errc::not_supported),
23932d2473aSAmir Ayupov         Twine("BOLT-ERROR: no instruction info for target ", TripleName));
240a34c753fSRafael Auler 
241a34c753fSRafael Auler   std::unique_ptr<MCContext> Ctx(
242c0febca3SAmir Ayupov       new MCContext(TheTriple, AsmInfo.get(), MRI.get(), STI.get()));
243a34c753fSRafael Auler   std::unique_ptr<MCObjectFileInfo> MOFI(
244a34c753fSRafael Auler       TheTarget->createMCObjectFileInfo(*Ctx, IsPIC));
245a34c753fSRafael Auler   Ctx->setObjectFileInfo(MOFI.get());
246a34c753fSRafael Auler   // We do not support X86 Large code model. Change this in the future.
247a34c753fSRafael Auler   bool Large = false;
248c0febca3SAmir Ayupov   if (TheTriple.getArch() == llvm::Triple::aarch64)
249a34c753fSRafael Auler     Large = true;
250a34c753fSRafael Auler   unsigned LSDAEncoding =
251a34c753fSRafael Auler       Large ? dwarf::DW_EH_PE_absptr : dwarf::DW_EH_PE_udata4;
252a34c753fSRafael Auler   if (IsPIC) {
253a34c753fSRafael Auler     LSDAEncoding = dwarf::DW_EH_PE_pcrel |
254a34c753fSRafael Auler                    (Large ? dwarf::DW_EH_PE_sdata8 : dwarf::DW_EH_PE_sdata4);
255a34c753fSRafael Auler   }
256a34c753fSRafael Auler 
257a34c753fSRafael Auler   std::unique_ptr<MCDisassembler> DisAsm(
258a34c753fSRafael Auler       TheTarget->createMCDisassembler(*STI, *Ctx));
259a34c753fSRafael Auler 
26032d2473aSAmir Ayupov   if (!DisAsm)
26132d2473aSAmir Ayupov     return createStringError(
26232d2473aSAmir Ayupov         make_error_code(std::errc::not_supported),
26332d2473aSAmir Ayupov         Twine("BOLT-ERROR: no disassembler info for target ", TripleName));
264a34c753fSRafael Auler 
265a34c753fSRafael Auler   std::unique_ptr<const MCInstrAnalysis> MIA(
266a34c753fSRafael Auler       TheTarget->createMCInstrAnalysis(MII.get()));
26732d2473aSAmir Ayupov   if (!MIA)
26832d2473aSAmir Ayupov     return createStringError(
26932d2473aSAmir Ayupov         make_error_code(std::errc::not_supported),
27032d2473aSAmir Ayupov         Twine("BOLT-ERROR: failed to create instruction analysis for target ",
27132d2473aSAmir Ayupov               TripleName));
272a34c753fSRafael Auler 
273a34c753fSRafael Auler   int AsmPrinterVariant = AsmInfo->getAssemblerDialect();
274a34c753fSRafael Auler   std::unique_ptr<MCInstPrinter> InstructionPrinter(
275c0febca3SAmir Ayupov       TheTarget->createMCInstPrinter(TheTriple, AsmPrinterVariant, *AsmInfo,
276a34c753fSRafael Auler                                      *MII, *MRI));
27732d2473aSAmir Ayupov   if (!InstructionPrinter)
27832d2473aSAmir Ayupov     return createStringError(
27932d2473aSAmir Ayupov         make_error_code(std::errc::not_supported),
28032d2473aSAmir Ayupov         Twine("BOLT-ERROR: no instruction printer for target ", TripleName));
281a34c753fSRafael Auler   InstructionPrinter->setPrintImmHex(true);
282a34c753fSRafael Auler 
283a34c753fSRafael Auler   std::unique_ptr<MCCodeEmitter> MCE(
2842aed07e9SShao-Ce SUN       TheTarget->createMCCodeEmitter(*MII, *Ctx));
285a34c753fSRafael Auler 
286a34c753fSRafael Auler   auto BC = std::make_unique<BinaryContext>(
287c0febca3SAmir Ayupov       std::move(Ctx), std::move(DwCtx), std::make_unique<Triple>(TheTriple),
2882ccf7ed2SJared Wyles       std::move(SSP), TheTarget, std::string(TripleName), std::move(MCE),
2892ccf7ed2SJared Wyles       std::move(MOFI), std::move(AsmInfo), std::move(MII), std::move(STI),
29040c2e0faSMaksim Panchenko       std::move(InstructionPrinter), std::move(MIA), nullptr, std::move(MRI),
29152cf0711SAmir Ayupov       std::move(DisAsm), Logger);
292a34c753fSRafael Auler 
293a34c753fSRafael Auler   BC->LSDAEncoding = LSDAEncoding;
294a34c753fSRafael Auler 
295a34c753fSRafael Auler   BC->MAB = std::unique_ptr<MCAsmBackend>(
296a34c753fSRafael Auler       BC->TheTarget->createMCAsmBackend(*BC->STI, *BC->MRI, MCTargetOptions()));
297a34c753fSRafael Auler 
298c0febca3SAmir Ayupov   BC->setFilename(InputFileName);
299a34c753fSRafael Auler 
300a34c753fSRafael Auler   BC->HasFixedLoadAddress = !IsPIC;
301a34c753fSRafael Auler 
302e290133cSMaksim Panchenko   BC->SymbolicDisAsm = std::unique_ptr<MCDisassembler>(
303e290133cSMaksim Panchenko       BC->TheTarget->createMCDisassembler(*BC->STI, *BC->Ctx));
304e290133cSMaksim Panchenko 
305e290133cSMaksim Panchenko   if (!BC->SymbolicDisAsm)
306e290133cSMaksim Panchenko     return createStringError(
307e290133cSMaksim Panchenko         make_error_code(std::errc::not_supported),
308e290133cSMaksim Panchenko         Twine("BOLT-ERROR: no disassembler info for target ", TripleName));
309e290133cSMaksim Panchenko 
31063686af1SVladislav Khmelevsky   return std::move(BC);
311a34c753fSRafael Auler }
312a34c753fSRafael Auler 
313a34c753fSRafael Auler bool BinaryContext::forceSymbolRelocations(StringRef SymbolName) const {
31440c2e0faSMaksim Panchenko   if (opts::HotText &&
31540c2e0faSMaksim Panchenko       (SymbolName == "__hot_start" || SymbolName == "__hot_end"))
316a34c753fSRafael Auler     return true;
317a34c753fSRafael Auler 
31840c2e0faSMaksim Panchenko   if (opts::HotData &&
31940c2e0faSMaksim Panchenko       (SymbolName == "__hot_data_start" || SymbolName == "__hot_data_end"))
320a34c753fSRafael Auler     return true;
321a34c753fSRafael Auler 
322a34c753fSRafael Auler   if (SymbolName == "_end")
323a34c753fSRafael Auler     return true;
324a34c753fSRafael Auler 
325a34c753fSRafael Auler   return false;
326a34c753fSRafael Auler }
327a34c753fSRafael Auler 
328a34c753fSRafael Auler std::unique_ptr<MCObjectWriter>
329a34c753fSRafael Auler BinaryContext::createObjectWriter(raw_pwrite_stream &OS) {
330a34c753fSRafael Auler   return MAB->createObjectWriter(OS);
331a34c753fSRafael Auler }
332a34c753fSRafael Auler 
333a34c753fSRafael Auler bool BinaryContext::validateObjectNesting() const {
334a34c753fSRafael Auler   auto Itr = BinaryDataMap.begin();
335a34c753fSRafael Auler   auto End = BinaryDataMap.end();
336a34c753fSRafael Auler   bool Valid = true;
337a34c753fSRafael Auler   while (Itr != End) {
338a34c753fSRafael Auler     auto Next = std::next(Itr);
339a34c753fSRafael Auler     while (Next != End &&
340a34c753fSRafael Auler            Itr->second->getSection() == Next->second->getSection() &&
341a34c753fSRafael Auler            Itr->second->containsRange(Next->second->getAddress(),
342a34c753fSRafael Auler                                       Next->second->getSize())) {
343a34c753fSRafael Auler       if (Next->second->Parent != Itr->second) {
34452cf0711SAmir Ayupov         this->errs() << "BOLT-WARNING: object nesting incorrect for:\n"
345a34c753fSRafael Auler                      << "BOLT-WARNING:  " << *Itr->second << "\n"
346a34c753fSRafael Auler                      << "BOLT-WARNING:  " << *Next->second << "\n";
347a34c753fSRafael Auler         Valid = false;
348a34c753fSRafael Auler       }
349a34c753fSRafael Auler       ++Next;
350a34c753fSRafael Auler     }
351a34c753fSRafael Auler     Itr = Next;
352a34c753fSRafael Auler   }
353a34c753fSRafael Auler   return Valid;
354a34c753fSRafael Auler }
355a34c753fSRafael Auler 
356a34c753fSRafael Auler bool BinaryContext::validateHoles() const {
357a34c753fSRafael Auler   bool Valid = true;
358a34c753fSRafael Auler   for (BinarySection &Section : sections()) {
359a34c753fSRafael Auler     for (const Relocation &Rel : Section.relocations()) {
360a34c753fSRafael Auler       uint64_t RelAddr = Rel.Offset + Section.getAddress();
361a34c753fSRafael Auler       const BinaryData *BD = getBinaryDataContainingAddress(RelAddr);
362a34c753fSRafael Auler       if (!BD) {
36352cf0711SAmir Ayupov         this->errs()
36452cf0711SAmir Ayupov             << "BOLT-WARNING: no BinaryData found for relocation at address"
36552cf0711SAmir Ayupov             << " 0x" << Twine::utohexstr(RelAddr) << " in " << Section.getName()
36652cf0711SAmir Ayupov             << "\n";
367a34c753fSRafael Auler         Valid = false;
368a34c753fSRafael Auler       } else if (!BD->getAtomicRoot()) {
36952cf0711SAmir Ayupov         this->errs()
37052cf0711SAmir Ayupov             << "BOLT-WARNING: no atomic BinaryData found for relocation at "
371a34c753fSRafael Auler             << "address 0x" << Twine::utohexstr(RelAddr) << " in "
372a34c753fSRafael Auler             << Section.getName() << "\n";
373a34c753fSRafael Auler         Valid = false;
374a34c753fSRafael Auler       }
375a34c753fSRafael Auler     }
376a34c753fSRafael Auler   }
377a34c753fSRafael Auler   return Valid;
378a34c753fSRafael Auler }
379a34c753fSRafael Auler 
380a34c753fSRafael Auler void BinaryContext::updateObjectNesting(BinaryDataMapType::iterator GAI) {
381a34c753fSRafael Auler   const uint64_t Address = GAI->second->getAddress();
382a34c753fSRafael Auler   const uint64_t Size = GAI->second->getSize();
383a34c753fSRafael Auler 
38440c2e0faSMaksim Panchenko   auto fixParents = [&](BinaryDataMapType::iterator Itr,
38540c2e0faSMaksim Panchenko                         BinaryData *NewParent) {
386a34c753fSRafael Auler     BinaryData *OldParent = Itr->second->Parent;
387a34c753fSRafael Auler     Itr->second->Parent = NewParent;
388a34c753fSRafael Auler     ++Itr;
389a34c753fSRafael Auler     while (Itr != BinaryDataMap.end() && OldParent &&
390a34c753fSRafael Auler            Itr->second->Parent == OldParent) {
391a34c753fSRafael Auler       Itr->second->Parent = NewParent;
392a34c753fSRafael Auler       ++Itr;
393a34c753fSRafael Auler     }
394a34c753fSRafael Auler   };
395a34c753fSRafael Auler 
396a34c753fSRafael Auler   // Check if the previous symbol contains the newly added symbol.
397a34c753fSRafael Auler   if (GAI != BinaryDataMap.begin()) {
398a34c753fSRafael Auler     BinaryData *Prev = std::prev(GAI)->second;
399a34c753fSRafael Auler     while (Prev) {
400a34c753fSRafael Auler       if (Prev->getSection() == GAI->second->getSection() &&
401a34c753fSRafael Auler           Prev->containsRange(Address, Size)) {
402a34c753fSRafael Auler         fixParents(GAI, Prev);
403a34c753fSRafael Auler       } else {
404a34c753fSRafael Auler         fixParents(GAI, nullptr);
405a34c753fSRafael Auler       }
406a34c753fSRafael Auler       Prev = Prev->Parent;
407a34c753fSRafael Auler     }
408a34c753fSRafael Auler   }
409a34c753fSRafael Auler 
410a34c753fSRafael Auler   // Check if the newly added symbol contains any subsequent symbols.
411a34c753fSRafael Auler   if (Size != 0) {
412a34c753fSRafael Auler     BinaryData *BD = GAI->second->Parent ? GAI->second->Parent : GAI->second;
413a34c753fSRafael Auler     auto Itr = std::next(GAI);
41440c2e0faSMaksim Panchenko     while (
41540c2e0faSMaksim Panchenko         Itr != BinaryDataMap.end() &&
41640c2e0faSMaksim Panchenko         BD->containsRange(Itr->second->getAddress(), Itr->second->getSize())) {
417a34c753fSRafael Auler       Itr->second->Parent = BD;
418a34c753fSRafael Auler       ++Itr;
419a34c753fSRafael Auler     }
420a34c753fSRafael Auler   }
421a34c753fSRafael Auler }
422a34c753fSRafael Auler 
423a34c753fSRafael Auler iterator_range<BinaryContext::binary_data_iterator>
424a34c753fSRafael Auler BinaryContext::getSubBinaryData(BinaryData *BD) {
425a34c753fSRafael Auler   auto Start = std::next(BinaryDataMap.find(BD->getAddress()));
426a34c753fSRafael Auler   auto End = Start;
4273652483cSRafael Auler   while (End != BinaryDataMap.end() && BD->isAncestorOf(End->second))
428a34c753fSRafael Auler     ++End;
429a34c753fSRafael Auler   return make_range(Start, End);
430a34c753fSRafael Auler }
431a34c753fSRafael Auler 
432a34c753fSRafael Auler std::pair<const MCSymbol *, uint64_t>
433a34c753fSRafael Auler BinaryContext::handleAddressRef(uint64_t Address, BinaryFunction &BF,
434a34c753fSRafael Auler                                 bool IsPCRel) {
435a34c753fSRafael Auler   if (isAArch64()) {
436a34c753fSRafael Auler     // Check if this is an access to a constant island and create bookkeeping
437a34c753fSRafael Auler     // to keep track of it and emit it later as part of this function.
438a34c753fSRafael Auler     if (MCSymbol *IslandSym = BF.getOrCreateIslandAccess(Address))
4398d1fc45dSRafael Auler       return std::make_pair(IslandSym, 0);
440a34c753fSRafael Auler 
441a34c753fSRafael Auler     // Detect custom code written in assembly that refers to arbitrary
442a34c753fSRafael Auler     // constant islands from other functions. Write this reference so we
443a34c753fSRafael Auler     // can pull this constant island and emit it as part of this function
444a34c753fSRafael Auler     // too.
445a34c753fSRafael Auler     auto IslandIter = AddressToConstantIslandMap.lower_bound(Address);
4466040415eSDenis Revunov 
4476040415eSDenis Revunov     if (IslandIter != AddressToConstantIslandMap.begin() &&
4486040415eSDenis Revunov         (IslandIter == AddressToConstantIslandMap.end() ||
4496040415eSDenis Revunov          IslandIter->first > Address))
4506040415eSDenis Revunov       --IslandIter;
4516040415eSDenis Revunov 
452a34c753fSRafael Auler     if (IslandIter != AddressToConstantIslandMap.end()) {
4537117af52SVladislav Khmelevsky       // Fall-back to referencing the original constant island in the presence
4547117af52SVladislav Khmelevsky       // of dynamic relocs, as we currently do not support cloning them.
4557117af52SVladislav Khmelevsky       // Notice: we might fail to link because of this, if the original constant
4567117af52SVladislav Khmelevsky       // island we are referring would be emitted too far away.
4577117af52SVladislav Khmelevsky       if (IslandIter->second->hasDynamicRelocationAtIsland()) {
4587117af52SVladislav Khmelevsky         MCSymbol *IslandSym =
4597117af52SVladislav Khmelevsky             IslandIter->second->getOrCreateIslandAccess(Address);
4607117af52SVladislav Khmelevsky         if (IslandSym)
4617117af52SVladislav Khmelevsky           return std::make_pair(IslandSym, 0);
4627117af52SVladislav Khmelevsky       } else if (MCSymbol *IslandSym =
4637117af52SVladislav Khmelevsky                      IslandIter->second->getOrCreateProxyIslandAccess(Address,
4647117af52SVladislav Khmelevsky                                                                       BF)) {
465a34c753fSRafael Auler         BF.createIslandDependency(IslandSym, IslandIter->second);
4668d1fc45dSRafael Auler         return std::make_pair(IslandSym, 0);
467a34c753fSRafael Auler       }
468a34c753fSRafael Auler     }
469a34c753fSRafael Auler   }
470a34c753fSRafael Auler 
471a34c753fSRafael Auler   // Note that the address does not necessarily have to reside inside
472a34c753fSRafael Auler   // a section, it could be an absolute address too.
473a34c753fSRafael Auler   ErrorOr<BinarySection &> Section = getSectionForAddress(Address);
474a34c753fSRafael Auler   if (Section && Section->isText()) {
475a34c753fSRafael Auler     if (BF.containsAddress(Address, /*UseMaxSize=*/isAArch64())) {
476a34c753fSRafael Auler       if (Address != BF.getAddress()) {
477a34c753fSRafael Auler         // The address could potentially escape. Mark it as another entry
478a34c753fSRafael Auler         // point into the function.
479a34c753fSRafael Auler         if (opts::Verbosity >= 1) {
48052cf0711SAmir Ayupov           this->outs() << "BOLT-INFO: potentially escaped address 0x"
48152cf0711SAmir Ayupov                        << Twine::utohexstr(Address) << " in function " << BF
48252cf0711SAmir Ayupov                        << '\n';
483a34c753fSRafael Auler         }
484a34c753fSRafael Auler         BF.HasInternalLabelReference = true;
485a34c753fSRafael Auler         return std::make_pair(
4868d1fc45dSRafael Auler             BF.addEntryPointAtOffset(Address - BF.getAddress()), 0);
487a34c753fSRafael Auler       }
488a34c753fSRafael Auler     } else {
48935efe1d8SVladislav Khmelevsky       addInterproceduralReference(&BF, Address);
490a34c753fSRafael Auler     }
491a34c753fSRafael Auler   }
492a34c753fSRafael Auler 
493a34c753fSRafael Auler   // With relocations, catch jump table references outside of the basic block
494a34c753fSRafael Auler   // containing the indirect jump.
495a34c753fSRafael Auler   if (HasRelocations) {
496a34c753fSRafael Auler     const MemoryContentsType MemType = analyzeMemoryAt(Address, BF);
497a34c753fSRafael Auler     if (MemType == MemoryContentsType::POSSIBLE_PIC_JUMP_TABLE && IsPCRel) {
498a34c753fSRafael Auler       const MCSymbol *Symbol =
499a34c753fSRafael Auler           getOrCreateJumpTable(BF, Address, JumpTable::JTT_PIC);
500a34c753fSRafael Auler 
5018d1fc45dSRafael Auler       return std::make_pair(Symbol, 0);
502a34c753fSRafael Auler     }
503a34c753fSRafael Auler   }
504a34c753fSRafael Auler 
5053652483cSRafael Auler   if (BinaryData *BD = getBinaryDataContainingAddress(Address))
506a34c753fSRafael Auler     return std::make_pair(BD->getSymbol(), Address - BD->getAddress());
507a34c753fSRafael Auler 
508a34c753fSRafael Auler   // TODO: use DWARF info to get size/alignment here?
509a34c753fSRafael Auler   MCSymbol *TargetSymbol = getOrCreateGlobalSymbol(Address, "DATAat");
510a34c753fSRafael Auler   LLVM_DEBUG(dbgs() << "Created symbol " << TargetSymbol->getName() << '\n');
5118d1fc45dSRafael Auler   return std::make_pair(TargetSymbol, 0);
512a34c753fSRafael Auler }
513a34c753fSRafael Auler 
51440c2e0faSMaksim Panchenko MemoryContentsType BinaryContext::analyzeMemoryAt(uint64_t Address,
51540c2e0faSMaksim Panchenko                                                   BinaryFunction &BF) {
516a34c753fSRafael Auler   if (!isX86())
517a34c753fSRafael Auler     return MemoryContentsType::UNKNOWN;
518a34c753fSRafael Auler 
519a34c753fSRafael Auler   ErrorOr<BinarySection &> Section = getSectionForAddress(Address);
520a34c753fSRafael Auler   if (!Section) {
521a34c753fSRafael Auler     // No section - possibly an absolute address. Since we don't allow
522a34c753fSRafael Auler     // internal function addresses to escape the function scope - we
523a34c753fSRafael Auler     // consider it a tail call.
524a34c753fSRafael Auler     if (opts::Verbosity > 1) {
52552cf0711SAmir Ayupov       this->errs() << "BOLT-WARNING: no section for address 0x"
52652cf0711SAmir Ayupov                    << Twine::utohexstr(Address) << " referenced from function "
52752cf0711SAmir Ayupov                    << BF << '\n';
528a34c753fSRafael Auler     }
529a34c753fSRafael Auler     return MemoryContentsType::UNKNOWN;
530a34c753fSRafael Auler   }
531a34c753fSRafael Auler 
532a34c753fSRafael Auler   if (Section->isVirtual()) {
533a34c753fSRafael Auler     // The contents are filled at runtime.
534a34c753fSRafael Auler     return MemoryContentsType::UNKNOWN;
535a34c753fSRafael Auler   }
536a34c753fSRafael Auler 
537a34c753fSRafael Auler   // No support for jump tables in code yet.
538a34c753fSRafael Auler   if (Section->isText())
539a34c753fSRafael Auler     return MemoryContentsType::UNKNOWN;
540a34c753fSRafael Auler 
541a34c753fSRafael Auler   // Start with checking for PIC jump table. We expect non-PIC jump tables
542a34c753fSRafael Auler   // to have high 32 bits set to 0.
543a34c753fSRafael Auler   if (analyzeJumpTable(Address, JumpTable::JTT_PIC, BF))
544a34c753fSRafael Auler     return MemoryContentsType::POSSIBLE_PIC_JUMP_TABLE;
545a34c753fSRafael Auler 
546a34c753fSRafael Auler   if (analyzeJumpTable(Address, JumpTable::JTT_NORMAL, BF))
547a34c753fSRafael Auler     return MemoryContentsType::POSSIBLE_JUMP_TABLE;
548a34c753fSRafael Auler 
549a34c753fSRafael Auler   return MemoryContentsType::UNKNOWN;
550a34c753fSRafael Auler }
551a34c753fSRafael Auler 
55208ab4fafSAmir Ayupov bool BinaryContext::analyzeJumpTable(const uint64_t Address,
55308ab4fafSAmir Ayupov                                      const JumpTable::JumpTableType Type,
55408ab4fafSAmir Ayupov                                      const BinaryFunction &BF,
55508ab4fafSAmir Ayupov                                      const uint64_t NextJTAddress,
55608ab4fafSAmir Ayupov                                      JumpTable::AddressesType *EntriesAsAddress,
55708ab4fafSAmir Ayupov                                      bool *HasEntryInFragment) const {
55843d0891dSMaksim Panchenko   // Target address of __builtin_unreachable.
55943d0891dSMaksim Panchenko   const uint64_t UnreachableAddress = BF.getAddress() + BF.getSize();
56043d0891dSMaksim Panchenko 
561a34c753fSRafael Auler   // Is one of the targets __builtin_unreachable?
562a34c753fSRafael Auler   bool HasUnreachable = false;
563a34c753fSRafael Auler 
5641e4ee588SMaksim Panchenko   // Does one of the entries match function start address?
5651e4ee588SMaksim Panchenko   bool HasStartAsEntry = false;
5661e4ee588SMaksim Panchenko 
567a34c753fSRafael Auler   // Number of targets other than __builtin_unreachable.
568a34c753fSRafael Auler   uint64_t NumRealEntries = 0;
569a34c753fSRafael Auler 
57043d0891dSMaksim Panchenko   // Size of the jump table without trailing __builtin_unreachable entries.
57143d0891dSMaksim Panchenko   size_t TrimmedSize = 0;
57243d0891dSMaksim Panchenko 
57343d0891dSMaksim Panchenko   auto addEntryAddress = [&](uint64_t EntryAddress, bool Unreachable = false) {
57443d0891dSMaksim Panchenko     if (!EntriesAsAddress)
57543d0891dSMaksim Panchenko       return;
57605523dc3SHuan Nguyen     EntriesAsAddress->emplace_back(EntryAddress);
57743d0891dSMaksim Panchenko     if (!Unreachable)
57843d0891dSMaksim Panchenko       TrimmedSize = EntriesAsAddress->size();
579a34c753fSRafael Auler   };
580a34c753fSRafael Auler 
58108ab4fafSAmir Ayupov   ErrorOr<const BinarySection &> Section = getSectionForAddress(Address);
582a34c753fSRafael Auler   if (!Section)
583a34c753fSRafael Auler     return false;
584a34c753fSRafael Auler 
585a34c753fSRafael Auler   // The upper bound is defined by containing object, section limits, and
586a34c753fSRafael Auler   // the next jump table in memory.
587a34c753fSRafael Auler   uint64_t UpperBound = Section->getEndAddress();
588a34c753fSRafael Auler   const BinaryData *JumpTableBD = getBinaryDataAtAddress(Address);
589a34c753fSRafael Auler   if (JumpTableBD && JumpTableBD->getSize()) {
590a34c753fSRafael Auler     assert(JumpTableBD->getEndAddress() <= UpperBound &&
591a34c753fSRafael Auler            "data object cannot cross a section boundary");
592a34c753fSRafael Auler     UpperBound = JumpTableBD->getEndAddress();
593a34c753fSRafael Auler   }
5943652483cSRafael Auler   if (NextJTAddress)
595a34c753fSRafael Auler     UpperBound = std::min(NextJTAddress, UpperBound);
596a34c753fSRafael Auler 
597556efdbaSAmir Ayupov   LLVM_DEBUG({
598556efdbaSAmir Ayupov     using JTT = JumpTable::JumpTableType;
599556efdbaSAmir Ayupov     dbgs() << formatv("BOLT-DEBUG: analyzeJumpTable @{0:x} in {1}, JTT={2}\n",
600556efdbaSAmir Ayupov                       Address, BF.getPrintName(),
601556efdbaSAmir Ayupov                       Type == JTT::JTT_PIC ? "PIC" : "Normal");
602556efdbaSAmir Ayupov   });
603a34c753fSRafael Auler   const uint64_t EntrySize = getJumpTableEntrySize(Type);
604a34c753fSRafael Auler   for (uint64_t EntryAddress = Address; EntryAddress <= UpperBound - EntrySize;
605a34c753fSRafael Auler        EntryAddress += EntrySize) {
606a34c753fSRafael Auler     LLVM_DEBUG(dbgs() << "  * Checking 0x" << Twine::utohexstr(EntryAddress)
607a34c753fSRafael Auler                       << " -> ");
608a34c753fSRafael Auler     // Check if there's a proper relocation against the jump table entry.
609a34c753fSRafael Auler     if (HasRelocations) {
610a34c753fSRafael Auler       if (Type == JumpTable::JTT_PIC &&
611a34c753fSRafael Auler           !DataPCRelocations.count(EntryAddress)) {
612a34c753fSRafael Auler         LLVM_DEBUG(
613a34c753fSRafael Auler             dbgs() << "FAIL: JTT_PIC table, no relocation for this address\n");
614a34c753fSRafael Auler         break;
615a34c753fSRafael Auler       }
616a34c753fSRafael Auler       if (Type == JumpTable::JTT_NORMAL && !getRelocationAt(EntryAddress)) {
617a34c753fSRafael Auler         LLVM_DEBUG(
618a34c753fSRafael Auler             dbgs()
619a34c753fSRafael Auler             << "FAIL: JTT_NORMAL table, no relocation for this address\n");
620a34c753fSRafael Auler         break;
621a34c753fSRafael Auler       }
622a34c753fSRafael Auler     }
623a34c753fSRafael Auler 
62440c2e0faSMaksim Panchenko     const uint64_t Value =
62540c2e0faSMaksim Panchenko         (Type == JumpTable::JTT_PIC)
626a34c753fSRafael Auler             ? Address + *getSignedValueAtAddress(EntryAddress, EntrySize)
627a34c753fSRafael Auler             : *getPointerAtAddress(EntryAddress);
628a34c753fSRafael Auler 
629a34c753fSRafael Auler     // __builtin_unreachable() case.
63043d0891dSMaksim Panchenko     if (Value == UnreachableAddress) {
63143d0891dSMaksim Panchenko       addEntryAddress(Value, /*Unreachable*/ true);
632a34c753fSRafael Auler       HasUnreachable = true;
633556efdbaSAmir Ayupov       LLVM_DEBUG(dbgs() << formatv("OK: {0:x} __builtin_unreachable\n", Value));
634a34c753fSRafael Auler       continue;
635a34c753fSRafael Auler     }
636a34c753fSRafael Auler 
6371e4ee588SMaksim Panchenko     // Function start is another special case. It is allowed in the jump table,
6381e4ee588SMaksim Panchenko     // but we need at least one another regular entry to distinguish the table
6391e4ee588SMaksim Panchenko     // from, e.g. a function pointer array.
6401e4ee588SMaksim Panchenko     if (Value == BF.getAddress()) {
6411e4ee588SMaksim Panchenko       HasStartAsEntry = true;
6421e4ee588SMaksim Panchenko       addEntryAddress(Value);
6431e4ee588SMaksim Panchenko       continue;
6441e4ee588SMaksim Panchenko     }
6451e4ee588SMaksim Panchenko 
646a34c753fSRafael Auler     // Function or one of its fragments.
64708ab4fafSAmir Ayupov     const BinaryFunction *TargetBF = getBinaryFunctionContainingAddress(Value);
6481e4ee588SMaksim Panchenko     const bool DoesBelongToFunction =
6491e4ee588SMaksim Panchenko         BF.containsAddress(Value) ||
65083ea7ce3SAmir Ayupov         (TargetBF && areRelatedFragments(TargetBF, &BF));
6511e4ee588SMaksim Panchenko     if (!DoesBelongToFunction) {
652a34c753fSRafael Auler       LLVM_DEBUG({
653a34c753fSRafael Auler         if (!BF.containsAddress(Value)) {
654a34c753fSRafael Auler           dbgs() << "FAIL: function doesn't contain this address\n";
655a34c753fSRafael Auler           if (TargetBF) {
656a34c753fSRafael Auler             dbgs() << "  ! function containing this address: "
657a34c753fSRafael Auler                    << TargetBF->getPrintName() << '\n';
658556efdbaSAmir Ayupov             if (TargetBF->isFragment()) {
659556efdbaSAmir Ayupov               dbgs() << "  ! is a fragment";
660556efdbaSAmir Ayupov               for (BinaryFunction *Parent : TargetBF->ParentFragments)
661556efdbaSAmir Ayupov                 dbgs() << ", parent: " << Parent->getPrintName();
662556efdbaSAmir Ayupov               dbgs() << '\n';
663556efdbaSAmir Ayupov             }
664a34c753fSRafael Auler           }
665a34c753fSRafael Auler         }
666a34c753fSRafael Auler       });
667a34c753fSRafael Auler       break;
668a34c753fSRafael Auler     }
669a34c753fSRafael Auler 
670a34c753fSRafael Auler     // Check there's an instruction at this offset.
671a34c753fSRafael Auler     if (TargetBF->getState() == BinaryFunction::State::Disassembled &&
672a34c753fSRafael Auler         !TargetBF->getInstructionAtOffset(Value - TargetBF->getAddress())) {
673556efdbaSAmir Ayupov       LLVM_DEBUG(dbgs() << formatv("FAIL: no instruction at {0:x}\n", Value));
674a34c753fSRafael Auler       break;
675a34c753fSRafael Auler     }
676a34c753fSRafael Auler 
677a34c753fSRafael Auler     ++NumRealEntries;
678556efdbaSAmir Ayupov     LLVM_DEBUG(dbgs() << formatv("OK: {0:x} real entry\n", Value));
679a34c753fSRafael Auler 
68008ab4fafSAmir Ayupov     if (TargetBF != &BF && HasEntryInFragment)
68108ab4fafSAmir Ayupov       *HasEntryInFragment = true;
68205523dc3SHuan Nguyen     addEntryAddress(Value);
683a34c753fSRafael Auler   }
684a34c753fSRafael Auler 
68543d0891dSMaksim Panchenko   // Trim direct/normal jump table to exclude trailing unreachable entries that
68643d0891dSMaksim Panchenko   // can collide with a function address.
68743d0891dSMaksim Panchenko   if (Type == JumpTable::JTT_NORMAL && EntriesAsAddress &&
68843d0891dSMaksim Panchenko       TrimmedSize != EntriesAsAddress->size() &&
68943d0891dSMaksim Panchenko       getBinaryFunctionAtAddress(UnreachableAddress))
69043d0891dSMaksim Panchenko     EntriesAsAddress->resize(TrimmedSize);
69143d0891dSMaksim Panchenko 
692a34c753fSRafael Auler   // It's a jump table if the number of real entries is more than 1, or there's
6931e4ee588SMaksim Panchenko   // one real entry and one or more special targets. If there are only multiple
6941e4ee588SMaksim Panchenko   // special targets, then it's not a jump table.
6951e4ee588SMaksim Panchenko   return NumRealEntries + (HasUnreachable || HasStartAsEntry) >= 2;
696a34c753fSRafael Auler }
697a34c753fSRafael Auler 
698a34c753fSRafael Auler void BinaryContext::populateJumpTables() {
699a34c753fSRafael Auler   LLVM_DEBUG(dbgs() << "DataPCRelocations: " << DataPCRelocations.size()
700a34c753fSRafael Auler                     << '\n');
701a34c753fSRafael Auler   for (auto JTI = JumpTables.begin(), JTE = JumpTables.end(); JTI != JTE;
702a34c753fSRafael Auler        ++JTI) {
703a34c753fSRafael Auler     JumpTable *JT = JTI->second;
704a34c753fSRafael Auler 
70505523dc3SHuan Nguyen     bool NonSimpleParent = false;
70605523dc3SHuan Nguyen     for (BinaryFunction *BF : JT->Parents)
70705523dc3SHuan Nguyen       NonSimpleParent |= !BF->isSimple();
70805523dc3SHuan Nguyen     if (NonSimpleParent)
709a34c753fSRafael Auler       continue;
710a34c753fSRafael Auler 
711a34c753fSRafael Auler     uint64_t NextJTAddress = 0;
712a34c753fSRafael Auler     auto NextJTI = std::next(JTI);
7133652483cSRafael Auler     if (NextJTI != JTE)
714a34c753fSRafael Auler       NextJTAddress = NextJTI->second->getAddress();
715a34c753fSRafael Auler 
71605523dc3SHuan Nguyen     const bool Success =
71705523dc3SHuan Nguyen         analyzeJumpTable(JT->getAddress(), JT->Type, *(JT->Parents[0]),
71808ab4fafSAmir Ayupov                          NextJTAddress, &JT->EntriesAsAddress, &JT->IsSplit);
719a34c753fSRafael Auler     if (!Success) {
720055f9f6dSAmir Ayupov       LLVM_DEBUG({
721055f9f6dSAmir Ayupov         dbgs() << "failed to analyze ";
722a34c753fSRafael Auler         JT->print(dbgs());
723a34c753fSRafael Auler         if (NextJTI != JTE) {
724055f9f6dSAmir Ayupov           dbgs() << "next ";
725a34c753fSRafael Auler           NextJTI->second->print(dbgs());
726a34c753fSRafael Auler         }
727055f9f6dSAmir Ayupov       });
728468d4f6dSAmir Ayupov       llvm_unreachable("jump table heuristic failure");
729a34c753fSRafael Auler     }
73005523dc3SHuan Nguyen     for (BinaryFunction *Frag : JT->Parents) {
73108ab4fafSAmir Ayupov       if (JT->IsSplit)
73208ab4fafSAmir Ayupov         Frag->setHasIndirectTargetToSplitFragment(true);
73305523dc3SHuan Nguyen       for (uint64_t EntryAddress : JT->EntriesAsAddress)
73405523dc3SHuan Nguyen         // if target is builtin_unreachable
73505523dc3SHuan Nguyen         if (EntryAddress == Frag->getAddress() + Frag->getSize()) {
73605523dc3SHuan Nguyen           Frag->IgnoredBranches.emplace_back(EntryAddress - Frag->getAddress(),
73705523dc3SHuan Nguyen                                              Frag->getSize());
73805523dc3SHuan Nguyen         } else if (EntryAddress >= Frag->getAddress() &&
73905523dc3SHuan Nguyen                    EntryAddress < Frag->getAddress() + Frag->getSize()) {
74005523dc3SHuan Nguyen           Frag->registerReferencedOffset(EntryAddress - Frag->getAddress());
74105523dc3SHuan Nguyen         }
742a34c753fSRafael Auler     }
743a34c753fSRafael Auler 
744a34c753fSRafael Auler     // In strict mode, erase PC-relative relocation record. Later we check that
745a34c753fSRafael Auler     // all such records are erased and thus have been accounted for.
746a34c753fSRafael Auler     if (opts::StrictMode && JT->Type == JumpTable::JTT_PIC) {
747a34c753fSRafael Auler       for (uint64_t Address = JT->getAddress();
748a34c753fSRafael Auler            Address < JT->getAddress() + JT->getSize();
749a34c753fSRafael Auler            Address += JT->EntrySize) {
750a34c753fSRafael Auler         DataPCRelocations.erase(DataPCRelocations.find(Address));
751a34c753fSRafael Auler       }
752a34c753fSRafael Auler     }
753a34c753fSRafael Auler 
754a34c753fSRafael Auler     // Mark to skip the function and all its fragments.
75505523dc3SHuan Nguyen     for (BinaryFunction *Frag : JT->Parents)
75605523dc3SHuan Nguyen       if (Frag->hasIndirectTargetToSplitFragment())
75705523dc3SHuan Nguyen         addFragmentsToSkip(Frag);
758a34c753fSRafael Auler   }
759a34c753fSRafael Auler 
760a34c753fSRafael Auler   if (opts::StrictMode && DataPCRelocations.size()) {
761a34c753fSRafael Auler     LLVM_DEBUG({
762a34c753fSRafael Auler       dbgs() << DataPCRelocations.size()
763a34c753fSRafael Auler              << " unclaimed PC-relative relocations left in data:\n";
764a34c753fSRafael Auler       for (uint64_t Reloc : DataPCRelocations)
765a34c753fSRafael Auler         dbgs() << Twine::utohexstr(Reloc) << '\n';
766a34c753fSRafael Auler     });
767a34c753fSRafael Auler     assert(0 && "unclaimed PC-relative relocations left in data\n");
768a34c753fSRafael Auler   }
769a34c753fSRafael Auler   clearList(DataPCRelocations);
770a34c753fSRafael Auler }
7716aa735ceSAmir Ayupov 
7726aa735ceSAmir Ayupov void BinaryContext::skipMarkedFragments() {
77305523dc3SHuan Nguyen   std::vector<BinaryFunction *> FragmentQueue;
77405523dc3SHuan Nguyen   // Copy the functions to FragmentQueue.
77505523dc3SHuan Nguyen   FragmentQueue.assign(FragmentsToSkip.begin(), FragmentsToSkip.end());
7766aa735ceSAmir Ayupov   auto addToWorklist = [&](BinaryFunction *Function) -> void {
77705523dc3SHuan Nguyen     if (FragmentsToSkip.count(Function))
7786aa735ceSAmir Ayupov       return;
77905523dc3SHuan Nguyen     FragmentQueue.push_back(Function);
78005523dc3SHuan Nguyen     addFragmentsToSkip(Function);
7816aa735ceSAmir Ayupov   };
7826aa735ceSAmir Ayupov   // Functions containing split jump tables need to be skipped with all
7836aa735ceSAmir Ayupov   // fragments (transitively).
78405523dc3SHuan Nguyen   for (size_t I = 0; I != FragmentQueue.size(); I++) {
78505523dc3SHuan Nguyen     BinaryFunction *BF = FragmentQueue[I];
78605523dc3SHuan Nguyen     assert(FragmentsToSkip.count(BF) &&
7876aa735ceSAmir Ayupov            "internal error in traversing function fragments");
7886aa735ceSAmir Ayupov     if (opts::Verbosity >= 1)
78952cf0711SAmir Ayupov       this->errs() << "BOLT-WARNING: Ignoring " << BF->getPrintName() << '\n';
79082095bd5SHuan Nguyen     BF->setSimple(false);
79105523dc3SHuan Nguyen     BF->setHasIndirectTargetToSplitFragment(true);
79282095bd5SHuan Nguyen 
793d2c87699SAmir Ayupov     llvm::for_each(BF->Fragments, addToWorklist);
794d2c87699SAmir Ayupov     llvm::for_each(BF->ParentFragments, addToWorklist);
7956aa735ceSAmir Ayupov   }
796641e92d4SMaksim Panchenko   if (!FragmentsToSkip.empty())
79752cf0711SAmir Ayupov     this->errs() << "BOLT-WARNING: skipped " << FragmentsToSkip.size()
79852cf0711SAmir Ayupov                  << " function" << (FragmentsToSkip.size() == 1 ? "" : "s")
799641e92d4SMaksim Panchenko                  << " due to cold fragments\n";
800a34c753fSRafael Auler }
801a34c753fSRafael Auler 
80240c2e0faSMaksim Panchenko MCSymbol *BinaryContext::getOrCreateGlobalSymbol(uint64_t Address, Twine Prefix,
803a34c753fSRafael Auler                                                  uint64_t Size,
804a34c753fSRafael Auler                                                  uint16_t Alignment,
805a34c753fSRafael Auler                                                  unsigned Flags) {
806a34c753fSRafael Auler   auto Itr = BinaryDataMap.find(Address);
807a34c753fSRafael Auler   if (Itr != BinaryDataMap.end()) {
808a34c753fSRafael Auler     assert(Itr->second->getSize() == Size || !Size);
809a34c753fSRafael Auler     return Itr->second->getSymbol();
810a34c753fSRafael Auler   }
811a34c753fSRafael Auler 
812a34c753fSRafael Auler   std::string Name = (Prefix + "0x" + Twine::utohexstr(Address)).str();
813a34c753fSRafael Auler   assert(!GlobalSymbols.count(Name) && "created name is not unique");
814a34c753fSRafael Auler   return registerNameAtAddress(Name, Address, Size, Alignment, Flags);
815a34c753fSRafael Auler }
816a34c753fSRafael Auler 
817a34c753fSRafael Auler MCSymbol *BinaryContext::getOrCreateUndefinedGlobalSymbol(StringRef Name) {
818a34c753fSRafael Auler   return Ctx->getOrCreateSymbol(Name);
819a34c753fSRafael Auler }
820a34c753fSRafael Auler 
821a34c753fSRafael Auler BinaryFunction *BinaryContext::createBinaryFunction(
822a34c753fSRafael Auler     const std::string &Name, BinarySection &Section, uint64_t Address,
823a34c753fSRafael Auler     uint64_t Size, uint64_t SymbolSize, uint16_t Alignment) {
824a34c753fSRafael Auler   auto Result = BinaryFunctions.emplace(
825a34c753fSRafael Auler       Address, BinaryFunction(Name, Section, Address, Size, *this));
826a34c753fSRafael Auler   assert(Result.second == true && "unexpected duplicate function");
827a34c753fSRafael Auler   BinaryFunction *BF = &Result.first->second;
828a34c753fSRafael Auler   registerNameAtAddress(Name, Address, SymbolSize ? SymbolSize : Size,
829a34c753fSRafael Auler                         Alignment);
830a34c753fSRafael Auler   setSymbolToFunctionMap(BF->getSymbol(), BF);
831a34c753fSRafael Auler   return BF;
832a34c753fSRafael Auler }
833a34c753fSRafael Auler 
834a34c753fSRafael Auler const MCSymbol *
835a34c753fSRafael Auler BinaryContext::getOrCreateJumpTable(BinaryFunction &Function, uint64_t Address,
836a34c753fSRafael Auler                                     JumpTable::JumpTableType Type) {
83705523dc3SHuan Nguyen   // Two fragments of same function access same jump table
838a34c753fSRafael Auler   if (JumpTable *JT = getJumpTableContainingAddress(Address)) {
839a34c753fSRafael Auler     assert(JT->Type == Type && "jump table types have to match");
840a34c753fSRafael Auler     assert(Address == JT->getAddress() && "unexpected non-empty jump table");
841a34c753fSRafael Auler 
84205523dc3SHuan Nguyen     // Prevent associating a jump table to a specific fragment twice.
8439d2dd009SAmir Ayupov     if (!llvm::is_contained(JT->Parents, &Function)) {
8449d2dd009SAmir Ayupov       assert(llvm::all_of(JT->Parents,
8459d2dd009SAmir Ayupov                           [&](const BinaryFunction *BF) {
8469d2dd009SAmir Ayupov                             return areRelatedFragments(&Function, BF);
8479d2dd009SAmir Ayupov                           }) &&
84805523dc3SHuan Nguyen              "cannot re-use jump table of a different function");
84928b1dcb1SHuan Nguyen       // Duplicate the entry for the parent function for easy access
85005523dc3SHuan Nguyen       JT->Parents.push_back(&Function);
85128b1dcb1SHuan Nguyen       if (opts::Verbosity > 2) {
85252cf0711SAmir Ayupov         this->outs() << "BOLT-INFO: Multiple fragments access same jump table: "
85305523dc3SHuan Nguyen                      << JT->Parents[0]->getPrintName() << "; "
85405523dc3SHuan Nguyen                      << Function.getPrintName() << "\n";
85552cf0711SAmir Ayupov         JT->print(this->outs());
85628b1dcb1SHuan Nguyen       }
85728b1dcb1SHuan Nguyen       Function.JumpTables.emplace(Address, JT);
8589d2dd009SAmir Ayupov       for (BinaryFunction *Parent : JT->Parents)
8599d2dd009SAmir Ayupov         Parent->setHasIndirectTargetToSplitFragment(true);
86028b1dcb1SHuan Nguyen     }
86105523dc3SHuan Nguyen 
86205523dc3SHuan Nguyen     bool IsJumpTableParent = false;
8630c925861SThorsten Schütt     (void)IsJumpTableParent;
86405523dc3SHuan Nguyen     for (BinaryFunction *Frag : JT->Parents)
86505523dc3SHuan Nguyen       if (Frag == &Function)
86605523dc3SHuan Nguyen         IsJumpTableParent = true;
86705523dc3SHuan Nguyen     assert(IsJumpTableParent &&
86805523dc3SHuan Nguyen            "cannot re-use jump table of a different function");
869a34c753fSRafael Auler     return JT->getFirstLabel();
870a34c753fSRafael Auler   }
871a34c753fSRafael Auler 
872a34c753fSRafael Auler   // Re-use the existing symbol if possible.
873a34c753fSRafael Auler   MCSymbol *JTLabel = nullptr;
874a34c753fSRafael Auler   if (BinaryData *Object = getBinaryDataAtAddress(Address)) {
875a34c753fSRafael Auler     if (!isInternalSymbolName(Object->getSymbol()->getName()))
876a34c753fSRafael Auler       JTLabel = Object->getSymbol();
877a34c753fSRafael Auler   }
878a34c753fSRafael Auler 
879a34c753fSRafael Auler   const uint64_t EntrySize = getJumpTableEntrySize(Type);
880a34c753fSRafael Auler   if (!JTLabel) {
881a34c753fSRafael Auler     const std::string JumpTableName = generateJumpTableName(Function, Address);
882a34c753fSRafael Auler     JTLabel = registerNameAtAddress(JumpTableName, Address, 0, EntrySize);
883a34c753fSRafael Auler   }
884a34c753fSRafael Auler 
885a34c753fSRafael Auler   LLVM_DEBUG(dbgs() << "BOLT-DEBUG: creating jump table " << JTLabel->getName()
886a34c753fSRafael Auler                     << " in function " << Function << '\n');
887a34c753fSRafael Auler 
888a34c753fSRafael Auler   JumpTable *JT = new JumpTable(*JTLabel, Address, EntrySize, Type,
88905523dc3SHuan Nguyen                                 JumpTable::LabelMapType{{0, JTLabel}},
890a34c753fSRafael Auler                                 *getSectionForAddress(Address));
89105523dc3SHuan Nguyen   JT->Parents.push_back(&Function);
89205523dc3SHuan Nguyen   if (opts::Verbosity > 2)
89352cf0711SAmir Ayupov     JT->print(this->outs());
894a34c753fSRafael Auler   JumpTables.emplace(Address, JT);
895a34c753fSRafael Auler 
896a34c753fSRafael Auler   // Duplicate the entry for the parent function for easy access.
897a34c753fSRafael Auler   Function.JumpTables.emplace(Address, JT);
898a34c753fSRafael Auler   return JTLabel;
899a34c753fSRafael Auler }
900a34c753fSRafael Auler 
901a34c753fSRafael Auler std::pair<uint64_t, const MCSymbol *>
902a34c753fSRafael Auler BinaryContext::duplicateJumpTable(BinaryFunction &Function, JumpTable *JT,
903a34c753fSRafael Auler                                   const MCSymbol *OldLabel) {
904a34c753fSRafael Auler   auto L = scopeLock();
905a34c753fSRafael Auler   unsigned Offset = 0;
906a34c753fSRafael Auler   bool Found = false;
907a34c753fSRafael Auler   for (std::pair<const unsigned, MCSymbol *> Elmt : JT->Labels) {
908a34c753fSRafael Auler     if (Elmt.second != OldLabel)
909a34c753fSRafael Auler       continue;
910a34c753fSRafael Auler     Offset = Elmt.first;
911a34c753fSRafael Auler     Found = true;
912a34c753fSRafael Auler     break;
913a34c753fSRafael Auler   }
914a34c753fSRafael Auler   assert(Found && "Label not found");
915c907d6e0SAmir Ayupov   (void)Found;
916a34c753fSRafael Auler   MCSymbol *NewLabel = Ctx->createNamedTempSymbol("duplicatedJT");
917a34c753fSRafael Auler   JumpTable *NewJT =
918a34c753fSRafael Auler       new JumpTable(*NewLabel, JT->getAddress(), JT->EntrySize, JT->Type,
91905523dc3SHuan Nguyen                     JumpTable::LabelMapType{{Offset, NewLabel}},
920a34c753fSRafael Auler                     *getSectionForAddress(JT->getAddress()));
92105523dc3SHuan Nguyen   NewJT->Parents = JT->Parents;
922a34c753fSRafael Auler   NewJT->Entries = JT->Entries;
923a34c753fSRafael Auler   NewJT->Counts = JT->Counts;
924a34c753fSRafael Auler   uint64_t JumpTableID = ++DuplicatedJumpTables;
925a34c753fSRafael Auler   // Invert it to differentiate from regular jump tables whose IDs are their
926a34c753fSRafael Auler   // addresses in the input binary memory space
927a34c753fSRafael Auler   JumpTableID = ~JumpTableID;
928a34c753fSRafael Auler   JumpTables.emplace(JumpTableID, NewJT);
929a34c753fSRafael Auler   Function.JumpTables.emplace(JumpTableID, NewJT);
930a34c753fSRafael Auler   return std::make_pair(JumpTableID, NewLabel);
931a34c753fSRafael Auler }
932a34c753fSRafael Auler 
933a34c753fSRafael Auler std::string BinaryContext::generateJumpTableName(const BinaryFunction &BF,
934a34c753fSRafael Auler                                                  uint64_t Address) {
935a34c753fSRafael Auler   size_t Id;
936a34c753fSRafael Auler   uint64_t Offset = 0;
937a34c753fSRafael Auler   if (const JumpTable *JT = BF.getJumpTableContainingAddress(Address)) {
938a34c753fSRafael Auler     Offset = Address - JT->getAddress();
939c8fc234eSshaw young     auto JTLabelsIt = JT->Labels.find(Offset);
940c8fc234eSshaw young     if (JTLabelsIt != JT->Labels.end())
941c8fc234eSshaw young       return std::string(JTLabelsIt->second->getName());
942c8fc234eSshaw young 
943c8fc234eSshaw young     auto JTIdsIt = JumpTableIds.find(JT->getAddress());
944c8fc234eSshaw young     assert(JTIdsIt != JumpTableIds.end());
945c8fc234eSshaw young     Id = JTIdsIt->second;
946a34c753fSRafael Auler   } else {
947a34c753fSRafael Auler     Id = JumpTableIds[Address] = BF.JumpTables.size();
948a34c753fSRafael Auler   }
949a34c753fSRafael Auler   return ("JUMP_TABLE/" + BF.getOneName().str() + "." + std::to_string(Id) +
950a34c753fSRafael Auler           (Offset ? ("." + std::to_string(Offset)) : ""));
951a34c753fSRafael Auler }
952a34c753fSRafael Auler 
953a34c753fSRafael Auler bool BinaryContext::hasValidCodePadding(const BinaryFunction &BF) {
954a34c753fSRafael Auler   // FIXME: aarch64 support is missing.
955a34c753fSRafael Auler   if (!isX86())
956a34c753fSRafael Auler     return true;
957a34c753fSRafael Auler 
958a34c753fSRafael Auler   if (BF.getSize() == BF.getMaxSize())
959a34c753fSRafael Auler     return true;
960a34c753fSRafael Auler 
961a34c753fSRafael Auler   ErrorOr<ArrayRef<unsigned char>> FunctionData = BF.getData();
962a34c753fSRafael Auler   assert(FunctionData && "cannot get function as data");
963a34c753fSRafael Auler 
964a34c753fSRafael Auler   uint64_t Offset = BF.getSize();
965a34c753fSRafael Auler   MCInst Instr;
966a34c753fSRafael Auler   uint64_t InstrSize = 0;
967a34c753fSRafael Auler   uint64_t InstrAddress = BF.getAddress() + Offset;
968a34c753fSRafael Auler   using std::placeholders::_1;
969a34c753fSRafael Auler 
970a34c753fSRafael Auler   // Skip instructions that satisfy the predicate condition.
971a34c753fSRafael Auler   auto skipInstructions = [&](std::function<bool(const MCInst &)> Predicate) {
972a34c753fSRafael Auler     const uint64_t StartOffset = Offset;
973a34c753fSRafael Auler     for (; Offset < BF.getMaxSize();
974a34c753fSRafael Auler          Offset += InstrSize, InstrAddress += InstrSize) {
97540c2e0faSMaksim Panchenko       if (!DisAsm->getInstruction(Instr, InstrSize, FunctionData->slice(Offset),
97640c2e0faSMaksim Panchenko                                   InstrAddress, nulls()))
977a34c753fSRafael Auler         break;
978a34c753fSRafael Auler       if (!Predicate(Instr))
979a34c753fSRafael Auler         break;
980a34c753fSRafael Auler     }
981a34c753fSRafael Auler 
982a34c753fSRafael Auler     return Offset - StartOffset;
983a34c753fSRafael Auler   };
984a34c753fSRafael Auler 
985a34c753fSRafael Auler   // Skip a sequence of zero bytes.
986a34c753fSRafael Auler   auto skipZeros = [&]() {
987a34c753fSRafael Auler     const uint64_t StartOffset = Offset;
988a34c753fSRafael Auler     for (; Offset < BF.getMaxSize(); ++Offset)
989a34c753fSRafael Auler       if ((*FunctionData)[Offset] != 0)
990a34c753fSRafael Auler         break;
991a34c753fSRafael Auler 
992a34c753fSRafael Auler     return Offset - StartOffset;
993a34c753fSRafael Auler   };
994a34c753fSRafael Auler 
995a34c753fSRafael Auler   // Accept the whole padding area filled with breakpoints.
996a34c753fSRafael Auler   auto isBreakpoint = std::bind(&MCPlusBuilder::isBreakpoint, MIB.get(), _1);
997a34c753fSRafael Auler   if (skipInstructions(isBreakpoint) && Offset == BF.getMaxSize())
998a34c753fSRafael Auler     return true;
999a34c753fSRafael Auler 
1000a34c753fSRafael Auler   auto isNoop = std::bind(&MCPlusBuilder::isNoop, MIB.get(), _1);
1001a34c753fSRafael Auler 
1002a34c753fSRafael Auler   // Some functions have a jump to the next function or to the padding area
1003a34c753fSRafael Auler   // inserted after the body.
1004a34c753fSRafael Auler   auto isSkipJump = [&](const MCInst &Instr) {
1005a34c753fSRafael Auler     uint64_t TargetAddress = 0;
1006a34c753fSRafael Auler     if (MIB->isUnconditionalBranch(Instr) &&
1007a34c753fSRafael Auler         MIB->evaluateBranch(Instr, InstrAddress, InstrSize, TargetAddress)) {
1008a34c753fSRafael Auler       if (TargetAddress >= InstrAddress + InstrSize &&
1009a34c753fSRafael Auler           TargetAddress <= BF.getAddress() + BF.getMaxSize()) {
1010a34c753fSRafael Auler         return true;
1011a34c753fSRafael Auler       }
1012a34c753fSRafael Auler     }
1013a34c753fSRafael Auler     return false;
1014a34c753fSRafael Auler   };
1015a34c753fSRafael Auler 
1016a34c753fSRafael Auler   // Skip over nops, jumps, and zero padding. Allow interleaving (this happens).
101740c2e0faSMaksim Panchenko   while (skipInstructions(isNoop) || skipInstructions(isSkipJump) ||
1018a34c753fSRafael Auler          skipZeros())
1019a34c753fSRafael Auler     ;
1020a34c753fSRafael Auler 
1021a34c753fSRafael Auler   if (Offset == BF.getMaxSize())
1022a34c753fSRafael Auler     return true;
1023a34c753fSRafael Auler 
1024a34c753fSRafael Auler   if (opts::Verbosity >= 1) {
102552cf0711SAmir Ayupov     this->errs() << "BOLT-WARNING: bad padding at address 0x"
1026a34c753fSRafael Auler                  << Twine::utohexstr(BF.getAddress() + BF.getSize())
102740c2e0faSMaksim Panchenko                  << " starting at offset " << (Offset - BF.getSize())
102840c2e0faSMaksim Panchenko                  << " in function " << BF << '\n'
102952cf0711SAmir Ayupov                  << FunctionData->slice(BF.getSize(),
103052cf0711SAmir Ayupov                                         BF.getMaxSize() - BF.getSize())
1031a34c753fSRafael Auler                  << '\n';
1032a34c753fSRafael Auler   }
1033a34c753fSRafael Auler 
1034a34c753fSRafael Auler   return false;
1035a34c753fSRafael Auler }
1036a34c753fSRafael Auler 
1037a34c753fSRafael Auler void BinaryContext::adjustCodePadding() {
1038a34c753fSRafael Auler   for (auto &BFI : BinaryFunctions) {
1039a34c753fSRafael Auler     BinaryFunction &BF = BFI.second;
1040a34c753fSRafael Auler     if (!shouldEmit(BF))
1041a34c753fSRafael Auler       continue;
1042a34c753fSRafael Auler 
1043a34c753fSRafael Auler     if (!hasValidCodePadding(BF)) {
1044a34c753fSRafael Auler       if (HasRelocations) {
1045a34c753fSRafael Auler         if (opts::Verbosity >= 1) {
104652cf0711SAmir Ayupov           this->outs() << "BOLT-INFO: function " << BF
1047a34c753fSRafael Auler                        << " has invalid padding. Ignoring the function.\n";
1048a34c753fSRafael Auler         }
1049a34c753fSRafael Auler         BF.setIgnored();
1050a34c753fSRafael Auler       } else {
1051a34c753fSRafael Auler         BF.setMaxSize(BF.getSize());
1052a34c753fSRafael Auler       }
1053a34c753fSRafael Auler     }
1054a34c753fSRafael Auler   }
1055a34c753fSRafael Auler }
1056a34c753fSRafael Auler 
105740c2e0faSMaksim Panchenko MCSymbol *BinaryContext::registerNameAtAddress(StringRef Name, uint64_t Address,
1058a34c753fSRafael Auler                                                uint64_t Size,
1059a34c753fSRafael Auler                                                uint16_t Alignment,
1060e49549ffSDavide Italiano                                                unsigned Flags) {
1061a34c753fSRafael Auler   // Register the name with MCContext.
1062a34c753fSRafael Auler   MCSymbol *Symbol = Ctx->getOrCreateSymbol(Name);
1063a34c753fSRafael Auler 
1064a34c753fSRafael Auler   auto GAI = BinaryDataMap.find(Address);
1065e49549ffSDavide Italiano   BinaryData *BD;
1066a34c753fSRafael Auler   if (GAI == BinaryDataMap.end()) {
1067a34c753fSRafael Auler     ErrorOr<BinarySection &> SectionOrErr = getSectionForAddress(Address);
1068e49549ffSDavide Italiano     BinarySection &Section =
1069e49549ffSDavide Italiano         SectionOrErr ? SectionOrErr.get() : absoluteSection();
107040c2e0faSMaksim Panchenko     BD = new BinaryData(*Symbol, Address, Size, Alignment ? Alignment : 1,
1071e49549ffSDavide Italiano                         Section, Flags);
1072a34c753fSRafael Auler     GAI = BinaryDataMap.emplace(Address, BD).first;
1073a34c753fSRafael Auler     GlobalSymbols[Name] = BD;
1074a34c753fSRafael Auler     updateObjectNesting(GAI);
1075a34c753fSRafael Auler   } else {
1076a34c753fSRafael Auler     BD = GAI->second;
1077a34c753fSRafael Auler     if (!BD->hasName(Name)) {
1078a34c753fSRafael Auler       GlobalSymbols[Name] = BD;
1079*6e8a1a45SFranklin       BD->updateSize(Size);
1080a34c753fSRafael Auler       BD->Symbols.push_back(Symbol);
1081a34c753fSRafael Auler     }
1082a34c753fSRafael Auler   }
1083a34c753fSRafael Auler 
1084a34c753fSRafael Auler   return Symbol;
1085a34c753fSRafael Auler }
1086a34c753fSRafael Auler 
1087a34c753fSRafael Auler const BinaryData *
1088a34c753fSRafael Auler BinaryContext::getBinaryDataContainingAddressImpl(uint64_t Address) const {
1089a34c753fSRafael Auler   auto NI = BinaryDataMap.lower_bound(Address);
1090a34c753fSRafael Auler   auto End = BinaryDataMap.end();
1091a34c753fSRafael Auler   if ((NI != End && Address == NI->first) ||
1092a34c753fSRafael Auler       ((NI != BinaryDataMap.begin()) && (NI-- != BinaryDataMap.begin()))) {
10933652483cSRafael Auler     if (NI->second->containsAddress(Address))
1094a34c753fSRafael Auler       return NI->second;
1095a34c753fSRafael Auler 
1096a34c753fSRafael Auler     // If this is a sub-symbol, see if a parent data contains the address.
1097a34c753fSRafael Auler     const BinaryData *BD = NI->second->getParent();
1098a34c753fSRafael Auler     while (BD) {
1099a34c753fSRafael Auler       if (BD->containsAddress(Address))
1100a34c753fSRafael Auler         return BD;
1101a34c753fSRafael Auler       BD = BD->getParent();
1102a34c753fSRafael Auler     }
1103a34c753fSRafael Auler   }
1104a34c753fSRafael Auler   return nullptr;
1105a34c753fSRafael Auler }
1106a34c753fSRafael Auler 
1107853e126cSRafael Auler BinaryData *BinaryContext::getGOTSymbol() {
1108853e126cSRafael Auler   // First tries to find a global symbol with that name
1109853e126cSRafael Auler   BinaryData *GOTSymBD = getBinaryDataByName("_GLOBAL_OFFSET_TABLE_");
1110853e126cSRafael Auler   if (GOTSymBD)
1111853e126cSRafael Auler     return GOTSymBD;
1112853e126cSRafael Auler 
1113853e126cSRafael Auler   // This symbol might be hidden from run-time link, so fetch the local
1114853e126cSRafael Auler   // definition if available.
1115853e126cSRafael Auler   GOTSymBD = getBinaryDataByName("_GLOBAL_OFFSET_TABLE_/1");
1116853e126cSRafael Auler   if (!GOTSymBD)
1117853e126cSRafael Auler     return nullptr;
1118853e126cSRafael Auler 
1119853e126cSRafael Auler   // If the local symbol is not unique, fail
1120853e126cSRafael Auler   unsigned Index = 2;
1121853e126cSRafael Auler   SmallString<30> Storage;
1122853e126cSRafael Auler   while (const BinaryData *BD =
1123853e126cSRafael Auler              getBinaryDataByName(Twine("_GLOBAL_OFFSET_TABLE_/")
1124853e126cSRafael Auler                                      .concat(Twine(Index++))
1125853e126cSRafael Auler                                      .toStringRef(Storage)))
1126853e126cSRafael Auler     if (BD->getAddress() != GOTSymBD->getAddress())
1127853e126cSRafael Auler       return nullptr;
1128853e126cSRafael Auler 
1129853e126cSRafael Auler   return GOTSymBD;
1130853e126cSRafael Auler }
1131853e126cSRafael Auler 
1132a34c753fSRafael Auler bool BinaryContext::setBinaryDataSize(uint64_t Address, uint64_t Size) {
1133a34c753fSRafael Auler   auto NI = BinaryDataMap.find(Address);
1134a34c753fSRafael Auler   assert(NI != BinaryDataMap.end());
1135a34c753fSRafael Auler   if (NI == BinaryDataMap.end())
1136a34c753fSRafael Auler     return false;
1137a34c753fSRafael Auler   // TODO: it's possible that a jump table starts at the same address
1138a34c753fSRafael Auler   // as a larger blob of private data.  When we set the size of the
1139a34c753fSRafael Auler   // jump table, it might be smaller than the total blob size.  In this
1140a34c753fSRafael Auler   // case we just leave the original size since (currently) it won't really
1141933df2a4SMaksim Panchenko   // affect anything.
1142a34c753fSRafael Auler   assert((!NI->second->Size || NI->second->Size == Size ||
1143a34c753fSRafael Auler           (NI->second->isJumpTable() && NI->second->Size > Size)) &&
1144a34c753fSRafael Auler          "can't change the size of a symbol that has already had its "
1145a34c753fSRafael Auler          "size set");
1146a34c753fSRafael Auler   if (!NI->second->Size) {
1147a34c753fSRafael Auler     NI->second->Size = Size;
1148a34c753fSRafael Auler     updateObjectNesting(NI);
1149a34c753fSRafael Auler     return true;
1150a34c753fSRafael Auler   }
1151a34c753fSRafael Auler   return false;
1152a34c753fSRafael Auler }
1153a34c753fSRafael Auler 
1154a34c753fSRafael Auler void BinaryContext::generateSymbolHashes() {
1155a34c753fSRafael Auler   auto isPadding = [](const BinaryData &BD) {
1156a34c753fSRafael Auler     StringRef Contents = BD.getSection().getContents();
1157a34c753fSRafael Auler     StringRef SymData = Contents.substr(BD.getOffset(), BD.getSize());
1158ad8fd5b1SKazu Hirata     return (BD.getName().starts_with("HOLEat") ||
1159a34c753fSRafael Auler             SymData.find_first_not_of(0) == StringRef::npos);
1160a34c753fSRafael Auler   };
1161a34c753fSRafael Auler 
1162a34c753fSRafael Auler   uint64_t NumCollisions = 0;
1163a34c753fSRafael Auler   for (auto &Entry : BinaryDataMap) {
1164a34c753fSRafael Auler     BinaryData &BD = *Entry.second;
1165a34c753fSRafael Auler     StringRef Name = BD.getName();
1166a34c753fSRafael Auler 
1167a34c753fSRafael Auler     if (!isInternalSymbolName(Name))
1168a34c753fSRafael Auler       continue;
1169a34c753fSRafael Auler 
1170a34c753fSRafael Auler     // First check if a non-anonymous alias exists and move it to the front.
1171a34c753fSRafael Auler     if (BD.getSymbols().size() > 1) {
1172d2c87699SAmir Ayupov       auto Itr = llvm::find_if(BD.getSymbols(), [&](const MCSymbol *Symbol) {
1173a34c753fSRafael Auler         return !isInternalSymbolName(Symbol->getName());
1174a34c753fSRafael Auler       });
1175a34c753fSRafael Auler       if (Itr != BD.getSymbols().end()) {
1176a34c753fSRafael Auler         size_t Idx = std::distance(BD.getSymbols().begin(), Itr);
1177a34c753fSRafael Auler         std::swap(BD.getSymbols()[0], BD.getSymbols()[Idx]);
1178a34c753fSRafael Auler         continue;
1179a34c753fSRafael Auler       }
1180a34c753fSRafael Auler     }
1181a34c753fSRafael Auler 
1182a34c753fSRafael Auler     // We have to skip 0 size symbols since they will all collide.
1183a34c753fSRafael Auler     if (BD.getSize() == 0) {
1184a34c753fSRafael Auler       continue;
1185a34c753fSRafael Auler     }
1186a34c753fSRafael Auler 
1187a34c753fSRafael Auler     const uint64_t Hash = BD.getSection().hash(BD);
1188a34c753fSRafael Auler     const size_t Idx = Name.find("0x");
118940c2e0faSMaksim Panchenko     std::string NewName =
119040c2e0faSMaksim Panchenko         (Twine(Name.substr(0, Idx)) + "_" + Twine::utohexstr(Hash)).str();
1191a34c753fSRafael Auler     if (getBinaryDataByName(NewName)) {
1192a34c753fSRafael Auler       // Ignore collisions for symbols that appear to be padding
1193a34c753fSRafael Auler       // (i.e. all zeros or a "hole")
1194a34c753fSRafael Auler       if (!isPadding(BD)) {
1195a34c753fSRafael Auler         if (opts::Verbosity) {
119652cf0711SAmir Ayupov           this->errs() << "BOLT-WARNING: collision detected when hashing " << BD
1197a34c753fSRafael Auler                        << " with new name (" << NewName << "), skipping.\n";
1198a34c753fSRafael Auler         }
1199a34c753fSRafael Auler         ++NumCollisions;
1200a34c753fSRafael Auler       }
1201a34c753fSRafael Auler       continue;
1202a34c753fSRafael Auler     }
120340c2e0faSMaksim Panchenko     BD.Symbols.insert(BD.Symbols.begin(), Ctx->getOrCreateSymbol(NewName));
1204a34c753fSRafael Auler     GlobalSymbols[NewName] = &BD;
1205a34c753fSRafael Auler   }
1206a34c753fSRafael Auler   if (NumCollisions) {
120752cf0711SAmir Ayupov     this->errs() << "BOLT-WARNING: " << NumCollisions
1208a34c753fSRafael Auler                  << " collisions detected while hashing binary objects";
1209a34c753fSRafael Auler     if (!opts::Verbosity)
121052cf0711SAmir Ayupov       this->errs() << ". Use -v=1 to see the list.";
121152cf0711SAmir Ayupov     this->errs() << '\n';
1212a34c753fSRafael Auler   }
1213a34c753fSRafael Auler }
1214a34c753fSRafael Auler 
12156aa735ceSAmir Ayupov bool BinaryContext::registerFragment(BinaryFunction &TargetFunction,
121683ea7ce3SAmir Ayupov                                      BinaryFunction &Function) {
12176aa735ceSAmir Ayupov   assert(TargetFunction.isFragment() && "TargetFunction must be a fragment");
1218e88122f5SAmir Ayupov   if (TargetFunction.isChildOf(Function))
12196aa735ceSAmir Ayupov     return true;
12206aa735ceSAmir Ayupov   TargetFunction.addParentFragment(Function);
1221a34c753fSRafael Auler   Function.addFragment(TargetFunction);
122283ea7ce3SAmir Ayupov   FragmentClasses.unionSets(&TargetFunction, &Function);
1223a34c753fSRafael Auler   if (!HasRelocations) {
1224a34c753fSRafael Auler     TargetFunction.setSimple(false);
1225a34c753fSRafael Auler     Function.setSimple(false);
1226a34c753fSRafael Auler   }
1227a34c753fSRafael Auler   if (opts::Verbosity >= 1) {
122852cf0711SAmir Ayupov     this->outs() << "BOLT-INFO: marking " << TargetFunction
122952cf0711SAmir Ayupov                  << " as a fragment of " << Function << '\n';
1230a34c753fSRafael Auler   }
12316aa735ceSAmir Ayupov   return true;
1232a34c753fSRafael Auler }
1233a34c753fSRafael Auler 
123435efe1d8SVladislav Khmelevsky void BinaryContext::addAdrpAddRelocAArch64(BinaryFunction &BF,
123535efe1d8SVladislav Khmelevsky                                            MCInst &LoadLowBits,
123635efe1d8SVladislav Khmelevsky                                            MCInst &LoadHiBits,
123735efe1d8SVladislav Khmelevsky                                            uint64_t Target) {
123835efe1d8SVladislav Khmelevsky   const MCSymbol *TargetSymbol;
123935efe1d8SVladislav Khmelevsky   uint64_t Addend = 0;
124035efe1d8SVladislav Khmelevsky   std::tie(TargetSymbol, Addend) = handleAddressRef(Target, BF,
124135efe1d8SVladislav Khmelevsky                                                     /*IsPCRel*/ true);
124235efe1d8SVladislav Khmelevsky   int64_t Val;
124335efe1d8SVladislav Khmelevsky   MIB->replaceImmWithSymbolRef(LoadHiBits, TargetSymbol, Addend, Ctx.get(), Val,
124435efe1d8SVladislav Khmelevsky                                ELF::R_AARCH64_ADR_PREL_PG_HI21);
124535efe1d8SVladislav Khmelevsky   MIB->replaceImmWithSymbolRef(LoadLowBits, TargetSymbol, Addend, Ctx.get(),
124635efe1d8SVladislav Khmelevsky                                Val, ELF::R_AARCH64_ADD_ABS_LO12_NC);
124735efe1d8SVladislav Khmelevsky }
124835efe1d8SVladislav Khmelevsky 
124935efe1d8SVladislav Khmelevsky bool BinaryContext::handleAArch64Veneer(uint64_t Address, bool MatchOnly) {
125035efe1d8SVladislav Khmelevsky   BinaryFunction *TargetFunction = getBinaryFunctionContainingAddress(Address);
125135efe1d8SVladislav Khmelevsky   if (TargetFunction)
125235efe1d8SVladislav Khmelevsky     return false;
125335efe1d8SVladislav Khmelevsky 
125435efe1d8SVladislav Khmelevsky   ErrorOr<BinarySection &> Section = getSectionForAddress(Address);
125535efe1d8SVladislav Khmelevsky   assert(Section && "cannot get section for referenced address");
125635efe1d8SVladislav Khmelevsky   if (!Section->isText())
125735efe1d8SVladislav Khmelevsky     return false;
125835efe1d8SVladislav Khmelevsky 
125935efe1d8SVladislav Khmelevsky   bool Ret = false;
126035efe1d8SVladislav Khmelevsky   StringRef SectionContents = Section->getContents();
126135efe1d8SVladislav Khmelevsky   uint64_t Offset = Address - Section->getAddress();
126235efe1d8SVladislav Khmelevsky   const uint64_t MaxSize = SectionContents.size() - Offset;
126335efe1d8SVladislav Khmelevsky   const uint8_t *Bytes =
126435efe1d8SVladislav Khmelevsky       reinterpret_cast<const uint8_t *>(SectionContents.data());
126535efe1d8SVladislav Khmelevsky   ArrayRef<uint8_t> Data(Bytes + Offset, MaxSize);
126635efe1d8SVladislav Khmelevsky 
126735efe1d8SVladislav Khmelevsky   auto matchVeneer = [&](BinaryFunction::InstrMapType &Instructions,
126835efe1d8SVladislav Khmelevsky                          MCInst &Instruction, uint64_t Offset,
126935efe1d8SVladislav Khmelevsky                          uint64_t AbsoluteInstrAddr,
127035efe1d8SVladislav Khmelevsky                          uint64_t TotalSize) -> bool {
127135efe1d8SVladislav Khmelevsky     MCInst *TargetHiBits, *TargetLowBits;
127235efe1d8SVladislav Khmelevsky     uint64_t TargetAddress, Count;
127335efe1d8SVladislav Khmelevsky     Count = MIB->matchLinkerVeneer(Instructions.begin(), Instructions.end(),
127435efe1d8SVladislav Khmelevsky                                    AbsoluteInstrAddr, Instruction, TargetHiBits,
127535efe1d8SVladislav Khmelevsky                                    TargetLowBits, TargetAddress);
127635efe1d8SVladislav Khmelevsky     if (!Count)
127735efe1d8SVladislav Khmelevsky       return false;
127835efe1d8SVladislav Khmelevsky 
127935efe1d8SVladislav Khmelevsky     if (MatchOnly)
128035efe1d8SVladislav Khmelevsky       return true;
128135efe1d8SVladislav Khmelevsky 
128235efe1d8SVladislav Khmelevsky     // NOTE The target symbol was created during disassemble's
128335efe1d8SVladislav Khmelevsky     // handleExternalReference
128435efe1d8SVladislav Khmelevsky     const MCSymbol *VeneerSymbol = getOrCreateGlobalSymbol(Address, "FUNCat");
128535efe1d8SVladislav Khmelevsky     BinaryFunction *Veneer = createBinaryFunction(VeneerSymbol->getName().str(),
128635efe1d8SVladislav Khmelevsky                                                   *Section, Address, TotalSize);
128735efe1d8SVladislav Khmelevsky     addAdrpAddRelocAArch64(*Veneer, *TargetLowBits, *TargetHiBits,
128835efe1d8SVladislav Khmelevsky                            TargetAddress);
128935efe1d8SVladislav Khmelevsky     MIB->addAnnotation(Instruction, "AArch64Veneer", true);
129035efe1d8SVladislav Khmelevsky     Veneer->addInstruction(Offset, std::move(Instruction));
129135efe1d8SVladislav Khmelevsky     --Count;
1292f65e8c3cSNico Weber     for (auto It = Instructions.rbegin(); Count != 0; ++It, --Count) {
129335efe1d8SVladislav Khmelevsky       MIB->addAnnotation(It->second, "AArch64Veneer", true);
129435efe1d8SVladislav Khmelevsky       Veneer->addInstruction(It->first, std::move(It->second));
129535efe1d8SVladislav Khmelevsky     }
129635efe1d8SVladislav Khmelevsky 
129735efe1d8SVladislav Khmelevsky     Veneer->getOrCreateLocalLabel(Address);
129835efe1d8SVladislav Khmelevsky     Veneer->setMaxSize(TotalSize);
129935efe1d8SVladislav Khmelevsky     Veneer->updateState(BinaryFunction::State::Disassembled);
1300c3bbc3a5Ssinan     LLVM_DEBUG(dbgs() << "BOLT-DEBUG: handling veneer function at 0x"
1301c3bbc3a5Ssinan                       << Twine::utohexstr(Address) << "\n");
130235efe1d8SVladislav Khmelevsky     return true;
130335efe1d8SVladislav Khmelevsky   };
130435efe1d8SVladislav Khmelevsky 
130535efe1d8SVladislav Khmelevsky   uint64_t Size = 0, TotalSize = 0;
130635efe1d8SVladislav Khmelevsky   BinaryFunction::InstrMapType VeneerInstructions;
130735efe1d8SVladislav Khmelevsky   for (Offset = 0; Offset < MaxSize; Offset += Size) {
130835efe1d8SVladislav Khmelevsky     MCInst Instruction;
130935efe1d8SVladislav Khmelevsky     const uint64_t AbsoluteInstrAddr = Address + Offset;
131035efe1d8SVladislav Khmelevsky     if (!SymbolicDisAsm->getInstruction(Instruction, Size, Data.slice(Offset),
131135efe1d8SVladislav Khmelevsky                                         AbsoluteInstrAddr, nulls()))
131235efe1d8SVladislav Khmelevsky       break;
131335efe1d8SVladislav Khmelevsky 
131435efe1d8SVladislav Khmelevsky     TotalSize += Size;
131535efe1d8SVladislav Khmelevsky     if (MIB->isBranch(Instruction)) {
131635efe1d8SVladislav Khmelevsky       Ret = matchVeneer(VeneerInstructions, Instruction, Offset,
131735efe1d8SVladislav Khmelevsky                         AbsoluteInstrAddr, TotalSize);
131835efe1d8SVladislav Khmelevsky       break;
131935efe1d8SVladislav Khmelevsky     }
132035efe1d8SVladislav Khmelevsky 
132135efe1d8SVladislav Khmelevsky     VeneerInstructions.emplace(Offset, std::move(Instruction));
132235efe1d8SVladislav Khmelevsky   }
132335efe1d8SVladislav Khmelevsky 
132435efe1d8SVladislav Khmelevsky   return Ret;
132535efe1d8SVladislav Khmelevsky }
132635efe1d8SVladislav Khmelevsky 
132735efe1d8SVladislav Khmelevsky void BinaryContext::processInterproceduralReferences() {
132835efe1d8SVladislav Khmelevsky   for (const std::pair<BinaryFunction *, uint64_t> &It :
132935efe1d8SVladislav Khmelevsky        InterproceduralReferences) {
133035efe1d8SVladislav Khmelevsky     BinaryFunction &Function = *It.first;
133135efe1d8SVladislav Khmelevsky     uint64_t Address = It.second;
1332935b946bSAmir Ayupov     // Process interprocedural references from ignored functions in BAT mode
1333935b946bSAmir Ayupov     // (non-simple in non-relocation mode) to properly register entry points
1334935b946bSAmir Ayupov     if (!Address || (Function.isIgnored() && !HasBATSection))
1335a34c753fSRafael Auler       continue;
1336a34c753fSRafael Auler 
1337a34c753fSRafael Auler     BinaryFunction *TargetFunction =
1338a34c753fSRafael Auler         getBinaryFunctionContainingAddress(Address);
1339a34c753fSRafael Auler     if (&Function == TargetFunction)
1340a34c753fSRafael Auler       continue;
1341a34c753fSRafael Auler 
1342a34c753fSRafael Auler     if (TargetFunction) {
134335efe1d8SVladislav Khmelevsky       if (TargetFunction->isFragment() &&
134483ea7ce3SAmir Ayupov           !areRelatedFragments(TargetFunction, &Function)) {
134552cf0711SAmir Ayupov         this->errs()
134652cf0711SAmir Ayupov             << "BOLT-WARNING: interprocedural reference between unrelated "
13476aa735ceSAmir Ayupov                "fragments: "
13486aa735ceSAmir Ayupov             << Function.getPrintName() << " and "
13496aa735ceSAmir Ayupov             << TargetFunction->getPrintName() << '\n';
13506aa735ceSAmir Ayupov       }
1351a34c753fSRafael Auler       if (uint64_t Offset = Address - TargetFunction->getAddress())
1352a34c753fSRafael Auler         TargetFunction->addEntryPointAtOffset(Offset);
1353a34c753fSRafael Auler 
1354a34c753fSRafael Auler       continue;
1355a34c753fSRafael Auler     }
1356a34c753fSRafael Auler 
1357a34c753fSRafael Auler     // Check if address falls in function padding space - this could be
1358a34c753fSRafael Auler     // unmarked data in code. In this case adjust the padding space size.
1359a34c753fSRafael Auler     ErrorOr<BinarySection &> Section = getSectionForAddress(Address);
1360a34c753fSRafael Auler     assert(Section && "cannot get section for referenced address");
1361a34c753fSRafael Auler 
1362a34c753fSRafael Auler     if (!Section->isText())
1363a34c753fSRafael Auler       continue;
1364a34c753fSRafael Auler 
1365a34c753fSRafael Auler     // PLT requires special handling and could be ignored in this context.
1366a34c753fSRafael Auler     StringRef SectionName = Section->getName();
1367a34c753fSRafael Auler     if (SectionName == ".plt" || SectionName == ".plt.got")
1368a34c753fSRafael Auler       continue;
1369a34c753fSRafael Auler 
137035efe1d8SVladislav Khmelevsky     // Check if it is aarch64 veneer written at Address
137135efe1d8SVladislav Khmelevsky     if (isAArch64() && handleAArch64Veneer(Address))
137235efe1d8SVladislav Khmelevsky       continue;
137335efe1d8SVladislav Khmelevsky 
1374a34c753fSRafael Auler     if (opts::processAllFunctions()) {
137552cf0711SAmir Ayupov       this->errs() << "BOLT-ERROR: cannot process binaries with unmarked "
137652cf0711SAmir Ayupov                    << "object in code at address 0x"
137752cf0711SAmir Ayupov                    << Twine::utohexstr(Address) << " belonging to section "
137852cf0711SAmir Ayupov                    << SectionName << " in current mode\n";
1379a34c753fSRafael Auler       exit(1);
1380a34c753fSRafael Auler     }
1381a34c753fSRafael Auler 
138240c2e0faSMaksim Panchenko     TargetFunction = getBinaryFunctionContainingAddress(Address,
1383a34c753fSRafael Auler                                                         /*CheckPastEnd=*/false,
1384a34c753fSRafael Auler                                                         /*UseMaxSize=*/true);
1385a34c753fSRafael Auler     // We are not going to overwrite non-simple functions, but for simple
1386a34c753fSRafael Auler     // ones - adjust the padding size.
1387a34c753fSRafael Auler     if (TargetFunction && TargetFunction->isSimple()) {
138852cf0711SAmir Ayupov       this->errs()
138952cf0711SAmir Ayupov           << "BOLT-WARNING: function " << *TargetFunction
1390a34c753fSRafael Auler           << " has an object detected in a padding region at address 0x"
1391a34c753fSRafael Auler           << Twine::utohexstr(Address) << '\n';
1392a34c753fSRafael Auler       TargetFunction->setMaxSize(TargetFunction->getSize());
1393a34c753fSRafael Auler     }
1394a34c753fSRafael Auler   }
1395a34c753fSRafael Auler 
139635efe1d8SVladislav Khmelevsky   InterproceduralReferences.clear();
1397a34c753fSRafael Auler }
1398a34c753fSRafael Auler 
1399a34c753fSRafael Auler void BinaryContext::postProcessSymbolTable() {
1400a34c753fSRafael Auler   fixBinaryDataHoles();
1401a34c753fSRafael Auler   bool Valid = true;
1402a34c753fSRafael Auler   for (auto &Entry : BinaryDataMap) {
1403a34c753fSRafael Auler     BinaryData *BD = Entry.second;
1404ad8fd5b1SKazu Hirata     if ((BD->getName().starts_with("SYMBOLat") ||
1405ad8fd5b1SKazu Hirata          BD->getName().starts_with("DATAat")) &&
140640c2e0faSMaksim Panchenko         !BD->getParent() && !BD->getSize() && !BD->isAbsolute() &&
1407e49549ffSDavide Italiano         BD->getSection()) {
140852cf0711SAmir Ayupov       this->errs() << "BOLT-WARNING: zero-sized top level symbol: " << *BD
140952cf0711SAmir Ayupov                    << "\n";
1410a34c753fSRafael Auler       Valid = false;
1411a34c753fSRafael Auler     }
1412a34c753fSRafael Auler   }
1413a34c753fSRafael Auler   assert(Valid);
1414c907d6e0SAmir Ayupov   (void)Valid;
1415a34c753fSRafael Auler   generateSymbolHashes();
1416a34c753fSRafael Auler }
1417a34c753fSRafael Auler 
1418a34c753fSRafael Auler void BinaryContext::foldFunction(BinaryFunction &ChildBF,
1419a34c753fSRafael Auler                                  BinaryFunction &ParentBF) {
1420a34c753fSRafael Auler   assert(!ChildBF.isMultiEntry() && !ParentBF.isMultiEntry() &&
1421a34c753fSRafael Auler          "cannot merge functions with multiple entry points");
1422a34c753fSRafael Auler 
1423e8ce5f1eSNico Weber   std::unique_lock<llvm::sys::RWMutex> WriteCtxLock(CtxMutex, std::defer_lock);
1424e8ce5f1eSNico Weber   std::unique_lock<llvm::sys::RWMutex> WriteSymbolMapLock(
1425a34c753fSRafael Auler       SymbolToFunctionMapMutex, std::defer_lock);
1426a34c753fSRafael Auler 
1427a34c753fSRafael Auler   const StringRef ChildName = ChildBF.getOneName();
1428a34c753fSRafael Auler 
1429a34c753fSRafael Auler   // Move symbols over and update bookkeeping info.
1430a34c753fSRafael Auler   for (MCSymbol *Symbol : ChildBF.getSymbols()) {
1431a34c753fSRafael Auler     ParentBF.getSymbols().push_back(Symbol);
1432a34c753fSRafael Auler     WriteSymbolMapLock.lock();
1433a34c753fSRafael Auler     SymbolToFunctionMap[Symbol] = &ParentBF;
1434a34c753fSRafael Auler     WriteSymbolMapLock.unlock();
1435a34c753fSRafael Auler     // NB: there's no need to update BinaryDataMap and GlobalSymbols.
1436a34c753fSRafael Auler   }
1437a34c753fSRafael Auler   ChildBF.getSymbols().clear();
1438a34c753fSRafael Auler 
1439a34c753fSRafael Auler   // Move other names the child function is known under.
1440d2c87699SAmir Ayupov   llvm::move(ChildBF.Aliases, std::back_inserter(ParentBF.Aliases));
1441a34c753fSRafael Auler   ChildBF.Aliases.clear();
1442a34c753fSRafael Auler 
1443a34c753fSRafael Auler   if (HasRelocations) {
1444a34c753fSRafael Auler     // Merge execution counts of ChildBF into those of ParentBF.
1445a34c753fSRafael Auler     // Without relocations, we cannot reliably merge profiles as both functions
1446a34c753fSRafael Auler     // continue to exist and either one can be executed.
1447a34c753fSRafael Auler     ChildBF.mergeProfileDataInto(ParentBF);
1448a34c753fSRafael Auler 
1449e8ce5f1eSNico Weber     std::shared_lock<llvm::sys::RWMutex> ReadBfsLock(BinaryFunctionsMutex,
1450a34c753fSRafael Auler                                                      std::defer_lock);
1451e8ce5f1eSNico Weber     std::unique_lock<llvm::sys::RWMutex> WriteBfsLock(BinaryFunctionsMutex,
1452a34c753fSRafael Auler                                                       std::defer_lock);
1453a34c753fSRafael Auler     // Remove ChildBF from the global set of functions in relocs mode.
1454a34c753fSRafael Auler     ReadBfsLock.lock();
1455a34c753fSRafael Auler     auto FI = BinaryFunctions.find(ChildBF.getAddress());
1456a34c753fSRafael Auler     ReadBfsLock.unlock();
1457a34c753fSRafael Auler 
1458a34c753fSRafael Auler     assert(FI != BinaryFunctions.end() && "function not found");
1459a34c753fSRafael Auler     assert(&ChildBF == &FI->second && "function mismatch");
1460a34c753fSRafael Auler 
1461a34c753fSRafael Auler     WriteBfsLock.lock();
1462a34c753fSRafael Auler     ChildBF.clearDisasmState();
1463a34c753fSRafael Auler     FI = BinaryFunctions.erase(FI);
1464a34c753fSRafael Auler     WriteBfsLock.unlock();
1465a34c753fSRafael Auler 
1466a34c753fSRafael Auler   } else {
1467a34c753fSRafael Auler     // In non-relocation mode we keep the function, but rename it.
1468a34c753fSRafael Auler     std::string NewName = "__ICF_" + ChildName.str();
1469a34c753fSRafael Auler 
1470a34c753fSRafael Auler     WriteCtxLock.lock();
1471a34c753fSRafael Auler     ChildBF.getSymbols().push_back(Ctx->getOrCreateSymbol(NewName));
1472a34c753fSRafael Auler     WriteCtxLock.unlock();
1473a34c753fSRafael Auler 
1474a34c753fSRafael Auler     ChildBF.setFolded(&ParentBF);
1475a34c753fSRafael Auler   }
147603e94f66SMaksim Panchenko 
147703e94f66SMaksim Panchenko   ParentBF.setHasFunctionsFoldedInto();
1478a34c753fSRafael Auler }
1479a34c753fSRafael Auler 
1480a34c753fSRafael Auler void BinaryContext::fixBinaryDataHoles() {
14811a2f8336Sspaette   assert(validateObjectNesting() && "object nesting inconsistency detected");
1482a34c753fSRafael Auler 
1483a34c753fSRafael Auler   for (BinarySection &Section : allocatableSections()) {
1484a34c753fSRafael Auler     std::vector<std::pair<uint64_t, uint64_t>> Holes;
1485a34c753fSRafael Auler 
1486a34c753fSRafael Auler     auto isNotHole = [&Section](const binary_data_iterator &Itr) {
1487a34c753fSRafael Auler       BinaryData *BD = Itr->second;
148840c2e0faSMaksim Panchenko       bool isHole = (!BD->getParent() && !BD->getSize() && BD->isObject() &&
1489ad8fd5b1SKazu Hirata                      (BD->getName().starts_with("SYMBOLat0x") ||
1490ad8fd5b1SKazu Hirata                       BD->getName().starts_with("DATAat0x") ||
1491ad8fd5b1SKazu Hirata                       BD->getName().starts_with("ANONYMOUS")));
1492a34c753fSRafael Auler       return !isHole && BD->getSection() == Section && !BD->getParent();
1493a34c753fSRafael Auler     };
1494a34c753fSRafael Auler 
1495a34c753fSRafael Auler     auto BDStart = BinaryDataMap.begin();
1496a34c753fSRafael Auler     auto BDEnd = BinaryDataMap.end();
1497a34c753fSRafael Auler     auto Itr = FilteredBinaryDataIterator(isNotHole, BDStart, BDEnd);
1498a34c753fSRafael Auler     auto End = FilteredBinaryDataIterator(isNotHole, BDEnd, BDEnd);
1499a34c753fSRafael Auler 
1500a34c753fSRafael Auler     uint64_t EndAddress = Section.getAddress();
1501a34c753fSRafael Auler 
1502a34c753fSRafael Auler     while (Itr != End) {
1503a34c753fSRafael Auler       if (Itr->second->getAddress() > EndAddress) {
1504a34c753fSRafael Auler         uint64_t Gap = Itr->second->getAddress() - EndAddress;
1505a34c753fSRafael Auler         Holes.emplace_back(EndAddress, Gap);
1506a34c753fSRafael Auler       }
1507a34c753fSRafael Auler       EndAddress = Itr->second->getEndAddress();
1508a34c753fSRafael Auler       ++Itr;
1509a34c753fSRafael Auler     }
1510a34c753fSRafael Auler 
15113652483cSRafael Auler     if (EndAddress < Section.getEndAddress())
1512a34c753fSRafael Auler       Holes.emplace_back(EndAddress, Section.getEndAddress() - EndAddress);
1513a34c753fSRafael Auler 
1514a34c753fSRafael Auler     // If there is already a symbol at the start of the hole, grow that symbol
1515a34c753fSRafael Auler     // to cover the rest.  Otherwise, create a new symbol to cover the hole.
1516a34c753fSRafael Auler     for (std::pair<uint64_t, uint64_t> &Hole : Holes) {
1517a34c753fSRafael Auler       BinaryData *BD = getBinaryDataAtAddress(Hole.first);
1518a34c753fSRafael Auler       if (BD) {
1519a34c753fSRafael Auler         // BD->getSection() can be != Section if there are sections that
1520a34c753fSRafael Auler         // overlap.  In this case it is probably safe to just skip the holes
1521a34c753fSRafael Auler         // since the overlapping section will not(?) have any symbols in it.
1522a34c753fSRafael Auler         if (BD->getSection() == Section)
1523a34c753fSRafael Auler           setBinaryDataSize(Hole.first, Hole.second);
1524a34c753fSRafael Auler       } else {
1525a34c753fSRafael Auler         getOrCreateGlobalSymbol(Hole.first, "HOLEat", Hole.second, 1);
1526a34c753fSRafael Auler       }
1527a34c753fSRafael Auler     }
1528a34c753fSRafael Auler   }
1529a34c753fSRafael Auler 
15301a2f8336Sspaette   assert(validateObjectNesting() && "object nesting inconsistency detected");
1531a34c753fSRafael Auler   assert(validateHoles() && "top level hole detected in object map");
1532a34c753fSRafael Auler }
1533a34c753fSRafael Auler 
1534a34c753fSRafael Auler void BinaryContext::printGlobalSymbols(raw_ostream &OS) const {
1535a34c753fSRafael Auler   const BinarySection *CurrentSection = nullptr;
1536a34c753fSRafael Auler   bool FirstSection = true;
1537a34c753fSRafael Auler 
1538a34c753fSRafael Auler   for (auto &Entry : BinaryDataMap) {
1539a34c753fSRafael Auler     const BinaryData *BD = Entry.second;
1540a34c753fSRafael Auler     const BinarySection &Section = BD->getSection();
1541a34c753fSRafael Auler     if (FirstSection || Section != *CurrentSection) {
1542a34c753fSRafael Auler       uint64_t Address, Size;
1543a34c753fSRafael Auler       StringRef Name = Section.getName();
1544a34c753fSRafael Auler       if (Section) {
1545a34c753fSRafael Auler         Address = Section.getAddress();
1546a34c753fSRafael Auler         Size = Section.getSize();
1547a34c753fSRafael Auler       } else {
1548a34c753fSRafael Auler         Address = BD->getAddress();
1549a34c753fSRafael Auler         Size = BD->getSize();
1550a34c753fSRafael Auler       }
1551a34c753fSRafael Auler       OS << "BOLT-INFO: Section " << Name << ", "
1552a34c753fSRafael Auler          << "0x" + Twine::utohexstr(Address) << ":"
155340c2e0faSMaksim Panchenko          << "0x" + Twine::utohexstr(Address + Size) << "/" << Size << "\n";
1554a34c753fSRafael Auler       CurrentSection = &Section;
1555a34c753fSRafael Auler       FirstSection = false;
1556a34c753fSRafael Auler     }
1557a34c753fSRafael Auler 
1558a34c753fSRafael Auler     OS << "BOLT-INFO: ";
1559a34c753fSRafael Auler     const BinaryData *P = BD->getParent();
1560a34c753fSRafael Auler     while (P) {
1561a34c753fSRafael Auler       OS << "  ";
1562a34c753fSRafael Auler       P = P->getParent();
1563a34c753fSRafael Auler     }
1564a34c753fSRafael Auler     OS << *BD << "\n";
1565a34c753fSRafael Auler   }
1566a34c753fSRafael Auler }
1567a34c753fSRafael Auler 
1568014cd37fSAlexander Yermolovich Expected<unsigned> BinaryContext::getDwarfFile(
1569014cd37fSAlexander Yermolovich     StringRef Directory, StringRef FileName, unsigned FileNumber,
1570f4c16c44SFangrui Song     std::optional<MD5::MD5Result> Checksum, std::optional<StringRef> Source,
1571014cd37fSAlexander Yermolovich     unsigned CUID, unsigned DWARFVersion) {
1572a34c753fSRafael Auler   DwarfLineTable &Table = DwarfLineTablesCUMap[CUID];
1573014cd37fSAlexander Yermolovich   return Table.tryGetFile(Directory, FileName, Checksum, Source, DWARFVersion,
1574014cd37fSAlexander Yermolovich                           FileNumber);
1575a34c753fSRafael Auler }
1576a34c753fSRafael Auler 
1577a34c753fSRafael Auler unsigned BinaryContext::addDebugFilenameToUnit(const uint32_t DestCUID,
1578a34c753fSRafael Auler                                                const uint32_t SrcCUID,
1579a34c753fSRafael Auler                                                unsigned FileIndex) {
1580a34c753fSRafael Auler   DWARFCompileUnit *SrcUnit = DwCtx->getCompileUnitForOffset(SrcCUID);
1581a34c753fSRafael Auler   const DWARFDebugLine::LineTable *LineTable =
1582a34c753fSRafael Auler       DwCtx->getLineTableForUnit(SrcUnit);
1583a34c753fSRafael Auler   const std::vector<DWARFDebugLine::FileNameEntry> &FileNames =
1584a34c753fSRafael Auler       LineTable->Prologue.FileNames;
1585a34c753fSRafael Auler   // Dir indexes start at 1, as DWARF file numbers, and a dir index 0
1586a34c753fSRafael Auler   // means empty dir.
1587a34c753fSRafael Auler   assert(FileIndex > 0 && FileIndex <= FileNames.size() &&
1588a34c753fSRafael Auler          "FileIndex out of range for the compilation unit.");
1589a34c753fSRafael Auler   StringRef Dir = "";
1590a34c753fSRafael Auler   if (FileNames[FileIndex - 1].DirIdx != 0) {
159189fab98eSFangrui Song     if (std::optional<const char *> DirName = dwarf::toString(
1592a34c753fSRafael Auler             LineTable->Prologue
1593a34c753fSRafael Auler                 .IncludeDirectories[FileNames[FileIndex - 1].DirIdx - 1])) {
1594a34c753fSRafael Auler       Dir = *DirName;
1595a34c753fSRafael Auler     }
1596a34c753fSRafael Auler   }
1597a34c753fSRafael Auler   StringRef FileName = "";
159889fab98eSFangrui Song   if (std::optional<const char *> FName =
1599a34c753fSRafael Auler           dwarf::toString(FileNames[FileIndex - 1].Name))
1600a34c753fSRafael Auler     FileName = *FName;
1601a34c753fSRafael Auler   assert(FileName != "");
1602014cd37fSAlexander Yermolovich   DWARFCompileUnit *DstUnit = DwCtx->getCompileUnitForOffset(DestCUID);
1603e324a80fSKazu Hirata   return cantFail(getDwarfFile(Dir, FileName, 0, std::nullopt, std::nullopt,
1604e324a80fSKazu Hirata                                DestCUID, DstUnit->getVersion()));
1605a34c753fSRafael Auler }
1606a34c753fSRafael Auler 
1607a34c753fSRafael Auler std::vector<BinaryFunction *> BinaryContext::getSortedFunctions() {
1608a34c753fSRafael Auler   std::vector<BinaryFunction *> SortedFunctions(BinaryFunctions.size());
160972e5b14fSAmir Ayupov   llvm::transform(llvm::make_second_range(BinaryFunctions),
161072e5b14fSAmir Ayupov                   SortedFunctions.begin(),
161172e5b14fSAmir Ayupov                   [](BinaryFunction &BF) { return &BF; });
1612a34c753fSRafael Auler 
16134d2bc0adSEnna1   llvm::stable_sort(SortedFunctions, compareBinaryFunctionByIndex);
1614a34c753fSRafael Auler   return SortedFunctions;
1615a34c753fSRafael Auler }
1616a34c753fSRafael Auler 
1617a34c753fSRafael Auler std::vector<BinaryFunction *> BinaryContext::getAllBinaryFunctions() {
1618a34c753fSRafael Auler   std::vector<BinaryFunction *> AllFunctions;
1619a34c753fSRafael Auler   AllFunctions.reserve(BinaryFunctions.size() + InjectedBinaryFunctions.size());
162072e5b14fSAmir Ayupov   llvm::transform(llvm::make_second_range(BinaryFunctions),
162172e5b14fSAmir Ayupov                   std::back_inserter(AllFunctions),
162272e5b14fSAmir Ayupov                   [](BinaryFunction &BF) { return &BF; });
1623d2c87699SAmir Ayupov   llvm::copy(InjectedBinaryFunctions, std::back_inserter(AllFunctions));
1624a34c753fSRafael Auler 
1625a34c753fSRafael Auler   return AllFunctions;
1626a34c753fSRafael Auler }
1627a34c753fSRafael Auler 
1628e8f5743eSAmir Ayupov std::optional<DWARFUnit *> BinaryContext::getDWOCU(uint64_t DWOId) {
1629a34c753fSRafael Auler   auto Iter = DWOCUs.find(DWOId);
1630a34c753fSRafael Auler   if (Iter == DWOCUs.end())
1631e324a80fSKazu Hirata     return std::nullopt;
1632a34c753fSRafael Auler 
1633a34c753fSRafael Auler   return Iter->second;
1634a34c753fSRafael Auler }
1635a34c753fSRafael Auler 
16367dee646bSAmir Ayupov DWARFContext *BinaryContext::getDWOContext() const {
1637a34c753fSRafael Auler   if (DWOCUs.empty())
1638a34c753fSRafael Auler     return nullptr;
1639a34c753fSRafael Auler   return &DWOCUs.begin()->second->getContext();
1640a34c753fSRafael Auler }
1641a34c753fSRafael Auler 
1642a34c753fSRafael Auler /// Handles DWO sections that can either be in .o, .dwo or .dwp files.
1643a34c753fSRafael Auler void BinaryContext::preprocessDWODebugInfo() {
1644a34c753fSRafael Auler   for (const std::unique_ptr<DWARFUnit> &CU : DwCtx->compile_units()) {
1645a34c753fSRafael Auler     DWARFUnit *const DwarfUnit = CU.get();
164689fab98eSFangrui Song     if (std::optional<uint64_t> DWOId = DwarfUnit->getDWOId()) {
1647a34c753fSRafael Auler       std::string DWOName = dwarf::toString(
1648a34c753fSRafael Auler           DwarfUnit->getUnitDIE().find(
1649a34c753fSRafael Auler               {dwarf::DW_AT_dwo_name, dwarf::DW_AT_GNU_dwo_name}),
1650a34c753fSRafael Auler           "");
16517d272722SAlexander Yermolovich       SmallString<16> AbsolutePath;
16527d272722SAlexander Yermolovich       if (!opts::CompDirOverride.empty()) {
16537d272722SAlexander Yermolovich         sys::path::append(AbsolutePath, opts::CompDirOverride);
16547d272722SAlexander Yermolovich         sys::path::append(AbsolutePath, DWOName);
16557d272722SAlexander Yermolovich       }
16567d272722SAlexander Yermolovich       DWARFUnit *DWOCU =
16577d272722SAlexander Yermolovich           DwarfUnit->getNonSkeletonUnitDIE(false, AbsolutePath).getDwarfUnit();
16587d272722SAlexander Yermolovich       if (!DWOCU->isDWOUnit()) {
165952cf0711SAmir Ayupov         this->outs()
166052cf0711SAmir Ayupov             << "BOLT-WARNING: Debug Fission: DWO debug information for "
1661a34c753fSRafael Auler             << DWOName
1662a34c753fSRafael Auler             << " was not retrieved and won't be updated. Please check "
1663a34c753fSRafael Auler                "relative path.\n";
1664a34c753fSRafael Auler         continue;
1665a34c753fSRafael Auler       }
1666a34c753fSRafael Auler       DWOCUs[*DWOId] = DWOCU;
1667a34c753fSRafael Auler     }
1668a34c753fSRafael Auler   }
1669864133c5SAlexander Yermolovich   if (!DWOCUs.empty())
167052cf0711SAmir Ayupov     this->outs() << "BOLT-INFO: processing split DWARF\n";
1671a34c753fSRafael Auler }
1672a34c753fSRafael Auler 
1673a34c753fSRafael Auler void BinaryContext::preprocessDebugInfo() {
1674a34c753fSRafael Auler   struct CURange {
1675a34c753fSRafael Auler     uint64_t LowPC;
1676a34c753fSRafael Auler     uint64_t HighPC;
1677a34c753fSRafael Auler     DWARFUnit *Unit;
1678a34c753fSRafael Auler 
167940c2e0faSMaksim Panchenko     bool operator<(const CURange &Other) const { return LowPC < Other.LowPC; }
1680a34c753fSRafael Auler   };
1681a34c753fSRafael Auler 
1682a34c753fSRafael Auler   // Building a map of address ranges to CUs similar to .debug_aranges and use
1683a34c753fSRafael Auler   // it to assign CU to functions.
1684a34c753fSRafael Auler   std::vector<CURange> AllRanges;
1685a34c753fSRafael Auler   AllRanges.reserve(DwCtx->getNumCompileUnits());
1686a34c753fSRafael Auler   for (const std::unique_ptr<DWARFUnit> &CU : DwCtx->compile_units()) {
1687a34c753fSRafael Auler     Expected<DWARFAddressRangesVector> RangesOrError =
1688a34c753fSRafael Auler         CU->getUnitDIE().getAddressRanges();
1689a34c753fSRafael Auler     if (!RangesOrError) {
1690a34c753fSRafael Auler       consumeError(RangesOrError.takeError());
1691a34c753fSRafael Auler       continue;
1692a34c753fSRafael Auler     }
1693a34c753fSRafael Auler     for (DWARFAddressRange &Range : *RangesOrError) {
1694a34c753fSRafael Auler       // Parts of the debug info could be invalidated due to corresponding code
1695a34c753fSRafael Auler       // being removed from the binary by the linker. Hence we check if the
1696a34c753fSRafael Auler       // address is a valid one.
1697a34c753fSRafael Auler       if (containsAddress(Range.LowPC))
1698a34c753fSRafael Auler         AllRanges.emplace_back(CURange{Range.LowPC, Range.HighPC, CU.get()});
1699a34c753fSRafael Auler     }
1700014cd37fSAlexander Yermolovich 
1701014cd37fSAlexander Yermolovich     ContainsDwarf5 |= CU->getVersion() >= 5;
1702014cd37fSAlexander Yermolovich     ContainsDwarfLegacy |= CU->getVersion() < 5;
1703a34c753fSRafael Auler   }
1704a34c753fSRafael Auler 
1705d2c87699SAmir Ayupov   llvm::sort(AllRanges);
1706a34c753fSRafael Auler   for (auto &KV : BinaryFunctions) {
1707a34c753fSRafael Auler     const uint64_t FunctionAddress = KV.first;
1708a34c753fSRafael Auler     BinaryFunction &Function = KV.second;
1709a34c753fSRafael Auler 
1710d2c87699SAmir Ayupov     auto It = llvm::partition_point(
1711d2c87699SAmir Ayupov         AllRanges, [=](CURange R) { return R.HighPC <= FunctionAddress; });
1712d2c87699SAmir Ayupov     if (It != AllRanges.end() && It->LowPC <= FunctionAddress)
1713a34c753fSRafael Auler       Function.setDWARFUnit(It->Unit);
1714a34c753fSRafael Auler   }
1715a34c753fSRafael Auler 
1716a34c753fSRafael Auler   // Discover units with debug info that needs to be updated.
1717a34c753fSRafael Auler   for (const auto &KV : BinaryFunctions) {
1718a34c753fSRafael Auler     const BinaryFunction &BF = KV.second;
1719a34c753fSRafael Auler     if (shouldEmit(BF) && BF.getDWARFUnit())
1720a34c753fSRafael Auler       ProcessedCUs.insert(BF.getDWARFUnit());
1721a34c753fSRafael Auler   }
1722a34c753fSRafael Auler 
1723a34c753fSRafael Auler   // Clear debug info for functions from units that we are not going to process.
1724a34c753fSRafael Auler   for (auto &KV : BinaryFunctions) {
1725a34c753fSRafael Auler     BinaryFunction &BF = KV.second;
1726a34c753fSRafael Auler     if (BF.getDWARFUnit() && !ProcessedCUs.count(BF.getDWARFUnit()))
1727a34c753fSRafael Auler       BF.setDWARFUnit(nullptr);
1728a34c753fSRafael Auler   }
1729a34c753fSRafael Auler 
1730a34c753fSRafael Auler   if (opts::Verbosity >= 1) {
173152cf0711SAmir Ayupov     this->outs() << "BOLT-INFO: " << ProcessedCUs.size() << " out of "
1732a34c753fSRafael Auler                  << DwCtx->getNumCompileUnits() << " CUs will be updated\n";
1733a34c753fSRafael Auler   }
1734a34c753fSRafael Auler 
1735ba1ac98cSAlexander Yermolovich   preprocessDWODebugInfo();
1736ba1ac98cSAlexander Yermolovich 
1737a34c753fSRafael Auler   // Populate MCContext with DWARF files from all units.
1738a34c753fSRafael Auler   StringRef GlobalPrefix = AsmInfo->getPrivateGlobalPrefix();
1739a34c753fSRafael Auler   for (const std::unique_ptr<DWARFUnit> &CU : DwCtx->compile_units()) {
1740a34c753fSRafael Auler     const uint64_t CUID = CU->getOffset();
1741014cd37fSAlexander Yermolovich     DwarfLineTable &BinaryLineTable = getDwarfLineTable(CUID);
1742014cd37fSAlexander Yermolovich     BinaryLineTable.setLabel(Ctx->getOrCreateSymbol(
1743a34c753fSRafael Auler         GlobalPrefix + "line_table_start" + Twine(CUID)));
1744a34c753fSRafael Auler 
1745a34c753fSRafael Auler     if (!ProcessedCUs.count(CU.get()))
1746a34c753fSRafael Auler       continue;
1747a34c753fSRafael Auler 
1748a34c753fSRafael Auler     const DWARFDebugLine::LineTable *LineTable =
1749a34c753fSRafael Auler         DwCtx->getLineTableForUnit(CU.get());
1750a34c753fSRafael Auler     const std::vector<DWARFDebugLine::FileNameEntry> &FileNames =
1751a34c753fSRafael Auler         LineTable->Prologue.FileNames;
1752a34c753fSRafael Auler 
1753014cd37fSAlexander Yermolovich     uint16_t DwarfVersion = LineTable->Prologue.getVersion();
1754014cd37fSAlexander Yermolovich     if (DwarfVersion >= 5) {
1755f4c16c44SFangrui Song       std::optional<MD5::MD5Result> Checksum;
1756014cd37fSAlexander Yermolovich       if (LineTable->Prologue.ContentTypes.HasMD5)
1757014cd37fSAlexander Yermolovich         Checksum = LineTable->Prologue.FileNames[0].Checksum;
175889fab98eSFangrui Song       std::optional<const char *> Name =
1759ba1ac98cSAlexander Yermolovich           dwarf::toString(CU->getUnitDIE().find(dwarf::DW_AT_name), nullptr);
176089fab98eSFangrui Song       if (std::optional<uint64_t> DWOID = CU->getDWOId()) {
1761ba1ac98cSAlexander Yermolovich         auto Iter = DWOCUs.find(*DWOID);
1762ba1ac98cSAlexander Yermolovich         assert(Iter != DWOCUs.end() && "DWO CU was not found.");
1763ba1ac98cSAlexander Yermolovich         Name = dwarf::toString(
1764ba1ac98cSAlexander Yermolovich             Iter->second->getUnitDIE().find(dwarf::DW_AT_name), nullptr);
1765ba1ac98cSAlexander Yermolovich       }
1766ba1ac98cSAlexander Yermolovich       BinaryLineTable.setRootFile(CU->getCompilationDir(), *Name, Checksum,
1767e324a80fSKazu Hirata                                   std::nullopt);
1768014cd37fSAlexander Yermolovich     }
1769014cd37fSAlexander Yermolovich 
1770014cd37fSAlexander Yermolovich     BinaryLineTable.setDwarfVersion(DwarfVersion);
1771014cd37fSAlexander Yermolovich 
1772a34c753fSRafael Auler     // Assign a unique label to every line table, one per CU.
1773a34c753fSRafael Auler     // Make sure empty debug line tables are registered too.
1774a34c753fSRafael Auler     if (FileNames.empty()) {
1775e324a80fSKazu Hirata       cantFail(getDwarfFile("", "<unknown>", 0, std::nullopt, std::nullopt,
1776e324a80fSKazu Hirata                             CUID, DwarfVersion));
1777a34c753fSRafael Auler       continue;
1778a34c753fSRafael Auler     }
1779014cd37fSAlexander Yermolovich     const uint32_t Offset = DwarfVersion < 5 ? 1 : 0;
1780a34c753fSRafael Auler     for (size_t I = 0, Size = FileNames.size(); I != Size; ++I) {
1781a34c753fSRafael Auler       // Dir indexes start at 1, as DWARF file numbers, and a dir index 0
1782a34c753fSRafael Auler       // means empty dir.
1783a34c753fSRafael Auler       StringRef Dir = "";
1784014cd37fSAlexander Yermolovich       if (FileNames[I].DirIdx != 0 || DwarfVersion >= 5)
178589fab98eSFangrui Song         if (std::optional<const char *> DirName = dwarf::toString(
1786a34c753fSRafael Auler                 LineTable->Prologue
1787014cd37fSAlexander Yermolovich                     .IncludeDirectories[FileNames[I].DirIdx - Offset]))
1788a34c753fSRafael Auler           Dir = *DirName;
1789a34c753fSRafael Auler       StringRef FileName = "";
179089fab98eSFangrui Song       if (std::optional<const char *> FName =
179189fab98eSFangrui Song               dwarf::toString(FileNames[I].Name))
1792a34c753fSRafael Auler         FileName = *FName;
1793a34c753fSRafael Auler       assert(FileName != "");
1794f4c16c44SFangrui Song       std::optional<MD5::MD5Result> Checksum;
1795014cd37fSAlexander Yermolovich       if (DwarfVersion >= 5 && LineTable->Prologue.ContentTypes.HasMD5)
1796014cd37fSAlexander Yermolovich         Checksum = LineTable->Prologue.FileNames[I].Checksum;
1797e324a80fSKazu Hirata       cantFail(getDwarfFile(Dir, FileName, 0, Checksum, std::nullopt, CUID,
1798e324a80fSKazu Hirata                             DwarfVersion));
1799a34c753fSRafael Auler     }
1800a34c753fSRafael Auler   }
1801a34c753fSRafael Auler }
1802a34c753fSRafael Auler 
1803a34c753fSRafael Auler bool BinaryContext::shouldEmit(const BinaryFunction &Function) const {
18044c14519eSVladislav Khmelevsky   if (Function.isPseudo())
18054c14519eSVladislav Khmelevsky     return false;
18064c14519eSVladislav Khmelevsky 
1807a34c753fSRafael Auler   if (opts::processAllFunctions())
1808a34c753fSRafael Auler     return true;
1809a34c753fSRafael Auler 
1810a34c753fSRafael Auler   if (Function.isIgnored())
1811a34c753fSRafael Auler     return false;
1812a34c753fSRafael Auler 
1813a34c753fSRafael Auler   // In relocation mode we will emit non-simple functions with CFG.
1814a34c753fSRafael Auler   // If the function does not have a CFG it should be marked as ignored.
1815a34c753fSRafael Auler   return HasRelocations || Function.isSimple();
1816a34c753fSRafael Auler }
1817a34c753fSRafael Auler 
1818f3e54f2fSllongint void BinaryContext::dump(const MCInst &Inst) const {
1819f3e54f2fSllongint   if (LLVM_UNLIKELY(!InstPrinter)) {
1820f3e54f2fSllongint     dbgs() << "Cannot dump for InstPrinter is not initialized.\n";
1821f3e54f2fSllongint     return;
1822f3e54f2fSllongint   }
1823f3e54f2fSllongint   InstPrinter->printInst(&Inst, 0, "", *STI, dbgs());
1824f3e54f2fSllongint   dbgs() << "\n";
1825f3e54f2fSllongint }
1826f3e54f2fSllongint 
1827a34c753fSRafael Auler void BinaryContext::printCFI(raw_ostream &OS, const MCCFIInstruction &Inst) {
1828a34c753fSRafael Auler   uint32_t Operation = Inst.getOperation();
1829a34c753fSRafael Auler   switch (Operation) {
1830a34c753fSRafael Auler   case MCCFIInstruction::OpSameValue:
1831a34c753fSRafael Auler     OS << "OpSameValue Reg" << Inst.getRegister();
1832a34c753fSRafael Auler     break;
1833a34c753fSRafael Auler   case MCCFIInstruction::OpRememberState:
1834a34c753fSRafael Auler     OS << "OpRememberState";
1835a34c753fSRafael Auler     break;
1836a34c753fSRafael Auler   case MCCFIInstruction::OpRestoreState:
1837a34c753fSRafael Auler     OS << "OpRestoreState";
1838a34c753fSRafael Auler     break;
1839a34c753fSRafael Auler   case MCCFIInstruction::OpOffset:
1840a34c753fSRafael Auler     OS << "OpOffset Reg" << Inst.getRegister() << " " << Inst.getOffset();
1841a34c753fSRafael Auler     break;
1842a34c753fSRafael Auler   case MCCFIInstruction::OpDefCfaRegister:
1843a34c753fSRafael Auler     OS << "OpDefCfaRegister Reg" << Inst.getRegister();
1844a34c753fSRafael Auler     break;
1845a34c753fSRafael Auler   case MCCFIInstruction::OpDefCfaOffset:
1846a34c753fSRafael Auler     OS << "OpDefCfaOffset " << Inst.getOffset();
1847a34c753fSRafael Auler     break;
1848a34c753fSRafael Auler   case MCCFIInstruction::OpDefCfa:
1849a34c753fSRafael Auler     OS << "OpDefCfa Reg" << Inst.getRegister() << " " << Inst.getOffset();
1850a34c753fSRafael Auler     break;
1851a34c753fSRafael Auler   case MCCFIInstruction::OpRelOffset:
1852a34c753fSRafael Auler     OS << "OpRelOffset Reg" << Inst.getRegister() << " " << Inst.getOffset();
1853a34c753fSRafael Auler     break;
1854a34c753fSRafael Auler   case MCCFIInstruction::OpAdjustCfaOffset:
1855a34c753fSRafael Auler     OS << "OfAdjustCfaOffset " << Inst.getOffset();
1856a34c753fSRafael Auler     break;
1857a34c753fSRafael Auler   case MCCFIInstruction::OpEscape:
1858a34c753fSRafael Auler     OS << "OpEscape";
1859a34c753fSRafael Auler     break;
1860a34c753fSRafael Auler   case MCCFIInstruction::OpRestore:
1861a34c753fSRafael Auler     OS << "OpRestore Reg" << Inst.getRegister();
1862a34c753fSRafael Auler     break;
1863a34c753fSRafael Auler   case MCCFIInstruction::OpUndefined:
1864a34c753fSRafael Auler     OS << "OpUndefined Reg" << Inst.getRegister();
1865a34c753fSRafael Auler     break;
1866a34c753fSRafael Auler   case MCCFIInstruction::OpRegister:
1867a34c753fSRafael Auler     OS << "OpRegister Reg" << Inst.getRegister() << " Reg"
1868a34c753fSRafael Auler        << Inst.getRegister2();
1869a34c753fSRafael Auler     break;
1870a34c753fSRafael Auler   case MCCFIInstruction::OpWindowSave:
1871a34c753fSRafael Auler     OS << "OpWindowSave";
1872a34c753fSRafael Auler     break;
1873a34c753fSRafael Auler   case MCCFIInstruction::OpGnuArgsSize:
1874a34c753fSRafael Auler     OS << "OpGnuArgsSize";
1875a34c753fSRafael Auler     break;
1876a34c753fSRafael Auler   default:
1877a34c753fSRafael Auler     OS << "Op#" << Operation;
1878a34c753fSRafael Auler     break;
1879a34c753fSRafael Auler   }
1880a34c753fSRafael Auler }
1881a34c753fSRafael Auler 
18828579db96SDenis Revunov MarkerSymType BinaryContext::getMarkerType(const SymbolRef &Symbol) const {
1883fc395884SJob Noorman   // For aarch64 and riscv, the ABI defines mapping symbols so we identify data
1884fc395884SJob Noorman   // in the code section (see IHI0056B). $x identifies a symbol starting code or
18851a2f8336Sspaette   // the end of a data chunk inside code, $d identifies start of data.
1886603fa4c6SNathan Sidwell   if (isX86() || ELFSymbolRef(Symbol).getSize())
18878579db96SDenis Revunov     return MarkerSymType::NONE;
18888579db96SDenis Revunov 
18898579db96SDenis Revunov   Expected<StringRef> NameOrError = Symbol.getName();
18908579db96SDenis Revunov   Expected<object::SymbolRef::Type> TypeOrError = Symbol.getType();
18918579db96SDenis Revunov 
18928579db96SDenis Revunov   if (!TypeOrError || !NameOrError)
18938579db96SDenis Revunov     return MarkerSymType::NONE;
18948579db96SDenis Revunov 
18958579db96SDenis Revunov   if (*TypeOrError != SymbolRef::ST_Unknown)
18968579db96SDenis Revunov     return MarkerSymType::NONE;
18978579db96SDenis Revunov 
1898ad8fd5b1SKazu Hirata   if (*NameOrError == "$x" || NameOrError->starts_with("$x."))
18998579db96SDenis Revunov     return MarkerSymType::CODE;
19008579db96SDenis Revunov 
1901c6f065d9SJob Noorman   // $x<ISA>
1902ad8fd5b1SKazu Hirata   if (isRISCV() && NameOrError->starts_with("$x"))
1903c6f065d9SJob Noorman     return MarkerSymType::CODE;
1904c6f065d9SJob Noorman 
1905ad8fd5b1SKazu Hirata   if (*NameOrError == "$d" || NameOrError->starts_with("$d."))
19068579db96SDenis Revunov     return MarkerSymType::DATA;
19078579db96SDenis Revunov 
19088579db96SDenis Revunov   return MarkerSymType::NONE;
19098579db96SDenis Revunov }
19108579db96SDenis Revunov 
19118579db96SDenis Revunov bool BinaryContext::isMarker(const SymbolRef &Symbol) const {
19128579db96SDenis Revunov   return getMarkerType(Symbol) != MarkerSymType::NONE;
19138579db96SDenis Revunov }
19148579db96SDenis Revunov 
19157dee646bSAmir Ayupov static void printDebugInfo(raw_ostream &OS, const MCInst &Instruction,
19167dee646bSAmir Ayupov                            const BinaryFunction *Function,
19177dee646bSAmir Ayupov                            DWARFContext *DwCtx) {
19187dee646bSAmir Ayupov   DebugLineTableRowRef RowRef =
19197dee646bSAmir Ayupov       DebugLineTableRowRef::fromSMLoc(Instruction.getLoc());
19207dee646bSAmir Ayupov   if (RowRef == DebugLineTableRowRef::NULL_ROW)
19217dee646bSAmir Ayupov     return;
19227dee646bSAmir Ayupov 
19237dee646bSAmir Ayupov   const DWARFDebugLine::LineTable *LineTable;
19247dee646bSAmir Ayupov   if (Function && Function->getDWARFUnit() &&
19257dee646bSAmir Ayupov       Function->getDWARFUnit()->getOffset() == RowRef.DwCompileUnitIndex) {
19267dee646bSAmir Ayupov     LineTable = Function->getDWARFLineTable();
19277dee646bSAmir Ayupov   } else {
19287dee646bSAmir Ayupov     LineTable = DwCtx->getLineTableForUnit(
19297dee646bSAmir Ayupov         DwCtx->getCompileUnitForOffset(RowRef.DwCompileUnitIndex));
19307dee646bSAmir Ayupov   }
19317dee646bSAmir Ayupov   assert(LineTable && "line table expected for instruction with debug info");
19327dee646bSAmir Ayupov 
19337dee646bSAmir Ayupov   const DWARFDebugLine::Row &Row = LineTable->Rows[RowRef.RowIndex - 1];
19347dee646bSAmir Ayupov   StringRef FileName = "";
193589fab98eSFangrui Song   if (std::optional<const char *> FName =
19367dee646bSAmir Ayupov           dwarf::toString(LineTable->Prologue.FileNames[Row.File - 1].Name))
19377dee646bSAmir Ayupov     FileName = *FName;
19387dee646bSAmir Ayupov   OS << " # debug line " << FileName << ":" << Row.Line;
19397dee646bSAmir Ayupov   if (Row.Column)
19407dee646bSAmir Ayupov     OS << ":" << Row.Column;
19417dee646bSAmir Ayupov   if (Row.Discriminator)
19427dee646bSAmir Ayupov     OS << " discriminator:" << Row.Discriminator;
19437dee646bSAmir Ayupov }
19447dee646bSAmir Ayupov 
194540c2e0faSMaksim Panchenko void BinaryContext::printInstruction(raw_ostream &OS, const MCInst &Instruction,
1946a34c753fSRafael Auler                                      uint64_t Offset,
1947a34c753fSRafael Auler                                      const BinaryFunction *Function,
194840c2e0faSMaksim Panchenko                                      bool PrintMCInst, bool PrintMemData,
194969f87b6cSAmir Ayupov                                      bool PrintRelocations,
195069f87b6cSAmir Ayupov                                      StringRef Endl) const {
1951a34c753fSRafael Auler   OS << format("    %08" PRIx64 ": ", Offset);
1952a34c753fSRafael Auler   if (MIB->isCFI(Instruction)) {
1953a34c753fSRafael Auler     uint32_t Offset = Instruction.getOperand(0).getImm();
1954a34c753fSRafael Auler     OS << "\t!CFI\t$" << Offset << "\t; ";
1955a34c753fSRafael Auler     if (Function)
1956a34c753fSRafael Auler       printCFI(OS, *Function->getCFIFor(Instruction));
195769f87b6cSAmir Ayupov     OS << Endl;
1958a34c753fSRafael Auler     return;
1959a34c753fSRafael Auler   }
19606b1cf004SMaksim Panchenko   if (std::optional<uint32_t> DynamicID =
19616b1cf004SMaksim Panchenko           MIB->getDynamicBranchID(Instruction)) {
19626b1cf004SMaksim Panchenko     OS << "\tjit\t" << MIB->getTargetSymbol(Instruction)->getName()
19636b1cf004SMaksim Panchenko        << " # ID: " << DynamicID;
19646b1cf004SMaksim Panchenko   } else {
19654111841fSKristof Beyls     // If there are annotations on the instruction, the MCInstPrinter will fail
19664111841fSKristof Beyls     // to print the preferred alias as it only does so when the number of
19674111841fSKristof Beyls     // operands is as expected. See
19684111841fSKristof Beyls     // https://github.com/llvm/llvm-project/blob/782f1a0d895646c364a53f9dcdd6d4ec1f3e5ea0/llvm/lib/MC/MCInstPrinter.cpp#L142
19694111841fSKristof Beyls     // Therefore, create a temporary copy of the Inst from which the annotations
19704111841fSKristof Beyls     // are removed, and print that Inst.
19714111841fSKristof Beyls     MCInst InstNoAnnot = Instruction;
19724111841fSKristof Beyls     MIB->stripAnnotations(InstNoAnnot);
19734111841fSKristof Beyls     InstPrinter->printInst(&InstNoAnnot, 0, "", *STI, OS);
19746b1cf004SMaksim Panchenko   }
1975a34c753fSRafael Auler   if (MIB->isCall(Instruction)) {
1976a34c753fSRafael Auler     if (MIB->isTailCall(Instruction))
1977a34c753fSRafael Auler       OS << " # TAILCALL ";
1978a34c753fSRafael Auler     if (MIB->isInvoke(Instruction)) {
19792563fd63SAmir Ayupov       const std::optional<MCPlus::MCLandingPad> EHInfo =
19802563fd63SAmir Ayupov           MIB->getEHInfo(Instruction);
1981a34c753fSRafael Auler       OS << " # handler: ";
1982a34c753fSRafael Auler       if (EHInfo->first)
1983a34c753fSRafael Auler         OS << *EHInfo->first;
1984a34c753fSRafael Auler       else
1985a34c753fSRafael Auler         OS << '0';
1986a34c753fSRafael Auler       OS << "; action: " << EHInfo->second;
1987a34c753fSRafael Auler       const int64_t GnuArgsSize = MIB->getGnuArgsSize(Instruction);
1988a34c753fSRafael Auler       if (GnuArgsSize >= 0)
1989a34c753fSRafael Auler         OS << "; GNU_args_size = " << GnuArgsSize;
1990a34c753fSRafael Auler     }
1991a34c753fSRafael Auler   } else if (MIB->isIndirectBranch(Instruction)) {
1992a34c753fSRafael Auler     if (uint64_t JTAddress = MIB->getJumpTable(Instruction)) {
1993a34c753fSRafael Auler       OS << " # JUMPTABLE @0x" << Twine::utohexstr(JTAddress);
1994a34c753fSRafael Auler     } else {
1995a34c753fSRafael Auler       OS << " # UNKNOWN CONTROL FLOW";
1996a34c753fSRafael Auler     }
1997a34c753fSRafael Auler   }
19982563fd63SAmir Ayupov   if (std::optional<uint32_t> Offset = MIB->getOffset(Instruction))
1999a9cd49d5SAmir Ayupov     OS << " # Offset: " << *Offset;
20002db9b6a9SMaksim Panchenko   if (std::optional<uint32_t> Size = MIB->getSize(Instruction))
20012db9b6a9SMaksim Panchenko     OS << " # Size: " << *Size;
20027c206c78SMaksim Panchenko   if (MCSymbol *Label = MIB->getInstLabel(Instruction))
2003e28c393bSmaksfb     OS << " # Label: " << *Label;
2004a34c753fSRafael Auler 
2005a34c753fSRafael Auler   MIB->printAnnotations(Instruction, OS);
2006a34c753fSRafael Auler 
20077dee646bSAmir Ayupov   if (opts::PrintDebugInfo)
20087dee646bSAmir Ayupov     printDebugInfo(OS, Instruction, Function, DwCtx.get());
2009a34c753fSRafael Auler 
2010a34c753fSRafael Auler   if ((opts::PrintRelocations || PrintRelocations) && Function) {
2011a34c753fSRafael Auler     const uint64_t Size = computeCodeSize(&Instruction, &Instruction + 1);
2012a34c753fSRafael Auler     Function->printRelocations(OS, Offset, Size);
2013a34c753fSRafael Auler   }
2014a34c753fSRafael Auler 
201569f87b6cSAmir Ayupov   OS << Endl;
2016a34c753fSRafael Auler 
2017a34c753fSRafael Auler   if (PrintMCInst) {
2018a34c753fSRafael Auler     Instruction.dump_pretty(OS, InstPrinter.get());
201969f87b6cSAmir Ayupov     OS << Endl;
2020a34c753fSRafael Auler   }
2021a34c753fSRafael Auler }
2022a34c753fSRafael Auler 
2023e8f5743eSAmir Ayupov std::optional<uint64_t>
202477b75ca5SMaksim Panchenko BinaryContext::getBaseAddressForMapping(uint64_t MMapAddress,
202577b75ca5SMaksim Panchenko                                         uint64_t FileOffset) const {
202677b75ca5SMaksim Panchenko   // Find a segment with a matching file offset.
202777b75ca5SMaksim Panchenko   for (auto &KV : SegmentMapInfo) {
202877b75ca5SMaksim Panchenko     const SegmentInfo &SegInfo = KV.second;
20296d216fb7SKristof Beyls     // Only consider executable segments.
20306d216fb7SKristof Beyls     if (!SegInfo.IsExecutable)
20316d216fb7SKristof Beyls       continue;
2032ae51ec84SJohnLee1243     // FileOffset is got from perf event,
2033ae51ec84SJohnLee1243     // and it is equal to alignDown(SegInfo.FileOffset, pagesize).
2034ae51ec84SJohnLee1243     // If the pagesize is not equal to SegInfo.Alignment.
2035ae51ec84SJohnLee1243     // FileOffset and SegInfo.FileOffset should be aligned first,
2036ae51ec84SJohnLee1243     // and then judge whether they are equal.
2037ae51ec84SJohnLee1243     if (alignDown(SegInfo.FileOffset, SegInfo.Alignment) ==
2038ae51ec84SJohnLee1243         alignDown(FileOffset, SegInfo.Alignment)) {
2039ae51ec84SJohnLee1243       // The function's offset from base address in VAS is aligned by pagesize
2040ae51ec84SJohnLee1243       // instead of SegInfo.Alignment. Pagesize can't be got from perf events.
2041ae51ec84SJohnLee1243       // However, The ELF document says that SegInfo.FileOffset should equal
2042ae51ec84SJohnLee1243       // to SegInfo.Address, modulo the pagesize.
2043ae51ec84SJohnLee1243       // Reference: https://refspecs.linuxfoundation.org/elf/elf.pdf
2044ae51ec84SJohnLee1243 
2045ae51ec84SJohnLee1243       // So alignDown(SegInfo.Address, pagesize) can be calculated by:
2046ae51ec84SJohnLee1243       // alignDown(SegInfo.Address, pagesize)
2047ae51ec84SJohnLee1243       //   = SegInfo.Address - (SegInfo.Address % pagesize)
2048ae51ec84SJohnLee1243       //   = SegInfo.Address - (SegInfo.FileOffset % pagesize)
2049ae51ec84SJohnLee1243       //   = SegInfo.Address - SegInfo.FileOffset +
2050ae51ec84SJohnLee1243       //     alignDown(SegInfo.FileOffset, pagesize)
2051ae51ec84SJohnLee1243       //   = SegInfo.Address - SegInfo.FileOffset + FileOffset
2052ae51ec84SJohnLee1243       return MMapAddress - (SegInfo.Address - SegInfo.FileOffset + FileOffset);
205377b75ca5SMaksim Panchenko     }
205477b75ca5SMaksim Panchenko   }
205577b75ca5SMaksim Panchenko 
2056e324a80fSKazu Hirata   return std::nullopt;
205777b75ca5SMaksim Panchenko }
205877b75ca5SMaksim Panchenko 
2059a34c753fSRafael Auler ErrorOr<BinarySection &> BinaryContext::getSectionForAddress(uint64_t Address) {
2060a34c753fSRafael Auler   auto SI = AddressToSection.upper_bound(Address);
2061a34c753fSRafael Auler   if (SI != AddressToSection.begin()) {
2062a34c753fSRafael Auler     --SI;
2063a34c753fSRafael Auler     uint64_t UpperBound = SI->first + SI->second->getSize();
2064a34c753fSRafael Auler     if (!SI->second->getSize())
2065a34c753fSRafael Auler       UpperBound += 1;
2066a34c753fSRafael Auler     if (UpperBound > Address)
2067a34c753fSRafael Auler       return *SI->second;
2068a34c753fSRafael Auler   }
2069a34c753fSRafael Auler   return std::make_error_code(std::errc::bad_address);
2070a34c753fSRafael Auler }
2071a34c753fSRafael Auler 
2072a34c753fSRafael Auler ErrorOr<StringRef>
2073a34c753fSRafael Auler BinaryContext::getSectionNameForAddress(uint64_t Address) const {
20743652483cSRafael Auler   if (ErrorOr<const BinarySection &> Section = getSectionForAddress(Address))
2075a34c753fSRafael Auler     return Section->getName();
2076a34c753fSRafael Auler   return std::make_error_code(std::errc::bad_address);
2077a34c753fSRafael Auler }
2078a34c753fSRafael Auler 
2079a34c753fSRafael Auler BinarySection &BinaryContext::registerSection(BinarySection *Section) {
2080a34c753fSRafael Auler   auto Res = Sections.insert(Section);
2081a34c753fSRafael Auler   (void)Res;
2082a34c753fSRafael Auler   assert(Res.second && "can't register the same section twice.");
2083a34c753fSRafael Auler 
2084a34c753fSRafael Auler   // Only register allocatable sections in the AddressToSection map.
2085a34c753fSRafael Auler   if (Section->isAllocatable() && Section->getAddress())
2086a34c753fSRafael Auler     AddressToSection.insert(std::make_pair(Section->getAddress(), Section));
2087a34c753fSRafael Auler   NameToSection.insert(
2088a34c753fSRafael Auler       std::make_pair(std::string(Section->getName()), Section));
20894d3a0cadSMaksim Panchenko   if (Section->hasSectionRef())
20904d3a0cadSMaksim Panchenko     SectionRefToBinarySection.insert(
20914d3a0cadSMaksim Panchenko         std::make_pair(Section->getSectionRef(), Section));
20924d3a0cadSMaksim Panchenko 
2093a34c753fSRafael Auler   LLVM_DEBUG(dbgs() << "BOLT-DEBUG: registering " << *Section << "\n");
2094a34c753fSRafael Auler   return *Section;
2095a34c753fSRafael Auler }
2096a34c753fSRafael Auler 
2097a34c753fSRafael Auler BinarySection &BinaryContext::registerSection(SectionRef Section) {
2098a34c753fSRafael Auler   return registerSection(new BinarySection(*this, Section));
2099a34c753fSRafael Auler }
2100a34c753fSRafael Auler 
2101a34c753fSRafael Auler BinarySection &
21024d3a0cadSMaksim Panchenko BinaryContext::registerSection(const Twine &SectionName,
2103a34c753fSRafael Auler                                const BinarySection &OriginalSection) {
210440c2e0faSMaksim Panchenko   return registerSection(
210540c2e0faSMaksim Panchenko       new BinarySection(*this, SectionName, OriginalSection));
2106a34c753fSRafael Auler }
2107a34c753fSRafael Auler 
210840c2e0faSMaksim Panchenko BinarySection &
21094d3a0cadSMaksim Panchenko BinaryContext::registerOrUpdateSection(const Twine &Name, unsigned ELFType,
211040c2e0faSMaksim Panchenko                                        unsigned ELFFlags, uint8_t *Data,
211140c2e0faSMaksim Panchenko                                        uint64_t Size, unsigned Alignment) {
2112a34c753fSRafael Auler   auto NamedSections = getSectionByName(Name);
2113a34c753fSRafael Auler   if (NamedSections.begin() != NamedSections.end()) {
2114a34c753fSRafael Auler     assert(std::next(NamedSections.begin()) == NamedSections.end() &&
2115a34c753fSRafael Auler            "can only update unique sections");
2116a34c753fSRafael Auler     BinarySection *Section = NamedSections.begin()->second;
2117a34c753fSRafael Auler 
2118a34c753fSRafael Auler     LLVM_DEBUG(dbgs() << "BOLT-DEBUG: updating " << *Section << " -> ");
2119a34c753fSRafael Auler     const bool Flag = Section->isAllocatable();
2120a34c753fSRafael Auler     (void)Flag;
2121a34c753fSRafael Auler     Section->update(Data, Size, Alignment, ELFType, ELFFlags);
2122a34c753fSRafael Auler     LLVM_DEBUG(dbgs() << *Section << "\n");
2123a34c753fSRafael Auler     // FIXME: Fix section flags/attributes for MachO.
2124a34c753fSRafael Auler     if (isELF())
2125a34c753fSRafael Auler       assert(Flag == Section->isAllocatable() &&
2126a34c753fSRafael Auler              "can't change section allocation status");
2127a34c753fSRafael Auler     return *Section;
2128a34c753fSRafael Auler   }
2129a34c753fSRafael Auler 
213040c2e0faSMaksim Panchenko   return registerSection(
213140c2e0faSMaksim Panchenko       new BinarySection(*this, Name, Data, Size, Alignment, ELFType, ELFFlags));
2132a34c753fSRafael Auler }
2133a34c753fSRafael Auler 
21344d3a0cadSMaksim Panchenko void BinaryContext::deregisterSectionName(const BinarySection &Section) {
21354d3a0cadSMaksim Panchenko   auto NameRange = NameToSection.equal_range(Section.getName().str());
21364d3a0cadSMaksim Panchenko   while (NameRange.first != NameRange.second) {
21374d3a0cadSMaksim Panchenko     if (NameRange.first->second == &Section) {
21384d3a0cadSMaksim Panchenko       NameToSection.erase(NameRange.first);
21394d3a0cadSMaksim Panchenko       break;
21404d3a0cadSMaksim Panchenko     }
21414d3a0cadSMaksim Panchenko     ++NameRange.first;
21424d3a0cadSMaksim Panchenko   }
21434d3a0cadSMaksim Panchenko }
21444d3a0cadSMaksim Panchenko 
21454d3a0cadSMaksim Panchenko void BinaryContext::deregisterUnusedSections() {
21464d3a0cadSMaksim Panchenko   ErrorOr<BinarySection &> AbsSection = getUniqueSectionByName("<absolute>");
21474d3a0cadSMaksim Panchenko   for (auto SI = Sections.begin(); SI != Sections.end();) {
21484d3a0cadSMaksim Panchenko     BinarySection *Section = *SI;
214905634f73SJob Noorman     // We check getOutputData() instead of getOutputSize() because sometimes
215005634f73SJob Noorman     // zero-sized .text.cold sections are allocated.
215105634f73SJob Noorman     if (Section->hasSectionRef() || Section->getOutputData() ||
21524d3a0cadSMaksim Panchenko         (AbsSection && Section == &AbsSection.get())) {
21534d3a0cadSMaksim Panchenko       ++SI;
21544d3a0cadSMaksim Panchenko       continue;
21554d3a0cadSMaksim Panchenko     }
21564d3a0cadSMaksim Panchenko 
21574d3a0cadSMaksim Panchenko     LLVM_DEBUG(dbgs() << "LLVM-DEBUG: deregistering " << Section->getName()
21584d3a0cadSMaksim Panchenko                       << '\n';);
21594d3a0cadSMaksim Panchenko     deregisterSectionName(*Section);
21604d3a0cadSMaksim Panchenko     SI = Sections.erase(SI);
21614d3a0cadSMaksim Panchenko     delete Section;
21624d3a0cadSMaksim Panchenko   }
21634d3a0cadSMaksim Panchenko }
21644d3a0cadSMaksim Panchenko 
2165a34c753fSRafael Auler bool BinaryContext::deregisterSection(BinarySection &Section) {
2166a34c753fSRafael Auler   BinarySection *SectionPtr = &Section;
2167a34c753fSRafael Auler   auto Itr = Sections.find(SectionPtr);
2168a34c753fSRafael Auler   if (Itr != Sections.end()) {
2169a34c753fSRafael Auler     auto Range = AddressToSection.equal_range(SectionPtr->getAddress());
2170a34c753fSRafael Auler     while (Range.first != Range.second) {
2171a34c753fSRafael Auler       if (Range.first->second == SectionPtr) {
2172a34c753fSRafael Auler         AddressToSection.erase(Range.first);
2173a34c753fSRafael Auler         break;
2174a34c753fSRafael Auler       }
2175a34c753fSRafael Auler       ++Range.first;
2176a34c753fSRafael Auler     }
2177a34c753fSRafael Auler 
21784d3a0cadSMaksim Panchenko     deregisterSectionName(*SectionPtr);
2179a34c753fSRafael Auler     Sections.erase(Itr);
2180a34c753fSRafael Auler     delete SectionPtr;
2181a34c753fSRafael Auler     return true;
2182a34c753fSRafael Auler   }
2183a34c753fSRafael Auler   return false;
2184a34c753fSRafael Auler }
2185a34c753fSRafael Auler 
21864d3a0cadSMaksim Panchenko void BinaryContext::renameSection(BinarySection &Section,
21874d3a0cadSMaksim Panchenko                                   const Twine &NewName) {
21884d3a0cadSMaksim Panchenko   auto Itr = Sections.find(&Section);
21894d3a0cadSMaksim Panchenko   assert(Itr != Sections.end() && "Section must exist to be renamed.");
21904d3a0cadSMaksim Panchenko   Sections.erase(Itr);
21914d3a0cadSMaksim Panchenko 
21924d3a0cadSMaksim Panchenko   deregisterSectionName(Section);
21934d3a0cadSMaksim Panchenko 
21944d3a0cadSMaksim Panchenko   Section.Name = NewName.str();
2195c92ff2a3Srevunov.denis@huawei.com   Section.setOutputName(Section.Name);
21964d3a0cadSMaksim Panchenko 
2197c92ff2a3Srevunov.denis@huawei.com   NameToSection.insert(std::make_pair(Section.Name, &Section));
21984d3a0cadSMaksim Panchenko 
21994d3a0cadSMaksim Panchenko   // Reinsert with the new name.
22004d3a0cadSMaksim Panchenko   Sections.insert(&Section);
22014d3a0cadSMaksim Panchenko }
22024d3a0cadSMaksim Panchenko 
2203a34c753fSRafael Auler void BinaryContext::printSections(raw_ostream &OS) const {
22043652483cSRafael Auler   for (BinarySection *const &Section : Sections)
2205a34c753fSRafael Auler     OS << "BOLT-INFO: " << *Section << "\n";
2206a34c753fSRafael Auler }
2207a34c753fSRafael Auler 
2208a34c753fSRafael Auler BinarySection &BinaryContext::absoluteSection() {
2209a34c753fSRafael Auler   if (ErrorOr<BinarySection &> Section = getUniqueSectionByName("<absolute>"))
2210a34c753fSRafael Auler     return *Section;
2211a34c753fSRafael Auler   return registerOrUpdateSection("<absolute>", ELF::SHT_NULL, 0u);
2212a34c753fSRafael Auler }
2213a34c753fSRafael Auler 
221440c2e0faSMaksim Panchenko ErrorOr<uint64_t> BinaryContext::getUnsignedValueAtAddress(uint64_t Address,
2215a34c753fSRafael Auler                                                            size_t Size) const {
2216a34c753fSRafael Auler   const ErrorOr<const BinarySection &> Section = getSectionForAddress(Address);
2217a34c753fSRafael Auler   if (!Section)
2218a34c753fSRafael Auler     return std::make_error_code(std::errc::bad_address);
2219a34c753fSRafael Auler 
2220a34c753fSRafael Auler   if (Section->isVirtual())
2221a34c753fSRafael Auler     return 0;
2222a34c753fSRafael Auler 
2223a34c753fSRafael Auler   DataExtractor DE(Section->getContents(), AsmInfo->isLittleEndian(),
2224a34c753fSRafael Auler                    AsmInfo->getCodePointerSize());
2225a34c753fSRafael Auler   auto ValueOffset = static_cast<uint64_t>(Address - Section->getAddress());
2226a34c753fSRafael Auler   return DE.getUnsigned(&ValueOffset, Size);
2227a34c753fSRafael Auler }
2228a34c753fSRafael Auler 
2229c460e454SAmir Ayupov ErrorOr<int64_t> BinaryContext::getSignedValueAtAddress(uint64_t Address,
2230a34c753fSRafael Auler                                                         size_t Size) const {
2231a34c753fSRafael Auler   const ErrorOr<const BinarySection &> Section = getSectionForAddress(Address);
2232a34c753fSRafael Auler   if (!Section)
2233a34c753fSRafael Auler     return std::make_error_code(std::errc::bad_address);
2234a34c753fSRafael Auler 
2235a34c753fSRafael Auler   if (Section->isVirtual())
2236a34c753fSRafael Auler     return 0;
2237a34c753fSRafael Auler 
2238a34c753fSRafael Auler   DataExtractor DE(Section->getContents(), AsmInfo->isLittleEndian(),
2239a34c753fSRafael Auler                    AsmInfo->getCodePointerSize());
2240a34c753fSRafael Auler   auto ValueOffset = static_cast<uint64_t>(Address - Section->getAddress());
2241a34c753fSRafael Auler   return DE.getSigned(&ValueOffset, Size);
2242a34c753fSRafael Auler }
2243a34c753fSRafael Auler 
224440c2e0faSMaksim Panchenko void BinaryContext::addRelocation(uint64_t Address, MCSymbol *Symbol,
224540c2e0faSMaksim Panchenko                                   uint64_t Type, uint64_t Addend,
2246a34c753fSRafael Auler                                   uint64_t Value) {
2247a34c753fSRafael Auler   ErrorOr<BinarySection &> Section = getSectionForAddress(Address);
2248a34c753fSRafael Auler   assert(Section && "cannot find section for address");
224940c2e0faSMaksim Panchenko   Section->addRelocation(Address - Section->getAddress(), Symbol, Type, Addend,
2250a34c753fSRafael Auler                          Value);
2251a34c753fSRafael Auler }
2252a34c753fSRafael Auler 
225340c2e0faSMaksim Panchenko void BinaryContext::addDynamicRelocation(uint64_t Address, MCSymbol *Symbol,
225440c2e0faSMaksim Panchenko                                          uint64_t Type, uint64_t Addend,
2255a34c753fSRafael Auler                                          uint64_t Value) {
2256a34c753fSRafael Auler   ErrorOr<BinarySection &> Section = getSectionForAddress(Address);
2257a34c753fSRafael Auler   assert(Section && "cannot find section for address");
225840c2e0faSMaksim Panchenko   Section->addDynamicRelocation(Address - Section->getAddress(), Symbol, Type,
225940c2e0faSMaksim Panchenko                                 Addend, Value);
2260a34c753fSRafael Auler }
2261a34c753fSRafael Auler 
2262a34c753fSRafael Auler bool BinaryContext::removeRelocationAt(uint64_t Address) {
2263a34c753fSRafael Auler   ErrorOr<BinarySection &> Section = getSectionForAddress(Address);
2264a34c753fSRafael Auler   assert(Section && "cannot find section for address");
2265a34c753fSRafael Auler   return Section->removeRelocationAt(Address - Section->getAddress());
2266a34c753fSRafael Auler }
2267a34c753fSRafael Auler 
226808ab4fafSAmir Ayupov const Relocation *BinaryContext::getRelocationAt(uint64_t Address) const {
226908ab4fafSAmir Ayupov   ErrorOr<const BinarySection &> Section = getSectionForAddress(Address);
2270a34c753fSRafael Auler   if (!Section)
2271a34c753fSRafael Auler     return nullptr;
2272a34c753fSRafael Auler 
2273a34c753fSRafael Auler   return Section->getRelocationAt(Address - Section->getAddress());
2274a34c753fSRafael Auler }
2275a34c753fSRafael Auler 
2276702fe36bSAmir Ayupov const Relocation *
2277702fe36bSAmir Ayupov BinaryContext::getDynamicRelocationAt(uint64_t Address) const {
2278702fe36bSAmir Ayupov   ErrorOr<const BinarySection &> Section = getSectionForAddress(Address);
2279a34c753fSRafael Auler   if (!Section)
2280a34c753fSRafael Auler     return nullptr;
2281a34c753fSRafael Auler 
2282a34c753fSRafael Auler   return Section->getDynamicRelocationAt(Address - Section->getAddress());
2283a34c753fSRafael Auler }
2284a34c753fSRafael Auler 
2285a34c753fSRafael Auler void BinaryContext::markAmbiguousRelocations(BinaryData &BD,
2286a34c753fSRafael Auler                                              const uint64_t Address) {
2287a34c753fSRafael Auler   auto setImmovable = [&](BinaryData &BD) {
2288a34c753fSRafael Auler     BinaryData *Root = BD.getAtomicRoot();
2289a34c753fSRafael Auler     LLVM_DEBUG(if (Root->isMoveable()) {
2290a34c753fSRafael Auler       dbgs() << "BOLT-DEBUG: setting " << *Root << " as immovable "
2291a34c753fSRafael Auler              << "due to ambiguous relocation referencing 0x"
2292a34c753fSRafael Auler              << Twine::utohexstr(Address) << '\n';
2293a34c753fSRafael Auler     });
2294a34c753fSRafael Auler     Root->setIsMoveable(false);
2295a34c753fSRafael Auler   };
2296a34c753fSRafael Auler 
2297a34c753fSRafael Auler   if (Address == BD.getAddress()) {
2298a34c753fSRafael Auler     setImmovable(BD);
2299a34c753fSRafael Auler 
2300a34c753fSRafael Auler     // Set previous symbol as immovable
2301a34c753fSRafael Auler     BinaryData *Prev = getBinaryDataContainingAddress(Address - 1);
2302a34c753fSRafael Auler     if (Prev && Prev->getEndAddress() == BD.getAddress())
2303a34c753fSRafael Auler       setImmovable(*Prev);
2304a34c753fSRafael Auler   }
2305a34c753fSRafael Auler 
2306a34c753fSRafael Auler   if (Address == BD.getEndAddress()) {
2307a34c753fSRafael Auler     setImmovable(BD);
2308a34c753fSRafael Auler 
2309a34c753fSRafael Auler     // Set next symbol as immovable
2310a34c753fSRafael Auler     BinaryData *Next = getBinaryDataContainingAddress(BD.getEndAddress());
2311a34c753fSRafael Auler     if (Next && Next->getAddress() == BD.getEndAddress())
2312a34c753fSRafael Auler       setImmovable(*Next);
2313a34c753fSRafael Auler   }
2314a34c753fSRafael Auler }
2315a34c753fSRafael Auler 
2316a34c753fSRafael Auler BinaryFunction *BinaryContext::getFunctionForSymbol(const MCSymbol *Symbol,
2317a34c753fSRafael Auler                                                     uint64_t *EntryDesc) {
2318e8ce5f1eSNico Weber   std::shared_lock<llvm::sys::RWMutex> Lock(SymbolToFunctionMapMutex);
2319a34c753fSRafael Auler   auto BFI = SymbolToFunctionMap.find(Symbol);
2320a34c753fSRafael Auler   if (BFI == SymbolToFunctionMap.end())
2321a34c753fSRafael Auler     return nullptr;
2322a34c753fSRafael Auler 
2323a34c753fSRafael Auler   BinaryFunction *BF = BFI->second;
2324a34c753fSRafael Auler   if (EntryDesc)
2325a34c753fSRafael Auler     *EntryDesc = BF->getEntryIDForSymbol(Symbol);
2326a34c753fSRafael Auler 
2327a34c753fSRafael Auler   return BF;
2328a34c753fSRafael Auler }
2329a34c753fSRafael Auler 
233052cf0711SAmir Ayupov std::string
233152cf0711SAmir Ayupov BinaryContext::generateBugReportMessage(StringRef Message,
2332a34c753fSRafael Auler                                         const BinaryFunction &Function) const {
233352cf0711SAmir Ayupov   std::string Msg;
233452cf0711SAmir Ayupov   raw_string_ostream SS(Msg);
233552cf0711SAmir Ayupov   SS << "=======================================\n";
233652cf0711SAmir Ayupov   SS << "BOLT is unable to proceed because it couldn't properly understand "
2337a34c753fSRafael Auler         "this function.\n";
233852cf0711SAmir Ayupov   SS << "If you are running the most recent version of BOLT, you may "
2339a34c753fSRafael Auler         "want to "
2340a34c753fSRafael Auler         "report this and paste this dump.\nPlease check that there is no "
2341a34c753fSRafael Auler         "sensitive contents being shared in this dump.\n";
234252cf0711SAmir Ayupov   SS << "\nOffending function: " << Function.getPrintName() << "\n\n";
234352cf0711SAmir Ayupov   ScopedPrinter SP(SS);
2344a34c753fSRafael Auler   SP.printBinaryBlock("Function contents", *Function.getData());
234552cf0711SAmir Ayupov   SS << "\n";
234652cf0711SAmir Ayupov   const_cast<BinaryFunction &>(Function).print(SS, "");
234752cf0711SAmir Ayupov   SS << "ERROR: " << Message;
234852cf0711SAmir Ayupov   SS << "\n=======================================\n";
234952cf0711SAmir Ayupov   return Msg;
2350a34c753fSRafael Auler }
2351a34c753fSRafael Auler 
2352a34c753fSRafael Auler BinaryFunction *
2353a34c753fSRafael Auler BinaryContext::createInjectedBinaryFunction(const std::string &Name,
2354a34c753fSRafael Auler                                             bool IsSimple) {
2355a34c753fSRafael Auler   InjectedBinaryFunctions.push_back(new BinaryFunction(Name, *this, IsSimple));
2356a34c753fSRafael Auler   BinaryFunction *BF = InjectedBinaryFunctions.back();
2357a34c753fSRafael Auler   setSymbolToFunctionMap(BF->getSymbol(), BF);
2358a34c753fSRafael Auler   BF->CurrentState = BinaryFunction::State::CFG;
2359a34c753fSRafael Auler   return BF;
2360a34c753fSRafael Auler }
2361a34c753fSRafael Auler 
2362a34c753fSRafael Auler std::pair<size_t, size_t>
2363a34c753fSRafael Auler BinaryContext::calculateEmittedSize(BinaryFunction &BF, bool FixBranches) {
2364a34c753fSRafael Auler   // Adjust branch instruction to match the current layout.
2365a34c753fSRafael Auler   if (FixBranches)
2366a34c753fSRafael Auler     BF.fixBranches();
2367a34c753fSRafael Auler 
2368a34c753fSRafael Auler   // Create local MC context to isolate the effect of ephemeral code emission.
2369a34c753fSRafael Auler   IndependentCodeEmitter MCEInstance = createIndependentMCCodeEmitter();
2370a34c753fSRafael Auler   MCContext *LocalCtx = MCEInstance.LocalCtx.get();
2371a34c753fSRafael Auler   MCAsmBackend *MAB =
2372a34c753fSRafael Auler       TheTarget->createMCAsmBackend(*STI, *MRI, MCTargetOptions());
2373a34c753fSRafael Auler 
2374a34c753fSRafael Auler   SmallString<256> Code;
2375a34c753fSRafael Auler   raw_svector_ostream VecOS(Code);
2376a34c753fSRafael Auler 
2377a34c753fSRafael Auler   std::unique_ptr<MCObjectWriter> OW = MAB->createObjectWriter(VecOS);
2378a34c753fSRafael Auler   std::unique_ptr<MCStreamer> Streamer(TheTarget->createMCObjectStreamer(
2379a34c753fSRafael Auler       *TheTriple, *LocalCtx, std::unique_ptr<MCAsmBackend>(MAB), std::move(OW),
238086e21e1aSFangrui Song       std::unique_ptr<MCCodeEmitter>(MCEInstance.MCE.release()), *STI));
2381a34c753fSRafael Auler 
2382a34c753fSRafael Auler   Streamer->initSections(false, *STI);
2383a34c753fSRafael Auler 
2384a34c753fSRafael Auler   MCSection *Section = MCEInstance.LocalMOFI->getTextSection();
2385a34c753fSRafael Auler   Section->setHasInstructions(true);
2386a34c753fSRafael Auler 
2387a34c753fSRafael Auler   // Create symbols in the LocalCtx so that they get destroyed with it.
2388a34c753fSRafael Auler   MCSymbol *StartLabel = LocalCtx->createTempSymbol();
2389a34c753fSRafael Auler   MCSymbol *EndLabel = LocalCtx->createTempSymbol();
2390a34c753fSRafael Auler 
2391adf4142fSFangrui Song   Streamer->switchSection(Section);
2392a34c753fSRafael Auler   Streamer->emitLabel(StartLabel);
2393275e075cSFabian Parzefall   emitFunctionBody(*Streamer, BF, BF.getLayout().getMainFragment(),
2394a34c753fSRafael Auler                    /*EmitCodeOnly=*/true);
2395a34c753fSRafael Auler   Streamer->emitLabel(EndLabel);
2396a34c753fSRafael Auler 
2397275e075cSFabian Parzefall   using LabelRange = std::pair<const MCSymbol *, const MCSymbol *>;
2398275e075cSFabian Parzefall   SmallVector<LabelRange> SplitLabels;
239907f63b0aSFabian Parzefall   for (FunctionFragment &FF : BF.getLayout().getSplitFragments()) {
2400275e075cSFabian Parzefall     MCSymbol *const SplitStartLabel = LocalCtx->createTempSymbol();
2401275e075cSFabian Parzefall     MCSymbol *const SplitEndLabel = LocalCtx->createTempSymbol();
2402275e075cSFabian Parzefall     SplitLabels.emplace_back(SplitStartLabel, SplitEndLabel);
2403a34c753fSRafael Auler 
2404275e075cSFabian Parzefall     MCSectionELF *const SplitSection = LocalCtx->getELFSection(
24050f74d191SFabian Parzefall         BF.getCodeSectionName(FF.getFragmentNum()), ELF::SHT_PROGBITS,
2406275e075cSFabian Parzefall         ELF::SHF_EXECINSTR | ELF::SHF_ALLOC);
2407275e075cSFabian Parzefall     SplitSection->setHasInstructions(true);
2408275e075cSFabian Parzefall     Streamer->switchSection(SplitSection);
2409275e075cSFabian Parzefall 
2410275e075cSFabian Parzefall     Streamer->emitLabel(SplitStartLabel);
2411275e075cSFabian Parzefall     emitFunctionBody(*Streamer, BF, FF, /*EmitCodeOnly=*/true);
2412275e075cSFabian Parzefall     Streamer->emitLabel(SplitEndLabel);
2413a34c753fSRafael Auler   }
2414a34c753fSRafael Auler 
2415a34c753fSRafael Auler   MCAssembler &Assembler =
2416a34c753fSRafael Auler       static_cast<MCObjectStreamer *>(Streamer.get())->getAssembler();
241735668e2cSFangrui Song   Assembler.layout();
2418a34c753fSRafael Auler 
2419d333c0e0SShatianWang   // Obtain fragment sizes.
2420d333c0e0SShatianWang   std::vector<uint64_t> FragmentSizes;
2421d333c0e0SShatianWang   // Main fragment size.
2422dbf12b2fSFangrui Song   const uint64_t HotSize = Assembler.getSymbolOffset(*EndLabel) -
2423dbf12b2fSFangrui Song                            Assembler.getSymbolOffset(*StartLabel);
2424d333c0e0SShatianWang   FragmentSizes.push_back(HotSize);
2425d333c0e0SShatianWang   // Split fragment sizes.
2426d333c0e0SShatianWang   uint64_t ColdSize = 0;
2427d333c0e0SShatianWang   for (const auto &Labels : SplitLabels) {
2428dbf12b2fSFangrui Song     uint64_t Size = Assembler.getSymbolOffset(*Labels.second) -
2429dbf12b2fSFangrui Song                     Assembler.getSymbolOffset(*Labels.first);
2430d333c0e0SShatianWang     FragmentSizes.push_back(Size);
2431d333c0e0SShatianWang     ColdSize += Size;
2432d333c0e0SShatianWang   }
2433d333c0e0SShatianWang 
2434d333c0e0SShatianWang   // Populate new start and end offsets of each basic block.
2435d333c0e0SShatianWang   uint64_t FragmentIndex = 0;
2436d333c0e0SShatianWang   for (FunctionFragment &FF : BF.getLayout().fragments()) {
2437d333c0e0SShatianWang     BinaryBasicBlock *PrevBB = nullptr;
2438d333c0e0SShatianWang     for (BinaryBasicBlock *BB : FF) {
2439dbf12b2fSFangrui Song       const uint64_t BBStartOffset =
2440dbf12b2fSFangrui Song           Assembler.getSymbolOffset(*(BB->getLabel()));
2441d333c0e0SShatianWang       BB->setOutputStartAddress(BBStartOffset);
2442d333c0e0SShatianWang       if (PrevBB)
2443d333c0e0SShatianWang         PrevBB->setOutputEndAddress(BBStartOffset);
2444d333c0e0SShatianWang       PrevBB = BB;
2445d333c0e0SShatianWang     }
2446d333c0e0SShatianWang     if (PrevBB)
2447d333c0e0SShatianWang       PrevBB->setOutputEndAddress(FragmentSizes[FragmentIndex]);
2448d333c0e0SShatianWang     FragmentIndex++;
2449d333c0e0SShatianWang   }
2450a34c753fSRafael Auler 
2451a34c753fSRafael Auler   // Clean-up the effect of the code emission.
2452a34c753fSRafael Auler   for (const MCSymbol &Symbol : Assembler.symbols()) {
2453a34c753fSRafael Auler     MCSymbol *MutableSymbol = const_cast<MCSymbol *>(&Symbol);
2454a34c753fSRafael Auler     MutableSymbol->setUndefined();
2455a34c753fSRafael Auler     MutableSymbol->setIsRegistered(false);
2456a34c753fSRafael Auler   }
2457a34c753fSRafael Auler 
2458a34c753fSRafael Auler   return std::make_pair(HotSize, ColdSize);
2459a34c753fSRafael Auler }
2460a34c753fSRafael Auler 
2461bcc4c909SMaksim Panchenko bool BinaryContext::validateInstructionEncoding(
2462bcc4c909SMaksim Panchenko     ArrayRef<uint8_t> InputSequence) const {
2463bcc4c909SMaksim Panchenko   MCInst Inst;
2464bcc4c909SMaksim Panchenko   uint64_t InstSize;
2465bcc4c909SMaksim Panchenko   DisAsm->getInstruction(Inst, InstSize, InputSequence, 0, nulls());
2466bcc4c909SMaksim Panchenko   assert(InstSize == InputSequence.size() &&
2467bcc4c909SMaksim Panchenko          "Disassembled instruction size does not match the sequence.");
2468bcc4c909SMaksim Panchenko 
2469a34c753fSRafael Auler   SmallString<256> Code;
2470a34c753fSRafael Auler   SmallVector<MCFixup, 4> Fixups;
2471a34c753fSRafael Auler 
24720c049ea6SAlexis Engelke   MCE->encodeInstruction(Inst, Code, Fixups, *STI);
2473bcc4c909SMaksim Panchenko   auto OutputSequence = ArrayRef<uint8_t>((uint8_t *)Code.data(), Code.size());
2474bcc4c909SMaksim Panchenko   if (InputSequence != OutputSequence) {
2475a34c753fSRafael Auler     if (opts::Verbosity > 1) {
247652cf0711SAmir Ayupov       this->errs() << "BOLT-WARNING: mismatched encoding detected\n"
2477bcc4c909SMaksim Panchenko                    << "      input: " << InputSequence << '\n'
2478bcc4c909SMaksim Panchenko                    << "     output: " << OutputSequence << '\n';
2479a34c753fSRafael Auler     }
2480a34c753fSRafael Auler     return false;
2481a34c753fSRafael Auler   }
2482a34c753fSRafael Auler 
2483a34c753fSRafael Auler   return true;
2484a34c753fSRafael Auler }
2485a34c753fSRafael Auler 
2486a34c753fSRafael Auler uint64_t BinaryContext::getHotThreshold() const {
2487a34c753fSRafael Auler   static uint64_t Threshold = 0;
2488a34c753fSRafael Auler   if (Threshold == 0) {
248940c2e0faSMaksim Panchenko     Threshold = std::max(
249040c2e0faSMaksim Panchenko         (uint64_t)opts::ExecutionCountThreshold,
2491a34c753fSRafael Auler         NumProfiledFuncs ? SumExecutionCount / (2 * NumProfiledFuncs) : 1);
2492a34c753fSRafael Auler   }
2493a34c753fSRafael Auler   return Threshold;
2494a34c753fSRafael Auler }
2495a34c753fSRafael Auler 
249640c2e0faSMaksim Panchenko BinaryFunction *BinaryContext::getBinaryFunctionContainingAddress(
249740c2e0faSMaksim Panchenko     uint64_t Address, bool CheckPastEnd, bool UseMaxSize) {
2498a34c753fSRafael Auler   auto FI = BinaryFunctions.upper_bound(Address);
2499a34c753fSRafael Auler   if (FI == BinaryFunctions.begin())
2500a34c753fSRafael Auler     return nullptr;
2501a34c753fSRafael Auler   --FI;
2502a34c753fSRafael Auler 
2503a34c753fSRafael Auler   const uint64_t UsedSize =
2504a34c753fSRafael Auler       UseMaxSize ? FI->second.getMaxSize() : FI->second.getSize();
2505a34c753fSRafael Auler 
2506a34c753fSRafael Auler   if (Address >= FI->first + UsedSize + (CheckPastEnd ? 1 : 0))
2507a34c753fSRafael Auler     return nullptr;
2508a34c753fSRafael Auler 
2509a34c753fSRafael Auler   return &FI->second;
2510a34c753fSRafael Auler }
2511a34c753fSRafael Auler 
251240c2e0faSMaksim Panchenko BinaryFunction *BinaryContext::getBinaryFunctionAtAddress(uint64_t Address) {
2513a34c753fSRafael Auler   // First, try to find a function starting at the given address. If the
2514a34c753fSRafael Auler   // function was folded, this will get us the original folded function if it
2515a34c753fSRafael Auler   // wasn't removed from the list, e.g. in non-relocation mode.
2516a34c753fSRafael Auler   auto BFI = BinaryFunctions.find(Address);
25173652483cSRafael Auler   if (BFI != BinaryFunctions.end())
2518a34c753fSRafael Auler     return &BFI->second;
2519a34c753fSRafael Auler 
2520a34c753fSRafael Auler   // We might have folded the function matching the object at the given
2521a34c753fSRafael Auler   // address. In such case, we look for a function matching the symbol
2522a34c753fSRafael Auler   // registered at the original address. The new function (the one that the
2523a34c753fSRafael Auler   // original was folded into) will hold the symbol.
2524a34c753fSRafael Auler   if (const BinaryData *BD = getBinaryDataAtAddress(Address)) {
2525a34c753fSRafael Auler     uint64_t EntryID = 0;
2526a34c753fSRafael Auler     BinaryFunction *BF = getFunctionForSymbol(BD->getSymbol(), &EntryID);
2527a34c753fSRafael Auler     if (BF && EntryID == 0)
2528a34c753fSRafael Auler       return BF;
2529a34c753fSRafael Auler   }
2530a34c753fSRafael Auler   return nullptr;
2531a34c753fSRafael Auler }
2532a34c753fSRafael Auler 
25333023b15fSAmir Ayupov /// Deregister JumpTable registered at a given \p Address and delete it.
25343023b15fSAmir Ayupov void BinaryContext::deleteJumpTable(uint64_t Address) {
25353023b15fSAmir Ayupov   assert(JumpTables.count(Address) && "Must have a jump table at address");
25363023b15fSAmir Ayupov   JumpTable *JT = JumpTables.at(Address);
25373023b15fSAmir Ayupov   for (BinaryFunction *Parent : JT->Parents)
25383023b15fSAmir Ayupov     Parent->JumpTables.erase(Address);
25393023b15fSAmir Ayupov   JumpTables.erase(Address);
25403023b15fSAmir Ayupov   delete JT;
25413023b15fSAmir Ayupov }
25423023b15fSAmir Ayupov 
2543a34c753fSRafael Auler DebugAddressRangesVector BinaryContext::translateModuleAddressRanges(
2544a34c753fSRafael Auler     const DWARFAddressRangesVector &InputRanges) const {
2545a34c753fSRafael Auler   DebugAddressRangesVector OutputRanges;
2546a34c753fSRafael Auler 
2547a34c753fSRafael Auler   for (const DWARFAddressRange Range : InputRanges) {
2548a34c753fSRafael Auler     auto BFI = BinaryFunctions.lower_bound(Range.LowPC);
2549a34c753fSRafael Auler     while (BFI != BinaryFunctions.end()) {
2550a34c753fSRafael Auler       const BinaryFunction &Function = BFI->second;
2551a34c753fSRafael Auler       if (Function.getAddress() >= Range.HighPC)
2552a34c753fSRafael Auler         break;
2553a34c753fSRafael Auler       const DebugAddressRangesVector FunctionRanges =
2554a34c753fSRafael Auler           Function.getOutputAddressRanges();
2555d2c87699SAmir Ayupov       llvm::move(FunctionRanges, std::back_inserter(OutputRanges));
2556a34c753fSRafael Auler       std::advance(BFI, 1);
2557a34c753fSRafael Auler     }
2558a34c753fSRafael Auler   }
2559a34c753fSRafael Auler 
2560a34c753fSRafael Auler   return OutputRanges;
2561a34c753fSRafael Auler }
2562a34c753fSRafael Auler 
2563a34c753fSRafael Auler } // namespace bolt
2564a34c753fSRafael Auler } // namespace llvm
2565