xref: /llvm-project/bolt/lib/Core/BinaryContext.cpp (revision fa7dd4919aa705f18f268fab5b2887d45f89d8dd)
1 //===- bolt/Core/BinaryContext.cpp - Low-level context --------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements the BinaryContext class.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "bolt/Core/BinaryContext.h"
14 #include "bolt/Core/BinaryEmitter.h"
15 #include "bolt/Core/BinaryFunction.h"
16 #include "bolt/Utils/CommandLineOpts.h"
17 #include "bolt/Utils/NameResolver.h"
18 #include "bolt/Utils/Utils.h"
19 #include "llvm/ADT/STLExtras.h"
20 #include "llvm/ADT/Twine.h"
21 #include "llvm/DebugInfo/DWARF/DWARFCompileUnit.h"
22 #include "llvm/DebugInfo/DWARF/DWARFFormValue.h"
23 #include "llvm/DebugInfo/DWARF/DWARFUnit.h"
24 #include "llvm/MC/MCAsmLayout.h"
25 #include "llvm/MC/MCAssembler.h"
26 #include "llvm/MC/MCContext.h"
27 #include "llvm/MC/MCDisassembler/MCDisassembler.h"
28 #include "llvm/MC/MCInstPrinter.h"
29 #include "llvm/MC/MCObjectStreamer.h"
30 #include "llvm/MC/MCObjectWriter.h"
31 #include "llvm/MC/MCRegisterInfo.h"
32 #include "llvm/MC/MCSectionELF.h"
33 #include "llvm/MC/MCStreamer.h"
34 #include "llvm/MC/MCSubtargetInfo.h"
35 #include "llvm/MC/MCSymbol.h"
36 #include "llvm/Support/CommandLine.h"
37 #include "llvm/Support/Error.h"
38 #include "llvm/Support/Regex.h"
39 #include <algorithm>
40 #include <functional>
41 #include <iterator>
42 #include <numeric>
43 #include <unordered_set>
44 
45 using namespace llvm;
46 
47 #undef  DEBUG_TYPE
48 #define DEBUG_TYPE "bolt"
49 
50 namespace opts {
51 
52 cl::opt<bool> NoHugePages("no-huge-pages",
53                           cl::desc("use regular size pages for code alignment"),
54                           cl::Hidden, cl::cat(BoltCategory));
55 
56 static cl::opt<bool>
57 PrintDebugInfo("print-debug-info",
58   cl::desc("print debug info when printing functions"),
59   cl::Hidden,
60   cl::ZeroOrMore,
61   cl::cat(BoltCategory));
62 
63 cl::opt<bool> PrintRelocations(
64     "print-relocations",
65     cl::desc("print relocations when printing functions/objects"), cl::Hidden,
66     cl::cat(BoltCategory));
67 
68 static cl::opt<bool>
69 PrintMemData("print-mem-data",
70   cl::desc("print memory data annotations when printing functions"),
71   cl::Hidden,
72   cl::ZeroOrMore,
73   cl::cat(BoltCategory));
74 
75 cl::opt<std::string> CompDirOverride(
76     "comp-dir-override",
77     cl::desc("overrides DW_AT_comp_dir, and provides an alterantive base "
78              "location, which is used with DW_AT_dwo_name to construct a path "
79              "to *.dwo files."),
80     cl::Hidden, cl::init(""), cl::cat(BoltCategory));
81 } // namespace opts
82 
83 namespace llvm {
84 namespace bolt {
85 
86 char BOLTError::ID = 0;
87 
88 BOLTError::BOLTError(bool IsFatal, const Twine &S)
89     : IsFatal(IsFatal), Msg(S.str()) {}
90 
91 void BOLTError::log(raw_ostream &OS) const {
92   if (IsFatal)
93     OS << "FATAL ";
94   StringRef ErrMsg = StringRef(Msg);
95   // Prepend our error prefix if it is missing
96   if (ErrMsg.empty()) {
97     OS << "BOLT-ERROR\n";
98   } else {
99     if (!ErrMsg.starts_with("BOLT-ERROR"))
100       OS << "BOLT-ERROR: ";
101     OS << ErrMsg << "\n";
102   }
103 }
104 
105 std::error_code BOLTError::convertToErrorCode() const {
106   return inconvertibleErrorCode();
107 }
108 
109 Error createNonFatalBOLTError(const Twine &S) {
110   return make_error<BOLTError>(/*IsFatal*/ false, S);
111 }
112 
113 Error createFatalBOLTError(const Twine &S) {
114   return make_error<BOLTError>(/*IsFatal*/ true, S);
115 }
116 
117 BinaryContext::BinaryContext(std::unique_ptr<MCContext> Ctx,
118                              std::unique_ptr<DWARFContext> DwCtx,
119                              std::unique_ptr<Triple> TheTriple,
120                              const Target *TheTarget, std::string TripleName,
121                              std::unique_ptr<MCCodeEmitter> MCE,
122                              std::unique_ptr<MCObjectFileInfo> MOFI,
123                              std::unique_ptr<const MCAsmInfo> AsmInfo,
124                              std::unique_ptr<const MCInstrInfo> MII,
125                              std::unique_ptr<const MCSubtargetInfo> STI,
126                              std::unique_ptr<MCInstPrinter> InstPrinter,
127                              std::unique_ptr<const MCInstrAnalysis> MIA,
128                              std::unique_ptr<MCPlusBuilder> MIB,
129                              std::unique_ptr<const MCRegisterInfo> MRI,
130                              std::unique_ptr<MCDisassembler> DisAsm)
131     : Ctx(std::move(Ctx)), DwCtx(std::move(DwCtx)),
132       TheTriple(std::move(TheTriple)), TheTarget(TheTarget),
133       TripleName(TripleName), MCE(std::move(MCE)), MOFI(std::move(MOFI)),
134       AsmInfo(std::move(AsmInfo)), MII(std::move(MII)), STI(std::move(STI)),
135       InstPrinter(std::move(InstPrinter)), MIA(std::move(MIA)),
136       MIB(std::move(MIB)), MRI(std::move(MRI)), DisAsm(std::move(DisAsm)) {
137   Relocation::Arch = this->TheTriple->getArch();
138   RegularPageSize = isAArch64() ? RegularPageSizeAArch64 : RegularPageSizeX86;
139   PageAlign = opts::NoHugePages ? RegularPageSize : HugePageSize;
140 }
141 
142 BinaryContext::~BinaryContext() {
143   for (BinarySection *Section : Sections)
144     delete Section;
145   for (BinaryFunction *InjectedFunction : InjectedBinaryFunctions)
146     delete InjectedFunction;
147   for (std::pair<const uint64_t, JumpTable *> JTI : JumpTables)
148     delete JTI.second;
149   clearBinaryData();
150 }
151 
152 /// Create BinaryContext for a given architecture \p ArchName and
153 /// triple \p TripleName.
154 Expected<std::unique_ptr<BinaryContext>>
155 BinaryContext::createBinaryContext(const ObjectFile *File, bool IsPIC,
156                                    std::unique_ptr<DWARFContext> DwCtx) {
157   StringRef ArchName = "";
158   std::string FeaturesStr = "";
159   switch (File->getArch()) {
160   case llvm::Triple::x86_64:
161     ArchName = "x86-64";
162     FeaturesStr = "+nopl";
163     break;
164   case llvm::Triple::aarch64:
165     ArchName = "aarch64";
166     FeaturesStr = "+all";
167     break;
168   case llvm::Triple::riscv64: {
169     ArchName = "riscv64";
170     Expected<SubtargetFeatures> Features = File->getFeatures();
171 
172     if (auto E = Features.takeError())
173       return std::move(E);
174 
175     // We rely on relaxation for some transformations (e.g., promoting all calls
176     // to PseudoCALL and then making JITLink relax them). Since the relax
177     // feature is not stored in the object file, we manually enable it.
178     Features->AddFeature("relax");
179     FeaturesStr = Features->getString();
180     break;
181   }
182   default:
183     return createStringError(std::errc::not_supported,
184                              "BOLT-ERROR: Unrecognized machine in ELF file");
185   }
186 
187   auto TheTriple = std::make_unique<Triple>(File->makeTriple());
188   const std::string TripleName = TheTriple->str();
189 
190   std::string Error;
191   const Target *TheTarget =
192       TargetRegistry::lookupTarget(std::string(ArchName), *TheTriple, Error);
193   if (!TheTarget)
194     return createStringError(make_error_code(std::errc::not_supported),
195                              Twine("BOLT-ERROR: ", Error));
196 
197   std::unique_ptr<const MCRegisterInfo> MRI(
198       TheTarget->createMCRegInfo(TripleName));
199   if (!MRI)
200     return createStringError(
201         make_error_code(std::errc::not_supported),
202         Twine("BOLT-ERROR: no register info for target ", TripleName));
203 
204   // Set up disassembler.
205   std::unique_ptr<MCAsmInfo> AsmInfo(
206       TheTarget->createMCAsmInfo(*MRI, TripleName, MCTargetOptions()));
207   if (!AsmInfo)
208     return createStringError(
209         make_error_code(std::errc::not_supported),
210         Twine("BOLT-ERROR: no assembly info for target ", TripleName));
211   // BOLT creates "func@PLT" symbols for PLT entries. In function assembly dump
212   // we want to emit such names as using @PLT without double quotes to convey
213   // variant kind to the assembler. BOLT doesn't rely on the linker so we can
214   // override the default AsmInfo behavior to emit names the way we want.
215   AsmInfo->setAllowAtInName(true);
216 
217   std::unique_ptr<const MCSubtargetInfo> STI(
218       TheTarget->createMCSubtargetInfo(TripleName, "", FeaturesStr));
219   if (!STI)
220     return createStringError(
221         make_error_code(std::errc::not_supported),
222         Twine("BOLT-ERROR: no subtarget info for target ", TripleName));
223 
224   std::unique_ptr<const MCInstrInfo> MII(TheTarget->createMCInstrInfo());
225   if (!MII)
226     return createStringError(
227         make_error_code(std::errc::not_supported),
228         Twine("BOLT-ERROR: no instruction info for target ", TripleName));
229 
230   std::unique_ptr<MCContext> Ctx(
231       new MCContext(*TheTriple, AsmInfo.get(), MRI.get(), STI.get()));
232   std::unique_ptr<MCObjectFileInfo> MOFI(
233       TheTarget->createMCObjectFileInfo(*Ctx, IsPIC));
234   Ctx->setObjectFileInfo(MOFI.get());
235   // We do not support X86 Large code model. Change this in the future.
236   bool Large = false;
237   if (TheTriple->getArch() == llvm::Triple::aarch64)
238     Large = true;
239   unsigned LSDAEncoding =
240       Large ? dwarf::DW_EH_PE_absptr : dwarf::DW_EH_PE_udata4;
241   if (IsPIC) {
242     LSDAEncoding = dwarf::DW_EH_PE_pcrel |
243                    (Large ? dwarf::DW_EH_PE_sdata8 : dwarf::DW_EH_PE_sdata4);
244   }
245 
246   std::unique_ptr<MCDisassembler> DisAsm(
247       TheTarget->createMCDisassembler(*STI, *Ctx));
248 
249   if (!DisAsm)
250     return createStringError(
251         make_error_code(std::errc::not_supported),
252         Twine("BOLT-ERROR: no disassembler info for target ", TripleName));
253 
254   std::unique_ptr<const MCInstrAnalysis> MIA(
255       TheTarget->createMCInstrAnalysis(MII.get()));
256   if (!MIA)
257     return createStringError(
258         make_error_code(std::errc::not_supported),
259         Twine("BOLT-ERROR: failed to create instruction analysis for target ",
260               TripleName));
261 
262   int AsmPrinterVariant = AsmInfo->getAssemblerDialect();
263   std::unique_ptr<MCInstPrinter> InstructionPrinter(
264       TheTarget->createMCInstPrinter(*TheTriple, AsmPrinterVariant, *AsmInfo,
265                                      *MII, *MRI));
266   if (!InstructionPrinter)
267     return createStringError(
268         make_error_code(std::errc::not_supported),
269         Twine("BOLT-ERROR: no instruction printer for target ", TripleName));
270   InstructionPrinter->setPrintImmHex(true);
271 
272   std::unique_ptr<MCCodeEmitter> MCE(
273       TheTarget->createMCCodeEmitter(*MII, *Ctx));
274 
275   // Make sure we don't miss any output on core dumps.
276   outs().SetUnbuffered();
277   errs().SetUnbuffered();
278   dbgs().SetUnbuffered();
279 
280   auto BC = std::make_unique<BinaryContext>(
281       std::move(Ctx), std::move(DwCtx), std::move(TheTriple), TheTarget,
282       std::string(TripleName), std::move(MCE), std::move(MOFI),
283       std::move(AsmInfo), std::move(MII), std::move(STI),
284       std::move(InstructionPrinter), std::move(MIA), nullptr, std::move(MRI),
285       std::move(DisAsm));
286 
287   BC->LSDAEncoding = LSDAEncoding;
288 
289   BC->MAB = std::unique_ptr<MCAsmBackend>(
290       BC->TheTarget->createMCAsmBackend(*BC->STI, *BC->MRI, MCTargetOptions()));
291 
292   BC->setFilename(File->getFileName());
293 
294   BC->HasFixedLoadAddress = !IsPIC;
295 
296   BC->SymbolicDisAsm = std::unique_ptr<MCDisassembler>(
297       BC->TheTarget->createMCDisassembler(*BC->STI, *BC->Ctx));
298 
299   if (!BC->SymbolicDisAsm)
300     return createStringError(
301         make_error_code(std::errc::not_supported),
302         Twine("BOLT-ERROR: no disassembler info for target ", TripleName));
303 
304   return std::move(BC);
305 }
306 
307 bool BinaryContext::forceSymbolRelocations(StringRef SymbolName) const {
308   if (opts::HotText &&
309       (SymbolName == "__hot_start" || SymbolName == "__hot_end"))
310     return true;
311 
312   if (opts::HotData &&
313       (SymbolName == "__hot_data_start" || SymbolName == "__hot_data_end"))
314     return true;
315 
316   if (SymbolName == "_end")
317     return true;
318 
319   return false;
320 }
321 
322 std::unique_ptr<MCObjectWriter>
323 BinaryContext::createObjectWriter(raw_pwrite_stream &OS) {
324   return MAB->createObjectWriter(OS);
325 }
326 
327 bool BinaryContext::validateObjectNesting() const {
328   auto Itr = BinaryDataMap.begin();
329   auto End = BinaryDataMap.end();
330   bool Valid = true;
331   while (Itr != End) {
332     auto Next = std::next(Itr);
333     while (Next != End &&
334            Itr->second->getSection() == Next->second->getSection() &&
335            Itr->second->containsRange(Next->second->getAddress(),
336                                       Next->second->getSize())) {
337       if (Next->second->Parent != Itr->second) {
338         errs() << "BOLT-WARNING: object nesting incorrect for:\n"
339                << "BOLT-WARNING:  " << *Itr->second << "\n"
340                << "BOLT-WARNING:  " << *Next->second << "\n";
341         Valid = false;
342       }
343       ++Next;
344     }
345     Itr = Next;
346   }
347   return Valid;
348 }
349 
350 bool BinaryContext::validateHoles() const {
351   bool Valid = true;
352   for (BinarySection &Section : sections()) {
353     for (const Relocation &Rel : Section.relocations()) {
354       uint64_t RelAddr = Rel.Offset + Section.getAddress();
355       const BinaryData *BD = getBinaryDataContainingAddress(RelAddr);
356       if (!BD) {
357         errs() << "BOLT-WARNING: no BinaryData found for relocation at address"
358                << " 0x" << Twine::utohexstr(RelAddr) << " in "
359                << Section.getName() << "\n";
360         Valid = false;
361       } else if (!BD->getAtomicRoot()) {
362         errs() << "BOLT-WARNING: no atomic BinaryData found for relocation at "
363                << "address 0x" << Twine::utohexstr(RelAddr) << " in "
364                << Section.getName() << "\n";
365         Valid = false;
366       }
367     }
368   }
369   return Valid;
370 }
371 
372 void BinaryContext::updateObjectNesting(BinaryDataMapType::iterator GAI) {
373   const uint64_t Address = GAI->second->getAddress();
374   const uint64_t Size = GAI->second->getSize();
375 
376   auto fixParents = [&](BinaryDataMapType::iterator Itr,
377                         BinaryData *NewParent) {
378     BinaryData *OldParent = Itr->second->Parent;
379     Itr->second->Parent = NewParent;
380     ++Itr;
381     while (Itr != BinaryDataMap.end() && OldParent &&
382            Itr->second->Parent == OldParent) {
383       Itr->second->Parent = NewParent;
384       ++Itr;
385     }
386   };
387 
388   // Check if the previous symbol contains the newly added symbol.
389   if (GAI != BinaryDataMap.begin()) {
390     BinaryData *Prev = std::prev(GAI)->second;
391     while (Prev) {
392       if (Prev->getSection() == GAI->second->getSection() &&
393           Prev->containsRange(Address, Size)) {
394         fixParents(GAI, Prev);
395       } else {
396         fixParents(GAI, nullptr);
397       }
398       Prev = Prev->Parent;
399     }
400   }
401 
402   // Check if the newly added symbol contains any subsequent symbols.
403   if (Size != 0) {
404     BinaryData *BD = GAI->second->Parent ? GAI->second->Parent : GAI->second;
405     auto Itr = std::next(GAI);
406     while (
407         Itr != BinaryDataMap.end() &&
408         BD->containsRange(Itr->second->getAddress(), Itr->second->getSize())) {
409       Itr->second->Parent = BD;
410       ++Itr;
411     }
412   }
413 }
414 
415 iterator_range<BinaryContext::binary_data_iterator>
416 BinaryContext::getSubBinaryData(BinaryData *BD) {
417   auto Start = std::next(BinaryDataMap.find(BD->getAddress()));
418   auto End = Start;
419   while (End != BinaryDataMap.end() && BD->isAncestorOf(End->second))
420     ++End;
421   return make_range(Start, End);
422 }
423 
424 std::pair<const MCSymbol *, uint64_t>
425 BinaryContext::handleAddressRef(uint64_t Address, BinaryFunction &BF,
426                                 bool IsPCRel) {
427   if (isAArch64()) {
428     // Check if this is an access to a constant island and create bookkeeping
429     // to keep track of it and emit it later as part of this function.
430     if (MCSymbol *IslandSym = BF.getOrCreateIslandAccess(Address))
431       return std::make_pair(IslandSym, 0);
432 
433     // Detect custom code written in assembly that refers to arbitrary
434     // constant islands from other functions. Write this reference so we
435     // can pull this constant island and emit it as part of this function
436     // too.
437     auto IslandIter = AddressToConstantIslandMap.lower_bound(Address);
438 
439     if (IslandIter != AddressToConstantIslandMap.begin() &&
440         (IslandIter == AddressToConstantIslandMap.end() ||
441          IslandIter->first > Address))
442       --IslandIter;
443 
444     if (IslandIter != AddressToConstantIslandMap.end()) {
445       // Fall-back to referencing the original constant island in the presence
446       // of dynamic relocs, as we currently do not support cloning them.
447       // Notice: we might fail to link because of this, if the original constant
448       // island we are referring would be emitted too far away.
449       if (IslandIter->second->hasDynamicRelocationAtIsland()) {
450         MCSymbol *IslandSym =
451             IslandIter->second->getOrCreateIslandAccess(Address);
452         if (IslandSym)
453           return std::make_pair(IslandSym, 0);
454       } else if (MCSymbol *IslandSym =
455                      IslandIter->second->getOrCreateProxyIslandAccess(Address,
456                                                                       BF)) {
457         BF.createIslandDependency(IslandSym, IslandIter->second);
458         return std::make_pair(IslandSym, 0);
459       }
460     }
461   }
462 
463   // Note that the address does not necessarily have to reside inside
464   // a section, it could be an absolute address too.
465   ErrorOr<BinarySection &> Section = getSectionForAddress(Address);
466   if (Section && Section->isText()) {
467     if (BF.containsAddress(Address, /*UseMaxSize=*/isAArch64())) {
468       if (Address != BF.getAddress()) {
469         // The address could potentially escape. Mark it as another entry
470         // point into the function.
471         if (opts::Verbosity >= 1) {
472           outs() << "BOLT-INFO: potentially escaped address 0x"
473                  << Twine::utohexstr(Address) << " in function " << BF << '\n';
474         }
475         BF.HasInternalLabelReference = true;
476         return std::make_pair(
477             BF.addEntryPointAtOffset(Address - BF.getAddress()), 0);
478       }
479     } else {
480       addInterproceduralReference(&BF, Address);
481     }
482   }
483 
484   // With relocations, catch jump table references outside of the basic block
485   // containing the indirect jump.
486   if (HasRelocations) {
487     const MemoryContentsType MemType = analyzeMemoryAt(Address, BF);
488     if (MemType == MemoryContentsType::POSSIBLE_PIC_JUMP_TABLE && IsPCRel) {
489       const MCSymbol *Symbol =
490           getOrCreateJumpTable(BF, Address, JumpTable::JTT_PIC);
491 
492       return std::make_pair(Symbol, 0);
493     }
494   }
495 
496   if (BinaryData *BD = getBinaryDataContainingAddress(Address))
497     return std::make_pair(BD->getSymbol(), Address - BD->getAddress());
498 
499   // TODO: use DWARF info to get size/alignment here?
500   MCSymbol *TargetSymbol = getOrCreateGlobalSymbol(Address, "DATAat");
501   LLVM_DEBUG(dbgs() << "Created symbol " << TargetSymbol->getName() << '\n');
502   return std::make_pair(TargetSymbol, 0);
503 }
504 
505 MemoryContentsType BinaryContext::analyzeMemoryAt(uint64_t Address,
506                                                   BinaryFunction &BF) {
507   if (!isX86())
508     return MemoryContentsType::UNKNOWN;
509 
510   ErrorOr<BinarySection &> Section = getSectionForAddress(Address);
511   if (!Section) {
512     // No section - possibly an absolute address. Since we don't allow
513     // internal function addresses to escape the function scope - we
514     // consider it a tail call.
515     if (opts::Verbosity > 1) {
516       errs() << "BOLT-WARNING: no section for address 0x"
517              << Twine::utohexstr(Address) << " referenced from function " << BF
518              << '\n';
519     }
520     return MemoryContentsType::UNKNOWN;
521   }
522 
523   if (Section->isVirtual()) {
524     // The contents are filled at runtime.
525     return MemoryContentsType::UNKNOWN;
526   }
527 
528   // No support for jump tables in code yet.
529   if (Section->isText())
530     return MemoryContentsType::UNKNOWN;
531 
532   // Start with checking for PIC jump table. We expect non-PIC jump tables
533   // to have high 32 bits set to 0.
534   if (analyzeJumpTable(Address, JumpTable::JTT_PIC, BF))
535     return MemoryContentsType::POSSIBLE_PIC_JUMP_TABLE;
536 
537   if (analyzeJumpTable(Address, JumpTable::JTT_NORMAL, BF))
538     return MemoryContentsType::POSSIBLE_JUMP_TABLE;
539 
540   return MemoryContentsType::UNKNOWN;
541 }
542 
543 bool BinaryContext::analyzeJumpTable(const uint64_t Address,
544                                      const JumpTable::JumpTableType Type,
545                                      const BinaryFunction &BF,
546                                      const uint64_t NextJTAddress,
547                                      JumpTable::AddressesType *EntriesAsAddress,
548                                      bool *HasEntryInFragment) const {
549   // Is one of the targets __builtin_unreachable?
550   bool HasUnreachable = false;
551 
552   // Does one of the entries match function start address?
553   bool HasStartAsEntry = false;
554 
555   // Number of targets other than __builtin_unreachable.
556   uint64_t NumRealEntries = 0;
557 
558   auto addEntryAddress = [&](uint64_t EntryAddress) {
559     if (EntriesAsAddress)
560       EntriesAsAddress->emplace_back(EntryAddress);
561   };
562 
563   ErrorOr<const BinarySection &> Section = getSectionForAddress(Address);
564   if (!Section)
565     return false;
566 
567   // The upper bound is defined by containing object, section limits, and
568   // the next jump table in memory.
569   uint64_t UpperBound = Section->getEndAddress();
570   const BinaryData *JumpTableBD = getBinaryDataAtAddress(Address);
571   if (JumpTableBD && JumpTableBD->getSize()) {
572     assert(JumpTableBD->getEndAddress() <= UpperBound &&
573            "data object cannot cross a section boundary");
574     UpperBound = JumpTableBD->getEndAddress();
575   }
576   if (NextJTAddress)
577     UpperBound = std::min(NextJTAddress, UpperBound);
578 
579   LLVM_DEBUG({
580     using JTT = JumpTable::JumpTableType;
581     dbgs() << formatv("BOLT-DEBUG: analyzeJumpTable @{0:x} in {1}, JTT={2}\n",
582                       Address, BF.getPrintName(),
583                       Type == JTT::JTT_PIC ? "PIC" : "Normal");
584   });
585   const uint64_t EntrySize = getJumpTableEntrySize(Type);
586   for (uint64_t EntryAddress = Address; EntryAddress <= UpperBound - EntrySize;
587        EntryAddress += EntrySize) {
588     LLVM_DEBUG(dbgs() << "  * Checking 0x" << Twine::utohexstr(EntryAddress)
589                       << " -> ");
590     // Check if there's a proper relocation against the jump table entry.
591     if (HasRelocations) {
592       if (Type == JumpTable::JTT_PIC &&
593           !DataPCRelocations.count(EntryAddress)) {
594         LLVM_DEBUG(
595             dbgs() << "FAIL: JTT_PIC table, no relocation for this address\n");
596         break;
597       }
598       if (Type == JumpTable::JTT_NORMAL && !getRelocationAt(EntryAddress)) {
599         LLVM_DEBUG(
600             dbgs()
601             << "FAIL: JTT_NORMAL table, no relocation for this address\n");
602         break;
603       }
604     }
605 
606     const uint64_t Value =
607         (Type == JumpTable::JTT_PIC)
608             ? Address + *getSignedValueAtAddress(EntryAddress, EntrySize)
609             : *getPointerAtAddress(EntryAddress);
610 
611     // __builtin_unreachable() case.
612     if (Value == BF.getAddress() + BF.getSize()) {
613       addEntryAddress(Value);
614       HasUnreachable = true;
615       LLVM_DEBUG(dbgs() << formatv("OK: {0:x} __builtin_unreachable\n", Value));
616       continue;
617     }
618 
619     // Function start is another special case. It is allowed in the jump table,
620     // but we need at least one another regular entry to distinguish the table
621     // from, e.g. a function pointer array.
622     if (Value == BF.getAddress()) {
623       HasStartAsEntry = true;
624       addEntryAddress(Value);
625       continue;
626     }
627 
628     // Function or one of its fragments.
629     const BinaryFunction *TargetBF = getBinaryFunctionContainingAddress(Value);
630     const bool DoesBelongToFunction =
631         BF.containsAddress(Value) ||
632         (TargetBF && TargetBF->isParentOrChildOf(BF));
633     if (!DoesBelongToFunction) {
634       LLVM_DEBUG({
635         if (!BF.containsAddress(Value)) {
636           dbgs() << "FAIL: function doesn't contain this address\n";
637           if (TargetBF) {
638             dbgs() << "  ! function containing this address: "
639                    << TargetBF->getPrintName() << '\n';
640             if (TargetBF->isFragment()) {
641               dbgs() << "  ! is a fragment";
642               for (BinaryFunction *Parent : TargetBF->ParentFragments)
643                 dbgs() << ", parent: " << Parent->getPrintName();
644               dbgs() << '\n';
645             }
646           }
647         }
648       });
649       break;
650     }
651 
652     // Check there's an instruction at this offset.
653     if (TargetBF->getState() == BinaryFunction::State::Disassembled &&
654         !TargetBF->getInstructionAtOffset(Value - TargetBF->getAddress())) {
655       LLVM_DEBUG(dbgs() << formatv("FAIL: no instruction at {0:x}\n", Value));
656       break;
657     }
658 
659     ++NumRealEntries;
660     LLVM_DEBUG(dbgs() << formatv("OK: {0:x} real entry\n", Value));
661 
662     if (TargetBF != &BF && HasEntryInFragment)
663       *HasEntryInFragment = true;
664     addEntryAddress(Value);
665   }
666 
667   // It's a jump table if the number of real entries is more than 1, or there's
668   // one real entry and one or more special targets. If there are only multiple
669   // special targets, then it's not a jump table.
670   return NumRealEntries + (HasUnreachable || HasStartAsEntry) >= 2;
671 }
672 
673 void BinaryContext::populateJumpTables() {
674   LLVM_DEBUG(dbgs() << "DataPCRelocations: " << DataPCRelocations.size()
675                     << '\n');
676   for (auto JTI = JumpTables.begin(), JTE = JumpTables.end(); JTI != JTE;
677        ++JTI) {
678     JumpTable *JT = JTI->second;
679 
680     bool NonSimpleParent = false;
681     for (BinaryFunction *BF : JT->Parents)
682       NonSimpleParent |= !BF->isSimple();
683     if (NonSimpleParent)
684       continue;
685 
686     uint64_t NextJTAddress = 0;
687     auto NextJTI = std::next(JTI);
688     if (NextJTI != JTE)
689       NextJTAddress = NextJTI->second->getAddress();
690 
691     const bool Success =
692         analyzeJumpTable(JT->getAddress(), JT->Type, *(JT->Parents[0]),
693                          NextJTAddress, &JT->EntriesAsAddress, &JT->IsSplit);
694     if (!Success) {
695       LLVM_DEBUG({
696         dbgs() << "failed to analyze ";
697         JT->print(dbgs());
698         if (NextJTI != JTE) {
699           dbgs() << "next ";
700           NextJTI->second->print(dbgs());
701         }
702       });
703       llvm_unreachable("jump table heuristic failure");
704     }
705     for (BinaryFunction *Frag : JT->Parents) {
706       if (JT->IsSplit)
707         Frag->setHasIndirectTargetToSplitFragment(true);
708       for (uint64_t EntryAddress : JT->EntriesAsAddress)
709         // if target is builtin_unreachable
710         if (EntryAddress == Frag->getAddress() + Frag->getSize()) {
711           Frag->IgnoredBranches.emplace_back(EntryAddress - Frag->getAddress(),
712                                              Frag->getSize());
713         } else if (EntryAddress >= Frag->getAddress() &&
714                    EntryAddress < Frag->getAddress() + Frag->getSize()) {
715           Frag->registerReferencedOffset(EntryAddress - Frag->getAddress());
716         }
717     }
718 
719     // In strict mode, erase PC-relative relocation record. Later we check that
720     // all such records are erased and thus have been accounted for.
721     if (opts::StrictMode && JT->Type == JumpTable::JTT_PIC) {
722       for (uint64_t Address = JT->getAddress();
723            Address < JT->getAddress() + JT->getSize();
724            Address += JT->EntrySize) {
725         DataPCRelocations.erase(DataPCRelocations.find(Address));
726       }
727     }
728 
729     // Mark to skip the function and all its fragments.
730     for (BinaryFunction *Frag : JT->Parents)
731       if (Frag->hasIndirectTargetToSplitFragment())
732         addFragmentsToSkip(Frag);
733   }
734 
735   if (opts::StrictMode && DataPCRelocations.size()) {
736     LLVM_DEBUG({
737       dbgs() << DataPCRelocations.size()
738              << " unclaimed PC-relative relocations left in data:\n";
739       for (uint64_t Reloc : DataPCRelocations)
740         dbgs() << Twine::utohexstr(Reloc) << '\n';
741     });
742     assert(0 && "unclaimed PC-relative relocations left in data\n");
743   }
744   clearList(DataPCRelocations);
745 }
746 
747 void BinaryContext::skipMarkedFragments() {
748   std::vector<BinaryFunction *> FragmentQueue;
749   // Copy the functions to FragmentQueue.
750   FragmentQueue.assign(FragmentsToSkip.begin(), FragmentsToSkip.end());
751   auto addToWorklist = [&](BinaryFunction *Function) -> void {
752     if (FragmentsToSkip.count(Function))
753       return;
754     FragmentQueue.push_back(Function);
755     addFragmentsToSkip(Function);
756   };
757   // Functions containing split jump tables need to be skipped with all
758   // fragments (transitively).
759   for (size_t I = 0; I != FragmentQueue.size(); I++) {
760     BinaryFunction *BF = FragmentQueue[I];
761     assert(FragmentsToSkip.count(BF) &&
762            "internal error in traversing function fragments");
763     if (opts::Verbosity >= 1)
764       errs() << "BOLT-WARNING: Ignoring " << BF->getPrintName() << '\n';
765     BF->setSimple(false);
766     BF->setHasIndirectTargetToSplitFragment(true);
767 
768     llvm::for_each(BF->Fragments, addToWorklist);
769     llvm::for_each(BF->ParentFragments, addToWorklist);
770   }
771   if (!FragmentsToSkip.empty())
772     errs() << "BOLT-WARNING: skipped " << FragmentsToSkip.size() << " function"
773            << (FragmentsToSkip.size() == 1 ? "" : "s")
774            << " due to cold fragments\n";
775 }
776 
777 MCSymbol *BinaryContext::getOrCreateGlobalSymbol(uint64_t Address, Twine Prefix,
778                                                  uint64_t Size,
779                                                  uint16_t Alignment,
780                                                  unsigned Flags) {
781   auto Itr = BinaryDataMap.find(Address);
782   if (Itr != BinaryDataMap.end()) {
783     assert(Itr->second->getSize() == Size || !Size);
784     return Itr->second->getSymbol();
785   }
786 
787   std::string Name = (Prefix + "0x" + Twine::utohexstr(Address)).str();
788   assert(!GlobalSymbols.count(Name) && "created name is not unique");
789   return registerNameAtAddress(Name, Address, Size, Alignment, Flags);
790 }
791 
792 MCSymbol *BinaryContext::getOrCreateUndefinedGlobalSymbol(StringRef Name) {
793   return Ctx->getOrCreateSymbol(Name);
794 }
795 
796 BinaryFunction *BinaryContext::createBinaryFunction(
797     const std::string &Name, BinarySection &Section, uint64_t Address,
798     uint64_t Size, uint64_t SymbolSize, uint16_t Alignment) {
799   auto Result = BinaryFunctions.emplace(
800       Address, BinaryFunction(Name, Section, Address, Size, *this));
801   assert(Result.second == true && "unexpected duplicate function");
802   BinaryFunction *BF = &Result.first->second;
803   registerNameAtAddress(Name, Address, SymbolSize ? SymbolSize : Size,
804                         Alignment);
805   setSymbolToFunctionMap(BF->getSymbol(), BF);
806   return BF;
807 }
808 
809 const MCSymbol *
810 BinaryContext::getOrCreateJumpTable(BinaryFunction &Function, uint64_t Address,
811                                     JumpTable::JumpTableType Type) {
812   // Two fragments of same function access same jump table
813   if (JumpTable *JT = getJumpTableContainingAddress(Address)) {
814     assert(JT->Type == Type && "jump table types have to match");
815     assert(Address == JT->getAddress() && "unexpected non-empty jump table");
816 
817     // Prevent associating a jump table to a specific fragment twice.
818     // This simple check arises from the assumption: no more than 2 fragments.
819     if (JT->Parents.size() == 1 && JT->Parents[0] != &Function) {
820       assert(JT->Parents[0]->isParentOrChildOf(Function) &&
821              "cannot re-use jump table of a different function");
822       // Duplicate the entry for the parent function for easy access
823       JT->Parents.push_back(&Function);
824       if (opts::Verbosity > 2) {
825         outs() << "BOLT-INFO: Multiple fragments access same jump table: "
826                << JT->Parents[0]->getPrintName() << "; "
827                << Function.getPrintName() << "\n";
828         JT->print(outs());
829       }
830       Function.JumpTables.emplace(Address, JT);
831       JT->Parents[0]->setHasIndirectTargetToSplitFragment(true);
832       JT->Parents[1]->setHasIndirectTargetToSplitFragment(true);
833     }
834 
835     bool IsJumpTableParent = false;
836     (void)IsJumpTableParent;
837     for (BinaryFunction *Frag : JT->Parents)
838       if (Frag == &Function)
839         IsJumpTableParent = true;
840     assert(IsJumpTableParent &&
841            "cannot re-use jump table of a different function");
842     return JT->getFirstLabel();
843   }
844 
845   // Re-use the existing symbol if possible.
846   MCSymbol *JTLabel = nullptr;
847   if (BinaryData *Object = getBinaryDataAtAddress(Address)) {
848     if (!isInternalSymbolName(Object->getSymbol()->getName()))
849       JTLabel = Object->getSymbol();
850   }
851 
852   const uint64_t EntrySize = getJumpTableEntrySize(Type);
853   if (!JTLabel) {
854     const std::string JumpTableName = generateJumpTableName(Function, Address);
855     JTLabel = registerNameAtAddress(JumpTableName, Address, 0, EntrySize);
856   }
857 
858   LLVM_DEBUG(dbgs() << "BOLT-DEBUG: creating jump table " << JTLabel->getName()
859                     << " in function " << Function << '\n');
860 
861   JumpTable *JT = new JumpTable(*JTLabel, Address, EntrySize, Type,
862                                 JumpTable::LabelMapType{{0, JTLabel}},
863                                 *getSectionForAddress(Address));
864   JT->Parents.push_back(&Function);
865   if (opts::Verbosity > 2)
866     JT->print(outs());
867   JumpTables.emplace(Address, JT);
868 
869   // Duplicate the entry for the parent function for easy access.
870   Function.JumpTables.emplace(Address, JT);
871   return JTLabel;
872 }
873 
874 std::pair<uint64_t, const MCSymbol *>
875 BinaryContext::duplicateJumpTable(BinaryFunction &Function, JumpTable *JT,
876                                   const MCSymbol *OldLabel) {
877   auto L = scopeLock();
878   unsigned Offset = 0;
879   bool Found = false;
880   for (std::pair<const unsigned, MCSymbol *> Elmt : JT->Labels) {
881     if (Elmt.second != OldLabel)
882       continue;
883     Offset = Elmt.first;
884     Found = true;
885     break;
886   }
887   assert(Found && "Label not found");
888   (void)Found;
889   MCSymbol *NewLabel = Ctx->createNamedTempSymbol("duplicatedJT");
890   JumpTable *NewJT =
891       new JumpTable(*NewLabel, JT->getAddress(), JT->EntrySize, JT->Type,
892                     JumpTable::LabelMapType{{Offset, NewLabel}},
893                     *getSectionForAddress(JT->getAddress()));
894   NewJT->Parents = JT->Parents;
895   NewJT->Entries = JT->Entries;
896   NewJT->Counts = JT->Counts;
897   uint64_t JumpTableID = ++DuplicatedJumpTables;
898   // Invert it to differentiate from regular jump tables whose IDs are their
899   // addresses in the input binary memory space
900   JumpTableID = ~JumpTableID;
901   JumpTables.emplace(JumpTableID, NewJT);
902   Function.JumpTables.emplace(JumpTableID, NewJT);
903   return std::make_pair(JumpTableID, NewLabel);
904 }
905 
906 std::string BinaryContext::generateJumpTableName(const BinaryFunction &BF,
907                                                  uint64_t Address) {
908   size_t Id;
909   uint64_t Offset = 0;
910   if (const JumpTable *JT = BF.getJumpTableContainingAddress(Address)) {
911     Offset = Address - JT->getAddress();
912     auto Itr = JT->Labels.find(Offset);
913     if (Itr != JT->Labels.end())
914       return std::string(Itr->second->getName());
915     Id = JumpTableIds.at(JT->getAddress());
916   } else {
917     Id = JumpTableIds[Address] = BF.JumpTables.size();
918   }
919   return ("JUMP_TABLE/" + BF.getOneName().str() + "." + std::to_string(Id) +
920           (Offset ? ("." + std::to_string(Offset)) : ""));
921 }
922 
923 bool BinaryContext::hasValidCodePadding(const BinaryFunction &BF) {
924   // FIXME: aarch64 support is missing.
925   if (!isX86())
926     return true;
927 
928   if (BF.getSize() == BF.getMaxSize())
929     return true;
930 
931   ErrorOr<ArrayRef<unsigned char>> FunctionData = BF.getData();
932   assert(FunctionData && "cannot get function as data");
933 
934   uint64_t Offset = BF.getSize();
935   MCInst Instr;
936   uint64_t InstrSize = 0;
937   uint64_t InstrAddress = BF.getAddress() + Offset;
938   using std::placeholders::_1;
939 
940   // Skip instructions that satisfy the predicate condition.
941   auto skipInstructions = [&](std::function<bool(const MCInst &)> Predicate) {
942     const uint64_t StartOffset = Offset;
943     for (; Offset < BF.getMaxSize();
944          Offset += InstrSize, InstrAddress += InstrSize) {
945       if (!DisAsm->getInstruction(Instr, InstrSize, FunctionData->slice(Offset),
946                                   InstrAddress, nulls()))
947         break;
948       if (!Predicate(Instr))
949         break;
950     }
951 
952     return Offset - StartOffset;
953   };
954 
955   // Skip a sequence of zero bytes.
956   auto skipZeros = [&]() {
957     const uint64_t StartOffset = Offset;
958     for (; Offset < BF.getMaxSize(); ++Offset)
959       if ((*FunctionData)[Offset] != 0)
960         break;
961 
962     return Offset - StartOffset;
963   };
964 
965   // Accept the whole padding area filled with breakpoints.
966   auto isBreakpoint = std::bind(&MCPlusBuilder::isBreakpoint, MIB.get(), _1);
967   if (skipInstructions(isBreakpoint) && Offset == BF.getMaxSize())
968     return true;
969 
970   auto isNoop = std::bind(&MCPlusBuilder::isNoop, MIB.get(), _1);
971 
972   // Some functions have a jump to the next function or to the padding area
973   // inserted after the body.
974   auto isSkipJump = [&](const MCInst &Instr) {
975     uint64_t TargetAddress = 0;
976     if (MIB->isUnconditionalBranch(Instr) &&
977         MIB->evaluateBranch(Instr, InstrAddress, InstrSize, TargetAddress)) {
978       if (TargetAddress >= InstrAddress + InstrSize &&
979           TargetAddress <= BF.getAddress() + BF.getMaxSize()) {
980         return true;
981       }
982     }
983     return false;
984   };
985 
986   // Skip over nops, jumps, and zero padding. Allow interleaving (this happens).
987   while (skipInstructions(isNoop) || skipInstructions(isSkipJump) ||
988          skipZeros())
989     ;
990 
991   if (Offset == BF.getMaxSize())
992     return true;
993 
994   if (opts::Verbosity >= 1) {
995     errs() << "BOLT-WARNING: bad padding at address 0x"
996            << Twine::utohexstr(BF.getAddress() + BF.getSize())
997            << " starting at offset " << (Offset - BF.getSize())
998            << " in function " << BF << '\n'
999            << FunctionData->slice(BF.getSize(), BF.getMaxSize() - BF.getSize())
1000            << '\n';
1001   }
1002 
1003   return false;
1004 }
1005 
1006 void BinaryContext::adjustCodePadding() {
1007   for (auto &BFI : BinaryFunctions) {
1008     BinaryFunction &BF = BFI.second;
1009     if (!shouldEmit(BF))
1010       continue;
1011 
1012     if (!hasValidCodePadding(BF)) {
1013       if (HasRelocations) {
1014         if (opts::Verbosity >= 1) {
1015           outs() << "BOLT-INFO: function " << BF
1016                  << " has invalid padding. Ignoring the function.\n";
1017         }
1018         BF.setIgnored();
1019       } else {
1020         BF.setMaxSize(BF.getSize());
1021       }
1022     }
1023   }
1024 }
1025 
1026 MCSymbol *BinaryContext::registerNameAtAddress(StringRef Name, uint64_t Address,
1027                                                uint64_t Size,
1028                                                uint16_t Alignment,
1029                                                unsigned Flags) {
1030   // Register the name with MCContext.
1031   MCSymbol *Symbol = Ctx->getOrCreateSymbol(Name);
1032 
1033   auto GAI = BinaryDataMap.find(Address);
1034   BinaryData *BD;
1035   if (GAI == BinaryDataMap.end()) {
1036     ErrorOr<BinarySection &> SectionOrErr = getSectionForAddress(Address);
1037     BinarySection &Section =
1038         SectionOrErr ? SectionOrErr.get() : absoluteSection();
1039     BD = new BinaryData(*Symbol, Address, Size, Alignment ? Alignment : 1,
1040                         Section, Flags);
1041     GAI = BinaryDataMap.emplace(Address, BD).first;
1042     GlobalSymbols[Name] = BD;
1043     updateObjectNesting(GAI);
1044   } else {
1045     BD = GAI->second;
1046     if (!BD->hasName(Name)) {
1047       GlobalSymbols[Name] = BD;
1048       BD->Symbols.push_back(Symbol);
1049     }
1050   }
1051 
1052   return Symbol;
1053 }
1054 
1055 const BinaryData *
1056 BinaryContext::getBinaryDataContainingAddressImpl(uint64_t Address) const {
1057   auto NI = BinaryDataMap.lower_bound(Address);
1058   auto End = BinaryDataMap.end();
1059   if ((NI != End && Address == NI->first) ||
1060       ((NI != BinaryDataMap.begin()) && (NI-- != BinaryDataMap.begin()))) {
1061     if (NI->second->containsAddress(Address))
1062       return NI->second;
1063 
1064     // If this is a sub-symbol, see if a parent data contains the address.
1065     const BinaryData *BD = NI->second->getParent();
1066     while (BD) {
1067       if (BD->containsAddress(Address))
1068         return BD;
1069       BD = BD->getParent();
1070     }
1071   }
1072   return nullptr;
1073 }
1074 
1075 BinaryData *BinaryContext::getGOTSymbol() {
1076   // First tries to find a global symbol with that name
1077   BinaryData *GOTSymBD = getBinaryDataByName("_GLOBAL_OFFSET_TABLE_");
1078   if (GOTSymBD)
1079     return GOTSymBD;
1080 
1081   // This symbol might be hidden from run-time link, so fetch the local
1082   // definition if available.
1083   GOTSymBD = getBinaryDataByName("_GLOBAL_OFFSET_TABLE_/1");
1084   if (!GOTSymBD)
1085     return nullptr;
1086 
1087   // If the local symbol is not unique, fail
1088   unsigned Index = 2;
1089   SmallString<30> Storage;
1090   while (const BinaryData *BD =
1091              getBinaryDataByName(Twine("_GLOBAL_OFFSET_TABLE_/")
1092                                      .concat(Twine(Index++))
1093                                      .toStringRef(Storage)))
1094     if (BD->getAddress() != GOTSymBD->getAddress())
1095       return nullptr;
1096 
1097   return GOTSymBD;
1098 }
1099 
1100 bool BinaryContext::setBinaryDataSize(uint64_t Address, uint64_t Size) {
1101   auto NI = BinaryDataMap.find(Address);
1102   assert(NI != BinaryDataMap.end());
1103   if (NI == BinaryDataMap.end())
1104     return false;
1105   // TODO: it's possible that a jump table starts at the same address
1106   // as a larger blob of private data.  When we set the size of the
1107   // jump table, it might be smaller than the total blob size.  In this
1108   // case we just leave the original size since (currently) it won't really
1109   // affect anything.
1110   assert((!NI->second->Size || NI->second->Size == Size ||
1111           (NI->second->isJumpTable() && NI->second->Size > Size)) &&
1112          "can't change the size of a symbol that has already had its "
1113          "size set");
1114   if (!NI->second->Size) {
1115     NI->second->Size = Size;
1116     updateObjectNesting(NI);
1117     return true;
1118   }
1119   return false;
1120 }
1121 
1122 void BinaryContext::generateSymbolHashes() {
1123   auto isPadding = [](const BinaryData &BD) {
1124     StringRef Contents = BD.getSection().getContents();
1125     StringRef SymData = Contents.substr(BD.getOffset(), BD.getSize());
1126     return (BD.getName().starts_with("HOLEat") ||
1127             SymData.find_first_not_of(0) == StringRef::npos);
1128   };
1129 
1130   uint64_t NumCollisions = 0;
1131   for (auto &Entry : BinaryDataMap) {
1132     BinaryData &BD = *Entry.second;
1133     StringRef Name = BD.getName();
1134 
1135     if (!isInternalSymbolName(Name))
1136       continue;
1137 
1138     // First check if a non-anonymous alias exists and move it to the front.
1139     if (BD.getSymbols().size() > 1) {
1140       auto Itr = llvm::find_if(BD.getSymbols(), [&](const MCSymbol *Symbol) {
1141         return !isInternalSymbolName(Symbol->getName());
1142       });
1143       if (Itr != BD.getSymbols().end()) {
1144         size_t Idx = std::distance(BD.getSymbols().begin(), Itr);
1145         std::swap(BD.getSymbols()[0], BD.getSymbols()[Idx]);
1146         continue;
1147       }
1148     }
1149 
1150     // We have to skip 0 size symbols since they will all collide.
1151     if (BD.getSize() == 0) {
1152       continue;
1153     }
1154 
1155     const uint64_t Hash = BD.getSection().hash(BD);
1156     const size_t Idx = Name.find("0x");
1157     std::string NewName =
1158         (Twine(Name.substr(0, Idx)) + "_" + Twine::utohexstr(Hash)).str();
1159     if (getBinaryDataByName(NewName)) {
1160       // Ignore collisions for symbols that appear to be padding
1161       // (i.e. all zeros or a "hole")
1162       if (!isPadding(BD)) {
1163         if (opts::Verbosity) {
1164           errs() << "BOLT-WARNING: collision detected when hashing " << BD
1165                  << " with new name (" << NewName << "), skipping.\n";
1166         }
1167         ++NumCollisions;
1168       }
1169       continue;
1170     }
1171     BD.Symbols.insert(BD.Symbols.begin(), Ctx->getOrCreateSymbol(NewName));
1172     GlobalSymbols[NewName] = &BD;
1173   }
1174   if (NumCollisions) {
1175     errs() << "BOLT-WARNING: " << NumCollisions
1176            << " collisions detected while hashing binary objects";
1177     if (!opts::Verbosity)
1178       errs() << ". Use -v=1 to see the list.";
1179     errs() << '\n';
1180   }
1181 }
1182 
1183 bool BinaryContext::registerFragment(BinaryFunction &TargetFunction,
1184                                      BinaryFunction &Function) const {
1185   assert(TargetFunction.isFragment() && "TargetFunction must be a fragment");
1186   if (TargetFunction.isChildOf(Function))
1187     return true;
1188   TargetFunction.addParentFragment(Function);
1189   Function.addFragment(TargetFunction);
1190   if (!HasRelocations) {
1191     TargetFunction.setSimple(false);
1192     Function.setSimple(false);
1193   }
1194   if (opts::Verbosity >= 1) {
1195     outs() << "BOLT-INFO: marking " << TargetFunction << " as a fragment of "
1196            << Function << '\n';
1197   }
1198   return true;
1199 }
1200 
1201 void BinaryContext::addAdrpAddRelocAArch64(BinaryFunction &BF,
1202                                            MCInst &LoadLowBits,
1203                                            MCInst &LoadHiBits,
1204                                            uint64_t Target) {
1205   const MCSymbol *TargetSymbol;
1206   uint64_t Addend = 0;
1207   std::tie(TargetSymbol, Addend) = handleAddressRef(Target, BF,
1208                                                     /*IsPCRel*/ true);
1209   int64_t Val;
1210   MIB->replaceImmWithSymbolRef(LoadHiBits, TargetSymbol, Addend, Ctx.get(), Val,
1211                                ELF::R_AARCH64_ADR_PREL_PG_HI21);
1212   MIB->replaceImmWithSymbolRef(LoadLowBits, TargetSymbol, Addend, Ctx.get(),
1213                                Val, ELF::R_AARCH64_ADD_ABS_LO12_NC);
1214 }
1215 
1216 bool BinaryContext::handleAArch64Veneer(uint64_t Address, bool MatchOnly) {
1217   BinaryFunction *TargetFunction = getBinaryFunctionContainingAddress(Address);
1218   if (TargetFunction)
1219     return false;
1220 
1221   ErrorOr<BinarySection &> Section = getSectionForAddress(Address);
1222   assert(Section && "cannot get section for referenced address");
1223   if (!Section->isText())
1224     return false;
1225 
1226   bool Ret = false;
1227   StringRef SectionContents = Section->getContents();
1228   uint64_t Offset = Address - Section->getAddress();
1229   const uint64_t MaxSize = SectionContents.size() - Offset;
1230   const uint8_t *Bytes =
1231       reinterpret_cast<const uint8_t *>(SectionContents.data());
1232   ArrayRef<uint8_t> Data(Bytes + Offset, MaxSize);
1233 
1234   auto matchVeneer = [&](BinaryFunction::InstrMapType &Instructions,
1235                          MCInst &Instruction, uint64_t Offset,
1236                          uint64_t AbsoluteInstrAddr,
1237                          uint64_t TotalSize) -> bool {
1238     MCInst *TargetHiBits, *TargetLowBits;
1239     uint64_t TargetAddress, Count;
1240     Count = MIB->matchLinkerVeneer(Instructions.begin(), Instructions.end(),
1241                                    AbsoluteInstrAddr, Instruction, TargetHiBits,
1242                                    TargetLowBits, TargetAddress);
1243     if (!Count)
1244       return false;
1245 
1246     if (MatchOnly)
1247       return true;
1248 
1249     // NOTE The target symbol was created during disassemble's
1250     // handleExternalReference
1251     const MCSymbol *VeneerSymbol = getOrCreateGlobalSymbol(Address, "FUNCat");
1252     BinaryFunction *Veneer = createBinaryFunction(VeneerSymbol->getName().str(),
1253                                                   *Section, Address, TotalSize);
1254     addAdrpAddRelocAArch64(*Veneer, *TargetLowBits, *TargetHiBits,
1255                            TargetAddress);
1256     MIB->addAnnotation(Instruction, "AArch64Veneer", true);
1257     Veneer->addInstruction(Offset, std::move(Instruction));
1258     --Count;
1259     for (auto It = Instructions.rbegin(); Count != 0; ++It, --Count) {
1260       MIB->addAnnotation(It->second, "AArch64Veneer", true);
1261       Veneer->addInstruction(It->first, std::move(It->second));
1262     }
1263 
1264     Veneer->getOrCreateLocalLabel(Address);
1265     Veneer->setMaxSize(TotalSize);
1266     Veneer->updateState(BinaryFunction::State::Disassembled);
1267     LLVM_DEBUG(dbgs() << "BOLT-DEBUG: handling veneer function at 0x" << Address
1268                       << "\n");
1269     return true;
1270   };
1271 
1272   uint64_t Size = 0, TotalSize = 0;
1273   BinaryFunction::InstrMapType VeneerInstructions;
1274   for (Offset = 0; Offset < MaxSize; Offset += Size) {
1275     MCInst Instruction;
1276     const uint64_t AbsoluteInstrAddr = Address + Offset;
1277     if (!SymbolicDisAsm->getInstruction(Instruction, Size, Data.slice(Offset),
1278                                         AbsoluteInstrAddr, nulls()))
1279       break;
1280 
1281     TotalSize += Size;
1282     if (MIB->isBranch(Instruction)) {
1283       Ret = matchVeneer(VeneerInstructions, Instruction, Offset,
1284                         AbsoluteInstrAddr, TotalSize);
1285       break;
1286     }
1287 
1288     VeneerInstructions.emplace(Offset, std::move(Instruction));
1289   }
1290 
1291   return Ret;
1292 }
1293 
1294 void BinaryContext::processInterproceduralReferences() {
1295   for (const std::pair<BinaryFunction *, uint64_t> &It :
1296        InterproceduralReferences) {
1297     BinaryFunction &Function = *It.first;
1298     uint64_t Address = It.second;
1299     if (!Address || Function.isIgnored())
1300       continue;
1301 
1302     BinaryFunction *TargetFunction =
1303         getBinaryFunctionContainingAddress(Address);
1304     if (&Function == TargetFunction)
1305       continue;
1306 
1307     if (TargetFunction) {
1308       if (TargetFunction->isFragment() &&
1309           !TargetFunction->isChildOf(Function)) {
1310         errs() << "BOLT-WARNING: interprocedural reference between unrelated "
1311                   "fragments: "
1312                << Function.getPrintName() << " and "
1313                << TargetFunction->getPrintName() << '\n';
1314       }
1315       if (uint64_t Offset = Address - TargetFunction->getAddress())
1316         TargetFunction->addEntryPointAtOffset(Offset);
1317 
1318       continue;
1319     }
1320 
1321     // Check if address falls in function padding space - this could be
1322     // unmarked data in code. In this case adjust the padding space size.
1323     ErrorOr<BinarySection &> Section = getSectionForAddress(Address);
1324     assert(Section && "cannot get section for referenced address");
1325 
1326     if (!Section->isText())
1327       continue;
1328 
1329     // PLT requires special handling and could be ignored in this context.
1330     StringRef SectionName = Section->getName();
1331     if (SectionName == ".plt" || SectionName == ".plt.got")
1332       continue;
1333 
1334     // Check if it is aarch64 veneer written at Address
1335     if (isAArch64() && handleAArch64Veneer(Address))
1336       continue;
1337 
1338     if (opts::processAllFunctions()) {
1339       errs() << "BOLT-ERROR: cannot process binaries with unmarked "
1340              << "object in code at address 0x" << Twine::utohexstr(Address)
1341              << " belonging to section " << SectionName << " in current mode\n";
1342       exit(1);
1343     }
1344 
1345     TargetFunction = getBinaryFunctionContainingAddress(Address,
1346                                                         /*CheckPastEnd=*/false,
1347                                                         /*UseMaxSize=*/true);
1348     // We are not going to overwrite non-simple functions, but for simple
1349     // ones - adjust the padding size.
1350     if (TargetFunction && TargetFunction->isSimple()) {
1351       errs() << "BOLT-WARNING: function " << *TargetFunction
1352              << " has an object detected in a padding region at address 0x"
1353              << Twine::utohexstr(Address) << '\n';
1354       TargetFunction->setMaxSize(TargetFunction->getSize());
1355     }
1356   }
1357 
1358   InterproceduralReferences.clear();
1359 }
1360 
1361 void BinaryContext::postProcessSymbolTable() {
1362   fixBinaryDataHoles();
1363   bool Valid = true;
1364   for (auto &Entry : BinaryDataMap) {
1365     BinaryData *BD = Entry.second;
1366     if ((BD->getName().starts_with("SYMBOLat") ||
1367          BD->getName().starts_with("DATAat")) &&
1368         !BD->getParent() && !BD->getSize() && !BD->isAbsolute() &&
1369         BD->getSection()) {
1370       errs() << "BOLT-WARNING: zero-sized top level symbol: " << *BD << "\n";
1371       Valid = false;
1372     }
1373   }
1374   assert(Valid);
1375   (void)Valid;
1376   generateSymbolHashes();
1377 }
1378 
1379 void BinaryContext::foldFunction(BinaryFunction &ChildBF,
1380                                  BinaryFunction &ParentBF) {
1381   assert(!ChildBF.isMultiEntry() && !ParentBF.isMultiEntry() &&
1382          "cannot merge functions with multiple entry points");
1383 
1384   std::unique_lock<llvm::sys::RWMutex> WriteCtxLock(CtxMutex, std::defer_lock);
1385   std::unique_lock<llvm::sys::RWMutex> WriteSymbolMapLock(
1386       SymbolToFunctionMapMutex, std::defer_lock);
1387 
1388   const StringRef ChildName = ChildBF.getOneName();
1389 
1390   // Move symbols over and update bookkeeping info.
1391   for (MCSymbol *Symbol : ChildBF.getSymbols()) {
1392     ParentBF.getSymbols().push_back(Symbol);
1393     WriteSymbolMapLock.lock();
1394     SymbolToFunctionMap[Symbol] = &ParentBF;
1395     WriteSymbolMapLock.unlock();
1396     // NB: there's no need to update BinaryDataMap and GlobalSymbols.
1397   }
1398   ChildBF.getSymbols().clear();
1399 
1400   // Move other names the child function is known under.
1401   llvm::move(ChildBF.Aliases, std::back_inserter(ParentBF.Aliases));
1402   ChildBF.Aliases.clear();
1403 
1404   if (HasRelocations) {
1405     // Merge execution counts of ChildBF into those of ParentBF.
1406     // Without relocations, we cannot reliably merge profiles as both functions
1407     // continue to exist and either one can be executed.
1408     ChildBF.mergeProfileDataInto(ParentBF);
1409 
1410     std::shared_lock<llvm::sys::RWMutex> ReadBfsLock(BinaryFunctionsMutex,
1411                                                      std::defer_lock);
1412     std::unique_lock<llvm::sys::RWMutex> WriteBfsLock(BinaryFunctionsMutex,
1413                                                       std::defer_lock);
1414     // Remove ChildBF from the global set of functions in relocs mode.
1415     ReadBfsLock.lock();
1416     auto FI = BinaryFunctions.find(ChildBF.getAddress());
1417     ReadBfsLock.unlock();
1418 
1419     assert(FI != BinaryFunctions.end() && "function not found");
1420     assert(&ChildBF == &FI->second && "function mismatch");
1421 
1422     WriteBfsLock.lock();
1423     ChildBF.clearDisasmState();
1424     FI = BinaryFunctions.erase(FI);
1425     WriteBfsLock.unlock();
1426 
1427   } else {
1428     // In non-relocation mode we keep the function, but rename it.
1429     std::string NewName = "__ICF_" + ChildName.str();
1430 
1431     WriteCtxLock.lock();
1432     ChildBF.getSymbols().push_back(Ctx->getOrCreateSymbol(NewName));
1433     WriteCtxLock.unlock();
1434 
1435     ChildBF.setFolded(&ParentBF);
1436   }
1437 
1438   ParentBF.setHasFunctionsFoldedInto();
1439 }
1440 
1441 void BinaryContext::fixBinaryDataHoles() {
1442   assert(validateObjectNesting() && "object nesting inconsistency detected");
1443 
1444   for (BinarySection &Section : allocatableSections()) {
1445     std::vector<std::pair<uint64_t, uint64_t>> Holes;
1446 
1447     auto isNotHole = [&Section](const binary_data_iterator &Itr) {
1448       BinaryData *BD = Itr->second;
1449       bool isHole = (!BD->getParent() && !BD->getSize() && BD->isObject() &&
1450                      (BD->getName().starts_with("SYMBOLat0x") ||
1451                       BD->getName().starts_with("DATAat0x") ||
1452                       BD->getName().starts_with("ANONYMOUS")));
1453       return !isHole && BD->getSection() == Section && !BD->getParent();
1454     };
1455 
1456     auto BDStart = BinaryDataMap.begin();
1457     auto BDEnd = BinaryDataMap.end();
1458     auto Itr = FilteredBinaryDataIterator(isNotHole, BDStart, BDEnd);
1459     auto End = FilteredBinaryDataIterator(isNotHole, BDEnd, BDEnd);
1460 
1461     uint64_t EndAddress = Section.getAddress();
1462 
1463     while (Itr != End) {
1464       if (Itr->second->getAddress() > EndAddress) {
1465         uint64_t Gap = Itr->second->getAddress() - EndAddress;
1466         Holes.emplace_back(EndAddress, Gap);
1467       }
1468       EndAddress = Itr->second->getEndAddress();
1469       ++Itr;
1470     }
1471 
1472     if (EndAddress < Section.getEndAddress())
1473       Holes.emplace_back(EndAddress, Section.getEndAddress() - EndAddress);
1474 
1475     // If there is already a symbol at the start of the hole, grow that symbol
1476     // to cover the rest.  Otherwise, create a new symbol to cover the hole.
1477     for (std::pair<uint64_t, uint64_t> &Hole : Holes) {
1478       BinaryData *BD = getBinaryDataAtAddress(Hole.first);
1479       if (BD) {
1480         // BD->getSection() can be != Section if there are sections that
1481         // overlap.  In this case it is probably safe to just skip the holes
1482         // since the overlapping section will not(?) have any symbols in it.
1483         if (BD->getSection() == Section)
1484           setBinaryDataSize(Hole.first, Hole.second);
1485       } else {
1486         getOrCreateGlobalSymbol(Hole.first, "HOLEat", Hole.second, 1);
1487       }
1488     }
1489   }
1490 
1491   assert(validateObjectNesting() && "object nesting inconsistency detected");
1492   assert(validateHoles() && "top level hole detected in object map");
1493 }
1494 
1495 void BinaryContext::printGlobalSymbols(raw_ostream &OS) const {
1496   const BinarySection *CurrentSection = nullptr;
1497   bool FirstSection = true;
1498 
1499   for (auto &Entry : BinaryDataMap) {
1500     const BinaryData *BD = Entry.second;
1501     const BinarySection &Section = BD->getSection();
1502     if (FirstSection || Section != *CurrentSection) {
1503       uint64_t Address, Size;
1504       StringRef Name = Section.getName();
1505       if (Section) {
1506         Address = Section.getAddress();
1507         Size = Section.getSize();
1508       } else {
1509         Address = BD->getAddress();
1510         Size = BD->getSize();
1511       }
1512       OS << "BOLT-INFO: Section " << Name << ", "
1513          << "0x" + Twine::utohexstr(Address) << ":"
1514          << "0x" + Twine::utohexstr(Address + Size) << "/" << Size << "\n";
1515       CurrentSection = &Section;
1516       FirstSection = false;
1517     }
1518 
1519     OS << "BOLT-INFO: ";
1520     const BinaryData *P = BD->getParent();
1521     while (P) {
1522       OS << "  ";
1523       P = P->getParent();
1524     }
1525     OS << *BD << "\n";
1526   }
1527 }
1528 
1529 Expected<unsigned> BinaryContext::getDwarfFile(
1530     StringRef Directory, StringRef FileName, unsigned FileNumber,
1531     std::optional<MD5::MD5Result> Checksum, std::optional<StringRef> Source,
1532     unsigned CUID, unsigned DWARFVersion) {
1533   DwarfLineTable &Table = DwarfLineTablesCUMap[CUID];
1534   return Table.tryGetFile(Directory, FileName, Checksum, Source, DWARFVersion,
1535                           FileNumber);
1536 }
1537 
1538 unsigned BinaryContext::addDebugFilenameToUnit(const uint32_t DestCUID,
1539                                                const uint32_t SrcCUID,
1540                                                unsigned FileIndex) {
1541   DWARFCompileUnit *SrcUnit = DwCtx->getCompileUnitForOffset(SrcCUID);
1542   const DWARFDebugLine::LineTable *LineTable =
1543       DwCtx->getLineTableForUnit(SrcUnit);
1544   const std::vector<DWARFDebugLine::FileNameEntry> &FileNames =
1545       LineTable->Prologue.FileNames;
1546   // Dir indexes start at 1, as DWARF file numbers, and a dir index 0
1547   // means empty dir.
1548   assert(FileIndex > 0 && FileIndex <= FileNames.size() &&
1549          "FileIndex out of range for the compilation unit.");
1550   StringRef Dir = "";
1551   if (FileNames[FileIndex - 1].DirIdx != 0) {
1552     if (std::optional<const char *> DirName = dwarf::toString(
1553             LineTable->Prologue
1554                 .IncludeDirectories[FileNames[FileIndex - 1].DirIdx - 1])) {
1555       Dir = *DirName;
1556     }
1557   }
1558   StringRef FileName = "";
1559   if (std::optional<const char *> FName =
1560           dwarf::toString(FileNames[FileIndex - 1].Name))
1561     FileName = *FName;
1562   assert(FileName != "");
1563   DWARFCompileUnit *DstUnit = DwCtx->getCompileUnitForOffset(DestCUID);
1564   return cantFail(getDwarfFile(Dir, FileName, 0, std::nullopt, std::nullopt,
1565                                DestCUID, DstUnit->getVersion()));
1566 }
1567 
1568 std::vector<BinaryFunction *> BinaryContext::getSortedFunctions() {
1569   std::vector<BinaryFunction *> SortedFunctions(BinaryFunctions.size());
1570   llvm::transform(llvm::make_second_range(BinaryFunctions),
1571                   SortedFunctions.begin(),
1572                   [](BinaryFunction &BF) { return &BF; });
1573 
1574   llvm::stable_sort(SortedFunctions,
1575                     [](const BinaryFunction *A, const BinaryFunction *B) {
1576                       if (A->hasValidIndex() && B->hasValidIndex()) {
1577                         return A->getIndex() < B->getIndex();
1578                       }
1579                       return A->hasValidIndex();
1580                     });
1581   return SortedFunctions;
1582 }
1583 
1584 std::vector<BinaryFunction *> BinaryContext::getAllBinaryFunctions() {
1585   std::vector<BinaryFunction *> AllFunctions;
1586   AllFunctions.reserve(BinaryFunctions.size() + InjectedBinaryFunctions.size());
1587   llvm::transform(llvm::make_second_range(BinaryFunctions),
1588                   std::back_inserter(AllFunctions),
1589                   [](BinaryFunction &BF) { return &BF; });
1590   llvm::copy(InjectedBinaryFunctions, std::back_inserter(AllFunctions));
1591 
1592   return AllFunctions;
1593 }
1594 
1595 std::optional<DWARFUnit *> BinaryContext::getDWOCU(uint64_t DWOId) {
1596   auto Iter = DWOCUs.find(DWOId);
1597   if (Iter == DWOCUs.end())
1598     return std::nullopt;
1599 
1600   return Iter->second;
1601 }
1602 
1603 DWARFContext *BinaryContext::getDWOContext() const {
1604   if (DWOCUs.empty())
1605     return nullptr;
1606   return &DWOCUs.begin()->second->getContext();
1607 }
1608 
1609 /// Handles DWO sections that can either be in .o, .dwo or .dwp files.
1610 void BinaryContext::preprocessDWODebugInfo() {
1611   for (const std::unique_ptr<DWARFUnit> &CU : DwCtx->compile_units()) {
1612     DWARFUnit *const DwarfUnit = CU.get();
1613     if (std::optional<uint64_t> DWOId = DwarfUnit->getDWOId()) {
1614       std::string DWOName = dwarf::toString(
1615           DwarfUnit->getUnitDIE().find(
1616               {dwarf::DW_AT_dwo_name, dwarf::DW_AT_GNU_dwo_name}),
1617           "");
1618       SmallString<16> AbsolutePath;
1619       if (!opts::CompDirOverride.empty()) {
1620         sys::path::append(AbsolutePath, opts::CompDirOverride);
1621         sys::path::append(AbsolutePath, DWOName);
1622       }
1623       DWARFUnit *DWOCU =
1624           DwarfUnit->getNonSkeletonUnitDIE(false, AbsolutePath).getDwarfUnit();
1625       if (!DWOCU->isDWOUnit()) {
1626         outs() << "BOLT-WARNING: Debug Fission: DWO debug information for "
1627                << DWOName
1628                << " was not retrieved and won't be updated. Please check "
1629                   "relative path.\n";
1630         continue;
1631       }
1632       DWOCUs[*DWOId] = DWOCU;
1633     }
1634   }
1635   if (!DWOCUs.empty())
1636     outs() << "BOLT-INFO: processing split DWARF\n";
1637 }
1638 
1639 void BinaryContext::preprocessDebugInfo() {
1640   struct CURange {
1641     uint64_t LowPC;
1642     uint64_t HighPC;
1643     DWARFUnit *Unit;
1644 
1645     bool operator<(const CURange &Other) const { return LowPC < Other.LowPC; }
1646   };
1647 
1648   // Building a map of address ranges to CUs similar to .debug_aranges and use
1649   // it to assign CU to functions.
1650   std::vector<CURange> AllRanges;
1651   AllRanges.reserve(DwCtx->getNumCompileUnits());
1652   for (const std::unique_ptr<DWARFUnit> &CU : DwCtx->compile_units()) {
1653     Expected<DWARFAddressRangesVector> RangesOrError =
1654         CU->getUnitDIE().getAddressRanges();
1655     if (!RangesOrError) {
1656       consumeError(RangesOrError.takeError());
1657       continue;
1658     }
1659     for (DWARFAddressRange &Range : *RangesOrError) {
1660       // Parts of the debug info could be invalidated due to corresponding code
1661       // being removed from the binary by the linker. Hence we check if the
1662       // address is a valid one.
1663       if (containsAddress(Range.LowPC))
1664         AllRanges.emplace_back(CURange{Range.LowPC, Range.HighPC, CU.get()});
1665     }
1666 
1667     ContainsDwarf5 |= CU->getVersion() >= 5;
1668     ContainsDwarfLegacy |= CU->getVersion() < 5;
1669   }
1670 
1671   llvm::sort(AllRanges);
1672   for (auto &KV : BinaryFunctions) {
1673     const uint64_t FunctionAddress = KV.first;
1674     BinaryFunction &Function = KV.second;
1675 
1676     auto It = llvm::partition_point(
1677         AllRanges, [=](CURange R) { return R.HighPC <= FunctionAddress; });
1678     if (It != AllRanges.end() && It->LowPC <= FunctionAddress)
1679       Function.setDWARFUnit(It->Unit);
1680   }
1681 
1682   // Discover units with debug info that needs to be updated.
1683   for (const auto &KV : BinaryFunctions) {
1684     const BinaryFunction &BF = KV.second;
1685     if (shouldEmit(BF) && BF.getDWARFUnit())
1686       ProcessedCUs.insert(BF.getDWARFUnit());
1687   }
1688 
1689   // Clear debug info for functions from units that we are not going to process.
1690   for (auto &KV : BinaryFunctions) {
1691     BinaryFunction &BF = KV.second;
1692     if (BF.getDWARFUnit() && !ProcessedCUs.count(BF.getDWARFUnit()))
1693       BF.setDWARFUnit(nullptr);
1694   }
1695 
1696   if (opts::Verbosity >= 1) {
1697     outs() << "BOLT-INFO: " << ProcessedCUs.size() << " out of "
1698            << DwCtx->getNumCompileUnits() << " CUs will be updated\n";
1699   }
1700 
1701   preprocessDWODebugInfo();
1702 
1703   // Populate MCContext with DWARF files from all units.
1704   StringRef GlobalPrefix = AsmInfo->getPrivateGlobalPrefix();
1705   for (const std::unique_ptr<DWARFUnit> &CU : DwCtx->compile_units()) {
1706     const uint64_t CUID = CU->getOffset();
1707     DwarfLineTable &BinaryLineTable = getDwarfLineTable(CUID);
1708     BinaryLineTable.setLabel(Ctx->getOrCreateSymbol(
1709         GlobalPrefix + "line_table_start" + Twine(CUID)));
1710 
1711     if (!ProcessedCUs.count(CU.get()))
1712       continue;
1713 
1714     const DWARFDebugLine::LineTable *LineTable =
1715         DwCtx->getLineTableForUnit(CU.get());
1716     const std::vector<DWARFDebugLine::FileNameEntry> &FileNames =
1717         LineTable->Prologue.FileNames;
1718 
1719     uint16_t DwarfVersion = LineTable->Prologue.getVersion();
1720     if (DwarfVersion >= 5) {
1721       std::optional<MD5::MD5Result> Checksum;
1722       if (LineTable->Prologue.ContentTypes.HasMD5)
1723         Checksum = LineTable->Prologue.FileNames[0].Checksum;
1724       std::optional<const char *> Name =
1725           dwarf::toString(CU->getUnitDIE().find(dwarf::DW_AT_name), nullptr);
1726       if (std::optional<uint64_t> DWOID = CU->getDWOId()) {
1727         auto Iter = DWOCUs.find(*DWOID);
1728         assert(Iter != DWOCUs.end() && "DWO CU was not found.");
1729         Name = dwarf::toString(
1730             Iter->second->getUnitDIE().find(dwarf::DW_AT_name), nullptr);
1731       }
1732       BinaryLineTable.setRootFile(CU->getCompilationDir(), *Name, Checksum,
1733                                   std::nullopt);
1734     }
1735 
1736     BinaryLineTable.setDwarfVersion(DwarfVersion);
1737 
1738     // Assign a unique label to every line table, one per CU.
1739     // Make sure empty debug line tables are registered too.
1740     if (FileNames.empty()) {
1741       cantFail(getDwarfFile("", "<unknown>", 0, std::nullopt, std::nullopt,
1742                             CUID, DwarfVersion));
1743       continue;
1744     }
1745     const uint32_t Offset = DwarfVersion < 5 ? 1 : 0;
1746     for (size_t I = 0, Size = FileNames.size(); I != Size; ++I) {
1747       // Dir indexes start at 1, as DWARF file numbers, and a dir index 0
1748       // means empty dir.
1749       StringRef Dir = "";
1750       if (FileNames[I].DirIdx != 0 || DwarfVersion >= 5)
1751         if (std::optional<const char *> DirName = dwarf::toString(
1752                 LineTable->Prologue
1753                     .IncludeDirectories[FileNames[I].DirIdx - Offset]))
1754           Dir = *DirName;
1755       StringRef FileName = "";
1756       if (std::optional<const char *> FName =
1757               dwarf::toString(FileNames[I].Name))
1758         FileName = *FName;
1759       assert(FileName != "");
1760       std::optional<MD5::MD5Result> Checksum;
1761       if (DwarfVersion >= 5 && LineTable->Prologue.ContentTypes.HasMD5)
1762         Checksum = LineTable->Prologue.FileNames[I].Checksum;
1763       cantFail(getDwarfFile(Dir, FileName, 0, Checksum, std::nullopt, CUID,
1764                             DwarfVersion));
1765     }
1766   }
1767 }
1768 
1769 bool BinaryContext::shouldEmit(const BinaryFunction &Function) const {
1770   if (Function.isPseudo())
1771     return false;
1772 
1773   if (opts::processAllFunctions())
1774     return true;
1775 
1776   if (Function.isIgnored())
1777     return false;
1778 
1779   // In relocation mode we will emit non-simple functions with CFG.
1780   // If the function does not have a CFG it should be marked as ignored.
1781   return HasRelocations || Function.isSimple();
1782 }
1783 
1784 void BinaryContext::dump(const MCInst &Inst) const {
1785   if (LLVM_UNLIKELY(!InstPrinter)) {
1786     dbgs() << "Cannot dump for InstPrinter is not initialized.\n";
1787     return;
1788   }
1789   InstPrinter->printInst(&Inst, 0, "", *STI, dbgs());
1790   dbgs() << "\n";
1791 }
1792 
1793 void BinaryContext::printCFI(raw_ostream &OS, const MCCFIInstruction &Inst) {
1794   uint32_t Operation = Inst.getOperation();
1795   switch (Operation) {
1796   case MCCFIInstruction::OpSameValue:
1797     OS << "OpSameValue Reg" << Inst.getRegister();
1798     break;
1799   case MCCFIInstruction::OpRememberState:
1800     OS << "OpRememberState";
1801     break;
1802   case MCCFIInstruction::OpRestoreState:
1803     OS << "OpRestoreState";
1804     break;
1805   case MCCFIInstruction::OpOffset:
1806     OS << "OpOffset Reg" << Inst.getRegister() << " " << Inst.getOffset();
1807     break;
1808   case MCCFIInstruction::OpDefCfaRegister:
1809     OS << "OpDefCfaRegister Reg" << Inst.getRegister();
1810     break;
1811   case MCCFIInstruction::OpDefCfaOffset:
1812     OS << "OpDefCfaOffset " << Inst.getOffset();
1813     break;
1814   case MCCFIInstruction::OpDefCfa:
1815     OS << "OpDefCfa Reg" << Inst.getRegister() << " " << Inst.getOffset();
1816     break;
1817   case MCCFIInstruction::OpRelOffset:
1818     OS << "OpRelOffset Reg" << Inst.getRegister() << " " << Inst.getOffset();
1819     break;
1820   case MCCFIInstruction::OpAdjustCfaOffset:
1821     OS << "OfAdjustCfaOffset " << Inst.getOffset();
1822     break;
1823   case MCCFIInstruction::OpEscape:
1824     OS << "OpEscape";
1825     break;
1826   case MCCFIInstruction::OpRestore:
1827     OS << "OpRestore Reg" << Inst.getRegister();
1828     break;
1829   case MCCFIInstruction::OpUndefined:
1830     OS << "OpUndefined Reg" << Inst.getRegister();
1831     break;
1832   case MCCFIInstruction::OpRegister:
1833     OS << "OpRegister Reg" << Inst.getRegister() << " Reg"
1834        << Inst.getRegister2();
1835     break;
1836   case MCCFIInstruction::OpWindowSave:
1837     OS << "OpWindowSave";
1838     break;
1839   case MCCFIInstruction::OpGnuArgsSize:
1840     OS << "OpGnuArgsSize";
1841     break;
1842   default:
1843     OS << "Op#" << Operation;
1844     break;
1845   }
1846 }
1847 
1848 MarkerSymType BinaryContext::getMarkerType(const SymbolRef &Symbol) const {
1849   // For aarch64 and riscv, the ABI defines mapping symbols so we identify data
1850   // in the code section (see IHI0056B). $x identifies a symbol starting code or
1851   // the end of a data chunk inside code, $d identifies start of data.
1852   if ((!isAArch64() && !isRISCV()) || ELFSymbolRef(Symbol).getSize())
1853     return MarkerSymType::NONE;
1854 
1855   Expected<StringRef> NameOrError = Symbol.getName();
1856   Expected<object::SymbolRef::Type> TypeOrError = Symbol.getType();
1857 
1858   if (!TypeOrError || !NameOrError)
1859     return MarkerSymType::NONE;
1860 
1861   if (*TypeOrError != SymbolRef::ST_Unknown)
1862     return MarkerSymType::NONE;
1863 
1864   if (*NameOrError == "$x" || NameOrError->starts_with("$x."))
1865     return MarkerSymType::CODE;
1866 
1867   // $x<ISA>
1868   if (isRISCV() && NameOrError->starts_with("$x"))
1869     return MarkerSymType::CODE;
1870 
1871   if (*NameOrError == "$d" || NameOrError->starts_with("$d."))
1872     return MarkerSymType::DATA;
1873 
1874   return MarkerSymType::NONE;
1875 }
1876 
1877 bool BinaryContext::isMarker(const SymbolRef &Symbol) const {
1878   return getMarkerType(Symbol) != MarkerSymType::NONE;
1879 }
1880 
1881 static void printDebugInfo(raw_ostream &OS, const MCInst &Instruction,
1882                            const BinaryFunction *Function,
1883                            DWARFContext *DwCtx) {
1884   DebugLineTableRowRef RowRef =
1885       DebugLineTableRowRef::fromSMLoc(Instruction.getLoc());
1886   if (RowRef == DebugLineTableRowRef::NULL_ROW)
1887     return;
1888 
1889   const DWARFDebugLine::LineTable *LineTable;
1890   if (Function && Function->getDWARFUnit() &&
1891       Function->getDWARFUnit()->getOffset() == RowRef.DwCompileUnitIndex) {
1892     LineTable = Function->getDWARFLineTable();
1893   } else {
1894     LineTable = DwCtx->getLineTableForUnit(
1895         DwCtx->getCompileUnitForOffset(RowRef.DwCompileUnitIndex));
1896   }
1897   assert(LineTable && "line table expected for instruction with debug info");
1898 
1899   const DWARFDebugLine::Row &Row = LineTable->Rows[RowRef.RowIndex - 1];
1900   StringRef FileName = "";
1901   if (std::optional<const char *> FName =
1902           dwarf::toString(LineTable->Prologue.FileNames[Row.File - 1].Name))
1903     FileName = *FName;
1904   OS << " # debug line " << FileName << ":" << Row.Line;
1905   if (Row.Column)
1906     OS << ":" << Row.Column;
1907   if (Row.Discriminator)
1908     OS << " discriminator:" << Row.Discriminator;
1909 }
1910 
1911 void BinaryContext::printInstruction(raw_ostream &OS, const MCInst &Instruction,
1912                                      uint64_t Offset,
1913                                      const BinaryFunction *Function,
1914                                      bool PrintMCInst, bool PrintMemData,
1915                                      bool PrintRelocations,
1916                                      StringRef Endl) const {
1917   OS << format("    %08" PRIx64 ": ", Offset);
1918   if (MIB->isCFI(Instruction)) {
1919     uint32_t Offset = Instruction.getOperand(0).getImm();
1920     OS << "\t!CFI\t$" << Offset << "\t; ";
1921     if (Function)
1922       printCFI(OS, *Function->getCFIFor(Instruction));
1923     OS << Endl;
1924     return;
1925   }
1926   InstPrinter->printInst(&Instruction, 0, "", *STI, OS);
1927   if (MIB->isCall(Instruction)) {
1928     if (MIB->isTailCall(Instruction))
1929       OS << " # TAILCALL ";
1930     if (MIB->isInvoke(Instruction)) {
1931       const std::optional<MCPlus::MCLandingPad> EHInfo =
1932           MIB->getEHInfo(Instruction);
1933       OS << " # handler: ";
1934       if (EHInfo->first)
1935         OS << *EHInfo->first;
1936       else
1937         OS << '0';
1938       OS << "; action: " << EHInfo->second;
1939       const int64_t GnuArgsSize = MIB->getGnuArgsSize(Instruction);
1940       if (GnuArgsSize >= 0)
1941         OS << "; GNU_args_size = " << GnuArgsSize;
1942     }
1943   } else if (MIB->isIndirectBranch(Instruction)) {
1944     if (uint64_t JTAddress = MIB->getJumpTable(Instruction)) {
1945       OS << " # JUMPTABLE @0x" << Twine::utohexstr(JTAddress);
1946     } else {
1947       OS << " # UNKNOWN CONTROL FLOW";
1948     }
1949   }
1950   if (std::optional<uint32_t> Offset = MIB->getOffset(Instruction))
1951     OS << " # Offset: " << *Offset;
1952   if (std::optional<uint32_t> Size = MIB->getSize(Instruction))
1953     OS << " # Size: " << *Size;
1954   if (MCSymbol *Label = MIB->getLabel(Instruction))
1955     OS << " # Label: " << *Label;
1956 
1957   MIB->printAnnotations(Instruction, OS);
1958 
1959   if (opts::PrintDebugInfo)
1960     printDebugInfo(OS, Instruction, Function, DwCtx.get());
1961 
1962   if ((opts::PrintRelocations || PrintRelocations) && Function) {
1963     const uint64_t Size = computeCodeSize(&Instruction, &Instruction + 1);
1964     Function->printRelocations(OS, Offset, Size);
1965   }
1966 
1967   OS << Endl;
1968 
1969   if (PrintMCInst) {
1970     Instruction.dump_pretty(OS, InstPrinter.get());
1971     OS << Endl;
1972   }
1973 }
1974 
1975 std::optional<uint64_t>
1976 BinaryContext::getBaseAddressForMapping(uint64_t MMapAddress,
1977                                         uint64_t FileOffset) const {
1978   // Find a segment with a matching file offset.
1979   for (auto &KV : SegmentMapInfo) {
1980     const SegmentInfo &SegInfo = KV.second;
1981     // FileOffset is got from perf event,
1982     // and it is equal to alignDown(SegInfo.FileOffset, pagesize).
1983     // If the pagesize is not equal to SegInfo.Alignment.
1984     // FileOffset and SegInfo.FileOffset should be aligned first,
1985     // and then judge whether they are equal.
1986     if (alignDown(SegInfo.FileOffset, SegInfo.Alignment) ==
1987         alignDown(FileOffset, SegInfo.Alignment)) {
1988       // The function's offset from base address in VAS is aligned by pagesize
1989       // instead of SegInfo.Alignment. Pagesize can't be got from perf events.
1990       // However, The ELF document says that SegInfo.FileOffset should equal
1991       // to SegInfo.Address, modulo the pagesize.
1992       // Reference: https://refspecs.linuxfoundation.org/elf/elf.pdf
1993 
1994       // So alignDown(SegInfo.Address, pagesize) can be calculated by:
1995       // alignDown(SegInfo.Address, pagesize)
1996       //   = SegInfo.Address - (SegInfo.Address % pagesize)
1997       //   = SegInfo.Address - (SegInfo.FileOffset % pagesize)
1998       //   = SegInfo.Address - SegInfo.FileOffset +
1999       //     alignDown(SegInfo.FileOffset, pagesize)
2000       //   = SegInfo.Address - SegInfo.FileOffset + FileOffset
2001       return MMapAddress - (SegInfo.Address - SegInfo.FileOffset + FileOffset);
2002     }
2003   }
2004 
2005   return std::nullopt;
2006 }
2007 
2008 ErrorOr<BinarySection &> BinaryContext::getSectionForAddress(uint64_t Address) {
2009   auto SI = AddressToSection.upper_bound(Address);
2010   if (SI != AddressToSection.begin()) {
2011     --SI;
2012     uint64_t UpperBound = SI->first + SI->second->getSize();
2013     if (!SI->second->getSize())
2014       UpperBound += 1;
2015     if (UpperBound > Address)
2016       return *SI->second;
2017   }
2018   return std::make_error_code(std::errc::bad_address);
2019 }
2020 
2021 ErrorOr<StringRef>
2022 BinaryContext::getSectionNameForAddress(uint64_t Address) const {
2023   if (ErrorOr<const BinarySection &> Section = getSectionForAddress(Address))
2024     return Section->getName();
2025   return std::make_error_code(std::errc::bad_address);
2026 }
2027 
2028 BinarySection &BinaryContext::registerSection(BinarySection *Section) {
2029   auto Res = Sections.insert(Section);
2030   (void)Res;
2031   assert(Res.second && "can't register the same section twice.");
2032 
2033   // Only register allocatable sections in the AddressToSection map.
2034   if (Section->isAllocatable() && Section->getAddress())
2035     AddressToSection.insert(std::make_pair(Section->getAddress(), Section));
2036   NameToSection.insert(
2037       std::make_pair(std::string(Section->getName()), Section));
2038   if (Section->hasSectionRef())
2039     SectionRefToBinarySection.insert(
2040         std::make_pair(Section->getSectionRef(), Section));
2041 
2042   LLVM_DEBUG(dbgs() << "BOLT-DEBUG: registering " << *Section << "\n");
2043   return *Section;
2044 }
2045 
2046 BinarySection &BinaryContext::registerSection(SectionRef Section) {
2047   return registerSection(new BinarySection(*this, Section));
2048 }
2049 
2050 BinarySection &
2051 BinaryContext::registerSection(const Twine &SectionName,
2052                                const BinarySection &OriginalSection) {
2053   return registerSection(
2054       new BinarySection(*this, SectionName, OriginalSection));
2055 }
2056 
2057 BinarySection &
2058 BinaryContext::registerOrUpdateSection(const Twine &Name, unsigned ELFType,
2059                                        unsigned ELFFlags, uint8_t *Data,
2060                                        uint64_t Size, unsigned Alignment) {
2061   auto NamedSections = getSectionByName(Name);
2062   if (NamedSections.begin() != NamedSections.end()) {
2063     assert(std::next(NamedSections.begin()) == NamedSections.end() &&
2064            "can only update unique sections");
2065     BinarySection *Section = NamedSections.begin()->second;
2066 
2067     LLVM_DEBUG(dbgs() << "BOLT-DEBUG: updating " << *Section << " -> ");
2068     const bool Flag = Section->isAllocatable();
2069     (void)Flag;
2070     Section->update(Data, Size, Alignment, ELFType, ELFFlags);
2071     LLVM_DEBUG(dbgs() << *Section << "\n");
2072     // FIXME: Fix section flags/attributes for MachO.
2073     if (isELF())
2074       assert(Flag == Section->isAllocatable() &&
2075              "can't change section allocation status");
2076     return *Section;
2077   }
2078 
2079   return registerSection(
2080       new BinarySection(*this, Name, Data, Size, Alignment, ELFType, ELFFlags));
2081 }
2082 
2083 void BinaryContext::deregisterSectionName(const BinarySection &Section) {
2084   auto NameRange = NameToSection.equal_range(Section.getName().str());
2085   while (NameRange.first != NameRange.second) {
2086     if (NameRange.first->second == &Section) {
2087       NameToSection.erase(NameRange.first);
2088       break;
2089     }
2090     ++NameRange.first;
2091   }
2092 }
2093 
2094 void BinaryContext::deregisterUnusedSections() {
2095   ErrorOr<BinarySection &> AbsSection = getUniqueSectionByName("<absolute>");
2096   for (auto SI = Sections.begin(); SI != Sections.end();) {
2097     BinarySection *Section = *SI;
2098     // We check getOutputData() instead of getOutputSize() because sometimes
2099     // zero-sized .text.cold sections are allocated.
2100     if (Section->hasSectionRef() || Section->getOutputData() ||
2101         (AbsSection && Section == &AbsSection.get())) {
2102       ++SI;
2103       continue;
2104     }
2105 
2106     LLVM_DEBUG(dbgs() << "LLVM-DEBUG: deregistering " << Section->getName()
2107                       << '\n';);
2108     deregisterSectionName(*Section);
2109     SI = Sections.erase(SI);
2110     delete Section;
2111   }
2112 }
2113 
2114 bool BinaryContext::deregisterSection(BinarySection &Section) {
2115   BinarySection *SectionPtr = &Section;
2116   auto Itr = Sections.find(SectionPtr);
2117   if (Itr != Sections.end()) {
2118     auto Range = AddressToSection.equal_range(SectionPtr->getAddress());
2119     while (Range.first != Range.second) {
2120       if (Range.first->second == SectionPtr) {
2121         AddressToSection.erase(Range.first);
2122         break;
2123       }
2124       ++Range.first;
2125     }
2126 
2127     deregisterSectionName(*SectionPtr);
2128     Sections.erase(Itr);
2129     delete SectionPtr;
2130     return true;
2131   }
2132   return false;
2133 }
2134 
2135 void BinaryContext::renameSection(BinarySection &Section,
2136                                   const Twine &NewName) {
2137   auto Itr = Sections.find(&Section);
2138   assert(Itr != Sections.end() && "Section must exist to be renamed.");
2139   Sections.erase(Itr);
2140 
2141   deregisterSectionName(Section);
2142 
2143   Section.Name = NewName.str();
2144   Section.setOutputName(Section.Name);
2145 
2146   NameToSection.insert(std::make_pair(Section.Name, &Section));
2147 
2148   // Reinsert with the new name.
2149   Sections.insert(&Section);
2150 }
2151 
2152 void BinaryContext::printSections(raw_ostream &OS) const {
2153   for (BinarySection *const &Section : Sections)
2154     OS << "BOLT-INFO: " << *Section << "\n";
2155 }
2156 
2157 BinarySection &BinaryContext::absoluteSection() {
2158   if (ErrorOr<BinarySection &> Section = getUniqueSectionByName("<absolute>"))
2159     return *Section;
2160   return registerOrUpdateSection("<absolute>", ELF::SHT_NULL, 0u);
2161 }
2162 
2163 ErrorOr<uint64_t> BinaryContext::getUnsignedValueAtAddress(uint64_t Address,
2164                                                            size_t Size) const {
2165   const ErrorOr<const BinarySection &> Section = getSectionForAddress(Address);
2166   if (!Section)
2167     return std::make_error_code(std::errc::bad_address);
2168 
2169   if (Section->isVirtual())
2170     return 0;
2171 
2172   DataExtractor DE(Section->getContents(), AsmInfo->isLittleEndian(),
2173                    AsmInfo->getCodePointerSize());
2174   auto ValueOffset = static_cast<uint64_t>(Address - Section->getAddress());
2175   return DE.getUnsigned(&ValueOffset, Size);
2176 }
2177 
2178 ErrorOr<uint64_t> BinaryContext::getSignedValueAtAddress(uint64_t Address,
2179                                                          size_t Size) const {
2180   const ErrorOr<const BinarySection &> Section = getSectionForAddress(Address);
2181   if (!Section)
2182     return std::make_error_code(std::errc::bad_address);
2183 
2184   if (Section->isVirtual())
2185     return 0;
2186 
2187   DataExtractor DE(Section->getContents(), AsmInfo->isLittleEndian(),
2188                    AsmInfo->getCodePointerSize());
2189   auto ValueOffset = static_cast<uint64_t>(Address - Section->getAddress());
2190   return DE.getSigned(&ValueOffset, Size);
2191 }
2192 
2193 void BinaryContext::addRelocation(uint64_t Address, MCSymbol *Symbol,
2194                                   uint64_t Type, uint64_t Addend,
2195                                   uint64_t Value) {
2196   ErrorOr<BinarySection &> Section = getSectionForAddress(Address);
2197   assert(Section && "cannot find section for address");
2198   Section->addRelocation(Address - Section->getAddress(), Symbol, Type, Addend,
2199                          Value);
2200 }
2201 
2202 void BinaryContext::addDynamicRelocation(uint64_t Address, MCSymbol *Symbol,
2203                                          uint64_t Type, uint64_t Addend,
2204                                          uint64_t Value) {
2205   ErrorOr<BinarySection &> Section = getSectionForAddress(Address);
2206   assert(Section && "cannot find section for address");
2207   Section->addDynamicRelocation(Address - Section->getAddress(), Symbol, Type,
2208                                 Addend, Value);
2209 }
2210 
2211 bool BinaryContext::removeRelocationAt(uint64_t Address) {
2212   ErrorOr<BinarySection &> Section = getSectionForAddress(Address);
2213   assert(Section && "cannot find section for address");
2214   return Section->removeRelocationAt(Address - Section->getAddress());
2215 }
2216 
2217 const Relocation *BinaryContext::getRelocationAt(uint64_t Address) const {
2218   ErrorOr<const BinarySection &> Section = getSectionForAddress(Address);
2219   if (!Section)
2220     return nullptr;
2221 
2222   return Section->getRelocationAt(Address - Section->getAddress());
2223 }
2224 
2225 const Relocation *
2226 BinaryContext::getDynamicRelocationAt(uint64_t Address) const {
2227   ErrorOr<const BinarySection &> Section = getSectionForAddress(Address);
2228   if (!Section)
2229     return nullptr;
2230 
2231   return Section->getDynamicRelocationAt(Address - Section->getAddress());
2232 }
2233 
2234 void BinaryContext::markAmbiguousRelocations(BinaryData &BD,
2235                                              const uint64_t Address) {
2236   auto setImmovable = [&](BinaryData &BD) {
2237     BinaryData *Root = BD.getAtomicRoot();
2238     LLVM_DEBUG(if (Root->isMoveable()) {
2239       dbgs() << "BOLT-DEBUG: setting " << *Root << " as immovable "
2240              << "due to ambiguous relocation referencing 0x"
2241              << Twine::utohexstr(Address) << '\n';
2242     });
2243     Root->setIsMoveable(false);
2244   };
2245 
2246   if (Address == BD.getAddress()) {
2247     setImmovable(BD);
2248 
2249     // Set previous symbol as immovable
2250     BinaryData *Prev = getBinaryDataContainingAddress(Address - 1);
2251     if (Prev && Prev->getEndAddress() == BD.getAddress())
2252       setImmovable(*Prev);
2253   }
2254 
2255   if (Address == BD.getEndAddress()) {
2256     setImmovable(BD);
2257 
2258     // Set next symbol as immovable
2259     BinaryData *Next = getBinaryDataContainingAddress(BD.getEndAddress());
2260     if (Next && Next->getAddress() == BD.getEndAddress())
2261       setImmovable(*Next);
2262   }
2263 }
2264 
2265 BinaryFunction *BinaryContext::getFunctionForSymbol(const MCSymbol *Symbol,
2266                                                     uint64_t *EntryDesc) {
2267   std::shared_lock<llvm::sys::RWMutex> Lock(SymbolToFunctionMapMutex);
2268   auto BFI = SymbolToFunctionMap.find(Symbol);
2269   if (BFI == SymbolToFunctionMap.end())
2270     return nullptr;
2271 
2272   BinaryFunction *BF = BFI->second;
2273   if (EntryDesc)
2274     *EntryDesc = BF->getEntryIDForSymbol(Symbol);
2275 
2276   return BF;
2277 }
2278 
2279 void BinaryContext::exitWithBugReport(StringRef Message,
2280                                       const BinaryFunction &Function) const {
2281   errs() << "=======================================\n";
2282   errs() << "BOLT is unable to proceed because it couldn't properly understand "
2283             "this function.\n";
2284   errs() << "If you are running the most recent version of BOLT, you may "
2285             "want to "
2286             "report this and paste this dump.\nPlease check that there is no "
2287             "sensitive contents being shared in this dump.\n";
2288   errs() << "\nOffending function: " << Function.getPrintName() << "\n\n";
2289   ScopedPrinter SP(errs());
2290   SP.printBinaryBlock("Function contents", *Function.getData());
2291   errs() << "\n";
2292   Function.dump();
2293   errs() << "ERROR: " << Message;
2294   errs() << "\n=======================================\n";
2295   exit(1);
2296 }
2297 
2298 BinaryFunction *
2299 BinaryContext::createInjectedBinaryFunction(const std::string &Name,
2300                                             bool IsSimple) {
2301   InjectedBinaryFunctions.push_back(new BinaryFunction(Name, *this, IsSimple));
2302   BinaryFunction *BF = InjectedBinaryFunctions.back();
2303   setSymbolToFunctionMap(BF->getSymbol(), BF);
2304   BF->CurrentState = BinaryFunction::State::CFG;
2305   return BF;
2306 }
2307 
2308 std::pair<size_t, size_t>
2309 BinaryContext::calculateEmittedSize(BinaryFunction &BF, bool FixBranches) {
2310   // Adjust branch instruction to match the current layout.
2311   if (FixBranches)
2312     BF.fixBranches();
2313 
2314   // Create local MC context to isolate the effect of ephemeral code emission.
2315   IndependentCodeEmitter MCEInstance = createIndependentMCCodeEmitter();
2316   MCContext *LocalCtx = MCEInstance.LocalCtx.get();
2317   MCAsmBackend *MAB =
2318       TheTarget->createMCAsmBackend(*STI, *MRI, MCTargetOptions());
2319 
2320   SmallString<256> Code;
2321   raw_svector_ostream VecOS(Code);
2322 
2323   std::unique_ptr<MCObjectWriter> OW = MAB->createObjectWriter(VecOS);
2324   std::unique_ptr<MCStreamer> Streamer(TheTarget->createMCObjectStreamer(
2325       *TheTriple, *LocalCtx, std::unique_ptr<MCAsmBackend>(MAB), std::move(OW),
2326       std::unique_ptr<MCCodeEmitter>(MCEInstance.MCE.release()), *STI,
2327       /*RelaxAll=*/false,
2328       /*IncrementalLinkerCompatible=*/false,
2329       /*DWARFMustBeAtTheEnd=*/false));
2330 
2331   Streamer->initSections(false, *STI);
2332 
2333   MCSection *Section = MCEInstance.LocalMOFI->getTextSection();
2334   Section->setHasInstructions(true);
2335 
2336   // Create symbols in the LocalCtx so that they get destroyed with it.
2337   MCSymbol *StartLabel = LocalCtx->createTempSymbol();
2338   MCSymbol *EndLabel = LocalCtx->createTempSymbol();
2339 
2340   Streamer->switchSection(Section);
2341   Streamer->emitLabel(StartLabel);
2342   emitFunctionBody(*Streamer, BF, BF.getLayout().getMainFragment(),
2343                    /*EmitCodeOnly=*/true);
2344   Streamer->emitLabel(EndLabel);
2345 
2346   using LabelRange = std::pair<const MCSymbol *, const MCSymbol *>;
2347   SmallVector<LabelRange> SplitLabels;
2348   for (FunctionFragment &FF : BF.getLayout().getSplitFragments()) {
2349     MCSymbol *const SplitStartLabel = LocalCtx->createTempSymbol();
2350     MCSymbol *const SplitEndLabel = LocalCtx->createTempSymbol();
2351     SplitLabels.emplace_back(SplitStartLabel, SplitEndLabel);
2352 
2353     MCSectionELF *const SplitSection = LocalCtx->getELFSection(
2354         BF.getCodeSectionName(FF.getFragmentNum()), ELF::SHT_PROGBITS,
2355         ELF::SHF_EXECINSTR | ELF::SHF_ALLOC);
2356     SplitSection->setHasInstructions(true);
2357     Streamer->switchSection(SplitSection);
2358 
2359     Streamer->emitLabel(SplitStartLabel);
2360     emitFunctionBody(*Streamer, BF, FF, /*EmitCodeOnly=*/true);
2361     Streamer->emitLabel(SplitEndLabel);
2362     // To avoid calling MCObjectStreamer::flushPendingLabels() which is
2363     // private
2364     Streamer->emitBytes(StringRef(""));
2365     Streamer->switchSection(Section);
2366   }
2367 
2368   // To avoid calling MCObjectStreamer::flushPendingLabels() which is private or
2369   // MCStreamer::Finish(), which does more than we want
2370   Streamer->emitBytes(StringRef(""));
2371 
2372   MCAssembler &Assembler =
2373       static_cast<MCObjectStreamer *>(Streamer.get())->getAssembler();
2374   MCAsmLayout Layout(Assembler);
2375   Assembler.layout(Layout);
2376 
2377   // Obtain fragment sizes.
2378   std::vector<uint64_t> FragmentSizes;
2379   // Main fragment size.
2380   const uint64_t HotSize =
2381       Layout.getSymbolOffset(*EndLabel) - Layout.getSymbolOffset(*StartLabel);
2382   FragmentSizes.push_back(HotSize);
2383   // Split fragment sizes.
2384   uint64_t ColdSize = 0;
2385   for (const auto &Labels : SplitLabels) {
2386     uint64_t Size = Layout.getSymbolOffset(*Labels.second) -
2387                     Layout.getSymbolOffset(*Labels.first);
2388     FragmentSizes.push_back(Size);
2389     ColdSize += Size;
2390   }
2391 
2392   // Populate new start and end offsets of each basic block.
2393   uint64_t FragmentIndex = 0;
2394   for (FunctionFragment &FF : BF.getLayout().fragments()) {
2395     BinaryBasicBlock *PrevBB = nullptr;
2396     for (BinaryBasicBlock *BB : FF) {
2397       const uint64_t BBStartOffset = Layout.getSymbolOffset(*(BB->getLabel()));
2398       BB->setOutputStartAddress(BBStartOffset);
2399       if (PrevBB)
2400         PrevBB->setOutputEndAddress(BBStartOffset);
2401       PrevBB = BB;
2402     }
2403     if (PrevBB)
2404       PrevBB->setOutputEndAddress(FragmentSizes[FragmentIndex]);
2405     FragmentIndex++;
2406   }
2407 
2408   // Clean-up the effect of the code emission.
2409   for (const MCSymbol &Symbol : Assembler.symbols()) {
2410     MCSymbol *MutableSymbol = const_cast<MCSymbol *>(&Symbol);
2411     MutableSymbol->setUndefined();
2412     MutableSymbol->setIsRegistered(false);
2413   }
2414 
2415   return std::make_pair(HotSize, ColdSize);
2416 }
2417 
2418 bool BinaryContext::validateInstructionEncoding(
2419     ArrayRef<uint8_t> InputSequence) const {
2420   MCInst Inst;
2421   uint64_t InstSize;
2422   DisAsm->getInstruction(Inst, InstSize, InputSequence, 0, nulls());
2423   assert(InstSize == InputSequence.size() &&
2424          "Disassembled instruction size does not match the sequence.");
2425 
2426   SmallString<256> Code;
2427   SmallVector<MCFixup, 4> Fixups;
2428 
2429   MCE->encodeInstruction(Inst, Code, Fixups, *STI);
2430   auto OutputSequence = ArrayRef<uint8_t>((uint8_t *)Code.data(), Code.size());
2431   if (InputSequence != OutputSequence) {
2432     if (opts::Verbosity > 1) {
2433       errs() << "BOLT-WARNING: mismatched encoding detected\n"
2434              << "      input: " << InputSequence << '\n'
2435              << "     output: " << OutputSequence << '\n';
2436     }
2437     return false;
2438   }
2439 
2440   return true;
2441 }
2442 
2443 uint64_t BinaryContext::getHotThreshold() const {
2444   static uint64_t Threshold = 0;
2445   if (Threshold == 0) {
2446     Threshold = std::max(
2447         (uint64_t)opts::ExecutionCountThreshold,
2448         NumProfiledFuncs ? SumExecutionCount / (2 * NumProfiledFuncs) : 1);
2449   }
2450   return Threshold;
2451 }
2452 
2453 BinaryFunction *BinaryContext::getBinaryFunctionContainingAddress(
2454     uint64_t Address, bool CheckPastEnd, bool UseMaxSize) {
2455   auto FI = BinaryFunctions.upper_bound(Address);
2456   if (FI == BinaryFunctions.begin())
2457     return nullptr;
2458   --FI;
2459 
2460   const uint64_t UsedSize =
2461       UseMaxSize ? FI->second.getMaxSize() : FI->second.getSize();
2462 
2463   if (Address >= FI->first + UsedSize + (CheckPastEnd ? 1 : 0))
2464     return nullptr;
2465 
2466   return &FI->second;
2467 }
2468 
2469 BinaryFunction *BinaryContext::getBinaryFunctionAtAddress(uint64_t Address) {
2470   // First, try to find a function starting at the given address. If the
2471   // function was folded, this will get us the original folded function if it
2472   // wasn't removed from the list, e.g. in non-relocation mode.
2473   auto BFI = BinaryFunctions.find(Address);
2474   if (BFI != BinaryFunctions.end())
2475     return &BFI->second;
2476 
2477   // We might have folded the function matching the object at the given
2478   // address. In such case, we look for a function matching the symbol
2479   // registered at the original address. The new function (the one that the
2480   // original was folded into) will hold the symbol.
2481   if (const BinaryData *BD = getBinaryDataAtAddress(Address)) {
2482     uint64_t EntryID = 0;
2483     BinaryFunction *BF = getFunctionForSymbol(BD->getSymbol(), &EntryID);
2484     if (BF && EntryID == 0)
2485       return BF;
2486   }
2487   return nullptr;
2488 }
2489 
2490 DebugAddressRangesVector BinaryContext::translateModuleAddressRanges(
2491     const DWARFAddressRangesVector &InputRanges) const {
2492   DebugAddressRangesVector OutputRanges;
2493 
2494   for (const DWARFAddressRange Range : InputRanges) {
2495     auto BFI = BinaryFunctions.lower_bound(Range.LowPC);
2496     while (BFI != BinaryFunctions.end()) {
2497       const BinaryFunction &Function = BFI->second;
2498       if (Function.getAddress() >= Range.HighPC)
2499         break;
2500       const DebugAddressRangesVector FunctionRanges =
2501           Function.getOutputAddressRanges();
2502       llvm::move(FunctionRanges, std::back_inserter(OutputRanges));
2503       std::advance(BFI, 1);
2504     }
2505   }
2506 
2507   return OutputRanges;
2508 }
2509 
2510 } // namespace bolt
2511 } // namespace llvm
2512