xref: /llvm-project/bolt/lib/Core/BinaryContext.cpp (revision ff5e2babcb46e7eb3887ee265decb2948da2792c)
1 //===- bolt/Core/BinaryContext.cpp - Low-level context --------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements the BinaryContext class.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "bolt/Core/BinaryContext.h"
14 #include "bolt/Core/BinaryEmitter.h"
15 #include "bolt/Core/BinaryFunction.h"
16 #include "bolt/Utils/CommandLineOpts.h"
17 #include "bolt/Utils/NameResolver.h"
18 #include "bolt/Utils/Utils.h"
19 #include "llvm/ADT/STLExtras.h"
20 #include "llvm/ADT/Twine.h"
21 #include "llvm/DebugInfo/DWARF/DWARFCompileUnit.h"
22 #include "llvm/DebugInfo/DWARF/DWARFFormValue.h"
23 #include "llvm/DebugInfo/DWARF/DWARFUnit.h"
24 #include "llvm/MC/MCAsmLayout.h"
25 #include "llvm/MC/MCAssembler.h"
26 #include "llvm/MC/MCContext.h"
27 #include "llvm/MC/MCDisassembler/MCDisassembler.h"
28 #include "llvm/MC/MCInstPrinter.h"
29 #include "llvm/MC/MCObjectStreamer.h"
30 #include "llvm/MC/MCObjectWriter.h"
31 #include "llvm/MC/MCRegisterInfo.h"
32 #include "llvm/MC/MCSectionELF.h"
33 #include "llvm/MC/MCStreamer.h"
34 #include "llvm/MC/MCSubtargetInfo.h"
35 #include "llvm/MC/MCSymbol.h"
36 #include "llvm/Support/CommandLine.h"
37 #include "llvm/Support/Error.h"
38 #include "llvm/Support/Regex.h"
39 #include <algorithm>
40 #include <functional>
41 #include <iterator>
42 #include <numeric>
43 #include <unordered_set>
44 
45 using namespace llvm;
46 
47 #undef  DEBUG_TYPE
48 #define DEBUG_TYPE "bolt"
49 
50 namespace opts {
51 
52 cl::opt<bool> NoHugePages("no-huge-pages",
53                           cl::desc("use regular size pages for code alignment"),
54                           cl::Hidden, cl::cat(BoltCategory));
55 
56 static cl::opt<bool>
57 PrintDebugInfo("print-debug-info",
58   cl::desc("print debug info when printing functions"),
59   cl::Hidden,
60   cl::ZeroOrMore,
61   cl::cat(BoltCategory));
62 
63 cl::opt<bool> PrintRelocations(
64     "print-relocations",
65     cl::desc("print relocations when printing functions/objects"), cl::Hidden,
66     cl::cat(BoltCategory));
67 
68 static cl::opt<bool>
69 PrintMemData("print-mem-data",
70   cl::desc("print memory data annotations when printing functions"),
71   cl::Hidden,
72   cl::ZeroOrMore,
73   cl::cat(BoltCategory));
74 
75 } // namespace opts
76 
77 namespace llvm {
78 namespace bolt {
79 
80 BinaryContext::BinaryContext(std::unique_ptr<MCContext> Ctx,
81                              std::unique_ptr<DWARFContext> DwCtx,
82                              std::unique_ptr<Triple> TheTriple,
83                              const Target *TheTarget, std::string TripleName,
84                              std::unique_ptr<MCCodeEmitter> MCE,
85                              std::unique_ptr<MCObjectFileInfo> MOFI,
86                              std::unique_ptr<const MCAsmInfo> AsmInfo,
87                              std::unique_ptr<const MCInstrInfo> MII,
88                              std::unique_ptr<const MCSubtargetInfo> STI,
89                              std::unique_ptr<MCInstPrinter> InstPrinter,
90                              std::unique_ptr<const MCInstrAnalysis> MIA,
91                              std::unique_ptr<MCPlusBuilder> MIB,
92                              std::unique_ptr<const MCRegisterInfo> MRI,
93                              std::unique_ptr<MCDisassembler> DisAsm)
94     : Ctx(std::move(Ctx)), DwCtx(std::move(DwCtx)),
95       TheTriple(std::move(TheTriple)), TheTarget(TheTarget),
96       TripleName(TripleName), MCE(std::move(MCE)), MOFI(std::move(MOFI)),
97       AsmInfo(std::move(AsmInfo)), MII(std::move(MII)), STI(std::move(STI)),
98       InstPrinter(std::move(InstPrinter)), MIA(std::move(MIA)),
99       MIB(std::move(MIB)), MRI(std::move(MRI)), DisAsm(std::move(DisAsm)) {
100   Relocation::Arch = this->TheTriple->getArch();
101   RegularPageSize = isAArch64() ? RegularPageSizeAArch64 : RegularPageSizeX86;
102   PageAlign = opts::NoHugePages ? RegularPageSize : HugePageSize;
103 }
104 
105 BinaryContext::~BinaryContext() {
106   for (BinarySection *Section : Sections)
107     delete Section;
108   for (BinaryFunction *InjectedFunction : InjectedBinaryFunctions)
109     delete InjectedFunction;
110   for (std::pair<const uint64_t, JumpTable *> JTI : JumpTables)
111     delete JTI.second;
112   clearBinaryData();
113 }
114 
115 /// Create BinaryContext for a given architecture \p ArchName and
116 /// triple \p TripleName.
117 Expected<std::unique_ptr<BinaryContext>>
118 BinaryContext::createBinaryContext(const ObjectFile *File, bool IsPIC,
119                                    std::unique_ptr<DWARFContext> DwCtx) {
120   StringRef ArchName = "";
121   StringRef FeaturesStr = "";
122   switch (File->getArch()) {
123   case llvm::Triple::x86_64:
124     ArchName = "x86-64";
125     FeaturesStr = "+nopl";
126     break;
127   case llvm::Triple::aarch64:
128     ArchName = "aarch64";
129     FeaturesStr = "+all";
130     break;
131   case llvm::Triple::riscv64:
132     ArchName = "riscv64";
133     // RV64GC
134     FeaturesStr = "+m,+a,+f,+d,+zicsr,+zifencei,+c,+relax";
135     break;
136   default:
137     return createStringError(std::errc::not_supported,
138                              "BOLT-ERROR: Unrecognized machine in ELF file");
139   }
140 
141   auto TheTriple = std::make_unique<Triple>(File->makeTriple());
142   const std::string TripleName = TheTriple->str();
143 
144   std::string Error;
145   const Target *TheTarget =
146       TargetRegistry::lookupTarget(std::string(ArchName), *TheTriple, Error);
147   if (!TheTarget)
148     return createStringError(make_error_code(std::errc::not_supported),
149                              Twine("BOLT-ERROR: ", Error));
150 
151   std::unique_ptr<const MCRegisterInfo> MRI(
152       TheTarget->createMCRegInfo(TripleName));
153   if (!MRI)
154     return createStringError(
155         make_error_code(std::errc::not_supported),
156         Twine("BOLT-ERROR: no register info for target ", TripleName));
157 
158   // Set up disassembler.
159   std::unique_ptr<MCAsmInfo> AsmInfo(
160       TheTarget->createMCAsmInfo(*MRI, TripleName, MCTargetOptions()));
161   if (!AsmInfo)
162     return createStringError(
163         make_error_code(std::errc::not_supported),
164         Twine("BOLT-ERROR: no assembly info for target ", TripleName));
165   // BOLT creates "func@PLT" symbols for PLT entries. In function assembly dump
166   // we want to emit such names as using @PLT without double quotes to convey
167   // variant kind to the assembler. BOLT doesn't rely on the linker so we can
168   // override the default AsmInfo behavior to emit names the way we want.
169   AsmInfo->setAllowAtInName(true);
170 
171   std::unique_ptr<const MCSubtargetInfo> STI(
172       TheTarget->createMCSubtargetInfo(TripleName, "", FeaturesStr));
173   if (!STI)
174     return createStringError(
175         make_error_code(std::errc::not_supported),
176         Twine("BOLT-ERROR: no subtarget info for target ", TripleName));
177 
178   std::unique_ptr<const MCInstrInfo> MII(TheTarget->createMCInstrInfo());
179   if (!MII)
180     return createStringError(
181         make_error_code(std::errc::not_supported),
182         Twine("BOLT-ERROR: no instruction info for target ", TripleName));
183 
184   std::unique_ptr<MCContext> Ctx(
185       new MCContext(*TheTriple, AsmInfo.get(), MRI.get(), STI.get()));
186   std::unique_ptr<MCObjectFileInfo> MOFI(
187       TheTarget->createMCObjectFileInfo(*Ctx, IsPIC));
188   Ctx->setObjectFileInfo(MOFI.get());
189   // We do not support X86 Large code model. Change this in the future.
190   bool Large = false;
191   if (TheTriple->getArch() == llvm::Triple::aarch64)
192     Large = true;
193   unsigned LSDAEncoding =
194       Large ? dwarf::DW_EH_PE_absptr : dwarf::DW_EH_PE_udata4;
195   if (IsPIC) {
196     LSDAEncoding = dwarf::DW_EH_PE_pcrel |
197                    (Large ? dwarf::DW_EH_PE_sdata8 : dwarf::DW_EH_PE_sdata4);
198   }
199 
200   std::unique_ptr<MCDisassembler> DisAsm(
201       TheTarget->createMCDisassembler(*STI, *Ctx));
202 
203   if (!DisAsm)
204     return createStringError(
205         make_error_code(std::errc::not_supported),
206         Twine("BOLT-ERROR: no disassembler info for target ", TripleName));
207 
208   std::unique_ptr<const MCInstrAnalysis> MIA(
209       TheTarget->createMCInstrAnalysis(MII.get()));
210   if (!MIA)
211     return createStringError(
212         make_error_code(std::errc::not_supported),
213         Twine("BOLT-ERROR: failed to create instruction analysis for target ",
214               TripleName));
215 
216   int AsmPrinterVariant = AsmInfo->getAssemblerDialect();
217   std::unique_ptr<MCInstPrinter> InstructionPrinter(
218       TheTarget->createMCInstPrinter(*TheTriple, AsmPrinterVariant, *AsmInfo,
219                                      *MII, *MRI));
220   if (!InstructionPrinter)
221     return createStringError(
222         make_error_code(std::errc::not_supported),
223         Twine("BOLT-ERROR: no instruction printer for target ", TripleName));
224   InstructionPrinter->setPrintImmHex(true);
225 
226   std::unique_ptr<MCCodeEmitter> MCE(
227       TheTarget->createMCCodeEmitter(*MII, *Ctx));
228 
229   // Make sure we don't miss any output on core dumps.
230   outs().SetUnbuffered();
231   errs().SetUnbuffered();
232   dbgs().SetUnbuffered();
233 
234   auto BC = std::make_unique<BinaryContext>(
235       std::move(Ctx), std::move(DwCtx), std::move(TheTriple), TheTarget,
236       std::string(TripleName), std::move(MCE), std::move(MOFI),
237       std::move(AsmInfo), std::move(MII), std::move(STI),
238       std::move(InstructionPrinter), std::move(MIA), nullptr, std::move(MRI),
239       std::move(DisAsm));
240 
241   BC->LSDAEncoding = LSDAEncoding;
242 
243   BC->MAB = std::unique_ptr<MCAsmBackend>(
244       BC->TheTarget->createMCAsmBackend(*BC->STI, *BC->MRI, MCTargetOptions()));
245 
246   BC->setFilename(File->getFileName());
247 
248   BC->HasFixedLoadAddress = !IsPIC;
249 
250   BC->SymbolicDisAsm = std::unique_ptr<MCDisassembler>(
251       BC->TheTarget->createMCDisassembler(*BC->STI, *BC->Ctx));
252 
253   if (!BC->SymbolicDisAsm)
254     return createStringError(
255         make_error_code(std::errc::not_supported),
256         Twine("BOLT-ERROR: no disassembler info for target ", TripleName));
257 
258   return std::move(BC);
259 }
260 
261 bool BinaryContext::forceSymbolRelocations(StringRef SymbolName) const {
262   if (opts::HotText &&
263       (SymbolName == "__hot_start" || SymbolName == "__hot_end"))
264     return true;
265 
266   if (opts::HotData &&
267       (SymbolName == "__hot_data_start" || SymbolName == "__hot_data_end"))
268     return true;
269 
270   if (SymbolName == "_end")
271     return true;
272 
273   return false;
274 }
275 
276 std::unique_ptr<MCObjectWriter>
277 BinaryContext::createObjectWriter(raw_pwrite_stream &OS) {
278   return MAB->createObjectWriter(OS);
279 }
280 
281 bool BinaryContext::validateObjectNesting() const {
282   auto Itr = BinaryDataMap.begin();
283   auto End = BinaryDataMap.end();
284   bool Valid = true;
285   while (Itr != End) {
286     auto Next = std::next(Itr);
287     while (Next != End &&
288            Itr->second->getSection() == Next->second->getSection() &&
289            Itr->second->containsRange(Next->second->getAddress(),
290                                       Next->second->getSize())) {
291       if (Next->second->Parent != Itr->second) {
292         errs() << "BOLT-WARNING: object nesting incorrect for:\n"
293                << "BOLT-WARNING:  " << *Itr->second << "\n"
294                << "BOLT-WARNING:  " << *Next->second << "\n";
295         Valid = false;
296       }
297       ++Next;
298     }
299     Itr = Next;
300   }
301   return Valid;
302 }
303 
304 bool BinaryContext::validateHoles() const {
305   bool Valid = true;
306   for (BinarySection &Section : sections()) {
307     for (const Relocation &Rel : Section.relocations()) {
308       uint64_t RelAddr = Rel.Offset + Section.getAddress();
309       const BinaryData *BD = getBinaryDataContainingAddress(RelAddr);
310       if (!BD) {
311         errs() << "BOLT-WARNING: no BinaryData found for relocation at address"
312                << " 0x" << Twine::utohexstr(RelAddr) << " in "
313                << Section.getName() << "\n";
314         Valid = false;
315       } else if (!BD->getAtomicRoot()) {
316         errs() << "BOLT-WARNING: no atomic BinaryData found for relocation at "
317                << "address 0x" << Twine::utohexstr(RelAddr) << " in "
318                << Section.getName() << "\n";
319         Valid = false;
320       }
321     }
322   }
323   return Valid;
324 }
325 
326 void BinaryContext::updateObjectNesting(BinaryDataMapType::iterator GAI) {
327   const uint64_t Address = GAI->second->getAddress();
328   const uint64_t Size = GAI->second->getSize();
329 
330   auto fixParents = [&](BinaryDataMapType::iterator Itr,
331                         BinaryData *NewParent) {
332     BinaryData *OldParent = Itr->second->Parent;
333     Itr->second->Parent = NewParent;
334     ++Itr;
335     while (Itr != BinaryDataMap.end() && OldParent &&
336            Itr->second->Parent == OldParent) {
337       Itr->second->Parent = NewParent;
338       ++Itr;
339     }
340   };
341 
342   // Check if the previous symbol contains the newly added symbol.
343   if (GAI != BinaryDataMap.begin()) {
344     BinaryData *Prev = std::prev(GAI)->second;
345     while (Prev) {
346       if (Prev->getSection() == GAI->second->getSection() &&
347           Prev->containsRange(Address, Size)) {
348         fixParents(GAI, Prev);
349       } else {
350         fixParents(GAI, nullptr);
351       }
352       Prev = Prev->Parent;
353     }
354   }
355 
356   // Check if the newly added symbol contains any subsequent symbols.
357   if (Size != 0) {
358     BinaryData *BD = GAI->second->Parent ? GAI->second->Parent : GAI->second;
359     auto Itr = std::next(GAI);
360     while (
361         Itr != BinaryDataMap.end() &&
362         BD->containsRange(Itr->second->getAddress(), Itr->second->getSize())) {
363       Itr->second->Parent = BD;
364       ++Itr;
365     }
366   }
367 }
368 
369 iterator_range<BinaryContext::binary_data_iterator>
370 BinaryContext::getSubBinaryData(BinaryData *BD) {
371   auto Start = std::next(BinaryDataMap.find(BD->getAddress()));
372   auto End = Start;
373   while (End != BinaryDataMap.end() && BD->isAncestorOf(End->second))
374     ++End;
375   return make_range(Start, End);
376 }
377 
378 std::pair<const MCSymbol *, uint64_t>
379 BinaryContext::handleAddressRef(uint64_t Address, BinaryFunction &BF,
380                                 bool IsPCRel) {
381   if (isAArch64()) {
382     // Check if this is an access to a constant island and create bookkeeping
383     // to keep track of it and emit it later as part of this function.
384     if (MCSymbol *IslandSym = BF.getOrCreateIslandAccess(Address))
385       return std::make_pair(IslandSym, 0);
386 
387     // Detect custom code written in assembly that refers to arbitrary
388     // constant islands from other functions. Write this reference so we
389     // can pull this constant island and emit it as part of this function
390     // too.
391     auto IslandIter = AddressToConstantIslandMap.lower_bound(Address);
392 
393     if (IslandIter != AddressToConstantIslandMap.begin() &&
394         (IslandIter == AddressToConstantIslandMap.end() ||
395          IslandIter->first > Address))
396       --IslandIter;
397 
398     if (IslandIter != AddressToConstantIslandMap.end()) {
399       // Fall-back to referencing the original constant island in the presence
400       // of dynamic relocs, as we currently do not support cloning them.
401       // Notice: we might fail to link because of this, if the original constant
402       // island we are referring would be emitted too far away.
403       if (IslandIter->second->hasDynamicRelocationAtIsland()) {
404         MCSymbol *IslandSym =
405             IslandIter->second->getOrCreateIslandAccess(Address);
406         if (IslandSym)
407           return std::make_pair(IslandSym, 0);
408       } else if (MCSymbol *IslandSym =
409                      IslandIter->second->getOrCreateProxyIslandAccess(Address,
410                                                                       BF)) {
411         BF.createIslandDependency(IslandSym, IslandIter->second);
412         return std::make_pair(IslandSym, 0);
413       }
414     }
415   }
416 
417   // Note that the address does not necessarily have to reside inside
418   // a section, it could be an absolute address too.
419   ErrorOr<BinarySection &> Section = getSectionForAddress(Address);
420   if (Section && Section->isText()) {
421     if (BF.containsAddress(Address, /*UseMaxSize=*/isAArch64())) {
422       if (Address != BF.getAddress()) {
423         // The address could potentially escape. Mark it as another entry
424         // point into the function.
425         if (opts::Verbosity >= 1) {
426           outs() << "BOLT-INFO: potentially escaped address 0x"
427                  << Twine::utohexstr(Address) << " in function " << BF << '\n';
428         }
429         BF.HasInternalLabelReference = true;
430         return std::make_pair(
431             BF.addEntryPointAtOffset(Address - BF.getAddress()), 0);
432       }
433     } else {
434       addInterproceduralReference(&BF, Address);
435     }
436   }
437 
438   // With relocations, catch jump table references outside of the basic block
439   // containing the indirect jump.
440   if (HasRelocations) {
441     const MemoryContentsType MemType = analyzeMemoryAt(Address, BF);
442     if (MemType == MemoryContentsType::POSSIBLE_PIC_JUMP_TABLE && IsPCRel) {
443       const MCSymbol *Symbol =
444           getOrCreateJumpTable(BF, Address, JumpTable::JTT_PIC);
445 
446       return std::make_pair(Symbol, 0);
447     }
448   }
449 
450   if (BinaryData *BD = getBinaryDataContainingAddress(Address))
451     return std::make_pair(BD->getSymbol(), Address - BD->getAddress());
452 
453   // TODO: use DWARF info to get size/alignment here?
454   MCSymbol *TargetSymbol = getOrCreateGlobalSymbol(Address, "DATAat");
455   LLVM_DEBUG(dbgs() << "Created symbol " << TargetSymbol->getName() << '\n');
456   return std::make_pair(TargetSymbol, 0);
457 }
458 
459 MemoryContentsType BinaryContext::analyzeMemoryAt(uint64_t Address,
460                                                   BinaryFunction &BF) {
461   if (!isX86())
462     return MemoryContentsType::UNKNOWN;
463 
464   ErrorOr<BinarySection &> Section = getSectionForAddress(Address);
465   if (!Section) {
466     // No section - possibly an absolute address. Since we don't allow
467     // internal function addresses to escape the function scope - we
468     // consider it a tail call.
469     if (opts::Verbosity > 1) {
470       errs() << "BOLT-WARNING: no section for address 0x"
471              << Twine::utohexstr(Address) << " referenced from function " << BF
472              << '\n';
473     }
474     return MemoryContentsType::UNKNOWN;
475   }
476 
477   if (Section->isVirtual()) {
478     // The contents are filled at runtime.
479     return MemoryContentsType::UNKNOWN;
480   }
481 
482   // No support for jump tables in code yet.
483   if (Section->isText())
484     return MemoryContentsType::UNKNOWN;
485 
486   // Start with checking for PIC jump table. We expect non-PIC jump tables
487   // to have high 32 bits set to 0.
488   if (analyzeJumpTable(Address, JumpTable::JTT_PIC, BF))
489     return MemoryContentsType::POSSIBLE_PIC_JUMP_TABLE;
490 
491   if (analyzeJumpTable(Address, JumpTable::JTT_NORMAL, BF))
492     return MemoryContentsType::POSSIBLE_JUMP_TABLE;
493 
494   return MemoryContentsType::UNKNOWN;
495 }
496 
497 bool BinaryContext::analyzeJumpTable(const uint64_t Address,
498                                      const JumpTable::JumpTableType Type,
499                                      const BinaryFunction &BF,
500                                      const uint64_t NextJTAddress,
501                                      JumpTable::AddressesType *EntriesAsAddress,
502                                      bool *HasEntryInFragment) const {
503   // Is one of the targets __builtin_unreachable?
504   bool HasUnreachable = false;
505 
506   // Does one of the entries match function start address?
507   bool HasStartAsEntry = false;
508 
509   // Number of targets other than __builtin_unreachable.
510   uint64_t NumRealEntries = 0;
511 
512   auto addEntryAddress = [&](uint64_t EntryAddress) {
513     if (EntriesAsAddress)
514       EntriesAsAddress->emplace_back(EntryAddress);
515   };
516 
517   ErrorOr<const BinarySection &> Section = getSectionForAddress(Address);
518   if (!Section)
519     return false;
520 
521   // The upper bound is defined by containing object, section limits, and
522   // the next jump table in memory.
523   uint64_t UpperBound = Section->getEndAddress();
524   const BinaryData *JumpTableBD = getBinaryDataAtAddress(Address);
525   if (JumpTableBD && JumpTableBD->getSize()) {
526     assert(JumpTableBD->getEndAddress() <= UpperBound &&
527            "data object cannot cross a section boundary");
528     UpperBound = JumpTableBD->getEndAddress();
529   }
530   if (NextJTAddress)
531     UpperBound = std::min(NextJTAddress, UpperBound);
532 
533   LLVM_DEBUG({
534     using JTT = JumpTable::JumpTableType;
535     dbgs() << formatv("BOLT-DEBUG: analyzeJumpTable @{0:x} in {1}, JTT={2}\n",
536                       Address, BF.getPrintName(),
537                       Type == JTT::JTT_PIC ? "PIC" : "Normal");
538   });
539   const uint64_t EntrySize = getJumpTableEntrySize(Type);
540   for (uint64_t EntryAddress = Address; EntryAddress <= UpperBound - EntrySize;
541        EntryAddress += EntrySize) {
542     LLVM_DEBUG(dbgs() << "  * Checking 0x" << Twine::utohexstr(EntryAddress)
543                       << " -> ");
544     // Check if there's a proper relocation against the jump table entry.
545     if (HasRelocations) {
546       if (Type == JumpTable::JTT_PIC &&
547           !DataPCRelocations.count(EntryAddress)) {
548         LLVM_DEBUG(
549             dbgs() << "FAIL: JTT_PIC table, no relocation for this address\n");
550         break;
551       }
552       if (Type == JumpTable::JTT_NORMAL && !getRelocationAt(EntryAddress)) {
553         LLVM_DEBUG(
554             dbgs()
555             << "FAIL: JTT_NORMAL table, no relocation for this address\n");
556         break;
557       }
558     }
559 
560     const uint64_t Value =
561         (Type == JumpTable::JTT_PIC)
562             ? Address + *getSignedValueAtAddress(EntryAddress, EntrySize)
563             : *getPointerAtAddress(EntryAddress);
564 
565     // __builtin_unreachable() case.
566     if (Value == BF.getAddress() + BF.getSize()) {
567       addEntryAddress(Value);
568       HasUnreachable = true;
569       LLVM_DEBUG(dbgs() << formatv("OK: {0:x} __builtin_unreachable\n", Value));
570       continue;
571     }
572 
573     // Function start is another special case. It is allowed in the jump table,
574     // but we need at least one another regular entry to distinguish the table
575     // from, e.g. a function pointer array.
576     if (Value == BF.getAddress()) {
577       HasStartAsEntry = true;
578       addEntryAddress(Value);
579       continue;
580     }
581 
582     // Function or one of its fragments.
583     const BinaryFunction *TargetBF = getBinaryFunctionContainingAddress(Value);
584     const bool DoesBelongToFunction =
585         BF.containsAddress(Value) ||
586         (TargetBF && TargetBF->isParentOrChildOf(BF));
587     if (!DoesBelongToFunction) {
588       LLVM_DEBUG({
589         if (!BF.containsAddress(Value)) {
590           dbgs() << "FAIL: function doesn't contain this address\n";
591           if (TargetBF) {
592             dbgs() << "  ! function containing this address: "
593                    << TargetBF->getPrintName() << '\n';
594             if (TargetBF->isFragment()) {
595               dbgs() << "  ! is a fragment";
596               for (BinaryFunction *Parent : TargetBF->ParentFragments)
597                 dbgs() << ", parent: " << Parent->getPrintName();
598               dbgs() << '\n';
599             }
600           }
601         }
602       });
603       break;
604     }
605 
606     // Check there's an instruction at this offset.
607     if (TargetBF->getState() == BinaryFunction::State::Disassembled &&
608         !TargetBF->getInstructionAtOffset(Value - TargetBF->getAddress())) {
609       LLVM_DEBUG(dbgs() << formatv("FAIL: no instruction at {0:x}\n", Value));
610       break;
611     }
612 
613     ++NumRealEntries;
614     LLVM_DEBUG(dbgs() << formatv("OK: {0:x} real entry\n", Value));
615 
616     if (TargetBF != &BF && HasEntryInFragment)
617       *HasEntryInFragment = true;
618     addEntryAddress(Value);
619   }
620 
621   // It's a jump table if the number of real entries is more than 1, or there's
622   // one real entry and one or more special targets. If there are only multiple
623   // special targets, then it's not a jump table.
624   return NumRealEntries + (HasUnreachable || HasStartAsEntry) >= 2;
625 }
626 
627 void BinaryContext::populateJumpTables() {
628   LLVM_DEBUG(dbgs() << "DataPCRelocations: " << DataPCRelocations.size()
629                     << '\n');
630   for (auto JTI = JumpTables.begin(), JTE = JumpTables.end(); JTI != JTE;
631        ++JTI) {
632     JumpTable *JT = JTI->second;
633 
634     bool NonSimpleParent = false;
635     for (BinaryFunction *BF : JT->Parents)
636       NonSimpleParent |= !BF->isSimple();
637     if (NonSimpleParent)
638       continue;
639 
640     uint64_t NextJTAddress = 0;
641     auto NextJTI = std::next(JTI);
642     if (NextJTI != JTE)
643       NextJTAddress = NextJTI->second->getAddress();
644 
645     const bool Success =
646         analyzeJumpTable(JT->getAddress(), JT->Type, *(JT->Parents[0]),
647                          NextJTAddress, &JT->EntriesAsAddress, &JT->IsSplit);
648     if (!Success) {
649       LLVM_DEBUG({
650         dbgs() << "failed to analyze ";
651         JT->print(dbgs());
652         if (NextJTI != JTE) {
653           dbgs() << "next ";
654           NextJTI->second->print(dbgs());
655         }
656       });
657       llvm_unreachable("jump table heuristic failure");
658     }
659     for (BinaryFunction *Frag : JT->Parents) {
660       if (JT->IsSplit)
661         Frag->setHasIndirectTargetToSplitFragment(true);
662       for (uint64_t EntryAddress : JT->EntriesAsAddress)
663         // if target is builtin_unreachable
664         if (EntryAddress == Frag->getAddress() + Frag->getSize()) {
665           Frag->IgnoredBranches.emplace_back(EntryAddress - Frag->getAddress(),
666                                              Frag->getSize());
667         } else if (EntryAddress >= Frag->getAddress() &&
668                    EntryAddress < Frag->getAddress() + Frag->getSize()) {
669           Frag->registerReferencedOffset(EntryAddress - Frag->getAddress());
670         }
671     }
672 
673     // In strict mode, erase PC-relative relocation record. Later we check that
674     // all such records are erased and thus have been accounted for.
675     if (opts::StrictMode && JT->Type == JumpTable::JTT_PIC) {
676       for (uint64_t Address = JT->getAddress();
677            Address < JT->getAddress() + JT->getSize();
678            Address += JT->EntrySize) {
679         DataPCRelocations.erase(DataPCRelocations.find(Address));
680       }
681     }
682 
683     // Mark to skip the function and all its fragments.
684     for (BinaryFunction *Frag : JT->Parents)
685       if (Frag->hasIndirectTargetToSplitFragment())
686         addFragmentsToSkip(Frag);
687   }
688 
689   if (opts::StrictMode && DataPCRelocations.size()) {
690     LLVM_DEBUG({
691       dbgs() << DataPCRelocations.size()
692              << " unclaimed PC-relative relocations left in data:\n";
693       for (uint64_t Reloc : DataPCRelocations)
694         dbgs() << Twine::utohexstr(Reloc) << '\n';
695     });
696     assert(0 && "unclaimed PC-relative relocations left in data\n");
697   }
698   clearList(DataPCRelocations);
699 }
700 
701 void BinaryContext::skipMarkedFragments() {
702   std::vector<BinaryFunction *> FragmentQueue;
703   // Copy the functions to FragmentQueue.
704   FragmentQueue.assign(FragmentsToSkip.begin(), FragmentsToSkip.end());
705   auto addToWorklist = [&](BinaryFunction *Function) -> void {
706     if (FragmentsToSkip.count(Function))
707       return;
708     FragmentQueue.push_back(Function);
709     addFragmentsToSkip(Function);
710   };
711   // Functions containing split jump tables need to be skipped with all
712   // fragments (transitively).
713   for (size_t I = 0; I != FragmentQueue.size(); I++) {
714     BinaryFunction *BF = FragmentQueue[I];
715     assert(FragmentsToSkip.count(BF) &&
716            "internal error in traversing function fragments");
717     if (opts::Verbosity >= 1)
718       errs() << "BOLT-WARNING: Ignoring " << BF->getPrintName() << '\n';
719     BF->setSimple(false);
720     BF->setHasIndirectTargetToSplitFragment(true);
721 
722     llvm::for_each(BF->Fragments, addToWorklist);
723     llvm::for_each(BF->ParentFragments, addToWorklist);
724   }
725   if (!FragmentsToSkip.empty())
726     errs() << "BOLT-WARNING: skipped " << FragmentsToSkip.size() << " function"
727            << (FragmentsToSkip.size() == 1 ? "" : "s")
728            << " due to cold fragments\n";
729 }
730 
731 MCSymbol *BinaryContext::getOrCreateGlobalSymbol(uint64_t Address, Twine Prefix,
732                                                  uint64_t Size,
733                                                  uint16_t Alignment,
734                                                  unsigned Flags) {
735   auto Itr = BinaryDataMap.find(Address);
736   if (Itr != BinaryDataMap.end()) {
737     assert(Itr->second->getSize() == Size || !Size);
738     return Itr->second->getSymbol();
739   }
740 
741   std::string Name = (Prefix + "0x" + Twine::utohexstr(Address)).str();
742   assert(!GlobalSymbols.count(Name) && "created name is not unique");
743   return registerNameAtAddress(Name, Address, Size, Alignment, Flags);
744 }
745 
746 MCSymbol *BinaryContext::getOrCreateUndefinedGlobalSymbol(StringRef Name) {
747   return Ctx->getOrCreateSymbol(Name);
748 }
749 
750 BinaryFunction *BinaryContext::createBinaryFunction(
751     const std::string &Name, BinarySection &Section, uint64_t Address,
752     uint64_t Size, uint64_t SymbolSize, uint16_t Alignment) {
753   auto Result = BinaryFunctions.emplace(
754       Address, BinaryFunction(Name, Section, Address, Size, *this));
755   assert(Result.second == true && "unexpected duplicate function");
756   BinaryFunction *BF = &Result.first->second;
757   registerNameAtAddress(Name, Address, SymbolSize ? SymbolSize : Size,
758                         Alignment);
759   setSymbolToFunctionMap(BF->getSymbol(), BF);
760   return BF;
761 }
762 
763 const MCSymbol *
764 BinaryContext::getOrCreateJumpTable(BinaryFunction &Function, uint64_t Address,
765                                     JumpTable::JumpTableType Type) {
766   // Two fragments of same function access same jump table
767   if (JumpTable *JT = getJumpTableContainingAddress(Address)) {
768     assert(JT->Type == Type && "jump table types have to match");
769     assert(Address == JT->getAddress() && "unexpected non-empty jump table");
770 
771     // Prevent associating a jump table to a specific fragment twice.
772     // This simple check arises from the assumption: no more than 2 fragments.
773     if (JT->Parents.size() == 1 && JT->Parents[0] != &Function) {
774       assert(JT->Parents[0]->isParentOrChildOf(Function) &&
775              "cannot re-use jump table of a different function");
776       // Duplicate the entry for the parent function for easy access
777       JT->Parents.push_back(&Function);
778       if (opts::Verbosity > 2) {
779         outs() << "BOLT-INFO: Multiple fragments access same jump table: "
780                << JT->Parents[0]->getPrintName() << "; "
781                << Function.getPrintName() << "\n";
782         JT->print(outs());
783       }
784       Function.JumpTables.emplace(Address, JT);
785       JT->Parents[0]->setHasIndirectTargetToSplitFragment(true);
786       JT->Parents[1]->setHasIndirectTargetToSplitFragment(true);
787     }
788 
789     bool IsJumpTableParent = false;
790     (void)IsJumpTableParent;
791     for (BinaryFunction *Frag : JT->Parents)
792       if (Frag == &Function)
793         IsJumpTableParent = true;
794     assert(IsJumpTableParent &&
795            "cannot re-use jump table of a different function");
796     return JT->getFirstLabel();
797   }
798 
799   // Re-use the existing symbol if possible.
800   MCSymbol *JTLabel = nullptr;
801   if (BinaryData *Object = getBinaryDataAtAddress(Address)) {
802     if (!isInternalSymbolName(Object->getSymbol()->getName()))
803       JTLabel = Object->getSymbol();
804   }
805 
806   const uint64_t EntrySize = getJumpTableEntrySize(Type);
807   if (!JTLabel) {
808     const std::string JumpTableName = generateJumpTableName(Function, Address);
809     JTLabel = registerNameAtAddress(JumpTableName, Address, 0, EntrySize);
810   }
811 
812   LLVM_DEBUG(dbgs() << "BOLT-DEBUG: creating jump table " << JTLabel->getName()
813                     << " in function " << Function << '\n');
814 
815   JumpTable *JT = new JumpTable(*JTLabel, Address, EntrySize, Type,
816                                 JumpTable::LabelMapType{{0, JTLabel}},
817                                 *getSectionForAddress(Address));
818   JT->Parents.push_back(&Function);
819   if (opts::Verbosity > 2)
820     JT->print(outs());
821   JumpTables.emplace(Address, JT);
822 
823   // Duplicate the entry for the parent function for easy access.
824   Function.JumpTables.emplace(Address, JT);
825   return JTLabel;
826 }
827 
828 std::pair<uint64_t, const MCSymbol *>
829 BinaryContext::duplicateJumpTable(BinaryFunction &Function, JumpTable *JT,
830                                   const MCSymbol *OldLabel) {
831   auto L = scopeLock();
832   unsigned Offset = 0;
833   bool Found = false;
834   for (std::pair<const unsigned, MCSymbol *> Elmt : JT->Labels) {
835     if (Elmt.second != OldLabel)
836       continue;
837     Offset = Elmt.first;
838     Found = true;
839     break;
840   }
841   assert(Found && "Label not found");
842   (void)Found;
843   MCSymbol *NewLabel = Ctx->createNamedTempSymbol("duplicatedJT");
844   JumpTable *NewJT =
845       new JumpTable(*NewLabel, JT->getAddress(), JT->EntrySize, JT->Type,
846                     JumpTable::LabelMapType{{Offset, NewLabel}},
847                     *getSectionForAddress(JT->getAddress()));
848   NewJT->Parents = JT->Parents;
849   NewJT->Entries = JT->Entries;
850   NewJT->Counts = JT->Counts;
851   uint64_t JumpTableID = ++DuplicatedJumpTables;
852   // Invert it to differentiate from regular jump tables whose IDs are their
853   // addresses in the input binary memory space
854   JumpTableID = ~JumpTableID;
855   JumpTables.emplace(JumpTableID, NewJT);
856   Function.JumpTables.emplace(JumpTableID, NewJT);
857   return std::make_pair(JumpTableID, NewLabel);
858 }
859 
860 std::string BinaryContext::generateJumpTableName(const BinaryFunction &BF,
861                                                  uint64_t Address) {
862   size_t Id;
863   uint64_t Offset = 0;
864   if (const JumpTable *JT = BF.getJumpTableContainingAddress(Address)) {
865     Offset = Address - JT->getAddress();
866     auto Itr = JT->Labels.find(Offset);
867     if (Itr != JT->Labels.end())
868       return std::string(Itr->second->getName());
869     Id = JumpTableIds.at(JT->getAddress());
870   } else {
871     Id = JumpTableIds[Address] = BF.JumpTables.size();
872   }
873   return ("JUMP_TABLE/" + BF.getOneName().str() + "." + std::to_string(Id) +
874           (Offset ? ("." + std::to_string(Offset)) : ""));
875 }
876 
877 bool BinaryContext::hasValidCodePadding(const BinaryFunction &BF) {
878   // FIXME: aarch64 support is missing.
879   if (!isX86())
880     return true;
881 
882   if (BF.getSize() == BF.getMaxSize())
883     return true;
884 
885   ErrorOr<ArrayRef<unsigned char>> FunctionData = BF.getData();
886   assert(FunctionData && "cannot get function as data");
887 
888   uint64_t Offset = BF.getSize();
889   MCInst Instr;
890   uint64_t InstrSize = 0;
891   uint64_t InstrAddress = BF.getAddress() + Offset;
892   using std::placeholders::_1;
893 
894   // Skip instructions that satisfy the predicate condition.
895   auto skipInstructions = [&](std::function<bool(const MCInst &)> Predicate) {
896     const uint64_t StartOffset = Offset;
897     for (; Offset < BF.getMaxSize();
898          Offset += InstrSize, InstrAddress += InstrSize) {
899       if (!DisAsm->getInstruction(Instr, InstrSize, FunctionData->slice(Offset),
900                                   InstrAddress, nulls()))
901         break;
902       if (!Predicate(Instr))
903         break;
904     }
905 
906     return Offset - StartOffset;
907   };
908 
909   // Skip a sequence of zero bytes.
910   auto skipZeros = [&]() {
911     const uint64_t StartOffset = Offset;
912     for (; Offset < BF.getMaxSize(); ++Offset)
913       if ((*FunctionData)[Offset] != 0)
914         break;
915 
916     return Offset - StartOffset;
917   };
918 
919   // Accept the whole padding area filled with breakpoints.
920   auto isBreakpoint = std::bind(&MCPlusBuilder::isBreakpoint, MIB.get(), _1);
921   if (skipInstructions(isBreakpoint) && Offset == BF.getMaxSize())
922     return true;
923 
924   auto isNoop = std::bind(&MCPlusBuilder::isNoop, MIB.get(), _1);
925 
926   // Some functions have a jump to the next function or to the padding area
927   // inserted after the body.
928   auto isSkipJump = [&](const MCInst &Instr) {
929     uint64_t TargetAddress = 0;
930     if (MIB->isUnconditionalBranch(Instr) &&
931         MIB->evaluateBranch(Instr, InstrAddress, InstrSize, TargetAddress)) {
932       if (TargetAddress >= InstrAddress + InstrSize &&
933           TargetAddress <= BF.getAddress() + BF.getMaxSize()) {
934         return true;
935       }
936     }
937     return false;
938   };
939 
940   // Skip over nops, jumps, and zero padding. Allow interleaving (this happens).
941   while (skipInstructions(isNoop) || skipInstructions(isSkipJump) ||
942          skipZeros())
943     ;
944 
945   if (Offset == BF.getMaxSize())
946     return true;
947 
948   if (opts::Verbosity >= 1) {
949     errs() << "BOLT-WARNING: bad padding at address 0x"
950            << Twine::utohexstr(BF.getAddress() + BF.getSize())
951            << " starting at offset " << (Offset - BF.getSize())
952            << " in function " << BF << '\n'
953            << FunctionData->slice(BF.getSize(), BF.getMaxSize() - BF.getSize())
954            << '\n';
955   }
956 
957   return false;
958 }
959 
960 void BinaryContext::adjustCodePadding() {
961   for (auto &BFI : BinaryFunctions) {
962     BinaryFunction &BF = BFI.second;
963     if (!shouldEmit(BF))
964       continue;
965 
966     if (!hasValidCodePadding(BF)) {
967       if (HasRelocations) {
968         if (opts::Verbosity >= 1) {
969           outs() << "BOLT-INFO: function " << BF
970                  << " has invalid padding. Ignoring the function.\n";
971         }
972         BF.setIgnored();
973       } else {
974         BF.setMaxSize(BF.getSize());
975       }
976     }
977   }
978 }
979 
980 MCSymbol *BinaryContext::registerNameAtAddress(StringRef Name, uint64_t Address,
981                                                uint64_t Size,
982                                                uint16_t Alignment,
983                                                unsigned Flags) {
984   // Register the name with MCContext.
985   MCSymbol *Symbol = Ctx->getOrCreateSymbol(Name);
986 
987   auto GAI = BinaryDataMap.find(Address);
988   BinaryData *BD;
989   if (GAI == BinaryDataMap.end()) {
990     ErrorOr<BinarySection &> SectionOrErr = getSectionForAddress(Address);
991     BinarySection &Section =
992         SectionOrErr ? SectionOrErr.get() : absoluteSection();
993     BD = new BinaryData(*Symbol, Address, Size, Alignment ? Alignment : 1,
994                         Section, Flags);
995     GAI = BinaryDataMap.emplace(Address, BD).first;
996     GlobalSymbols[Name] = BD;
997     updateObjectNesting(GAI);
998   } else {
999     BD = GAI->second;
1000     if (!BD->hasName(Name)) {
1001       GlobalSymbols[Name] = BD;
1002       BD->Symbols.push_back(Symbol);
1003     }
1004   }
1005 
1006   return Symbol;
1007 }
1008 
1009 const BinaryData *
1010 BinaryContext::getBinaryDataContainingAddressImpl(uint64_t Address) const {
1011   auto NI = BinaryDataMap.lower_bound(Address);
1012   auto End = BinaryDataMap.end();
1013   if ((NI != End && Address == NI->first) ||
1014       ((NI != BinaryDataMap.begin()) && (NI-- != BinaryDataMap.begin()))) {
1015     if (NI->second->containsAddress(Address))
1016       return NI->second;
1017 
1018     // If this is a sub-symbol, see if a parent data contains the address.
1019     const BinaryData *BD = NI->second->getParent();
1020     while (BD) {
1021       if (BD->containsAddress(Address))
1022         return BD;
1023       BD = BD->getParent();
1024     }
1025   }
1026   return nullptr;
1027 }
1028 
1029 BinaryData *BinaryContext::getGOTSymbol() {
1030   // First tries to find a global symbol with that name
1031   BinaryData *GOTSymBD = getBinaryDataByName("_GLOBAL_OFFSET_TABLE_");
1032   if (GOTSymBD)
1033     return GOTSymBD;
1034 
1035   // This symbol might be hidden from run-time link, so fetch the local
1036   // definition if available.
1037   GOTSymBD = getBinaryDataByName("_GLOBAL_OFFSET_TABLE_/1");
1038   if (!GOTSymBD)
1039     return nullptr;
1040 
1041   // If the local symbol is not unique, fail
1042   unsigned Index = 2;
1043   SmallString<30> Storage;
1044   while (const BinaryData *BD =
1045              getBinaryDataByName(Twine("_GLOBAL_OFFSET_TABLE_/")
1046                                      .concat(Twine(Index++))
1047                                      .toStringRef(Storage)))
1048     if (BD->getAddress() != GOTSymBD->getAddress())
1049       return nullptr;
1050 
1051   return GOTSymBD;
1052 }
1053 
1054 bool BinaryContext::setBinaryDataSize(uint64_t Address, uint64_t Size) {
1055   auto NI = BinaryDataMap.find(Address);
1056   assert(NI != BinaryDataMap.end());
1057   if (NI == BinaryDataMap.end())
1058     return false;
1059   // TODO: it's possible that a jump table starts at the same address
1060   // as a larger blob of private data.  When we set the size of the
1061   // jump table, it might be smaller than the total blob size.  In this
1062   // case we just leave the original size since (currently) it won't really
1063   // affect anything.
1064   assert((!NI->second->Size || NI->second->Size == Size ||
1065           (NI->second->isJumpTable() && NI->second->Size > Size)) &&
1066          "can't change the size of a symbol that has already had its "
1067          "size set");
1068   if (!NI->second->Size) {
1069     NI->second->Size = Size;
1070     updateObjectNesting(NI);
1071     return true;
1072   }
1073   return false;
1074 }
1075 
1076 void BinaryContext::generateSymbolHashes() {
1077   auto isPadding = [](const BinaryData &BD) {
1078     StringRef Contents = BD.getSection().getContents();
1079     StringRef SymData = Contents.substr(BD.getOffset(), BD.getSize());
1080     return (BD.getName().startswith("HOLEat") ||
1081             SymData.find_first_not_of(0) == StringRef::npos);
1082   };
1083 
1084   uint64_t NumCollisions = 0;
1085   for (auto &Entry : BinaryDataMap) {
1086     BinaryData &BD = *Entry.second;
1087     StringRef Name = BD.getName();
1088 
1089     if (!isInternalSymbolName(Name))
1090       continue;
1091 
1092     // First check if a non-anonymous alias exists and move it to the front.
1093     if (BD.getSymbols().size() > 1) {
1094       auto Itr = llvm::find_if(BD.getSymbols(), [&](const MCSymbol *Symbol) {
1095         return !isInternalSymbolName(Symbol->getName());
1096       });
1097       if (Itr != BD.getSymbols().end()) {
1098         size_t Idx = std::distance(BD.getSymbols().begin(), Itr);
1099         std::swap(BD.getSymbols()[0], BD.getSymbols()[Idx]);
1100         continue;
1101       }
1102     }
1103 
1104     // We have to skip 0 size symbols since they will all collide.
1105     if (BD.getSize() == 0) {
1106       continue;
1107     }
1108 
1109     const uint64_t Hash = BD.getSection().hash(BD);
1110     const size_t Idx = Name.find("0x");
1111     std::string NewName =
1112         (Twine(Name.substr(0, Idx)) + "_" + Twine::utohexstr(Hash)).str();
1113     if (getBinaryDataByName(NewName)) {
1114       // Ignore collisions for symbols that appear to be padding
1115       // (i.e. all zeros or a "hole")
1116       if (!isPadding(BD)) {
1117         if (opts::Verbosity) {
1118           errs() << "BOLT-WARNING: collision detected when hashing " << BD
1119                  << " with new name (" << NewName << "), skipping.\n";
1120         }
1121         ++NumCollisions;
1122       }
1123       continue;
1124     }
1125     BD.Symbols.insert(BD.Symbols.begin(), Ctx->getOrCreateSymbol(NewName));
1126     GlobalSymbols[NewName] = &BD;
1127   }
1128   if (NumCollisions) {
1129     errs() << "BOLT-WARNING: " << NumCollisions
1130            << " collisions detected while hashing binary objects";
1131     if (!opts::Verbosity)
1132       errs() << ". Use -v=1 to see the list.";
1133     errs() << '\n';
1134   }
1135 }
1136 
1137 bool BinaryContext::registerFragment(BinaryFunction &TargetFunction,
1138                                      BinaryFunction &Function) const {
1139   assert(TargetFunction.isFragment() && "TargetFunction must be a fragment");
1140   if (TargetFunction.isChildOf(Function))
1141     return true;
1142   TargetFunction.addParentFragment(Function);
1143   Function.addFragment(TargetFunction);
1144   if (!HasRelocations) {
1145     TargetFunction.setSimple(false);
1146     Function.setSimple(false);
1147   }
1148   if (opts::Verbosity >= 1) {
1149     outs() << "BOLT-INFO: marking " << TargetFunction << " as a fragment of "
1150            << Function << '\n';
1151   }
1152   return true;
1153 }
1154 
1155 void BinaryContext::addAdrpAddRelocAArch64(BinaryFunction &BF,
1156                                            MCInst &LoadLowBits,
1157                                            MCInst &LoadHiBits,
1158                                            uint64_t Target) {
1159   const MCSymbol *TargetSymbol;
1160   uint64_t Addend = 0;
1161   std::tie(TargetSymbol, Addend) = handleAddressRef(Target, BF,
1162                                                     /*IsPCRel*/ true);
1163   int64_t Val;
1164   MIB->replaceImmWithSymbolRef(LoadHiBits, TargetSymbol, Addend, Ctx.get(), Val,
1165                                ELF::R_AARCH64_ADR_PREL_PG_HI21);
1166   MIB->replaceImmWithSymbolRef(LoadLowBits, TargetSymbol, Addend, Ctx.get(),
1167                                Val, ELF::R_AARCH64_ADD_ABS_LO12_NC);
1168 }
1169 
1170 bool BinaryContext::handleAArch64Veneer(uint64_t Address, bool MatchOnly) {
1171   BinaryFunction *TargetFunction = getBinaryFunctionContainingAddress(Address);
1172   if (TargetFunction)
1173     return false;
1174 
1175   ErrorOr<BinarySection &> Section = getSectionForAddress(Address);
1176   assert(Section && "cannot get section for referenced address");
1177   if (!Section->isText())
1178     return false;
1179 
1180   bool Ret = false;
1181   StringRef SectionContents = Section->getContents();
1182   uint64_t Offset = Address - Section->getAddress();
1183   const uint64_t MaxSize = SectionContents.size() - Offset;
1184   const uint8_t *Bytes =
1185       reinterpret_cast<const uint8_t *>(SectionContents.data());
1186   ArrayRef<uint8_t> Data(Bytes + Offset, MaxSize);
1187 
1188   auto matchVeneer = [&](BinaryFunction::InstrMapType &Instructions,
1189                          MCInst &Instruction, uint64_t Offset,
1190                          uint64_t AbsoluteInstrAddr,
1191                          uint64_t TotalSize) -> bool {
1192     MCInst *TargetHiBits, *TargetLowBits;
1193     uint64_t TargetAddress, Count;
1194     Count = MIB->matchLinkerVeneer(Instructions.begin(), Instructions.end(),
1195                                    AbsoluteInstrAddr, Instruction, TargetHiBits,
1196                                    TargetLowBits, TargetAddress);
1197     if (!Count)
1198       return false;
1199 
1200     if (MatchOnly)
1201       return true;
1202 
1203     // NOTE The target symbol was created during disassemble's
1204     // handleExternalReference
1205     const MCSymbol *VeneerSymbol = getOrCreateGlobalSymbol(Address, "FUNCat");
1206     BinaryFunction *Veneer = createBinaryFunction(VeneerSymbol->getName().str(),
1207                                                   *Section, Address, TotalSize);
1208     addAdrpAddRelocAArch64(*Veneer, *TargetLowBits, *TargetHiBits,
1209                            TargetAddress);
1210     MIB->addAnnotation(Instruction, "AArch64Veneer", true);
1211     Veneer->addInstruction(Offset, std::move(Instruction));
1212     --Count;
1213     for (auto It = Instructions.rbegin(); Count != 0; ++It, --Count) {
1214       MIB->addAnnotation(It->second, "AArch64Veneer", true);
1215       Veneer->addInstruction(It->first, std::move(It->second));
1216     }
1217 
1218     Veneer->getOrCreateLocalLabel(Address);
1219     Veneer->setMaxSize(TotalSize);
1220     Veneer->updateState(BinaryFunction::State::Disassembled);
1221     LLVM_DEBUG(dbgs() << "BOLT-DEBUG: handling veneer function at 0x" << Address
1222                       << "\n");
1223     return true;
1224   };
1225 
1226   uint64_t Size = 0, TotalSize = 0;
1227   BinaryFunction::InstrMapType VeneerInstructions;
1228   for (Offset = 0; Offset < MaxSize; Offset += Size) {
1229     MCInst Instruction;
1230     const uint64_t AbsoluteInstrAddr = Address + Offset;
1231     if (!SymbolicDisAsm->getInstruction(Instruction, Size, Data.slice(Offset),
1232                                         AbsoluteInstrAddr, nulls()))
1233       break;
1234 
1235     TotalSize += Size;
1236     if (MIB->isBranch(Instruction)) {
1237       Ret = matchVeneer(VeneerInstructions, Instruction, Offset,
1238                         AbsoluteInstrAddr, TotalSize);
1239       break;
1240     }
1241 
1242     VeneerInstructions.emplace(Offset, std::move(Instruction));
1243   }
1244 
1245   return Ret;
1246 }
1247 
1248 void BinaryContext::processInterproceduralReferences() {
1249   for (const std::pair<BinaryFunction *, uint64_t> &It :
1250        InterproceduralReferences) {
1251     BinaryFunction &Function = *It.first;
1252     uint64_t Address = It.second;
1253     if (!Address || Function.isIgnored())
1254       continue;
1255 
1256     BinaryFunction *TargetFunction =
1257         getBinaryFunctionContainingAddress(Address);
1258     if (&Function == TargetFunction)
1259       continue;
1260 
1261     if (TargetFunction) {
1262       if (TargetFunction->isFragment() &&
1263           !TargetFunction->isChildOf(Function)) {
1264         errs() << "BOLT-WARNING: interprocedural reference between unrelated "
1265                   "fragments: "
1266                << Function.getPrintName() << " and "
1267                << TargetFunction->getPrintName() << '\n';
1268       }
1269       if (uint64_t Offset = Address - TargetFunction->getAddress())
1270         TargetFunction->addEntryPointAtOffset(Offset);
1271 
1272       continue;
1273     }
1274 
1275     // Check if address falls in function padding space - this could be
1276     // unmarked data in code. In this case adjust the padding space size.
1277     ErrorOr<BinarySection &> Section = getSectionForAddress(Address);
1278     assert(Section && "cannot get section for referenced address");
1279 
1280     if (!Section->isText())
1281       continue;
1282 
1283     // PLT requires special handling and could be ignored in this context.
1284     StringRef SectionName = Section->getName();
1285     if (SectionName == ".plt" || SectionName == ".plt.got")
1286       continue;
1287 
1288     // Check if it is aarch64 veneer written at Address
1289     if (isAArch64() && handleAArch64Veneer(Address))
1290       continue;
1291 
1292     if (opts::processAllFunctions()) {
1293       errs() << "BOLT-ERROR: cannot process binaries with unmarked "
1294              << "object in code at address 0x" << Twine::utohexstr(Address)
1295              << " belonging to section " << SectionName << " in current mode\n";
1296       exit(1);
1297     }
1298 
1299     TargetFunction = getBinaryFunctionContainingAddress(Address,
1300                                                         /*CheckPastEnd=*/false,
1301                                                         /*UseMaxSize=*/true);
1302     // We are not going to overwrite non-simple functions, but for simple
1303     // ones - adjust the padding size.
1304     if (TargetFunction && TargetFunction->isSimple()) {
1305       errs() << "BOLT-WARNING: function " << *TargetFunction
1306              << " has an object detected in a padding region at address 0x"
1307              << Twine::utohexstr(Address) << '\n';
1308       TargetFunction->setMaxSize(TargetFunction->getSize());
1309     }
1310   }
1311 
1312   InterproceduralReferences.clear();
1313 }
1314 
1315 void BinaryContext::postProcessSymbolTable() {
1316   fixBinaryDataHoles();
1317   bool Valid = true;
1318   for (auto &Entry : BinaryDataMap) {
1319     BinaryData *BD = Entry.second;
1320     if ((BD->getName().startswith("SYMBOLat") ||
1321          BD->getName().startswith("DATAat")) &&
1322         !BD->getParent() && !BD->getSize() && !BD->isAbsolute() &&
1323         BD->getSection()) {
1324       errs() << "BOLT-WARNING: zero-sized top level symbol: " << *BD << "\n";
1325       Valid = false;
1326     }
1327   }
1328   assert(Valid);
1329   (void)Valid;
1330   generateSymbolHashes();
1331 }
1332 
1333 void BinaryContext::foldFunction(BinaryFunction &ChildBF,
1334                                  BinaryFunction &ParentBF) {
1335   assert(!ChildBF.isMultiEntry() && !ParentBF.isMultiEntry() &&
1336          "cannot merge functions with multiple entry points");
1337 
1338   std::unique_lock<llvm::sys::RWMutex> WriteCtxLock(CtxMutex, std::defer_lock);
1339   std::unique_lock<llvm::sys::RWMutex> WriteSymbolMapLock(
1340       SymbolToFunctionMapMutex, std::defer_lock);
1341 
1342   const StringRef ChildName = ChildBF.getOneName();
1343 
1344   // Move symbols over and update bookkeeping info.
1345   for (MCSymbol *Symbol : ChildBF.getSymbols()) {
1346     ParentBF.getSymbols().push_back(Symbol);
1347     WriteSymbolMapLock.lock();
1348     SymbolToFunctionMap[Symbol] = &ParentBF;
1349     WriteSymbolMapLock.unlock();
1350     // NB: there's no need to update BinaryDataMap and GlobalSymbols.
1351   }
1352   ChildBF.getSymbols().clear();
1353 
1354   // Move other names the child function is known under.
1355   llvm::move(ChildBF.Aliases, std::back_inserter(ParentBF.Aliases));
1356   ChildBF.Aliases.clear();
1357 
1358   if (HasRelocations) {
1359     // Merge execution counts of ChildBF into those of ParentBF.
1360     // Without relocations, we cannot reliably merge profiles as both functions
1361     // continue to exist and either one can be executed.
1362     ChildBF.mergeProfileDataInto(ParentBF);
1363 
1364     std::shared_lock<llvm::sys::RWMutex> ReadBfsLock(BinaryFunctionsMutex,
1365                                                      std::defer_lock);
1366     std::unique_lock<llvm::sys::RWMutex> WriteBfsLock(BinaryFunctionsMutex,
1367                                                       std::defer_lock);
1368     // Remove ChildBF from the global set of functions in relocs mode.
1369     ReadBfsLock.lock();
1370     auto FI = BinaryFunctions.find(ChildBF.getAddress());
1371     ReadBfsLock.unlock();
1372 
1373     assert(FI != BinaryFunctions.end() && "function not found");
1374     assert(&ChildBF == &FI->second && "function mismatch");
1375 
1376     WriteBfsLock.lock();
1377     ChildBF.clearDisasmState();
1378     FI = BinaryFunctions.erase(FI);
1379     WriteBfsLock.unlock();
1380 
1381   } else {
1382     // In non-relocation mode we keep the function, but rename it.
1383     std::string NewName = "__ICF_" + ChildName.str();
1384 
1385     WriteCtxLock.lock();
1386     ChildBF.getSymbols().push_back(Ctx->getOrCreateSymbol(NewName));
1387     WriteCtxLock.unlock();
1388 
1389     ChildBF.setFolded(&ParentBF);
1390   }
1391 
1392   ParentBF.setHasFunctionsFoldedInto();
1393 }
1394 
1395 void BinaryContext::fixBinaryDataHoles() {
1396   assert(validateObjectNesting() && "object nesting inconsitency detected");
1397 
1398   for (BinarySection &Section : allocatableSections()) {
1399     std::vector<std::pair<uint64_t, uint64_t>> Holes;
1400 
1401     auto isNotHole = [&Section](const binary_data_iterator &Itr) {
1402       BinaryData *BD = Itr->second;
1403       bool isHole = (!BD->getParent() && !BD->getSize() && BD->isObject() &&
1404                      (BD->getName().startswith("SYMBOLat0x") ||
1405                       BD->getName().startswith("DATAat0x") ||
1406                       BD->getName().startswith("ANONYMOUS")));
1407       return !isHole && BD->getSection() == Section && !BD->getParent();
1408     };
1409 
1410     auto BDStart = BinaryDataMap.begin();
1411     auto BDEnd = BinaryDataMap.end();
1412     auto Itr = FilteredBinaryDataIterator(isNotHole, BDStart, BDEnd);
1413     auto End = FilteredBinaryDataIterator(isNotHole, BDEnd, BDEnd);
1414 
1415     uint64_t EndAddress = Section.getAddress();
1416 
1417     while (Itr != End) {
1418       if (Itr->second->getAddress() > EndAddress) {
1419         uint64_t Gap = Itr->second->getAddress() - EndAddress;
1420         Holes.emplace_back(EndAddress, Gap);
1421       }
1422       EndAddress = Itr->second->getEndAddress();
1423       ++Itr;
1424     }
1425 
1426     if (EndAddress < Section.getEndAddress())
1427       Holes.emplace_back(EndAddress, Section.getEndAddress() - EndAddress);
1428 
1429     // If there is already a symbol at the start of the hole, grow that symbol
1430     // to cover the rest.  Otherwise, create a new symbol to cover the hole.
1431     for (std::pair<uint64_t, uint64_t> &Hole : Holes) {
1432       BinaryData *BD = getBinaryDataAtAddress(Hole.first);
1433       if (BD) {
1434         // BD->getSection() can be != Section if there are sections that
1435         // overlap.  In this case it is probably safe to just skip the holes
1436         // since the overlapping section will not(?) have any symbols in it.
1437         if (BD->getSection() == Section)
1438           setBinaryDataSize(Hole.first, Hole.second);
1439       } else {
1440         getOrCreateGlobalSymbol(Hole.first, "HOLEat", Hole.second, 1);
1441       }
1442     }
1443   }
1444 
1445   assert(validateObjectNesting() && "object nesting inconsitency detected");
1446   assert(validateHoles() && "top level hole detected in object map");
1447 }
1448 
1449 void BinaryContext::printGlobalSymbols(raw_ostream &OS) const {
1450   const BinarySection *CurrentSection = nullptr;
1451   bool FirstSection = true;
1452 
1453   for (auto &Entry : BinaryDataMap) {
1454     const BinaryData *BD = Entry.second;
1455     const BinarySection &Section = BD->getSection();
1456     if (FirstSection || Section != *CurrentSection) {
1457       uint64_t Address, Size;
1458       StringRef Name = Section.getName();
1459       if (Section) {
1460         Address = Section.getAddress();
1461         Size = Section.getSize();
1462       } else {
1463         Address = BD->getAddress();
1464         Size = BD->getSize();
1465       }
1466       OS << "BOLT-INFO: Section " << Name << ", "
1467          << "0x" + Twine::utohexstr(Address) << ":"
1468          << "0x" + Twine::utohexstr(Address + Size) << "/" << Size << "\n";
1469       CurrentSection = &Section;
1470       FirstSection = false;
1471     }
1472 
1473     OS << "BOLT-INFO: ";
1474     const BinaryData *P = BD->getParent();
1475     while (P) {
1476       OS << "  ";
1477       P = P->getParent();
1478     }
1479     OS << *BD << "\n";
1480   }
1481 }
1482 
1483 Expected<unsigned> BinaryContext::getDwarfFile(
1484     StringRef Directory, StringRef FileName, unsigned FileNumber,
1485     std::optional<MD5::MD5Result> Checksum, std::optional<StringRef> Source,
1486     unsigned CUID, unsigned DWARFVersion) {
1487   DwarfLineTable &Table = DwarfLineTablesCUMap[CUID];
1488   return Table.tryGetFile(Directory, FileName, Checksum, Source, DWARFVersion,
1489                           FileNumber);
1490 }
1491 
1492 unsigned BinaryContext::addDebugFilenameToUnit(const uint32_t DestCUID,
1493                                                const uint32_t SrcCUID,
1494                                                unsigned FileIndex) {
1495   DWARFCompileUnit *SrcUnit = DwCtx->getCompileUnitForOffset(SrcCUID);
1496   const DWARFDebugLine::LineTable *LineTable =
1497       DwCtx->getLineTableForUnit(SrcUnit);
1498   const std::vector<DWARFDebugLine::FileNameEntry> &FileNames =
1499       LineTable->Prologue.FileNames;
1500   // Dir indexes start at 1, as DWARF file numbers, and a dir index 0
1501   // means empty dir.
1502   assert(FileIndex > 0 && FileIndex <= FileNames.size() &&
1503          "FileIndex out of range for the compilation unit.");
1504   StringRef Dir = "";
1505   if (FileNames[FileIndex - 1].DirIdx != 0) {
1506     if (std::optional<const char *> DirName = dwarf::toString(
1507             LineTable->Prologue
1508                 .IncludeDirectories[FileNames[FileIndex - 1].DirIdx - 1])) {
1509       Dir = *DirName;
1510     }
1511   }
1512   StringRef FileName = "";
1513   if (std::optional<const char *> FName =
1514           dwarf::toString(FileNames[FileIndex - 1].Name))
1515     FileName = *FName;
1516   assert(FileName != "");
1517   DWARFCompileUnit *DstUnit = DwCtx->getCompileUnitForOffset(DestCUID);
1518   return cantFail(getDwarfFile(Dir, FileName, 0, std::nullopt, std::nullopt,
1519                                DestCUID, DstUnit->getVersion()));
1520 }
1521 
1522 std::vector<BinaryFunction *> BinaryContext::getSortedFunctions() {
1523   std::vector<BinaryFunction *> SortedFunctions(BinaryFunctions.size());
1524   llvm::transform(llvm::make_second_range(BinaryFunctions),
1525                   SortedFunctions.begin(),
1526                   [](BinaryFunction &BF) { return &BF; });
1527 
1528   llvm::stable_sort(SortedFunctions,
1529                     [](const BinaryFunction *A, const BinaryFunction *B) {
1530                       if (A->hasValidIndex() && B->hasValidIndex()) {
1531                         return A->getIndex() < B->getIndex();
1532                       }
1533                       return A->hasValidIndex();
1534                     });
1535   return SortedFunctions;
1536 }
1537 
1538 std::vector<BinaryFunction *> BinaryContext::getAllBinaryFunctions() {
1539   std::vector<BinaryFunction *> AllFunctions;
1540   AllFunctions.reserve(BinaryFunctions.size() + InjectedBinaryFunctions.size());
1541   llvm::transform(llvm::make_second_range(BinaryFunctions),
1542                   std::back_inserter(AllFunctions),
1543                   [](BinaryFunction &BF) { return &BF; });
1544   llvm::copy(InjectedBinaryFunctions, std::back_inserter(AllFunctions));
1545 
1546   return AllFunctions;
1547 }
1548 
1549 std::optional<DWARFUnit *> BinaryContext::getDWOCU(uint64_t DWOId) {
1550   auto Iter = DWOCUs.find(DWOId);
1551   if (Iter == DWOCUs.end())
1552     return std::nullopt;
1553 
1554   return Iter->second;
1555 }
1556 
1557 DWARFContext *BinaryContext::getDWOContext() const {
1558   if (DWOCUs.empty())
1559     return nullptr;
1560   return &DWOCUs.begin()->second->getContext();
1561 }
1562 
1563 /// Handles DWO sections that can either be in .o, .dwo or .dwp files.
1564 void BinaryContext::preprocessDWODebugInfo() {
1565   for (const std::unique_ptr<DWARFUnit> &CU : DwCtx->compile_units()) {
1566     DWARFUnit *const DwarfUnit = CU.get();
1567     if (std::optional<uint64_t> DWOId = DwarfUnit->getDWOId()) {
1568       DWARFUnit *DWOCU = DwarfUnit->getNonSkeletonUnitDIE(false).getDwarfUnit();
1569       if (!DWOCU->isDWOUnit()) {
1570         std::string DWOName = dwarf::toString(
1571             DwarfUnit->getUnitDIE().find(
1572                 {dwarf::DW_AT_dwo_name, dwarf::DW_AT_GNU_dwo_name}),
1573             "");
1574         outs() << "BOLT-WARNING: Debug Fission: DWO debug information for "
1575                << DWOName
1576                << " was not retrieved and won't be updated. Please check "
1577                   "relative path.\n";
1578         continue;
1579       }
1580       DWOCUs[*DWOId] = DWOCU;
1581     }
1582   }
1583   if (!DWOCUs.empty())
1584     outs() << "BOLT-INFO: processing split DWARF\n";
1585 }
1586 
1587 void BinaryContext::preprocessDebugInfo() {
1588   struct CURange {
1589     uint64_t LowPC;
1590     uint64_t HighPC;
1591     DWARFUnit *Unit;
1592 
1593     bool operator<(const CURange &Other) const { return LowPC < Other.LowPC; }
1594   };
1595 
1596   // Building a map of address ranges to CUs similar to .debug_aranges and use
1597   // it to assign CU to functions.
1598   std::vector<CURange> AllRanges;
1599   AllRanges.reserve(DwCtx->getNumCompileUnits());
1600   for (const std::unique_ptr<DWARFUnit> &CU : DwCtx->compile_units()) {
1601     Expected<DWARFAddressRangesVector> RangesOrError =
1602         CU->getUnitDIE().getAddressRanges();
1603     if (!RangesOrError) {
1604       consumeError(RangesOrError.takeError());
1605       continue;
1606     }
1607     for (DWARFAddressRange &Range : *RangesOrError) {
1608       // Parts of the debug info could be invalidated due to corresponding code
1609       // being removed from the binary by the linker. Hence we check if the
1610       // address is a valid one.
1611       if (containsAddress(Range.LowPC))
1612         AllRanges.emplace_back(CURange{Range.LowPC, Range.HighPC, CU.get()});
1613     }
1614 
1615     ContainsDwarf5 |= CU->getVersion() >= 5;
1616     ContainsDwarfLegacy |= CU->getVersion() < 5;
1617   }
1618 
1619   llvm::sort(AllRanges);
1620   for (auto &KV : BinaryFunctions) {
1621     const uint64_t FunctionAddress = KV.first;
1622     BinaryFunction &Function = KV.second;
1623 
1624     auto It = llvm::partition_point(
1625         AllRanges, [=](CURange R) { return R.HighPC <= FunctionAddress; });
1626     if (It != AllRanges.end() && It->LowPC <= FunctionAddress)
1627       Function.setDWARFUnit(It->Unit);
1628   }
1629 
1630   // Discover units with debug info that needs to be updated.
1631   for (const auto &KV : BinaryFunctions) {
1632     const BinaryFunction &BF = KV.second;
1633     if (shouldEmit(BF) && BF.getDWARFUnit())
1634       ProcessedCUs.insert(BF.getDWARFUnit());
1635   }
1636 
1637   // Clear debug info for functions from units that we are not going to process.
1638   for (auto &KV : BinaryFunctions) {
1639     BinaryFunction &BF = KV.second;
1640     if (BF.getDWARFUnit() && !ProcessedCUs.count(BF.getDWARFUnit()))
1641       BF.setDWARFUnit(nullptr);
1642   }
1643 
1644   if (opts::Verbosity >= 1) {
1645     outs() << "BOLT-INFO: " << ProcessedCUs.size() << " out of "
1646            << DwCtx->getNumCompileUnits() << " CUs will be updated\n";
1647   }
1648 
1649   preprocessDWODebugInfo();
1650 
1651   // Populate MCContext with DWARF files from all units.
1652   StringRef GlobalPrefix = AsmInfo->getPrivateGlobalPrefix();
1653   for (const std::unique_ptr<DWARFUnit> &CU : DwCtx->compile_units()) {
1654     const uint64_t CUID = CU->getOffset();
1655     DwarfLineTable &BinaryLineTable = getDwarfLineTable(CUID);
1656     BinaryLineTable.setLabel(Ctx->getOrCreateSymbol(
1657         GlobalPrefix + "line_table_start" + Twine(CUID)));
1658 
1659     if (!ProcessedCUs.count(CU.get()))
1660       continue;
1661 
1662     const DWARFDebugLine::LineTable *LineTable =
1663         DwCtx->getLineTableForUnit(CU.get());
1664     const std::vector<DWARFDebugLine::FileNameEntry> &FileNames =
1665         LineTable->Prologue.FileNames;
1666 
1667     uint16_t DwarfVersion = LineTable->Prologue.getVersion();
1668     if (DwarfVersion >= 5) {
1669       std::optional<MD5::MD5Result> Checksum;
1670       if (LineTable->Prologue.ContentTypes.HasMD5)
1671         Checksum = LineTable->Prologue.FileNames[0].Checksum;
1672       std::optional<const char *> Name =
1673           dwarf::toString(CU->getUnitDIE().find(dwarf::DW_AT_name), nullptr);
1674       if (std::optional<uint64_t> DWOID = CU->getDWOId()) {
1675         auto Iter = DWOCUs.find(*DWOID);
1676         assert(Iter != DWOCUs.end() && "DWO CU was not found.");
1677         Name = dwarf::toString(
1678             Iter->second->getUnitDIE().find(dwarf::DW_AT_name), nullptr);
1679       }
1680       BinaryLineTable.setRootFile(CU->getCompilationDir(), *Name, Checksum,
1681                                   std::nullopt);
1682     }
1683 
1684     BinaryLineTable.setDwarfVersion(DwarfVersion);
1685 
1686     // Assign a unique label to every line table, one per CU.
1687     // Make sure empty debug line tables are registered too.
1688     if (FileNames.empty()) {
1689       cantFail(getDwarfFile("", "<unknown>", 0, std::nullopt, std::nullopt,
1690                             CUID, DwarfVersion));
1691       continue;
1692     }
1693     const uint32_t Offset = DwarfVersion < 5 ? 1 : 0;
1694     for (size_t I = 0, Size = FileNames.size(); I != Size; ++I) {
1695       // Dir indexes start at 1, as DWARF file numbers, and a dir index 0
1696       // means empty dir.
1697       StringRef Dir = "";
1698       if (FileNames[I].DirIdx != 0 || DwarfVersion >= 5)
1699         if (std::optional<const char *> DirName = dwarf::toString(
1700                 LineTable->Prologue
1701                     .IncludeDirectories[FileNames[I].DirIdx - Offset]))
1702           Dir = *DirName;
1703       StringRef FileName = "";
1704       if (std::optional<const char *> FName =
1705               dwarf::toString(FileNames[I].Name))
1706         FileName = *FName;
1707       assert(FileName != "");
1708       std::optional<MD5::MD5Result> Checksum;
1709       if (DwarfVersion >= 5 && LineTable->Prologue.ContentTypes.HasMD5)
1710         Checksum = LineTable->Prologue.FileNames[I].Checksum;
1711       cantFail(getDwarfFile(Dir, FileName, 0, Checksum, std::nullopt, CUID,
1712                             DwarfVersion));
1713     }
1714   }
1715 }
1716 
1717 bool BinaryContext::shouldEmit(const BinaryFunction &Function) const {
1718   if (Function.isPseudo())
1719     return false;
1720 
1721   if (opts::processAllFunctions())
1722     return true;
1723 
1724   if (Function.isIgnored())
1725     return false;
1726 
1727   // In relocation mode we will emit non-simple functions with CFG.
1728   // If the function does not have a CFG it should be marked as ignored.
1729   return HasRelocations || Function.isSimple();
1730 }
1731 
1732 void BinaryContext::printCFI(raw_ostream &OS, const MCCFIInstruction &Inst) {
1733   uint32_t Operation = Inst.getOperation();
1734   switch (Operation) {
1735   case MCCFIInstruction::OpSameValue:
1736     OS << "OpSameValue Reg" << Inst.getRegister();
1737     break;
1738   case MCCFIInstruction::OpRememberState:
1739     OS << "OpRememberState";
1740     break;
1741   case MCCFIInstruction::OpRestoreState:
1742     OS << "OpRestoreState";
1743     break;
1744   case MCCFIInstruction::OpOffset:
1745     OS << "OpOffset Reg" << Inst.getRegister() << " " << Inst.getOffset();
1746     break;
1747   case MCCFIInstruction::OpDefCfaRegister:
1748     OS << "OpDefCfaRegister Reg" << Inst.getRegister();
1749     break;
1750   case MCCFIInstruction::OpDefCfaOffset:
1751     OS << "OpDefCfaOffset " << Inst.getOffset();
1752     break;
1753   case MCCFIInstruction::OpDefCfa:
1754     OS << "OpDefCfa Reg" << Inst.getRegister() << " " << Inst.getOffset();
1755     break;
1756   case MCCFIInstruction::OpRelOffset:
1757     OS << "OpRelOffset Reg" << Inst.getRegister() << " " << Inst.getOffset();
1758     break;
1759   case MCCFIInstruction::OpAdjustCfaOffset:
1760     OS << "OfAdjustCfaOffset " << Inst.getOffset();
1761     break;
1762   case MCCFIInstruction::OpEscape:
1763     OS << "OpEscape";
1764     break;
1765   case MCCFIInstruction::OpRestore:
1766     OS << "OpRestore Reg" << Inst.getRegister();
1767     break;
1768   case MCCFIInstruction::OpUndefined:
1769     OS << "OpUndefined Reg" << Inst.getRegister();
1770     break;
1771   case MCCFIInstruction::OpRegister:
1772     OS << "OpRegister Reg" << Inst.getRegister() << " Reg"
1773        << Inst.getRegister2();
1774     break;
1775   case MCCFIInstruction::OpWindowSave:
1776     OS << "OpWindowSave";
1777     break;
1778   case MCCFIInstruction::OpGnuArgsSize:
1779     OS << "OpGnuArgsSize";
1780     break;
1781   default:
1782     OS << "Op#" << Operation;
1783     break;
1784   }
1785 }
1786 
1787 MarkerSymType BinaryContext::getMarkerType(const SymbolRef &Symbol) const {
1788   // For aarch64 and riscv, the ABI defines mapping symbols so we identify data
1789   // in the code section (see IHI0056B). $x identifies a symbol starting code or
1790   // the end of a data chunk inside code, $d indentifies start of data.
1791   if ((!isAArch64() && !isRISCV()) || ELFSymbolRef(Symbol).getSize())
1792     return MarkerSymType::NONE;
1793 
1794   Expected<StringRef> NameOrError = Symbol.getName();
1795   Expected<object::SymbolRef::Type> TypeOrError = Symbol.getType();
1796 
1797   if (!TypeOrError || !NameOrError)
1798     return MarkerSymType::NONE;
1799 
1800   if (*TypeOrError != SymbolRef::ST_Unknown)
1801     return MarkerSymType::NONE;
1802 
1803   if (*NameOrError == "$x" || NameOrError->startswith("$x."))
1804     return MarkerSymType::CODE;
1805 
1806   if (*NameOrError == "$d" || NameOrError->startswith("$d."))
1807     return MarkerSymType::DATA;
1808 
1809   return MarkerSymType::NONE;
1810 }
1811 
1812 bool BinaryContext::isMarker(const SymbolRef &Symbol) const {
1813   return getMarkerType(Symbol) != MarkerSymType::NONE;
1814 }
1815 
1816 static void printDebugInfo(raw_ostream &OS, const MCInst &Instruction,
1817                            const BinaryFunction *Function,
1818                            DWARFContext *DwCtx) {
1819   DebugLineTableRowRef RowRef =
1820       DebugLineTableRowRef::fromSMLoc(Instruction.getLoc());
1821   if (RowRef == DebugLineTableRowRef::NULL_ROW)
1822     return;
1823 
1824   const DWARFDebugLine::LineTable *LineTable;
1825   if (Function && Function->getDWARFUnit() &&
1826       Function->getDWARFUnit()->getOffset() == RowRef.DwCompileUnitIndex) {
1827     LineTable = Function->getDWARFLineTable();
1828   } else {
1829     LineTable = DwCtx->getLineTableForUnit(
1830         DwCtx->getCompileUnitForOffset(RowRef.DwCompileUnitIndex));
1831   }
1832   assert(LineTable && "line table expected for instruction with debug info");
1833 
1834   const DWARFDebugLine::Row &Row = LineTable->Rows[RowRef.RowIndex - 1];
1835   StringRef FileName = "";
1836   if (std::optional<const char *> FName =
1837           dwarf::toString(LineTable->Prologue.FileNames[Row.File - 1].Name))
1838     FileName = *FName;
1839   OS << " # debug line " << FileName << ":" << Row.Line;
1840   if (Row.Column)
1841     OS << ":" << Row.Column;
1842   if (Row.Discriminator)
1843     OS << " discriminator:" << Row.Discriminator;
1844 }
1845 
1846 void BinaryContext::printInstruction(raw_ostream &OS, const MCInst &Instruction,
1847                                      uint64_t Offset,
1848                                      const BinaryFunction *Function,
1849                                      bool PrintMCInst, bool PrintMemData,
1850                                      bool PrintRelocations,
1851                                      StringRef Endl) const {
1852   if (MIB->isEHLabel(Instruction)) {
1853     OS << "  EH_LABEL: " << *MIB->getTargetSymbol(Instruction) << Endl;
1854     return;
1855   }
1856   OS << format("    %08" PRIx64 ": ", Offset);
1857   if (MIB->isCFI(Instruction)) {
1858     uint32_t Offset = Instruction.getOperand(0).getImm();
1859     OS << "\t!CFI\t$" << Offset << "\t; ";
1860     if (Function)
1861       printCFI(OS, *Function->getCFIFor(Instruction));
1862     OS << Endl;
1863     return;
1864   }
1865   InstPrinter->printInst(&Instruction, 0, "", *STI, OS);
1866   if (MIB->isCall(Instruction)) {
1867     if (MIB->isTailCall(Instruction))
1868       OS << " # TAILCALL ";
1869     if (MIB->isInvoke(Instruction)) {
1870       const std::optional<MCPlus::MCLandingPad> EHInfo =
1871           MIB->getEHInfo(Instruction);
1872       OS << " # handler: ";
1873       if (EHInfo->first)
1874         OS << *EHInfo->first;
1875       else
1876         OS << '0';
1877       OS << "; action: " << EHInfo->second;
1878       const int64_t GnuArgsSize = MIB->getGnuArgsSize(Instruction);
1879       if (GnuArgsSize >= 0)
1880         OS << "; GNU_args_size = " << GnuArgsSize;
1881     }
1882   } else if (MIB->isIndirectBranch(Instruction)) {
1883     if (uint64_t JTAddress = MIB->getJumpTable(Instruction)) {
1884       OS << " # JUMPTABLE @0x" << Twine::utohexstr(JTAddress);
1885     } else {
1886       OS << " # UNKNOWN CONTROL FLOW";
1887     }
1888   }
1889   if (std::optional<uint32_t> Offset = MIB->getOffset(Instruction))
1890     OS << " # Offset: " << *Offset;
1891   if (auto Label = MIB->getLabel(Instruction))
1892     OS << " # Label: " << **Label;
1893 
1894   MIB->printAnnotations(Instruction, OS);
1895 
1896   if (opts::PrintDebugInfo)
1897     printDebugInfo(OS, Instruction, Function, DwCtx.get());
1898 
1899   if ((opts::PrintRelocations || PrintRelocations) && Function) {
1900     const uint64_t Size = computeCodeSize(&Instruction, &Instruction + 1);
1901     Function->printRelocations(OS, Offset, Size);
1902   }
1903 
1904   OS << Endl;
1905 
1906   if (PrintMCInst) {
1907     Instruction.dump_pretty(OS, InstPrinter.get());
1908     OS << Endl;
1909   }
1910 }
1911 
1912 std::optional<uint64_t>
1913 BinaryContext::getBaseAddressForMapping(uint64_t MMapAddress,
1914                                         uint64_t FileOffset) const {
1915   // Find a segment with a matching file offset.
1916   for (auto &KV : SegmentMapInfo) {
1917     const SegmentInfo &SegInfo = KV.second;
1918     if (alignDown(SegInfo.FileOffset, SegInfo.Alignment) == FileOffset) {
1919       // Use segment's aligned memory offset to calculate the base address.
1920       const uint64_t MemOffset = alignDown(SegInfo.Address, SegInfo.Alignment);
1921       return MMapAddress - MemOffset;
1922     }
1923   }
1924 
1925   return std::nullopt;
1926 }
1927 
1928 ErrorOr<BinarySection &> BinaryContext::getSectionForAddress(uint64_t Address) {
1929   auto SI = AddressToSection.upper_bound(Address);
1930   if (SI != AddressToSection.begin()) {
1931     --SI;
1932     uint64_t UpperBound = SI->first + SI->second->getSize();
1933     if (!SI->second->getSize())
1934       UpperBound += 1;
1935     if (UpperBound > Address)
1936       return *SI->second;
1937   }
1938   return std::make_error_code(std::errc::bad_address);
1939 }
1940 
1941 ErrorOr<StringRef>
1942 BinaryContext::getSectionNameForAddress(uint64_t Address) const {
1943   if (ErrorOr<const BinarySection &> Section = getSectionForAddress(Address))
1944     return Section->getName();
1945   return std::make_error_code(std::errc::bad_address);
1946 }
1947 
1948 BinarySection &BinaryContext::registerSection(BinarySection *Section) {
1949   auto Res = Sections.insert(Section);
1950   (void)Res;
1951   assert(Res.second && "can't register the same section twice.");
1952 
1953   // Only register allocatable sections in the AddressToSection map.
1954   if (Section->isAllocatable() && Section->getAddress())
1955     AddressToSection.insert(std::make_pair(Section->getAddress(), Section));
1956   NameToSection.insert(
1957       std::make_pair(std::string(Section->getName()), Section));
1958   if (Section->hasSectionRef())
1959     SectionRefToBinarySection.insert(
1960         std::make_pair(Section->getSectionRef(), Section));
1961 
1962   LLVM_DEBUG(dbgs() << "BOLT-DEBUG: registering " << *Section << "\n");
1963   return *Section;
1964 }
1965 
1966 BinarySection &BinaryContext::registerSection(SectionRef Section) {
1967   return registerSection(new BinarySection(*this, Section));
1968 }
1969 
1970 BinarySection &
1971 BinaryContext::registerSection(const Twine &SectionName,
1972                                const BinarySection &OriginalSection) {
1973   return registerSection(
1974       new BinarySection(*this, SectionName, OriginalSection));
1975 }
1976 
1977 BinarySection &
1978 BinaryContext::registerOrUpdateSection(const Twine &Name, unsigned ELFType,
1979                                        unsigned ELFFlags, uint8_t *Data,
1980                                        uint64_t Size, unsigned Alignment) {
1981   auto NamedSections = getSectionByName(Name);
1982   if (NamedSections.begin() != NamedSections.end()) {
1983     assert(std::next(NamedSections.begin()) == NamedSections.end() &&
1984            "can only update unique sections");
1985     BinarySection *Section = NamedSections.begin()->second;
1986 
1987     LLVM_DEBUG(dbgs() << "BOLT-DEBUG: updating " << *Section << " -> ");
1988     const bool Flag = Section->isAllocatable();
1989     (void)Flag;
1990     Section->update(Data, Size, Alignment, ELFType, ELFFlags);
1991     LLVM_DEBUG(dbgs() << *Section << "\n");
1992     // FIXME: Fix section flags/attributes for MachO.
1993     if (isELF())
1994       assert(Flag == Section->isAllocatable() &&
1995              "can't change section allocation status");
1996     return *Section;
1997   }
1998 
1999   return registerSection(
2000       new BinarySection(*this, Name, Data, Size, Alignment, ELFType, ELFFlags));
2001 }
2002 
2003 void BinaryContext::deregisterSectionName(const BinarySection &Section) {
2004   auto NameRange = NameToSection.equal_range(Section.getName().str());
2005   while (NameRange.first != NameRange.second) {
2006     if (NameRange.first->second == &Section) {
2007       NameToSection.erase(NameRange.first);
2008       break;
2009     }
2010     ++NameRange.first;
2011   }
2012 }
2013 
2014 void BinaryContext::deregisterUnusedSections() {
2015   ErrorOr<BinarySection &> AbsSection = getUniqueSectionByName("<absolute>");
2016   for (auto SI = Sections.begin(); SI != Sections.end();) {
2017     BinarySection *Section = *SI;
2018     // We check getOutputData() instead of getOutputSize() because sometimes
2019     // zero-sized .text.cold sections are allocated.
2020     if (Section->hasSectionRef() || Section->getOutputData() ||
2021         (AbsSection && Section == &AbsSection.get())) {
2022       ++SI;
2023       continue;
2024     }
2025 
2026     LLVM_DEBUG(dbgs() << "LLVM-DEBUG: deregistering " << Section->getName()
2027                       << '\n';);
2028     deregisterSectionName(*Section);
2029     SI = Sections.erase(SI);
2030     delete Section;
2031   }
2032 }
2033 
2034 bool BinaryContext::deregisterSection(BinarySection &Section) {
2035   BinarySection *SectionPtr = &Section;
2036   auto Itr = Sections.find(SectionPtr);
2037   if (Itr != Sections.end()) {
2038     auto Range = AddressToSection.equal_range(SectionPtr->getAddress());
2039     while (Range.first != Range.second) {
2040       if (Range.first->second == SectionPtr) {
2041         AddressToSection.erase(Range.first);
2042         break;
2043       }
2044       ++Range.first;
2045     }
2046 
2047     deregisterSectionName(*SectionPtr);
2048     Sections.erase(Itr);
2049     delete SectionPtr;
2050     return true;
2051   }
2052   return false;
2053 }
2054 
2055 void BinaryContext::renameSection(BinarySection &Section,
2056                                   const Twine &NewName) {
2057   auto Itr = Sections.find(&Section);
2058   assert(Itr != Sections.end() && "Section must exist to be renamed.");
2059   Sections.erase(Itr);
2060 
2061   deregisterSectionName(Section);
2062 
2063   Section.Name = NewName.str();
2064   Section.setOutputName(Section.Name);
2065 
2066   NameToSection.insert(std::make_pair(Section.Name, &Section));
2067 
2068   // Reinsert with the new name.
2069   Sections.insert(&Section);
2070 }
2071 
2072 void BinaryContext::printSections(raw_ostream &OS) const {
2073   for (BinarySection *const &Section : Sections)
2074     OS << "BOLT-INFO: " << *Section << "\n";
2075 }
2076 
2077 BinarySection &BinaryContext::absoluteSection() {
2078   if (ErrorOr<BinarySection &> Section = getUniqueSectionByName("<absolute>"))
2079     return *Section;
2080   return registerOrUpdateSection("<absolute>", ELF::SHT_NULL, 0u);
2081 }
2082 
2083 ErrorOr<uint64_t> BinaryContext::getUnsignedValueAtAddress(uint64_t Address,
2084                                                            size_t Size) const {
2085   const ErrorOr<const BinarySection &> Section = getSectionForAddress(Address);
2086   if (!Section)
2087     return std::make_error_code(std::errc::bad_address);
2088 
2089   if (Section->isVirtual())
2090     return 0;
2091 
2092   DataExtractor DE(Section->getContents(), AsmInfo->isLittleEndian(),
2093                    AsmInfo->getCodePointerSize());
2094   auto ValueOffset = static_cast<uint64_t>(Address - Section->getAddress());
2095   return DE.getUnsigned(&ValueOffset, Size);
2096 }
2097 
2098 ErrorOr<uint64_t> BinaryContext::getSignedValueAtAddress(uint64_t Address,
2099                                                          size_t Size) const {
2100   const ErrorOr<const BinarySection &> Section = getSectionForAddress(Address);
2101   if (!Section)
2102     return std::make_error_code(std::errc::bad_address);
2103 
2104   if (Section->isVirtual())
2105     return 0;
2106 
2107   DataExtractor DE(Section->getContents(), AsmInfo->isLittleEndian(),
2108                    AsmInfo->getCodePointerSize());
2109   auto ValueOffset = static_cast<uint64_t>(Address - Section->getAddress());
2110   return DE.getSigned(&ValueOffset, Size);
2111 }
2112 
2113 void BinaryContext::addRelocation(uint64_t Address, MCSymbol *Symbol,
2114                                   uint64_t Type, uint64_t Addend,
2115                                   uint64_t Value) {
2116   ErrorOr<BinarySection &> Section = getSectionForAddress(Address);
2117   assert(Section && "cannot find section for address");
2118   Section->addRelocation(Address - Section->getAddress(), Symbol, Type, Addend,
2119                          Value);
2120 }
2121 
2122 void BinaryContext::addDynamicRelocation(uint64_t Address, MCSymbol *Symbol,
2123                                          uint64_t Type, uint64_t Addend,
2124                                          uint64_t Value) {
2125   ErrorOr<BinarySection &> Section = getSectionForAddress(Address);
2126   assert(Section && "cannot find section for address");
2127   Section->addDynamicRelocation(Address - Section->getAddress(), Symbol, Type,
2128                                 Addend, Value);
2129 }
2130 
2131 bool BinaryContext::removeRelocationAt(uint64_t Address) {
2132   ErrorOr<BinarySection &> Section = getSectionForAddress(Address);
2133   assert(Section && "cannot find section for address");
2134   return Section->removeRelocationAt(Address - Section->getAddress());
2135 }
2136 
2137 const Relocation *BinaryContext::getRelocationAt(uint64_t Address) const {
2138   ErrorOr<const BinarySection &> Section = getSectionForAddress(Address);
2139   if (!Section)
2140     return nullptr;
2141 
2142   return Section->getRelocationAt(Address - Section->getAddress());
2143 }
2144 
2145 const Relocation *
2146 BinaryContext::getDynamicRelocationAt(uint64_t Address) const {
2147   ErrorOr<const BinarySection &> Section = getSectionForAddress(Address);
2148   if (!Section)
2149     return nullptr;
2150 
2151   return Section->getDynamicRelocationAt(Address - Section->getAddress());
2152 }
2153 
2154 void BinaryContext::markAmbiguousRelocations(BinaryData &BD,
2155                                              const uint64_t Address) {
2156   auto setImmovable = [&](BinaryData &BD) {
2157     BinaryData *Root = BD.getAtomicRoot();
2158     LLVM_DEBUG(if (Root->isMoveable()) {
2159       dbgs() << "BOLT-DEBUG: setting " << *Root << " as immovable "
2160              << "due to ambiguous relocation referencing 0x"
2161              << Twine::utohexstr(Address) << '\n';
2162     });
2163     Root->setIsMoveable(false);
2164   };
2165 
2166   if (Address == BD.getAddress()) {
2167     setImmovable(BD);
2168 
2169     // Set previous symbol as immovable
2170     BinaryData *Prev = getBinaryDataContainingAddress(Address - 1);
2171     if (Prev && Prev->getEndAddress() == BD.getAddress())
2172       setImmovable(*Prev);
2173   }
2174 
2175   if (Address == BD.getEndAddress()) {
2176     setImmovable(BD);
2177 
2178     // Set next symbol as immovable
2179     BinaryData *Next = getBinaryDataContainingAddress(BD.getEndAddress());
2180     if (Next && Next->getAddress() == BD.getEndAddress())
2181       setImmovable(*Next);
2182   }
2183 }
2184 
2185 BinaryFunction *BinaryContext::getFunctionForSymbol(const MCSymbol *Symbol,
2186                                                     uint64_t *EntryDesc) {
2187   std::shared_lock<llvm::sys::RWMutex> Lock(SymbolToFunctionMapMutex);
2188   auto BFI = SymbolToFunctionMap.find(Symbol);
2189   if (BFI == SymbolToFunctionMap.end())
2190     return nullptr;
2191 
2192   BinaryFunction *BF = BFI->second;
2193   if (EntryDesc)
2194     *EntryDesc = BF->getEntryIDForSymbol(Symbol);
2195 
2196   return BF;
2197 }
2198 
2199 void BinaryContext::exitWithBugReport(StringRef Message,
2200                                       const BinaryFunction &Function) const {
2201   errs() << "=======================================\n";
2202   errs() << "BOLT is unable to proceed because it couldn't properly understand "
2203             "this function.\n";
2204   errs() << "If you are running the most recent version of BOLT, you may "
2205             "want to "
2206             "report this and paste this dump.\nPlease check that there is no "
2207             "sensitive contents being shared in this dump.\n";
2208   errs() << "\nOffending function: " << Function.getPrintName() << "\n\n";
2209   ScopedPrinter SP(errs());
2210   SP.printBinaryBlock("Function contents", *Function.getData());
2211   errs() << "\n";
2212   Function.dump();
2213   errs() << "ERROR: " << Message;
2214   errs() << "\n=======================================\n";
2215   exit(1);
2216 }
2217 
2218 BinaryFunction *
2219 BinaryContext::createInjectedBinaryFunction(const std::string &Name,
2220                                             bool IsSimple) {
2221   InjectedBinaryFunctions.push_back(new BinaryFunction(Name, *this, IsSimple));
2222   BinaryFunction *BF = InjectedBinaryFunctions.back();
2223   setSymbolToFunctionMap(BF->getSymbol(), BF);
2224   BF->CurrentState = BinaryFunction::State::CFG;
2225   return BF;
2226 }
2227 
2228 std::pair<size_t, size_t>
2229 BinaryContext::calculateEmittedSize(BinaryFunction &BF, bool FixBranches) {
2230   // Adjust branch instruction to match the current layout.
2231   if (FixBranches)
2232     BF.fixBranches();
2233 
2234   // Create local MC context to isolate the effect of ephemeral code emission.
2235   IndependentCodeEmitter MCEInstance = createIndependentMCCodeEmitter();
2236   MCContext *LocalCtx = MCEInstance.LocalCtx.get();
2237   MCAsmBackend *MAB =
2238       TheTarget->createMCAsmBackend(*STI, *MRI, MCTargetOptions());
2239 
2240   SmallString<256> Code;
2241   raw_svector_ostream VecOS(Code);
2242 
2243   std::unique_ptr<MCObjectWriter> OW = MAB->createObjectWriter(VecOS);
2244   std::unique_ptr<MCStreamer> Streamer(TheTarget->createMCObjectStreamer(
2245       *TheTriple, *LocalCtx, std::unique_ptr<MCAsmBackend>(MAB), std::move(OW),
2246       std::unique_ptr<MCCodeEmitter>(MCEInstance.MCE.release()), *STI,
2247       /*RelaxAll=*/false,
2248       /*IncrementalLinkerCompatible=*/false,
2249       /*DWARFMustBeAtTheEnd=*/false));
2250 
2251   Streamer->initSections(false, *STI);
2252 
2253   MCSection *Section = MCEInstance.LocalMOFI->getTextSection();
2254   Section->setHasInstructions(true);
2255 
2256   // Create symbols in the LocalCtx so that they get destroyed with it.
2257   MCSymbol *StartLabel = LocalCtx->createTempSymbol();
2258   MCSymbol *EndLabel = LocalCtx->createTempSymbol();
2259 
2260   Streamer->switchSection(Section);
2261   Streamer->emitLabel(StartLabel);
2262   emitFunctionBody(*Streamer, BF, BF.getLayout().getMainFragment(),
2263                    /*EmitCodeOnly=*/true);
2264   Streamer->emitLabel(EndLabel);
2265 
2266   using LabelRange = std::pair<const MCSymbol *, const MCSymbol *>;
2267   SmallVector<LabelRange> SplitLabels;
2268   for (FunctionFragment &FF : BF.getLayout().getSplitFragments()) {
2269     MCSymbol *const SplitStartLabel = LocalCtx->createTempSymbol();
2270     MCSymbol *const SplitEndLabel = LocalCtx->createTempSymbol();
2271     SplitLabels.emplace_back(SplitStartLabel, SplitEndLabel);
2272 
2273     MCSectionELF *const SplitSection = LocalCtx->getELFSection(
2274         BF.getCodeSectionName(FF.getFragmentNum()), ELF::SHT_PROGBITS,
2275         ELF::SHF_EXECINSTR | ELF::SHF_ALLOC);
2276     SplitSection->setHasInstructions(true);
2277     Streamer->switchSection(SplitSection);
2278 
2279     Streamer->emitLabel(SplitStartLabel);
2280     emitFunctionBody(*Streamer, BF, FF, /*EmitCodeOnly=*/true);
2281     Streamer->emitLabel(SplitEndLabel);
2282     // To avoid calling MCObjectStreamer::flushPendingLabels() which is
2283     // private
2284     Streamer->emitBytes(StringRef(""));
2285     Streamer->switchSection(Section);
2286   }
2287 
2288   // To avoid calling MCObjectStreamer::flushPendingLabels() which is private or
2289   // MCStreamer::Finish(), which does more than we want
2290   Streamer->emitBytes(StringRef(""));
2291 
2292   MCAssembler &Assembler =
2293       static_cast<MCObjectStreamer *>(Streamer.get())->getAssembler();
2294   MCAsmLayout Layout(Assembler);
2295   Assembler.layout(Layout);
2296 
2297   const uint64_t HotSize =
2298       Layout.getSymbolOffset(*EndLabel) - Layout.getSymbolOffset(*StartLabel);
2299   const uint64_t ColdSize =
2300       std::accumulate(SplitLabels.begin(), SplitLabels.end(), 0ULL,
2301                       [&](const uint64_t Accu, const LabelRange &Labels) {
2302                         return Accu + Layout.getSymbolOffset(*Labels.second) -
2303                                Layout.getSymbolOffset(*Labels.first);
2304                       });
2305 
2306   // Clean-up the effect of the code emission.
2307   for (const MCSymbol &Symbol : Assembler.symbols()) {
2308     MCSymbol *MutableSymbol = const_cast<MCSymbol *>(&Symbol);
2309     MutableSymbol->setUndefined();
2310     MutableSymbol->setIsRegistered(false);
2311   }
2312 
2313   return std::make_pair(HotSize, ColdSize);
2314 }
2315 
2316 bool BinaryContext::validateInstructionEncoding(
2317     ArrayRef<uint8_t> InputSequence) const {
2318   MCInst Inst;
2319   uint64_t InstSize;
2320   DisAsm->getInstruction(Inst, InstSize, InputSequence, 0, nulls());
2321   assert(InstSize == InputSequence.size() &&
2322          "Disassembled instruction size does not match the sequence.");
2323 
2324   SmallString<256> Code;
2325   SmallVector<MCFixup, 4> Fixups;
2326 
2327   MCE->encodeInstruction(Inst, Code, Fixups, *STI);
2328   auto OutputSequence = ArrayRef<uint8_t>((uint8_t *)Code.data(), Code.size());
2329   if (InputSequence != OutputSequence) {
2330     if (opts::Verbosity > 1) {
2331       errs() << "BOLT-WARNING: mismatched encoding detected\n"
2332              << "      input: " << InputSequence << '\n'
2333              << "     output: " << OutputSequence << '\n';
2334     }
2335     return false;
2336   }
2337 
2338   return true;
2339 }
2340 
2341 uint64_t BinaryContext::getHotThreshold() const {
2342   static uint64_t Threshold = 0;
2343   if (Threshold == 0) {
2344     Threshold = std::max(
2345         (uint64_t)opts::ExecutionCountThreshold,
2346         NumProfiledFuncs ? SumExecutionCount / (2 * NumProfiledFuncs) : 1);
2347   }
2348   return Threshold;
2349 }
2350 
2351 BinaryFunction *BinaryContext::getBinaryFunctionContainingAddress(
2352     uint64_t Address, bool CheckPastEnd, bool UseMaxSize) {
2353   auto FI = BinaryFunctions.upper_bound(Address);
2354   if (FI == BinaryFunctions.begin())
2355     return nullptr;
2356   --FI;
2357 
2358   const uint64_t UsedSize =
2359       UseMaxSize ? FI->second.getMaxSize() : FI->second.getSize();
2360 
2361   if (Address >= FI->first + UsedSize + (CheckPastEnd ? 1 : 0))
2362     return nullptr;
2363 
2364   return &FI->second;
2365 }
2366 
2367 BinaryFunction *BinaryContext::getBinaryFunctionAtAddress(uint64_t Address) {
2368   // First, try to find a function starting at the given address. If the
2369   // function was folded, this will get us the original folded function if it
2370   // wasn't removed from the list, e.g. in non-relocation mode.
2371   auto BFI = BinaryFunctions.find(Address);
2372   if (BFI != BinaryFunctions.end())
2373     return &BFI->second;
2374 
2375   // We might have folded the function matching the object at the given
2376   // address. In such case, we look for a function matching the symbol
2377   // registered at the original address. The new function (the one that the
2378   // original was folded into) will hold the symbol.
2379   if (const BinaryData *BD = getBinaryDataAtAddress(Address)) {
2380     uint64_t EntryID = 0;
2381     BinaryFunction *BF = getFunctionForSymbol(BD->getSymbol(), &EntryID);
2382     if (BF && EntryID == 0)
2383       return BF;
2384   }
2385   return nullptr;
2386 }
2387 
2388 DebugAddressRangesVector BinaryContext::translateModuleAddressRanges(
2389     const DWARFAddressRangesVector &InputRanges) const {
2390   DebugAddressRangesVector OutputRanges;
2391 
2392   for (const DWARFAddressRange Range : InputRanges) {
2393     auto BFI = BinaryFunctions.lower_bound(Range.LowPC);
2394     while (BFI != BinaryFunctions.end()) {
2395       const BinaryFunction &Function = BFI->second;
2396       if (Function.getAddress() >= Range.HighPC)
2397         break;
2398       const DebugAddressRangesVector FunctionRanges =
2399           Function.getOutputAddressRanges();
2400       llvm::move(FunctionRanges, std::back_inserter(OutputRanges));
2401       std::advance(BFI, 1);
2402     }
2403   }
2404 
2405   return OutputRanges;
2406 }
2407 
2408 } // namespace bolt
2409 } // namespace llvm
2410