xref: /llvm-project/bolt/lib/Core/BinaryContext.cpp (revision ea47ccc78f953d7201601ae1a9da64c6e7865f54)
1 //===- bolt/Core/BinaryContext.cpp - Low-level context --------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements the BinaryContext class.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "bolt/Core/BinaryContext.h"
14 #include "bolt/Core/BinaryEmitter.h"
15 #include "bolt/Core/BinaryFunction.h"
16 #include "bolt/Utils/CommandLineOpts.h"
17 #include "bolt/Utils/NameResolver.h"
18 #include "bolt/Utils/Utils.h"
19 #include "llvm/ADT/Twine.h"
20 #include "llvm/DebugInfo/DWARF/DWARFCompileUnit.h"
21 #include "llvm/DebugInfo/DWARF/DWARFFormValue.h"
22 #include "llvm/DebugInfo/DWARF/DWARFUnit.h"
23 #include "llvm/MC/MCAsmLayout.h"
24 #include "llvm/MC/MCAssembler.h"
25 #include "llvm/MC/MCContext.h"
26 #include "llvm/MC/MCDisassembler/MCDisassembler.h"
27 #include "llvm/MC/MCInstPrinter.h"
28 #include "llvm/MC/MCObjectStreamer.h"
29 #include "llvm/MC/MCObjectWriter.h"
30 #include "llvm/MC/MCRegisterInfo.h"
31 #include "llvm/MC/MCSectionELF.h"
32 #include "llvm/MC/MCStreamer.h"
33 #include "llvm/MC/MCSubtargetInfo.h"
34 #include "llvm/MC/MCSymbol.h"
35 #include "llvm/Support/CommandLine.h"
36 #include "llvm/Support/Error.h"
37 #include "llvm/Support/Regex.h"
38 #include <algorithm>
39 #include <functional>
40 #include <iterator>
41 #include <numeric>
42 #include <unordered_set>
43 
44 using namespace llvm;
45 
46 #undef  DEBUG_TYPE
47 #define DEBUG_TYPE "bolt"
48 
49 namespace opts {
50 
51 cl::opt<bool> NoHugePages("no-huge-pages",
52                           cl::desc("use regular size pages for code alignment"),
53                           cl::Hidden, cl::cat(BoltCategory));
54 
55 static cl::opt<bool>
56 PrintDebugInfo("print-debug-info",
57   cl::desc("print debug info when printing functions"),
58   cl::Hidden,
59   cl::ZeroOrMore,
60   cl::cat(BoltCategory));
61 
62 cl::opt<bool> PrintRelocations(
63     "print-relocations",
64     cl::desc("print relocations when printing functions/objects"), cl::Hidden,
65     cl::cat(BoltCategory));
66 
67 static cl::opt<bool>
68 PrintMemData("print-mem-data",
69   cl::desc("print memory data annotations when printing functions"),
70   cl::Hidden,
71   cl::ZeroOrMore,
72   cl::cat(BoltCategory));
73 
74 } // namespace opts
75 
76 namespace llvm {
77 namespace bolt {
78 
79 BinaryContext::BinaryContext(std::unique_ptr<MCContext> Ctx,
80                              std::unique_ptr<DWARFContext> DwCtx,
81                              std::unique_ptr<Triple> TheTriple,
82                              const Target *TheTarget, std::string TripleName,
83                              std::unique_ptr<MCCodeEmitter> MCE,
84                              std::unique_ptr<MCObjectFileInfo> MOFI,
85                              std::unique_ptr<const MCAsmInfo> AsmInfo,
86                              std::unique_ptr<const MCInstrInfo> MII,
87                              std::unique_ptr<const MCSubtargetInfo> STI,
88                              std::unique_ptr<MCInstPrinter> InstPrinter,
89                              std::unique_ptr<const MCInstrAnalysis> MIA,
90                              std::unique_ptr<MCPlusBuilder> MIB,
91                              std::unique_ptr<const MCRegisterInfo> MRI,
92                              std::unique_ptr<MCDisassembler> DisAsm)
93     : Ctx(std::move(Ctx)), DwCtx(std::move(DwCtx)),
94       TheTriple(std::move(TheTriple)), TheTarget(TheTarget),
95       TripleName(TripleName), MCE(std::move(MCE)), MOFI(std::move(MOFI)),
96       AsmInfo(std::move(AsmInfo)), MII(std::move(MII)), STI(std::move(STI)),
97       InstPrinter(std::move(InstPrinter)), MIA(std::move(MIA)),
98       MIB(std::move(MIB)), MRI(std::move(MRI)), DisAsm(std::move(DisAsm)) {
99   Relocation::Arch = this->TheTriple->getArch();
100   RegularPageSize = isAArch64() ? RegularPageSizeAArch64 : RegularPageSizeX86;
101   PageAlign = opts::NoHugePages ? RegularPageSize : HugePageSize;
102 }
103 
104 BinaryContext::~BinaryContext() {
105   for (BinarySection *Section : Sections)
106     delete Section;
107   for (BinaryFunction *InjectedFunction : InjectedBinaryFunctions)
108     delete InjectedFunction;
109   for (std::pair<const uint64_t, JumpTable *> JTI : JumpTables)
110     delete JTI.second;
111   clearBinaryData();
112 }
113 
114 /// Create BinaryContext for a given architecture \p ArchName and
115 /// triple \p TripleName.
116 Expected<std::unique_ptr<BinaryContext>>
117 BinaryContext::createBinaryContext(const ObjectFile *File, bool IsPIC,
118                                    std::unique_ptr<DWARFContext> DwCtx) {
119   StringRef ArchName = "";
120   StringRef FeaturesStr = "";
121   switch (File->getArch()) {
122   case llvm::Triple::x86_64:
123     ArchName = "x86-64";
124     FeaturesStr = "+nopl";
125     break;
126   case llvm::Triple::aarch64:
127     ArchName = "aarch64";
128     FeaturesStr = "+all";
129     break;
130   default:
131     return createStringError(std::errc::not_supported,
132                              "BOLT-ERROR: Unrecognized machine in ELF file");
133   }
134 
135   auto TheTriple = std::make_unique<Triple>(File->makeTriple());
136   const std::string TripleName = TheTriple->str();
137 
138   std::string Error;
139   const Target *TheTarget =
140       TargetRegistry::lookupTarget(std::string(ArchName), *TheTriple, Error);
141   if (!TheTarget)
142     return createStringError(make_error_code(std::errc::not_supported),
143                              Twine("BOLT-ERROR: ", Error));
144 
145   std::unique_ptr<const MCRegisterInfo> MRI(
146       TheTarget->createMCRegInfo(TripleName));
147   if (!MRI)
148     return createStringError(
149         make_error_code(std::errc::not_supported),
150         Twine("BOLT-ERROR: no register info for target ", TripleName));
151 
152   // Set up disassembler.
153   std::unique_ptr<MCAsmInfo> AsmInfo(
154       TheTarget->createMCAsmInfo(*MRI, TripleName, MCTargetOptions()));
155   if (!AsmInfo)
156     return createStringError(
157         make_error_code(std::errc::not_supported),
158         Twine("BOLT-ERROR: no assembly info for target ", TripleName));
159   // BOLT creates "func@PLT" symbols for PLT entries. In function assembly dump
160   // we want to emit such names as using @PLT without double quotes to convey
161   // variant kind to the assembler. BOLT doesn't rely on the linker so we can
162   // override the default AsmInfo behavior to emit names the way we want.
163   AsmInfo->setAllowAtInName(true);
164 
165   std::unique_ptr<const MCSubtargetInfo> STI(
166       TheTarget->createMCSubtargetInfo(TripleName, "", FeaturesStr));
167   if (!STI)
168     return createStringError(
169         make_error_code(std::errc::not_supported),
170         Twine("BOLT-ERROR: no subtarget info for target ", TripleName));
171 
172   std::unique_ptr<const MCInstrInfo> MII(TheTarget->createMCInstrInfo());
173   if (!MII)
174     return createStringError(
175         make_error_code(std::errc::not_supported),
176         Twine("BOLT-ERROR: no instruction info for target ", TripleName));
177 
178   std::unique_ptr<MCContext> Ctx(
179       new MCContext(*TheTriple, AsmInfo.get(), MRI.get(), STI.get()));
180   std::unique_ptr<MCObjectFileInfo> MOFI(
181       TheTarget->createMCObjectFileInfo(*Ctx, IsPIC));
182   Ctx->setObjectFileInfo(MOFI.get());
183   // We do not support X86 Large code model. Change this in the future.
184   bool Large = false;
185   if (TheTriple->getArch() == llvm::Triple::aarch64)
186     Large = true;
187   unsigned LSDAEncoding =
188       Large ? dwarf::DW_EH_PE_absptr : dwarf::DW_EH_PE_udata4;
189   if (IsPIC) {
190     LSDAEncoding = dwarf::DW_EH_PE_pcrel |
191                    (Large ? dwarf::DW_EH_PE_sdata8 : dwarf::DW_EH_PE_sdata4);
192   }
193 
194   std::unique_ptr<MCDisassembler> DisAsm(
195       TheTarget->createMCDisassembler(*STI, *Ctx));
196 
197   if (!DisAsm)
198     return createStringError(
199         make_error_code(std::errc::not_supported),
200         Twine("BOLT-ERROR: no disassembler info for target ", TripleName));
201 
202   std::unique_ptr<const MCInstrAnalysis> MIA(
203       TheTarget->createMCInstrAnalysis(MII.get()));
204   if (!MIA)
205     return createStringError(
206         make_error_code(std::errc::not_supported),
207         Twine("BOLT-ERROR: failed to create instruction analysis for target ",
208               TripleName));
209 
210   int AsmPrinterVariant = AsmInfo->getAssemblerDialect();
211   std::unique_ptr<MCInstPrinter> InstructionPrinter(
212       TheTarget->createMCInstPrinter(*TheTriple, AsmPrinterVariant, *AsmInfo,
213                                      *MII, *MRI));
214   if (!InstructionPrinter)
215     return createStringError(
216         make_error_code(std::errc::not_supported),
217         Twine("BOLT-ERROR: no instruction printer for target ", TripleName));
218   InstructionPrinter->setPrintImmHex(true);
219 
220   std::unique_ptr<MCCodeEmitter> MCE(
221       TheTarget->createMCCodeEmitter(*MII, *Ctx));
222 
223   // Make sure we don't miss any output on core dumps.
224   outs().SetUnbuffered();
225   errs().SetUnbuffered();
226   dbgs().SetUnbuffered();
227 
228   auto BC = std::make_unique<BinaryContext>(
229       std::move(Ctx), std::move(DwCtx), std::move(TheTriple), TheTarget,
230       std::string(TripleName), std::move(MCE), std::move(MOFI),
231       std::move(AsmInfo), std::move(MII), std::move(STI),
232       std::move(InstructionPrinter), std::move(MIA), nullptr, std::move(MRI),
233       std::move(DisAsm));
234 
235   BC->LSDAEncoding = LSDAEncoding;
236 
237   BC->MAB = std::unique_ptr<MCAsmBackend>(
238       BC->TheTarget->createMCAsmBackend(*BC->STI, *BC->MRI, MCTargetOptions()));
239 
240   BC->setFilename(File->getFileName());
241 
242   BC->HasFixedLoadAddress = !IsPIC;
243 
244   BC->SymbolicDisAsm = std::unique_ptr<MCDisassembler>(
245       BC->TheTarget->createMCDisassembler(*BC->STI, *BC->Ctx));
246 
247   if (!BC->SymbolicDisAsm)
248     return createStringError(
249         make_error_code(std::errc::not_supported),
250         Twine("BOLT-ERROR: no disassembler info for target ", TripleName));
251 
252   return std::move(BC);
253 }
254 
255 bool BinaryContext::forceSymbolRelocations(StringRef SymbolName) const {
256   if (opts::HotText &&
257       (SymbolName == "__hot_start" || SymbolName == "__hot_end"))
258     return true;
259 
260   if (opts::HotData &&
261       (SymbolName == "__hot_data_start" || SymbolName == "__hot_data_end"))
262     return true;
263 
264   if (SymbolName == "_end")
265     return true;
266 
267   return false;
268 }
269 
270 std::unique_ptr<MCObjectWriter>
271 BinaryContext::createObjectWriter(raw_pwrite_stream &OS) {
272   return MAB->createObjectWriter(OS);
273 }
274 
275 bool BinaryContext::validateObjectNesting() const {
276   auto Itr = BinaryDataMap.begin();
277   auto End = BinaryDataMap.end();
278   bool Valid = true;
279   while (Itr != End) {
280     auto Next = std::next(Itr);
281     while (Next != End &&
282            Itr->second->getSection() == Next->second->getSection() &&
283            Itr->second->containsRange(Next->second->getAddress(),
284                                       Next->second->getSize())) {
285       if (Next->second->Parent != Itr->second) {
286         errs() << "BOLT-WARNING: object nesting incorrect for:\n"
287                << "BOLT-WARNING:  " << *Itr->second << "\n"
288                << "BOLT-WARNING:  " << *Next->second << "\n";
289         Valid = false;
290       }
291       ++Next;
292     }
293     Itr = Next;
294   }
295   return Valid;
296 }
297 
298 bool BinaryContext::validateHoles() const {
299   bool Valid = true;
300   for (BinarySection &Section : sections()) {
301     for (const Relocation &Rel : Section.relocations()) {
302       uint64_t RelAddr = Rel.Offset + Section.getAddress();
303       const BinaryData *BD = getBinaryDataContainingAddress(RelAddr);
304       if (!BD) {
305         errs() << "BOLT-WARNING: no BinaryData found for relocation at address"
306                << " 0x" << Twine::utohexstr(RelAddr) << " in "
307                << Section.getName() << "\n";
308         Valid = false;
309       } else if (!BD->getAtomicRoot()) {
310         errs() << "BOLT-WARNING: no atomic BinaryData found for relocation at "
311                << "address 0x" << Twine::utohexstr(RelAddr) << " in "
312                << Section.getName() << "\n";
313         Valid = false;
314       }
315     }
316   }
317   return Valid;
318 }
319 
320 void BinaryContext::updateObjectNesting(BinaryDataMapType::iterator GAI) {
321   const uint64_t Address = GAI->second->getAddress();
322   const uint64_t Size = GAI->second->getSize();
323 
324   auto fixParents = [&](BinaryDataMapType::iterator Itr,
325                         BinaryData *NewParent) {
326     BinaryData *OldParent = Itr->second->Parent;
327     Itr->second->Parent = NewParent;
328     ++Itr;
329     while (Itr != BinaryDataMap.end() && OldParent &&
330            Itr->second->Parent == OldParent) {
331       Itr->second->Parent = NewParent;
332       ++Itr;
333     }
334   };
335 
336   // Check if the previous symbol contains the newly added symbol.
337   if (GAI != BinaryDataMap.begin()) {
338     BinaryData *Prev = std::prev(GAI)->second;
339     while (Prev) {
340       if (Prev->getSection() == GAI->second->getSection() &&
341           Prev->containsRange(Address, Size)) {
342         fixParents(GAI, Prev);
343       } else {
344         fixParents(GAI, nullptr);
345       }
346       Prev = Prev->Parent;
347     }
348   }
349 
350   // Check if the newly added symbol contains any subsequent symbols.
351   if (Size != 0) {
352     BinaryData *BD = GAI->second->Parent ? GAI->second->Parent : GAI->second;
353     auto Itr = std::next(GAI);
354     while (
355         Itr != BinaryDataMap.end() &&
356         BD->containsRange(Itr->second->getAddress(), Itr->second->getSize())) {
357       Itr->second->Parent = BD;
358       ++Itr;
359     }
360   }
361 }
362 
363 iterator_range<BinaryContext::binary_data_iterator>
364 BinaryContext::getSubBinaryData(BinaryData *BD) {
365   auto Start = std::next(BinaryDataMap.find(BD->getAddress()));
366   auto End = Start;
367   while (End != BinaryDataMap.end() && BD->isAncestorOf(End->second))
368     ++End;
369   return make_range(Start, End);
370 }
371 
372 std::pair<const MCSymbol *, uint64_t>
373 BinaryContext::handleAddressRef(uint64_t Address, BinaryFunction &BF,
374                                 bool IsPCRel) {
375   if (isAArch64()) {
376     // Check if this is an access to a constant island and create bookkeeping
377     // to keep track of it and emit it later as part of this function.
378     if (MCSymbol *IslandSym = BF.getOrCreateIslandAccess(Address))
379       return std::make_pair(IslandSym, 0);
380 
381     // Detect custom code written in assembly that refers to arbitrary
382     // constant islands from other functions. Write this reference so we
383     // can pull this constant island and emit it as part of this function
384     // too.
385     auto IslandIter = AddressToConstantIslandMap.lower_bound(Address);
386 
387     if (IslandIter != AddressToConstantIslandMap.begin() &&
388         (IslandIter == AddressToConstantIslandMap.end() ||
389          IslandIter->first > Address))
390       --IslandIter;
391 
392     if (IslandIter != AddressToConstantIslandMap.end()) {
393       if (MCSymbol *IslandSym =
394               IslandIter->second->getOrCreateProxyIslandAccess(Address, BF)) {
395         BF.createIslandDependency(IslandSym, IslandIter->second);
396         return std::make_pair(IslandSym, 0);
397       }
398     }
399   }
400 
401   // Note that the address does not necessarily have to reside inside
402   // a section, it could be an absolute address too.
403   ErrorOr<BinarySection &> Section = getSectionForAddress(Address);
404   if (Section && Section->isText()) {
405     if (BF.containsAddress(Address, /*UseMaxSize=*/isAArch64())) {
406       if (Address != BF.getAddress()) {
407         // The address could potentially escape. Mark it as another entry
408         // point into the function.
409         if (opts::Verbosity >= 1) {
410           outs() << "BOLT-INFO: potentially escaped address 0x"
411                  << Twine::utohexstr(Address) << " in function " << BF << '\n';
412         }
413         BF.HasInternalLabelReference = true;
414         return std::make_pair(
415             BF.addEntryPointAtOffset(Address - BF.getAddress()), 0);
416       }
417     } else {
418       addInterproceduralReference(&BF, Address);
419     }
420   }
421 
422   // With relocations, catch jump table references outside of the basic block
423   // containing the indirect jump.
424   if (HasRelocations) {
425     const MemoryContentsType MemType = analyzeMemoryAt(Address, BF);
426     if (MemType == MemoryContentsType::POSSIBLE_PIC_JUMP_TABLE && IsPCRel) {
427       const MCSymbol *Symbol =
428           getOrCreateJumpTable(BF, Address, JumpTable::JTT_PIC);
429 
430       return std::make_pair(Symbol, 0);
431     }
432   }
433 
434   if (BinaryData *BD = getBinaryDataContainingAddress(Address))
435     return std::make_pair(BD->getSymbol(), Address - BD->getAddress());
436 
437   // TODO: use DWARF info to get size/alignment here?
438   MCSymbol *TargetSymbol = getOrCreateGlobalSymbol(Address, "DATAat");
439   LLVM_DEBUG(dbgs() << "Created symbol " << TargetSymbol->getName() << '\n');
440   return std::make_pair(TargetSymbol, 0);
441 }
442 
443 MemoryContentsType BinaryContext::analyzeMemoryAt(uint64_t Address,
444                                                   BinaryFunction &BF) {
445   if (!isX86())
446     return MemoryContentsType::UNKNOWN;
447 
448   ErrorOr<BinarySection &> Section = getSectionForAddress(Address);
449   if (!Section) {
450     // No section - possibly an absolute address. Since we don't allow
451     // internal function addresses to escape the function scope - we
452     // consider it a tail call.
453     if (opts::Verbosity > 1) {
454       errs() << "BOLT-WARNING: no section for address 0x"
455              << Twine::utohexstr(Address) << " referenced from function " << BF
456              << '\n';
457     }
458     return MemoryContentsType::UNKNOWN;
459   }
460 
461   if (Section->isVirtual()) {
462     // The contents are filled at runtime.
463     return MemoryContentsType::UNKNOWN;
464   }
465 
466   // No support for jump tables in code yet.
467   if (Section->isText())
468     return MemoryContentsType::UNKNOWN;
469 
470   // Start with checking for PIC jump table. We expect non-PIC jump tables
471   // to have high 32 bits set to 0.
472   if (analyzeJumpTable(Address, JumpTable::JTT_PIC, BF))
473     return MemoryContentsType::POSSIBLE_PIC_JUMP_TABLE;
474 
475   if (analyzeJumpTable(Address, JumpTable::JTT_NORMAL, BF))
476     return MemoryContentsType::POSSIBLE_JUMP_TABLE;
477 
478   return MemoryContentsType::UNKNOWN;
479 }
480 
481 /// Check if <fragment restored name> == <parent restored name>.cold(.\d+)?
482 bool isPotentialFragmentByName(BinaryFunction &Fragment,
483                                BinaryFunction &Parent) {
484   for (StringRef Name : Parent.getNames()) {
485     std::string NamePrefix = Regex::escape(NameResolver::restore(Name));
486     std::string NameRegex = Twine(NamePrefix, "\\.cold(\\.[0-9]+)?").str();
487     if (Fragment.hasRestoredNameRegex(NameRegex))
488       return true;
489   }
490   return false;
491 }
492 
493 bool BinaryContext::analyzeJumpTable(
494     const uint64_t Address, const JumpTable::JumpTableType Type,
495     BinaryFunction &BF, const uint64_t NextJTAddress,
496     JumpTable::AddressesType *EntriesAsAddress) {
497   // Is one of the targets __builtin_unreachable?
498   bool HasUnreachable = false;
499 
500   // Number of targets other than __builtin_unreachable.
501   uint64_t NumRealEntries = 0;
502 
503   auto addEntryAddress = [&](uint64_t EntryAddress) {
504     if (EntriesAsAddress)
505       EntriesAsAddress->emplace_back(EntryAddress);
506   };
507 
508   auto doesBelongToFunction = [&](const uint64_t Addr,
509                                   BinaryFunction *TargetBF) -> bool {
510     if (BF.containsAddress(Addr))
511       return true;
512     // Nothing to do if we failed to identify the containing function.
513     if (!TargetBF)
514       return false;
515     // Case 1: check if BF is a fragment and TargetBF is its parent.
516     if (BF.isFragment()) {
517       // Parent function may or may not be already registered.
518       // Set parent link based on function name matching heuristic.
519       return registerFragment(BF, *TargetBF);
520     }
521     // Case 2: check if TargetBF is a fragment and BF is its parent.
522     return TargetBF->isFragment() && registerFragment(*TargetBF, BF);
523   };
524 
525   ErrorOr<BinarySection &> Section = getSectionForAddress(Address);
526   if (!Section)
527     return false;
528 
529   // The upper bound is defined by containing object, section limits, and
530   // the next jump table in memory.
531   uint64_t UpperBound = Section->getEndAddress();
532   const BinaryData *JumpTableBD = getBinaryDataAtAddress(Address);
533   if (JumpTableBD && JumpTableBD->getSize()) {
534     assert(JumpTableBD->getEndAddress() <= UpperBound &&
535            "data object cannot cross a section boundary");
536     UpperBound = JumpTableBD->getEndAddress();
537   }
538   if (NextJTAddress)
539     UpperBound = std::min(NextJTAddress, UpperBound);
540 
541   LLVM_DEBUG({
542     using JTT = JumpTable::JumpTableType;
543     dbgs() << formatv("BOLT-DEBUG: analyzeJumpTable @{0:x} in {1}, JTT={2}\n",
544                       Address, BF.getPrintName(),
545                       Type == JTT::JTT_PIC ? "PIC" : "Normal");
546   });
547   const uint64_t EntrySize = getJumpTableEntrySize(Type);
548   for (uint64_t EntryAddress = Address; EntryAddress <= UpperBound - EntrySize;
549        EntryAddress += EntrySize) {
550     LLVM_DEBUG(dbgs() << "  * Checking 0x" << Twine::utohexstr(EntryAddress)
551                       << " -> ");
552     // Check if there's a proper relocation against the jump table entry.
553     if (HasRelocations) {
554       if (Type == JumpTable::JTT_PIC &&
555           !DataPCRelocations.count(EntryAddress)) {
556         LLVM_DEBUG(
557             dbgs() << "FAIL: JTT_PIC table, no relocation for this address\n");
558         break;
559       }
560       if (Type == JumpTable::JTT_NORMAL && !getRelocationAt(EntryAddress)) {
561         LLVM_DEBUG(
562             dbgs()
563             << "FAIL: JTT_NORMAL table, no relocation for this address\n");
564         break;
565       }
566     }
567 
568     const uint64_t Value =
569         (Type == JumpTable::JTT_PIC)
570             ? Address + *getSignedValueAtAddress(EntryAddress, EntrySize)
571             : *getPointerAtAddress(EntryAddress);
572 
573     // __builtin_unreachable() case.
574     if (Value == BF.getAddress() + BF.getSize()) {
575       addEntryAddress(Value);
576       HasUnreachable = true;
577       LLVM_DEBUG(dbgs() << formatv("OK: {0:x} __builtin_unreachable\n", Value));
578       continue;
579     }
580 
581     // Function or one of its fragments.
582     BinaryFunction *TargetBF = getBinaryFunctionContainingAddress(Value);
583 
584     // We assume that a jump table cannot have function start as an entry.
585     if (!doesBelongToFunction(Value, TargetBF) || Value == BF.getAddress()) {
586       LLVM_DEBUG({
587         if (!BF.containsAddress(Value)) {
588           dbgs() << "FAIL: function doesn't contain this address\n";
589           if (TargetBF) {
590             dbgs() << "  ! function containing this address: "
591                    << TargetBF->getPrintName() << '\n';
592             if (TargetBF->isFragment()) {
593               dbgs() << "  ! is a fragment";
594               for (BinaryFunction *Parent : TargetBF->ParentFragments)
595                 dbgs() << ", parent: " << Parent->getPrintName();
596               dbgs() << '\n';
597             }
598           }
599         }
600         if (Value == BF.getAddress())
601           dbgs() << "FAIL: jump table cannot have function start as an entry\n";
602       });
603       break;
604     }
605 
606     // Check there's an instruction at this offset.
607     if (TargetBF->getState() == BinaryFunction::State::Disassembled &&
608         !TargetBF->getInstructionAtOffset(Value - TargetBF->getAddress())) {
609       LLVM_DEBUG(dbgs() << formatv("FAIL: no instruction at {0:x}\n", Value));
610       break;
611     }
612 
613     ++NumRealEntries;
614     LLVM_DEBUG(dbgs() << formatv("OK: {0:x} real entry\n", Value));
615 
616     if (TargetBF != &BF)
617       BF.setHasIndirectTargetToSplitFragment(true);
618     addEntryAddress(Value);
619   }
620 
621   // It's a jump table if the number of real entries is more than 1, or there's
622   // one real entry and "unreachable" targets. If there are only multiple
623   // "unreachable" targets, then it's not a jump table.
624   return NumRealEntries + HasUnreachable >= 2;
625 }
626 
627 void BinaryContext::populateJumpTables() {
628   LLVM_DEBUG(dbgs() << "DataPCRelocations: " << DataPCRelocations.size()
629                     << '\n');
630   for (auto JTI = JumpTables.begin(), JTE = JumpTables.end(); JTI != JTE;
631        ++JTI) {
632     JumpTable *JT = JTI->second;
633 
634     bool NonSimpleParent = false;
635     for (BinaryFunction *BF : JT->Parents)
636       NonSimpleParent |= !BF->isSimple();
637     if (NonSimpleParent)
638       continue;
639 
640     uint64_t NextJTAddress = 0;
641     auto NextJTI = std::next(JTI);
642     if (NextJTI != JTE)
643       NextJTAddress = NextJTI->second->getAddress();
644 
645     const bool Success =
646         analyzeJumpTable(JT->getAddress(), JT->Type, *(JT->Parents[0]),
647                          NextJTAddress, &JT->EntriesAsAddress);
648     if (!Success) {
649       LLVM_DEBUG({
650         dbgs() << "failed to analyze ";
651         JT->print(dbgs());
652         if (NextJTI != JTE) {
653           dbgs() << "next ";
654           NextJTI->second->print(dbgs());
655         }
656       });
657       llvm_unreachable("jump table heuristic failure");
658     }
659     for (BinaryFunction *Frag : JT->Parents) {
660       for (uint64_t EntryAddress : JT->EntriesAsAddress)
661         // if target is builtin_unreachable
662         if (EntryAddress == Frag->getAddress() + Frag->getSize()) {
663           Frag->IgnoredBranches.emplace_back(EntryAddress - Frag->getAddress(),
664                                              Frag->getSize());
665         } else if (EntryAddress >= Frag->getAddress() &&
666                    EntryAddress < Frag->getAddress() + Frag->getSize()) {
667           Frag->registerReferencedOffset(EntryAddress - Frag->getAddress());
668         }
669     }
670 
671     // In strict mode, erase PC-relative relocation record. Later we check that
672     // all such records are erased and thus have been accounted for.
673     if (opts::StrictMode && JT->Type == JumpTable::JTT_PIC) {
674       for (uint64_t Address = JT->getAddress();
675            Address < JT->getAddress() + JT->getSize();
676            Address += JT->EntrySize) {
677         DataPCRelocations.erase(DataPCRelocations.find(Address));
678       }
679     }
680 
681     // Mark to skip the function and all its fragments.
682     for (BinaryFunction *Frag : JT->Parents)
683       if (Frag->hasIndirectTargetToSplitFragment())
684         addFragmentsToSkip(Frag);
685   }
686 
687   if (opts::StrictMode && DataPCRelocations.size()) {
688     LLVM_DEBUG({
689       dbgs() << DataPCRelocations.size()
690              << " unclaimed PC-relative relocations left in data:\n";
691       for (uint64_t Reloc : DataPCRelocations)
692         dbgs() << Twine::utohexstr(Reloc) << '\n';
693     });
694     assert(0 && "unclaimed PC-relative relocations left in data\n");
695   }
696   clearList(DataPCRelocations);
697 }
698 
699 void BinaryContext::skipMarkedFragments() {
700   std::vector<BinaryFunction *> FragmentQueue;
701   // Copy the functions to FragmentQueue.
702   FragmentQueue.assign(FragmentsToSkip.begin(), FragmentsToSkip.end());
703   auto addToWorklist = [&](BinaryFunction *Function) -> void {
704     if (FragmentsToSkip.count(Function))
705       return;
706     FragmentQueue.push_back(Function);
707     addFragmentsToSkip(Function);
708   };
709   // Functions containing split jump tables need to be skipped with all
710   // fragments (transitively).
711   for (size_t I = 0; I != FragmentQueue.size(); I++) {
712     BinaryFunction *BF = FragmentQueue[I];
713     assert(FragmentsToSkip.count(BF) &&
714            "internal error in traversing function fragments");
715     if (opts::Verbosity >= 1)
716       errs() << "BOLT-WARNING: Ignoring " << BF->getPrintName() << '\n';
717     BF->setSimple(false);
718     BF->setHasIndirectTargetToSplitFragment(true);
719 
720     llvm::for_each(BF->Fragments, addToWorklist);
721     llvm::for_each(BF->ParentFragments, addToWorklist);
722   }
723   if (!FragmentsToSkip.empty())
724     errs() << "BOLT-WARNING: skipped " << FragmentsToSkip.size() << " function"
725            << (FragmentsToSkip.size() == 1 ? "" : "s")
726            << " due to cold fragments\n";
727 }
728 
729 MCSymbol *BinaryContext::getOrCreateGlobalSymbol(uint64_t Address, Twine Prefix,
730                                                  uint64_t Size,
731                                                  uint16_t Alignment,
732                                                  unsigned Flags) {
733   auto Itr = BinaryDataMap.find(Address);
734   if (Itr != BinaryDataMap.end()) {
735     assert(Itr->second->getSize() == Size || !Size);
736     return Itr->second->getSymbol();
737   }
738 
739   std::string Name = (Prefix + "0x" + Twine::utohexstr(Address)).str();
740   assert(!GlobalSymbols.count(Name) && "created name is not unique");
741   return registerNameAtAddress(Name, Address, Size, Alignment, Flags);
742 }
743 
744 MCSymbol *BinaryContext::getOrCreateUndefinedGlobalSymbol(StringRef Name) {
745   return Ctx->getOrCreateSymbol(Name);
746 }
747 
748 BinaryFunction *BinaryContext::createBinaryFunction(
749     const std::string &Name, BinarySection &Section, uint64_t Address,
750     uint64_t Size, uint64_t SymbolSize, uint16_t Alignment) {
751   auto Result = BinaryFunctions.emplace(
752       Address, BinaryFunction(Name, Section, Address, Size, *this));
753   assert(Result.second == true && "unexpected duplicate function");
754   BinaryFunction *BF = &Result.first->second;
755   registerNameAtAddress(Name, Address, SymbolSize ? SymbolSize : Size,
756                         Alignment);
757   setSymbolToFunctionMap(BF->getSymbol(), BF);
758   return BF;
759 }
760 
761 const MCSymbol *
762 BinaryContext::getOrCreateJumpTable(BinaryFunction &Function, uint64_t Address,
763                                     JumpTable::JumpTableType Type) {
764   auto isFragmentOf = [](BinaryFunction *Fragment, BinaryFunction *Parent) {
765     return (Fragment->isFragment() && Fragment->isParentFragment(Parent));
766   };
767   (void)isFragmentOf;
768 
769   // Two fragments of same function access same jump table
770   if (JumpTable *JT = getJumpTableContainingAddress(Address)) {
771     assert(JT->Type == Type && "jump table types have to match");
772     assert(Address == JT->getAddress() && "unexpected non-empty jump table");
773 
774     // Prevent associating a jump table to a specific fragment twice.
775     // This simple check arises from the assumption: no more than 2 fragments.
776     if (JT->Parents.size() == 1 && JT->Parents[0] != &Function) {
777       assert((isFragmentOf(JT->Parents[0], &Function) ||
778               isFragmentOf(&Function, JT->Parents[0])) &&
779              "cannot re-use jump table of a different function");
780       // Duplicate the entry for the parent function for easy access
781       JT->Parents.push_back(&Function);
782       if (opts::Verbosity > 2) {
783         outs() << "BOLT-INFO: Multiple fragments access same jump table: "
784                << JT->Parents[0]->getPrintName() << "; "
785                << Function.getPrintName() << "\n";
786         JT->print(outs());
787       }
788       Function.JumpTables.emplace(Address, JT);
789       JT->Parents[0]->setHasIndirectTargetToSplitFragment(true);
790       JT->Parents[1]->setHasIndirectTargetToSplitFragment(true);
791     }
792 
793     bool IsJumpTableParent = false;
794     (void)IsJumpTableParent;
795     for (BinaryFunction *Frag : JT->Parents)
796       if (Frag == &Function)
797         IsJumpTableParent = true;
798     assert(IsJumpTableParent &&
799            "cannot re-use jump table of a different function");
800     return JT->getFirstLabel();
801   }
802 
803   // Re-use the existing symbol if possible.
804   MCSymbol *JTLabel = nullptr;
805   if (BinaryData *Object = getBinaryDataAtAddress(Address)) {
806     if (!isInternalSymbolName(Object->getSymbol()->getName()))
807       JTLabel = Object->getSymbol();
808   }
809 
810   const uint64_t EntrySize = getJumpTableEntrySize(Type);
811   if (!JTLabel) {
812     const std::string JumpTableName = generateJumpTableName(Function, Address);
813     JTLabel = registerNameAtAddress(JumpTableName, Address, 0, EntrySize);
814   }
815 
816   LLVM_DEBUG(dbgs() << "BOLT-DEBUG: creating jump table " << JTLabel->getName()
817                     << " in function " << Function << '\n');
818 
819   JumpTable *JT = new JumpTable(*JTLabel, Address, EntrySize, Type,
820                                 JumpTable::LabelMapType{{0, JTLabel}},
821                                 *getSectionForAddress(Address));
822   JT->Parents.push_back(&Function);
823   if (opts::Verbosity > 2)
824     JT->print(outs());
825   JumpTables.emplace(Address, JT);
826 
827   // Duplicate the entry for the parent function for easy access.
828   Function.JumpTables.emplace(Address, JT);
829   return JTLabel;
830 }
831 
832 std::pair<uint64_t, const MCSymbol *>
833 BinaryContext::duplicateJumpTable(BinaryFunction &Function, JumpTable *JT,
834                                   const MCSymbol *OldLabel) {
835   auto L = scopeLock();
836   unsigned Offset = 0;
837   bool Found = false;
838   for (std::pair<const unsigned, MCSymbol *> Elmt : JT->Labels) {
839     if (Elmt.second != OldLabel)
840       continue;
841     Offset = Elmt.first;
842     Found = true;
843     break;
844   }
845   assert(Found && "Label not found");
846   (void)Found;
847   MCSymbol *NewLabel = Ctx->createNamedTempSymbol("duplicatedJT");
848   JumpTable *NewJT =
849       new JumpTable(*NewLabel, JT->getAddress(), JT->EntrySize, JT->Type,
850                     JumpTable::LabelMapType{{Offset, NewLabel}},
851                     *getSectionForAddress(JT->getAddress()));
852   NewJT->Parents = JT->Parents;
853   NewJT->Entries = JT->Entries;
854   NewJT->Counts = JT->Counts;
855   uint64_t JumpTableID = ++DuplicatedJumpTables;
856   // Invert it to differentiate from regular jump tables whose IDs are their
857   // addresses in the input binary memory space
858   JumpTableID = ~JumpTableID;
859   JumpTables.emplace(JumpTableID, NewJT);
860   Function.JumpTables.emplace(JumpTableID, NewJT);
861   return std::make_pair(JumpTableID, NewLabel);
862 }
863 
864 std::string BinaryContext::generateJumpTableName(const BinaryFunction &BF,
865                                                  uint64_t Address) {
866   size_t Id;
867   uint64_t Offset = 0;
868   if (const JumpTable *JT = BF.getJumpTableContainingAddress(Address)) {
869     Offset = Address - JT->getAddress();
870     auto Itr = JT->Labels.find(Offset);
871     if (Itr != JT->Labels.end())
872       return std::string(Itr->second->getName());
873     Id = JumpTableIds.at(JT->getAddress());
874   } else {
875     Id = JumpTableIds[Address] = BF.JumpTables.size();
876   }
877   return ("JUMP_TABLE/" + BF.getOneName().str() + "." + std::to_string(Id) +
878           (Offset ? ("." + std::to_string(Offset)) : ""));
879 }
880 
881 bool BinaryContext::hasValidCodePadding(const BinaryFunction &BF) {
882   // FIXME: aarch64 support is missing.
883   if (!isX86())
884     return true;
885 
886   if (BF.getSize() == BF.getMaxSize())
887     return true;
888 
889   ErrorOr<ArrayRef<unsigned char>> FunctionData = BF.getData();
890   assert(FunctionData && "cannot get function as data");
891 
892   uint64_t Offset = BF.getSize();
893   MCInst Instr;
894   uint64_t InstrSize = 0;
895   uint64_t InstrAddress = BF.getAddress() + Offset;
896   using std::placeholders::_1;
897 
898   // Skip instructions that satisfy the predicate condition.
899   auto skipInstructions = [&](std::function<bool(const MCInst &)> Predicate) {
900     const uint64_t StartOffset = Offset;
901     for (; Offset < BF.getMaxSize();
902          Offset += InstrSize, InstrAddress += InstrSize) {
903       if (!DisAsm->getInstruction(Instr, InstrSize, FunctionData->slice(Offset),
904                                   InstrAddress, nulls()))
905         break;
906       if (!Predicate(Instr))
907         break;
908     }
909 
910     return Offset - StartOffset;
911   };
912 
913   // Skip a sequence of zero bytes.
914   auto skipZeros = [&]() {
915     const uint64_t StartOffset = Offset;
916     for (; Offset < BF.getMaxSize(); ++Offset)
917       if ((*FunctionData)[Offset] != 0)
918         break;
919 
920     return Offset - StartOffset;
921   };
922 
923   // Accept the whole padding area filled with breakpoints.
924   auto isBreakpoint = std::bind(&MCPlusBuilder::isBreakpoint, MIB.get(), _1);
925   if (skipInstructions(isBreakpoint) && Offset == BF.getMaxSize())
926     return true;
927 
928   auto isNoop = std::bind(&MCPlusBuilder::isNoop, MIB.get(), _1);
929 
930   // Some functions have a jump to the next function or to the padding area
931   // inserted after the body.
932   auto isSkipJump = [&](const MCInst &Instr) {
933     uint64_t TargetAddress = 0;
934     if (MIB->isUnconditionalBranch(Instr) &&
935         MIB->evaluateBranch(Instr, InstrAddress, InstrSize, TargetAddress)) {
936       if (TargetAddress >= InstrAddress + InstrSize &&
937           TargetAddress <= BF.getAddress() + BF.getMaxSize()) {
938         return true;
939       }
940     }
941     return false;
942   };
943 
944   // Skip over nops, jumps, and zero padding. Allow interleaving (this happens).
945   while (skipInstructions(isNoop) || skipInstructions(isSkipJump) ||
946          skipZeros())
947     ;
948 
949   if (Offset == BF.getMaxSize())
950     return true;
951 
952   if (opts::Verbosity >= 1) {
953     errs() << "BOLT-WARNING: bad padding at address 0x"
954            << Twine::utohexstr(BF.getAddress() + BF.getSize())
955            << " starting at offset " << (Offset - BF.getSize())
956            << " in function " << BF << '\n'
957            << FunctionData->slice(BF.getSize(), BF.getMaxSize() - BF.getSize())
958            << '\n';
959   }
960 
961   return false;
962 }
963 
964 void BinaryContext::adjustCodePadding() {
965   for (auto &BFI : BinaryFunctions) {
966     BinaryFunction &BF = BFI.second;
967     if (!shouldEmit(BF))
968       continue;
969 
970     if (!hasValidCodePadding(BF)) {
971       if (HasRelocations) {
972         if (opts::Verbosity >= 1) {
973           outs() << "BOLT-INFO: function " << BF
974                  << " has invalid padding. Ignoring the function.\n";
975         }
976         BF.setIgnored();
977       } else {
978         BF.setMaxSize(BF.getSize());
979       }
980     }
981   }
982 }
983 
984 MCSymbol *BinaryContext::registerNameAtAddress(StringRef Name, uint64_t Address,
985                                                uint64_t Size,
986                                                uint16_t Alignment,
987                                                unsigned Flags) {
988   // Register the name with MCContext.
989   MCSymbol *Symbol = Ctx->getOrCreateSymbol(Name);
990 
991   auto GAI = BinaryDataMap.find(Address);
992   BinaryData *BD;
993   if (GAI == BinaryDataMap.end()) {
994     ErrorOr<BinarySection &> SectionOrErr = getSectionForAddress(Address);
995     BinarySection &Section =
996         SectionOrErr ? SectionOrErr.get() : absoluteSection();
997     BD = new BinaryData(*Symbol, Address, Size, Alignment ? Alignment : 1,
998                         Section, Flags);
999     GAI = BinaryDataMap.emplace(Address, BD).first;
1000     GlobalSymbols[Name] = BD;
1001     updateObjectNesting(GAI);
1002   } else {
1003     BD = GAI->second;
1004     if (!BD->hasName(Name)) {
1005       GlobalSymbols[Name] = BD;
1006       BD->Symbols.push_back(Symbol);
1007     }
1008   }
1009 
1010   return Symbol;
1011 }
1012 
1013 const BinaryData *
1014 BinaryContext::getBinaryDataContainingAddressImpl(uint64_t Address) const {
1015   auto NI = BinaryDataMap.lower_bound(Address);
1016   auto End = BinaryDataMap.end();
1017   if ((NI != End && Address == NI->first) ||
1018       ((NI != BinaryDataMap.begin()) && (NI-- != BinaryDataMap.begin()))) {
1019     if (NI->second->containsAddress(Address))
1020       return NI->second;
1021 
1022     // If this is a sub-symbol, see if a parent data contains the address.
1023     const BinaryData *BD = NI->second->getParent();
1024     while (BD) {
1025       if (BD->containsAddress(Address))
1026         return BD;
1027       BD = BD->getParent();
1028     }
1029   }
1030   return nullptr;
1031 }
1032 
1033 bool BinaryContext::setBinaryDataSize(uint64_t Address, uint64_t Size) {
1034   auto NI = BinaryDataMap.find(Address);
1035   assert(NI != BinaryDataMap.end());
1036   if (NI == BinaryDataMap.end())
1037     return false;
1038   // TODO: it's possible that a jump table starts at the same address
1039   // as a larger blob of private data.  When we set the size of the
1040   // jump table, it might be smaller than the total blob size.  In this
1041   // case we just leave the original size since (currently) it won't really
1042   // affect anything.
1043   assert((!NI->second->Size || NI->second->Size == Size ||
1044           (NI->second->isJumpTable() && NI->second->Size > Size)) &&
1045          "can't change the size of a symbol that has already had its "
1046          "size set");
1047   if (!NI->second->Size) {
1048     NI->second->Size = Size;
1049     updateObjectNesting(NI);
1050     return true;
1051   }
1052   return false;
1053 }
1054 
1055 void BinaryContext::generateSymbolHashes() {
1056   auto isPadding = [](const BinaryData &BD) {
1057     StringRef Contents = BD.getSection().getContents();
1058     StringRef SymData = Contents.substr(BD.getOffset(), BD.getSize());
1059     return (BD.getName().startswith("HOLEat") ||
1060             SymData.find_first_not_of(0) == StringRef::npos);
1061   };
1062 
1063   uint64_t NumCollisions = 0;
1064   for (auto &Entry : BinaryDataMap) {
1065     BinaryData &BD = *Entry.second;
1066     StringRef Name = BD.getName();
1067 
1068     if (!isInternalSymbolName(Name))
1069       continue;
1070 
1071     // First check if a non-anonymous alias exists and move it to the front.
1072     if (BD.getSymbols().size() > 1) {
1073       auto Itr = llvm::find_if(BD.getSymbols(), [&](const MCSymbol *Symbol) {
1074         return !isInternalSymbolName(Symbol->getName());
1075       });
1076       if (Itr != BD.getSymbols().end()) {
1077         size_t Idx = std::distance(BD.getSymbols().begin(), Itr);
1078         std::swap(BD.getSymbols()[0], BD.getSymbols()[Idx]);
1079         continue;
1080       }
1081     }
1082 
1083     // We have to skip 0 size symbols since they will all collide.
1084     if (BD.getSize() == 0) {
1085       continue;
1086     }
1087 
1088     const uint64_t Hash = BD.getSection().hash(BD);
1089     const size_t Idx = Name.find("0x");
1090     std::string NewName =
1091         (Twine(Name.substr(0, Idx)) + "_" + Twine::utohexstr(Hash)).str();
1092     if (getBinaryDataByName(NewName)) {
1093       // Ignore collisions for symbols that appear to be padding
1094       // (i.e. all zeros or a "hole")
1095       if (!isPadding(BD)) {
1096         if (opts::Verbosity) {
1097           errs() << "BOLT-WARNING: collision detected when hashing " << BD
1098                  << " with new name (" << NewName << "), skipping.\n";
1099         }
1100         ++NumCollisions;
1101       }
1102       continue;
1103     }
1104     BD.Symbols.insert(BD.Symbols.begin(), Ctx->getOrCreateSymbol(NewName));
1105     GlobalSymbols[NewName] = &BD;
1106   }
1107   if (NumCollisions) {
1108     errs() << "BOLT-WARNING: " << NumCollisions
1109            << " collisions detected while hashing binary objects";
1110     if (!opts::Verbosity)
1111       errs() << ". Use -v=1 to see the list.";
1112     errs() << '\n';
1113   }
1114 }
1115 
1116 bool BinaryContext::registerFragment(BinaryFunction &TargetFunction,
1117                                      BinaryFunction &Function) const {
1118   if (!isPotentialFragmentByName(TargetFunction, Function))
1119     return false;
1120   assert(TargetFunction.isFragment() && "TargetFunction must be a fragment");
1121   if (TargetFunction.isParentFragment(&Function))
1122     return true;
1123   TargetFunction.addParentFragment(Function);
1124   Function.addFragment(TargetFunction);
1125   if (!HasRelocations) {
1126     TargetFunction.setSimple(false);
1127     Function.setSimple(false);
1128   }
1129   if (opts::Verbosity >= 1) {
1130     outs() << "BOLT-INFO: marking " << TargetFunction << " as a fragment of "
1131            << Function << '\n';
1132   }
1133   return true;
1134 }
1135 
1136 void BinaryContext::addAdrpAddRelocAArch64(BinaryFunction &BF,
1137                                            MCInst &LoadLowBits,
1138                                            MCInst &LoadHiBits,
1139                                            uint64_t Target) {
1140   const MCSymbol *TargetSymbol;
1141   uint64_t Addend = 0;
1142   std::tie(TargetSymbol, Addend) = handleAddressRef(Target, BF,
1143                                                     /*IsPCRel*/ true);
1144   int64_t Val;
1145   MIB->replaceImmWithSymbolRef(LoadHiBits, TargetSymbol, Addend, Ctx.get(), Val,
1146                                ELF::R_AARCH64_ADR_PREL_PG_HI21);
1147   MIB->replaceImmWithSymbolRef(LoadLowBits, TargetSymbol, Addend, Ctx.get(),
1148                                Val, ELF::R_AARCH64_ADD_ABS_LO12_NC);
1149 }
1150 
1151 bool BinaryContext::handleAArch64Veneer(uint64_t Address, bool MatchOnly) {
1152   BinaryFunction *TargetFunction = getBinaryFunctionContainingAddress(Address);
1153   if (TargetFunction)
1154     return false;
1155 
1156   ErrorOr<BinarySection &> Section = getSectionForAddress(Address);
1157   assert(Section && "cannot get section for referenced address");
1158   if (!Section->isText())
1159     return false;
1160 
1161   bool Ret = false;
1162   StringRef SectionContents = Section->getContents();
1163   uint64_t Offset = Address - Section->getAddress();
1164   const uint64_t MaxSize = SectionContents.size() - Offset;
1165   const uint8_t *Bytes =
1166       reinterpret_cast<const uint8_t *>(SectionContents.data());
1167   ArrayRef<uint8_t> Data(Bytes + Offset, MaxSize);
1168 
1169   auto matchVeneer = [&](BinaryFunction::InstrMapType &Instructions,
1170                          MCInst &Instruction, uint64_t Offset,
1171                          uint64_t AbsoluteInstrAddr,
1172                          uint64_t TotalSize) -> bool {
1173     MCInst *TargetHiBits, *TargetLowBits;
1174     uint64_t TargetAddress, Count;
1175     Count = MIB->matchLinkerVeneer(Instructions.begin(), Instructions.end(),
1176                                    AbsoluteInstrAddr, Instruction, TargetHiBits,
1177                                    TargetLowBits, TargetAddress);
1178     if (!Count)
1179       return false;
1180 
1181     if (MatchOnly)
1182       return true;
1183 
1184     // NOTE The target symbol was created during disassemble's
1185     // handleExternalReference
1186     const MCSymbol *VeneerSymbol = getOrCreateGlobalSymbol(Address, "FUNCat");
1187     BinaryFunction *Veneer = createBinaryFunction(VeneerSymbol->getName().str(),
1188                                                   *Section, Address, TotalSize);
1189     addAdrpAddRelocAArch64(*Veneer, *TargetLowBits, *TargetHiBits,
1190                            TargetAddress);
1191     MIB->addAnnotation(Instruction, "AArch64Veneer", true);
1192     Veneer->addInstruction(Offset, std::move(Instruction));
1193     --Count;
1194     for (auto It = Instructions.rbegin(); Count != 0; ++It, --Count) {
1195       MIB->addAnnotation(It->second, "AArch64Veneer", true);
1196       Veneer->addInstruction(It->first, std::move(It->second));
1197     }
1198 
1199     Veneer->getOrCreateLocalLabel(Address);
1200     Veneer->setMaxSize(TotalSize);
1201     Veneer->updateState(BinaryFunction::State::Disassembled);
1202     LLVM_DEBUG(dbgs() << "BOLT-DEBUG: handling veneer function at 0x" << Address
1203                       << "\n");
1204     return true;
1205   };
1206 
1207   uint64_t Size = 0, TotalSize = 0;
1208   BinaryFunction::InstrMapType VeneerInstructions;
1209   for (Offset = 0; Offset < MaxSize; Offset += Size) {
1210     MCInst Instruction;
1211     const uint64_t AbsoluteInstrAddr = Address + Offset;
1212     if (!SymbolicDisAsm->getInstruction(Instruction, Size, Data.slice(Offset),
1213                                         AbsoluteInstrAddr, nulls()))
1214       break;
1215 
1216     TotalSize += Size;
1217     if (MIB->isBranch(Instruction)) {
1218       Ret = matchVeneer(VeneerInstructions, Instruction, Offset,
1219                         AbsoluteInstrAddr, TotalSize);
1220       break;
1221     }
1222 
1223     VeneerInstructions.emplace(Offset, std::move(Instruction));
1224   }
1225 
1226   return Ret;
1227 }
1228 
1229 void BinaryContext::processInterproceduralReferences() {
1230   for (const std::pair<BinaryFunction *, uint64_t> &It :
1231        InterproceduralReferences) {
1232     BinaryFunction &Function = *It.first;
1233     uint64_t Address = It.second;
1234     if (!Address || Function.isIgnored())
1235       continue;
1236 
1237     BinaryFunction *TargetFunction =
1238         getBinaryFunctionContainingAddress(Address);
1239     if (&Function == TargetFunction)
1240       continue;
1241 
1242     if (TargetFunction) {
1243       if (TargetFunction->isFragment() &&
1244           !registerFragment(*TargetFunction, Function)) {
1245         errs() << "BOLT-WARNING: interprocedural reference between unrelated "
1246                   "fragments: "
1247                << Function.getPrintName() << " and "
1248                << TargetFunction->getPrintName() << '\n';
1249       }
1250       if (uint64_t Offset = Address - TargetFunction->getAddress())
1251         TargetFunction->addEntryPointAtOffset(Offset);
1252 
1253       continue;
1254     }
1255 
1256     // Check if address falls in function padding space - this could be
1257     // unmarked data in code. In this case adjust the padding space size.
1258     ErrorOr<BinarySection &> Section = getSectionForAddress(Address);
1259     assert(Section && "cannot get section for referenced address");
1260 
1261     if (!Section->isText())
1262       continue;
1263 
1264     // PLT requires special handling and could be ignored in this context.
1265     StringRef SectionName = Section->getName();
1266     if (SectionName == ".plt" || SectionName == ".plt.got")
1267       continue;
1268 
1269     // Check if it is aarch64 veneer written at Address
1270     if (isAArch64() && handleAArch64Veneer(Address))
1271       continue;
1272 
1273     if (opts::processAllFunctions()) {
1274       errs() << "BOLT-ERROR: cannot process binaries with unmarked "
1275              << "object in code at address 0x" << Twine::utohexstr(Address)
1276              << " belonging to section " << SectionName << " in current mode\n";
1277       exit(1);
1278     }
1279 
1280     TargetFunction = getBinaryFunctionContainingAddress(Address,
1281                                                         /*CheckPastEnd=*/false,
1282                                                         /*UseMaxSize=*/true);
1283     // We are not going to overwrite non-simple functions, but for simple
1284     // ones - adjust the padding size.
1285     if (TargetFunction && TargetFunction->isSimple()) {
1286       errs() << "BOLT-WARNING: function " << *TargetFunction
1287              << " has an object detected in a padding region at address 0x"
1288              << Twine::utohexstr(Address) << '\n';
1289       TargetFunction->setMaxSize(TargetFunction->getSize());
1290     }
1291   }
1292 
1293   InterproceduralReferences.clear();
1294 }
1295 
1296 void BinaryContext::postProcessSymbolTable() {
1297   fixBinaryDataHoles();
1298   bool Valid = true;
1299   for (auto &Entry : BinaryDataMap) {
1300     BinaryData *BD = Entry.second;
1301     if ((BD->getName().startswith("SYMBOLat") ||
1302          BD->getName().startswith("DATAat")) &&
1303         !BD->getParent() && !BD->getSize() && !BD->isAbsolute() &&
1304         BD->getSection()) {
1305       errs() << "BOLT-WARNING: zero-sized top level symbol: " << *BD << "\n";
1306       Valid = false;
1307     }
1308   }
1309   assert(Valid);
1310   (void)Valid;
1311   generateSymbolHashes();
1312 }
1313 
1314 void BinaryContext::foldFunction(BinaryFunction &ChildBF,
1315                                  BinaryFunction &ParentBF) {
1316   assert(!ChildBF.isMultiEntry() && !ParentBF.isMultiEntry() &&
1317          "cannot merge functions with multiple entry points");
1318 
1319   std::unique_lock<llvm::sys::RWMutex> WriteCtxLock(CtxMutex, std::defer_lock);
1320   std::unique_lock<llvm::sys::RWMutex> WriteSymbolMapLock(
1321       SymbolToFunctionMapMutex, std::defer_lock);
1322 
1323   const StringRef ChildName = ChildBF.getOneName();
1324 
1325   // Move symbols over and update bookkeeping info.
1326   for (MCSymbol *Symbol : ChildBF.getSymbols()) {
1327     ParentBF.getSymbols().push_back(Symbol);
1328     WriteSymbolMapLock.lock();
1329     SymbolToFunctionMap[Symbol] = &ParentBF;
1330     WriteSymbolMapLock.unlock();
1331     // NB: there's no need to update BinaryDataMap and GlobalSymbols.
1332   }
1333   ChildBF.getSymbols().clear();
1334 
1335   // Move other names the child function is known under.
1336   llvm::move(ChildBF.Aliases, std::back_inserter(ParentBF.Aliases));
1337   ChildBF.Aliases.clear();
1338 
1339   if (HasRelocations) {
1340     // Merge execution counts of ChildBF into those of ParentBF.
1341     // Without relocations, we cannot reliably merge profiles as both functions
1342     // continue to exist and either one can be executed.
1343     ChildBF.mergeProfileDataInto(ParentBF);
1344 
1345     std::shared_lock<llvm::sys::RWMutex> ReadBfsLock(BinaryFunctionsMutex,
1346                                                      std::defer_lock);
1347     std::unique_lock<llvm::sys::RWMutex> WriteBfsLock(BinaryFunctionsMutex,
1348                                                       std::defer_lock);
1349     // Remove ChildBF from the global set of functions in relocs mode.
1350     ReadBfsLock.lock();
1351     auto FI = BinaryFunctions.find(ChildBF.getAddress());
1352     ReadBfsLock.unlock();
1353 
1354     assert(FI != BinaryFunctions.end() && "function not found");
1355     assert(&ChildBF == &FI->second && "function mismatch");
1356 
1357     WriteBfsLock.lock();
1358     ChildBF.clearDisasmState();
1359     FI = BinaryFunctions.erase(FI);
1360     WriteBfsLock.unlock();
1361 
1362   } else {
1363     // In non-relocation mode we keep the function, but rename it.
1364     std::string NewName = "__ICF_" + ChildName.str();
1365 
1366     WriteCtxLock.lock();
1367     ChildBF.getSymbols().push_back(Ctx->getOrCreateSymbol(NewName));
1368     WriteCtxLock.unlock();
1369 
1370     ChildBF.setFolded(&ParentBF);
1371   }
1372 }
1373 
1374 void BinaryContext::fixBinaryDataHoles() {
1375   assert(validateObjectNesting() && "object nesting inconsitency detected");
1376 
1377   for (BinarySection &Section : allocatableSections()) {
1378     std::vector<std::pair<uint64_t, uint64_t>> Holes;
1379 
1380     auto isNotHole = [&Section](const binary_data_iterator &Itr) {
1381       BinaryData *BD = Itr->second;
1382       bool isHole = (!BD->getParent() && !BD->getSize() && BD->isObject() &&
1383                      (BD->getName().startswith("SYMBOLat0x") ||
1384                       BD->getName().startswith("DATAat0x") ||
1385                       BD->getName().startswith("ANONYMOUS")));
1386       return !isHole && BD->getSection() == Section && !BD->getParent();
1387     };
1388 
1389     auto BDStart = BinaryDataMap.begin();
1390     auto BDEnd = BinaryDataMap.end();
1391     auto Itr = FilteredBinaryDataIterator(isNotHole, BDStart, BDEnd);
1392     auto End = FilteredBinaryDataIterator(isNotHole, BDEnd, BDEnd);
1393 
1394     uint64_t EndAddress = Section.getAddress();
1395 
1396     while (Itr != End) {
1397       if (Itr->second->getAddress() > EndAddress) {
1398         uint64_t Gap = Itr->second->getAddress() - EndAddress;
1399         Holes.emplace_back(EndAddress, Gap);
1400       }
1401       EndAddress = Itr->second->getEndAddress();
1402       ++Itr;
1403     }
1404 
1405     if (EndAddress < Section.getEndAddress())
1406       Holes.emplace_back(EndAddress, Section.getEndAddress() - EndAddress);
1407 
1408     // If there is already a symbol at the start of the hole, grow that symbol
1409     // to cover the rest.  Otherwise, create a new symbol to cover the hole.
1410     for (std::pair<uint64_t, uint64_t> &Hole : Holes) {
1411       BinaryData *BD = getBinaryDataAtAddress(Hole.first);
1412       if (BD) {
1413         // BD->getSection() can be != Section if there are sections that
1414         // overlap.  In this case it is probably safe to just skip the holes
1415         // since the overlapping section will not(?) have any symbols in it.
1416         if (BD->getSection() == Section)
1417           setBinaryDataSize(Hole.first, Hole.second);
1418       } else {
1419         getOrCreateGlobalSymbol(Hole.first, "HOLEat", Hole.second, 1);
1420       }
1421     }
1422   }
1423 
1424   assert(validateObjectNesting() && "object nesting inconsitency detected");
1425   assert(validateHoles() && "top level hole detected in object map");
1426 }
1427 
1428 void BinaryContext::printGlobalSymbols(raw_ostream &OS) const {
1429   const BinarySection *CurrentSection = nullptr;
1430   bool FirstSection = true;
1431 
1432   for (auto &Entry : BinaryDataMap) {
1433     const BinaryData *BD = Entry.second;
1434     const BinarySection &Section = BD->getSection();
1435     if (FirstSection || Section != *CurrentSection) {
1436       uint64_t Address, Size;
1437       StringRef Name = Section.getName();
1438       if (Section) {
1439         Address = Section.getAddress();
1440         Size = Section.getSize();
1441       } else {
1442         Address = BD->getAddress();
1443         Size = BD->getSize();
1444       }
1445       OS << "BOLT-INFO: Section " << Name << ", "
1446          << "0x" + Twine::utohexstr(Address) << ":"
1447          << "0x" + Twine::utohexstr(Address + Size) << "/" << Size << "\n";
1448       CurrentSection = &Section;
1449       FirstSection = false;
1450     }
1451 
1452     OS << "BOLT-INFO: ";
1453     const BinaryData *P = BD->getParent();
1454     while (P) {
1455       OS << "  ";
1456       P = P->getParent();
1457     }
1458     OS << *BD << "\n";
1459   }
1460 }
1461 
1462 Expected<unsigned> BinaryContext::getDwarfFile(
1463     StringRef Directory, StringRef FileName, unsigned FileNumber,
1464     Optional<MD5::MD5Result> Checksum, std::optional<StringRef> Source,
1465     unsigned CUID, unsigned DWARFVersion) {
1466   DwarfLineTable &Table = DwarfLineTablesCUMap[CUID];
1467   return Table.tryGetFile(Directory, FileName, Checksum, Source, DWARFVersion,
1468                           FileNumber);
1469 }
1470 
1471 unsigned BinaryContext::addDebugFilenameToUnit(const uint32_t DestCUID,
1472                                                const uint32_t SrcCUID,
1473                                                unsigned FileIndex) {
1474   DWARFCompileUnit *SrcUnit = DwCtx->getCompileUnitForOffset(SrcCUID);
1475   const DWARFDebugLine::LineTable *LineTable =
1476       DwCtx->getLineTableForUnit(SrcUnit);
1477   const std::vector<DWARFDebugLine::FileNameEntry> &FileNames =
1478       LineTable->Prologue.FileNames;
1479   // Dir indexes start at 1, as DWARF file numbers, and a dir index 0
1480   // means empty dir.
1481   assert(FileIndex > 0 && FileIndex <= FileNames.size() &&
1482          "FileIndex out of range for the compilation unit.");
1483   StringRef Dir = "";
1484   if (FileNames[FileIndex - 1].DirIdx != 0) {
1485     if (Optional<const char *> DirName = dwarf::toString(
1486             LineTable->Prologue
1487                 .IncludeDirectories[FileNames[FileIndex - 1].DirIdx - 1])) {
1488       Dir = *DirName;
1489     }
1490   }
1491   StringRef FileName = "";
1492   if (Optional<const char *> FName =
1493           dwarf::toString(FileNames[FileIndex - 1].Name))
1494     FileName = *FName;
1495   assert(FileName != "");
1496   DWARFCompileUnit *DstUnit = DwCtx->getCompileUnitForOffset(DestCUID);
1497   return cantFail(getDwarfFile(Dir, FileName, 0, std::nullopt, std::nullopt,
1498                                DestCUID, DstUnit->getVersion()));
1499 }
1500 
1501 std::vector<BinaryFunction *> BinaryContext::getSortedFunctions() {
1502   std::vector<BinaryFunction *> SortedFunctions(BinaryFunctions.size());
1503   llvm::transform(BinaryFunctions, SortedFunctions.begin(),
1504                   [](std::pair<const uint64_t, BinaryFunction> &BFI) {
1505                     return &BFI.second;
1506                   });
1507 
1508   llvm::stable_sort(SortedFunctions,
1509                     [](const BinaryFunction *A, const BinaryFunction *B) {
1510                       if (A->hasValidIndex() && B->hasValidIndex()) {
1511                         return A->getIndex() < B->getIndex();
1512                       }
1513                       return A->hasValidIndex();
1514                     });
1515   return SortedFunctions;
1516 }
1517 
1518 std::vector<BinaryFunction *> BinaryContext::getAllBinaryFunctions() {
1519   std::vector<BinaryFunction *> AllFunctions;
1520   AllFunctions.reserve(BinaryFunctions.size() + InjectedBinaryFunctions.size());
1521   llvm::transform(BinaryFunctions, std::back_inserter(AllFunctions),
1522                   [](std::pair<const uint64_t, BinaryFunction> &BFI) {
1523                     return &BFI.second;
1524                   });
1525   llvm::copy(InjectedBinaryFunctions, std::back_inserter(AllFunctions));
1526 
1527   return AllFunctions;
1528 }
1529 
1530 Optional<DWARFUnit *> BinaryContext::getDWOCU(uint64_t DWOId) {
1531   auto Iter = DWOCUs.find(DWOId);
1532   if (Iter == DWOCUs.end())
1533     return std::nullopt;
1534 
1535   return Iter->second;
1536 }
1537 
1538 DWARFContext *BinaryContext::getDWOContext() const {
1539   if (DWOCUs.empty())
1540     return nullptr;
1541   return &DWOCUs.begin()->second->getContext();
1542 }
1543 
1544 /// Handles DWO sections that can either be in .o, .dwo or .dwp files.
1545 void BinaryContext::preprocessDWODebugInfo() {
1546   for (const std::unique_ptr<DWARFUnit> &CU : DwCtx->compile_units()) {
1547     DWARFUnit *const DwarfUnit = CU.get();
1548     if (llvm::Optional<uint64_t> DWOId = DwarfUnit->getDWOId()) {
1549       DWARFUnit *DWOCU = DwarfUnit->getNonSkeletonUnitDIE(false).getDwarfUnit();
1550       if (!DWOCU->isDWOUnit()) {
1551         std::string DWOName = dwarf::toString(
1552             DwarfUnit->getUnitDIE().find(
1553                 {dwarf::DW_AT_dwo_name, dwarf::DW_AT_GNU_dwo_name}),
1554             "");
1555         outs() << "BOLT-WARNING: Debug Fission: DWO debug information for "
1556                << DWOName
1557                << " was not retrieved and won't be updated. Please check "
1558                   "relative path.\n";
1559         continue;
1560       }
1561       DWOCUs[*DWOId] = DWOCU;
1562     }
1563   }
1564 }
1565 
/// Associate functions with DWARF compile units and register the source files
/// of the units that will be updated.
///
/// The steps, in order:
///  1. Collect the address ranges of every compile unit and use them to set
///     the DWARF unit of each function in BinaryFunctions.
///  2. Fill ProcessedCUs with the units of functions that shouldEmit()
///     accepts, and clear the unit of functions that belong to any other CU.
///  3. Run preprocessDWODebugInfo().
///  4. For every compile unit, label its line table; for units in
///     ProcessedCUs, additionally register the root file (DWARF v5) and all
///     file names of the unit's line table through getDwarfFile().
void BinaryContext::preprocessDebugInfo() {
  // Address range [LowPC, HighPC) together with the unit it came from. Ranges
  // are ordered by LowPC only.
  struct CURange {
    uint64_t LowPC;
    uint64_t HighPC;
    DWARFUnit *Unit;

    bool operator<(const CURange &Other) const { return LowPC < Other.LowPC; }
  };

  // Building a map of address ranges to CUs similar to .debug_aranges and use
  // it to assign CU to functions.
  std::vector<CURange> AllRanges;
  AllRanges.reserve(DwCtx->getNumCompileUnits());
  for (const std::unique_ptr<DWARFUnit> &CU : DwCtx->compile_units()) {
    Expected<DWARFAddressRangesVector> RangesOrError =
        CU->getUnitDIE().getAddressRanges();
    if (!RangesOrError) {
      // Units whose ranges cannot be read are skipped; note that this also
      // skips the version bookkeeping at the bottom of the loop for them.
      consumeError(RangesOrError.takeError());
      continue;
    }
    for (DWARFAddressRange &Range : *RangesOrError) {
      // Parts of the debug info could be invalidated due to corresponding code
      // being removed from the binary by the linker. Hence we check if the
      // address is a valid one.
      if (containsAddress(Range.LowPC))
        AllRanges.emplace_back(CURange{Range.LowPC, Range.HighPC, CU.get()});
    }

    ContainsDwarf5 |= CU->getVersion() >= 5;
    ContainsDwarfLegacy |= CU->getVersion() < 5;
  }

  llvm::sort(AllRanges);
  for (auto &KV : BinaryFunctions) {
    const uint64_t FunctionAddress = KV.first;
    BinaryFunction &Function = KV.second;

    // Find the first range that ends above the function address; the function
    // belongs to it only if the range also starts at or below that address.
    // NOTE(review): the ranges are sorted by LowPC while the predicate tests
    // HighPC, so this presumably relies on ranges not overlapping - confirm.
    auto It = llvm::partition_point(
        AllRanges, [=](CURange R) { return R.HighPC <= FunctionAddress; });
    if (It != AllRanges.end() && It->LowPC <= FunctionAddress)
      Function.setDWARFUnit(It->Unit);
  }

  // Discover units with debug info that needs to be updated.
  for (const auto &KV : BinaryFunctions) {
    const BinaryFunction &BF = KV.second;
    if (shouldEmit(BF) && BF.getDWARFUnit())
      ProcessedCUs.insert(BF.getDWARFUnit());
  }

  // Clear debug info for functions from units that we are not going to process.
  for (auto &KV : BinaryFunctions) {
    BinaryFunction &BF = KV.second;
    if (BF.getDWARFUnit() && !ProcessedCUs.count(BF.getDWARFUnit()))
      BF.setDWARFUnit(nullptr);
  }

  if (opts::Verbosity >= 1) {
    outs() << "BOLT-INFO: " << ProcessedCUs.size() << " out of "
           << DwCtx->getNumCompileUnits() << " CUs will be updated\n";
  }

  preprocessDWODebugInfo();

  // Populate MCContext with DWARF files from all units.
  StringRef GlobalPrefix = AsmInfo->getPrivateGlobalPrefix();
  for (const std::unique_ptr<DWARFUnit> &CU : DwCtx->compile_units()) {
    // The unit offset is used as the key of the per-unit line table.
    const uint64_t CUID = CU->getOffset();
    DwarfLineTable &BinaryLineTable = getDwarfLineTable(CUID);
    BinaryLineTable.setLabel(Ctx->getOrCreateSymbol(
        GlobalPrefix + "line_table_start" + Twine(CUID)));

    // Every unit gets a labelled line table, but only processed units have
    // their files registered below.
    if (!ProcessedCUs.count(CU.get()))
      continue;

    // NOTE(review): LineTable is dereferenced without a null check - confirm
    // that getLineTableForUnit() cannot return nullptr for a processed unit.
    const DWARFDebugLine::LineTable *LineTable =
        DwCtx->getLineTableForUnit(CU.get());
    const std::vector<DWARFDebugLine::FileNameEntry> &FileNames =
        LineTable->Prologue.FileNames;

    uint16_t DwarfVersion = LineTable->Prologue.getVersion();
    if (DwarfVersion >= 5) {
      // For DWARF v5 line tables, set the root file from the unit's name (or
      // the DWO unit's name for split units) and the checksum of file entry 0.
      // NOTE(review): FileNames[0] is read before the FileNames.empty() check
      // further down - confirm an MD5-carrying table always has an entry.
      Optional<MD5::MD5Result> Checksum;
      if (LineTable->Prologue.ContentTypes.HasMD5)
        Checksum = LineTable->Prologue.FileNames[0].Checksum;
      Optional<const char *> Name =
          dwarf::toString(CU->getUnitDIE().find(dwarf::DW_AT_name), nullptr);
      if (Optional<uint64_t> DWOID = CU->getDWOId()) {
        // NOTE(review): preprocessDWODebugInfo() leaves out units whose DWO
        // could not be retrieved, and this lookup is only guarded by an
        // assert, so Iter may be end() when assertions are disabled.
        auto Iter = DWOCUs.find(*DWOID);
        assert(Iter != DWOCUs.end() && "DWO CU was not found.");
        Name = dwarf::toString(
            Iter->second->getUnitDIE().find(dwarf::DW_AT_name), nullptr);
      }
      BinaryLineTable.setRootFile(CU->getCompilationDir(), *Name, Checksum,
                                  std::nullopt);
    }

    BinaryLineTable.setDwarfVersion(DwarfVersion);

    // Assign a unique label to every line table, one per CU.
    // Make sure empty debug line tables are registered too.
    if (FileNames.empty()) {
      cantFail(getDwarfFile("", "<unknown>", 0, std::nullopt, std::nullopt,
                            CUID, DwarfVersion));
      continue;
    }
    // Before DWARF v5 a non-zero directory index is shifted down by one to
    // index IncludeDirectories; from v5 on the index is used as is.
    const uint32_t Offset = DwarfVersion < 5 ? 1 : 0;
    for (size_t I = 0, Size = FileNames.size(); I != Size; ++I) {
      // Dir indexes start at 1, as DWARF file numbers, and a dir index 0
      // means empty dir.
      StringRef Dir = "";
      if (FileNames[I].DirIdx != 0 || DwarfVersion >= 5)
        if (Optional<const char *> DirName = dwarf::toString(
                LineTable->Prologue
                    .IncludeDirectories[FileNames[I].DirIdx - Offset]))
          Dir = *DirName;
      StringRef FileName = "";
      if (Optional<const char *> FName = dwarf::toString(FileNames[I].Name))
        FileName = *FName;
      assert(FileName != "");
      // Checksums are only forwarded for DWARF v5 tables that carry MD5 data.
      Optional<MD5::MD5Result> Checksum;
      if (DwarfVersion >= 5 && LineTable->Prologue.ContentTypes.HasMD5)
        Checksum = LineTable->Prologue.FileNames[I].Checksum;
      cantFail(getDwarfFile(Dir, FileName, 0, Checksum, std::nullopt, CUID,
                            DwarfVersion));
    }
  }
}
1694 
1695 bool BinaryContext::shouldEmit(const BinaryFunction &Function) const {
1696   if (Function.isPseudo())
1697     return false;
1698 
1699   if (opts::processAllFunctions())
1700     return true;
1701 
1702   if (Function.isIgnored())
1703     return false;
1704 
1705   // In relocation mode we will emit non-simple functions with CFG.
1706   // If the function does not have a CFG it should be marked as ignored.
1707   return HasRelocations || Function.isSimple();
1708 }
1709 
1710 void BinaryContext::printCFI(raw_ostream &OS, const MCCFIInstruction &Inst) {
1711   uint32_t Operation = Inst.getOperation();
1712   switch (Operation) {
1713   case MCCFIInstruction::OpSameValue:
1714     OS << "OpSameValue Reg" << Inst.getRegister();
1715     break;
1716   case MCCFIInstruction::OpRememberState:
1717     OS << "OpRememberState";
1718     break;
1719   case MCCFIInstruction::OpRestoreState:
1720     OS << "OpRestoreState";
1721     break;
1722   case MCCFIInstruction::OpOffset:
1723     OS << "OpOffset Reg" << Inst.getRegister() << " " << Inst.getOffset();
1724     break;
1725   case MCCFIInstruction::OpDefCfaRegister:
1726     OS << "OpDefCfaRegister Reg" << Inst.getRegister();
1727     break;
1728   case MCCFIInstruction::OpDefCfaOffset:
1729     OS << "OpDefCfaOffset " << Inst.getOffset();
1730     break;
1731   case MCCFIInstruction::OpDefCfa:
1732     OS << "OpDefCfa Reg" << Inst.getRegister() << " " << Inst.getOffset();
1733     break;
1734   case MCCFIInstruction::OpRelOffset:
1735     OS << "OpRelOffset Reg" << Inst.getRegister() << " " << Inst.getOffset();
1736     break;
1737   case MCCFIInstruction::OpAdjustCfaOffset:
1738     OS << "OfAdjustCfaOffset " << Inst.getOffset();
1739     break;
1740   case MCCFIInstruction::OpEscape:
1741     OS << "OpEscape";
1742     break;
1743   case MCCFIInstruction::OpRestore:
1744     OS << "OpRestore Reg" << Inst.getRegister();
1745     break;
1746   case MCCFIInstruction::OpUndefined:
1747     OS << "OpUndefined Reg" << Inst.getRegister();
1748     break;
1749   case MCCFIInstruction::OpRegister:
1750     OS << "OpRegister Reg" << Inst.getRegister() << " Reg"
1751        << Inst.getRegister2();
1752     break;
1753   case MCCFIInstruction::OpWindowSave:
1754     OS << "OpWindowSave";
1755     break;
1756   case MCCFIInstruction::OpGnuArgsSize:
1757     OS << "OpGnuArgsSize";
1758     break;
1759   default:
1760     OS << "Op#" << Operation;
1761     break;
1762   }
1763 }
1764 
1765 MarkerSymType BinaryContext::getMarkerType(const SymbolRef &Symbol) const {
1766   // For aarch64, the ABI defines mapping symbols so we identify data in the
1767   // code section (see IHI0056B). $x identifies a symbol starting code or the
1768   // end of a data chunk inside code, $d indentifies start of data.
1769   if (!isAArch64() || ELFSymbolRef(Symbol).getSize())
1770     return MarkerSymType::NONE;
1771 
1772   Expected<StringRef> NameOrError = Symbol.getName();
1773   Expected<object::SymbolRef::Type> TypeOrError = Symbol.getType();
1774 
1775   if (!TypeOrError || !NameOrError)
1776     return MarkerSymType::NONE;
1777 
1778   if (*TypeOrError != SymbolRef::ST_Unknown)
1779     return MarkerSymType::NONE;
1780 
1781   if (*NameOrError == "$x" || NameOrError->startswith("$x."))
1782     return MarkerSymType::CODE;
1783 
1784   if (*NameOrError == "$d" || NameOrError->startswith("$d."))
1785     return MarkerSymType::DATA;
1786 
1787   return MarkerSymType::NONE;
1788 }
1789 
1790 bool BinaryContext::isMarker(const SymbolRef &Symbol) const {
1791   return getMarkerType(Symbol) != MarkerSymType::NONE;
1792 }
1793 
1794 static void printDebugInfo(raw_ostream &OS, const MCInst &Instruction,
1795                            const BinaryFunction *Function,
1796                            DWARFContext *DwCtx) {
1797   DebugLineTableRowRef RowRef =
1798       DebugLineTableRowRef::fromSMLoc(Instruction.getLoc());
1799   if (RowRef == DebugLineTableRowRef::NULL_ROW)
1800     return;
1801 
1802   const DWARFDebugLine::LineTable *LineTable;
1803   if (Function && Function->getDWARFUnit() &&
1804       Function->getDWARFUnit()->getOffset() == RowRef.DwCompileUnitIndex) {
1805     LineTable = Function->getDWARFLineTable();
1806   } else {
1807     LineTable = DwCtx->getLineTableForUnit(
1808         DwCtx->getCompileUnitForOffset(RowRef.DwCompileUnitIndex));
1809   }
1810   assert(LineTable && "line table expected for instruction with debug info");
1811 
1812   const DWARFDebugLine::Row &Row = LineTable->Rows[RowRef.RowIndex - 1];
1813   StringRef FileName = "";
1814   if (Optional<const char *> FName =
1815           dwarf::toString(LineTable->Prologue.FileNames[Row.File - 1].Name))
1816     FileName = *FName;
1817   OS << " # debug line " << FileName << ":" << Row.Line;
1818   if (Row.Column)
1819     OS << ":" << Row.Column;
1820   if (Row.Discriminator)
1821     OS << " discriminator:" << Row.Discriminator;
1822 }
1823 
1824 void BinaryContext::printInstruction(raw_ostream &OS, const MCInst &Instruction,
1825                                      uint64_t Offset,
1826                                      const BinaryFunction *Function,
1827                                      bool PrintMCInst, bool PrintMemData,
1828                                      bool PrintRelocations,
1829                                      StringRef Endl) const {
1830   if (MIB->isEHLabel(Instruction)) {
1831     OS << "  EH_LABEL: " << *MIB->getTargetSymbol(Instruction) << Endl;
1832     return;
1833   }
1834   OS << format("    %08" PRIx64 ": ", Offset);
1835   if (MIB->isCFI(Instruction)) {
1836     uint32_t Offset = Instruction.getOperand(0).getImm();
1837     OS << "\t!CFI\t$" << Offset << "\t; ";
1838     if (Function)
1839       printCFI(OS, *Function->getCFIFor(Instruction));
1840     OS << Endl;
1841     return;
1842   }
1843   InstPrinter->printInst(&Instruction, 0, "", *STI, OS);
1844   if (MIB->isCall(Instruction)) {
1845     if (MIB->isTailCall(Instruction))
1846       OS << " # TAILCALL ";
1847     if (MIB->isInvoke(Instruction)) {
1848       const Optional<MCPlus::MCLandingPad> EHInfo = MIB->getEHInfo(Instruction);
1849       OS << " # handler: ";
1850       if (EHInfo->first)
1851         OS << *EHInfo->first;
1852       else
1853         OS << '0';
1854       OS << "; action: " << EHInfo->second;
1855       const int64_t GnuArgsSize = MIB->getGnuArgsSize(Instruction);
1856       if (GnuArgsSize >= 0)
1857         OS << "; GNU_args_size = " << GnuArgsSize;
1858     }
1859   } else if (MIB->isIndirectBranch(Instruction)) {
1860     if (uint64_t JTAddress = MIB->getJumpTable(Instruction)) {
1861       OS << " # JUMPTABLE @0x" << Twine::utohexstr(JTAddress);
1862     } else {
1863       OS << " # UNKNOWN CONTROL FLOW";
1864     }
1865   }
1866   if (Optional<uint32_t> Offset = MIB->getOffset(Instruction))
1867     OS << " # Offset: " << *Offset;
1868 
1869   MIB->printAnnotations(Instruction, OS);
1870 
1871   if (opts::PrintDebugInfo)
1872     printDebugInfo(OS, Instruction, Function, DwCtx.get());
1873 
1874   if ((opts::PrintRelocations || PrintRelocations) && Function) {
1875     const uint64_t Size = computeCodeSize(&Instruction, &Instruction + 1);
1876     Function->printRelocations(OS, Offset, Size);
1877   }
1878 
1879   OS << Endl;
1880 
1881   if (PrintMCInst) {
1882     Instruction.dump_pretty(OS, InstPrinter.get());
1883     OS << Endl;
1884   }
1885 }
1886 
1887 Optional<uint64_t>
1888 BinaryContext::getBaseAddressForMapping(uint64_t MMapAddress,
1889                                         uint64_t FileOffset) const {
1890   // Find a segment with a matching file offset.
1891   for (auto &KV : SegmentMapInfo) {
1892     const SegmentInfo &SegInfo = KV.second;
1893     if (alignDown(SegInfo.FileOffset, SegInfo.Alignment) == FileOffset) {
1894       // Use segment's aligned memory offset to calculate the base address.
1895       const uint64_t MemOffset = alignDown(SegInfo.Address, SegInfo.Alignment);
1896       return MMapAddress - MemOffset;
1897     }
1898   }
1899 
1900   return std::nullopt;
1901 }
1902 
1903 ErrorOr<BinarySection &> BinaryContext::getSectionForAddress(uint64_t Address) {
1904   auto SI = AddressToSection.upper_bound(Address);
1905   if (SI != AddressToSection.begin()) {
1906     --SI;
1907     uint64_t UpperBound = SI->first + SI->second->getSize();
1908     if (!SI->second->getSize())
1909       UpperBound += 1;
1910     if (UpperBound > Address)
1911       return *SI->second;
1912   }
1913   return std::make_error_code(std::errc::bad_address);
1914 }
1915 
1916 ErrorOr<StringRef>
1917 BinaryContext::getSectionNameForAddress(uint64_t Address) const {
1918   if (ErrorOr<const BinarySection &> Section = getSectionForAddress(Address))
1919     return Section->getName();
1920   return std::make_error_code(std::errc::bad_address);
1921 }
1922 
1923 BinarySection &BinaryContext::registerSection(BinarySection *Section) {
1924   auto Res = Sections.insert(Section);
1925   (void)Res;
1926   assert(Res.second && "can't register the same section twice.");
1927 
1928   // Only register allocatable sections in the AddressToSection map.
1929   if (Section->isAllocatable() && Section->getAddress())
1930     AddressToSection.insert(std::make_pair(Section->getAddress(), Section));
1931   NameToSection.insert(
1932       std::make_pair(std::string(Section->getName()), Section));
1933   if (Section->hasSectionRef())
1934     SectionRefToBinarySection.insert(
1935         std::make_pair(Section->getSectionRef(), Section));
1936 
1937   LLVM_DEBUG(dbgs() << "BOLT-DEBUG: registering " << *Section << "\n");
1938   return *Section;
1939 }
1940 
1941 BinarySection &BinaryContext::registerSection(SectionRef Section) {
1942   return registerSection(new BinarySection(*this, Section));
1943 }
1944 
1945 BinarySection &
1946 BinaryContext::registerSection(const Twine &SectionName,
1947                                const BinarySection &OriginalSection) {
1948   return registerSection(
1949       new BinarySection(*this, SectionName, OriginalSection));
1950 }
1951 
1952 BinarySection &
1953 BinaryContext::registerOrUpdateSection(const Twine &Name, unsigned ELFType,
1954                                        unsigned ELFFlags, uint8_t *Data,
1955                                        uint64_t Size, unsigned Alignment) {
1956   auto NamedSections = getSectionByName(Name);
1957   if (NamedSections.begin() != NamedSections.end()) {
1958     assert(std::next(NamedSections.begin()) == NamedSections.end() &&
1959            "can only update unique sections");
1960     BinarySection *Section = NamedSections.begin()->second;
1961 
1962     LLVM_DEBUG(dbgs() << "BOLT-DEBUG: updating " << *Section << " -> ");
1963     const bool Flag = Section->isAllocatable();
1964     (void)Flag;
1965     Section->update(Data, Size, Alignment, ELFType, ELFFlags);
1966     LLVM_DEBUG(dbgs() << *Section << "\n");
1967     // FIXME: Fix section flags/attributes for MachO.
1968     if (isELF())
1969       assert(Flag == Section->isAllocatable() &&
1970              "can't change section allocation status");
1971     return *Section;
1972   }
1973 
1974   return registerSection(
1975       new BinarySection(*this, Name, Data, Size, Alignment, ELFType, ELFFlags));
1976 }
1977 
1978 void BinaryContext::deregisterSectionName(const BinarySection &Section) {
1979   auto NameRange = NameToSection.equal_range(Section.getName().str());
1980   while (NameRange.first != NameRange.second) {
1981     if (NameRange.first->second == &Section) {
1982       NameToSection.erase(NameRange.first);
1983       break;
1984     }
1985     ++NameRange.first;
1986   }
1987 }
1988 
1989 void BinaryContext::deregisterUnusedSections() {
1990   ErrorOr<BinarySection &> AbsSection = getUniqueSectionByName("<absolute>");
1991   for (auto SI = Sections.begin(); SI != Sections.end();) {
1992     BinarySection *Section = *SI;
1993     if (Section->hasSectionRef() || Section->getOutputSize() ||
1994         (AbsSection && Section == &AbsSection.get())) {
1995       ++SI;
1996       continue;
1997     }
1998 
1999     LLVM_DEBUG(dbgs() << "LLVM-DEBUG: deregistering " << Section->getName()
2000                       << '\n';);
2001     deregisterSectionName(*Section);
2002     SI = Sections.erase(SI);
2003     delete Section;
2004   }
2005 }
2006 
2007 bool BinaryContext::deregisterSection(BinarySection &Section) {
2008   BinarySection *SectionPtr = &Section;
2009   auto Itr = Sections.find(SectionPtr);
2010   if (Itr != Sections.end()) {
2011     auto Range = AddressToSection.equal_range(SectionPtr->getAddress());
2012     while (Range.first != Range.second) {
2013       if (Range.first->second == SectionPtr) {
2014         AddressToSection.erase(Range.first);
2015         break;
2016       }
2017       ++Range.first;
2018     }
2019 
2020     deregisterSectionName(*SectionPtr);
2021     Sections.erase(Itr);
2022     delete SectionPtr;
2023     return true;
2024   }
2025   return false;
2026 }
2027 
2028 void BinaryContext::renameSection(BinarySection &Section,
2029                                   const Twine &NewName) {
2030   auto Itr = Sections.find(&Section);
2031   assert(Itr != Sections.end() && "Section must exist to be renamed.");
2032   Sections.erase(Itr);
2033 
2034   deregisterSectionName(Section);
2035 
2036   Section.Name = NewName.str();
2037   Section.setOutputName(Section.Name);
2038 
2039   NameToSection.insert(std::make_pair(Section.Name, &Section));
2040 
2041   // Reinsert with the new name.
2042   Sections.insert(&Section);
2043 }
2044 
2045 void BinaryContext::printSections(raw_ostream &OS) const {
2046   for (BinarySection *const &Section : Sections)
2047     OS << "BOLT-INFO: " << *Section << "\n";
2048 }
2049 
2050 BinarySection &BinaryContext::absoluteSection() {
2051   if (ErrorOr<BinarySection &> Section = getUniqueSectionByName("<absolute>"))
2052     return *Section;
2053   return registerOrUpdateSection("<absolute>", ELF::SHT_NULL, 0u);
2054 }
2055 
2056 ErrorOr<uint64_t> BinaryContext::getUnsignedValueAtAddress(uint64_t Address,
2057                                                            size_t Size) const {
2058   const ErrorOr<const BinarySection &> Section = getSectionForAddress(Address);
2059   if (!Section)
2060     return std::make_error_code(std::errc::bad_address);
2061 
2062   if (Section->isVirtual())
2063     return 0;
2064 
2065   DataExtractor DE(Section->getContents(), AsmInfo->isLittleEndian(),
2066                    AsmInfo->getCodePointerSize());
2067   auto ValueOffset = static_cast<uint64_t>(Address - Section->getAddress());
2068   return DE.getUnsigned(&ValueOffset, Size);
2069 }
2070 
2071 ErrorOr<uint64_t> BinaryContext::getSignedValueAtAddress(uint64_t Address,
2072                                                          size_t Size) const {
2073   const ErrorOr<const BinarySection &> Section = getSectionForAddress(Address);
2074   if (!Section)
2075     return std::make_error_code(std::errc::bad_address);
2076 
2077   if (Section->isVirtual())
2078     return 0;
2079 
2080   DataExtractor DE(Section->getContents(), AsmInfo->isLittleEndian(),
2081                    AsmInfo->getCodePointerSize());
2082   auto ValueOffset = static_cast<uint64_t>(Address - Section->getAddress());
2083   return DE.getSigned(&ValueOffset, Size);
2084 }
2085 
2086 void BinaryContext::addRelocation(uint64_t Address, MCSymbol *Symbol,
2087                                   uint64_t Type, uint64_t Addend,
2088                                   uint64_t Value) {
2089   ErrorOr<BinarySection &> Section = getSectionForAddress(Address);
2090   assert(Section && "cannot find section for address");
2091   Section->addRelocation(Address - Section->getAddress(), Symbol, Type, Addend,
2092                          Value);
2093 }
2094 
2095 void BinaryContext::addDynamicRelocation(uint64_t Address, MCSymbol *Symbol,
2096                                          uint64_t Type, uint64_t Addend,
2097                                          uint64_t Value) {
2098   ErrorOr<BinarySection &> Section = getSectionForAddress(Address);
2099   assert(Section && "cannot find section for address");
2100   Section->addDynamicRelocation(Address - Section->getAddress(), Symbol, Type,
2101                                 Addend, Value);
2102 }
2103 
2104 bool BinaryContext::removeRelocationAt(uint64_t Address) {
2105   ErrorOr<BinarySection &> Section = getSectionForAddress(Address);
2106   assert(Section && "cannot find section for address");
2107   return Section->removeRelocationAt(Address - Section->getAddress());
2108 }
2109 
2110 const Relocation *BinaryContext::getRelocationAt(uint64_t Address) {
2111   ErrorOr<BinarySection &> Section = getSectionForAddress(Address);
2112   if (!Section)
2113     return nullptr;
2114 
2115   return Section->getRelocationAt(Address - Section->getAddress());
2116 }
2117 
2118 const Relocation *BinaryContext::getDynamicRelocationAt(uint64_t Address) {
2119   ErrorOr<BinarySection &> Section = getSectionForAddress(Address);
2120   if (!Section)
2121     return nullptr;
2122 
2123   return Section->getDynamicRelocationAt(Address - Section->getAddress());
2124 }
2125 
2126 void BinaryContext::markAmbiguousRelocations(BinaryData &BD,
2127                                              const uint64_t Address) {
2128   auto setImmovable = [&](BinaryData &BD) {
2129     BinaryData *Root = BD.getAtomicRoot();
2130     LLVM_DEBUG(if (Root->isMoveable()) {
2131       dbgs() << "BOLT-DEBUG: setting " << *Root << " as immovable "
2132              << "due to ambiguous relocation referencing 0x"
2133              << Twine::utohexstr(Address) << '\n';
2134     });
2135     Root->setIsMoveable(false);
2136   };
2137 
2138   if (Address == BD.getAddress()) {
2139     setImmovable(BD);
2140 
2141     // Set previous symbol as immovable
2142     BinaryData *Prev = getBinaryDataContainingAddress(Address - 1);
2143     if (Prev && Prev->getEndAddress() == BD.getAddress())
2144       setImmovable(*Prev);
2145   }
2146 
2147   if (Address == BD.getEndAddress()) {
2148     setImmovable(BD);
2149 
2150     // Set next symbol as immovable
2151     BinaryData *Next = getBinaryDataContainingAddress(BD.getEndAddress());
2152     if (Next && Next->getAddress() == BD.getEndAddress())
2153       setImmovable(*Next);
2154   }
2155 }
2156 
2157 BinaryFunction *BinaryContext::getFunctionForSymbol(const MCSymbol *Symbol,
2158                                                     uint64_t *EntryDesc) {
2159   std::shared_lock<llvm::sys::RWMutex> Lock(SymbolToFunctionMapMutex);
2160   auto BFI = SymbolToFunctionMap.find(Symbol);
2161   if (BFI == SymbolToFunctionMap.end())
2162     return nullptr;
2163 
2164   BinaryFunction *BF = BFI->second;
2165   if (EntryDesc)
2166     *EntryDesc = BF->getEntryIDForSymbol(Symbol);
2167 
2168   return BF;
2169 }
2170 
2171 void BinaryContext::exitWithBugReport(StringRef Message,
2172                                       const BinaryFunction &Function) const {
2173   errs() << "=======================================\n";
2174   errs() << "BOLT is unable to proceed because it couldn't properly understand "
2175             "this function.\n";
2176   errs() << "If you are running the most recent version of BOLT, you may "
2177             "want to "
2178             "report this and paste this dump.\nPlease check that there is no "
2179             "sensitive contents being shared in this dump.\n";
2180   errs() << "\nOffending function: " << Function.getPrintName() << "\n\n";
2181   ScopedPrinter SP(errs());
2182   SP.printBinaryBlock("Function contents", *Function.getData());
2183   errs() << "\n";
2184   Function.dump();
2185   errs() << "ERROR: " << Message;
2186   errs() << "\n=======================================\n";
2187   exit(1);
2188 }
2189 
2190 BinaryFunction *
2191 BinaryContext::createInjectedBinaryFunction(const std::string &Name,
2192                                             bool IsSimple) {
2193   InjectedBinaryFunctions.push_back(new BinaryFunction(Name, *this, IsSimple));
2194   BinaryFunction *BF = InjectedBinaryFunctions.back();
2195   setSymbolToFunctionMap(BF->getSymbol(), BF);
2196   BF->CurrentState = BinaryFunction::State::CFG;
2197   return BF;
2198 }
2199 
/// Measure the code size of \p BF by emitting it into a temporary object
/// streamer that uses its own local MCContext.
///
/// \param BF function to measure.
/// \param FixBranches when true, BF.fixBranches() is called before emission.
/// \returns a pair {HotSize, ColdSize}: the distance between the labels that
/// bracket the main fragment, and the sum of the distances between the labels
/// that bracket each split fragment.
std::pair<size_t, size_t>
BinaryContext::calculateEmittedSize(BinaryFunction &BF, bool FixBranches) {
  // Adjust branch instruction to match the current layout.
  if (FixBranches)
    BF.fixBranches();

  // Create local MC context to isolate the effect of ephemeral code emission.
  IndependentCodeEmitter MCEInstance = createIndependentMCCodeEmitter();
  MCContext *LocalCtx = MCEInstance.LocalCtx.get();
  // Raw pointer for now; it is wrapped in a unique_ptr when the streamer is
  // created below.
  MCAsmBackend *MAB =
      TheTarget->createMCAsmBackend(*STI, *MRI, MCTargetOptions());

  // Buffer that receives the object writer's output.
  SmallString<256> Code;
  raw_svector_ostream VecOS(Code);

  std::unique_ptr<MCObjectWriter> OW = MAB->createObjectWriter(VecOS);
  std::unique_ptr<MCStreamer> Streamer(TheTarget->createMCObjectStreamer(
      *TheTriple, *LocalCtx, std::unique_ptr<MCAsmBackend>(MAB), std::move(OW),
      std::unique_ptr<MCCodeEmitter>(MCEInstance.MCE.release()), *STI,
      /*RelaxAll=*/false,
      /*IncrementalLinkerCompatible=*/false,
      /*DWARFMustBeAtTheEnd=*/false));

  Streamer->initSections(false, *STI);

  // The main fragment is emitted into the local text section.
  MCSection *Section = MCEInstance.LocalMOFI->getTextSection();
  Section->setHasInstructions(true);

  // Create symbols in the LocalCtx so that they get destroyed with it.
  MCSymbol *StartLabel = LocalCtx->createTempSymbol();
  MCSymbol *EndLabel = LocalCtx->createTempSymbol();

  // Bracket the main fragment with StartLabel/EndLabel.
  Streamer->switchSection(Section);
  Streamer->emitLabel(StartLabel);
  emitFunctionBody(*Streamer, BF, BF.getLayout().getMainFragment(),
                   /*EmitCodeOnly=*/true);
  Streamer->emitLabel(EndLabel);

  // Each split fragment is emitted into an ELF section of its own and
  // bracketed by its own pair of labels, recorded in SplitLabels.
  using LabelRange = std::pair<const MCSymbol *, const MCSymbol *>;
  SmallVector<LabelRange> SplitLabels;
  for (FunctionFragment &FF : BF.getLayout().getSplitFragments()) {
    MCSymbol *const SplitStartLabel = LocalCtx->createTempSymbol();
    MCSymbol *const SplitEndLabel = LocalCtx->createTempSymbol();
    SplitLabels.emplace_back(SplitStartLabel, SplitEndLabel);

    MCSectionELF *const SplitSection = LocalCtx->getELFSection(
        BF.getCodeSectionName(FF.getFragmentNum()), ELF::SHT_PROGBITS,
        ELF::SHF_EXECINSTR | ELF::SHF_ALLOC);
    SplitSection->setHasInstructions(true);
    Streamer->switchSection(SplitSection);

    Streamer->emitLabel(SplitStartLabel);
    emitFunctionBody(*Streamer, BF, FF, /*EmitCodeOnly=*/true);
    Streamer->emitLabel(SplitEndLabel);
    // To avoid calling MCObjectStreamer::flushPendingLabels() which is
    // private
    Streamer->emitBytes(StringRef(""));
    // Go back to the main section before handling the next fragment.
    Streamer->switchSection(Section);
  }

  // To avoid calling MCObjectStreamer::flushPendingLabels() which is private or
  // MCStreamer::Finish(), which does more than we want
  Streamer->emitBytes(StringRef(""));

  // Lay out the assembled code so that label offsets can be queried.
  MCAssembler &Assembler =
      static_cast<MCObjectStreamer *>(Streamer.get())->getAssembler();
  MCAsmLayout Layout(Assembler);
  Assembler.layout(Layout);

  const uint64_t HotSize =
      Layout.getSymbolOffset(*EndLabel) - Layout.getSymbolOffset(*StartLabel);
  // Sum of (end - start) over all split fragments.
  const uint64_t ColdSize =
      std::accumulate(SplitLabels.begin(), SplitLabels.end(), 0ULL,
                      [&](const uint64_t Accu, const LabelRange &Labels) {
                        return Accu + Layout.getSymbolOffset(*Labels.second) -
                               Layout.getSymbolOffset(*Labels.first);
                      });

  // Clean-up the effect of the code emission.
  for (const MCSymbol &Symbol : Assembler.symbols()) {
    MCSymbol *MutableSymbol = const_cast<MCSymbol *>(&Symbol);
    MutableSymbol->setUndefined();
    MutableSymbol->setIsRegistered(false);
  }

  return std::make_pair(HotSize, ColdSize);
}
2287 
2288 bool BinaryContext::validateInstructionEncoding(
2289     ArrayRef<uint8_t> InputSequence) const {
2290   MCInst Inst;
2291   uint64_t InstSize;
2292   DisAsm->getInstruction(Inst, InstSize, InputSequence, 0, nulls());
2293   assert(InstSize == InputSequence.size() &&
2294          "Disassembled instruction size does not match the sequence.");
2295 
2296   SmallString<256> Code;
2297   SmallVector<MCFixup, 4> Fixups;
2298   raw_svector_ostream VecOS(Code);
2299 
2300   MCE->encodeInstruction(Inst, VecOS, Fixups, *STI);
2301   auto OutputSequence = ArrayRef<uint8_t>((uint8_t *)Code.data(), Code.size());
2302   if (InputSequence != OutputSequence) {
2303     if (opts::Verbosity > 1) {
2304       errs() << "BOLT-WARNING: mismatched encoding detected\n"
2305              << "      input: " << InputSequence << '\n'
2306              << "     output: " << OutputSequence << '\n';
2307     }
2308     return false;
2309   }
2310 
2311   return true;
2312 }
2313 
2314 uint64_t BinaryContext::getHotThreshold() const {
2315   static uint64_t Threshold = 0;
2316   if (Threshold == 0) {
2317     Threshold = std::max(
2318         (uint64_t)opts::ExecutionCountThreshold,
2319         NumProfiledFuncs ? SumExecutionCount / (2 * NumProfiledFuncs) : 1);
2320   }
2321   return Threshold;
2322 }
2323 
2324 BinaryFunction *BinaryContext::getBinaryFunctionContainingAddress(
2325     uint64_t Address, bool CheckPastEnd, bool UseMaxSize) {
2326   auto FI = BinaryFunctions.upper_bound(Address);
2327   if (FI == BinaryFunctions.begin())
2328     return nullptr;
2329   --FI;
2330 
2331   const uint64_t UsedSize =
2332       UseMaxSize ? FI->second.getMaxSize() : FI->second.getSize();
2333 
2334   if (Address >= FI->first + UsedSize + (CheckPastEnd ? 1 : 0))
2335     return nullptr;
2336 
2337   return &FI->second;
2338 }
2339 
2340 BinaryFunction *BinaryContext::getBinaryFunctionAtAddress(uint64_t Address) {
2341   // First, try to find a function starting at the given address. If the
2342   // function was folded, this will get us the original folded function if it
2343   // wasn't removed from the list, e.g. in non-relocation mode.
2344   auto BFI = BinaryFunctions.find(Address);
2345   if (BFI != BinaryFunctions.end())
2346     return &BFI->second;
2347 
2348   // We might have folded the function matching the object at the given
2349   // address. In such case, we look for a function matching the symbol
2350   // registered at the original address. The new function (the one that the
2351   // original was folded into) will hold the symbol.
2352   if (const BinaryData *BD = getBinaryDataAtAddress(Address)) {
2353     uint64_t EntryID = 0;
2354     BinaryFunction *BF = getFunctionForSymbol(BD->getSymbol(), &EntryID);
2355     if (BF && EntryID == 0)
2356       return BF;
2357   }
2358   return nullptr;
2359 }
2360 
2361 DebugAddressRangesVector BinaryContext::translateModuleAddressRanges(
2362     const DWARFAddressRangesVector &InputRanges) const {
2363   DebugAddressRangesVector OutputRanges;
2364 
2365   for (const DWARFAddressRange Range : InputRanges) {
2366     auto BFI = BinaryFunctions.lower_bound(Range.LowPC);
2367     while (BFI != BinaryFunctions.end()) {
2368       const BinaryFunction &Function = BFI->second;
2369       if (Function.getAddress() >= Range.HighPC)
2370         break;
2371       const DebugAddressRangesVector FunctionRanges =
2372           Function.getOutputAddressRanges();
2373       llvm::move(FunctionRanges, std::back_inserter(OutputRanges));
2374       std::advance(BFI, 1);
2375     }
2376   }
2377 
2378   return OutputRanges;
2379 }
2380 
2381 } // namespace bolt
2382 } // namespace llvm
2383