xref: /llvm-project/bolt/lib/Core/BinaryContext.cpp (revision 05523dc32d8ca81d9a92ff955194a9e80cf79dc0)
1 //===- bolt/Core/BinaryContext.cpp - Low-level context --------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements the BinaryContext class.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "bolt/Core/BinaryContext.h"
14 #include "bolt/Core/BinaryEmitter.h"
15 #include "bolt/Core/BinaryFunction.h"
16 #include "bolt/Utils/CommandLineOpts.h"
17 #include "bolt/Utils/NameResolver.h"
18 #include "bolt/Utils/Utils.h"
19 #include "llvm/ADT/Twine.h"
20 #include "llvm/DebugInfo/DWARF/DWARFCompileUnit.h"
21 #include "llvm/DebugInfo/DWARF/DWARFFormValue.h"
22 #include "llvm/DebugInfo/DWARF/DWARFUnit.h"
23 #include "llvm/MC/MCAsmLayout.h"
24 #include "llvm/MC/MCAssembler.h"
25 #include "llvm/MC/MCContext.h"
26 #include "llvm/MC/MCDisassembler/MCDisassembler.h"
27 #include "llvm/MC/MCInstPrinter.h"
28 #include "llvm/MC/MCObjectStreamer.h"
29 #include "llvm/MC/MCObjectWriter.h"
30 #include "llvm/MC/MCRegisterInfo.h"
31 #include "llvm/MC/MCSectionELF.h"
32 #include "llvm/MC/MCStreamer.h"
33 #include "llvm/MC/MCSubtargetInfo.h"
34 #include "llvm/MC/MCSymbol.h"
35 #include "llvm/Support/CommandLine.h"
36 #include "llvm/Support/Error.h"
37 #include "llvm/Support/Regex.h"
38 #include <algorithm>
39 #include <functional>
40 #include <iterator>
41 #include <unordered_set>
42 
43 using namespace llvm;
44 
45 #undef  DEBUG_TYPE
46 #define DEBUG_TYPE "bolt"
47 
48 namespace opts {
49 
50 cl::opt<bool> NoHugePages("no-huge-pages",
51                           cl::desc("use regular size pages for code alignment"),
52                           cl::Hidden, cl::cat(BoltCategory));
53 
54 static cl::opt<bool>
55 PrintDebugInfo("print-debug-info",
56   cl::desc("print debug info when printing functions"),
57   cl::Hidden,
58   cl::ZeroOrMore,
59   cl::cat(BoltCategory));
60 
61 cl::opt<bool> PrintRelocations(
62     "print-relocations",
63     cl::desc("print relocations when printing functions/objects"), cl::Hidden,
64     cl::cat(BoltCategory));
65 
66 static cl::opt<bool>
67 PrintMemData("print-mem-data",
68   cl::desc("print memory data annotations when printing functions"),
69   cl::Hidden,
70   cl::ZeroOrMore,
71   cl::cat(BoltCategory));
72 
73 } // namespace opts
74 
75 namespace llvm {
76 namespace bolt {
77 
78 BinaryContext::BinaryContext(std::unique_ptr<MCContext> Ctx,
79                              std::unique_ptr<DWARFContext> DwCtx,
80                              std::unique_ptr<Triple> TheTriple,
81                              const Target *TheTarget, std::string TripleName,
82                              std::unique_ptr<MCCodeEmitter> MCE,
83                              std::unique_ptr<MCObjectFileInfo> MOFI,
84                              std::unique_ptr<const MCAsmInfo> AsmInfo,
85                              std::unique_ptr<const MCInstrInfo> MII,
86                              std::unique_ptr<const MCSubtargetInfo> STI,
87                              std::unique_ptr<MCInstPrinter> InstPrinter,
88                              std::unique_ptr<const MCInstrAnalysis> MIA,
89                              std::unique_ptr<MCPlusBuilder> MIB,
90                              std::unique_ptr<const MCRegisterInfo> MRI,
91                              std::unique_ptr<MCDisassembler> DisAsm)
92     : Ctx(std::move(Ctx)), DwCtx(std::move(DwCtx)),
93       TheTriple(std::move(TheTriple)), TheTarget(TheTarget),
94       TripleName(TripleName), MCE(std::move(MCE)), MOFI(std::move(MOFI)),
95       AsmInfo(std::move(AsmInfo)), MII(std::move(MII)), STI(std::move(STI)),
96       InstPrinter(std::move(InstPrinter)), MIA(std::move(MIA)),
97       MIB(std::move(MIB)), MRI(std::move(MRI)), DisAsm(std::move(DisAsm)) {
98   Relocation::Arch = this->TheTriple->getArch();
99   RegularPageSize = isAArch64() ? RegularPageSizeAArch64 : RegularPageSizeX86;
100   PageAlign = opts::NoHugePages ? RegularPageSize : HugePageSize;
101 }
102 
103 BinaryContext::~BinaryContext() {
104   for (BinarySection *Section : Sections)
105     delete Section;
106   for (BinaryFunction *InjectedFunction : InjectedBinaryFunctions)
107     delete InjectedFunction;
108   for (std::pair<const uint64_t, JumpTable *> JTI : JumpTables)
109     delete JTI.second;
110   clearBinaryData();
111 }
112 
113 /// Create BinaryContext for a given architecture \p ArchName and
114 /// triple \p TripleName.
115 Expected<std::unique_ptr<BinaryContext>>
116 BinaryContext::createBinaryContext(const ObjectFile *File, bool IsPIC,
117                                    std::unique_ptr<DWARFContext> DwCtx) {
118   StringRef ArchName = "";
119   StringRef FeaturesStr = "";
120   switch (File->getArch()) {
121   case llvm::Triple::x86_64:
122     ArchName = "x86-64";
123     FeaturesStr = "+nopl";
124     break;
125   case llvm::Triple::aarch64:
126     ArchName = "aarch64";
127     FeaturesStr = "+all";
128     break;
129   default:
130     return createStringError(std::errc::not_supported,
131                              "BOLT-ERROR: Unrecognized machine in ELF file");
132   }
133 
134   auto TheTriple = std::make_unique<Triple>(File->makeTriple());
135   const std::string TripleName = TheTriple->str();
136 
137   std::string Error;
138   const Target *TheTarget =
139       TargetRegistry::lookupTarget(std::string(ArchName), *TheTriple, Error);
140   if (!TheTarget)
141     return createStringError(make_error_code(std::errc::not_supported),
142                              Twine("BOLT-ERROR: ", Error));
143 
144   std::unique_ptr<const MCRegisterInfo> MRI(
145       TheTarget->createMCRegInfo(TripleName));
146   if (!MRI)
147     return createStringError(
148         make_error_code(std::errc::not_supported),
149         Twine("BOLT-ERROR: no register info for target ", TripleName));
150 
151   // Set up disassembler.
152   std::unique_ptr<MCAsmInfo> AsmInfo(
153       TheTarget->createMCAsmInfo(*MRI, TripleName, MCTargetOptions()));
154   if (!AsmInfo)
155     return createStringError(
156         make_error_code(std::errc::not_supported),
157         Twine("BOLT-ERROR: no assembly info for target ", TripleName));
158   // BOLT creates "func@PLT" symbols for PLT entries. In function assembly dump
159   // we want to emit such names as using @PLT without double quotes to convey
160   // variant kind to the assembler. BOLT doesn't rely on the linker so we can
161   // override the default AsmInfo behavior to emit names the way we want.
162   AsmInfo->setAllowAtInName(true);
163 
164   std::unique_ptr<const MCSubtargetInfo> STI(
165       TheTarget->createMCSubtargetInfo(TripleName, "", FeaturesStr));
166   if (!STI)
167     return createStringError(
168         make_error_code(std::errc::not_supported),
169         Twine("BOLT-ERROR: no subtarget info for target ", TripleName));
170 
171   std::unique_ptr<const MCInstrInfo> MII(TheTarget->createMCInstrInfo());
172   if (!MII)
173     return createStringError(
174         make_error_code(std::errc::not_supported),
175         Twine("BOLT-ERROR: no instruction info for target ", TripleName));
176 
177   std::unique_ptr<MCContext> Ctx(
178       new MCContext(*TheTriple, AsmInfo.get(), MRI.get(), STI.get()));
179   std::unique_ptr<MCObjectFileInfo> MOFI(
180       TheTarget->createMCObjectFileInfo(*Ctx, IsPIC));
181   Ctx->setObjectFileInfo(MOFI.get());
182   // We do not support X86 Large code model. Change this in the future.
183   bool Large = false;
184   if (TheTriple->getArch() == llvm::Triple::aarch64)
185     Large = true;
186   unsigned LSDAEncoding =
187       Large ? dwarf::DW_EH_PE_absptr : dwarf::DW_EH_PE_udata4;
188   unsigned TTypeEncoding =
189       Large ? dwarf::DW_EH_PE_absptr : dwarf::DW_EH_PE_udata4;
190   if (IsPIC) {
191     LSDAEncoding = dwarf::DW_EH_PE_pcrel |
192                    (Large ? dwarf::DW_EH_PE_sdata8 : dwarf::DW_EH_PE_sdata4);
193     TTypeEncoding = dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel |
194                     (Large ? dwarf::DW_EH_PE_sdata8 : dwarf::DW_EH_PE_sdata4);
195   }
196 
197   std::unique_ptr<MCDisassembler> DisAsm(
198       TheTarget->createMCDisassembler(*STI, *Ctx));
199 
200   if (!DisAsm)
201     return createStringError(
202         make_error_code(std::errc::not_supported),
203         Twine("BOLT-ERROR: no disassembler info for target ", TripleName));
204 
205   std::unique_ptr<const MCInstrAnalysis> MIA(
206       TheTarget->createMCInstrAnalysis(MII.get()));
207   if (!MIA)
208     return createStringError(
209         make_error_code(std::errc::not_supported),
210         Twine("BOLT-ERROR: failed to create instruction analysis for target ",
211               TripleName));
212 
213   int AsmPrinterVariant = AsmInfo->getAssemblerDialect();
214   std::unique_ptr<MCInstPrinter> InstructionPrinter(
215       TheTarget->createMCInstPrinter(*TheTriple, AsmPrinterVariant, *AsmInfo,
216                                      *MII, *MRI));
217   if (!InstructionPrinter)
218     return createStringError(
219         make_error_code(std::errc::not_supported),
220         Twine("BOLT-ERROR: no instruction printer for target ", TripleName));
221   InstructionPrinter->setPrintImmHex(true);
222 
223   std::unique_ptr<MCCodeEmitter> MCE(
224       TheTarget->createMCCodeEmitter(*MII, *Ctx));
225 
226   // Make sure we don't miss any output on core dumps.
227   outs().SetUnbuffered();
228   errs().SetUnbuffered();
229   dbgs().SetUnbuffered();
230 
231   auto BC = std::make_unique<BinaryContext>(
232       std::move(Ctx), std::move(DwCtx), std::move(TheTriple), TheTarget,
233       std::string(TripleName), std::move(MCE), std::move(MOFI),
234       std::move(AsmInfo), std::move(MII), std::move(STI),
235       std::move(InstructionPrinter), std::move(MIA), nullptr, std::move(MRI),
236       std::move(DisAsm));
237 
238   BC->TTypeEncoding = TTypeEncoding;
239   BC->LSDAEncoding = LSDAEncoding;
240 
241   BC->MAB = std::unique_ptr<MCAsmBackend>(
242       BC->TheTarget->createMCAsmBackend(*BC->STI, *BC->MRI, MCTargetOptions()));
243 
244   BC->setFilename(File->getFileName());
245 
246   BC->HasFixedLoadAddress = !IsPIC;
247 
248   BC->SymbolicDisAsm = std::unique_ptr<MCDisassembler>(
249       BC->TheTarget->createMCDisassembler(*BC->STI, *BC->Ctx));
250 
251   if (!BC->SymbolicDisAsm)
252     return createStringError(
253         make_error_code(std::errc::not_supported),
254         Twine("BOLT-ERROR: no disassembler info for target ", TripleName));
255 
256   return std::move(BC);
257 }
258 
259 bool BinaryContext::forceSymbolRelocations(StringRef SymbolName) const {
260   if (opts::HotText &&
261       (SymbolName == "__hot_start" || SymbolName == "__hot_end"))
262     return true;
263 
264   if (opts::HotData &&
265       (SymbolName == "__hot_data_start" || SymbolName == "__hot_data_end"))
266     return true;
267 
268   if (SymbolName == "_end")
269     return true;
270 
271   return false;
272 }
273 
274 std::unique_ptr<MCObjectWriter>
275 BinaryContext::createObjectWriter(raw_pwrite_stream &OS) {
276   return MAB->createObjectWriter(OS);
277 }
278 
279 bool BinaryContext::validateObjectNesting() const {
280   auto Itr = BinaryDataMap.begin();
281   auto End = BinaryDataMap.end();
282   bool Valid = true;
283   while (Itr != End) {
284     auto Next = std::next(Itr);
285     while (Next != End &&
286            Itr->second->getSection() == Next->second->getSection() &&
287            Itr->second->containsRange(Next->second->getAddress(),
288                                       Next->second->getSize())) {
289       if (Next->second->Parent != Itr->second) {
290         errs() << "BOLT-WARNING: object nesting incorrect for:\n"
291                << "BOLT-WARNING:  " << *Itr->second << "\n"
292                << "BOLT-WARNING:  " << *Next->second << "\n";
293         Valid = false;
294       }
295       ++Next;
296     }
297     Itr = Next;
298   }
299   return Valid;
300 }
301 
302 bool BinaryContext::validateHoles() const {
303   bool Valid = true;
304   for (BinarySection &Section : sections()) {
305     for (const Relocation &Rel : Section.relocations()) {
306       uint64_t RelAddr = Rel.Offset + Section.getAddress();
307       const BinaryData *BD = getBinaryDataContainingAddress(RelAddr);
308       if (!BD) {
309         errs() << "BOLT-WARNING: no BinaryData found for relocation at address"
310                << " 0x" << Twine::utohexstr(RelAddr) << " in "
311                << Section.getName() << "\n";
312         Valid = false;
313       } else if (!BD->getAtomicRoot()) {
314         errs() << "BOLT-WARNING: no atomic BinaryData found for relocation at "
315                << "address 0x" << Twine::utohexstr(RelAddr) << " in "
316                << Section.getName() << "\n";
317         Valid = false;
318       }
319     }
320   }
321   return Valid;
322 }
323 
324 void BinaryContext::updateObjectNesting(BinaryDataMapType::iterator GAI) {
325   const uint64_t Address = GAI->second->getAddress();
326   const uint64_t Size = GAI->second->getSize();
327 
328   auto fixParents = [&](BinaryDataMapType::iterator Itr,
329                         BinaryData *NewParent) {
330     BinaryData *OldParent = Itr->second->Parent;
331     Itr->second->Parent = NewParent;
332     ++Itr;
333     while (Itr != BinaryDataMap.end() && OldParent &&
334            Itr->second->Parent == OldParent) {
335       Itr->second->Parent = NewParent;
336       ++Itr;
337     }
338   };
339 
340   // Check if the previous symbol contains the newly added symbol.
341   if (GAI != BinaryDataMap.begin()) {
342     BinaryData *Prev = std::prev(GAI)->second;
343     while (Prev) {
344       if (Prev->getSection() == GAI->second->getSection() &&
345           Prev->containsRange(Address, Size)) {
346         fixParents(GAI, Prev);
347       } else {
348         fixParents(GAI, nullptr);
349       }
350       Prev = Prev->Parent;
351     }
352   }
353 
354   // Check if the newly added symbol contains any subsequent symbols.
355   if (Size != 0) {
356     BinaryData *BD = GAI->second->Parent ? GAI->second->Parent : GAI->second;
357     auto Itr = std::next(GAI);
358     while (
359         Itr != BinaryDataMap.end() &&
360         BD->containsRange(Itr->second->getAddress(), Itr->second->getSize())) {
361       Itr->second->Parent = BD;
362       ++Itr;
363     }
364   }
365 }
366 
367 iterator_range<BinaryContext::binary_data_iterator>
368 BinaryContext::getSubBinaryData(BinaryData *BD) {
369   auto Start = std::next(BinaryDataMap.find(BD->getAddress()));
370   auto End = Start;
371   while (End != BinaryDataMap.end() && BD->isAncestorOf(End->second))
372     ++End;
373   return make_range(Start, End);
374 }
375 
376 std::pair<const MCSymbol *, uint64_t>
377 BinaryContext::handleAddressRef(uint64_t Address, BinaryFunction &BF,
378                                 bool IsPCRel) {
379   uint64_t Addend = 0;
380 
381   if (isAArch64()) {
382     // Check if this is an access to a constant island and create bookkeeping
383     // to keep track of it and emit it later as part of this function.
384     if (MCSymbol *IslandSym = BF.getOrCreateIslandAccess(Address))
385       return std::make_pair(IslandSym, Addend);
386 
387     // Detect custom code written in assembly that refers to arbitrary
388     // constant islands from other functions. Write this reference so we
389     // can pull this constant island and emit it as part of this function
390     // too.
391     auto IslandIter = AddressToConstantIslandMap.lower_bound(Address);
392     if (IslandIter != AddressToConstantIslandMap.end()) {
393       if (MCSymbol *IslandSym =
394               IslandIter->second->getOrCreateProxyIslandAccess(Address, BF)) {
395         BF.createIslandDependency(IslandSym, IslandIter->second);
396         return std::make_pair(IslandSym, Addend);
397       }
398     }
399   }
400 
401   // Note that the address does not necessarily have to reside inside
402   // a section, it could be an absolute address too.
403   ErrorOr<BinarySection &> Section = getSectionForAddress(Address);
404   if (Section && Section->isText()) {
405     if (BF.containsAddress(Address, /*UseMaxSize=*/isAArch64())) {
406       if (Address != BF.getAddress()) {
407         // The address could potentially escape. Mark it as another entry
408         // point into the function.
409         if (opts::Verbosity >= 1) {
410           outs() << "BOLT-INFO: potentially escaped address 0x"
411                  << Twine::utohexstr(Address) << " in function " << BF << '\n';
412         }
413         BF.HasInternalLabelReference = true;
414         return std::make_pair(
415             BF.addEntryPointAtOffset(Address - BF.getAddress()), Addend);
416       }
417     } else {
418       addInterproceduralReference(&BF, Address);
419     }
420   }
421 
422   // With relocations, catch jump table references outside of the basic block
423   // containing the indirect jump.
424   if (HasRelocations) {
425     const MemoryContentsType MemType = analyzeMemoryAt(Address, BF);
426     if (MemType == MemoryContentsType::POSSIBLE_PIC_JUMP_TABLE && IsPCRel) {
427       const MCSymbol *Symbol =
428           getOrCreateJumpTable(BF, Address, JumpTable::JTT_PIC);
429 
430       return std::make_pair(Symbol, Addend);
431     }
432   }
433 
434   if (BinaryData *BD = getBinaryDataContainingAddress(Address))
435     return std::make_pair(BD->getSymbol(), Address - BD->getAddress());
436 
437   // TODO: use DWARF info to get size/alignment here?
438   MCSymbol *TargetSymbol = getOrCreateGlobalSymbol(Address, "DATAat");
439   LLVM_DEBUG(dbgs() << "Created symbol " << TargetSymbol->getName() << '\n');
440   return std::make_pair(TargetSymbol, Addend);
441 }
442 
443 MemoryContentsType BinaryContext::analyzeMemoryAt(uint64_t Address,
444                                                   BinaryFunction &BF) {
445   if (!isX86())
446     return MemoryContentsType::UNKNOWN;
447 
448   ErrorOr<BinarySection &> Section = getSectionForAddress(Address);
449   if (!Section) {
450     // No section - possibly an absolute address. Since we don't allow
451     // internal function addresses to escape the function scope - we
452     // consider it a tail call.
453     if (opts::Verbosity > 1) {
454       errs() << "BOLT-WARNING: no section for address 0x"
455              << Twine::utohexstr(Address) << " referenced from function " << BF
456              << '\n';
457     }
458     return MemoryContentsType::UNKNOWN;
459   }
460 
461   if (Section->isVirtual()) {
462     // The contents are filled at runtime.
463     return MemoryContentsType::UNKNOWN;
464   }
465 
466   // No support for jump tables in code yet.
467   if (Section->isText())
468     return MemoryContentsType::UNKNOWN;
469 
470   // Start with checking for PIC jump table. We expect non-PIC jump tables
471   // to have high 32 bits set to 0.
472   if (analyzeJumpTable(Address, JumpTable::JTT_PIC, BF))
473     return MemoryContentsType::POSSIBLE_PIC_JUMP_TABLE;
474 
475   if (analyzeJumpTable(Address, JumpTable::JTT_NORMAL, BF))
476     return MemoryContentsType::POSSIBLE_JUMP_TABLE;
477 
478   return MemoryContentsType::UNKNOWN;
479 }
480 
481 /// Check if <fragment restored name> == <parent restored name>.cold(.\d+)?
482 bool isPotentialFragmentByName(BinaryFunction &Fragment,
483                                BinaryFunction &Parent) {
484   for (StringRef Name : Parent.getNames()) {
485     std::string NamePrefix = Regex::escape(NameResolver::restore(Name));
486     std::string NameRegex = Twine(NamePrefix, "\\.cold(\\.[0-9]+)?").str();
487     if (Fragment.hasRestoredNameRegex(NameRegex))
488       return true;
489   }
490   return false;
491 }
492 
493 bool BinaryContext::analyzeJumpTable(
494     const uint64_t Address, const JumpTable::JumpTableType Type,
495     BinaryFunction &BF, const uint64_t NextJTAddress,
496     JumpTable::AddressesType *EntriesAsAddress) {
497   // Is one of the targets __builtin_unreachable?
498   bool HasUnreachable = false;
499 
500   // Number of targets other than __builtin_unreachable.
501   uint64_t NumRealEntries = 0;
502 
503   auto addEntryAddress = [&](uint64_t EntryAddress) {
504     if (EntriesAsAddress)
505       EntriesAsAddress->emplace_back(EntryAddress);
506   };
507 
508   auto doesBelongToFunction = [&](const uint64_t Addr,
509                                   BinaryFunction *TargetBF) -> bool {
510     if (BF.containsAddress(Addr))
511       return true;
512     // Nothing to do if we failed to identify the containing function.
513     if (!TargetBF)
514       return false;
515     // Case 1: check if BF is a fragment and TargetBF is its parent.
516     if (BF.isFragment()) {
517       // Parent function may or may not be already registered.
518       // Set parent link based on function name matching heuristic.
519       return registerFragment(BF, *TargetBF);
520     }
521     // Case 2: check if TargetBF is a fragment and BF is its parent.
522     return TargetBF->isFragment() && registerFragment(*TargetBF, BF);
523   };
524 
525   ErrorOr<BinarySection &> Section = getSectionForAddress(Address);
526   if (!Section)
527     return false;
528 
529   // The upper bound is defined by containing object, section limits, and
530   // the next jump table in memory.
531   uint64_t UpperBound = Section->getEndAddress();
532   const BinaryData *JumpTableBD = getBinaryDataAtAddress(Address);
533   if (JumpTableBD && JumpTableBD->getSize()) {
534     assert(JumpTableBD->getEndAddress() <= UpperBound &&
535            "data object cannot cross a section boundary");
536     UpperBound = JumpTableBD->getEndAddress();
537   }
538   if (NextJTAddress)
539     UpperBound = std::min(NextJTAddress, UpperBound);
540 
541   LLVM_DEBUG(dbgs() << "BOLT-DEBUG: analyzeJumpTable in " << BF.getPrintName()
542                     << '\n');
543   const uint64_t EntrySize = getJumpTableEntrySize(Type);
544   for (uint64_t EntryAddress = Address; EntryAddress <= UpperBound - EntrySize;
545        EntryAddress += EntrySize) {
546     LLVM_DEBUG(dbgs() << "  * Checking 0x" << Twine::utohexstr(EntryAddress)
547                       << " -> ");
548     // Check if there's a proper relocation against the jump table entry.
549     if (HasRelocations) {
550       if (Type == JumpTable::JTT_PIC &&
551           !DataPCRelocations.count(EntryAddress)) {
552         LLVM_DEBUG(
553             dbgs() << "FAIL: JTT_PIC table, no relocation for this address\n");
554         break;
555       }
556       if (Type == JumpTable::JTT_NORMAL && !getRelocationAt(EntryAddress)) {
557         LLVM_DEBUG(
558             dbgs()
559             << "FAIL: JTT_NORMAL table, no relocation for this address\n");
560         break;
561       }
562     }
563 
564     const uint64_t Value =
565         (Type == JumpTable::JTT_PIC)
566             ? Address + *getSignedValueAtAddress(EntryAddress, EntrySize)
567             : *getPointerAtAddress(EntryAddress);
568 
569     // __builtin_unreachable() case.
570     if (Value == BF.getAddress() + BF.getSize()) {
571       addEntryAddress(Value);
572       HasUnreachable = true;
573       LLVM_DEBUG(dbgs() << "OK: __builtin_unreachable\n");
574       continue;
575     }
576 
577     // Function or one of its fragments.
578     BinaryFunction *TargetBF = getBinaryFunctionContainingAddress(Value);
579 
580     // We assume that a jump table cannot have function start as an entry.
581     if (!doesBelongToFunction(Value, TargetBF) || Value == BF.getAddress()) {
582       LLVM_DEBUG({
583         if (!BF.containsAddress(Value)) {
584           dbgs() << "FAIL: function doesn't contain this address\n";
585           if (TargetBF) {
586             dbgs() << "  ! function containing this address: "
587                    << TargetBF->getPrintName() << '\n';
588             if (TargetBF->isFragment())
589               dbgs() << "  ! is a fragment\n";
590             for (BinaryFunction *TargetParent : TargetBF->ParentFragments)
591               dbgs() << "  ! its parent is "
592                      << (TargetParent ? TargetParent->getPrintName() : "(none)")
593                      << '\n';
594           }
595         }
596         if (Value == BF.getAddress())
597           dbgs() << "FAIL: jump table cannot have function start as an entry\n";
598       });
599       break;
600     }
601 
602     // Check there's an instruction at this offset.
603     if (TargetBF->getState() == BinaryFunction::State::Disassembled &&
604         !TargetBF->getInstructionAtOffset(Value - TargetBF->getAddress())) {
605       LLVM_DEBUG(dbgs() << "FAIL: no instruction at this offset\n");
606       break;
607     }
608 
609     ++NumRealEntries;
610 
611     if (TargetBF != &BF)
612       BF.setHasIndirectTargetToSplitFragment(true);
613     addEntryAddress(Value);
614   }
615 
616   // It's a jump table if the number of real entries is more than 1, or there's
617   // one real entry and "unreachable" targets. If there are only multiple
618   // "unreachable" targets, then it's not a jump table.
619   return NumRealEntries + HasUnreachable >= 2;
620 }
621 
622 void BinaryContext::populateJumpTables() {
623   LLVM_DEBUG(dbgs() << "DataPCRelocations: " << DataPCRelocations.size()
624                     << '\n');
625   for (auto JTI = JumpTables.begin(), JTE = JumpTables.end(); JTI != JTE;
626        ++JTI) {
627     JumpTable *JT = JTI->second;
628 
629     bool NonSimpleParent = false;
630     for (BinaryFunction *BF : JT->Parents)
631       NonSimpleParent |= !BF->isSimple();
632     if (NonSimpleParent)
633       continue;
634 
635     uint64_t NextJTAddress = 0;
636     auto NextJTI = std::next(JTI);
637     if (NextJTI != JTE)
638       NextJTAddress = NextJTI->second->getAddress();
639 
640     const bool Success =
641         analyzeJumpTable(JT->getAddress(), JT->Type, *(JT->Parents[0]),
642                          NextJTAddress, &JT->EntriesAsAddress);
643     if (!Success) {
644       LLVM_DEBUG(ListSeparator LS;
645                  dbgs() << "failed to analyze jump table in function ";
646                  for (BinaryFunction *Frag
647                       : JT->Parents) dbgs()
648                  << LS << *Frag;
649                  dbgs() << '\n';);
650       JT->print(dbgs());
651       if (NextJTI != JTE) {
652         LLVM_DEBUG(ListSeparator LS;
653                    dbgs() << "next jump table at 0x"
654                           << Twine::utohexstr(NextJTI->second->getAddress())
655                           << " belongs to function ";
656                    for (BinaryFunction *Frag
657                         : NextJTI->second->Parents) dbgs()
658                    << LS << *Frag;
659                    dbgs() << "\n";);
660         NextJTI->second->print(dbgs());
661       }
662       llvm_unreachable("jump table heuristic failure");
663     }
664     for (BinaryFunction *Frag : JT->Parents) {
665       for (uint64_t EntryAddress : JT->EntriesAsAddress)
666         // if target is builtin_unreachable
667         if (EntryAddress == Frag->getAddress() + Frag->getSize()) {
668           Frag->IgnoredBranches.emplace_back(EntryAddress - Frag->getAddress(),
669                                              Frag->getSize());
670         } else if (EntryAddress >= Frag->getAddress() &&
671                    EntryAddress < Frag->getAddress() + Frag->getSize()) {
672           Frag->registerReferencedOffset(EntryAddress - Frag->getAddress());
673         }
674     }
675 
676     // In strict mode, erase PC-relative relocation record. Later we check that
677     // all such records are erased and thus have been accounted for.
678     if (opts::StrictMode && JT->Type == JumpTable::JTT_PIC) {
679       for (uint64_t Address = JT->getAddress();
680            Address < JT->getAddress() + JT->getSize();
681            Address += JT->EntrySize) {
682         DataPCRelocations.erase(DataPCRelocations.find(Address));
683       }
684     }
685 
686     // Mark to skip the function and all its fragments.
687     for (BinaryFunction *Frag : JT->Parents)
688       if (Frag->hasIndirectTargetToSplitFragment())
689         addFragmentsToSkip(Frag);
690   }
691 
692   if (opts::StrictMode && DataPCRelocations.size()) {
693     LLVM_DEBUG({
694       dbgs() << DataPCRelocations.size()
695              << " unclaimed PC-relative relocations left in data:\n";
696       for (uint64_t Reloc : DataPCRelocations)
697         dbgs() << Twine::utohexstr(Reloc) << '\n';
698     });
699     assert(0 && "unclaimed PC-relative relocations left in data\n");
700   }
701   clearList(DataPCRelocations);
702 }
703 
704 void BinaryContext::skipMarkedFragments() {
705   std::vector<BinaryFunction *> FragmentQueue;
706   // Copy the functions to FragmentQueue.
707   FragmentQueue.assign(FragmentsToSkip.begin(), FragmentsToSkip.end());
708   auto addToWorklist = [&](BinaryFunction *Function) -> void {
709     if (FragmentsToSkip.count(Function))
710       return;
711     FragmentQueue.push_back(Function);
712     addFragmentsToSkip(Function);
713   };
714   // Functions containing split jump tables need to be skipped with all
715   // fragments (transitively).
716   for (size_t I = 0; I != FragmentQueue.size(); I++) {
717     BinaryFunction *BF = FragmentQueue[I];
718     assert(FragmentsToSkip.count(BF) &&
719            "internal error in traversing function fragments");
720     if (opts::Verbosity >= 1)
721       errs() << "BOLT-WARNING: Ignoring " << BF->getPrintName() << '\n';
722     BF->setSimple(false);
723     BF->setHasIndirectTargetToSplitFragment(true);
724 
725     llvm::for_each(BF->Fragments, addToWorklist);
726     llvm::for_each(BF->ParentFragments, addToWorklist);
727   }
728   if (!FragmentsToSkip.empty())
729     errs() << "BOLT-WARNING: skipped " << FragmentsToSkip.size() << " function"
730            << (FragmentsToSkip.size() == 1 ? "" : "s")
731            << " due to cold fragments\n";
732 }
733 
734 MCSymbol *BinaryContext::getOrCreateGlobalSymbol(uint64_t Address, Twine Prefix,
735                                                  uint64_t Size,
736                                                  uint16_t Alignment,
737                                                  unsigned Flags) {
738   auto Itr = BinaryDataMap.find(Address);
739   if (Itr != BinaryDataMap.end()) {
740     assert(Itr->second->getSize() == Size || !Size);
741     return Itr->second->getSymbol();
742   }
743 
744   std::string Name = (Prefix + "0x" + Twine::utohexstr(Address)).str();
745   assert(!GlobalSymbols.count(Name) && "created name is not unique");
746   return registerNameAtAddress(Name, Address, Size, Alignment, Flags);
747 }
748 
749 MCSymbol *BinaryContext::getOrCreateUndefinedGlobalSymbol(StringRef Name) {
750   return Ctx->getOrCreateSymbol(Name);
751 }
752 
753 BinaryFunction *BinaryContext::createBinaryFunction(
754     const std::string &Name, BinarySection &Section, uint64_t Address,
755     uint64_t Size, uint64_t SymbolSize, uint16_t Alignment) {
756   auto Result = BinaryFunctions.emplace(
757       Address, BinaryFunction(Name, Section, Address, Size, *this));
758   assert(Result.second == true && "unexpected duplicate function");
759   BinaryFunction *BF = &Result.first->second;
760   registerNameAtAddress(Name, Address, SymbolSize ? SymbolSize : Size,
761                         Alignment);
762   setSymbolToFunctionMap(BF->getSymbol(), BF);
763   return BF;
764 }
765 
766 const MCSymbol *
767 BinaryContext::getOrCreateJumpTable(BinaryFunction &Function, uint64_t Address,
768                                     JumpTable::JumpTableType Type) {
769   auto isFragmentOf = [](BinaryFunction *Fragment, BinaryFunction *Parent) {
770     return (Fragment->isFragment() && Fragment->isParentFragment(Parent));
771   };
772 
773   // Two fragments of same function access same jump table
774   if (JumpTable *JT = getJumpTableContainingAddress(Address)) {
775     assert(JT->Type == Type && "jump table types have to match");
776     assert(Address == JT->getAddress() && "unexpected non-empty jump table");
777 
778     // Prevent associating a jump table to a specific fragment twice.
779     // This simple check arises from the assumption: no more than 2 fragments.
780     if (JT->Parents.size() == 1 && JT->Parents[0] != &Function) {
781       bool SameFunction = isFragmentOf(JT->Parents[0], &Function) ||
782                           isFragmentOf(&Function, JT->Parents[0]);
783       assert(SameFunction &&
784              "cannot re-use jump table of a different function");
785       // Duplicate the entry for the parent function for easy access
786       JT->Parents.push_back(&Function);
787       if (opts::Verbosity > 2) {
788         outs() << "BOLT-INFO: Multiple fragments access same jump table: "
789                << JT->Parents[0]->getPrintName() << "; "
790                << Function.getPrintName() << "\n";
791         JT->print(outs());
792       }
793       Function.JumpTables.emplace(Address, JT);
794       JT->Parents[0]->setHasIndirectTargetToSplitFragment(true);
795       JT->Parents[1]->setHasIndirectTargetToSplitFragment(true);
796     }
797 
798     bool IsJumpTableParent = false;
799     for (BinaryFunction *Frag : JT->Parents)
800       if (Frag == &Function)
801         IsJumpTableParent = true;
802     assert(IsJumpTableParent &&
803            "cannot re-use jump table of a different function");
804     return JT->getFirstLabel();
805   }
806 
807   // Re-use the existing symbol if possible.
808   MCSymbol *JTLabel = nullptr;
809   if (BinaryData *Object = getBinaryDataAtAddress(Address)) {
810     if (!isInternalSymbolName(Object->getSymbol()->getName()))
811       JTLabel = Object->getSymbol();
812   }
813 
814   const uint64_t EntrySize = getJumpTableEntrySize(Type);
815   if (!JTLabel) {
816     const std::string JumpTableName = generateJumpTableName(Function, Address);
817     JTLabel = registerNameAtAddress(JumpTableName, Address, 0, EntrySize);
818   }
819 
820   LLVM_DEBUG(dbgs() << "BOLT-DEBUG: creating jump table " << JTLabel->getName()
821                     << " in function " << Function << '\n');
822 
823   JumpTable *JT = new JumpTable(*JTLabel, Address, EntrySize, Type,
824                                 JumpTable::LabelMapType{{0, JTLabel}},
825                                 *getSectionForAddress(Address));
826   JT->Parents.push_back(&Function);
827   if (opts::Verbosity > 2)
828     JT->print(outs());
829   JumpTables.emplace(Address, JT);
830 
831   // Duplicate the entry for the parent function for easy access.
832   Function.JumpTables.emplace(Address, JT);
833   return JTLabel;
834 }
835 
836 std::pair<uint64_t, const MCSymbol *>
837 BinaryContext::duplicateJumpTable(BinaryFunction &Function, JumpTable *JT,
838                                   const MCSymbol *OldLabel) {
839   auto L = scopeLock();
840   unsigned Offset = 0;
841   bool Found = false;
842   for (std::pair<const unsigned, MCSymbol *> Elmt : JT->Labels) {
843     if (Elmt.second != OldLabel)
844       continue;
845     Offset = Elmt.first;
846     Found = true;
847     break;
848   }
849   assert(Found && "Label not found");
850   (void)Found;
851   MCSymbol *NewLabel = Ctx->createNamedTempSymbol("duplicatedJT");
852   JumpTable *NewJT =
853       new JumpTable(*NewLabel, JT->getAddress(), JT->EntrySize, JT->Type,
854                     JumpTable::LabelMapType{{Offset, NewLabel}},
855                     *getSectionForAddress(JT->getAddress()));
856   NewJT->Parents = JT->Parents;
857   NewJT->Entries = JT->Entries;
858   NewJT->Counts = JT->Counts;
859   uint64_t JumpTableID = ++DuplicatedJumpTables;
860   // Invert it to differentiate from regular jump tables whose IDs are their
861   // addresses in the input binary memory space
862   JumpTableID = ~JumpTableID;
863   JumpTables.emplace(JumpTableID, NewJT);
864   Function.JumpTables.emplace(JumpTableID, NewJT);
865   return std::make_pair(JumpTableID, NewLabel);
866 }
867 
868 std::string BinaryContext::generateJumpTableName(const BinaryFunction &BF,
869                                                  uint64_t Address) {
870   size_t Id;
871   uint64_t Offset = 0;
872   if (const JumpTable *JT = BF.getJumpTableContainingAddress(Address)) {
873     Offset = Address - JT->getAddress();
874     auto Itr = JT->Labels.find(Offset);
875     if (Itr != JT->Labels.end())
876       return std::string(Itr->second->getName());
877     Id = JumpTableIds.at(JT->getAddress());
878   } else {
879     Id = JumpTableIds[Address] = BF.JumpTables.size();
880   }
881   return ("JUMP_TABLE/" + BF.getOneName().str() + "." + std::to_string(Id) +
882           (Offset ? ("." + std::to_string(Offset)) : ""));
883 }
884 
885 bool BinaryContext::hasValidCodePadding(const BinaryFunction &BF) {
886   // FIXME: aarch64 support is missing.
887   if (!isX86())
888     return true;
889 
890   if (BF.getSize() == BF.getMaxSize())
891     return true;
892 
893   ErrorOr<ArrayRef<unsigned char>> FunctionData = BF.getData();
894   assert(FunctionData && "cannot get function as data");
895 
896   uint64_t Offset = BF.getSize();
897   MCInst Instr;
898   uint64_t InstrSize = 0;
899   uint64_t InstrAddress = BF.getAddress() + Offset;
900   using std::placeholders::_1;
901 
902   // Skip instructions that satisfy the predicate condition.
903   auto skipInstructions = [&](std::function<bool(const MCInst &)> Predicate) {
904     const uint64_t StartOffset = Offset;
905     for (; Offset < BF.getMaxSize();
906          Offset += InstrSize, InstrAddress += InstrSize) {
907       if (!DisAsm->getInstruction(Instr, InstrSize, FunctionData->slice(Offset),
908                                   InstrAddress, nulls()))
909         break;
910       if (!Predicate(Instr))
911         break;
912     }
913 
914     return Offset - StartOffset;
915   };
916 
917   // Skip a sequence of zero bytes.
918   auto skipZeros = [&]() {
919     const uint64_t StartOffset = Offset;
920     for (; Offset < BF.getMaxSize(); ++Offset)
921       if ((*FunctionData)[Offset] != 0)
922         break;
923 
924     return Offset - StartOffset;
925   };
926 
927   // Accept the whole padding area filled with breakpoints.
928   auto isBreakpoint = std::bind(&MCPlusBuilder::isBreakpoint, MIB.get(), _1);
929   if (skipInstructions(isBreakpoint) && Offset == BF.getMaxSize())
930     return true;
931 
932   auto isNoop = std::bind(&MCPlusBuilder::isNoop, MIB.get(), _1);
933 
934   // Some functions have a jump to the next function or to the padding area
935   // inserted after the body.
936   auto isSkipJump = [&](const MCInst &Instr) {
937     uint64_t TargetAddress = 0;
938     if (MIB->isUnconditionalBranch(Instr) &&
939         MIB->evaluateBranch(Instr, InstrAddress, InstrSize, TargetAddress)) {
940       if (TargetAddress >= InstrAddress + InstrSize &&
941           TargetAddress <= BF.getAddress() + BF.getMaxSize()) {
942         return true;
943       }
944     }
945     return false;
946   };
947 
948   // Skip over nops, jumps, and zero padding. Allow interleaving (this happens).
949   while (skipInstructions(isNoop) || skipInstructions(isSkipJump) ||
950          skipZeros())
951     ;
952 
953   if (Offset == BF.getMaxSize())
954     return true;
955 
956   if (opts::Verbosity >= 1) {
957     errs() << "BOLT-WARNING: bad padding at address 0x"
958            << Twine::utohexstr(BF.getAddress() + BF.getSize())
959            << " starting at offset " << (Offset - BF.getSize())
960            << " in function " << BF << '\n'
961            << FunctionData->slice(BF.getSize(), BF.getMaxSize() - BF.getSize())
962            << '\n';
963   }
964 
965   return false;
966 }
967 
968 void BinaryContext::adjustCodePadding() {
969   for (auto &BFI : BinaryFunctions) {
970     BinaryFunction &BF = BFI.second;
971     if (!shouldEmit(BF))
972       continue;
973 
974     if (!hasValidCodePadding(BF)) {
975       if (HasRelocations) {
976         if (opts::Verbosity >= 1) {
977           outs() << "BOLT-INFO: function " << BF
978                  << " has invalid padding. Ignoring the function.\n";
979         }
980         BF.setIgnored();
981       } else {
982         BF.setMaxSize(BF.getSize());
983       }
984     }
985   }
986 }
987 
988 MCSymbol *BinaryContext::registerNameAtAddress(StringRef Name, uint64_t Address,
989                                                uint64_t Size,
990                                                uint16_t Alignment,
991                                                unsigned Flags) {
992   // Register the name with MCContext.
993   MCSymbol *Symbol = Ctx->getOrCreateSymbol(Name);
994 
995   auto GAI = BinaryDataMap.find(Address);
996   BinaryData *BD;
997   if (GAI == BinaryDataMap.end()) {
998     ErrorOr<BinarySection &> SectionOrErr = getSectionForAddress(Address);
999     BinarySection &Section =
1000         SectionOrErr ? SectionOrErr.get() : absoluteSection();
1001     BD = new BinaryData(*Symbol, Address, Size, Alignment ? Alignment : 1,
1002                         Section, Flags);
1003     GAI = BinaryDataMap.emplace(Address, BD).first;
1004     GlobalSymbols[Name] = BD;
1005     updateObjectNesting(GAI);
1006   } else {
1007     BD = GAI->second;
1008     if (!BD->hasName(Name)) {
1009       GlobalSymbols[Name] = BD;
1010       BD->Symbols.push_back(Symbol);
1011     }
1012   }
1013 
1014   return Symbol;
1015 }
1016 
1017 const BinaryData *
1018 BinaryContext::getBinaryDataContainingAddressImpl(uint64_t Address) const {
1019   auto NI = BinaryDataMap.lower_bound(Address);
1020   auto End = BinaryDataMap.end();
1021   if ((NI != End && Address == NI->first) ||
1022       ((NI != BinaryDataMap.begin()) && (NI-- != BinaryDataMap.begin()))) {
1023     if (NI->second->containsAddress(Address))
1024       return NI->second;
1025 
1026     // If this is a sub-symbol, see if a parent data contains the address.
1027     const BinaryData *BD = NI->second->getParent();
1028     while (BD) {
1029       if (BD->containsAddress(Address))
1030         return BD;
1031       BD = BD->getParent();
1032     }
1033   }
1034   return nullptr;
1035 }
1036 
1037 bool BinaryContext::setBinaryDataSize(uint64_t Address, uint64_t Size) {
1038   auto NI = BinaryDataMap.find(Address);
1039   assert(NI != BinaryDataMap.end());
1040   if (NI == BinaryDataMap.end())
1041     return false;
1042   // TODO: it's possible that a jump table starts at the same address
1043   // as a larger blob of private data.  When we set the size of the
1044   // jump table, it might be smaller than the total blob size.  In this
1045   // case we just leave the original size since (currently) it won't really
1046   // affect anything.
1047   assert((!NI->second->Size || NI->second->Size == Size ||
1048           (NI->second->isJumpTable() && NI->second->Size > Size)) &&
1049          "can't change the size of a symbol that has already had its "
1050          "size set");
1051   if (!NI->second->Size) {
1052     NI->second->Size = Size;
1053     updateObjectNesting(NI);
1054     return true;
1055   }
1056   return false;
1057 }
1058 
1059 void BinaryContext::generateSymbolHashes() {
1060   auto isPadding = [](const BinaryData &BD) {
1061     StringRef Contents = BD.getSection().getContents();
1062     StringRef SymData = Contents.substr(BD.getOffset(), BD.getSize());
1063     return (BD.getName().startswith("HOLEat") ||
1064             SymData.find_first_not_of(0) == StringRef::npos);
1065   };
1066 
1067   uint64_t NumCollisions = 0;
1068   for (auto &Entry : BinaryDataMap) {
1069     BinaryData &BD = *Entry.second;
1070     StringRef Name = BD.getName();
1071 
1072     if (!isInternalSymbolName(Name))
1073       continue;
1074 
1075     // First check if a non-anonymous alias exists and move it to the front.
1076     if (BD.getSymbols().size() > 1) {
1077       auto Itr = llvm::find_if(BD.getSymbols(), [&](const MCSymbol *Symbol) {
1078         return !isInternalSymbolName(Symbol->getName());
1079       });
1080       if (Itr != BD.getSymbols().end()) {
1081         size_t Idx = std::distance(BD.getSymbols().begin(), Itr);
1082         std::swap(BD.getSymbols()[0], BD.getSymbols()[Idx]);
1083         continue;
1084       }
1085     }
1086 
1087     // We have to skip 0 size symbols since they will all collide.
1088     if (BD.getSize() == 0) {
1089       continue;
1090     }
1091 
1092     const uint64_t Hash = BD.getSection().hash(BD);
1093     const size_t Idx = Name.find("0x");
1094     std::string NewName =
1095         (Twine(Name.substr(0, Idx)) + "_" + Twine::utohexstr(Hash)).str();
1096     if (getBinaryDataByName(NewName)) {
1097       // Ignore collisions for symbols that appear to be padding
1098       // (i.e. all zeros or a "hole")
1099       if (!isPadding(BD)) {
1100         if (opts::Verbosity) {
1101           errs() << "BOLT-WARNING: collision detected when hashing " << BD
1102                  << " with new name (" << NewName << "), skipping.\n";
1103         }
1104         ++NumCollisions;
1105       }
1106       continue;
1107     }
1108     BD.Symbols.insert(BD.Symbols.begin(), Ctx->getOrCreateSymbol(NewName));
1109     GlobalSymbols[NewName] = &BD;
1110   }
1111   if (NumCollisions) {
1112     errs() << "BOLT-WARNING: " << NumCollisions
1113            << " collisions detected while hashing binary objects";
1114     if (!opts::Verbosity)
1115       errs() << ". Use -v=1 to see the list.";
1116     errs() << '\n';
1117   }
1118 }
1119 
1120 bool BinaryContext::registerFragment(BinaryFunction &TargetFunction,
1121                                      BinaryFunction &Function) const {
1122   if (!isPotentialFragmentByName(TargetFunction, Function))
1123     return false;
1124   assert(TargetFunction.isFragment() && "TargetFunction must be a fragment");
1125   if (TargetFunction.isParentFragment(&Function))
1126     return true;
1127   TargetFunction.addParentFragment(Function);
1128   Function.addFragment(TargetFunction);
1129   if (!HasRelocations) {
1130     TargetFunction.setSimple(false);
1131     Function.setSimple(false);
1132   }
1133   if (opts::Verbosity >= 1) {
1134     outs() << "BOLT-INFO: marking " << TargetFunction << " as a fragment of "
1135            << Function << '\n';
1136   }
1137   return true;
1138 }
1139 
1140 void BinaryContext::addAdrpAddRelocAArch64(BinaryFunction &BF,
1141                                            MCInst &LoadLowBits,
1142                                            MCInst &LoadHiBits,
1143                                            uint64_t Target) {
1144   const MCSymbol *TargetSymbol;
1145   uint64_t Addend = 0;
1146   std::tie(TargetSymbol, Addend) = handleAddressRef(Target, BF,
1147                                                     /*IsPCRel*/ true);
1148   int64_t Val;
1149   MIB->replaceImmWithSymbolRef(LoadHiBits, TargetSymbol, Addend, Ctx.get(), Val,
1150                                ELF::R_AARCH64_ADR_PREL_PG_HI21);
1151   MIB->replaceImmWithSymbolRef(LoadLowBits, TargetSymbol, Addend, Ctx.get(),
1152                                Val, ELF::R_AARCH64_ADD_ABS_LO12_NC);
1153 }
1154 
1155 bool BinaryContext::handleAArch64Veneer(uint64_t Address, bool MatchOnly) {
1156   BinaryFunction *TargetFunction = getBinaryFunctionContainingAddress(Address);
1157   if (TargetFunction)
1158     return false;
1159 
1160   ErrorOr<BinarySection &> Section = getSectionForAddress(Address);
1161   assert(Section && "cannot get section for referenced address");
1162   if (!Section->isText())
1163     return false;
1164 
1165   bool Ret = false;
1166   StringRef SectionContents = Section->getContents();
1167   uint64_t Offset = Address - Section->getAddress();
1168   const uint64_t MaxSize = SectionContents.size() - Offset;
1169   const uint8_t *Bytes =
1170       reinterpret_cast<const uint8_t *>(SectionContents.data());
1171   ArrayRef<uint8_t> Data(Bytes + Offset, MaxSize);
1172 
1173   auto matchVeneer = [&](BinaryFunction::InstrMapType &Instructions,
1174                          MCInst &Instruction, uint64_t Offset,
1175                          uint64_t AbsoluteInstrAddr,
1176                          uint64_t TotalSize) -> bool {
1177     MCInst *TargetHiBits, *TargetLowBits;
1178     uint64_t TargetAddress, Count;
1179     Count = MIB->matchLinkerVeneer(Instructions.begin(), Instructions.end(),
1180                                    AbsoluteInstrAddr, Instruction, TargetHiBits,
1181                                    TargetLowBits, TargetAddress);
1182     if (!Count)
1183       return false;
1184 
1185     if (MatchOnly)
1186       return true;
1187 
1188     // NOTE The target symbol was created during disassemble's
1189     // handleExternalReference
1190     const MCSymbol *VeneerSymbol = getOrCreateGlobalSymbol(Address, "FUNCat");
1191     BinaryFunction *Veneer = createBinaryFunction(VeneerSymbol->getName().str(),
1192                                                   *Section, Address, TotalSize);
1193     addAdrpAddRelocAArch64(*Veneer, *TargetLowBits, *TargetHiBits,
1194                            TargetAddress);
1195     MIB->addAnnotation(Instruction, "AArch64Veneer", true);
1196     Veneer->addInstruction(Offset, std::move(Instruction));
1197     --Count;
1198     for (auto It = std::prev(Instructions.end()); Count != 0;
1199          It = std::prev(It), --Count) {
1200       MIB->addAnnotation(It->second, "AArch64Veneer", true);
1201       Veneer->addInstruction(It->first, std::move(It->second));
1202     }
1203 
1204     Veneer->getOrCreateLocalLabel(Address);
1205     Veneer->setMaxSize(TotalSize);
1206     Veneer->updateState(BinaryFunction::State::Disassembled);
1207     LLVM_DEBUG(dbgs() << "BOLT-DEBUG: handling veneer function at 0x" << Address
1208                       << "\n");
1209     return true;
1210   };
1211 
1212   uint64_t Size = 0, TotalSize = 0;
1213   BinaryFunction::InstrMapType VeneerInstructions;
1214   for (Offset = 0; Offset < MaxSize; Offset += Size) {
1215     MCInst Instruction;
1216     const uint64_t AbsoluteInstrAddr = Address + Offset;
1217     if (!SymbolicDisAsm->getInstruction(Instruction, Size, Data.slice(Offset),
1218                                         AbsoluteInstrAddr, nulls()))
1219       break;
1220 
1221     TotalSize += Size;
1222     if (MIB->isBranch(Instruction)) {
1223       Ret = matchVeneer(VeneerInstructions, Instruction, Offset,
1224                         AbsoluteInstrAddr, TotalSize);
1225       break;
1226     }
1227 
1228     VeneerInstructions.emplace(Offset, std::move(Instruction));
1229   }
1230 
1231   return Ret;
1232 }
1233 
1234 void BinaryContext::processInterproceduralReferences() {
1235   for (const std::pair<BinaryFunction *, uint64_t> &It :
1236        InterproceduralReferences) {
1237     BinaryFunction &Function = *It.first;
1238     uint64_t Address = It.second;
1239     if (!Address || Function.isIgnored())
1240       continue;
1241 
1242     BinaryFunction *TargetFunction =
1243         getBinaryFunctionContainingAddress(Address);
1244     if (&Function == TargetFunction)
1245       continue;
1246 
1247     if (TargetFunction) {
1248       if (TargetFunction->isFragment() &&
1249           !registerFragment(*TargetFunction, Function)) {
1250         errs() << "BOLT-WARNING: interprocedural reference between unrelated "
1251                   "fragments: "
1252                << Function.getPrintName() << " and "
1253                << TargetFunction->getPrintName() << '\n';
1254       }
1255       if (uint64_t Offset = Address - TargetFunction->getAddress())
1256         TargetFunction->addEntryPointAtOffset(Offset);
1257 
1258       continue;
1259     }
1260 
1261     // Check if address falls in function padding space - this could be
1262     // unmarked data in code. In this case adjust the padding space size.
1263     ErrorOr<BinarySection &> Section = getSectionForAddress(Address);
1264     assert(Section && "cannot get section for referenced address");
1265 
1266     if (!Section->isText())
1267       continue;
1268 
1269     // PLT requires special handling and could be ignored in this context.
1270     StringRef SectionName = Section->getName();
1271     if (SectionName == ".plt" || SectionName == ".plt.got")
1272       continue;
1273 
1274     // Check if it is aarch64 veneer written at Address
1275     if (isAArch64() && handleAArch64Veneer(Address))
1276       continue;
1277 
1278     if (opts::processAllFunctions()) {
1279       errs() << "BOLT-ERROR: cannot process binaries with unmarked "
1280              << "object in code at address 0x" << Twine::utohexstr(Address)
1281              << " belonging to section " << SectionName << " in current mode\n";
1282       exit(1);
1283     }
1284 
1285     TargetFunction = getBinaryFunctionContainingAddress(Address,
1286                                                         /*CheckPastEnd=*/false,
1287                                                         /*UseMaxSize=*/true);
1288     // We are not going to overwrite non-simple functions, but for simple
1289     // ones - adjust the padding size.
1290     if (TargetFunction && TargetFunction->isSimple()) {
1291       errs() << "BOLT-WARNING: function " << *TargetFunction
1292              << " has an object detected in a padding region at address 0x"
1293              << Twine::utohexstr(Address) << '\n';
1294       TargetFunction->setMaxSize(TargetFunction->getSize());
1295     }
1296   }
1297 
1298   InterproceduralReferences.clear();
1299 }
1300 
1301 void BinaryContext::postProcessSymbolTable() {
1302   fixBinaryDataHoles();
1303   bool Valid = true;
1304   for (auto &Entry : BinaryDataMap) {
1305     BinaryData *BD = Entry.second;
1306     if ((BD->getName().startswith("SYMBOLat") ||
1307          BD->getName().startswith("DATAat")) &&
1308         !BD->getParent() && !BD->getSize() && !BD->isAbsolute() &&
1309         BD->getSection()) {
1310       errs() << "BOLT-WARNING: zero-sized top level symbol: " << *BD << "\n";
1311       Valid = false;
1312     }
1313   }
1314   assert(Valid);
1315   (void)Valid;
1316   generateSymbolHashes();
1317 }
1318 
1319 void BinaryContext::foldFunction(BinaryFunction &ChildBF,
1320                                  BinaryFunction &ParentBF) {
1321   assert(!ChildBF.isMultiEntry() && !ParentBF.isMultiEntry() &&
1322          "cannot merge functions with multiple entry points");
1323 
1324   std::unique_lock<std::shared_timed_mutex> WriteCtxLock(CtxMutex,
1325                                                          std::defer_lock);
1326   std::unique_lock<std::shared_timed_mutex> WriteSymbolMapLock(
1327       SymbolToFunctionMapMutex, std::defer_lock);
1328 
1329   const StringRef ChildName = ChildBF.getOneName();
1330 
1331   // Move symbols over and update bookkeeping info.
1332   for (MCSymbol *Symbol : ChildBF.getSymbols()) {
1333     ParentBF.getSymbols().push_back(Symbol);
1334     WriteSymbolMapLock.lock();
1335     SymbolToFunctionMap[Symbol] = &ParentBF;
1336     WriteSymbolMapLock.unlock();
1337     // NB: there's no need to update BinaryDataMap and GlobalSymbols.
1338   }
1339   ChildBF.getSymbols().clear();
1340 
1341   // Move other names the child function is known under.
1342   llvm::move(ChildBF.Aliases, std::back_inserter(ParentBF.Aliases));
1343   ChildBF.Aliases.clear();
1344 
1345   if (HasRelocations) {
1346     // Merge execution counts of ChildBF into those of ParentBF.
1347     // Without relocations, we cannot reliably merge profiles as both functions
1348     // continue to exist and either one can be executed.
1349     ChildBF.mergeProfileDataInto(ParentBF);
1350 
1351     std::shared_lock<std::shared_timed_mutex> ReadBfsLock(BinaryFunctionsMutex,
1352                                                           std::defer_lock);
1353     std::unique_lock<std::shared_timed_mutex> WriteBfsLock(BinaryFunctionsMutex,
1354                                                            std::defer_lock);
1355     // Remove ChildBF from the global set of functions in relocs mode.
1356     ReadBfsLock.lock();
1357     auto FI = BinaryFunctions.find(ChildBF.getAddress());
1358     ReadBfsLock.unlock();
1359 
1360     assert(FI != BinaryFunctions.end() && "function not found");
1361     assert(&ChildBF == &FI->second && "function mismatch");
1362 
1363     WriteBfsLock.lock();
1364     ChildBF.clearDisasmState();
1365     FI = BinaryFunctions.erase(FI);
1366     WriteBfsLock.unlock();
1367 
1368   } else {
1369     // In non-relocation mode we keep the function, but rename it.
1370     std::string NewName = "__ICF_" + ChildName.str();
1371 
1372     WriteCtxLock.lock();
1373     ChildBF.getSymbols().push_back(Ctx->getOrCreateSymbol(NewName));
1374     WriteCtxLock.unlock();
1375 
1376     ChildBF.setFolded(&ParentBF);
1377   }
1378 }
1379 
1380 void BinaryContext::fixBinaryDataHoles() {
1381   assert(validateObjectNesting() && "object nesting inconsitency detected");
1382 
1383   for (BinarySection &Section : allocatableSections()) {
1384     std::vector<std::pair<uint64_t, uint64_t>> Holes;
1385 
1386     auto isNotHole = [&Section](const binary_data_iterator &Itr) {
1387       BinaryData *BD = Itr->second;
1388       bool isHole = (!BD->getParent() && !BD->getSize() && BD->isObject() &&
1389                      (BD->getName().startswith("SYMBOLat0x") ||
1390                       BD->getName().startswith("DATAat0x") ||
1391                       BD->getName().startswith("ANONYMOUS")));
1392       return !isHole && BD->getSection() == Section && !BD->getParent();
1393     };
1394 
1395     auto BDStart = BinaryDataMap.begin();
1396     auto BDEnd = BinaryDataMap.end();
1397     auto Itr = FilteredBinaryDataIterator(isNotHole, BDStart, BDEnd);
1398     auto End = FilteredBinaryDataIterator(isNotHole, BDEnd, BDEnd);
1399 
1400     uint64_t EndAddress = Section.getAddress();
1401 
1402     while (Itr != End) {
1403       if (Itr->second->getAddress() > EndAddress) {
1404         uint64_t Gap = Itr->second->getAddress() - EndAddress;
1405         Holes.emplace_back(EndAddress, Gap);
1406       }
1407       EndAddress = Itr->second->getEndAddress();
1408       ++Itr;
1409     }
1410 
1411     if (EndAddress < Section.getEndAddress())
1412       Holes.emplace_back(EndAddress, Section.getEndAddress() - EndAddress);
1413 
1414     // If there is already a symbol at the start of the hole, grow that symbol
1415     // to cover the rest.  Otherwise, create a new symbol to cover the hole.
1416     for (std::pair<uint64_t, uint64_t> &Hole : Holes) {
1417       BinaryData *BD = getBinaryDataAtAddress(Hole.first);
1418       if (BD) {
1419         // BD->getSection() can be != Section if there are sections that
1420         // overlap.  In this case it is probably safe to just skip the holes
1421         // since the overlapping section will not(?) have any symbols in it.
1422         if (BD->getSection() == Section)
1423           setBinaryDataSize(Hole.first, Hole.second);
1424       } else {
1425         getOrCreateGlobalSymbol(Hole.first, "HOLEat", Hole.second, 1);
1426       }
1427     }
1428   }
1429 
1430   assert(validateObjectNesting() && "object nesting inconsitency detected");
1431   assert(validateHoles() && "top level hole detected in object map");
1432 }
1433 
1434 void BinaryContext::printGlobalSymbols(raw_ostream &OS) const {
1435   const BinarySection *CurrentSection = nullptr;
1436   bool FirstSection = true;
1437 
1438   for (auto &Entry : BinaryDataMap) {
1439     const BinaryData *BD = Entry.second;
1440     const BinarySection &Section = BD->getSection();
1441     if (FirstSection || Section != *CurrentSection) {
1442       uint64_t Address, Size;
1443       StringRef Name = Section.getName();
1444       if (Section) {
1445         Address = Section.getAddress();
1446         Size = Section.getSize();
1447       } else {
1448         Address = BD->getAddress();
1449         Size = BD->getSize();
1450       }
1451       OS << "BOLT-INFO: Section " << Name << ", "
1452          << "0x" + Twine::utohexstr(Address) << ":"
1453          << "0x" + Twine::utohexstr(Address + Size) << "/" << Size << "\n";
1454       CurrentSection = &Section;
1455       FirstSection = false;
1456     }
1457 
1458     OS << "BOLT-INFO: ";
1459     const BinaryData *P = BD->getParent();
1460     while (P) {
1461       OS << "  ";
1462       P = P->getParent();
1463     }
1464     OS << *BD << "\n";
1465   }
1466 }
1467 
1468 Expected<unsigned> BinaryContext::getDwarfFile(
1469     StringRef Directory, StringRef FileName, unsigned FileNumber,
1470     Optional<MD5::MD5Result> Checksum, Optional<StringRef> Source,
1471     unsigned CUID, unsigned DWARFVersion) {
1472   DwarfLineTable &Table = DwarfLineTablesCUMap[CUID];
1473   return Table.tryGetFile(Directory, FileName, Checksum, Source, DWARFVersion,
1474                           FileNumber);
1475 }
1476 
1477 unsigned BinaryContext::addDebugFilenameToUnit(const uint32_t DestCUID,
1478                                                const uint32_t SrcCUID,
1479                                                unsigned FileIndex) {
1480   DWARFCompileUnit *SrcUnit = DwCtx->getCompileUnitForOffset(SrcCUID);
1481   const DWARFDebugLine::LineTable *LineTable =
1482       DwCtx->getLineTableForUnit(SrcUnit);
1483   const std::vector<DWARFDebugLine::FileNameEntry> &FileNames =
1484       LineTable->Prologue.FileNames;
1485   // Dir indexes start at 1, as DWARF file numbers, and a dir index 0
1486   // means empty dir.
1487   assert(FileIndex > 0 && FileIndex <= FileNames.size() &&
1488          "FileIndex out of range for the compilation unit.");
1489   StringRef Dir = "";
1490   if (FileNames[FileIndex - 1].DirIdx != 0) {
1491     if (Optional<const char *> DirName = dwarf::toString(
1492             LineTable->Prologue
1493                 .IncludeDirectories[FileNames[FileIndex - 1].DirIdx - 1])) {
1494       Dir = *DirName;
1495     }
1496   }
1497   StringRef FileName = "";
1498   if (Optional<const char *> FName =
1499           dwarf::toString(FileNames[FileIndex - 1].Name))
1500     FileName = *FName;
1501   assert(FileName != "");
1502   DWARFCompileUnit *DstUnit = DwCtx->getCompileUnitForOffset(DestCUID);
1503   return cantFail(getDwarfFile(Dir, FileName, 0, None, None, DestCUID,
1504                                DstUnit->getVersion()));
1505 }
1506 
1507 std::vector<BinaryFunction *> BinaryContext::getSortedFunctions() {
1508   std::vector<BinaryFunction *> SortedFunctions(BinaryFunctions.size());
1509   llvm::transform(BinaryFunctions, SortedFunctions.begin(),
1510                   [](std::pair<const uint64_t, BinaryFunction> &BFI) {
1511                     return &BFI.second;
1512                   });
1513 
1514   llvm::stable_sort(SortedFunctions,
1515                     [](const BinaryFunction *A, const BinaryFunction *B) {
1516                       if (A->hasValidIndex() && B->hasValidIndex()) {
1517                         return A->getIndex() < B->getIndex();
1518                       }
1519                       return A->hasValidIndex();
1520                     });
1521   return SortedFunctions;
1522 }
1523 
1524 std::vector<BinaryFunction *> BinaryContext::getAllBinaryFunctions() {
1525   std::vector<BinaryFunction *> AllFunctions;
1526   AllFunctions.reserve(BinaryFunctions.size() + InjectedBinaryFunctions.size());
1527   llvm::transform(BinaryFunctions, std::back_inserter(AllFunctions),
1528                   [](std::pair<const uint64_t, BinaryFunction> &BFI) {
1529                     return &BFI.second;
1530                   });
1531   llvm::copy(InjectedBinaryFunctions, std::back_inserter(AllFunctions));
1532 
1533   return AllFunctions;
1534 }
1535 
1536 Optional<DWARFUnit *> BinaryContext::getDWOCU(uint64_t DWOId) {
1537   auto Iter = DWOCUs.find(DWOId);
1538   if (Iter == DWOCUs.end())
1539     return None;
1540 
1541   return Iter->second;
1542 }
1543 
1544 DWARFContext *BinaryContext::getDWOContext() const {
1545   if (DWOCUs.empty())
1546     return nullptr;
1547   return &DWOCUs.begin()->second->getContext();
1548 }
1549 
1550 /// Handles DWO sections that can either be in .o, .dwo or .dwp files.
1551 void BinaryContext::preprocessDWODebugInfo() {
1552   for (const std::unique_ptr<DWARFUnit> &CU : DwCtx->compile_units()) {
1553     DWARFUnit *const DwarfUnit = CU.get();
1554     if (llvm::Optional<uint64_t> DWOId = DwarfUnit->getDWOId()) {
1555       DWARFUnit *DWOCU = DwarfUnit->getNonSkeletonUnitDIE(false).getDwarfUnit();
1556       if (!DWOCU->isDWOUnit()) {
1557         std::string DWOName = dwarf::toString(
1558             DwarfUnit->getUnitDIE().find(
1559                 {dwarf::DW_AT_dwo_name, dwarf::DW_AT_GNU_dwo_name}),
1560             "");
1561         outs() << "BOLT-WARNING: Debug Fission: DWO debug information for "
1562                << DWOName
1563                << " was not retrieved and won't be updated. Please check "
1564                   "relative path.\n";
1565         continue;
1566       }
1567       DWOCUs[*DWOId] = DWOCU;
1568     }
1569   }
1570 }
1571 
1572 void BinaryContext::preprocessDebugInfo() {
1573   struct CURange {
1574     uint64_t LowPC;
1575     uint64_t HighPC;
1576     DWARFUnit *Unit;
1577 
1578     bool operator<(const CURange &Other) const { return LowPC < Other.LowPC; }
1579   };
1580 
1581   // Building a map of address ranges to CUs similar to .debug_aranges and use
1582   // it to assign CU to functions.
1583   std::vector<CURange> AllRanges;
1584   AllRanges.reserve(DwCtx->getNumCompileUnits());
1585   for (const std::unique_ptr<DWARFUnit> &CU : DwCtx->compile_units()) {
1586     Expected<DWARFAddressRangesVector> RangesOrError =
1587         CU->getUnitDIE().getAddressRanges();
1588     if (!RangesOrError) {
1589       consumeError(RangesOrError.takeError());
1590       continue;
1591     }
1592     for (DWARFAddressRange &Range : *RangesOrError) {
1593       // Parts of the debug info could be invalidated due to corresponding code
1594       // being removed from the binary by the linker. Hence we check if the
1595       // address is a valid one.
1596       if (containsAddress(Range.LowPC))
1597         AllRanges.emplace_back(CURange{Range.LowPC, Range.HighPC, CU.get()});
1598     }
1599 
1600     ContainsDwarf5 |= CU->getVersion() >= 5;
1601     ContainsDwarfLegacy |= CU->getVersion() < 5;
1602   }
1603 
1604   llvm::sort(AllRanges);
1605   for (auto &KV : BinaryFunctions) {
1606     const uint64_t FunctionAddress = KV.first;
1607     BinaryFunction &Function = KV.second;
1608 
1609     auto It = llvm::partition_point(
1610         AllRanges, [=](CURange R) { return R.HighPC <= FunctionAddress; });
1611     if (It != AllRanges.end() && It->LowPC <= FunctionAddress)
1612       Function.setDWARFUnit(It->Unit);
1613   }
1614 
1615   // Discover units with debug info that needs to be updated.
1616   for (const auto &KV : BinaryFunctions) {
1617     const BinaryFunction &BF = KV.second;
1618     if (shouldEmit(BF) && BF.getDWARFUnit())
1619       ProcessedCUs.insert(BF.getDWARFUnit());
1620   }
1621 
1622   // Clear debug info for functions from units that we are not going to process.
1623   for (auto &KV : BinaryFunctions) {
1624     BinaryFunction &BF = KV.second;
1625     if (BF.getDWARFUnit() && !ProcessedCUs.count(BF.getDWARFUnit()))
1626       BF.setDWARFUnit(nullptr);
1627   }
1628 
1629   if (opts::Verbosity >= 1) {
1630     outs() << "BOLT-INFO: " << ProcessedCUs.size() << " out of "
1631            << DwCtx->getNumCompileUnits() << " CUs will be updated\n";
1632   }
1633 
1634   preprocessDWODebugInfo();
1635 
1636   // Populate MCContext with DWARF files from all units.
1637   StringRef GlobalPrefix = AsmInfo->getPrivateGlobalPrefix();
1638   for (const std::unique_ptr<DWARFUnit> &CU : DwCtx->compile_units()) {
1639     const uint64_t CUID = CU->getOffset();
1640     DwarfLineTable &BinaryLineTable = getDwarfLineTable(CUID);
1641     BinaryLineTable.setLabel(Ctx->getOrCreateSymbol(
1642         GlobalPrefix + "line_table_start" + Twine(CUID)));
1643 
1644     if (!ProcessedCUs.count(CU.get()))
1645       continue;
1646 
1647     const DWARFDebugLine::LineTable *LineTable =
1648         DwCtx->getLineTableForUnit(CU.get());
1649     const std::vector<DWARFDebugLine::FileNameEntry> &FileNames =
1650         LineTable->Prologue.FileNames;
1651 
1652     uint16_t DwarfVersion = LineTable->Prologue.getVersion();
1653     if (DwarfVersion >= 5) {
1654       Optional<MD5::MD5Result> Checksum = None;
1655       if (LineTable->Prologue.ContentTypes.HasMD5)
1656         Checksum = LineTable->Prologue.FileNames[0].Checksum;
1657       Optional<const char *> Name =
1658           dwarf::toString(CU->getUnitDIE().find(dwarf::DW_AT_name), nullptr);
1659       if (Optional<uint64_t> DWOID = CU->getDWOId()) {
1660         auto Iter = DWOCUs.find(*DWOID);
1661         assert(Iter != DWOCUs.end() && "DWO CU was not found.");
1662         Name = dwarf::toString(
1663             Iter->second->getUnitDIE().find(dwarf::DW_AT_name), nullptr);
1664       }
1665       BinaryLineTable.setRootFile(CU->getCompilationDir(), *Name, Checksum,
1666                                   None);
1667     }
1668 
1669     BinaryLineTable.setDwarfVersion(DwarfVersion);
1670 
1671     // Assign a unique label to every line table, one per CU.
1672     // Make sure empty debug line tables are registered too.
1673     if (FileNames.empty()) {
1674       cantFail(
1675           getDwarfFile("", "<unknown>", 0, None, None, CUID, DwarfVersion));
1676       continue;
1677     }
1678     const uint32_t Offset = DwarfVersion < 5 ? 1 : 0;
1679     for (size_t I = 0, Size = FileNames.size(); I != Size; ++I) {
1680       // Dir indexes start at 1, as DWARF file numbers, and a dir index 0
1681       // means empty dir.
1682       StringRef Dir = "";
1683       if (FileNames[I].DirIdx != 0 || DwarfVersion >= 5)
1684         if (Optional<const char *> DirName = dwarf::toString(
1685                 LineTable->Prologue
1686                     .IncludeDirectories[FileNames[I].DirIdx - Offset]))
1687           Dir = *DirName;
1688       StringRef FileName = "";
1689       if (Optional<const char *> FName = dwarf::toString(FileNames[I].Name))
1690         FileName = *FName;
1691       assert(FileName != "");
1692       Optional<MD5::MD5Result> Checksum = None;
1693       if (DwarfVersion >= 5 && LineTable->Prologue.ContentTypes.HasMD5)
1694         Checksum = LineTable->Prologue.FileNames[I].Checksum;
1695       cantFail(
1696           getDwarfFile(Dir, FileName, 0, Checksum, None, CUID, DwarfVersion));
1697     }
1698   }
1699 }
1700 
1701 bool BinaryContext::shouldEmit(const BinaryFunction &Function) const {
1702   if (Function.isPseudo())
1703     return false;
1704 
1705   if (opts::processAllFunctions())
1706     return true;
1707 
1708   if (Function.isIgnored())
1709     return false;
1710 
1711   // In relocation mode we will emit non-simple functions with CFG.
1712   // If the function does not have a CFG it should be marked as ignored.
1713   return HasRelocations || Function.isSimple();
1714 }
1715 
1716 void BinaryContext::printCFI(raw_ostream &OS, const MCCFIInstruction &Inst) {
1717   uint32_t Operation = Inst.getOperation();
1718   switch (Operation) {
1719   case MCCFIInstruction::OpSameValue:
1720     OS << "OpSameValue Reg" << Inst.getRegister();
1721     break;
1722   case MCCFIInstruction::OpRememberState:
1723     OS << "OpRememberState";
1724     break;
1725   case MCCFIInstruction::OpRestoreState:
1726     OS << "OpRestoreState";
1727     break;
1728   case MCCFIInstruction::OpOffset:
1729     OS << "OpOffset Reg" << Inst.getRegister() << " " << Inst.getOffset();
1730     break;
1731   case MCCFIInstruction::OpDefCfaRegister:
1732     OS << "OpDefCfaRegister Reg" << Inst.getRegister();
1733     break;
1734   case MCCFIInstruction::OpDefCfaOffset:
1735     OS << "OpDefCfaOffset " << Inst.getOffset();
1736     break;
1737   case MCCFIInstruction::OpDefCfa:
1738     OS << "OpDefCfa Reg" << Inst.getRegister() << " " << Inst.getOffset();
1739     break;
1740   case MCCFIInstruction::OpRelOffset:
1741     OS << "OpRelOffset Reg" << Inst.getRegister() << " " << Inst.getOffset();
1742     break;
1743   case MCCFIInstruction::OpAdjustCfaOffset:
1744     OS << "OfAdjustCfaOffset " << Inst.getOffset();
1745     break;
1746   case MCCFIInstruction::OpEscape:
1747     OS << "OpEscape";
1748     break;
1749   case MCCFIInstruction::OpRestore:
1750     OS << "OpRestore Reg" << Inst.getRegister();
1751     break;
1752   case MCCFIInstruction::OpUndefined:
1753     OS << "OpUndefined Reg" << Inst.getRegister();
1754     break;
1755   case MCCFIInstruction::OpRegister:
1756     OS << "OpRegister Reg" << Inst.getRegister() << " Reg"
1757        << Inst.getRegister2();
1758     break;
1759   case MCCFIInstruction::OpWindowSave:
1760     OS << "OpWindowSave";
1761     break;
1762   case MCCFIInstruction::OpGnuArgsSize:
1763     OS << "OpGnuArgsSize";
1764     break;
1765   default:
1766     OS << "Op#" << Operation;
1767     break;
1768   }
1769 }
1770 
1771 MarkerSymType BinaryContext::getMarkerType(const SymbolRef &Symbol) const {
1772   // For aarch64, the ABI defines mapping symbols so we identify data in the
1773   // code section (see IHI0056B). $x identifies a symbol starting code or the
1774   // end of a data chunk inside code, $d indentifies start of data.
1775   if (!isAArch64() || ELFSymbolRef(Symbol).getSize())
1776     return MarkerSymType::NONE;
1777 
1778   Expected<StringRef> NameOrError = Symbol.getName();
1779   Expected<object::SymbolRef::Type> TypeOrError = Symbol.getType();
1780 
1781   if (!TypeOrError || !NameOrError)
1782     return MarkerSymType::NONE;
1783 
1784   if (*TypeOrError != SymbolRef::ST_Unknown)
1785     return MarkerSymType::NONE;
1786 
1787   if (*NameOrError == "$x" || NameOrError->startswith("$x."))
1788     return MarkerSymType::CODE;
1789 
1790   if (*NameOrError == "$d" || NameOrError->startswith("$d."))
1791     return MarkerSymType::DATA;
1792 
1793   return MarkerSymType::NONE;
1794 }
1795 
1796 bool BinaryContext::isMarker(const SymbolRef &Symbol) const {
1797   return getMarkerType(Symbol) != MarkerSymType::NONE;
1798 }
1799 
1800 static void printDebugInfo(raw_ostream &OS, const MCInst &Instruction,
1801                            const BinaryFunction *Function,
1802                            DWARFContext *DwCtx) {
1803   DebugLineTableRowRef RowRef =
1804       DebugLineTableRowRef::fromSMLoc(Instruction.getLoc());
1805   if (RowRef == DebugLineTableRowRef::NULL_ROW)
1806     return;
1807 
1808   const DWARFDebugLine::LineTable *LineTable;
1809   if (Function && Function->getDWARFUnit() &&
1810       Function->getDWARFUnit()->getOffset() == RowRef.DwCompileUnitIndex) {
1811     LineTable = Function->getDWARFLineTable();
1812   } else {
1813     LineTable = DwCtx->getLineTableForUnit(
1814         DwCtx->getCompileUnitForOffset(RowRef.DwCompileUnitIndex));
1815   }
1816   assert(LineTable && "line table expected for instruction with debug info");
1817 
1818   const DWARFDebugLine::Row &Row = LineTable->Rows[RowRef.RowIndex - 1];
1819   StringRef FileName = "";
1820   if (Optional<const char *> FName =
1821           dwarf::toString(LineTable->Prologue.FileNames[Row.File - 1].Name))
1822     FileName = *FName;
1823   OS << " # debug line " << FileName << ":" << Row.Line;
1824   if (Row.Column)
1825     OS << ":" << Row.Column;
1826   if (Row.Discriminator)
1827     OS << " discriminator:" << Row.Discriminator;
1828 }
1829 
1830 void BinaryContext::printInstruction(raw_ostream &OS, const MCInst &Instruction,
1831                                      uint64_t Offset,
1832                                      const BinaryFunction *Function,
1833                                      bool PrintMCInst, bool PrintMemData,
1834                                      bool PrintRelocations,
1835                                      StringRef Endl) const {
1836   if (MIB->isEHLabel(Instruction)) {
1837     OS << "  EH_LABEL: " << *MIB->getTargetSymbol(Instruction) << Endl;
1838     return;
1839   }
1840   OS << format("    %08" PRIx64 ": ", Offset);
1841   if (MIB->isCFI(Instruction)) {
1842     uint32_t Offset = Instruction.getOperand(0).getImm();
1843     OS << "\t!CFI\t$" << Offset << "\t; ";
1844     if (Function)
1845       printCFI(OS, *Function->getCFIFor(Instruction));
1846     OS << Endl;
1847     return;
1848   }
1849   InstPrinter->printInst(&Instruction, 0, "", *STI, OS);
1850   if (MIB->isCall(Instruction)) {
1851     if (MIB->isTailCall(Instruction))
1852       OS << " # TAILCALL ";
1853     if (MIB->isInvoke(Instruction)) {
1854       const Optional<MCPlus::MCLandingPad> EHInfo = MIB->getEHInfo(Instruction);
1855       OS << " # handler: ";
1856       if (EHInfo->first)
1857         OS << *EHInfo->first;
1858       else
1859         OS << '0';
1860       OS << "; action: " << EHInfo->second;
1861       const int64_t GnuArgsSize = MIB->getGnuArgsSize(Instruction);
1862       if (GnuArgsSize >= 0)
1863         OS << "; GNU_args_size = " << GnuArgsSize;
1864     }
1865   } else if (MIB->isIndirectBranch(Instruction)) {
1866     if (uint64_t JTAddress = MIB->getJumpTable(Instruction)) {
1867       OS << " # JUMPTABLE @0x" << Twine::utohexstr(JTAddress);
1868     } else {
1869       OS << " # UNKNOWN CONTROL FLOW";
1870     }
1871   }
1872   if (Optional<uint32_t> Offset = MIB->getOffset(Instruction))
1873     OS << " # Offset: " << *Offset;
1874 
1875   MIB->printAnnotations(Instruction, OS);
1876 
1877   if (opts::PrintDebugInfo)
1878     printDebugInfo(OS, Instruction, Function, DwCtx.get());
1879 
1880   if ((opts::PrintRelocations || PrintRelocations) && Function) {
1881     const uint64_t Size = computeCodeSize(&Instruction, &Instruction + 1);
1882     Function->printRelocations(OS, Offset, Size);
1883   }
1884 
1885   OS << Endl;
1886 
1887   if (PrintMCInst) {
1888     Instruction.dump_pretty(OS, InstPrinter.get());
1889     OS << Endl;
1890   }
1891 }
1892 
1893 Optional<uint64_t>
1894 BinaryContext::getBaseAddressForMapping(uint64_t MMapAddress,
1895                                         uint64_t FileOffset) const {
1896   // Find a segment with a matching file offset.
1897   for (auto &KV : SegmentMapInfo) {
1898     const SegmentInfo &SegInfo = KV.second;
1899     if (alignDown(SegInfo.FileOffset, SegInfo.Alignment) == FileOffset) {
1900       // Use segment's aligned memory offset to calculate the base address.
1901       const uint64_t MemOffset = alignDown(SegInfo.Address, SegInfo.Alignment);
1902       return MMapAddress - MemOffset;
1903     }
1904   }
1905 
1906   return NoneType();
1907 }
1908 
1909 ErrorOr<BinarySection &> BinaryContext::getSectionForAddress(uint64_t Address) {
1910   auto SI = AddressToSection.upper_bound(Address);
1911   if (SI != AddressToSection.begin()) {
1912     --SI;
1913     uint64_t UpperBound = SI->first + SI->second->getSize();
1914     if (!SI->second->getSize())
1915       UpperBound += 1;
1916     if (UpperBound > Address)
1917       return *SI->second;
1918   }
1919   return std::make_error_code(std::errc::bad_address);
1920 }
1921 
1922 ErrorOr<StringRef>
1923 BinaryContext::getSectionNameForAddress(uint64_t Address) const {
1924   if (ErrorOr<const BinarySection &> Section = getSectionForAddress(Address))
1925     return Section->getName();
1926   return std::make_error_code(std::errc::bad_address);
1927 }
1928 
1929 BinarySection &BinaryContext::registerSection(BinarySection *Section) {
1930   auto Res = Sections.insert(Section);
1931   (void)Res;
1932   assert(Res.second && "can't register the same section twice.");
1933 
1934   // Only register allocatable sections in the AddressToSection map.
1935   if (Section->isAllocatable() && Section->getAddress())
1936     AddressToSection.insert(std::make_pair(Section->getAddress(), Section));
1937   NameToSection.insert(
1938       std::make_pair(std::string(Section->getName()), Section));
1939   LLVM_DEBUG(dbgs() << "BOLT-DEBUG: registering " << *Section << "\n");
1940   return *Section;
1941 }
1942 
1943 BinarySection &BinaryContext::registerSection(SectionRef Section) {
1944   return registerSection(new BinarySection(*this, Section));
1945 }
1946 
1947 BinarySection &
1948 BinaryContext::registerSection(StringRef SectionName,
1949                                const BinarySection &OriginalSection) {
1950   return registerSection(
1951       new BinarySection(*this, SectionName, OriginalSection));
1952 }
1953 
1954 BinarySection &
1955 BinaryContext::registerOrUpdateSection(StringRef Name, unsigned ELFType,
1956                                        unsigned ELFFlags, uint8_t *Data,
1957                                        uint64_t Size, unsigned Alignment) {
1958   auto NamedSections = getSectionByName(Name);
1959   if (NamedSections.begin() != NamedSections.end()) {
1960     assert(std::next(NamedSections.begin()) == NamedSections.end() &&
1961            "can only update unique sections");
1962     BinarySection *Section = NamedSections.begin()->second;
1963 
1964     LLVM_DEBUG(dbgs() << "BOLT-DEBUG: updating " << *Section << " -> ");
1965     const bool Flag = Section->isAllocatable();
1966     (void)Flag;
1967     Section->update(Data, Size, Alignment, ELFType, ELFFlags);
1968     LLVM_DEBUG(dbgs() << *Section << "\n");
1969     // FIXME: Fix section flags/attributes for MachO.
1970     if (isELF())
1971       assert(Flag == Section->isAllocatable() &&
1972              "can't change section allocation status");
1973     return *Section;
1974   }
1975 
1976   return registerSection(
1977       new BinarySection(*this, Name, Data, Size, Alignment, ELFType, ELFFlags));
1978 }
1979 
1980 bool BinaryContext::deregisterSection(BinarySection &Section) {
1981   BinarySection *SectionPtr = &Section;
1982   auto Itr = Sections.find(SectionPtr);
1983   if (Itr != Sections.end()) {
1984     auto Range = AddressToSection.equal_range(SectionPtr->getAddress());
1985     while (Range.first != Range.second) {
1986       if (Range.first->second == SectionPtr) {
1987         AddressToSection.erase(Range.first);
1988         break;
1989       }
1990       ++Range.first;
1991     }
1992 
1993     auto NameRange =
1994         NameToSection.equal_range(std::string(SectionPtr->getName()));
1995     while (NameRange.first != NameRange.second) {
1996       if (NameRange.first->second == SectionPtr) {
1997         NameToSection.erase(NameRange.first);
1998         break;
1999       }
2000       ++NameRange.first;
2001     }
2002 
2003     Sections.erase(Itr);
2004     delete SectionPtr;
2005     return true;
2006   }
2007   return false;
2008 }
2009 
2010 void BinaryContext::printSections(raw_ostream &OS) const {
2011   for (BinarySection *const &Section : Sections)
2012     OS << "BOLT-INFO: " << *Section << "\n";
2013 }
2014 
2015 BinarySection &BinaryContext::absoluteSection() {
2016   if (ErrorOr<BinarySection &> Section = getUniqueSectionByName("<absolute>"))
2017     return *Section;
2018   return registerOrUpdateSection("<absolute>", ELF::SHT_NULL, 0u);
2019 }
2020 
2021 ErrorOr<uint64_t> BinaryContext::getUnsignedValueAtAddress(uint64_t Address,
2022                                                            size_t Size) const {
2023   const ErrorOr<const BinarySection &> Section = getSectionForAddress(Address);
2024   if (!Section)
2025     return std::make_error_code(std::errc::bad_address);
2026 
2027   if (Section->isVirtual())
2028     return 0;
2029 
2030   DataExtractor DE(Section->getContents(), AsmInfo->isLittleEndian(),
2031                    AsmInfo->getCodePointerSize());
2032   auto ValueOffset = static_cast<uint64_t>(Address - Section->getAddress());
2033   return DE.getUnsigned(&ValueOffset, Size);
2034 }
2035 
2036 ErrorOr<uint64_t> BinaryContext::getSignedValueAtAddress(uint64_t Address,
2037                                                          size_t Size) const {
2038   const ErrorOr<const BinarySection &> Section = getSectionForAddress(Address);
2039   if (!Section)
2040     return std::make_error_code(std::errc::bad_address);
2041 
2042   if (Section->isVirtual())
2043     return 0;
2044 
2045   DataExtractor DE(Section->getContents(), AsmInfo->isLittleEndian(),
2046                    AsmInfo->getCodePointerSize());
2047   auto ValueOffset = static_cast<uint64_t>(Address - Section->getAddress());
2048   return DE.getSigned(&ValueOffset, Size);
2049 }
2050 
2051 void BinaryContext::addRelocation(uint64_t Address, MCSymbol *Symbol,
2052                                   uint64_t Type, uint64_t Addend,
2053                                   uint64_t Value) {
2054   ErrorOr<BinarySection &> Section = getSectionForAddress(Address);
2055   assert(Section && "cannot find section for address");
2056   Section->addRelocation(Address - Section->getAddress(), Symbol, Type, Addend,
2057                          Value);
2058 }
2059 
2060 void BinaryContext::addDynamicRelocation(uint64_t Address, MCSymbol *Symbol,
2061                                          uint64_t Type, uint64_t Addend,
2062                                          uint64_t Value) {
2063   ErrorOr<BinarySection &> Section = getSectionForAddress(Address);
2064   assert(Section && "cannot find section for address");
2065   Section->addDynamicRelocation(Address - Section->getAddress(), Symbol, Type,
2066                                 Addend, Value);
2067 }
2068 
2069 bool BinaryContext::removeRelocationAt(uint64_t Address) {
2070   ErrorOr<BinarySection &> Section = getSectionForAddress(Address);
2071   assert(Section && "cannot find section for address");
2072   return Section->removeRelocationAt(Address - Section->getAddress());
2073 }
2074 
2075 const Relocation *BinaryContext::getRelocationAt(uint64_t Address) {
2076   ErrorOr<BinarySection &> Section = getSectionForAddress(Address);
2077   if (!Section)
2078     return nullptr;
2079 
2080   return Section->getRelocationAt(Address - Section->getAddress());
2081 }
2082 
2083 const Relocation *BinaryContext::getDynamicRelocationAt(uint64_t Address) {
2084   ErrorOr<BinarySection &> Section = getSectionForAddress(Address);
2085   if (!Section)
2086     return nullptr;
2087 
2088   return Section->getDynamicRelocationAt(Address - Section->getAddress());
2089 }
2090 
2091 void BinaryContext::markAmbiguousRelocations(BinaryData &BD,
2092                                              const uint64_t Address) {
2093   auto setImmovable = [&](BinaryData &BD) {
2094     BinaryData *Root = BD.getAtomicRoot();
2095     LLVM_DEBUG(if (Root->isMoveable()) {
2096       dbgs() << "BOLT-DEBUG: setting " << *Root << " as immovable "
2097              << "due to ambiguous relocation referencing 0x"
2098              << Twine::utohexstr(Address) << '\n';
2099     });
2100     Root->setIsMoveable(false);
2101   };
2102 
2103   if (Address == BD.getAddress()) {
2104     setImmovable(BD);
2105 
2106     // Set previous symbol as immovable
2107     BinaryData *Prev = getBinaryDataContainingAddress(Address - 1);
2108     if (Prev && Prev->getEndAddress() == BD.getAddress())
2109       setImmovable(*Prev);
2110   }
2111 
2112   if (Address == BD.getEndAddress()) {
2113     setImmovable(BD);
2114 
2115     // Set next symbol as immovable
2116     BinaryData *Next = getBinaryDataContainingAddress(BD.getEndAddress());
2117     if (Next && Next->getAddress() == BD.getEndAddress())
2118       setImmovable(*Next);
2119   }
2120 }
2121 
2122 BinaryFunction *BinaryContext::getFunctionForSymbol(const MCSymbol *Symbol,
2123                                                     uint64_t *EntryDesc) {
2124   std::shared_lock<std::shared_timed_mutex> Lock(SymbolToFunctionMapMutex);
2125   auto BFI = SymbolToFunctionMap.find(Symbol);
2126   if (BFI == SymbolToFunctionMap.end())
2127     return nullptr;
2128 
2129   BinaryFunction *BF = BFI->second;
2130   if (EntryDesc)
2131     *EntryDesc = BF->getEntryIDForSymbol(Symbol);
2132 
2133   return BF;
2134 }
2135 
2136 void BinaryContext::exitWithBugReport(StringRef Message,
2137                                       const BinaryFunction &Function) const {
2138   errs() << "=======================================\n";
2139   errs() << "BOLT is unable to proceed because it couldn't properly understand "
2140             "this function.\n";
2141   errs() << "If you are running the most recent version of BOLT, you may "
2142             "want to "
2143             "report this and paste this dump.\nPlease check that there is no "
2144             "sensitive contents being shared in this dump.\n";
2145   errs() << "\nOffending function: " << Function.getPrintName() << "\n\n";
2146   ScopedPrinter SP(errs());
2147   SP.printBinaryBlock("Function contents", *Function.getData());
2148   errs() << "\n";
2149   Function.dump();
2150   errs() << "ERROR: " << Message;
2151   errs() << "\n=======================================\n";
2152   exit(1);
2153 }
2154 
2155 BinaryFunction *
2156 BinaryContext::createInjectedBinaryFunction(const std::string &Name,
2157                                             bool IsSimple) {
2158   InjectedBinaryFunctions.push_back(new BinaryFunction(Name, *this, IsSimple));
2159   BinaryFunction *BF = InjectedBinaryFunctions.back();
2160   setSymbolToFunctionMap(BF->getSymbol(), BF);
2161   BF->CurrentState = BinaryFunction::State::CFG;
2162   return BF;
2163 }
2164 
2165 std::pair<size_t, size_t>
2166 BinaryContext::calculateEmittedSize(BinaryFunction &BF, bool FixBranches) {
2167   // Adjust branch instruction to match the current layout.
2168   if (FixBranches)
2169     BF.fixBranches();
2170 
2171   // Create local MC context to isolate the effect of ephemeral code emission.
2172   IndependentCodeEmitter MCEInstance = createIndependentMCCodeEmitter();
2173   MCContext *LocalCtx = MCEInstance.LocalCtx.get();
2174   MCAsmBackend *MAB =
2175       TheTarget->createMCAsmBackend(*STI, *MRI, MCTargetOptions());
2176 
2177   SmallString<256> Code;
2178   raw_svector_ostream VecOS(Code);
2179 
2180   std::unique_ptr<MCObjectWriter> OW = MAB->createObjectWriter(VecOS);
2181   std::unique_ptr<MCStreamer> Streamer(TheTarget->createMCObjectStreamer(
2182       *TheTriple, *LocalCtx, std::unique_ptr<MCAsmBackend>(MAB), std::move(OW),
2183       std::unique_ptr<MCCodeEmitter>(MCEInstance.MCE.release()), *STI,
2184       /*RelaxAll=*/false,
2185       /*IncrementalLinkerCompatible=*/false,
2186       /*DWARFMustBeAtTheEnd=*/false));
2187 
2188   Streamer->initSections(false, *STI);
2189 
2190   MCSection *Section = MCEInstance.LocalMOFI->getTextSection();
2191   Section->setHasInstructions(true);
2192 
2193   // Create symbols in the LocalCtx so that they get destroyed with it.
2194   MCSymbol *StartLabel = LocalCtx->createTempSymbol();
2195   MCSymbol *EndLabel = LocalCtx->createTempSymbol();
2196   MCSymbol *ColdStartLabel = LocalCtx->createTempSymbol();
2197   MCSymbol *ColdEndLabel = LocalCtx->createTempSymbol();
2198 
2199   Streamer->switchSection(Section);
2200   Streamer->emitLabel(StartLabel);
2201   emitFunctionBody(*Streamer, BF, /*EmitColdPart=*/false,
2202                    /*EmitCodeOnly=*/true);
2203   Streamer->emitLabel(EndLabel);
2204 
2205   if (BF.isSplit()) {
2206     MCSectionELF *ColdSection =
2207         LocalCtx->getELFSection(BF.getColdCodeSectionName(), ELF::SHT_PROGBITS,
2208                                 ELF::SHF_EXECINSTR | ELF::SHF_ALLOC);
2209     ColdSection->setHasInstructions(true);
2210 
2211     Streamer->switchSection(ColdSection);
2212     Streamer->emitLabel(ColdStartLabel);
2213     emitFunctionBody(*Streamer, BF, /*EmitColdPart=*/true,
2214                      /*EmitCodeOnly=*/true);
2215     Streamer->emitLabel(ColdEndLabel);
2216     // To avoid calling MCObjectStreamer::flushPendingLabels() which is private
2217     Streamer->emitBytes(StringRef(""));
2218     Streamer->switchSection(Section);
2219   }
2220 
2221   // To avoid calling MCObjectStreamer::flushPendingLabels() which is private or
2222   // MCStreamer::Finish(), which does more than we want
2223   Streamer->emitBytes(StringRef(""));
2224 
2225   MCAssembler &Assembler =
2226       static_cast<MCObjectStreamer *>(Streamer.get())->getAssembler();
2227   MCAsmLayout Layout(Assembler);
2228   Assembler.layout(Layout);
2229 
2230   const uint64_t HotSize =
2231       Layout.getSymbolOffset(*EndLabel) - Layout.getSymbolOffset(*StartLabel);
2232   const uint64_t ColdSize = BF.isSplit()
2233                                 ? Layout.getSymbolOffset(*ColdEndLabel) -
2234                                       Layout.getSymbolOffset(*ColdStartLabel)
2235                                 : 0ULL;
2236 
2237   // Clean-up the effect of the code emission.
2238   for (const MCSymbol &Symbol : Assembler.symbols()) {
2239     MCSymbol *MutableSymbol = const_cast<MCSymbol *>(&Symbol);
2240     MutableSymbol->setUndefined();
2241     MutableSymbol->setIsRegistered(false);
2242   }
2243 
2244   return std::make_pair(HotSize, ColdSize);
2245 }
2246 
2247 bool BinaryContext::validateEncoding(const MCInst &Inst,
2248                                      ArrayRef<uint8_t> InputEncoding) const {
2249   SmallString<256> Code;
2250   SmallVector<MCFixup, 4> Fixups;
2251   raw_svector_ostream VecOS(Code);
2252 
2253   MCE->encodeInstruction(Inst, VecOS, Fixups, *STI);
2254   auto EncodedData = ArrayRef<uint8_t>((uint8_t *)Code.data(), Code.size());
2255   if (InputEncoding != EncodedData) {
2256     if (opts::Verbosity > 1) {
2257       errs() << "BOLT-WARNING: mismatched encoding detected\n"
2258              << "      input: " << InputEncoding << '\n'
2259              << "     output: " << EncodedData << '\n';
2260     }
2261     return false;
2262   }
2263 
2264   return true;
2265 }
2266 
2267 uint64_t BinaryContext::getHotThreshold() const {
2268   static uint64_t Threshold = 0;
2269   if (Threshold == 0) {
2270     Threshold = std::max(
2271         (uint64_t)opts::ExecutionCountThreshold,
2272         NumProfiledFuncs ? SumExecutionCount / (2 * NumProfiledFuncs) : 1);
2273   }
2274   return Threshold;
2275 }
2276 
2277 BinaryFunction *BinaryContext::getBinaryFunctionContainingAddress(
2278     uint64_t Address, bool CheckPastEnd, bool UseMaxSize) {
2279   auto FI = BinaryFunctions.upper_bound(Address);
2280   if (FI == BinaryFunctions.begin())
2281     return nullptr;
2282   --FI;
2283 
2284   const uint64_t UsedSize =
2285       UseMaxSize ? FI->second.getMaxSize() : FI->second.getSize();
2286 
2287   if (Address >= FI->first + UsedSize + (CheckPastEnd ? 1 : 0))
2288     return nullptr;
2289 
2290   return &FI->second;
2291 }
2292 
2293 BinaryFunction *BinaryContext::getBinaryFunctionAtAddress(uint64_t Address) {
2294   // First, try to find a function starting at the given address. If the
2295   // function was folded, this will get us the original folded function if it
2296   // wasn't removed from the list, e.g. in non-relocation mode.
2297   auto BFI = BinaryFunctions.find(Address);
2298   if (BFI != BinaryFunctions.end())
2299     return &BFI->second;
2300 
2301   // We might have folded the function matching the object at the given
2302   // address. In such case, we look for a function matching the symbol
2303   // registered at the original address. The new function (the one that the
2304   // original was folded into) will hold the symbol.
2305   if (const BinaryData *BD = getBinaryDataAtAddress(Address)) {
2306     uint64_t EntryID = 0;
2307     BinaryFunction *BF = getFunctionForSymbol(BD->getSymbol(), &EntryID);
2308     if (BF && EntryID == 0)
2309       return BF;
2310   }
2311   return nullptr;
2312 }
2313 
2314 DebugAddressRangesVector BinaryContext::translateModuleAddressRanges(
2315     const DWARFAddressRangesVector &InputRanges) const {
2316   DebugAddressRangesVector OutputRanges;
2317 
2318   for (const DWARFAddressRange Range : InputRanges) {
2319     auto BFI = BinaryFunctions.lower_bound(Range.LowPC);
2320     while (BFI != BinaryFunctions.end()) {
2321       const BinaryFunction &Function = BFI->second;
2322       if (Function.getAddress() >= Range.HighPC)
2323         break;
2324       const DebugAddressRangesVector FunctionRanges =
2325           Function.getOutputAddressRanges();
2326       llvm::move(FunctionRanges, std::back_inserter(OutputRanges));
2327       std::advance(BFI, 1);
2328     }
2329   }
2330 
2331   return OutputRanges;
2332 }
2333 
2334 } // namespace bolt
2335 } // namespace llvm
2336