xref: /llvm-project/bolt/lib/Core/BinaryContext.cpp (revision 0f74d191d12e1807d291c8db937f1bb89cfe7caa)
1 //===- bolt/Core/BinaryContext.cpp - Low-level context --------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements the BinaryContext class.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "bolt/Core/BinaryContext.h"
14 #include "bolt/Core/BinaryEmitter.h"
15 #include "bolt/Core/BinaryFunction.h"
16 #include "bolt/Utils/CommandLineOpts.h"
17 #include "bolt/Utils/NameResolver.h"
18 #include "bolt/Utils/Utils.h"
19 #include "llvm/ADT/Twine.h"
20 #include "llvm/DebugInfo/DWARF/DWARFCompileUnit.h"
21 #include "llvm/DebugInfo/DWARF/DWARFFormValue.h"
22 #include "llvm/DebugInfo/DWARF/DWARFUnit.h"
23 #include "llvm/MC/MCAsmLayout.h"
24 #include "llvm/MC/MCAssembler.h"
25 #include "llvm/MC/MCContext.h"
26 #include "llvm/MC/MCDisassembler/MCDisassembler.h"
27 #include "llvm/MC/MCInstPrinter.h"
28 #include "llvm/MC/MCObjectStreamer.h"
29 #include "llvm/MC/MCObjectWriter.h"
30 #include "llvm/MC/MCRegisterInfo.h"
31 #include "llvm/MC/MCSectionELF.h"
32 #include "llvm/MC/MCStreamer.h"
33 #include "llvm/MC/MCSubtargetInfo.h"
34 #include "llvm/MC/MCSymbol.h"
35 #include "llvm/Support/CommandLine.h"
36 #include "llvm/Support/Error.h"
37 #include "llvm/Support/Regex.h"
38 #include <algorithm>
39 #include <functional>
40 #include <iterator>
41 #include <numeric>
42 #include <unordered_set>
43 
44 using namespace llvm;
45 
46 #undef  DEBUG_TYPE
47 #define DEBUG_TYPE "bolt"
48 
49 namespace opts {
50 
51 cl::opt<bool> NoHugePages("no-huge-pages",
52                           cl::desc("use regular size pages for code alignment"),
53                           cl::Hidden, cl::cat(BoltCategory));
54 
55 static cl::opt<bool>
56 PrintDebugInfo("print-debug-info",
57   cl::desc("print debug info when printing functions"),
58   cl::Hidden,
59   cl::ZeroOrMore,
60   cl::cat(BoltCategory));
61 
62 cl::opt<bool> PrintRelocations(
63     "print-relocations",
64     cl::desc("print relocations when printing functions/objects"), cl::Hidden,
65     cl::cat(BoltCategory));
66 
67 static cl::opt<bool>
68 PrintMemData("print-mem-data",
69   cl::desc("print memory data annotations when printing functions"),
70   cl::Hidden,
71   cl::ZeroOrMore,
72   cl::cat(BoltCategory));
73 
74 } // namespace opts
75 
76 namespace llvm {
77 namespace bolt {
78 
79 BinaryContext::BinaryContext(std::unique_ptr<MCContext> Ctx,
80                              std::unique_ptr<DWARFContext> DwCtx,
81                              std::unique_ptr<Triple> TheTriple,
82                              const Target *TheTarget, std::string TripleName,
83                              std::unique_ptr<MCCodeEmitter> MCE,
84                              std::unique_ptr<MCObjectFileInfo> MOFI,
85                              std::unique_ptr<const MCAsmInfo> AsmInfo,
86                              std::unique_ptr<const MCInstrInfo> MII,
87                              std::unique_ptr<const MCSubtargetInfo> STI,
88                              std::unique_ptr<MCInstPrinter> InstPrinter,
89                              std::unique_ptr<const MCInstrAnalysis> MIA,
90                              std::unique_ptr<MCPlusBuilder> MIB,
91                              std::unique_ptr<const MCRegisterInfo> MRI,
92                              std::unique_ptr<MCDisassembler> DisAsm)
93     : Ctx(std::move(Ctx)), DwCtx(std::move(DwCtx)),
94       TheTriple(std::move(TheTriple)), TheTarget(TheTarget),
95       TripleName(TripleName), MCE(std::move(MCE)), MOFI(std::move(MOFI)),
96       AsmInfo(std::move(AsmInfo)), MII(std::move(MII)), STI(std::move(STI)),
97       InstPrinter(std::move(InstPrinter)), MIA(std::move(MIA)),
98       MIB(std::move(MIB)), MRI(std::move(MRI)), DisAsm(std::move(DisAsm)) {
99   Relocation::Arch = this->TheTriple->getArch();
100   RegularPageSize = isAArch64() ? RegularPageSizeAArch64 : RegularPageSizeX86;
101   PageAlign = opts::NoHugePages ? RegularPageSize : HugePageSize;
102 }
103 
104 BinaryContext::~BinaryContext() {
105   for (BinarySection *Section : Sections)
106     delete Section;
107   for (BinaryFunction *InjectedFunction : InjectedBinaryFunctions)
108     delete InjectedFunction;
109   for (std::pair<const uint64_t, JumpTable *> JTI : JumpTables)
110     delete JTI.second;
111   clearBinaryData();
112 }
113 
114 /// Create BinaryContext for a given architecture \p ArchName and
115 /// triple \p TripleName.
116 Expected<std::unique_ptr<BinaryContext>>
117 BinaryContext::createBinaryContext(const ObjectFile *File, bool IsPIC,
118                                    std::unique_ptr<DWARFContext> DwCtx) {
119   StringRef ArchName = "";
120   StringRef FeaturesStr = "";
121   switch (File->getArch()) {
122   case llvm::Triple::x86_64:
123     ArchName = "x86-64";
124     FeaturesStr = "+nopl";
125     break;
126   case llvm::Triple::aarch64:
127     ArchName = "aarch64";
128     FeaturesStr = "+all";
129     break;
130   default:
131     return createStringError(std::errc::not_supported,
132                              "BOLT-ERROR: Unrecognized machine in ELF file");
133   }
134 
135   auto TheTriple = std::make_unique<Triple>(File->makeTriple());
136   const std::string TripleName = TheTriple->str();
137 
138   std::string Error;
139   const Target *TheTarget =
140       TargetRegistry::lookupTarget(std::string(ArchName), *TheTriple, Error);
141   if (!TheTarget)
142     return createStringError(make_error_code(std::errc::not_supported),
143                              Twine("BOLT-ERROR: ", Error));
144 
145   std::unique_ptr<const MCRegisterInfo> MRI(
146       TheTarget->createMCRegInfo(TripleName));
147   if (!MRI)
148     return createStringError(
149         make_error_code(std::errc::not_supported),
150         Twine("BOLT-ERROR: no register info for target ", TripleName));
151 
152   // Set up disassembler.
153   std::unique_ptr<MCAsmInfo> AsmInfo(
154       TheTarget->createMCAsmInfo(*MRI, TripleName, MCTargetOptions()));
155   if (!AsmInfo)
156     return createStringError(
157         make_error_code(std::errc::not_supported),
158         Twine("BOLT-ERROR: no assembly info for target ", TripleName));
159   // BOLT creates "func@PLT" symbols for PLT entries. In function assembly dump
160   // we want to emit such names as using @PLT without double quotes to convey
161   // variant kind to the assembler. BOLT doesn't rely on the linker so we can
162   // override the default AsmInfo behavior to emit names the way we want.
163   AsmInfo->setAllowAtInName(true);
164 
165   std::unique_ptr<const MCSubtargetInfo> STI(
166       TheTarget->createMCSubtargetInfo(TripleName, "", FeaturesStr));
167   if (!STI)
168     return createStringError(
169         make_error_code(std::errc::not_supported),
170         Twine("BOLT-ERROR: no subtarget info for target ", TripleName));
171 
172   std::unique_ptr<const MCInstrInfo> MII(TheTarget->createMCInstrInfo());
173   if (!MII)
174     return createStringError(
175         make_error_code(std::errc::not_supported),
176         Twine("BOLT-ERROR: no instruction info for target ", TripleName));
177 
178   std::unique_ptr<MCContext> Ctx(
179       new MCContext(*TheTriple, AsmInfo.get(), MRI.get(), STI.get()));
180   std::unique_ptr<MCObjectFileInfo> MOFI(
181       TheTarget->createMCObjectFileInfo(*Ctx, IsPIC));
182   Ctx->setObjectFileInfo(MOFI.get());
183   // We do not support X86 Large code model. Change this in the future.
184   bool Large = false;
185   if (TheTriple->getArch() == llvm::Triple::aarch64)
186     Large = true;
187   unsigned LSDAEncoding =
188       Large ? dwarf::DW_EH_PE_absptr : dwarf::DW_EH_PE_udata4;
189   unsigned TTypeEncoding =
190       Large ? dwarf::DW_EH_PE_absptr : dwarf::DW_EH_PE_udata4;
191   if (IsPIC) {
192     LSDAEncoding = dwarf::DW_EH_PE_pcrel |
193                    (Large ? dwarf::DW_EH_PE_sdata8 : dwarf::DW_EH_PE_sdata4);
194     TTypeEncoding = dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel |
195                     (Large ? dwarf::DW_EH_PE_sdata8 : dwarf::DW_EH_PE_sdata4);
196   }
197 
198   std::unique_ptr<MCDisassembler> DisAsm(
199       TheTarget->createMCDisassembler(*STI, *Ctx));
200 
201   if (!DisAsm)
202     return createStringError(
203         make_error_code(std::errc::not_supported),
204         Twine("BOLT-ERROR: no disassembler info for target ", TripleName));
205 
206   std::unique_ptr<const MCInstrAnalysis> MIA(
207       TheTarget->createMCInstrAnalysis(MII.get()));
208   if (!MIA)
209     return createStringError(
210         make_error_code(std::errc::not_supported),
211         Twine("BOLT-ERROR: failed to create instruction analysis for target ",
212               TripleName));
213 
214   int AsmPrinterVariant = AsmInfo->getAssemblerDialect();
215   std::unique_ptr<MCInstPrinter> InstructionPrinter(
216       TheTarget->createMCInstPrinter(*TheTriple, AsmPrinterVariant, *AsmInfo,
217                                      *MII, *MRI));
218   if (!InstructionPrinter)
219     return createStringError(
220         make_error_code(std::errc::not_supported),
221         Twine("BOLT-ERROR: no instruction printer for target ", TripleName));
222   InstructionPrinter->setPrintImmHex(true);
223 
224   std::unique_ptr<MCCodeEmitter> MCE(
225       TheTarget->createMCCodeEmitter(*MII, *Ctx));
226 
227   // Make sure we don't miss any output on core dumps.
228   outs().SetUnbuffered();
229   errs().SetUnbuffered();
230   dbgs().SetUnbuffered();
231 
232   auto BC = std::make_unique<BinaryContext>(
233       std::move(Ctx), std::move(DwCtx), std::move(TheTriple), TheTarget,
234       std::string(TripleName), std::move(MCE), std::move(MOFI),
235       std::move(AsmInfo), std::move(MII), std::move(STI),
236       std::move(InstructionPrinter), std::move(MIA), nullptr, std::move(MRI),
237       std::move(DisAsm));
238 
239   BC->TTypeEncoding = TTypeEncoding;
240   BC->LSDAEncoding = LSDAEncoding;
241 
242   BC->MAB = std::unique_ptr<MCAsmBackend>(
243       BC->TheTarget->createMCAsmBackend(*BC->STI, *BC->MRI, MCTargetOptions()));
244 
245   BC->setFilename(File->getFileName());
246 
247   BC->HasFixedLoadAddress = !IsPIC;
248 
249   BC->SymbolicDisAsm = std::unique_ptr<MCDisassembler>(
250       BC->TheTarget->createMCDisassembler(*BC->STI, *BC->Ctx));
251 
252   if (!BC->SymbolicDisAsm)
253     return createStringError(
254         make_error_code(std::errc::not_supported),
255         Twine("BOLT-ERROR: no disassembler info for target ", TripleName));
256 
257   return std::move(BC);
258 }
259 
260 bool BinaryContext::forceSymbolRelocations(StringRef SymbolName) const {
261   if (opts::HotText &&
262       (SymbolName == "__hot_start" || SymbolName == "__hot_end"))
263     return true;
264 
265   if (opts::HotData &&
266       (SymbolName == "__hot_data_start" || SymbolName == "__hot_data_end"))
267     return true;
268 
269   if (SymbolName == "_end")
270     return true;
271 
272   return false;
273 }
274 
275 std::unique_ptr<MCObjectWriter>
276 BinaryContext::createObjectWriter(raw_pwrite_stream &OS) {
277   return MAB->createObjectWriter(OS);
278 }
279 
280 bool BinaryContext::validateObjectNesting() const {
281   auto Itr = BinaryDataMap.begin();
282   auto End = BinaryDataMap.end();
283   bool Valid = true;
284   while (Itr != End) {
285     auto Next = std::next(Itr);
286     while (Next != End &&
287            Itr->second->getSection() == Next->second->getSection() &&
288            Itr->second->containsRange(Next->second->getAddress(),
289                                       Next->second->getSize())) {
290       if (Next->second->Parent != Itr->second) {
291         errs() << "BOLT-WARNING: object nesting incorrect for:\n"
292                << "BOLT-WARNING:  " << *Itr->second << "\n"
293                << "BOLT-WARNING:  " << *Next->second << "\n";
294         Valid = false;
295       }
296       ++Next;
297     }
298     Itr = Next;
299   }
300   return Valid;
301 }
302 
303 bool BinaryContext::validateHoles() const {
304   bool Valid = true;
305   for (BinarySection &Section : sections()) {
306     for (const Relocation &Rel : Section.relocations()) {
307       uint64_t RelAddr = Rel.Offset + Section.getAddress();
308       const BinaryData *BD = getBinaryDataContainingAddress(RelAddr);
309       if (!BD) {
310         errs() << "BOLT-WARNING: no BinaryData found for relocation at address"
311                << " 0x" << Twine::utohexstr(RelAddr) << " in "
312                << Section.getName() << "\n";
313         Valid = false;
314       } else if (!BD->getAtomicRoot()) {
315         errs() << "BOLT-WARNING: no atomic BinaryData found for relocation at "
316                << "address 0x" << Twine::utohexstr(RelAddr) << " in "
317                << Section.getName() << "\n";
318         Valid = false;
319       }
320     }
321   }
322   return Valid;
323 }
324 
325 void BinaryContext::updateObjectNesting(BinaryDataMapType::iterator GAI) {
326   const uint64_t Address = GAI->second->getAddress();
327   const uint64_t Size = GAI->second->getSize();
328 
329   auto fixParents = [&](BinaryDataMapType::iterator Itr,
330                         BinaryData *NewParent) {
331     BinaryData *OldParent = Itr->second->Parent;
332     Itr->second->Parent = NewParent;
333     ++Itr;
334     while (Itr != BinaryDataMap.end() && OldParent &&
335            Itr->second->Parent == OldParent) {
336       Itr->second->Parent = NewParent;
337       ++Itr;
338     }
339   };
340 
341   // Check if the previous symbol contains the newly added symbol.
342   if (GAI != BinaryDataMap.begin()) {
343     BinaryData *Prev = std::prev(GAI)->second;
344     while (Prev) {
345       if (Prev->getSection() == GAI->second->getSection() &&
346           Prev->containsRange(Address, Size)) {
347         fixParents(GAI, Prev);
348       } else {
349         fixParents(GAI, nullptr);
350       }
351       Prev = Prev->Parent;
352     }
353   }
354 
355   // Check if the newly added symbol contains any subsequent symbols.
356   if (Size != 0) {
357     BinaryData *BD = GAI->second->Parent ? GAI->second->Parent : GAI->second;
358     auto Itr = std::next(GAI);
359     while (
360         Itr != BinaryDataMap.end() &&
361         BD->containsRange(Itr->second->getAddress(), Itr->second->getSize())) {
362       Itr->second->Parent = BD;
363       ++Itr;
364     }
365   }
366 }
367 
368 iterator_range<BinaryContext::binary_data_iterator>
369 BinaryContext::getSubBinaryData(BinaryData *BD) {
370   auto Start = std::next(BinaryDataMap.find(BD->getAddress()));
371   auto End = Start;
372   while (End != BinaryDataMap.end() && BD->isAncestorOf(End->second))
373     ++End;
374   return make_range(Start, End);
375 }
376 
377 std::pair<const MCSymbol *, uint64_t>
378 BinaryContext::handleAddressRef(uint64_t Address, BinaryFunction &BF,
379                                 bool IsPCRel) {
380   uint64_t Addend = 0;
381 
382   if (isAArch64()) {
383     // Check if this is an access to a constant island and create bookkeeping
384     // to keep track of it and emit it later as part of this function.
385     if (MCSymbol *IslandSym = BF.getOrCreateIslandAccess(Address))
386       return std::make_pair(IslandSym, Addend);
387 
388     // Detect custom code written in assembly that refers to arbitrary
389     // constant islands from other functions. Write this reference so we
390     // can pull this constant island and emit it as part of this function
391     // too.
392     auto IslandIter = AddressToConstantIslandMap.lower_bound(Address);
393     if (IslandIter != AddressToConstantIslandMap.end()) {
394       if (MCSymbol *IslandSym =
395               IslandIter->second->getOrCreateProxyIslandAccess(Address, BF)) {
396         BF.createIslandDependency(IslandSym, IslandIter->second);
397         return std::make_pair(IslandSym, Addend);
398       }
399     }
400   }
401 
402   // Note that the address does not necessarily have to reside inside
403   // a section, it could be an absolute address too.
404   ErrorOr<BinarySection &> Section = getSectionForAddress(Address);
405   if (Section && Section->isText()) {
406     if (BF.containsAddress(Address, /*UseMaxSize=*/isAArch64())) {
407       if (Address != BF.getAddress()) {
408         // The address could potentially escape. Mark it as another entry
409         // point into the function.
410         if (opts::Verbosity >= 1) {
411           outs() << "BOLT-INFO: potentially escaped address 0x"
412                  << Twine::utohexstr(Address) << " in function " << BF << '\n';
413         }
414         BF.HasInternalLabelReference = true;
415         return std::make_pair(
416             BF.addEntryPointAtOffset(Address - BF.getAddress()), Addend);
417       }
418     } else {
419       addInterproceduralReference(&BF, Address);
420     }
421   }
422 
423   // With relocations, catch jump table references outside of the basic block
424   // containing the indirect jump.
425   if (HasRelocations) {
426     const MemoryContentsType MemType = analyzeMemoryAt(Address, BF);
427     if (MemType == MemoryContentsType::POSSIBLE_PIC_JUMP_TABLE && IsPCRel) {
428       const MCSymbol *Symbol =
429           getOrCreateJumpTable(BF, Address, JumpTable::JTT_PIC);
430 
431       return std::make_pair(Symbol, Addend);
432     }
433   }
434 
435   if (BinaryData *BD = getBinaryDataContainingAddress(Address))
436     return std::make_pair(BD->getSymbol(), Address - BD->getAddress());
437 
438   // TODO: use DWARF info to get size/alignment here?
439   MCSymbol *TargetSymbol = getOrCreateGlobalSymbol(Address, "DATAat");
440   LLVM_DEBUG(dbgs() << "Created symbol " << TargetSymbol->getName() << '\n');
441   return std::make_pair(TargetSymbol, Addend);
442 }
443 
444 MemoryContentsType BinaryContext::analyzeMemoryAt(uint64_t Address,
445                                                   BinaryFunction &BF) {
446   if (!isX86())
447     return MemoryContentsType::UNKNOWN;
448 
449   ErrorOr<BinarySection &> Section = getSectionForAddress(Address);
450   if (!Section) {
451     // No section - possibly an absolute address. Since we don't allow
452     // internal function addresses to escape the function scope - we
453     // consider it a tail call.
454     if (opts::Verbosity > 1) {
455       errs() << "BOLT-WARNING: no section for address 0x"
456              << Twine::utohexstr(Address) << " referenced from function " << BF
457              << '\n';
458     }
459     return MemoryContentsType::UNKNOWN;
460   }
461 
462   if (Section->isVirtual()) {
463     // The contents are filled at runtime.
464     return MemoryContentsType::UNKNOWN;
465   }
466 
467   // No support for jump tables in code yet.
468   if (Section->isText())
469     return MemoryContentsType::UNKNOWN;
470 
471   // Start with checking for PIC jump table. We expect non-PIC jump tables
472   // to have high 32 bits set to 0.
473   if (analyzeJumpTable(Address, JumpTable::JTT_PIC, BF))
474     return MemoryContentsType::POSSIBLE_PIC_JUMP_TABLE;
475 
476   if (analyzeJumpTable(Address, JumpTable::JTT_NORMAL, BF))
477     return MemoryContentsType::POSSIBLE_JUMP_TABLE;
478 
479   return MemoryContentsType::UNKNOWN;
480 }
481 
482 /// Check if <fragment restored name> == <parent restored name>.cold(.\d+)?
483 bool isPotentialFragmentByName(BinaryFunction &Fragment,
484                                BinaryFunction &Parent) {
485   for (StringRef Name : Parent.getNames()) {
486     std::string NamePrefix = Regex::escape(NameResolver::restore(Name));
487     std::string NameRegex = Twine(NamePrefix, "\\.cold(\\.[0-9]+)?").str();
488     if (Fragment.hasRestoredNameRegex(NameRegex))
489       return true;
490   }
491   return false;
492 }
493 
494 bool BinaryContext::analyzeJumpTable(
495     const uint64_t Address, const JumpTable::JumpTableType Type,
496     BinaryFunction &BF, const uint64_t NextJTAddress,
497     JumpTable::AddressesType *EntriesAsAddress) {
498   // Is one of the targets __builtin_unreachable?
499   bool HasUnreachable = false;
500 
501   // Number of targets other than __builtin_unreachable.
502   uint64_t NumRealEntries = 0;
503 
504   auto addEntryAddress = [&](uint64_t EntryAddress) {
505     if (EntriesAsAddress)
506       EntriesAsAddress->emplace_back(EntryAddress);
507   };
508 
509   auto doesBelongToFunction = [&](const uint64_t Addr,
510                                   BinaryFunction *TargetBF) -> bool {
511     if (BF.containsAddress(Addr))
512       return true;
513     // Nothing to do if we failed to identify the containing function.
514     if (!TargetBF)
515       return false;
516     // Case 1: check if BF is a fragment and TargetBF is its parent.
517     if (BF.isFragment()) {
518       // Parent function may or may not be already registered.
519       // Set parent link based on function name matching heuristic.
520       return registerFragment(BF, *TargetBF);
521     }
522     // Case 2: check if TargetBF is a fragment and BF is its parent.
523     return TargetBF->isFragment() && registerFragment(*TargetBF, BF);
524   };
525 
526   ErrorOr<BinarySection &> Section = getSectionForAddress(Address);
527   if (!Section)
528     return false;
529 
530   // The upper bound is defined by containing object, section limits, and
531   // the next jump table in memory.
532   uint64_t UpperBound = Section->getEndAddress();
533   const BinaryData *JumpTableBD = getBinaryDataAtAddress(Address);
534   if (JumpTableBD && JumpTableBD->getSize()) {
535     assert(JumpTableBD->getEndAddress() <= UpperBound &&
536            "data object cannot cross a section boundary");
537     UpperBound = JumpTableBD->getEndAddress();
538   }
539   if (NextJTAddress)
540     UpperBound = std::min(NextJTAddress, UpperBound);
541 
542   LLVM_DEBUG({
543     using JTT = JumpTable::JumpTableType;
544     dbgs() << formatv("BOLT-DEBUG: analyzeJumpTable @{0:x} in {1}, JTT={2}\n",
545                       Address, BF.getPrintName(),
546                       Type == JTT::JTT_PIC ? "PIC" : "Normal");
547   });
548   const uint64_t EntrySize = getJumpTableEntrySize(Type);
549   for (uint64_t EntryAddress = Address; EntryAddress <= UpperBound - EntrySize;
550        EntryAddress += EntrySize) {
551     LLVM_DEBUG(dbgs() << "  * Checking 0x" << Twine::utohexstr(EntryAddress)
552                       << " -> ");
553     // Check if there's a proper relocation against the jump table entry.
554     if (HasRelocations) {
555       if (Type == JumpTable::JTT_PIC &&
556           !DataPCRelocations.count(EntryAddress)) {
557         LLVM_DEBUG(
558             dbgs() << "FAIL: JTT_PIC table, no relocation for this address\n");
559         break;
560       }
561       if (Type == JumpTable::JTT_NORMAL && !getRelocationAt(EntryAddress)) {
562         LLVM_DEBUG(
563             dbgs()
564             << "FAIL: JTT_NORMAL table, no relocation for this address\n");
565         break;
566       }
567     }
568 
569     const uint64_t Value =
570         (Type == JumpTable::JTT_PIC)
571             ? Address + *getSignedValueAtAddress(EntryAddress, EntrySize)
572             : *getPointerAtAddress(EntryAddress);
573 
574     // __builtin_unreachable() case.
575     if (Value == BF.getAddress() + BF.getSize()) {
576       addEntryAddress(Value);
577       HasUnreachable = true;
578       LLVM_DEBUG(dbgs() << formatv("OK: {0:x} __builtin_unreachable\n", Value));
579       continue;
580     }
581 
582     // Function or one of its fragments.
583     BinaryFunction *TargetBF = getBinaryFunctionContainingAddress(Value);
584 
585     // We assume that a jump table cannot have function start as an entry.
586     if (!doesBelongToFunction(Value, TargetBF) || Value == BF.getAddress()) {
587       LLVM_DEBUG({
588         if (!BF.containsAddress(Value)) {
589           dbgs() << "FAIL: function doesn't contain this address\n";
590           if (TargetBF) {
591             dbgs() << "  ! function containing this address: "
592                    << TargetBF->getPrintName() << '\n';
593             if (TargetBF->isFragment()) {
594               dbgs() << "  ! is a fragment";
595               for (BinaryFunction *Parent : TargetBF->ParentFragments)
596                 dbgs() << ", parent: " << Parent->getPrintName();
597               dbgs() << '\n';
598             }
599           }
600         }
601         if (Value == BF.getAddress())
602           dbgs() << "FAIL: jump table cannot have function start as an entry\n";
603       });
604       break;
605     }
606 
607     // Check there's an instruction at this offset.
608     if (TargetBF->getState() == BinaryFunction::State::Disassembled &&
609         !TargetBF->getInstructionAtOffset(Value - TargetBF->getAddress())) {
610       LLVM_DEBUG(dbgs() << formatv("FAIL: no instruction at {0:x}\n", Value));
611       break;
612     }
613 
614     ++NumRealEntries;
615     LLVM_DEBUG(dbgs() << formatv("OK: {0:x} real entry\n", Value));
616 
617     if (TargetBF != &BF)
618       BF.setHasIndirectTargetToSplitFragment(true);
619     addEntryAddress(Value);
620   }
621 
622   // It's a jump table if the number of real entries is more than 1, or there's
623   // one real entry and "unreachable" targets. If there are only multiple
624   // "unreachable" targets, then it's not a jump table.
625   return NumRealEntries + HasUnreachable >= 2;
626 }
627 
628 void BinaryContext::populateJumpTables() {
629   LLVM_DEBUG(dbgs() << "DataPCRelocations: " << DataPCRelocations.size()
630                     << '\n');
631   for (auto JTI = JumpTables.begin(), JTE = JumpTables.end(); JTI != JTE;
632        ++JTI) {
633     JumpTable *JT = JTI->second;
634 
635     bool NonSimpleParent = false;
636     for (BinaryFunction *BF : JT->Parents)
637       NonSimpleParent |= !BF->isSimple();
638     if (NonSimpleParent)
639       continue;
640 
641     uint64_t NextJTAddress = 0;
642     auto NextJTI = std::next(JTI);
643     if (NextJTI != JTE)
644       NextJTAddress = NextJTI->second->getAddress();
645 
646     const bool Success =
647         analyzeJumpTable(JT->getAddress(), JT->Type, *(JT->Parents[0]),
648                          NextJTAddress, &JT->EntriesAsAddress);
649     if (!Success) {
650       LLVM_DEBUG({
651         dbgs() << "failed to analyze ";
652         JT->print(dbgs());
653         if (NextJTI != JTE) {
654           dbgs() << "next ";
655           NextJTI->second->print(dbgs());
656         }
657       });
658       llvm_unreachable("jump table heuristic failure");
659     }
660     for (BinaryFunction *Frag : JT->Parents) {
661       for (uint64_t EntryAddress : JT->EntriesAsAddress)
662         // if target is builtin_unreachable
663         if (EntryAddress == Frag->getAddress() + Frag->getSize()) {
664           Frag->IgnoredBranches.emplace_back(EntryAddress - Frag->getAddress(),
665                                              Frag->getSize());
666         } else if (EntryAddress >= Frag->getAddress() &&
667                    EntryAddress < Frag->getAddress() + Frag->getSize()) {
668           Frag->registerReferencedOffset(EntryAddress - Frag->getAddress());
669         }
670     }
671 
672     // In strict mode, erase PC-relative relocation record. Later we check that
673     // all such records are erased and thus have been accounted for.
674     if (opts::StrictMode && JT->Type == JumpTable::JTT_PIC) {
675       for (uint64_t Address = JT->getAddress();
676            Address < JT->getAddress() + JT->getSize();
677            Address += JT->EntrySize) {
678         DataPCRelocations.erase(DataPCRelocations.find(Address));
679       }
680     }
681 
682     // Mark to skip the function and all its fragments.
683     for (BinaryFunction *Frag : JT->Parents)
684       if (Frag->hasIndirectTargetToSplitFragment())
685         addFragmentsToSkip(Frag);
686   }
687 
688   if (opts::StrictMode && DataPCRelocations.size()) {
689     LLVM_DEBUG({
690       dbgs() << DataPCRelocations.size()
691              << " unclaimed PC-relative relocations left in data:\n";
692       for (uint64_t Reloc : DataPCRelocations)
693         dbgs() << Twine::utohexstr(Reloc) << '\n';
694     });
695     assert(0 && "unclaimed PC-relative relocations left in data\n");
696   }
697   clearList(DataPCRelocations);
698 }
699 
700 void BinaryContext::skipMarkedFragments() {
701   std::vector<BinaryFunction *> FragmentQueue;
702   // Copy the functions to FragmentQueue.
703   FragmentQueue.assign(FragmentsToSkip.begin(), FragmentsToSkip.end());
704   auto addToWorklist = [&](BinaryFunction *Function) -> void {
705     if (FragmentsToSkip.count(Function))
706       return;
707     FragmentQueue.push_back(Function);
708     addFragmentsToSkip(Function);
709   };
710   // Functions containing split jump tables need to be skipped with all
711   // fragments (transitively).
712   for (size_t I = 0; I != FragmentQueue.size(); I++) {
713     BinaryFunction *BF = FragmentQueue[I];
714     assert(FragmentsToSkip.count(BF) &&
715            "internal error in traversing function fragments");
716     if (opts::Verbosity >= 1)
717       errs() << "BOLT-WARNING: Ignoring " << BF->getPrintName() << '\n';
718     BF->setSimple(false);
719     BF->setHasIndirectTargetToSplitFragment(true);
720 
721     llvm::for_each(BF->Fragments, addToWorklist);
722     llvm::for_each(BF->ParentFragments, addToWorklist);
723   }
724   if (!FragmentsToSkip.empty())
725     errs() << "BOLT-WARNING: skipped " << FragmentsToSkip.size() << " function"
726            << (FragmentsToSkip.size() == 1 ? "" : "s")
727            << " due to cold fragments\n";
728 }
729 
730 MCSymbol *BinaryContext::getOrCreateGlobalSymbol(uint64_t Address, Twine Prefix,
731                                                  uint64_t Size,
732                                                  uint16_t Alignment,
733                                                  unsigned Flags) {
734   auto Itr = BinaryDataMap.find(Address);
735   if (Itr != BinaryDataMap.end()) {
736     assert(Itr->second->getSize() == Size || !Size);
737     return Itr->second->getSymbol();
738   }
739 
740   std::string Name = (Prefix + "0x" + Twine::utohexstr(Address)).str();
741   assert(!GlobalSymbols.count(Name) && "created name is not unique");
742   return registerNameAtAddress(Name, Address, Size, Alignment, Flags);
743 }
744 
745 MCSymbol *BinaryContext::getOrCreateUndefinedGlobalSymbol(StringRef Name) {
746   return Ctx->getOrCreateSymbol(Name);
747 }
748 
749 BinaryFunction *BinaryContext::createBinaryFunction(
750     const std::string &Name, BinarySection &Section, uint64_t Address,
751     uint64_t Size, uint64_t SymbolSize, uint16_t Alignment) {
752   auto Result = BinaryFunctions.emplace(
753       Address, BinaryFunction(Name, Section, Address, Size, *this));
754   assert(Result.second == true && "unexpected duplicate function");
755   BinaryFunction *BF = &Result.first->second;
756   registerNameAtAddress(Name, Address, SymbolSize ? SymbolSize : Size,
757                         Alignment);
758   setSymbolToFunctionMap(BF->getSymbol(), BF);
759   return BF;
760 }
761 
762 const MCSymbol *
763 BinaryContext::getOrCreateJumpTable(BinaryFunction &Function, uint64_t Address,
764                                     JumpTable::JumpTableType Type) {
765   auto isFragmentOf = [](BinaryFunction *Fragment, BinaryFunction *Parent) {
766     return (Fragment->isFragment() && Fragment->isParentFragment(Parent));
767   };
768   (void)isFragmentOf;
769 
770   // Two fragments of same function access same jump table
771   if (JumpTable *JT = getJumpTableContainingAddress(Address)) {
772     assert(JT->Type == Type && "jump table types have to match");
773     assert(Address == JT->getAddress() && "unexpected non-empty jump table");
774 
775     // Prevent associating a jump table to a specific fragment twice.
776     // This simple check arises from the assumption: no more than 2 fragments.
777     if (JT->Parents.size() == 1 && JT->Parents[0] != &Function) {
778       assert((isFragmentOf(JT->Parents[0], &Function) ||
779               isFragmentOf(&Function, JT->Parents[0])) &&
780              "cannot re-use jump table of a different function");
781       // Duplicate the entry for the parent function for easy access
782       JT->Parents.push_back(&Function);
783       if (opts::Verbosity > 2) {
784         outs() << "BOLT-INFO: Multiple fragments access same jump table: "
785                << JT->Parents[0]->getPrintName() << "; "
786                << Function.getPrintName() << "\n";
787         JT->print(outs());
788       }
789       Function.JumpTables.emplace(Address, JT);
790       JT->Parents[0]->setHasIndirectTargetToSplitFragment(true);
791       JT->Parents[1]->setHasIndirectTargetToSplitFragment(true);
792     }
793 
794     bool IsJumpTableParent = false;
795     (void)IsJumpTableParent;
796     for (BinaryFunction *Frag : JT->Parents)
797       if (Frag == &Function)
798         IsJumpTableParent = true;
799     assert(IsJumpTableParent &&
800            "cannot re-use jump table of a different function");
801     return JT->getFirstLabel();
802   }
803 
804   // Re-use the existing symbol if possible.
805   MCSymbol *JTLabel = nullptr;
806   if (BinaryData *Object = getBinaryDataAtAddress(Address)) {
807     if (!isInternalSymbolName(Object->getSymbol()->getName()))
808       JTLabel = Object->getSymbol();
809   }
810 
811   const uint64_t EntrySize = getJumpTableEntrySize(Type);
812   if (!JTLabel) {
813     const std::string JumpTableName = generateJumpTableName(Function, Address);
814     JTLabel = registerNameAtAddress(JumpTableName, Address, 0, EntrySize);
815   }
816 
817   LLVM_DEBUG(dbgs() << "BOLT-DEBUG: creating jump table " << JTLabel->getName()
818                     << " in function " << Function << '\n');
819 
820   JumpTable *JT = new JumpTable(*JTLabel, Address, EntrySize, Type,
821                                 JumpTable::LabelMapType{{0, JTLabel}},
822                                 *getSectionForAddress(Address));
823   JT->Parents.push_back(&Function);
824   if (opts::Verbosity > 2)
825     JT->print(outs());
826   JumpTables.emplace(Address, JT);
827 
828   // Duplicate the entry for the parent function for easy access.
829   Function.JumpTables.emplace(Address, JT);
830   return JTLabel;
831 }
832 
833 std::pair<uint64_t, const MCSymbol *>
834 BinaryContext::duplicateJumpTable(BinaryFunction &Function, JumpTable *JT,
835                                   const MCSymbol *OldLabel) {
836   auto L = scopeLock();
837   unsigned Offset = 0;
838   bool Found = false;
839   for (std::pair<const unsigned, MCSymbol *> Elmt : JT->Labels) {
840     if (Elmt.second != OldLabel)
841       continue;
842     Offset = Elmt.first;
843     Found = true;
844     break;
845   }
846   assert(Found && "Label not found");
847   (void)Found;
848   MCSymbol *NewLabel = Ctx->createNamedTempSymbol("duplicatedJT");
849   JumpTable *NewJT =
850       new JumpTable(*NewLabel, JT->getAddress(), JT->EntrySize, JT->Type,
851                     JumpTable::LabelMapType{{Offset, NewLabel}},
852                     *getSectionForAddress(JT->getAddress()));
853   NewJT->Parents = JT->Parents;
854   NewJT->Entries = JT->Entries;
855   NewJT->Counts = JT->Counts;
856   uint64_t JumpTableID = ++DuplicatedJumpTables;
857   // Invert it to differentiate from regular jump tables whose IDs are their
858   // addresses in the input binary memory space
859   JumpTableID = ~JumpTableID;
860   JumpTables.emplace(JumpTableID, NewJT);
861   Function.JumpTables.emplace(JumpTableID, NewJT);
862   return std::make_pair(JumpTableID, NewLabel);
863 }
864 
865 std::string BinaryContext::generateJumpTableName(const BinaryFunction &BF,
866                                                  uint64_t Address) {
867   size_t Id;
868   uint64_t Offset = 0;
869   if (const JumpTable *JT = BF.getJumpTableContainingAddress(Address)) {
870     Offset = Address - JT->getAddress();
871     auto Itr = JT->Labels.find(Offset);
872     if (Itr != JT->Labels.end())
873       return std::string(Itr->second->getName());
874     Id = JumpTableIds.at(JT->getAddress());
875   } else {
876     Id = JumpTableIds[Address] = BF.JumpTables.size();
877   }
878   return ("JUMP_TABLE/" + BF.getOneName().str() + "." + std::to_string(Id) +
879           (Offset ? ("." + std::to_string(Offset)) : ""));
880 }
881 
882 bool BinaryContext::hasValidCodePadding(const BinaryFunction &BF) {
883   // FIXME: aarch64 support is missing.
884   if (!isX86())
885     return true;
886 
887   if (BF.getSize() == BF.getMaxSize())
888     return true;
889 
890   ErrorOr<ArrayRef<unsigned char>> FunctionData = BF.getData();
891   assert(FunctionData && "cannot get function as data");
892 
893   uint64_t Offset = BF.getSize();
894   MCInst Instr;
895   uint64_t InstrSize = 0;
896   uint64_t InstrAddress = BF.getAddress() + Offset;
897   using std::placeholders::_1;
898 
899   // Skip instructions that satisfy the predicate condition.
900   auto skipInstructions = [&](std::function<bool(const MCInst &)> Predicate) {
901     const uint64_t StartOffset = Offset;
902     for (; Offset < BF.getMaxSize();
903          Offset += InstrSize, InstrAddress += InstrSize) {
904       if (!DisAsm->getInstruction(Instr, InstrSize, FunctionData->slice(Offset),
905                                   InstrAddress, nulls()))
906         break;
907       if (!Predicate(Instr))
908         break;
909     }
910 
911     return Offset - StartOffset;
912   };
913 
914   // Skip a sequence of zero bytes.
915   auto skipZeros = [&]() {
916     const uint64_t StartOffset = Offset;
917     for (; Offset < BF.getMaxSize(); ++Offset)
918       if ((*FunctionData)[Offset] != 0)
919         break;
920 
921     return Offset - StartOffset;
922   };
923 
924   // Accept the whole padding area filled with breakpoints.
925   auto isBreakpoint = std::bind(&MCPlusBuilder::isBreakpoint, MIB.get(), _1);
926   if (skipInstructions(isBreakpoint) && Offset == BF.getMaxSize())
927     return true;
928 
929   auto isNoop = std::bind(&MCPlusBuilder::isNoop, MIB.get(), _1);
930 
931   // Some functions have a jump to the next function or to the padding area
932   // inserted after the body.
933   auto isSkipJump = [&](const MCInst &Instr) {
934     uint64_t TargetAddress = 0;
935     if (MIB->isUnconditionalBranch(Instr) &&
936         MIB->evaluateBranch(Instr, InstrAddress, InstrSize, TargetAddress)) {
937       if (TargetAddress >= InstrAddress + InstrSize &&
938           TargetAddress <= BF.getAddress() + BF.getMaxSize()) {
939         return true;
940       }
941     }
942     return false;
943   };
944 
945   // Skip over nops, jumps, and zero padding. Allow interleaving (this happens).
946   while (skipInstructions(isNoop) || skipInstructions(isSkipJump) ||
947          skipZeros())
948     ;
949 
950   if (Offset == BF.getMaxSize())
951     return true;
952 
953   if (opts::Verbosity >= 1) {
954     errs() << "BOLT-WARNING: bad padding at address 0x"
955            << Twine::utohexstr(BF.getAddress() + BF.getSize())
956            << " starting at offset " << (Offset - BF.getSize())
957            << " in function " << BF << '\n'
958            << FunctionData->slice(BF.getSize(), BF.getMaxSize() - BF.getSize())
959            << '\n';
960   }
961 
962   return false;
963 }
964 
965 void BinaryContext::adjustCodePadding() {
966   for (auto &BFI : BinaryFunctions) {
967     BinaryFunction &BF = BFI.second;
968     if (!shouldEmit(BF))
969       continue;
970 
971     if (!hasValidCodePadding(BF)) {
972       if (HasRelocations) {
973         if (opts::Verbosity >= 1) {
974           outs() << "BOLT-INFO: function " << BF
975                  << " has invalid padding. Ignoring the function.\n";
976         }
977         BF.setIgnored();
978       } else {
979         BF.setMaxSize(BF.getSize());
980       }
981     }
982   }
983 }
984 
985 MCSymbol *BinaryContext::registerNameAtAddress(StringRef Name, uint64_t Address,
986                                                uint64_t Size,
987                                                uint16_t Alignment,
988                                                unsigned Flags) {
989   // Register the name with MCContext.
990   MCSymbol *Symbol = Ctx->getOrCreateSymbol(Name);
991 
992   auto GAI = BinaryDataMap.find(Address);
993   BinaryData *BD;
994   if (GAI == BinaryDataMap.end()) {
995     ErrorOr<BinarySection &> SectionOrErr = getSectionForAddress(Address);
996     BinarySection &Section =
997         SectionOrErr ? SectionOrErr.get() : absoluteSection();
998     BD = new BinaryData(*Symbol, Address, Size, Alignment ? Alignment : 1,
999                         Section, Flags);
1000     GAI = BinaryDataMap.emplace(Address, BD).first;
1001     GlobalSymbols[Name] = BD;
1002     updateObjectNesting(GAI);
1003   } else {
1004     BD = GAI->second;
1005     if (!BD->hasName(Name)) {
1006       GlobalSymbols[Name] = BD;
1007       BD->Symbols.push_back(Symbol);
1008     }
1009   }
1010 
1011   return Symbol;
1012 }
1013 
1014 const BinaryData *
1015 BinaryContext::getBinaryDataContainingAddressImpl(uint64_t Address) const {
1016   auto NI = BinaryDataMap.lower_bound(Address);
1017   auto End = BinaryDataMap.end();
1018   if ((NI != End && Address == NI->first) ||
1019       ((NI != BinaryDataMap.begin()) && (NI-- != BinaryDataMap.begin()))) {
1020     if (NI->second->containsAddress(Address))
1021       return NI->second;
1022 
1023     // If this is a sub-symbol, see if a parent data contains the address.
1024     const BinaryData *BD = NI->second->getParent();
1025     while (BD) {
1026       if (BD->containsAddress(Address))
1027         return BD;
1028       BD = BD->getParent();
1029     }
1030   }
1031   return nullptr;
1032 }
1033 
1034 bool BinaryContext::setBinaryDataSize(uint64_t Address, uint64_t Size) {
1035   auto NI = BinaryDataMap.find(Address);
1036   assert(NI != BinaryDataMap.end());
1037   if (NI == BinaryDataMap.end())
1038     return false;
1039   // TODO: it's possible that a jump table starts at the same address
1040   // as a larger blob of private data.  When we set the size of the
1041   // jump table, it might be smaller than the total blob size.  In this
1042   // case we just leave the original size since (currently) it won't really
1043   // affect anything.
1044   assert((!NI->second->Size || NI->second->Size == Size ||
1045           (NI->second->isJumpTable() && NI->second->Size > Size)) &&
1046          "can't change the size of a symbol that has already had its "
1047          "size set");
1048   if (!NI->second->Size) {
1049     NI->second->Size = Size;
1050     updateObjectNesting(NI);
1051     return true;
1052   }
1053   return false;
1054 }
1055 
1056 void BinaryContext::generateSymbolHashes() {
1057   auto isPadding = [](const BinaryData &BD) {
1058     StringRef Contents = BD.getSection().getContents();
1059     StringRef SymData = Contents.substr(BD.getOffset(), BD.getSize());
1060     return (BD.getName().startswith("HOLEat") ||
1061             SymData.find_first_not_of(0) == StringRef::npos);
1062   };
1063 
1064   uint64_t NumCollisions = 0;
1065   for (auto &Entry : BinaryDataMap) {
1066     BinaryData &BD = *Entry.second;
1067     StringRef Name = BD.getName();
1068 
1069     if (!isInternalSymbolName(Name))
1070       continue;
1071 
1072     // First check if a non-anonymous alias exists and move it to the front.
1073     if (BD.getSymbols().size() > 1) {
1074       auto Itr = llvm::find_if(BD.getSymbols(), [&](const MCSymbol *Symbol) {
1075         return !isInternalSymbolName(Symbol->getName());
1076       });
1077       if (Itr != BD.getSymbols().end()) {
1078         size_t Idx = std::distance(BD.getSymbols().begin(), Itr);
1079         std::swap(BD.getSymbols()[0], BD.getSymbols()[Idx]);
1080         continue;
1081       }
1082     }
1083 
1084     // We have to skip 0 size symbols since they will all collide.
1085     if (BD.getSize() == 0) {
1086       continue;
1087     }
1088 
1089     const uint64_t Hash = BD.getSection().hash(BD);
1090     const size_t Idx = Name.find("0x");
1091     std::string NewName =
1092         (Twine(Name.substr(0, Idx)) + "_" + Twine::utohexstr(Hash)).str();
1093     if (getBinaryDataByName(NewName)) {
1094       // Ignore collisions for symbols that appear to be padding
1095       // (i.e. all zeros or a "hole")
1096       if (!isPadding(BD)) {
1097         if (opts::Verbosity) {
1098           errs() << "BOLT-WARNING: collision detected when hashing " << BD
1099                  << " with new name (" << NewName << "), skipping.\n";
1100         }
1101         ++NumCollisions;
1102       }
1103       continue;
1104     }
1105     BD.Symbols.insert(BD.Symbols.begin(), Ctx->getOrCreateSymbol(NewName));
1106     GlobalSymbols[NewName] = &BD;
1107   }
1108   if (NumCollisions) {
1109     errs() << "BOLT-WARNING: " << NumCollisions
1110            << " collisions detected while hashing binary objects";
1111     if (!opts::Verbosity)
1112       errs() << ". Use -v=1 to see the list.";
1113     errs() << '\n';
1114   }
1115 }
1116 
1117 bool BinaryContext::registerFragment(BinaryFunction &TargetFunction,
1118                                      BinaryFunction &Function) const {
1119   if (!isPotentialFragmentByName(TargetFunction, Function))
1120     return false;
1121   assert(TargetFunction.isFragment() && "TargetFunction must be a fragment");
1122   if (TargetFunction.isParentFragment(&Function))
1123     return true;
1124   TargetFunction.addParentFragment(Function);
1125   Function.addFragment(TargetFunction);
1126   if (!HasRelocations) {
1127     TargetFunction.setSimple(false);
1128     Function.setSimple(false);
1129   }
1130   if (opts::Verbosity >= 1) {
1131     outs() << "BOLT-INFO: marking " << TargetFunction << " as a fragment of "
1132            << Function << '\n';
1133   }
1134   return true;
1135 }
1136 
1137 void BinaryContext::addAdrpAddRelocAArch64(BinaryFunction &BF,
1138                                            MCInst &LoadLowBits,
1139                                            MCInst &LoadHiBits,
1140                                            uint64_t Target) {
1141   const MCSymbol *TargetSymbol;
1142   uint64_t Addend = 0;
1143   std::tie(TargetSymbol, Addend) = handleAddressRef(Target, BF,
1144                                                     /*IsPCRel*/ true);
1145   int64_t Val;
1146   MIB->replaceImmWithSymbolRef(LoadHiBits, TargetSymbol, Addend, Ctx.get(), Val,
1147                                ELF::R_AARCH64_ADR_PREL_PG_HI21);
1148   MIB->replaceImmWithSymbolRef(LoadLowBits, TargetSymbol, Addend, Ctx.get(),
1149                                Val, ELF::R_AARCH64_ADD_ABS_LO12_NC);
1150 }
1151 
1152 bool BinaryContext::handleAArch64Veneer(uint64_t Address, bool MatchOnly) {
1153   BinaryFunction *TargetFunction = getBinaryFunctionContainingAddress(Address);
1154   if (TargetFunction)
1155     return false;
1156 
1157   ErrorOr<BinarySection &> Section = getSectionForAddress(Address);
1158   assert(Section && "cannot get section for referenced address");
1159   if (!Section->isText())
1160     return false;
1161 
1162   bool Ret = false;
1163   StringRef SectionContents = Section->getContents();
1164   uint64_t Offset = Address - Section->getAddress();
1165   const uint64_t MaxSize = SectionContents.size() - Offset;
1166   const uint8_t *Bytes =
1167       reinterpret_cast<const uint8_t *>(SectionContents.data());
1168   ArrayRef<uint8_t> Data(Bytes + Offset, MaxSize);
1169 
1170   auto matchVeneer = [&](BinaryFunction::InstrMapType &Instructions,
1171                          MCInst &Instruction, uint64_t Offset,
1172                          uint64_t AbsoluteInstrAddr,
1173                          uint64_t TotalSize) -> bool {
1174     MCInst *TargetHiBits, *TargetLowBits;
1175     uint64_t TargetAddress, Count;
1176     Count = MIB->matchLinkerVeneer(Instructions.begin(), Instructions.end(),
1177                                    AbsoluteInstrAddr, Instruction, TargetHiBits,
1178                                    TargetLowBits, TargetAddress);
1179     if (!Count)
1180       return false;
1181 
1182     if (MatchOnly)
1183       return true;
1184 
1185     // NOTE The target symbol was created during disassemble's
1186     // handleExternalReference
1187     const MCSymbol *VeneerSymbol = getOrCreateGlobalSymbol(Address, "FUNCat");
1188     BinaryFunction *Veneer = createBinaryFunction(VeneerSymbol->getName().str(),
1189                                                   *Section, Address, TotalSize);
1190     addAdrpAddRelocAArch64(*Veneer, *TargetLowBits, *TargetHiBits,
1191                            TargetAddress);
1192     MIB->addAnnotation(Instruction, "AArch64Veneer", true);
1193     Veneer->addInstruction(Offset, std::move(Instruction));
1194     --Count;
1195     for (auto It = std::prev(Instructions.end()); Count != 0;
1196          It = std::prev(It), --Count) {
1197       MIB->addAnnotation(It->second, "AArch64Veneer", true);
1198       Veneer->addInstruction(It->first, std::move(It->second));
1199     }
1200 
1201     Veneer->getOrCreateLocalLabel(Address);
1202     Veneer->setMaxSize(TotalSize);
1203     Veneer->updateState(BinaryFunction::State::Disassembled);
1204     LLVM_DEBUG(dbgs() << "BOLT-DEBUG: handling veneer function at 0x" << Address
1205                       << "\n");
1206     return true;
1207   };
1208 
1209   uint64_t Size = 0, TotalSize = 0;
1210   BinaryFunction::InstrMapType VeneerInstructions;
1211   for (Offset = 0; Offset < MaxSize; Offset += Size) {
1212     MCInst Instruction;
1213     const uint64_t AbsoluteInstrAddr = Address + Offset;
1214     if (!SymbolicDisAsm->getInstruction(Instruction, Size, Data.slice(Offset),
1215                                         AbsoluteInstrAddr, nulls()))
1216       break;
1217 
1218     TotalSize += Size;
1219     if (MIB->isBranch(Instruction)) {
1220       Ret = matchVeneer(VeneerInstructions, Instruction, Offset,
1221                         AbsoluteInstrAddr, TotalSize);
1222       break;
1223     }
1224 
1225     VeneerInstructions.emplace(Offset, std::move(Instruction));
1226   }
1227 
1228   return Ret;
1229 }
1230 
1231 void BinaryContext::processInterproceduralReferences() {
1232   for (const std::pair<BinaryFunction *, uint64_t> &It :
1233        InterproceduralReferences) {
1234     BinaryFunction &Function = *It.first;
1235     uint64_t Address = It.second;
1236     if (!Address || Function.isIgnored())
1237       continue;
1238 
1239     BinaryFunction *TargetFunction =
1240         getBinaryFunctionContainingAddress(Address);
1241     if (&Function == TargetFunction)
1242       continue;
1243 
1244     if (TargetFunction) {
1245       if (TargetFunction->isFragment() &&
1246           !registerFragment(*TargetFunction, Function)) {
1247         errs() << "BOLT-WARNING: interprocedural reference between unrelated "
1248                   "fragments: "
1249                << Function.getPrintName() << " and "
1250                << TargetFunction->getPrintName() << '\n';
1251       }
1252       if (uint64_t Offset = Address - TargetFunction->getAddress())
1253         TargetFunction->addEntryPointAtOffset(Offset);
1254 
1255       continue;
1256     }
1257 
1258     // Check if address falls in function padding space - this could be
1259     // unmarked data in code. In this case adjust the padding space size.
1260     ErrorOr<BinarySection &> Section = getSectionForAddress(Address);
1261     assert(Section && "cannot get section for referenced address");
1262 
1263     if (!Section->isText())
1264       continue;
1265 
1266     // PLT requires special handling and could be ignored in this context.
1267     StringRef SectionName = Section->getName();
1268     if (SectionName == ".plt" || SectionName == ".plt.got")
1269       continue;
1270 
1271     // Check if it is aarch64 veneer written at Address
1272     if (isAArch64() && handleAArch64Veneer(Address))
1273       continue;
1274 
1275     if (opts::processAllFunctions()) {
1276       errs() << "BOLT-ERROR: cannot process binaries with unmarked "
1277              << "object in code at address 0x" << Twine::utohexstr(Address)
1278              << " belonging to section " << SectionName << " in current mode\n";
1279       exit(1);
1280     }
1281 
1282     TargetFunction = getBinaryFunctionContainingAddress(Address,
1283                                                         /*CheckPastEnd=*/false,
1284                                                         /*UseMaxSize=*/true);
1285     // We are not going to overwrite non-simple functions, but for simple
1286     // ones - adjust the padding size.
1287     if (TargetFunction && TargetFunction->isSimple()) {
1288       errs() << "BOLT-WARNING: function " << *TargetFunction
1289              << " has an object detected in a padding region at address 0x"
1290              << Twine::utohexstr(Address) << '\n';
1291       TargetFunction->setMaxSize(TargetFunction->getSize());
1292     }
1293   }
1294 
1295   InterproceduralReferences.clear();
1296 }
1297 
1298 void BinaryContext::postProcessSymbolTable() {
1299   fixBinaryDataHoles();
1300   bool Valid = true;
1301   for (auto &Entry : BinaryDataMap) {
1302     BinaryData *BD = Entry.second;
1303     if ((BD->getName().startswith("SYMBOLat") ||
1304          BD->getName().startswith("DATAat")) &&
1305         !BD->getParent() && !BD->getSize() && !BD->isAbsolute() &&
1306         BD->getSection()) {
1307       errs() << "BOLT-WARNING: zero-sized top level symbol: " << *BD << "\n";
1308       Valid = false;
1309     }
1310   }
1311   assert(Valid);
1312   (void)Valid;
1313   generateSymbolHashes();
1314 }
1315 
1316 void BinaryContext::foldFunction(BinaryFunction &ChildBF,
1317                                  BinaryFunction &ParentBF) {
1318   assert(!ChildBF.isMultiEntry() && !ParentBF.isMultiEntry() &&
1319          "cannot merge functions with multiple entry points");
1320 
1321   std::unique_lock<std::shared_timed_mutex> WriteCtxLock(CtxMutex,
1322                                                          std::defer_lock);
1323   std::unique_lock<std::shared_timed_mutex> WriteSymbolMapLock(
1324       SymbolToFunctionMapMutex, std::defer_lock);
1325 
1326   const StringRef ChildName = ChildBF.getOneName();
1327 
1328   // Move symbols over and update bookkeeping info.
1329   for (MCSymbol *Symbol : ChildBF.getSymbols()) {
1330     ParentBF.getSymbols().push_back(Symbol);
1331     WriteSymbolMapLock.lock();
1332     SymbolToFunctionMap[Symbol] = &ParentBF;
1333     WriteSymbolMapLock.unlock();
1334     // NB: there's no need to update BinaryDataMap and GlobalSymbols.
1335   }
1336   ChildBF.getSymbols().clear();
1337 
1338   // Move other names the child function is known under.
1339   llvm::move(ChildBF.Aliases, std::back_inserter(ParentBF.Aliases));
1340   ChildBF.Aliases.clear();
1341 
1342   if (HasRelocations) {
1343     // Merge execution counts of ChildBF into those of ParentBF.
1344     // Without relocations, we cannot reliably merge profiles as both functions
1345     // continue to exist and either one can be executed.
1346     ChildBF.mergeProfileDataInto(ParentBF);
1347 
1348     std::shared_lock<std::shared_timed_mutex> ReadBfsLock(BinaryFunctionsMutex,
1349                                                           std::defer_lock);
1350     std::unique_lock<std::shared_timed_mutex> WriteBfsLock(BinaryFunctionsMutex,
1351                                                            std::defer_lock);
1352     // Remove ChildBF from the global set of functions in relocs mode.
1353     ReadBfsLock.lock();
1354     auto FI = BinaryFunctions.find(ChildBF.getAddress());
1355     ReadBfsLock.unlock();
1356 
1357     assert(FI != BinaryFunctions.end() && "function not found");
1358     assert(&ChildBF == &FI->second && "function mismatch");
1359 
1360     WriteBfsLock.lock();
1361     ChildBF.clearDisasmState();
1362     FI = BinaryFunctions.erase(FI);
1363     WriteBfsLock.unlock();
1364 
1365   } else {
1366     // In non-relocation mode we keep the function, but rename it.
1367     std::string NewName = "__ICF_" + ChildName.str();
1368 
1369     WriteCtxLock.lock();
1370     ChildBF.getSymbols().push_back(Ctx->getOrCreateSymbol(NewName));
1371     WriteCtxLock.unlock();
1372 
1373     ChildBF.setFolded(&ParentBF);
1374   }
1375 }
1376 
1377 void BinaryContext::fixBinaryDataHoles() {
1378   assert(validateObjectNesting() && "object nesting inconsitency detected");
1379 
1380   for (BinarySection &Section : allocatableSections()) {
1381     std::vector<std::pair<uint64_t, uint64_t>> Holes;
1382 
1383     auto isNotHole = [&Section](const binary_data_iterator &Itr) {
1384       BinaryData *BD = Itr->second;
1385       bool isHole = (!BD->getParent() && !BD->getSize() && BD->isObject() &&
1386                      (BD->getName().startswith("SYMBOLat0x") ||
1387                       BD->getName().startswith("DATAat0x") ||
1388                       BD->getName().startswith("ANONYMOUS")));
1389       return !isHole && BD->getSection() == Section && !BD->getParent();
1390     };
1391 
1392     auto BDStart = BinaryDataMap.begin();
1393     auto BDEnd = BinaryDataMap.end();
1394     auto Itr = FilteredBinaryDataIterator(isNotHole, BDStart, BDEnd);
1395     auto End = FilteredBinaryDataIterator(isNotHole, BDEnd, BDEnd);
1396 
1397     uint64_t EndAddress = Section.getAddress();
1398 
1399     while (Itr != End) {
1400       if (Itr->second->getAddress() > EndAddress) {
1401         uint64_t Gap = Itr->second->getAddress() - EndAddress;
1402         Holes.emplace_back(EndAddress, Gap);
1403       }
1404       EndAddress = Itr->second->getEndAddress();
1405       ++Itr;
1406     }
1407 
1408     if (EndAddress < Section.getEndAddress())
1409       Holes.emplace_back(EndAddress, Section.getEndAddress() - EndAddress);
1410 
1411     // If there is already a symbol at the start of the hole, grow that symbol
1412     // to cover the rest.  Otherwise, create a new symbol to cover the hole.
1413     for (std::pair<uint64_t, uint64_t> &Hole : Holes) {
1414       BinaryData *BD = getBinaryDataAtAddress(Hole.first);
1415       if (BD) {
1416         // BD->getSection() can be != Section if there are sections that
1417         // overlap.  In this case it is probably safe to just skip the holes
1418         // since the overlapping section will not(?) have any symbols in it.
1419         if (BD->getSection() == Section)
1420           setBinaryDataSize(Hole.first, Hole.second);
1421       } else {
1422         getOrCreateGlobalSymbol(Hole.first, "HOLEat", Hole.second, 1);
1423       }
1424     }
1425   }
1426 
1427   assert(validateObjectNesting() && "object nesting inconsitency detected");
1428   assert(validateHoles() && "top level hole detected in object map");
1429 }
1430 
1431 void BinaryContext::printGlobalSymbols(raw_ostream &OS) const {
1432   const BinarySection *CurrentSection = nullptr;
1433   bool FirstSection = true;
1434 
1435   for (auto &Entry : BinaryDataMap) {
1436     const BinaryData *BD = Entry.second;
1437     const BinarySection &Section = BD->getSection();
1438     if (FirstSection || Section != *CurrentSection) {
1439       uint64_t Address, Size;
1440       StringRef Name = Section.getName();
1441       if (Section) {
1442         Address = Section.getAddress();
1443         Size = Section.getSize();
1444       } else {
1445         Address = BD->getAddress();
1446         Size = BD->getSize();
1447       }
1448       OS << "BOLT-INFO: Section " << Name << ", "
1449          << "0x" + Twine::utohexstr(Address) << ":"
1450          << "0x" + Twine::utohexstr(Address + Size) << "/" << Size << "\n";
1451       CurrentSection = &Section;
1452       FirstSection = false;
1453     }
1454 
1455     OS << "BOLT-INFO: ";
1456     const BinaryData *P = BD->getParent();
1457     while (P) {
1458       OS << "  ";
1459       P = P->getParent();
1460     }
1461     OS << *BD << "\n";
1462   }
1463 }
1464 
1465 Expected<unsigned> BinaryContext::getDwarfFile(
1466     StringRef Directory, StringRef FileName, unsigned FileNumber,
1467     Optional<MD5::MD5Result> Checksum, Optional<StringRef> Source,
1468     unsigned CUID, unsigned DWARFVersion) {
1469   DwarfLineTable &Table = DwarfLineTablesCUMap[CUID];
1470   return Table.tryGetFile(Directory, FileName, Checksum, Source, DWARFVersion,
1471                           FileNumber);
1472 }
1473 
1474 unsigned BinaryContext::addDebugFilenameToUnit(const uint32_t DestCUID,
1475                                                const uint32_t SrcCUID,
1476                                                unsigned FileIndex) {
1477   DWARFCompileUnit *SrcUnit = DwCtx->getCompileUnitForOffset(SrcCUID);
1478   const DWARFDebugLine::LineTable *LineTable =
1479       DwCtx->getLineTableForUnit(SrcUnit);
1480   const std::vector<DWARFDebugLine::FileNameEntry> &FileNames =
1481       LineTable->Prologue.FileNames;
1482   // Dir indexes start at 1, as DWARF file numbers, and a dir index 0
1483   // means empty dir.
1484   assert(FileIndex > 0 && FileIndex <= FileNames.size() &&
1485          "FileIndex out of range for the compilation unit.");
1486   StringRef Dir = "";
1487   if (FileNames[FileIndex - 1].DirIdx != 0) {
1488     if (Optional<const char *> DirName = dwarf::toString(
1489             LineTable->Prologue
1490                 .IncludeDirectories[FileNames[FileIndex - 1].DirIdx - 1])) {
1491       Dir = *DirName;
1492     }
1493   }
1494   StringRef FileName = "";
1495   if (Optional<const char *> FName =
1496           dwarf::toString(FileNames[FileIndex - 1].Name))
1497     FileName = *FName;
1498   assert(FileName != "");
1499   DWARFCompileUnit *DstUnit = DwCtx->getCompileUnitForOffset(DestCUID);
1500   return cantFail(getDwarfFile(Dir, FileName, 0, None, None, DestCUID,
1501                                DstUnit->getVersion()));
1502 }
1503 
1504 std::vector<BinaryFunction *> BinaryContext::getSortedFunctions() {
1505   std::vector<BinaryFunction *> SortedFunctions(BinaryFunctions.size());
1506   llvm::transform(BinaryFunctions, SortedFunctions.begin(),
1507                   [](std::pair<const uint64_t, BinaryFunction> &BFI) {
1508                     return &BFI.second;
1509                   });
1510 
1511   llvm::stable_sort(SortedFunctions,
1512                     [](const BinaryFunction *A, const BinaryFunction *B) {
1513                       if (A->hasValidIndex() && B->hasValidIndex()) {
1514                         return A->getIndex() < B->getIndex();
1515                       }
1516                       return A->hasValidIndex();
1517                     });
1518   return SortedFunctions;
1519 }
1520 
1521 std::vector<BinaryFunction *> BinaryContext::getAllBinaryFunctions() {
1522   std::vector<BinaryFunction *> AllFunctions;
1523   AllFunctions.reserve(BinaryFunctions.size() + InjectedBinaryFunctions.size());
1524   llvm::transform(BinaryFunctions, std::back_inserter(AllFunctions),
1525                   [](std::pair<const uint64_t, BinaryFunction> &BFI) {
1526                     return &BFI.second;
1527                   });
1528   llvm::copy(InjectedBinaryFunctions, std::back_inserter(AllFunctions));
1529 
1530   return AllFunctions;
1531 }
1532 
1533 Optional<DWARFUnit *> BinaryContext::getDWOCU(uint64_t DWOId) {
1534   auto Iter = DWOCUs.find(DWOId);
1535   if (Iter == DWOCUs.end())
1536     return None;
1537 
1538   return Iter->second;
1539 }
1540 
1541 DWARFContext *BinaryContext::getDWOContext() const {
1542   if (DWOCUs.empty())
1543     return nullptr;
1544   return &DWOCUs.begin()->second->getContext();
1545 }
1546 
1547 /// Handles DWO sections that can either be in .o, .dwo or .dwp files.
1548 void BinaryContext::preprocessDWODebugInfo() {
1549   for (const std::unique_ptr<DWARFUnit> &CU : DwCtx->compile_units()) {
1550     DWARFUnit *const DwarfUnit = CU.get();
1551     if (llvm::Optional<uint64_t> DWOId = DwarfUnit->getDWOId()) {
1552       DWARFUnit *DWOCU = DwarfUnit->getNonSkeletonUnitDIE(false).getDwarfUnit();
1553       if (!DWOCU->isDWOUnit()) {
1554         std::string DWOName = dwarf::toString(
1555             DwarfUnit->getUnitDIE().find(
1556                 {dwarf::DW_AT_dwo_name, dwarf::DW_AT_GNU_dwo_name}),
1557             "");
1558         outs() << "BOLT-WARNING: Debug Fission: DWO debug information for "
1559                << DWOName
1560                << " was not retrieved and won't be updated. Please check "
1561                   "relative path.\n";
1562         continue;
1563       }
1564       DWOCUs[*DWOId] = DWOCU;
1565     }
1566   }
1567 }
1568 
1569 void BinaryContext::preprocessDebugInfo() {
1570   struct CURange {
1571     uint64_t LowPC;
1572     uint64_t HighPC;
1573     DWARFUnit *Unit;
1574 
1575     bool operator<(const CURange &Other) const { return LowPC < Other.LowPC; }
1576   };
1577 
1578   // Building a map of address ranges to CUs similar to .debug_aranges and use
1579   // it to assign CU to functions.
1580   std::vector<CURange> AllRanges;
1581   AllRanges.reserve(DwCtx->getNumCompileUnits());
1582   for (const std::unique_ptr<DWARFUnit> &CU : DwCtx->compile_units()) {
1583     Expected<DWARFAddressRangesVector> RangesOrError =
1584         CU->getUnitDIE().getAddressRanges();
1585     if (!RangesOrError) {
1586       consumeError(RangesOrError.takeError());
1587       continue;
1588     }
1589     for (DWARFAddressRange &Range : *RangesOrError) {
1590       // Parts of the debug info could be invalidated due to corresponding code
1591       // being removed from the binary by the linker. Hence we check if the
1592       // address is a valid one.
1593       if (containsAddress(Range.LowPC))
1594         AllRanges.emplace_back(CURange{Range.LowPC, Range.HighPC, CU.get()});
1595     }
1596 
1597     ContainsDwarf5 |= CU->getVersion() >= 5;
1598     ContainsDwarfLegacy |= CU->getVersion() < 5;
1599   }
1600 
1601   llvm::sort(AllRanges);
1602   for (auto &KV : BinaryFunctions) {
1603     const uint64_t FunctionAddress = KV.first;
1604     BinaryFunction &Function = KV.second;
1605 
1606     auto It = llvm::partition_point(
1607         AllRanges, [=](CURange R) { return R.HighPC <= FunctionAddress; });
1608     if (It != AllRanges.end() && It->LowPC <= FunctionAddress)
1609       Function.setDWARFUnit(It->Unit);
1610   }
1611 
1612   // Discover units with debug info that needs to be updated.
1613   for (const auto &KV : BinaryFunctions) {
1614     const BinaryFunction &BF = KV.second;
1615     if (shouldEmit(BF) && BF.getDWARFUnit())
1616       ProcessedCUs.insert(BF.getDWARFUnit());
1617   }
1618 
1619   // Clear debug info for functions from units that we are not going to process.
1620   for (auto &KV : BinaryFunctions) {
1621     BinaryFunction &BF = KV.second;
1622     if (BF.getDWARFUnit() && !ProcessedCUs.count(BF.getDWARFUnit()))
1623       BF.setDWARFUnit(nullptr);
1624   }
1625 
1626   if (opts::Verbosity >= 1) {
1627     outs() << "BOLT-INFO: " << ProcessedCUs.size() << " out of "
1628            << DwCtx->getNumCompileUnits() << " CUs will be updated\n";
1629   }
1630 
1631   preprocessDWODebugInfo();
1632 
1633   // Populate MCContext with DWARF files from all units.
1634   StringRef GlobalPrefix = AsmInfo->getPrivateGlobalPrefix();
1635   for (const std::unique_ptr<DWARFUnit> &CU : DwCtx->compile_units()) {
1636     const uint64_t CUID = CU->getOffset();
1637     DwarfLineTable &BinaryLineTable = getDwarfLineTable(CUID);
1638     BinaryLineTable.setLabel(Ctx->getOrCreateSymbol(
1639         GlobalPrefix + "line_table_start" + Twine(CUID)));
1640 
1641     if (!ProcessedCUs.count(CU.get()))
1642       continue;
1643 
1644     const DWARFDebugLine::LineTable *LineTable =
1645         DwCtx->getLineTableForUnit(CU.get());
1646     const std::vector<DWARFDebugLine::FileNameEntry> &FileNames =
1647         LineTable->Prologue.FileNames;
1648 
1649     uint16_t DwarfVersion = LineTable->Prologue.getVersion();
1650     if (DwarfVersion >= 5) {
1651       Optional<MD5::MD5Result> Checksum = None;
1652       if (LineTable->Prologue.ContentTypes.HasMD5)
1653         Checksum = LineTable->Prologue.FileNames[0].Checksum;
1654       Optional<const char *> Name =
1655           dwarf::toString(CU->getUnitDIE().find(dwarf::DW_AT_name), nullptr);
1656       if (Optional<uint64_t> DWOID = CU->getDWOId()) {
1657         auto Iter = DWOCUs.find(*DWOID);
1658         assert(Iter != DWOCUs.end() && "DWO CU was not found.");
1659         Name = dwarf::toString(
1660             Iter->second->getUnitDIE().find(dwarf::DW_AT_name), nullptr);
1661       }
1662       BinaryLineTable.setRootFile(CU->getCompilationDir(), *Name, Checksum,
1663                                   None);
1664     }
1665 
1666     BinaryLineTable.setDwarfVersion(DwarfVersion);
1667 
1668     // Assign a unique label to every line table, one per CU.
1669     // Make sure empty debug line tables are registered too.
1670     if (FileNames.empty()) {
1671       cantFail(
1672           getDwarfFile("", "<unknown>", 0, None, None, CUID, DwarfVersion));
1673       continue;
1674     }
1675     const uint32_t Offset = DwarfVersion < 5 ? 1 : 0;
1676     for (size_t I = 0, Size = FileNames.size(); I != Size; ++I) {
1677       // Dir indexes start at 1, as DWARF file numbers, and a dir index 0
1678       // means empty dir.
1679       StringRef Dir = "";
1680       if (FileNames[I].DirIdx != 0 || DwarfVersion >= 5)
1681         if (Optional<const char *> DirName = dwarf::toString(
1682                 LineTable->Prologue
1683                     .IncludeDirectories[FileNames[I].DirIdx - Offset]))
1684           Dir = *DirName;
1685       StringRef FileName = "";
1686       if (Optional<const char *> FName = dwarf::toString(FileNames[I].Name))
1687         FileName = *FName;
1688       assert(FileName != "");
1689       Optional<MD5::MD5Result> Checksum = None;
1690       if (DwarfVersion >= 5 && LineTable->Prologue.ContentTypes.HasMD5)
1691         Checksum = LineTable->Prologue.FileNames[I].Checksum;
1692       cantFail(
1693           getDwarfFile(Dir, FileName, 0, Checksum, None, CUID, DwarfVersion));
1694     }
1695   }
1696 }
1697 
1698 bool BinaryContext::shouldEmit(const BinaryFunction &Function) const {
1699   if (Function.isPseudo())
1700     return false;
1701 
1702   if (opts::processAllFunctions())
1703     return true;
1704 
1705   if (Function.isIgnored())
1706     return false;
1707 
1708   // In relocation mode we will emit non-simple functions with CFG.
1709   // If the function does not have a CFG it should be marked as ignored.
1710   return HasRelocations || Function.isSimple();
1711 }
1712 
1713 void BinaryContext::printCFI(raw_ostream &OS, const MCCFIInstruction &Inst) {
1714   uint32_t Operation = Inst.getOperation();
1715   switch (Operation) {
1716   case MCCFIInstruction::OpSameValue:
1717     OS << "OpSameValue Reg" << Inst.getRegister();
1718     break;
1719   case MCCFIInstruction::OpRememberState:
1720     OS << "OpRememberState";
1721     break;
1722   case MCCFIInstruction::OpRestoreState:
1723     OS << "OpRestoreState";
1724     break;
1725   case MCCFIInstruction::OpOffset:
1726     OS << "OpOffset Reg" << Inst.getRegister() << " " << Inst.getOffset();
1727     break;
1728   case MCCFIInstruction::OpDefCfaRegister:
1729     OS << "OpDefCfaRegister Reg" << Inst.getRegister();
1730     break;
1731   case MCCFIInstruction::OpDefCfaOffset:
1732     OS << "OpDefCfaOffset " << Inst.getOffset();
1733     break;
1734   case MCCFIInstruction::OpDefCfa:
1735     OS << "OpDefCfa Reg" << Inst.getRegister() << " " << Inst.getOffset();
1736     break;
1737   case MCCFIInstruction::OpRelOffset:
1738     OS << "OpRelOffset Reg" << Inst.getRegister() << " " << Inst.getOffset();
1739     break;
1740   case MCCFIInstruction::OpAdjustCfaOffset:
1741     OS << "OfAdjustCfaOffset " << Inst.getOffset();
1742     break;
1743   case MCCFIInstruction::OpEscape:
1744     OS << "OpEscape";
1745     break;
1746   case MCCFIInstruction::OpRestore:
1747     OS << "OpRestore Reg" << Inst.getRegister();
1748     break;
1749   case MCCFIInstruction::OpUndefined:
1750     OS << "OpUndefined Reg" << Inst.getRegister();
1751     break;
1752   case MCCFIInstruction::OpRegister:
1753     OS << "OpRegister Reg" << Inst.getRegister() << " Reg"
1754        << Inst.getRegister2();
1755     break;
1756   case MCCFIInstruction::OpWindowSave:
1757     OS << "OpWindowSave";
1758     break;
1759   case MCCFIInstruction::OpGnuArgsSize:
1760     OS << "OpGnuArgsSize";
1761     break;
1762   default:
1763     OS << "Op#" << Operation;
1764     break;
1765   }
1766 }
1767 
1768 MarkerSymType BinaryContext::getMarkerType(const SymbolRef &Symbol) const {
1769   // For aarch64, the ABI defines mapping symbols so we identify data in the
1770   // code section (see IHI0056B). $x identifies a symbol starting code or the
1771   // end of a data chunk inside code, $d indentifies start of data.
1772   if (!isAArch64() || ELFSymbolRef(Symbol).getSize())
1773     return MarkerSymType::NONE;
1774 
1775   Expected<StringRef> NameOrError = Symbol.getName();
1776   Expected<object::SymbolRef::Type> TypeOrError = Symbol.getType();
1777 
1778   if (!TypeOrError || !NameOrError)
1779     return MarkerSymType::NONE;
1780 
1781   if (*TypeOrError != SymbolRef::ST_Unknown)
1782     return MarkerSymType::NONE;
1783 
1784   if (*NameOrError == "$x" || NameOrError->startswith("$x."))
1785     return MarkerSymType::CODE;
1786 
1787   if (*NameOrError == "$d" || NameOrError->startswith("$d."))
1788     return MarkerSymType::DATA;
1789 
1790   return MarkerSymType::NONE;
1791 }
1792 
1793 bool BinaryContext::isMarker(const SymbolRef &Symbol) const {
1794   return getMarkerType(Symbol) != MarkerSymType::NONE;
1795 }
1796 
1797 static void printDebugInfo(raw_ostream &OS, const MCInst &Instruction,
1798                            const BinaryFunction *Function,
1799                            DWARFContext *DwCtx) {
1800   DebugLineTableRowRef RowRef =
1801       DebugLineTableRowRef::fromSMLoc(Instruction.getLoc());
1802   if (RowRef == DebugLineTableRowRef::NULL_ROW)
1803     return;
1804 
1805   const DWARFDebugLine::LineTable *LineTable;
1806   if (Function && Function->getDWARFUnit() &&
1807       Function->getDWARFUnit()->getOffset() == RowRef.DwCompileUnitIndex) {
1808     LineTable = Function->getDWARFLineTable();
1809   } else {
1810     LineTable = DwCtx->getLineTableForUnit(
1811         DwCtx->getCompileUnitForOffset(RowRef.DwCompileUnitIndex));
1812   }
1813   assert(LineTable && "line table expected for instruction with debug info");
1814 
1815   const DWARFDebugLine::Row &Row = LineTable->Rows[RowRef.RowIndex - 1];
1816   StringRef FileName = "";
1817   if (Optional<const char *> FName =
1818           dwarf::toString(LineTable->Prologue.FileNames[Row.File - 1].Name))
1819     FileName = *FName;
1820   OS << " # debug line " << FileName << ":" << Row.Line;
1821   if (Row.Column)
1822     OS << ":" << Row.Column;
1823   if (Row.Discriminator)
1824     OS << " discriminator:" << Row.Discriminator;
1825 }
1826 
1827 void BinaryContext::printInstruction(raw_ostream &OS, const MCInst &Instruction,
1828                                      uint64_t Offset,
1829                                      const BinaryFunction *Function,
1830                                      bool PrintMCInst, bool PrintMemData,
1831                                      bool PrintRelocations,
1832                                      StringRef Endl) const {
1833   if (MIB->isEHLabel(Instruction)) {
1834     OS << "  EH_LABEL: " << *MIB->getTargetSymbol(Instruction) << Endl;
1835     return;
1836   }
1837   OS << format("    %08" PRIx64 ": ", Offset);
1838   if (MIB->isCFI(Instruction)) {
1839     uint32_t Offset = Instruction.getOperand(0).getImm();
1840     OS << "\t!CFI\t$" << Offset << "\t; ";
1841     if (Function)
1842       printCFI(OS, *Function->getCFIFor(Instruction));
1843     OS << Endl;
1844     return;
1845   }
1846   InstPrinter->printInst(&Instruction, 0, "", *STI, OS);
1847   if (MIB->isCall(Instruction)) {
1848     if (MIB->isTailCall(Instruction))
1849       OS << " # TAILCALL ";
1850     if (MIB->isInvoke(Instruction)) {
1851       const Optional<MCPlus::MCLandingPad> EHInfo = MIB->getEHInfo(Instruction);
1852       OS << " # handler: ";
1853       if (EHInfo->first)
1854         OS << *EHInfo->first;
1855       else
1856         OS << '0';
1857       OS << "; action: " << EHInfo->second;
1858       const int64_t GnuArgsSize = MIB->getGnuArgsSize(Instruction);
1859       if (GnuArgsSize >= 0)
1860         OS << "; GNU_args_size = " << GnuArgsSize;
1861     }
1862   } else if (MIB->isIndirectBranch(Instruction)) {
1863     if (uint64_t JTAddress = MIB->getJumpTable(Instruction)) {
1864       OS << " # JUMPTABLE @0x" << Twine::utohexstr(JTAddress);
1865     } else {
1866       OS << " # UNKNOWN CONTROL FLOW";
1867     }
1868   }
1869   if (Optional<uint32_t> Offset = MIB->getOffset(Instruction))
1870     OS << " # Offset: " << *Offset;
1871 
1872   MIB->printAnnotations(Instruction, OS);
1873 
1874   if (opts::PrintDebugInfo)
1875     printDebugInfo(OS, Instruction, Function, DwCtx.get());
1876 
1877   if ((opts::PrintRelocations || PrintRelocations) && Function) {
1878     const uint64_t Size = computeCodeSize(&Instruction, &Instruction + 1);
1879     Function->printRelocations(OS, Offset, Size);
1880   }
1881 
1882   OS << Endl;
1883 
1884   if (PrintMCInst) {
1885     Instruction.dump_pretty(OS, InstPrinter.get());
1886     OS << Endl;
1887   }
1888 }
1889 
1890 Optional<uint64_t>
1891 BinaryContext::getBaseAddressForMapping(uint64_t MMapAddress,
1892                                         uint64_t FileOffset) const {
1893   // Find a segment with a matching file offset.
1894   for (auto &KV : SegmentMapInfo) {
1895     const SegmentInfo &SegInfo = KV.second;
1896     if (alignDown(SegInfo.FileOffset, SegInfo.Alignment) == FileOffset) {
1897       // Use segment's aligned memory offset to calculate the base address.
1898       const uint64_t MemOffset = alignDown(SegInfo.Address, SegInfo.Alignment);
1899       return MMapAddress - MemOffset;
1900     }
1901   }
1902 
1903   return NoneType();
1904 }
1905 
1906 ErrorOr<BinarySection &> BinaryContext::getSectionForAddress(uint64_t Address) {
1907   auto SI = AddressToSection.upper_bound(Address);
1908   if (SI != AddressToSection.begin()) {
1909     --SI;
1910     uint64_t UpperBound = SI->first + SI->second->getSize();
1911     if (!SI->second->getSize())
1912       UpperBound += 1;
1913     if (UpperBound > Address)
1914       return *SI->second;
1915   }
1916   return std::make_error_code(std::errc::bad_address);
1917 }
1918 
1919 ErrorOr<StringRef>
1920 BinaryContext::getSectionNameForAddress(uint64_t Address) const {
1921   if (ErrorOr<const BinarySection &> Section = getSectionForAddress(Address))
1922     return Section->getName();
1923   return std::make_error_code(std::errc::bad_address);
1924 }
1925 
1926 BinarySection &BinaryContext::registerSection(BinarySection *Section) {
1927   auto Res = Sections.insert(Section);
1928   (void)Res;
1929   assert(Res.second && "can't register the same section twice.");
1930 
1931   // Only register allocatable sections in the AddressToSection map.
1932   if (Section->isAllocatable() && Section->getAddress())
1933     AddressToSection.insert(std::make_pair(Section->getAddress(), Section));
1934   NameToSection.insert(
1935       std::make_pair(std::string(Section->getName()), Section));
1936   LLVM_DEBUG(dbgs() << "BOLT-DEBUG: registering " << *Section << "\n");
1937   return *Section;
1938 }
1939 
1940 BinarySection &BinaryContext::registerSection(SectionRef Section) {
1941   return registerSection(new BinarySection(*this, Section));
1942 }
1943 
1944 BinarySection &
1945 BinaryContext::registerSection(StringRef SectionName,
1946                                const BinarySection &OriginalSection) {
1947   return registerSection(
1948       new BinarySection(*this, SectionName, OriginalSection));
1949 }
1950 
1951 BinarySection &
1952 BinaryContext::registerOrUpdateSection(StringRef Name, unsigned ELFType,
1953                                        unsigned ELFFlags, uint8_t *Data,
1954                                        uint64_t Size, unsigned Alignment) {
1955   auto NamedSections = getSectionByName(Name);
1956   if (NamedSections.begin() != NamedSections.end()) {
1957     assert(std::next(NamedSections.begin()) == NamedSections.end() &&
1958            "can only update unique sections");
1959     BinarySection *Section = NamedSections.begin()->second;
1960 
1961     LLVM_DEBUG(dbgs() << "BOLT-DEBUG: updating " << *Section << " -> ");
1962     const bool Flag = Section->isAllocatable();
1963     (void)Flag;
1964     Section->update(Data, Size, Alignment, ELFType, ELFFlags);
1965     LLVM_DEBUG(dbgs() << *Section << "\n");
1966     // FIXME: Fix section flags/attributes for MachO.
1967     if (isELF())
1968       assert(Flag == Section->isAllocatable() &&
1969              "can't change section allocation status");
1970     return *Section;
1971   }
1972 
1973   return registerSection(
1974       new BinarySection(*this, Name, Data, Size, Alignment, ELFType, ELFFlags));
1975 }
1976 
1977 bool BinaryContext::deregisterSection(BinarySection &Section) {
1978   BinarySection *SectionPtr = &Section;
1979   auto Itr = Sections.find(SectionPtr);
1980   if (Itr != Sections.end()) {
1981     auto Range = AddressToSection.equal_range(SectionPtr->getAddress());
1982     while (Range.first != Range.second) {
1983       if (Range.first->second == SectionPtr) {
1984         AddressToSection.erase(Range.first);
1985         break;
1986       }
1987       ++Range.first;
1988     }
1989 
1990     auto NameRange =
1991         NameToSection.equal_range(std::string(SectionPtr->getName()));
1992     while (NameRange.first != NameRange.second) {
1993       if (NameRange.first->second == SectionPtr) {
1994         NameToSection.erase(NameRange.first);
1995         break;
1996       }
1997       ++NameRange.first;
1998     }
1999 
2000     Sections.erase(Itr);
2001     delete SectionPtr;
2002     return true;
2003   }
2004   return false;
2005 }
2006 
2007 void BinaryContext::printSections(raw_ostream &OS) const {
2008   for (BinarySection *const &Section : Sections)
2009     OS << "BOLT-INFO: " << *Section << "\n";
2010 }
2011 
2012 BinarySection &BinaryContext::absoluteSection() {
2013   if (ErrorOr<BinarySection &> Section = getUniqueSectionByName("<absolute>"))
2014     return *Section;
2015   return registerOrUpdateSection("<absolute>", ELF::SHT_NULL, 0u);
2016 }
2017 
2018 ErrorOr<uint64_t> BinaryContext::getUnsignedValueAtAddress(uint64_t Address,
2019                                                            size_t Size) const {
2020   const ErrorOr<const BinarySection &> Section = getSectionForAddress(Address);
2021   if (!Section)
2022     return std::make_error_code(std::errc::bad_address);
2023 
2024   if (Section->isVirtual())
2025     return 0;
2026 
2027   DataExtractor DE(Section->getContents(), AsmInfo->isLittleEndian(),
2028                    AsmInfo->getCodePointerSize());
2029   auto ValueOffset = static_cast<uint64_t>(Address - Section->getAddress());
2030   return DE.getUnsigned(&ValueOffset, Size);
2031 }
2032 
2033 ErrorOr<uint64_t> BinaryContext::getSignedValueAtAddress(uint64_t Address,
2034                                                          size_t Size) const {
2035   const ErrorOr<const BinarySection &> Section = getSectionForAddress(Address);
2036   if (!Section)
2037     return std::make_error_code(std::errc::bad_address);
2038 
2039   if (Section->isVirtual())
2040     return 0;
2041 
2042   DataExtractor DE(Section->getContents(), AsmInfo->isLittleEndian(),
2043                    AsmInfo->getCodePointerSize());
2044   auto ValueOffset = static_cast<uint64_t>(Address - Section->getAddress());
2045   return DE.getSigned(&ValueOffset, Size);
2046 }
2047 
2048 void BinaryContext::addRelocation(uint64_t Address, MCSymbol *Symbol,
2049                                   uint64_t Type, uint64_t Addend,
2050                                   uint64_t Value) {
2051   ErrorOr<BinarySection &> Section = getSectionForAddress(Address);
2052   assert(Section && "cannot find section for address");
2053   Section->addRelocation(Address - Section->getAddress(), Symbol, Type, Addend,
2054                          Value);
2055 }
2056 
2057 void BinaryContext::addDynamicRelocation(uint64_t Address, MCSymbol *Symbol,
2058                                          uint64_t Type, uint64_t Addend,
2059                                          uint64_t Value) {
2060   ErrorOr<BinarySection &> Section = getSectionForAddress(Address);
2061   assert(Section && "cannot find section for address");
2062   Section->addDynamicRelocation(Address - Section->getAddress(), Symbol, Type,
2063                                 Addend, Value);
2064 }
2065 
2066 bool BinaryContext::removeRelocationAt(uint64_t Address) {
2067   ErrorOr<BinarySection &> Section = getSectionForAddress(Address);
2068   assert(Section && "cannot find section for address");
2069   return Section->removeRelocationAt(Address - Section->getAddress());
2070 }
2071 
2072 const Relocation *BinaryContext::getRelocationAt(uint64_t Address) {
2073   ErrorOr<BinarySection &> Section = getSectionForAddress(Address);
2074   if (!Section)
2075     return nullptr;
2076 
2077   return Section->getRelocationAt(Address - Section->getAddress());
2078 }
2079 
2080 const Relocation *BinaryContext::getDynamicRelocationAt(uint64_t Address) {
2081   ErrorOr<BinarySection &> Section = getSectionForAddress(Address);
2082   if (!Section)
2083     return nullptr;
2084 
2085   return Section->getDynamicRelocationAt(Address - Section->getAddress());
2086 }
2087 
2088 void BinaryContext::markAmbiguousRelocations(BinaryData &BD,
2089                                              const uint64_t Address) {
2090   auto setImmovable = [&](BinaryData &BD) {
2091     BinaryData *Root = BD.getAtomicRoot();
2092     LLVM_DEBUG(if (Root->isMoveable()) {
2093       dbgs() << "BOLT-DEBUG: setting " << *Root << " as immovable "
2094              << "due to ambiguous relocation referencing 0x"
2095              << Twine::utohexstr(Address) << '\n';
2096     });
2097     Root->setIsMoveable(false);
2098   };
2099 
2100   if (Address == BD.getAddress()) {
2101     setImmovable(BD);
2102 
2103     // Set previous symbol as immovable
2104     BinaryData *Prev = getBinaryDataContainingAddress(Address - 1);
2105     if (Prev && Prev->getEndAddress() == BD.getAddress())
2106       setImmovable(*Prev);
2107   }
2108 
2109   if (Address == BD.getEndAddress()) {
2110     setImmovable(BD);
2111 
2112     // Set next symbol as immovable
2113     BinaryData *Next = getBinaryDataContainingAddress(BD.getEndAddress());
2114     if (Next && Next->getAddress() == BD.getEndAddress())
2115       setImmovable(*Next);
2116   }
2117 }
2118 
2119 BinaryFunction *BinaryContext::getFunctionForSymbol(const MCSymbol *Symbol,
2120                                                     uint64_t *EntryDesc) {
2121   std::shared_lock<std::shared_timed_mutex> Lock(SymbolToFunctionMapMutex);
2122   auto BFI = SymbolToFunctionMap.find(Symbol);
2123   if (BFI == SymbolToFunctionMap.end())
2124     return nullptr;
2125 
2126   BinaryFunction *BF = BFI->second;
2127   if (EntryDesc)
2128     *EntryDesc = BF->getEntryIDForSymbol(Symbol);
2129 
2130   return BF;
2131 }
2132 
2133 void BinaryContext::exitWithBugReport(StringRef Message,
2134                                       const BinaryFunction &Function) const {
2135   errs() << "=======================================\n";
2136   errs() << "BOLT is unable to proceed because it couldn't properly understand "
2137             "this function.\n";
2138   errs() << "If you are running the most recent version of BOLT, you may "
2139             "want to "
2140             "report this and paste this dump.\nPlease check that there is no "
2141             "sensitive contents being shared in this dump.\n";
2142   errs() << "\nOffending function: " << Function.getPrintName() << "\n\n";
2143   ScopedPrinter SP(errs());
2144   SP.printBinaryBlock("Function contents", *Function.getData());
2145   errs() << "\n";
2146   Function.dump();
2147   errs() << "ERROR: " << Message;
2148   errs() << "\n=======================================\n";
2149   exit(1);
2150 }
2151 
2152 BinaryFunction *
2153 BinaryContext::createInjectedBinaryFunction(const std::string &Name,
2154                                             bool IsSimple) {
2155   InjectedBinaryFunctions.push_back(new BinaryFunction(Name, *this, IsSimple));
2156   BinaryFunction *BF = InjectedBinaryFunctions.back();
2157   setSymbolToFunctionMap(BF->getSymbol(), BF);
2158   BF->CurrentState = BinaryFunction::State::CFG;
2159   return BF;
2160 }
2161 
2162 std::pair<size_t, size_t>
2163 BinaryContext::calculateEmittedSize(BinaryFunction &BF, bool FixBranches) {
2164   // Adjust branch instruction to match the current layout.
2165   if (FixBranches)
2166     BF.fixBranches();
2167 
2168   // Create local MC context to isolate the effect of ephemeral code emission.
2169   IndependentCodeEmitter MCEInstance = createIndependentMCCodeEmitter();
2170   MCContext *LocalCtx = MCEInstance.LocalCtx.get();
2171   MCAsmBackend *MAB =
2172       TheTarget->createMCAsmBackend(*STI, *MRI, MCTargetOptions());
2173 
2174   SmallString<256> Code;
2175   raw_svector_ostream VecOS(Code);
2176 
2177   std::unique_ptr<MCObjectWriter> OW = MAB->createObjectWriter(VecOS);
2178   std::unique_ptr<MCStreamer> Streamer(TheTarget->createMCObjectStreamer(
2179       *TheTriple, *LocalCtx, std::unique_ptr<MCAsmBackend>(MAB), std::move(OW),
2180       std::unique_ptr<MCCodeEmitter>(MCEInstance.MCE.release()), *STI,
2181       /*RelaxAll=*/false,
2182       /*IncrementalLinkerCompatible=*/false,
2183       /*DWARFMustBeAtTheEnd=*/false));
2184 
2185   Streamer->initSections(false, *STI);
2186 
2187   MCSection *Section = MCEInstance.LocalMOFI->getTextSection();
2188   Section->setHasInstructions(true);
2189 
2190   // Create symbols in the LocalCtx so that they get destroyed with it.
2191   MCSymbol *StartLabel = LocalCtx->createTempSymbol();
2192   MCSymbol *EndLabel = LocalCtx->createTempSymbol();
2193 
2194   Streamer->switchSection(Section);
2195   Streamer->emitLabel(StartLabel);
2196   emitFunctionBody(*Streamer, BF, BF.getLayout().getMainFragment(),
2197                    /*EmitCodeOnly=*/true);
2198   Streamer->emitLabel(EndLabel);
2199 
2200   using LabelRange = std::pair<const MCSymbol *, const MCSymbol *>;
2201   SmallVector<LabelRange> SplitLabels;
2202   for (const FunctionFragment FF : BF.getLayout().getSplitFragments()) {
2203     MCSymbol *const SplitStartLabel = LocalCtx->createTempSymbol();
2204     MCSymbol *const SplitEndLabel = LocalCtx->createTempSymbol();
2205     SplitLabels.emplace_back(SplitStartLabel, SplitEndLabel);
2206 
2207     MCSectionELF *const SplitSection = LocalCtx->getELFSection(
2208         BF.getCodeSectionName(FF.getFragmentNum()), ELF::SHT_PROGBITS,
2209         ELF::SHF_EXECINSTR | ELF::SHF_ALLOC);
2210     SplitSection->setHasInstructions(true);
2211     Streamer->switchSection(SplitSection);
2212 
2213     Streamer->emitLabel(SplitStartLabel);
2214     emitFunctionBody(*Streamer, BF, FF, /*EmitCodeOnly=*/true);
2215     Streamer->emitLabel(SplitEndLabel);
2216     // To avoid calling MCObjectStreamer::flushPendingLabels() which is
2217     // private
2218     Streamer->emitBytes(StringRef(""));
2219     Streamer->switchSection(Section);
2220   }
2221 
2222   // To avoid calling MCObjectStreamer::flushPendingLabels() which is private or
2223   // MCStreamer::Finish(), which does more than we want
2224   Streamer->emitBytes(StringRef(""));
2225 
2226   MCAssembler &Assembler =
2227       static_cast<MCObjectStreamer *>(Streamer.get())->getAssembler();
2228   MCAsmLayout Layout(Assembler);
2229   Assembler.layout(Layout);
2230 
2231   const uint64_t HotSize =
2232       Layout.getSymbolOffset(*EndLabel) - Layout.getSymbolOffset(*StartLabel);
2233   const uint64_t ColdSize =
2234       std::accumulate(SplitLabels.begin(), SplitLabels.end(), 0ULL,
2235                       [&](const uint64_t Accu, const LabelRange &Labels) {
2236                         return Accu + Layout.getSymbolOffset(*Labels.second) -
2237                                Layout.getSymbolOffset(*Labels.first);
2238                       });
2239 
2240   // Clean-up the effect of the code emission.
2241   for (const MCSymbol &Symbol : Assembler.symbols()) {
2242     MCSymbol *MutableSymbol = const_cast<MCSymbol *>(&Symbol);
2243     MutableSymbol->setUndefined();
2244     MutableSymbol->setIsRegistered(false);
2245   }
2246 
2247   return std::make_pair(HotSize, ColdSize);
2248 }
2249 
2250 bool BinaryContext::validateEncoding(const MCInst &Inst,
2251                                      ArrayRef<uint8_t> InputEncoding) const {
2252   SmallString<256> Code;
2253   SmallVector<MCFixup, 4> Fixups;
2254   raw_svector_ostream VecOS(Code);
2255 
2256   MCE->encodeInstruction(Inst, VecOS, Fixups, *STI);
2257   auto EncodedData = ArrayRef<uint8_t>((uint8_t *)Code.data(), Code.size());
2258   if (InputEncoding != EncodedData) {
2259     if (opts::Verbosity > 1) {
2260       errs() << "BOLT-WARNING: mismatched encoding detected\n"
2261              << "      input: " << InputEncoding << '\n'
2262              << "     output: " << EncodedData << '\n';
2263     }
2264     return false;
2265   }
2266 
2267   return true;
2268 }
2269 
2270 uint64_t BinaryContext::getHotThreshold() const {
2271   static uint64_t Threshold = 0;
2272   if (Threshold == 0) {
2273     Threshold = std::max(
2274         (uint64_t)opts::ExecutionCountThreshold,
2275         NumProfiledFuncs ? SumExecutionCount / (2 * NumProfiledFuncs) : 1);
2276   }
2277   return Threshold;
2278 }
2279 
2280 BinaryFunction *BinaryContext::getBinaryFunctionContainingAddress(
2281     uint64_t Address, bool CheckPastEnd, bool UseMaxSize) {
2282   auto FI = BinaryFunctions.upper_bound(Address);
2283   if (FI == BinaryFunctions.begin())
2284     return nullptr;
2285   --FI;
2286 
2287   const uint64_t UsedSize =
2288       UseMaxSize ? FI->second.getMaxSize() : FI->second.getSize();
2289 
2290   if (Address >= FI->first + UsedSize + (CheckPastEnd ? 1 : 0))
2291     return nullptr;
2292 
2293   return &FI->second;
2294 }
2295 
2296 BinaryFunction *BinaryContext::getBinaryFunctionAtAddress(uint64_t Address) {
2297   // First, try to find a function starting at the given address. If the
2298   // function was folded, this will get us the original folded function if it
2299   // wasn't removed from the list, e.g. in non-relocation mode.
2300   auto BFI = BinaryFunctions.find(Address);
2301   if (BFI != BinaryFunctions.end())
2302     return &BFI->second;
2303 
2304   // We might have folded the function matching the object at the given
2305   // address. In such case, we look for a function matching the symbol
2306   // registered at the original address. The new function (the one that the
2307   // original was folded into) will hold the symbol.
2308   if (const BinaryData *BD = getBinaryDataAtAddress(Address)) {
2309     uint64_t EntryID = 0;
2310     BinaryFunction *BF = getFunctionForSymbol(BD->getSymbol(), &EntryID);
2311     if (BF && EntryID == 0)
2312       return BF;
2313   }
2314   return nullptr;
2315 }
2316 
2317 DebugAddressRangesVector BinaryContext::translateModuleAddressRanges(
2318     const DWARFAddressRangesVector &InputRanges) const {
2319   DebugAddressRangesVector OutputRanges;
2320 
2321   for (const DWARFAddressRange Range : InputRanges) {
2322     auto BFI = BinaryFunctions.lower_bound(Range.LowPC);
2323     while (BFI != BinaryFunctions.end()) {
2324       const BinaryFunction &Function = BFI->second;
2325       if (Function.getAddress() >= Range.HighPC)
2326         break;
2327       const DebugAddressRangesVector FunctionRanges =
2328           Function.getOutputAddressRanges();
2329       llvm::move(FunctionRanges, std::back_inserter(OutputRanges));
2330       std::advance(BFI, 1);
2331     }
2332   }
2333 
2334   return OutputRanges;
2335 }
2336 
2337 } // namespace bolt
2338 } // namespace llvm
2339