xref: /llvm-project/bolt/lib/Core/BinaryContext.cpp (revision 0c049ea60a9f214911eef7901b94bd6343c04409)
1 //===- bolt/Core/BinaryContext.cpp - Low-level context --------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements the BinaryContext class.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "bolt/Core/BinaryContext.h"
14 #include "bolt/Core/BinaryEmitter.h"
15 #include "bolt/Core/BinaryFunction.h"
16 #include "bolt/Utils/CommandLineOpts.h"
17 #include "bolt/Utils/NameResolver.h"
18 #include "bolt/Utils/Utils.h"
19 #include "llvm/ADT/STLExtras.h"
20 #include "llvm/ADT/Twine.h"
21 #include "llvm/DebugInfo/DWARF/DWARFCompileUnit.h"
22 #include "llvm/DebugInfo/DWARF/DWARFFormValue.h"
23 #include "llvm/DebugInfo/DWARF/DWARFUnit.h"
24 #include "llvm/MC/MCAsmLayout.h"
25 #include "llvm/MC/MCAssembler.h"
26 #include "llvm/MC/MCContext.h"
27 #include "llvm/MC/MCDisassembler/MCDisassembler.h"
28 #include "llvm/MC/MCInstPrinter.h"
29 #include "llvm/MC/MCObjectStreamer.h"
30 #include "llvm/MC/MCObjectWriter.h"
31 #include "llvm/MC/MCRegisterInfo.h"
32 #include "llvm/MC/MCSectionELF.h"
33 #include "llvm/MC/MCStreamer.h"
34 #include "llvm/MC/MCSubtargetInfo.h"
35 #include "llvm/MC/MCSymbol.h"
36 #include "llvm/Support/CommandLine.h"
37 #include "llvm/Support/Error.h"
38 #include "llvm/Support/Regex.h"
39 #include <algorithm>
40 #include <functional>
41 #include <iterator>
42 #include <numeric>
43 #include <unordered_set>
44 
45 using namespace llvm;
46 
47 #undef  DEBUG_TYPE
48 #define DEBUG_TYPE "bolt"
49 
// Command-line options registered by this file. All of them are hidden and
// belong to BoltCategory. Options declared `static` are private to this
// translation unit; the others have external linkage.
namespace opts {

// When set, the BinaryContext constructor uses the regular page size rather
// than the huge page size for PageAlign.
cl::opt<bool> NoHugePages("no-huge-pages",
                          cl::desc("use regular size pages for code alignment"),
                          cl::Hidden, cl::cat(BoltCategory));

// File-local switch: include debug info when printing functions.
static cl::opt<bool>
PrintDebugInfo("print-debug-info",
  cl::desc("print debug info when printing functions"),
  cl::Hidden,
  cl::ZeroOrMore,
  cl::cat(BoltCategory));

// Include relocations when printing functions/objects.
cl::opt<bool> PrintRelocations(
    "print-relocations",
    cl::desc("print relocations when printing functions/objects"), cl::Hidden,
    cl::cat(BoltCategory));

// File-local switch: include memory data annotations when printing functions.
static cl::opt<bool>
PrintMemData("print-mem-data",
  cl::desc("print memory data annotations when printing functions"),
  cl::Hidden,
  cl::ZeroOrMore,
  cl::cat(BoltCategory));

} // namespace opts
76 
77 namespace llvm {
78 namespace bolt {
79 
80 BinaryContext::BinaryContext(std::unique_ptr<MCContext> Ctx,
81                              std::unique_ptr<DWARFContext> DwCtx,
82                              std::unique_ptr<Triple> TheTriple,
83                              const Target *TheTarget, std::string TripleName,
84                              std::unique_ptr<MCCodeEmitter> MCE,
85                              std::unique_ptr<MCObjectFileInfo> MOFI,
86                              std::unique_ptr<const MCAsmInfo> AsmInfo,
87                              std::unique_ptr<const MCInstrInfo> MII,
88                              std::unique_ptr<const MCSubtargetInfo> STI,
89                              std::unique_ptr<MCInstPrinter> InstPrinter,
90                              std::unique_ptr<const MCInstrAnalysis> MIA,
91                              std::unique_ptr<MCPlusBuilder> MIB,
92                              std::unique_ptr<const MCRegisterInfo> MRI,
93                              std::unique_ptr<MCDisassembler> DisAsm)
94     : Ctx(std::move(Ctx)), DwCtx(std::move(DwCtx)),
95       TheTriple(std::move(TheTriple)), TheTarget(TheTarget),
96       TripleName(TripleName), MCE(std::move(MCE)), MOFI(std::move(MOFI)),
97       AsmInfo(std::move(AsmInfo)), MII(std::move(MII)), STI(std::move(STI)),
98       InstPrinter(std::move(InstPrinter)), MIA(std::move(MIA)),
99       MIB(std::move(MIB)), MRI(std::move(MRI)), DisAsm(std::move(DisAsm)) {
100   Relocation::Arch = this->TheTriple->getArch();
101   RegularPageSize = isAArch64() ? RegularPageSizeAArch64 : RegularPageSizeX86;
102   PageAlign = opts::NoHugePages ? RegularPageSize : HugePageSize;
103 }
104 
105 BinaryContext::~BinaryContext() {
106   for (BinarySection *Section : Sections)
107     delete Section;
108   for (BinaryFunction *InjectedFunction : InjectedBinaryFunctions)
109     delete InjectedFunction;
110   for (std::pair<const uint64_t, JumpTable *> JTI : JumpTables)
111     delete JTI.second;
112   clearBinaryData();
113 }
114 
115 /// Create BinaryContext for a given architecture \p ArchName and
116 /// triple \p TripleName.
117 Expected<std::unique_ptr<BinaryContext>>
118 BinaryContext::createBinaryContext(const ObjectFile *File, bool IsPIC,
119                                    std::unique_ptr<DWARFContext> DwCtx) {
120   StringRef ArchName = "";
121   StringRef FeaturesStr = "";
122   switch (File->getArch()) {
123   case llvm::Triple::x86_64:
124     ArchName = "x86-64";
125     FeaturesStr = "+nopl";
126     break;
127   case llvm::Triple::aarch64:
128     ArchName = "aarch64";
129     FeaturesStr = "+all";
130     break;
131   default:
132     return createStringError(std::errc::not_supported,
133                              "BOLT-ERROR: Unrecognized machine in ELF file");
134   }
135 
136   auto TheTriple = std::make_unique<Triple>(File->makeTriple());
137   const std::string TripleName = TheTriple->str();
138 
139   std::string Error;
140   const Target *TheTarget =
141       TargetRegistry::lookupTarget(std::string(ArchName), *TheTriple, Error);
142   if (!TheTarget)
143     return createStringError(make_error_code(std::errc::not_supported),
144                              Twine("BOLT-ERROR: ", Error));
145 
146   std::unique_ptr<const MCRegisterInfo> MRI(
147       TheTarget->createMCRegInfo(TripleName));
148   if (!MRI)
149     return createStringError(
150         make_error_code(std::errc::not_supported),
151         Twine("BOLT-ERROR: no register info for target ", TripleName));
152 
153   // Set up disassembler.
154   std::unique_ptr<MCAsmInfo> AsmInfo(
155       TheTarget->createMCAsmInfo(*MRI, TripleName, MCTargetOptions()));
156   if (!AsmInfo)
157     return createStringError(
158         make_error_code(std::errc::not_supported),
159         Twine("BOLT-ERROR: no assembly info for target ", TripleName));
160   // BOLT creates "func@PLT" symbols for PLT entries. In function assembly dump
161   // we want to emit such names as using @PLT without double quotes to convey
162   // variant kind to the assembler. BOLT doesn't rely on the linker so we can
163   // override the default AsmInfo behavior to emit names the way we want.
164   AsmInfo->setAllowAtInName(true);
165 
166   std::unique_ptr<const MCSubtargetInfo> STI(
167       TheTarget->createMCSubtargetInfo(TripleName, "", FeaturesStr));
168   if (!STI)
169     return createStringError(
170         make_error_code(std::errc::not_supported),
171         Twine("BOLT-ERROR: no subtarget info for target ", TripleName));
172 
173   std::unique_ptr<const MCInstrInfo> MII(TheTarget->createMCInstrInfo());
174   if (!MII)
175     return createStringError(
176         make_error_code(std::errc::not_supported),
177         Twine("BOLT-ERROR: no instruction info for target ", TripleName));
178 
179   std::unique_ptr<MCContext> Ctx(
180       new MCContext(*TheTriple, AsmInfo.get(), MRI.get(), STI.get()));
181   std::unique_ptr<MCObjectFileInfo> MOFI(
182       TheTarget->createMCObjectFileInfo(*Ctx, IsPIC));
183   Ctx->setObjectFileInfo(MOFI.get());
184   // We do not support X86 Large code model. Change this in the future.
185   bool Large = false;
186   if (TheTriple->getArch() == llvm::Triple::aarch64)
187     Large = true;
188   unsigned LSDAEncoding =
189       Large ? dwarf::DW_EH_PE_absptr : dwarf::DW_EH_PE_udata4;
190   if (IsPIC) {
191     LSDAEncoding = dwarf::DW_EH_PE_pcrel |
192                    (Large ? dwarf::DW_EH_PE_sdata8 : dwarf::DW_EH_PE_sdata4);
193   }
194 
195   std::unique_ptr<MCDisassembler> DisAsm(
196       TheTarget->createMCDisassembler(*STI, *Ctx));
197 
198   if (!DisAsm)
199     return createStringError(
200         make_error_code(std::errc::not_supported),
201         Twine("BOLT-ERROR: no disassembler info for target ", TripleName));
202 
203   std::unique_ptr<const MCInstrAnalysis> MIA(
204       TheTarget->createMCInstrAnalysis(MII.get()));
205   if (!MIA)
206     return createStringError(
207         make_error_code(std::errc::not_supported),
208         Twine("BOLT-ERROR: failed to create instruction analysis for target ",
209               TripleName));
210 
211   int AsmPrinterVariant = AsmInfo->getAssemblerDialect();
212   std::unique_ptr<MCInstPrinter> InstructionPrinter(
213       TheTarget->createMCInstPrinter(*TheTriple, AsmPrinterVariant, *AsmInfo,
214                                      *MII, *MRI));
215   if (!InstructionPrinter)
216     return createStringError(
217         make_error_code(std::errc::not_supported),
218         Twine("BOLT-ERROR: no instruction printer for target ", TripleName));
219   InstructionPrinter->setPrintImmHex(true);
220 
221   std::unique_ptr<MCCodeEmitter> MCE(
222       TheTarget->createMCCodeEmitter(*MII, *Ctx));
223 
224   // Make sure we don't miss any output on core dumps.
225   outs().SetUnbuffered();
226   errs().SetUnbuffered();
227   dbgs().SetUnbuffered();
228 
229   auto BC = std::make_unique<BinaryContext>(
230       std::move(Ctx), std::move(DwCtx), std::move(TheTriple), TheTarget,
231       std::string(TripleName), std::move(MCE), std::move(MOFI),
232       std::move(AsmInfo), std::move(MII), std::move(STI),
233       std::move(InstructionPrinter), std::move(MIA), nullptr, std::move(MRI),
234       std::move(DisAsm));
235 
236   BC->LSDAEncoding = LSDAEncoding;
237 
238   BC->MAB = std::unique_ptr<MCAsmBackend>(
239       BC->TheTarget->createMCAsmBackend(*BC->STI, *BC->MRI, MCTargetOptions()));
240 
241   BC->setFilename(File->getFileName());
242 
243   BC->HasFixedLoadAddress = !IsPIC;
244 
245   BC->SymbolicDisAsm = std::unique_ptr<MCDisassembler>(
246       BC->TheTarget->createMCDisassembler(*BC->STI, *BC->Ctx));
247 
248   if (!BC->SymbolicDisAsm)
249     return createStringError(
250         make_error_code(std::errc::not_supported),
251         Twine("BOLT-ERROR: no disassembler info for target ", TripleName));
252 
253   return std::move(BC);
254 }
255 
256 bool BinaryContext::forceSymbolRelocations(StringRef SymbolName) const {
257   if (opts::HotText &&
258       (SymbolName == "__hot_start" || SymbolName == "__hot_end"))
259     return true;
260 
261   if (opts::HotData &&
262       (SymbolName == "__hot_data_start" || SymbolName == "__hot_data_end"))
263     return true;
264 
265   if (SymbolName == "_end")
266     return true;
267 
268   return false;
269 }
270 
271 std::unique_ptr<MCObjectWriter>
272 BinaryContext::createObjectWriter(raw_pwrite_stream &OS) {
273   return MAB->createObjectWriter(OS);
274 }
275 
276 bool BinaryContext::validateObjectNesting() const {
277   auto Itr = BinaryDataMap.begin();
278   auto End = BinaryDataMap.end();
279   bool Valid = true;
280   while (Itr != End) {
281     auto Next = std::next(Itr);
282     while (Next != End &&
283            Itr->second->getSection() == Next->second->getSection() &&
284            Itr->second->containsRange(Next->second->getAddress(),
285                                       Next->second->getSize())) {
286       if (Next->second->Parent != Itr->second) {
287         errs() << "BOLT-WARNING: object nesting incorrect for:\n"
288                << "BOLT-WARNING:  " << *Itr->second << "\n"
289                << "BOLT-WARNING:  " << *Next->second << "\n";
290         Valid = false;
291       }
292       ++Next;
293     }
294     Itr = Next;
295   }
296   return Valid;
297 }
298 
299 bool BinaryContext::validateHoles() const {
300   bool Valid = true;
301   for (BinarySection &Section : sections()) {
302     for (const Relocation &Rel : Section.relocations()) {
303       uint64_t RelAddr = Rel.Offset + Section.getAddress();
304       const BinaryData *BD = getBinaryDataContainingAddress(RelAddr);
305       if (!BD) {
306         errs() << "BOLT-WARNING: no BinaryData found for relocation at address"
307                << " 0x" << Twine::utohexstr(RelAddr) << " in "
308                << Section.getName() << "\n";
309         Valid = false;
310       } else if (!BD->getAtomicRoot()) {
311         errs() << "BOLT-WARNING: no atomic BinaryData found for relocation at "
312                << "address 0x" << Twine::utohexstr(RelAddr) << " in "
313                << Section.getName() << "\n";
314         Valid = false;
315       }
316     }
317   }
318   return Valid;
319 }
320 
/// Update Parent links around the BinaryDataMap entry \p GAI.
///
/// First the entry (and the run of following entries that shared its old
/// parent) is re-parented based on the objects preceding it; then, if the
/// entry has a non-zero size, following objects it covers are re-parented.
void BinaryContext::updateObjectNesting(BinaryDataMapType::iterator GAI) {
  const uint64_t Address = GAI->second->getAddress();
  const uint64_t Size = GAI->second->getSize();

  // Give the object at Itr the parent NewParent. If that object previously had
  // a non-null parent, the directly following entries that still point at the
  // same old parent are switched to NewParent as well.
  auto fixParents = [&](BinaryDataMapType::iterator Itr,
                        BinaryData *NewParent) {
    BinaryData *OldParent = Itr->second->Parent;
    Itr->second->Parent = NewParent;
    ++Itr;
    while (Itr != BinaryDataMap.end() && OldParent &&
           Itr->second->Parent == OldParent) {
      Itr->second->Parent = NewParent;
      ++Itr;
    }
  };

  // Check if the previous symbol contains the newly added symbol.
  if (GAI != BinaryDataMap.begin()) {
    // Start at the preceding entry and follow its Parent chain upwards. Every
    // step re-runs fixParents, so the parent left in place is the one chosen
    // for the last object visited on the chain.
    // NOTE(review): confirm that letting the outermost object decide is the
    // intended result.
    BinaryData *Prev = std::prev(GAI)->second;
    while (Prev) {
      if (Prev->getSection() == GAI->second->getSection() &&
          Prev->containsRange(Address, Size)) {
        fixParents(GAI, Prev);
      } else {
        fixParents(GAI, nullptr);
      }
      Prev = Prev->Parent;
    }
  }

  // Check if the newly added symbol contains any subsequent symbols.
  if (Size != 0) {
    // Following objects are attached to the new object's parent when it has
    // one, otherwise to the new object itself.
    BinaryData *BD = GAI->second->Parent ? GAI->second->Parent : GAI->second;
    auto Itr = std::next(GAI);
    while (
        Itr != BinaryDataMap.end() &&
        BD->containsRange(Itr->second->getAddress(), Itr->second->getSize())) {
      Itr->second->Parent = BD;
      ++Itr;
    }
  }
}
363 
364 iterator_range<BinaryContext::binary_data_iterator>
365 BinaryContext::getSubBinaryData(BinaryData *BD) {
366   auto Start = std::next(BinaryDataMap.find(BD->getAddress()));
367   auto End = Start;
368   while (End != BinaryDataMap.end() && BD->isAncestorOf(End->second))
369     ++End;
370   return make_range(Start, End);
371 }
372 
/// Resolve a reference from function \p BF to \p Address into a symbol plus
/// an offset from that symbol.
///
/// The checks run in this order, and the first one that matches decides the
/// result: AArch64 constant islands, an address inside \p BF (which becomes an
/// additional entry point), a possible PIC jump table (only with relocations
/// and when \p IsPCRel is set), an existing BinaryData object containing the
/// address, and finally a freshly created "DATAat" symbol.
///
/// \returns the symbol and the offset of \p Address from it. The offset is
/// non-zero only when an existing BinaryData object is used.
std::pair<const MCSymbol *, uint64_t>
BinaryContext::handleAddressRef(uint64_t Address, BinaryFunction &BF,
                                bool IsPCRel) {
  if (isAArch64()) {
    // Check if this is an access to a constant island and create bookkeeping
    // to keep track of it and emit it later as part of this function.
    if (MCSymbol *IslandSym = BF.getOrCreateIslandAccess(Address))
      return std::make_pair(IslandSym, 0);

    // Detect custom code written in assembly that refers to arbitrary
    // constant islands from other functions. Write this reference so we
    // can pull this constant island and emit it as part of this function
    // too.
    auto IslandIter = AddressToConstantIslandMap.lower_bound(Address);

    // lower_bound() yields the first entry at or after Address; unless it is
    // an exact match, step back to the closest entry before Address.
    if (IslandIter != AddressToConstantIslandMap.begin() &&
        (IslandIter == AddressToConstantIslandMap.end() ||
         IslandIter->first > Address))
      --IslandIter;

    if (IslandIter != AddressToConstantIslandMap.end()) {
      // Fall-back to referencing the original constant island in the presence
      // of dynamic relocs, as we currently do not support cloning them.
      // Notice: we might fail to link because of this, if the original constant
      // island we are referring would be emitted too far away.
      if (IslandIter->second->hasDynamicRelocationAtIsland()) {
        MCSymbol *IslandSym =
            IslandIter->second->getOrCreateIslandAccess(Address);
        if (IslandSym)
          return std::make_pair(IslandSym, 0);
      } else if (MCSymbol *IslandSym =
                     IslandIter->second->getOrCreateProxyIslandAccess(Address,
                                                                      BF)) {
        // Record that BF depends on the island owned by the other function.
        BF.createIslandDependency(IslandSym, IslandIter->second);
        return std::make_pair(IslandSym, 0);
      }
    }
  }

  // Note that the address does not necessarily have to reside inside
  // a section, it could be an absolute address too.
  ErrorOr<BinarySection &> Section = getSectionForAddress(Address);
  if (Section && Section->isText()) {
    if (BF.containsAddress(Address, /*UseMaxSize=*/isAArch64())) {
      // A reference to the function's own start address is not handled here;
      // it falls through to the generic lookups below.
      if (Address != BF.getAddress()) {
        // The address could potentially escape. Mark it as another entry
        // point into the function.
        if (opts::Verbosity >= 1) {
          outs() << "BOLT-INFO: potentially escaped address 0x"
                 << Twine::utohexstr(Address) << " in function " << BF << '\n';
        }
        BF.HasInternalLabelReference = true;
        return std::make_pair(
            BF.addEntryPointAtOffset(Address - BF.getAddress()), 0);
      }
    } else {
      // Code address outside BF: record it and continue with the generic
      // lookups below.
      addInterproceduralReference(&BF, Address);
    }
  }

  // With relocations, catch jump table references outside of the basic block
  // containing the indirect jump.
  if (HasRelocations) {
    const MemoryContentsType MemType = analyzeMemoryAt(Address, BF);
    if (MemType == MemoryContentsType::POSSIBLE_PIC_JUMP_TABLE && IsPCRel) {
      const MCSymbol *Symbol =
          getOrCreateJumpTable(BF, Address, JumpTable::JTT_PIC);

      return std::make_pair(Symbol, 0);
    }
  }

  // Use an existing object covering the address, addressed by offset.
  if (BinaryData *BD = getBinaryDataContainingAddress(Address))
    return std::make_pair(BD->getSymbol(), Address - BD->getAddress());

  // TODO: use DWARF info to get size/alignment here?
  MCSymbol *TargetSymbol = getOrCreateGlobalSymbol(Address, "DATAat");
  LLVM_DEBUG(dbgs() << "Created symbol " << TargetSymbol->getName() << '\n');
  return std::make_pair(TargetSymbol, 0);
}
453 
454 MemoryContentsType BinaryContext::analyzeMemoryAt(uint64_t Address,
455                                                   BinaryFunction &BF) {
456   if (!isX86())
457     return MemoryContentsType::UNKNOWN;
458 
459   ErrorOr<BinarySection &> Section = getSectionForAddress(Address);
460   if (!Section) {
461     // No section - possibly an absolute address. Since we don't allow
462     // internal function addresses to escape the function scope - we
463     // consider it a tail call.
464     if (opts::Verbosity > 1) {
465       errs() << "BOLT-WARNING: no section for address 0x"
466              << Twine::utohexstr(Address) << " referenced from function " << BF
467              << '\n';
468     }
469     return MemoryContentsType::UNKNOWN;
470   }
471 
472   if (Section->isVirtual()) {
473     // The contents are filled at runtime.
474     return MemoryContentsType::UNKNOWN;
475   }
476 
477   // No support for jump tables in code yet.
478   if (Section->isText())
479     return MemoryContentsType::UNKNOWN;
480 
481   // Start with checking for PIC jump table. We expect non-PIC jump tables
482   // to have high 32 bits set to 0.
483   if (analyzeJumpTable(Address, JumpTable::JTT_PIC, BF))
484     return MemoryContentsType::POSSIBLE_PIC_JUMP_TABLE;
485 
486   if (analyzeJumpTable(Address, JumpTable::JTT_NORMAL, BF))
487     return MemoryContentsType::POSSIBLE_JUMP_TABLE;
488 
489   return MemoryContentsType::UNKNOWN;
490 }
491 
/// Decide heuristically whether the memory at \p Address holds a jump table
/// of kind \p Type that belongs to \p BF.
///
/// Entries are read one by one starting at \p Address. Scanning stops at the
/// upper bound or at the first entry that fails a check; entries accepted
/// before that point still count.
///
/// \param NextJTAddress if non-zero, the scan does not go past this address.
/// \param EntriesAsAddress if non-null, receives the target address of every
///        accepted entry, in order.
/// \param HasEntryInFragment if non-null, set to true when an accepted entry
///        targets a function other than \p BF; never reset to false here.
/// \returns true if at least two entries were accepted, where any number of
///          "unreachable" entries counts as one.
bool BinaryContext::analyzeJumpTable(const uint64_t Address,
                                     const JumpTable::JumpTableType Type,
                                     const BinaryFunction &BF,
                                     const uint64_t NextJTAddress,
                                     JumpTable::AddressesType *EntriesAsAddress,
                                     bool *HasEntryInFragment) const {
  // Is one of the targets __builtin_unreachable?
  bool HasUnreachable = false;

  // Number of targets other than __builtin_unreachable.
  uint64_t NumRealEntries = 0;

  // Record an accepted target, if the caller asked for the list.
  auto addEntryAddress = [&](uint64_t EntryAddress) {
    if (EntriesAsAddress)
      EntriesAsAddress->emplace_back(EntryAddress);
  };

  // True if Addr is inside BF, or if TargetBF and BF are in a parent/child
  // fragment relation (in either direction).
  auto doesBelongToFunction = [&](const uint64_t Addr,
                                  const BinaryFunction *TargetBF) -> bool {
    if (BF.containsAddress(Addr))
      return true;
    // Nothing to do if we failed to identify the containing function.
    if (!TargetBF)
      return false;
    // Check if BF is a fragment of TargetBF or vice versa.
    return BF.isChildOf(*TargetBF) || TargetBF->isChildOf(BF);
  };

  ErrorOr<const BinarySection &> Section = getSectionForAddress(Address);
  if (!Section)
    return false;

  // The upper bound is defined by containing object, section limits, and
  // the next jump table in memory.
  uint64_t UpperBound = Section->getEndAddress();
  const BinaryData *JumpTableBD = getBinaryDataAtAddress(Address);
  if (JumpTableBD && JumpTableBD->getSize()) {
    assert(JumpTableBD->getEndAddress() <= UpperBound &&
           "data object cannot cross a section boundary");
    UpperBound = JumpTableBD->getEndAddress();
  }
  if (NextJTAddress)
    UpperBound = std::min(NextJTAddress, UpperBound);

  LLVM_DEBUG({
    using JTT = JumpTable::JumpTableType;
    dbgs() << formatv("BOLT-DEBUG: analyzeJumpTable @{0:x} in {1}, JTT={2}\n",
                      Address, BF.getPrintName(),
                      Type == JTT::JTT_PIC ? "PIC" : "Normal");
  });
  const uint64_t EntrySize = getJumpTableEntrySize(Type);
  // Only entries that fit entirely below UpperBound are considered.
  for (uint64_t EntryAddress = Address; EntryAddress <= UpperBound - EntrySize;
       EntryAddress += EntrySize) {
    LLVM_DEBUG(dbgs() << "  * Checking 0x" << Twine::utohexstr(EntryAddress)
                      << " -> ");
    // Check if there's a proper relocation against the jump table entry.
    if (HasRelocations) {
      if (Type == JumpTable::JTT_PIC &&
          !DataPCRelocations.count(EntryAddress)) {
        LLVM_DEBUG(
            dbgs() << "FAIL: JTT_PIC table, no relocation for this address\n");
        break;
      }
      if (Type == JumpTable::JTT_NORMAL && !getRelocationAt(EntryAddress)) {
        LLVM_DEBUG(
            dbgs()
            << "FAIL: JTT_NORMAL table, no relocation for this address\n");
        break;
      }
    }

    // PIC entries are signed offsets added to the table's start address;
    // normal entries are read as pointers.
    // NOTE(review): both reads are dereferenced without checking for failure.
    const uint64_t Value =
        (Type == JumpTable::JTT_PIC)
            ? Address + *getSignedValueAtAddress(EntryAddress, EntrySize)
            : *getPointerAtAddress(EntryAddress);

    // __builtin_unreachable() case.
    // Such an entry points at the address right past the end of BF.
    if (Value == BF.getAddress() + BF.getSize()) {
      addEntryAddress(Value);
      HasUnreachable = true;
      LLVM_DEBUG(dbgs() << formatv("OK: {0:x} __builtin_unreachable\n", Value));
      continue;
    }

    // Function or one of its fragments.
    const BinaryFunction *TargetBF = getBinaryFunctionContainingAddress(Value);

    // We assume that a jump table cannot have function start as an entry.
    if (!doesBelongToFunction(Value, TargetBF) || Value == BF.getAddress()) {
      LLVM_DEBUG({
        if (!BF.containsAddress(Value)) {
          dbgs() << "FAIL: function doesn't contain this address\n";
          if (TargetBF) {
            dbgs() << "  ! function containing this address: "
                   << TargetBF->getPrintName() << '\n';
            if (TargetBF->isFragment()) {
              dbgs() << "  ! is a fragment";
              for (BinaryFunction *Parent : TargetBF->ParentFragments)
                dbgs() << ", parent: " << Parent->getPrintName();
              dbgs() << '\n';
            }
          }
        }
        if (Value == BF.getAddress())
          dbgs() << "FAIL: jump table cannot have function start as an entry\n";
      });
      break;
    }

    // Check there's an instruction at this offset.
    // The check is only made when the target function is in the Disassembled
    // state.
    if (TargetBF->getState() == BinaryFunction::State::Disassembled &&
        !TargetBF->getInstructionAtOffset(Value - TargetBF->getAddress())) {
      LLVM_DEBUG(dbgs() << formatv("FAIL: no instruction at {0:x}\n", Value));
      break;
    }

    ++NumRealEntries;
    LLVM_DEBUG(dbgs() << formatv("OK: {0:x} real entry\n", Value));

    if (TargetBF != &BF && HasEntryInFragment)
      *HasEntryInFragment = true;
    addEntryAddress(Value);
  }

  // It's a jump table if the number of real entries is more than 1, or there's
  // one real entry and "unreachable" targets. If there are only multiple
  // "unreachable" targets, then it's not a jump table.
  return NumRealEntries + HasUnreachable >= 2;
}
621 
622 void BinaryContext::populateJumpTables() {
623   LLVM_DEBUG(dbgs() << "DataPCRelocations: " << DataPCRelocations.size()
624                     << '\n');
625   for (auto JTI = JumpTables.begin(), JTE = JumpTables.end(); JTI != JTE;
626        ++JTI) {
627     JumpTable *JT = JTI->second;
628 
629     bool NonSimpleParent = false;
630     for (BinaryFunction *BF : JT->Parents)
631       NonSimpleParent |= !BF->isSimple();
632     if (NonSimpleParent)
633       continue;
634 
635     uint64_t NextJTAddress = 0;
636     auto NextJTI = std::next(JTI);
637     if (NextJTI != JTE)
638       NextJTAddress = NextJTI->second->getAddress();
639 
640     const bool Success =
641         analyzeJumpTable(JT->getAddress(), JT->Type, *(JT->Parents[0]),
642                          NextJTAddress, &JT->EntriesAsAddress, &JT->IsSplit);
643     if (!Success) {
644       LLVM_DEBUG({
645         dbgs() << "failed to analyze ";
646         JT->print(dbgs());
647         if (NextJTI != JTE) {
648           dbgs() << "next ";
649           NextJTI->second->print(dbgs());
650         }
651       });
652       llvm_unreachable("jump table heuristic failure");
653     }
654     for (BinaryFunction *Frag : JT->Parents) {
655       if (JT->IsSplit)
656         Frag->setHasIndirectTargetToSplitFragment(true);
657       for (uint64_t EntryAddress : JT->EntriesAsAddress)
658         // if target is builtin_unreachable
659         if (EntryAddress == Frag->getAddress() + Frag->getSize()) {
660           Frag->IgnoredBranches.emplace_back(EntryAddress - Frag->getAddress(),
661                                              Frag->getSize());
662         } else if (EntryAddress >= Frag->getAddress() &&
663                    EntryAddress < Frag->getAddress() + Frag->getSize()) {
664           Frag->registerReferencedOffset(EntryAddress - Frag->getAddress());
665         }
666     }
667 
668     // In strict mode, erase PC-relative relocation record. Later we check that
669     // all such records are erased and thus have been accounted for.
670     if (opts::StrictMode && JT->Type == JumpTable::JTT_PIC) {
671       for (uint64_t Address = JT->getAddress();
672            Address < JT->getAddress() + JT->getSize();
673            Address += JT->EntrySize) {
674         DataPCRelocations.erase(DataPCRelocations.find(Address));
675       }
676     }
677 
678     // Mark to skip the function and all its fragments.
679     for (BinaryFunction *Frag : JT->Parents)
680       if (Frag->hasIndirectTargetToSplitFragment())
681         addFragmentsToSkip(Frag);
682   }
683 
684   if (opts::StrictMode && DataPCRelocations.size()) {
685     LLVM_DEBUG({
686       dbgs() << DataPCRelocations.size()
687              << " unclaimed PC-relative relocations left in data:\n";
688       for (uint64_t Reloc : DataPCRelocations)
689         dbgs() << Twine::utohexstr(Reloc) << '\n';
690     });
691     assert(0 && "unclaimed PC-relative relocations left in data\n");
692   }
693   clearList(DataPCRelocations);
694 }
695 
696 void BinaryContext::skipMarkedFragments() {
697   std::vector<BinaryFunction *> FragmentQueue;
698   // Copy the functions to FragmentQueue.
699   FragmentQueue.assign(FragmentsToSkip.begin(), FragmentsToSkip.end());
700   auto addToWorklist = [&](BinaryFunction *Function) -> void {
701     if (FragmentsToSkip.count(Function))
702       return;
703     FragmentQueue.push_back(Function);
704     addFragmentsToSkip(Function);
705   };
706   // Functions containing split jump tables need to be skipped with all
707   // fragments (transitively).
708   for (size_t I = 0; I != FragmentQueue.size(); I++) {
709     BinaryFunction *BF = FragmentQueue[I];
710     assert(FragmentsToSkip.count(BF) &&
711            "internal error in traversing function fragments");
712     if (opts::Verbosity >= 1)
713       errs() << "BOLT-WARNING: Ignoring " << BF->getPrintName() << '\n';
714     BF->setSimple(false);
715     BF->setHasIndirectTargetToSplitFragment(true);
716 
717     llvm::for_each(BF->Fragments, addToWorklist);
718     llvm::for_each(BF->ParentFragments, addToWorklist);
719   }
720   if (!FragmentsToSkip.empty())
721     errs() << "BOLT-WARNING: skipped " << FragmentsToSkip.size() << " function"
722            << (FragmentsToSkip.size() == 1 ? "" : "s")
723            << " due to cold fragments\n";
724 }
725 
726 MCSymbol *BinaryContext::getOrCreateGlobalSymbol(uint64_t Address, Twine Prefix,
727                                                  uint64_t Size,
728                                                  uint16_t Alignment,
729                                                  unsigned Flags) {
730   auto Itr = BinaryDataMap.find(Address);
731   if (Itr != BinaryDataMap.end()) {
732     assert(Itr->second->getSize() == Size || !Size);
733     return Itr->second->getSymbol();
734   }
735 
736   std::string Name = (Prefix + "0x" + Twine::utohexstr(Address)).str();
737   assert(!GlobalSymbols.count(Name) && "created name is not unique");
738   return registerNameAtAddress(Name, Address, Size, Alignment, Flags);
739 }
740 
741 MCSymbol *BinaryContext::getOrCreateUndefinedGlobalSymbol(StringRef Name) {
742   return Ctx->getOrCreateSymbol(Name);
743 }
744 
745 BinaryFunction *BinaryContext::createBinaryFunction(
746     const std::string &Name, BinarySection &Section, uint64_t Address,
747     uint64_t Size, uint64_t SymbolSize, uint16_t Alignment) {
748   auto Result = BinaryFunctions.emplace(
749       Address, BinaryFunction(Name, Section, Address, Size, *this));
750   assert(Result.second == true && "unexpected duplicate function");
751   BinaryFunction *BF = &Result.first->second;
752   registerNameAtAddress(Name, Address, SymbolSize ? SymbolSize : Size,
753                         Alignment);
754   setSymbolToFunctionMap(BF->getSymbol(), BF);
755   return BF;
756 }
757 
758 const MCSymbol *
759 BinaryContext::getOrCreateJumpTable(BinaryFunction &Function, uint64_t Address,
760                                     JumpTable::JumpTableType Type) {
761   // Two fragments of same function access same jump table
762   if (JumpTable *JT = getJumpTableContainingAddress(Address)) {
763     assert(JT->Type == Type && "jump table types have to match");
764     assert(Address == JT->getAddress() && "unexpected non-empty jump table");
765 
766     // Prevent associating a jump table to a specific fragment twice.
767     // This simple check arises from the assumption: no more than 2 fragments.
768     if (JT->Parents.size() == 1 && JT->Parents[0] != &Function) {
769       assert((JT->Parents[0]->isChildOf(Function) ||
770               Function.isChildOf(*JT->Parents[0])) &&
771              "cannot re-use jump table of a different function");
772       // Duplicate the entry for the parent function for easy access
773       JT->Parents.push_back(&Function);
774       if (opts::Verbosity > 2) {
775         outs() << "BOLT-INFO: Multiple fragments access same jump table: "
776                << JT->Parents[0]->getPrintName() << "; "
777                << Function.getPrintName() << "\n";
778         JT->print(outs());
779       }
780       Function.JumpTables.emplace(Address, JT);
781       JT->Parents[0]->setHasIndirectTargetToSplitFragment(true);
782       JT->Parents[1]->setHasIndirectTargetToSplitFragment(true);
783     }
784 
785     bool IsJumpTableParent = false;
786     (void)IsJumpTableParent;
787     for (BinaryFunction *Frag : JT->Parents)
788       if (Frag == &Function)
789         IsJumpTableParent = true;
790     assert(IsJumpTableParent &&
791            "cannot re-use jump table of a different function");
792     return JT->getFirstLabel();
793   }
794 
795   // Re-use the existing symbol if possible.
796   MCSymbol *JTLabel = nullptr;
797   if (BinaryData *Object = getBinaryDataAtAddress(Address)) {
798     if (!isInternalSymbolName(Object->getSymbol()->getName()))
799       JTLabel = Object->getSymbol();
800   }
801 
802   const uint64_t EntrySize = getJumpTableEntrySize(Type);
803   if (!JTLabel) {
804     const std::string JumpTableName = generateJumpTableName(Function, Address);
805     JTLabel = registerNameAtAddress(JumpTableName, Address, 0, EntrySize);
806   }
807 
808   LLVM_DEBUG(dbgs() << "BOLT-DEBUG: creating jump table " << JTLabel->getName()
809                     << " in function " << Function << '\n');
810 
811   JumpTable *JT = new JumpTable(*JTLabel, Address, EntrySize, Type,
812                                 JumpTable::LabelMapType{{0, JTLabel}},
813                                 *getSectionForAddress(Address));
814   JT->Parents.push_back(&Function);
815   if (opts::Verbosity > 2)
816     JT->print(outs());
817   JumpTables.emplace(Address, JT);
818 
819   // Duplicate the entry for the parent function for easy access.
820   Function.JumpTables.emplace(Address, JT);
821   return JTLabel;
822 }
823 
824 std::pair<uint64_t, const MCSymbol *>
825 BinaryContext::duplicateJumpTable(BinaryFunction &Function, JumpTable *JT,
826                                   const MCSymbol *OldLabel) {
827   auto L = scopeLock();
828   unsigned Offset = 0;
829   bool Found = false;
830   for (std::pair<const unsigned, MCSymbol *> Elmt : JT->Labels) {
831     if (Elmt.second != OldLabel)
832       continue;
833     Offset = Elmt.first;
834     Found = true;
835     break;
836   }
837   assert(Found && "Label not found");
838   (void)Found;
839   MCSymbol *NewLabel = Ctx->createNamedTempSymbol("duplicatedJT");
840   JumpTable *NewJT =
841       new JumpTable(*NewLabel, JT->getAddress(), JT->EntrySize, JT->Type,
842                     JumpTable::LabelMapType{{Offset, NewLabel}},
843                     *getSectionForAddress(JT->getAddress()));
844   NewJT->Parents = JT->Parents;
845   NewJT->Entries = JT->Entries;
846   NewJT->Counts = JT->Counts;
847   uint64_t JumpTableID = ++DuplicatedJumpTables;
848   // Invert it to differentiate from regular jump tables whose IDs are their
849   // addresses in the input binary memory space
850   JumpTableID = ~JumpTableID;
851   JumpTables.emplace(JumpTableID, NewJT);
852   Function.JumpTables.emplace(JumpTableID, NewJT);
853   return std::make_pair(JumpTableID, NewLabel);
854 }
855 
856 std::string BinaryContext::generateJumpTableName(const BinaryFunction &BF,
857                                                  uint64_t Address) {
858   size_t Id;
859   uint64_t Offset = 0;
860   if (const JumpTable *JT = BF.getJumpTableContainingAddress(Address)) {
861     Offset = Address - JT->getAddress();
862     auto Itr = JT->Labels.find(Offset);
863     if (Itr != JT->Labels.end())
864       return std::string(Itr->second->getName());
865     Id = JumpTableIds.at(JT->getAddress());
866   } else {
867     Id = JumpTableIds[Address] = BF.JumpTables.size();
868   }
869   return ("JUMP_TABLE/" + BF.getOneName().str() + "." + std::to_string(Id) +
870           (Offset ? ("." + std::to_string(Offset)) : ""));
871 }
872 
/// Check whether the bytes between the end of \p BF (getSize()) and its
/// maximum size (getMaxSize()) look like padding.
///
/// The padding is accepted when it consists entirely of breakpoint
/// instructions, or when the rest of it can be consumed by any interleaving
/// of no-ops, unconditional jumps that target an address between the end of
/// the jump and the end of the padding area, and runs of zero bytes.
///
/// \returns true if the padding is accepted, if the function has no padding,
/// or if the target is not x86; false otherwise (a warning is printed when
/// verbosity is at least 1).
bool BinaryContext::hasValidCodePadding(const BinaryFunction &BF) {
  // FIXME: aarch64 support is missing.
  if (!isX86())
    return true;

  // No padding at all.
  if (BF.getSize() == BF.getMaxSize())
    return true;

  ErrorOr<ArrayRef<unsigned char>> FunctionData = BF.getData();
  assert(FunctionData && "cannot get function as data");

  // Scan state shared by the helper lambdas below (captured by reference).
  uint64_t Offset = BF.getSize();
  MCInst Instr;
  uint64_t InstrSize = 0;
  uint64_t InstrAddress = BF.getAddress() + Offset;
  using std::placeholders::_1;

  // Skip instructions that satisfy the predicate condition.
  // Stops at the first instruction that fails to disassemble or fails the
  // predicate, and returns the number of bytes skipped.
  auto skipInstructions = [&](std::function<bool(const MCInst &)> Predicate) {
    const uint64_t StartOffset = Offset;
    for (; Offset < BF.getMaxSize();
         Offset += InstrSize, InstrAddress += InstrSize) {
      if (!DisAsm->getInstruction(Instr, InstrSize, FunctionData->slice(Offset),
                                  InstrAddress, nulls()))
        break;
      if (!Predicate(Instr))
        break;
    }

    return Offset - StartOffset;
  };

  // Skip a sequence of zero bytes.
  // Returns the number of bytes skipped. Note that only Offset is advanced
  // here; InstrAddress is left as is.
  auto skipZeros = [&]() {
    const uint64_t StartOffset = Offset;
    for (; Offset < BF.getMaxSize(); ++Offset)
      if ((*FunctionData)[Offset] != 0)
        break;

    return Offset - StartOffset;
  };

  // Accept the whole padding area filled with breakpoints.
  auto isBreakpoint = std::bind(&MCPlusBuilder::isBreakpoint, MIB.get(), _1);
  if (skipInstructions(isBreakpoint) && Offset == BF.getMaxSize())
    return true;

  auto isNoop = std::bind(&MCPlusBuilder::isNoop, MIB.get(), _1);

  // Some functions have a jump to the next function or to the padding area
  // inserted after the body.
  // The predicate reads InstrAddress and InstrSize as set by the enclosing
  // skipInstructions() iteration for the instruction being tested.
  auto isSkipJump = [&](const MCInst &Instr) {
    uint64_t TargetAddress = 0;
    if (MIB->isUnconditionalBranch(Instr) &&
        MIB->evaluateBranch(Instr, InstrAddress, InstrSize, TargetAddress)) {
      if (TargetAddress >= InstrAddress + InstrSize &&
          TargetAddress <= BF.getAddress() + BF.getMaxSize()) {
        return true;
      }
    }
    return false;
  };

  // Skip over nops, jumps, and zero padding. Allow interleaving (this happens).
  // The loop ends once a full round makes no progress.
  while (skipInstructions(isNoop) || skipInstructions(isSkipJump) ||
         skipZeros())
    ;

  // Everything up to the maximum size was recognized as padding.
  if (Offset == BF.getMaxSize())
    return true;

  if (opts::Verbosity >= 1) {
    errs() << "BOLT-WARNING: bad padding at address 0x"
           << Twine::utohexstr(BF.getAddress() + BF.getSize())
           << " starting at offset " << (Offset - BF.getSize())
           << " in function " << BF << '\n'
           << FunctionData->slice(BF.getSize(), BF.getMaxSize() - BF.getSize())
           << '\n';
  }

  return false;
}
955 
956 void BinaryContext::adjustCodePadding() {
957   for (auto &BFI : BinaryFunctions) {
958     BinaryFunction &BF = BFI.second;
959     if (!shouldEmit(BF))
960       continue;
961 
962     if (!hasValidCodePadding(BF)) {
963       if (HasRelocations) {
964         if (opts::Verbosity >= 1) {
965           outs() << "BOLT-INFO: function " << BF
966                  << " has invalid padding. Ignoring the function.\n";
967         }
968         BF.setIgnored();
969       } else {
970         BF.setMaxSize(BF.getSize());
971       }
972     }
973   }
974 }
975 
976 MCSymbol *BinaryContext::registerNameAtAddress(StringRef Name, uint64_t Address,
977                                                uint64_t Size,
978                                                uint16_t Alignment,
979                                                unsigned Flags) {
980   // Register the name with MCContext.
981   MCSymbol *Symbol = Ctx->getOrCreateSymbol(Name);
982 
983   auto GAI = BinaryDataMap.find(Address);
984   BinaryData *BD;
985   if (GAI == BinaryDataMap.end()) {
986     ErrorOr<BinarySection &> SectionOrErr = getSectionForAddress(Address);
987     BinarySection &Section =
988         SectionOrErr ? SectionOrErr.get() : absoluteSection();
989     BD = new BinaryData(*Symbol, Address, Size, Alignment ? Alignment : 1,
990                         Section, Flags);
991     GAI = BinaryDataMap.emplace(Address, BD).first;
992     GlobalSymbols[Name] = BD;
993     updateObjectNesting(GAI);
994   } else {
995     BD = GAI->second;
996     if (!BD->hasName(Name)) {
997       GlobalSymbols[Name] = BD;
998       BD->Symbols.push_back(Symbol);
999     }
1000   }
1001 
1002   return Symbol;
1003 }
1004 
1005 const BinaryData *
1006 BinaryContext::getBinaryDataContainingAddressImpl(uint64_t Address) const {
1007   auto NI = BinaryDataMap.lower_bound(Address);
1008   auto End = BinaryDataMap.end();
1009   if ((NI != End && Address == NI->first) ||
1010       ((NI != BinaryDataMap.begin()) && (NI-- != BinaryDataMap.begin()))) {
1011     if (NI->second->containsAddress(Address))
1012       return NI->second;
1013 
1014     // If this is a sub-symbol, see if a parent data contains the address.
1015     const BinaryData *BD = NI->second->getParent();
1016     while (BD) {
1017       if (BD->containsAddress(Address))
1018         return BD;
1019       BD = BD->getParent();
1020     }
1021   }
1022   return nullptr;
1023 }
1024 
1025 bool BinaryContext::setBinaryDataSize(uint64_t Address, uint64_t Size) {
1026   auto NI = BinaryDataMap.find(Address);
1027   assert(NI != BinaryDataMap.end());
1028   if (NI == BinaryDataMap.end())
1029     return false;
1030   // TODO: it's possible that a jump table starts at the same address
1031   // as a larger blob of private data.  When we set the size of the
1032   // jump table, it might be smaller than the total blob size.  In this
1033   // case we just leave the original size since (currently) it won't really
1034   // affect anything.
1035   assert((!NI->second->Size || NI->second->Size == Size ||
1036           (NI->second->isJumpTable() && NI->second->Size > Size)) &&
1037          "can't change the size of a symbol that has already had its "
1038          "size set");
1039   if (!NI->second->Size) {
1040     NI->second->Size = Size;
1041     updateObjectNesting(NI);
1042     return true;
1043   }
1044   return false;
1045 }
1046 
/// Give data objects whose primary name is internal a more stable primary
/// name.
///
/// For each such object: if it has a non-internal alias, that alias is moved
/// to the front of its symbol list. Otherwise, for non-empty objects, a name
/// of the form "<prefix>_<hex hash>" is built from the part of the name that
/// precedes "0x" and the section's hash of the object, and is inserted as the
/// first symbol. Name collisions are skipped and, unless the object looks like
/// padding, counted and reported.
void BinaryContext::generateSymbolHashes() {
  // An object counts as padding if its name starts with "HOLEat" or all of
  // its bytes in the section contents are zero.
  auto isPadding = [](const BinaryData &BD) {
    StringRef Contents = BD.getSection().getContents();
    StringRef SymData = Contents.substr(BD.getOffset(), BD.getSize());
    return (BD.getName().startswith("HOLEat") ||
            SymData.find_first_not_of(0) == StringRef::npos);
  };

  uint64_t NumCollisions = 0;
  for (auto &Entry : BinaryDataMap) {
    BinaryData &BD = *Entry.second;
    StringRef Name = BD.getName();

    // Objects whose primary name is not internal are left alone.
    if (!isInternalSymbolName(Name))
      continue;

    // First check if a non-anonymous alias exists and move it to the front.
    if (BD.getSymbols().size() > 1) {
      auto Itr = llvm::find_if(BD.getSymbols(), [&](const MCSymbol *Symbol) {
        return !isInternalSymbolName(Symbol->getName());
      });
      if (Itr != BD.getSymbols().end()) {
        size_t Idx = std::distance(BD.getSymbols().begin(), Itr);
        std::swap(BD.getSymbols()[0], BD.getSymbols()[Idx]);
        continue;
      }
    }

    // We have to skip 0 size symbols since they will all collide.
    if (BD.getSize() == 0) {
      continue;
    }

    // Replace everything from "0x" onwards with "_<hash>". If "0x" is not
    // found, Idx is npos and the whole name is kept as the prefix.
    const uint64_t Hash = BD.getSection().hash(BD);
    const size_t Idx = Name.find("0x");
    std::string NewName =
        (Twine(Name.substr(0, Idx)) + "_" + Twine::utohexstr(Hash)).str();
    if (getBinaryDataByName(NewName)) {
      // Ignore collisions for symbols that appear to be padding
      // (i.e. all zeros or a "hole")
      if (!isPadding(BD)) {
        if (opts::Verbosity) {
          errs() << "BOLT-WARNING: collision detected when hashing " << BD
                 << " with new name (" << NewName << "), skipping.\n";
        }
        ++NumCollisions;
      }
      continue;
    }
    // Make the hashed name the first symbol and index it globally.
    BD.Symbols.insert(BD.Symbols.begin(), Ctx->getOrCreateSymbol(NewName));
    GlobalSymbols[NewName] = &BD;
  }
  // Summarize collisions; the per-symbol list is only printed when verbose.
  if (NumCollisions) {
    errs() << "BOLT-WARNING: " << NumCollisions
           << " collisions detected while hashing binary objects";
    if (!opts::Verbosity)
      errs() << ". Use -v=1 to see the list.";
    errs() << '\n';
  }
}
1107 
1108 bool BinaryContext::registerFragment(BinaryFunction &TargetFunction,
1109                                      BinaryFunction &Function) const {
1110   assert(TargetFunction.isFragment() && "TargetFunction must be a fragment");
1111   if (TargetFunction.isChildOf(Function))
1112     return true;
1113   TargetFunction.addParentFragment(Function);
1114   Function.addFragment(TargetFunction);
1115   if (!HasRelocations) {
1116     TargetFunction.setSimple(false);
1117     Function.setSimple(false);
1118   }
1119   if (opts::Verbosity >= 1) {
1120     outs() << "BOLT-INFO: marking " << TargetFunction << " as a fragment of "
1121            << Function << '\n';
1122   }
1123   return true;
1124 }
1125 
1126 void BinaryContext::addAdrpAddRelocAArch64(BinaryFunction &BF,
1127                                            MCInst &LoadLowBits,
1128                                            MCInst &LoadHiBits,
1129                                            uint64_t Target) {
1130   const MCSymbol *TargetSymbol;
1131   uint64_t Addend = 0;
1132   std::tie(TargetSymbol, Addend) = handleAddressRef(Target, BF,
1133                                                     /*IsPCRel*/ true);
1134   int64_t Val;
1135   MIB->replaceImmWithSymbolRef(LoadHiBits, TargetSymbol, Addend, Ctx.get(), Val,
1136                                ELF::R_AARCH64_ADR_PREL_PG_HI21);
1137   MIB->replaceImmWithSymbolRef(LoadLowBits, TargetSymbol, Addend, Ctx.get(),
1138                                Val, ELF::R_AARCH64_ADD_ABS_LO12_NC);
1139 }
1140 
1141 bool BinaryContext::handleAArch64Veneer(uint64_t Address, bool MatchOnly) {
1142   BinaryFunction *TargetFunction = getBinaryFunctionContainingAddress(Address);
1143   if (TargetFunction)
1144     return false;
1145 
1146   ErrorOr<BinarySection &> Section = getSectionForAddress(Address);
1147   assert(Section && "cannot get section for referenced address");
1148   if (!Section->isText())
1149     return false;
1150 
1151   bool Ret = false;
1152   StringRef SectionContents = Section->getContents();
1153   uint64_t Offset = Address - Section->getAddress();
1154   const uint64_t MaxSize = SectionContents.size() - Offset;
1155   const uint8_t *Bytes =
1156       reinterpret_cast<const uint8_t *>(SectionContents.data());
1157   ArrayRef<uint8_t> Data(Bytes + Offset, MaxSize);
1158 
1159   auto matchVeneer = [&](BinaryFunction::InstrMapType &Instructions,
1160                          MCInst &Instruction, uint64_t Offset,
1161                          uint64_t AbsoluteInstrAddr,
1162                          uint64_t TotalSize) -> bool {
1163     MCInst *TargetHiBits, *TargetLowBits;
1164     uint64_t TargetAddress, Count;
1165     Count = MIB->matchLinkerVeneer(Instructions.begin(), Instructions.end(),
1166                                    AbsoluteInstrAddr, Instruction, TargetHiBits,
1167                                    TargetLowBits, TargetAddress);
1168     if (!Count)
1169       return false;
1170 
1171     if (MatchOnly)
1172       return true;
1173 
1174     // NOTE The target symbol was created during disassemble's
1175     // handleExternalReference
1176     const MCSymbol *VeneerSymbol = getOrCreateGlobalSymbol(Address, "FUNCat");
1177     BinaryFunction *Veneer = createBinaryFunction(VeneerSymbol->getName().str(),
1178                                                   *Section, Address, TotalSize);
1179     addAdrpAddRelocAArch64(*Veneer, *TargetLowBits, *TargetHiBits,
1180                            TargetAddress);
1181     MIB->addAnnotation(Instruction, "AArch64Veneer", true);
1182     Veneer->addInstruction(Offset, std::move(Instruction));
1183     --Count;
1184     for (auto It = Instructions.rbegin(); Count != 0; ++It, --Count) {
1185       MIB->addAnnotation(It->second, "AArch64Veneer", true);
1186       Veneer->addInstruction(It->first, std::move(It->second));
1187     }
1188 
1189     Veneer->getOrCreateLocalLabel(Address);
1190     Veneer->setMaxSize(TotalSize);
1191     Veneer->updateState(BinaryFunction::State::Disassembled);
1192     LLVM_DEBUG(dbgs() << "BOLT-DEBUG: handling veneer function at 0x" << Address
1193                       << "\n");
1194     return true;
1195   };
1196 
1197   uint64_t Size = 0, TotalSize = 0;
1198   BinaryFunction::InstrMapType VeneerInstructions;
1199   for (Offset = 0; Offset < MaxSize; Offset += Size) {
1200     MCInst Instruction;
1201     const uint64_t AbsoluteInstrAddr = Address + Offset;
1202     if (!SymbolicDisAsm->getInstruction(Instruction, Size, Data.slice(Offset),
1203                                         AbsoluteInstrAddr, nulls()))
1204       break;
1205 
1206     TotalSize += Size;
1207     if (MIB->isBranch(Instruction)) {
1208       Ret = matchVeneer(VeneerInstructions, Instruction, Offset,
1209                         AbsoluteInstrAddr, TotalSize);
1210       break;
1211     }
1212 
1213     VeneerInstructions.emplace(Offset, std::move(Instruction));
1214   }
1215 
1216   return Ret;
1217 }
1218 
1219 void BinaryContext::processInterproceduralReferences() {
1220   for (const std::pair<BinaryFunction *, uint64_t> &It :
1221        InterproceduralReferences) {
1222     BinaryFunction &Function = *It.first;
1223     uint64_t Address = It.second;
1224     if (!Address || Function.isIgnored())
1225       continue;
1226 
1227     BinaryFunction *TargetFunction =
1228         getBinaryFunctionContainingAddress(Address);
1229     if (&Function == TargetFunction)
1230       continue;
1231 
1232     if (TargetFunction) {
1233       if (TargetFunction->isFragment() &&
1234           !TargetFunction->isChildOf(Function)) {
1235         errs() << "BOLT-WARNING: interprocedural reference between unrelated "
1236                   "fragments: "
1237                << Function.getPrintName() << " and "
1238                << TargetFunction->getPrintName() << '\n';
1239       }
1240       if (uint64_t Offset = Address - TargetFunction->getAddress())
1241         TargetFunction->addEntryPointAtOffset(Offset);
1242 
1243       continue;
1244     }
1245 
1246     // Check if address falls in function padding space - this could be
1247     // unmarked data in code. In this case adjust the padding space size.
1248     ErrorOr<BinarySection &> Section = getSectionForAddress(Address);
1249     assert(Section && "cannot get section for referenced address");
1250 
1251     if (!Section->isText())
1252       continue;
1253 
1254     // PLT requires special handling and could be ignored in this context.
1255     StringRef SectionName = Section->getName();
1256     if (SectionName == ".plt" || SectionName == ".plt.got")
1257       continue;
1258 
1259     // Check if it is aarch64 veneer written at Address
1260     if (isAArch64() && handleAArch64Veneer(Address))
1261       continue;
1262 
1263     if (opts::processAllFunctions()) {
1264       errs() << "BOLT-ERROR: cannot process binaries with unmarked "
1265              << "object in code at address 0x" << Twine::utohexstr(Address)
1266              << " belonging to section " << SectionName << " in current mode\n";
1267       exit(1);
1268     }
1269 
1270     TargetFunction = getBinaryFunctionContainingAddress(Address,
1271                                                         /*CheckPastEnd=*/false,
1272                                                         /*UseMaxSize=*/true);
1273     // We are not going to overwrite non-simple functions, but for simple
1274     // ones - adjust the padding size.
1275     if (TargetFunction && TargetFunction->isSimple()) {
1276       errs() << "BOLT-WARNING: function " << *TargetFunction
1277              << " has an object detected in a padding region at address 0x"
1278              << Twine::utohexstr(Address) << '\n';
1279       TargetFunction->setMaxSize(TargetFunction->getSize());
1280     }
1281   }
1282 
1283   InterproceduralReferences.clear();
1284 }
1285 
1286 void BinaryContext::postProcessSymbolTable() {
1287   fixBinaryDataHoles();
1288   bool Valid = true;
1289   for (auto &Entry : BinaryDataMap) {
1290     BinaryData *BD = Entry.second;
1291     if ((BD->getName().startswith("SYMBOLat") ||
1292          BD->getName().startswith("DATAat")) &&
1293         !BD->getParent() && !BD->getSize() && !BD->isAbsolute() &&
1294         BD->getSection()) {
1295       errs() << "BOLT-WARNING: zero-sized top level symbol: " << *BD << "\n";
1296       Valid = false;
1297     }
1298   }
1299   assert(Valid);
1300   (void)Valid;
1301   generateSymbolHashes();
1302 }
1303 
/// Fold \p ChildBF into \p ParentBF.
///
/// All symbols and aliases of the child are moved to the parent and
/// SymbolToFunctionMap is updated accordingly. With relocations the child's
/// profile is merged into the parent and the child is erased from
/// BinaryFunctions; without relocations the child is kept, given a single
/// "__ICF_<name>" symbol and marked as folded into the parent.
///
/// Neither function may have multiple entry points. Shared state is accessed
/// under the corresponding mutexes, each locked only around the access.
void BinaryContext::foldFunction(BinaryFunction &ChildBF,
                                 BinaryFunction &ParentBF) {
  assert(!ChildBF.isMultiEntry() && !ParentBF.isMultiEntry() &&
         "cannot merge functions with multiple entry points");

  // Both locks are created unlocked and taken explicitly below.
  std::unique_lock<llvm::sys::RWMutex> WriteCtxLock(CtxMutex, std::defer_lock);
  std::unique_lock<llvm::sys::RWMutex> WriteSymbolMapLock(
      SymbolToFunctionMapMutex, std::defer_lock);

  // Taken before the child's symbols are moved away below.
  const StringRef ChildName = ChildBF.getOneName();

  // Move symbols over and update bookkeeping info.
  for (MCSymbol *Symbol : ChildBF.getSymbols()) {
    ParentBF.getSymbols().push_back(Symbol);
    WriteSymbolMapLock.lock();
    SymbolToFunctionMap[Symbol] = &ParentBF;
    WriteSymbolMapLock.unlock();
    // NB: there's no need to update BinaryDataMap and GlobalSymbols.
  }
  ChildBF.getSymbols().clear();

  // Move other names the child function is known under.
  llvm::move(ChildBF.Aliases, std::back_inserter(ParentBF.Aliases));
  ChildBF.Aliases.clear();

  if (HasRelocations) {
    // Merge execution counts of ChildBF into those of ParentBF.
    // Without relocations, we cannot reliably merge profiles as both functions
    // continue to exist and either one can be executed.
    ChildBF.mergeProfileDataInto(ParentBF);

    std::shared_lock<llvm::sys::RWMutex> ReadBfsLock(BinaryFunctionsMutex,
                                                     std::defer_lock);
    std::unique_lock<llvm::sys::RWMutex> WriteBfsLock(BinaryFunctionsMutex,
                                                      std::defer_lock);
    // Remove ChildBF from the global set of functions in relocs mode.
    // The lookup is done under the shared lock, the erase under the
    // exclusive one.
    ReadBfsLock.lock();
    auto FI = BinaryFunctions.find(ChildBF.getAddress());
    ReadBfsLock.unlock();

    assert(FI != BinaryFunctions.end() && "function not found");
    assert(&ChildBF == &FI->second && "function mismatch");

    WriteBfsLock.lock();
    ChildBF.clearDisasmState();
    // ChildBF refers to the erased map element and must not be used after
    // this point.
    FI = BinaryFunctions.erase(FI);
    WriteBfsLock.unlock();

  } else {
    // In non-relocation mode we keep the function, but rename it.
    std::string NewName = "__ICF_" + ChildName.str();

    WriteCtxLock.lock();
    ChildBF.getSymbols().push_back(Ctx->getOrCreateSymbol(NewName));
    WriteCtxLock.unlock();

    ChildBF.setFolded(&ParentBF);
  }

  ParentBF.setHasFunctionsFoldedInto();
}
1365 
/// Make sure every allocatable section is fully covered by top-level data
/// objects.
///
/// For each section, the gaps between consecutive non-hole top-level objects
/// (and the gaps at the start and end of the section) are collected. Each gap
/// is then either assigned as the size of an object already starting at the
/// gap address, or covered by a new "HOLEat" object.
void BinaryContext::fixBinaryDataHoles() {
  assert(validateObjectNesting() && "object nesting inconsitency detected");

  for (BinarySection &Section : allocatableSections()) {
    // (start address, size) of every uncovered range in this section.
    std::vector<std::pair<uint64_t, uint64_t>> Holes;

    // Selects top-level objects of this section, except for zero-sized
    // objects with a "SYMBOLat0x", "DATAat0x" or "ANONYMOUS" name, which are
    // treated as holes themselves.
    auto isNotHole = [&Section](const binary_data_iterator &Itr) {
      BinaryData *BD = Itr->second;
      bool isHole = (!BD->getParent() && !BD->getSize() && BD->isObject() &&
                     (BD->getName().startswith("SYMBOLat0x") ||
                      BD->getName().startswith("DATAat0x") ||
                      BD->getName().startswith("ANONYMOUS")));
      return !isHole && BD->getSection() == Section && !BD->getParent();
    };

    auto BDStart = BinaryDataMap.begin();
    auto BDEnd = BinaryDataMap.end();
    auto Itr = FilteredBinaryDataIterator(isNotHole, BDStart, BDEnd);
    auto End = FilteredBinaryDataIterator(isNotHole, BDEnd, BDEnd);

    // End address of the previously visited object; starts at the section
    // start so that a leading gap is detected as well.
    uint64_t EndAddress = Section.getAddress();

    while (Itr != End) {
      if (Itr->second->getAddress() > EndAddress) {
        uint64_t Gap = Itr->second->getAddress() - EndAddress;
        Holes.emplace_back(EndAddress, Gap);
      }
      EndAddress = Itr->second->getEndAddress();
      ++Itr;
    }

    // Trailing gap between the last object and the end of the section.
    if (EndAddress < Section.getEndAddress())
      Holes.emplace_back(EndAddress, Section.getEndAddress() - EndAddress);

    // If there is already a symbol at the start of the hole, grow that symbol
    // to cover the rest.  Otherwise, create a new symbol to cover the hole.
    for (std::pair<uint64_t, uint64_t> &Hole : Holes) {
      BinaryData *BD = getBinaryDataAtAddress(Hole.first);
      if (BD) {
        // BD->getSection() can be != Section if there are sections that
        // overlap.  In this case it is probably safe to just skip the holes
        // since the overlapping section will not(?) have any symbols in it.
        if (BD->getSection() == Section)
          setBinaryDataSize(Hole.first, Hole.second);
      } else {
        getOrCreateGlobalSymbol(Hole.first, "HOLEat", Hole.second, 1);
      }
    }
  }

  assert(validateObjectNesting() && "object nesting inconsitency detected");
  assert(validateHoles() && "top level hole detected in object map");
}
1419 
1420 void BinaryContext::printGlobalSymbols(raw_ostream &OS) const {
1421   const BinarySection *CurrentSection = nullptr;
1422   bool FirstSection = true;
1423 
1424   for (auto &Entry : BinaryDataMap) {
1425     const BinaryData *BD = Entry.second;
1426     const BinarySection &Section = BD->getSection();
1427     if (FirstSection || Section != *CurrentSection) {
1428       uint64_t Address, Size;
1429       StringRef Name = Section.getName();
1430       if (Section) {
1431         Address = Section.getAddress();
1432         Size = Section.getSize();
1433       } else {
1434         Address = BD->getAddress();
1435         Size = BD->getSize();
1436       }
1437       OS << "BOLT-INFO: Section " << Name << ", "
1438          << "0x" + Twine::utohexstr(Address) << ":"
1439          << "0x" + Twine::utohexstr(Address + Size) << "/" << Size << "\n";
1440       CurrentSection = &Section;
1441       FirstSection = false;
1442     }
1443 
1444     OS << "BOLT-INFO: ";
1445     const BinaryData *P = BD->getParent();
1446     while (P) {
1447       OS << "  ";
1448       P = P->getParent();
1449     }
1450     OS << *BD << "\n";
1451   }
1452 }
1453 
1454 Expected<unsigned> BinaryContext::getDwarfFile(
1455     StringRef Directory, StringRef FileName, unsigned FileNumber,
1456     std::optional<MD5::MD5Result> Checksum, std::optional<StringRef> Source,
1457     unsigned CUID, unsigned DWARFVersion) {
1458   DwarfLineTable &Table = DwarfLineTablesCUMap[CUID];
1459   return Table.tryGetFile(Directory, FileName, Checksum, Source, DWARFVersion,
1460                           FileNumber);
1461 }
1462 
1463 unsigned BinaryContext::addDebugFilenameToUnit(const uint32_t DestCUID,
1464                                                const uint32_t SrcCUID,
1465                                                unsigned FileIndex) {
1466   DWARFCompileUnit *SrcUnit = DwCtx->getCompileUnitForOffset(SrcCUID);
1467   const DWARFDebugLine::LineTable *LineTable =
1468       DwCtx->getLineTableForUnit(SrcUnit);
1469   const std::vector<DWARFDebugLine::FileNameEntry> &FileNames =
1470       LineTable->Prologue.FileNames;
1471   // Dir indexes start at 1, as DWARF file numbers, and a dir index 0
1472   // means empty dir.
1473   assert(FileIndex > 0 && FileIndex <= FileNames.size() &&
1474          "FileIndex out of range for the compilation unit.");
1475   StringRef Dir = "";
1476   if (FileNames[FileIndex - 1].DirIdx != 0) {
1477     if (std::optional<const char *> DirName = dwarf::toString(
1478             LineTable->Prologue
1479                 .IncludeDirectories[FileNames[FileIndex - 1].DirIdx - 1])) {
1480       Dir = *DirName;
1481     }
1482   }
1483   StringRef FileName = "";
1484   if (std::optional<const char *> FName =
1485           dwarf::toString(FileNames[FileIndex - 1].Name))
1486     FileName = *FName;
1487   assert(FileName != "");
1488   DWARFCompileUnit *DstUnit = DwCtx->getCompileUnitForOffset(DestCUID);
1489   return cantFail(getDwarfFile(Dir, FileName, 0, std::nullopt, std::nullopt,
1490                                DestCUID, DstUnit->getVersion()));
1491 }
1492 
1493 std::vector<BinaryFunction *> BinaryContext::getSortedFunctions() {
1494   std::vector<BinaryFunction *> SortedFunctions(BinaryFunctions.size());
1495   llvm::transform(llvm::make_second_range(BinaryFunctions),
1496                   SortedFunctions.begin(),
1497                   [](BinaryFunction &BF) { return &BF; });
1498 
1499   llvm::stable_sort(SortedFunctions,
1500                     [](const BinaryFunction *A, const BinaryFunction *B) {
1501                       if (A->hasValidIndex() && B->hasValidIndex()) {
1502                         return A->getIndex() < B->getIndex();
1503                       }
1504                       return A->hasValidIndex();
1505                     });
1506   return SortedFunctions;
1507 }
1508 
1509 std::vector<BinaryFunction *> BinaryContext::getAllBinaryFunctions() {
1510   std::vector<BinaryFunction *> AllFunctions;
1511   AllFunctions.reserve(BinaryFunctions.size() + InjectedBinaryFunctions.size());
1512   llvm::transform(llvm::make_second_range(BinaryFunctions),
1513                   std::back_inserter(AllFunctions),
1514                   [](BinaryFunction &BF) { return &BF; });
1515   llvm::copy(InjectedBinaryFunctions, std::back_inserter(AllFunctions));
1516 
1517   return AllFunctions;
1518 }
1519 
1520 std::optional<DWARFUnit *> BinaryContext::getDWOCU(uint64_t DWOId) {
1521   auto Iter = DWOCUs.find(DWOId);
1522   if (Iter == DWOCUs.end())
1523     return std::nullopt;
1524 
1525   return Iter->second;
1526 }
1527 
1528 DWARFContext *BinaryContext::getDWOContext() const {
1529   if (DWOCUs.empty())
1530     return nullptr;
1531   return &DWOCUs.begin()->second->getContext();
1532 }
1533 
1534 /// Handles DWO sections that can either be in .o, .dwo or .dwp files.
1535 void BinaryContext::preprocessDWODebugInfo() {
1536   for (const std::unique_ptr<DWARFUnit> &CU : DwCtx->compile_units()) {
1537     DWARFUnit *const DwarfUnit = CU.get();
1538     if (std::optional<uint64_t> DWOId = DwarfUnit->getDWOId()) {
1539       DWARFUnit *DWOCU = DwarfUnit->getNonSkeletonUnitDIE(false).getDwarfUnit();
1540       if (!DWOCU->isDWOUnit()) {
1541         std::string DWOName = dwarf::toString(
1542             DwarfUnit->getUnitDIE().find(
1543                 {dwarf::DW_AT_dwo_name, dwarf::DW_AT_GNU_dwo_name}),
1544             "");
1545         outs() << "BOLT-WARNING: Debug Fission: DWO debug information for "
1546                << DWOName
1547                << " was not retrieved and won't be updated. Please check "
1548                   "relative path.\n";
1549         continue;
1550       }
1551       DWOCUs[*DWOId] = DWOCU;
1552     }
1553   }
1554   if (!DWOCUs.empty())
1555     outs() << "BOLT-INFO: processing split DWARF\n";
1556 }
1557 
1558 void BinaryContext::preprocessDebugInfo() {
1559   struct CURange {
1560     uint64_t LowPC;
1561     uint64_t HighPC;
1562     DWARFUnit *Unit;
1563 
1564     bool operator<(const CURange &Other) const { return LowPC < Other.LowPC; }
1565   };
1566 
1567   // Building a map of address ranges to CUs similar to .debug_aranges and use
1568   // it to assign CU to functions.
1569   std::vector<CURange> AllRanges;
1570   AllRanges.reserve(DwCtx->getNumCompileUnits());
1571   for (const std::unique_ptr<DWARFUnit> &CU : DwCtx->compile_units()) {
1572     Expected<DWARFAddressRangesVector> RangesOrError =
1573         CU->getUnitDIE().getAddressRanges();
1574     if (!RangesOrError) {
1575       consumeError(RangesOrError.takeError());
1576       continue;
1577     }
1578     for (DWARFAddressRange &Range : *RangesOrError) {
1579       // Parts of the debug info could be invalidated due to corresponding code
1580       // being removed from the binary by the linker. Hence we check if the
1581       // address is a valid one.
1582       if (containsAddress(Range.LowPC))
1583         AllRanges.emplace_back(CURange{Range.LowPC, Range.HighPC, CU.get()});
1584     }
1585 
1586     ContainsDwarf5 |= CU->getVersion() >= 5;
1587     ContainsDwarfLegacy |= CU->getVersion() < 5;
1588   }
1589 
1590   llvm::sort(AllRanges);
1591   for (auto &KV : BinaryFunctions) {
1592     const uint64_t FunctionAddress = KV.first;
1593     BinaryFunction &Function = KV.second;
1594 
1595     auto It = llvm::partition_point(
1596         AllRanges, [=](CURange R) { return R.HighPC <= FunctionAddress; });
1597     if (It != AllRanges.end() && It->LowPC <= FunctionAddress)
1598       Function.setDWARFUnit(It->Unit);
1599   }
1600 
1601   // Discover units with debug info that needs to be updated.
1602   for (const auto &KV : BinaryFunctions) {
1603     const BinaryFunction &BF = KV.second;
1604     if (shouldEmit(BF) && BF.getDWARFUnit())
1605       ProcessedCUs.insert(BF.getDWARFUnit());
1606   }
1607 
1608   // Clear debug info for functions from units that we are not going to process.
1609   for (auto &KV : BinaryFunctions) {
1610     BinaryFunction &BF = KV.second;
1611     if (BF.getDWARFUnit() && !ProcessedCUs.count(BF.getDWARFUnit()))
1612       BF.setDWARFUnit(nullptr);
1613   }
1614 
1615   if (opts::Verbosity >= 1) {
1616     outs() << "BOLT-INFO: " << ProcessedCUs.size() << " out of "
1617            << DwCtx->getNumCompileUnits() << " CUs will be updated\n";
1618   }
1619 
1620   preprocessDWODebugInfo();
1621 
1622   // Populate MCContext with DWARF files from all units.
1623   StringRef GlobalPrefix = AsmInfo->getPrivateGlobalPrefix();
1624   for (const std::unique_ptr<DWARFUnit> &CU : DwCtx->compile_units()) {
1625     const uint64_t CUID = CU->getOffset();
1626     DwarfLineTable &BinaryLineTable = getDwarfLineTable(CUID);
1627     BinaryLineTable.setLabel(Ctx->getOrCreateSymbol(
1628         GlobalPrefix + "line_table_start" + Twine(CUID)));
1629 
1630     if (!ProcessedCUs.count(CU.get()))
1631       continue;
1632 
1633     const DWARFDebugLine::LineTable *LineTable =
1634         DwCtx->getLineTableForUnit(CU.get());
1635     const std::vector<DWARFDebugLine::FileNameEntry> &FileNames =
1636         LineTable->Prologue.FileNames;
1637 
1638     uint16_t DwarfVersion = LineTable->Prologue.getVersion();
1639     if (DwarfVersion >= 5) {
1640       std::optional<MD5::MD5Result> Checksum;
1641       if (LineTable->Prologue.ContentTypes.HasMD5)
1642         Checksum = LineTable->Prologue.FileNames[0].Checksum;
1643       std::optional<const char *> Name =
1644           dwarf::toString(CU->getUnitDIE().find(dwarf::DW_AT_name), nullptr);
1645       if (std::optional<uint64_t> DWOID = CU->getDWOId()) {
1646         auto Iter = DWOCUs.find(*DWOID);
1647         assert(Iter != DWOCUs.end() && "DWO CU was not found.");
1648         Name = dwarf::toString(
1649             Iter->second->getUnitDIE().find(dwarf::DW_AT_name), nullptr);
1650       }
1651       BinaryLineTable.setRootFile(CU->getCompilationDir(), *Name, Checksum,
1652                                   std::nullopt);
1653     }
1654 
1655     BinaryLineTable.setDwarfVersion(DwarfVersion);
1656 
1657     // Assign a unique label to every line table, one per CU.
1658     // Make sure empty debug line tables are registered too.
1659     if (FileNames.empty()) {
1660       cantFail(getDwarfFile("", "<unknown>", 0, std::nullopt, std::nullopt,
1661                             CUID, DwarfVersion));
1662       continue;
1663     }
1664     const uint32_t Offset = DwarfVersion < 5 ? 1 : 0;
1665     for (size_t I = 0, Size = FileNames.size(); I != Size; ++I) {
1666       // Dir indexes start at 1, as DWARF file numbers, and a dir index 0
1667       // means empty dir.
1668       StringRef Dir = "";
1669       if (FileNames[I].DirIdx != 0 || DwarfVersion >= 5)
1670         if (std::optional<const char *> DirName = dwarf::toString(
1671                 LineTable->Prologue
1672                     .IncludeDirectories[FileNames[I].DirIdx - Offset]))
1673           Dir = *DirName;
1674       StringRef FileName = "";
1675       if (std::optional<const char *> FName =
1676               dwarf::toString(FileNames[I].Name))
1677         FileName = *FName;
1678       assert(FileName != "");
1679       std::optional<MD5::MD5Result> Checksum;
1680       if (DwarfVersion >= 5 && LineTable->Prologue.ContentTypes.HasMD5)
1681         Checksum = LineTable->Prologue.FileNames[I].Checksum;
1682       cantFail(getDwarfFile(Dir, FileName, 0, Checksum, std::nullopt, CUID,
1683                             DwarfVersion));
1684     }
1685   }
1686 }
1687 
1688 bool BinaryContext::shouldEmit(const BinaryFunction &Function) const {
1689   if (Function.isPseudo())
1690     return false;
1691 
1692   if (opts::processAllFunctions())
1693     return true;
1694 
1695   if (Function.isIgnored())
1696     return false;
1697 
1698   // In relocation mode we will emit non-simple functions with CFG.
1699   // If the function does not have a CFG it should be marked as ignored.
1700   return HasRelocations || Function.isSimple();
1701 }
1702 
1703 void BinaryContext::printCFI(raw_ostream &OS, const MCCFIInstruction &Inst) {
1704   uint32_t Operation = Inst.getOperation();
1705   switch (Operation) {
1706   case MCCFIInstruction::OpSameValue:
1707     OS << "OpSameValue Reg" << Inst.getRegister();
1708     break;
1709   case MCCFIInstruction::OpRememberState:
1710     OS << "OpRememberState";
1711     break;
1712   case MCCFIInstruction::OpRestoreState:
1713     OS << "OpRestoreState";
1714     break;
1715   case MCCFIInstruction::OpOffset:
1716     OS << "OpOffset Reg" << Inst.getRegister() << " " << Inst.getOffset();
1717     break;
1718   case MCCFIInstruction::OpDefCfaRegister:
1719     OS << "OpDefCfaRegister Reg" << Inst.getRegister();
1720     break;
1721   case MCCFIInstruction::OpDefCfaOffset:
1722     OS << "OpDefCfaOffset " << Inst.getOffset();
1723     break;
1724   case MCCFIInstruction::OpDefCfa:
1725     OS << "OpDefCfa Reg" << Inst.getRegister() << " " << Inst.getOffset();
1726     break;
1727   case MCCFIInstruction::OpRelOffset:
1728     OS << "OpRelOffset Reg" << Inst.getRegister() << " " << Inst.getOffset();
1729     break;
1730   case MCCFIInstruction::OpAdjustCfaOffset:
1731     OS << "OfAdjustCfaOffset " << Inst.getOffset();
1732     break;
1733   case MCCFIInstruction::OpEscape:
1734     OS << "OpEscape";
1735     break;
1736   case MCCFIInstruction::OpRestore:
1737     OS << "OpRestore Reg" << Inst.getRegister();
1738     break;
1739   case MCCFIInstruction::OpUndefined:
1740     OS << "OpUndefined Reg" << Inst.getRegister();
1741     break;
1742   case MCCFIInstruction::OpRegister:
1743     OS << "OpRegister Reg" << Inst.getRegister() << " Reg"
1744        << Inst.getRegister2();
1745     break;
1746   case MCCFIInstruction::OpWindowSave:
1747     OS << "OpWindowSave";
1748     break;
1749   case MCCFIInstruction::OpGnuArgsSize:
1750     OS << "OpGnuArgsSize";
1751     break;
1752   default:
1753     OS << "Op#" << Operation;
1754     break;
1755   }
1756 }
1757 
1758 MarkerSymType BinaryContext::getMarkerType(const SymbolRef &Symbol) const {
1759   // For aarch64, the ABI defines mapping symbols so we identify data in the
1760   // code section (see IHI0056B). $x identifies a symbol starting code or the
1761   // end of a data chunk inside code, $d indentifies start of data.
1762   if (!isAArch64() || ELFSymbolRef(Symbol).getSize())
1763     return MarkerSymType::NONE;
1764 
1765   Expected<StringRef> NameOrError = Symbol.getName();
1766   Expected<object::SymbolRef::Type> TypeOrError = Symbol.getType();
1767 
1768   if (!TypeOrError || !NameOrError)
1769     return MarkerSymType::NONE;
1770 
1771   if (*TypeOrError != SymbolRef::ST_Unknown)
1772     return MarkerSymType::NONE;
1773 
1774   if (*NameOrError == "$x" || NameOrError->startswith("$x."))
1775     return MarkerSymType::CODE;
1776 
1777   if (*NameOrError == "$d" || NameOrError->startswith("$d."))
1778     return MarkerSymType::DATA;
1779 
1780   return MarkerSymType::NONE;
1781 }
1782 
1783 bool BinaryContext::isMarker(const SymbolRef &Symbol) const {
1784   return getMarkerType(Symbol) != MarkerSymType::NONE;
1785 }
1786 
1787 static void printDebugInfo(raw_ostream &OS, const MCInst &Instruction,
1788                            const BinaryFunction *Function,
1789                            DWARFContext *DwCtx) {
1790   DebugLineTableRowRef RowRef =
1791       DebugLineTableRowRef::fromSMLoc(Instruction.getLoc());
1792   if (RowRef == DebugLineTableRowRef::NULL_ROW)
1793     return;
1794 
1795   const DWARFDebugLine::LineTable *LineTable;
1796   if (Function && Function->getDWARFUnit() &&
1797       Function->getDWARFUnit()->getOffset() == RowRef.DwCompileUnitIndex) {
1798     LineTable = Function->getDWARFLineTable();
1799   } else {
1800     LineTable = DwCtx->getLineTableForUnit(
1801         DwCtx->getCompileUnitForOffset(RowRef.DwCompileUnitIndex));
1802   }
1803   assert(LineTable && "line table expected for instruction with debug info");
1804 
1805   const DWARFDebugLine::Row &Row = LineTable->Rows[RowRef.RowIndex - 1];
1806   StringRef FileName = "";
1807   if (std::optional<const char *> FName =
1808           dwarf::toString(LineTable->Prologue.FileNames[Row.File - 1].Name))
1809     FileName = *FName;
1810   OS << " # debug line " << FileName << ":" << Row.Line;
1811   if (Row.Column)
1812     OS << ":" << Row.Column;
1813   if (Row.Discriminator)
1814     OS << " discriminator:" << Row.Discriminator;
1815 }
1816 
1817 void BinaryContext::printInstruction(raw_ostream &OS, const MCInst &Instruction,
1818                                      uint64_t Offset,
1819                                      const BinaryFunction *Function,
1820                                      bool PrintMCInst, bool PrintMemData,
1821                                      bool PrintRelocations,
1822                                      StringRef Endl) const {
1823   if (MIB->isEHLabel(Instruction)) {
1824     OS << "  EH_LABEL: " << *MIB->getTargetSymbol(Instruction) << Endl;
1825     return;
1826   }
1827   OS << format("    %08" PRIx64 ": ", Offset);
1828   if (MIB->isCFI(Instruction)) {
1829     uint32_t Offset = Instruction.getOperand(0).getImm();
1830     OS << "\t!CFI\t$" << Offset << "\t; ";
1831     if (Function)
1832       printCFI(OS, *Function->getCFIFor(Instruction));
1833     OS << Endl;
1834     return;
1835   }
1836   InstPrinter->printInst(&Instruction, 0, "", *STI, OS);
1837   if (MIB->isCall(Instruction)) {
1838     if (MIB->isTailCall(Instruction))
1839       OS << " # TAILCALL ";
1840     if (MIB->isInvoke(Instruction)) {
1841       const std::optional<MCPlus::MCLandingPad> EHInfo =
1842           MIB->getEHInfo(Instruction);
1843       OS << " # handler: ";
1844       if (EHInfo->first)
1845         OS << *EHInfo->first;
1846       else
1847         OS << '0';
1848       OS << "; action: " << EHInfo->second;
1849       const int64_t GnuArgsSize = MIB->getGnuArgsSize(Instruction);
1850       if (GnuArgsSize >= 0)
1851         OS << "; GNU_args_size = " << GnuArgsSize;
1852     }
1853   } else if (MIB->isIndirectBranch(Instruction)) {
1854     if (uint64_t JTAddress = MIB->getJumpTable(Instruction)) {
1855       OS << " # JUMPTABLE @0x" << Twine::utohexstr(JTAddress);
1856     } else {
1857       OS << " # UNKNOWN CONTROL FLOW";
1858     }
1859   }
1860   if (std::optional<uint32_t> Offset = MIB->getOffset(Instruction))
1861     OS << " # Offset: " << *Offset;
1862 
1863   MIB->printAnnotations(Instruction, OS);
1864 
1865   if (opts::PrintDebugInfo)
1866     printDebugInfo(OS, Instruction, Function, DwCtx.get());
1867 
1868   if ((opts::PrintRelocations || PrintRelocations) && Function) {
1869     const uint64_t Size = computeCodeSize(&Instruction, &Instruction + 1);
1870     Function->printRelocations(OS, Offset, Size);
1871   }
1872 
1873   OS << Endl;
1874 
1875   if (PrintMCInst) {
1876     Instruction.dump_pretty(OS, InstPrinter.get());
1877     OS << Endl;
1878   }
1879 }
1880 
1881 std::optional<uint64_t>
1882 BinaryContext::getBaseAddressForMapping(uint64_t MMapAddress,
1883                                         uint64_t FileOffset) const {
1884   // Find a segment with a matching file offset.
1885   for (auto &KV : SegmentMapInfo) {
1886     const SegmentInfo &SegInfo = KV.second;
1887     if (alignDown(SegInfo.FileOffset, SegInfo.Alignment) == FileOffset) {
1888       // Use segment's aligned memory offset to calculate the base address.
1889       const uint64_t MemOffset = alignDown(SegInfo.Address, SegInfo.Alignment);
1890       return MMapAddress - MemOffset;
1891     }
1892   }
1893 
1894   return std::nullopt;
1895 }
1896 
1897 ErrorOr<BinarySection &> BinaryContext::getSectionForAddress(uint64_t Address) {
1898   auto SI = AddressToSection.upper_bound(Address);
1899   if (SI != AddressToSection.begin()) {
1900     --SI;
1901     uint64_t UpperBound = SI->first + SI->second->getSize();
1902     if (!SI->second->getSize())
1903       UpperBound += 1;
1904     if (UpperBound > Address)
1905       return *SI->second;
1906   }
1907   return std::make_error_code(std::errc::bad_address);
1908 }
1909 
1910 ErrorOr<StringRef>
1911 BinaryContext::getSectionNameForAddress(uint64_t Address) const {
1912   if (ErrorOr<const BinarySection &> Section = getSectionForAddress(Address))
1913     return Section->getName();
1914   return std::make_error_code(std::errc::bad_address);
1915 }
1916 
1917 BinarySection &BinaryContext::registerSection(BinarySection *Section) {
1918   auto Res = Sections.insert(Section);
1919   (void)Res;
1920   assert(Res.second && "can't register the same section twice.");
1921 
1922   // Only register allocatable sections in the AddressToSection map.
1923   if (Section->isAllocatable() && Section->getAddress())
1924     AddressToSection.insert(std::make_pair(Section->getAddress(), Section));
1925   NameToSection.insert(
1926       std::make_pair(std::string(Section->getName()), Section));
1927   if (Section->hasSectionRef())
1928     SectionRefToBinarySection.insert(
1929         std::make_pair(Section->getSectionRef(), Section));
1930 
1931   LLVM_DEBUG(dbgs() << "BOLT-DEBUG: registering " << *Section << "\n");
1932   return *Section;
1933 }
1934 
1935 BinarySection &BinaryContext::registerSection(SectionRef Section) {
1936   return registerSection(new BinarySection(*this, Section));
1937 }
1938 
1939 BinarySection &
1940 BinaryContext::registerSection(const Twine &SectionName,
1941                                const BinarySection &OriginalSection) {
1942   return registerSection(
1943       new BinarySection(*this, SectionName, OriginalSection));
1944 }
1945 
1946 BinarySection &
1947 BinaryContext::registerOrUpdateSection(const Twine &Name, unsigned ELFType,
1948                                        unsigned ELFFlags, uint8_t *Data,
1949                                        uint64_t Size, unsigned Alignment) {
1950   auto NamedSections = getSectionByName(Name);
1951   if (NamedSections.begin() != NamedSections.end()) {
1952     assert(std::next(NamedSections.begin()) == NamedSections.end() &&
1953            "can only update unique sections");
1954     BinarySection *Section = NamedSections.begin()->second;
1955 
1956     LLVM_DEBUG(dbgs() << "BOLT-DEBUG: updating " << *Section << " -> ");
1957     const bool Flag = Section->isAllocatable();
1958     (void)Flag;
1959     Section->update(Data, Size, Alignment, ELFType, ELFFlags);
1960     LLVM_DEBUG(dbgs() << *Section << "\n");
1961     // FIXME: Fix section flags/attributes for MachO.
1962     if (isELF())
1963       assert(Flag == Section->isAllocatable() &&
1964              "can't change section allocation status");
1965     return *Section;
1966   }
1967 
1968   return registerSection(
1969       new BinarySection(*this, Name, Data, Size, Alignment, ELFType, ELFFlags));
1970 }
1971 
1972 void BinaryContext::deregisterSectionName(const BinarySection &Section) {
1973   auto NameRange = NameToSection.equal_range(Section.getName().str());
1974   while (NameRange.first != NameRange.second) {
1975     if (NameRange.first->second == &Section) {
1976       NameToSection.erase(NameRange.first);
1977       break;
1978     }
1979     ++NameRange.first;
1980   }
1981 }
1982 
1983 void BinaryContext::deregisterUnusedSections() {
1984   ErrorOr<BinarySection &> AbsSection = getUniqueSectionByName("<absolute>");
1985   for (auto SI = Sections.begin(); SI != Sections.end();) {
1986     BinarySection *Section = *SI;
1987     if (Section->hasSectionRef() || Section->getOutputSize() ||
1988         (AbsSection && Section == &AbsSection.get())) {
1989       ++SI;
1990       continue;
1991     }
1992 
1993     LLVM_DEBUG(dbgs() << "LLVM-DEBUG: deregistering " << Section->getName()
1994                       << '\n';);
1995     deregisterSectionName(*Section);
1996     SI = Sections.erase(SI);
1997     delete Section;
1998   }
1999 }
2000 
2001 bool BinaryContext::deregisterSection(BinarySection &Section) {
2002   BinarySection *SectionPtr = &Section;
2003   auto Itr = Sections.find(SectionPtr);
2004   if (Itr != Sections.end()) {
2005     auto Range = AddressToSection.equal_range(SectionPtr->getAddress());
2006     while (Range.first != Range.second) {
2007       if (Range.first->second == SectionPtr) {
2008         AddressToSection.erase(Range.first);
2009         break;
2010       }
2011       ++Range.first;
2012     }
2013 
2014     deregisterSectionName(*SectionPtr);
2015     Sections.erase(Itr);
2016     delete SectionPtr;
2017     return true;
2018   }
2019   return false;
2020 }
2021 
2022 void BinaryContext::renameSection(BinarySection &Section,
2023                                   const Twine &NewName) {
2024   auto Itr = Sections.find(&Section);
2025   assert(Itr != Sections.end() && "Section must exist to be renamed.");
2026   Sections.erase(Itr);
2027 
2028   deregisterSectionName(Section);
2029 
2030   Section.Name = NewName.str();
2031   Section.setOutputName(Section.Name);
2032 
2033   NameToSection.insert(std::make_pair(Section.Name, &Section));
2034 
2035   // Reinsert with the new name.
2036   Sections.insert(&Section);
2037 }
2038 
2039 void BinaryContext::printSections(raw_ostream &OS) const {
2040   for (BinarySection *const &Section : Sections)
2041     OS << "BOLT-INFO: " << *Section << "\n";
2042 }
2043 
2044 BinarySection &BinaryContext::absoluteSection() {
2045   if (ErrorOr<BinarySection &> Section = getUniqueSectionByName("<absolute>"))
2046     return *Section;
2047   return registerOrUpdateSection("<absolute>", ELF::SHT_NULL, 0u);
2048 }
2049 
2050 ErrorOr<uint64_t> BinaryContext::getUnsignedValueAtAddress(uint64_t Address,
2051                                                            size_t Size) const {
2052   const ErrorOr<const BinarySection &> Section = getSectionForAddress(Address);
2053   if (!Section)
2054     return std::make_error_code(std::errc::bad_address);
2055 
2056   if (Section->isVirtual())
2057     return 0;
2058 
2059   DataExtractor DE(Section->getContents(), AsmInfo->isLittleEndian(),
2060                    AsmInfo->getCodePointerSize());
2061   auto ValueOffset = static_cast<uint64_t>(Address - Section->getAddress());
2062   return DE.getUnsigned(&ValueOffset, Size);
2063 }
2064 
2065 ErrorOr<uint64_t> BinaryContext::getSignedValueAtAddress(uint64_t Address,
2066                                                          size_t Size) const {
2067   const ErrorOr<const BinarySection &> Section = getSectionForAddress(Address);
2068   if (!Section)
2069     return std::make_error_code(std::errc::bad_address);
2070 
2071   if (Section->isVirtual())
2072     return 0;
2073 
2074   DataExtractor DE(Section->getContents(), AsmInfo->isLittleEndian(),
2075                    AsmInfo->getCodePointerSize());
2076   auto ValueOffset = static_cast<uint64_t>(Address - Section->getAddress());
2077   return DE.getSigned(&ValueOffset, Size);
2078 }
2079 
2080 void BinaryContext::addRelocation(uint64_t Address, MCSymbol *Symbol,
2081                                   uint64_t Type, uint64_t Addend,
2082                                   uint64_t Value) {
2083   ErrorOr<BinarySection &> Section = getSectionForAddress(Address);
2084   assert(Section && "cannot find section for address");
2085   Section->addRelocation(Address - Section->getAddress(), Symbol, Type, Addend,
2086                          Value);
2087 }
2088 
2089 void BinaryContext::addDynamicRelocation(uint64_t Address, MCSymbol *Symbol,
2090                                          uint64_t Type, uint64_t Addend,
2091                                          uint64_t Value) {
2092   ErrorOr<BinarySection &> Section = getSectionForAddress(Address);
2093   assert(Section && "cannot find section for address");
2094   Section->addDynamicRelocation(Address - Section->getAddress(), Symbol, Type,
2095                                 Addend, Value);
2096 }
2097 
2098 bool BinaryContext::removeRelocationAt(uint64_t Address) {
2099   ErrorOr<BinarySection &> Section = getSectionForAddress(Address);
2100   assert(Section && "cannot find section for address");
2101   return Section->removeRelocationAt(Address - Section->getAddress());
2102 }
2103 
2104 const Relocation *BinaryContext::getRelocationAt(uint64_t Address) const {
2105   ErrorOr<const BinarySection &> Section = getSectionForAddress(Address);
2106   if (!Section)
2107     return nullptr;
2108 
2109   return Section->getRelocationAt(Address - Section->getAddress());
2110 }
2111 
2112 const Relocation *BinaryContext::getDynamicRelocationAt(uint64_t Address) {
2113   ErrorOr<BinarySection &> Section = getSectionForAddress(Address);
2114   if (!Section)
2115     return nullptr;
2116 
2117   return Section->getDynamicRelocationAt(Address - Section->getAddress());
2118 }
2119 
2120 void BinaryContext::markAmbiguousRelocations(BinaryData &BD,
2121                                              const uint64_t Address) {
2122   auto setImmovable = [&](BinaryData &BD) {
2123     BinaryData *Root = BD.getAtomicRoot();
2124     LLVM_DEBUG(if (Root->isMoveable()) {
2125       dbgs() << "BOLT-DEBUG: setting " << *Root << " as immovable "
2126              << "due to ambiguous relocation referencing 0x"
2127              << Twine::utohexstr(Address) << '\n';
2128     });
2129     Root->setIsMoveable(false);
2130   };
2131 
2132   if (Address == BD.getAddress()) {
2133     setImmovable(BD);
2134 
2135     // Set previous symbol as immovable
2136     BinaryData *Prev = getBinaryDataContainingAddress(Address - 1);
2137     if (Prev && Prev->getEndAddress() == BD.getAddress())
2138       setImmovable(*Prev);
2139   }
2140 
2141   if (Address == BD.getEndAddress()) {
2142     setImmovable(BD);
2143 
2144     // Set next symbol as immovable
2145     BinaryData *Next = getBinaryDataContainingAddress(BD.getEndAddress());
2146     if (Next && Next->getAddress() == BD.getEndAddress())
2147       setImmovable(*Next);
2148   }
2149 }
2150 
2151 BinaryFunction *BinaryContext::getFunctionForSymbol(const MCSymbol *Symbol,
2152                                                     uint64_t *EntryDesc) {
2153   std::shared_lock<llvm::sys::RWMutex> Lock(SymbolToFunctionMapMutex);
2154   auto BFI = SymbolToFunctionMap.find(Symbol);
2155   if (BFI == SymbolToFunctionMap.end())
2156     return nullptr;
2157 
2158   BinaryFunction *BF = BFI->second;
2159   if (EntryDesc)
2160     *EntryDesc = BF->getEntryIDForSymbol(Symbol);
2161 
2162   return BF;
2163 }
2164 
2165 void BinaryContext::exitWithBugReport(StringRef Message,
2166                                       const BinaryFunction &Function) const {
2167   errs() << "=======================================\n";
2168   errs() << "BOLT is unable to proceed because it couldn't properly understand "
2169             "this function.\n";
2170   errs() << "If you are running the most recent version of BOLT, you may "
2171             "want to "
2172             "report this and paste this dump.\nPlease check that there is no "
2173             "sensitive contents being shared in this dump.\n";
2174   errs() << "\nOffending function: " << Function.getPrintName() << "\n\n";
2175   ScopedPrinter SP(errs());
2176   SP.printBinaryBlock("Function contents", *Function.getData());
2177   errs() << "\n";
2178   Function.dump();
2179   errs() << "ERROR: " << Message;
2180   errs() << "\n=======================================\n";
2181   exit(1);
2182 }
2183 
2184 BinaryFunction *
2185 BinaryContext::createInjectedBinaryFunction(const std::string &Name,
2186                                             bool IsSimple) {
2187   InjectedBinaryFunctions.push_back(new BinaryFunction(Name, *this, IsSimple));
2188   BinaryFunction *BF = InjectedBinaryFunctions.back();
2189   setSymbolToFunctionMap(BF->getSymbol(), BF);
2190   BF->CurrentState = BinaryFunction::State::CFG;
2191   return BF;
2192 }
2193 
/// Measure how many bytes \p BF occupies when emitted with its current
/// layout.
///
/// The function body is emitted into a temporary object streamer that uses
/// its own MCContext, the result is laid out, and sizes are taken as the
/// distance between labels placed around each fragment.
///
/// \param BF function to measure.
/// \param FixBranches when true, BF.fixBranches() is called first.
/// \returns a pair of (size of the main fragment, sum of the sizes of all
/// split fragments).
std::pair<size_t, size_t>
BinaryContext::calculateEmittedSize(BinaryFunction &BF, bool FixBranches) {
  // Adjust branch instruction to match the current layout.
  if (FixBranches)
    BF.fixBranches();

  // Create local MC context to isolate the effect of ephemeral code emission.
  IndependentCodeEmitter MCEInstance = createIndependentMCCodeEmitter();
  MCContext *LocalCtx = MCEInstance.LocalCtx.get();
  // The backend is created as a raw pointer because it is needed to build the
  // object writer; it is wrapped in a unique_ptr when passed to the streamer
  // below.
  MCAsmBackend *MAB =
      TheTarget->createMCAsmBackend(*STI, *MRI, MCTargetOptions());

  // Buffer that receives the object writer's output.
  SmallString<256> Code;
  raw_svector_ostream VecOS(Code);

  std::unique_ptr<MCObjectWriter> OW = MAB->createObjectWriter(VecOS);
  std::unique_ptr<MCStreamer> Streamer(TheTarget->createMCObjectStreamer(
      *TheTriple, *LocalCtx, std::unique_ptr<MCAsmBackend>(MAB), std::move(OW),
      std::unique_ptr<MCCodeEmitter>(MCEInstance.MCE.release()), *STI,
      /*RelaxAll=*/false,
      /*IncrementalLinkerCompatible=*/false,
      /*DWARFMustBeAtTheEnd=*/false));

  Streamer->initSections(false, *STI);

  MCSection *Section = MCEInstance.LocalMOFI->getTextSection();
  Section->setHasInstructions(true);

  // Create symbols in the LocalCtx so that they get destroyed with it.
  MCSymbol *StartLabel = LocalCtx->createTempSymbol();
  MCSymbol *EndLabel = LocalCtx->createTempSymbol();

  // Emit the main fragment between StartLabel and EndLabel.
  Streamer->switchSection(Section);
  Streamer->emitLabel(StartLabel);
  emitFunctionBody(*Streamer, BF, BF.getLayout().getMainFragment(),
                   /*EmitCodeOnly=*/true);
  Streamer->emitLabel(EndLabel);

  // Emit every split fragment into its own section, again bracketed by a
  // pair of labels that is remembered for the size computation.
  using LabelRange = std::pair<const MCSymbol *, const MCSymbol *>;
  SmallVector<LabelRange> SplitLabels;
  for (FunctionFragment &FF : BF.getLayout().getSplitFragments()) {
    MCSymbol *const SplitStartLabel = LocalCtx->createTempSymbol();
    MCSymbol *const SplitEndLabel = LocalCtx->createTempSymbol();
    SplitLabels.emplace_back(SplitStartLabel, SplitEndLabel);

    MCSectionELF *const SplitSection = LocalCtx->getELFSection(
        BF.getCodeSectionName(FF.getFragmentNum()), ELF::SHT_PROGBITS,
        ELF::SHF_EXECINSTR | ELF::SHF_ALLOC);
    SplitSection->setHasInstructions(true);
    Streamer->switchSection(SplitSection);

    Streamer->emitLabel(SplitStartLabel);
    emitFunctionBody(*Streamer, BF, FF, /*EmitCodeOnly=*/true);
    Streamer->emitLabel(SplitEndLabel);
    // To avoid calling MCObjectStreamer::flushPendingLabels() which is
    // private
    Streamer->emitBytes(StringRef(""));
    Streamer->switchSection(Section);
  }

  // To avoid calling MCObjectStreamer::flushPendingLabels() which is private or
  // MCStreamer::Finish(), which does more than we want
  Streamer->emitBytes(StringRef(""));

  // Lay out the emitted code so that label offsets can be queried.
  MCAssembler &Assembler =
      static_cast<MCObjectStreamer *>(Streamer.get())->getAssembler();
  MCAsmLayout Layout(Assembler);
  Assembler.layout(Layout);

  const uint64_t HotSize =
      Layout.getSymbolOffset(*EndLabel) - Layout.getSymbolOffset(*StartLabel);
  // Sum of (end - start) over all split fragments.
  const uint64_t ColdSize =
      std::accumulate(SplitLabels.begin(), SplitLabels.end(), 0ULL,
                      [&](const uint64_t Accu, const LabelRange &Labels) {
                        return Accu + Layout.getSymbolOffset(*Labels.second) -
                               Layout.getSymbolOffset(*Labels.first);
                      });

  // Clean-up the effect of the code emission.
  for (const MCSymbol &Symbol : Assembler.symbols()) {
    MCSymbol *MutableSymbol = const_cast<MCSymbol *>(&Symbol);
    MutableSymbol->setUndefined();
    MutableSymbol->setIsRegistered(false);
  }

  return std::make_pair(HotSize, ColdSize);
}
2281 
2282 bool BinaryContext::validateInstructionEncoding(
2283     ArrayRef<uint8_t> InputSequence) const {
2284   MCInst Inst;
2285   uint64_t InstSize;
2286   DisAsm->getInstruction(Inst, InstSize, InputSequence, 0, nulls());
2287   assert(InstSize == InputSequence.size() &&
2288          "Disassembled instruction size does not match the sequence.");
2289 
2290   SmallString<256> Code;
2291   SmallVector<MCFixup, 4> Fixups;
2292 
2293   MCE->encodeInstruction(Inst, Code, Fixups, *STI);
2294   auto OutputSequence = ArrayRef<uint8_t>((uint8_t *)Code.data(), Code.size());
2295   if (InputSequence != OutputSequence) {
2296     if (opts::Verbosity > 1) {
2297       errs() << "BOLT-WARNING: mismatched encoding detected\n"
2298              << "      input: " << InputSequence << '\n'
2299              << "     output: " << OutputSequence << '\n';
2300     }
2301     return false;
2302   }
2303 
2304   return true;
2305 }
2306 
2307 uint64_t BinaryContext::getHotThreshold() const {
2308   static uint64_t Threshold = 0;
2309   if (Threshold == 0) {
2310     Threshold = std::max(
2311         (uint64_t)opts::ExecutionCountThreshold,
2312         NumProfiledFuncs ? SumExecutionCount / (2 * NumProfiledFuncs) : 1);
2313   }
2314   return Threshold;
2315 }
2316 
2317 BinaryFunction *BinaryContext::getBinaryFunctionContainingAddress(
2318     uint64_t Address, bool CheckPastEnd, bool UseMaxSize) {
2319   auto FI = BinaryFunctions.upper_bound(Address);
2320   if (FI == BinaryFunctions.begin())
2321     return nullptr;
2322   --FI;
2323 
2324   const uint64_t UsedSize =
2325       UseMaxSize ? FI->second.getMaxSize() : FI->second.getSize();
2326 
2327   if (Address >= FI->first + UsedSize + (CheckPastEnd ? 1 : 0))
2328     return nullptr;
2329 
2330   return &FI->second;
2331 }
2332 
2333 BinaryFunction *BinaryContext::getBinaryFunctionAtAddress(uint64_t Address) {
2334   // First, try to find a function starting at the given address. If the
2335   // function was folded, this will get us the original folded function if it
2336   // wasn't removed from the list, e.g. in non-relocation mode.
2337   auto BFI = BinaryFunctions.find(Address);
2338   if (BFI != BinaryFunctions.end())
2339     return &BFI->second;
2340 
2341   // We might have folded the function matching the object at the given
2342   // address. In such case, we look for a function matching the symbol
2343   // registered at the original address. The new function (the one that the
2344   // original was folded into) will hold the symbol.
2345   if (const BinaryData *BD = getBinaryDataAtAddress(Address)) {
2346     uint64_t EntryID = 0;
2347     BinaryFunction *BF = getFunctionForSymbol(BD->getSymbol(), &EntryID);
2348     if (BF && EntryID == 0)
2349       return BF;
2350   }
2351   return nullptr;
2352 }
2353 
2354 DebugAddressRangesVector BinaryContext::translateModuleAddressRanges(
2355     const DWARFAddressRangesVector &InputRanges) const {
2356   DebugAddressRangesVector OutputRanges;
2357 
2358   for (const DWARFAddressRange Range : InputRanges) {
2359     auto BFI = BinaryFunctions.lower_bound(Range.LowPC);
2360     while (BFI != BinaryFunctions.end()) {
2361       const BinaryFunction &Function = BFI->second;
2362       if (Function.getAddress() >= Range.HighPC)
2363         break;
2364       const DebugAddressRangesVector FunctionRanges =
2365           Function.getOutputAddressRanges();
2366       llvm::move(FunctionRanges, std::back_inserter(OutputRanges));
2367       std::advance(BFI, 1);
2368     }
2369   }
2370 
2371   return OutputRanges;
2372 }
2373 
2374 } // namespace bolt
2375 } // namespace llvm
2376