xref: /llvm-project/bolt/lib/Core/BinaryContext.cpp (revision f3e54f2f97daa135b99a47277625cc6e8391753e)
1 //===- bolt/Core/BinaryContext.cpp - Low-level context --------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements the BinaryContext class.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "bolt/Core/BinaryContext.h"
14 #include "bolt/Core/BinaryEmitter.h"
15 #include "bolt/Core/BinaryFunction.h"
16 #include "bolt/Utils/CommandLineOpts.h"
17 #include "bolt/Utils/NameResolver.h"
18 #include "bolt/Utils/Utils.h"
19 #include "llvm/ADT/STLExtras.h"
20 #include "llvm/ADT/Twine.h"
21 #include "llvm/DebugInfo/DWARF/DWARFCompileUnit.h"
22 #include "llvm/DebugInfo/DWARF/DWARFFormValue.h"
23 #include "llvm/DebugInfo/DWARF/DWARFUnit.h"
24 #include "llvm/MC/MCAsmLayout.h"
25 #include "llvm/MC/MCAssembler.h"
26 #include "llvm/MC/MCContext.h"
27 #include "llvm/MC/MCDisassembler/MCDisassembler.h"
28 #include "llvm/MC/MCInstPrinter.h"
29 #include "llvm/MC/MCObjectStreamer.h"
30 #include "llvm/MC/MCObjectWriter.h"
31 #include "llvm/MC/MCRegisterInfo.h"
32 #include "llvm/MC/MCSectionELF.h"
33 #include "llvm/MC/MCStreamer.h"
34 #include "llvm/MC/MCSubtargetInfo.h"
35 #include "llvm/MC/MCSymbol.h"
36 #include "llvm/Support/CommandLine.h"
37 #include "llvm/Support/Error.h"
38 #include "llvm/Support/Regex.h"
39 #include <algorithm>
40 #include <functional>
41 #include <iterator>
42 #include <numeric>
43 #include <unordered_set>
44 
45 using namespace llvm;
46 
47 #undef  DEBUG_TYPE
48 #define DEBUG_TYPE "bolt"
49 
50 namespace opts {
51 
52 cl::opt<bool> NoHugePages("no-huge-pages",
53                           cl::desc("use regular size pages for code alignment"),
54                           cl::Hidden, cl::cat(BoltCategory));
55 
56 static cl::opt<bool>
57 PrintDebugInfo("print-debug-info",
58   cl::desc("print debug info when printing functions"),
59   cl::Hidden,
60   cl::ZeroOrMore,
61   cl::cat(BoltCategory));
62 
63 cl::opt<bool> PrintRelocations(
64     "print-relocations",
65     cl::desc("print relocations when printing functions/objects"), cl::Hidden,
66     cl::cat(BoltCategory));
67 
68 static cl::opt<bool>
69 PrintMemData("print-mem-data",
70   cl::desc("print memory data annotations when printing functions"),
71   cl::Hidden,
72   cl::ZeroOrMore,
73   cl::cat(BoltCategory));
74 
75 } // namespace opts
76 
77 namespace llvm {
78 namespace bolt {
79 
80 BinaryContext::BinaryContext(std::unique_ptr<MCContext> Ctx,
81                              std::unique_ptr<DWARFContext> DwCtx,
82                              std::unique_ptr<Triple> TheTriple,
83                              const Target *TheTarget, std::string TripleName,
84                              std::unique_ptr<MCCodeEmitter> MCE,
85                              std::unique_ptr<MCObjectFileInfo> MOFI,
86                              std::unique_ptr<const MCAsmInfo> AsmInfo,
87                              std::unique_ptr<const MCInstrInfo> MII,
88                              std::unique_ptr<const MCSubtargetInfo> STI,
89                              std::unique_ptr<MCInstPrinter> InstPrinter,
90                              std::unique_ptr<const MCInstrAnalysis> MIA,
91                              std::unique_ptr<MCPlusBuilder> MIB,
92                              std::unique_ptr<const MCRegisterInfo> MRI,
93                              std::unique_ptr<MCDisassembler> DisAsm)
94     : Ctx(std::move(Ctx)), DwCtx(std::move(DwCtx)),
95       TheTriple(std::move(TheTriple)), TheTarget(TheTarget),
96       TripleName(TripleName), MCE(std::move(MCE)), MOFI(std::move(MOFI)),
97       AsmInfo(std::move(AsmInfo)), MII(std::move(MII)), STI(std::move(STI)),
98       InstPrinter(std::move(InstPrinter)), MIA(std::move(MIA)),
99       MIB(std::move(MIB)), MRI(std::move(MRI)), DisAsm(std::move(DisAsm)) {
100   Relocation::Arch = this->TheTriple->getArch();
101   RegularPageSize = isAArch64() ? RegularPageSizeAArch64 : RegularPageSizeX86;
102   PageAlign = opts::NoHugePages ? RegularPageSize : HugePageSize;
103 }
104 
105 BinaryContext::~BinaryContext() {
106   for (BinarySection *Section : Sections)
107     delete Section;
108   for (BinaryFunction *InjectedFunction : InjectedBinaryFunctions)
109     delete InjectedFunction;
110   for (std::pair<const uint64_t, JumpTable *> JTI : JumpTables)
111     delete JTI.second;
112   clearBinaryData();
113 }
114 
115 /// Create BinaryContext for a given architecture \p ArchName and
116 /// triple \p TripleName.
117 Expected<std::unique_ptr<BinaryContext>>
118 BinaryContext::createBinaryContext(const ObjectFile *File, bool IsPIC,
119                                    std::unique_ptr<DWARFContext> DwCtx) {
120   StringRef ArchName = "";
121   std::string FeaturesStr = "";
122   switch (File->getArch()) {
123   case llvm::Triple::x86_64:
124     ArchName = "x86-64";
125     FeaturesStr = "+nopl";
126     break;
127   case llvm::Triple::aarch64:
128     ArchName = "aarch64";
129     FeaturesStr = "+all";
130     break;
131   case llvm::Triple::riscv64: {
132     ArchName = "riscv64";
133     Expected<SubtargetFeatures> Features = File->getFeatures();
134 
135     if (auto E = Features.takeError())
136       return std::move(E);
137 
138     // We rely on relaxation for some transformations (e.g., promoting all calls
139     // to PseudoCALL and then making JITLink relax them). Since the relax
140     // feature is not stored in the object file, we manually enable it.
141     Features->AddFeature("relax");
142     FeaturesStr = Features->getString();
143     break;
144   }
145   default:
146     return createStringError(std::errc::not_supported,
147                              "BOLT-ERROR: Unrecognized machine in ELF file");
148   }
149 
150   auto TheTriple = std::make_unique<Triple>(File->makeTriple());
151   const std::string TripleName = TheTriple->str();
152 
153   std::string Error;
154   const Target *TheTarget =
155       TargetRegistry::lookupTarget(std::string(ArchName), *TheTriple, Error);
156   if (!TheTarget)
157     return createStringError(make_error_code(std::errc::not_supported),
158                              Twine("BOLT-ERROR: ", Error));
159 
160   std::unique_ptr<const MCRegisterInfo> MRI(
161       TheTarget->createMCRegInfo(TripleName));
162   if (!MRI)
163     return createStringError(
164         make_error_code(std::errc::not_supported),
165         Twine("BOLT-ERROR: no register info for target ", TripleName));
166 
167   // Set up disassembler.
168   std::unique_ptr<MCAsmInfo> AsmInfo(
169       TheTarget->createMCAsmInfo(*MRI, TripleName, MCTargetOptions()));
170   if (!AsmInfo)
171     return createStringError(
172         make_error_code(std::errc::not_supported),
173         Twine("BOLT-ERROR: no assembly info for target ", TripleName));
174   // BOLT creates "func@PLT" symbols for PLT entries. In function assembly dump
175   // we want to emit such names as using @PLT without double quotes to convey
176   // variant kind to the assembler. BOLT doesn't rely on the linker so we can
177   // override the default AsmInfo behavior to emit names the way we want.
178   AsmInfo->setAllowAtInName(true);
179 
180   std::unique_ptr<const MCSubtargetInfo> STI(
181       TheTarget->createMCSubtargetInfo(TripleName, "", FeaturesStr));
182   if (!STI)
183     return createStringError(
184         make_error_code(std::errc::not_supported),
185         Twine("BOLT-ERROR: no subtarget info for target ", TripleName));
186 
187   std::unique_ptr<const MCInstrInfo> MII(TheTarget->createMCInstrInfo());
188   if (!MII)
189     return createStringError(
190         make_error_code(std::errc::not_supported),
191         Twine("BOLT-ERROR: no instruction info for target ", TripleName));
192 
193   std::unique_ptr<MCContext> Ctx(
194       new MCContext(*TheTriple, AsmInfo.get(), MRI.get(), STI.get()));
195   std::unique_ptr<MCObjectFileInfo> MOFI(
196       TheTarget->createMCObjectFileInfo(*Ctx, IsPIC));
197   Ctx->setObjectFileInfo(MOFI.get());
198   // We do not support X86 Large code model. Change this in the future.
199   bool Large = false;
200   if (TheTriple->getArch() == llvm::Triple::aarch64)
201     Large = true;
202   unsigned LSDAEncoding =
203       Large ? dwarf::DW_EH_PE_absptr : dwarf::DW_EH_PE_udata4;
204   if (IsPIC) {
205     LSDAEncoding = dwarf::DW_EH_PE_pcrel |
206                    (Large ? dwarf::DW_EH_PE_sdata8 : dwarf::DW_EH_PE_sdata4);
207   }
208 
209   std::unique_ptr<MCDisassembler> DisAsm(
210       TheTarget->createMCDisassembler(*STI, *Ctx));
211 
212   if (!DisAsm)
213     return createStringError(
214         make_error_code(std::errc::not_supported),
215         Twine("BOLT-ERROR: no disassembler info for target ", TripleName));
216 
217   std::unique_ptr<const MCInstrAnalysis> MIA(
218       TheTarget->createMCInstrAnalysis(MII.get()));
219   if (!MIA)
220     return createStringError(
221         make_error_code(std::errc::not_supported),
222         Twine("BOLT-ERROR: failed to create instruction analysis for target ",
223               TripleName));
224 
225   int AsmPrinterVariant = AsmInfo->getAssemblerDialect();
226   std::unique_ptr<MCInstPrinter> InstructionPrinter(
227       TheTarget->createMCInstPrinter(*TheTriple, AsmPrinterVariant, *AsmInfo,
228                                      *MII, *MRI));
229   if (!InstructionPrinter)
230     return createStringError(
231         make_error_code(std::errc::not_supported),
232         Twine("BOLT-ERROR: no instruction printer for target ", TripleName));
233   InstructionPrinter->setPrintImmHex(true);
234 
235   std::unique_ptr<MCCodeEmitter> MCE(
236       TheTarget->createMCCodeEmitter(*MII, *Ctx));
237 
238   // Make sure we don't miss any output on core dumps.
239   outs().SetUnbuffered();
240   errs().SetUnbuffered();
241   dbgs().SetUnbuffered();
242 
243   auto BC = std::make_unique<BinaryContext>(
244       std::move(Ctx), std::move(DwCtx), std::move(TheTriple), TheTarget,
245       std::string(TripleName), std::move(MCE), std::move(MOFI),
246       std::move(AsmInfo), std::move(MII), std::move(STI),
247       std::move(InstructionPrinter), std::move(MIA), nullptr, std::move(MRI),
248       std::move(DisAsm));
249 
250   BC->LSDAEncoding = LSDAEncoding;
251 
252   BC->MAB = std::unique_ptr<MCAsmBackend>(
253       BC->TheTarget->createMCAsmBackend(*BC->STI, *BC->MRI, MCTargetOptions()));
254 
255   BC->setFilename(File->getFileName());
256 
257   BC->HasFixedLoadAddress = !IsPIC;
258 
259   BC->SymbolicDisAsm = std::unique_ptr<MCDisassembler>(
260       BC->TheTarget->createMCDisassembler(*BC->STI, *BC->Ctx));
261 
262   if (!BC->SymbolicDisAsm)
263     return createStringError(
264         make_error_code(std::errc::not_supported),
265         Twine("BOLT-ERROR: no disassembler info for target ", TripleName));
266 
267   return std::move(BC);
268 }
269 
270 bool BinaryContext::forceSymbolRelocations(StringRef SymbolName) const {
271   if (opts::HotText &&
272       (SymbolName == "__hot_start" || SymbolName == "__hot_end"))
273     return true;
274 
275   if (opts::HotData &&
276       (SymbolName == "__hot_data_start" || SymbolName == "__hot_data_end"))
277     return true;
278 
279   if (SymbolName == "_end")
280     return true;
281 
282   return false;
283 }
284 
285 std::unique_ptr<MCObjectWriter>
286 BinaryContext::createObjectWriter(raw_pwrite_stream &OS) {
287   return MAB->createObjectWriter(OS);
288 }
289 
290 bool BinaryContext::validateObjectNesting() const {
291   auto Itr = BinaryDataMap.begin();
292   auto End = BinaryDataMap.end();
293   bool Valid = true;
294   while (Itr != End) {
295     auto Next = std::next(Itr);
296     while (Next != End &&
297            Itr->second->getSection() == Next->second->getSection() &&
298            Itr->second->containsRange(Next->second->getAddress(),
299                                       Next->second->getSize())) {
300       if (Next->second->Parent != Itr->second) {
301         errs() << "BOLT-WARNING: object nesting incorrect for:\n"
302                << "BOLT-WARNING:  " << *Itr->second << "\n"
303                << "BOLT-WARNING:  " << *Next->second << "\n";
304         Valid = false;
305       }
306       ++Next;
307     }
308     Itr = Next;
309   }
310   return Valid;
311 }
312 
313 bool BinaryContext::validateHoles() const {
314   bool Valid = true;
315   for (BinarySection &Section : sections()) {
316     for (const Relocation &Rel : Section.relocations()) {
317       uint64_t RelAddr = Rel.Offset + Section.getAddress();
318       const BinaryData *BD = getBinaryDataContainingAddress(RelAddr);
319       if (!BD) {
320         errs() << "BOLT-WARNING: no BinaryData found for relocation at address"
321                << " 0x" << Twine::utohexstr(RelAddr) << " in "
322                << Section.getName() << "\n";
323         Valid = false;
324       } else if (!BD->getAtomicRoot()) {
325         errs() << "BOLT-WARNING: no atomic BinaryData found for relocation at "
326                << "address 0x" << Twine::utohexstr(RelAddr) << " in "
327                << Section.getName() << "\n";
328         Valid = false;
329       }
330     }
331   }
332   return Valid;
333 }
334 
335 void BinaryContext::updateObjectNesting(BinaryDataMapType::iterator GAI) {
336   const uint64_t Address = GAI->second->getAddress();
337   const uint64_t Size = GAI->second->getSize();
338 
339   auto fixParents = [&](BinaryDataMapType::iterator Itr,
340                         BinaryData *NewParent) {
341     BinaryData *OldParent = Itr->second->Parent;
342     Itr->second->Parent = NewParent;
343     ++Itr;
344     while (Itr != BinaryDataMap.end() && OldParent &&
345            Itr->second->Parent == OldParent) {
346       Itr->second->Parent = NewParent;
347       ++Itr;
348     }
349   };
350 
351   // Check if the previous symbol contains the newly added symbol.
352   if (GAI != BinaryDataMap.begin()) {
353     BinaryData *Prev = std::prev(GAI)->second;
354     while (Prev) {
355       if (Prev->getSection() == GAI->second->getSection() &&
356           Prev->containsRange(Address, Size)) {
357         fixParents(GAI, Prev);
358       } else {
359         fixParents(GAI, nullptr);
360       }
361       Prev = Prev->Parent;
362     }
363   }
364 
365   // Check if the newly added symbol contains any subsequent symbols.
366   if (Size != 0) {
367     BinaryData *BD = GAI->second->Parent ? GAI->second->Parent : GAI->second;
368     auto Itr = std::next(GAI);
369     while (
370         Itr != BinaryDataMap.end() &&
371         BD->containsRange(Itr->second->getAddress(), Itr->second->getSize())) {
372       Itr->second->Parent = BD;
373       ++Itr;
374     }
375   }
376 }
377 
378 iterator_range<BinaryContext::binary_data_iterator>
379 BinaryContext::getSubBinaryData(BinaryData *BD) {
380   auto Start = std::next(BinaryDataMap.find(BD->getAddress()));
381   auto End = Start;
382   while (End != BinaryDataMap.end() && BD->isAncestorOf(End->second))
383     ++End;
384   return make_range(Start, End);
385 }
386 
387 std::pair<const MCSymbol *, uint64_t>
388 BinaryContext::handleAddressRef(uint64_t Address, BinaryFunction &BF,
389                                 bool IsPCRel) {
390   if (isAArch64()) {
391     // Check if this is an access to a constant island and create bookkeeping
392     // to keep track of it and emit it later as part of this function.
393     if (MCSymbol *IslandSym = BF.getOrCreateIslandAccess(Address))
394       return std::make_pair(IslandSym, 0);
395 
396     // Detect custom code written in assembly that refers to arbitrary
397     // constant islands from other functions. Write this reference so we
398     // can pull this constant island and emit it as part of this function
399     // too.
400     auto IslandIter = AddressToConstantIslandMap.lower_bound(Address);
401 
402     if (IslandIter != AddressToConstantIslandMap.begin() &&
403         (IslandIter == AddressToConstantIslandMap.end() ||
404          IslandIter->first > Address))
405       --IslandIter;
406 
407     if (IslandIter != AddressToConstantIslandMap.end()) {
408       // Fall-back to referencing the original constant island in the presence
409       // of dynamic relocs, as we currently do not support cloning them.
410       // Notice: we might fail to link because of this, if the original constant
411       // island we are referring would be emitted too far away.
412       if (IslandIter->second->hasDynamicRelocationAtIsland()) {
413         MCSymbol *IslandSym =
414             IslandIter->second->getOrCreateIslandAccess(Address);
415         if (IslandSym)
416           return std::make_pair(IslandSym, 0);
417       } else if (MCSymbol *IslandSym =
418                      IslandIter->second->getOrCreateProxyIslandAccess(Address,
419                                                                       BF)) {
420         BF.createIslandDependency(IslandSym, IslandIter->second);
421         return std::make_pair(IslandSym, 0);
422       }
423     }
424   }
425 
426   // Note that the address does not necessarily have to reside inside
427   // a section, it could be an absolute address too.
428   ErrorOr<BinarySection &> Section = getSectionForAddress(Address);
429   if (Section && Section->isText()) {
430     if (BF.containsAddress(Address, /*UseMaxSize=*/isAArch64())) {
431       if (Address != BF.getAddress()) {
432         // The address could potentially escape. Mark it as another entry
433         // point into the function.
434         if (opts::Verbosity >= 1) {
435           outs() << "BOLT-INFO: potentially escaped address 0x"
436                  << Twine::utohexstr(Address) << " in function " << BF << '\n';
437         }
438         BF.HasInternalLabelReference = true;
439         return std::make_pair(
440             BF.addEntryPointAtOffset(Address - BF.getAddress()), 0);
441       }
442     } else {
443       addInterproceduralReference(&BF, Address);
444     }
445   }
446 
447   // With relocations, catch jump table references outside of the basic block
448   // containing the indirect jump.
449   if (HasRelocations) {
450     const MemoryContentsType MemType = analyzeMemoryAt(Address, BF);
451     if (MemType == MemoryContentsType::POSSIBLE_PIC_JUMP_TABLE && IsPCRel) {
452       const MCSymbol *Symbol =
453           getOrCreateJumpTable(BF, Address, JumpTable::JTT_PIC);
454 
455       return std::make_pair(Symbol, 0);
456     }
457   }
458 
459   if (BinaryData *BD = getBinaryDataContainingAddress(Address))
460     return std::make_pair(BD->getSymbol(), Address - BD->getAddress());
461 
462   // TODO: use DWARF info to get size/alignment here?
463   MCSymbol *TargetSymbol = getOrCreateGlobalSymbol(Address, "DATAat");
464   LLVM_DEBUG(dbgs() << "Created symbol " << TargetSymbol->getName() << '\n');
465   return std::make_pair(TargetSymbol, 0);
466 }
467 
468 MemoryContentsType BinaryContext::analyzeMemoryAt(uint64_t Address,
469                                                   BinaryFunction &BF) {
470   if (!isX86())
471     return MemoryContentsType::UNKNOWN;
472 
473   ErrorOr<BinarySection &> Section = getSectionForAddress(Address);
474   if (!Section) {
475     // No section - possibly an absolute address. Since we don't allow
476     // internal function addresses to escape the function scope - we
477     // consider it a tail call.
478     if (opts::Verbosity > 1) {
479       errs() << "BOLT-WARNING: no section for address 0x"
480              << Twine::utohexstr(Address) << " referenced from function " << BF
481              << '\n';
482     }
483     return MemoryContentsType::UNKNOWN;
484   }
485 
486   if (Section->isVirtual()) {
487     // The contents are filled at runtime.
488     return MemoryContentsType::UNKNOWN;
489   }
490 
491   // No support for jump tables in code yet.
492   if (Section->isText())
493     return MemoryContentsType::UNKNOWN;
494 
495   // Start with checking for PIC jump table. We expect non-PIC jump tables
496   // to have high 32 bits set to 0.
497   if (analyzeJumpTable(Address, JumpTable::JTT_PIC, BF))
498     return MemoryContentsType::POSSIBLE_PIC_JUMP_TABLE;
499 
500   if (analyzeJumpTable(Address, JumpTable::JTT_NORMAL, BF))
501     return MemoryContentsType::POSSIBLE_JUMP_TABLE;
502 
503   return MemoryContentsType::UNKNOWN;
504 }
505 
506 bool BinaryContext::analyzeJumpTable(const uint64_t Address,
507                                      const JumpTable::JumpTableType Type,
508                                      const BinaryFunction &BF,
509                                      const uint64_t NextJTAddress,
510                                      JumpTable::AddressesType *EntriesAsAddress,
511                                      bool *HasEntryInFragment) const {
512   // Is one of the targets __builtin_unreachable?
513   bool HasUnreachable = false;
514 
515   // Does one of the entries match function start address?
516   bool HasStartAsEntry = false;
517 
518   // Number of targets other than __builtin_unreachable.
519   uint64_t NumRealEntries = 0;
520 
521   auto addEntryAddress = [&](uint64_t EntryAddress) {
522     if (EntriesAsAddress)
523       EntriesAsAddress->emplace_back(EntryAddress);
524   };
525 
526   ErrorOr<const BinarySection &> Section = getSectionForAddress(Address);
527   if (!Section)
528     return false;
529 
530   // The upper bound is defined by containing object, section limits, and
531   // the next jump table in memory.
532   uint64_t UpperBound = Section->getEndAddress();
533   const BinaryData *JumpTableBD = getBinaryDataAtAddress(Address);
534   if (JumpTableBD && JumpTableBD->getSize()) {
535     assert(JumpTableBD->getEndAddress() <= UpperBound &&
536            "data object cannot cross a section boundary");
537     UpperBound = JumpTableBD->getEndAddress();
538   }
539   if (NextJTAddress)
540     UpperBound = std::min(NextJTAddress, UpperBound);
541 
542   LLVM_DEBUG({
543     using JTT = JumpTable::JumpTableType;
544     dbgs() << formatv("BOLT-DEBUG: analyzeJumpTable @{0:x} in {1}, JTT={2}\n",
545                       Address, BF.getPrintName(),
546                       Type == JTT::JTT_PIC ? "PIC" : "Normal");
547   });
548   const uint64_t EntrySize = getJumpTableEntrySize(Type);
549   for (uint64_t EntryAddress = Address; EntryAddress <= UpperBound - EntrySize;
550        EntryAddress += EntrySize) {
551     LLVM_DEBUG(dbgs() << "  * Checking 0x" << Twine::utohexstr(EntryAddress)
552                       << " -> ");
553     // Check if there's a proper relocation against the jump table entry.
554     if (HasRelocations) {
555       if (Type == JumpTable::JTT_PIC &&
556           !DataPCRelocations.count(EntryAddress)) {
557         LLVM_DEBUG(
558             dbgs() << "FAIL: JTT_PIC table, no relocation for this address\n");
559         break;
560       }
561       if (Type == JumpTable::JTT_NORMAL && !getRelocationAt(EntryAddress)) {
562         LLVM_DEBUG(
563             dbgs()
564             << "FAIL: JTT_NORMAL table, no relocation for this address\n");
565         break;
566       }
567     }
568 
569     const uint64_t Value =
570         (Type == JumpTable::JTT_PIC)
571             ? Address + *getSignedValueAtAddress(EntryAddress, EntrySize)
572             : *getPointerAtAddress(EntryAddress);
573 
574     // __builtin_unreachable() case.
575     if (Value == BF.getAddress() + BF.getSize()) {
576       addEntryAddress(Value);
577       HasUnreachable = true;
578       LLVM_DEBUG(dbgs() << formatv("OK: {0:x} __builtin_unreachable\n", Value));
579       continue;
580     }
581 
582     // Function start is another special case. It is allowed in the jump table,
583     // but we need at least one another regular entry to distinguish the table
584     // from, e.g. a function pointer array.
585     if (Value == BF.getAddress()) {
586       HasStartAsEntry = true;
587       addEntryAddress(Value);
588       continue;
589     }
590 
591     // Function or one of its fragments.
592     const BinaryFunction *TargetBF = getBinaryFunctionContainingAddress(Value);
593     const bool DoesBelongToFunction =
594         BF.containsAddress(Value) ||
595         (TargetBF && TargetBF->isParentOrChildOf(BF));
596     if (!DoesBelongToFunction) {
597       LLVM_DEBUG({
598         if (!BF.containsAddress(Value)) {
599           dbgs() << "FAIL: function doesn't contain this address\n";
600           if (TargetBF) {
601             dbgs() << "  ! function containing this address: "
602                    << TargetBF->getPrintName() << '\n';
603             if (TargetBF->isFragment()) {
604               dbgs() << "  ! is a fragment";
605               for (BinaryFunction *Parent : TargetBF->ParentFragments)
606                 dbgs() << ", parent: " << Parent->getPrintName();
607               dbgs() << '\n';
608             }
609           }
610         }
611       });
612       break;
613     }
614 
615     // Check there's an instruction at this offset.
616     if (TargetBF->getState() == BinaryFunction::State::Disassembled &&
617         !TargetBF->getInstructionAtOffset(Value - TargetBF->getAddress())) {
618       LLVM_DEBUG(dbgs() << formatv("FAIL: no instruction at {0:x}\n", Value));
619       break;
620     }
621 
622     ++NumRealEntries;
623     LLVM_DEBUG(dbgs() << formatv("OK: {0:x} real entry\n", Value));
624 
625     if (TargetBF != &BF && HasEntryInFragment)
626       *HasEntryInFragment = true;
627     addEntryAddress(Value);
628   }
629 
630   // It's a jump table if the number of real entries is more than 1, or there's
631   // one real entry and one or more special targets. If there are only multiple
632   // special targets, then it's not a jump table.
633   return NumRealEntries + (HasUnreachable || HasStartAsEntry) >= 2;
634 }
635 
636 void BinaryContext::populateJumpTables() {
637   LLVM_DEBUG(dbgs() << "DataPCRelocations: " << DataPCRelocations.size()
638                     << '\n');
639   for (auto JTI = JumpTables.begin(), JTE = JumpTables.end(); JTI != JTE;
640        ++JTI) {
641     JumpTable *JT = JTI->second;
642 
643     bool NonSimpleParent = false;
644     for (BinaryFunction *BF : JT->Parents)
645       NonSimpleParent |= !BF->isSimple();
646     if (NonSimpleParent)
647       continue;
648 
649     uint64_t NextJTAddress = 0;
650     auto NextJTI = std::next(JTI);
651     if (NextJTI != JTE)
652       NextJTAddress = NextJTI->second->getAddress();
653 
654     const bool Success =
655         analyzeJumpTable(JT->getAddress(), JT->Type, *(JT->Parents[0]),
656                          NextJTAddress, &JT->EntriesAsAddress, &JT->IsSplit);
657     if (!Success) {
658       LLVM_DEBUG({
659         dbgs() << "failed to analyze ";
660         JT->print(dbgs());
661         if (NextJTI != JTE) {
662           dbgs() << "next ";
663           NextJTI->second->print(dbgs());
664         }
665       });
666       llvm_unreachable("jump table heuristic failure");
667     }
668     for (BinaryFunction *Frag : JT->Parents) {
669       if (JT->IsSplit)
670         Frag->setHasIndirectTargetToSplitFragment(true);
671       for (uint64_t EntryAddress : JT->EntriesAsAddress)
672         // if target is builtin_unreachable
673         if (EntryAddress == Frag->getAddress() + Frag->getSize()) {
674           Frag->IgnoredBranches.emplace_back(EntryAddress - Frag->getAddress(),
675                                              Frag->getSize());
676         } else if (EntryAddress >= Frag->getAddress() &&
677                    EntryAddress < Frag->getAddress() + Frag->getSize()) {
678           Frag->registerReferencedOffset(EntryAddress - Frag->getAddress());
679         }
680     }
681 
682     // In strict mode, erase PC-relative relocation record. Later we check that
683     // all such records are erased and thus have been accounted for.
684     if (opts::StrictMode && JT->Type == JumpTable::JTT_PIC) {
685       for (uint64_t Address = JT->getAddress();
686            Address < JT->getAddress() + JT->getSize();
687            Address += JT->EntrySize) {
688         DataPCRelocations.erase(DataPCRelocations.find(Address));
689       }
690     }
691 
692     // Mark to skip the function and all its fragments.
693     for (BinaryFunction *Frag : JT->Parents)
694       if (Frag->hasIndirectTargetToSplitFragment())
695         addFragmentsToSkip(Frag);
696   }
697 
698   if (opts::StrictMode && DataPCRelocations.size()) {
699     LLVM_DEBUG({
700       dbgs() << DataPCRelocations.size()
701              << " unclaimed PC-relative relocations left in data:\n";
702       for (uint64_t Reloc : DataPCRelocations)
703         dbgs() << Twine::utohexstr(Reloc) << '\n';
704     });
705     assert(0 && "unclaimed PC-relative relocations left in data\n");
706   }
707   clearList(DataPCRelocations);
708 }
709 
710 void BinaryContext::skipMarkedFragments() {
711   std::vector<BinaryFunction *> FragmentQueue;
712   // Copy the functions to FragmentQueue.
713   FragmentQueue.assign(FragmentsToSkip.begin(), FragmentsToSkip.end());
714   auto addToWorklist = [&](BinaryFunction *Function) -> void {
715     if (FragmentsToSkip.count(Function))
716       return;
717     FragmentQueue.push_back(Function);
718     addFragmentsToSkip(Function);
719   };
720   // Functions containing split jump tables need to be skipped with all
721   // fragments (transitively).
722   for (size_t I = 0; I != FragmentQueue.size(); I++) {
723     BinaryFunction *BF = FragmentQueue[I];
724     assert(FragmentsToSkip.count(BF) &&
725            "internal error in traversing function fragments");
726     if (opts::Verbosity >= 1)
727       errs() << "BOLT-WARNING: Ignoring " << BF->getPrintName() << '\n';
728     BF->setSimple(false);
729     BF->setHasIndirectTargetToSplitFragment(true);
730 
731     llvm::for_each(BF->Fragments, addToWorklist);
732     llvm::for_each(BF->ParentFragments, addToWorklist);
733   }
734   if (!FragmentsToSkip.empty())
735     errs() << "BOLT-WARNING: skipped " << FragmentsToSkip.size() << " function"
736            << (FragmentsToSkip.size() == 1 ? "" : "s")
737            << " due to cold fragments\n";
738 }
739 
740 MCSymbol *BinaryContext::getOrCreateGlobalSymbol(uint64_t Address, Twine Prefix,
741                                                  uint64_t Size,
742                                                  uint16_t Alignment,
743                                                  unsigned Flags) {
744   auto Itr = BinaryDataMap.find(Address);
745   if (Itr != BinaryDataMap.end()) {
746     assert(Itr->second->getSize() == Size || !Size);
747     return Itr->second->getSymbol();
748   }
749 
750   std::string Name = (Prefix + "0x" + Twine::utohexstr(Address)).str();
751   assert(!GlobalSymbols.count(Name) && "created name is not unique");
752   return registerNameAtAddress(Name, Address, Size, Alignment, Flags);
753 }
754 
755 MCSymbol *BinaryContext::getOrCreateUndefinedGlobalSymbol(StringRef Name) {
756   return Ctx->getOrCreateSymbol(Name);
757 }
758 
759 BinaryFunction *BinaryContext::createBinaryFunction(
760     const std::string &Name, BinarySection &Section, uint64_t Address,
761     uint64_t Size, uint64_t SymbolSize, uint16_t Alignment) {
762   auto Result = BinaryFunctions.emplace(
763       Address, BinaryFunction(Name, Section, Address, Size, *this));
764   assert(Result.second == true && "unexpected duplicate function");
765   BinaryFunction *BF = &Result.first->second;
766   registerNameAtAddress(Name, Address, SymbolSize ? SymbolSize : Size,
767                         Alignment);
768   setSymbolToFunctionMap(BF->getSymbol(), BF);
769   return BF;
770 }
771 
772 const MCSymbol *
773 BinaryContext::getOrCreateJumpTable(BinaryFunction &Function, uint64_t Address,
774                                     JumpTable::JumpTableType Type) {
775   // Two fragments of same function access same jump table
776   if (JumpTable *JT = getJumpTableContainingAddress(Address)) {
777     assert(JT->Type == Type && "jump table types have to match");
778     assert(Address == JT->getAddress() && "unexpected non-empty jump table");
779 
780     // Prevent associating a jump table to a specific fragment twice.
781     // This simple check arises from the assumption: no more than 2 fragments.
782     if (JT->Parents.size() == 1 && JT->Parents[0] != &Function) {
783       assert(JT->Parents[0]->isParentOrChildOf(Function) &&
784              "cannot re-use jump table of a different function");
785       // Duplicate the entry for the parent function for easy access
786       JT->Parents.push_back(&Function);
787       if (opts::Verbosity > 2) {
788         outs() << "BOLT-INFO: Multiple fragments access same jump table: "
789                << JT->Parents[0]->getPrintName() << "; "
790                << Function.getPrintName() << "\n";
791         JT->print(outs());
792       }
793       Function.JumpTables.emplace(Address, JT);
794       JT->Parents[0]->setHasIndirectTargetToSplitFragment(true);
795       JT->Parents[1]->setHasIndirectTargetToSplitFragment(true);
796     }
797 
798     bool IsJumpTableParent = false;
799     (void)IsJumpTableParent;
800     for (BinaryFunction *Frag : JT->Parents)
801       if (Frag == &Function)
802         IsJumpTableParent = true;
803     assert(IsJumpTableParent &&
804            "cannot re-use jump table of a different function");
805     return JT->getFirstLabel();
806   }
807 
808   // Re-use the existing symbol if possible.
809   MCSymbol *JTLabel = nullptr;
810   if (BinaryData *Object = getBinaryDataAtAddress(Address)) {
811     if (!isInternalSymbolName(Object->getSymbol()->getName()))
812       JTLabel = Object->getSymbol();
813   }
814 
815   const uint64_t EntrySize = getJumpTableEntrySize(Type);
816   if (!JTLabel) {
817     const std::string JumpTableName = generateJumpTableName(Function, Address);
818     JTLabel = registerNameAtAddress(JumpTableName, Address, 0, EntrySize);
819   }
820 
821   LLVM_DEBUG(dbgs() << "BOLT-DEBUG: creating jump table " << JTLabel->getName()
822                     << " in function " << Function << '\n');
823 
824   JumpTable *JT = new JumpTable(*JTLabel, Address, EntrySize, Type,
825                                 JumpTable::LabelMapType{{0, JTLabel}},
826                                 *getSectionForAddress(Address));
827   JT->Parents.push_back(&Function);
828   if (opts::Verbosity > 2)
829     JT->print(outs());
830   JumpTables.emplace(Address, JT);
831 
832   // Duplicate the entry for the parent function for easy access.
833   Function.JumpTables.emplace(Address, JT);
834   return JTLabel;
835 }
836 
837 std::pair<uint64_t, const MCSymbol *>
838 BinaryContext::duplicateJumpTable(BinaryFunction &Function, JumpTable *JT,
839                                   const MCSymbol *OldLabel) {
840   auto L = scopeLock();
841   unsigned Offset = 0;
842   bool Found = false;
843   for (std::pair<const unsigned, MCSymbol *> Elmt : JT->Labels) {
844     if (Elmt.second != OldLabel)
845       continue;
846     Offset = Elmt.first;
847     Found = true;
848     break;
849   }
850   assert(Found && "Label not found");
851   (void)Found;
852   MCSymbol *NewLabel = Ctx->createNamedTempSymbol("duplicatedJT");
853   JumpTable *NewJT =
854       new JumpTable(*NewLabel, JT->getAddress(), JT->EntrySize, JT->Type,
855                     JumpTable::LabelMapType{{Offset, NewLabel}},
856                     *getSectionForAddress(JT->getAddress()));
857   NewJT->Parents = JT->Parents;
858   NewJT->Entries = JT->Entries;
859   NewJT->Counts = JT->Counts;
860   uint64_t JumpTableID = ++DuplicatedJumpTables;
861   // Invert it to differentiate from regular jump tables whose IDs are their
862   // addresses in the input binary memory space
863   JumpTableID = ~JumpTableID;
864   JumpTables.emplace(JumpTableID, NewJT);
865   Function.JumpTables.emplace(JumpTableID, NewJT);
866   return std::make_pair(JumpTableID, NewLabel);
867 }
868 
869 std::string BinaryContext::generateJumpTableName(const BinaryFunction &BF,
870                                                  uint64_t Address) {
871   size_t Id;
872   uint64_t Offset = 0;
873   if (const JumpTable *JT = BF.getJumpTableContainingAddress(Address)) {
874     Offset = Address - JT->getAddress();
875     auto Itr = JT->Labels.find(Offset);
876     if (Itr != JT->Labels.end())
877       return std::string(Itr->second->getName());
878     Id = JumpTableIds.at(JT->getAddress());
879   } else {
880     Id = JumpTableIds[Address] = BF.JumpTables.size();
881   }
882   return ("JUMP_TABLE/" + BF.getOneName().str() + "." + std::to_string(Id) +
883           (Offset ? ("." + std::to_string(Offset)) : ""));
884 }
885 
886 bool BinaryContext::hasValidCodePadding(const BinaryFunction &BF) {
887   // FIXME: aarch64 support is missing.
888   if (!isX86())
889     return true;
890 
891   if (BF.getSize() == BF.getMaxSize())
892     return true;
893 
894   ErrorOr<ArrayRef<unsigned char>> FunctionData = BF.getData();
895   assert(FunctionData && "cannot get function as data");
896 
897   uint64_t Offset = BF.getSize();
898   MCInst Instr;
899   uint64_t InstrSize = 0;
900   uint64_t InstrAddress = BF.getAddress() + Offset;
901   using std::placeholders::_1;
902 
903   // Skip instructions that satisfy the predicate condition.
904   auto skipInstructions = [&](std::function<bool(const MCInst &)> Predicate) {
905     const uint64_t StartOffset = Offset;
906     for (; Offset < BF.getMaxSize();
907          Offset += InstrSize, InstrAddress += InstrSize) {
908       if (!DisAsm->getInstruction(Instr, InstrSize, FunctionData->slice(Offset),
909                                   InstrAddress, nulls()))
910         break;
911       if (!Predicate(Instr))
912         break;
913     }
914 
915     return Offset - StartOffset;
916   };
917 
918   // Skip a sequence of zero bytes.
919   auto skipZeros = [&]() {
920     const uint64_t StartOffset = Offset;
921     for (; Offset < BF.getMaxSize(); ++Offset)
922       if ((*FunctionData)[Offset] != 0)
923         break;
924 
925     return Offset - StartOffset;
926   };
927 
928   // Accept the whole padding area filled with breakpoints.
929   auto isBreakpoint = std::bind(&MCPlusBuilder::isBreakpoint, MIB.get(), _1);
930   if (skipInstructions(isBreakpoint) && Offset == BF.getMaxSize())
931     return true;
932 
933   auto isNoop = std::bind(&MCPlusBuilder::isNoop, MIB.get(), _1);
934 
935   // Some functions have a jump to the next function or to the padding area
936   // inserted after the body.
937   auto isSkipJump = [&](const MCInst &Instr) {
938     uint64_t TargetAddress = 0;
939     if (MIB->isUnconditionalBranch(Instr) &&
940         MIB->evaluateBranch(Instr, InstrAddress, InstrSize, TargetAddress)) {
941       if (TargetAddress >= InstrAddress + InstrSize &&
942           TargetAddress <= BF.getAddress() + BF.getMaxSize()) {
943         return true;
944       }
945     }
946     return false;
947   };
948 
949   // Skip over nops, jumps, and zero padding. Allow interleaving (this happens).
950   while (skipInstructions(isNoop) || skipInstructions(isSkipJump) ||
951          skipZeros())
952     ;
953 
954   if (Offset == BF.getMaxSize())
955     return true;
956 
957   if (opts::Verbosity >= 1) {
958     errs() << "BOLT-WARNING: bad padding at address 0x"
959            << Twine::utohexstr(BF.getAddress() + BF.getSize())
960            << " starting at offset " << (Offset - BF.getSize())
961            << " in function " << BF << '\n'
962            << FunctionData->slice(BF.getSize(), BF.getMaxSize() - BF.getSize())
963            << '\n';
964   }
965 
966   return false;
967 }
968 
969 void BinaryContext::adjustCodePadding() {
970   for (auto &BFI : BinaryFunctions) {
971     BinaryFunction &BF = BFI.second;
972     if (!shouldEmit(BF))
973       continue;
974 
975     if (!hasValidCodePadding(BF)) {
976       if (HasRelocations) {
977         if (opts::Verbosity >= 1) {
978           outs() << "BOLT-INFO: function " << BF
979                  << " has invalid padding. Ignoring the function.\n";
980         }
981         BF.setIgnored();
982       } else {
983         BF.setMaxSize(BF.getSize());
984       }
985     }
986   }
987 }
988 
989 MCSymbol *BinaryContext::registerNameAtAddress(StringRef Name, uint64_t Address,
990                                                uint64_t Size,
991                                                uint16_t Alignment,
992                                                unsigned Flags) {
993   // Register the name with MCContext.
994   MCSymbol *Symbol = Ctx->getOrCreateSymbol(Name);
995 
996   auto GAI = BinaryDataMap.find(Address);
997   BinaryData *BD;
998   if (GAI == BinaryDataMap.end()) {
999     ErrorOr<BinarySection &> SectionOrErr = getSectionForAddress(Address);
1000     BinarySection &Section =
1001         SectionOrErr ? SectionOrErr.get() : absoluteSection();
1002     BD = new BinaryData(*Symbol, Address, Size, Alignment ? Alignment : 1,
1003                         Section, Flags);
1004     GAI = BinaryDataMap.emplace(Address, BD).first;
1005     GlobalSymbols[Name] = BD;
1006     updateObjectNesting(GAI);
1007   } else {
1008     BD = GAI->second;
1009     if (!BD->hasName(Name)) {
1010       GlobalSymbols[Name] = BD;
1011       BD->Symbols.push_back(Symbol);
1012     }
1013   }
1014 
1015   return Symbol;
1016 }
1017 
1018 const BinaryData *
1019 BinaryContext::getBinaryDataContainingAddressImpl(uint64_t Address) const {
1020   auto NI = BinaryDataMap.lower_bound(Address);
1021   auto End = BinaryDataMap.end();
1022   if ((NI != End && Address == NI->first) ||
1023       ((NI != BinaryDataMap.begin()) && (NI-- != BinaryDataMap.begin()))) {
1024     if (NI->second->containsAddress(Address))
1025       return NI->second;
1026 
1027     // If this is a sub-symbol, see if a parent data contains the address.
1028     const BinaryData *BD = NI->second->getParent();
1029     while (BD) {
1030       if (BD->containsAddress(Address))
1031         return BD;
1032       BD = BD->getParent();
1033     }
1034   }
1035   return nullptr;
1036 }
1037 
1038 BinaryData *BinaryContext::getGOTSymbol() {
1039   // First tries to find a global symbol with that name
1040   BinaryData *GOTSymBD = getBinaryDataByName("_GLOBAL_OFFSET_TABLE_");
1041   if (GOTSymBD)
1042     return GOTSymBD;
1043 
1044   // This symbol might be hidden from run-time link, so fetch the local
1045   // definition if available.
1046   GOTSymBD = getBinaryDataByName("_GLOBAL_OFFSET_TABLE_/1");
1047   if (!GOTSymBD)
1048     return nullptr;
1049 
1050   // If the local symbol is not unique, fail
1051   unsigned Index = 2;
1052   SmallString<30> Storage;
1053   while (const BinaryData *BD =
1054              getBinaryDataByName(Twine("_GLOBAL_OFFSET_TABLE_/")
1055                                      .concat(Twine(Index++))
1056                                      .toStringRef(Storage)))
1057     if (BD->getAddress() != GOTSymBD->getAddress())
1058       return nullptr;
1059 
1060   return GOTSymBD;
1061 }
1062 
1063 bool BinaryContext::setBinaryDataSize(uint64_t Address, uint64_t Size) {
1064   auto NI = BinaryDataMap.find(Address);
1065   assert(NI != BinaryDataMap.end());
1066   if (NI == BinaryDataMap.end())
1067     return false;
1068   // TODO: it's possible that a jump table starts at the same address
1069   // as a larger blob of private data.  When we set the size of the
1070   // jump table, it might be smaller than the total blob size.  In this
1071   // case we just leave the original size since (currently) it won't really
1072   // affect anything.
1073   assert((!NI->second->Size || NI->second->Size == Size ||
1074           (NI->second->isJumpTable() && NI->second->Size > Size)) &&
1075          "can't change the size of a symbol that has already had its "
1076          "size set");
1077   if (!NI->second->Size) {
1078     NI->second->Size = Size;
1079     updateObjectNesting(NI);
1080     return true;
1081   }
1082   return false;
1083 }
1084 
1085 void BinaryContext::generateSymbolHashes() {
1086   auto isPadding = [](const BinaryData &BD) {
1087     StringRef Contents = BD.getSection().getContents();
1088     StringRef SymData = Contents.substr(BD.getOffset(), BD.getSize());
1089     return (BD.getName().startswith("HOLEat") ||
1090             SymData.find_first_not_of(0) == StringRef::npos);
1091   };
1092 
1093   uint64_t NumCollisions = 0;
1094   for (auto &Entry : BinaryDataMap) {
1095     BinaryData &BD = *Entry.second;
1096     StringRef Name = BD.getName();
1097 
1098     if (!isInternalSymbolName(Name))
1099       continue;
1100 
1101     // First check if a non-anonymous alias exists and move it to the front.
1102     if (BD.getSymbols().size() > 1) {
1103       auto Itr = llvm::find_if(BD.getSymbols(), [&](const MCSymbol *Symbol) {
1104         return !isInternalSymbolName(Symbol->getName());
1105       });
1106       if (Itr != BD.getSymbols().end()) {
1107         size_t Idx = std::distance(BD.getSymbols().begin(), Itr);
1108         std::swap(BD.getSymbols()[0], BD.getSymbols()[Idx]);
1109         continue;
1110       }
1111     }
1112 
1113     // We have to skip 0 size symbols since they will all collide.
1114     if (BD.getSize() == 0) {
1115       continue;
1116     }
1117 
1118     const uint64_t Hash = BD.getSection().hash(BD);
1119     const size_t Idx = Name.find("0x");
1120     std::string NewName =
1121         (Twine(Name.substr(0, Idx)) + "_" + Twine::utohexstr(Hash)).str();
1122     if (getBinaryDataByName(NewName)) {
1123       // Ignore collisions for symbols that appear to be padding
1124       // (i.e. all zeros or a "hole")
1125       if (!isPadding(BD)) {
1126         if (opts::Verbosity) {
1127           errs() << "BOLT-WARNING: collision detected when hashing " << BD
1128                  << " with new name (" << NewName << "), skipping.\n";
1129         }
1130         ++NumCollisions;
1131       }
1132       continue;
1133     }
1134     BD.Symbols.insert(BD.Symbols.begin(), Ctx->getOrCreateSymbol(NewName));
1135     GlobalSymbols[NewName] = &BD;
1136   }
1137   if (NumCollisions) {
1138     errs() << "BOLT-WARNING: " << NumCollisions
1139            << " collisions detected while hashing binary objects";
1140     if (!opts::Verbosity)
1141       errs() << ". Use -v=1 to see the list.";
1142     errs() << '\n';
1143   }
1144 }
1145 
1146 bool BinaryContext::registerFragment(BinaryFunction &TargetFunction,
1147                                      BinaryFunction &Function) const {
1148   assert(TargetFunction.isFragment() && "TargetFunction must be a fragment");
1149   if (TargetFunction.isChildOf(Function))
1150     return true;
1151   TargetFunction.addParentFragment(Function);
1152   Function.addFragment(TargetFunction);
1153   if (!HasRelocations) {
1154     TargetFunction.setSimple(false);
1155     Function.setSimple(false);
1156   }
1157   if (opts::Verbosity >= 1) {
1158     outs() << "BOLT-INFO: marking " << TargetFunction << " as a fragment of "
1159            << Function << '\n';
1160   }
1161   return true;
1162 }
1163 
1164 void BinaryContext::addAdrpAddRelocAArch64(BinaryFunction &BF,
1165                                            MCInst &LoadLowBits,
1166                                            MCInst &LoadHiBits,
1167                                            uint64_t Target) {
1168   const MCSymbol *TargetSymbol;
1169   uint64_t Addend = 0;
1170   std::tie(TargetSymbol, Addend) = handleAddressRef(Target, BF,
1171                                                     /*IsPCRel*/ true);
1172   int64_t Val;
1173   MIB->replaceImmWithSymbolRef(LoadHiBits, TargetSymbol, Addend, Ctx.get(), Val,
1174                                ELF::R_AARCH64_ADR_PREL_PG_HI21);
1175   MIB->replaceImmWithSymbolRef(LoadLowBits, TargetSymbol, Addend, Ctx.get(),
1176                                Val, ELF::R_AARCH64_ADD_ABS_LO12_NC);
1177 }
1178 
1179 bool BinaryContext::handleAArch64Veneer(uint64_t Address, bool MatchOnly) {
1180   BinaryFunction *TargetFunction = getBinaryFunctionContainingAddress(Address);
1181   if (TargetFunction)
1182     return false;
1183 
1184   ErrorOr<BinarySection &> Section = getSectionForAddress(Address);
1185   assert(Section && "cannot get section for referenced address");
1186   if (!Section->isText())
1187     return false;
1188 
1189   bool Ret = false;
1190   StringRef SectionContents = Section->getContents();
1191   uint64_t Offset = Address - Section->getAddress();
1192   const uint64_t MaxSize = SectionContents.size() - Offset;
1193   const uint8_t *Bytes =
1194       reinterpret_cast<const uint8_t *>(SectionContents.data());
1195   ArrayRef<uint8_t> Data(Bytes + Offset, MaxSize);
1196 
1197   auto matchVeneer = [&](BinaryFunction::InstrMapType &Instructions,
1198                          MCInst &Instruction, uint64_t Offset,
1199                          uint64_t AbsoluteInstrAddr,
1200                          uint64_t TotalSize) -> bool {
1201     MCInst *TargetHiBits, *TargetLowBits;
1202     uint64_t TargetAddress, Count;
1203     Count = MIB->matchLinkerVeneer(Instructions.begin(), Instructions.end(),
1204                                    AbsoluteInstrAddr, Instruction, TargetHiBits,
1205                                    TargetLowBits, TargetAddress);
1206     if (!Count)
1207       return false;
1208 
1209     if (MatchOnly)
1210       return true;
1211 
1212     // NOTE The target symbol was created during disassemble's
1213     // handleExternalReference
1214     const MCSymbol *VeneerSymbol = getOrCreateGlobalSymbol(Address, "FUNCat");
1215     BinaryFunction *Veneer = createBinaryFunction(VeneerSymbol->getName().str(),
1216                                                   *Section, Address, TotalSize);
1217     addAdrpAddRelocAArch64(*Veneer, *TargetLowBits, *TargetHiBits,
1218                            TargetAddress);
1219     MIB->addAnnotation(Instruction, "AArch64Veneer", true);
1220     Veneer->addInstruction(Offset, std::move(Instruction));
1221     --Count;
1222     for (auto It = Instructions.rbegin(); Count != 0; ++It, --Count) {
1223       MIB->addAnnotation(It->second, "AArch64Veneer", true);
1224       Veneer->addInstruction(It->first, std::move(It->second));
1225     }
1226 
1227     Veneer->getOrCreateLocalLabel(Address);
1228     Veneer->setMaxSize(TotalSize);
1229     Veneer->updateState(BinaryFunction::State::Disassembled);
1230     LLVM_DEBUG(dbgs() << "BOLT-DEBUG: handling veneer function at 0x" << Address
1231                       << "\n");
1232     return true;
1233   };
1234 
1235   uint64_t Size = 0, TotalSize = 0;
1236   BinaryFunction::InstrMapType VeneerInstructions;
1237   for (Offset = 0; Offset < MaxSize; Offset += Size) {
1238     MCInst Instruction;
1239     const uint64_t AbsoluteInstrAddr = Address + Offset;
1240     if (!SymbolicDisAsm->getInstruction(Instruction, Size, Data.slice(Offset),
1241                                         AbsoluteInstrAddr, nulls()))
1242       break;
1243 
1244     TotalSize += Size;
1245     if (MIB->isBranch(Instruction)) {
1246       Ret = matchVeneer(VeneerInstructions, Instruction, Offset,
1247                         AbsoluteInstrAddr, TotalSize);
1248       break;
1249     }
1250 
1251     VeneerInstructions.emplace(Offset, std::move(Instruction));
1252   }
1253 
1254   return Ret;
1255 }
1256 
1257 void BinaryContext::processInterproceduralReferences() {
1258   for (const std::pair<BinaryFunction *, uint64_t> &It :
1259        InterproceduralReferences) {
1260     BinaryFunction &Function = *It.first;
1261     uint64_t Address = It.second;
1262     if (!Address || Function.isIgnored())
1263       continue;
1264 
1265     BinaryFunction *TargetFunction =
1266         getBinaryFunctionContainingAddress(Address);
1267     if (&Function == TargetFunction)
1268       continue;
1269 
1270     if (TargetFunction) {
1271       if (TargetFunction->isFragment() &&
1272           !TargetFunction->isChildOf(Function)) {
1273         errs() << "BOLT-WARNING: interprocedural reference between unrelated "
1274                   "fragments: "
1275                << Function.getPrintName() << " and "
1276                << TargetFunction->getPrintName() << '\n';
1277       }
1278       if (uint64_t Offset = Address - TargetFunction->getAddress())
1279         TargetFunction->addEntryPointAtOffset(Offset);
1280 
1281       continue;
1282     }
1283 
1284     // Check if address falls in function padding space - this could be
1285     // unmarked data in code. In this case adjust the padding space size.
1286     ErrorOr<BinarySection &> Section = getSectionForAddress(Address);
1287     assert(Section && "cannot get section for referenced address");
1288 
1289     if (!Section->isText())
1290       continue;
1291 
1292     // PLT requires special handling and could be ignored in this context.
1293     StringRef SectionName = Section->getName();
1294     if (SectionName == ".plt" || SectionName == ".plt.got")
1295       continue;
1296 
1297     // Check if it is aarch64 veneer written at Address
1298     if (isAArch64() && handleAArch64Veneer(Address))
1299       continue;
1300 
1301     if (opts::processAllFunctions()) {
1302       errs() << "BOLT-ERROR: cannot process binaries with unmarked "
1303              << "object in code at address 0x" << Twine::utohexstr(Address)
1304              << " belonging to section " << SectionName << " in current mode\n";
1305       exit(1);
1306     }
1307 
1308     TargetFunction = getBinaryFunctionContainingAddress(Address,
1309                                                         /*CheckPastEnd=*/false,
1310                                                         /*UseMaxSize=*/true);
1311     // We are not going to overwrite non-simple functions, but for simple
1312     // ones - adjust the padding size.
1313     if (TargetFunction && TargetFunction->isSimple()) {
1314       errs() << "BOLT-WARNING: function " << *TargetFunction
1315              << " has an object detected in a padding region at address 0x"
1316              << Twine::utohexstr(Address) << '\n';
1317       TargetFunction->setMaxSize(TargetFunction->getSize());
1318     }
1319   }
1320 
1321   InterproceduralReferences.clear();
1322 }
1323 
1324 void BinaryContext::postProcessSymbolTable() {
1325   fixBinaryDataHoles();
1326   bool Valid = true;
1327   for (auto &Entry : BinaryDataMap) {
1328     BinaryData *BD = Entry.second;
1329     if ((BD->getName().startswith("SYMBOLat") ||
1330          BD->getName().startswith("DATAat")) &&
1331         !BD->getParent() && !BD->getSize() && !BD->isAbsolute() &&
1332         BD->getSection()) {
1333       errs() << "BOLT-WARNING: zero-sized top level symbol: " << *BD << "\n";
1334       Valid = false;
1335     }
1336   }
1337   assert(Valid);
1338   (void)Valid;
1339   generateSymbolHashes();
1340 }
1341 
1342 void BinaryContext::foldFunction(BinaryFunction &ChildBF,
1343                                  BinaryFunction &ParentBF) {
1344   assert(!ChildBF.isMultiEntry() && !ParentBF.isMultiEntry() &&
1345          "cannot merge functions with multiple entry points");
1346 
1347   std::unique_lock<llvm::sys::RWMutex> WriteCtxLock(CtxMutex, std::defer_lock);
1348   std::unique_lock<llvm::sys::RWMutex> WriteSymbolMapLock(
1349       SymbolToFunctionMapMutex, std::defer_lock);
1350 
1351   const StringRef ChildName = ChildBF.getOneName();
1352 
1353   // Move symbols over and update bookkeeping info.
1354   for (MCSymbol *Symbol : ChildBF.getSymbols()) {
1355     ParentBF.getSymbols().push_back(Symbol);
1356     WriteSymbolMapLock.lock();
1357     SymbolToFunctionMap[Symbol] = &ParentBF;
1358     WriteSymbolMapLock.unlock();
1359     // NB: there's no need to update BinaryDataMap and GlobalSymbols.
1360   }
1361   ChildBF.getSymbols().clear();
1362 
1363   // Move other names the child function is known under.
1364   llvm::move(ChildBF.Aliases, std::back_inserter(ParentBF.Aliases));
1365   ChildBF.Aliases.clear();
1366 
1367   if (HasRelocations) {
1368     // Merge execution counts of ChildBF into those of ParentBF.
1369     // Without relocations, we cannot reliably merge profiles as both functions
1370     // continue to exist and either one can be executed.
1371     ChildBF.mergeProfileDataInto(ParentBF);
1372 
1373     std::shared_lock<llvm::sys::RWMutex> ReadBfsLock(BinaryFunctionsMutex,
1374                                                      std::defer_lock);
1375     std::unique_lock<llvm::sys::RWMutex> WriteBfsLock(BinaryFunctionsMutex,
1376                                                       std::defer_lock);
1377     // Remove ChildBF from the global set of functions in relocs mode.
1378     ReadBfsLock.lock();
1379     auto FI = BinaryFunctions.find(ChildBF.getAddress());
1380     ReadBfsLock.unlock();
1381 
1382     assert(FI != BinaryFunctions.end() && "function not found");
1383     assert(&ChildBF == &FI->second && "function mismatch");
1384 
1385     WriteBfsLock.lock();
1386     ChildBF.clearDisasmState();
1387     FI = BinaryFunctions.erase(FI);
1388     WriteBfsLock.unlock();
1389 
1390   } else {
1391     // In non-relocation mode we keep the function, but rename it.
1392     std::string NewName = "__ICF_" + ChildName.str();
1393 
1394     WriteCtxLock.lock();
1395     ChildBF.getSymbols().push_back(Ctx->getOrCreateSymbol(NewName));
1396     WriteCtxLock.unlock();
1397 
1398     ChildBF.setFolded(&ParentBF);
1399   }
1400 
1401   ParentBF.setHasFunctionsFoldedInto();
1402 }
1403 
1404 void BinaryContext::fixBinaryDataHoles() {
1405   assert(validateObjectNesting() && "object nesting inconsistency detected");
1406 
1407   for (BinarySection &Section : allocatableSections()) {
1408     std::vector<std::pair<uint64_t, uint64_t>> Holes;
1409 
1410     auto isNotHole = [&Section](const binary_data_iterator &Itr) {
1411       BinaryData *BD = Itr->second;
1412       bool isHole = (!BD->getParent() && !BD->getSize() && BD->isObject() &&
1413                      (BD->getName().startswith("SYMBOLat0x") ||
1414                       BD->getName().startswith("DATAat0x") ||
1415                       BD->getName().startswith("ANONYMOUS")));
1416       return !isHole && BD->getSection() == Section && !BD->getParent();
1417     };
1418 
1419     auto BDStart = BinaryDataMap.begin();
1420     auto BDEnd = BinaryDataMap.end();
1421     auto Itr = FilteredBinaryDataIterator(isNotHole, BDStart, BDEnd);
1422     auto End = FilteredBinaryDataIterator(isNotHole, BDEnd, BDEnd);
1423 
1424     uint64_t EndAddress = Section.getAddress();
1425 
1426     while (Itr != End) {
1427       if (Itr->second->getAddress() > EndAddress) {
1428         uint64_t Gap = Itr->second->getAddress() - EndAddress;
1429         Holes.emplace_back(EndAddress, Gap);
1430       }
1431       EndAddress = Itr->second->getEndAddress();
1432       ++Itr;
1433     }
1434 
1435     if (EndAddress < Section.getEndAddress())
1436       Holes.emplace_back(EndAddress, Section.getEndAddress() - EndAddress);
1437 
1438     // If there is already a symbol at the start of the hole, grow that symbol
1439     // to cover the rest.  Otherwise, create a new symbol to cover the hole.
1440     for (std::pair<uint64_t, uint64_t> &Hole : Holes) {
1441       BinaryData *BD = getBinaryDataAtAddress(Hole.first);
1442       if (BD) {
1443         // BD->getSection() can be != Section if there are sections that
1444         // overlap.  In this case it is probably safe to just skip the holes
1445         // since the overlapping section will not(?) have any symbols in it.
1446         if (BD->getSection() == Section)
1447           setBinaryDataSize(Hole.first, Hole.second);
1448       } else {
1449         getOrCreateGlobalSymbol(Hole.first, "HOLEat", Hole.second, 1);
1450       }
1451     }
1452   }
1453 
1454   assert(validateObjectNesting() && "object nesting inconsistency detected");
1455   assert(validateHoles() && "top level hole detected in object map");
1456 }
1457 
1458 void BinaryContext::printGlobalSymbols(raw_ostream &OS) const {
1459   const BinarySection *CurrentSection = nullptr;
1460   bool FirstSection = true;
1461 
1462   for (auto &Entry : BinaryDataMap) {
1463     const BinaryData *BD = Entry.second;
1464     const BinarySection &Section = BD->getSection();
1465     if (FirstSection || Section != *CurrentSection) {
1466       uint64_t Address, Size;
1467       StringRef Name = Section.getName();
1468       if (Section) {
1469         Address = Section.getAddress();
1470         Size = Section.getSize();
1471       } else {
1472         Address = BD->getAddress();
1473         Size = BD->getSize();
1474       }
1475       OS << "BOLT-INFO: Section " << Name << ", "
1476          << "0x" + Twine::utohexstr(Address) << ":"
1477          << "0x" + Twine::utohexstr(Address + Size) << "/" << Size << "\n";
1478       CurrentSection = &Section;
1479       FirstSection = false;
1480     }
1481 
1482     OS << "BOLT-INFO: ";
1483     const BinaryData *P = BD->getParent();
1484     while (P) {
1485       OS << "  ";
1486       P = P->getParent();
1487     }
1488     OS << *BD << "\n";
1489   }
1490 }
1491 
1492 Expected<unsigned> BinaryContext::getDwarfFile(
1493     StringRef Directory, StringRef FileName, unsigned FileNumber,
1494     std::optional<MD5::MD5Result> Checksum, std::optional<StringRef> Source,
1495     unsigned CUID, unsigned DWARFVersion) {
1496   DwarfLineTable &Table = DwarfLineTablesCUMap[CUID];
1497   return Table.tryGetFile(Directory, FileName, Checksum, Source, DWARFVersion,
1498                           FileNumber);
1499 }
1500 
1501 unsigned BinaryContext::addDebugFilenameToUnit(const uint32_t DestCUID,
1502                                                const uint32_t SrcCUID,
1503                                                unsigned FileIndex) {
1504   DWARFCompileUnit *SrcUnit = DwCtx->getCompileUnitForOffset(SrcCUID);
1505   const DWARFDebugLine::LineTable *LineTable =
1506       DwCtx->getLineTableForUnit(SrcUnit);
1507   const std::vector<DWARFDebugLine::FileNameEntry> &FileNames =
1508       LineTable->Prologue.FileNames;
1509   // Dir indexes start at 1, as DWARF file numbers, and a dir index 0
1510   // means empty dir.
1511   assert(FileIndex > 0 && FileIndex <= FileNames.size() &&
1512          "FileIndex out of range for the compilation unit.");
1513   StringRef Dir = "";
1514   if (FileNames[FileIndex - 1].DirIdx != 0) {
1515     if (std::optional<const char *> DirName = dwarf::toString(
1516             LineTable->Prologue
1517                 .IncludeDirectories[FileNames[FileIndex - 1].DirIdx - 1])) {
1518       Dir = *DirName;
1519     }
1520   }
1521   StringRef FileName = "";
1522   if (std::optional<const char *> FName =
1523           dwarf::toString(FileNames[FileIndex - 1].Name))
1524     FileName = *FName;
1525   assert(FileName != "");
1526   DWARFCompileUnit *DstUnit = DwCtx->getCompileUnitForOffset(DestCUID);
1527   return cantFail(getDwarfFile(Dir, FileName, 0, std::nullopt, std::nullopt,
1528                                DestCUID, DstUnit->getVersion()));
1529 }
1530 
1531 std::vector<BinaryFunction *> BinaryContext::getSortedFunctions() {
1532   std::vector<BinaryFunction *> SortedFunctions(BinaryFunctions.size());
1533   llvm::transform(llvm::make_second_range(BinaryFunctions),
1534                   SortedFunctions.begin(),
1535                   [](BinaryFunction &BF) { return &BF; });
1536 
1537   llvm::stable_sort(SortedFunctions,
1538                     [](const BinaryFunction *A, const BinaryFunction *B) {
1539                       if (A->hasValidIndex() && B->hasValidIndex()) {
1540                         return A->getIndex() < B->getIndex();
1541                       }
1542                       return A->hasValidIndex();
1543                     });
1544   return SortedFunctions;
1545 }
1546 
1547 std::vector<BinaryFunction *> BinaryContext::getAllBinaryFunctions() {
1548   std::vector<BinaryFunction *> AllFunctions;
1549   AllFunctions.reserve(BinaryFunctions.size() + InjectedBinaryFunctions.size());
1550   llvm::transform(llvm::make_second_range(BinaryFunctions),
1551                   std::back_inserter(AllFunctions),
1552                   [](BinaryFunction &BF) { return &BF; });
1553   llvm::copy(InjectedBinaryFunctions, std::back_inserter(AllFunctions));
1554 
1555   return AllFunctions;
1556 }
1557 
1558 std::optional<DWARFUnit *> BinaryContext::getDWOCU(uint64_t DWOId) {
1559   auto Iter = DWOCUs.find(DWOId);
1560   if (Iter == DWOCUs.end())
1561     return std::nullopt;
1562 
1563   return Iter->second;
1564 }
1565 
1566 DWARFContext *BinaryContext::getDWOContext() const {
1567   if (DWOCUs.empty())
1568     return nullptr;
1569   return &DWOCUs.begin()->second->getContext();
1570 }
1571 
1572 /// Handles DWO sections that can either be in .o, .dwo or .dwp files.
1573 void BinaryContext::preprocessDWODebugInfo() {
1574   for (const std::unique_ptr<DWARFUnit> &CU : DwCtx->compile_units()) {
1575     DWARFUnit *const DwarfUnit = CU.get();
1576     if (std::optional<uint64_t> DWOId = DwarfUnit->getDWOId()) {
1577       DWARFUnit *DWOCU = DwarfUnit->getNonSkeletonUnitDIE(false).getDwarfUnit();
1578       if (!DWOCU->isDWOUnit()) {
1579         std::string DWOName = dwarf::toString(
1580             DwarfUnit->getUnitDIE().find(
1581                 {dwarf::DW_AT_dwo_name, dwarf::DW_AT_GNU_dwo_name}),
1582             "");
1583         outs() << "BOLT-WARNING: Debug Fission: DWO debug information for "
1584                << DWOName
1585                << " was not retrieved and won't be updated. Please check "
1586                   "relative path.\n";
1587         continue;
1588       }
1589       DWOCUs[*DWOId] = DWOCU;
1590     }
1591   }
1592   if (!DWOCUs.empty())
1593     outs() << "BOLT-INFO: processing split DWARF\n";
1594 }
1595 
1596 void BinaryContext::preprocessDebugInfo() {
1597   struct CURange {
1598     uint64_t LowPC;
1599     uint64_t HighPC;
1600     DWARFUnit *Unit;
1601 
1602     bool operator<(const CURange &Other) const { return LowPC < Other.LowPC; }
1603   };
1604 
1605   // Building a map of address ranges to CUs similar to .debug_aranges and use
1606   // it to assign CU to functions.
1607   std::vector<CURange> AllRanges;
1608   AllRanges.reserve(DwCtx->getNumCompileUnits());
1609   for (const std::unique_ptr<DWARFUnit> &CU : DwCtx->compile_units()) {
1610     Expected<DWARFAddressRangesVector> RangesOrError =
1611         CU->getUnitDIE().getAddressRanges();
1612     if (!RangesOrError) {
1613       consumeError(RangesOrError.takeError());
1614       continue;
1615     }
1616     for (DWARFAddressRange &Range : *RangesOrError) {
1617       // Parts of the debug info could be invalidated due to corresponding code
1618       // being removed from the binary by the linker. Hence we check if the
1619       // address is a valid one.
1620       if (containsAddress(Range.LowPC))
1621         AllRanges.emplace_back(CURange{Range.LowPC, Range.HighPC, CU.get()});
1622     }
1623 
1624     ContainsDwarf5 |= CU->getVersion() >= 5;
1625     ContainsDwarfLegacy |= CU->getVersion() < 5;
1626   }
1627 
1628   llvm::sort(AllRanges);
1629   for (auto &KV : BinaryFunctions) {
1630     const uint64_t FunctionAddress = KV.first;
1631     BinaryFunction &Function = KV.second;
1632 
1633     auto It = llvm::partition_point(
1634         AllRanges, [=](CURange R) { return R.HighPC <= FunctionAddress; });
1635     if (It != AllRanges.end() && It->LowPC <= FunctionAddress)
1636       Function.setDWARFUnit(It->Unit);
1637   }
1638 
1639   // Discover units with debug info that needs to be updated.
1640   for (const auto &KV : BinaryFunctions) {
1641     const BinaryFunction &BF = KV.second;
1642     if (shouldEmit(BF) && BF.getDWARFUnit())
1643       ProcessedCUs.insert(BF.getDWARFUnit());
1644   }
1645 
1646   // Clear debug info for functions from units that we are not going to process.
1647   for (auto &KV : BinaryFunctions) {
1648     BinaryFunction &BF = KV.second;
1649     if (BF.getDWARFUnit() && !ProcessedCUs.count(BF.getDWARFUnit()))
1650       BF.setDWARFUnit(nullptr);
1651   }
1652 
1653   if (opts::Verbosity >= 1) {
1654     outs() << "BOLT-INFO: " << ProcessedCUs.size() << " out of "
1655            << DwCtx->getNumCompileUnits() << " CUs will be updated\n";
1656   }
1657 
1658   preprocessDWODebugInfo();
1659 
1660   // Populate MCContext with DWARF files from all units.
1661   StringRef GlobalPrefix = AsmInfo->getPrivateGlobalPrefix();
1662   for (const std::unique_ptr<DWARFUnit> &CU : DwCtx->compile_units()) {
1663     const uint64_t CUID = CU->getOffset();
1664     DwarfLineTable &BinaryLineTable = getDwarfLineTable(CUID);
1665     BinaryLineTable.setLabel(Ctx->getOrCreateSymbol(
1666         GlobalPrefix + "line_table_start" + Twine(CUID)));
1667 
1668     if (!ProcessedCUs.count(CU.get()))
1669       continue;
1670 
1671     const DWARFDebugLine::LineTable *LineTable =
1672         DwCtx->getLineTableForUnit(CU.get());
1673     const std::vector<DWARFDebugLine::FileNameEntry> &FileNames =
1674         LineTable->Prologue.FileNames;
1675 
1676     uint16_t DwarfVersion = LineTable->Prologue.getVersion();
1677     if (DwarfVersion >= 5) {
1678       std::optional<MD5::MD5Result> Checksum;
1679       if (LineTable->Prologue.ContentTypes.HasMD5)
1680         Checksum = LineTable->Prologue.FileNames[0].Checksum;
1681       std::optional<const char *> Name =
1682           dwarf::toString(CU->getUnitDIE().find(dwarf::DW_AT_name), nullptr);
1683       if (std::optional<uint64_t> DWOID = CU->getDWOId()) {
1684         auto Iter = DWOCUs.find(*DWOID);
1685         assert(Iter != DWOCUs.end() && "DWO CU was not found.");
1686         Name = dwarf::toString(
1687             Iter->second->getUnitDIE().find(dwarf::DW_AT_name), nullptr);
1688       }
1689       BinaryLineTable.setRootFile(CU->getCompilationDir(), *Name, Checksum,
1690                                   std::nullopt);
1691     }
1692 
1693     BinaryLineTable.setDwarfVersion(DwarfVersion);
1694 
1695     // Assign a unique label to every line table, one per CU.
1696     // Make sure empty debug line tables are registered too.
1697     if (FileNames.empty()) {
1698       cantFail(getDwarfFile("", "<unknown>", 0, std::nullopt, std::nullopt,
1699                             CUID, DwarfVersion));
1700       continue;
1701     }
1702     const uint32_t Offset = DwarfVersion < 5 ? 1 : 0;
1703     for (size_t I = 0, Size = FileNames.size(); I != Size; ++I) {
1704       // Dir indexes start at 1, as DWARF file numbers, and a dir index 0
1705       // means empty dir.
1706       StringRef Dir = "";
1707       if (FileNames[I].DirIdx != 0 || DwarfVersion >= 5)
1708         if (std::optional<const char *> DirName = dwarf::toString(
1709                 LineTable->Prologue
1710                     .IncludeDirectories[FileNames[I].DirIdx - Offset]))
1711           Dir = *DirName;
1712       StringRef FileName = "";
1713       if (std::optional<const char *> FName =
1714               dwarf::toString(FileNames[I].Name))
1715         FileName = *FName;
1716       assert(FileName != "");
1717       std::optional<MD5::MD5Result> Checksum;
1718       if (DwarfVersion >= 5 && LineTable->Prologue.ContentTypes.HasMD5)
1719         Checksum = LineTable->Prologue.FileNames[I].Checksum;
1720       cantFail(getDwarfFile(Dir, FileName, 0, Checksum, std::nullopt, CUID,
1721                             DwarfVersion));
1722     }
1723   }
1724 }
1725 
1726 bool BinaryContext::shouldEmit(const BinaryFunction &Function) const {
1727   if (Function.isPseudo())
1728     return false;
1729 
1730   if (opts::processAllFunctions())
1731     return true;
1732 
1733   if (Function.isIgnored())
1734     return false;
1735 
1736   // In relocation mode we will emit non-simple functions with CFG.
1737   // If the function does not have a CFG it should be marked as ignored.
1738   return HasRelocations || Function.isSimple();
1739 }
1740 
1741 void BinaryContext::dump(const MCInst &Inst) const {
1742   if (LLVM_UNLIKELY(!InstPrinter)) {
1743     dbgs() << "Cannot dump for InstPrinter is not initialized.\n";
1744     return;
1745   }
1746   InstPrinter->printInst(&Inst, 0, "", *STI, dbgs());
1747   dbgs() << "\n";
1748 }
1749 
1750 void BinaryContext::printCFI(raw_ostream &OS, const MCCFIInstruction &Inst) {
1751   uint32_t Operation = Inst.getOperation();
1752   switch (Operation) {
1753   case MCCFIInstruction::OpSameValue:
1754     OS << "OpSameValue Reg" << Inst.getRegister();
1755     break;
1756   case MCCFIInstruction::OpRememberState:
1757     OS << "OpRememberState";
1758     break;
1759   case MCCFIInstruction::OpRestoreState:
1760     OS << "OpRestoreState";
1761     break;
1762   case MCCFIInstruction::OpOffset:
1763     OS << "OpOffset Reg" << Inst.getRegister() << " " << Inst.getOffset();
1764     break;
1765   case MCCFIInstruction::OpDefCfaRegister:
1766     OS << "OpDefCfaRegister Reg" << Inst.getRegister();
1767     break;
1768   case MCCFIInstruction::OpDefCfaOffset:
1769     OS << "OpDefCfaOffset " << Inst.getOffset();
1770     break;
1771   case MCCFIInstruction::OpDefCfa:
1772     OS << "OpDefCfa Reg" << Inst.getRegister() << " " << Inst.getOffset();
1773     break;
1774   case MCCFIInstruction::OpRelOffset:
1775     OS << "OpRelOffset Reg" << Inst.getRegister() << " " << Inst.getOffset();
1776     break;
1777   case MCCFIInstruction::OpAdjustCfaOffset:
1778     OS << "OfAdjustCfaOffset " << Inst.getOffset();
1779     break;
1780   case MCCFIInstruction::OpEscape:
1781     OS << "OpEscape";
1782     break;
1783   case MCCFIInstruction::OpRestore:
1784     OS << "OpRestore Reg" << Inst.getRegister();
1785     break;
1786   case MCCFIInstruction::OpUndefined:
1787     OS << "OpUndefined Reg" << Inst.getRegister();
1788     break;
1789   case MCCFIInstruction::OpRegister:
1790     OS << "OpRegister Reg" << Inst.getRegister() << " Reg"
1791        << Inst.getRegister2();
1792     break;
1793   case MCCFIInstruction::OpWindowSave:
1794     OS << "OpWindowSave";
1795     break;
1796   case MCCFIInstruction::OpGnuArgsSize:
1797     OS << "OpGnuArgsSize";
1798     break;
1799   default:
1800     OS << "Op#" << Operation;
1801     break;
1802   }
1803 }
1804 
1805 MarkerSymType BinaryContext::getMarkerType(const SymbolRef &Symbol) const {
1806   // For aarch64 and riscv, the ABI defines mapping symbols so we identify data
1807   // in the code section (see IHI0056B). $x identifies a symbol starting code or
1808   // the end of a data chunk inside code, $d identifies start of data.
1809   if ((!isAArch64() && !isRISCV()) || ELFSymbolRef(Symbol).getSize())
1810     return MarkerSymType::NONE;
1811 
1812   Expected<StringRef> NameOrError = Symbol.getName();
1813   Expected<object::SymbolRef::Type> TypeOrError = Symbol.getType();
1814 
1815   if (!TypeOrError || !NameOrError)
1816     return MarkerSymType::NONE;
1817 
1818   if (*TypeOrError != SymbolRef::ST_Unknown)
1819     return MarkerSymType::NONE;
1820 
1821   if (*NameOrError == "$x" || NameOrError->startswith("$x."))
1822     return MarkerSymType::CODE;
1823 
1824   // $x<ISA>
1825   if (isRISCV() && NameOrError->startswith("$x"))
1826     return MarkerSymType::CODE;
1827 
1828   if (*NameOrError == "$d" || NameOrError->startswith("$d."))
1829     return MarkerSymType::DATA;
1830 
1831   return MarkerSymType::NONE;
1832 }
1833 
1834 bool BinaryContext::isMarker(const SymbolRef &Symbol) const {
1835   return getMarkerType(Symbol) != MarkerSymType::NONE;
1836 }
1837 
1838 static void printDebugInfo(raw_ostream &OS, const MCInst &Instruction,
1839                            const BinaryFunction *Function,
1840                            DWARFContext *DwCtx) {
1841   DebugLineTableRowRef RowRef =
1842       DebugLineTableRowRef::fromSMLoc(Instruction.getLoc());
1843   if (RowRef == DebugLineTableRowRef::NULL_ROW)
1844     return;
1845 
1846   const DWARFDebugLine::LineTable *LineTable;
1847   if (Function && Function->getDWARFUnit() &&
1848       Function->getDWARFUnit()->getOffset() == RowRef.DwCompileUnitIndex) {
1849     LineTable = Function->getDWARFLineTable();
1850   } else {
1851     LineTable = DwCtx->getLineTableForUnit(
1852         DwCtx->getCompileUnitForOffset(RowRef.DwCompileUnitIndex));
1853   }
1854   assert(LineTable && "line table expected for instruction with debug info");
1855 
1856   const DWARFDebugLine::Row &Row = LineTable->Rows[RowRef.RowIndex - 1];
1857   StringRef FileName = "";
1858   if (std::optional<const char *> FName =
1859           dwarf::toString(LineTable->Prologue.FileNames[Row.File - 1].Name))
1860     FileName = *FName;
1861   OS << " # debug line " << FileName << ":" << Row.Line;
1862   if (Row.Column)
1863     OS << ":" << Row.Column;
1864   if (Row.Discriminator)
1865     OS << " discriminator:" << Row.Discriminator;
1866 }
1867 
1868 void BinaryContext::printInstruction(raw_ostream &OS, const MCInst &Instruction,
1869                                      uint64_t Offset,
1870                                      const BinaryFunction *Function,
1871                                      bool PrintMCInst, bool PrintMemData,
1872                                      bool PrintRelocations,
1873                                      StringRef Endl) const {
1874   OS << format("    %08" PRIx64 ": ", Offset);
1875   if (MIB->isCFI(Instruction)) {
1876     uint32_t Offset = Instruction.getOperand(0).getImm();
1877     OS << "\t!CFI\t$" << Offset << "\t; ";
1878     if (Function)
1879       printCFI(OS, *Function->getCFIFor(Instruction));
1880     OS << Endl;
1881     return;
1882   }
1883   InstPrinter->printInst(&Instruction, 0, "", *STI, OS);
1884   if (MIB->isCall(Instruction)) {
1885     if (MIB->isTailCall(Instruction))
1886       OS << " # TAILCALL ";
1887     if (MIB->isInvoke(Instruction)) {
1888       const std::optional<MCPlus::MCLandingPad> EHInfo =
1889           MIB->getEHInfo(Instruction);
1890       OS << " # handler: ";
1891       if (EHInfo->first)
1892         OS << *EHInfo->first;
1893       else
1894         OS << '0';
1895       OS << "; action: " << EHInfo->second;
1896       const int64_t GnuArgsSize = MIB->getGnuArgsSize(Instruction);
1897       if (GnuArgsSize >= 0)
1898         OS << "; GNU_args_size = " << GnuArgsSize;
1899     }
1900   } else if (MIB->isIndirectBranch(Instruction)) {
1901     if (uint64_t JTAddress = MIB->getJumpTable(Instruction)) {
1902       OS << " # JUMPTABLE @0x" << Twine::utohexstr(JTAddress);
1903     } else {
1904       OS << " # UNKNOWN CONTROL FLOW";
1905     }
1906   }
1907   if (std::optional<uint32_t> Offset = MIB->getOffset(Instruction))
1908     OS << " # Offset: " << *Offset;
1909   if (std::optional<uint32_t> Size = MIB->getSize(Instruction))
1910     OS << " # Size: " << *Size;
1911   if (MCSymbol *Label = MIB->getLabel(Instruction))
1912     OS << " # Label: " << *Label;
1913 
1914   MIB->printAnnotations(Instruction, OS);
1915 
1916   if (opts::PrintDebugInfo)
1917     printDebugInfo(OS, Instruction, Function, DwCtx.get());
1918 
1919   if ((opts::PrintRelocations || PrintRelocations) && Function) {
1920     const uint64_t Size = computeCodeSize(&Instruction, &Instruction + 1);
1921     Function->printRelocations(OS, Offset, Size);
1922   }
1923 
1924   OS << Endl;
1925 
1926   if (PrintMCInst) {
1927     Instruction.dump_pretty(OS, InstPrinter.get());
1928     OS << Endl;
1929   }
1930 }
1931 
1932 std::optional<uint64_t>
1933 BinaryContext::getBaseAddressForMapping(uint64_t MMapAddress,
1934                                         uint64_t FileOffset) const {
1935   // Find a segment with a matching file offset.
1936   for (auto &KV : SegmentMapInfo) {
1937     const SegmentInfo &SegInfo = KV.second;
1938     // FileOffset is got from perf event,
1939     // and it is equal to alignDown(SegInfo.FileOffset, pagesize).
1940     // If the pagesize is not equal to SegInfo.Alignment.
1941     // FileOffset and SegInfo.FileOffset should be aligned first,
1942     // and then judge whether they are equal.
1943     if (alignDown(SegInfo.FileOffset, SegInfo.Alignment) ==
1944         alignDown(FileOffset, SegInfo.Alignment)) {
1945       // The function's offset from base address in VAS is aligned by pagesize
1946       // instead of SegInfo.Alignment. Pagesize can't be got from perf events.
1947       // However, The ELF document says that SegInfo.FileOffset should equal
1948       // to SegInfo.Address, modulo the pagesize.
1949       // Reference: https://refspecs.linuxfoundation.org/elf/elf.pdf
1950 
1951       // So alignDown(SegInfo.Address, pagesize) can be calculated by:
1952       // alignDown(SegInfo.Address, pagesize)
1953       //   = SegInfo.Address - (SegInfo.Address % pagesize)
1954       //   = SegInfo.Address - (SegInfo.FileOffset % pagesize)
1955       //   = SegInfo.Address - SegInfo.FileOffset +
1956       //     alignDown(SegInfo.FileOffset, pagesize)
1957       //   = SegInfo.Address - SegInfo.FileOffset + FileOffset
1958       return MMapAddress - (SegInfo.Address - SegInfo.FileOffset + FileOffset);
1959     }
1960   }
1961 
1962   return std::nullopt;
1963 }
1964 
1965 ErrorOr<BinarySection &> BinaryContext::getSectionForAddress(uint64_t Address) {
1966   auto SI = AddressToSection.upper_bound(Address);
1967   if (SI != AddressToSection.begin()) {
1968     --SI;
1969     uint64_t UpperBound = SI->first + SI->second->getSize();
1970     if (!SI->second->getSize())
1971       UpperBound += 1;
1972     if (UpperBound > Address)
1973       return *SI->second;
1974   }
1975   return std::make_error_code(std::errc::bad_address);
1976 }
1977 
1978 ErrorOr<StringRef>
1979 BinaryContext::getSectionNameForAddress(uint64_t Address) const {
1980   if (ErrorOr<const BinarySection &> Section = getSectionForAddress(Address))
1981     return Section->getName();
1982   return std::make_error_code(std::errc::bad_address);
1983 }
1984 
1985 BinarySection &BinaryContext::registerSection(BinarySection *Section) {
1986   auto Res = Sections.insert(Section);
1987   (void)Res;
1988   assert(Res.second && "can't register the same section twice.");
1989 
1990   // Only register allocatable sections in the AddressToSection map.
1991   if (Section->isAllocatable() && Section->getAddress())
1992     AddressToSection.insert(std::make_pair(Section->getAddress(), Section));
1993   NameToSection.insert(
1994       std::make_pair(std::string(Section->getName()), Section));
1995   if (Section->hasSectionRef())
1996     SectionRefToBinarySection.insert(
1997         std::make_pair(Section->getSectionRef(), Section));
1998 
1999   LLVM_DEBUG(dbgs() << "BOLT-DEBUG: registering " << *Section << "\n");
2000   return *Section;
2001 }
2002 
2003 BinarySection &BinaryContext::registerSection(SectionRef Section) {
2004   return registerSection(new BinarySection(*this, Section));
2005 }
2006 
2007 BinarySection &
2008 BinaryContext::registerSection(const Twine &SectionName,
2009                                const BinarySection &OriginalSection) {
2010   return registerSection(
2011       new BinarySection(*this, SectionName, OriginalSection));
2012 }
2013 
2014 BinarySection &
2015 BinaryContext::registerOrUpdateSection(const Twine &Name, unsigned ELFType,
2016                                        unsigned ELFFlags, uint8_t *Data,
2017                                        uint64_t Size, unsigned Alignment) {
2018   auto NamedSections = getSectionByName(Name);
2019   if (NamedSections.begin() != NamedSections.end()) {
2020     assert(std::next(NamedSections.begin()) == NamedSections.end() &&
2021            "can only update unique sections");
2022     BinarySection *Section = NamedSections.begin()->second;
2023 
2024     LLVM_DEBUG(dbgs() << "BOLT-DEBUG: updating " << *Section << " -> ");
2025     const bool Flag = Section->isAllocatable();
2026     (void)Flag;
2027     Section->update(Data, Size, Alignment, ELFType, ELFFlags);
2028     LLVM_DEBUG(dbgs() << *Section << "\n");
2029     // FIXME: Fix section flags/attributes for MachO.
2030     if (isELF())
2031       assert(Flag == Section->isAllocatable() &&
2032              "can't change section allocation status");
2033     return *Section;
2034   }
2035 
2036   return registerSection(
2037       new BinarySection(*this, Name, Data, Size, Alignment, ELFType, ELFFlags));
2038 }
2039 
2040 void BinaryContext::deregisterSectionName(const BinarySection &Section) {
2041   auto NameRange = NameToSection.equal_range(Section.getName().str());
2042   while (NameRange.first != NameRange.second) {
2043     if (NameRange.first->second == &Section) {
2044       NameToSection.erase(NameRange.first);
2045       break;
2046     }
2047     ++NameRange.first;
2048   }
2049 }
2050 
2051 void BinaryContext::deregisterUnusedSections() {
2052   ErrorOr<BinarySection &> AbsSection = getUniqueSectionByName("<absolute>");
2053   for (auto SI = Sections.begin(); SI != Sections.end();) {
2054     BinarySection *Section = *SI;
2055     // We check getOutputData() instead of getOutputSize() because sometimes
2056     // zero-sized .text.cold sections are allocated.
2057     if (Section->hasSectionRef() || Section->getOutputData() ||
2058         (AbsSection && Section == &AbsSection.get())) {
2059       ++SI;
2060       continue;
2061     }
2062 
2063     LLVM_DEBUG(dbgs() << "LLVM-DEBUG: deregistering " << Section->getName()
2064                       << '\n';);
2065     deregisterSectionName(*Section);
2066     SI = Sections.erase(SI);
2067     delete Section;
2068   }
2069 }
2070 
2071 bool BinaryContext::deregisterSection(BinarySection &Section) {
2072   BinarySection *SectionPtr = &Section;
2073   auto Itr = Sections.find(SectionPtr);
2074   if (Itr != Sections.end()) {
2075     auto Range = AddressToSection.equal_range(SectionPtr->getAddress());
2076     while (Range.first != Range.second) {
2077       if (Range.first->second == SectionPtr) {
2078         AddressToSection.erase(Range.first);
2079         break;
2080       }
2081       ++Range.first;
2082     }
2083 
2084     deregisterSectionName(*SectionPtr);
2085     Sections.erase(Itr);
2086     delete SectionPtr;
2087     return true;
2088   }
2089   return false;
2090 }
2091 
2092 void BinaryContext::renameSection(BinarySection &Section,
2093                                   const Twine &NewName) {
2094   auto Itr = Sections.find(&Section);
2095   assert(Itr != Sections.end() && "Section must exist to be renamed.");
2096   Sections.erase(Itr);
2097 
2098   deregisterSectionName(Section);
2099 
2100   Section.Name = NewName.str();
2101   Section.setOutputName(Section.Name);
2102 
2103   NameToSection.insert(std::make_pair(Section.Name, &Section));
2104 
2105   // Reinsert with the new name.
2106   Sections.insert(&Section);
2107 }
2108 
2109 void BinaryContext::printSections(raw_ostream &OS) const {
2110   for (BinarySection *const &Section : Sections)
2111     OS << "BOLT-INFO: " << *Section << "\n";
2112 }
2113 
2114 BinarySection &BinaryContext::absoluteSection() {
2115   if (ErrorOr<BinarySection &> Section = getUniqueSectionByName("<absolute>"))
2116     return *Section;
2117   return registerOrUpdateSection("<absolute>", ELF::SHT_NULL, 0u);
2118 }
2119 
2120 ErrorOr<uint64_t> BinaryContext::getUnsignedValueAtAddress(uint64_t Address,
2121                                                            size_t Size) const {
2122   const ErrorOr<const BinarySection &> Section = getSectionForAddress(Address);
2123   if (!Section)
2124     return std::make_error_code(std::errc::bad_address);
2125 
2126   if (Section->isVirtual())
2127     return 0;
2128 
2129   DataExtractor DE(Section->getContents(), AsmInfo->isLittleEndian(),
2130                    AsmInfo->getCodePointerSize());
2131   auto ValueOffset = static_cast<uint64_t>(Address - Section->getAddress());
2132   return DE.getUnsigned(&ValueOffset, Size);
2133 }
2134 
2135 ErrorOr<uint64_t> BinaryContext::getSignedValueAtAddress(uint64_t Address,
2136                                                          size_t Size) const {
2137   const ErrorOr<const BinarySection &> Section = getSectionForAddress(Address);
2138   if (!Section)
2139     return std::make_error_code(std::errc::bad_address);
2140 
2141   if (Section->isVirtual())
2142     return 0;
2143 
2144   DataExtractor DE(Section->getContents(), AsmInfo->isLittleEndian(),
2145                    AsmInfo->getCodePointerSize());
2146   auto ValueOffset = static_cast<uint64_t>(Address - Section->getAddress());
2147   return DE.getSigned(&ValueOffset, Size);
2148 }
2149 
2150 void BinaryContext::addRelocation(uint64_t Address, MCSymbol *Symbol,
2151                                   uint64_t Type, uint64_t Addend,
2152                                   uint64_t Value) {
2153   ErrorOr<BinarySection &> Section = getSectionForAddress(Address);
2154   assert(Section && "cannot find section for address");
2155   Section->addRelocation(Address - Section->getAddress(), Symbol, Type, Addend,
2156                          Value);
2157 }
2158 
2159 void BinaryContext::addDynamicRelocation(uint64_t Address, MCSymbol *Symbol,
2160                                          uint64_t Type, uint64_t Addend,
2161                                          uint64_t Value) {
2162   ErrorOr<BinarySection &> Section = getSectionForAddress(Address);
2163   assert(Section && "cannot find section for address");
2164   Section->addDynamicRelocation(Address - Section->getAddress(), Symbol, Type,
2165                                 Addend, Value);
2166 }
2167 
2168 bool BinaryContext::removeRelocationAt(uint64_t Address) {
2169   ErrorOr<BinarySection &> Section = getSectionForAddress(Address);
2170   assert(Section && "cannot find section for address");
2171   return Section->removeRelocationAt(Address - Section->getAddress());
2172 }
2173 
2174 const Relocation *BinaryContext::getRelocationAt(uint64_t Address) const {
2175   ErrorOr<const BinarySection &> Section = getSectionForAddress(Address);
2176   if (!Section)
2177     return nullptr;
2178 
2179   return Section->getRelocationAt(Address - Section->getAddress());
2180 }
2181 
2182 const Relocation *
2183 BinaryContext::getDynamicRelocationAt(uint64_t Address) const {
2184   ErrorOr<const BinarySection &> Section = getSectionForAddress(Address);
2185   if (!Section)
2186     return nullptr;
2187 
2188   return Section->getDynamicRelocationAt(Address - Section->getAddress());
2189 }
2190 
2191 void BinaryContext::markAmbiguousRelocations(BinaryData &BD,
2192                                              const uint64_t Address) {
2193   auto setImmovable = [&](BinaryData &BD) {
2194     BinaryData *Root = BD.getAtomicRoot();
2195     LLVM_DEBUG(if (Root->isMoveable()) {
2196       dbgs() << "BOLT-DEBUG: setting " << *Root << " as immovable "
2197              << "due to ambiguous relocation referencing 0x"
2198              << Twine::utohexstr(Address) << '\n';
2199     });
2200     Root->setIsMoveable(false);
2201   };
2202 
2203   if (Address == BD.getAddress()) {
2204     setImmovable(BD);
2205 
2206     // Set previous symbol as immovable
2207     BinaryData *Prev = getBinaryDataContainingAddress(Address - 1);
2208     if (Prev && Prev->getEndAddress() == BD.getAddress())
2209       setImmovable(*Prev);
2210   }
2211 
2212   if (Address == BD.getEndAddress()) {
2213     setImmovable(BD);
2214 
2215     // Set next symbol as immovable
2216     BinaryData *Next = getBinaryDataContainingAddress(BD.getEndAddress());
2217     if (Next && Next->getAddress() == BD.getEndAddress())
2218       setImmovable(*Next);
2219   }
2220 }
2221 
2222 BinaryFunction *BinaryContext::getFunctionForSymbol(const MCSymbol *Symbol,
2223                                                     uint64_t *EntryDesc) {
2224   std::shared_lock<llvm::sys::RWMutex> Lock(SymbolToFunctionMapMutex);
2225   auto BFI = SymbolToFunctionMap.find(Symbol);
2226   if (BFI == SymbolToFunctionMap.end())
2227     return nullptr;
2228 
2229   BinaryFunction *BF = BFI->second;
2230   if (EntryDesc)
2231     *EntryDesc = BF->getEntryIDForSymbol(Symbol);
2232 
2233   return BF;
2234 }
2235 
2236 void BinaryContext::exitWithBugReport(StringRef Message,
2237                                       const BinaryFunction &Function) const {
2238   errs() << "=======================================\n";
2239   errs() << "BOLT is unable to proceed because it couldn't properly understand "
2240             "this function.\n";
2241   errs() << "If you are running the most recent version of BOLT, you may "
2242             "want to "
2243             "report this and paste this dump.\nPlease check that there is no "
2244             "sensitive contents being shared in this dump.\n";
2245   errs() << "\nOffending function: " << Function.getPrintName() << "\n\n";
2246   ScopedPrinter SP(errs());
2247   SP.printBinaryBlock("Function contents", *Function.getData());
2248   errs() << "\n";
2249   Function.dump();
2250   errs() << "ERROR: " << Message;
2251   errs() << "\n=======================================\n";
2252   exit(1);
2253 }
2254 
2255 BinaryFunction *
2256 BinaryContext::createInjectedBinaryFunction(const std::string &Name,
2257                                             bool IsSimple) {
2258   InjectedBinaryFunctions.push_back(new BinaryFunction(Name, *this, IsSimple));
2259   BinaryFunction *BF = InjectedBinaryFunctions.back();
2260   setSymbolToFunctionMap(BF->getSymbol(), BF);
2261   BF->CurrentState = BinaryFunction::State::CFG;
2262   return BF;
2263 }
2264 
2265 std::pair<size_t, size_t>
2266 BinaryContext::calculateEmittedSize(BinaryFunction &BF, bool FixBranches) {
2267   // Adjust branch instruction to match the current layout.
2268   if (FixBranches)
2269     BF.fixBranches();
2270 
2271   // Create local MC context to isolate the effect of ephemeral code emission.
2272   IndependentCodeEmitter MCEInstance = createIndependentMCCodeEmitter();
2273   MCContext *LocalCtx = MCEInstance.LocalCtx.get();
2274   MCAsmBackend *MAB =
2275       TheTarget->createMCAsmBackend(*STI, *MRI, MCTargetOptions());
2276 
2277   SmallString<256> Code;
2278   raw_svector_ostream VecOS(Code);
2279 
2280   std::unique_ptr<MCObjectWriter> OW = MAB->createObjectWriter(VecOS);
2281   std::unique_ptr<MCStreamer> Streamer(TheTarget->createMCObjectStreamer(
2282       *TheTriple, *LocalCtx, std::unique_ptr<MCAsmBackend>(MAB), std::move(OW),
2283       std::unique_ptr<MCCodeEmitter>(MCEInstance.MCE.release()), *STI,
2284       /*RelaxAll=*/false,
2285       /*IncrementalLinkerCompatible=*/false,
2286       /*DWARFMustBeAtTheEnd=*/false));
2287 
2288   Streamer->initSections(false, *STI);
2289 
2290   MCSection *Section = MCEInstance.LocalMOFI->getTextSection();
2291   Section->setHasInstructions(true);
2292 
2293   // Create symbols in the LocalCtx so that they get destroyed with it.
2294   MCSymbol *StartLabel = LocalCtx->createTempSymbol();
2295   MCSymbol *EndLabel = LocalCtx->createTempSymbol();
2296 
2297   Streamer->switchSection(Section);
2298   Streamer->emitLabel(StartLabel);
2299   emitFunctionBody(*Streamer, BF, BF.getLayout().getMainFragment(),
2300                    /*EmitCodeOnly=*/true);
2301   Streamer->emitLabel(EndLabel);
2302 
2303   using LabelRange = std::pair<const MCSymbol *, const MCSymbol *>;
2304   SmallVector<LabelRange> SplitLabels;
2305   for (FunctionFragment &FF : BF.getLayout().getSplitFragments()) {
2306     MCSymbol *const SplitStartLabel = LocalCtx->createTempSymbol();
2307     MCSymbol *const SplitEndLabel = LocalCtx->createTempSymbol();
2308     SplitLabels.emplace_back(SplitStartLabel, SplitEndLabel);
2309 
2310     MCSectionELF *const SplitSection = LocalCtx->getELFSection(
2311         BF.getCodeSectionName(FF.getFragmentNum()), ELF::SHT_PROGBITS,
2312         ELF::SHF_EXECINSTR | ELF::SHF_ALLOC);
2313     SplitSection->setHasInstructions(true);
2314     Streamer->switchSection(SplitSection);
2315 
2316     Streamer->emitLabel(SplitStartLabel);
2317     emitFunctionBody(*Streamer, BF, FF, /*EmitCodeOnly=*/true);
2318     Streamer->emitLabel(SplitEndLabel);
2319     // To avoid calling MCObjectStreamer::flushPendingLabels() which is
2320     // private
2321     Streamer->emitBytes(StringRef(""));
2322     Streamer->switchSection(Section);
2323   }
2324 
2325   // To avoid calling MCObjectStreamer::flushPendingLabels() which is private or
2326   // MCStreamer::Finish(), which does more than we want
2327   Streamer->emitBytes(StringRef(""));
2328 
2329   MCAssembler &Assembler =
2330       static_cast<MCObjectStreamer *>(Streamer.get())->getAssembler();
2331   MCAsmLayout Layout(Assembler);
2332   Assembler.layout(Layout);
2333 
2334   const uint64_t HotSize =
2335       Layout.getSymbolOffset(*EndLabel) - Layout.getSymbolOffset(*StartLabel);
2336   const uint64_t ColdSize =
2337       std::accumulate(SplitLabels.begin(), SplitLabels.end(), 0ULL,
2338                       [&](const uint64_t Accu, const LabelRange &Labels) {
2339                         return Accu + Layout.getSymbolOffset(*Labels.second) -
2340                                Layout.getSymbolOffset(*Labels.first);
2341                       });
2342 
2343   // Clean-up the effect of the code emission.
2344   for (const MCSymbol &Symbol : Assembler.symbols()) {
2345     MCSymbol *MutableSymbol = const_cast<MCSymbol *>(&Symbol);
2346     MutableSymbol->setUndefined();
2347     MutableSymbol->setIsRegistered(false);
2348   }
2349 
2350   return std::make_pair(HotSize, ColdSize);
2351 }
2352 
2353 bool BinaryContext::validateInstructionEncoding(
2354     ArrayRef<uint8_t> InputSequence) const {
2355   MCInst Inst;
2356   uint64_t InstSize;
2357   DisAsm->getInstruction(Inst, InstSize, InputSequence, 0, nulls());
2358   assert(InstSize == InputSequence.size() &&
2359          "Disassembled instruction size does not match the sequence.");
2360 
2361   SmallString<256> Code;
2362   SmallVector<MCFixup, 4> Fixups;
2363 
2364   MCE->encodeInstruction(Inst, Code, Fixups, *STI);
2365   auto OutputSequence = ArrayRef<uint8_t>((uint8_t *)Code.data(), Code.size());
2366   if (InputSequence != OutputSequence) {
2367     if (opts::Verbosity > 1) {
2368       errs() << "BOLT-WARNING: mismatched encoding detected\n"
2369              << "      input: " << InputSequence << '\n'
2370              << "     output: " << OutputSequence << '\n';
2371     }
2372     return false;
2373   }
2374 
2375   return true;
2376 }
2377 
2378 uint64_t BinaryContext::getHotThreshold() const {
2379   static uint64_t Threshold = 0;
2380   if (Threshold == 0) {
2381     Threshold = std::max(
2382         (uint64_t)opts::ExecutionCountThreshold,
2383         NumProfiledFuncs ? SumExecutionCount / (2 * NumProfiledFuncs) : 1);
2384   }
2385   return Threshold;
2386 }
2387 
2388 BinaryFunction *BinaryContext::getBinaryFunctionContainingAddress(
2389     uint64_t Address, bool CheckPastEnd, bool UseMaxSize) {
2390   auto FI = BinaryFunctions.upper_bound(Address);
2391   if (FI == BinaryFunctions.begin())
2392     return nullptr;
2393   --FI;
2394 
2395   const uint64_t UsedSize =
2396       UseMaxSize ? FI->second.getMaxSize() : FI->second.getSize();
2397 
2398   if (Address >= FI->first + UsedSize + (CheckPastEnd ? 1 : 0))
2399     return nullptr;
2400 
2401   return &FI->second;
2402 }
2403 
2404 BinaryFunction *BinaryContext::getBinaryFunctionAtAddress(uint64_t Address) {
2405   // First, try to find a function starting at the given address. If the
2406   // function was folded, this will get us the original folded function if it
2407   // wasn't removed from the list, e.g. in non-relocation mode.
2408   auto BFI = BinaryFunctions.find(Address);
2409   if (BFI != BinaryFunctions.end())
2410     return &BFI->second;
2411 
2412   // We might have folded the function matching the object at the given
2413   // address. In such case, we look for a function matching the symbol
2414   // registered at the original address. The new function (the one that the
2415   // original was folded into) will hold the symbol.
2416   if (const BinaryData *BD = getBinaryDataAtAddress(Address)) {
2417     uint64_t EntryID = 0;
2418     BinaryFunction *BF = getFunctionForSymbol(BD->getSymbol(), &EntryID);
2419     if (BF && EntryID == 0)
2420       return BF;
2421   }
2422   return nullptr;
2423 }
2424 
2425 DebugAddressRangesVector BinaryContext::translateModuleAddressRanges(
2426     const DWARFAddressRangesVector &InputRanges) const {
2427   DebugAddressRangesVector OutputRanges;
2428 
2429   for (const DWARFAddressRange Range : InputRanges) {
2430     auto BFI = BinaryFunctions.lower_bound(Range.LowPC);
2431     while (BFI != BinaryFunctions.end()) {
2432       const BinaryFunction &Function = BFI->second;
2433       if (Function.getAddress() >= Range.HighPC)
2434         break;
2435       const DebugAddressRangesVector FunctionRanges =
2436           Function.getOutputAddressRanges();
2437       llvm::move(FunctionRanges, std::back_inserter(OutputRanges));
2438       std::advance(BFI, 1);
2439     }
2440   }
2441 
2442   return OutputRanges;
2443 }
2444 
2445 } // namespace bolt
2446 } // namespace llvm
2447