xref: /llvm-project/bolt/lib/Core/BinaryContext.cpp (revision 6e8a1a45a783c13e4cd19bfd20b7a56cab6f7d81)
1 //===- bolt/Core/BinaryContext.cpp - Low-level context --------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements the BinaryContext class.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "bolt/Core/BinaryContext.h"
14 #include "bolt/Core/BinaryEmitter.h"
15 #include "bolt/Core/BinaryFunction.h"
16 #include "bolt/Utils/CommandLineOpts.h"
17 #include "bolt/Utils/Utils.h"
18 #include "llvm/ADT/STLExtras.h"
19 #include "llvm/ADT/Twine.h"
20 #include "llvm/DebugInfo/DWARF/DWARFCompileUnit.h"
21 #include "llvm/DebugInfo/DWARF/DWARFFormValue.h"
22 #include "llvm/DebugInfo/DWARF/DWARFUnit.h"
23 #include "llvm/MC/MCAssembler.h"
24 #include "llvm/MC/MCContext.h"
25 #include "llvm/MC/MCDisassembler/MCDisassembler.h"
26 #include "llvm/MC/MCInstPrinter.h"
27 #include "llvm/MC/MCObjectStreamer.h"
28 #include "llvm/MC/MCObjectWriter.h"
29 #include "llvm/MC/MCRegisterInfo.h"
30 #include "llvm/MC/MCSectionELF.h"
31 #include "llvm/MC/MCStreamer.h"
32 #include "llvm/MC/MCSubtargetInfo.h"
33 #include "llvm/MC/MCSymbol.h"
34 #include "llvm/Support/CommandLine.h"
35 #include "llvm/Support/Error.h"
36 #include "llvm/Support/Regex.h"
37 #include <algorithm>
38 #include <functional>
39 #include <iterator>
40 #include <unordered_set>
41 
42 using namespace llvm;
43 
44 #undef  DEBUG_TYPE
45 #define DEBUG_TYPE "bolt"
46 
47 namespace opts {
48 
49 cl::opt<bool> NoHugePages("no-huge-pages",
50                           cl::desc("use regular size pages for code alignment"),
51                           cl::Hidden, cl::cat(BoltCategory));
52 
53 static cl::opt<bool>
54 PrintDebugInfo("print-debug-info",
55   cl::desc("print debug info when printing functions"),
56   cl::Hidden,
57   cl::ZeroOrMore,
58   cl::cat(BoltCategory));
59 
60 cl::opt<bool> PrintRelocations(
61     "print-relocations",
62     cl::desc("print relocations when printing functions/objects"), cl::Hidden,
63     cl::cat(BoltCategory));
64 
65 static cl::opt<bool>
66 PrintMemData("print-mem-data",
67   cl::desc("print memory data annotations when printing functions"),
68   cl::Hidden,
69   cl::ZeroOrMore,
70   cl::cat(BoltCategory));
71 
72 cl::opt<std::string> CompDirOverride(
73     "comp-dir-override",
74     cl::desc("overrides DW_AT_comp_dir, and provides an alternative base "
75              "location, which is used with DW_AT_dwo_name to construct a path "
76              "to *.dwo files."),
77     cl::Hidden, cl::init(""), cl::cat(BoltCategory));
78 } // namespace opts
79 
80 namespace llvm {
81 namespace bolt {
82 
83 char BOLTError::ID = 0;
84 
85 BOLTError::BOLTError(bool IsFatal, const Twine &S)
86     : IsFatal(IsFatal), Msg(S.str()) {}
87 
88 void BOLTError::log(raw_ostream &OS) const {
89   if (IsFatal)
90     OS << "FATAL ";
91   StringRef ErrMsg = StringRef(Msg);
92   // Prepend our error prefix if it is missing
93   if (ErrMsg.empty()) {
94     OS << "BOLT-ERROR\n";
95   } else {
96     if (!ErrMsg.starts_with("BOLT-ERROR"))
97       OS << "BOLT-ERROR: ";
98     OS << ErrMsg << "\n";
99   }
100 }
101 
102 std::error_code BOLTError::convertToErrorCode() const {
103   return inconvertibleErrorCode();
104 }
105 
106 Error createNonFatalBOLTError(const Twine &S) {
107   return make_error<BOLTError>(/*IsFatal*/ false, S);
108 }
109 
110 Error createFatalBOLTError(const Twine &S) {
111   return make_error<BOLTError>(/*IsFatal*/ true, S);
112 }
113 
114 void BinaryContext::logBOLTErrorsAndQuitOnFatal(Error E) {
115   handleAllErrors(Error(std::move(E)), [&](const BOLTError &E) {
116     if (!E.getMessage().empty())
117       E.log(this->errs());
118     if (E.isFatal())
119       exit(1);
120   });
121 }
122 
123 BinaryContext::BinaryContext(std::unique_ptr<MCContext> Ctx,
124                              std::unique_ptr<DWARFContext> DwCtx,
125                              std::unique_ptr<Triple> TheTriple,
126                              std::shared_ptr<orc::SymbolStringPool> SSP,
127                              const Target *TheTarget, std::string TripleName,
128                              std::unique_ptr<MCCodeEmitter> MCE,
129                              std::unique_ptr<MCObjectFileInfo> MOFI,
130                              std::unique_ptr<const MCAsmInfo> AsmInfo,
131                              std::unique_ptr<const MCInstrInfo> MII,
132                              std::unique_ptr<const MCSubtargetInfo> STI,
133                              std::unique_ptr<MCInstPrinter> InstPrinter,
134                              std::unique_ptr<const MCInstrAnalysis> MIA,
135                              std::unique_ptr<MCPlusBuilder> MIB,
136                              std::unique_ptr<const MCRegisterInfo> MRI,
137                              std::unique_ptr<MCDisassembler> DisAsm,
138                              JournalingStreams Logger)
139     : Ctx(std::move(Ctx)), DwCtx(std::move(DwCtx)),
140       TheTriple(std::move(TheTriple)), SSP(std::move(SSP)),
141       TheTarget(TheTarget), TripleName(TripleName), MCE(std::move(MCE)),
142       MOFI(std::move(MOFI)), AsmInfo(std::move(AsmInfo)), MII(std::move(MII)),
143       STI(std::move(STI)), InstPrinter(std::move(InstPrinter)),
144       MIA(std::move(MIA)), MIB(std::move(MIB)), MRI(std::move(MRI)),
145       DisAsm(std::move(DisAsm)), Logger(Logger), InitialDynoStats(isAArch64()) {
146   RegularPageSize = isAArch64() ? RegularPageSizeAArch64 : RegularPageSizeX86;
147   PageAlign = opts::NoHugePages ? RegularPageSize : HugePageSize;
148 }
149 
150 BinaryContext::~BinaryContext() {
151   for (BinarySection *Section : Sections)
152     delete Section;
153   for (BinaryFunction *InjectedFunction : InjectedBinaryFunctions)
154     delete InjectedFunction;
155   for (std::pair<const uint64_t, JumpTable *> JTI : JumpTables)
156     delete JTI.second;
157   clearBinaryData();
158 }
159 
160 /// Create BinaryContext for a given architecture \p ArchName and
161 /// triple \p TripleName.
162 Expected<std::unique_ptr<BinaryContext>> BinaryContext::createBinaryContext(
163     Triple TheTriple, std::shared_ptr<orc::SymbolStringPool> SSP,
164     StringRef InputFileName, SubtargetFeatures *Features, bool IsPIC,
165     std::unique_ptr<DWARFContext> DwCtx, JournalingStreams Logger) {
166   StringRef ArchName = "";
167   std::string FeaturesStr = "";
168   switch (TheTriple.getArch()) {
169   case llvm::Triple::x86_64:
170     if (Features)
171       return createFatalBOLTError(
172           "x86_64 target does not use SubtargetFeatures");
173     ArchName = "x86-64";
174     FeaturesStr = "+nopl";
175     break;
176   case llvm::Triple::aarch64:
177     if (Features)
178       return createFatalBOLTError(
179           "AArch64 target does not use SubtargetFeatures");
180     ArchName = "aarch64";
181     FeaturesStr = "+all";
182     break;
183   case llvm::Triple::riscv64: {
184     ArchName = "riscv64";
185     if (!Features)
186       return createFatalBOLTError("RISCV target needs SubtargetFeatures");
187     // We rely on relaxation for some transformations (e.g., promoting all calls
188     // to PseudoCALL and then making JITLink relax them). Since the relax
189     // feature is not stored in the object file, we manually enable it.
190     Features->AddFeature("relax");
191     FeaturesStr = Features->getString();
192     break;
193   }
194   default:
195     return createStringError(std::errc::not_supported,
196                              "BOLT-ERROR: Unrecognized machine in ELF file");
197   }
198 
199   const std::string TripleName = TheTriple.str();
200 
201   std::string Error;
202   const Target *TheTarget =
203       TargetRegistry::lookupTarget(std::string(ArchName), TheTriple, Error);
204   if (!TheTarget)
205     return createStringError(make_error_code(std::errc::not_supported),
206                              Twine("BOLT-ERROR: ", Error));
207 
208   std::unique_ptr<const MCRegisterInfo> MRI(
209       TheTarget->createMCRegInfo(TripleName));
210   if (!MRI)
211     return createStringError(
212         make_error_code(std::errc::not_supported),
213         Twine("BOLT-ERROR: no register info for target ", TripleName));
214 
215   // Set up disassembler.
216   std::unique_ptr<MCAsmInfo> AsmInfo(
217       TheTarget->createMCAsmInfo(*MRI, TripleName, MCTargetOptions()));
218   if (!AsmInfo)
219     return createStringError(
220         make_error_code(std::errc::not_supported),
221         Twine("BOLT-ERROR: no assembly info for target ", TripleName));
222   // BOLT creates "func@PLT" symbols for PLT entries. In function assembly dump
223   // we want to emit such names as using @PLT without double quotes to convey
224   // variant kind to the assembler. BOLT doesn't rely on the linker so we can
225   // override the default AsmInfo behavior to emit names the way we want.
226   AsmInfo->setAllowAtInName(true);
227 
228   std::unique_ptr<const MCSubtargetInfo> STI(
229       TheTarget->createMCSubtargetInfo(TripleName, "", FeaturesStr));
230   if (!STI)
231     return createStringError(
232         make_error_code(std::errc::not_supported),
233         Twine("BOLT-ERROR: no subtarget info for target ", TripleName));
234 
235   std::unique_ptr<const MCInstrInfo> MII(TheTarget->createMCInstrInfo());
236   if (!MII)
237     return createStringError(
238         make_error_code(std::errc::not_supported),
239         Twine("BOLT-ERROR: no instruction info for target ", TripleName));
240 
241   std::unique_ptr<MCContext> Ctx(
242       new MCContext(TheTriple, AsmInfo.get(), MRI.get(), STI.get()));
243   std::unique_ptr<MCObjectFileInfo> MOFI(
244       TheTarget->createMCObjectFileInfo(*Ctx, IsPIC));
245   Ctx->setObjectFileInfo(MOFI.get());
246   // We do not support X86 Large code model. Change this in the future.
247   bool Large = false;
248   if (TheTriple.getArch() == llvm::Triple::aarch64)
249     Large = true;
250   unsigned LSDAEncoding =
251       Large ? dwarf::DW_EH_PE_absptr : dwarf::DW_EH_PE_udata4;
252   if (IsPIC) {
253     LSDAEncoding = dwarf::DW_EH_PE_pcrel |
254                    (Large ? dwarf::DW_EH_PE_sdata8 : dwarf::DW_EH_PE_sdata4);
255   }
256 
257   std::unique_ptr<MCDisassembler> DisAsm(
258       TheTarget->createMCDisassembler(*STI, *Ctx));
259 
260   if (!DisAsm)
261     return createStringError(
262         make_error_code(std::errc::not_supported),
263         Twine("BOLT-ERROR: no disassembler info for target ", TripleName));
264 
265   std::unique_ptr<const MCInstrAnalysis> MIA(
266       TheTarget->createMCInstrAnalysis(MII.get()));
267   if (!MIA)
268     return createStringError(
269         make_error_code(std::errc::not_supported),
270         Twine("BOLT-ERROR: failed to create instruction analysis for target ",
271               TripleName));
272 
273   int AsmPrinterVariant = AsmInfo->getAssemblerDialect();
274   std::unique_ptr<MCInstPrinter> InstructionPrinter(
275       TheTarget->createMCInstPrinter(TheTriple, AsmPrinterVariant, *AsmInfo,
276                                      *MII, *MRI));
277   if (!InstructionPrinter)
278     return createStringError(
279         make_error_code(std::errc::not_supported),
280         Twine("BOLT-ERROR: no instruction printer for target ", TripleName));
281   InstructionPrinter->setPrintImmHex(true);
282 
283   std::unique_ptr<MCCodeEmitter> MCE(
284       TheTarget->createMCCodeEmitter(*MII, *Ctx));
285 
286   auto BC = std::make_unique<BinaryContext>(
287       std::move(Ctx), std::move(DwCtx), std::make_unique<Triple>(TheTriple),
288       std::move(SSP), TheTarget, std::string(TripleName), std::move(MCE),
289       std::move(MOFI), std::move(AsmInfo), std::move(MII), std::move(STI),
290       std::move(InstructionPrinter), std::move(MIA), nullptr, std::move(MRI),
291       std::move(DisAsm), Logger);
292 
293   BC->LSDAEncoding = LSDAEncoding;
294 
295   BC->MAB = std::unique_ptr<MCAsmBackend>(
296       BC->TheTarget->createMCAsmBackend(*BC->STI, *BC->MRI, MCTargetOptions()));
297 
298   BC->setFilename(InputFileName);
299 
300   BC->HasFixedLoadAddress = !IsPIC;
301 
302   BC->SymbolicDisAsm = std::unique_ptr<MCDisassembler>(
303       BC->TheTarget->createMCDisassembler(*BC->STI, *BC->Ctx));
304 
305   if (!BC->SymbolicDisAsm)
306     return createStringError(
307         make_error_code(std::errc::not_supported),
308         Twine("BOLT-ERROR: no disassembler info for target ", TripleName));
309 
310   return std::move(BC);
311 }
312 
313 bool BinaryContext::forceSymbolRelocations(StringRef SymbolName) const {
314   if (opts::HotText &&
315       (SymbolName == "__hot_start" || SymbolName == "__hot_end"))
316     return true;
317 
318   if (opts::HotData &&
319       (SymbolName == "__hot_data_start" || SymbolName == "__hot_data_end"))
320     return true;
321 
322   if (SymbolName == "_end")
323     return true;
324 
325   return false;
326 }
327 
328 std::unique_ptr<MCObjectWriter>
329 BinaryContext::createObjectWriter(raw_pwrite_stream &OS) {
330   return MAB->createObjectWriter(OS);
331 }
332 
333 bool BinaryContext::validateObjectNesting() const {
334   auto Itr = BinaryDataMap.begin();
335   auto End = BinaryDataMap.end();
336   bool Valid = true;
337   while (Itr != End) {
338     auto Next = std::next(Itr);
339     while (Next != End &&
340            Itr->second->getSection() == Next->second->getSection() &&
341            Itr->second->containsRange(Next->second->getAddress(),
342                                       Next->second->getSize())) {
343       if (Next->second->Parent != Itr->second) {
344         this->errs() << "BOLT-WARNING: object nesting incorrect for:\n"
345                      << "BOLT-WARNING:  " << *Itr->second << "\n"
346                      << "BOLT-WARNING:  " << *Next->second << "\n";
347         Valid = false;
348       }
349       ++Next;
350     }
351     Itr = Next;
352   }
353   return Valid;
354 }
355 
356 bool BinaryContext::validateHoles() const {
357   bool Valid = true;
358   for (BinarySection &Section : sections()) {
359     for (const Relocation &Rel : Section.relocations()) {
360       uint64_t RelAddr = Rel.Offset + Section.getAddress();
361       const BinaryData *BD = getBinaryDataContainingAddress(RelAddr);
362       if (!BD) {
363         this->errs()
364             << "BOLT-WARNING: no BinaryData found for relocation at address"
365             << " 0x" << Twine::utohexstr(RelAddr) << " in " << Section.getName()
366             << "\n";
367         Valid = false;
368       } else if (!BD->getAtomicRoot()) {
369         this->errs()
370             << "BOLT-WARNING: no atomic BinaryData found for relocation at "
371             << "address 0x" << Twine::utohexstr(RelAddr) << " in "
372             << Section.getName() << "\n";
373         Valid = false;
374       }
375     }
376   }
377   return Valid;
378 }
379 
380 void BinaryContext::updateObjectNesting(BinaryDataMapType::iterator GAI) {
381   const uint64_t Address = GAI->second->getAddress();
382   const uint64_t Size = GAI->second->getSize();
383 
384   auto fixParents = [&](BinaryDataMapType::iterator Itr,
385                         BinaryData *NewParent) {
386     BinaryData *OldParent = Itr->second->Parent;
387     Itr->second->Parent = NewParent;
388     ++Itr;
389     while (Itr != BinaryDataMap.end() && OldParent &&
390            Itr->second->Parent == OldParent) {
391       Itr->second->Parent = NewParent;
392       ++Itr;
393     }
394   };
395 
396   // Check if the previous symbol contains the newly added symbol.
397   if (GAI != BinaryDataMap.begin()) {
398     BinaryData *Prev = std::prev(GAI)->second;
399     while (Prev) {
400       if (Prev->getSection() == GAI->second->getSection() &&
401           Prev->containsRange(Address, Size)) {
402         fixParents(GAI, Prev);
403       } else {
404         fixParents(GAI, nullptr);
405       }
406       Prev = Prev->Parent;
407     }
408   }
409 
410   // Check if the newly added symbol contains any subsequent symbols.
411   if (Size != 0) {
412     BinaryData *BD = GAI->second->Parent ? GAI->second->Parent : GAI->second;
413     auto Itr = std::next(GAI);
414     while (
415         Itr != BinaryDataMap.end() &&
416         BD->containsRange(Itr->second->getAddress(), Itr->second->getSize())) {
417       Itr->second->Parent = BD;
418       ++Itr;
419     }
420   }
421 }
422 
423 iterator_range<BinaryContext::binary_data_iterator>
424 BinaryContext::getSubBinaryData(BinaryData *BD) {
425   auto Start = std::next(BinaryDataMap.find(BD->getAddress()));
426   auto End = Start;
427   while (End != BinaryDataMap.end() && BD->isAncestorOf(End->second))
428     ++End;
429   return make_range(Start, End);
430 }
431 
432 std::pair<const MCSymbol *, uint64_t>
433 BinaryContext::handleAddressRef(uint64_t Address, BinaryFunction &BF,
434                                 bool IsPCRel) {
435   if (isAArch64()) {
436     // Check if this is an access to a constant island and create bookkeeping
437     // to keep track of it and emit it later as part of this function.
438     if (MCSymbol *IslandSym = BF.getOrCreateIslandAccess(Address))
439       return std::make_pair(IslandSym, 0);
440 
441     // Detect custom code written in assembly that refers to arbitrary
442     // constant islands from other functions. Write this reference so we
443     // can pull this constant island and emit it as part of this function
444     // too.
445     auto IslandIter = AddressToConstantIslandMap.lower_bound(Address);
446 
447     if (IslandIter != AddressToConstantIslandMap.begin() &&
448         (IslandIter == AddressToConstantIslandMap.end() ||
449          IslandIter->first > Address))
450       --IslandIter;
451 
452     if (IslandIter != AddressToConstantIslandMap.end()) {
453       // Fall-back to referencing the original constant island in the presence
454       // of dynamic relocs, as we currently do not support cloning them.
455       // Notice: we might fail to link because of this, if the original constant
456       // island we are referring would be emitted too far away.
457       if (IslandIter->second->hasDynamicRelocationAtIsland()) {
458         MCSymbol *IslandSym =
459             IslandIter->second->getOrCreateIslandAccess(Address);
460         if (IslandSym)
461           return std::make_pair(IslandSym, 0);
462       } else if (MCSymbol *IslandSym =
463                      IslandIter->second->getOrCreateProxyIslandAccess(Address,
464                                                                       BF)) {
465         BF.createIslandDependency(IslandSym, IslandIter->second);
466         return std::make_pair(IslandSym, 0);
467       }
468     }
469   }
470 
471   // Note that the address does not necessarily have to reside inside
472   // a section, it could be an absolute address too.
473   ErrorOr<BinarySection &> Section = getSectionForAddress(Address);
474   if (Section && Section->isText()) {
475     if (BF.containsAddress(Address, /*UseMaxSize=*/isAArch64())) {
476       if (Address != BF.getAddress()) {
477         // The address could potentially escape. Mark it as another entry
478         // point into the function.
479         if (opts::Verbosity >= 1) {
480           this->outs() << "BOLT-INFO: potentially escaped address 0x"
481                        << Twine::utohexstr(Address) << " in function " << BF
482                        << '\n';
483         }
484         BF.HasInternalLabelReference = true;
485         return std::make_pair(
486             BF.addEntryPointAtOffset(Address - BF.getAddress()), 0);
487       }
488     } else {
489       addInterproceduralReference(&BF, Address);
490     }
491   }
492 
493   // With relocations, catch jump table references outside of the basic block
494   // containing the indirect jump.
495   if (HasRelocations) {
496     const MemoryContentsType MemType = analyzeMemoryAt(Address, BF);
497     if (MemType == MemoryContentsType::POSSIBLE_PIC_JUMP_TABLE && IsPCRel) {
498       const MCSymbol *Symbol =
499           getOrCreateJumpTable(BF, Address, JumpTable::JTT_PIC);
500 
501       return std::make_pair(Symbol, 0);
502     }
503   }
504 
505   if (BinaryData *BD = getBinaryDataContainingAddress(Address))
506     return std::make_pair(BD->getSymbol(), Address - BD->getAddress());
507 
508   // TODO: use DWARF info to get size/alignment here?
509   MCSymbol *TargetSymbol = getOrCreateGlobalSymbol(Address, "DATAat");
510   LLVM_DEBUG(dbgs() << "Created symbol " << TargetSymbol->getName() << '\n');
511   return std::make_pair(TargetSymbol, 0);
512 }
513 
514 MemoryContentsType BinaryContext::analyzeMemoryAt(uint64_t Address,
515                                                   BinaryFunction &BF) {
516   if (!isX86())
517     return MemoryContentsType::UNKNOWN;
518 
519   ErrorOr<BinarySection &> Section = getSectionForAddress(Address);
520   if (!Section) {
521     // No section - possibly an absolute address. Since we don't allow
522     // internal function addresses to escape the function scope - we
523     // consider it a tail call.
524     if (opts::Verbosity > 1) {
525       this->errs() << "BOLT-WARNING: no section for address 0x"
526                    << Twine::utohexstr(Address) << " referenced from function "
527                    << BF << '\n';
528     }
529     return MemoryContentsType::UNKNOWN;
530   }
531 
532   if (Section->isVirtual()) {
533     // The contents are filled at runtime.
534     return MemoryContentsType::UNKNOWN;
535   }
536 
537   // No support for jump tables in code yet.
538   if (Section->isText())
539     return MemoryContentsType::UNKNOWN;
540 
541   // Start with checking for PIC jump table. We expect non-PIC jump tables
542   // to have high 32 bits set to 0.
543   if (analyzeJumpTable(Address, JumpTable::JTT_PIC, BF))
544     return MemoryContentsType::POSSIBLE_PIC_JUMP_TABLE;
545 
546   if (analyzeJumpTable(Address, JumpTable::JTT_NORMAL, BF))
547     return MemoryContentsType::POSSIBLE_JUMP_TABLE;
548 
549   return MemoryContentsType::UNKNOWN;
550 }
551 
552 bool BinaryContext::analyzeJumpTable(const uint64_t Address,
553                                      const JumpTable::JumpTableType Type,
554                                      const BinaryFunction &BF,
555                                      const uint64_t NextJTAddress,
556                                      JumpTable::AddressesType *EntriesAsAddress,
557                                      bool *HasEntryInFragment) const {
558   // Target address of __builtin_unreachable.
559   const uint64_t UnreachableAddress = BF.getAddress() + BF.getSize();
560 
561   // Is one of the targets __builtin_unreachable?
562   bool HasUnreachable = false;
563 
564   // Does one of the entries match function start address?
565   bool HasStartAsEntry = false;
566 
567   // Number of targets other than __builtin_unreachable.
568   uint64_t NumRealEntries = 0;
569 
570   // Size of the jump table without trailing __builtin_unreachable entries.
571   size_t TrimmedSize = 0;
572 
573   auto addEntryAddress = [&](uint64_t EntryAddress, bool Unreachable = false) {
574     if (!EntriesAsAddress)
575       return;
576     EntriesAsAddress->emplace_back(EntryAddress);
577     if (!Unreachable)
578       TrimmedSize = EntriesAsAddress->size();
579   };
580 
581   ErrorOr<const BinarySection &> Section = getSectionForAddress(Address);
582   if (!Section)
583     return false;
584 
585   // The upper bound is defined by containing object, section limits, and
586   // the next jump table in memory.
587   uint64_t UpperBound = Section->getEndAddress();
588   const BinaryData *JumpTableBD = getBinaryDataAtAddress(Address);
589   if (JumpTableBD && JumpTableBD->getSize()) {
590     assert(JumpTableBD->getEndAddress() <= UpperBound &&
591            "data object cannot cross a section boundary");
592     UpperBound = JumpTableBD->getEndAddress();
593   }
594   if (NextJTAddress)
595     UpperBound = std::min(NextJTAddress, UpperBound);
596 
597   LLVM_DEBUG({
598     using JTT = JumpTable::JumpTableType;
599     dbgs() << formatv("BOLT-DEBUG: analyzeJumpTable @{0:x} in {1}, JTT={2}\n",
600                       Address, BF.getPrintName(),
601                       Type == JTT::JTT_PIC ? "PIC" : "Normal");
602   });
603   const uint64_t EntrySize = getJumpTableEntrySize(Type);
604   for (uint64_t EntryAddress = Address; EntryAddress <= UpperBound - EntrySize;
605        EntryAddress += EntrySize) {
606     LLVM_DEBUG(dbgs() << "  * Checking 0x" << Twine::utohexstr(EntryAddress)
607                       << " -> ");
608     // Check if there's a proper relocation against the jump table entry.
609     if (HasRelocations) {
610       if (Type == JumpTable::JTT_PIC &&
611           !DataPCRelocations.count(EntryAddress)) {
612         LLVM_DEBUG(
613             dbgs() << "FAIL: JTT_PIC table, no relocation for this address\n");
614         break;
615       }
616       if (Type == JumpTable::JTT_NORMAL && !getRelocationAt(EntryAddress)) {
617         LLVM_DEBUG(
618             dbgs()
619             << "FAIL: JTT_NORMAL table, no relocation for this address\n");
620         break;
621       }
622     }
623 
624     const uint64_t Value =
625         (Type == JumpTable::JTT_PIC)
626             ? Address + *getSignedValueAtAddress(EntryAddress, EntrySize)
627             : *getPointerAtAddress(EntryAddress);
628 
629     // __builtin_unreachable() case.
630     if (Value == UnreachableAddress) {
631       addEntryAddress(Value, /*Unreachable*/ true);
632       HasUnreachable = true;
633       LLVM_DEBUG(dbgs() << formatv("OK: {0:x} __builtin_unreachable\n", Value));
634       continue;
635     }
636 
637     // Function start is another special case. It is allowed in the jump table,
638     // but we need at least one another regular entry to distinguish the table
639     // from, e.g. a function pointer array.
640     if (Value == BF.getAddress()) {
641       HasStartAsEntry = true;
642       addEntryAddress(Value);
643       continue;
644     }
645 
646     // Function or one of its fragments.
647     const BinaryFunction *TargetBF = getBinaryFunctionContainingAddress(Value);
648     const bool DoesBelongToFunction =
649         BF.containsAddress(Value) ||
650         (TargetBF && areRelatedFragments(TargetBF, &BF));
651     if (!DoesBelongToFunction) {
652       LLVM_DEBUG({
653         if (!BF.containsAddress(Value)) {
654           dbgs() << "FAIL: function doesn't contain this address\n";
655           if (TargetBF) {
656             dbgs() << "  ! function containing this address: "
657                    << TargetBF->getPrintName() << '\n';
658             if (TargetBF->isFragment()) {
659               dbgs() << "  ! is a fragment";
660               for (BinaryFunction *Parent : TargetBF->ParentFragments)
661                 dbgs() << ", parent: " << Parent->getPrintName();
662               dbgs() << '\n';
663             }
664           }
665         }
666       });
667       break;
668     }
669 
670     // Check there's an instruction at this offset.
671     if (TargetBF->getState() == BinaryFunction::State::Disassembled &&
672         !TargetBF->getInstructionAtOffset(Value - TargetBF->getAddress())) {
673       LLVM_DEBUG(dbgs() << formatv("FAIL: no instruction at {0:x}\n", Value));
674       break;
675     }
676 
677     ++NumRealEntries;
678     LLVM_DEBUG(dbgs() << formatv("OK: {0:x} real entry\n", Value));
679 
680     if (TargetBF != &BF && HasEntryInFragment)
681       *HasEntryInFragment = true;
682     addEntryAddress(Value);
683   }
684 
685   // Trim direct/normal jump table to exclude trailing unreachable entries that
686   // can collide with a function address.
687   if (Type == JumpTable::JTT_NORMAL && EntriesAsAddress &&
688       TrimmedSize != EntriesAsAddress->size() &&
689       getBinaryFunctionAtAddress(UnreachableAddress))
690     EntriesAsAddress->resize(TrimmedSize);
691 
692   // It's a jump table if the number of real entries is more than 1, or there's
693   // one real entry and one or more special targets. If there are only multiple
694   // special targets, then it's not a jump table.
695   return NumRealEntries + (HasUnreachable || HasStartAsEntry) >= 2;
696 }
697 
698 void BinaryContext::populateJumpTables() {
699   LLVM_DEBUG(dbgs() << "DataPCRelocations: " << DataPCRelocations.size()
700                     << '\n');
701   for (auto JTI = JumpTables.begin(), JTE = JumpTables.end(); JTI != JTE;
702        ++JTI) {
703     JumpTable *JT = JTI->second;
704 
705     bool NonSimpleParent = false;
706     for (BinaryFunction *BF : JT->Parents)
707       NonSimpleParent |= !BF->isSimple();
708     if (NonSimpleParent)
709       continue;
710 
711     uint64_t NextJTAddress = 0;
712     auto NextJTI = std::next(JTI);
713     if (NextJTI != JTE)
714       NextJTAddress = NextJTI->second->getAddress();
715 
716     const bool Success =
717         analyzeJumpTable(JT->getAddress(), JT->Type, *(JT->Parents[0]),
718                          NextJTAddress, &JT->EntriesAsAddress, &JT->IsSplit);
719     if (!Success) {
720       LLVM_DEBUG({
721         dbgs() << "failed to analyze ";
722         JT->print(dbgs());
723         if (NextJTI != JTE) {
724           dbgs() << "next ";
725           NextJTI->second->print(dbgs());
726         }
727       });
728       llvm_unreachable("jump table heuristic failure");
729     }
730     for (BinaryFunction *Frag : JT->Parents) {
731       if (JT->IsSplit)
732         Frag->setHasIndirectTargetToSplitFragment(true);
733       for (uint64_t EntryAddress : JT->EntriesAsAddress)
734         // if target is builtin_unreachable
735         if (EntryAddress == Frag->getAddress() + Frag->getSize()) {
736           Frag->IgnoredBranches.emplace_back(EntryAddress - Frag->getAddress(),
737                                              Frag->getSize());
738         } else if (EntryAddress >= Frag->getAddress() &&
739                    EntryAddress < Frag->getAddress() + Frag->getSize()) {
740           Frag->registerReferencedOffset(EntryAddress - Frag->getAddress());
741         }
742     }
743 
744     // In strict mode, erase PC-relative relocation record. Later we check that
745     // all such records are erased and thus have been accounted for.
746     if (opts::StrictMode && JT->Type == JumpTable::JTT_PIC) {
747       for (uint64_t Address = JT->getAddress();
748            Address < JT->getAddress() + JT->getSize();
749            Address += JT->EntrySize) {
750         DataPCRelocations.erase(DataPCRelocations.find(Address));
751       }
752     }
753 
754     // Mark to skip the function and all its fragments.
755     for (BinaryFunction *Frag : JT->Parents)
756       if (Frag->hasIndirectTargetToSplitFragment())
757         addFragmentsToSkip(Frag);
758   }
759 
760   if (opts::StrictMode && DataPCRelocations.size()) {
761     LLVM_DEBUG({
762       dbgs() << DataPCRelocations.size()
763              << " unclaimed PC-relative relocations left in data:\n";
764       for (uint64_t Reloc : DataPCRelocations)
765         dbgs() << Twine::utohexstr(Reloc) << '\n';
766     });
767     assert(0 && "unclaimed PC-relative relocations left in data\n");
768   }
769   clearList(DataPCRelocations);
770 }
771 
772 void BinaryContext::skipMarkedFragments() {
773   std::vector<BinaryFunction *> FragmentQueue;
774   // Copy the functions to FragmentQueue.
775   FragmentQueue.assign(FragmentsToSkip.begin(), FragmentsToSkip.end());
776   auto addToWorklist = [&](BinaryFunction *Function) -> void {
777     if (FragmentsToSkip.count(Function))
778       return;
779     FragmentQueue.push_back(Function);
780     addFragmentsToSkip(Function);
781   };
782   // Functions containing split jump tables need to be skipped with all
783   // fragments (transitively).
784   for (size_t I = 0; I != FragmentQueue.size(); I++) {
785     BinaryFunction *BF = FragmentQueue[I];
786     assert(FragmentsToSkip.count(BF) &&
787            "internal error in traversing function fragments");
788     if (opts::Verbosity >= 1)
789       this->errs() << "BOLT-WARNING: Ignoring " << BF->getPrintName() << '\n';
790     BF->setSimple(false);
791     BF->setHasIndirectTargetToSplitFragment(true);
792 
793     llvm::for_each(BF->Fragments, addToWorklist);
794     llvm::for_each(BF->ParentFragments, addToWorklist);
795   }
796   if (!FragmentsToSkip.empty())
797     this->errs() << "BOLT-WARNING: skipped " << FragmentsToSkip.size()
798                  << " function" << (FragmentsToSkip.size() == 1 ? "" : "s")
799                  << " due to cold fragments\n";
800 }
801 
802 MCSymbol *BinaryContext::getOrCreateGlobalSymbol(uint64_t Address, Twine Prefix,
803                                                  uint64_t Size,
804                                                  uint16_t Alignment,
805                                                  unsigned Flags) {
806   auto Itr = BinaryDataMap.find(Address);
807   if (Itr != BinaryDataMap.end()) {
808     assert(Itr->second->getSize() == Size || !Size);
809     return Itr->second->getSymbol();
810   }
811 
812   std::string Name = (Prefix + "0x" + Twine::utohexstr(Address)).str();
813   assert(!GlobalSymbols.count(Name) && "created name is not unique");
814   return registerNameAtAddress(Name, Address, Size, Alignment, Flags);
815 }
816 
817 MCSymbol *BinaryContext::getOrCreateUndefinedGlobalSymbol(StringRef Name) {
818   return Ctx->getOrCreateSymbol(Name);
819 }
820 
821 BinaryFunction *BinaryContext::createBinaryFunction(
822     const std::string &Name, BinarySection &Section, uint64_t Address,
823     uint64_t Size, uint64_t SymbolSize, uint16_t Alignment) {
824   auto Result = BinaryFunctions.emplace(
825       Address, BinaryFunction(Name, Section, Address, Size, *this));
826   assert(Result.second == true && "unexpected duplicate function");
827   BinaryFunction *BF = &Result.first->second;
828   registerNameAtAddress(Name, Address, SymbolSize ? SymbolSize : Size,
829                         Alignment);
830   setSymbolToFunctionMap(BF->getSymbol(), BF);
831   return BF;
832 }
833 
834 const MCSymbol *
835 BinaryContext::getOrCreateJumpTable(BinaryFunction &Function, uint64_t Address,
836                                     JumpTable::JumpTableType Type) {
837   // Two fragments of same function access same jump table
838   if (JumpTable *JT = getJumpTableContainingAddress(Address)) {
839     assert(JT->Type == Type && "jump table types have to match");
840     assert(Address == JT->getAddress() && "unexpected non-empty jump table");
841 
842     // Prevent associating a jump table to a specific fragment twice.
843     if (!llvm::is_contained(JT->Parents, &Function)) {
844       assert(llvm::all_of(JT->Parents,
845                           [&](const BinaryFunction *BF) {
846                             return areRelatedFragments(&Function, BF);
847                           }) &&
848              "cannot re-use jump table of a different function");
849       // Duplicate the entry for the parent function for easy access
850       JT->Parents.push_back(&Function);
851       if (opts::Verbosity > 2) {
852         this->outs() << "BOLT-INFO: Multiple fragments access same jump table: "
853                      << JT->Parents[0]->getPrintName() << "; "
854                      << Function.getPrintName() << "\n";
855         JT->print(this->outs());
856       }
857       Function.JumpTables.emplace(Address, JT);
858       for (BinaryFunction *Parent : JT->Parents)
859         Parent->setHasIndirectTargetToSplitFragment(true);
860     }
861 
862     bool IsJumpTableParent = false;
863     (void)IsJumpTableParent;
864     for (BinaryFunction *Frag : JT->Parents)
865       if (Frag == &Function)
866         IsJumpTableParent = true;
867     assert(IsJumpTableParent &&
868            "cannot re-use jump table of a different function");
869     return JT->getFirstLabel();
870   }
871 
872   // Re-use the existing symbol if possible.
873   MCSymbol *JTLabel = nullptr;
874   if (BinaryData *Object = getBinaryDataAtAddress(Address)) {
875     if (!isInternalSymbolName(Object->getSymbol()->getName()))
876       JTLabel = Object->getSymbol();
877   }
878 
879   const uint64_t EntrySize = getJumpTableEntrySize(Type);
880   if (!JTLabel) {
881     const std::string JumpTableName = generateJumpTableName(Function, Address);
882     JTLabel = registerNameAtAddress(JumpTableName, Address, 0, EntrySize);
883   }
884 
885   LLVM_DEBUG(dbgs() << "BOLT-DEBUG: creating jump table " << JTLabel->getName()
886                     << " in function " << Function << '\n');
887 
888   JumpTable *JT = new JumpTable(*JTLabel, Address, EntrySize, Type,
889                                 JumpTable::LabelMapType{{0, JTLabel}},
890                                 *getSectionForAddress(Address));
891   JT->Parents.push_back(&Function);
892   if (opts::Verbosity > 2)
893     JT->print(this->outs());
894   JumpTables.emplace(Address, JT);
895 
896   // Duplicate the entry for the parent function for easy access.
897   Function.JumpTables.emplace(Address, JT);
898   return JTLabel;
899 }
900 
901 std::pair<uint64_t, const MCSymbol *>
902 BinaryContext::duplicateJumpTable(BinaryFunction &Function, JumpTable *JT,
903                                   const MCSymbol *OldLabel) {
904   auto L = scopeLock();
905   unsigned Offset = 0;
906   bool Found = false;
907   for (std::pair<const unsigned, MCSymbol *> Elmt : JT->Labels) {
908     if (Elmt.second != OldLabel)
909       continue;
910     Offset = Elmt.first;
911     Found = true;
912     break;
913   }
914   assert(Found && "Label not found");
915   (void)Found;
916   MCSymbol *NewLabel = Ctx->createNamedTempSymbol("duplicatedJT");
917   JumpTable *NewJT =
918       new JumpTable(*NewLabel, JT->getAddress(), JT->EntrySize, JT->Type,
919                     JumpTable::LabelMapType{{Offset, NewLabel}},
920                     *getSectionForAddress(JT->getAddress()));
921   NewJT->Parents = JT->Parents;
922   NewJT->Entries = JT->Entries;
923   NewJT->Counts = JT->Counts;
924   uint64_t JumpTableID = ++DuplicatedJumpTables;
925   // Invert it to differentiate from regular jump tables whose IDs are their
926   // addresses in the input binary memory space
927   JumpTableID = ~JumpTableID;
928   JumpTables.emplace(JumpTableID, NewJT);
929   Function.JumpTables.emplace(JumpTableID, NewJT);
930   return std::make_pair(JumpTableID, NewLabel);
931 }
932 
933 std::string BinaryContext::generateJumpTableName(const BinaryFunction &BF,
934                                                  uint64_t Address) {
935   size_t Id;
936   uint64_t Offset = 0;
937   if (const JumpTable *JT = BF.getJumpTableContainingAddress(Address)) {
938     Offset = Address - JT->getAddress();
939     auto JTLabelsIt = JT->Labels.find(Offset);
940     if (JTLabelsIt != JT->Labels.end())
941       return std::string(JTLabelsIt->second->getName());
942 
943     auto JTIdsIt = JumpTableIds.find(JT->getAddress());
944     assert(JTIdsIt != JumpTableIds.end());
945     Id = JTIdsIt->second;
946   } else {
947     Id = JumpTableIds[Address] = BF.JumpTables.size();
948   }
949   return ("JUMP_TABLE/" + BF.getOneName().str() + "." + std::to_string(Id) +
950           (Offset ? ("." + std::to_string(Offset)) : ""));
951 }
952 
953 bool BinaryContext::hasValidCodePadding(const BinaryFunction &BF) {
954   // FIXME: aarch64 support is missing.
955   if (!isX86())
956     return true;
957 
958   if (BF.getSize() == BF.getMaxSize())
959     return true;
960 
961   ErrorOr<ArrayRef<unsigned char>> FunctionData = BF.getData();
962   assert(FunctionData && "cannot get function as data");
963 
964   uint64_t Offset = BF.getSize();
965   MCInst Instr;
966   uint64_t InstrSize = 0;
967   uint64_t InstrAddress = BF.getAddress() + Offset;
968   using std::placeholders::_1;
969 
970   // Skip instructions that satisfy the predicate condition.
971   auto skipInstructions = [&](std::function<bool(const MCInst &)> Predicate) {
972     const uint64_t StartOffset = Offset;
973     for (; Offset < BF.getMaxSize();
974          Offset += InstrSize, InstrAddress += InstrSize) {
975       if (!DisAsm->getInstruction(Instr, InstrSize, FunctionData->slice(Offset),
976                                   InstrAddress, nulls()))
977         break;
978       if (!Predicate(Instr))
979         break;
980     }
981 
982     return Offset - StartOffset;
983   };
984 
985   // Skip a sequence of zero bytes.
986   auto skipZeros = [&]() {
987     const uint64_t StartOffset = Offset;
988     for (; Offset < BF.getMaxSize(); ++Offset)
989       if ((*FunctionData)[Offset] != 0)
990         break;
991 
992     return Offset - StartOffset;
993   };
994 
995   // Accept the whole padding area filled with breakpoints.
996   auto isBreakpoint = std::bind(&MCPlusBuilder::isBreakpoint, MIB.get(), _1);
997   if (skipInstructions(isBreakpoint) && Offset == BF.getMaxSize())
998     return true;
999 
1000   auto isNoop = std::bind(&MCPlusBuilder::isNoop, MIB.get(), _1);
1001 
1002   // Some functions have a jump to the next function or to the padding area
1003   // inserted after the body.
1004   auto isSkipJump = [&](const MCInst &Instr) {
1005     uint64_t TargetAddress = 0;
1006     if (MIB->isUnconditionalBranch(Instr) &&
1007         MIB->evaluateBranch(Instr, InstrAddress, InstrSize, TargetAddress)) {
1008       if (TargetAddress >= InstrAddress + InstrSize &&
1009           TargetAddress <= BF.getAddress() + BF.getMaxSize()) {
1010         return true;
1011       }
1012     }
1013     return false;
1014   };
1015 
1016   // Skip over nops, jumps, and zero padding. Allow interleaving (this happens).
1017   while (skipInstructions(isNoop) || skipInstructions(isSkipJump) ||
1018          skipZeros())
1019     ;
1020 
1021   if (Offset == BF.getMaxSize())
1022     return true;
1023 
1024   if (opts::Verbosity >= 1) {
1025     this->errs() << "BOLT-WARNING: bad padding at address 0x"
1026                  << Twine::utohexstr(BF.getAddress() + BF.getSize())
1027                  << " starting at offset " << (Offset - BF.getSize())
1028                  << " in function " << BF << '\n'
1029                  << FunctionData->slice(BF.getSize(),
1030                                         BF.getMaxSize() - BF.getSize())
1031                  << '\n';
1032   }
1033 
1034   return false;
1035 }
1036 
1037 void BinaryContext::adjustCodePadding() {
1038   for (auto &BFI : BinaryFunctions) {
1039     BinaryFunction &BF = BFI.second;
1040     if (!shouldEmit(BF))
1041       continue;
1042 
1043     if (!hasValidCodePadding(BF)) {
1044       if (HasRelocations) {
1045         if (opts::Verbosity >= 1) {
1046           this->outs() << "BOLT-INFO: function " << BF
1047                        << " has invalid padding. Ignoring the function.\n";
1048         }
1049         BF.setIgnored();
1050       } else {
1051         BF.setMaxSize(BF.getSize());
1052       }
1053     }
1054   }
1055 }
1056 
1057 MCSymbol *BinaryContext::registerNameAtAddress(StringRef Name, uint64_t Address,
1058                                                uint64_t Size,
1059                                                uint16_t Alignment,
1060                                                unsigned Flags) {
1061   // Register the name with MCContext.
1062   MCSymbol *Symbol = Ctx->getOrCreateSymbol(Name);
1063 
1064   auto GAI = BinaryDataMap.find(Address);
1065   BinaryData *BD;
1066   if (GAI == BinaryDataMap.end()) {
1067     ErrorOr<BinarySection &> SectionOrErr = getSectionForAddress(Address);
1068     BinarySection &Section =
1069         SectionOrErr ? SectionOrErr.get() : absoluteSection();
1070     BD = new BinaryData(*Symbol, Address, Size, Alignment ? Alignment : 1,
1071                         Section, Flags);
1072     GAI = BinaryDataMap.emplace(Address, BD).first;
1073     GlobalSymbols[Name] = BD;
1074     updateObjectNesting(GAI);
1075   } else {
1076     BD = GAI->second;
1077     if (!BD->hasName(Name)) {
1078       GlobalSymbols[Name] = BD;
1079       BD->updateSize(Size);
1080       BD->Symbols.push_back(Symbol);
1081     }
1082   }
1083 
1084   return Symbol;
1085 }
1086 
1087 const BinaryData *
1088 BinaryContext::getBinaryDataContainingAddressImpl(uint64_t Address) const {
1089   auto NI = BinaryDataMap.lower_bound(Address);
1090   auto End = BinaryDataMap.end();
1091   if ((NI != End && Address == NI->first) ||
1092       ((NI != BinaryDataMap.begin()) && (NI-- != BinaryDataMap.begin()))) {
1093     if (NI->second->containsAddress(Address))
1094       return NI->second;
1095 
1096     // If this is a sub-symbol, see if a parent data contains the address.
1097     const BinaryData *BD = NI->second->getParent();
1098     while (BD) {
1099       if (BD->containsAddress(Address))
1100         return BD;
1101       BD = BD->getParent();
1102     }
1103   }
1104   return nullptr;
1105 }
1106 
1107 BinaryData *BinaryContext::getGOTSymbol() {
1108   // First tries to find a global symbol with that name
1109   BinaryData *GOTSymBD = getBinaryDataByName("_GLOBAL_OFFSET_TABLE_");
1110   if (GOTSymBD)
1111     return GOTSymBD;
1112 
1113   // This symbol might be hidden from run-time link, so fetch the local
1114   // definition if available.
1115   GOTSymBD = getBinaryDataByName("_GLOBAL_OFFSET_TABLE_/1");
1116   if (!GOTSymBD)
1117     return nullptr;
1118 
1119   // If the local symbol is not unique, fail
1120   unsigned Index = 2;
1121   SmallString<30> Storage;
1122   while (const BinaryData *BD =
1123              getBinaryDataByName(Twine("_GLOBAL_OFFSET_TABLE_/")
1124                                      .concat(Twine(Index++))
1125                                      .toStringRef(Storage)))
1126     if (BD->getAddress() != GOTSymBD->getAddress())
1127       return nullptr;
1128 
1129   return GOTSymBD;
1130 }
1131 
1132 bool BinaryContext::setBinaryDataSize(uint64_t Address, uint64_t Size) {
1133   auto NI = BinaryDataMap.find(Address);
1134   assert(NI != BinaryDataMap.end());
1135   if (NI == BinaryDataMap.end())
1136     return false;
1137   // TODO: it's possible that a jump table starts at the same address
1138   // as a larger blob of private data.  When we set the size of the
1139   // jump table, it might be smaller than the total blob size.  In this
1140   // case we just leave the original size since (currently) it won't really
1141   // affect anything.
1142   assert((!NI->second->Size || NI->second->Size == Size ||
1143           (NI->second->isJumpTable() && NI->second->Size > Size)) &&
1144          "can't change the size of a symbol that has already had its "
1145          "size set");
1146   if (!NI->second->Size) {
1147     NI->second->Size = Size;
1148     updateObjectNesting(NI);
1149     return true;
1150   }
1151   return false;
1152 }
1153 
1154 void BinaryContext::generateSymbolHashes() {
1155   auto isPadding = [](const BinaryData &BD) {
1156     StringRef Contents = BD.getSection().getContents();
1157     StringRef SymData = Contents.substr(BD.getOffset(), BD.getSize());
1158     return (BD.getName().starts_with("HOLEat") ||
1159             SymData.find_first_not_of(0) == StringRef::npos);
1160   };
1161 
1162   uint64_t NumCollisions = 0;
1163   for (auto &Entry : BinaryDataMap) {
1164     BinaryData &BD = *Entry.second;
1165     StringRef Name = BD.getName();
1166 
1167     if (!isInternalSymbolName(Name))
1168       continue;
1169 
1170     // First check if a non-anonymous alias exists and move it to the front.
1171     if (BD.getSymbols().size() > 1) {
1172       auto Itr = llvm::find_if(BD.getSymbols(), [&](const MCSymbol *Symbol) {
1173         return !isInternalSymbolName(Symbol->getName());
1174       });
1175       if (Itr != BD.getSymbols().end()) {
1176         size_t Idx = std::distance(BD.getSymbols().begin(), Itr);
1177         std::swap(BD.getSymbols()[0], BD.getSymbols()[Idx]);
1178         continue;
1179       }
1180     }
1181 
1182     // We have to skip 0 size symbols since they will all collide.
1183     if (BD.getSize() == 0) {
1184       continue;
1185     }
1186 
1187     const uint64_t Hash = BD.getSection().hash(BD);
1188     const size_t Idx = Name.find("0x");
1189     std::string NewName =
1190         (Twine(Name.substr(0, Idx)) + "_" + Twine::utohexstr(Hash)).str();
1191     if (getBinaryDataByName(NewName)) {
1192       // Ignore collisions for symbols that appear to be padding
1193       // (i.e. all zeros or a "hole")
1194       if (!isPadding(BD)) {
1195         if (opts::Verbosity) {
1196           this->errs() << "BOLT-WARNING: collision detected when hashing " << BD
1197                        << " with new name (" << NewName << "), skipping.\n";
1198         }
1199         ++NumCollisions;
1200       }
1201       continue;
1202     }
1203     BD.Symbols.insert(BD.Symbols.begin(), Ctx->getOrCreateSymbol(NewName));
1204     GlobalSymbols[NewName] = &BD;
1205   }
1206   if (NumCollisions) {
1207     this->errs() << "BOLT-WARNING: " << NumCollisions
1208                  << " collisions detected while hashing binary objects";
1209     if (!opts::Verbosity)
1210       this->errs() << ". Use -v=1 to see the list.";
1211     this->errs() << '\n';
1212   }
1213 }
1214 
1215 bool BinaryContext::registerFragment(BinaryFunction &TargetFunction,
1216                                      BinaryFunction &Function) {
1217   assert(TargetFunction.isFragment() && "TargetFunction must be a fragment");
1218   if (TargetFunction.isChildOf(Function))
1219     return true;
1220   TargetFunction.addParentFragment(Function);
1221   Function.addFragment(TargetFunction);
1222   FragmentClasses.unionSets(&TargetFunction, &Function);
1223   if (!HasRelocations) {
1224     TargetFunction.setSimple(false);
1225     Function.setSimple(false);
1226   }
1227   if (opts::Verbosity >= 1) {
1228     this->outs() << "BOLT-INFO: marking " << TargetFunction
1229                  << " as a fragment of " << Function << '\n';
1230   }
1231   return true;
1232 }
1233 
1234 void BinaryContext::addAdrpAddRelocAArch64(BinaryFunction &BF,
1235                                            MCInst &LoadLowBits,
1236                                            MCInst &LoadHiBits,
1237                                            uint64_t Target) {
1238   const MCSymbol *TargetSymbol;
1239   uint64_t Addend = 0;
1240   std::tie(TargetSymbol, Addend) = handleAddressRef(Target, BF,
1241                                                     /*IsPCRel*/ true);
1242   int64_t Val;
1243   MIB->replaceImmWithSymbolRef(LoadHiBits, TargetSymbol, Addend, Ctx.get(), Val,
1244                                ELF::R_AARCH64_ADR_PREL_PG_HI21);
1245   MIB->replaceImmWithSymbolRef(LoadLowBits, TargetSymbol, Addend, Ctx.get(),
1246                                Val, ELF::R_AARCH64_ADD_ABS_LO12_NC);
1247 }
1248 
1249 bool BinaryContext::handleAArch64Veneer(uint64_t Address, bool MatchOnly) {
1250   BinaryFunction *TargetFunction = getBinaryFunctionContainingAddress(Address);
1251   if (TargetFunction)
1252     return false;
1253 
1254   ErrorOr<BinarySection &> Section = getSectionForAddress(Address);
1255   assert(Section && "cannot get section for referenced address");
1256   if (!Section->isText())
1257     return false;
1258 
1259   bool Ret = false;
1260   StringRef SectionContents = Section->getContents();
1261   uint64_t Offset = Address - Section->getAddress();
1262   const uint64_t MaxSize = SectionContents.size() - Offset;
1263   const uint8_t *Bytes =
1264       reinterpret_cast<const uint8_t *>(SectionContents.data());
1265   ArrayRef<uint8_t> Data(Bytes + Offset, MaxSize);
1266 
1267   auto matchVeneer = [&](BinaryFunction::InstrMapType &Instructions,
1268                          MCInst &Instruction, uint64_t Offset,
1269                          uint64_t AbsoluteInstrAddr,
1270                          uint64_t TotalSize) -> bool {
1271     MCInst *TargetHiBits, *TargetLowBits;
1272     uint64_t TargetAddress, Count;
1273     Count = MIB->matchLinkerVeneer(Instructions.begin(), Instructions.end(),
1274                                    AbsoluteInstrAddr, Instruction, TargetHiBits,
1275                                    TargetLowBits, TargetAddress);
1276     if (!Count)
1277       return false;
1278 
1279     if (MatchOnly)
1280       return true;
1281 
1282     // NOTE The target symbol was created during disassemble's
1283     // handleExternalReference
1284     const MCSymbol *VeneerSymbol = getOrCreateGlobalSymbol(Address, "FUNCat");
1285     BinaryFunction *Veneer = createBinaryFunction(VeneerSymbol->getName().str(),
1286                                                   *Section, Address, TotalSize);
1287     addAdrpAddRelocAArch64(*Veneer, *TargetLowBits, *TargetHiBits,
1288                            TargetAddress);
1289     MIB->addAnnotation(Instruction, "AArch64Veneer", true);
1290     Veneer->addInstruction(Offset, std::move(Instruction));
1291     --Count;
1292     for (auto It = Instructions.rbegin(); Count != 0; ++It, --Count) {
1293       MIB->addAnnotation(It->second, "AArch64Veneer", true);
1294       Veneer->addInstruction(It->first, std::move(It->second));
1295     }
1296 
1297     Veneer->getOrCreateLocalLabel(Address);
1298     Veneer->setMaxSize(TotalSize);
1299     Veneer->updateState(BinaryFunction::State::Disassembled);
1300     LLVM_DEBUG(dbgs() << "BOLT-DEBUG: handling veneer function at 0x"
1301                       << Twine::utohexstr(Address) << "\n");
1302     return true;
1303   };
1304 
1305   uint64_t Size = 0, TotalSize = 0;
1306   BinaryFunction::InstrMapType VeneerInstructions;
1307   for (Offset = 0; Offset < MaxSize; Offset += Size) {
1308     MCInst Instruction;
1309     const uint64_t AbsoluteInstrAddr = Address + Offset;
1310     if (!SymbolicDisAsm->getInstruction(Instruction, Size, Data.slice(Offset),
1311                                         AbsoluteInstrAddr, nulls()))
1312       break;
1313 
1314     TotalSize += Size;
1315     if (MIB->isBranch(Instruction)) {
1316       Ret = matchVeneer(VeneerInstructions, Instruction, Offset,
1317                         AbsoluteInstrAddr, TotalSize);
1318       break;
1319     }
1320 
1321     VeneerInstructions.emplace(Offset, std::move(Instruction));
1322   }
1323 
1324   return Ret;
1325 }
1326 
1327 void BinaryContext::processInterproceduralReferences() {
1328   for (const std::pair<BinaryFunction *, uint64_t> &It :
1329        InterproceduralReferences) {
1330     BinaryFunction &Function = *It.first;
1331     uint64_t Address = It.second;
1332     // Process interprocedural references from ignored functions in BAT mode
1333     // (non-simple in non-relocation mode) to properly register entry points
1334     if (!Address || (Function.isIgnored() && !HasBATSection))
1335       continue;
1336 
1337     BinaryFunction *TargetFunction =
1338         getBinaryFunctionContainingAddress(Address);
1339     if (&Function == TargetFunction)
1340       continue;
1341 
1342     if (TargetFunction) {
1343       if (TargetFunction->isFragment() &&
1344           !areRelatedFragments(TargetFunction, &Function)) {
1345         this->errs()
1346             << "BOLT-WARNING: interprocedural reference between unrelated "
1347                "fragments: "
1348             << Function.getPrintName() << " and "
1349             << TargetFunction->getPrintName() << '\n';
1350       }
1351       if (uint64_t Offset = Address - TargetFunction->getAddress())
1352         TargetFunction->addEntryPointAtOffset(Offset);
1353 
1354       continue;
1355     }
1356 
1357     // Check if address falls in function padding space - this could be
1358     // unmarked data in code. In this case adjust the padding space size.
1359     ErrorOr<BinarySection &> Section = getSectionForAddress(Address);
1360     assert(Section && "cannot get section for referenced address");
1361 
1362     if (!Section->isText())
1363       continue;
1364 
1365     // PLT requires special handling and could be ignored in this context.
1366     StringRef SectionName = Section->getName();
1367     if (SectionName == ".plt" || SectionName == ".plt.got")
1368       continue;
1369 
1370     // Check if it is aarch64 veneer written at Address
1371     if (isAArch64() && handleAArch64Veneer(Address))
1372       continue;
1373 
1374     if (opts::processAllFunctions()) {
1375       this->errs() << "BOLT-ERROR: cannot process binaries with unmarked "
1376                    << "object in code at address 0x"
1377                    << Twine::utohexstr(Address) << " belonging to section "
1378                    << SectionName << " in current mode\n";
1379       exit(1);
1380     }
1381 
1382     TargetFunction = getBinaryFunctionContainingAddress(Address,
1383                                                         /*CheckPastEnd=*/false,
1384                                                         /*UseMaxSize=*/true);
1385     // We are not going to overwrite non-simple functions, but for simple
1386     // ones - adjust the padding size.
1387     if (TargetFunction && TargetFunction->isSimple()) {
1388       this->errs()
1389           << "BOLT-WARNING: function " << *TargetFunction
1390           << " has an object detected in a padding region at address 0x"
1391           << Twine::utohexstr(Address) << '\n';
1392       TargetFunction->setMaxSize(TargetFunction->getSize());
1393     }
1394   }
1395 
1396   InterproceduralReferences.clear();
1397 }
1398 
1399 void BinaryContext::postProcessSymbolTable() {
1400   fixBinaryDataHoles();
1401   bool Valid = true;
1402   for (auto &Entry : BinaryDataMap) {
1403     BinaryData *BD = Entry.second;
1404     if ((BD->getName().starts_with("SYMBOLat") ||
1405          BD->getName().starts_with("DATAat")) &&
1406         !BD->getParent() && !BD->getSize() && !BD->isAbsolute() &&
1407         BD->getSection()) {
1408       this->errs() << "BOLT-WARNING: zero-sized top level symbol: " << *BD
1409                    << "\n";
1410       Valid = false;
1411     }
1412   }
1413   assert(Valid);
1414   (void)Valid;
1415   generateSymbolHashes();
1416 }
1417 
1418 void BinaryContext::foldFunction(BinaryFunction &ChildBF,
1419                                  BinaryFunction &ParentBF) {
1420   assert(!ChildBF.isMultiEntry() && !ParentBF.isMultiEntry() &&
1421          "cannot merge functions with multiple entry points");
1422 
1423   std::unique_lock<llvm::sys::RWMutex> WriteCtxLock(CtxMutex, std::defer_lock);
1424   std::unique_lock<llvm::sys::RWMutex> WriteSymbolMapLock(
1425       SymbolToFunctionMapMutex, std::defer_lock);
1426 
1427   const StringRef ChildName = ChildBF.getOneName();
1428 
1429   // Move symbols over and update bookkeeping info.
1430   for (MCSymbol *Symbol : ChildBF.getSymbols()) {
1431     ParentBF.getSymbols().push_back(Symbol);
1432     WriteSymbolMapLock.lock();
1433     SymbolToFunctionMap[Symbol] = &ParentBF;
1434     WriteSymbolMapLock.unlock();
1435     // NB: there's no need to update BinaryDataMap and GlobalSymbols.
1436   }
1437   ChildBF.getSymbols().clear();
1438 
1439   // Move other names the child function is known under.
1440   llvm::move(ChildBF.Aliases, std::back_inserter(ParentBF.Aliases));
1441   ChildBF.Aliases.clear();
1442 
1443   if (HasRelocations) {
1444     // Merge execution counts of ChildBF into those of ParentBF.
1445     // Without relocations, we cannot reliably merge profiles as both functions
1446     // continue to exist and either one can be executed.
1447     ChildBF.mergeProfileDataInto(ParentBF);
1448 
1449     std::shared_lock<llvm::sys::RWMutex> ReadBfsLock(BinaryFunctionsMutex,
1450                                                      std::defer_lock);
1451     std::unique_lock<llvm::sys::RWMutex> WriteBfsLock(BinaryFunctionsMutex,
1452                                                       std::defer_lock);
1453     // Remove ChildBF from the global set of functions in relocs mode.
1454     ReadBfsLock.lock();
1455     auto FI = BinaryFunctions.find(ChildBF.getAddress());
1456     ReadBfsLock.unlock();
1457 
1458     assert(FI != BinaryFunctions.end() && "function not found");
1459     assert(&ChildBF == &FI->second && "function mismatch");
1460 
1461     WriteBfsLock.lock();
1462     ChildBF.clearDisasmState();
1463     FI = BinaryFunctions.erase(FI);
1464     WriteBfsLock.unlock();
1465 
1466   } else {
1467     // In non-relocation mode we keep the function, but rename it.
1468     std::string NewName = "__ICF_" + ChildName.str();
1469 
1470     WriteCtxLock.lock();
1471     ChildBF.getSymbols().push_back(Ctx->getOrCreateSymbol(NewName));
1472     WriteCtxLock.unlock();
1473 
1474     ChildBF.setFolded(&ParentBF);
1475   }
1476 
1477   ParentBF.setHasFunctionsFoldedInto();
1478 }
1479 
1480 void BinaryContext::fixBinaryDataHoles() {
1481   assert(validateObjectNesting() && "object nesting inconsistency detected");
1482 
1483   for (BinarySection &Section : allocatableSections()) {
1484     std::vector<std::pair<uint64_t, uint64_t>> Holes;
1485 
1486     auto isNotHole = [&Section](const binary_data_iterator &Itr) {
1487       BinaryData *BD = Itr->second;
1488       bool isHole = (!BD->getParent() && !BD->getSize() && BD->isObject() &&
1489                      (BD->getName().starts_with("SYMBOLat0x") ||
1490                       BD->getName().starts_with("DATAat0x") ||
1491                       BD->getName().starts_with("ANONYMOUS")));
1492       return !isHole && BD->getSection() == Section && !BD->getParent();
1493     };
1494 
1495     auto BDStart = BinaryDataMap.begin();
1496     auto BDEnd = BinaryDataMap.end();
1497     auto Itr = FilteredBinaryDataIterator(isNotHole, BDStart, BDEnd);
1498     auto End = FilteredBinaryDataIterator(isNotHole, BDEnd, BDEnd);
1499 
1500     uint64_t EndAddress = Section.getAddress();
1501 
1502     while (Itr != End) {
1503       if (Itr->second->getAddress() > EndAddress) {
1504         uint64_t Gap = Itr->second->getAddress() - EndAddress;
1505         Holes.emplace_back(EndAddress, Gap);
1506       }
1507       EndAddress = Itr->second->getEndAddress();
1508       ++Itr;
1509     }
1510 
1511     if (EndAddress < Section.getEndAddress())
1512       Holes.emplace_back(EndAddress, Section.getEndAddress() - EndAddress);
1513 
1514     // If there is already a symbol at the start of the hole, grow that symbol
1515     // to cover the rest.  Otherwise, create a new symbol to cover the hole.
1516     for (std::pair<uint64_t, uint64_t> &Hole : Holes) {
1517       BinaryData *BD = getBinaryDataAtAddress(Hole.first);
1518       if (BD) {
1519         // BD->getSection() can be != Section if there are sections that
1520         // overlap.  In this case it is probably safe to just skip the holes
1521         // since the overlapping section will not(?) have any symbols in it.
1522         if (BD->getSection() == Section)
1523           setBinaryDataSize(Hole.first, Hole.second);
1524       } else {
1525         getOrCreateGlobalSymbol(Hole.first, "HOLEat", Hole.second, 1);
1526       }
1527     }
1528   }
1529 
1530   assert(validateObjectNesting() && "object nesting inconsistency detected");
1531   assert(validateHoles() && "top level hole detected in object map");
1532 }
1533 
1534 void BinaryContext::printGlobalSymbols(raw_ostream &OS) const {
1535   const BinarySection *CurrentSection = nullptr;
1536   bool FirstSection = true;
1537 
1538   for (auto &Entry : BinaryDataMap) {
1539     const BinaryData *BD = Entry.second;
1540     const BinarySection &Section = BD->getSection();
1541     if (FirstSection || Section != *CurrentSection) {
1542       uint64_t Address, Size;
1543       StringRef Name = Section.getName();
1544       if (Section) {
1545         Address = Section.getAddress();
1546         Size = Section.getSize();
1547       } else {
1548         Address = BD->getAddress();
1549         Size = BD->getSize();
1550       }
1551       OS << "BOLT-INFO: Section " << Name << ", "
1552          << "0x" + Twine::utohexstr(Address) << ":"
1553          << "0x" + Twine::utohexstr(Address + Size) << "/" << Size << "\n";
1554       CurrentSection = &Section;
1555       FirstSection = false;
1556     }
1557 
1558     OS << "BOLT-INFO: ";
1559     const BinaryData *P = BD->getParent();
1560     while (P) {
1561       OS << "  ";
1562       P = P->getParent();
1563     }
1564     OS << *BD << "\n";
1565   }
1566 }
1567 
1568 Expected<unsigned> BinaryContext::getDwarfFile(
1569     StringRef Directory, StringRef FileName, unsigned FileNumber,
1570     std::optional<MD5::MD5Result> Checksum, std::optional<StringRef> Source,
1571     unsigned CUID, unsigned DWARFVersion) {
1572   DwarfLineTable &Table = DwarfLineTablesCUMap[CUID];
1573   return Table.tryGetFile(Directory, FileName, Checksum, Source, DWARFVersion,
1574                           FileNumber);
1575 }
1576 
1577 unsigned BinaryContext::addDebugFilenameToUnit(const uint32_t DestCUID,
1578                                                const uint32_t SrcCUID,
1579                                                unsigned FileIndex) {
1580   DWARFCompileUnit *SrcUnit = DwCtx->getCompileUnitForOffset(SrcCUID);
1581   const DWARFDebugLine::LineTable *LineTable =
1582       DwCtx->getLineTableForUnit(SrcUnit);
1583   const std::vector<DWARFDebugLine::FileNameEntry> &FileNames =
1584       LineTable->Prologue.FileNames;
1585   // Dir indexes start at 1, as DWARF file numbers, and a dir index 0
1586   // means empty dir.
1587   assert(FileIndex > 0 && FileIndex <= FileNames.size() &&
1588          "FileIndex out of range for the compilation unit.");
1589   StringRef Dir = "";
1590   if (FileNames[FileIndex - 1].DirIdx != 0) {
1591     if (std::optional<const char *> DirName = dwarf::toString(
1592             LineTable->Prologue
1593                 .IncludeDirectories[FileNames[FileIndex - 1].DirIdx - 1])) {
1594       Dir = *DirName;
1595     }
1596   }
1597   StringRef FileName = "";
1598   if (std::optional<const char *> FName =
1599           dwarf::toString(FileNames[FileIndex - 1].Name))
1600     FileName = *FName;
1601   assert(FileName != "");
1602   DWARFCompileUnit *DstUnit = DwCtx->getCompileUnitForOffset(DestCUID);
1603   return cantFail(getDwarfFile(Dir, FileName, 0, std::nullopt, std::nullopt,
1604                                DestCUID, DstUnit->getVersion()));
1605 }
1606 
1607 std::vector<BinaryFunction *> BinaryContext::getSortedFunctions() {
1608   std::vector<BinaryFunction *> SortedFunctions(BinaryFunctions.size());
1609   llvm::transform(llvm::make_second_range(BinaryFunctions),
1610                   SortedFunctions.begin(),
1611                   [](BinaryFunction &BF) { return &BF; });
1612 
1613   llvm::stable_sort(SortedFunctions, compareBinaryFunctionByIndex);
1614   return SortedFunctions;
1615 }
1616 
1617 std::vector<BinaryFunction *> BinaryContext::getAllBinaryFunctions() {
1618   std::vector<BinaryFunction *> AllFunctions;
1619   AllFunctions.reserve(BinaryFunctions.size() + InjectedBinaryFunctions.size());
1620   llvm::transform(llvm::make_second_range(BinaryFunctions),
1621                   std::back_inserter(AllFunctions),
1622                   [](BinaryFunction &BF) { return &BF; });
1623   llvm::copy(InjectedBinaryFunctions, std::back_inserter(AllFunctions));
1624 
1625   return AllFunctions;
1626 }
1627 
1628 std::optional<DWARFUnit *> BinaryContext::getDWOCU(uint64_t DWOId) {
1629   auto Iter = DWOCUs.find(DWOId);
1630   if (Iter == DWOCUs.end())
1631     return std::nullopt;
1632 
1633   return Iter->second;
1634 }
1635 
1636 DWARFContext *BinaryContext::getDWOContext() const {
1637   if (DWOCUs.empty())
1638     return nullptr;
1639   return &DWOCUs.begin()->second->getContext();
1640 }
1641 
1642 /// Handles DWO sections that can either be in .o, .dwo or .dwp files.
1643 void BinaryContext::preprocessDWODebugInfo() {
1644   for (const std::unique_ptr<DWARFUnit> &CU : DwCtx->compile_units()) {
1645     DWARFUnit *const DwarfUnit = CU.get();
1646     if (std::optional<uint64_t> DWOId = DwarfUnit->getDWOId()) {
1647       std::string DWOName = dwarf::toString(
1648           DwarfUnit->getUnitDIE().find(
1649               {dwarf::DW_AT_dwo_name, dwarf::DW_AT_GNU_dwo_name}),
1650           "");
1651       SmallString<16> AbsolutePath;
1652       if (!opts::CompDirOverride.empty()) {
1653         sys::path::append(AbsolutePath, opts::CompDirOverride);
1654         sys::path::append(AbsolutePath, DWOName);
1655       }
1656       DWARFUnit *DWOCU =
1657           DwarfUnit->getNonSkeletonUnitDIE(false, AbsolutePath).getDwarfUnit();
1658       if (!DWOCU->isDWOUnit()) {
1659         this->outs()
1660             << "BOLT-WARNING: Debug Fission: DWO debug information for "
1661             << DWOName
1662             << " was not retrieved and won't be updated. Please check "
1663                "relative path.\n";
1664         continue;
1665       }
1666       DWOCUs[*DWOId] = DWOCU;
1667     }
1668   }
1669   if (!DWOCUs.empty())
1670     this->outs() << "BOLT-INFO: processing split DWARF\n";
1671 }
1672 
1673 void BinaryContext::preprocessDebugInfo() {
1674   struct CURange {
1675     uint64_t LowPC;
1676     uint64_t HighPC;
1677     DWARFUnit *Unit;
1678 
1679     bool operator<(const CURange &Other) const { return LowPC < Other.LowPC; }
1680   };
1681 
1682   // Building a map of address ranges to CUs similar to .debug_aranges and use
1683   // it to assign CU to functions.
1684   std::vector<CURange> AllRanges;
1685   AllRanges.reserve(DwCtx->getNumCompileUnits());
1686   for (const std::unique_ptr<DWARFUnit> &CU : DwCtx->compile_units()) {
1687     Expected<DWARFAddressRangesVector> RangesOrError =
1688         CU->getUnitDIE().getAddressRanges();
1689     if (!RangesOrError) {
1690       consumeError(RangesOrError.takeError());
1691       continue;
1692     }
1693     for (DWARFAddressRange &Range : *RangesOrError) {
1694       // Parts of the debug info could be invalidated due to corresponding code
1695       // being removed from the binary by the linker. Hence we check if the
1696       // address is a valid one.
1697       if (containsAddress(Range.LowPC))
1698         AllRanges.emplace_back(CURange{Range.LowPC, Range.HighPC, CU.get()});
1699     }
1700 
1701     ContainsDwarf5 |= CU->getVersion() >= 5;
1702     ContainsDwarfLegacy |= CU->getVersion() < 5;
1703   }
1704 
1705   llvm::sort(AllRanges);
1706   for (auto &KV : BinaryFunctions) {
1707     const uint64_t FunctionAddress = KV.first;
1708     BinaryFunction &Function = KV.second;
1709 
1710     auto It = llvm::partition_point(
1711         AllRanges, [=](CURange R) { return R.HighPC <= FunctionAddress; });
1712     if (It != AllRanges.end() && It->LowPC <= FunctionAddress)
1713       Function.setDWARFUnit(It->Unit);
1714   }
1715 
1716   // Discover units with debug info that needs to be updated.
1717   for (const auto &KV : BinaryFunctions) {
1718     const BinaryFunction &BF = KV.second;
1719     if (shouldEmit(BF) && BF.getDWARFUnit())
1720       ProcessedCUs.insert(BF.getDWARFUnit());
1721   }
1722 
1723   // Clear debug info for functions from units that we are not going to process.
1724   for (auto &KV : BinaryFunctions) {
1725     BinaryFunction &BF = KV.second;
1726     if (BF.getDWARFUnit() && !ProcessedCUs.count(BF.getDWARFUnit()))
1727       BF.setDWARFUnit(nullptr);
1728   }
1729 
1730   if (opts::Verbosity >= 1) {
1731     this->outs() << "BOLT-INFO: " << ProcessedCUs.size() << " out of "
1732                  << DwCtx->getNumCompileUnits() << " CUs will be updated\n";
1733   }
1734 
1735   preprocessDWODebugInfo();
1736 
1737   // Populate MCContext with DWARF files from all units.
1738   StringRef GlobalPrefix = AsmInfo->getPrivateGlobalPrefix();
1739   for (const std::unique_ptr<DWARFUnit> &CU : DwCtx->compile_units()) {
1740     const uint64_t CUID = CU->getOffset();
1741     DwarfLineTable &BinaryLineTable = getDwarfLineTable(CUID);
1742     BinaryLineTable.setLabel(Ctx->getOrCreateSymbol(
1743         GlobalPrefix + "line_table_start" + Twine(CUID)));
1744 
1745     if (!ProcessedCUs.count(CU.get()))
1746       continue;
1747 
1748     const DWARFDebugLine::LineTable *LineTable =
1749         DwCtx->getLineTableForUnit(CU.get());
1750     const std::vector<DWARFDebugLine::FileNameEntry> &FileNames =
1751         LineTable->Prologue.FileNames;
1752 
1753     uint16_t DwarfVersion = LineTable->Prologue.getVersion();
1754     if (DwarfVersion >= 5) {
1755       std::optional<MD5::MD5Result> Checksum;
1756       if (LineTable->Prologue.ContentTypes.HasMD5)
1757         Checksum = LineTable->Prologue.FileNames[0].Checksum;
1758       std::optional<const char *> Name =
1759           dwarf::toString(CU->getUnitDIE().find(dwarf::DW_AT_name), nullptr);
1760       if (std::optional<uint64_t> DWOID = CU->getDWOId()) {
1761         auto Iter = DWOCUs.find(*DWOID);
1762         assert(Iter != DWOCUs.end() && "DWO CU was not found.");
1763         Name = dwarf::toString(
1764             Iter->second->getUnitDIE().find(dwarf::DW_AT_name), nullptr);
1765       }
1766       BinaryLineTable.setRootFile(CU->getCompilationDir(), *Name, Checksum,
1767                                   std::nullopt);
1768     }
1769 
1770     BinaryLineTable.setDwarfVersion(DwarfVersion);
1771 
1772     // Assign a unique label to every line table, one per CU.
1773     // Make sure empty debug line tables are registered too.
1774     if (FileNames.empty()) {
1775       cantFail(getDwarfFile("", "<unknown>", 0, std::nullopt, std::nullopt,
1776                             CUID, DwarfVersion));
1777       continue;
1778     }
1779     const uint32_t Offset = DwarfVersion < 5 ? 1 : 0;
1780     for (size_t I = 0, Size = FileNames.size(); I != Size; ++I) {
1781       // Dir indexes start at 1, as DWARF file numbers, and a dir index 0
1782       // means empty dir.
1783       StringRef Dir = "";
1784       if (FileNames[I].DirIdx != 0 || DwarfVersion >= 5)
1785         if (std::optional<const char *> DirName = dwarf::toString(
1786                 LineTable->Prologue
1787                     .IncludeDirectories[FileNames[I].DirIdx - Offset]))
1788           Dir = *DirName;
1789       StringRef FileName = "";
1790       if (std::optional<const char *> FName =
1791               dwarf::toString(FileNames[I].Name))
1792         FileName = *FName;
1793       assert(FileName != "");
1794       std::optional<MD5::MD5Result> Checksum;
1795       if (DwarfVersion >= 5 && LineTable->Prologue.ContentTypes.HasMD5)
1796         Checksum = LineTable->Prologue.FileNames[I].Checksum;
1797       cantFail(getDwarfFile(Dir, FileName, 0, Checksum, std::nullopt, CUID,
1798                             DwarfVersion));
1799     }
1800   }
1801 }
1802 
1803 bool BinaryContext::shouldEmit(const BinaryFunction &Function) const {
1804   if (Function.isPseudo())
1805     return false;
1806 
1807   if (opts::processAllFunctions())
1808     return true;
1809 
1810   if (Function.isIgnored())
1811     return false;
1812 
1813   // In relocation mode we will emit non-simple functions with CFG.
1814   // If the function does not have a CFG it should be marked as ignored.
1815   return HasRelocations || Function.isSimple();
1816 }
1817 
1818 void BinaryContext::dump(const MCInst &Inst) const {
1819   if (LLVM_UNLIKELY(!InstPrinter)) {
1820     dbgs() << "Cannot dump for InstPrinter is not initialized.\n";
1821     return;
1822   }
1823   InstPrinter->printInst(&Inst, 0, "", *STI, dbgs());
1824   dbgs() << "\n";
1825 }
1826 
1827 void BinaryContext::printCFI(raw_ostream &OS, const MCCFIInstruction &Inst) {
1828   uint32_t Operation = Inst.getOperation();
1829   switch (Operation) {
1830   case MCCFIInstruction::OpSameValue:
1831     OS << "OpSameValue Reg" << Inst.getRegister();
1832     break;
1833   case MCCFIInstruction::OpRememberState:
1834     OS << "OpRememberState";
1835     break;
1836   case MCCFIInstruction::OpRestoreState:
1837     OS << "OpRestoreState";
1838     break;
1839   case MCCFIInstruction::OpOffset:
1840     OS << "OpOffset Reg" << Inst.getRegister() << " " << Inst.getOffset();
1841     break;
1842   case MCCFIInstruction::OpDefCfaRegister:
1843     OS << "OpDefCfaRegister Reg" << Inst.getRegister();
1844     break;
1845   case MCCFIInstruction::OpDefCfaOffset:
1846     OS << "OpDefCfaOffset " << Inst.getOffset();
1847     break;
1848   case MCCFIInstruction::OpDefCfa:
1849     OS << "OpDefCfa Reg" << Inst.getRegister() << " " << Inst.getOffset();
1850     break;
1851   case MCCFIInstruction::OpRelOffset:
1852     OS << "OpRelOffset Reg" << Inst.getRegister() << " " << Inst.getOffset();
1853     break;
1854   case MCCFIInstruction::OpAdjustCfaOffset:
1855     OS << "OfAdjustCfaOffset " << Inst.getOffset();
1856     break;
1857   case MCCFIInstruction::OpEscape:
1858     OS << "OpEscape";
1859     break;
1860   case MCCFIInstruction::OpRestore:
1861     OS << "OpRestore Reg" << Inst.getRegister();
1862     break;
1863   case MCCFIInstruction::OpUndefined:
1864     OS << "OpUndefined Reg" << Inst.getRegister();
1865     break;
1866   case MCCFIInstruction::OpRegister:
1867     OS << "OpRegister Reg" << Inst.getRegister() << " Reg"
1868        << Inst.getRegister2();
1869     break;
1870   case MCCFIInstruction::OpWindowSave:
1871     OS << "OpWindowSave";
1872     break;
1873   case MCCFIInstruction::OpGnuArgsSize:
1874     OS << "OpGnuArgsSize";
1875     break;
1876   default:
1877     OS << "Op#" << Operation;
1878     break;
1879   }
1880 }
1881 
1882 MarkerSymType BinaryContext::getMarkerType(const SymbolRef &Symbol) const {
1883   // For aarch64 and riscv, the ABI defines mapping symbols so we identify data
1884   // in the code section (see IHI0056B). $x identifies a symbol starting code or
1885   // the end of a data chunk inside code, $d identifies start of data.
1886   if (isX86() || ELFSymbolRef(Symbol).getSize())
1887     return MarkerSymType::NONE;
1888 
1889   Expected<StringRef> NameOrError = Symbol.getName();
1890   Expected<object::SymbolRef::Type> TypeOrError = Symbol.getType();
1891 
1892   if (!TypeOrError || !NameOrError)
1893     return MarkerSymType::NONE;
1894 
1895   if (*TypeOrError != SymbolRef::ST_Unknown)
1896     return MarkerSymType::NONE;
1897 
1898   if (*NameOrError == "$x" || NameOrError->starts_with("$x."))
1899     return MarkerSymType::CODE;
1900 
1901   // $x<ISA>
1902   if (isRISCV() && NameOrError->starts_with("$x"))
1903     return MarkerSymType::CODE;
1904 
1905   if (*NameOrError == "$d" || NameOrError->starts_with("$d."))
1906     return MarkerSymType::DATA;
1907 
1908   return MarkerSymType::NONE;
1909 }
1910 
1911 bool BinaryContext::isMarker(const SymbolRef &Symbol) const {
1912   return getMarkerType(Symbol) != MarkerSymType::NONE;
1913 }
1914 
1915 static void printDebugInfo(raw_ostream &OS, const MCInst &Instruction,
1916                            const BinaryFunction *Function,
1917                            DWARFContext *DwCtx) {
1918   DebugLineTableRowRef RowRef =
1919       DebugLineTableRowRef::fromSMLoc(Instruction.getLoc());
1920   if (RowRef == DebugLineTableRowRef::NULL_ROW)
1921     return;
1922 
1923   const DWARFDebugLine::LineTable *LineTable;
1924   if (Function && Function->getDWARFUnit() &&
1925       Function->getDWARFUnit()->getOffset() == RowRef.DwCompileUnitIndex) {
1926     LineTable = Function->getDWARFLineTable();
1927   } else {
1928     LineTable = DwCtx->getLineTableForUnit(
1929         DwCtx->getCompileUnitForOffset(RowRef.DwCompileUnitIndex));
1930   }
1931   assert(LineTable && "line table expected for instruction with debug info");
1932 
1933   const DWARFDebugLine::Row &Row = LineTable->Rows[RowRef.RowIndex - 1];
1934   StringRef FileName = "";
1935   if (std::optional<const char *> FName =
1936           dwarf::toString(LineTable->Prologue.FileNames[Row.File - 1].Name))
1937     FileName = *FName;
1938   OS << " # debug line " << FileName << ":" << Row.Line;
1939   if (Row.Column)
1940     OS << ":" << Row.Column;
1941   if (Row.Discriminator)
1942     OS << " discriminator:" << Row.Discriminator;
1943 }
1944 
1945 void BinaryContext::printInstruction(raw_ostream &OS, const MCInst &Instruction,
1946                                      uint64_t Offset,
1947                                      const BinaryFunction *Function,
1948                                      bool PrintMCInst, bool PrintMemData,
1949                                      bool PrintRelocations,
1950                                      StringRef Endl) const {
1951   OS << format("    %08" PRIx64 ": ", Offset);
1952   if (MIB->isCFI(Instruction)) {
1953     uint32_t Offset = Instruction.getOperand(0).getImm();
1954     OS << "\t!CFI\t$" << Offset << "\t; ";
1955     if (Function)
1956       printCFI(OS, *Function->getCFIFor(Instruction));
1957     OS << Endl;
1958     return;
1959   }
1960   if (std::optional<uint32_t> DynamicID =
1961           MIB->getDynamicBranchID(Instruction)) {
1962     OS << "\tjit\t" << MIB->getTargetSymbol(Instruction)->getName()
1963        << " # ID: " << DynamicID;
1964   } else {
1965     // If there are annotations on the instruction, the MCInstPrinter will fail
1966     // to print the preferred alias as it only does so when the number of
1967     // operands is as expected. See
1968     // https://github.com/llvm/llvm-project/blob/782f1a0d895646c364a53f9dcdd6d4ec1f3e5ea0/llvm/lib/MC/MCInstPrinter.cpp#L142
1969     // Therefore, create a temporary copy of the Inst from which the annotations
1970     // are removed, and print that Inst.
1971     MCInst InstNoAnnot = Instruction;
1972     MIB->stripAnnotations(InstNoAnnot);
1973     InstPrinter->printInst(&InstNoAnnot, 0, "", *STI, OS);
1974   }
1975   if (MIB->isCall(Instruction)) {
1976     if (MIB->isTailCall(Instruction))
1977       OS << " # TAILCALL ";
1978     if (MIB->isInvoke(Instruction)) {
1979       const std::optional<MCPlus::MCLandingPad> EHInfo =
1980           MIB->getEHInfo(Instruction);
1981       OS << " # handler: ";
1982       if (EHInfo->first)
1983         OS << *EHInfo->first;
1984       else
1985         OS << '0';
1986       OS << "; action: " << EHInfo->second;
1987       const int64_t GnuArgsSize = MIB->getGnuArgsSize(Instruction);
1988       if (GnuArgsSize >= 0)
1989         OS << "; GNU_args_size = " << GnuArgsSize;
1990     }
1991   } else if (MIB->isIndirectBranch(Instruction)) {
1992     if (uint64_t JTAddress = MIB->getJumpTable(Instruction)) {
1993       OS << " # JUMPTABLE @0x" << Twine::utohexstr(JTAddress);
1994     } else {
1995       OS << " # UNKNOWN CONTROL FLOW";
1996     }
1997   }
1998   if (std::optional<uint32_t> Offset = MIB->getOffset(Instruction))
1999     OS << " # Offset: " << *Offset;
2000   if (std::optional<uint32_t> Size = MIB->getSize(Instruction))
2001     OS << " # Size: " << *Size;
2002   if (MCSymbol *Label = MIB->getInstLabel(Instruction))
2003     OS << " # Label: " << *Label;
2004 
2005   MIB->printAnnotations(Instruction, OS);
2006 
2007   if (opts::PrintDebugInfo)
2008     printDebugInfo(OS, Instruction, Function, DwCtx.get());
2009 
2010   if ((opts::PrintRelocations || PrintRelocations) && Function) {
2011     const uint64_t Size = computeCodeSize(&Instruction, &Instruction + 1);
2012     Function->printRelocations(OS, Offset, Size);
2013   }
2014 
2015   OS << Endl;
2016 
2017   if (PrintMCInst) {
2018     Instruction.dump_pretty(OS, InstPrinter.get());
2019     OS << Endl;
2020   }
2021 }
2022 
2023 std::optional<uint64_t>
2024 BinaryContext::getBaseAddressForMapping(uint64_t MMapAddress,
2025                                         uint64_t FileOffset) const {
2026   // Find a segment with a matching file offset.
2027   for (auto &KV : SegmentMapInfo) {
2028     const SegmentInfo &SegInfo = KV.second;
2029     // Only consider executable segments.
2030     if (!SegInfo.IsExecutable)
2031       continue;
2032     // FileOffset is got from perf event,
2033     // and it is equal to alignDown(SegInfo.FileOffset, pagesize).
2034     // If the pagesize is not equal to SegInfo.Alignment.
2035     // FileOffset and SegInfo.FileOffset should be aligned first,
2036     // and then judge whether they are equal.
2037     if (alignDown(SegInfo.FileOffset, SegInfo.Alignment) ==
2038         alignDown(FileOffset, SegInfo.Alignment)) {
2039       // The function's offset from base address in VAS is aligned by pagesize
2040       // instead of SegInfo.Alignment. Pagesize can't be got from perf events.
2041       // However, The ELF document says that SegInfo.FileOffset should equal
2042       // to SegInfo.Address, modulo the pagesize.
2043       // Reference: https://refspecs.linuxfoundation.org/elf/elf.pdf
2044 
2045       // So alignDown(SegInfo.Address, pagesize) can be calculated by:
2046       // alignDown(SegInfo.Address, pagesize)
2047       //   = SegInfo.Address - (SegInfo.Address % pagesize)
2048       //   = SegInfo.Address - (SegInfo.FileOffset % pagesize)
2049       //   = SegInfo.Address - SegInfo.FileOffset +
2050       //     alignDown(SegInfo.FileOffset, pagesize)
2051       //   = SegInfo.Address - SegInfo.FileOffset + FileOffset
2052       return MMapAddress - (SegInfo.Address - SegInfo.FileOffset + FileOffset);
2053     }
2054   }
2055 
2056   return std::nullopt;
2057 }
2058 
2059 ErrorOr<BinarySection &> BinaryContext::getSectionForAddress(uint64_t Address) {
2060   auto SI = AddressToSection.upper_bound(Address);
2061   if (SI != AddressToSection.begin()) {
2062     --SI;
2063     uint64_t UpperBound = SI->first + SI->second->getSize();
2064     if (!SI->second->getSize())
2065       UpperBound += 1;
2066     if (UpperBound > Address)
2067       return *SI->second;
2068   }
2069   return std::make_error_code(std::errc::bad_address);
2070 }
2071 
2072 ErrorOr<StringRef>
2073 BinaryContext::getSectionNameForAddress(uint64_t Address) const {
2074   if (ErrorOr<const BinarySection &> Section = getSectionForAddress(Address))
2075     return Section->getName();
2076   return std::make_error_code(std::errc::bad_address);
2077 }
2078 
2079 BinarySection &BinaryContext::registerSection(BinarySection *Section) {
2080   auto Res = Sections.insert(Section);
2081   (void)Res;
2082   assert(Res.second && "can't register the same section twice.");
2083 
2084   // Only register allocatable sections in the AddressToSection map.
2085   if (Section->isAllocatable() && Section->getAddress())
2086     AddressToSection.insert(std::make_pair(Section->getAddress(), Section));
2087   NameToSection.insert(
2088       std::make_pair(std::string(Section->getName()), Section));
2089   if (Section->hasSectionRef())
2090     SectionRefToBinarySection.insert(
2091         std::make_pair(Section->getSectionRef(), Section));
2092 
2093   LLVM_DEBUG(dbgs() << "BOLT-DEBUG: registering " << *Section << "\n");
2094   return *Section;
2095 }
2096 
2097 BinarySection &BinaryContext::registerSection(SectionRef Section) {
2098   return registerSection(new BinarySection(*this, Section));
2099 }
2100 
2101 BinarySection &
2102 BinaryContext::registerSection(const Twine &SectionName,
2103                                const BinarySection &OriginalSection) {
2104   return registerSection(
2105       new BinarySection(*this, SectionName, OriginalSection));
2106 }
2107 
2108 BinarySection &
2109 BinaryContext::registerOrUpdateSection(const Twine &Name, unsigned ELFType,
2110                                        unsigned ELFFlags, uint8_t *Data,
2111                                        uint64_t Size, unsigned Alignment) {
2112   auto NamedSections = getSectionByName(Name);
2113   if (NamedSections.begin() != NamedSections.end()) {
2114     assert(std::next(NamedSections.begin()) == NamedSections.end() &&
2115            "can only update unique sections");
2116     BinarySection *Section = NamedSections.begin()->second;
2117 
2118     LLVM_DEBUG(dbgs() << "BOLT-DEBUG: updating " << *Section << " -> ");
2119     const bool Flag = Section->isAllocatable();
2120     (void)Flag;
2121     Section->update(Data, Size, Alignment, ELFType, ELFFlags);
2122     LLVM_DEBUG(dbgs() << *Section << "\n");
2123     // FIXME: Fix section flags/attributes for MachO.
2124     if (isELF())
2125       assert(Flag == Section->isAllocatable() &&
2126              "can't change section allocation status");
2127     return *Section;
2128   }
2129 
2130   return registerSection(
2131       new BinarySection(*this, Name, Data, Size, Alignment, ELFType, ELFFlags));
2132 }
2133 
2134 void BinaryContext::deregisterSectionName(const BinarySection &Section) {
2135   auto NameRange = NameToSection.equal_range(Section.getName().str());
2136   while (NameRange.first != NameRange.second) {
2137     if (NameRange.first->second == &Section) {
2138       NameToSection.erase(NameRange.first);
2139       break;
2140     }
2141     ++NameRange.first;
2142   }
2143 }
2144 
2145 void BinaryContext::deregisterUnusedSections() {
2146   ErrorOr<BinarySection &> AbsSection = getUniqueSectionByName("<absolute>");
2147   for (auto SI = Sections.begin(); SI != Sections.end();) {
2148     BinarySection *Section = *SI;
2149     // We check getOutputData() instead of getOutputSize() because sometimes
2150     // zero-sized .text.cold sections are allocated.
2151     if (Section->hasSectionRef() || Section->getOutputData() ||
2152         (AbsSection && Section == &AbsSection.get())) {
2153       ++SI;
2154       continue;
2155     }
2156 
2157     LLVM_DEBUG(dbgs() << "LLVM-DEBUG: deregistering " << Section->getName()
2158                       << '\n';);
2159     deregisterSectionName(*Section);
2160     SI = Sections.erase(SI);
2161     delete Section;
2162   }
2163 }
2164 
2165 bool BinaryContext::deregisterSection(BinarySection &Section) {
2166   BinarySection *SectionPtr = &Section;
2167   auto Itr = Sections.find(SectionPtr);
2168   if (Itr != Sections.end()) {
2169     auto Range = AddressToSection.equal_range(SectionPtr->getAddress());
2170     while (Range.first != Range.second) {
2171       if (Range.first->second == SectionPtr) {
2172         AddressToSection.erase(Range.first);
2173         break;
2174       }
2175       ++Range.first;
2176     }
2177 
2178     deregisterSectionName(*SectionPtr);
2179     Sections.erase(Itr);
2180     delete SectionPtr;
2181     return true;
2182   }
2183   return false;
2184 }
2185 
2186 void BinaryContext::renameSection(BinarySection &Section,
2187                                   const Twine &NewName) {
2188   auto Itr = Sections.find(&Section);
2189   assert(Itr != Sections.end() && "Section must exist to be renamed.");
2190   Sections.erase(Itr);
2191 
2192   deregisterSectionName(Section);
2193 
2194   Section.Name = NewName.str();
2195   Section.setOutputName(Section.Name);
2196 
2197   NameToSection.insert(std::make_pair(Section.Name, &Section));
2198 
2199   // Reinsert with the new name.
2200   Sections.insert(&Section);
2201 }
2202 
2203 void BinaryContext::printSections(raw_ostream &OS) const {
2204   for (BinarySection *const &Section : Sections)
2205     OS << "BOLT-INFO: " << *Section << "\n";
2206 }
2207 
2208 BinarySection &BinaryContext::absoluteSection() {
2209   if (ErrorOr<BinarySection &> Section = getUniqueSectionByName("<absolute>"))
2210     return *Section;
2211   return registerOrUpdateSection("<absolute>", ELF::SHT_NULL, 0u);
2212 }
2213 
2214 ErrorOr<uint64_t> BinaryContext::getUnsignedValueAtAddress(uint64_t Address,
2215                                                            size_t Size) const {
2216   const ErrorOr<const BinarySection &> Section = getSectionForAddress(Address);
2217   if (!Section)
2218     return std::make_error_code(std::errc::bad_address);
2219 
2220   if (Section->isVirtual())
2221     return 0;
2222 
2223   DataExtractor DE(Section->getContents(), AsmInfo->isLittleEndian(),
2224                    AsmInfo->getCodePointerSize());
2225   auto ValueOffset = static_cast<uint64_t>(Address - Section->getAddress());
2226   return DE.getUnsigned(&ValueOffset, Size);
2227 }
2228 
2229 ErrorOr<int64_t> BinaryContext::getSignedValueAtAddress(uint64_t Address,
2230                                                         size_t Size) const {
2231   const ErrorOr<const BinarySection &> Section = getSectionForAddress(Address);
2232   if (!Section)
2233     return std::make_error_code(std::errc::bad_address);
2234 
2235   if (Section->isVirtual())
2236     return 0;
2237 
2238   DataExtractor DE(Section->getContents(), AsmInfo->isLittleEndian(),
2239                    AsmInfo->getCodePointerSize());
2240   auto ValueOffset = static_cast<uint64_t>(Address - Section->getAddress());
2241   return DE.getSigned(&ValueOffset, Size);
2242 }
2243 
2244 void BinaryContext::addRelocation(uint64_t Address, MCSymbol *Symbol,
2245                                   uint64_t Type, uint64_t Addend,
2246                                   uint64_t Value) {
2247   ErrorOr<BinarySection &> Section = getSectionForAddress(Address);
2248   assert(Section && "cannot find section for address");
2249   Section->addRelocation(Address - Section->getAddress(), Symbol, Type, Addend,
2250                          Value);
2251 }
2252 
2253 void BinaryContext::addDynamicRelocation(uint64_t Address, MCSymbol *Symbol,
2254                                          uint64_t Type, uint64_t Addend,
2255                                          uint64_t Value) {
2256   ErrorOr<BinarySection &> Section = getSectionForAddress(Address);
2257   assert(Section && "cannot find section for address");
2258   Section->addDynamicRelocation(Address - Section->getAddress(), Symbol, Type,
2259                                 Addend, Value);
2260 }
2261 
2262 bool BinaryContext::removeRelocationAt(uint64_t Address) {
2263   ErrorOr<BinarySection &> Section = getSectionForAddress(Address);
2264   assert(Section && "cannot find section for address");
2265   return Section->removeRelocationAt(Address - Section->getAddress());
2266 }
2267 
2268 const Relocation *BinaryContext::getRelocationAt(uint64_t Address) const {
2269   ErrorOr<const BinarySection &> Section = getSectionForAddress(Address);
2270   if (!Section)
2271     return nullptr;
2272 
2273   return Section->getRelocationAt(Address - Section->getAddress());
2274 }
2275 
2276 const Relocation *
2277 BinaryContext::getDynamicRelocationAt(uint64_t Address) const {
2278   ErrorOr<const BinarySection &> Section = getSectionForAddress(Address);
2279   if (!Section)
2280     return nullptr;
2281 
2282   return Section->getDynamicRelocationAt(Address - Section->getAddress());
2283 }
2284 
2285 void BinaryContext::markAmbiguousRelocations(BinaryData &BD,
2286                                              const uint64_t Address) {
2287   auto setImmovable = [&](BinaryData &BD) {
2288     BinaryData *Root = BD.getAtomicRoot();
2289     LLVM_DEBUG(if (Root->isMoveable()) {
2290       dbgs() << "BOLT-DEBUG: setting " << *Root << " as immovable "
2291              << "due to ambiguous relocation referencing 0x"
2292              << Twine::utohexstr(Address) << '\n';
2293     });
2294     Root->setIsMoveable(false);
2295   };
2296 
2297   if (Address == BD.getAddress()) {
2298     setImmovable(BD);
2299 
2300     // Set previous symbol as immovable
2301     BinaryData *Prev = getBinaryDataContainingAddress(Address - 1);
2302     if (Prev && Prev->getEndAddress() == BD.getAddress())
2303       setImmovable(*Prev);
2304   }
2305 
2306   if (Address == BD.getEndAddress()) {
2307     setImmovable(BD);
2308 
2309     // Set next symbol as immovable
2310     BinaryData *Next = getBinaryDataContainingAddress(BD.getEndAddress());
2311     if (Next && Next->getAddress() == BD.getEndAddress())
2312       setImmovable(*Next);
2313   }
2314 }
2315 
2316 BinaryFunction *BinaryContext::getFunctionForSymbol(const MCSymbol *Symbol,
2317                                                     uint64_t *EntryDesc) {
2318   std::shared_lock<llvm::sys::RWMutex> Lock(SymbolToFunctionMapMutex);
2319   auto BFI = SymbolToFunctionMap.find(Symbol);
2320   if (BFI == SymbolToFunctionMap.end())
2321     return nullptr;
2322 
2323   BinaryFunction *BF = BFI->second;
2324   if (EntryDesc)
2325     *EntryDesc = BF->getEntryIDForSymbol(Symbol);
2326 
2327   return BF;
2328 }
2329 
2330 std::string
2331 BinaryContext::generateBugReportMessage(StringRef Message,
2332                                         const BinaryFunction &Function) const {
2333   std::string Msg;
2334   raw_string_ostream SS(Msg);
2335   SS << "=======================================\n";
2336   SS << "BOLT is unable to proceed because it couldn't properly understand "
2337         "this function.\n";
2338   SS << "If you are running the most recent version of BOLT, you may "
2339         "want to "
2340         "report this and paste this dump.\nPlease check that there is no "
2341         "sensitive contents being shared in this dump.\n";
2342   SS << "\nOffending function: " << Function.getPrintName() << "\n\n";
2343   ScopedPrinter SP(SS);
2344   SP.printBinaryBlock("Function contents", *Function.getData());
2345   SS << "\n";
2346   const_cast<BinaryFunction &>(Function).print(SS, "");
2347   SS << "ERROR: " << Message;
2348   SS << "\n=======================================\n";
2349   return Msg;
2350 }
2351 
2352 BinaryFunction *
2353 BinaryContext::createInjectedBinaryFunction(const std::string &Name,
2354                                             bool IsSimple) {
2355   InjectedBinaryFunctions.push_back(new BinaryFunction(Name, *this, IsSimple));
2356   BinaryFunction *BF = InjectedBinaryFunctions.back();
2357   setSymbolToFunctionMap(BF->getSymbol(), BF);
2358   BF->CurrentState = BinaryFunction::State::CFG;
2359   return BF;
2360 }
2361 
2362 std::pair<size_t, size_t>
2363 BinaryContext::calculateEmittedSize(BinaryFunction &BF, bool FixBranches) {
2364   // Adjust branch instruction to match the current layout.
2365   if (FixBranches)
2366     BF.fixBranches();
2367 
2368   // Create local MC context to isolate the effect of ephemeral code emission.
2369   IndependentCodeEmitter MCEInstance = createIndependentMCCodeEmitter();
2370   MCContext *LocalCtx = MCEInstance.LocalCtx.get();
2371   MCAsmBackend *MAB =
2372       TheTarget->createMCAsmBackend(*STI, *MRI, MCTargetOptions());
2373 
2374   SmallString<256> Code;
2375   raw_svector_ostream VecOS(Code);
2376 
2377   std::unique_ptr<MCObjectWriter> OW = MAB->createObjectWriter(VecOS);
2378   std::unique_ptr<MCStreamer> Streamer(TheTarget->createMCObjectStreamer(
2379       *TheTriple, *LocalCtx, std::unique_ptr<MCAsmBackend>(MAB), std::move(OW),
2380       std::unique_ptr<MCCodeEmitter>(MCEInstance.MCE.release()), *STI));
2381 
2382   Streamer->initSections(false, *STI);
2383 
2384   MCSection *Section = MCEInstance.LocalMOFI->getTextSection();
2385   Section->setHasInstructions(true);
2386 
2387   // Create symbols in the LocalCtx so that they get destroyed with it.
2388   MCSymbol *StartLabel = LocalCtx->createTempSymbol();
2389   MCSymbol *EndLabel = LocalCtx->createTempSymbol();
2390 
2391   Streamer->switchSection(Section);
2392   Streamer->emitLabel(StartLabel);
2393   emitFunctionBody(*Streamer, BF, BF.getLayout().getMainFragment(),
2394                    /*EmitCodeOnly=*/true);
2395   Streamer->emitLabel(EndLabel);
2396 
2397   using LabelRange = std::pair<const MCSymbol *, const MCSymbol *>;
2398   SmallVector<LabelRange> SplitLabels;
2399   for (FunctionFragment &FF : BF.getLayout().getSplitFragments()) {
2400     MCSymbol *const SplitStartLabel = LocalCtx->createTempSymbol();
2401     MCSymbol *const SplitEndLabel = LocalCtx->createTempSymbol();
2402     SplitLabels.emplace_back(SplitStartLabel, SplitEndLabel);
2403 
2404     MCSectionELF *const SplitSection = LocalCtx->getELFSection(
2405         BF.getCodeSectionName(FF.getFragmentNum()), ELF::SHT_PROGBITS,
2406         ELF::SHF_EXECINSTR | ELF::SHF_ALLOC);
2407     SplitSection->setHasInstructions(true);
2408     Streamer->switchSection(SplitSection);
2409 
2410     Streamer->emitLabel(SplitStartLabel);
2411     emitFunctionBody(*Streamer, BF, FF, /*EmitCodeOnly=*/true);
2412     Streamer->emitLabel(SplitEndLabel);
2413   }
2414 
2415   MCAssembler &Assembler =
2416       static_cast<MCObjectStreamer *>(Streamer.get())->getAssembler();
2417   Assembler.layout();
2418 
2419   // Obtain fragment sizes.
2420   std::vector<uint64_t> FragmentSizes;
2421   // Main fragment size.
2422   const uint64_t HotSize = Assembler.getSymbolOffset(*EndLabel) -
2423                            Assembler.getSymbolOffset(*StartLabel);
2424   FragmentSizes.push_back(HotSize);
2425   // Split fragment sizes.
2426   uint64_t ColdSize = 0;
2427   for (const auto &Labels : SplitLabels) {
2428     uint64_t Size = Assembler.getSymbolOffset(*Labels.second) -
2429                     Assembler.getSymbolOffset(*Labels.first);
2430     FragmentSizes.push_back(Size);
2431     ColdSize += Size;
2432   }
2433 
2434   // Populate new start and end offsets of each basic block.
2435   uint64_t FragmentIndex = 0;
2436   for (FunctionFragment &FF : BF.getLayout().fragments()) {
2437     BinaryBasicBlock *PrevBB = nullptr;
2438     for (BinaryBasicBlock *BB : FF) {
2439       const uint64_t BBStartOffset =
2440           Assembler.getSymbolOffset(*(BB->getLabel()));
2441       BB->setOutputStartAddress(BBStartOffset);
2442       if (PrevBB)
2443         PrevBB->setOutputEndAddress(BBStartOffset);
2444       PrevBB = BB;
2445     }
2446     if (PrevBB)
2447       PrevBB->setOutputEndAddress(FragmentSizes[FragmentIndex]);
2448     FragmentIndex++;
2449   }
2450 
2451   // Clean-up the effect of the code emission.
2452   for (const MCSymbol &Symbol : Assembler.symbols()) {
2453     MCSymbol *MutableSymbol = const_cast<MCSymbol *>(&Symbol);
2454     MutableSymbol->setUndefined();
2455     MutableSymbol->setIsRegistered(false);
2456   }
2457 
2458   return std::make_pair(HotSize, ColdSize);
2459 }
2460 
2461 bool BinaryContext::validateInstructionEncoding(
2462     ArrayRef<uint8_t> InputSequence) const {
2463   MCInst Inst;
2464   uint64_t InstSize;
2465   DisAsm->getInstruction(Inst, InstSize, InputSequence, 0, nulls());
2466   assert(InstSize == InputSequence.size() &&
2467          "Disassembled instruction size does not match the sequence.");
2468 
2469   SmallString<256> Code;
2470   SmallVector<MCFixup, 4> Fixups;
2471 
2472   MCE->encodeInstruction(Inst, Code, Fixups, *STI);
2473   auto OutputSequence = ArrayRef<uint8_t>((uint8_t *)Code.data(), Code.size());
2474   if (InputSequence != OutputSequence) {
2475     if (opts::Verbosity > 1) {
2476       this->errs() << "BOLT-WARNING: mismatched encoding detected\n"
2477                    << "      input: " << InputSequence << '\n'
2478                    << "     output: " << OutputSequence << '\n';
2479     }
2480     return false;
2481   }
2482 
2483   return true;
2484 }
2485 
2486 uint64_t BinaryContext::getHotThreshold() const {
2487   static uint64_t Threshold = 0;
2488   if (Threshold == 0) {
2489     Threshold = std::max(
2490         (uint64_t)opts::ExecutionCountThreshold,
2491         NumProfiledFuncs ? SumExecutionCount / (2 * NumProfiledFuncs) : 1);
2492   }
2493   return Threshold;
2494 }
2495 
2496 BinaryFunction *BinaryContext::getBinaryFunctionContainingAddress(
2497     uint64_t Address, bool CheckPastEnd, bool UseMaxSize) {
2498   auto FI = BinaryFunctions.upper_bound(Address);
2499   if (FI == BinaryFunctions.begin())
2500     return nullptr;
2501   --FI;
2502 
2503   const uint64_t UsedSize =
2504       UseMaxSize ? FI->second.getMaxSize() : FI->second.getSize();
2505 
2506   if (Address >= FI->first + UsedSize + (CheckPastEnd ? 1 : 0))
2507     return nullptr;
2508 
2509   return &FI->second;
2510 }
2511 
2512 BinaryFunction *BinaryContext::getBinaryFunctionAtAddress(uint64_t Address) {
2513   // First, try to find a function starting at the given address. If the
2514   // function was folded, this will get us the original folded function if it
2515   // wasn't removed from the list, e.g. in non-relocation mode.
2516   auto BFI = BinaryFunctions.find(Address);
2517   if (BFI != BinaryFunctions.end())
2518     return &BFI->second;
2519 
2520   // We might have folded the function matching the object at the given
2521   // address. In such case, we look for a function matching the symbol
2522   // registered at the original address. The new function (the one that the
2523   // original was folded into) will hold the symbol.
2524   if (const BinaryData *BD = getBinaryDataAtAddress(Address)) {
2525     uint64_t EntryID = 0;
2526     BinaryFunction *BF = getFunctionForSymbol(BD->getSymbol(), &EntryID);
2527     if (BF && EntryID == 0)
2528       return BF;
2529   }
2530   return nullptr;
2531 }
2532 
2533 /// Deregister JumpTable registered at a given \p Address and delete it.
2534 void BinaryContext::deleteJumpTable(uint64_t Address) {
2535   assert(JumpTables.count(Address) && "Must have a jump table at address");
2536   JumpTable *JT = JumpTables.at(Address);
2537   for (BinaryFunction *Parent : JT->Parents)
2538     Parent->JumpTables.erase(Address);
2539   JumpTables.erase(Address);
2540   delete JT;
2541 }
2542 
2543 DebugAddressRangesVector BinaryContext::translateModuleAddressRanges(
2544     const DWARFAddressRangesVector &InputRanges) const {
2545   DebugAddressRangesVector OutputRanges;
2546 
2547   for (const DWARFAddressRange Range : InputRanges) {
2548     auto BFI = BinaryFunctions.lower_bound(Range.LowPC);
2549     while (BFI != BinaryFunctions.end()) {
2550       const BinaryFunction &Function = BFI->second;
2551       if (Function.getAddress() >= Range.HighPC)
2552         break;
2553       const DebugAddressRangesVector FunctionRanges =
2554           Function.getOutputAddressRanges();
2555       llvm::move(FunctionRanges, std::back_inserter(OutputRanges));
2556       std::advance(BFI, 1);
2557     }
2558   }
2559 
2560   return OutputRanges;
2561 }
2562 
2563 } // namespace bolt
2564 } // namespace llvm
2565