xref: /llvm-project/bolt/lib/Core/BinaryContext.cpp (revision 4d2bc0adc63cf90111d849911ccdddaa0d886e60)
1 //===- bolt/Core/BinaryContext.cpp - Low-level context --------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements the BinaryContext class.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "bolt/Core/BinaryContext.h"
14 #include "bolt/Core/BinaryEmitter.h"
15 #include "bolt/Core/BinaryFunction.h"
16 #include "bolt/Utils/CommandLineOpts.h"
17 #include "bolt/Utils/Utils.h"
18 #include "llvm/ADT/STLExtras.h"
19 #include "llvm/ADT/Twine.h"
20 #include "llvm/DebugInfo/DWARF/DWARFCompileUnit.h"
21 #include "llvm/DebugInfo/DWARF/DWARFFormValue.h"
22 #include "llvm/DebugInfo/DWARF/DWARFUnit.h"
23 #include "llvm/MC/MCAssembler.h"
24 #include "llvm/MC/MCContext.h"
25 #include "llvm/MC/MCDisassembler/MCDisassembler.h"
26 #include "llvm/MC/MCInstPrinter.h"
27 #include "llvm/MC/MCObjectStreamer.h"
28 #include "llvm/MC/MCObjectWriter.h"
29 #include "llvm/MC/MCRegisterInfo.h"
30 #include "llvm/MC/MCSectionELF.h"
31 #include "llvm/MC/MCStreamer.h"
32 #include "llvm/MC/MCSubtargetInfo.h"
33 #include "llvm/MC/MCSymbol.h"
34 #include "llvm/Support/CommandLine.h"
35 #include "llvm/Support/Error.h"
36 #include "llvm/Support/Regex.h"
37 #include <algorithm>
38 #include <functional>
39 #include <iterator>
40 #include <unordered_set>
41 
42 using namespace llvm;
43 
44 #undef  DEBUG_TYPE
45 #define DEBUG_TYPE "bolt"
46 
47 namespace opts {
48 
49 cl::opt<bool> NoHugePages("no-huge-pages",
50                           cl::desc("use regular size pages for code alignment"),
51                           cl::Hidden, cl::cat(BoltCategory));
52 
53 static cl::opt<bool>
54 PrintDebugInfo("print-debug-info",
55   cl::desc("print debug info when printing functions"),
56   cl::Hidden,
57   cl::ZeroOrMore,
58   cl::cat(BoltCategory));
59 
60 cl::opt<bool> PrintRelocations(
61     "print-relocations",
62     cl::desc("print relocations when printing functions/objects"), cl::Hidden,
63     cl::cat(BoltCategory));
64 
65 static cl::opt<bool>
66 PrintMemData("print-mem-data",
67   cl::desc("print memory data annotations when printing functions"),
68   cl::Hidden,
69   cl::ZeroOrMore,
70   cl::cat(BoltCategory));
71 
72 cl::opt<std::string> CompDirOverride(
73     "comp-dir-override",
74     cl::desc("overrides DW_AT_comp_dir, and provides an alternative base "
75              "location, which is used with DW_AT_dwo_name to construct a path "
76              "to *.dwo files."),
77     cl::Hidden, cl::init(""), cl::cat(BoltCategory));
78 } // namespace opts
79 
80 namespace llvm {
81 namespace bolt {
82 
// Storage for the static class identifier of BOLTError.
// NOTE(review): presumably used by the llvm::ErrorInfo machinery to identify
// the error type by the address of this member -- confirm in the header.
char BOLTError::ID = 0;

/// Construct a BOLT error.
/// \p IsFatal records whether the error is fatal (see log()); \p S is the
/// message, which is rendered to a std::string immediately so the Twine does
/// not need to outlive the constructor call.
BOLTError::BOLTError(bool IsFatal, const Twine &S)
    : IsFatal(IsFatal), Msg(S.str()) {}
87 
88 void BOLTError::log(raw_ostream &OS) const {
89   if (IsFatal)
90     OS << "FATAL ";
91   StringRef ErrMsg = StringRef(Msg);
92   // Prepend our error prefix if it is missing
93   if (ErrMsg.empty()) {
94     OS << "BOLT-ERROR\n";
95   } else {
96     if (!ErrMsg.starts_with("BOLT-ERROR"))
97       OS << "BOLT-ERROR: ";
98     OS << ErrMsg << "\n";
99   }
100 }
101 
102 std::error_code BOLTError::convertToErrorCode() const {
103   return inconvertibleErrorCode();
104 }
105 
106 Error createNonFatalBOLTError(const Twine &S) {
107   return make_error<BOLTError>(/*IsFatal*/ false, S);
108 }
109 
110 Error createFatalBOLTError(const Twine &S) {
111   return make_error<BOLTError>(/*IsFatal*/ true, S);
112 }
113 
114 void BinaryContext::logBOLTErrorsAndQuitOnFatal(Error E) {
115   handleAllErrors(Error(std::move(E)), [&](const BOLTError &E) {
116     if (!E.getMessage().empty())
117       E.log(this->errs());
118     if (E.isFatal())
119       exit(1);
120   });
121 }
122 
123 BinaryContext::BinaryContext(std::unique_ptr<MCContext> Ctx,
124                              std::unique_ptr<DWARFContext> DwCtx,
125                              std::unique_ptr<Triple> TheTriple,
126                              const Target *TheTarget, std::string TripleName,
127                              std::unique_ptr<MCCodeEmitter> MCE,
128                              std::unique_ptr<MCObjectFileInfo> MOFI,
129                              std::unique_ptr<const MCAsmInfo> AsmInfo,
130                              std::unique_ptr<const MCInstrInfo> MII,
131                              std::unique_ptr<const MCSubtargetInfo> STI,
132                              std::unique_ptr<MCInstPrinter> InstPrinter,
133                              std::unique_ptr<const MCInstrAnalysis> MIA,
134                              std::unique_ptr<MCPlusBuilder> MIB,
135                              std::unique_ptr<const MCRegisterInfo> MRI,
136                              std::unique_ptr<MCDisassembler> DisAsm,
137                              JournalingStreams Logger)
138     : Ctx(std::move(Ctx)), DwCtx(std::move(DwCtx)),
139       TheTriple(std::move(TheTriple)), TheTarget(TheTarget),
140       TripleName(TripleName), MCE(std::move(MCE)), MOFI(std::move(MOFI)),
141       AsmInfo(std::move(AsmInfo)), MII(std::move(MII)), STI(std::move(STI)),
142       InstPrinter(std::move(InstPrinter)), MIA(std::move(MIA)),
143       MIB(std::move(MIB)), MRI(std::move(MRI)), DisAsm(std::move(DisAsm)),
144       Logger(Logger), InitialDynoStats(isAArch64()) {
145   RegularPageSize = isAArch64() ? RegularPageSizeAArch64 : RegularPageSizeX86;
146   PageAlign = opts::NoHugePages ? RegularPageSize : HugePageSize;
147 }
148 
149 BinaryContext::~BinaryContext() {
150   for (BinarySection *Section : Sections)
151     delete Section;
152   for (BinaryFunction *InjectedFunction : InjectedBinaryFunctions)
153     delete InjectedFunction;
154   for (std::pair<const uint64_t, JumpTable *> JTI : JumpTables)
155     delete JTI.second;
156   clearBinaryData();
157 }
158 
/// Create a BinaryContext for the target described by \p TheTriple.
///
/// \p InputFileName is recorded as the context's file name. \p Features must
/// be null for x86_64 and AArch64 and non-null for RISC-V (the "relax" feature
/// is added to it). \p IsPIC selects the LSDA encoding and determines whether
/// the binary is treated as having a fixed load address. \p DwCtx and
/// \p Logger are forwarded to the BinaryContext constructor.
///
/// Returns an error if the architecture is not one of the handled cases or if
/// one of the checked MC-layer components cannot be created for the target.
Expected<std::unique_ptr<BinaryContext>> BinaryContext::createBinaryContext(
    Triple TheTriple, StringRef InputFileName, SubtargetFeatures *Features,
    bool IsPIC, std::unique_ptr<DWARFContext> DwCtx, JournalingStreams Logger) {
  StringRef ArchName = "";
  std::string FeaturesStr = "";
  // Pick the architecture name and subtarget feature string per target.
  switch (TheTriple.getArch()) {
  case llvm::Triple::x86_64:
    if (Features)
      return createFatalBOLTError(
          "x86_64 target does not use SubtargetFeatures");
    ArchName = "x86-64";
    FeaturesStr = "+nopl";
    break;
  case llvm::Triple::aarch64:
    if (Features)
      return createFatalBOLTError(
          "AArch64 target does not use SubtargetFeatures");
    ArchName = "aarch64";
    FeaturesStr = "+all";
    break;
  case llvm::Triple::riscv64: {
    ArchName = "riscv64";
    if (!Features)
      return createFatalBOLTError("RISCV target needs SubtargetFeatures");
    // We rely on relaxation for some transformations (e.g., promoting all calls
    // to PseudoCALL and then making JITLink relax them). Since the relax
    // feature is not stored in the object file, we manually enable it.
    Features->AddFeature("relax");
    FeaturesStr = Features->getString();
    break;
  }
  default:
    return createStringError(std::errc::not_supported,
                             "BOLT-ERROR: Unrecognized machine in ELF file");
  }

  const std::string TripleName = TheTriple.str();

  // Note: this local shadows the llvm::Error type name for the rest of the
  // function.
  std::string Error;
  const Target *TheTarget =
      TargetRegistry::lookupTarget(std::string(ArchName), TheTriple, Error);
  if (!TheTarget)
    return createStringError(make_error_code(std::errc::not_supported),
                             Twine("BOLT-ERROR: ", Error));

  std::unique_ptr<const MCRegisterInfo> MRI(
      TheTarget->createMCRegInfo(TripleName));
  if (!MRI)
    return createStringError(
        make_error_code(std::errc::not_supported),
        Twine("BOLT-ERROR: no register info for target ", TripleName));

  // Set up disassembler.
  std::unique_ptr<MCAsmInfo> AsmInfo(
      TheTarget->createMCAsmInfo(*MRI, TripleName, MCTargetOptions()));
  if (!AsmInfo)
    return createStringError(
        make_error_code(std::errc::not_supported),
        Twine("BOLT-ERROR: no assembly info for target ", TripleName));
  // BOLT creates "func@PLT" symbols for PLT entries. In function assembly dump
  // we want to emit such names as using @PLT without double quotes to convey
  // variant kind to the assembler. BOLT doesn't rely on the linker so we can
  // override the default AsmInfo behavior to emit names the way we want.
  AsmInfo->setAllowAtInName(true);

  std::unique_ptr<const MCSubtargetInfo> STI(
      TheTarget->createMCSubtargetInfo(TripleName, "", FeaturesStr));
  if (!STI)
    return createStringError(
        make_error_code(std::errc::not_supported),
        Twine("BOLT-ERROR: no subtarget info for target ", TripleName));

  std::unique_ptr<const MCInstrInfo> MII(TheTarget->createMCInstrInfo());
  if (!MII)
    return createStringError(
        make_error_code(std::errc::not_supported),
        Twine("BOLT-ERROR: no instruction info for target ", TripleName));

  // The MCContext is handed raw pointers to AsmInfo, MRI and STI; those
  // objects are later moved into the BinaryContext together with the context.
  std::unique_ptr<MCContext> Ctx(
      new MCContext(TheTriple, AsmInfo.get(), MRI.get(), STI.get()));
  std::unique_ptr<MCObjectFileInfo> MOFI(
      TheTarget->createMCObjectFileInfo(*Ctx, IsPIC));
  Ctx->setObjectFileInfo(MOFI.get());
  // We do not support X86 Large code model. Change this in the future.
  bool Large = false;
  if (TheTriple.getArch() == llvm::Triple::aarch64)
    Large = true;
  // Absolute encoding for non-PIC, PC-relative signed encoding for PIC; the
  // width depends on whether the large model is in effect.
  unsigned LSDAEncoding =
      Large ? dwarf::DW_EH_PE_absptr : dwarf::DW_EH_PE_udata4;
  if (IsPIC) {
    LSDAEncoding = dwarf::DW_EH_PE_pcrel |
                   (Large ? dwarf::DW_EH_PE_sdata8 : dwarf::DW_EH_PE_sdata4);
  }

  std::unique_ptr<MCDisassembler> DisAsm(
      TheTarget->createMCDisassembler(*STI, *Ctx));

  if (!DisAsm)
    return createStringError(
        make_error_code(std::errc::not_supported),
        Twine("BOLT-ERROR: no disassembler info for target ", TripleName));

  std::unique_ptr<const MCInstrAnalysis> MIA(
      TheTarget->createMCInstrAnalysis(MII.get()));
  if (!MIA)
    return createStringError(
        make_error_code(std::errc::not_supported),
        Twine("BOLT-ERROR: failed to create instruction analysis for target ",
              TripleName));

  int AsmPrinterVariant = AsmInfo->getAssemblerDialect();
  std::unique_ptr<MCInstPrinter> InstructionPrinter(
      TheTarget->createMCInstPrinter(TheTriple, AsmPrinterVariant, *AsmInfo,
                                     *MII, *MRI));
  if (!InstructionPrinter)
    return createStringError(
        make_error_code(std::errc::not_supported),
        Twine("BOLT-ERROR: no instruction printer for target ", TripleName));
  InstructionPrinter->setPrintImmHex(true);

  // NOTE(review): the code emitter is not null-checked here -- confirm that
  // every supported target registers one.
  std::unique_ptr<MCCodeEmitter> MCE(
      TheTarget->createMCCodeEmitter(*MII, *Ctx));

  // The MCPlusBuilder (MIB) slot is passed as nullptr; it is not created here.
  auto BC = std::make_unique<BinaryContext>(
      std::move(Ctx), std::move(DwCtx), std::make_unique<Triple>(TheTriple),
      TheTarget, std::string(TripleName), std::move(MCE), std::move(MOFI),
      std::move(AsmInfo), std::move(MII), std::move(STI),
      std::move(InstructionPrinter), std::move(MIA), nullptr, std::move(MRI),
      std::move(DisAsm), Logger);

  BC->LSDAEncoding = LSDAEncoding;

  // NOTE(review): the assembler backend is not null-checked either;
  // createObjectWriter() dereferences it unconditionally.
  BC->MAB = std::unique_ptr<MCAsmBackend>(
      BC->TheTarget->createMCAsmBackend(*BC->STI, *BC->MRI, MCTargetOptions()));

  BC->setFilename(InputFileName);

  BC->HasFixedLoadAddress = !IsPIC;

  // A second disassembler instance, created with the same subtarget info and
  // context as DisAsm.
  BC->SymbolicDisAsm = std::unique_ptr<MCDisassembler>(
      BC->TheTarget->createMCDisassembler(*BC->STI, *BC->Ctx));

  if (!BC->SymbolicDisAsm)
    return createStringError(
        make_error_code(std::errc::not_supported),
        Twine("BOLT-ERROR: no disassembler info for target ", TripleName));

  return std::move(BC);
}
310 
311 bool BinaryContext::forceSymbolRelocations(StringRef SymbolName) const {
312   if (opts::HotText &&
313       (SymbolName == "__hot_start" || SymbolName == "__hot_end"))
314     return true;
315 
316   if (opts::HotData &&
317       (SymbolName == "__hot_data_start" || SymbolName == "__hot_data_end"))
318     return true;
319 
320   if (SymbolName == "_end")
321     return true;
322 
323   return false;
324 }
325 
326 std::unique_ptr<MCObjectWriter>
327 BinaryContext::createObjectWriter(raw_pwrite_stream &OS) {
328   return MAB->createObjectWriter(OS);
329 }
330 
331 bool BinaryContext::validateObjectNesting() const {
332   auto Itr = BinaryDataMap.begin();
333   auto End = BinaryDataMap.end();
334   bool Valid = true;
335   while (Itr != End) {
336     auto Next = std::next(Itr);
337     while (Next != End &&
338            Itr->second->getSection() == Next->second->getSection() &&
339            Itr->second->containsRange(Next->second->getAddress(),
340                                       Next->second->getSize())) {
341       if (Next->second->Parent != Itr->second) {
342         this->errs() << "BOLT-WARNING: object nesting incorrect for:\n"
343                      << "BOLT-WARNING:  " << *Itr->second << "\n"
344                      << "BOLT-WARNING:  " << *Next->second << "\n";
345         Valid = false;
346       }
347       ++Next;
348     }
349     Itr = Next;
350   }
351   return Valid;
352 }
353 
354 bool BinaryContext::validateHoles() const {
355   bool Valid = true;
356   for (BinarySection &Section : sections()) {
357     for (const Relocation &Rel : Section.relocations()) {
358       uint64_t RelAddr = Rel.Offset + Section.getAddress();
359       const BinaryData *BD = getBinaryDataContainingAddress(RelAddr);
360       if (!BD) {
361         this->errs()
362             << "BOLT-WARNING: no BinaryData found for relocation at address"
363             << " 0x" << Twine::utohexstr(RelAddr) << " in " << Section.getName()
364             << "\n";
365         Valid = false;
366       } else if (!BD->getAtomicRoot()) {
367         this->errs()
368             << "BOLT-WARNING: no atomic BinaryData found for relocation at "
369             << "address 0x" << Twine::utohexstr(RelAddr) << " in "
370             << Section.getName() << "\n";
371         Valid = false;
372       }
373     }
374   }
375   return Valid;
376 }
377 
378 void BinaryContext::updateObjectNesting(BinaryDataMapType::iterator GAI) {
379   const uint64_t Address = GAI->second->getAddress();
380   const uint64_t Size = GAI->second->getSize();
381 
382   auto fixParents = [&](BinaryDataMapType::iterator Itr,
383                         BinaryData *NewParent) {
384     BinaryData *OldParent = Itr->second->Parent;
385     Itr->second->Parent = NewParent;
386     ++Itr;
387     while (Itr != BinaryDataMap.end() && OldParent &&
388            Itr->second->Parent == OldParent) {
389       Itr->second->Parent = NewParent;
390       ++Itr;
391     }
392   };
393 
394   // Check if the previous symbol contains the newly added symbol.
395   if (GAI != BinaryDataMap.begin()) {
396     BinaryData *Prev = std::prev(GAI)->second;
397     while (Prev) {
398       if (Prev->getSection() == GAI->second->getSection() &&
399           Prev->containsRange(Address, Size)) {
400         fixParents(GAI, Prev);
401       } else {
402         fixParents(GAI, nullptr);
403       }
404       Prev = Prev->Parent;
405     }
406   }
407 
408   // Check if the newly added symbol contains any subsequent symbols.
409   if (Size != 0) {
410     BinaryData *BD = GAI->second->Parent ? GAI->second->Parent : GAI->second;
411     auto Itr = std::next(GAI);
412     while (
413         Itr != BinaryDataMap.end() &&
414         BD->containsRange(Itr->second->getAddress(), Itr->second->getSize())) {
415       Itr->second->Parent = BD;
416       ++Itr;
417     }
418   }
419 }
420 
421 iterator_range<BinaryContext::binary_data_iterator>
422 BinaryContext::getSubBinaryData(BinaryData *BD) {
423   auto Start = std::next(BinaryDataMap.find(BD->getAddress()));
424   auto End = Start;
425   while (End != BinaryDataMap.end() && BD->isAncestorOf(End->second))
426     ++End;
427   return make_range(Start, End);
428 }
429 
/// Resolve a reference from function \p BF to \p Address into a symbol plus
/// an addend.
///
/// The checks are tried in this order:
///  1. (AArch64 only) constant-island access, either in \p BF or in the
///     function owning the nearest island at or below \p Address.
///  2. An address in a text section: if it lies inside \p BF (and is not the
///     function start) it becomes a new entry point of \p BF; if it lies
///     outside, an interprocedural reference is recorded and resolution
///     continues.
///  3. With relocations, a PC-relative reference to memory that looks like a
///     PIC jump table gets a jump table symbol.
///  4. An existing BinaryData object containing the address; the addend is
///     the offset into that object.
///  5. Otherwise a new global "DATAat" symbol is created at the address.
///
/// \p IsPCRel tells whether the reference is PC-relative; it only affects the
/// jump table check.
std::pair<const MCSymbol *, uint64_t>
BinaryContext::handleAddressRef(uint64_t Address, BinaryFunction &BF,
                                bool IsPCRel) {
  if (isAArch64()) {
    // Check if this is an access to a constant island and create bookkeeping
    // to keep track of it and emit it later as part of this function.
    if (MCSymbol *IslandSym = BF.getOrCreateIslandAccess(Address))
      return std::make_pair(IslandSym, 0);

    // Detect custom code written in assembly that refers to arbitrary
    // constant islands from other functions. Write this reference so we
    // can pull this constant island and emit it as part of this function
    // too.
    auto IslandIter = AddressToConstantIslandMap.lower_bound(Address);

    // lower_bound yields the first entry at or above Address; step back one
    // entry when that entry lies strictly above Address (or does not exist).
    if (IslandIter != AddressToConstantIslandMap.begin() &&
        (IslandIter == AddressToConstantIslandMap.end() ||
         IslandIter->first > Address))
      --IslandIter;

    if (IslandIter != AddressToConstantIslandMap.end()) {
      // Fall-back to referencing the original constant island in the presence
      // of dynamic relocs, as we currently do not support cloning them.
      // Notice: we might fail to link because of this, if the original constant
      // island we are referring would be emitted too far away.
      if (IslandIter->second->hasDynamicRelocationAtIsland()) {
        MCSymbol *IslandSym =
            IslandIter->second->getOrCreateIslandAccess(Address);
        if (IslandSym)
          return std::make_pair(IslandSym, 0);
      } else if (MCSymbol *IslandSym =
                     IslandIter->second->getOrCreateProxyIslandAccess(Address,
                                                                      BF)) {
        BF.createIslandDependency(IslandSym, IslandIter->second);
        return std::make_pair(IslandSym, 0);
      }
    }
  }

  // Note that the address does not necessarily have to reside inside
  // a section, it could be an absolute address too.
  ErrorOr<BinarySection &> Section = getSectionForAddress(Address);
  if (Section && Section->isText()) {
    if (BF.containsAddress(Address, /*UseMaxSize=*/isAArch64())) {
      // A reference to the function's own start address falls through to the
      // generic handling below.
      if (Address != BF.getAddress()) {
        // The address could potentially escape. Mark it as another entry
        // point into the function.
        if (opts::Verbosity >= 1) {
          this->outs() << "BOLT-INFO: potentially escaped address 0x"
                       << Twine::utohexstr(Address) << " in function " << BF
                       << '\n';
        }
        BF.HasInternalLabelReference = true;
        return std::make_pair(
            BF.addEntryPointAtOffset(Address - BF.getAddress()), 0);
      }
    } else {
      addInterproceduralReference(&BF, Address);
    }
  }

  // With relocations, catch jump table references outside of the basic block
  // containing the indirect jump.
  if (HasRelocations) {
    const MemoryContentsType MemType = analyzeMemoryAt(Address, BF);
    if (MemType == MemoryContentsType::POSSIBLE_PIC_JUMP_TABLE && IsPCRel) {
      const MCSymbol *Symbol =
          getOrCreateJumpTable(BF, Address, JumpTable::JTT_PIC);

      return std::make_pair(Symbol, 0);
    }
  }

  // Reference into a known data object: symbol of the object plus offset.
  if (BinaryData *BD = getBinaryDataContainingAddress(Address))
    return std::make_pair(BD->getSymbol(), Address - BD->getAddress());

  // TODO: use DWARF info to get size/alignment here?
  MCSymbol *TargetSymbol = getOrCreateGlobalSymbol(Address, "DATAat");
  LLVM_DEBUG(dbgs() << "Created symbol " << TargetSymbol->getName() << '\n');
  return std::make_pair(TargetSymbol, 0);
}
511 
512 MemoryContentsType BinaryContext::analyzeMemoryAt(uint64_t Address,
513                                                   BinaryFunction &BF) {
514   if (!isX86())
515     return MemoryContentsType::UNKNOWN;
516 
517   ErrorOr<BinarySection &> Section = getSectionForAddress(Address);
518   if (!Section) {
519     // No section - possibly an absolute address. Since we don't allow
520     // internal function addresses to escape the function scope - we
521     // consider it a tail call.
522     if (opts::Verbosity > 1) {
523       this->errs() << "BOLT-WARNING: no section for address 0x"
524                    << Twine::utohexstr(Address) << " referenced from function "
525                    << BF << '\n';
526     }
527     return MemoryContentsType::UNKNOWN;
528   }
529 
530   if (Section->isVirtual()) {
531     // The contents are filled at runtime.
532     return MemoryContentsType::UNKNOWN;
533   }
534 
535   // No support for jump tables in code yet.
536   if (Section->isText())
537     return MemoryContentsType::UNKNOWN;
538 
539   // Start with checking for PIC jump table. We expect non-PIC jump tables
540   // to have high 32 bits set to 0.
541   if (analyzeJumpTable(Address, JumpTable::JTT_PIC, BF))
542     return MemoryContentsType::POSSIBLE_PIC_JUMP_TABLE;
543 
544   if (analyzeJumpTable(Address, JumpTable::JTT_NORMAL, BF))
545     return MemoryContentsType::POSSIBLE_JUMP_TABLE;
546 
547   return MemoryContentsType::UNKNOWN;
548 }
549 
/// Heuristically decide whether the memory at \p Address holds a jump table
/// of kind \p Type belonging to function \p BF.
///
/// Entries are scanned from \p Address up to an upper bound given by the end
/// of the containing section, the end of the data object starting at
/// \p Address (if it has a size), and \p NextJTAddress when it is non-zero.
/// Scanning stops at the first entry that fails a check.
///
/// \param EntriesAsAddress  if non-null, receives the target address of every
///                          accepted entry.
/// \param HasEntryInFragment  if non-null, set to true when an accepted entry
///                          targets a function other than \p BF.
/// \returns true if the scan found at least two real entries, or one real
///          entry plus at least one special target (function start or
///          __builtin_unreachable).
bool BinaryContext::analyzeJumpTable(const uint64_t Address,
                                     const JumpTable::JumpTableType Type,
                                     const BinaryFunction &BF,
                                     const uint64_t NextJTAddress,
                                     JumpTable::AddressesType *EntriesAsAddress,
                                     bool *HasEntryInFragment) const {
  // Target address of __builtin_unreachable.
  const uint64_t UnreachableAddress = BF.getAddress() + BF.getSize();

  // Is one of the targets __builtin_unreachable?
  bool HasUnreachable = false;

  // Does one of the entries match function start address?
  bool HasStartAsEntry = false;

  // Number of targets other than __builtin_unreachable.
  uint64_t NumRealEntries = 0;

  // Size of the jump table without trailing __builtin_unreachable entries.
  size_t TrimmedSize = 0;

  // Record an entry in the output list (if requested) and remember the list
  // size after the last entry that is not __builtin_unreachable.
  auto addEntryAddress = [&](uint64_t EntryAddress, bool Unreachable = false) {
    if (!EntriesAsAddress)
      return;
    EntriesAsAddress->emplace_back(EntryAddress);
    if (!Unreachable)
      TrimmedSize = EntriesAsAddress->size();
  };

  ErrorOr<const BinarySection &> Section = getSectionForAddress(Address);
  if (!Section)
    return false;

  // The upper bound is defined by containing object, section limits, and
  // the next jump table in memory.
  uint64_t UpperBound = Section->getEndAddress();
  const BinaryData *JumpTableBD = getBinaryDataAtAddress(Address);
  if (JumpTableBD && JumpTableBD->getSize()) {
    assert(JumpTableBD->getEndAddress() <= UpperBound &&
           "data object cannot cross a section boundary");
    UpperBound = JumpTableBD->getEndAddress();
  }
  if (NextJTAddress)
    UpperBound = std::min(NextJTAddress, UpperBound);

  LLVM_DEBUG({
    using JTT = JumpTable::JumpTableType;
    dbgs() << formatv("BOLT-DEBUG: analyzeJumpTable @{0:x} in {1}, JTT={2}\n",
                      Address, BF.getPrintName(),
                      Type == JTT::JTT_PIC ? "PIC" : "Normal");
  });
  const uint64_t EntrySize = getJumpTableEntrySize(Type);
  // NOTE(review): UpperBound - EntrySize is unsigned arithmetic; this assumes
  // UpperBound >= EntrySize -- confirm.
  for (uint64_t EntryAddress = Address; EntryAddress <= UpperBound - EntrySize;
       EntryAddress += EntrySize) {
    LLVM_DEBUG(dbgs() << "  * Checking 0x" << Twine::utohexstr(EntryAddress)
                      << " -> ");
    // Check if there's a proper relocation against the jump table entry.
    if (HasRelocations) {
      if (Type == JumpTable::JTT_PIC &&
          !DataPCRelocations.count(EntryAddress)) {
        LLVM_DEBUG(
            dbgs() << "FAIL: JTT_PIC table, no relocation for this address\n");
        break;
      }
      if (Type == JumpTable::JTT_NORMAL && !getRelocationAt(EntryAddress)) {
        LLVM_DEBUG(
            dbgs()
            << "FAIL: JTT_NORMAL table, no relocation for this address\n");
        break;
      }
    }

    // PIC entries are signed offsets relative to the table start; normal
    // entries are read as pointers.
    // NOTE(review): both reads are dereferenced without checking for failure.
    const uint64_t Value =
        (Type == JumpTable::JTT_PIC)
            ? Address + *getSignedValueAtAddress(EntryAddress, EntrySize)
            : *getPointerAtAddress(EntryAddress);

    // __builtin_unreachable() case.
    if (Value == UnreachableAddress) {
      addEntryAddress(Value, /*Unreachable*/ true);
      HasUnreachable = true;
      LLVM_DEBUG(dbgs() << formatv("OK: {0:x} __builtin_unreachable\n", Value));
      continue;
    }

    // Function start is another special case. It is allowed in the jump table,
    // but we need at least one another regular entry to distinguish the table
    // from, e.g. a function pointer array.
    if (Value == BF.getAddress()) {
      HasStartAsEntry = true;
      addEntryAddress(Value);
      continue;
    }

    // Function or one of its fragments.
    const BinaryFunction *TargetBF = getBinaryFunctionContainingAddress(Value);
    const bool DoesBelongToFunction =
        BF.containsAddress(Value) ||
        (TargetBF && areRelatedFragments(TargetBF, &BF));
    if (!DoesBelongToFunction) {
      LLVM_DEBUG({
        if (!BF.containsAddress(Value)) {
          dbgs() << "FAIL: function doesn't contain this address\n";
          if (TargetBF) {
            dbgs() << "  ! function containing this address: "
                   << TargetBF->getPrintName() << '\n';
            if (TargetBF->isFragment()) {
              dbgs() << "  ! is a fragment";
              for (BinaryFunction *Parent : TargetBF->ParentFragments)
                dbgs() << ", parent: " << Parent->getPrintName();
              dbgs() << '\n';
            }
          }
        }
      });
      break;
    }

    // Check there's an instruction at this offset.
    // NOTE(review): TargetBF is dereferenced here without a null check; this
    // relies on the lookup above succeeding whenever BF contains Value.
    if (TargetBF->getState() == BinaryFunction::State::Disassembled &&
        !TargetBF->getInstructionAtOffset(Value - TargetBF->getAddress())) {
      LLVM_DEBUG(dbgs() << formatv("FAIL: no instruction at {0:x}\n", Value));
      break;
    }

    ++NumRealEntries;
    LLVM_DEBUG(dbgs() << formatv("OK: {0:x} real entry\n", Value));

    if (TargetBF != &BF && HasEntryInFragment)
      *HasEntryInFragment = true;
    addEntryAddress(Value);
  }

  // Trim direct/normal jump table to exclude trailing unreachable entries that
  // can collide with a function address.
  if (Type == JumpTable::JTT_NORMAL && EntriesAsAddress &&
      TrimmedSize != EntriesAsAddress->size() &&
      getBinaryFunctionAtAddress(UnreachableAddress))
    EntriesAsAddress->resize(TrimmedSize);

  // It's a jump table if the number of real entries is more than 1, or there's
  // one real entry and one or more special targets. If there are only multiple
  // special targets, then it's not a jump table.
  return NumRealEntries + (HasUnreachable || HasStartAsEntry) >= 2;
}
695 
696 void BinaryContext::populateJumpTables() {
697   LLVM_DEBUG(dbgs() << "DataPCRelocations: " << DataPCRelocations.size()
698                     << '\n');
699   for (auto JTI = JumpTables.begin(), JTE = JumpTables.end(); JTI != JTE;
700        ++JTI) {
701     JumpTable *JT = JTI->second;
702 
703     bool NonSimpleParent = false;
704     for (BinaryFunction *BF : JT->Parents)
705       NonSimpleParent |= !BF->isSimple();
706     if (NonSimpleParent)
707       continue;
708 
709     uint64_t NextJTAddress = 0;
710     auto NextJTI = std::next(JTI);
711     if (NextJTI != JTE)
712       NextJTAddress = NextJTI->second->getAddress();
713 
714     const bool Success =
715         analyzeJumpTable(JT->getAddress(), JT->Type, *(JT->Parents[0]),
716                          NextJTAddress, &JT->EntriesAsAddress, &JT->IsSplit);
717     if (!Success) {
718       LLVM_DEBUG({
719         dbgs() << "failed to analyze ";
720         JT->print(dbgs());
721         if (NextJTI != JTE) {
722           dbgs() << "next ";
723           NextJTI->second->print(dbgs());
724         }
725       });
726       llvm_unreachable("jump table heuristic failure");
727     }
728     for (BinaryFunction *Frag : JT->Parents) {
729       if (JT->IsSplit)
730         Frag->setHasIndirectTargetToSplitFragment(true);
731       for (uint64_t EntryAddress : JT->EntriesAsAddress)
732         // if target is builtin_unreachable
733         if (EntryAddress == Frag->getAddress() + Frag->getSize()) {
734           Frag->IgnoredBranches.emplace_back(EntryAddress - Frag->getAddress(),
735                                              Frag->getSize());
736         } else if (EntryAddress >= Frag->getAddress() &&
737                    EntryAddress < Frag->getAddress() + Frag->getSize()) {
738           Frag->registerReferencedOffset(EntryAddress - Frag->getAddress());
739         }
740     }
741 
742     // In strict mode, erase PC-relative relocation record. Later we check that
743     // all such records are erased and thus have been accounted for.
744     if (opts::StrictMode && JT->Type == JumpTable::JTT_PIC) {
745       for (uint64_t Address = JT->getAddress();
746            Address < JT->getAddress() + JT->getSize();
747            Address += JT->EntrySize) {
748         DataPCRelocations.erase(DataPCRelocations.find(Address));
749       }
750     }
751 
752     // Mark to skip the function and all its fragments.
753     for (BinaryFunction *Frag : JT->Parents)
754       if (Frag->hasIndirectTargetToSplitFragment())
755         addFragmentsToSkip(Frag);
756   }
757 
758   if (opts::StrictMode && DataPCRelocations.size()) {
759     LLVM_DEBUG({
760       dbgs() << DataPCRelocations.size()
761              << " unclaimed PC-relative relocations left in data:\n";
762       for (uint64_t Reloc : DataPCRelocations)
763         dbgs() << Twine::utohexstr(Reloc) << '\n';
764     });
765     assert(0 && "unclaimed PC-relative relocations left in data\n");
766   }
767   clearList(DataPCRelocations);
768 }
769 
770 void BinaryContext::skipMarkedFragments() {
771   std::vector<BinaryFunction *> FragmentQueue;
772   // Copy the functions to FragmentQueue.
773   FragmentQueue.assign(FragmentsToSkip.begin(), FragmentsToSkip.end());
774   auto addToWorklist = [&](BinaryFunction *Function) -> void {
775     if (FragmentsToSkip.count(Function))
776       return;
777     FragmentQueue.push_back(Function);
778     addFragmentsToSkip(Function);
779   };
780   // Functions containing split jump tables need to be skipped with all
781   // fragments (transitively).
782   for (size_t I = 0; I != FragmentQueue.size(); I++) {
783     BinaryFunction *BF = FragmentQueue[I];
784     assert(FragmentsToSkip.count(BF) &&
785            "internal error in traversing function fragments");
786     if (opts::Verbosity >= 1)
787       this->errs() << "BOLT-WARNING: Ignoring " << BF->getPrintName() << '\n';
788     BF->setSimple(false);
789     BF->setHasIndirectTargetToSplitFragment(true);
790 
791     llvm::for_each(BF->Fragments, addToWorklist);
792     llvm::for_each(BF->ParentFragments, addToWorklist);
793   }
794   if (!FragmentsToSkip.empty())
795     this->errs() << "BOLT-WARNING: skipped " << FragmentsToSkip.size()
796                  << " function" << (FragmentsToSkip.size() == 1 ? "" : "s")
797                  << " due to cold fragments\n";
798 }
799 
800 MCSymbol *BinaryContext::getOrCreateGlobalSymbol(uint64_t Address, Twine Prefix,
801                                                  uint64_t Size,
802                                                  uint16_t Alignment,
803                                                  unsigned Flags) {
804   auto Itr = BinaryDataMap.find(Address);
805   if (Itr != BinaryDataMap.end()) {
806     assert(Itr->second->getSize() == Size || !Size);
807     return Itr->second->getSymbol();
808   }
809 
810   std::string Name = (Prefix + "0x" + Twine::utohexstr(Address)).str();
811   assert(!GlobalSymbols.count(Name) && "created name is not unique");
812   return registerNameAtAddress(Name, Address, Size, Alignment, Flags);
813 }
814 
815 MCSymbol *BinaryContext::getOrCreateUndefinedGlobalSymbol(StringRef Name) {
816   return Ctx->getOrCreateSymbol(Name);
817 }
818 
819 BinaryFunction *BinaryContext::createBinaryFunction(
820     const std::string &Name, BinarySection &Section, uint64_t Address,
821     uint64_t Size, uint64_t SymbolSize, uint16_t Alignment) {
822   auto Result = BinaryFunctions.emplace(
823       Address, BinaryFunction(Name, Section, Address, Size, *this));
824   assert(Result.second == true && "unexpected duplicate function");
825   BinaryFunction *BF = &Result.first->second;
826   registerNameAtAddress(Name, Address, SymbolSize ? SymbolSize : Size,
827                         Alignment);
828   setSymbolToFunctionMap(BF->getSymbol(), BF);
829   return BF;
830 }
831 
832 const MCSymbol *
833 BinaryContext::getOrCreateJumpTable(BinaryFunction &Function, uint64_t Address,
834                                     JumpTable::JumpTableType Type) {
835   // Two fragments of same function access same jump table
836   if (JumpTable *JT = getJumpTableContainingAddress(Address)) {
837     assert(JT->Type == Type && "jump table types have to match");
838     assert(Address == JT->getAddress() && "unexpected non-empty jump table");
839 
840     // Prevent associating a jump table to a specific fragment twice.
841     if (!llvm::is_contained(JT->Parents, &Function)) {
842       assert(llvm::all_of(JT->Parents,
843                           [&](const BinaryFunction *BF) {
844                             return areRelatedFragments(&Function, BF);
845                           }) &&
846              "cannot re-use jump table of a different function");
847       // Duplicate the entry for the parent function for easy access
848       JT->Parents.push_back(&Function);
849       if (opts::Verbosity > 2) {
850         this->outs() << "BOLT-INFO: Multiple fragments access same jump table: "
851                      << JT->Parents[0]->getPrintName() << "; "
852                      << Function.getPrintName() << "\n";
853         JT->print(this->outs());
854       }
855       Function.JumpTables.emplace(Address, JT);
856       for (BinaryFunction *Parent : JT->Parents)
857         Parent->setHasIndirectTargetToSplitFragment(true);
858     }
859 
860     bool IsJumpTableParent = false;
861     (void)IsJumpTableParent;
862     for (BinaryFunction *Frag : JT->Parents)
863       if (Frag == &Function)
864         IsJumpTableParent = true;
865     assert(IsJumpTableParent &&
866            "cannot re-use jump table of a different function");
867     return JT->getFirstLabel();
868   }
869 
870   // Re-use the existing symbol if possible.
871   MCSymbol *JTLabel = nullptr;
872   if (BinaryData *Object = getBinaryDataAtAddress(Address)) {
873     if (!isInternalSymbolName(Object->getSymbol()->getName()))
874       JTLabel = Object->getSymbol();
875   }
876 
877   const uint64_t EntrySize = getJumpTableEntrySize(Type);
878   if (!JTLabel) {
879     const std::string JumpTableName = generateJumpTableName(Function, Address);
880     JTLabel = registerNameAtAddress(JumpTableName, Address, 0, EntrySize);
881   }
882 
883   LLVM_DEBUG(dbgs() << "BOLT-DEBUG: creating jump table " << JTLabel->getName()
884                     << " in function " << Function << '\n');
885 
886   JumpTable *JT = new JumpTable(*JTLabel, Address, EntrySize, Type,
887                                 JumpTable::LabelMapType{{0, JTLabel}},
888                                 *getSectionForAddress(Address));
889   JT->Parents.push_back(&Function);
890   if (opts::Verbosity > 2)
891     JT->print(this->outs());
892   JumpTables.emplace(Address, JT);
893 
894   // Duplicate the entry for the parent function for easy access.
895   Function.JumpTables.emplace(Address, JT);
896   return JTLabel;
897 }
898 
899 std::pair<uint64_t, const MCSymbol *>
900 BinaryContext::duplicateJumpTable(BinaryFunction &Function, JumpTable *JT,
901                                   const MCSymbol *OldLabel) {
902   auto L = scopeLock();
903   unsigned Offset = 0;
904   bool Found = false;
905   for (std::pair<const unsigned, MCSymbol *> Elmt : JT->Labels) {
906     if (Elmt.second != OldLabel)
907       continue;
908     Offset = Elmt.first;
909     Found = true;
910     break;
911   }
912   assert(Found && "Label not found");
913   (void)Found;
914   MCSymbol *NewLabel = Ctx->createNamedTempSymbol("duplicatedJT");
915   JumpTable *NewJT =
916       new JumpTable(*NewLabel, JT->getAddress(), JT->EntrySize, JT->Type,
917                     JumpTable::LabelMapType{{Offset, NewLabel}},
918                     *getSectionForAddress(JT->getAddress()));
919   NewJT->Parents = JT->Parents;
920   NewJT->Entries = JT->Entries;
921   NewJT->Counts = JT->Counts;
922   uint64_t JumpTableID = ++DuplicatedJumpTables;
923   // Invert it to differentiate from regular jump tables whose IDs are their
924   // addresses in the input binary memory space
925   JumpTableID = ~JumpTableID;
926   JumpTables.emplace(JumpTableID, NewJT);
927   Function.JumpTables.emplace(JumpTableID, NewJT);
928   return std::make_pair(JumpTableID, NewLabel);
929 }
930 
931 std::string BinaryContext::generateJumpTableName(const BinaryFunction &BF,
932                                                  uint64_t Address) {
933   size_t Id;
934   uint64_t Offset = 0;
935   if (const JumpTable *JT = BF.getJumpTableContainingAddress(Address)) {
936     Offset = Address - JT->getAddress();
937     auto JTLabelsIt = JT->Labels.find(Offset);
938     if (JTLabelsIt != JT->Labels.end())
939       return std::string(JTLabelsIt->second->getName());
940 
941     auto JTIdsIt = JumpTableIds.find(JT->getAddress());
942     assert(JTIdsIt != JumpTableIds.end());
943     Id = JTIdsIt->second;
944   } else {
945     Id = JumpTableIds[Address] = BF.JumpTables.size();
946   }
947   return ("JUMP_TABLE/" + BF.getOneName().str() + "." + std::to_string(Id) +
948           (Offset ? ("." + std::to_string(Offset)) : ""));
949 }
950 
951 bool BinaryContext::hasValidCodePadding(const BinaryFunction &BF) {
952   // FIXME: aarch64 support is missing.
953   if (!isX86())
954     return true;
955 
956   if (BF.getSize() == BF.getMaxSize())
957     return true;
958 
959   ErrorOr<ArrayRef<unsigned char>> FunctionData = BF.getData();
960   assert(FunctionData && "cannot get function as data");
961 
962   uint64_t Offset = BF.getSize();
963   MCInst Instr;
964   uint64_t InstrSize = 0;
965   uint64_t InstrAddress = BF.getAddress() + Offset;
966   using std::placeholders::_1;
967 
968   // Skip instructions that satisfy the predicate condition.
969   auto skipInstructions = [&](std::function<bool(const MCInst &)> Predicate) {
970     const uint64_t StartOffset = Offset;
971     for (; Offset < BF.getMaxSize();
972          Offset += InstrSize, InstrAddress += InstrSize) {
973       if (!DisAsm->getInstruction(Instr, InstrSize, FunctionData->slice(Offset),
974                                   InstrAddress, nulls()))
975         break;
976       if (!Predicate(Instr))
977         break;
978     }
979 
980     return Offset - StartOffset;
981   };
982 
983   // Skip a sequence of zero bytes.
984   auto skipZeros = [&]() {
985     const uint64_t StartOffset = Offset;
986     for (; Offset < BF.getMaxSize(); ++Offset)
987       if ((*FunctionData)[Offset] != 0)
988         break;
989 
990     return Offset - StartOffset;
991   };
992 
993   // Accept the whole padding area filled with breakpoints.
994   auto isBreakpoint = std::bind(&MCPlusBuilder::isBreakpoint, MIB.get(), _1);
995   if (skipInstructions(isBreakpoint) && Offset == BF.getMaxSize())
996     return true;
997 
998   auto isNoop = std::bind(&MCPlusBuilder::isNoop, MIB.get(), _1);
999 
1000   // Some functions have a jump to the next function or to the padding area
1001   // inserted after the body.
1002   auto isSkipJump = [&](const MCInst &Instr) {
1003     uint64_t TargetAddress = 0;
1004     if (MIB->isUnconditionalBranch(Instr) &&
1005         MIB->evaluateBranch(Instr, InstrAddress, InstrSize, TargetAddress)) {
1006       if (TargetAddress >= InstrAddress + InstrSize &&
1007           TargetAddress <= BF.getAddress() + BF.getMaxSize()) {
1008         return true;
1009       }
1010     }
1011     return false;
1012   };
1013 
1014   // Skip over nops, jumps, and zero padding. Allow interleaving (this happens).
1015   while (skipInstructions(isNoop) || skipInstructions(isSkipJump) ||
1016          skipZeros())
1017     ;
1018 
1019   if (Offset == BF.getMaxSize())
1020     return true;
1021 
1022   if (opts::Verbosity >= 1) {
1023     this->errs() << "BOLT-WARNING: bad padding at address 0x"
1024                  << Twine::utohexstr(BF.getAddress() + BF.getSize())
1025                  << " starting at offset " << (Offset - BF.getSize())
1026                  << " in function " << BF << '\n'
1027                  << FunctionData->slice(BF.getSize(),
1028                                         BF.getMaxSize() - BF.getSize())
1029                  << '\n';
1030   }
1031 
1032   return false;
1033 }
1034 
1035 void BinaryContext::adjustCodePadding() {
1036   for (auto &BFI : BinaryFunctions) {
1037     BinaryFunction &BF = BFI.second;
1038     if (!shouldEmit(BF))
1039       continue;
1040 
1041     if (!hasValidCodePadding(BF)) {
1042       if (HasRelocations) {
1043         if (opts::Verbosity >= 1) {
1044           this->outs() << "BOLT-INFO: function " << BF
1045                        << " has invalid padding. Ignoring the function.\n";
1046         }
1047         BF.setIgnored();
1048       } else {
1049         BF.setMaxSize(BF.getSize());
1050       }
1051     }
1052   }
1053 }
1054 
1055 MCSymbol *BinaryContext::registerNameAtAddress(StringRef Name, uint64_t Address,
1056                                                uint64_t Size,
1057                                                uint16_t Alignment,
1058                                                unsigned Flags) {
1059   // Register the name with MCContext.
1060   MCSymbol *Symbol = Ctx->getOrCreateSymbol(Name);
1061 
1062   auto GAI = BinaryDataMap.find(Address);
1063   BinaryData *BD;
1064   if (GAI == BinaryDataMap.end()) {
1065     ErrorOr<BinarySection &> SectionOrErr = getSectionForAddress(Address);
1066     BinarySection &Section =
1067         SectionOrErr ? SectionOrErr.get() : absoluteSection();
1068     BD = new BinaryData(*Symbol, Address, Size, Alignment ? Alignment : 1,
1069                         Section, Flags);
1070     GAI = BinaryDataMap.emplace(Address, BD).first;
1071     GlobalSymbols[Name] = BD;
1072     updateObjectNesting(GAI);
1073   } else {
1074     BD = GAI->second;
1075     if (!BD->hasName(Name)) {
1076       GlobalSymbols[Name] = BD;
1077       BD->Symbols.push_back(Symbol);
1078     }
1079   }
1080 
1081   return Symbol;
1082 }
1083 
1084 const BinaryData *
1085 BinaryContext::getBinaryDataContainingAddressImpl(uint64_t Address) const {
1086   auto NI = BinaryDataMap.lower_bound(Address);
1087   auto End = BinaryDataMap.end();
1088   if ((NI != End && Address == NI->first) ||
1089       ((NI != BinaryDataMap.begin()) && (NI-- != BinaryDataMap.begin()))) {
1090     if (NI->second->containsAddress(Address))
1091       return NI->second;
1092 
1093     // If this is a sub-symbol, see if a parent data contains the address.
1094     const BinaryData *BD = NI->second->getParent();
1095     while (BD) {
1096       if (BD->containsAddress(Address))
1097         return BD;
1098       BD = BD->getParent();
1099     }
1100   }
1101   return nullptr;
1102 }
1103 
1104 BinaryData *BinaryContext::getGOTSymbol() {
1105   // First tries to find a global symbol with that name
1106   BinaryData *GOTSymBD = getBinaryDataByName("_GLOBAL_OFFSET_TABLE_");
1107   if (GOTSymBD)
1108     return GOTSymBD;
1109 
1110   // This symbol might be hidden from run-time link, so fetch the local
1111   // definition if available.
1112   GOTSymBD = getBinaryDataByName("_GLOBAL_OFFSET_TABLE_/1");
1113   if (!GOTSymBD)
1114     return nullptr;
1115 
1116   // If the local symbol is not unique, fail
1117   unsigned Index = 2;
1118   SmallString<30> Storage;
1119   while (const BinaryData *BD =
1120              getBinaryDataByName(Twine("_GLOBAL_OFFSET_TABLE_/")
1121                                      .concat(Twine(Index++))
1122                                      .toStringRef(Storage)))
1123     if (BD->getAddress() != GOTSymBD->getAddress())
1124       return nullptr;
1125 
1126   return GOTSymBD;
1127 }
1128 
1129 bool BinaryContext::setBinaryDataSize(uint64_t Address, uint64_t Size) {
1130   auto NI = BinaryDataMap.find(Address);
1131   assert(NI != BinaryDataMap.end());
1132   if (NI == BinaryDataMap.end())
1133     return false;
1134   // TODO: it's possible that a jump table starts at the same address
1135   // as a larger blob of private data.  When we set the size of the
1136   // jump table, it might be smaller than the total blob size.  In this
1137   // case we just leave the original size since (currently) it won't really
1138   // affect anything.
1139   assert((!NI->second->Size || NI->second->Size == Size ||
1140           (NI->second->isJumpTable() && NI->second->Size > Size)) &&
1141          "can't change the size of a symbol that has already had its "
1142          "size set");
1143   if (!NI->second->Size) {
1144     NI->second->Size = Size;
1145     updateObjectNesting(NI);
1146     return true;
1147   }
1148   return false;
1149 }
1150 
1151 void BinaryContext::generateSymbolHashes() {
1152   auto isPadding = [](const BinaryData &BD) {
1153     StringRef Contents = BD.getSection().getContents();
1154     StringRef SymData = Contents.substr(BD.getOffset(), BD.getSize());
1155     return (BD.getName().starts_with("HOLEat") ||
1156             SymData.find_first_not_of(0) == StringRef::npos);
1157   };
1158 
1159   uint64_t NumCollisions = 0;
1160   for (auto &Entry : BinaryDataMap) {
1161     BinaryData &BD = *Entry.second;
1162     StringRef Name = BD.getName();
1163 
1164     if (!isInternalSymbolName(Name))
1165       continue;
1166 
1167     // First check if a non-anonymous alias exists and move it to the front.
1168     if (BD.getSymbols().size() > 1) {
1169       auto Itr = llvm::find_if(BD.getSymbols(), [&](const MCSymbol *Symbol) {
1170         return !isInternalSymbolName(Symbol->getName());
1171       });
1172       if (Itr != BD.getSymbols().end()) {
1173         size_t Idx = std::distance(BD.getSymbols().begin(), Itr);
1174         std::swap(BD.getSymbols()[0], BD.getSymbols()[Idx]);
1175         continue;
1176       }
1177     }
1178 
1179     // We have to skip 0 size symbols since they will all collide.
1180     if (BD.getSize() == 0) {
1181       continue;
1182     }
1183 
1184     const uint64_t Hash = BD.getSection().hash(BD);
1185     const size_t Idx = Name.find("0x");
1186     std::string NewName =
1187         (Twine(Name.substr(0, Idx)) + "_" + Twine::utohexstr(Hash)).str();
1188     if (getBinaryDataByName(NewName)) {
1189       // Ignore collisions for symbols that appear to be padding
1190       // (i.e. all zeros or a "hole")
1191       if (!isPadding(BD)) {
1192         if (opts::Verbosity) {
1193           this->errs() << "BOLT-WARNING: collision detected when hashing " << BD
1194                        << " with new name (" << NewName << "), skipping.\n";
1195         }
1196         ++NumCollisions;
1197       }
1198       continue;
1199     }
1200     BD.Symbols.insert(BD.Symbols.begin(), Ctx->getOrCreateSymbol(NewName));
1201     GlobalSymbols[NewName] = &BD;
1202   }
1203   if (NumCollisions) {
1204     this->errs() << "BOLT-WARNING: " << NumCollisions
1205                  << " collisions detected while hashing binary objects";
1206     if (!opts::Verbosity)
1207       this->errs() << ". Use -v=1 to see the list.";
1208     this->errs() << '\n';
1209   }
1210 }
1211 
1212 bool BinaryContext::registerFragment(BinaryFunction &TargetFunction,
1213                                      BinaryFunction &Function) {
1214   assert(TargetFunction.isFragment() && "TargetFunction must be a fragment");
1215   if (TargetFunction.isChildOf(Function))
1216     return true;
1217   TargetFunction.addParentFragment(Function);
1218   Function.addFragment(TargetFunction);
1219   FragmentClasses.unionSets(&TargetFunction, &Function);
1220   if (!HasRelocations) {
1221     TargetFunction.setSimple(false);
1222     Function.setSimple(false);
1223   }
1224   if (opts::Verbosity >= 1) {
1225     this->outs() << "BOLT-INFO: marking " << TargetFunction
1226                  << " as a fragment of " << Function << '\n';
1227   }
1228   return true;
1229 }
1230 
1231 void BinaryContext::addAdrpAddRelocAArch64(BinaryFunction &BF,
1232                                            MCInst &LoadLowBits,
1233                                            MCInst &LoadHiBits,
1234                                            uint64_t Target) {
1235   const MCSymbol *TargetSymbol;
1236   uint64_t Addend = 0;
1237   std::tie(TargetSymbol, Addend) = handleAddressRef(Target, BF,
1238                                                     /*IsPCRel*/ true);
1239   int64_t Val;
1240   MIB->replaceImmWithSymbolRef(LoadHiBits, TargetSymbol, Addend, Ctx.get(), Val,
1241                                ELF::R_AARCH64_ADR_PREL_PG_HI21);
1242   MIB->replaceImmWithSymbolRef(LoadLowBits, TargetSymbol, Addend, Ctx.get(),
1243                                Val, ELF::R_AARCH64_ADD_ABS_LO12_NC);
1244 }
1245 
/// Try to recognize a linker veneer starting at \p Address.
///
/// Instructions are disassembled from \p Address until the first branch (or
/// the first undecodable instruction). At the branch, the sequence collected
/// so far is handed to MIB->matchLinkerVeneer(). On a match, and unless
/// \p MatchOnly is set, a new BinaryFunction named after the "FUNCat" symbol
/// at \p Address is created and populated with the matched instructions.
///
/// Returns false when \p Address already belongs to a known function, when it
/// is not in a text section, or when no veneer is matched. Returns true on a
/// match.
bool BinaryContext::handleAArch64Veneer(uint64_t Address, bool MatchOnly) {
  // Addresses inside an already known function are not veneers.
  BinaryFunction *TargetFunction = getBinaryFunctionContainingAddress(Address);
  if (TargetFunction)
    return false;

  ErrorOr<BinarySection &> Section = getSectionForAddress(Address);
  assert(Section && "cannot get section for referenced address");
  if (!Section->isText())
    return false;

  bool Ret = false;
  // View of the section bytes from Address to the end of the section.
  StringRef SectionContents = Section->getContents();
  uint64_t Offset = Address - Section->getAddress();
  const uint64_t MaxSize = SectionContents.size() - Offset;
  const uint8_t *Bytes =
      reinterpret_cast<const uint8_t *>(SectionContents.data());
  ArrayRef<uint8_t> Data(Bytes + Offset, MaxSize);

  // Match the collected instructions plus the terminating branch against the
  // veneer pattern and, if requested, materialize the veneer function.
  // Note that the Offset and TotalSize parameters shadow the outer variables
  // of the same name; here Offset is the branch's offset from Address.
  auto matchVeneer = [&](BinaryFunction::InstrMapType &Instructions,
                         MCInst &Instruction, uint64_t Offset,
                         uint64_t AbsoluteInstrAddr,
                         uint64_t TotalSize) -> bool {
    MCInst *TargetHiBits, *TargetLowBits;
    uint64_t TargetAddress, Count;
    // A zero Count means no veneer was matched; a non-zero Count is used below
    // as the number of veneer instructions, the branch included.
    Count = MIB->matchLinkerVeneer(Instructions.begin(), Instructions.end(),
                                   AbsoluteInstrAddr, Instruction, TargetHiBits,
                                   TargetLowBits, TargetAddress);
    if (!Count)
      return false;

    // The caller only wanted to know whether this is a veneer.
    if (MatchOnly)
      return true;

    // NOTE The target symbol was created during disassemble's
    // handleExternalReference
    const MCSymbol *VeneerSymbol = getOrCreateGlobalSymbol(Address, "FUNCat");
    BinaryFunction *Veneer = createBinaryFunction(VeneerSymbol->getName().str(),
                                                  *Section, Address, TotalSize);
    addAdrpAddRelocAArch64(*Veneer, *TargetLowBits, *TargetHiBits,
                           TargetAddress);
    // Add the branch first, then the remaining Count - 1 instructions, taken
    // from the end of the collected sequence walking backwards.
    MIB->addAnnotation(Instruction, "AArch64Veneer", true);
    Veneer->addInstruction(Offset, std::move(Instruction));
    --Count;
    for (auto It = Instructions.rbegin(); Count != 0; ++It, --Count) {
      MIB->addAnnotation(It->second, "AArch64Veneer", true);
      Veneer->addInstruction(It->first, std::move(It->second));
    }

    Veneer->getOrCreateLocalLabel(Address);
    Veneer->setMaxSize(TotalSize);
    Veneer->updateState(BinaryFunction::State::Disassembled);
    LLVM_DEBUG(dbgs() << "BOLT-DEBUG: handling veneer function at 0x"
                      << Twine::utohexstr(Address) << "\n");
    return true;
  };

  uint64_t Size = 0, TotalSize = 0;
  // Non-branch instructions seen so far, keyed by their offset from Address.
  BinaryFunction::InstrMapType VeneerInstructions;
  // From here on Offset is relative to Address (Data already starts there).
  for (Offset = 0; Offset < MaxSize; Offset += Size) {
    MCInst Instruction;
    const uint64_t AbsoluteInstrAddr = Address + Offset;
    if (!SymbolicDisAsm->getInstruction(Instruction, Size, Data.slice(Offset),
                                        AbsoluteInstrAddr, nulls()))
      break;

    TotalSize += Size;
    // The first branch ends the candidate sequence, matched or not.
    if (MIB->isBranch(Instruction)) {
      Ret = matchVeneer(VeneerInstructions, Instruction, Offset,
                        AbsoluteInstrAddr, TotalSize);
      break;
    }

    VeneerInstructions.emplace(Offset, std::move(Instruction));
  }

  return Ret;
}
1323 
1324 void BinaryContext::processInterproceduralReferences() {
1325   for (const std::pair<BinaryFunction *, uint64_t> &It :
1326        InterproceduralReferences) {
1327     BinaryFunction &Function = *It.first;
1328     uint64_t Address = It.second;
1329     // Process interprocedural references from ignored functions in BAT mode
1330     // (non-simple in non-relocation mode) to properly register entry points
1331     if (!Address || (Function.isIgnored() && !HasBATSection))
1332       continue;
1333 
1334     BinaryFunction *TargetFunction =
1335         getBinaryFunctionContainingAddress(Address);
1336     if (&Function == TargetFunction)
1337       continue;
1338 
1339     if (TargetFunction) {
1340       if (TargetFunction->isFragment() &&
1341           !areRelatedFragments(TargetFunction, &Function)) {
1342         this->errs()
1343             << "BOLT-WARNING: interprocedural reference between unrelated "
1344                "fragments: "
1345             << Function.getPrintName() << " and "
1346             << TargetFunction->getPrintName() << '\n';
1347       }
1348       if (uint64_t Offset = Address - TargetFunction->getAddress())
1349         TargetFunction->addEntryPointAtOffset(Offset);
1350 
1351       continue;
1352     }
1353 
1354     // Check if address falls in function padding space - this could be
1355     // unmarked data in code. In this case adjust the padding space size.
1356     ErrorOr<BinarySection &> Section = getSectionForAddress(Address);
1357     assert(Section && "cannot get section for referenced address");
1358 
1359     if (!Section->isText())
1360       continue;
1361 
1362     // PLT requires special handling and could be ignored in this context.
1363     StringRef SectionName = Section->getName();
1364     if (SectionName == ".plt" || SectionName == ".plt.got")
1365       continue;
1366 
1367     // Check if it is aarch64 veneer written at Address
1368     if (isAArch64() && handleAArch64Veneer(Address))
1369       continue;
1370 
1371     if (opts::processAllFunctions()) {
1372       this->errs() << "BOLT-ERROR: cannot process binaries with unmarked "
1373                    << "object in code at address 0x"
1374                    << Twine::utohexstr(Address) << " belonging to section "
1375                    << SectionName << " in current mode\n";
1376       exit(1);
1377     }
1378 
1379     TargetFunction = getBinaryFunctionContainingAddress(Address,
1380                                                         /*CheckPastEnd=*/false,
1381                                                         /*UseMaxSize=*/true);
1382     // We are not going to overwrite non-simple functions, but for simple
1383     // ones - adjust the padding size.
1384     if (TargetFunction && TargetFunction->isSimple()) {
1385       this->errs()
1386           << "BOLT-WARNING: function " << *TargetFunction
1387           << " has an object detected in a padding region at address 0x"
1388           << Twine::utohexstr(Address) << '\n';
1389       TargetFunction->setMaxSize(TargetFunction->getSize());
1390     }
1391   }
1392 
1393   InterproceduralReferences.clear();
1394 }
1395 
1396 void BinaryContext::postProcessSymbolTable() {
1397   fixBinaryDataHoles();
1398   bool Valid = true;
1399   for (auto &Entry : BinaryDataMap) {
1400     BinaryData *BD = Entry.second;
1401     if ((BD->getName().starts_with("SYMBOLat") ||
1402          BD->getName().starts_with("DATAat")) &&
1403         !BD->getParent() && !BD->getSize() && !BD->isAbsolute() &&
1404         BD->getSection()) {
1405       this->errs() << "BOLT-WARNING: zero-sized top level symbol: " << *BD
1406                    << "\n";
1407       Valid = false;
1408     }
1409   }
1410   assert(Valid);
1411   (void)Valid;
1412   generateSymbolHashes();
1413 }
1414 
/// Fold \p ChildBF into \p ParentBF.
///
/// All symbols and aliases of the child are moved to the parent and the
/// symbol-to-function map is redirected accordingly. With relocations, the
/// child's profile is merged into the parent and the child is erased from
/// BinaryFunctions. Without relocations, the child is kept, given a single
/// "__ICF_"-prefixed symbol and marked as folded into the parent.
///
/// Neither function may have multiple entry points.
void BinaryContext::foldFunction(BinaryFunction &ChildBF,
                                 BinaryFunction &ParentBF) {
  assert(!ChildBF.isMultiEntry() && !ParentBF.isMultiEntry() &&
         "cannot merge functions with multiple entry points");

  // The locks are created unlocked (std::defer_lock) and are only held around
  // the individual updates below.
  std::unique_lock<llvm::sys::RWMutex> WriteCtxLock(CtxMutex, std::defer_lock);
  std::unique_lock<llvm::sys::RWMutex> WriteSymbolMapLock(
      SymbolToFunctionMapMutex, std::defer_lock);

  // Captured before the child's symbol list is cleared below.
  const StringRef ChildName = ChildBF.getOneName();

  // Move symbols over and update bookkeeping info.
  for (MCSymbol *Symbol : ChildBF.getSymbols()) {
    ParentBF.getSymbols().push_back(Symbol);
    WriteSymbolMapLock.lock();
    SymbolToFunctionMap[Symbol] = &ParentBF;
    WriteSymbolMapLock.unlock();
    // NB: there's no need to update BinaryDataMap and GlobalSymbols.
  }
  ChildBF.getSymbols().clear();

  // Move other names the child function is known under.
  llvm::move(ChildBF.Aliases, std::back_inserter(ParentBF.Aliases));
  ChildBF.Aliases.clear();

  if (HasRelocations) {
    // Merge execution counts of ChildBF into those of ParentBF.
    // Without relocations, we cannot reliably merge profiles as both functions
    // continue to exist and either one can be executed.
    ChildBF.mergeProfileDataInto(ParentBF);

    // Separate read and write locks on the same mutex: the lookup is done
    // under the shared lock, the erase under the exclusive one.
    std::shared_lock<llvm::sys::RWMutex> ReadBfsLock(BinaryFunctionsMutex,
                                                     std::defer_lock);
    std::unique_lock<llvm::sys::RWMutex> WriteBfsLock(BinaryFunctionsMutex,
                                                      std::defer_lock);
    // Remove ChildBF from the global set of functions in relocs mode.
    ReadBfsLock.lock();
    auto FI = BinaryFunctions.find(ChildBF.getAddress());
    ReadBfsLock.unlock();

    assert(FI != BinaryFunctions.end() && "function not found");
    assert(&ChildBF == &FI->second && "function mismatch");

    // ChildBF is the map element being erased here, so the reference must not
    // be used after this point.
    WriteBfsLock.lock();
    ChildBF.clearDisasmState();
    FI = BinaryFunctions.erase(FI);
    WriteBfsLock.unlock();

  } else {
    // In non-relocation mode we keep the function, but rename it.
    std::string NewName = "__ICF_" + ChildName.str();

    WriteCtxLock.lock();
    ChildBF.getSymbols().push_back(Ctx->getOrCreateSymbol(NewName));
    WriteCtxLock.unlock();

    ChildBF.setFolded(&ParentBF);
  }

  ParentBF.setHasFunctionsFoldedInto();
}
1476 
1477 void BinaryContext::fixBinaryDataHoles() {
1478   assert(validateObjectNesting() && "object nesting inconsistency detected");
1479 
1480   for (BinarySection &Section : allocatableSections()) {
1481     std::vector<std::pair<uint64_t, uint64_t>> Holes;
1482 
1483     auto isNotHole = [&Section](const binary_data_iterator &Itr) {
1484       BinaryData *BD = Itr->second;
1485       bool isHole = (!BD->getParent() && !BD->getSize() && BD->isObject() &&
1486                      (BD->getName().starts_with("SYMBOLat0x") ||
1487                       BD->getName().starts_with("DATAat0x") ||
1488                       BD->getName().starts_with("ANONYMOUS")));
1489       return !isHole && BD->getSection() == Section && !BD->getParent();
1490     };
1491 
1492     auto BDStart = BinaryDataMap.begin();
1493     auto BDEnd = BinaryDataMap.end();
1494     auto Itr = FilteredBinaryDataIterator(isNotHole, BDStart, BDEnd);
1495     auto End = FilteredBinaryDataIterator(isNotHole, BDEnd, BDEnd);
1496 
1497     uint64_t EndAddress = Section.getAddress();
1498 
1499     while (Itr != End) {
1500       if (Itr->second->getAddress() > EndAddress) {
1501         uint64_t Gap = Itr->second->getAddress() - EndAddress;
1502         Holes.emplace_back(EndAddress, Gap);
1503       }
1504       EndAddress = Itr->second->getEndAddress();
1505       ++Itr;
1506     }
1507 
1508     if (EndAddress < Section.getEndAddress())
1509       Holes.emplace_back(EndAddress, Section.getEndAddress() - EndAddress);
1510 
1511     // If there is already a symbol at the start of the hole, grow that symbol
1512     // to cover the rest.  Otherwise, create a new symbol to cover the hole.
1513     for (std::pair<uint64_t, uint64_t> &Hole : Holes) {
1514       BinaryData *BD = getBinaryDataAtAddress(Hole.first);
1515       if (BD) {
1516         // BD->getSection() can be != Section if there are sections that
1517         // overlap.  In this case it is probably safe to just skip the holes
1518         // since the overlapping section will not(?) have any symbols in it.
1519         if (BD->getSection() == Section)
1520           setBinaryDataSize(Hole.first, Hole.second);
1521       } else {
1522         getOrCreateGlobalSymbol(Hole.first, "HOLEat", Hole.second, 1);
1523       }
1524     }
1525   }
1526 
1527   assert(validateObjectNesting() && "object nesting inconsistency detected");
1528   assert(validateHoles() && "top level hole detected in object map");
1529 }
1530 
1531 void BinaryContext::printGlobalSymbols(raw_ostream &OS) const {
1532   const BinarySection *CurrentSection = nullptr;
1533   bool FirstSection = true;
1534 
1535   for (auto &Entry : BinaryDataMap) {
1536     const BinaryData *BD = Entry.second;
1537     const BinarySection &Section = BD->getSection();
1538     if (FirstSection || Section != *CurrentSection) {
1539       uint64_t Address, Size;
1540       StringRef Name = Section.getName();
1541       if (Section) {
1542         Address = Section.getAddress();
1543         Size = Section.getSize();
1544       } else {
1545         Address = BD->getAddress();
1546         Size = BD->getSize();
1547       }
1548       OS << "BOLT-INFO: Section " << Name << ", "
1549          << "0x" + Twine::utohexstr(Address) << ":"
1550          << "0x" + Twine::utohexstr(Address + Size) << "/" << Size << "\n";
1551       CurrentSection = &Section;
1552       FirstSection = false;
1553     }
1554 
1555     OS << "BOLT-INFO: ";
1556     const BinaryData *P = BD->getParent();
1557     while (P) {
1558       OS << "  ";
1559       P = P->getParent();
1560     }
1561     OS << *BD << "\n";
1562   }
1563 }
1564 
1565 Expected<unsigned> BinaryContext::getDwarfFile(
1566     StringRef Directory, StringRef FileName, unsigned FileNumber,
1567     std::optional<MD5::MD5Result> Checksum, std::optional<StringRef> Source,
1568     unsigned CUID, unsigned DWARFVersion) {
1569   DwarfLineTable &Table = DwarfLineTablesCUMap[CUID];
1570   return Table.tryGetFile(Directory, FileName, Checksum, Source, DWARFVersion,
1571                           FileNumber);
1572 }
1573 
1574 unsigned BinaryContext::addDebugFilenameToUnit(const uint32_t DestCUID,
1575                                                const uint32_t SrcCUID,
1576                                                unsigned FileIndex) {
1577   DWARFCompileUnit *SrcUnit = DwCtx->getCompileUnitForOffset(SrcCUID);
1578   const DWARFDebugLine::LineTable *LineTable =
1579       DwCtx->getLineTableForUnit(SrcUnit);
1580   const std::vector<DWARFDebugLine::FileNameEntry> &FileNames =
1581       LineTable->Prologue.FileNames;
1582   // Dir indexes start at 1, as DWARF file numbers, and a dir index 0
1583   // means empty dir.
1584   assert(FileIndex > 0 && FileIndex <= FileNames.size() &&
1585          "FileIndex out of range for the compilation unit.");
1586   StringRef Dir = "";
1587   if (FileNames[FileIndex - 1].DirIdx != 0) {
1588     if (std::optional<const char *> DirName = dwarf::toString(
1589             LineTable->Prologue
1590                 .IncludeDirectories[FileNames[FileIndex - 1].DirIdx - 1])) {
1591       Dir = *DirName;
1592     }
1593   }
1594   StringRef FileName = "";
1595   if (std::optional<const char *> FName =
1596           dwarf::toString(FileNames[FileIndex - 1].Name))
1597     FileName = *FName;
1598   assert(FileName != "");
1599   DWARFCompileUnit *DstUnit = DwCtx->getCompileUnitForOffset(DestCUID);
1600   return cantFail(getDwarfFile(Dir, FileName, 0, std::nullopt, std::nullopt,
1601                                DestCUID, DstUnit->getVersion()));
1602 }
1603 
1604 std::vector<BinaryFunction *> BinaryContext::getSortedFunctions() {
1605   std::vector<BinaryFunction *> SortedFunctions(BinaryFunctions.size());
1606   llvm::transform(llvm::make_second_range(BinaryFunctions),
1607                   SortedFunctions.begin(),
1608                   [](BinaryFunction &BF) { return &BF; });
1609 
1610   llvm::stable_sort(SortedFunctions, compareBinaryFunctionByIndex);
1611   return SortedFunctions;
1612 }
1613 
1614 std::vector<BinaryFunction *> BinaryContext::getAllBinaryFunctions() {
1615   std::vector<BinaryFunction *> AllFunctions;
1616   AllFunctions.reserve(BinaryFunctions.size() + InjectedBinaryFunctions.size());
1617   llvm::transform(llvm::make_second_range(BinaryFunctions),
1618                   std::back_inserter(AllFunctions),
1619                   [](BinaryFunction &BF) { return &BF; });
1620   llvm::copy(InjectedBinaryFunctions, std::back_inserter(AllFunctions));
1621 
1622   return AllFunctions;
1623 }
1624 
1625 std::optional<DWARFUnit *> BinaryContext::getDWOCU(uint64_t DWOId) {
1626   auto Iter = DWOCUs.find(DWOId);
1627   if (Iter == DWOCUs.end())
1628     return std::nullopt;
1629 
1630   return Iter->second;
1631 }
1632 
1633 DWARFContext *BinaryContext::getDWOContext() const {
1634   if (DWOCUs.empty())
1635     return nullptr;
1636   return &DWOCUs.begin()->second->getContext();
1637 }
1638 
1639 /// Handles DWO sections that can either be in .o, .dwo or .dwp files.
1640 void BinaryContext::preprocessDWODebugInfo() {
1641   for (const std::unique_ptr<DWARFUnit> &CU : DwCtx->compile_units()) {
1642     DWARFUnit *const DwarfUnit = CU.get();
1643     if (std::optional<uint64_t> DWOId = DwarfUnit->getDWOId()) {
1644       std::string DWOName = dwarf::toString(
1645           DwarfUnit->getUnitDIE().find(
1646               {dwarf::DW_AT_dwo_name, dwarf::DW_AT_GNU_dwo_name}),
1647           "");
1648       SmallString<16> AbsolutePath;
1649       if (!opts::CompDirOverride.empty()) {
1650         sys::path::append(AbsolutePath, opts::CompDirOverride);
1651         sys::path::append(AbsolutePath, DWOName);
1652       }
1653       DWARFUnit *DWOCU =
1654           DwarfUnit->getNonSkeletonUnitDIE(false, AbsolutePath).getDwarfUnit();
1655       if (!DWOCU->isDWOUnit()) {
1656         this->outs()
1657             << "BOLT-WARNING: Debug Fission: DWO debug information for "
1658             << DWOName
1659             << " was not retrieved and won't be updated. Please check "
1660                "relative path.\n";
1661         continue;
1662       }
1663       DWOCUs[*DWOId] = DWOCU;
1664     }
1665   }
1666   if (!DWOCUs.empty())
1667     this->outs() << "BOLT-INFO: processing split DWARF\n";
1668 }
1669 
1670 void BinaryContext::preprocessDebugInfo() {
1671   struct CURange {
1672     uint64_t LowPC;
1673     uint64_t HighPC;
1674     DWARFUnit *Unit;
1675 
1676     bool operator<(const CURange &Other) const { return LowPC < Other.LowPC; }
1677   };
1678 
1679   // Building a map of address ranges to CUs similar to .debug_aranges and use
1680   // it to assign CU to functions.
1681   std::vector<CURange> AllRanges;
1682   AllRanges.reserve(DwCtx->getNumCompileUnits());
1683   for (const std::unique_ptr<DWARFUnit> &CU : DwCtx->compile_units()) {
1684     Expected<DWARFAddressRangesVector> RangesOrError =
1685         CU->getUnitDIE().getAddressRanges();
1686     if (!RangesOrError) {
1687       consumeError(RangesOrError.takeError());
1688       continue;
1689     }
1690     for (DWARFAddressRange &Range : *RangesOrError) {
1691       // Parts of the debug info could be invalidated due to corresponding code
1692       // being removed from the binary by the linker. Hence we check if the
1693       // address is a valid one.
1694       if (containsAddress(Range.LowPC))
1695         AllRanges.emplace_back(CURange{Range.LowPC, Range.HighPC, CU.get()});
1696     }
1697 
1698     ContainsDwarf5 |= CU->getVersion() >= 5;
1699     ContainsDwarfLegacy |= CU->getVersion() < 5;
1700   }
1701 
1702   llvm::sort(AllRanges);
1703   for (auto &KV : BinaryFunctions) {
1704     const uint64_t FunctionAddress = KV.first;
1705     BinaryFunction &Function = KV.second;
1706 
1707     auto It = llvm::partition_point(
1708         AllRanges, [=](CURange R) { return R.HighPC <= FunctionAddress; });
1709     if (It != AllRanges.end() && It->LowPC <= FunctionAddress)
1710       Function.setDWARFUnit(It->Unit);
1711   }
1712 
1713   // Discover units with debug info that needs to be updated.
1714   for (const auto &KV : BinaryFunctions) {
1715     const BinaryFunction &BF = KV.second;
1716     if (shouldEmit(BF) && BF.getDWARFUnit())
1717       ProcessedCUs.insert(BF.getDWARFUnit());
1718   }
1719 
1720   // Clear debug info for functions from units that we are not going to process.
1721   for (auto &KV : BinaryFunctions) {
1722     BinaryFunction &BF = KV.second;
1723     if (BF.getDWARFUnit() && !ProcessedCUs.count(BF.getDWARFUnit()))
1724       BF.setDWARFUnit(nullptr);
1725   }
1726 
1727   if (opts::Verbosity >= 1) {
1728     this->outs() << "BOLT-INFO: " << ProcessedCUs.size() << " out of "
1729                  << DwCtx->getNumCompileUnits() << " CUs will be updated\n";
1730   }
1731 
1732   preprocessDWODebugInfo();
1733 
1734   // Populate MCContext with DWARF files from all units.
1735   StringRef GlobalPrefix = AsmInfo->getPrivateGlobalPrefix();
1736   for (const std::unique_ptr<DWARFUnit> &CU : DwCtx->compile_units()) {
1737     const uint64_t CUID = CU->getOffset();
1738     DwarfLineTable &BinaryLineTable = getDwarfLineTable(CUID);
1739     BinaryLineTable.setLabel(Ctx->getOrCreateSymbol(
1740         GlobalPrefix + "line_table_start" + Twine(CUID)));
1741 
1742     if (!ProcessedCUs.count(CU.get()))
1743       continue;
1744 
1745     const DWARFDebugLine::LineTable *LineTable =
1746         DwCtx->getLineTableForUnit(CU.get());
1747     const std::vector<DWARFDebugLine::FileNameEntry> &FileNames =
1748         LineTable->Prologue.FileNames;
1749 
1750     uint16_t DwarfVersion = LineTable->Prologue.getVersion();
1751     if (DwarfVersion >= 5) {
1752       std::optional<MD5::MD5Result> Checksum;
1753       if (LineTable->Prologue.ContentTypes.HasMD5)
1754         Checksum = LineTable->Prologue.FileNames[0].Checksum;
1755       std::optional<const char *> Name =
1756           dwarf::toString(CU->getUnitDIE().find(dwarf::DW_AT_name), nullptr);
1757       if (std::optional<uint64_t> DWOID = CU->getDWOId()) {
1758         auto Iter = DWOCUs.find(*DWOID);
1759         assert(Iter != DWOCUs.end() && "DWO CU was not found.");
1760         Name = dwarf::toString(
1761             Iter->second->getUnitDIE().find(dwarf::DW_AT_name), nullptr);
1762       }
1763       BinaryLineTable.setRootFile(CU->getCompilationDir(), *Name, Checksum,
1764                                   std::nullopt);
1765     }
1766 
1767     BinaryLineTable.setDwarfVersion(DwarfVersion);
1768 
1769     // Assign a unique label to every line table, one per CU.
1770     // Make sure empty debug line tables are registered too.
1771     if (FileNames.empty()) {
1772       cantFail(getDwarfFile("", "<unknown>", 0, std::nullopt, std::nullopt,
1773                             CUID, DwarfVersion));
1774       continue;
1775     }
1776     const uint32_t Offset = DwarfVersion < 5 ? 1 : 0;
1777     for (size_t I = 0, Size = FileNames.size(); I != Size; ++I) {
1778       // Dir indexes start at 1, as DWARF file numbers, and a dir index 0
1779       // means empty dir.
1780       StringRef Dir = "";
1781       if (FileNames[I].DirIdx != 0 || DwarfVersion >= 5)
1782         if (std::optional<const char *> DirName = dwarf::toString(
1783                 LineTable->Prologue
1784                     .IncludeDirectories[FileNames[I].DirIdx - Offset]))
1785           Dir = *DirName;
1786       StringRef FileName = "";
1787       if (std::optional<const char *> FName =
1788               dwarf::toString(FileNames[I].Name))
1789         FileName = *FName;
1790       assert(FileName != "");
1791       std::optional<MD5::MD5Result> Checksum;
1792       if (DwarfVersion >= 5 && LineTable->Prologue.ContentTypes.HasMD5)
1793         Checksum = LineTable->Prologue.FileNames[I].Checksum;
1794       cantFail(getDwarfFile(Dir, FileName, 0, Checksum, std::nullopt, CUID,
1795                             DwarfVersion));
1796     }
1797   }
1798 }
1799 
1800 bool BinaryContext::shouldEmit(const BinaryFunction &Function) const {
1801   if (Function.isPseudo())
1802     return false;
1803 
1804   if (opts::processAllFunctions())
1805     return true;
1806 
1807   if (Function.isIgnored())
1808     return false;
1809 
1810   // In relocation mode we will emit non-simple functions with CFG.
1811   // If the function does not have a CFG it should be marked as ignored.
1812   return HasRelocations || Function.isSimple();
1813 }
1814 
1815 void BinaryContext::dump(const MCInst &Inst) const {
1816   if (LLVM_UNLIKELY(!InstPrinter)) {
1817     dbgs() << "Cannot dump for InstPrinter is not initialized.\n";
1818     return;
1819   }
1820   InstPrinter->printInst(&Inst, 0, "", *STI, dbgs());
1821   dbgs() << "\n";
1822 }
1823 
1824 void BinaryContext::printCFI(raw_ostream &OS, const MCCFIInstruction &Inst) {
1825   uint32_t Operation = Inst.getOperation();
1826   switch (Operation) {
1827   case MCCFIInstruction::OpSameValue:
1828     OS << "OpSameValue Reg" << Inst.getRegister();
1829     break;
1830   case MCCFIInstruction::OpRememberState:
1831     OS << "OpRememberState";
1832     break;
1833   case MCCFIInstruction::OpRestoreState:
1834     OS << "OpRestoreState";
1835     break;
1836   case MCCFIInstruction::OpOffset:
1837     OS << "OpOffset Reg" << Inst.getRegister() << " " << Inst.getOffset();
1838     break;
1839   case MCCFIInstruction::OpDefCfaRegister:
1840     OS << "OpDefCfaRegister Reg" << Inst.getRegister();
1841     break;
1842   case MCCFIInstruction::OpDefCfaOffset:
1843     OS << "OpDefCfaOffset " << Inst.getOffset();
1844     break;
1845   case MCCFIInstruction::OpDefCfa:
1846     OS << "OpDefCfa Reg" << Inst.getRegister() << " " << Inst.getOffset();
1847     break;
1848   case MCCFIInstruction::OpRelOffset:
1849     OS << "OpRelOffset Reg" << Inst.getRegister() << " " << Inst.getOffset();
1850     break;
1851   case MCCFIInstruction::OpAdjustCfaOffset:
1852     OS << "OfAdjustCfaOffset " << Inst.getOffset();
1853     break;
1854   case MCCFIInstruction::OpEscape:
1855     OS << "OpEscape";
1856     break;
1857   case MCCFIInstruction::OpRestore:
1858     OS << "OpRestore Reg" << Inst.getRegister();
1859     break;
1860   case MCCFIInstruction::OpUndefined:
1861     OS << "OpUndefined Reg" << Inst.getRegister();
1862     break;
1863   case MCCFIInstruction::OpRegister:
1864     OS << "OpRegister Reg" << Inst.getRegister() << " Reg"
1865        << Inst.getRegister2();
1866     break;
1867   case MCCFIInstruction::OpWindowSave:
1868     OS << "OpWindowSave";
1869     break;
1870   case MCCFIInstruction::OpGnuArgsSize:
1871     OS << "OpGnuArgsSize";
1872     break;
1873   default:
1874     OS << "Op#" << Operation;
1875     break;
1876   }
1877 }
1878 
1879 MarkerSymType BinaryContext::getMarkerType(const SymbolRef &Symbol) const {
1880   // For aarch64 and riscv, the ABI defines mapping symbols so we identify data
1881   // in the code section (see IHI0056B). $x identifies a symbol starting code or
1882   // the end of a data chunk inside code, $d identifies start of data.
1883   if (isX86() || ELFSymbolRef(Symbol).getSize())
1884     return MarkerSymType::NONE;
1885 
1886   Expected<StringRef> NameOrError = Symbol.getName();
1887   Expected<object::SymbolRef::Type> TypeOrError = Symbol.getType();
1888 
1889   if (!TypeOrError || !NameOrError)
1890     return MarkerSymType::NONE;
1891 
1892   if (*TypeOrError != SymbolRef::ST_Unknown)
1893     return MarkerSymType::NONE;
1894 
1895   if (*NameOrError == "$x" || NameOrError->starts_with("$x."))
1896     return MarkerSymType::CODE;
1897 
1898   // $x<ISA>
1899   if (isRISCV() && NameOrError->starts_with("$x"))
1900     return MarkerSymType::CODE;
1901 
1902   if (*NameOrError == "$d" || NameOrError->starts_with("$d."))
1903     return MarkerSymType::DATA;
1904 
1905   return MarkerSymType::NONE;
1906 }
1907 
1908 bool BinaryContext::isMarker(const SymbolRef &Symbol) const {
1909   return getMarkerType(Symbol) != MarkerSymType::NONE;
1910 }
1911 
1912 static void printDebugInfo(raw_ostream &OS, const MCInst &Instruction,
1913                            const BinaryFunction *Function,
1914                            DWARFContext *DwCtx) {
1915   DebugLineTableRowRef RowRef =
1916       DebugLineTableRowRef::fromSMLoc(Instruction.getLoc());
1917   if (RowRef == DebugLineTableRowRef::NULL_ROW)
1918     return;
1919 
1920   const DWARFDebugLine::LineTable *LineTable;
1921   if (Function && Function->getDWARFUnit() &&
1922       Function->getDWARFUnit()->getOffset() == RowRef.DwCompileUnitIndex) {
1923     LineTable = Function->getDWARFLineTable();
1924   } else {
1925     LineTable = DwCtx->getLineTableForUnit(
1926         DwCtx->getCompileUnitForOffset(RowRef.DwCompileUnitIndex));
1927   }
1928   assert(LineTable && "line table expected for instruction with debug info");
1929 
1930   const DWARFDebugLine::Row &Row = LineTable->Rows[RowRef.RowIndex - 1];
1931   StringRef FileName = "";
1932   if (std::optional<const char *> FName =
1933           dwarf::toString(LineTable->Prologue.FileNames[Row.File - 1].Name))
1934     FileName = *FName;
1935   OS << " # debug line " << FileName << ":" << Row.Line;
1936   if (Row.Column)
1937     OS << ":" << Row.Column;
1938   if (Row.Discriminator)
1939     OS << " discriminator:" << Row.Discriminator;
1940 }
1941 
1942 void BinaryContext::printInstruction(raw_ostream &OS, const MCInst &Instruction,
1943                                      uint64_t Offset,
1944                                      const BinaryFunction *Function,
1945                                      bool PrintMCInst, bool PrintMemData,
1946                                      bool PrintRelocations,
1947                                      StringRef Endl) const {
1948   OS << format("    %08" PRIx64 ": ", Offset);
1949   if (MIB->isCFI(Instruction)) {
1950     uint32_t Offset = Instruction.getOperand(0).getImm();
1951     OS << "\t!CFI\t$" << Offset << "\t; ";
1952     if (Function)
1953       printCFI(OS, *Function->getCFIFor(Instruction));
1954     OS << Endl;
1955     return;
1956   }
1957   if (std::optional<uint32_t> DynamicID =
1958           MIB->getDynamicBranchID(Instruction)) {
1959     OS << "\tjit\t" << MIB->getTargetSymbol(Instruction)->getName()
1960        << " # ID: " << DynamicID;
1961   } else {
1962     InstPrinter->printInst(&Instruction, 0, "", *STI, OS);
1963   }
1964   if (MIB->isCall(Instruction)) {
1965     if (MIB->isTailCall(Instruction))
1966       OS << " # TAILCALL ";
1967     if (MIB->isInvoke(Instruction)) {
1968       const std::optional<MCPlus::MCLandingPad> EHInfo =
1969           MIB->getEHInfo(Instruction);
1970       OS << " # handler: ";
1971       if (EHInfo->first)
1972         OS << *EHInfo->first;
1973       else
1974         OS << '0';
1975       OS << "; action: " << EHInfo->second;
1976       const int64_t GnuArgsSize = MIB->getGnuArgsSize(Instruction);
1977       if (GnuArgsSize >= 0)
1978         OS << "; GNU_args_size = " << GnuArgsSize;
1979     }
1980   } else if (MIB->isIndirectBranch(Instruction)) {
1981     if (uint64_t JTAddress = MIB->getJumpTable(Instruction)) {
1982       OS << " # JUMPTABLE @0x" << Twine::utohexstr(JTAddress);
1983     } else {
1984       OS << " # UNKNOWN CONTROL FLOW";
1985     }
1986   }
1987   if (std::optional<uint32_t> Offset = MIB->getOffset(Instruction))
1988     OS << " # Offset: " << *Offset;
1989   if (std::optional<uint32_t> Size = MIB->getSize(Instruction))
1990     OS << " # Size: " << *Size;
1991   if (MCSymbol *Label = MIB->getInstLabel(Instruction))
1992     OS << " # Label: " << *Label;
1993 
1994   MIB->printAnnotations(Instruction, OS);
1995 
1996   if (opts::PrintDebugInfo)
1997     printDebugInfo(OS, Instruction, Function, DwCtx.get());
1998 
1999   if ((opts::PrintRelocations || PrintRelocations) && Function) {
2000     const uint64_t Size = computeCodeSize(&Instruction, &Instruction + 1);
2001     Function->printRelocations(OS, Offset, Size);
2002   }
2003 
2004   OS << Endl;
2005 
2006   if (PrintMCInst) {
2007     Instruction.dump_pretty(OS, InstPrinter.get());
2008     OS << Endl;
2009   }
2010 }
2011 
2012 std::optional<uint64_t>
2013 BinaryContext::getBaseAddressForMapping(uint64_t MMapAddress,
2014                                         uint64_t FileOffset) const {
2015   // Find a segment with a matching file offset.
2016   for (auto &KV : SegmentMapInfo) {
2017     const SegmentInfo &SegInfo = KV.second;
2018     // Only consider executable segments.
2019     if (!SegInfo.IsExecutable)
2020       continue;
2021     // FileOffset is got from perf event,
2022     // and it is equal to alignDown(SegInfo.FileOffset, pagesize).
2023     // If the pagesize is not equal to SegInfo.Alignment.
2024     // FileOffset and SegInfo.FileOffset should be aligned first,
2025     // and then judge whether they are equal.
2026     if (alignDown(SegInfo.FileOffset, SegInfo.Alignment) ==
2027         alignDown(FileOffset, SegInfo.Alignment)) {
2028       // The function's offset from base address in VAS is aligned by pagesize
2029       // instead of SegInfo.Alignment. Pagesize can't be got from perf events.
2030       // However, The ELF document says that SegInfo.FileOffset should equal
2031       // to SegInfo.Address, modulo the pagesize.
2032       // Reference: https://refspecs.linuxfoundation.org/elf/elf.pdf
2033 
2034       // So alignDown(SegInfo.Address, pagesize) can be calculated by:
2035       // alignDown(SegInfo.Address, pagesize)
2036       //   = SegInfo.Address - (SegInfo.Address % pagesize)
2037       //   = SegInfo.Address - (SegInfo.FileOffset % pagesize)
2038       //   = SegInfo.Address - SegInfo.FileOffset +
2039       //     alignDown(SegInfo.FileOffset, pagesize)
2040       //   = SegInfo.Address - SegInfo.FileOffset + FileOffset
2041       return MMapAddress - (SegInfo.Address - SegInfo.FileOffset + FileOffset);
2042     }
2043   }
2044 
2045   return std::nullopt;
2046 }
2047 
2048 ErrorOr<BinarySection &> BinaryContext::getSectionForAddress(uint64_t Address) {
2049   auto SI = AddressToSection.upper_bound(Address);
2050   if (SI != AddressToSection.begin()) {
2051     --SI;
2052     uint64_t UpperBound = SI->first + SI->second->getSize();
2053     if (!SI->second->getSize())
2054       UpperBound += 1;
2055     if (UpperBound > Address)
2056       return *SI->second;
2057   }
2058   return std::make_error_code(std::errc::bad_address);
2059 }
2060 
2061 ErrorOr<StringRef>
2062 BinaryContext::getSectionNameForAddress(uint64_t Address) const {
2063   if (ErrorOr<const BinarySection &> Section = getSectionForAddress(Address))
2064     return Section->getName();
2065   return std::make_error_code(std::errc::bad_address);
2066 }
2067 
2068 BinarySection &BinaryContext::registerSection(BinarySection *Section) {
2069   auto Res = Sections.insert(Section);
2070   (void)Res;
2071   assert(Res.second && "can't register the same section twice.");
2072 
2073   // Only register allocatable sections in the AddressToSection map.
2074   if (Section->isAllocatable() && Section->getAddress())
2075     AddressToSection.insert(std::make_pair(Section->getAddress(), Section));
2076   NameToSection.insert(
2077       std::make_pair(std::string(Section->getName()), Section));
2078   if (Section->hasSectionRef())
2079     SectionRefToBinarySection.insert(
2080         std::make_pair(Section->getSectionRef(), Section));
2081 
2082   LLVM_DEBUG(dbgs() << "BOLT-DEBUG: registering " << *Section << "\n");
2083   return *Section;
2084 }
2085 
2086 BinarySection &BinaryContext::registerSection(SectionRef Section) {
2087   return registerSection(new BinarySection(*this, Section));
2088 }
2089 
2090 BinarySection &
2091 BinaryContext::registerSection(const Twine &SectionName,
2092                                const BinarySection &OriginalSection) {
2093   return registerSection(
2094       new BinarySection(*this, SectionName, OriginalSection));
2095 }
2096 
2097 BinarySection &
2098 BinaryContext::registerOrUpdateSection(const Twine &Name, unsigned ELFType,
2099                                        unsigned ELFFlags, uint8_t *Data,
2100                                        uint64_t Size, unsigned Alignment) {
2101   auto NamedSections = getSectionByName(Name);
2102   if (NamedSections.begin() != NamedSections.end()) {
2103     assert(std::next(NamedSections.begin()) == NamedSections.end() &&
2104            "can only update unique sections");
2105     BinarySection *Section = NamedSections.begin()->second;
2106 
2107     LLVM_DEBUG(dbgs() << "BOLT-DEBUG: updating " << *Section << " -> ");
2108     const bool Flag = Section->isAllocatable();
2109     (void)Flag;
2110     Section->update(Data, Size, Alignment, ELFType, ELFFlags);
2111     LLVM_DEBUG(dbgs() << *Section << "\n");
2112     // FIXME: Fix section flags/attributes for MachO.
2113     if (isELF())
2114       assert(Flag == Section->isAllocatable() &&
2115              "can't change section allocation status");
2116     return *Section;
2117   }
2118 
2119   return registerSection(
2120       new BinarySection(*this, Name, Data, Size, Alignment, ELFType, ELFFlags));
2121 }
2122 
2123 void BinaryContext::deregisterSectionName(const BinarySection &Section) {
2124   auto NameRange = NameToSection.equal_range(Section.getName().str());
2125   while (NameRange.first != NameRange.second) {
2126     if (NameRange.first->second == &Section) {
2127       NameToSection.erase(NameRange.first);
2128       break;
2129     }
2130     ++NameRange.first;
2131   }
2132 }
2133 
2134 void BinaryContext::deregisterUnusedSections() {
2135   ErrorOr<BinarySection &> AbsSection = getUniqueSectionByName("<absolute>");
2136   for (auto SI = Sections.begin(); SI != Sections.end();) {
2137     BinarySection *Section = *SI;
2138     // We check getOutputData() instead of getOutputSize() because sometimes
2139     // zero-sized .text.cold sections are allocated.
2140     if (Section->hasSectionRef() || Section->getOutputData() ||
2141         (AbsSection && Section == &AbsSection.get())) {
2142       ++SI;
2143       continue;
2144     }
2145 
2146     LLVM_DEBUG(dbgs() << "LLVM-DEBUG: deregistering " << Section->getName()
2147                       << '\n';);
2148     deregisterSectionName(*Section);
2149     SI = Sections.erase(SI);
2150     delete Section;
2151   }
2152 }
2153 
2154 bool BinaryContext::deregisterSection(BinarySection &Section) {
2155   BinarySection *SectionPtr = &Section;
2156   auto Itr = Sections.find(SectionPtr);
2157   if (Itr != Sections.end()) {
2158     auto Range = AddressToSection.equal_range(SectionPtr->getAddress());
2159     while (Range.first != Range.second) {
2160       if (Range.first->second == SectionPtr) {
2161         AddressToSection.erase(Range.first);
2162         break;
2163       }
2164       ++Range.first;
2165     }
2166 
2167     deregisterSectionName(*SectionPtr);
2168     Sections.erase(Itr);
2169     delete SectionPtr;
2170     return true;
2171   }
2172   return false;
2173 }
2174 
2175 void BinaryContext::renameSection(BinarySection &Section,
2176                                   const Twine &NewName) {
2177   auto Itr = Sections.find(&Section);
2178   assert(Itr != Sections.end() && "Section must exist to be renamed.");
2179   Sections.erase(Itr);
2180 
2181   deregisterSectionName(Section);
2182 
2183   Section.Name = NewName.str();
2184   Section.setOutputName(Section.Name);
2185 
2186   NameToSection.insert(std::make_pair(Section.Name, &Section));
2187 
2188   // Reinsert with the new name.
2189   Sections.insert(&Section);
2190 }
2191 
2192 void BinaryContext::printSections(raw_ostream &OS) const {
2193   for (BinarySection *const &Section : Sections)
2194     OS << "BOLT-INFO: " << *Section << "\n";
2195 }
2196 
2197 BinarySection &BinaryContext::absoluteSection() {
2198   if (ErrorOr<BinarySection &> Section = getUniqueSectionByName("<absolute>"))
2199     return *Section;
2200   return registerOrUpdateSection("<absolute>", ELF::SHT_NULL, 0u);
2201 }
2202 
2203 ErrorOr<uint64_t> BinaryContext::getUnsignedValueAtAddress(uint64_t Address,
2204                                                            size_t Size) const {
2205   const ErrorOr<const BinarySection &> Section = getSectionForAddress(Address);
2206   if (!Section)
2207     return std::make_error_code(std::errc::bad_address);
2208 
2209   if (Section->isVirtual())
2210     return 0;
2211 
2212   DataExtractor DE(Section->getContents(), AsmInfo->isLittleEndian(),
2213                    AsmInfo->getCodePointerSize());
2214   auto ValueOffset = static_cast<uint64_t>(Address - Section->getAddress());
2215   return DE.getUnsigned(&ValueOffset, Size);
2216 }
2217 
2218 ErrorOr<int64_t> BinaryContext::getSignedValueAtAddress(uint64_t Address,
2219                                                         size_t Size) const {
2220   const ErrorOr<const BinarySection &> Section = getSectionForAddress(Address);
2221   if (!Section)
2222     return std::make_error_code(std::errc::bad_address);
2223 
2224   if (Section->isVirtual())
2225     return 0;
2226 
2227   DataExtractor DE(Section->getContents(), AsmInfo->isLittleEndian(),
2228                    AsmInfo->getCodePointerSize());
2229   auto ValueOffset = static_cast<uint64_t>(Address - Section->getAddress());
2230   return DE.getSigned(&ValueOffset, Size);
2231 }
2232 
2233 void BinaryContext::addRelocation(uint64_t Address, MCSymbol *Symbol,
2234                                   uint64_t Type, uint64_t Addend,
2235                                   uint64_t Value) {
2236   ErrorOr<BinarySection &> Section = getSectionForAddress(Address);
2237   assert(Section && "cannot find section for address");
2238   Section->addRelocation(Address - Section->getAddress(), Symbol, Type, Addend,
2239                          Value);
2240 }
2241 
2242 void BinaryContext::addDynamicRelocation(uint64_t Address, MCSymbol *Symbol,
2243                                          uint64_t Type, uint64_t Addend,
2244                                          uint64_t Value) {
2245   ErrorOr<BinarySection &> Section = getSectionForAddress(Address);
2246   assert(Section && "cannot find section for address");
2247   Section->addDynamicRelocation(Address - Section->getAddress(), Symbol, Type,
2248                                 Addend, Value);
2249 }
2250 
2251 bool BinaryContext::removeRelocationAt(uint64_t Address) {
2252   ErrorOr<BinarySection &> Section = getSectionForAddress(Address);
2253   assert(Section && "cannot find section for address");
2254   return Section->removeRelocationAt(Address - Section->getAddress());
2255 }
2256 
2257 const Relocation *BinaryContext::getRelocationAt(uint64_t Address) const {
2258   ErrorOr<const BinarySection &> Section = getSectionForAddress(Address);
2259   if (!Section)
2260     return nullptr;
2261 
2262   return Section->getRelocationAt(Address - Section->getAddress());
2263 }
2264 
2265 const Relocation *
2266 BinaryContext::getDynamicRelocationAt(uint64_t Address) const {
2267   ErrorOr<const BinarySection &> Section = getSectionForAddress(Address);
2268   if (!Section)
2269     return nullptr;
2270 
2271   return Section->getDynamicRelocationAt(Address - Section->getAddress());
2272 }
2273 
2274 void BinaryContext::markAmbiguousRelocations(BinaryData &BD,
2275                                              const uint64_t Address) {
2276   auto setImmovable = [&](BinaryData &BD) {
2277     BinaryData *Root = BD.getAtomicRoot();
2278     LLVM_DEBUG(if (Root->isMoveable()) {
2279       dbgs() << "BOLT-DEBUG: setting " << *Root << " as immovable "
2280              << "due to ambiguous relocation referencing 0x"
2281              << Twine::utohexstr(Address) << '\n';
2282     });
2283     Root->setIsMoveable(false);
2284   };
2285 
2286   if (Address == BD.getAddress()) {
2287     setImmovable(BD);
2288 
2289     // Set previous symbol as immovable
2290     BinaryData *Prev = getBinaryDataContainingAddress(Address - 1);
2291     if (Prev && Prev->getEndAddress() == BD.getAddress())
2292       setImmovable(*Prev);
2293   }
2294 
2295   if (Address == BD.getEndAddress()) {
2296     setImmovable(BD);
2297 
2298     // Set next symbol as immovable
2299     BinaryData *Next = getBinaryDataContainingAddress(BD.getEndAddress());
2300     if (Next && Next->getAddress() == BD.getEndAddress())
2301       setImmovable(*Next);
2302   }
2303 }
2304 
2305 BinaryFunction *BinaryContext::getFunctionForSymbol(const MCSymbol *Symbol,
2306                                                     uint64_t *EntryDesc) {
2307   std::shared_lock<llvm::sys::RWMutex> Lock(SymbolToFunctionMapMutex);
2308   auto BFI = SymbolToFunctionMap.find(Symbol);
2309   if (BFI == SymbolToFunctionMap.end())
2310     return nullptr;
2311 
2312   BinaryFunction *BF = BFI->second;
2313   if (EntryDesc)
2314     *EntryDesc = BF->getEntryIDForSymbol(Symbol);
2315 
2316   return BF;
2317 }
2318 
2319 std::string
2320 BinaryContext::generateBugReportMessage(StringRef Message,
2321                                         const BinaryFunction &Function) const {
2322   std::string Msg;
2323   raw_string_ostream SS(Msg);
2324   SS << "=======================================\n";
2325   SS << "BOLT is unable to proceed because it couldn't properly understand "
2326         "this function.\n";
2327   SS << "If you are running the most recent version of BOLT, you may "
2328         "want to "
2329         "report this and paste this dump.\nPlease check that there is no "
2330         "sensitive contents being shared in this dump.\n";
2331   SS << "\nOffending function: " << Function.getPrintName() << "\n\n";
2332   ScopedPrinter SP(SS);
2333   SP.printBinaryBlock("Function contents", *Function.getData());
2334   SS << "\n";
2335   const_cast<BinaryFunction &>(Function).print(SS, "");
2336   SS << "ERROR: " << Message;
2337   SS << "\n=======================================\n";
2338   return Msg;
2339 }
2340 
2341 BinaryFunction *
2342 BinaryContext::createInjectedBinaryFunction(const std::string &Name,
2343                                             bool IsSimple) {
2344   InjectedBinaryFunctions.push_back(new BinaryFunction(Name, *this, IsSimple));
2345   BinaryFunction *BF = InjectedBinaryFunctions.back();
2346   setSymbolToFunctionMap(BF->getSymbol(), BF);
2347   BF->CurrentState = BinaryFunction::State::CFG;
2348   return BF;
2349 }
2350 
2351 std::pair<size_t, size_t>
2352 BinaryContext::calculateEmittedSize(BinaryFunction &BF, bool FixBranches) {
2353   // Adjust branch instruction to match the current layout.
2354   if (FixBranches)
2355     BF.fixBranches();
2356 
2357   // Create local MC context to isolate the effect of ephemeral code emission.
2358   IndependentCodeEmitter MCEInstance = createIndependentMCCodeEmitter();
2359   MCContext *LocalCtx = MCEInstance.LocalCtx.get();
2360   MCAsmBackend *MAB =
2361       TheTarget->createMCAsmBackend(*STI, *MRI, MCTargetOptions());
2362 
2363   SmallString<256> Code;
2364   raw_svector_ostream VecOS(Code);
2365 
2366   std::unique_ptr<MCObjectWriter> OW = MAB->createObjectWriter(VecOS);
2367   std::unique_ptr<MCStreamer> Streamer(TheTarget->createMCObjectStreamer(
2368       *TheTriple, *LocalCtx, std::unique_ptr<MCAsmBackend>(MAB), std::move(OW),
2369       std::unique_ptr<MCCodeEmitter>(MCEInstance.MCE.release()), *STI));
2370 
2371   Streamer->initSections(false, *STI);
2372 
2373   MCSection *Section = MCEInstance.LocalMOFI->getTextSection();
2374   Section->setHasInstructions(true);
2375 
2376   // Create symbols in the LocalCtx so that they get destroyed with it.
2377   MCSymbol *StartLabel = LocalCtx->createTempSymbol();
2378   MCSymbol *EndLabel = LocalCtx->createTempSymbol();
2379 
2380   Streamer->switchSection(Section);
2381   Streamer->emitLabel(StartLabel);
2382   emitFunctionBody(*Streamer, BF, BF.getLayout().getMainFragment(),
2383                    /*EmitCodeOnly=*/true);
2384   Streamer->emitLabel(EndLabel);
2385 
2386   using LabelRange = std::pair<const MCSymbol *, const MCSymbol *>;
2387   SmallVector<LabelRange> SplitLabels;
2388   for (FunctionFragment &FF : BF.getLayout().getSplitFragments()) {
2389     MCSymbol *const SplitStartLabel = LocalCtx->createTempSymbol();
2390     MCSymbol *const SplitEndLabel = LocalCtx->createTempSymbol();
2391     SplitLabels.emplace_back(SplitStartLabel, SplitEndLabel);
2392 
2393     MCSectionELF *const SplitSection = LocalCtx->getELFSection(
2394         BF.getCodeSectionName(FF.getFragmentNum()), ELF::SHT_PROGBITS,
2395         ELF::SHF_EXECINSTR | ELF::SHF_ALLOC);
2396     SplitSection->setHasInstructions(true);
2397     Streamer->switchSection(SplitSection);
2398 
2399     Streamer->emitLabel(SplitStartLabel);
2400     emitFunctionBody(*Streamer, BF, FF, /*EmitCodeOnly=*/true);
2401     Streamer->emitLabel(SplitEndLabel);
2402   }
2403 
2404   MCAssembler &Assembler =
2405       static_cast<MCObjectStreamer *>(Streamer.get())->getAssembler();
2406   Assembler.layout();
2407 
2408   // Obtain fragment sizes.
2409   std::vector<uint64_t> FragmentSizes;
2410   // Main fragment size.
2411   const uint64_t HotSize = Assembler.getSymbolOffset(*EndLabel) -
2412                            Assembler.getSymbolOffset(*StartLabel);
2413   FragmentSizes.push_back(HotSize);
2414   // Split fragment sizes.
2415   uint64_t ColdSize = 0;
2416   for (const auto &Labels : SplitLabels) {
2417     uint64_t Size = Assembler.getSymbolOffset(*Labels.second) -
2418                     Assembler.getSymbolOffset(*Labels.first);
2419     FragmentSizes.push_back(Size);
2420     ColdSize += Size;
2421   }
2422 
2423   // Populate new start and end offsets of each basic block.
2424   uint64_t FragmentIndex = 0;
2425   for (FunctionFragment &FF : BF.getLayout().fragments()) {
2426     BinaryBasicBlock *PrevBB = nullptr;
2427     for (BinaryBasicBlock *BB : FF) {
2428       const uint64_t BBStartOffset =
2429           Assembler.getSymbolOffset(*(BB->getLabel()));
2430       BB->setOutputStartAddress(BBStartOffset);
2431       if (PrevBB)
2432         PrevBB->setOutputEndAddress(BBStartOffset);
2433       PrevBB = BB;
2434     }
2435     if (PrevBB)
2436       PrevBB->setOutputEndAddress(FragmentSizes[FragmentIndex]);
2437     FragmentIndex++;
2438   }
2439 
2440   // Clean-up the effect of the code emission.
2441   for (const MCSymbol &Symbol : Assembler.symbols()) {
2442     MCSymbol *MutableSymbol = const_cast<MCSymbol *>(&Symbol);
2443     MutableSymbol->setUndefined();
2444     MutableSymbol->setIsRegistered(false);
2445   }
2446 
2447   return std::make_pair(HotSize, ColdSize);
2448 }
2449 
2450 bool BinaryContext::validateInstructionEncoding(
2451     ArrayRef<uint8_t> InputSequence) const {
2452   MCInst Inst;
2453   uint64_t InstSize;
2454   DisAsm->getInstruction(Inst, InstSize, InputSequence, 0, nulls());
2455   assert(InstSize == InputSequence.size() &&
2456          "Disassembled instruction size does not match the sequence.");
2457 
2458   SmallString<256> Code;
2459   SmallVector<MCFixup, 4> Fixups;
2460 
2461   MCE->encodeInstruction(Inst, Code, Fixups, *STI);
2462   auto OutputSequence = ArrayRef<uint8_t>((uint8_t *)Code.data(), Code.size());
2463   if (InputSequence != OutputSequence) {
2464     if (opts::Verbosity > 1) {
2465       this->errs() << "BOLT-WARNING: mismatched encoding detected\n"
2466                    << "      input: " << InputSequence << '\n'
2467                    << "     output: " << OutputSequence << '\n';
2468     }
2469     return false;
2470   }
2471 
2472   return true;
2473 }
2474 
2475 uint64_t BinaryContext::getHotThreshold() const {
2476   static uint64_t Threshold = 0;
2477   if (Threshold == 0) {
2478     Threshold = std::max(
2479         (uint64_t)opts::ExecutionCountThreshold,
2480         NumProfiledFuncs ? SumExecutionCount / (2 * NumProfiledFuncs) : 1);
2481   }
2482   return Threshold;
2483 }
2484 
2485 BinaryFunction *BinaryContext::getBinaryFunctionContainingAddress(
2486     uint64_t Address, bool CheckPastEnd, bool UseMaxSize) {
2487   auto FI = BinaryFunctions.upper_bound(Address);
2488   if (FI == BinaryFunctions.begin())
2489     return nullptr;
2490   --FI;
2491 
2492   const uint64_t UsedSize =
2493       UseMaxSize ? FI->second.getMaxSize() : FI->second.getSize();
2494 
2495   if (Address >= FI->first + UsedSize + (CheckPastEnd ? 1 : 0))
2496     return nullptr;
2497 
2498   return &FI->second;
2499 }
2500 
2501 BinaryFunction *BinaryContext::getBinaryFunctionAtAddress(uint64_t Address) {
2502   // First, try to find a function starting at the given address. If the
2503   // function was folded, this will get us the original folded function if it
2504   // wasn't removed from the list, e.g. in non-relocation mode.
2505   auto BFI = BinaryFunctions.find(Address);
2506   if (BFI != BinaryFunctions.end())
2507     return &BFI->second;
2508 
2509   // We might have folded the function matching the object at the given
2510   // address. In such case, we look for a function matching the symbol
2511   // registered at the original address. The new function (the one that the
2512   // original was folded into) will hold the symbol.
2513   if (const BinaryData *BD = getBinaryDataAtAddress(Address)) {
2514     uint64_t EntryID = 0;
2515     BinaryFunction *BF = getFunctionForSymbol(BD->getSymbol(), &EntryID);
2516     if (BF && EntryID == 0)
2517       return BF;
2518   }
2519   return nullptr;
2520 }
2521 
2522 /// Deregister JumpTable registered at a given \p Address and delete it.
2523 void BinaryContext::deleteJumpTable(uint64_t Address) {
2524   assert(JumpTables.count(Address) && "Must have a jump table at address");
2525   JumpTable *JT = JumpTables.at(Address);
2526   for (BinaryFunction *Parent : JT->Parents)
2527     Parent->JumpTables.erase(Address);
2528   JumpTables.erase(Address);
2529   delete JT;
2530 }
2531 
2532 DebugAddressRangesVector BinaryContext::translateModuleAddressRanges(
2533     const DWARFAddressRangesVector &InputRanges) const {
2534   DebugAddressRangesVector OutputRanges;
2535 
2536   for (const DWARFAddressRange Range : InputRanges) {
2537     auto BFI = BinaryFunctions.lower_bound(Range.LowPC);
2538     while (BFI != BinaryFunctions.end()) {
2539       const BinaryFunction &Function = BFI->second;
2540       if (Function.getAddress() >= Range.HighPC)
2541         break;
2542       const DebugAddressRangesVector FunctionRanges =
2543           Function.getOutputAddressRanges();
2544       llvm::move(FunctionRanges, std::back_inserter(OutputRanges));
2545       std::advance(BFI, 1);
2546     }
2547   }
2548 
2549   return OutputRanges;
2550 }
2551 
2552 } // namespace bolt
2553 } // namespace llvm
2554