xref: /llvm-project/bolt/lib/Core/BinaryContext.cpp (revision 52cf07116bf0a8cab87b0f55176d198bcaa02575)
1 //===- bolt/Core/BinaryContext.cpp - Low-level context --------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements the BinaryContext class.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "bolt/Core/BinaryContext.h"
14 #include "bolt/Core/BinaryEmitter.h"
15 #include "bolt/Core/BinaryFunction.h"
16 #include "bolt/Utils/CommandLineOpts.h"
17 #include "bolt/Utils/NameResolver.h"
18 #include "bolt/Utils/Utils.h"
19 #include "llvm/ADT/STLExtras.h"
20 #include "llvm/ADT/Twine.h"
21 #include "llvm/DebugInfo/DWARF/DWARFCompileUnit.h"
22 #include "llvm/DebugInfo/DWARF/DWARFFormValue.h"
23 #include "llvm/DebugInfo/DWARF/DWARFUnit.h"
24 #include "llvm/MC/MCAsmLayout.h"
25 #include "llvm/MC/MCAssembler.h"
26 #include "llvm/MC/MCContext.h"
27 #include "llvm/MC/MCDisassembler/MCDisassembler.h"
28 #include "llvm/MC/MCInstPrinter.h"
29 #include "llvm/MC/MCObjectStreamer.h"
30 #include "llvm/MC/MCObjectWriter.h"
31 #include "llvm/MC/MCRegisterInfo.h"
32 #include "llvm/MC/MCSectionELF.h"
33 #include "llvm/MC/MCStreamer.h"
34 #include "llvm/MC/MCSubtargetInfo.h"
35 #include "llvm/MC/MCSymbol.h"
36 #include "llvm/Support/CommandLine.h"
37 #include "llvm/Support/Error.h"
38 #include "llvm/Support/Regex.h"
39 #include <algorithm>
40 #include <functional>
41 #include <iterator>
42 #include <numeric>
43 #include <unordered_set>
44 
45 using namespace llvm;
46 
47 #undef  DEBUG_TYPE
48 #define DEBUG_TYPE "bolt"
49 
50 namespace opts {
51 
52 cl::opt<bool> NoHugePages("no-huge-pages",
53                           cl::desc("use regular size pages for code alignment"),
54                           cl::Hidden, cl::cat(BoltCategory));
55 
56 static cl::opt<bool>
57 PrintDebugInfo("print-debug-info",
58   cl::desc("print debug info when printing functions"),
59   cl::Hidden,
60   cl::ZeroOrMore,
61   cl::cat(BoltCategory));
62 
63 cl::opt<bool> PrintRelocations(
64     "print-relocations",
65     cl::desc("print relocations when printing functions/objects"), cl::Hidden,
66     cl::cat(BoltCategory));
67 
68 static cl::opt<bool>
69 PrintMemData("print-mem-data",
70   cl::desc("print memory data annotations when printing functions"),
71   cl::Hidden,
72   cl::ZeroOrMore,
73   cl::cat(BoltCategory));
74 
75 cl::opt<std::string> CompDirOverride(
76     "comp-dir-override",
77     cl::desc("overrides DW_AT_comp_dir, and provides an alterantive base "
78              "location, which is used with DW_AT_dwo_name to construct a path "
79              "to *.dwo files."),
80     cl::Hidden, cl::init(""), cl::cat(BoltCategory));
81 } // namespace opts
82 
83 namespace llvm {
84 namespace bolt {
85 
86 char BOLTError::ID = 0;
87 
88 BOLTError::BOLTError(bool IsFatal, const Twine &S)
89     : IsFatal(IsFatal), Msg(S.str()) {}
90 
91 void BOLTError::log(raw_ostream &OS) const {
92   if (IsFatal)
93     OS << "FATAL ";
94   StringRef ErrMsg = StringRef(Msg);
95   // Prepend our error prefix if it is missing
96   if (ErrMsg.empty()) {
97     OS << "BOLT-ERROR\n";
98   } else {
99     if (!ErrMsg.starts_with("BOLT-ERROR"))
100       OS << "BOLT-ERROR: ";
101     OS << ErrMsg << "\n";
102   }
103 }
104 
105 std::error_code BOLTError::convertToErrorCode() const {
106   return inconvertibleErrorCode();
107 }
108 
109 Error createNonFatalBOLTError(const Twine &S) {
110   return make_error<BOLTError>(/*IsFatal*/ false, S);
111 }
112 
113 Error createFatalBOLTError(const Twine &S) {
114   return make_error<BOLTError>(/*IsFatal*/ true, S);
115 }
116 
117 void BinaryContext::logBOLTErrorsAndQuitOnFatal(Error E) {
118   handleAllErrors(Error(std::move(E)), [&](const BOLTError &E) {
119     if (!E.getMessage().empty())
120       E.log(this->errs());
121     if (E.isFatal())
122       exit(1);
123   });
124 }
125 
126 BinaryContext::BinaryContext(std::unique_ptr<MCContext> Ctx,
127                              std::unique_ptr<DWARFContext> DwCtx,
128                              std::unique_ptr<Triple> TheTriple,
129                              const Target *TheTarget, std::string TripleName,
130                              std::unique_ptr<MCCodeEmitter> MCE,
131                              std::unique_ptr<MCObjectFileInfo> MOFI,
132                              std::unique_ptr<const MCAsmInfo> AsmInfo,
133                              std::unique_ptr<const MCInstrInfo> MII,
134                              std::unique_ptr<const MCSubtargetInfo> STI,
135                              std::unique_ptr<MCInstPrinter> InstPrinter,
136                              std::unique_ptr<const MCInstrAnalysis> MIA,
137                              std::unique_ptr<MCPlusBuilder> MIB,
138                              std::unique_ptr<const MCRegisterInfo> MRI,
139                              std::unique_ptr<MCDisassembler> DisAsm,
140                              JournalingStreams Logger)
141     : Ctx(std::move(Ctx)), DwCtx(std::move(DwCtx)),
142       TheTriple(std::move(TheTriple)), TheTarget(TheTarget),
143       TripleName(TripleName), MCE(std::move(MCE)), MOFI(std::move(MOFI)),
144       AsmInfo(std::move(AsmInfo)), MII(std::move(MII)), STI(std::move(STI)),
145       InstPrinter(std::move(InstPrinter)), MIA(std::move(MIA)),
146       MIB(std::move(MIB)), MRI(std::move(MRI)), DisAsm(std::move(DisAsm)),
147       Logger(Logger) {
148   Relocation::Arch = this->TheTriple->getArch();
149   RegularPageSize = isAArch64() ? RegularPageSizeAArch64 : RegularPageSizeX86;
150   PageAlign = opts::NoHugePages ? RegularPageSize : HugePageSize;
151 }
152 
153 BinaryContext::~BinaryContext() {
154   for (BinarySection *Section : Sections)
155     delete Section;
156   for (BinaryFunction *InjectedFunction : InjectedBinaryFunctions)
157     delete InjectedFunction;
158   for (std::pair<const uint64_t, JumpTable *> JTI : JumpTables)
159     delete JTI.second;
160   clearBinaryData();
161 }
162 
163 /// Create BinaryContext for a given architecture \p ArchName and
164 /// triple \p TripleName.
165 Expected<std::unique_ptr<BinaryContext>>
166 BinaryContext::createBinaryContext(const ObjectFile *File, bool IsPIC,
167                                    std::unique_ptr<DWARFContext> DwCtx,
168                                    JournalingStreams Logger) {
169   StringRef ArchName = "";
170   std::string FeaturesStr = "";
171   switch (File->getArch()) {
172   case llvm::Triple::x86_64:
173     ArchName = "x86-64";
174     FeaturesStr = "+nopl";
175     break;
176   case llvm::Triple::aarch64:
177     ArchName = "aarch64";
178     FeaturesStr = "+all";
179     break;
180   case llvm::Triple::riscv64: {
181     ArchName = "riscv64";
182     Expected<SubtargetFeatures> Features = File->getFeatures();
183 
184     if (auto E = Features.takeError())
185       return std::move(E);
186 
187     // We rely on relaxation for some transformations (e.g., promoting all calls
188     // to PseudoCALL and then making JITLink relax them). Since the relax
189     // feature is not stored in the object file, we manually enable it.
190     Features->AddFeature("relax");
191     FeaturesStr = Features->getString();
192     break;
193   }
194   default:
195     return createStringError(std::errc::not_supported,
196                              "BOLT-ERROR: Unrecognized machine in ELF file");
197   }
198 
199   auto TheTriple = std::make_unique<Triple>(File->makeTriple());
200   const std::string TripleName = TheTriple->str();
201 
202   std::string Error;
203   const Target *TheTarget =
204       TargetRegistry::lookupTarget(std::string(ArchName), *TheTriple, Error);
205   if (!TheTarget)
206     return createStringError(make_error_code(std::errc::not_supported),
207                              Twine("BOLT-ERROR: ", Error));
208 
209   std::unique_ptr<const MCRegisterInfo> MRI(
210       TheTarget->createMCRegInfo(TripleName));
211   if (!MRI)
212     return createStringError(
213         make_error_code(std::errc::not_supported),
214         Twine("BOLT-ERROR: no register info for target ", TripleName));
215 
216   // Set up disassembler.
217   std::unique_ptr<MCAsmInfo> AsmInfo(
218       TheTarget->createMCAsmInfo(*MRI, TripleName, MCTargetOptions()));
219   if (!AsmInfo)
220     return createStringError(
221         make_error_code(std::errc::not_supported),
222         Twine("BOLT-ERROR: no assembly info for target ", TripleName));
223   // BOLT creates "func@PLT" symbols for PLT entries. In function assembly dump
224   // we want to emit such names as using @PLT without double quotes to convey
225   // variant kind to the assembler. BOLT doesn't rely on the linker so we can
226   // override the default AsmInfo behavior to emit names the way we want.
227   AsmInfo->setAllowAtInName(true);
228 
229   std::unique_ptr<const MCSubtargetInfo> STI(
230       TheTarget->createMCSubtargetInfo(TripleName, "", FeaturesStr));
231   if (!STI)
232     return createStringError(
233         make_error_code(std::errc::not_supported),
234         Twine("BOLT-ERROR: no subtarget info for target ", TripleName));
235 
236   std::unique_ptr<const MCInstrInfo> MII(TheTarget->createMCInstrInfo());
237   if (!MII)
238     return createStringError(
239         make_error_code(std::errc::not_supported),
240         Twine("BOLT-ERROR: no instruction info for target ", TripleName));
241 
242   std::unique_ptr<MCContext> Ctx(
243       new MCContext(*TheTriple, AsmInfo.get(), MRI.get(), STI.get()));
244   std::unique_ptr<MCObjectFileInfo> MOFI(
245       TheTarget->createMCObjectFileInfo(*Ctx, IsPIC));
246   Ctx->setObjectFileInfo(MOFI.get());
247   // We do not support X86 Large code model. Change this in the future.
248   bool Large = false;
249   if (TheTriple->getArch() == llvm::Triple::aarch64)
250     Large = true;
251   unsigned LSDAEncoding =
252       Large ? dwarf::DW_EH_PE_absptr : dwarf::DW_EH_PE_udata4;
253   if (IsPIC) {
254     LSDAEncoding = dwarf::DW_EH_PE_pcrel |
255                    (Large ? dwarf::DW_EH_PE_sdata8 : dwarf::DW_EH_PE_sdata4);
256   }
257 
258   std::unique_ptr<MCDisassembler> DisAsm(
259       TheTarget->createMCDisassembler(*STI, *Ctx));
260 
261   if (!DisAsm)
262     return createStringError(
263         make_error_code(std::errc::not_supported),
264         Twine("BOLT-ERROR: no disassembler info for target ", TripleName));
265 
266   std::unique_ptr<const MCInstrAnalysis> MIA(
267       TheTarget->createMCInstrAnalysis(MII.get()));
268   if (!MIA)
269     return createStringError(
270         make_error_code(std::errc::not_supported),
271         Twine("BOLT-ERROR: failed to create instruction analysis for target ",
272               TripleName));
273 
274   int AsmPrinterVariant = AsmInfo->getAssemblerDialect();
275   std::unique_ptr<MCInstPrinter> InstructionPrinter(
276       TheTarget->createMCInstPrinter(*TheTriple, AsmPrinterVariant, *AsmInfo,
277                                      *MII, *MRI));
278   if (!InstructionPrinter)
279     return createStringError(
280         make_error_code(std::errc::not_supported),
281         Twine("BOLT-ERROR: no instruction printer for target ", TripleName));
282   InstructionPrinter->setPrintImmHex(true);
283 
284   std::unique_ptr<MCCodeEmitter> MCE(
285       TheTarget->createMCCodeEmitter(*MII, *Ctx));
286 
287   auto BC = std::make_unique<BinaryContext>(
288       std::move(Ctx), std::move(DwCtx), std::move(TheTriple), TheTarget,
289       std::string(TripleName), std::move(MCE), std::move(MOFI),
290       std::move(AsmInfo), std::move(MII), std::move(STI),
291       std::move(InstructionPrinter), std::move(MIA), nullptr, std::move(MRI),
292       std::move(DisAsm), Logger);
293 
294   BC->LSDAEncoding = LSDAEncoding;
295 
296   BC->MAB = std::unique_ptr<MCAsmBackend>(
297       BC->TheTarget->createMCAsmBackend(*BC->STI, *BC->MRI, MCTargetOptions()));
298 
299   BC->setFilename(File->getFileName());
300 
301   BC->HasFixedLoadAddress = !IsPIC;
302 
303   BC->SymbolicDisAsm = std::unique_ptr<MCDisassembler>(
304       BC->TheTarget->createMCDisassembler(*BC->STI, *BC->Ctx));
305 
306   if (!BC->SymbolicDisAsm)
307     return createStringError(
308         make_error_code(std::errc::not_supported),
309         Twine("BOLT-ERROR: no disassembler info for target ", TripleName));
310 
311   return std::move(BC);
312 }
313 
314 bool BinaryContext::forceSymbolRelocations(StringRef SymbolName) const {
315   if (opts::HotText &&
316       (SymbolName == "__hot_start" || SymbolName == "__hot_end"))
317     return true;
318 
319   if (opts::HotData &&
320       (SymbolName == "__hot_data_start" || SymbolName == "__hot_data_end"))
321     return true;
322 
323   if (SymbolName == "_end")
324     return true;
325 
326   return false;
327 }
328 
329 std::unique_ptr<MCObjectWriter>
330 BinaryContext::createObjectWriter(raw_pwrite_stream &OS) {
331   return MAB->createObjectWriter(OS);
332 }
333 
334 bool BinaryContext::validateObjectNesting() const {
335   auto Itr = BinaryDataMap.begin();
336   auto End = BinaryDataMap.end();
337   bool Valid = true;
338   while (Itr != End) {
339     auto Next = std::next(Itr);
340     while (Next != End &&
341            Itr->second->getSection() == Next->second->getSection() &&
342            Itr->second->containsRange(Next->second->getAddress(),
343                                       Next->second->getSize())) {
344       if (Next->second->Parent != Itr->second) {
345         this->errs() << "BOLT-WARNING: object nesting incorrect for:\n"
346                      << "BOLT-WARNING:  " << *Itr->second << "\n"
347                      << "BOLT-WARNING:  " << *Next->second << "\n";
348         Valid = false;
349       }
350       ++Next;
351     }
352     Itr = Next;
353   }
354   return Valid;
355 }
356 
357 bool BinaryContext::validateHoles() const {
358   bool Valid = true;
359   for (BinarySection &Section : sections()) {
360     for (const Relocation &Rel : Section.relocations()) {
361       uint64_t RelAddr = Rel.Offset + Section.getAddress();
362       const BinaryData *BD = getBinaryDataContainingAddress(RelAddr);
363       if (!BD) {
364         this->errs()
365             << "BOLT-WARNING: no BinaryData found for relocation at address"
366             << " 0x" << Twine::utohexstr(RelAddr) << " in " << Section.getName()
367             << "\n";
368         Valid = false;
369       } else if (!BD->getAtomicRoot()) {
370         this->errs()
371             << "BOLT-WARNING: no atomic BinaryData found for relocation at "
372             << "address 0x" << Twine::utohexstr(RelAddr) << " in "
373             << Section.getName() << "\n";
374         Valid = false;
375       }
376     }
377   }
378   return Valid;
379 }
380 
381 void BinaryContext::updateObjectNesting(BinaryDataMapType::iterator GAI) {
382   const uint64_t Address = GAI->second->getAddress();
383   const uint64_t Size = GAI->second->getSize();
384 
385   auto fixParents = [&](BinaryDataMapType::iterator Itr,
386                         BinaryData *NewParent) {
387     BinaryData *OldParent = Itr->second->Parent;
388     Itr->second->Parent = NewParent;
389     ++Itr;
390     while (Itr != BinaryDataMap.end() && OldParent &&
391            Itr->second->Parent == OldParent) {
392       Itr->second->Parent = NewParent;
393       ++Itr;
394     }
395   };
396 
397   // Check if the previous symbol contains the newly added symbol.
398   if (GAI != BinaryDataMap.begin()) {
399     BinaryData *Prev = std::prev(GAI)->second;
400     while (Prev) {
401       if (Prev->getSection() == GAI->second->getSection() &&
402           Prev->containsRange(Address, Size)) {
403         fixParents(GAI, Prev);
404       } else {
405         fixParents(GAI, nullptr);
406       }
407       Prev = Prev->Parent;
408     }
409   }
410 
411   // Check if the newly added symbol contains any subsequent symbols.
412   if (Size != 0) {
413     BinaryData *BD = GAI->second->Parent ? GAI->second->Parent : GAI->second;
414     auto Itr = std::next(GAI);
415     while (
416         Itr != BinaryDataMap.end() &&
417         BD->containsRange(Itr->second->getAddress(), Itr->second->getSize())) {
418       Itr->second->Parent = BD;
419       ++Itr;
420     }
421   }
422 }
423 
424 iterator_range<BinaryContext::binary_data_iterator>
425 BinaryContext::getSubBinaryData(BinaryData *BD) {
426   auto Start = std::next(BinaryDataMap.find(BD->getAddress()));
427   auto End = Start;
428   while (End != BinaryDataMap.end() && BD->isAncestorOf(End->second))
429     ++End;
430   return make_range(Start, End);
431 }
432 
433 std::pair<const MCSymbol *, uint64_t>
434 BinaryContext::handleAddressRef(uint64_t Address, BinaryFunction &BF,
435                                 bool IsPCRel) {
436   if (isAArch64()) {
437     // Check if this is an access to a constant island and create bookkeeping
438     // to keep track of it and emit it later as part of this function.
439     if (MCSymbol *IslandSym = BF.getOrCreateIslandAccess(Address))
440       return std::make_pair(IslandSym, 0);
441 
442     // Detect custom code written in assembly that refers to arbitrary
443     // constant islands from other functions. Write this reference so we
444     // can pull this constant island and emit it as part of this function
445     // too.
446     auto IslandIter = AddressToConstantIslandMap.lower_bound(Address);
447 
448     if (IslandIter != AddressToConstantIslandMap.begin() &&
449         (IslandIter == AddressToConstantIslandMap.end() ||
450          IslandIter->first > Address))
451       --IslandIter;
452 
453     if (IslandIter != AddressToConstantIslandMap.end()) {
454       // Fall-back to referencing the original constant island in the presence
455       // of dynamic relocs, as we currently do not support cloning them.
456       // Notice: we might fail to link because of this, if the original constant
457       // island we are referring would be emitted too far away.
458       if (IslandIter->second->hasDynamicRelocationAtIsland()) {
459         MCSymbol *IslandSym =
460             IslandIter->second->getOrCreateIslandAccess(Address);
461         if (IslandSym)
462           return std::make_pair(IslandSym, 0);
463       } else if (MCSymbol *IslandSym =
464                      IslandIter->second->getOrCreateProxyIslandAccess(Address,
465                                                                       BF)) {
466         BF.createIslandDependency(IslandSym, IslandIter->second);
467         return std::make_pair(IslandSym, 0);
468       }
469     }
470   }
471 
472   // Note that the address does not necessarily have to reside inside
473   // a section, it could be an absolute address too.
474   ErrorOr<BinarySection &> Section = getSectionForAddress(Address);
475   if (Section && Section->isText()) {
476     if (BF.containsAddress(Address, /*UseMaxSize=*/isAArch64())) {
477       if (Address != BF.getAddress()) {
478         // The address could potentially escape. Mark it as another entry
479         // point into the function.
480         if (opts::Verbosity >= 1) {
481           this->outs() << "BOLT-INFO: potentially escaped address 0x"
482                        << Twine::utohexstr(Address) << " in function " << BF
483                        << '\n';
484         }
485         BF.HasInternalLabelReference = true;
486         return std::make_pair(
487             BF.addEntryPointAtOffset(Address - BF.getAddress()), 0);
488       }
489     } else {
490       addInterproceduralReference(&BF, Address);
491     }
492   }
493 
494   // With relocations, catch jump table references outside of the basic block
495   // containing the indirect jump.
496   if (HasRelocations) {
497     const MemoryContentsType MemType = analyzeMemoryAt(Address, BF);
498     if (MemType == MemoryContentsType::POSSIBLE_PIC_JUMP_TABLE && IsPCRel) {
499       const MCSymbol *Symbol =
500           getOrCreateJumpTable(BF, Address, JumpTable::JTT_PIC);
501 
502       return std::make_pair(Symbol, 0);
503     }
504   }
505 
506   if (BinaryData *BD = getBinaryDataContainingAddress(Address))
507     return std::make_pair(BD->getSymbol(), Address - BD->getAddress());
508 
509   // TODO: use DWARF info to get size/alignment here?
510   MCSymbol *TargetSymbol = getOrCreateGlobalSymbol(Address, "DATAat");
511   LLVM_DEBUG(dbgs() << "Created symbol " << TargetSymbol->getName() << '\n');
512   return std::make_pair(TargetSymbol, 0);
513 }
514 
515 MemoryContentsType BinaryContext::analyzeMemoryAt(uint64_t Address,
516                                                   BinaryFunction &BF) {
517   if (!isX86())
518     return MemoryContentsType::UNKNOWN;
519 
520   ErrorOr<BinarySection &> Section = getSectionForAddress(Address);
521   if (!Section) {
522     // No section - possibly an absolute address. Since we don't allow
523     // internal function addresses to escape the function scope - we
524     // consider it a tail call.
525     if (opts::Verbosity > 1) {
526       this->errs() << "BOLT-WARNING: no section for address 0x"
527                    << Twine::utohexstr(Address) << " referenced from function "
528                    << BF << '\n';
529     }
530     return MemoryContentsType::UNKNOWN;
531   }
532 
533   if (Section->isVirtual()) {
534     // The contents are filled at runtime.
535     return MemoryContentsType::UNKNOWN;
536   }
537 
538   // No support for jump tables in code yet.
539   if (Section->isText())
540     return MemoryContentsType::UNKNOWN;
541 
542   // Start with checking for PIC jump table. We expect non-PIC jump tables
543   // to have high 32 bits set to 0.
544   if (analyzeJumpTable(Address, JumpTable::JTT_PIC, BF))
545     return MemoryContentsType::POSSIBLE_PIC_JUMP_TABLE;
546 
547   if (analyzeJumpTable(Address, JumpTable::JTT_NORMAL, BF))
548     return MemoryContentsType::POSSIBLE_JUMP_TABLE;
549 
550   return MemoryContentsType::UNKNOWN;
551 }
552 
553 bool BinaryContext::analyzeJumpTable(const uint64_t Address,
554                                      const JumpTable::JumpTableType Type,
555                                      const BinaryFunction &BF,
556                                      const uint64_t NextJTAddress,
557                                      JumpTable::AddressesType *EntriesAsAddress,
558                                      bool *HasEntryInFragment) const {
559   // Is one of the targets __builtin_unreachable?
560   bool HasUnreachable = false;
561 
562   // Does one of the entries match function start address?
563   bool HasStartAsEntry = false;
564 
565   // Number of targets other than __builtin_unreachable.
566   uint64_t NumRealEntries = 0;
567 
568   auto addEntryAddress = [&](uint64_t EntryAddress) {
569     if (EntriesAsAddress)
570       EntriesAsAddress->emplace_back(EntryAddress);
571   };
572 
573   ErrorOr<const BinarySection &> Section = getSectionForAddress(Address);
574   if (!Section)
575     return false;
576 
577   // The upper bound is defined by containing object, section limits, and
578   // the next jump table in memory.
579   uint64_t UpperBound = Section->getEndAddress();
580   const BinaryData *JumpTableBD = getBinaryDataAtAddress(Address);
581   if (JumpTableBD && JumpTableBD->getSize()) {
582     assert(JumpTableBD->getEndAddress() <= UpperBound &&
583            "data object cannot cross a section boundary");
584     UpperBound = JumpTableBD->getEndAddress();
585   }
586   if (NextJTAddress)
587     UpperBound = std::min(NextJTAddress, UpperBound);
588 
589   LLVM_DEBUG({
590     using JTT = JumpTable::JumpTableType;
591     dbgs() << formatv("BOLT-DEBUG: analyzeJumpTable @{0:x} in {1}, JTT={2}\n",
592                       Address, BF.getPrintName(),
593                       Type == JTT::JTT_PIC ? "PIC" : "Normal");
594   });
595   const uint64_t EntrySize = getJumpTableEntrySize(Type);
596   for (uint64_t EntryAddress = Address; EntryAddress <= UpperBound - EntrySize;
597        EntryAddress += EntrySize) {
598     LLVM_DEBUG(dbgs() << "  * Checking 0x" << Twine::utohexstr(EntryAddress)
599                       << " -> ");
600     // Check if there's a proper relocation against the jump table entry.
601     if (HasRelocations) {
602       if (Type == JumpTable::JTT_PIC &&
603           !DataPCRelocations.count(EntryAddress)) {
604         LLVM_DEBUG(
605             dbgs() << "FAIL: JTT_PIC table, no relocation for this address\n");
606         break;
607       }
608       if (Type == JumpTable::JTT_NORMAL && !getRelocationAt(EntryAddress)) {
609         LLVM_DEBUG(
610             dbgs()
611             << "FAIL: JTT_NORMAL table, no relocation for this address\n");
612         break;
613       }
614     }
615 
616     const uint64_t Value =
617         (Type == JumpTable::JTT_PIC)
618             ? Address + *getSignedValueAtAddress(EntryAddress, EntrySize)
619             : *getPointerAtAddress(EntryAddress);
620 
621     // __builtin_unreachable() case.
622     if (Value == BF.getAddress() + BF.getSize()) {
623       addEntryAddress(Value);
624       HasUnreachable = true;
625       LLVM_DEBUG(dbgs() << formatv("OK: {0:x} __builtin_unreachable\n", Value));
626       continue;
627     }
628 
629     // Function start is another special case. It is allowed in the jump table,
630     // but we need at least one another regular entry to distinguish the table
631     // from, e.g. a function pointer array.
632     if (Value == BF.getAddress()) {
633       HasStartAsEntry = true;
634       addEntryAddress(Value);
635       continue;
636     }
637 
638     // Function or one of its fragments.
639     const BinaryFunction *TargetBF = getBinaryFunctionContainingAddress(Value);
640     const bool DoesBelongToFunction =
641         BF.containsAddress(Value) ||
642         (TargetBF && TargetBF->isParentOrChildOf(BF));
643     if (!DoesBelongToFunction) {
644       LLVM_DEBUG({
645         if (!BF.containsAddress(Value)) {
646           dbgs() << "FAIL: function doesn't contain this address\n";
647           if (TargetBF) {
648             dbgs() << "  ! function containing this address: "
649                    << TargetBF->getPrintName() << '\n';
650             if (TargetBF->isFragment()) {
651               dbgs() << "  ! is a fragment";
652               for (BinaryFunction *Parent : TargetBF->ParentFragments)
653                 dbgs() << ", parent: " << Parent->getPrintName();
654               dbgs() << '\n';
655             }
656           }
657         }
658       });
659       break;
660     }
661 
662     // Check there's an instruction at this offset.
663     if (TargetBF->getState() == BinaryFunction::State::Disassembled &&
664         !TargetBF->getInstructionAtOffset(Value - TargetBF->getAddress())) {
665       LLVM_DEBUG(dbgs() << formatv("FAIL: no instruction at {0:x}\n", Value));
666       break;
667     }
668 
669     ++NumRealEntries;
670     LLVM_DEBUG(dbgs() << formatv("OK: {0:x} real entry\n", Value));
671 
672     if (TargetBF != &BF && HasEntryInFragment)
673       *HasEntryInFragment = true;
674     addEntryAddress(Value);
675   }
676 
677   // It's a jump table if the number of real entries is more than 1, or there's
678   // one real entry and one or more special targets. If there are only multiple
679   // special targets, then it's not a jump table.
680   return NumRealEntries + (HasUnreachable || HasStartAsEntry) >= 2;
681 }
682 
683 void BinaryContext::populateJumpTables() {
684   LLVM_DEBUG(dbgs() << "DataPCRelocations: " << DataPCRelocations.size()
685                     << '\n');
686   for (auto JTI = JumpTables.begin(), JTE = JumpTables.end(); JTI != JTE;
687        ++JTI) {
688     JumpTable *JT = JTI->second;
689 
690     bool NonSimpleParent = false;
691     for (BinaryFunction *BF : JT->Parents)
692       NonSimpleParent |= !BF->isSimple();
693     if (NonSimpleParent)
694       continue;
695 
696     uint64_t NextJTAddress = 0;
697     auto NextJTI = std::next(JTI);
698     if (NextJTI != JTE)
699       NextJTAddress = NextJTI->second->getAddress();
700 
701     const bool Success =
702         analyzeJumpTable(JT->getAddress(), JT->Type, *(JT->Parents[0]),
703                          NextJTAddress, &JT->EntriesAsAddress, &JT->IsSplit);
704     if (!Success) {
705       LLVM_DEBUG({
706         dbgs() << "failed to analyze ";
707         JT->print(dbgs());
708         if (NextJTI != JTE) {
709           dbgs() << "next ";
710           NextJTI->second->print(dbgs());
711         }
712       });
713       llvm_unreachable("jump table heuristic failure");
714     }
715     for (BinaryFunction *Frag : JT->Parents) {
716       if (JT->IsSplit)
717         Frag->setHasIndirectTargetToSplitFragment(true);
718       for (uint64_t EntryAddress : JT->EntriesAsAddress)
719         // if target is builtin_unreachable
720         if (EntryAddress == Frag->getAddress() + Frag->getSize()) {
721           Frag->IgnoredBranches.emplace_back(EntryAddress - Frag->getAddress(),
722                                              Frag->getSize());
723         } else if (EntryAddress >= Frag->getAddress() &&
724                    EntryAddress < Frag->getAddress() + Frag->getSize()) {
725           Frag->registerReferencedOffset(EntryAddress - Frag->getAddress());
726         }
727     }
728 
729     // In strict mode, erase PC-relative relocation record. Later we check that
730     // all such records are erased and thus have been accounted for.
731     if (opts::StrictMode && JT->Type == JumpTable::JTT_PIC) {
732       for (uint64_t Address = JT->getAddress();
733            Address < JT->getAddress() + JT->getSize();
734            Address += JT->EntrySize) {
735         DataPCRelocations.erase(DataPCRelocations.find(Address));
736       }
737     }
738 
739     // Mark to skip the function and all its fragments.
740     for (BinaryFunction *Frag : JT->Parents)
741       if (Frag->hasIndirectTargetToSplitFragment())
742         addFragmentsToSkip(Frag);
743   }
744 
745   if (opts::StrictMode && DataPCRelocations.size()) {
746     LLVM_DEBUG({
747       dbgs() << DataPCRelocations.size()
748              << " unclaimed PC-relative relocations left in data:\n";
749       for (uint64_t Reloc : DataPCRelocations)
750         dbgs() << Twine::utohexstr(Reloc) << '\n';
751     });
752     assert(0 && "unclaimed PC-relative relocations left in data\n");
753   }
754   clearList(DataPCRelocations);
755 }
756 
757 void BinaryContext::skipMarkedFragments() {
758   std::vector<BinaryFunction *> FragmentQueue;
759   // Copy the functions to FragmentQueue.
760   FragmentQueue.assign(FragmentsToSkip.begin(), FragmentsToSkip.end());
761   auto addToWorklist = [&](BinaryFunction *Function) -> void {
762     if (FragmentsToSkip.count(Function))
763       return;
764     FragmentQueue.push_back(Function);
765     addFragmentsToSkip(Function);
766   };
767   // Functions containing split jump tables need to be skipped with all
768   // fragments (transitively).
769   for (size_t I = 0; I != FragmentQueue.size(); I++) {
770     BinaryFunction *BF = FragmentQueue[I];
771     assert(FragmentsToSkip.count(BF) &&
772            "internal error in traversing function fragments");
773     if (opts::Verbosity >= 1)
774       this->errs() << "BOLT-WARNING: Ignoring " << BF->getPrintName() << '\n';
775     BF->setSimple(false);
776     BF->setHasIndirectTargetToSplitFragment(true);
777 
778     llvm::for_each(BF->Fragments, addToWorklist);
779     llvm::for_each(BF->ParentFragments, addToWorklist);
780   }
781   if (!FragmentsToSkip.empty())
782     this->errs() << "BOLT-WARNING: skipped " << FragmentsToSkip.size()
783                  << " function" << (FragmentsToSkip.size() == 1 ? "" : "s")
784                  << " due to cold fragments\n";
785 }
786 
787 MCSymbol *BinaryContext::getOrCreateGlobalSymbol(uint64_t Address, Twine Prefix,
788                                                  uint64_t Size,
789                                                  uint16_t Alignment,
790                                                  unsigned Flags) {
791   auto Itr = BinaryDataMap.find(Address);
792   if (Itr != BinaryDataMap.end()) {
793     assert(Itr->second->getSize() == Size || !Size);
794     return Itr->second->getSymbol();
795   }
796 
797   std::string Name = (Prefix + "0x" + Twine::utohexstr(Address)).str();
798   assert(!GlobalSymbols.count(Name) && "created name is not unique");
799   return registerNameAtAddress(Name, Address, Size, Alignment, Flags);
800 }
801 
802 MCSymbol *BinaryContext::getOrCreateUndefinedGlobalSymbol(StringRef Name) {
803   return Ctx->getOrCreateSymbol(Name);
804 }
805 
806 BinaryFunction *BinaryContext::createBinaryFunction(
807     const std::string &Name, BinarySection &Section, uint64_t Address,
808     uint64_t Size, uint64_t SymbolSize, uint16_t Alignment) {
809   auto Result = BinaryFunctions.emplace(
810       Address, BinaryFunction(Name, Section, Address, Size, *this));
811   assert(Result.second == true && "unexpected duplicate function");
812   BinaryFunction *BF = &Result.first->second;
813   registerNameAtAddress(Name, Address, SymbolSize ? SymbolSize : Size,
814                         Alignment);
815   setSymbolToFunctionMap(BF->getSymbol(), BF);
816   return BF;
817 }
818 
819 const MCSymbol *
820 BinaryContext::getOrCreateJumpTable(BinaryFunction &Function, uint64_t Address,
821                                     JumpTable::JumpTableType Type) {
822   // Two fragments of same function access same jump table
823   if (JumpTable *JT = getJumpTableContainingAddress(Address)) {
824     assert(JT->Type == Type && "jump table types have to match");
825     assert(Address == JT->getAddress() && "unexpected non-empty jump table");
826 
827     // Prevent associating a jump table to a specific fragment twice.
828     // This simple check arises from the assumption: no more than 2 fragments.
829     if (JT->Parents.size() == 1 && JT->Parents[0] != &Function) {
830       assert(JT->Parents[0]->isParentOrChildOf(Function) &&
831              "cannot re-use jump table of a different function");
832       // Duplicate the entry for the parent function for easy access
833       JT->Parents.push_back(&Function);
834       if (opts::Verbosity > 2) {
835         this->outs() << "BOLT-INFO: Multiple fragments access same jump table: "
836                      << JT->Parents[0]->getPrintName() << "; "
837                      << Function.getPrintName() << "\n";
838         JT->print(this->outs());
839       }
840       Function.JumpTables.emplace(Address, JT);
841       JT->Parents[0]->setHasIndirectTargetToSplitFragment(true);
842       JT->Parents[1]->setHasIndirectTargetToSplitFragment(true);
843     }
844 
845     bool IsJumpTableParent = false;
846     (void)IsJumpTableParent;
847     for (BinaryFunction *Frag : JT->Parents)
848       if (Frag == &Function)
849         IsJumpTableParent = true;
850     assert(IsJumpTableParent &&
851            "cannot re-use jump table of a different function");
852     return JT->getFirstLabel();
853   }
854 
855   // Re-use the existing symbol if possible.
856   MCSymbol *JTLabel = nullptr;
857   if (BinaryData *Object = getBinaryDataAtAddress(Address)) {
858     if (!isInternalSymbolName(Object->getSymbol()->getName()))
859       JTLabel = Object->getSymbol();
860   }
861 
862   const uint64_t EntrySize = getJumpTableEntrySize(Type);
863   if (!JTLabel) {
864     const std::string JumpTableName = generateJumpTableName(Function, Address);
865     JTLabel = registerNameAtAddress(JumpTableName, Address, 0, EntrySize);
866   }
867 
868   LLVM_DEBUG(dbgs() << "BOLT-DEBUG: creating jump table " << JTLabel->getName()
869                     << " in function " << Function << '\n');
870 
871   JumpTable *JT = new JumpTable(*JTLabel, Address, EntrySize, Type,
872                                 JumpTable::LabelMapType{{0, JTLabel}},
873                                 *getSectionForAddress(Address));
874   JT->Parents.push_back(&Function);
875   if (opts::Verbosity > 2)
876     JT->print(this->outs());
877   JumpTables.emplace(Address, JT);
878 
879   // Duplicate the entry for the parent function for easy access.
880   Function.JumpTables.emplace(Address, JT);
881   return JTLabel;
882 }
883 
884 std::pair<uint64_t, const MCSymbol *>
885 BinaryContext::duplicateJumpTable(BinaryFunction &Function, JumpTable *JT,
886                                   const MCSymbol *OldLabel) {
887   auto L = scopeLock();
888   unsigned Offset = 0;
889   bool Found = false;
890   for (std::pair<const unsigned, MCSymbol *> Elmt : JT->Labels) {
891     if (Elmt.second != OldLabel)
892       continue;
893     Offset = Elmt.first;
894     Found = true;
895     break;
896   }
897   assert(Found && "Label not found");
898   (void)Found;
899   MCSymbol *NewLabel = Ctx->createNamedTempSymbol("duplicatedJT");
900   JumpTable *NewJT =
901       new JumpTable(*NewLabel, JT->getAddress(), JT->EntrySize, JT->Type,
902                     JumpTable::LabelMapType{{Offset, NewLabel}},
903                     *getSectionForAddress(JT->getAddress()));
904   NewJT->Parents = JT->Parents;
905   NewJT->Entries = JT->Entries;
906   NewJT->Counts = JT->Counts;
907   uint64_t JumpTableID = ++DuplicatedJumpTables;
908   // Invert it to differentiate from regular jump tables whose IDs are their
909   // addresses in the input binary memory space
910   JumpTableID = ~JumpTableID;
911   JumpTables.emplace(JumpTableID, NewJT);
912   Function.JumpTables.emplace(JumpTableID, NewJT);
913   return std::make_pair(JumpTableID, NewLabel);
914 }
915 
916 std::string BinaryContext::generateJumpTableName(const BinaryFunction &BF,
917                                                  uint64_t Address) {
918   size_t Id;
919   uint64_t Offset = 0;
920   if (const JumpTable *JT = BF.getJumpTableContainingAddress(Address)) {
921     Offset = Address - JT->getAddress();
922     auto Itr = JT->Labels.find(Offset);
923     if (Itr != JT->Labels.end())
924       return std::string(Itr->second->getName());
925     Id = JumpTableIds.at(JT->getAddress());
926   } else {
927     Id = JumpTableIds[Address] = BF.JumpTables.size();
928   }
929   return ("JUMP_TABLE/" + BF.getOneName().str() + "." + std::to_string(Id) +
930           (Offset ? ("." + std::to_string(Offset)) : ""));
931 }
932 
933 bool BinaryContext::hasValidCodePadding(const BinaryFunction &BF) {
934   // FIXME: aarch64 support is missing.
935   if (!isX86())
936     return true;
937 
938   if (BF.getSize() == BF.getMaxSize())
939     return true;
940 
941   ErrorOr<ArrayRef<unsigned char>> FunctionData = BF.getData();
942   assert(FunctionData && "cannot get function as data");
943 
944   uint64_t Offset = BF.getSize();
945   MCInst Instr;
946   uint64_t InstrSize = 0;
947   uint64_t InstrAddress = BF.getAddress() + Offset;
948   using std::placeholders::_1;
949 
950   // Skip instructions that satisfy the predicate condition.
951   auto skipInstructions = [&](std::function<bool(const MCInst &)> Predicate) {
952     const uint64_t StartOffset = Offset;
953     for (; Offset < BF.getMaxSize();
954          Offset += InstrSize, InstrAddress += InstrSize) {
955       if (!DisAsm->getInstruction(Instr, InstrSize, FunctionData->slice(Offset),
956                                   InstrAddress, nulls()))
957         break;
958       if (!Predicate(Instr))
959         break;
960     }
961 
962     return Offset - StartOffset;
963   };
964 
965   // Skip a sequence of zero bytes.
966   auto skipZeros = [&]() {
967     const uint64_t StartOffset = Offset;
968     for (; Offset < BF.getMaxSize(); ++Offset)
969       if ((*FunctionData)[Offset] != 0)
970         break;
971 
972     return Offset - StartOffset;
973   };
974 
975   // Accept the whole padding area filled with breakpoints.
976   auto isBreakpoint = std::bind(&MCPlusBuilder::isBreakpoint, MIB.get(), _1);
977   if (skipInstructions(isBreakpoint) && Offset == BF.getMaxSize())
978     return true;
979 
980   auto isNoop = std::bind(&MCPlusBuilder::isNoop, MIB.get(), _1);
981 
982   // Some functions have a jump to the next function or to the padding area
983   // inserted after the body.
984   auto isSkipJump = [&](const MCInst &Instr) {
985     uint64_t TargetAddress = 0;
986     if (MIB->isUnconditionalBranch(Instr) &&
987         MIB->evaluateBranch(Instr, InstrAddress, InstrSize, TargetAddress)) {
988       if (TargetAddress >= InstrAddress + InstrSize &&
989           TargetAddress <= BF.getAddress() + BF.getMaxSize()) {
990         return true;
991       }
992     }
993     return false;
994   };
995 
996   // Skip over nops, jumps, and zero padding. Allow interleaving (this happens).
997   while (skipInstructions(isNoop) || skipInstructions(isSkipJump) ||
998          skipZeros())
999     ;
1000 
1001   if (Offset == BF.getMaxSize())
1002     return true;
1003 
1004   if (opts::Verbosity >= 1) {
1005     this->errs() << "BOLT-WARNING: bad padding at address 0x"
1006                  << Twine::utohexstr(BF.getAddress() + BF.getSize())
1007                  << " starting at offset " << (Offset - BF.getSize())
1008                  << " in function " << BF << '\n'
1009                  << FunctionData->slice(BF.getSize(),
1010                                         BF.getMaxSize() - BF.getSize())
1011                  << '\n';
1012   }
1013 
1014   return false;
1015 }
1016 
1017 void BinaryContext::adjustCodePadding() {
1018   for (auto &BFI : BinaryFunctions) {
1019     BinaryFunction &BF = BFI.second;
1020     if (!shouldEmit(BF))
1021       continue;
1022 
1023     if (!hasValidCodePadding(BF)) {
1024       if (HasRelocations) {
1025         if (opts::Verbosity >= 1) {
1026           this->outs() << "BOLT-INFO: function " << BF
1027                        << " has invalid padding. Ignoring the function.\n";
1028         }
1029         BF.setIgnored();
1030       } else {
1031         BF.setMaxSize(BF.getSize());
1032       }
1033     }
1034   }
1035 }
1036 
1037 MCSymbol *BinaryContext::registerNameAtAddress(StringRef Name, uint64_t Address,
1038                                                uint64_t Size,
1039                                                uint16_t Alignment,
1040                                                unsigned Flags) {
1041   // Register the name with MCContext.
1042   MCSymbol *Symbol = Ctx->getOrCreateSymbol(Name);
1043 
1044   auto GAI = BinaryDataMap.find(Address);
1045   BinaryData *BD;
1046   if (GAI == BinaryDataMap.end()) {
1047     ErrorOr<BinarySection &> SectionOrErr = getSectionForAddress(Address);
1048     BinarySection &Section =
1049         SectionOrErr ? SectionOrErr.get() : absoluteSection();
1050     BD = new BinaryData(*Symbol, Address, Size, Alignment ? Alignment : 1,
1051                         Section, Flags);
1052     GAI = BinaryDataMap.emplace(Address, BD).first;
1053     GlobalSymbols[Name] = BD;
1054     updateObjectNesting(GAI);
1055   } else {
1056     BD = GAI->second;
1057     if (!BD->hasName(Name)) {
1058       GlobalSymbols[Name] = BD;
1059       BD->Symbols.push_back(Symbol);
1060     }
1061   }
1062 
1063   return Symbol;
1064 }
1065 
1066 const BinaryData *
1067 BinaryContext::getBinaryDataContainingAddressImpl(uint64_t Address) const {
1068   auto NI = BinaryDataMap.lower_bound(Address);
1069   auto End = BinaryDataMap.end();
1070   if ((NI != End && Address == NI->first) ||
1071       ((NI != BinaryDataMap.begin()) && (NI-- != BinaryDataMap.begin()))) {
1072     if (NI->second->containsAddress(Address))
1073       return NI->second;
1074 
1075     // If this is a sub-symbol, see if a parent data contains the address.
1076     const BinaryData *BD = NI->second->getParent();
1077     while (BD) {
1078       if (BD->containsAddress(Address))
1079         return BD;
1080       BD = BD->getParent();
1081     }
1082   }
1083   return nullptr;
1084 }
1085 
1086 BinaryData *BinaryContext::getGOTSymbol() {
1087   // First tries to find a global symbol with that name
1088   BinaryData *GOTSymBD = getBinaryDataByName("_GLOBAL_OFFSET_TABLE_");
1089   if (GOTSymBD)
1090     return GOTSymBD;
1091 
1092   // This symbol might be hidden from run-time link, so fetch the local
1093   // definition if available.
1094   GOTSymBD = getBinaryDataByName("_GLOBAL_OFFSET_TABLE_/1");
1095   if (!GOTSymBD)
1096     return nullptr;
1097 
1098   // If the local symbol is not unique, fail
1099   unsigned Index = 2;
1100   SmallString<30> Storage;
1101   while (const BinaryData *BD =
1102              getBinaryDataByName(Twine("_GLOBAL_OFFSET_TABLE_/")
1103                                      .concat(Twine(Index++))
1104                                      .toStringRef(Storage)))
1105     if (BD->getAddress() != GOTSymBD->getAddress())
1106       return nullptr;
1107 
1108   return GOTSymBD;
1109 }
1110 
1111 bool BinaryContext::setBinaryDataSize(uint64_t Address, uint64_t Size) {
1112   auto NI = BinaryDataMap.find(Address);
1113   assert(NI != BinaryDataMap.end());
1114   if (NI == BinaryDataMap.end())
1115     return false;
1116   // TODO: it's possible that a jump table starts at the same address
1117   // as a larger blob of private data.  When we set the size of the
1118   // jump table, it might be smaller than the total blob size.  In this
1119   // case we just leave the original size since (currently) it won't really
1120   // affect anything.
1121   assert((!NI->second->Size || NI->second->Size == Size ||
1122           (NI->second->isJumpTable() && NI->second->Size > Size)) &&
1123          "can't change the size of a symbol that has already had its "
1124          "size set");
1125   if (!NI->second->Size) {
1126     NI->second->Size = Size;
1127     updateObjectNesting(NI);
1128     return true;
1129   }
1130   return false;
1131 }
1132 
1133 void BinaryContext::generateSymbolHashes() {
1134   auto isPadding = [](const BinaryData &BD) {
1135     StringRef Contents = BD.getSection().getContents();
1136     StringRef SymData = Contents.substr(BD.getOffset(), BD.getSize());
1137     return (BD.getName().starts_with("HOLEat") ||
1138             SymData.find_first_not_of(0) == StringRef::npos);
1139   };
1140 
1141   uint64_t NumCollisions = 0;
1142   for (auto &Entry : BinaryDataMap) {
1143     BinaryData &BD = *Entry.second;
1144     StringRef Name = BD.getName();
1145 
1146     if (!isInternalSymbolName(Name))
1147       continue;
1148 
1149     // First check if a non-anonymous alias exists and move it to the front.
1150     if (BD.getSymbols().size() > 1) {
1151       auto Itr = llvm::find_if(BD.getSymbols(), [&](const MCSymbol *Symbol) {
1152         return !isInternalSymbolName(Symbol->getName());
1153       });
1154       if (Itr != BD.getSymbols().end()) {
1155         size_t Idx = std::distance(BD.getSymbols().begin(), Itr);
1156         std::swap(BD.getSymbols()[0], BD.getSymbols()[Idx]);
1157         continue;
1158       }
1159     }
1160 
1161     // We have to skip 0 size symbols since they will all collide.
1162     if (BD.getSize() == 0) {
1163       continue;
1164     }
1165 
1166     const uint64_t Hash = BD.getSection().hash(BD);
1167     const size_t Idx = Name.find("0x");
1168     std::string NewName =
1169         (Twine(Name.substr(0, Idx)) + "_" + Twine::utohexstr(Hash)).str();
1170     if (getBinaryDataByName(NewName)) {
1171       // Ignore collisions for symbols that appear to be padding
1172       // (i.e. all zeros or a "hole")
1173       if (!isPadding(BD)) {
1174         if (opts::Verbosity) {
1175           this->errs() << "BOLT-WARNING: collision detected when hashing " << BD
1176                        << " with new name (" << NewName << "), skipping.\n";
1177         }
1178         ++NumCollisions;
1179       }
1180       continue;
1181     }
1182     BD.Symbols.insert(BD.Symbols.begin(), Ctx->getOrCreateSymbol(NewName));
1183     GlobalSymbols[NewName] = &BD;
1184   }
1185   if (NumCollisions) {
1186     this->errs() << "BOLT-WARNING: " << NumCollisions
1187                  << " collisions detected while hashing binary objects";
1188     if (!opts::Verbosity)
1189       this->errs() << ". Use -v=1 to see the list.";
1190     this->errs() << '\n';
1191   }
1192 }
1193 
1194 bool BinaryContext::registerFragment(BinaryFunction &TargetFunction,
1195                                      BinaryFunction &Function) const {
1196   assert(TargetFunction.isFragment() && "TargetFunction must be a fragment");
1197   if (TargetFunction.isChildOf(Function))
1198     return true;
1199   TargetFunction.addParentFragment(Function);
1200   Function.addFragment(TargetFunction);
1201   if (!HasRelocations) {
1202     TargetFunction.setSimple(false);
1203     Function.setSimple(false);
1204   }
1205   if (opts::Verbosity >= 1) {
1206     this->outs() << "BOLT-INFO: marking " << TargetFunction
1207                  << " as a fragment of " << Function << '\n';
1208   }
1209   return true;
1210 }
1211 
1212 void BinaryContext::addAdrpAddRelocAArch64(BinaryFunction &BF,
1213                                            MCInst &LoadLowBits,
1214                                            MCInst &LoadHiBits,
1215                                            uint64_t Target) {
1216   const MCSymbol *TargetSymbol;
1217   uint64_t Addend = 0;
1218   std::tie(TargetSymbol, Addend) = handleAddressRef(Target, BF,
1219                                                     /*IsPCRel*/ true);
1220   int64_t Val;
1221   MIB->replaceImmWithSymbolRef(LoadHiBits, TargetSymbol, Addend, Ctx.get(), Val,
1222                                ELF::R_AARCH64_ADR_PREL_PG_HI21);
1223   MIB->replaceImmWithSymbolRef(LoadLowBits, TargetSymbol, Addend, Ctx.get(),
1224                                Val, ELF::R_AARCH64_ADD_ABS_LO12_NC);
1225 }
1226 
1227 bool BinaryContext::handleAArch64Veneer(uint64_t Address, bool MatchOnly) {
1228   BinaryFunction *TargetFunction = getBinaryFunctionContainingAddress(Address);
1229   if (TargetFunction)
1230     return false;
1231 
1232   ErrorOr<BinarySection &> Section = getSectionForAddress(Address);
1233   assert(Section && "cannot get section for referenced address");
1234   if (!Section->isText())
1235     return false;
1236 
1237   bool Ret = false;
1238   StringRef SectionContents = Section->getContents();
1239   uint64_t Offset = Address - Section->getAddress();
1240   const uint64_t MaxSize = SectionContents.size() - Offset;
1241   const uint8_t *Bytes =
1242       reinterpret_cast<const uint8_t *>(SectionContents.data());
1243   ArrayRef<uint8_t> Data(Bytes + Offset, MaxSize);
1244 
1245   auto matchVeneer = [&](BinaryFunction::InstrMapType &Instructions,
1246                          MCInst &Instruction, uint64_t Offset,
1247                          uint64_t AbsoluteInstrAddr,
1248                          uint64_t TotalSize) -> bool {
1249     MCInst *TargetHiBits, *TargetLowBits;
1250     uint64_t TargetAddress, Count;
1251     Count = MIB->matchLinkerVeneer(Instructions.begin(), Instructions.end(),
1252                                    AbsoluteInstrAddr, Instruction, TargetHiBits,
1253                                    TargetLowBits, TargetAddress);
1254     if (!Count)
1255       return false;
1256 
1257     if (MatchOnly)
1258       return true;
1259 
1260     // NOTE The target symbol was created during disassemble's
1261     // handleExternalReference
1262     const MCSymbol *VeneerSymbol = getOrCreateGlobalSymbol(Address, "FUNCat");
1263     BinaryFunction *Veneer = createBinaryFunction(VeneerSymbol->getName().str(),
1264                                                   *Section, Address, TotalSize);
1265     addAdrpAddRelocAArch64(*Veneer, *TargetLowBits, *TargetHiBits,
1266                            TargetAddress);
1267     MIB->addAnnotation(Instruction, "AArch64Veneer", true);
1268     Veneer->addInstruction(Offset, std::move(Instruction));
1269     --Count;
1270     for (auto It = Instructions.rbegin(); Count != 0; ++It, --Count) {
1271       MIB->addAnnotation(It->second, "AArch64Veneer", true);
1272       Veneer->addInstruction(It->first, std::move(It->second));
1273     }
1274 
1275     Veneer->getOrCreateLocalLabel(Address);
1276     Veneer->setMaxSize(TotalSize);
1277     Veneer->updateState(BinaryFunction::State::Disassembled);
1278     LLVM_DEBUG(dbgs() << "BOLT-DEBUG: handling veneer function at 0x" << Address
1279                       << "\n");
1280     return true;
1281   };
1282 
1283   uint64_t Size = 0, TotalSize = 0;
1284   BinaryFunction::InstrMapType VeneerInstructions;
1285   for (Offset = 0; Offset < MaxSize; Offset += Size) {
1286     MCInst Instruction;
1287     const uint64_t AbsoluteInstrAddr = Address + Offset;
1288     if (!SymbolicDisAsm->getInstruction(Instruction, Size, Data.slice(Offset),
1289                                         AbsoluteInstrAddr, nulls()))
1290       break;
1291 
1292     TotalSize += Size;
1293     if (MIB->isBranch(Instruction)) {
1294       Ret = matchVeneer(VeneerInstructions, Instruction, Offset,
1295                         AbsoluteInstrAddr, TotalSize);
1296       break;
1297     }
1298 
1299     VeneerInstructions.emplace(Offset, std::move(Instruction));
1300   }
1301 
1302   return Ret;
1303 }
1304 
1305 void BinaryContext::processInterproceduralReferences() {
1306   for (const std::pair<BinaryFunction *, uint64_t> &It :
1307        InterproceduralReferences) {
1308     BinaryFunction &Function = *It.first;
1309     uint64_t Address = It.second;
1310     if (!Address || Function.isIgnored())
1311       continue;
1312 
1313     BinaryFunction *TargetFunction =
1314         getBinaryFunctionContainingAddress(Address);
1315     if (&Function == TargetFunction)
1316       continue;
1317 
1318     if (TargetFunction) {
1319       if (TargetFunction->isFragment() &&
1320           !TargetFunction->isChildOf(Function)) {
1321         this->errs()
1322             << "BOLT-WARNING: interprocedural reference between unrelated "
1323                "fragments: "
1324             << Function.getPrintName() << " and "
1325             << TargetFunction->getPrintName() << '\n';
1326       }
1327       if (uint64_t Offset = Address - TargetFunction->getAddress())
1328         TargetFunction->addEntryPointAtOffset(Offset);
1329 
1330       continue;
1331     }
1332 
1333     // Check if address falls in function padding space - this could be
1334     // unmarked data in code. In this case adjust the padding space size.
1335     ErrorOr<BinarySection &> Section = getSectionForAddress(Address);
1336     assert(Section && "cannot get section for referenced address");
1337 
1338     if (!Section->isText())
1339       continue;
1340 
1341     // PLT requires special handling and could be ignored in this context.
1342     StringRef SectionName = Section->getName();
1343     if (SectionName == ".plt" || SectionName == ".plt.got")
1344       continue;
1345 
1346     // Check if it is aarch64 veneer written at Address
1347     if (isAArch64() && handleAArch64Veneer(Address))
1348       continue;
1349 
1350     if (opts::processAllFunctions()) {
1351       this->errs() << "BOLT-ERROR: cannot process binaries with unmarked "
1352                    << "object in code at address 0x"
1353                    << Twine::utohexstr(Address) << " belonging to section "
1354                    << SectionName << " in current mode\n";
1355       exit(1);
1356     }
1357 
1358     TargetFunction = getBinaryFunctionContainingAddress(Address,
1359                                                         /*CheckPastEnd=*/false,
1360                                                         /*UseMaxSize=*/true);
1361     // We are not going to overwrite non-simple functions, but for simple
1362     // ones - adjust the padding size.
1363     if (TargetFunction && TargetFunction->isSimple()) {
1364       this->errs()
1365           << "BOLT-WARNING: function " << *TargetFunction
1366           << " has an object detected in a padding region at address 0x"
1367           << Twine::utohexstr(Address) << '\n';
1368       TargetFunction->setMaxSize(TargetFunction->getSize());
1369     }
1370   }
1371 
1372   InterproceduralReferences.clear();
1373 }
1374 
1375 void BinaryContext::postProcessSymbolTable() {
1376   fixBinaryDataHoles();
1377   bool Valid = true;
1378   for (auto &Entry : BinaryDataMap) {
1379     BinaryData *BD = Entry.second;
1380     if ((BD->getName().starts_with("SYMBOLat") ||
1381          BD->getName().starts_with("DATAat")) &&
1382         !BD->getParent() && !BD->getSize() && !BD->isAbsolute() &&
1383         BD->getSection()) {
1384       this->errs() << "BOLT-WARNING: zero-sized top level symbol: " << *BD
1385                    << "\n";
1386       Valid = false;
1387     }
1388   }
1389   assert(Valid);
1390   (void)Valid;
1391   generateSymbolHashes();
1392 }
1393 
1394 void BinaryContext::foldFunction(BinaryFunction &ChildBF,
1395                                  BinaryFunction &ParentBF) {
1396   assert(!ChildBF.isMultiEntry() && !ParentBF.isMultiEntry() &&
1397          "cannot merge functions with multiple entry points");
1398 
1399   std::unique_lock<llvm::sys::RWMutex> WriteCtxLock(CtxMutex, std::defer_lock);
1400   std::unique_lock<llvm::sys::RWMutex> WriteSymbolMapLock(
1401       SymbolToFunctionMapMutex, std::defer_lock);
1402 
1403   const StringRef ChildName = ChildBF.getOneName();
1404 
1405   // Move symbols over and update bookkeeping info.
1406   for (MCSymbol *Symbol : ChildBF.getSymbols()) {
1407     ParentBF.getSymbols().push_back(Symbol);
1408     WriteSymbolMapLock.lock();
1409     SymbolToFunctionMap[Symbol] = &ParentBF;
1410     WriteSymbolMapLock.unlock();
1411     // NB: there's no need to update BinaryDataMap and GlobalSymbols.
1412   }
1413   ChildBF.getSymbols().clear();
1414 
1415   // Move other names the child function is known under.
1416   llvm::move(ChildBF.Aliases, std::back_inserter(ParentBF.Aliases));
1417   ChildBF.Aliases.clear();
1418 
1419   if (HasRelocations) {
1420     // Merge execution counts of ChildBF into those of ParentBF.
1421     // Without relocations, we cannot reliably merge profiles as both functions
1422     // continue to exist and either one can be executed.
1423     ChildBF.mergeProfileDataInto(ParentBF);
1424 
1425     std::shared_lock<llvm::sys::RWMutex> ReadBfsLock(BinaryFunctionsMutex,
1426                                                      std::defer_lock);
1427     std::unique_lock<llvm::sys::RWMutex> WriteBfsLock(BinaryFunctionsMutex,
1428                                                       std::defer_lock);
1429     // Remove ChildBF from the global set of functions in relocs mode.
1430     ReadBfsLock.lock();
1431     auto FI = BinaryFunctions.find(ChildBF.getAddress());
1432     ReadBfsLock.unlock();
1433 
1434     assert(FI != BinaryFunctions.end() && "function not found");
1435     assert(&ChildBF == &FI->second && "function mismatch");
1436 
1437     WriteBfsLock.lock();
1438     ChildBF.clearDisasmState();
1439     FI = BinaryFunctions.erase(FI);
1440     WriteBfsLock.unlock();
1441 
1442   } else {
1443     // In non-relocation mode we keep the function, but rename it.
1444     std::string NewName = "__ICF_" + ChildName.str();
1445 
1446     WriteCtxLock.lock();
1447     ChildBF.getSymbols().push_back(Ctx->getOrCreateSymbol(NewName));
1448     WriteCtxLock.unlock();
1449 
1450     ChildBF.setFolded(&ParentBF);
1451   }
1452 
1453   ParentBF.setHasFunctionsFoldedInto();
1454 }
1455 
1456 void BinaryContext::fixBinaryDataHoles() {
1457   assert(validateObjectNesting() && "object nesting inconsistency detected");
1458 
1459   for (BinarySection &Section : allocatableSections()) {
1460     std::vector<std::pair<uint64_t, uint64_t>> Holes;
1461 
1462     auto isNotHole = [&Section](const binary_data_iterator &Itr) {
1463       BinaryData *BD = Itr->second;
1464       bool isHole = (!BD->getParent() && !BD->getSize() && BD->isObject() &&
1465                      (BD->getName().starts_with("SYMBOLat0x") ||
1466                       BD->getName().starts_with("DATAat0x") ||
1467                       BD->getName().starts_with("ANONYMOUS")));
1468       return !isHole && BD->getSection() == Section && !BD->getParent();
1469     };
1470 
1471     auto BDStart = BinaryDataMap.begin();
1472     auto BDEnd = BinaryDataMap.end();
1473     auto Itr = FilteredBinaryDataIterator(isNotHole, BDStart, BDEnd);
1474     auto End = FilteredBinaryDataIterator(isNotHole, BDEnd, BDEnd);
1475 
1476     uint64_t EndAddress = Section.getAddress();
1477 
1478     while (Itr != End) {
1479       if (Itr->second->getAddress() > EndAddress) {
1480         uint64_t Gap = Itr->second->getAddress() - EndAddress;
1481         Holes.emplace_back(EndAddress, Gap);
1482       }
1483       EndAddress = Itr->second->getEndAddress();
1484       ++Itr;
1485     }
1486 
1487     if (EndAddress < Section.getEndAddress())
1488       Holes.emplace_back(EndAddress, Section.getEndAddress() - EndAddress);
1489 
1490     // If there is already a symbol at the start of the hole, grow that symbol
1491     // to cover the rest.  Otherwise, create a new symbol to cover the hole.
1492     for (std::pair<uint64_t, uint64_t> &Hole : Holes) {
1493       BinaryData *BD = getBinaryDataAtAddress(Hole.first);
1494       if (BD) {
1495         // BD->getSection() can be != Section if there are sections that
1496         // overlap.  In this case it is probably safe to just skip the holes
1497         // since the overlapping section will not(?) have any symbols in it.
1498         if (BD->getSection() == Section)
1499           setBinaryDataSize(Hole.first, Hole.second);
1500       } else {
1501         getOrCreateGlobalSymbol(Hole.first, "HOLEat", Hole.second, 1);
1502       }
1503     }
1504   }
1505 
1506   assert(validateObjectNesting() && "object nesting inconsistency detected");
1507   assert(validateHoles() && "top level hole detected in object map");
1508 }
1509 
1510 void BinaryContext::printGlobalSymbols(raw_ostream &OS) const {
1511   const BinarySection *CurrentSection = nullptr;
1512   bool FirstSection = true;
1513 
1514   for (auto &Entry : BinaryDataMap) {
1515     const BinaryData *BD = Entry.second;
1516     const BinarySection &Section = BD->getSection();
1517     if (FirstSection || Section != *CurrentSection) {
1518       uint64_t Address, Size;
1519       StringRef Name = Section.getName();
1520       if (Section) {
1521         Address = Section.getAddress();
1522         Size = Section.getSize();
1523       } else {
1524         Address = BD->getAddress();
1525         Size = BD->getSize();
1526       }
1527       OS << "BOLT-INFO: Section " << Name << ", "
1528          << "0x" + Twine::utohexstr(Address) << ":"
1529          << "0x" + Twine::utohexstr(Address + Size) << "/" << Size << "\n";
1530       CurrentSection = &Section;
1531       FirstSection = false;
1532     }
1533 
1534     OS << "BOLT-INFO: ";
1535     const BinaryData *P = BD->getParent();
1536     while (P) {
1537       OS << "  ";
1538       P = P->getParent();
1539     }
1540     OS << *BD << "\n";
1541   }
1542 }
1543 
1544 Expected<unsigned> BinaryContext::getDwarfFile(
1545     StringRef Directory, StringRef FileName, unsigned FileNumber,
1546     std::optional<MD5::MD5Result> Checksum, std::optional<StringRef> Source,
1547     unsigned CUID, unsigned DWARFVersion) {
1548   DwarfLineTable &Table = DwarfLineTablesCUMap[CUID];
1549   return Table.tryGetFile(Directory, FileName, Checksum, Source, DWARFVersion,
1550                           FileNumber);
1551 }
1552 
1553 unsigned BinaryContext::addDebugFilenameToUnit(const uint32_t DestCUID,
1554                                                const uint32_t SrcCUID,
1555                                                unsigned FileIndex) {
1556   DWARFCompileUnit *SrcUnit = DwCtx->getCompileUnitForOffset(SrcCUID);
1557   const DWARFDebugLine::LineTable *LineTable =
1558       DwCtx->getLineTableForUnit(SrcUnit);
1559   const std::vector<DWARFDebugLine::FileNameEntry> &FileNames =
1560       LineTable->Prologue.FileNames;
1561   // Dir indexes start at 1, as DWARF file numbers, and a dir index 0
1562   // means empty dir.
1563   assert(FileIndex > 0 && FileIndex <= FileNames.size() &&
1564          "FileIndex out of range for the compilation unit.");
1565   StringRef Dir = "";
1566   if (FileNames[FileIndex - 1].DirIdx != 0) {
1567     if (std::optional<const char *> DirName = dwarf::toString(
1568             LineTable->Prologue
1569                 .IncludeDirectories[FileNames[FileIndex - 1].DirIdx - 1])) {
1570       Dir = *DirName;
1571     }
1572   }
1573   StringRef FileName = "";
1574   if (std::optional<const char *> FName =
1575           dwarf::toString(FileNames[FileIndex - 1].Name))
1576     FileName = *FName;
1577   assert(FileName != "");
1578   DWARFCompileUnit *DstUnit = DwCtx->getCompileUnitForOffset(DestCUID);
1579   return cantFail(getDwarfFile(Dir, FileName, 0, std::nullopt, std::nullopt,
1580                                DestCUID, DstUnit->getVersion()));
1581 }
1582 
1583 std::vector<BinaryFunction *> BinaryContext::getSortedFunctions() {
1584   std::vector<BinaryFunction *> SortedFunctions(BinaryFunctions.size());
1585   llvm::transform(llvm::make_second_range(BinaryFunctions),
1586                   SortedFunctions.begin(),
1587                   [](BinaryFunction &BF) { return &BF; });
1588 
1589   llvm::stable_sort(SortedFunctions,
1590                     [](const BinaryFunction *A, const BinaryFunction *B) {
1591                       if (A->hasValidIndex() && B->hasValidIndex()) {
1592                         return A->getIndex() < B->getIndex();
1593                       }
1594                       return A->hasValidIndex();
1595                     });
1596   return SortedFunctions;
1597 }
1598 
1599 std::vector<BinaryFunction *> BinaryContext::getAllBinaryFunctions() {
1600   std::vector<BinaryFunction *> AllFunctions;
1601   AllFunctions.reserve(BinaryFunctions.size() + InjectedBinaryFunctions.size());
1602   llvm::transform(llvm::make_second_range(BinaryFunctions),
1603                   std::back_inserter(AllFunctions),
1604                   [](BinaryFunction &BF) { return &BF; });
1605   llvm::copy(InjectedBinaryFunctions, std::back_inserter(AllFunctions));
1606 
1607   return AllFunctions;
1608 }
1609 
1610 std::optional<DWARFUnit *> BinaryContext::getDWOCU(uint64_t DWOId) {
1611   auto Iter = DWOCUs.find(DWOId);
1612   if (Iter == DWOCUs.end())
1613     return std::nullopt;
1614 
1615   return Iter->second;
1616 }
1617 
1618 DWARFContext *BinaryContext::getDWOContext() const {
1619   if (DWOCUs.empty())
1620     return nullptr;
1621   return &DWOCUs.begin()->second->getContext();
1622 }
1623 
1624 /// Handles DWO sections that can either be in .o, .dwo or .dwp files.
1625 void BinaryContext::preprocessDWODebugInfo() {
1626   for (const std::unique_ptr<DWARFUnit> &CU : DwCtx->compile_units()) {
1627     DWARFUnit *const DwarfUnit = CU.get();
1628     if (std::optional<uint64_t> DWOId = DwarfUnit->getDWOId()) {
1629       std::string DWOName = dwarf::toString(
1630           DwarfUnit->getUnitDIE().find(
1631               {dwarf::DW_AT_dwo_name, dwarf::DW_AT_GNU_dwo_name}),
1632           "");
1633       SmallString<16> AbsolutePath;
1634       if (!opts::CompDirOverride.empty()) {
1635         sys::path::append(AbsolutePath, opts::CompDirOverride);
1636         sys::path::append(AbsolutePath, DWOName);
1637       }
1638       DWARFUnit *DWOCU =
1639           DwarfUnit->getNonSkeletonUnitDIE(false, AbsolutePath).getDwarfUnit();
1640       if (!DWOCU->isDWOUnit()) {
1641         this->outs()
1642             << "BOLT-WARNING: Debug Fission: DWO debug information for "
1643             << DWOName
1644             << " was not retrieved and won't be updated. Please check "
1645                "relative path.\n";
1646         continue;
1647       }
1648       DWOCUs[*DWOId] = DWOCU;
1649     }
1650   }
1651   if (!DWOCUs.empty())
1652     this->outs() << "BOLT-INFO: processing split DWARF\n";
1653 }
1654 
1655 void BinaryContext::preprocessDebugInfo() {
1656   struct CURange {
1657     uint64_t LowPC;
1658     uint64_t HighPC;
1659     DWARFUnit *Unit;
1660 
1661     bool operator<(const CURange &Other) const { return LowPC < Other.LowPC; }
1662   };
1663 
1664   // Building a map of address ranges to CUs similar to .debug_aranges and use
1665   // it to assign CU to functions.
1666   std::vector<CURange> AllRanges;
1667   AllRanges.reserve(DwCtx->getNumCompileUnits());
1668   for (const std::unique_ptr<DWARFUnit> &CU : DwCtx->compile_units()) {
1669     Expected<DWARFAddressRangesVector> RangesOrError =
1670         CU->getUnitDIE().getAddressRanges();
1671     if (!RangesOrError) {
1672       consumeError(RangesOrError.takeError());
1673       continue;
1674     }
1675     for (DWARFAddressRange &Range : *RangesOrError) {
1676       // Parts of the debug info could be invalidated due to corresponding code
1677       // being removed from the binary by the linker. Hence we check if the
1678       // address is a valid one.
1679       if (containsAddress(Range.LowPC))
1680         AllRanges.emplace_back(CURange{Range.LowPC, Range.HighPC, CU.get()});
1681     }
1682 
1683     ContainsDwarf5 |= CU->getVersion() >= 5;
1684     ContainsDwarfLegacy |= CU->getVersion() < 5;
1685   }
1686 
1687   llvm::sort(AllRanges);
1688   for (auto &KV : BinaryFunctions) {
1689     const uint64_t FunctionAddress = KV.first;
1690     BinaryFunction &Function = KV.second;
1691 
1692     auto It = llvm::partition_point(
1693         AllRanges, [=](CURange R) { return R.HighPC <= FunctionAddress; });
1694     if (It != AllRanges.end() && It->LowPC <= FunctionAddress)
1695       Function.setDWARFUnit(It->Unit);
1696   }
1697 
1698   // Discover units with debug info that needs to be updated.
1699   for (const auto &KV : BinaryFunctions) {
1700     const BinaryFunction &BF = KV.second;
1701     if (shouldEmit(BF) && BF.getDWARFUnit())
1702       ProcessedCUs.insert(BF.getDWARFUnit());
1703   }
1704 
1705   // Clear debug info for functions from units that we are not going to process.
1706   for (auto &KV : BinaryFunctions) {
1707     BinaryFunction &BF = KV.second;
1708     if (BF.getDWARFUnit() && !ProcessedCUs.count(BF.getDWARFUnit()))
1709       BF.setDWARFUnit(nullptr);
1710   }
1711 
1712   if (opts::Verbosity >= 1) {
1713     this->outs() << "BOLT-INFO: " << ProcessedCUs.size() << " out of "
1714                  << DwCtx->getNumCompileUnits() << " CUs will be updated\n";
1715   }
1716 
1717   preprocessDWODebugInfo();
1718 
1719   // Populate MCContext with DWARF files from all units.
1720   StringRef GlobalPrefix = AsmInfo->getPrivateGlobalPrefix();
1721   for (const std::unique_ptr<DWARFUnit> &CU : DwCtx->compile_units()) {
1722     const uint64_t CUID = CU->getOffset();
1723     DwarfLineTable &BinaryLineTable = getDwarfLineTable(CUID);
1724     BinaryLineTable.setLabel(Ctx->getOrCreateSymbol(
1725         GlobalPrefix + "line_table_start" + Twine(CUID)));
1726 
1727     if (!ProcessedCUs.count(CU.get()))
1728       continue;
1729 
1730     const DWARFDebugLine::LineTable *LineTable =
1731         DwCtx->getLineTableForUnit(CU.get());
1732     const std::vector<DWARFDebugLine::FileNameEntry> &FileNames =
1733         LineTable->Prologue.FileNames;
1734 
1735     uint16_t DwarfVersion = LineTable->Prologue.getVersion();
1736     if (DwarfVersion >= 5) {
1737       std::optional<MD5::MD5Result> Checksum;
1738       if (LineTable->Prologue.ContentTypes.HasMD5)
1739         Checksum = LineTable->Prologue.FileNames[0].Checksum;
1740       std::optional<const char *> Name =
1741           dwarf::toString(CU->getUnitDIE().find(dwarf::DW_AT_name), nullptr);
1742       if (std::optional<uint64_t> DWOID = CU->getDWOId()) {
1743         auto Iter = DWOCUs.find(*DWOID);
1744         assert(Iter != DWOCUs.end() && "DWO CU was not found.");
1745         Name = dwarf::toString(
1746             Iter->second->getUnitDIE().find(dwarf::DW_AT_name), nullptr);
1747       }
1748       BinaryLineTable.setRootFile(CU->getCompilationDir(), *Name, Checksum,
1749                                   std::nullopt);
1750     }
1751 
1752     BinaryLineTable.setDwarfVersion(DwarfVersion);
1753 
1754     // Assign a unique label to every line table, one per CU.
1755     // Make sure empty debug line tables are registered too.
1756     if (FileNames.empty()) {
1757       cantFail(getDwarfFile("", "<unknown>", 0, std::nullopt, std::nullopt,
1758                             CUID, DwarfVersion));
1759       continue;
1760     }
1761     const uint32_t Offset = DwarfVersion < 5 ? 1 : 0;
1762     for (size_t I = 0, Size = FileNames.size(); I != Size; ++I) {
1763       // Dir indexes start at 1, as DWARF file numbers, and a dir index 0
1764       // means empty dir.
1765       StringRef Dir = "";
1766       if (FileNames[I].DirIdx != 0 || DwarfVersion >= 5)
1767         if (std::optional<const char *> DirName = dwarf::toString(
1768                 LineTable->Prologue
1769                     .IncludeDirectories[FileNames[I].DirIdx - Offset]))
1770           Dir = *DirName;
1771       StringRef FileName = "";
1772       if (std::optional<const char *> FName =
1773               dwarf::toString(FileNames[I].Name))
1774         FileName = *FName;
1775       assert(FileName != "");
1776       std::optional<MD5::MD5Result> Checksum;
1777       if (DwarfVersion >= 5 && LineTable->Prologue.ContentTypes.HasMD5)
1778         Checksum = LineTable->Prologue.FileNames[I].Checksum;
1779       cantFail(getDwarfFile(Dir, FileName, 0, Checksum, std::nullopt, CUID,
1780                             DwarfVersion));
1781     }
1782   }
1783 }
1784 
1785 bool BinaryContext::shouldEmit(const BinaryFunction &Function) const {
1786   if (Function.isPseudo())
1787     return false;
1788 
1789   if (opts::processAllFunctions())
1790     return true;
1791 
1792   if (Function.isIgnored())
1793     return false;
1794 
1795   // In relocation mode we will emit non-simple functions with CFG.
1796   // If the function does not have a CFG it should be marked as ignored.
1797   return HasRelocations || Function.isSimple();
1798 }
1799 
1800 void BinaryContext::dump(const MCInst &Inst) const {
1801   if (LLVM_UNLIKELY(!InstPrinter)) {
1802     dbgs() << "Cannot dump for InstPrinter is not initialized.\n";
1803     return;
1804   }
1805   InstPrinter->printInst(&Inst, 0, "", *STI, dbgs());
1806   dbgs() << "\n";
1807 }
1808 
1809 void BinaryContext::printCFI(raw_ostream &OS, const MCCFIInstruction &Inst) {
1810   uint32_t Operation = Inst.getOperation();
1811   switch (Operation) {
1812   case MCCFIInstruction::OpSameValue:
1813     OS << "OpSameValue Reg" << Inst.getRegister();
1814     break;
1815   case MCCFIInstruction::OpRememberState:
1816     OS << "OpRememberState";
1817     break;
1818   case MCCFIInstruction::OpRestoreState:
1819     OS << "OpRestoreState";
1820     break;
1821   case MCCFIInstruction::OpOffset:
1822     OS << "OpOffset Reg" << Inst.getRegister() << " " << Inst.getOffset();
1823     break;
1824   case MCCFIInstruction::OpDefCfaRegister:
1825     OS << "OpDefCfaRegister Reg" << Inst.getRegister();
1826     break;
1827   case MCCFIInstruction::OpDefCfaOffset:
1828     OS << "OpDefCfaOffset " << Inst.getOffset();
1829     break;
1830   case MCCFIInstruction::OpDefCfa:
1831     OS << "OpDefCfa Reg" << Inst.getRegister() << " " << Inst.getOffset();
1832     break;
1833   case MCCFIInstruction::OpRelOffset:
1834     OS << "OpRelOffset Reg" << Inst.getRegister() << " " << Inst.getOffset();
1835     break;
1836   case MCCFIInstruction::OpAdjustCfaOffset:
1837     OS << "OfAdjustCfaOffset " << Inst.getOffset();
1838     break;
1839   case MCCFIInstruction::OpEscape:
1840     OS << "OpEscape";
1841     break;
1842   case MCCFIInstruction::OpRestore:
1843     OS << "OpRestore Reg" << Inst.getRegister();
1844     break;
1845   case MCCFIInstruction::OpUndefined:
1846     OS << "OpUndefined Reg" << Inst.getRegister();
1847     break;
1848   case MCCFIInstruction::OpRegister:
1849     OS << "OpRegister Reg" << Inst.getRegister() << " Reg"
1850        << Inst.getRegister2();
1851     break;
1852   case MCCFIInstruction::OpWindowSave:
1853     OS << "OpWindowSave";
1854     break;
1855   case MCCFIInstruction::OpGnuArgsSize:
1856     OS << "OpGnuArgsSize";
1857     break;
1858   default:
1859     OS << "Op#" << Operation;
1860     break;
1861   }
1862 }
1863 
1864 MarkerSymType BinaryContext::getMarkerType(const SymbolRef &Symbol) const {
1865   // For aarch64 and riscv, the ABI defines mapping symbols so we identify data
1866   // in the code section (see IHI0056B). $x identifies a symbol starting code or
1867   // the end of a data chunk inside code, $d identifies start of data.
1868   if ((!isAArch64() && !isRISCV()) || ELFSymbolRef(Symbol).getSize())
1869     return MarkerSymType::NONE;
1870 
1871   Expected<StringRef> NameOrError = Symbol.getName();
1872   Expected<object::SymbolRef::Type> TypeOrError = Symbol.getType();
1873 
1874   if (!TypeOrError || !NameOrError)
1875     return MarkerSymType::NONE;
1876 
1877   if (*TypeOrError != SymbolRef::ST_Unknown)
1878     return MarkerSymType::NONE;
1879 
1880   if (*NameOrError == "$x" || NameOrError->starts_with("$x."))
1881     return MarkerSymType::CODE;
1882 
1883   // $x<ISA>
1884   if (isRISCV() && NameOrError->starts_with("$x"))
1885     return MarkerSymType::CODE;
1886 
1887   if (*NameOrError == "$d" || NameOrError->starts_with("$d."))
1888     return MarkerSymType::DATA;
1889 
1890   return MarkerSymType::NONE;
1891 }
1892 
1893 bool BinaryContext::isMarker(const SymbolRef &Symbol) const {
1894   return getMarkerType(Symbol) != MarkerSymType::NONE;
1895 }
1896 
1897 static void printDebugInfo(raw_ostream &OS, const MCInst &Instruction,
1898                            const BinaryFunction *Function,
1899                            DWARFContext *DwCtx) {
1900   DebugLineTableRowRef RowRef =
1901       DebugLineTableRowRef::fromSMLoc(Instruction.getLoc());
1902   if (RowRef == DebugLineTableRowRef::NULL_ROW)
1903     return;
1904 
1905   const DWARFDebugLine::LineTable *LineTable;
1906   if (Function && Function->getDWARFUnit() &&
1907       Function->getDWARFUnit()->getOffset() == RowRef.DwCompileUnitIndex) {
1908     LineTable = Function->getDWARFLineTable();
1909   } else {
1910     LineTable = DwCtx->getLineTableForUnit(
1911         DwCtx->getCompileUnitForOffset(RowRef.DwCompileUnitIndex));
1912   }
1913   assert(LineTable && "line table expected for instruction with debug info");
1914 
1915   const DWARFDebugLine::Row &Row = LineTable->Rows[RowRef.RowIndex - 1];
1916   StringRef FileName = "";
1917   if (std::optional<const char *> FName =
1918           dwarf::toString(LineTable->Prologue.FileNames[Row.File - 1].Name))
1919     FileName = *FName;
1920   OS << " # debug line " << FileName << ":" << Row.Line;
1921   if (Row.Column)
1922     OS << ":" << Row.Column;
1923   if (Row.Discriminator)
1924     OS << " discriminator:" << Row.Discriminator;
1925 }
1926 
1927 void BinaryContext::printInstruction(raw_ostream &OS, const MCInst &Instruction,
1928                                      uint64_t Offset,
1929                                      const BinaryFunction *Function,
1930                                      bool PrintMCInst, bool PrintMemData,
1931                                      bool PrintRelocations,
1932                                      StringRef Endl) const {
1933   OS << format("    %08" PRIx64 ": ", Offset);
1934   if (MIB->isCFI(Instruction)) {
1935     uint32_t Offset = Instruction.getOperand(0).getImm();
1936     OS << "\t!CFI\t$" << Offset << "\t; ";
1937     if (Function)
1938       printCFI(OS, *Function->getCFIFor(Instruction));
1939     OS << Endl;
1940     return;
1941   }
1942   InstPrinter->printInst(&Instruction, 0, "", *STI, OS);
1943   if (MIB->isCall(Instruction)) {
1944     if (MIB->isTailCall(Instruction))
1945       OS << " # TAILCALL ";
1946     if (MIB->isInvoke(Instruction)) {
1947       const std::optional<MCPlus::MCLandingPad> EHInfo =
1948           MIB->getEHInfo(Instruction);
1949       OS << " # handler: ";
1950       if (EHInfo->first)
1951         OS << *EHInfo->first;
1952       else
1953         OS << '0';
1954       OS << "; action: " << EHInfo->second;
1955       const int64_t GnuArgsSize = MIB->getGnuArgsSize(Instruction);
1956       if (GnuArgsSize >= 0)
1957         OS << "; GNU_args_size = " << GnuArgsSize;
1958     }
1959   } else if (MIB->isIndirectBranch(Instruction)) {
1960     if (uint64_t JTAddress = MIB->getJumpTable(Instruction)) {
1961       OS << " # JUMPTABLE @0x" << Twine::utohexstr(JTAddress);
1962     } else {
1963       OS << " # UNKNOWN CONTROL FLOW";
1964     }
1965   }
1966   if (std::optional<uint32_t> Offset = MIB->getOffset(Instruction))
1967     OS << " # Offset: " << *Offset;
1968   if (std::optional<uint32_t> Size = MIB->getSize(Instruction))
1969     OS << " # Size: " << *Size;
1970   if (MCSymbol *Label = MIB->getLabel(Instruction))
1971     OS << " # Label: " << *Label;
1972 
1973   MIB->printAnnotations(Instruction, OS);
1974 
1975   if (opts::PrintDebugInfo)
1976     printDebugInfo(OS, Instruction, Function, DwCtx.get());
1977 
1978   if ((opts::PrintRelocations || PrintRelocations) && Function) {
1979     const uint64_t Size = computeCodeSize(&Instruction, &Instruction + 1);
1980     Function->printRelocations(OS, Offset, Size);
1981   }
1982 
1983   OS << Endl;
1984 
1985   if (PrintMCInst) {
1986     Instruction.dump_pretty(OS, InstPrinter.get());
1987     OS << Endl;
1988   }
1989 }
1990 
1991 std::optional<uint64_t>
1992 BinaryContext::getBaseAddressForMapping(uint64_t MMapAddress,
1993                                         uint64_t FileOffset) const {
1994   // Find a segment with a matching file offset.
1995   for (auto &KV : SegmentMapInfo) {
1996     const SegmentInfo &SegInfo = KV.second;
1997     // FileOffset is got from perf event,
1998     // and it is equal to alignDown(SegInfo.FileOffset, pagesize).
1999     // If the pagesize is not equal to SegInfo.Alignment.
2000     // FileOffset and SegInfo.FileOffset should be aligned first,
2001     // and then judge whether they are equal.
2002     if (alignDown(SegInfo.FileOffset, SegInfo.Alignment) ==
2003         alignDown(FileOffset, SegInfo.Alignment)) {
2004       // The function's offset from base address in VAS is aligned by pagesize
2005       // instead of SegInfo.Alignment. Pagesize can't be got from perf events.
2006       // However, The ELF document says that SegInfo.FileOffset should equal
2007       // to SegInfo.Address, modulo the pagesize.
2008       // Reference: https://refspecs.linuxfoundation.org/elf/elf.pdf
2009 
2010       // So alignDown(SegInfo.Address, pagesize) can be calculated by:
2011       // alignDown(SegInfo.Address, pagesize)
2012       //   = SegInfo.Address - (SegInfo.Address % pagesize)
2013       //   = SegInfo.Address - (SegInfo.FileOffset % pagesize)
2014       //   = SegInfo.Address - SegInfo.FileOffset +
2015       //     alignDown(SegInfo.FileOffset, pagesize)
2016       //   = SegInfo.Address - SegInfo.FileOffset + FileOffset
2017       return MMapAddress - (SegInfo.Address - SegInfo.FileOffset + FileOffset);
2018     }
2019   }
2020 
2021   return std::nullopt;
2022 }
2023 
2024 ErrorOr<BinarySection &> BinaryContext::getSectionForAddress(uint64_t Address) {
2025   auto SI = AddressToSection.upper_bound(Address);
2026   if (SI != AddressToSection.begin()) {
2027     --SI;
2028     uint64_t UpperBound = SI->first + SI->second->getSize();
2029     if (!SI->second->getSize())
2030       UpperBound += 1;
2031     if (UpperBound > Address)
2032       return *SI->second;
2033   }
2034   return std::make_error_code(std::errc::bad_address);
2035 }
2036 
2037 ErrorOr<StringRef>
2038 BinaryContext::getSectionNameForAddress(uint64_t Address) const {
2039   if (ErrorOr<const BinarySection &> Section = getSectionForAddress(Address))
2040     return Section->getName();
2041   return std::make_error_code(std::errc::bad_address);
2042 }
2043 
2044 BinarySection &BinaryContext::registerSection(BinarySection *Section) {
2045   auto Res = Sections.insert(Section);
2046   (void)Res;
2047   assert(Res.second && "can't register the same section twice.");
2048 
2049   // Only register allocatable sections in the AddressToSection map.
2050   if (Section->isAllocatable() && Section->getAddress())
2051     AddressToSection.insert(std::make_pair(Section->getAddress(), Section));
2052   NameToSection.insert(
2053       std::make_pair(std::string(Section->getName()), Section));
2054   if (Section->hasSectionRef())
2055     SectionRefToBinarySection.insert(
2056         std::make_pair(Section->getSectionRef(), Section));
2057 
2058   LLVM_DEBUG(dbgs() << "BOLT-DEBUG: registering " << *Section << "\n");
2059   return *Section;
2060 }
2061 
2062 BinarySection &BinaryContext::registerSection(SectionRef Section) {
2063   return registerSection(new BinarySection(*this, Section));
2064 }
2065 
2066 BinarySection &
2067 BinaryContext::registerSection(const Twine &SectionName,
2068                                const BinarySection &OriginalSection) {
2069   return registerSection(
2070       new BinarySection(*this, SectionName, OriginalSection));
2071 }
2072 
2073 BinarySection &
2074 BinaryContext::registerOrUpdateSection(const Twine &Name, unsigned ELFType,
2075                                        unsigned ELFFlags, uint8_t *Data,
2076                                        uint64_t Size, unsigned Alignment) {
2077   auto NamedSections = getSectionByName(Name);
2078   if (NamedSections.begin() != NamedSections.end()) {
2079     assert(std::next(NamedSections.begin()) == NamedSections.end() &&
2080            "can only update unique sections");
2081     BinarySection *Section = NamedSections.begin()->second;
2082 
2083     LLVM_DEBUG(dbgs() << "BOLT-DEBUG: updating " << *Section << " -> ");
2084     const bool Flag = Section->isAllocatable();
2085     (void)Flag;
2086     Section->update(Data, Size, Alignment, ELFType, ELFFlags);
2087     LLVM_DEBUG(dbgs() << *Section << "\n");
2088     // FIXME: Fix section flags/attributes for MachO.
2089     if (isELF())
2090       assert(Flag == Section->isAllocatable() &&
2091              "can't change section allocation status");
2092     return *Section;
2093   }
2094 
2095   return registerSection(
2096       new BinarySection(*this, Name, Data, Size, Alignment, ELFType, ELFFlags));
2097 }
2098 
2099 void BinaryContext::deregisterSectionName(const BinarySection &Section) {
2100   auto NameRange = NameToSection.equal_range(Section.getName().str());
2101   while (NameRange.first != NameRange.second) {
2102     if (NameRange.first->second == &Section) {
2103       NameToSection.erase(NameRange.first);
2104       break;
2105     }
2106     ++NameRange.first;
2107   }
2108 }
2109 
2110 void BinaryContext::deregisterUnusedSections() {
2111   ErrorOr<BinarySection &> AbsSection = getUniqueSectionByName("<absolute>");
2112   for (auto SI = Sections.begin(); SI != Sections.end();) {
2113     BinarySection *Section = *SI;
2114     // We check getOutputData() instead of getOutputSize() because sometimes
2115     // zero-sized .text.cold sections are allocated.
2116     if (Section->hasSectionRef() || Section->getOutputData() ||
2117         (AbsSection && Section == &AbsSection.get())) {
2118       ++SI;
2119       continue;
2120     }
2121 
2122     LLVM_DEBUG(dbgs() << "LLVM-DEBUG: deregistering " << Section->getName()
2123                       << '\n';);
2124     deregisterSectionName(*Section);
2125     SI = Sections.erase(SI);
2126     delete Section;
2127   }
2128 }
2129 
2130 bool BinaryContext::deregisterSection(BinarySection &Section) {
2131   BinarySection *SectionPtr = &Section;
2132   auto Itr = Sections.find(SectionPtr);
2133   if (Itr != Sections.end()) {
2134     auto Range = AddressToSection.equal_range(SectionPtr->getAddress());
2135     while (Range.first != Range.second) {
2136       if (Range.first->second == SectionPtr) {
2137         AddressToSection.erase(Range.first);
2138         break;
2139       }
2140       ++Range.first;
2141     }
2142 
2143     deregisterSectionName(*SectionPtr);
2144     Sections.erase(Itr);
2145     delete SectionPtr;
2146     return true;
2147   }
2148   return false;
2149 }
2150 
2151 void BinaryContext::renameSection(BinarySection &Section,
2152                                   const Twine &NewName) {
2153   auto Itr = Sections.find(&Section);
2154   assert(Itr != Sections.end() && "Section must exist to be renamed.");
2155   Sections.erase(Itr);
2156 
2157   deregisterSectionName(Section);
2158 
2159   Section.Name = NewName.str();
2160   Section.setOutputName(Section.Name);
2161 
2162   NameToSection.insert(std::make_pair(Section.Name, &Section));
2163 
2164   // Reinsert with the new name.
2165   Sections.insert(&Section);
2166 }
2167 
2168 void BinaryContext::printSections(raw_ostream &OS) const {
2169   for (BinarySection *const &Section : Sections)
2170     OS << "BOLT-INFO: " << *Section << "\n";
2171 }
2172 
2173 BinarySection &BinaryContext::absoluteSection() {
2174   if (ErrorOr<BinarySection &> Section = getUniqueSectionByName("<absolute>"))
2175     return *Section;
2176   return registerOrUpdateSection("<absolute>", ELF::SHT_NULL, 0u);
2177 }
2178 
2179 ErrorOr<uint64_t> BinaryContext::getUnsignedValueAtAddress(uint64_t Address,
2180                                                            size_t Size) const {
2181   const ErrorOr<const BinarySection &> Section = getSectionForAddress(Address);
2182   if (!Section)
2183     return std::make_error_code(std::errc::bad_address);
2184 
2185   if (Section->isVirtual())
2186     return 0;
2187 
2188   DataExtractor DE(Section->getContents(), AsmInfo->isLittleEndian(),
2189                    AsmInfo->getCodePointerSize());
2190   auto ValueOffset = static_cast<uint64_t>(Address - Section->getAddress());
2191   return DE.getUnsigned(&ValueOffset, Size);
2192 }
2193 
2194 ErrorOr<uint64_t> BinaryContext::getSignedValueAtAddress(uint64_t Address,
2195                                                          size_t Size) const {
2196   const ErrorOr<const BinarySection &> Section = getSectionForAddress(Address);
2197   if (!Section)
2198     return std::make_error_code(std::errc::bad_address);
2199 
2200   if (Section->isVirtual())
2201     return 0;
2202 
2203   DataExtractor DE(Section->getContents(), AsmInfo->isLittleEndian(),
2204                    AsmInfo->getCodePointerSize());
2205   auto ValueOffset = static_cast<uint64_t>(Address - Section->getAddress());
2206   return DE.getSigned(&ValueOffset, Size);
2207 }
2208 
2209 void BinaryContext::addRelocation(uint64_t Address, MCSymbol *Symbol,
2210                                   uint64_t Type, uint64_t Addend,
2211                                   uint64_t Value) {
2212   ErrorOr<BinarySection &> Section = getSectionForAddress(Address);
2213   assert(Section && "cannot find section for address");
2214   Section->addRelocation(Address - Section->getAddress(), Symbol, Type, Addend,
2215                          Value);
2216 }
2217 
2218 void BinaryContext::addDynamicRelocation(uint64_t Address, MCSymbol *Symbol,
2219                                          uint64_t Type, uint64_t Addend,
2220                                          uint64_t Value) {
2221   ErrorOr<BinarySection &> Section = getSectionForAddress(Address);
2222   assert(Section && "cannot find section for address");
2223   Section->addDynamicRelocation(Address - Section->getAddress(), Symbol, Type,
2224                                 Addend, Value);
2225 }
2226 
2227 bool BinaryContext::removeRelocationAt(uint64_t Address) {
2228   ErrorOr<BinarySection &> Section = getSectionForAddress(Address);
2229   assert(Section && "cannot find section for address");
2230   return Section->removeRelocationAt(Address - Section->getAddress());
2231 }
2232 
2233 const Relocation *BinaryContext::getRelocationAt(uint64_t Address) const {
2234   ErrorOr<const BinarySection &> Section = getSectionForAddress(Address);
2235   if (!Section)
2236     return nullptr;
2237 
2238   return Section->getRelocationAt(Address - Section->getAddress());
2239 }
2240 
2241 const Relocation *
2242 BinaryContext::getDynamicRelocationAt(uint64_t Address) const {
2243   ErrorOr<const BinarySection &> Section = getSectionForAddress(Address);
2244   if (!Section)
2245     return nullptr;
2246 
2247   return Section->getDynamicRelocationAt(Address - Section->getAddress());
2248 }
2249 
2250 void BinaryContext::markAmbiguousRelocations(BinaryData &BD,
2251                                              const uint64_t Address) {
2252   auto setImmovable = [&](BinaryData &BD) {
2253     BinaryData *Root = BD.getAtomicRoot();
2254     LLVM_DEBUG(if (Root->isMoveable()) {
2255       dbgs() << "BOLT-DEBUG: setting " << *Root << " as immovable "
2256              << "due to ambiguous relocation referencing 0x"
2257              << Twine::utohexstr(Address) << '\n';
2258     });
2259     Root->setIsMoveable(false);
2260   };
2261 
2262   if (Address == BD.getAddress()) {
2263     setImmovable(BD);
2264 
2265     // Set previous symbol as immovable
2266     BinaryData *Prev = getBinaryDataContainingAddress(Address - 1);
2267     if (Prev && Prev->getEndAddress() == BD.getAddress())
2268       setImmovable(*Prev);
2269   }
2270 
2271   if (Address == BD.getEndAddress()) {
2272     setImmovable(BD);
2273 
2274     // Set next symbol as immovable
2275     BinaryData *Next = getBinaryDataContainingAddress(BD.getEndAddress());
2276     if (Next && Next->getAddress() == BD.getEndAddress())
2277       setImmovable(*Next);
2278   }
2279 }
2280 
2281 BinaryFunction *BinaryContext::getFunctionForSymbol(const MCSymbol *Symbol,
2282                                                     uint64_t *EntryDesc) {
2283   std::shared_lock<llvm::sys::RWMutex> Lock(SymbolToFunctionMapMutex);
2284   auto BFI = SymbolToFunctionMap.find(Symbol);
2285   if (BFI == SymbolToFunctionMap.end())
2286     return nullptr;
2287 
2288   BinaryFunction *BF = BFI->second;
2289   if (EntryDesc)
2290     *EntryDesc = BF->getEntryIDForSymbol(Symbol);
2291 
2292   return BF;
2293 }
2294 
2295 std::string
2296 BinaryContext::generateBugReportMessage(StringRef Message,
2297                                         const BinaryFunction &Function) const {
2298   std::string Msg;
2299   raw_string_ostream SS(Msg);
2300   SS << "=======================================\n";
2301   SS << "BOLT is unable to proceed because it couldn't properly understand "
2302         "this function.\n";
2303   SS << "If you are running the most recent version of BOLT, you may "
2304         "want to "
2305         "report this and paste this dump.\nPlease check that there is no "
2306         "sensitive contents being shared in this dump.\n";
2307   SS << "\nOffending function: " << Function.getPrintName() << "\n\n";
2308   ScopedPrinter SP(SS);
2309   SP.printBinaryBlock("Function contents", *Function.getData());
2310   SS << "\n";
2311   const_cast<BinaryFunction &>(Function).print(SS, "");
2312   SS << "ERROR: " << Message;
2313   SS << "\n=======================================\n";
2314   return Msg;
2315 }
2316 
2317 BinaryFunction *
2318 BinaryContext::createInjectedBinaryFunction(const std::string &Name,
2319                                             bool IsSimple) {
2320   InjectedBinaryFunctions.push_back(new BinaryFunction(Name, *this, IsSimple));
2321   BinaryFunction *BF = InjectedBinaryFunctions.back();
2322   setSymbolToFunctionMap(BF->getSymbol(), BF);
2323   BF->CurrentState = BinaryFunction::State::CFG;
2324   return BF;
2325 }
2326 
2327 std::pair<size_t, size_t>
2328 BinaryContext::calculateEmittedSize(BinaryFunction &BF, bool FixBranches) {
2329   // Adjust branch instruction to match the current layout.
2330   if (FixBranches)
2331     BF.fixBranches();
2332 
2333   // Create local MC context to isolate the effect of ephemeral code emission.
2334   IndependentCodeEmitter MCEInstance = createIndependentMCCodeEmitter();
2335   MCContext *LocalCtx = MCEInstance.LocalCtx.get();
2336   MCAsmBackend *MAB =
2337       TheTarget->createMCAsmBackend(*STI, *MRI, MCTargetOptions());
2338 
2339   SmallString<256> Code;
2340   raw_svector_ostream VecOS(Code);
2341 
2342   std::unique_ptr<MCObjectWriter> OW = MAB->createObjectWriter(VecOS);
2343   std::unique_ptr<MCStreamer> Streamer(TheTarget->createMCObjectStreamer(
2344       *TheTriple, *LocalCtx, std::unique_ptr<MCAsmBackend>(MAB), std::move(OW),
2345       std::unique_ptr<MCCodeEmitter>(MCEInstance.MCE.release()), *STI,
2346       /*RelaxAll=*/false,
2347       /*IncrementalLinkerCompatible=*/false,
2348       /*DWARFMustBeAtTheEnd=*/false));
2349 
2350   Streamer->initSections(false, *STI);
2351 
2352   MCSection *Section = MCEInstance.LocalMOFI->getTextSection();
2353   Section->setHasInstructions(true);
2354 
2355   // Create symbols in the LocalCtx so that they get destroyed with it.
2356   MCSymbol *StartLabel = LocalCtx->createTempSymbol();
2357   MCSymbol *EndLabel = LocalCtx->createTempSymbol();
2358 
2359   Streamer->switchSection(Section);
2360   Streamer->emitLabel(StartLabel);
2361   emitFunctionBody(*Streamer, BF, BF.getLayout().getMainFragment(),
2362                    /*EmitCodeOnly=*/true);
2363   Streamer->emitLabel(EndLabel);
2364 
2365   using LabelRange = std::pair<const MCSymbol *, const MCSymbol *>;
2366   SmallVector<LabelRange> SplitLabels;
2367   for (FunctionFragment &FF : BF.getLayout().getSplitFragments()) {
2368     MCSymbol *const SplitStartLabel = LocalCtx->createTempSymbol();
2369     MCSymbol *const SplitEndLabel = LocalCtx->createTempSymbol();
2370     SplitLabels.emplace_back(SplitStartLabel, SplitEndLabel);
2371 
2372     MCSectionELF *const SplitSection = LocalCtx->getELFSection(
2373         BF.getCodeSectionName(FF.getFragmentNum()), ELF::SHT_PROGBITS,
2374         ELF::SHF_EXECINSTR | ELF::SHF_ALLOC);
2375     SplitSection->setHasInstructions(true);
2376     Streamer->switchSection(SplitSection);
2377 
2378     Streamer->emitLabel(SplitStartLabel);
2379     emitFunctionBody(*Streamer, BF, FF, /*EmitCodeOnly=*/true);
2380     Streamer->emitLabel(SplitEndLabel);
2381     // To avoid calling MCObjectStreamer::flushPendingLabels() which is
2382     // private
2383     Streamer->emitBytes(StringRef(""));
2384     Streamer->switchSection(Section);
2385   }
2386 
2387   // To avoid calling MCObjectStreamer::flushPendingLabels() which is private or
2388   // MCStreamer::Finish(), which does more than we want
2389   Streamer->emitBytes(StringRef(""));
2390 
2391   MCAssembler &Assembler =
2392       static_cast<MCObjectStreamer *>(Streamer.get())->getAssembler();
2393   MCAsmLayout Layout(Assembler);
2394   Assembler.layout(Layout);
2395 
2396   // Obtain fragment sizes.
2397   std::vector<uint64_t> FragmentSizes;
2398   // Main fragment size.
2399   const uint64_t HotSize =
2400       Layout.getSymbolOffset(*EndLabel) - Layout.getSymbolOffset(*StartLabel);
2401   FragmentSizes.push_back(HotSize);
2402   // Split fragment sizes.
2403   uint64_t ColdSize = 0;
2404   for (const auto &Labels : SplitLabels) {
2405     uint64_t Size = Layout.getSymbolOffset(*Labels.second) -
2406                     Layout.getSymbolOffset(*Labels.first);
2407     FragmentSizes.push_back(Size);
2408     ColdSize += Size;
2409   }
2410 
2411   // Populate new start and end offsets of each basic block.
2412   uint64_t FragmentIndex = 0;
2413   for (FunctionFragment &FF : BF.getLayout().fragments()) {
2414     BinaryBasicBlock *PrevBB = nullptr;
2415     for (BinaryBasicBlock *BB : FF) {
2416       const uint64_t BBStartOffset = Layout.getSymbolOffset(*(BB->getLabel()));
2417       BB->setOutputStartAddress(BBStartOffset);
2418       if (PrevBB)
2419         PrevBB->setOutputEndAddress(BBStartOffset);
2420       PrevBB = BB;
2421     }
2422     if (PrevBB)
2423       PrevBB->setOutputEndAddress(FragmentSizes[FragmentIndex]);
2424     FragmentIndex++;
2425   }
2426 
2427   // Clean-up the effect of the code emission.
2428   for (const MCSymbol &Symbol : Assembler.symbols()) {
2429     MCSymbol *MutableSymbol = const_cast<MCSymbol *>(&Symbol);
2430     MutableSymbol->setUndefined();
2431     MutableSymbol->setIsRegistered(false);
2432   }
2433 
2434   return std::make_pair(HotSize, ColdSize);
2435 }
2436 
2437 bool BinaryContext::validateInstructionEncoding(
2438     ArrayRef<uint8_t> InputSequence) const {
2439   MCInst Inst;
2440   uint64_t InstSize;
2441   DisAsm->getInstruction(Inst, InstSize, InputSequence, 0, nulls());
2442   assert(InstSize == InputSequence.size() &&
2443          "Disassembled instruction size does not match the sequence.");
2444 
2445   SmallString<256> Code;
2446   SmallVector<MCFixup, 4> Fixups;
2447 
2448   MCE->encodeInstruction(Inst, Code, Fixups, *STI);
2449   auto OutputSequence = ArrayRef<uint8_t>((uint8_t *)Code.data(), Code.size());
2450   if (InputSequence != OutputSequence) {
2451     if (opts::Verbosity > 1) {
2452       this->errs() << "BOLT-WARNING: mismatched encoding detected\n"
2453                    << "      input: " << InputSequence << '\n'
2454                    << "     output: " << OutputSequence << '\n';
2455     }
2456     return false;
2457   }
2458 
2459   return true;
2460 }
2461 
2462 uint64_t BinaryContext::getHotThreshold() const {
2463   static uint64_t Threshold = 0;
2464   if (Threshold == 0) {
2465     Threshold = std::max(
2466         (uint64_t)opts::ExecutionCountThreshold,
2467         NumProfiledFuncs ? SumExecutionCount / (2 * NumProfiledFuncs) : 1);
2468   }
2469   return Threshold;
2470 }
2471 
2472 BinaryFunction *BinaryContext::getBinaryFunctionContainingAddress(
2473     uint64_t Address, bool CheckPastEnd, bool UseMaxSize) {
2474   auto FI = BinaryFunctions.upper_bound(Address);
2475   if (FI == BinaryFunctions.begin())
2476     return nullptr;
2477   --FI;
2478 
2479   const uint64_t UsedSize =
2480       UseMaxSize ? FI->second.getMaxSize() : FI->second.getSize();
2481 
2482   if (Address >= FI->first + UsedSize + (CheckPastEnd ? 1 : 0))
2483     return nullptr;
2484 
2485   return &FI->second;
2486 }
2487 
2488 BinaryFunction *BinaryContext::getBinaryFunctionAtAddress(uint64_t Address) {
2489   // First, try to find a function starting at the given address. If the
2490   // function was folded, this will get us the original folded function if it
2491   // wasn't removed from the list, e.g. in non-relocation mode.
2492   auto BFI = BinaryFunctions.find(Address);
2493   if (BFI != BinaryFunctions.end())
2494     return &BFI->second;
2495 
2496   // We might have folded the function matching the object at the given
2497   // address. In such case, we look for a function matching the symbol
2498   // registered at the original address. The new function (the one that the
2499   // original was folded into) will hold the symbol.
2500   if (const BinaryData *BD = getBinaryDataAtAddress(Address)) {
2501     uint64_t EntryID = 0;
2502     BinaryFunction *BF = getFunctionForSymbol(BD->getSymbol(), &EntryID);
2503     if (BF && EntryID == 0)
2504       return BF;
2505   }
2506   return nullptr;
2507 }
2508 
2509 DebugAddressRangesVector BinaryContext::translateModuleAddressRanges(
2510     const DWARFAddressRangesVector &InputRanges) const {
2511   DebugAddressRangesVector OutputRanges;
2512 
2513   for (const DWARFAddressRange Range : InputRanges) {
2514     auto BFI = BinaryFunctions.lower_bound(Range.LowPC);
2515     while (BFI != BinaryFunctions.end()) {
2516       const BinaryFunction &Function = BFI->second;
2517       if (Function.getAddress() >= Range.HighPC)
2518         break;
2519       const DebugAddressRangesVector FunctionRanges =
2520           Function.getOutputAddressRanges();
2521       llvm::move(FunctionRanges, std::back_inserter(OutputRanges));
2522       std::advance(BFI, 1);
2523     }
2524   }
2525 
2526   return OutputRanges;
2527 }
2528 
2529 } // namespace bolt
2530 } // namespace llvm
2531