xref: /llvm-project/bolt/lib/Core/BinaryContext.cpp (revision 3023b15fb1ec00dbe6a1cb630236125f500978ef)
1 //===- bolt/Core/BinaryContext.cpp - Low-level context --------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements the BinaryContext class.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "bolt/Core/BinaryContext.h"
14 #include "bolt/Core/BinaryEmitter.h"
15 #include "bolt/Core/BinaryFunction.h"
16 #include "bolt/Utils/CommandLineOpts.h"
17 #include "bolt/Utils/Utils.h"
18 #include "llvm/ADT/STLExtras.h"
19 #include "llvm/ADT/Twine.h"
20 #include "llvm/DebugInfo/DWARF/DWARFCompileUnit.h"
21 #include "llvm/DebugInfo/DWARF/DWARFFormValue.h"
22 #include "llvm/DebugInfo/DWARF/DWARFUnit.h"
23 #include "llvm/MC/MCAssembler.h"
24 #include "llvm/MC/MCContext.h"
25 #include "llvm/MC/MCDisassembler/MCDisassembler.h"
26 #include "llvm/MC/MCInstPrinter.h"
27 #include "llvm/MC/MCObjectStreamer.h"
28 #include "llvm/MC/MCObjectWriter.h"
29 #include "llvm/MC/MCRegisterInfo.h"
30 #include "llvm/MC/MCSectionELF.h"
31 #include "llvm/MC/MCStreamer.h"
32 #include "llvm/MC/MCSubtargetInfo.h"
33 #include "llvm/MC/MCSymbol.h"
34 #include "llvm/Support/CommandLine.h"
35 #include "llvm/Support/Error.h"
36 #include "llvm/Support/Regex.h"
37 #include <algorithm>
38 #include <functional>
39 #include <iterator>
40 #include <unordered_set>
41 
42 using namespace llvm;
43 
44 #undef  DEBUG_TYPE
45 #define DEBUG_TYPE "bolt"
46 
47 namespace opts {
48 
49 cl::opt<bool> NoHugePages("no-huge-pages",
50                           cl::desc("use regular size pages for code alignment"),
51                           cl::Hidden, cl::cat(BoltCategory));
52 
53 static cl::opt<bool>
54 PrintDebugInfo("print-debug-info",
55   cl::desc("print debug info when printing functions"),
56   cl::Hidden,
57   cl::ZeroOrMore,
58   cl::cat(BoltCategory));
59 
60 cl::opt<bool> PrintRelocations(
61     "print-relocations",
62     cl::desc("print relocations when printing functions/objects"), cl::Hidden,
63     cl::cat(BoltCategory));
64 
65 static cl::opt<bool>
66 PrintMemData("print-mem-data",
67   cl::desc("print memory data annotations when printing functions"),
68   cl::Hidden,
69   cl::ZeroOrMore,
70   cl::cat(BoltCategory));
71 
72 cl::opt<std::string> CompDirOverride(
73     "comp-dir-override",
74     cl::desc("overrides DW_AT_comp_dir, and provides an alterantive base "
75              "location, which is used with DW_AT_dwo_name to construct a path "
76              "to *.dwo files."),
77     cl::Hidden, cl::init(""), cl::cat(BoltCategory));
78 } // namespace opts
79 
80 namespace llvm {
81 namespace bolt {
82 
// Out-of-line definition of BOLTError's static ID member.
// NOTE(review): presumably the type-identification tag used by LLVM's
// ErrorInfo machinery (only its address would matter) - confirm against the
// BOLTError declaration.
char BOLTError::ID = 0;
84 
/// Build an error that records whether it is fatal (\p IsFatal) and stores
/// the message \p S, which is rendered to a string right away.
BOLTError::BOLTError(bool IsFatal, const Twine &S)
    : IsFatal(IsFatal), Msg(S.str()) {}
87 
88 void BOLTError::log(raw_ostream &OS) const {
89   if (IsFatal)
90     OS << "FATAL ";
91   StringRef ErrMsg = StringRef(Msg);
92   // Prepend our error prefix if it is missing
93   if (ErrMsg.empty()) {
94     OS << "BOLT-ERROR\n";
95   } else {
96     if (!ErrMsg.starts_with("BOLT-ERROR"))
97       OS << "BOLT-ERROR: ";
98     OS << ErrMsg << "\n";
99   }
100 }
101 
/// BOLT errors are not mapped onto a std::error_code; this always returns
/// inconvertibleErrorCode().
std::error_code BOLTError::convertToErrorCode() const {
  return inconvertibleErrorCode();
}
105 
/// Wrap message \p S in a BOLTError whose fatal flag is false.
Error createNonFatalBOLTError(const Twine &S) {
  return make_error<BOLTError>(/*IsFatal*/ false, S);
}
109 
/// Wrap message \p S in a BOLTError whose fatal flag is true.
Error createFatalBOLTError(const Twine &S) {
  return make_error<BOLTError>(/*IsFatal*/ true, S);
}
113 
114 void BinaryContext::logBOLTErrorsAndQuitOnFatal(Error E) {
115   handleAllErrors(Error(std::move(E)), [&](const BOLTError &E) {
116     if (!E.getMessage().empty())
117       E.log(this->errs());
118     if (E.isFatal())
119       exit(1);
120   });
121 }
122 
123 BinaryContext::BinaryContext(std::unique_ptr<MCContext> Ctx,
124                              std::unique_ptr<DWARFContext> DwCtx,
125                              std::unique_ptr<Triple> TheTriple,
126                              const Target *TheTarget, std::string TripleName,
127                              std::unique_ptr<MCCodeEmitter> MCE,
128                              std::unique_ptr<MCObjectFileInfo> MOFI,
129                              std::unique_ptr<const MCAsmInfo> AsmInfo,
130                              std::unique_ptr<const MCInstrInfo> MII,
131                              std::unique_ptr<const MCSubtargetInfo> STI,
132                              std::unique_ptr<MCInstPrinter> InstPrinter,
133                              std::unique_ptr<const MCInstrAnalysis> MIA,
134                              std::unique_ptr<MCPlusBuilder> MIB,
135                              std::unique_ptr<const MCRegisterInfo> MRI,
136                              std::unique_ptr<MCDisassembler> DisAsm,
137                              JournalingStreams Logger)
138     : Ctx(std::move(Ctx)), DwCtx(std::move(DwCtx)),
139       TheTriple(std::move(TheTriple)), TheTarget(TheTarget),
140       TripleName(TripleName), MCE(std::move(MCE)), MOFI(std::move(MOFI)),
141       AsmInfo(std::move(AsmInfo)), MII(std::move(MII)), STI(std::move(STI)),
142       InstPrinter(std::move(InstPrinter)), MIA(std::move(MIA)),
143       MIB(std::move(MIB)), MRI(std::move(MRI)), DisAsm(std::move(DisAsm)),
144       Logger(Logger), InitialDynoStats(isAArch64()) {
145   Relocation::Arch = this->TheTriple->getArch();
146   RegularPageSize = isAArch64() ? RegularPageSizeAArch64 : RegularPageSizeX86;
147   PageAlign = opts::NoHugePages ? RegularPageSize : HugePageSize;
148 }
149 
150 BinaryContext::~BinaryContext() {
151   for (BinarySection *Section : Sections)
152     delete Section;
153   for (BinaryFunction *InjectedFunction : InjectedBinaryFunctions)
154     delete InjectedFunction;
155   for (std::pair<const uint64_t, JumpTable *> JTI : JumpTables)
156     delete JTI.second;
157   clearBinaryData();
158 }
159 
/// Create a BinaryContext for the target described by \p TheTriple.
///
/// \param TheTriple     target triple; only x86_64, aarch64 and riscv64 are
///                      accepted, anything else yields an error.
/// \param InputFileName passed to BinaryContext::setFilename().
/// \param Features      must be null for x86_64 and aarch64 and non-null for
///                      riscv64. For riscv64 the "relax" feature is added to
///                      it (the object is modified in place).
/// \param IsPIC         selects the LSDA encoding and is forwarded to the
///                      MCObjectFileInfo; HasFixedLoadAddress is set to
///                      !IsPIC.
/// \param DwCtx         DWARF context handed over to the new BinaryContext.
/// \param Logger        output streams handed over to the new BinaryContext.
///
/// \returns the new context, or an error if the architecture is unsupported,
/// the \p Features precondition is violated, or one of the checked MC
/// components cannot be created for the target. Note that the \p Features
/// violations are reported via createFatalBOLTError() while the remaining
/// failures use createStringError().
Expected<std::unique_ptr<BinaryContext>> BinaryContext::createBinaryContext(
    Triple TheTriple, StringRef InputFileName, SubtargetFeatures *Features,
    bool IsPIC, std::unique_ptr<DWARFContext> DwCtx, JournalingStreams Logger) {
  // Pick the target-registry architecture name and the subtarget feature
  // string for the requested architecture.
  StringRef ArchName = "";
  std::string FeaturesStr = "";
  switch (TheTriple.getArch()) {
  case llvm::Triple::x86_64:
    if (Features)
      return createFatalBOLTError(
          "x86_64 target does not use SubtargetFeatures");
    ArchName = "x86-64";
    FeaturesStr = "+nopl";
    break;
  case llvm::Triple::aarch64:
    if (Features)
      return createFatalBOLTError(
          "AArch64 target does not use SubtargetFeatures");
    ArchName = "aarch64";
    FeaturesStr = "+all";
    break;
  case llvm::Triple::riscv64: {
    ArchName = "riscv64";
    if (!Features)
      return createFatalBOLTError("RISCV target needs SubtargetFeatures");
    // We rely on relaxation for some transformations (e.g., promoting all calls
    // to PseudoCALL and then making JITLink relax them). Since the relax
    // feature is not stored in the object file, we manually enable it.
    Features->AddFeature("relax");
    FeaturesStr = Features->getString();
    break;
  }
  default:
    return createStringError(std::errc::not_supported,
                             "BOLT-ERROR: Unrecognized machine in ELF file");
  }

  const std::string TripleName = TheTriple.str();

  // Note: this local is a std::string that receives the lookup failure
  // message; within this function it hides the llvm::Error type name.
  std::string Error;
  const Target *TheTarget =
      TargetRegistry::lookupTarget(std::string(ArchName), TheTriple, Error);
  if (!TheTarget)
    return createStringError(make_error_code(std::errc::not_supported),
                             Twine("BOLT-ERROR: ", Error));

  std::unique_ptr<const MCRegisterInfo> MRI(
      TheTarget->createMCRegInfo(TripleName));
  if (!MRI)
    return createStringError(
        make_error_code(std::errc::not_supported),
        Twine("BOLT-ERROR: no register info for target ", TripleName));

  // Set up disassembler.
  std::unique_ptr<MCAsmInfo> AsmInfo(
      TheTarget->createMCAsmInfo(*MRI, TripleName, MCTargetOptions()));
  if (!AsmInfo)
    return createStringError(
        make_error_code(std::errc::not_supported),
        Twine("BOLT-ERROR: no assembly info for target ", TripleName));
  // BOLT creates "func@PLT" symbols for PLT entries. In function assembly dump
  // we want to emit such names as using @PLT without double quotes to convey
  // variant kind to the assembler. BOLT doesn't rely on the linker so we can
  // override the default AsmInfo behavior to emit names the way we want.
  AsmInfo->setAllowAtInName(true);

  std::unique_ptr<const MCSubtargetInfo> STI(
      TheTarget->createMCSubtargetInfo(TripleName, "", FeaturesStr));
  if (!STI)
    return createStringError(
        make_error_code(std::errc::not_supported),
        Twine("BOLT-ERROR: no subtarget info for target ", TripleName));

  std::unique_ptr<const MCInstrInfo> MII(TheTarget->createMCInstrInfo());
  if (!MII)
    return createStringError(
        make_error_code(std::errc::not_supported),
        Twine("BOLT-ERROR: no instruction info for target ", TripleName));

  // The MCContext keeps raw pointers to AsmInfo, MRI and STI; ownership of
  // those objects is transferred to the BinaryContext further below.
  std::unique_ptr<MCContext> Ctx(
      new MCContext(TheTriple, AsmInfo.get(), MRI.get(), STI.get()));
  // NOTE(review): unlike the other components, MOFI is not null-checked
  // before use - confirm createMCObjectFileInfo cannot fail here.
  std::unique_ptr<MCObjectFileInfo> MOFI(
      TheTarget->createMCObjectFileInfo(*Ctx, IsPIC));
  Ctx->setObjectFileInfo(MOFI.get());
  // We do not support X86 Large code model. Change this in the future.
  bool Large = false;
  if (TheTriple.getArch() == llvm::Triple::aarch64)
    Large = true;
  // Non-PIC: absolute pointer (large) or 4-byte unsigned data. PIC: PC-relative
  // signed data, 8 bytes wide when large and 4 bytes otherwise.
  unsigned LSDAEncoding =
      Large ? dwarf::DW_EH_PE_absptr : dwarf::DW_EH_PE_udata4;
  if (IsPIC) {
    LSDAEncoding = dwarf::DW_EH_PE_pcrel |
                   (Large ? dwarf::DW_EH_PE_sdata8 : dwarf::DW_EH_PE_sdata4);
  }

  std::unique_ptr<MCDisassembler> DisAsm(
      TheTarget->createMCDisassembler(*STI, *Ctx));

  if (!DisAsm)
    return createStringError(
        make_error_code(std::errc::not_supported),
        Twine("BOLT-ERROR: no disassembler info for target ", TripleName));

  std::unique_ptr<const MCInstrAnalysis> MIA(
      TheTarget->createMCInstrAnalysis(MII.get()));
  if (!MIA)
    return createStringError(
        make_error_code(std::errc::not_supported),
        Twine("BOLT-ERROR: failed to create instruction analysis for target ",
              TripleName));

  int AsmPrinterVariant = AsmInfo->getAssemblerDialect();
  std::unique_ptr<MCInstPrinter> InstructionPrinter(
      TheTarget->createMCInstPrinter(TheTriple, AsmPrinterVariant, *AsmInfo,
                                     *MII, *MRI));
  if (!InstructionPrinter)
    return createStringError(
        make_error_code(std::errc::not_supported),
        Twine("BOLT-ERROR: no instruction printer for target ", TripleName));
  InstructionPrinter->setPrintImmHex(true);

  // NOTE(review): the code emitter is not null-checked - confirm that is
  // intentional.
  std::unique_ptr<MCCodeEmitter> MCE(
      TheTarget->createMCCodeEmitter(*MII, *Ctx));

  // The MCPlusBuilder (MIB) slot is passed as nullptr here; nothing in this
  // function fills it in.
  auto BC = std::make_unique<BinaryContext>(
      std::move(Ctx), std::move(DwCtx), std::make_unique<Triple>(TheTriple),
      TheTarget, std::string(TripleName), std::move(MCE), std::move(MOFI),
      std::move(AsmInfo), std::move(MII), std::move(STI),
      std::move(InstructionPrinter), std::move(MIA), nullptr, std::move(MRI),
      std::move(DisAsm), Logger);

  BC->LSDAEncoding = LSDAEncoding;

  BC->MAB = std::unique_ptr<MCAsmBackend>(
      BC->TheTarget->createMCAsmBackend(*BC->STI, *BC->MRI, MCTargetOptions()));

  BC->setFilename(InputFileName);

  BC->HasFixedLoadAddress = !IsPIC;

  // A second disassembler instance, separate from DisAsm above, stored as
  // SymbolicDisAsm.
  BC->SymbolicDisAsm = std::unique_ptr<MCDisassembler>(
      BC->TheTarget->createMCDisassembler(*BC->STI, *BC->Ctx));

  if (!BC->SymbolicDisAsm)
    return createStringError(
        make_error_code(std::errc::not_supported),
        Twine("BOLT-ERROR: no disassembler info for target ", TripleName));

  return std::move(BC);
}
311 
312 bool BinaryContext::forceSymbolRelocations(StringRef SymbolName) const {
313   if (opts::HotText &&
314       (SymbolName == "__hot_start" || SymbolName == "__hot_end"))
315     return true;
316 
317   if (opts::HotData &&
318       (SymbolName == "__hot_data_start" || SymbolName == "__hot_data_end"))
319     return true;
320 
321   if (SymbolName == "_end")
322     return true;
323 
324   return false;
325 }
326 
/// Create an object writer that writes to \p OS by delegating to the
/// assembler backend (MAB).
std::unique_ptr<MCObjectWriter>
BinaryContext::createObjectWriter(raw_pwrite_stream &OS) {
  return MAB->createObjectWriter(OS);
}
331 
332 bool BinaryContext::validateObjectNesting() const {
333   auto Itr = BinaryDataMap.begin();
334   auto End = BinaryDataMap.end();
335   bool Valid = true;
336   while (Itr != End) {
337     auto Next = std::next(Itr);
338     while (Next != End &&
339            Itr->second->getSection() == Next->second->getSection() &&
340            Itr->second->containsRange(Next->second->getAddress(),
341                                       Next->second->getSize())) {
342       if (Next->second->Parent != Itr->second) {
343         this->errs() << "BOLT-WARNING: object nesting incorrect for:\n"
344                      << "BOLT-WARNING:  " << *Itr->second << "\n"
345                      << "BOLT-WARNING:  " << *Next->second << "\n";
346         Valid = false;
347       }
348       ++Next;
349     }
350     Itr = Next;
351   }
352   return Valid;
353 }
354 
355 bool BinaryContext::validateHoles() const {
356   bool Valid = true;
357   for (BinarySection &Section : sections()) {
358     for (const Relocation &Rel : Section.relocations()) {
359       uint64_t RelAddr = Rel.Offset + Section.getAddress();
360       const BinaryData *BD = getBinaryDataContainingAddress(RelAddr);
361       if (!BD) {
362         this->errs()
363             << "BOLT-WARNING: no BinaryData found for relocation at address"
364             << " 0x" << Twine::utohexstr(RelAddr) << " in " << Section.getName()
365             << "\n";
366         Valid = false;
367       } else if (!BD->getAtomicRoot()) {
368         this->errs()
369             << "BOLT-WARNING: no atomic BinaryData found for relocation at "
370             << "address 0x" << Twine::utohexstr(RelAddr) << " in "
371             << Section.getName() << "\n";
372         Valid = false;
373       }
374     }
375   }
376   return Valid;
377 }
378 
/// Update the Parent links around the BinaryDataMap entry \p GAI.
///
/// Two steps are performed:
///  1. If \p GAI is not the first map entry, the previous entry and its chain
///     of parents are examined to decide which object (if any) becomes the
///     parent of \p GAI.
///  2. If the object at \p GAI has a non-zero size, following entries whose
///     range is contained in it (or in its parent, when it has one) are
///     re-parented.
void BinaryContext::updateObjectNesting(BinaryDataMapType::iterator GAI) {
  const uint64_t Address = GAI->second->getAddress();
  const uint64_t Size = GAI->second->getSize();

  // Set the parent of the entry at Itr to NewParent. If that entry previously
  // had a (non-null) parent, also re-parent the run of directly following
  // entries that still point at that same old parent.
  auto fixParents = [&](BinaryDataMapType::iterator Itr,
                        BinaryData *NewParent) {
    BinaryData *OldParent = Itr->second->Parent;
    Itr->second->Parent = NewParent;
    ++Itr;
    while (Itr != BinaryDataMap.end() && OldParent &&
           Itr->second->Parent == OldParent) {
      Itr->second->Parent = NewParent;
      ++Itr;
    }
  };

  // Check if the previous symbol contains the newly added symbol.
  if (GAI != BinaryDataMap.begin()) {
    BinaryData *Prev = std::prev(GAI)->second;
    // Walk from the previous entry up through its ancestors.
    // NOTE(review): every iteration overwrites GAI's parent, so the value
    // left after the loop comes from the last ancestor visited - confirm
    // this is the intended result.
    while (Prev) {
      if (Prev->getSection() == GAI->second->getSection() &&
          Prev->containsRange(Address, Size)) {
        fixParents(GAI, Prev);
      } else {
        fixParents(GAI, nullptr);
      }
      Prev = Prev->Parent;
    }
  }

  // Check if the newly added symbol contains any subsequent symbols.
  if (Size != 0) {
    // Children are attached to GAI's parent when it has one, otherwise to the
    // new object itself. Only range containment is tested here (no section
    // comparison).
    BinaryData *BD = GAI->second->Parent ? GAI->second->Parent : GAI->second;
    auto Itr = std::next(GAI);
    while (
        Itr != BinaryDataMap.end() &&
        BD->containsRange(Itr->second->getAddress(), Itr->second->getSize())) {
      Itr->second->Parent = BD;
      ++Itr;
    }
  }
}
421 
422 iterator_range<BinaryContext::binary_data_iterator>
423 BinaryContext::getSubBinaryData(BinaryData *BD) {
424   auto Start = std::next(BinaryDataMap.find(BD->getAddress()));
425   auto End = Start;
426   while (End != BinaryDataMap.end() && BD->isAncestorOf(End->second))
427     ++End;
428   return make_range(Start, End);
429 }
430 
/// Turn a reference from function \p BF to \p Address into a symbol plus an
/// offset from that symbol.
///
/// The checks are tried in this order; the first one that matches returns:
///  1. (AArch64 only) constant-island access, either in \p BF itself or in
///     the function owning the island that covers \p Address.
///  2. An address inside \p BF other than its start: it is registered as an
///     extra entry point and \p BF is flagged as having an internal label
///     reference. A text address outside \p BF is recorded as an
///     interprocedural reference and processing continues.
///  3. With relocations enabled and \p IsPCRel set, a possible PIC jump
///     table at \p Address.
///  4. A BinaryData object containing \p Address; the offset is the distance
///     from the start of that object.
///  5. Otherwise a new "DATAat" global symbol is created for \p Address.
///
/// \returns the symbol and the offset to add to it (0 in all cases except 4).
std::pair<const MCSymbol *, uint64_t>
BinaryContext::handleAddressRef(uint64_t Address, BinaryFunction &BF,
                                bool IsPCRel) {
  if (isAArch64()) {
    // Check if this is an access to a constant island and create bookkeeping
    // to keep track of it and emit it later as part of this function.
    if (MCSymbol *IslandSym = BF.getOrCreateIslandAccess(Address))
      return std::make_pair(IslandSym, 0);

    // Detect custom code written in assembly that refers to arbitrary
    // constant islands from other functions. Write this reference so we
    // can pull this constant island and emit it as part of this function
    // too.
    auto IslandIter = AddressToConstantIslandMap.lower_bound(Address);

    // lower_bound gives the first key >= Address; step back one entry unless
    // that key is exactly Address, so that IslandIter names the closest
    // entry at or below Address (when there is one).
    if (IslandIter != AddressToConstantIslandMap.begin() &&
        (IslandIter == AddressToConstantIslandMap.end() ||
         IslandIter->first > Address))
      --IslandIter;

    if (IslandIter != AddressToConstantIslandMap.end()) {
      // Fall-back to referencing the original constant island in the presence
      // of dynamic relocs, as we currently do not support cloning them.
      // Notice: we might fail to link because of this, if the original constant
      // island we are referring would be emitted too far away.
      if (IslandIter->second->hasDynamicRelocationAtIsland()) {
        MCSymbol *IslandSym =
            IslandIter->second->getOrCreateIslandAccess(Address);
        if (IslandSym)
          return std::make_pair(IslandSym, 0);
      } else if (MCSymbol *IslandSym =
                     IslandIter->second->getOrCreateProxyIslandAccess(Address,
                                                                      BF)) {
        BF.createIslandDependency(IslandSym, IslandIter->second);
        return std::make_pair(IslandSym, 0);
      }
    }
  }

  // Note that the address does not necessarily have to reside inside
  // a section, it could be an absolute address too.
  ErrorOr<BinarySection &> Section = getSectionForAddress(Address);
  if (Section && Section->isText()) {
    if (BF.containsAddress(Address, /*UseMaxSize=*/isAArch64())) {
      // A reference to the function's own start address is not handled here;
      // it falls through to the checks below.
      if (Address != BF.getAddress()) {
        // The address could potentially escape. Mark it as another entry
        // point into the function.
        if (opts::Verbosity >= 1) {
          this->outs() << "BOLT-INFO: potentially escaped address 0x"
                       << Twine::utohexstr(Address) << " in function " << BF
                       << '\n';
        }
        BF.HasInternalLabelReference = true;
        return std::make_pair(
            BF.addEntryPointAtOffset(Address - BF.getAddress()), 0);
      }
    } else {
      addInterproceduralReference(&BF, Address);
    }
  }

  // With relocations, catch jump table references outside of the basic block
  // containing the indirect jump.
  if (HasRelocations) {
    const MemoryContentsType MemType = analyzeMemoryAt(Address, BF);
    if (MemType == MemoryContentsType::POSSIBLE_PIC_JUMP_TABLE && IsPCRel) {
      const MCSymbol *Symbol =
          getOrCreateJumpTable(BF, Address, JumpTable::JTT_PIC);

      return std::make_pair(Symbol, 0);
    }
  }

  if (BinaryData *BD = getBinaryDataContainingAddress(Address))
    return std::make_pair(BD->getSymbol(), Address - BD->getAddress());

  // TODO: use DWARF info to get size/alignment here?
  MCSymbol *TargetSymbol = getOrCreateGlobalSymbol(Address, "DATAat");
  LLVM_DEBUG(dbgs() << "Created symbol " << TargetSymbol->getName() << '\n');
  return std::make_pair(TargetSymbol, 0);
}
512 
513 MemoryContentsType BinaryContext::analyzeMemoryAt(uint64_t Address,
514                                                   BinaryFunction &BF) {
515   if (!isX86())
516     return MemoryContentsType::UNKNOWN;
517 
518   ErrorOr<BinarySection &> Section = getSectionForAddress(Address);
519   if (!Section) {
520     // No section - possibly an absolute address. Since we don't allow
521     // internal function addresses to escape the function scope - we
522     // consider it a tail call.
523     if (opts::Verbosity > 1) {
524       this->errs() << "BOLT-WARNING: no section for address 0x"
525                    << Twine::utohexstr(Address) << " referenced from function "
526                    << BF << '\n';
527     }
528     return MemoryContentsType::UNKNOWN;
529   }
530 
531   if (Section->isVirtual()) {
532     // The contents are filled at runtime.
533     return MemoryContentsType::UNKNOWN;
534   }
535 
536   // No support for jump tables in code yet.
537   if (Section->isText())
538     return MemoryContentsType::UNKNOWN;
539 
540   // Start with checking for PIC jump table. We expect non-PIC jump tables
541   // to have high 32 bits set to 0.
542   if (analyzeJumpTable(Address, JumpTable::JTT_PIC, BF))
543     return MemoryContentsType::POSSIBLE_PIC_JUMP_TABLE;
544 
545   if (analyzeJumpTable(Address, JumpTable::JTT_NORMAL, BF))
546     return MemoryContentsType::POSSIBLE_JUMP_TABLE;
547 
548   return MemoryContentsType::UNKNOWN;
549 }
550 
/// Heuristically decide whether the memory starting at \p Address holds a
/// jump table of kind \p Type belonging to function \p BF.
///
/// Entries are scanned one by one from \p Address until an upper bound is
/// reached or an entry fails validation. An entry is accepted when its
/// target is the address right past \p BF (treated as __builtin_unreachable),
/// the start of \p BF, or an address inside \p BF or a related fragment.
///
/// \param Address            start of the candidate table.
/// \param Type               JTT_PIC (entries are signed values added to
///                           \p Address) or JTT_NORMAL (entries are pointers).
/// \param BF                 function the table is assumed to belong to.
/// \param NextJTAddress      when non-zero, caps the scan at this address.
/// \param EntriesAsAddress   when non-null, receives the target address of
///                           every accepted entry.
/// \param HasEntryInFragment when non-null, set to true if an accepted
///                           regular entry targets a function other than
///                           \p BF.
///
/// \returns true if the accepted entries include at least two regular ones,
/// or one regular entry plus at least one special entry (function start or
/// unreachable).
bool BinaryContext::analyzeJumpTable(const uint64_t Address,
                                     const JumpTable::JumpTableType Type,
                                     const BinaryFunction &BF,
                                     const uint64_t NextJTAddress,
                                     JumpTable::AddressesType *EntriesAsAddress,
                                     bool *HasEntryInFragment) const {
  // Target address of __builtin_unreachable.
  const uint64_t UnreachableAddress = BF.getAddress() + BF.getSize();

  // Is one of the targets __builtin_unreachable?
  bool HasUnreachable = false;

  // Does one of the entries match function start address?
  bool HasStartAsEntry = false;

  // Number of targets other than __builtin_unreachable.
  uint64_t NumRealEntries = 0;

  // Size of the jump table without trailing __builtin_unreachable entries.
  size_t TrimmedSize = 0;

  // Record an accepted entry (no-op when the caller did not ask for the
  // entries). TrimmedSize tracks the table length up to and including the
  // last entry that is not unreachable.
  auto addEntryAddress = [&](uint64_t EntryAddress, bool Unreachable = false) {
    if (!EntriesAsAddress)
      return;
    EntriesAsAddress->emplace_back(EntryAddress);
    if (!Unreachable)
      TrimmedSize = EntriesAsAddress->size();
  };

  ErrorOr<const BinarySection &> Section = getSectionForAddress(Address);
  if (!Section)
    return false;

  // The upper bound is defined by containing object, section limits, and
  // the next jump table in memory.
  uint64_t UpperBound = Section->getEndAddress();
  const BinaryData *JumpTableBD = getBinaryDataAtAddress(Address);
  if (JumpTableBD && JumpTableBD->getSize()) {
    assert(JumpTableBD->getEndAddress() <= UpperBound &&
           "data object cannot cross a section boundary");
    UpperBound = JumpTableBD->getEndAddress();
  }
  if (NextJTAddress)
    UpperBound = std::min(NextJTAddress, UpperBound);

  LLVM_DEBUG({
    using JTT = JumpTable::JumpTableType;
    dbgs() << formatv("BOLT-DEBUG: analyzeJumpTable @{0:x} in {1}, JTT={2}\n",
                      Address, BF.getPrintName(),
                      Type == JTT::JTT_PIC ? "PIC" : "Normal");
  });
  const uint64_t EntrySize = getJumpTableEntrySize(Type);
  // NOTE(review): UpperBound - EntrySize is unsigned arithmetic and would wrap
  // if UpperBound were ever smaller than EntrySize - presumably impossible for
  // real section addresses; confirm.
  for (uint64_t EntryAddress = Address; EntryAddress <= UpperBound - EntrySize;
       EntryAddress += EntrySize) {
    LLVM_DEBUG(dbgs() << "  * Checking 0x" << Twine::utohexstr(EntryAddress)
                      << " -> ");
    // Check if there's a proper relocation against the jump table entry.
    if (HasRelocations) {
      if (Type == JumpTable::JTT_PIC &&
          !DataPCRelocations.count(EntryAddress)) {
        LLVM_DEBUG(
            dbgs() << "FAIL: JTT_PIC table, no relocation for this address\n");
        break;
      }
      if (Type == JumpTable::JTT_NORMAL && !getRelocationAt(EntryAddress)) {
        LLVM_DEBUG(
            dbgs()
            << "FAIL: JTT_NORMAL table, no relocation for this address\n");
        break;
      }
    }

    // Decode the entry's target: PIC entries hold a signed value that is
    // added to the table start (Address, not EntryAddress); normal entries
    // hold the target pointer itself.
    // NOTE(review): both results are dereferenced without checking for
    // failure - confirm the reads cannot fail within [Address, UpperBound).
    const uint64_t Value =
        (Type == JumpTable::JTT_PIC)
            ? Address + *getSignedValueAtAddress(EntryAddress, EntrySize)
            : *getPointerAtAddress(EntryAddress);

    // __builtin_unreachable() case.
    if (Value == UnreachableAddress) {
      addEntryAddress(Value, /*Unreachable*/ true);
      HasUnreachable = true;
      LLVM_DEBUG(dbgs() << formatv("OK: {0:x} __builtin_unreachable\n", Value));
      continue;
    }

    // Function start is another special case. It is allowed in the jump table,
    // but we need at least one another regular entry to distinguish the table
    // from, e.g. a function pointer array.
    if (Value == BF.getAddress()) {
      HasStartAsEntry = true;
      addEntryAddress(Value);
      continue;
    }

    // Function or one of its fragments.
    const BinaryFunction *TargetBF = getBinaryFunctionContainingAddress(Value);
    const bool DoesBelongToFunction =
        BF.containsAddress(Value) ||
        (TargetBF && TargetBF->isParentOrChildOf(BF));
    if (!DoesBelongToFunction) {
      LLVM_DEBUG({
        if (!BF.containsAddress(Value)) {
          dbgs() << "FAIL: function doesn't contain this address\n";
          if (TargetBF) {
            dbgs() << "  ! function containing this address: "
                   << TargetBF->getPrintName() << '\n';
            if (TargetBF->isFragment()) {
              dbgs() << "  ! is a fragment";
              for (BinaryFunction *Parent : TargetBF->ParentFragments)
                dbgs() << ", parent: " << Parent->getPrintName();
              dbgs() << '\n';
            }
          }
        }
      });
      break;
    }

    // Check there's an instruction at this offset.
    // NOTE(review): TargetBF is dereferenced unconditionally here, but the
    // check above also passes when BF.containsAddress(Value) is true and
    // TargetBF is null - confirm the lookup cannot return null in that case.
    if (TargetBF->getState() == BinaryFunction::State::Disassembled &&
        !TargetBF->getInstructionAtOffset(Value - TargetBF->getAddress())) {
      LLVM_DEBUG(dbgs() << formatv("FAIL: no instruction at {0:x}\n", Value));
      break;
    }

    ++NumRealEntries;
    LLVM_DEBUG(dbgs() << formatv("OK: {0:x} real entry\n", Value));

    if (TargetBF != &BF && HasEntryInFragment)
      *HasEntryInFragment = true;
    addEntryAddress(Value);
  }

  // Trim direct/normal jump table to exclude trailing unreachable entries that
  // can collide with a function address.
  if (Type == JumpTable::JTT_NORMAL && EntriesAsAddress &&
      TrimmedSize != EntriesAsAddress->size() &&
      getBinaryFunctionAtAddress(UnreachableAddress))
    EntriesAsAddress->resize(TrimmedSize);

  // It's a jump table if the number of real entries is more than 1, or there's
  // one real entry and one or more special targets. If there are only multiple
  // special targets, then it's not a jump table.
  return NumRealEntries + (HasUnreachable || HasStartAsEntry) >= 2;
}
696 
697 void BinaryContext::populateJumpTables() {
698   LLVM_DEBUG(dbgs() << "DataPCRelocations: " << DataPCRelocations.size()
699                     << '\n');
700   for (auto JTI = JumpTables.begin(), JTE = JumpTables.end(); JTI != JTE;
701        ++JTI) {
702     JumpTable *JT = JTI->second;
703 
704     bool NonSimpleParent = false;
705     for (BinaryFunction *BF : JT->Parents)
706       NonSimpleParent |= !BF->isSimple();
707     if (NonSimpleParent)
708       continue;
709 
710     uint64_t NextJTAddress = 0;
711     auto NextJTI = std::next(JTI);
712     if (NextJTI != JTE)
713       NextJTAddress = NextJTI->second->getAddress();
714 
715     const bool Success =
716         analyzeJumpTable(JT->getAddress(), JT->Type, *(JT->Parents[0]),
717                          NextJTAddress, &JT->EntriesAsAddress, &JT->IsSplit);
718     if (!Success) {
719       LLVM_DEBUG({
720         dbgs() << "failed to analyze ";
721         JT->print(dbgs());
722         if (NextJTI != JTE) {
723           dbgs() << "next ";
724           NextJTI->second->print(dbgs());
725         }
726       });
727       llvm_unreachable("jump table heuristic failure");
728     }
729     for (BinaryFunction *Frag : JT->Parents) {
730       if (JT->IsSplit)
731         Frag->setHasIndirectTargetToSplitFragment(true);
732       for (uint64_t EntryAddress : JT->EntriesAsAddress)
733         // if target is builtin_unreachable
734         if (EntryAddress == Frag->getAddress() + Frag->getSize()) {
735           Frag->IgnoredBranches.emplace_back(EntryAddress - Frag->getAddress(),
736                                              Frag->getSize());
737         } else if (EntryAddress >= Frag->getAddress() &&
738                    EntryAddress < Frag->getAddress() + Frag->getSize()) {
739           Frag->registerReferencedOffset(EntryAddress - Frag->getAddress());
740         }
741     }
742 
743     // In strict mode, erase PC-relative relocation record. Later we check that
744     // all such records are erased and thus have been accounted for.
745     if (opts::StrictMode && JT->Type == JumpTable::JTT_PIC) {
746       for (uint64_t Address = JT->getAddress();
747            Address < JT->getAddress() + JT->getSize();
748            Address += JT->EntrySize) {
749         DataPCRelocations.erase(DataPCRelocations.find(Address));
750       }
751     }
752 
753     // Mark to skip the function and all its fragments.
754     for (BinaryFunction *Frag : JT->Parents)
755       if (Frag->hasIndirectTargetToSplitFragment())
756         addFragmentsToSkip(Frag);
757   }
758 
759   if (opts::StrictMode && DataPCRelocations.size()) {
760     LLVM_DEBUG({
761       dbgs() << DataPCRelocations.size()
762              << " unclaimed PC-relative relocations left in data:\n";
763       for (uint64_t Reloc : DataPCRelocations)
764         dbgs() << Twine::utohexstr(Reloc) << '\n';
765     });
766     assert(0 && "unclaimed PC-relative relocations left in data\n");
767   }
768   clearList(DataPCRelocations);
769 }
770 
771 void BinaryContext::skipMarkedFragments() {
772   std::vector<BinaryFunction *> FragmentQueue;
773   // Copy the functions to FragmentQueue.
774   FragmentQueue.assign(FragmentsToSkip.begin(), FragmentsToSkip.end());
775   auto addToWorklist = [&](BinaryFunction *Function) -> void {
776     if (FragmentsToSkip.count(Function))
777       return;
778     FragmentQueue.push_back(Function);
779     addFragmentsToSkip(Function);
780   };
781   // Functions containing split jump tables need to be skipped with all
782   // fragments (transitively).
783   for (size_t I = 0; I != FragmentQueue.size(); I++) {
784     BinaryFunction *BF = FragmentQueue[I];
785     assert(FragmentsToSkip.count(BF) &&
786            "internal error in traversing function fragments");
787     if (opts::Verbosity >= 1)
788       this->errs() << "BOLT-WARNING: Ignoring " << BF->getPrintName() << '\n';
789     BF->setSimple(false);
790     BF->setHasIndirectTargetToSplitFragment(true);
791 
792     llvm::for_each(BF->Fragments, addToWorklist);
793     llvm::for_each(BF->ParentFragments, addToWorklist);
794   }
795   if (!FragmentsToSkip.empty())
796     this->errs() << "BOLT-WARNING: skipped " << FragmentsToSkip.size()
797                  << " function" << (FragmentsToSkip.size() == 1 ? "" : "s")
798                  << " due to cold fragments\n";
799 }
800 
801 MCSymbol *BinaryContext::getOrCreateGlobalSymbol(uint64_t Address, Twine Prefix,
802                                                  uint64_t Size,
803                                                  uint16_t Alignment,
804                                                  unsigned Flags) {
805   auto Itr = BinaryDataMap.find(Address);
806   if (Itr != BinaryDataMap.end()) {
807     assert(Itr->second->getSize() == Size || !Size);
808     return Itr->second->getSymbol();
809   }
810 
811   std::string Name = (Prefix + "0x" + Twine::utohexstr(Address)).str();
812   assert(!GlobalSymbols.count(Name) && "created name is not unique");
813   return registerNameAtAddress(Name, Address, Size, Alignment, Flags);
814 }
815 
816 MCSymbol *BinaryContext::getOrCreateUndefinedGlobalSymbol(StringRef Name) {
817   return Ctx->getOrCreateSymbol(Name);
818 }
819 
820 BinaryFunction *BinaryContext::createBinaryFunction(
821     const std::string &Name, BinarySection &Section, uint64_t Address,
822     uint64_t Size, uint64_t SymbolSize, uint16_t Alignment) {
823   auto Result = BinaryFunctions.emplace(
824       Address, BinaryFunction(Name, Section, Address, Size, *this));
825   assert(Result.second == true && "unexpected duplicate function");
826   BinaryFunction *BF = &Result.first->second;
827   registerNameAtAddress(Name, Address, SymbolSize ? SymbolSize : Size,
828                         Alignment);
829   setSymbolToFunctionMap(BF->getSymbol(), BF);
830   return BF;
831 }
832 
833 const MCSymbol *
834 BinaryContext::getOrCreateJumpTable(BinaryFunction &Function, uint64_t Address,
835                                     JumpTable::JumpTableType Type) {
836   // Two fragments of same function access same jump table
837   if (JumpTable *JT = getJumpTableContainingAddress(Address)) {
838     assert(JT->Type == Type && "jump table types have to match");
839     assert(Address == JT->getAddress() && "unexpected non-empty jump table");
840 
841     // Prevent associating a jump table to a specific fragment twice.
842     // This simple check arises from the assumption: no more than 2 fragments.
843     if (JT->Parents.size() == 1 && JT->Parents[0] != &Function) {
844       assert(JT->Parents[0]->isParentOrChildOf(Function) &&
845              "cannot re-use jump table of a different function");
846       // Duplicate the entry for the parent function for easy access
847       JT->Parents.push_back(&Function);
848       if (opts::Verbosity > 2) {
849         this->outs() << "BOLT-INFO: Multiple fragments access same jump table: "
850                      << JT->Parents[0]->getPrintName() << "; "
851                      << Function.getPrintName() << "\n";
852         JT->print(this->outs());
853       }
854       Function.JumpTables.emplace(Address, JT);
855       JT->Parents[0]->setHasIndirectTargetToSplitFragment(true);
856       JT->Parents[1]->setHasIndirectTargetToSplitFragment(true);
857     }
858 
859     bool IsJumpTableParent = false;
860     (void)IsJumpTableParent;
861     for (BinaryFunction *Frag : JT->Parents)
862       if (Frag == &Function)
863         IsJumpTableParent = true;
864     assert(IsJumpTableParent &&
865            "cannot re-use jump table of a different function");
866     return JT->getFirstLabel();
867   }
868 
869   // Re-use the existing symbol if possible.
870   MCSymbol *JTLabel = nullptr;
871   if (BinaryData *Object = getBinaryDataAtAddress(Address)) {
872     if (!isInternalSymbolName(Object->getSymbol()->getName()))
873       JTLabel = Object->getSymbol();
874   }
875 
876   const uint64_t EntrySize = getJumpTableEntrySize(Type);
877   if (!JTLabel) {
878     const std::string JumpTableName = generateJumpTableName(Function, Address);
879     JTLabel = registerNameAtAddress(JumpTableName, Address, 0, EntrySize);
880   }
881 
882   LLVM_DEBUG(dbgs() << "BOLT-DEBUG: creating jump table " << JTLabel->getName()
883                     << " in function " << Function << '\n');
884 
885   JumpTable *JT = new JumpTable(*JTLabel, Address, EntrySize, Type,
886                                 JumpTable::LabelMapType{{0, JTLabel}},
887                                 *getSectionForAddress(Address));
888   JT->Parents.push_back(&Function);
889   if (opts::Verbosity > 2)
890     JT->print(this->outs());
891   JumpTables.emplace(Address, JT);
892 
893   // Duplicate the entry for the parent function for easy access.
894   Function.JumpTables.emplace(Address, JT);
895   return JTLabel;
896 }
897 
898 std::pair<uint64_t, const MCSymbol *>
899 BinaryContext::duplicateJumpTable(BinaryFunction &Function, JumpTable *JT,
900                                   const MCSymbol *OldLabel) {
901   auto L = scopeLock();
902   unsigned Offset = 0;
903   bool Found = false;
904   for (std::pair<const unsigned, MCSymbol *> Elmt : JT->Labels) {
905     if (Elmt.second != OldLabel)
906       continue;
907     Offset = Elmt.first;
908     Found = true;
909     break;
910   }
911   assert(Found && "Label not found");
912   (void)Found;
913   MCSymbol *NewLabel = Ctx->createNamedTempSymbol("duplicatedJT");
914   JumpTable *NewJT =
915       new JumpTable(*NewLabel, JT->getAddress(), JT->EntrySize, JT->Type,
916                     JumpTable::LabelMapType{{Offset, NewLabel}},
917                     *getSectionForAddress(JT->getAddress()));
918   NewJT->Parents = JT->Parents;
919   NewJT->Entries = JT->Entries;
920   NewJT->Counts = JT->Counts;
921   uint64_t JumpTableID = ++DuplicatedJumpTables;
922   // Invert it to differentiate from regular jump tables whose IDs are their
923   // addresses in the input binary memory space
924   JumpTableID = ~JumpTableID;
925   JumpTables.emplace(JumpTableID, NewJT);
926   Function.JumpTables.emplace(JumpTableID, NewJT);
927   return std::make_pair(JumpTableID, NewLabel);
928 }
929 
930 std::string BinaryContext::generateJumpTableName(const BinaryFunction &BF,
931                                                  uint64_t Address) {
932   size_t Id;
933   uint64_t Offset = 0;
934   if (const JumpTable *JT = BF.getJumpTableContainingAddress(Address)) {
935     Offset = Address - JT->getAddress();
936     auto JTLabelsIt = JT->Labels.find(Offset);
937     if (JTLabelsIt != JT->Labels.end())
938       return std::string(JTLabelsIt->second->getName());
939 
940     auto JTIdsIt = JumpTableIds.find(JT->getAddress());
941     assert(JTIdsIt != JumpTableIds.end());
942     Id = JTIdsIt->second;
943   } else {
944     Id = JumpTableIds[Address] = BF.JumpTables.size();
945   }
946   return ("JUMP_TABLE/" + BF.getOneName().str() + "." + std::to_string(Id) +
947           (Offset ? ("." + std::to_string(Offset)) : ""));
948 }
949 
950 bool BinaryContext::hasValidCodePadding(const BinaryFunction &BF) {
951   // FIXME: aarch64 support is missing.
952   if (!isX86())
953     return true;
954 
955   if (BF.getSize() == BF.getMaxSize())
956     return true;
957 
958   ErrorOr<ArrayRef<unsigned char>> FunctionData = BF.getData();
959   assert(FunctionData && "cannot get function as data");
960 
961   uint64_t Offset = BF.getSize();
962   MCInst Instr;
963   uint64_t InstrSize = 0;
964   uint64_t InstrAddress = BF.getAddress() + Offset;
965   using std::placeholders::_1;
966 
967   // Skip instructions that satisfy the predicate condition.
968   auto skipInstructions = [&](std::function<bool(const MCInst &)> Predicate) {
969     const uint64_t StartOffset = Offset;
970     for (; Offset < BF.getMaxSize();
971          Offset += InstrSize, InstrAddress += InstrSize) {
972       if (!DisAsm->getInstruction(Instr, InstrSize, FunctionData->slice(Offset),
973                                   InstrAddress, nulls()))
974         break;
975       if (!Predicate(Instr))
976         break;
977     }
978 
979     return Offset - StartOffset;
980   };
981 
982   // Skip a sequence of zero bytes.
983   auto skipZeros = [&]() {
984     const uint64_t StartOffset = Offset;
985     for (; Offset < BF.getMaxSize(); ++Offset)
986       if ((*FunctionData)[Offset] != 0)
987         break;
988 
989     return Offset - StartOffset;
990   };
991 
992   // Accept the whole padding area filled with breakpoints.
993   auto isBreakpoint = std::bind(&MCPlusBuilder::isBreakpoint, MIB.get(), _1);
994   if (skipInstructions(isBreakpoint) && Offset == BF.getMaxSize())
995     return true;
996 
997   auto isNoop = std::bind(&MCPlusBuilder::isNoop, MIB.get(), _1);
998 
999   // Some functions have a jump to the next function or to the padding area
1000   // inserted after the body.
1001   auto isSkipJump = [&](const MCInst &Instr) {
1002     uint64_t TargetAddress = 0;
1003     if (MIB->isUnconditionalBranch(Instr) &&
1004         MIB->evaluateBranch(Instr, InstrAddress, InstrSize, TargetAddress)) {
1005       if (TargetAddress >= InstrAddress + InstrSize &&
1006           TargetAddress <= BF.getAddress() + BF.getMaxSize()) {
1007         return true;
1008       }
1009     }
1010     return false;
1011   };
1012 
1013   // Skip over nops, jumps, and zero padding. Allow interleaving (this happens).
1014   while (skipInstructions(isNoop) || skipInstructions(isSkipJump) ||
1015          skipZeros())
1016     ;
1017 
1018   if (Offset == BF.getMaxSize())
1019     return true;
1020 
1021   if (opts::Verbosity >= 1) {
1022     this->errs() << "BOLT-WARNING: bad padding at address 0x"
1023                  << Twine::utohexstr(BF.getAddress() + BF.getSize())
1024                  << " starting at offset " << (Offset - BF.getSize())
1025                  << " in function " << BF << '\n'
1026                  << FunctionData->slice(BF.getSize(),
1027                                         BF.getMaxSize() - BF.getSize())
1028                  << '\n';
1029   }
1030 
1031   return false;
1032 }
1033 
1034 void BinaryContext::adjustCodePadding() {
1035   for (auto &BFI : BinaryFunctions) {
1036     BinaryFunction &BF = BFI.second;
1037     if (!shouldEmit(BF))
1038       continue;
1039 
1040     if (!hasValidCodePadding(BF)) {
1041       if (HasRelocations) {
1042         if (opts::Verbosity >= 1) {
1043           this->outs() << "BOLT-INFO: function " << BF
1044                        << " has invalid padding. Ignoring the function.\n";
1045         }
1046         BF.setIgnored();
1047       } else {
1048         BF.setMaxSize(BF.getSize());
1049       }
1050     }
1051   }
1052 }
1053 
1054 MCSymbol *BinaryContext::registerNameAtAddress(StringRef Name, uint64_t Address,
1055                                                uint64_t Size,
1056                                                uint16_t Alignment,
1057                                                unsigned Flags) {
1058   // Register the name with MCContext.
1059   MCSymbol *Symbol = Ctx->getOrCreateSymbol(Name);
1060 
1061   auto GAI = BinaryDataMap.find(Address);
1062   BinaryData *BD;
1063   if (GAI == BinaryDataMap.end()) {
1064     ErrorOr<BinarySection &> SectionOrErr = getSectionForAddress(Address);
1065     BinarySection &Section =
1066         SectionOrErr ? SectionOrErr.get() : absoluteSection();
1067     BD = new BinaryData(*Symbol, Address, Size, Alignment ? Alignment : 1,
1068                         Section, Flags);
1069     GAI = BinaryDataMap.emplace(Address, BD).first;
1070     GlobalSymbols[Name] = BD;
1071     updateObjectNesting(GAI);
1072   } else {
1073     BD = GAI->second;
1074     if (!BD->hasName(Name)) {
1075       GlobalSymbols[Name] = BD;
1076       BD->Symbols.push_back(Symbol);
1077     }
1078   }
1079 
1080   return Symbol;
1081 }
1082 
1083 const BinaryData *
1084 BinaryContext::getBinaryDataContainingAddressImpl(uint64_t Address) const {
1085   auto NI = BinaryDataMap.lower_bound(Address);
1086   auto End = BinaryDataMap.end();
1087   if ((NI != End && Address == NI->first) ||
1088       ((NI != BinaryDataMap.begin()) && (NI-- != BinaryDataMap.begin()))) {
1089     if (NI->second->containsAddress(Address))
1090       return NI->second;
1091 
1092     // If this is a sub-symbol, see if a parent data contains the address.
1093     const BinaryData *BD = NI->second->getParent();
1094     while (BD) {
1095       if (BD->containsAddress(Address))
1096         return BD;
1097       BD = BD->getParent();
1098     }
1099   }
1100   return nullptr;
1101 }
1102 
1103 BinaryData *BinaryContext::getGOTSymbol() {
1104   // First tries to find a global symbol with that name
1105   BinaryData *GOTSymBD = getBinaryDataByName("_GLOBAL_OFFSET_TABLE_");
1106   if (GOTSymBD)
1107     return GOTSymBD;
1108 
1109   // This symbol might be hidden from run-time link, so fetch the local
1110   // definition if available.
1111   GOTSymBD = getBinaryDataByName("_GLOBAL_OFFSET_TABLE_/1");
1112   if (!GOTSymBD)
1113     return nullptr;
1114 
1115   // If the local symbol is not unique, fail
1116   unsigned Index = 2;
1117   SmallString<30> Storage;
1118   while (const BinaryData *BD =
1119              getBinaryDataByName(Twine("_GLOBAL_OFFSET_TABLE_/")
1120                                      .concat(Twine(Index++))
1121                                      .toStringRef(Storage)))
1122     if (BD->getAddress() != GOTSymBD->getAddress())
1123       return nullptr;
1124 
1125   return GOTSymBD;
1126 }
1127 
1128 bool BinaryContext::setBinaryDataSize(uint64_t Address, uint64_t Size) {
1129   auto NI = BinaryDataMap.find(Address);
1130   assert(NI != BinaryDataMap.end());
1131   if (NI == BinaryDataMap.end())
1132     return false;
1133   // TODO: it's possible that a jump table starts at the same address
1134   // as a larger blob of private data.  When we set the size of the
1135   // jump table, it might be smaller than the total blob size.  In this
1136   // case we just leave the original size since (currently) it won't really
1137   // affect anything.
1138   assert((!NI->second->Size || NI->second->Size == Size ||
1139           (NI->second->isJumpTable() && NI->second->Size > Size)) &&
1140          "can't change the size of a symbol that has already had its "
1141          "size set");
1142   if (!NI->second->Size) {
1143     NI->second->Size = Size;
1144     updateObjectNesting(NI);
1145     return true;
1146   }
1147   return false;
1148 }
1149 
1150 void BinaryContext::generateSymbolHashes() {
1151   auto isPadding = [](const BinaryData &BD) {
1152     StringRef Contents = BD.getSection().getContents();
1153     StringRef SymData = Contents.substr(BD.getOffset(), BD.getSize());
1154     return (BD.getName().starts_with("HOLEat") ||
1155             SymData.find_first_not_of(0) == StringRef::npos);
1156   };
1157 
1158   uint64_t NumCollisions = 0;
1159   for (auto &Entry : BinaryDataMap) {
1160     BinaryData &BD = *Entry.second;
1161     StringRef Name = BD.getName();
1162 
1163     if (!isInternalSymbolName(Name))
1164       continue;
1165 
1166     // First check if a non-anonymous alias exists and move it to the front.
1167     if (BD.getSymbols().size() > 1) {
1168       auto Itr = llvm::find_if(BD.getSymbols(), [&](const MCSymbol *Symbol) {
1169         return !isInternalSymbolName(Symbol->getName());
1170       });
1171       if (Itr != BD.getSymbols().end()) {
1172         size_t Idx = std::distance(BD.getSymbols().begin(), Itr);
1173         std::swap(BD.getSymbols()[0], BD.getSymbols()[Idx]);
1174         continue;
1175       }
1176     }
1177 
1178     // We have to skip 0 size symbols since they will all collide.
1179     if (BD.getSize() == 0) {
1180       continue;
1181     }
1182 
1183     const uint64_t Hash = BD.getSection().hash(BD);
1184     const size_t Idx = Name.find("0x");
1185     std::string NewName =
1186         (Twine(Name.substr(0, Idx)) + "_" + Twine::utohexstr(Hash)).str();
1187     if (getBinaryDataByName(NewName)) {
1188       // Ignore collisions for symbols that appear to be padding
1189       // (i.e. all zeros or a "hole")
1190       if (!isPadding(BD)) {
1191         if (opts::Verbosity) {
1192           this->errs() << "BOLT-WARNING: collision detected when hashing " << BD
1193                        << " with new name (" << NewName << "), skipping.\n";
1194         }
1195         ++NumCollisions;
1196       }
1197       continue;
1198     }
1199     BD.Symbols.insert(BD.Symbols.begin(), Ctx->getOrCreateSymbol(NewName));
1200     GlobalSymbols[NewName] = &BD;
1201   }
1202   if (NumCollisions) {
1203     this->errs() << "BOLT-WARNING: " << NumCollisions
1204                  << " collisions detected while hashing binary objects";
1205     if (!opts::Verbosity)
1206       this->errs() << ". Use -v=1 to see the list.";
1207     this->errs() << '\n';
1208   }
1209 }
1210 
1211 bool BinaryContext::registerFragment(BinaryFunction &TargetFunction,
1212                                      BinaryFunction &Function) const {
1213   assert(TargetFunction.isFragment() && "TargetFunction must be a fragment");
1214   if (TargetFunction.isChildOf(Function))
1215     return true;
1216   TargetFunction.addParentFragment(Function);
1217   Function.addFragment(TargetFunction);
1218   if (!HasRelocations) {
1219     TargetFunction.setSimple(false);
1220     Function.setSimple(false);
1221   }
1222   if (opts::Verbosity >= 1) {
1223     this->outs() << "BOLT-INFO: marking " << TargetFunction
1224                  << " as a fragment of " << Function << '\n';
1225   }
1226   return true;
1227 }
1228 
1229 void BinaryContext::addAdrpAddRelocAArch64(BinaryFunction &BF,
1230                                            MCInst &LoadLowBits,
1231                                            MCInst &LoadHiBits,
1232                                            uint64_t Target) {
1233   const MCSymbol *TargetSymbol;
1234   uint64_t Addend = 0;
1235   std::tie(TargetSymbol, Addend) = handleAddressRef(Target, BF,
1236                                                     /*IsPCRel*/ true);
1237   int64_t Val;
1238   MIB->replaceImmWithSymbolRef(LoadHiBits, TargetSymbol, Addend, Ctx.get(), Val,
1239                                ELF::R_AARCH64_ADR_PREL_PG_HI21);
1240   MIB->replaceImmWithSymbolRef(LoadLowBits, TargetSymbol, Addend, Ctx.get(),
1241                                Val, ELF::R_AARCH64_ADD_ABS_LO12_NC);
1242 }
1243 
1244 bool BinaryContext::handleAArch64Veneer(uint64_t Address, bool MatchOnly) {
1245   BinaryFunction *TargetFunction = getBinaryFunctionContainingAddress(Address);
1246   if (TargetFunction)
1247     return false;
1248 
1249   ErrorOr<BinarySection &> Section = getSectionForAddress(Address);
1250   assert(Section && "cannot get section for referenced address");
1251   if (!Section->isText())
1252     return false;
1253 
1254   bool Ret = false;
1255   StringRef SectionContents = Section->getContents();
1256   uint64_t Offset = Address - Section->getAddress();
1257   const uint64_t MaxSize = SectionContents.size() - Offset;
1258   const uint8_t *Bytes =
1259       reinterpret_cast<const uint8_t *>(SectionContents.data());
1260   ArrayRef<uint8_t> Data(Bytes + Offset, MaxSize);
1261 
1262   auto matchVeneer = [&](BinaryFunction::InstrMapType &Instructions,
1263                          MCInst &Instruction, uint64_t Offset,
1264                          uint64_t AbsoluteInstrAddr,
1265                          uint64_t TotalSize) -> bool {
1266     MCInst *TargetHiBits, *TargetLowBits;
1267     uint64_t TargetAddress, Count;
1268     Count = MIB->matchLinkerVeneer(Instructions.begin(), Instructions.end(),
1269                                    AbsoluteInstrAddr, Instruction, TargetHiBits,
1270                                    TargetLowBits, TargetAddress);
1271     if (!Count)
1272       return false;
1273 
1274     if (MatchOnly)
1275       return true;
1276 
1277     // NOTE The target symbol was created during disassemble's
1278     // handleExternalReference
1279     const MCSymbol *VeneerSymbol = getOrCreateGlobalSymbol(Address, "FUNCat");
1280     BinaryFunction *Veneer = createBinaryFunction(VeneerSymbol->getName().str(),
1281                                                   *Section, Address, TotalSize);
1282     addAdrpAddRelocAArch64(*Veneer, *TargetLowBits, *TargetHiBits,
1283                            TargetAddress);
1284     MIB->addAnnotation(Instruction, "AArch64Veneer", true);
1285     Veneer->addInstruction(Offset, std::move(Instruction));
1286     --Count;
1287     for (auto It = Instructions.rbegin(); Count != 0; ++It, --Count) {
1288       MIB->addAnnotation(It->second, "AArch64Veneer", true);
1289       Veneer->addInstruction(It->first, std::move(It->second));
1290     }
1291 
1292     Veneer->getOrCreateLocalLabel(Address);
1293     Veneer->setMaxSize(TotalSize);
1294     Veneer->updateState(BinaryFunction::State::Disassembled);
1295     LLVM_DEBUG(dbgs() << "BOLT-DEBUG: handling veneer function at 0x" << Address
1296                       << "\n");
1297     return true;
1298   };
1299 
1300   uint64_t Size = 0, TotalSize = 0;
1301   BinaryFunction::InstrMapType VeneerInstructions;
1302   for (Offset = 0; Offset < MaxSize; Offset += Size) {
1303     MCInst Instruction;
1304     const uint64_t AbsoluteInstrAddr = Address + Offset;
1305     if (!SymbolicDisAsm->getInstruction(Instruction, Size, Data.slice(Offset),
1306                                         AbsoluteInstrAddr, nulls()))
1307       break;
1308 
1309     TotalSize += Size;
1310     if (MIB->isBranch(Instruction)) {
1311       Ret = matchVeneer(VeneerInstructions, Instruction, Offset,
1312                         AbsoluteInstrAddr, TotalSize);
1313       break;
1314     }
1315 
1316     VeneerInstructions.emplace(Offset, std::move(Instruction));
1317   }
1318 
1319   return Ret;
1320 }
1321 
1322 void BinaryContext::processInterproceduralReferences() {
1323   for (const std::pair<BinaryFunction *, uint64_t> &It :
1324        InterproceduralReferences) {
1325     BinaryFunction &Function = *It.first;
1326     uint64_t Address = It.second;
1327     // Process interprocedural references from ignored functions in BAT mode
1328     // (non-simple in non-relocation mode) to properly register entry points
1329     if (!Address || (Function.isIgnored() && !HasBATSection))
1330       continue;
1331 
1332     BinaryFunction *TargetFunction =
1333         getBinaryFunctionContainingAddress(Address);
1334     if (&Function == TargetFunction)
1335       continue;
1336 
1337     if (TargetFunction) {
1338       if (TargetFunction->isFragment() &&
1339           !TargetFunction->isChildOf(Function)) {
1340         this->errs()
1341             << "BOLT-WARNING: interprocedural reference between unrelated "
1342                "fragments: "
1343             << Function.getPrintName() << " and "
1344             << TargetFunction->getPrintName() << '\n';
1345       }
1346       if (uint64_t Offset = Address - TargetFunction->getAddress())
1347         TargetFunction->addEntryPointAtOffset(Offset);
1348 
1349       continue;
1350     }
1351 
1352     // Check if address falls in function padding space - this could be
1353     // unmarked data in code. In this case adjust the padding space size.
1354     ErrorOr<BinarySection &> Section = getSectionForAddress(Address);
1355     assert(Section && "cannot get section for referenced address");
1356 
1357     if (!Section->isText())
1358       continue;
1359 
1360     // PLT requires special handling and could be ignored in this context.
1361     StringRef SectionName = Section->getName();
1362     if (SectionName == ".plt" || SectionName == ".plt.got")
1363       continue;
1364 
1365     // Check if it is aarch64 veneer written at Address
1366     if (isAArch64() && handleAArch64Veneer(Address))
1367       continue;
1368 
1369     if (opts::processAllFunctions()) {
1370       this->errs() << "BOLT-ERROR: cannot process binaries with unmarked "
1371                    << "object in code at address 0x"
1372                    << Twine::utohexstr(Address) << " belonging to section "
1373                    << SectionName << " in current mode\n";
1374       exit(1);
1375     }
1376 
1377     TargetFunction = getBinaryFunctionContainingAddress(Address,
1378                                                         /*CheckPastEnd=*/false,
1379                                                         /*UseMaxSize=*/true);
1380     // We are not going to overwrite non-simple functions, but for simple
1381     // ones - adjust the padding size.
1382     if (TargetFunction && TargetFunction->isSimple()) {
1383       this->errs()
1384           << "BOLT-WARNING: function " << *TargetFunction
1385           << " has an object detected in a padding region at address 0x"
1386           << Twine::utohexstr(Address) << '\n';
1387       TargetFunction->setMaxSize(TargetFunction->getSize());
1388     }
1389   }
1390 
1391   InterproceduralReferences.clear();
1392 }
1393 
1394 void BinaryContext::postProcessSymbolTable() {
1395   fixBinaryDataHoles();
1396   bool Valid = true;
1397   for (auto &Entry : BinaryDataMap) {
1398     BinaryData *BD = Entry.second;
1399     if ((BD->getName().starts_with("SYMBOLat") ||
1400          BD->getName().starts_with("DATAat")) &&
1401         !BD->getParent() && !BD->getSize() && !BD->isAbsolute() &&
1402         BD->getSection()) {
1403       this->errs() << "BOLT-WARNING: zero-sized top level symbol: " << *BD
1404                    << "\n";
1405       Valid = false;
1406     }
1407   }
1408   assert(Valid);
1409   (void)Valid;
1410   generateSymbolHashes();
1411 }
1412 
1413 void BinaryContext::foldFunction(BinaryFunction &ChildBF,
1414                                  BinaryFunction &ParentBF) {
1415   assert(!ChildBF.isMultiEntry() && !ParentBF.isMultiEntry() &&
1416          "cannot merge functions with multiple entry points");
1417 
1418   std::unique_lock<llvm::sys::RWMutex> WriteCtxLock(CtxMutex, std::defer_lock);
1419   std::unique_lock<llvm::sys::RWMutex> WriteSymbolMapLock(
1420       SymbolToFunctionMapMutex, std::defer_lock);
1421 
1422   const StringRef ChildName = ChildBF.getOneName();
1423 
1424   // Move symbols over and update bookkeeping info.
1425   for (MCSymbol *Symbol : ChildBF.getSymbols()) {
1426     ParentBF.getSymbols().push_back(Symbol);
1427     WriteSymbolMapLock.lock();
1428     SymbolToFunctionMap[Symbol] = &ParentBF;
1429     WriteSymbolMapLock.unlock();
1430     // NB: there's no need to update BinaryDataMap and GlobalSymbols.
1431   }
1432   ChildBF.getSymbols().clear();
1433 
1434   // Move other names the child function is known under.
1435   llvm::move(ChildBF.Aliases, std::back_inserter(ParentBF.Aliases));
1436   ChildBF.Aliases.clear();
1437 
1438   if (HasRelocations) {
1439     // Merge execution counts of ChildBF into those of ParentBF.
1440     // Without relocations, we cannot reliably merge profiles as both functions
1441     // continue to exist and either one can be executed.
1442     ChildBF.mergeProfileDataInto(ParentBF);
1443 
1444     std::shared_lock<llvm::sys::RWMutex> ReadBfsLock(BinaryFunctionsMutex,
1445                                                      std::defer_lock);
1446     std::unique_lock<llvm::sys::RWMutex> WriteBfsLock(BinaryFunctionsMutex,
1447                                                       std::defer_lock);
1448     // Remove ChildBF from the global set of functions in relocs mode.
1449     ReadBfsLock.lock();
1450     auto FI = BinaryFunctions.find(ChildBF.getAddress());
1451     ReadBfsLock.unlock();
1452 
1453     assert(FI != BinaryFunctions.end() && "function not found");
1454     assert(&ChildBF == &FI->second && "function mismatch");
1455 
1456     WriteBfsLock.lock();
1457     ChildBF.clearDisasmState();
1458     FI = BinaryFunctions.erase(FI);
1459     WriteBfsLock.unlock();
1460 
1461   } else {
1462     // In non-relocation mode we keep the function, but rename it.
1463     std::string NewName = "__ICF_" + ChildName.str();
1464 
1465     WriteCtxLock.lock();
1466     ChildBF.getSymbols().push_back(Ctx->getOrCreateSymbol(NewName));
1467     WriteCtxLock.unlock();
1468 
1469     ChildBF.setFolded(&ParentBF);
1470   }
1471 
1472   ParentBF.setHasFunctionsFoldedInto();
1473 }
1474 
1475 void BinaryContext::fixBinaryDataHoles() {
1476   assert(validateObjectNesting() && "object nesting inconsistency detected");
1477 
1478   for (BinarySection &Section : allocatableSections()) {
1479     std::vector<std::pair<uint64_t, uint64_t>> Holes;
1480 
1481     auto isNotHole = [&Section](const binary_data_iterator &Itr) {
1482       BinaryData *BD = Itr->second;
1483       bool isHole = (!BD->getParent() && !BD->getSize() && BD->isObject() &&
1484                      (BD->getName().starts_with("SYMBOLat0x") ||
1485                       BD->getName().starts_with("DATAat0x") ||
1486                       BD->getName().starts_with("ANONYMOUS")));
1487       return !isHole && BD->getSection() == Section && !BD->getParent();
1488     };
1489 
1490     auto BDStart = BinaryDataMap.begin();
1491     auto BDEnd = BinaryDataMap.end();
1492     auto Itr = FilteredBinaryDataIterator(isNotHole, BDStart, BDEnd);
1493     auto End = FilteredBinaryDataIterator(isNotHole, BDEnd, BDEnd);
1494 
1495     uint64_t EndAddress = Section.getAddress();
1496 
1497     while (Itr != End) {
1498       if (Itr->second->getAddress() > EndAddress) {
1499         uint64_t Gap = Itr->second->getAddress() - EndAddress;
1500         Holes.emplace_back(EndAddress, Gap);
1501       }
1502       EndAddress = Itr->second->getEndAddress();
1503       ++Itr;
1504     }
1505 
1506     if (EndAddress < Section.getEndAddress())
1507       Holes.emplace_back(EndAddress, Section.getEndAddress() - EndAddress);
1508 
1509     // If there is already a symbol at the start of the hole, grow that symbol
1510     // to cover the rest.  Otherwise, create a new symbol to cover the hole.
1511     for (std::pair<uint64_t, uint64_t> &Hole : Holes) {
1512       BinaryData *BD = getBinaryDataAtAddress(Hole.first);
1513       if (BD) {
1514         // BD->getSection() can be != Section if there are sections that
1515         // overlap.  In this case it is probably safe to just skip the holes
1516         // since the overlapping section will not(?) have any symbols in it.
1517         if (BD->getSection() == Section)
1518           setBinaryDataSize(Hole.first, Hole.second);
1519       } else {
1520         getOrCreateGlobalSymbol(Hole.first, "HOLEat", Hole.second, 1);
1521       }
1522     }
1523   }
1524 
1525   assert(validateObjectNesting() && "object nesting inconsistency detected");
1526   assert(validateHoles() && "top level hole detected in object map");
1527 }
1528 
1529 void BinaryContext::printGlobalSymbols(raw_ostream &OS) const {
1530   const BinarySection *CurrentSection = nullptr;
1531   bool FirstSection = true;
1532 
1533   for (auto &Entry : BinaryDataMap) {
1534     const BinaryData *BD = Entry.second;
1535     const BinarySection &Section = BD->getSection();
1536     if (FirstSection || Section != *CurrentSection) {
1537       uint64_t Address, Size;
1538       StringRef Name = Section.getName();
1539       if (Section) {
1540         Address = Section.getAddress();
1541         Size = Section.getSize();
1542       } else {
1543         Address = BD->getAddress();
1544         Size = BD->getSize();
1545       }
1546       OS << "BOLT-INFO: Section " << Name << ", "
1547          << "0x" + Twine::utohexstr(Address) << ":"
1548          << "0x" + Twine::utohexstr(Address + Size) << "/" << Size << "\n";
1549       CurrentSection = &Section;
1550       FirstSection = false;
1551     }
1552 
1553     OS << "BOLT-INFO: ";
1554     const BinaryData *P = BD->getParent();
1555     while (P) {
1556       OS << "  ";
1557       P = P->getParent();
1558     }
1559     OS << *BD << "\n";
1560   }
1561 }
1562 
1563 Expected<unsigned> BinaryContext::getDwarfFile(
1564     StringRef Directory, StringRef FileName, unsigned FileNumber,
1565     std::optional<MD5::MD5Result> Checksum, std::optional<StringRef> Source,
1566     unsigned CUID, unsigned DWARFVersion) {
1567   DwarfLineTable &Table = DwarfLineTablesCUMap[CUID];
1568   return Table.tryGetFile(Directory, FileName, Checksum, Source, DWARFVersion,
1569                           FileNumber);
1570 }
1571 
1572 unsigned BinaryContext::addDebugFilenameToUnit(const uint32_t DestCUID,
1573                                                const uint32_t SrcCUID,
1574                                                unsigned FileIndex) {
1575   DWARFCompileUnit *SrcUnit = DwCtx->getCompileUnitForOffset(SrcCUID);
1576   const DWARFDebugLine::LineTable *LineTable =
1577       DwCtx->getLineTableForUnit(SrcUnit);
1578   const std::vector<DWARFDebugLine::FileNameEntry> &FileNames =
1579       LineTable->Prologue.FileNames;
1580   // Dir indexes start at 1, as DWARF file numbers, and a dir index 0
1581   // means empty dir.
1582   assert(FileIndex > 0 && FileIndex <= FileNames.size() &&
1583          "FileIndex out of range for the compilation unit.");
1584   StringRef Dir = "";
1585   if (FileNames[FileIndex - 1].DirIdx != 0) {
1586     if (std::optional<const char *> DirName = dwarf::toString(
1587             LineTable->Prologue
1588                 .IncludeDirectories[FileNames[FileIndex - 1].DirIdx - 1])) {
1589       Dir = *DirName;
1590     }
1591   }
1592   StringRef FileName = "";
1593   if (std::optional<const char *> FName =
1594           dwarf::toString(FileNames[FileIndex - 1].Name))
1595     FileName = *FName;
1596   assert(FileName != "");
1597   DWARFCompileUnit *DstUnit = DwCtx->getCompileUnitForOffset(DestCUID);
1598   return cantFail(getDwarfFile(Dir, FileName, 0, std::nullopt, std::nullopt,
1599                                DestCUID, DstUnit->getVersion()));
1600 }
1601 
1602 std::vector<BinaryFunction *> BinaryContext::getSortedFunctions() {
1603   std::vector<BinaryFunction *> SortedFunctions(BinaryFunctions.size());
1604   llvm::transform(llvm::make_second_range(BinaryFunctions),
1605                   SortedFunctions.begin(),
1606                   [](BinaryFunction &BF) { return &BF; });
1607 
1608   llvm::stable_sort(SortedFunctions,
1609                     [](const BinaryFunction *A, const BinaryFunction *B) {
1610                       if (A->hasValidIndex() && B->hasValidIndex()) {
1611                         return A->getIndex() < B->getIndex();
1612                       }
1613                       return A->hasValidIndex();
1614                     });
1615   return SortedFunctions;
1616 }
1617 
1618 std::vector<BinaryFunction *> BinaryContext::getAllBinaryFunctions() {
1619   std::vector<BinaryFunction *> AllFunctions;
1620   AllFunctions.reserve(BinaryFunctions.size() + InjectedBinaryFunctions.size());
1621   llvm::transform(llvm::make_second_range(BinaryFunctions),
1622                   std::back_inserter(AllFunctions),
1623                   [](BinaryFunction &BF) { return &BF; });
1624   llvm::copy(InjectedBinaryFunctions, std::back_inserter(AllFunctions));
1625 
1626   return AllFunctions;
1627 }
1628 
1629 std::optional<DWARFUnit *> BinaryContext::getDWOCU(uint64_t DWOId) {
1630   auto Iter = DWOCUs.find(DWOId);
1631   if (Iter == DWOCUs.end())
1632     return std::nullopt;
1633 
1634   return Iter->second;
1635 }
1636 
1637 DWARFContext *BinaryContext::getDWOContext() const {
1638   if (DWOCUs.empty())
1639     return nullptr;
1640   return &DWOCUs.begin()->second->getContext();
1641 }
1642 
1643 /// Handles DWO sections that can either be in .o, .dwo or .dwp files.
1644 void BinaryContext::preprocessDWODebugInfo() {
1645   for (const std::unique_ptr<DWARFUnit> &CU : DwCtx->compile_units()) {
1646     DWARFUnit *const DwarfUnit = CU.get();
1647     if (std::optional<uint64_t> DWOId = DwarfUnit->getDWOId()) {
1648       std::string DWOName = dwarf::toString(
1649           DwarfUnit->getUnitDIE().find(
1650               {dwarf::DW_AT_dwo_name, dwarf::DW_AT_GNU_dwo_name}),
1651           "");
1652       SmallString<16> AbsolutePath;
1653       if (!opts::CompDirOverride.empty()) {
1654         sys::path::append(AbsolutePath, opts::CompDirOverride);
1655         sys::path::append(AbsolutePath, DWOName);
1656       }
1657       DWARFUnit *DWOCU =
1658           DwarfUnit->getNonSkeletonUnitDIE(false, AbsolutePath).getDwarfUnit();
1659       if (!DWOCU->isDWOUnit()) {
1660         this->outs()
1661             << "BOLT-WARNING: Debug Fission: DWO debug information for "
1662             << DWOName
1663             << " was not retrieved and won't be updated. Please check "
1664                "relative path.\n";
1665         continue;
1666       }
1667       DWOCUs[*DWOId] = DWOCU;
1668     }
1669   }
1670   if (!DWOCUs.empty())
1671     this->outs() << "BOLT-INFO: processing split DWARF\n";
1672 }
1673 
1674 void BinaryContext::preprocessDebugInfo() {
1675   struct CURange {
1676     uint64_t LowPC;
1677     uint64_t HighPC;
1678     DWARFUnit *Unit;
1679 
1680     bool operator<(const CURange &Other) const { return LowPC < Other.LowPC; }
1681   };
1682 
1683   // Building a map of address ranges to CUs similar to .debug_aranges and use
1684   // it to assign CU to functions.
1685   std::vector<CURange> AllRanges;
1686   AllRanges.reserve(DwCtx->getNumCompileUnits());
1687   for (const std::unique_ptr<DWARFUnit> &CU : DwCtx->compile_units()) {
1688     Expected<DWARFAddressRangesVector> RangesOrError =
1689         CU->getUnitDIE().getAddressRanges();
1690     if (!RangesOrError) {
1691       consumeError(RangesOrError.takeError());
1692       continue;
1693     }
1694     for (DWARFAddressRange &Range : *RangesOrError) {
1695       // Parts of the debug info could be invalidated due to corresponding code
1696       // being removed from the binary by the linker. Hence we check if the
1697       // address is a valid one.
1698       if (containsAddress(Range.LowPC))
1699         AllRanges.emplace_back(CURange{Range.LowPC, Range.HighPC, CU.get()});
1700     }
1701 
1702     ContainsDwarf5 |= CU->getVersion() >= 5;
1703     ContainsDwarfLegacy |= CU->getVersion() < 5;
1704   }
1705 
1706   llvm::sort(AllRanges);
1707   for (auto &KV : BinaryFunctions) {
1708     const uint64_t FunctionAddress = KV.first;
1709     BinaryFunction &Function = KV.second;
1710 
1711     auto It = llvm::partition_point(
1712         AllRanges, [=](CURange R) { return R.HighPC <= FunctionAddress; });
1713     if (It != AllRanges.end() && It->LowPC <= FunctionAddress)
1714       Function.setDWARFUnit(It->Unit);
1715   }
1716 
1717   // Discover units with debug info that needs to be updated.
1718   for (const auto &KV : BinaryFunctions) {
1719     const BinaryFunction &BF = KV.second;
1720     if (shouldEmit(BF) && BF.getDWARFUnit())
1721       ProcessedCUs.insert(BF.getDWARFUnit());
1722   }
1723 
1724   // Clear debug info for functions from units that we are not going to process.
1725   for (auto &KV : BinaryFunctions) {
1726     BinaryFunction &BF = KV.second;
1727     if (BF.getDWARFUnit() && !ProcessedCUs.count(BF.getDWARFUnit()))
1728       BF.setDWARFUnit(nullptr);
1729   }
1730 
1731   if (opts::Verbosity >= 1) {
1732     this->outs() << "BOLT-INFO: " << ProcessedCUs.size() << " out of "
1733                  << DwCtx->getNumCompileUnits() << " CUs will be updated\n";
1734   }
1735 
1736   preprocessDWODebugInfo();
1737 
1738   // Populate MCContext with DWARF files from all units.
1739   StringRef GlobalPrefix = AsmInfo->getPrivateGlobalPrefix();
1740   for (const std::unique_ptr<DWARFUnit> &CU : DwCtx->compile_units()) {
1741     const uint64_t CUID = CU->getOffset();
1742     DwarfLineTable &BinaryLineTable = getDwarfLineTable(CUID);
1743     BinaryLineTable.setLabel(Ctx->getOrCreateSymbol(
1744         GlobalPrefix + "line_table_start" + Twine(CUID)));
1745 
1746     if (!ProcessedCUs.count(CU.get()))
1747       continue;
1748 
1749     const DWARFDebugLine::LineTable *LineTable =
1750         DwCtx->getLineTableForUnit(CU.get());
1751     const std::vector<DWARFDebugLine::FileNameEntry> &FileNames =
1752         LineTable->Prologue.FileNames;
1753 
1754     uint16_t DwarfVersion = LineTable->Prologue.getVersion();
1755     if (DwarfVersion >= 5) {
1756       std::optional<MD5::MD5Result> Checksum;
1757       if (LineTable->Prologue.ContentTypes.HasMD5)
1758         Checksum = LineTable->Prologue.FileNames[0].Checksum;
1759       std::optional<const char *> Name =
1760           dwarf::toString(CU->getUnitDIE().find(dwarf::DW_AT_name), nullptr);
1761       if (std::optional<uint64_t> DWOID = CU->getDWOId()) {
1762         auto Iter = DWOCUs.find(*DWOID);
1763         assert(Iter != DWOCUs.end() && "DWO CU was not found.");
1764         Name = dwarf::toString(
1765             Iter->second->getUnitDIE().find(dwarf::DW_AT_name), nullptr);
1766       }
1767       BinaryLineTable.setRootFile(CU->getCompilationDir(), *Name, Checksum,
1768                                   std::nullopt);
1769     }
1770 
1771     BinaryLineTable.setDwarfVersion(DwarfVersion);
1772 
1773     // Assign a unique label to every line table, one per CU.
1774     // Make sure empty debug line tables are registered too.
1775     if (FileNames.empty()) {
1776       cantFail(getDwarfFile("", "<unknown>", 0, std::nullopt, std::nullopt,
1777                             CUID, DwarfVersion));
1778       continue;
1779     }
1780     const uint32_t Offset = DwarfVersion < 5 ? 1 : 0;
1781     for (size_t I = 0, Size = FileNames.size(); I != Size; ++I) {
1782       // Dir indexes start at 1, as DWARF file numbers, and a dir index 0
1783       // means empty dir.
1784       StringRef Dir = "";
1785       if (FileNames[I].DirIdx != 0 || DwarfVersion >= 5)
1786         if (std::optional<const char *> DirName = dwarf::toString(
1787                 LineTable->Prologue
1788                     .IncludeDirectories[FileNames[I].DirIdx - Offset]))
1789           Dir = *DirName;
1790       StringRef FileName = "";
1791       if (std::optional<const char *> FName =
1792               dwarf::toString(FileNames[I].Name))
1793         FileName = *FName;
1794       assert(FileName != "");
1795       std::optional<MD5::MD5Result> Checksum;
1796       if (DwarfVersion >= 5 && LineTable->Prologue.ContentTypes.HasMD5)
1797         Checksum = LineTable->Prologue.FileNames[I].Checksum;
1798       cantFail(getDwarfFile(Dir, FileName, 0, Checksum, std::nullopt, CUID,
1799                             DwarfVersion));
1800     }
1801   }
1802 }
1803 
1804 bool BinaryContext::shouldEmit(const BinaryFunction &Function) const {
1805   if (Function.isPseudo())
1806     return false;
1807 
1808   if (opts::processAllFunctions())
1809     return true;
1810 
1811   if (Function.isIgnored())
1812     return false;
1813 
1814   // In relocation mode we will emit non-simple functions with CFG.
1815   // If the function does not have a CFG it should be marked as ignored.
1816   return HasRelocations || Function.isSimple();
1817 }
1818 
1819 void BinaryContext::dump(const MCInst &Inst) const {
1820   if (LLVM_UNLIKELY(!InstPrinter)) {
1821     dbgs() << "Cannot dump for InstPrinter is not initialized.\n";
1822     return;
1823   }
1824   InstPrinter->printInst(&Inst, 0, "", *STI, dbgs());
1825   dbgs() << "\n";
1826 }
1827 
1828 void BinaryContext::printCFI(raw_ostream &OS, const MCCFIInstruction &Inst) {
1829   uint32_t Operation = Inst.getOperation();
1830   switch (Operation) {
1831   case MCCFIInstruction::OpSameValue:
1832     OS << "OpSameValue Reg" << Inst.getRegister();
1833     break;
1834   case MCCFIInstruction::OpRememberState:
1835     OS << "OpRememberState";
1836     break;
1837   case MCCFIInstruction::OpRestoreState:
1838     OS << "OpRestoreState";
1839     break;
1840   case MCCFIInstruction::OpOffset:
1841     OS << "OpOffset Reg" << Inst.getRegister() << " " << Inst.getOffset();
1842     break;
1843   case MCCFIInstruction::OpDefCfaRegister:
1844     OS << "OpDefCfaRegister Reg" << Inst.getRegister();
1845     break;
1846   case MCCFIInstruction::OpDefCfaOffset:
1847     OS << "OpDefCfaOffset " << Inst.getOffset();
1848     break;
1849   case MCCFIInstruction::OpDefCfa:
1850     OS << "OpDefCfa Reg" << Inst.getRegister() << " " << Inst.getOffset();
1851     break;
1852   case MCCFIInstruction::OpRelOffset:
1853     OS << "OpRelOffset Reg" << Inst.getRegister() << " " << Inst.getOffset();
1854     break;
1855   case MCCFIInstruction::OpAdjustCfaOffset:
1856     OS << "OfAdjustCfaOffset " << Inst.getOffset();
1857     break;
1858   case MCCFIInstruction::OpEscape:
1859     OS << "OpEscape";
1860     break;
1861   case MCCFIInstruction::OpRestore:
1862     OS << "OpRestore Reg" << Inst.getRegister();
1863     break;
1864   case MCCFIInstruction::OpUndefined:
1865     OS << "OpUndefined Reg" << Inst.getRegister();
1866     break;
1867   case MCCFIInstruction::OpRegister:
1868     OS << "OpRegister Reg" << Inst.getRegister() << " Reg"
1869        << Inst.getRegister2();
1870     break;
1871   case MCCFIInstruction::OpWindowSave:
1872     OS << "OpWindowSave";
1873     break;
1874   case MCCFIInstruction::OpGnuArgsSize:
1875     OS << "OpGnuArgsSize";
1876     break;
1877   default:
1878     OS << "Op#" << Operation;
1879     break;
1880   }
1881 }
1882 
1883 MarkerSymType BinaryContext::getMarkerType(const SymbolRef &Symbol) const {
1884   // For aarch64 and riscv, the ABI defines mapping symbols so we identify data
1885   // in the code section (see IHI0056B). $x identifies a symbol starting code or
1886   // the end of a data chunk inside code, $d identifies start of data.
1887   if (isX86() || ELFSymbolRef(Symbol).getSize())
1888     return MarkerSymType::NONE;
1889 
1890   Expected<StringRef> NameOrError = Symbol.getName();
1891   Expected<object::SymbolRef::Type> TypeOrError = Symbol.getType();
1892 
1893   if (!TypeOrError || !NameOrError)
1894     return MarkerSymType::NONE;
1895 
1896   if (*TypeOrError != SymbolRef::ST_Unknown)
1897     return MarkerSymType::NONE;
1898 
1899   if (*NameOrError == "$x" || NameOrError->starts_with("$x."))
1900     return MarkerSymType::CODE;
1901 
1902   // $x<ISA>
1903   if (isRISCV() && NameOrError->starts_with("$x"))
1904     return MarkerSymType::CODE;
1905 
1906   if (*NameOrError == "$d" || NameOrError->starts_with("$d."))
1907     return MarkerSymType::DATA;
1908 
1909   return MarkerSymType::NONE;
1910 }
1911 
1912 bool BinaryContext::isMarker(const SymbolRef &Symbol) const {
1913   return getMarkerType(Symbol) != MarkerSymType::NONE;
1914 }
1915 
1916 static void printDebugInfo(raw_ostream &OS, const MCInst &Instruction,
1917                            const BinaryFunction *Function,
1918                            DWARFContext *DwCtx) {
1919   DebugLineTableRowRef RowRef =
1920       DebugLineTableRowRef::fromSMLoc(Instruction.getLoc());
1921   if (RowRef == DebugLineTableRowRef::NULL_ROW)
1922     return;
1923 
1924   const DWARFDebugLine::LineTable *LineTable;
1925   if (Function && Function->getDWARFUnit() &&
1926       Function->getDWARFUnit()->getOffset() == RowRef.DwCompileUnitIndex) {
1927     LineTable = Function->getDWARFLineTable();
1928   } else {
1929     LineTable = DwCtx->getLineTableForUnit(
1930         DwCtx->getCompileUnitForOffset(RowRef.DwCompileUnitIndex));
1931   }
1932   assert(LineTable && "line table expected for instruction with debug info");
1933 
1934   const DWARFDebugLine::Row &Row = LineTable->Rows[RowRef.RowIndex - 1];
1935   StringRef FileName = "";
1936   if (std::optional<const char *> FName =
1937           dwarf::toString(LineTable->Prologue.FileNames[Row.File - 1].Name))
1938     FileName = *FName;
1939   OS << " # debug line " << FileName << ":" << Row.Line;
1940   if (Row.Column)
1941     OS << ":" << Row.Column;
1942   if (Row.Discriminator)
1943     OS << " discriminator:" << Row.Discriminator;
1944 }
1945 
1946 void BinaryContext::printInstruction(raw_ostream &OS, const MCInst &Instruction,
1947                                      uint64_t Offset,
1948                                      const BinaryFunction *Function,
1949                                      bool PrintMCInst, bool PrintMemData,
1950                                      bool PrintRelocations,
1951                                      StringRef Endl) const {
1952   OS << format("    %08" PRIx64 ": ", Offset);
1953   if (MIB->isCFI(Instruction)) {
1954     uint32_t Offset = Instruction.getOperand(0).getImm();
1955     OS << "\t!CFI\t$" << Offset << "\t; ";
1956     if (Function)
1957       printCFI(OS, *Function->getCFIFor(Instruction));
1958     OS << Endl;
1959     return;
1960   }
1961   if (std::optional<uint32_t> DynamicID =
1962           MIB->getDynamicBranchID(Instruction)) {
1963     OS << "\tjit\t" << MIB->getTargetSymbol(Instruction)->getName()
1964        << " # ID: " << DynamicID;
1965   } else {
1966     InstPrinter->printInst(&Instruction, 0, "", *STI, OS);
1967   }
1968   if (MIB->isCall(Instruction)) {
1969     if (MIB->isTailCall(Instruction))
1970       OS << " # TAILCALL ";
1971     if (MIB->isInvoke(Instruction)) {
1972       const std::optional<MCPlus::MCLandingPad> EHInfo =
1973           MIB->getEHInfo(Instruction);
1974       OS << " # handler: ";
1975       if (EHInfo->first)
1976         OS << *EHInfo->first;
1977       else
1978         OS << '0';
1979       OS << "; action: " << EHInfo->second;
1980       const int64_t GnuArgsSize = MIB->getGnuArgsSize(Instruction);
1981       if (GnuArgsSize >= 0)
1982         OS << "; GNU_args_size = " << GnuArgsSize;
1983     }
1984   } else if (MIB->isIndirectBranch(Instruction)) {
1985     if (uint64_t JTAddress = MIB->getJumpTable(Instruction)) {
1986       OS << " # JUMPTABLE @0x" << Twine::utohexstr(JTAddress);
1987     } else {
1988       OS << " # UNKNOWN CONTROL FLOW";
1989     }
1990   }
1991   if (std::optional<uint32_t> Offset = MIB->getOffset(Instruction))
1992     OS << " # Offset: " << *Offset;
1993   if (std::optional<uint32_t> Size = MIB->getSize(Instruction))
1994     OS << " # Size: " << *Size;
1995   if (MCSymbol *Label = MIB->getInstLabel(Instruction))
1996     OS << " # Label: " << *Label;
1997 
1998   MIB->printAnnotations(Instruction, OS);
1999 
2000   if (opts::PrintDebugInfo)
2001     printDebugInfo(OS, Instruction, Function, DwCtx.get());
2002 
2003   if ((opts::PrintRelocations || PrintRelocations) && Function) {
2004     const uint64_t Size = computeCodeSize(&Instruction, &Instruction + 1);
2005     Function->printRelocations(OS, Offset, Size);
2006   }
2007 
2008   OS << Endl;
2009 
2010   if (PrintMCInst) {
2011     Instruction.dump_pretty(OS, InstPrinter.get());
2012     OS << Endl;
2013   }
2014 }
2015 
2016 std::optional<uint64_t>
2017 BinaryContext::getBaseAddressForMapping(uint64_t MMapAddress,
2018                                         uint64_t FileOffset) const {
2019   // Find a segment with a matching file offset.
2020   for (auto &KV : SegmentMapInfo) {
2021     const SegmentInfo &SegInfo = KV.second;
2022     // FileOffset is got from perf event,
2023     // and it is equal to alignDown(SegInfo.FileOffset, pagesize).
2024     // If the pagesize is not equal to SegInfo.Alignment.
2025     // FileOffset and SegInfo.FileOffset should be aligned first,
2026     // and then judge whether they are equal.
2027     if (alignDown(SegInfo.FileOffset, SegInfo.Alignment) ==
2028         alignDown(FileOffset, SegInfo.Alignment)) {
2029       // The function's offset from base address in VAS is aligned by pagesize
2030       // instead of SegInfo.Alignment. Pagesize can't be got from perf events.
2031       // However, The ELF document says that SegInfo.FileOffset should equal
2032       // to SegInfo.Address, modulo the pagesize.
2033       // Reference: https://refspecs.linuxfoundation.org/elf/elf.pdf
2034 
2035       // So alignDown(SegInfo.Address, pagesize) can be calculated by:
2036       // alignDown(SegInfo.Address, pagesize)
2037       //   = SegInfo.Address - (SegInfo.Address % pagesize)
2038       //   = SegInfo.Address - (SegInfo.FileOffset % pagesize)
2039       //   = SegInfo.Address - SegInfo.FileOffset +
2040       //     alignDown(SegInfo.FileOffset, pagesize)
2041       //   = SegInfo.Address - SegInfo.FileOffset + FileOffset
2042       return MMapAddress - (SegInfo.Address - SegInfo.FileOffset + FileOffset);
2043     }
2044   }
2045 
2046   return std::nullopt;
2047 }
2048 
2049 ErrorOr<BinarySection &> BinaryContext::getSectionForAddress(uint64_t Address) {
2050   auto SI = AddressToSection.upper_bound(Address);
2051   if (SI != AddressToSection.begin()) {
2052     --SI;
2053     uint64_t UpperBound = SI->first + SI->second->getSize();
2054     if (!SI->second->getSize())
2055       UpperBound += 1;
2056     if (UpperBound > Address)
2057       return *SI->second;
2058   }
2059   return std::make_error_code(std::errc::bad_address);
2060 }
2061 
2062 ErrorOr<StringRef>
2063 BinaryContext::getSectionNameForAddress(uint64_t Address) const {
2064   if (ErrorOr<const BinarySection &> Section = getSectionForAddress(Address))
2065     return Section->getName();
2066   return std::make_error_code(std::errc::bad_address);
2067 }
2068 
2069 BinarySection &BinaryContext::registerSection(BinarySection *Section) {
2070   auto Res = Sections.insert(Section);
2071   (void)Res;
2072   assert(Res.second && "can't register the same section twice.");
2073 
2074   // Only register allocatable sections in the AddressToSection map.
2075   if (Section->isAllocatable() && Section->getAddress())
2076     AddressToSection.insert(std::make_pair(Section->getAddress(), Section));
2077   NameToSection.insert(
2078       std::make_pair(std::string(Section->getName()), Section));
2079   if (Section->hasSectionRef())
2080     SectionRefToBinarySection.insert(
2081         std::make_pair(Section->getSectionRef(), Section));
2082 
2083   LLVM_DEBUG(dbgs() << "BOLT-DEBUG: registering " << *Section << "\n");
2084   return *Section;
2085 }
2086 
2087 BinarySection &BinaryContext::registerSection(SectionRef Section) {
2088   return registerSection(new BinarySection(*this, Section));
2089 }
2090 
2091 BinarySection &
2092 BinaryContext::registerSection(const Twine &SectionName,
2093                                const BinarySection &OriginalSection) {
2094   return registerSection(
2095       new BinarySection(*this, SectionName, OriginalSection));
2096 }
2097 
2098 BinarySection &
2099 BinaryContext::registerOrUpdateSection(const Twine &Name, unsigned ELFType,
2100                                        unsigned ELFFlags, uint8_t *Data,
2101                                        uint64_t Size, unsigned Alignment) {
2102   auto NamedSections = getSectionByName(Name);
2103   if (NamedSections.begin() != NamedSections.end()) {
2104     assert(std::next(NamedSections.begin()) == NamedSections.end() &&
2105            "can only update unique sections");
2106     BinarySection *Section = NamedSections.begin()->second;
2107 
2108     LLVM_DEBUG(dbgs() << "BOLT-DEBUG: updating " << *Section << " -> ");
2109     const bool Flag = Section->isAllocatable();
2110     (void)Flag;
2111     Section->update(Data, Size, Alignment, ELFType, ELFFlags);
2112     LLVM_DEBUG(dbgs() << *Section << "\n");
2113     // FIXME: Fix section flags/attributes for MachO.
2114     if (isELF())
2115       assert(Flag == Section->isAllocatable() &&
2116              "can't change section allocation status");
2117     return *Section;
2118   }
2119 
2120   return registerSection(
2121       new BinarySection(*this, Name, Data, Size, Alignment, ELFType, ELFFlags));
2122 }
2123 
2124 void BinaryContext::deregisterSectionName(const BinarySection &Section) {
2125   auto NameRange = NameToSection.equal_range(Section.getName().str());
2126   while (NameRange.first != NameRange.second) {
2127     if (NameRange.first->second == &Section) {
2128       NameToSection.erase(NameRange.first);
2129       break;
2130     }
2131     ++NameRange.first;
2132   }
2133 }
2134 
2135 void BinaryContext::deregisterUnusedSections() {
2136   ErrorOr<BinarySection &> AbsSection = getUniqueSectionByName("<absolute>");
2137   for (auto SI = Sections.begin(); SI != Sections.end();) {
2138     BinarySection *Section = *SI;
2139     // We check getOutputData() instead of getOutputSize() because sometimes
2140     // zero-sized .text.cold sections are allocated.
2141     if (Section->hasSectionRef() || Section->getOutputData() ||
2142         (AbsSection && Section == &AbsSection.get())) {
2143       ++SI;
2144       continue;
2145     }
2146 
2147     LLVM_DEBUG(dbgs() << "LLVM-DEBUG: deregistering " << Section->getName()
2148                       << '\n';);
2149     deregisterSectionName(*Section);
2150     SI = Sections.erase(SI);
2151     delete Section;
2152   }
2153 }
2154 
2155 bool BinaryContext::deregisterSection(BinarySection &Section) {
2156   BinarySection *SectionPtr = &Section;
2157   auto Itr = Sections.find(SectionPtr);
2158   if (Itr != Sections.end()) {
2159     auto Range = AddressToSection.equal_range(SectionPtr->getAddress());
2160     while (Range.first != Range.second) {
2161       if (Range.first->second == SectionPtr) {
2162         AddressToSection.erase(Range.first);
2163         break;
2164       }
2165       ++Range.first;
2166     }
2167 
2168     deregisterSectionName(*SectionPtr);
2169     Sections.erase(Itr);
2170     delete SectionPtr;
2171     return true;
2172   }
2173   return false;
2174 }
2175 
2176 void BinaryContext::renameSection(BinarySection &Section,
2177                                   const Twine &NewName) {
2178   auto Itr = Sections.find(&Section);
2179   assert(Itr != Sections.end() && "Section must exist to be renamed.");
2180   Sections.erase(Itr);
2181 
2182   deregisterSectionName(Section);
2183 
2184   Section.Name = NewName.str();
2185   Section.setOutputName(Section.Name);
2186 
2187   NameToSection.insert(std::make_pair(Section.Name, &Section));
2188 
2189   // Reinsert with the new name.
2190   Sections.insert(&Section);
2191 }
2192 
2193 void BinaryContext::printSections(raw_ostream &OS) const {
2194   for (BinarySection *const &Section : Sections)
2195     OS << "BOLT-INFO: " << *Section << "\n";
2196 }
2197 
2198 BinarySection &BinaryContext::absoluteSection() {
2199   if (ErrorOr<BinarySection &> Section = getUniqueSectionByName("<absolute>"))
2200     return *Section;
2201   return registerOrUpdateSection("<absolute>", ELF::SHT_NULL, 0u);
2202 }
2203 
2204 ErrorOr<uint64_t> BinaryContext::getUnsignedValueAtAddress(uint64_t Address,
2205                                                            size_t Size) const {
2206   const ErrorOr<const BinarySection &> Section = getSectionForAddress(Address);
2207   if (!Section)
2208     return std::make_error_code(std::errc::bad_address);
2209 
2210   if (Section->isVirtual())
2211     return 0;
2212 
2213   DataExtractor DE(Section->getContents(), AsmInfo->isLittleEndian(),
2214                    AsmInfo->getCodePointerSize());
2215   auto ValueOffset = static_cast<uint64_t>(Address - Section->getAddress());
2216   return DE.getUnsigned(&ValueOffset, Size);
2217 }
2218 
2219 ErrorOr<int64_t> BinaryContext::getSignedValueAtAddress(uint64_t Address,
2220                                                         size_t Size) const {
2221   const ErrorOr<const BinarySection &> Section = getSectionForAddress(Address);
2222   if (!Section)
2223     return std::make_error_code(std::errc::bad_address);
2224 
2225   if (Section->isVirtual())
2226     return 0;
2227 
2228   DataExtractor DE(Section->getContents(), AsmInfo->isLittleEndian(),
2229                    AsmInfo->getCodePointerSize());
2230   auto ValueOffset = static_cast<uint64_t>(Address - Section->getAddress());
2231   return DE.getSigned(&ValueOffset, Size);
2232 }
2233 
2234 void BinaryContext::addRelocation(uint64_t Address, MCSymbol *Symbol,
2235                                   uint64_t Type, uint64_t Addend,
2236                                   uint64_t Value) {
2237   ErrorOr<BinarySection &> Section = getSectionForAddress(Address);
2238   assert(Section && "cannot find section for address");
2239   Section->addRelocation(Address - Section->getAddress(), Symbol, Type, Addend,
2240                          Value);
2241 }
2242 
2243 void BinaryContext::addDynamicRelocation(uint64_t Address, MCSymbol *Symbol,
2244                                          uint64_t Type, uint64_t Addend,
2245                                          uint64_t Value) {
2246   ErrorOr<BinarySection &> Section = getSectionForAddress(Address);
2247   assert(Section && "cannot find section for address");
2248   Section->addDynamicRelocation(Address - Section->getAddress(), Symbol, Type,
2249                                 Addend, Value);
2250 }
2251 
2252 bool BinaryContext::removeRelocationAt(uint64_t Address) {
2253   ErrorOr<BinarySection &> Section = getSectionForAddress(Address);
2254   assert(Section && "cannot find section for address");
2255   return Section->removeRelocationAt(Address - Section->getAddress());
2256 }
2257 
2258 const Relocation *BinaryContext::getRelocationAt(uint64_t Address) const {
2259   ErrorOr<const BinarySection &> Section = getSectionForAddress(Address);
2260   if (!Section)
2261     return nullptr;
2262 
2263   return Section->getRelocationAt(Address - Section->getAddress());
2264 }
2265 
2266 const Relocation *
2267 BinaryContext::getDynamicRelocationAt(uint64_t Address) const {
2268   ErrorOr<const BinarySection &> Section = getSectionForAddress(Address);
2269   if (!Section)
2270     return nullptr;
2271 
2272   return Section->getDynamicRelocationAt(Address - Section->getAddress());
2273 }
2274 
2275 void BinaryContext::markAmbiguousRelocations(BinaryData &BD,
2276                                              const uint64_t Address) {
2277   auto setImmovable = [&](BinaryData &BD) {
2278     BinaryData *Root = BD.getAtomicRoot();
2279     LLVM_DEBUG(if (Root->isMoveable()) {
2280       dbgs() << "BOLT-DEBUG: setting " << *Root << " as immovable "
2281              << "due to ambiguous relocation referencing 0x"
2282              << Twine::utohexstr(Address) << '\n';
2283     });
2284     Root->setIsMoveable(false);
2285   };
2286 
2287   if (Address == BD.getAddress()) {
2288     setImmovable(BD);
2289 
2290     // Set previous symbol as immovable
2291     BinaryData *Prev = getBinaryDataContainingAddress(Address - 1);
2292     if (Prev && Prev->getEndAddress() == BD.getAddress())
2293       setImmovable(*Prev);
2294   }
2295 
2296   if (Address == BD.getEndAddress()) {
2297     setImmovable(BD);
2298 
2299     // Set next symbol as immovable
2300     BinaryData *Next = getBinaryDataContainingAddress(BD.getEndAddress());
2301     if (Next && Next->getAddress() == BD.getEndAddress())
2302       setImmovable(*Next);
2303   }
2304 }
2305 
2306 BinaryFunction *BinaryContext::getFunctionForSymbol(const MCSymbol *Symbol,
2307                                                     uint64_t *EntryDesc) {
2308   std::shared_lock<llvm::sys::RWMutex> Lock(SymbolToFunctionMapMutex);
2309   auto BFI = SymbolToFunctionMap.find(Symbol);
2310   if (BFI == SymbolToFunctionMap.end())
2311     return nullptr;
2312 
2313   BinaryFunction *BF = BFI->second;
2314   if (EntryDesc)
2315     *EntryDesc = BF->getEntryIDForSymbol(Symbol);
2316 
2317   return BF;
2318 }
2319 
2320 std::string
2321 BinaryContext::generateBugReportMessage(StringRef Message,
2322                                         const BinaryFunction &Function) const {
2323   std::string Msg;
2324   raw_string_ostream SS(Msg);
2325   SS << "=======================================\n";
2326   SS << "BOLT is unable to proceed because it couldn't properly understand "
2327         "this function.\n";
2328   SS << "If you are running the most recent version of BOLT, you may "
2329         "want to "
2330         "report this and paste this dump.\nPlease check that there is no "
2331         "sensitive contents being shared in this dump.\n";
2332   SS << "\nOffending function: " << Function.getPrintName() << "\n\n";
2333   ScopedPrinter SP(SS);
2334   SP.printBinaryBlock("Function contents", *Function.getData());
2335   SS << "\n";
2336   const_cast<BinaryFunction &>(Function).print(SS, "");
2337   SS << "ERROR: " << Message;
2338   SS << "\n=======================================\n";
2339   return Msg;
2340 }
2341 
2342 BinaryFunction *
2343 BinaryContext::createInjectedBinaryFunction(const std::string &Name,
2344                                             bool IsSimple) {
2345   InjectedBinaryFunctions.push_back(new BinaryFunction(Name, *this, IsSimple));
2346   BinaryFunction *BF = InjectedBinaryFunctions.back();
2347   setSymbolToFunctionMap(BF->getSymbol(), BF);
2348   BF->CurrentState = BinaryFunction::State::CFG;
2349   return BF;
2350 }
2351 
2352 std::pair<size_t, size_t>
2353 BinaryContext::calculateEmittedSize(BinaryFunction &BF, bool FixBranches) {
2354   // Adjust branch instruction to match the current layout.
2355   if (FixBranches)
2356     BF.fixBranches();
2357 
2358   // Create local MC context to isolate the effect of ephemeral code emission.
2359   IndependentCodeEmitter MCEInstance = createIndependentMCCodeEmitter();
2360   MCContext *LocalCtx = MCEInstance.LocalCtx.get();
2361   MCAsmBackend *MAB =
2362       TheTarget->createMCAsmBackend(*STI, *MRI, MCTargetOptions());
2363 
2364   SmallString<256> Code;
2365   raw_svector_ostream VecOS(Code);
2366 
2367   std::unique_ptr<MCObjectWriter> OW = MAB->createObjectWriter(VecOS);
2368   std::unique_ptr<MCStreamer> Streamer(TheTarget->createMCObjectStreamer(
2369       *TheTriple, *LocalCtx, std::unique_ptr<MCAsmBackend>(MAB), std::move(OW),
2370       std::unique_ptr<MCCodeEmitter>(MCEInstance.MCE.release()), *STI,
2371       /*RelaxAll=*/false,
2372       /*IncrementalLinkerCompatible=*/false,
2373       /*DWARFMustBeAtTheEnd=*/false));
2374 
2375   Streamer->initSections(false, *STI);
2376 
2377   MCSection *Section = MCEInstance.LocalMOFI->getTextSection();
2378   Section->setHasInstructions(true);
2379 
2380   // Create symbols in the LocalCtx so that they get destroyed with it.
2381   MCSymbol *StartLabel = LocalCtx->createTempSymbol();
2382   MCSymbol *EndLabel = LocalCtx->createTempSymbol();
2383 
2384   Streamer->switchSection(Section);
2385   Streamer->emitLabel(StartLabel);
2386   emitFunctionBody(*Streamer, BF, BF.getLayout().getMainFragment(),
2387                    /*EmitCodeOnly=*/true);
2388   Streamer->emitLabel(EndLabel);
2389 
2390   using LabelRange = std::pair<const MCSymbol *, const MCSymbol *>;
2391   SmallVector<LabelRange> SplitLabels;
2392   for (FunctionFragment &FF : BF.getLayout().getSplitFragments()) {
2393     MCSymbol *const SplitStartLabel = LocalCtx->createTempSymbol();
2394     MCSymbol *const SplitEndLabel = LocalCtx->createTempSymbol();
2395     SplitLabels.emplace_back(SplitStartLabel, SplitEndLabel);
2396 
2397     MCSectionELF *const SplitSection = LocalCtx->getELFSection(
2398         BF.getCodeSectionName(FF.getFragmentNum()), ELF::SHT_PROGBITS,
2399         ELF::SHF_EXECINSTR | ELF::SHF_ALLOC);
2400     SplitSection->setHasInstructions(true);
2401     Streamer->switchSection(SplitSection);
2402 
2403     Streamer->emitLabel(SplitStartLabel);
2404     emitFunctionBody(*Streamer, BF, FF, /*EmitCodeOnly=*/true);
2405     Streamer->emitLabel(SplitEndLabel);
2406   }
2407 
2408   MCAssembler &Assembler =
2409       static_cast<MCObjectStreamer *>(Streamer.get())->getAssembler();
2410   Assembler.layout();
2411 
2412   // Obtain fragment sizes.
2413   std::vector<uint64_t> FragmentSizes;
2414   // Main fragment size.
2415   const uint64_t HotSize = Assembler.getSymbolOffset(*EndLabel) -
2416                            Assembler.getSymbolOffset(*StartLabel);
2417   FragmentSizes.push_back(HotSize);
2418   // Split fragment sizes.
2419   uint64_t ColdSize = 0;
2420   for (const auto &Labels : SplitLabels) {
2421     uint64_t Size = Assembler.getSymbolOffset(*Labels.second) -
2422                     Assembler.getSymbolOffset(*Labels.first);
2423     FragmentSizes.push_back(Size);
2424     ColdSize += Size;
2425   }
2426 
2427   // Populate new start and end offsets of each basic block.
2428   uint64_t FragmentIndex = 0;
2429   for (FunctionFragment &FF : BF.getLayout().fragments()) {
2430     BinaryBasicBlock *PrevBB = nullptr;
2431     for (BinaryBasicBlock *BB : FF) {
2432       const uint64_t BBStartOffset =
2433           Assembler.getSymbolOffset(*(BB->getLabel()));
2434       BB->setOutputStartAddress(BBStartOffset);
2435       if (PrevBB)
2436         PrevBB->setOutputEndAddress(BBStartOffset);
2437       PrevBB = BB;
2438     }
2439     if (PrevBB)
2440       PrevBB->setOutputEndAddress(FragmentSizes[FragmentIndex]);
2441     FragmentIndex++;
2442   }
2443 
2444   // Clean-up the effect of the code emission.
2445   for (const MCSymbol &Symbol : Assembler.symbols()) {
2446     MCSymbol *MutableSymbol = const_cast<MCSymbol *>(&Symbol);
2447     MutableSymbol->setUndefined();
2448     MutableSymbol->setIsRegistered(false);
2449   }
2450 
2451   return std::make_pair(HotSize, ColdSize);
2452 }
2453 
2454 bool BinaryContext::validateInstructionEncoding(
2455     ArrayRef<uint8_t> InputSequence) const {
2456   MCInst Inst;
2457   uint64_t InstSize;
2458   DisAsm->getInstruction(Inst, InstSize, InputSequence, 0, nulls());
2459   assert(InstSize == InputSequence.size() &&
2460          "Disassembled instruction size does not match the sequence.");
2461 
2462   SmallString<256> Code;
2463   SmallVector<MCFixup, 4> Fixups;
2464 
2465   MCE->encodeInstruction(Inst, Code, Fixups, *STI);
2466   auto OutputSequence = ArrayRef<uint8_t>((uint8_t *)Code.data(), Code.size());
2467   if (InputSequence != OutputSequence) {
2468     if (opts::Verbosity > 1) {
2469       this->errs() << "BOLT-WARNING: mismatched encoding detected\n"
2470                    << "      input: " << InputSequence << '\n'
2471                    << "     output: " << OutputSequence << '\n';
2472     }
2473     return false;
2474   }
2475 
2476   return true;
2477 }
2478 
2479 uint64_t BinaryContext::getHotThreshold() const {
2480   static uint64_t Threshold = 0;
2481   if (Threshold == 0) {
2482     Threshold = std::max(
2483         (uint64_t)opts::ExecutionCountThreshold,
2484         NumProfiledFuncs ? SumExecutionCount / (2 * NumProfiledFuncs) : 1);
2485   }
2486   return Threshold;
2487 }
2488 
2489 BinaryFunction *BinaryContext::getBinaryFunctionContainingAddress(
2490     uint64_t Address, bool CheckPastEnd, bool UseMaxSize) {
2491   auto FI = BinaryFunctions.upper_bound(Address);
2492   if (FI == BinaryFunctions.begin())
2493     return nullptr;
2494   --FI;
2495 
2496   const uint64_t UsedSize =
2497       UseMaxSize ? FI->second.getMaxSize() : FI->second.getSize();
2498 
2499   if (Address >= FI->first + UsedSize + (CheckPastEnd ? 1 : 0))
2500     return nullptr;
2501 
2502   return &FI->second;
2503 }
2504 
2505 BinaryFunction *BinaryContext::getBinaryFunctionAtAddress(uint64_t Address) {
2506   // First, try to find a function starting at the given address. If the
2507   // function was folded, this will get us the original folded function if it
2508   // wasn't removed from the list, e.g. in non-relocation mode.
2509   auto BFI = BinaryFunctions.find(Address);
2510   if (BFI != BinaryFunctions.end())
2511     return &BFI->second;
2512 
2513   // We might have folded the function matching the object at the given
2514   // address. In such case, we look for a function matching the symbol
2515   // registered at the original address. The new function (the one that the
2516   // original was folded into) will hold the symbol.
2517   if (const BinaryData *BD = getBinaryDataAtAddress(Address)) {
2518     uint64_t EntryID = 0;
2519     BinaryFunction *BF = getFunctionForSymbol(BD->getSymbol(), &EntryID);
2520     if (BF && EntryID == 0)
2521       return BF;
2522   }
2523   return nullptr;
2524 }
2525 
2526 /// Deregister JumpTable registered at a given \p Address and delete it.
2527 void BinaryContext::deleteJumpTable(uint64_t Address) {
2528   assert(JumpTables.count(Address) && "Must have a jump table at address");
2529   JumpTable *JT = JumpTables.at(Address);
2530   for (BinaryFunction *Parent : JT->Parents)
2531     Parent->JumpTables.erase(Address);
2532   JumpTables.erase(Address);
2533   delete JT;
2534 }
2535 
2536 DebugAddressRangesVector BinaryContext::translateModuleAddressRanges(
2537     const DWARFAddressRangesVector &InputRanges) const {
2538   DebugAddressRangesVector OutputRanges;
2539 
2540   for (const DWARFAddressRange Range : InputRanges) {
2541     auto BFI = BinaryFunctions.lower_bound(Range.LowPC);
2542     while (BFI != BinaryFunctions.end()) {
2543       const BinaryFunction &Function = BFI->second;
2544       if (Function.getAddress() >= Range.HighPC)
2545         break;
2546       const DebugAddressRangesVector FunctionRanges =
2547           Function.getOutputAddressRanges();
2548       llvm::move(FunctionRanges, std::back_inserter(OutputRanges));
2549       std::advance(BFI, 1);
2550     }
2551   }
2552 
2553   return OutputRanges;
2554 }
2555 
2556 } // namespace bolt
2557 } // namespace llvm
2558