xref: /llvm-project/bolt/lib/Core/BinaryContext.cpp (revision 43d0891d3bb1fc40ff5dcea91c28d1582978caff)
1 //===- bolt/Core/BinaryContext.cpp - Low-level context --------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements the BinaryContext class.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "bolt/Core/BinaryContext.h"
14 #include "bolt/Core/BinaryEmitter.h"
15 #include "bolt/Core/BinaryFunction.h"
16 #include "bolt/Utils/CommandLineOpts.h"
17 #include "bolt/Utils/Utils.h"
18 #include "llvm/ADT/STLExtras.h"
19 #include "llvm/ADT/Twine.h"
20 #include "llvm/DebugInfo/DWARF/DWARFCompileUnit.h"
21 #include "llvm/DebugInfo/DWARF/DWARFFormValue.h"
22 #include "llvm/DebugInfo/DWARF/DWARFUnit.h"
23 #include "llvm/MC/MCAsmLayout.h"
24 #include "llvm/MC/MCAssembler.h"
25 #include "llvm/MC/MCContext.h"
26 #include "llvm/MC/MCDisassembler/MCDisassembler.h"
27 #include "llvm/MC/MCInstPrinter.h"
28 #include "llvm/MC/MCObjectStreamer.h"
29 #include "llvm/MC/MCObjectWriter.h"
30 #include "llvm/MC/MCRegisterInfo.h"
31 #include "llvm/MC/MCSectionELF.h"
32 #include "llvm/MC/MCStreamer.h"
33 #include "llvm/MC/MCSubtargetInfo.h"
34 #include "llvm/MC/MCSymbol.h"
35 #include "llvm/Support/CommandLine.h"
36 #include "llvm/Support/Error.h"
37 #include "llvm/Support/Regex.h"
38 #include <algorithm>
39 #include <functional>
40 #include <iterator>
41 #include <unordered_set>
42 
43 using namespace llvm;
44 
45 #undef  DEBUG_TYPE
46 #define DEBUG_TYPE "bolt"
47 
48 namespace opts {
49 
50 cl::opt<bool> NoHugePages("no-huge-pages",
51                           cl::desc("use regular size pages for code alignment"),
52                           cl::Hidden, cl::cat(BoltCategory));
53 
54 static cl::opt<bool>
55 PrintDebugInfo("print-debug-info",
56   cl::desc("print debug info when printing functions"),
57   cl::Hidden,
58   cl::ZeroOrMore,
59   cl::cat(BoltCategory));
60 
61 cl::opt<bool> PrintRelocations(
62     "print-relocations",
63     cl::desc("print relocations when printing functions/objects"), cl::Hidden,
64     cl::cat(BoltCategory));
65 
66 static cl::opt<bool>
67 PrintMemData("print-mem-data",
68   cl::desc("print memory data annotations when printing functions"),
69   cl::Hidden,
70   cl::ZeroOrMore,
71   cl::cat(BoltCategory));
72 
73 cl::opt<std::string> CompDirOverride(
74     "comp-dir-override",
75     cl::desc("overrides DW_AT_comp_dir, and provides an alterantive base "
76              "location, which is used with DW_AT_dwo_name to construct a path "
77              "to *.dwo files."),
78     cl::Hidden, cl::init(""), cl::cat(BoltCategory));
79 } // namespace opts
80 
81 namespace llvm {
82 namespace bolt {
83 
84 char BOLTError::ID = 0;
85 
86 BOLTError::BOLTError(bool IsFatal, const Twine &S)
87     : IsFatal(IsFatal), Msg(S.str()) {}
88 
89 void BOLTError::log(raw_ostream &OS) const {
90   if (IsFatal)
91     OS << "FATAL ";
92   StringRef ErrMsg = StringRef(Msg);
93   // Prepend our error prefix if it is missing
94   if (ErrMsg.empty()) {
95     OS << "BOLT-ERROR\n";
96   } else {
97     if (!ErrMsg.starts_with("BOLT-ERROR"))
98       OS << "BOLT-ERROR: ";
99     OS << ErrMsg << "\n";
100   }
101 }
102 
103 std::error_code BOLTError::convertToErrorCode() const {
104   return inconvertibleErrorCode();
105 }
106 
107 Error createNonFatalBOLTError(const Twine &S) {
108   return make_error<BOLTError>(/*IsFatal*/ false, S);
109 }
110 
111 Error createFatalBOLTError(const Twine &S) {
112   return make_error<BOLTError>(/*IsFatal*/ true, S);
113 }
114 
115 void BinaryContext::logBOLTErrorsAndQuitOnFatal(Error E) {
116   handleAllErrors(Error(std::move(E)), [&](const BOLTError &E) {
117     if (!E.getMessage().empty())
118       E.log(this->errs());
119     if (E.isFatal())
120       exit(1);
121   });
122 }
123 
124 BinaryContext::BinaryContext(std::unique_ptr<MCContext> Ctx,
125                              std::unique_ptr<DWARFContext> DwCtx,
126                              std::unique_ptr<Triple> TheTriple,
127                              const Target *TheTarget, std::string TripleName,
128                              std::unique_ptr<MCCodeEmitter> MCE,
129                              std::unique_ptr<MCObjectFileInfo> MOFI,
130                              std::unique_ptr<const MCAsmInfo> AsmInfo,
131                              std::unique_ptr<const MCInstrInfo> MII,
132                              std::unique_ptr<const MCSubtargetInfo> STI,
133                              std::unique_ptr<MCInstPrinter> InstPrinter,
134                              std::unique_ptr<const MCInstrAnalysis> MIA,
135                              std::unique_ptr<MCPlusBuilder> MIB,
136                              std::unique_ptr<const MCRegisterInfo> MRI,
137                              std::unique_ptr<MCDisassembler> DisAsm,
138                              JournalingStreams Logger)
139     : Ctx(std::move(Ctx)), DwCtx(std::move(DwCtx)),
140       TheTriple(std::move(TheTriple)), TheTarget(TheTarget),
141       TripleName(TripleName), MCE(std::move(MCE)), MOFI(std::move(MOFI)),
142       AsmInfo(std::move(AsmInfo)), MII(std::move(MII)), STI(std::move(STI)),
143       InstPrinter(std::move(InstPrinter)), MIA(std::move(MIA)),
144       MIB(std::move(MIB)), MRI(std::move(MRI)), DisAsm(std::move(DisAsm)),
145       Logger(Logger) {
146   Relocation::Arch = this->TheTriple->getArch();
147   RegularPageSize = isAArch64() ? RegularPageSizeAArch64 : RegularPageSizeX86;
148   PageAlign = opts::NoHugePages ? RegularPageSize : HugePageSize;
149 }
150 
151 BinaryContext::~BinaryContext() {
152   for (BinarySection *Section : Sections)
153     delete Section;
154   for (BinaryFunction *InjectedFunction : InjectedBinaryFunctions)
155     delete InjectedFunction;
156   for (std::pair<const uint64_t, JumpTable *> JTI : JumpTables)
157     delete JTI.second;
158   clearBinaryData();
159 }
160 
161 /// Create BinaryContext for a given architecture \p ArchName and
162 /// triple \p TripleName.
163 Expected<std::unique_ptr<BinaryContext>> BinaryContext::createBinaryContext(
164     Triple TheTriple, StringRef InputFileName, SubtargetFeatures *Features,
165     bool IsPIC, std::unique_ptr<DWARFContext> DwCtx, JournalingStreams Logger) {
166   StringRef ArchName = "";
167   std::string FeaturesStr = "";
168   switch (TheTriple.getArch()) {
169   case llvm::Triple::x86_64:
170     if (Features)
171       return createFatalBOLTError(
172           "x86_64 target does not use SubtargetFeatures");
173     ArchName = "x86-64";
174     FeaturesStr = "+nopl";
175     break;
176   case llvm::Triple::aarch64:
177     if (Features)
178       return createFatalBOLTError(
179           "AArch64 target does not use SubtargetFeatures");
180     ArchName = "aarch64";
181     FeaturesStr = "+all";
182     break;
183   case llvm::Triple::riscv64: {
184     ArchName = "riscv64";
185     if (!Features)
186       return createFatalBOLTError("RISCV target needs SubtargetFeatures");
187     // We rely on relaxation for some transformations (e.g., promoting all calls
188     // to PseudoCALL and then making JITLink relax them). Since the relax
189     // feature is not stored in the object file, we manually enable it.
190     Features->AddFeature("relax");
191     FeaturesStr = Features->getString();
192     break;
193   }
194   default:
195     return createStringError(std::errc::not_supported,
196                              "BOLT-ERROR: Unrecognized machine in ELF file");
197   }
198 
199   const std::string TripleName = TheTriple.str();
200 
201   std::string Error;
202   const Target *TheTarget =
203       TargetRegistry::lookupTarget(std::string(ArchName), TheTriple, Error);
204   if (!TheTarget)
205     return createStringError(make_error_code(std::errc::not_supported),
206                              Twine("BOLT-ERROR: ", Error));
207 
208   std::unique_ptr<const MCRegisterInfo> MRI(
209       TheTarget->createMCRegInfo(TripleName));
210   if (!MRI)
211     return createStringError(
212         make_error_code(std::errc::not_supported),
213         Twine("BOLT-ERROR: no register info for target ", TripleName));
214 
215   // Set up disassembler.
216   std::unique_ptr<MCAsmInfo> AsmInfo(
217       TheTarget->createMCAsmInfo(*MRI, TripleName, MCTargetOptions()));
218   if (!AsmInfo)
219     return createStringError(
220         make_error_code(std::errc::not_supported),
221         Twine("BOLT-ERROR: no assembly info for target ", TripleName));
222   // BOLT creates "func@PLT" symbols for PLT entries. In function assembly dump
223   // we want to emit such names as using @PLT without double quotes to convey
224   // variant kind to the assembler. BOLT doesn't rely on the linker so we can
225   // override the default AsmInfo behavior to emit names the way we want.
226   AsmInfo->setAllowAtInName(true);
227 
228   std::unique_ptr<const MCSubtargetInfo> STI(
229       TheTarget->createMCSubtargetInfo(TripleName, "", FeaturesStr));
230   if (!STI)
231     return createStringError(
232         make_error_code(std::errc::not_supported),
233         Twine("BOLT-ERROR: no subtarget info for target ", TripleName));
234 
235   std::unique_ptr<const MCInstrInfo> MII(TheTarget->createMCInstrInfo());
236   if (!MII)
237     return createStringError(
238         make_error_code(std::errc::not_supported),
239         Twine("BOLT-ERROR: no instruction info for target ", TripleName));
240 
241   std::unique_ptr<MCContext> Ctx(
242       new MCContext(TheTriple, AsmInfo.get(), MRI.get(), STI.get()));
243   std::unique_ptr<MCObjectFileInfo> MOFI(
244       TheTarget->createMCObjectFileInfo(*Ctx, IsPIC));
245   Ctx->setObjectFileInfo(MOFI.get());
246   // We do not support X86 Large code model. Change this in the future.
247   bool Large = false;
248   if (TheTriple.getArch() == llvm::Triple::aarch64)
249     Large = true;
250   unsigned LSDAEncoding =
251       Large ? dwarf::DW_EH_PE_absptr : dwarf::DW_EH_PE_udata4;
252   if (IsPIC) {
253     LSDAEncoding = dwarf::DW_EH_PE_pcrel |
254                    (Large ? dwarf::DW_EH_PE_sdata8 : dwarf::DW_EH_PE_sdata4);
255   }
256 
257   std::unique_ptr<MCDisassembler> DisAsm(
258       TheTarget->createMCDisassembler(*STI, *Ctx));
259 
260   if (!DisAsm)
261     return createStringError(
262         make_error_code(std::errc::not_supported),
263         Twine("BOLT-ERROR: no disassembler info for target ", TripleName));
264 
265   std::unique_ptr<const MCInstrAnalysis> MIA(
266       TheTarget->createMCInstrAnalysis(MII.get()));
267   if (!MIA)
268     return createStringError(
269         make_error_code(std::errc::not_supported),
270         Twine("BOLT-ERROR: failed to create instruction analysis for target ",
271               TripleName));
272 
273   int AsmPrinterVariant = AsmInfo->getAssemblerDialect();
274   std::unique_ptr<MCInstPrinter> InstructionPrinter(
275       TheTarget->createMCInstPrinter(TheTriple, AsmPrinterVariant, *AsmInfo,
276                                      *MII, *MRI));
277   if (!InstructionPrinter)
278     return createStringError(
279         make_error_code(std::errc::not_supported),
280         Twine("BOLT-ERROR: no instruction printer for target ", TripleName));
281   InstructionPrinter->setPrintImmHex(true);
282 
283   std::unique_ptr<MCCodeEmitter> MCE(
284       TheTarget->createMCCodeEmitter(*MII, *Ctx));
285 
286   auto BC = std::make_unique<BinaryContext>(
287       std::move(Ctx), std::move(DwCtx), std::make_unique<Triple>(TheTriple),
288       TheTarget, std::string(TripleName), std::move(MCE), std::move(MOFI),
289       std::move(AsmInfo), std::move(MII), std::move(STI),
290       std::move(InstructionPrinter), std::move(MIA), nullptr, std::move(MRI),
291       std::move(DisAsm), Logger);
292 
293   BC->LSDAEncoding = LSDAEncoding;
294 
295   BC->MAB = std::unique_ptr<MCAsmBackend>(
296       BC->TheTarget->createMCAsmBackend(*BC->STI, *BC->MRI, MCTargetOptions()));
297 
298   BC->setFilename(InputFileName);
299 
300   BC->HasFixedLoadAddress = !IsPIC;
301 
302   BC->SymbolicDisAsm = std::unique_ptr<MCDisassembler>(
303       BC->TheTarget->createMCDisassembler(*BC->STI, *BC->Ctx));
304 
305   if (!BC->SymbolicDisAsm)
306     return createStringError(
307         make_error_code(std::errc::not_supported),
308         Twine("BOLT-ERROR: no disassembler info for target ", TripleName));
309 
310   return std::move(BC);
311 }
312 
313 bool BinaryContext::forceSymbolRelocations(StringRef SymbolName) const {
314   if (opts::HotText &&
315       (SymbolName == "__hot_start" || SymbolName == "__hot_end"))
316     return true;
317 
318   if (opts::HotData &&
319       (SymbolName == "__hot_data_start" || SymbolName == "__hot_data_end"))
320     return true;
321 
322   if (SymbolName == "_end")
323     return true;
324 
325   return false;
326 }
327 
328 std::unique_ptr<MCObjectWriter>
329 BinaryContext::createObjectWriter(raw_pwrite_stream &OS) {
330   return MAB->createObjectWriter(OS);
331 }
332 
333 bool BinaryContext::validateObjectNesting() const {
334   auto Itr = BinaryDataMap.begin();
335   auto End = BinaryDataMap.end();
336   bool Valid = true;
337   while (Itr != End) {
338     auto Next = std::next(Itr);
339     while (Next != End &&
340            Itr->second->getSection() == Next->second->getSection() &&
341            Itr->second->containsRange(Next->second->getAddress(),
342                                       Next->second->getSize())) {
343       if (Next->second->Parent != Itr->second) {
344         this->errs() << "BOLT-WARNING: object nesting incorrect for:\n"
345                      << "BOLT-WARNING:  " << *Itr->second << "\n"
346                      << "BOLT-WARNING:  " << *Next->second << "\n";
347         Valid = false;
348       }
349       ++Next;
350     }
351     Itr = Next;
352   }
353   return Valid;
354 }
355 
356 bool BinaryContext::validateHoles() const {
357   bool Valid = true;
358   for (BinarySection &Section : sections()) {
359     for (const Relocation &Rel : Section.relocations()) {
360       uint64_t RelAddr = Rel.Offset + Section.getAddress();
361       const BinaryData *BD = getBinaryDataContainingAddress(RelAddr);
362       if (!BD) {
363         this->errs()
364             << "BOLT-WARNING: no BinaryData found for relocation at address"
365             << " 0x" << Twine::utohexstr(RelAddr) << " in " << Section.getName()
366             << "\n";
367         Valid = false;
368       } else if (!BD->getAtomicRoot()) {
369         this->errs()
370             << "BOLT-WARNING: no atomic BinaryData found for relocation at "
371             << "address 0x" << Twine::utohexstr(RelAddr) << " in "
372             << Section.getName() << "\n";
373         Valid = false;
374       }
375     }
376   }
377   return Valid;
378 }
379 
380 void BinaryContext::updateObjectNesting(BinaryDataMapType::iterator GAI) {
381   const uint64_t Address = GAI->second->getAddress();
382   const uint64_t Size = GAI->second->getSize();
383 
384   auto fixParents = [&](BinaryDataMapType::iterator Itr,
385                         BinaryData *NewParent) {
386     BinaryData *OldParent = Itr->second->Parent;
387     Itr->second->Parent = NewParent;
388     ++Itr;
389     while (Itr != BinaryDataMap.end() && OldParent &&
390            Itr->second->Parent == OldParent) {
391       Itr->second->Parent = NewParent;
392       ++Itr;
393     }
394   };
395 
396   // Check if the previous symbol contains the newly added symbol.
397   if (GAI != BinaryDataMap.begin()) {
398     BinaryData *Prev = std::prev(GAI)->second;
399     while (Prev) {
400       if (Prev->getSection() == GAI->second->getSection() &&
401           Prev->containsRange(Address, Size)) {
402         fixParents(GAI, Prev);
403       } else {
404         fixParents(GAI, nullptr);
405       }
406       Prev = Prev->Parent;
407     }
408   }
409 
410   // Check if the newly added symbol contains any subsequent symbols.
411   if (Size != 0) {
412     BinaryData *BD = GAI->second->Parent ? GAI->second->Parent : GAI->second;
413     auto Itr = std::next(GAI);
414     while (
415         Itr != BinaryDataMap.end() &&
416         BD->containsRange(Itr->second->getAddress(), Itr->second->getSize())) {
417       Itr->second->Parent = BD;
418       ++Itr;
419     }
420   }
421 }
422 
423 iterator_range<BinaryContext::binary_data_iterator>
424 BinaryContext::getSubBinaryData(BinaryData *BD) {
425   auto Start = std::next(BinaryDataMap.find(BD->getAddress()));
426   auto End = Start;
427   while (End != BinaryDataMap.end() && BD->isAncestorOf(End->second))
428     ++End;
429   return make_range(Start, End);
430 }
431 
432 std::pair<const MCSymbol *, uint64_t>
433 BinaryContext::handleAddressRef(uint64_t Address, BinaryFunction &BF,
434                                 bool IsPCRel) {
435   if (isAArch64()) {
436     // Check if this is an access to a constant island and create bookkeeping
437     // to keep track of it and emit it later as part of this function.
438     if (MCSymbol *IslandSym = BF.getOrCreateIslandAccess(Address))
439       return std::make_pair(IslandSym, 0);
440 
441     // Detect custom code written in assembly that refers to arbitrary
442     // constant islands from other functions. Write this reference so we
443     // can pull this constant island and emit it as part of this function
444     // too.
445     auto IslandIter = AddressToConstantIslandMap.lower_bound(Address);
446 
447     if (IslandIter != AddressToConstantIslandMap.begin() &&
448         (IslandIter == AddressToConstantIslandMap.end() ||
449          IslandIter->first > Address))
450       --IslandIter;
451 
452     if (IslandIter != AddressToConstantIslandMap.end()) {
453       // Fall-back to referencing the original constant island in the presence
454       // of dynamic relocs, as we currently do not support cloning them.
455       // Notice: we might fail to link because of this, if the original constant
456       // island we are referring would be emitted too far away.
457       if (IslandIter->second->hasDynamicRelocationAtIsland()) {
458         MCSymbol *IslandSym =
459             IslandIter->second->getOrCreateIslandAccess(Address);
460         if (IslandSym)
461           return std::make_pair(IslandSym, 0);
462       } else if (MCSymbol *IslandSym =
463                      IslandIter->second->getOrCreateProxyIslandAccess(Address,
464                                                                       BF)) {
465         BF.createIslandDependency(IslandSym, IslandIter->second);
466         return std::make_pair(IslandSym, 0);
467       }
468     }
469   }
470 
471   // Note that the address does not necessarily have to reside inside
472   // a section, it could be an absolute address too.
473   ErrorOr<BinarySection &> Section = getSectionForAddress(Address);
474   if (Section && Section->isText()) {
475     if (BF.containsAddress(Address, /*UseMaxSize=*/isAArch64())) {
476       if (Address != BF.getAddress()) {
477         // The address could potentially escape. Mark it as another entry
478         // point into the function.
479         if (opts::Verbosity >= 1) {
480           this->outs() << "BOLT-INFO: potentially escaped address 0x"
481                        << Twine::utohexstr(Address) << " in function " << BF
482                        << '\n';
483         }
484         BF.HasInternalLabelReference = true;
485         return std::make_pair(
486             BF.addEntryPointAtOffset(Address - BF.getAddress()), 0);
487       }
488     } else {
489       addInterproceduralReference(&BF, Address);
490     }
491   }
492 
493   // With relocations, catch jump table references outside of the basic block
494   // containing the indirect jump.
495   if (HasRelocations) {
496     const MemoryContentsType MemType = analyzeMemoryAt(Address, BF);
497     if (MemType == MemoryContentsType::POSSIBLE_PIC_JUMP_TABLE && IsPCRel) {
498       const MCSymbol *Symbol =
499           getOrCreateJumpTable(BF, Address, JumpTable::JTT_PIC);
500 
501       return std::make_pair(Symbol, 0);
502     }
503   }
504 
505   if (BinaryData *BD = getBinaryDataContainingAddress(Address))
506     return std::make_pair(BD->getSymbol(), Address - BD->getAddress());
507 
508   // TODO: use DWARF info to get size/alignment here?
509   MCSymbol *TargetSymbol = getOrCreateGlobalSymbol(Address, "DATAat");
510   LLVM_DEBUG(dbgs() << "Created symbol " << TargetSymbol->getName() << '\n');
511   return std::make_pair(TargetSymbol, 0);
512 }
513 
514 MemoryContentsType BinaryContext::analyzeMemoryAt(uint64_t Address,
515                                                   BinaryFunction &BF) {
516   if (!isX86())
517     return MemoryContentsType::UNKNOWN;
518 
519   ErrorOr<BinarySection &> Section = getSectionForAddress(Address);
520   if (!Section) {
521     // No section - possibly an absolute address. Since we don't allow
522     // internal function addresses to escape the function scope - we
523     // consider it a tail call.
524     if (opts::Verbosity > 1) {
525       this->errs() << "BOLT-WARNING: no section for address 0x"
526                    << Twine::utohexstr(Address) << " referenced from function "
527                    << BF << '\n';
528     }
529     return MemoryContentsType::UNKNOWN;
530   }
531 
532   if (Section->isVirtual()) {
533     // The contents are filled at runtime.
534     return MemoryContentsType::UNKNOWN;
535   }
536 
537   // No support for jump tables in code yet.
538   if (Section->isText())
539     return MemoryContentsType::UNKNOWN;
540 
541   // Start with checking for PIC jump table. We expect non-PIC jump tables
542   // to have high 32 bits set to 0.
543   if (analyzeJumpTable(Address, JumpTable::JTT_PIC, BF))
544     return MemoryContentsType::POSSIBLE_PIC_JUMP_TABLE;
545 
546   if (analyzeJumpTable(Address, JumpTable::JTT_NORMAL, BF))
547     return MemoryContentsType::POSSIBLE_JUMP_TABLE;
548 
549   return MemoryContentsType::UNKNOWN;
550 }
551 
552 bool BinaryContext::analyzeJumpTable(const uint64_t Address,
553                                      const JumpTable::JumpTableType Type,
554                                      const BinaryFunction &BF,
555                                      const uint64_t NextJTAddress,
556                                      JumpTable::AddressesType *EntriesAsAddress,
557                                      bool *HasEntryInFragment) const {
558   // Target address of __builtin_unreachable.
559   const uint64_t UnreachableAddress = BF.getAddress() + BF.getSize();
560 
561   // Is one of the targets __builtin_unreachable?
562   bool HasUnreachable = false;
563 
564   // Does one of the entries match function start address?
565   bool HasStartAsEntry = false;
566 
567   // Number of targets other than __builtin_unreachable.
568   uint64_t NumRealEntries = 0;
569 
570   // Size of the jump table without trailing __builtin_unreachable entries.
571   size_t TrimmedSize = 0;
572 
573   auto addEntryAddress = [&](uint64_t EntryAddress, bool Unreachable = false) {
574     if (!EntriesAsAddress)
575       return;
576     EntriesAsAddress->emplace_back(EntryAddress);
577     if (!Unreachable)
578       TrimmedSize = EntriesAsAddress->size();
579   };
580 
581   ErrorOr<const BinarySection &> Section = getSectionForAddress(Address);
582   if (!Section)
583     return false;
584 
585   // The upper bound is defined by containing object, section limits, and
586   // the next jump table in memory.
587   uint64_t UpperBound = Section->getEndAddress();
588   const BinaryData *JumpTableBD = getBinaryDataAtAddress(Address);
589   if (JumpTableBD && JumpTableBD->getSize()) {
590     assert(JumpTableBD->getEndAddress() <= UpperBound &&
591            "data object cannot cross a section boundary");
592     UpperBound = JumpTableBD->getEndAddress();
593   }
594   if (NextJTAddress)
595     UpperBound = std::min(NextJTAddress, UpperBound);
596 
597   LLVM_DEBUG({
598     using JTT = JumpTable::JumpTableType;
599     dbgs() << formatv("BOLT-DEBUG: analyzeJumpTable @{0:x} in {1}, JTT={2}\n",
600                       Address, BF.getPrintName(),
601                       Type == JTT::JTT_PIC ? "PIC" : "Normal");
602   });
603   const uint64_t EntrySize = getJumpTableEntrySize(Type);
604   for (uint64_t EntryAddress = Address; EntryAddress <= UpperBound - EntrySize;
605        EntryAddress += EntrySize) {
606     LLVM_DEBUG(dbgs() << "  * Checking 0x" << Twine::utohexstr(EntryAddress)
607                       << " -> ");
608     // Check if there's a proper relocation against the jump table entry.
609     if (HasRelocations) {
610       if (Type == JumpTable::JTT_PIC &&
611           !DataPCRelocations.count(EntryAddress)) {
612         LLVM_DEBUG(
613             dbgs() << "FAIL: JTT_PIC table, no relocation for this address\n");
614         break;
615       }
616       if (Type == JumpTable::JTT_NORMAL && !getRelocationAt(EntryAddress)) {
617         LLVM_DEBUG(
618             dbgs()
619             << "FAIL: JTT_NORMAL table, no relocation for this address\n");
620         break;
621       }
622     }
623 
624     const uint64_t Value =
625         (Type == JumpTable::JTT_PIC)
626             ? Address + *getSignedValueAtAddress(EntryAddress, EntrySize)
627             : *getPointerAtAddress(EntryAddress);
628 
629     // __builtin_unreachable() case.
630     if (Value == UnreachableAddress) {
631       addEntryAddress(Value, /*Unreachable*/ true);
632       HasUnreachable = true;
633       LLVM_DEBUG(dbgs() << formatv("OK: {0:x} __builtin_unreachable\n", Value));
634       continue;
635     }
636 
637     // Function start is another special case. It is allowed in the jump table,
638     // but we need at least one another regular entry to distinguish the table
639     // from, e.g. a function pointer array.
640     if (Value == BF.getAddress()) {
641       HasStartAsEntry = true;
642       addEntryAddress(Value);
643       continue;
644     }
645 
646     // Function or one of its fragments.
647     const BinaryFunction *TargetBF = getBinaryFunctionContainingAddress(Value);
648     const bool DoesBelongToFunction =
649         BF.containsAddress(Value) ||
650         (TargetBF && TargetBF->isParentOrChildOf(BF));
651     if (!DoesBelongToFunction) {
652       LLVM_DEBUG({
653         if (!BF.containsAddress(Value)) {
654           dbgs() << "FAIL: function doesn't contain this address\n";
655           if (TargetBF) {
656             dbgs() << "  ! function containing this address: "
657                    << TargetBF->getPrintName() << '\n';
658             if (TargetBF->isFragment()) {
659               dbgs() << "  ! is a fragment";
660               for (BinaryFunction *Parent : TargetBF->ParentFragments)
661                 dbgs() << ", parent: " << Parent->getPrintName();
662               dbgs() << '\n';
663             }
664           }
665         }
666       });
667       break;
668     }
669 
670     // Check there's an instruction at this offset.
671     if (TargetBF->getState() == BinaryFunction::State::Disassembled &&
672         !TargetBF->getInstructionAtOffset(Value - TargetBF->getAddress())) {
673       LLVM_DEBUG(dbgs() << formatv("FAIL: no instruction at {0:x}\n", Value));
674       break;
675     }
676 
677     ++NumRealEntries;
678     LLVM_DEBUG(dbgs() << formatv("OK: {0:x} real entry\n", Value));
679 
680     if (TargetBF != &BF && HasEntryInFragment)
681       *HasEntryInFragment = true;
682     addEntryAddress(Value);
683   }
684 
685   // Trim direct/normal jump table to exclude trailing unreachable entries that
686   // can collide with a function address.
687   if (Type == JumpTable::JTT_NORMAL && EntriesAsAddress &&
688       TrimmedSize != EntriesAsAddress->size() &&
689       getBinaryFunctionAtAddress(UnreachableAddress))
690     EntriesAsAddress->resize(TrimmedSize);
691 
692   // It's a jump table if the number of real entries is more than 1, or there's
693   // one real entry and one or more special targets. If there are only multiple
694   // special targets, then it's not a jump table.
695   return NumRealEntries + (HasUnreachable || HasStartAsEntry) >= 2;
696 }
697 
698 void BinaryContext::populateJumpTables() {
699   LLVM_DEBUG(dbgs() << "DataPCRelocations: " << DataPCRelocations.size()
700                     << '\n');
701   for (auto JTI = JumpTables.begin(), JTE = JumpTables.end(); JTI != JTE;
702        ++JTI) {
703     JumpTable *JT = JTI->second;
704 
705     bool NonSimpleParent = false;
706     for (BinaryFunction *BF : JT->Parents)
707       NonSimpleParent |= !BF->isSimple();
708     if (NonSimpleParent)
709       continue;
710 
711     uint64_t NextJTAddress = 0;
712     auto NextJTI = std::next(JTI);
713     if (NextJTI != JTE)
714       NextJTAddress = NextJTI->second->getAddress();
715 
716     const bool Success =
717         analyzeJumpTable(JT->getAddress(), JT->Type, *(JT->Parents[0]),
718                          NextJTAddress, &JT->EntriesAsAddress, &JT->IsSplit);
719     if (!Success) {
720       LLVM_DEBUG({
721         dbgs() << "failed to analyze ";
722         JT->print(dbgs());
723         if (NextJTI != JTE) {
724           dbgs() << "next ";
725           NextJTI->second->print(dbgs());
726         }
727       });
728       llvm_unreachable("jump table heuristic failure");
729     }
730     for (BinaryFunction *Frag : JT->Parents) {
731       if (JT->IsSplit)
732         Frag->setHasIndirectTargetToSplitFragment(true);
733       for (uint64_t EntryAddress : JT->EntriesAsAddress)
734         // if target is builtin_unreachable
735         if (EntryAddress == Frag->getAddress() + Frag->getSize()) {
736           Frag->IgnoredBranches.emplace_back(EntryAddress - Frag->getAddress(),
737                                              Frag->getSize());
738         } else if (EntryAddress >= Frag->getAddress() &&
739                    EntryAddress < Frag->getAddress() + Frag->getSize()) {
740           Frag->registerReferencedOffset(EntryAddress - Frag->getAddress());
741         }
742     }
743 
744     // In strict mode, erase PC-relative relocation record. Later we check that
745     // all such records are erased and thus have been accounted for.
746     if (opts::StrictMode && JT->Type == JumpTable::JTT_PIC) {
747       for (uint64_t Address = JT->getAddress();
748            Address < JT->getAddress() + JT->getSize();
749            Address += JT->EntrySize) {
750         DataPCRelocations.erase(DataPCRelocations.find(Address));
751       }
752     }
753 
754     // Mark to skip the function and all its fragments.
755     for (BinaryFunction *Frag : JT->Parents)
756       if (Frag->hasIndirectTargetToSplitFragment())
757         addFragmentsToSkip(Frag);
758   }
759 
760   if (opts::StrictMode && DataPCRelocations.size()) {
761     LLVM_DEBUG({
762       dbgs() << DataPCRelocations.size()
763              << " unclaimed PC-relative relocations left in data:\n";
764       for (uint64_t Reloc : DataPCRelocations)
765         dbgs() << Twine::utohexstr(Reloc) << '\n';
766     });
767     assert(0 && "unclaimed PC-relative relocations left in data\n");
768   }
769   clearList(DataPCRelocations);
770 }
771 
772 void BinaryContext::skipMarkedFragments() {
773   std::vector<BinaryFunction *> FragmentQueue;
774   // Copy the functions to FragmentQueue.
775   FragmentQueue.assign(FragmentsToSkip.begin(), FragmentsToSkip.end());
776   auto addToWorklist = [&](BinaryFunction *Function) -> void {
777     if (FragmentsToSkip.count(Function))
778       return;
779     FragmentQueue.push_back(Function);
780     addFragmentsToSkip(Function);
781   };
782   // Functions containing split jump tables need to be skipped with all
783   // fragments (transitively).
784   for (size_t I = 0; I != FragmentQueue.size(); I++) {
785     BinaryFunction *BF = FragmentQueue[I];
786     assert(FragmentsToSkip.count(BF) &&
787            "internal error in traversing function fragments");
788     if (opts::Verbosity >= 1)
789       this->errs() << "BOLT-WARNING: Ignoring " << BF->getPrintName() << '\n';
790     BF->setSimple(false);
791     BF->setHasIndirectTargetToSplitFragment(true);
792 
793     llvm::for_each(BF->Fragments, addToWorklist);
794     llvm::for_each(BF->ParentFragments, addToWorklist);
795   }
796   if (!FragmentsToSkip.empty())
797     this->errs() << "BOLT-WARNING: skipped " << FragmentsToSkip.size()
798                  << " function" << (FragmentsToSkip.size() == 1 ? "" : "s")
799                  << " due to cold fragments\n";
800 }
801 
802 MCSymbol *BinaryContext::getOrCreateGlobalSymbol(uint64_t Address, Twine Prefix,
803                                                  uint64_t Size,
804                                                  uint16_t Alignment,
805                                                  unsigned Flags) {
806   auto Itr = BinaryDataMap.find(Address);
807   if (Itr != BinaryDataMap.end()) {
808     assert(Itr->second->getSize() == Size || !Size);
809     return Itr->second->getSymbol();
810   }
811 
812   std::string Name = (Prefix + "0x" + Twine::utohexstr(Address)).str();
813   assert(!GlobalSymbols.count(Name) && "created name is not unique");
814   return registerNameAtAddress(Name, Address, Size, Alignment, Flags);
815 }
816 
817 MCSymbol *BinaryContext::getOrCreateUndefinedGlobalSymbol(StringRef Name) {
818   return Ctx->getOrCreateSymbol(Name);
819 }
820 
821 BinaryFunction *BinaryContext::createBinaryFunction(
822     const std::string &Name, BinarySection &Section, uint64_t Address,
823     uint64_t Size, uint64_t SymbolSize, uint16_t Alignment) {
824   auto Result = BinaryFunctions.emplace(
825       Address, BinaryFunction(Name, Section, Address, Size, *this));
826   assert(Result.second == true && "unexpected duplicate function");
827   BinaryFunction *BF = &Result.first->second;
828   registerNameAtAddress(Name, Address, SymbolSize ? SymbolSize : Size,
829                         Alignment);
830   setSymbolToFunctionMap(BF->getSymbol(), BF);
831   return BF;
832 }
833 
834 const MCSymbol *
835 BinaryContext::getOrCreateJumpTable(BinaryFunction &Function, uint64_t Address,
836                                     JumpTable::JumpTableType Type) {
837   // Two fragments of same function access same jump table
838   if (JumpTable *JT = getJumpTableContainingAddress(Address)) {
839     assert(JT->Type == Type && "jump table types have to match");
840     assert(Address == JT->getAddress() && "unexpected non-empty jump table");
841 
842     // Prevent associating a jump table to a specific fragment twice.
843     // This simple check arises from the assumption: no more than 2 fragments.
844     if (JT->Parents.size() == 1 && JT->Parents[0] != &Function) {
845       assert(JT->Parents[0]->isParentOrChildOf(Function) &&
846              "cannot re-use jump table of a different function");
847       // Duplicate the entry for the parent function for easy access
848       JT->Parents.push_back(&Function);
849       if (opts::Verbosity > 2) {
850         this->outs() << "BOLT-INFO: Multiple fragments access same jump table: "
851                      << JT->Parents[0]->getPrintName() << "; "
852                      << Function.getPrintName() << "\n";
853         JT->print(this->outs());
854       }
855       Function.JumpTables.emplace(Address, JT);
856       JT->Parents[0]->setHasIndirectTargetToSplitFragment(true);
857       JT->Parents[1]->setHasIndirectTargetToSplitFragment(true);
858     }
859 
860     bool IsJumpTableParent = false;
861     (void)IsJumpTableParent;
862     for (BinaryFunction *Frag : JT->Parents)
863       if (Frag == &Function)
864         IsJumpTableParent = true;
865     assert(IsJumpTableParent &&
866            "cannot re-use jump table of a different function");
867     return JT->getFirstLabel();
868   }
869 
870   // Re-use the existing symbol if possible.
871   MCSymbol *JTLabel = nullptr;
872   if (BinaryData *Object = getBinaryDataAtAddress(Address)) {
873     if (!isInternalSymbolName(Object->getSymbol()->getName()))
874       JTLabel = Object->getSymbol();
875   }
876 
877   const uint64_t EntrySize = getJumpTableEntrySize(Type);
878   if (!JTLabel) {
879     const std::string JumpTableName = generateJumpTableName(Function, Address);
880     JTLabel = registerNameAtAddress(JumpTableName, Address, 0, EntrySize);
881   }
882 
883   LLVM_DEBUG(dbgs() << "BOLT-DEBUG: creating jump table " << JTLabel->getName()
884                     << " in function " << Function << '\n');
885 
886   JumpTable *JT = new JumpTable(*JTLabel, Address, EntrySize, Type,
887                                 JumpTable::LabelMapType{{0, JTLabel}},
888                                 *getSectionForAddress(Address));
889   JT->Parents.push_back(&Function);
890   if (opts::Verbosity > 2)
891     JT->print(this->outs());
892   JumpTables.emplace(Address, JT);
893 
894   // Duplicate the entry for the parent function for easy access.
895   Function.JumpTables.emplace(Address, JT);
896   return JTLabel;
897 }
898 
899 std::pair<uint64_t, const MCSymbol *>
900 BinaryContext::duplicateJumpTable(BinaryFunction &Function, JumpTable *JT,
901                                   const MCSymbol *OldLabel) {
902   auto L = scopeLock();
903   unsigned Offset = 0;
904   bool Found = false;
905   for (std::pair<const unsigned, MCSymbol *> Elmt : JT->Labels) {
906     if (Elmt.second != OldLabel)
907       continue;
908     Offset = Elmt.first;
909     Found = true;
910     break;
911   }
912   assert(Found && "Label not found");
913   (void)Found;
914   MCSymbol *NewLabel = Ctx->createNamedTempSymbol("duplicatedJT");
915   JumpTable *NewJT =
916       new JumpTable(*NewLabel, JT->getAddress(), JT->EntrySize, JT->Type,
917                     JumpTable::LabelMapType{{Offset, NewLabel}},
918                     *getSectionForAddress(JT->getAddress()));
919   NewJT->Parents = JT->Parents;
920   NewJT->Entries = JT->Entries;
921   NewJT->Counts = JT->Counts;
922   uint64_t JumpTableID = ++DuplicatedJumpTables;
923   // Invert it to differentiate from regular jump tables whose IDs are their
924   // addresses in the input binary memory space
925   JumpTableID = ~JumpTableID;
926   JumpTables.emplace(JumpTableID, NewJT);
927   Function.JumpTables.emplace(JumpTableID, NewJT);
928   return std::make_pair(JumpTableID, NewLabel);
929 }
930 
931 std::string BinaryContext::generateJumpTableName(const BinaryFunction &BF,
932                                                  uint64_t Address) {
933   size_t Id;
934   uint64_t Offset = 0;
935   if (const JumpTable *JT = BF.getJumpTableContainingAddress(Address)) {
936     Offset = Address - JT->getAddress();
937     auto Itr = JT->Labels.find(Offset);
938     if (Itr != JT->Labels.end())
939       return std::string(Itr->second->getName());
940     Id = JumpTableIds.at(JT->getAddress());
941   } else {
942     Id = JumpTableIds[Address] = BF.JumpTables.size();
943   }
944   return ("JUMP_TABLE/" + BF.getOneName().str() + "." + std::to_string(Id) +
945           (Offset ? ("." + std::to_string(Offset)) : ""));
946 }
947 
948 bool BinaryContext::hasValidCodePadding(const BinaryFunction &BF) {
949   // FIXME: aarch64 support is missing.
950   if (!isX86())
951     return true;
952 
953   if (BF.getSize() == BF.getMaxSize())
954     return true;
955 
956   ErrorOr<ArrayRef<unsigned char>> FunctionData = BF.getData();
957   assert(FunctionData && "cannot get function as data");
958 
959   uint64_t Offset = BF.getSize();
960   MCInst Instr;
961   uint64_t InstrSize = 0;
962   uint64_t InstrAddress = BF.getAddress() + Offset;
963   using std::placeholders::_1;
964 
965   // Skip instructions that satisfy the predicate condition.
966   auto skipInstructions = [&](std::function<bool(const MCInst &)> Predicate) {
967     const uint64_t StartOffset = Offset;
968     for (; Offset < BF.getMaxSize();
969          Offset += InstrSize, InstrAddress += InstrSize) {
970       if (!DisAsm->getInstruction(Instr, InstrSize, FunctionData->slice(Offset),
971                                   InstrAddress, nulls()))
972         break;
973       if (!Predicate(Instr))
974         break;
975     }
976 
977     return Offset - StartOffset;
978   };
979 
980   // Skip a sequence of zero bytes.
981   auto skipZeros = [&]() {
982     const uint64_t StartOffset = Offset;
983     for (; Offset < BF.getMaxSize(); ++Offset)
984       if ((*FunctionData)[Offset] != 0)
985         break;
986 
987     return Offset - StartOffset;
988   };
989 
990   // Accept the whole padding area filled with breakpoints.
991   auto isBreakpoint = std::bind(&MCPlusBuilder::isBreakpoint, MIB.get(), _1);
992   if (skipInstructions(isBreakpoint) && Offset == BF.getMaxSize())
993     return true;
994 
995   auto isNoop = std::bind(&MCPlusBuilder::isNoop, MIB.get(), _1);
996 
997   // Some functions have a jump to the next function or to the padding area
998   // inserted after the body.
999   auto isSkipJump = [&](const MCInst &Instr) {
1000     uint64_t TargetAddress = 0;
1001     if (MIB->isUnconditionalBranch(Instr) &&
1002         MIB->evaluateBranch(Instr, InstrAddress, InstrSize, TargetAddress)) {
1003       if (TargetAddress >= InstrAddress + InstrSize &&
1004           TargetAddress <= BF.getAddress() + BF.getMaxSize()) {
1005         return true;
1006       }
1007     }
1008     return false;
1009   };
1010 
1011   // Skip over nops, jumps, and zero padding. Allow interleaving (this happens).
1012   while (skipInstructions(isNoop) || skipInstructions(isSkipJump) ||
1013          skipZeros())
1014     ;
1015 
1016   if (Offset == BF.getMaxSize())
1017     return true;
1018 
1019   if (opts::Verbosity >= 1) {
1020     this->errs() << "BOLT-WARNING: bad padding at address 0x"
1021                  << Twine::utohexstr(BF.getAddress() + BF.getSize())
1022                  << " starting at offset " << (Offset - BF.getSize())
1023                  << " in function " << BF << '\n'
1024                  << FunctionData->slice(BF.getSize(),
1025                                         BF.getMaxSize() - BF.getSize())
1026                  << '\n';
1027   }
1028 
1029   return false;
1030 }
1031 
1032 void BinaryContext::adjustCodePadding() {
1033   for (auto &BFI : BinaryFunctions) {
1034     BinaryFunction &BF = BFI.second;
1035     if (!shouldEmit(BF))
1036       continue;
1037 
1038     if (!hasValidCodePadding(BF)) {
1039       if (HasRelocations) {
1040         if (opts::Verbosity >= 1) {
1041           this->outs() << "BOLT-INFO: function " << BF
1042                        << " has invalid padding. Ignoring the function.\n";
1043         }
1044         BF.setIgnored();
1045       } else {
1046         BF.setMaxSize(BF.getSize());
1047       }
1048     }
1049   }
1050 }
1051 
1052 MCSymbol *BinaryContext::registerNameAtAddress(StringRef Name, uint64_t Address,
1053                                                uint64_t Size,
1054                                                uint16_t Alignment,
1055                                                unsigned Flags) {
1056   // Register the name with MCContext.
1057   MCSymbol *Symbol = Ctx->getOrCreateSymbol(Name);
1058 
1059   auto GAI = BinaryDataMap.find(Address);
1060   BinaryData *BD;
1061   if (GAI == BinaryDataMap.end()) {
1062     ErrorOr<BinarySection &> SectionOrErr = getSectionForAddress(Address);
1063     BinarySection &Section =
1064         SectionOrErr ? SectionOrErr.get() : absoluteSection();
1065     BD = new BinaryData(*Symbol, Address, Size, Alignment ? Alignment : 1,
1066                         Section, Flags);
1067     GAI = BinaryDataMap.emplace(Address, BD).first;
1068     GlobalSymbols[Name] = BD;
1069     updateObjectNesting(GAI);
1070   } else {
1071     BD = GAI->second;
1072     if (!BD->hasName(Name)) {
1073       GlobalSymbols[Name] = BD;
1074       BD->Symbols.push_back(Symbol);
1075     }
1076   }
1077 
1078   return Symbol;
1079 }
1080 
1081 const BinaryData *
1082 BinaryContext::getBinaryDataContainingAddressImpl(uint64_t Address) const {
1083   auto NI = BinaryDataMap.lower_bound(Address);
1084   auto End = BinaryDataMap.end();
1085   if ((NI != End && Address == NI->first) ||
1086       ((NI != BinaryDataMap.begin()) && (NI-- != BinaryDataMap.begin()))) {
1087     if (NI->second->containsAddress(Address))
1088       return NI->second;
1089 
1090     // If this is a sub-symbol, see if a parent data contains the address.
1091     const BinaryData *BD = NI->second->getParent();
1092     while (BD) {
1093       if (BD->containsAddress(Address))
1094         return BD;
1095       BD = BD->getParent();
1096     }
1097   }
1098   return nullptr;
1099 }
1100 
1101 BinaryData *BinaryContext::getGOTSymbol() {
1102   // First tries to find a global symbol with that name
1103   BinaryData *GOTSymBD = getBinaryDataByName("_GLOBAL_OFFSET_TABLE_");
1104   if (GOTSymBD)
1105     return GOTSymBD;
1106 
1107   // This symbol might be hidden from run-time link, so fetch the local
1108   // definition if available.
1109   GOTSymBD = getBinaryDataByName("_GLOBAL_OFFSET_TABLE_/1");
1110   if (!GOTSymBD)
1111     return nullptr;
1112 
1113   // If the local symbol is not unique, fail
1114   unsigned Index = 2;
1115   SmallString<30> Storage;
1116   while (const BinaryData *BD =
1117              getBinaryDataByName(Twine("_GLOBAL_OFFSET_TABLE_/")
1118                                      .concat(Twine(Index++))
1119                                      .toStringRef(Storage)))
1120     if (BD->getAddress() != GOTSymBD->getAddress())
1121       return nullptr;
1122 
1123   return GOTSymBD;
1124 }
1125 
1126 bool BinaryContext::setBinaryDataSize(uint64_t Address, uint64_t Size) {
1127   auto NI = BinaryDataMap.find(Address);
1128   assert(NI != BinaryDataMap.end());
1129   if (NI == BinaryDataMap.end())
1130     return false;
1131   // TODO: it's possible that a jump table starts at the same address
1132   // as a larger blob of private data.  When we set the size of the
1133   // jump table, it might be smaller than the total blob size.  In this
1134   // case we just leave the original size since (currently) it won't really
1135   // affect anything.
1136   assert((!NI->second->Size || NI->second->Size == Size ||
1137           (NI->second->isJumpTable() && NI->second->Size > Size)) &&
1138          "can't change the size of a symbol that has already had its "
1139          "size set");
1140   if (!NI->second->Size) {
1141     NI->second->Size = Size;
1142     updateObjectNesting(NI);
1143     return true;
1144   }
1145   return false;
1146 }
1147 
1148 void BinaryContext::generateSymbolHashes() {
1149   auto isPadding = [](const BinaryData &BD) {
1150     StringRef Contents = BD.getSection().getContents();
1151     StringRef SymData = Contents.substr(BD.getOffset(), BD.getSize());
1152     return (BD.getName().starts_with("HOLEat") ||
1153             SymData.find_first_not_of(0) == StringRef::npos);
1154   };
1155 
1156   uint64_t NumCollisions = 0;
1157   for (auto &Entry : BinaryDataMap) {
1158     BinaryData &BD = *Entry.second;
1159     StringRef Name = BD.getName();
1160 
1161     if (!isInternalSymbolName(Name))
1162       continue;
1163 
1164     // First check if a non-anonymous alias exists and move it to the front.
1165     if (BD.getSymbols().size() > 1) {
1166       auto Itr = llvm::find_if(BD.getSymbols(), [&](const MCSymbol *Symbol) {
1167         return !isInternalSymbolName(Symbol->getName());
1168       });
1169       if (Itr != BD.getSymbols().end()) {
1170         size_t Idx = std::distance(BD.getSymbols().begin(), Itr);
1171         std::swap(BD.getSymbols()[0], BD.getSymbols()[Idx]);
1172         continue;
1173       }
1174     }
1175 
1176     // We have to skip 0 size symbols since they will all collide.
1177     if (BD.getSize() == 0) {
1178       continue;
1179     }
1180 
1181     const uint64_t Hash = BD.getSection().hash(BD);
1182     const size_t Idx = Name.find("0x");
1183     std::string NewName =
1184         (Twine(Name.substr(0, Idx)) + "_" + Twine::utohexstr(Hash)).str();
1185     if (getBinaryDataByName(NewName)) {
1186       // Ignore collisions for symbols that appear to be padding
1187       // (i.e. all zeros or a "hole")
1188       if (!isPadding(BD)) {
1189         if (opts::Verbosity) {
1190           this->errs() << "BOLT-WARNING: collision detected when hashing " << BD
1191                        << " with new name (" << NewName << "), skipping.\n";
1192         }
1193         ++NumCollisions;
1194       }
1195       continue;
1196     }
1197     BD.Symbols.insert(BD.Symbols.begin(), Ctx->getOrCreateSymbol(NewName));
1198     GlobalSymbols[NewName] = &BD;
1199   }
1200   if (NumCollisions) {
1201     this->errs() << "BOLT-WARNING: " << NumCollisions
1202                  << " collisions detected while hashing binary objects";
1203     if (!opts::Verbosity)
1204       this->errs() << ". Use -v=1 to see the list.";
1205     this->errs() << '\n';
1206   }
1207 }
1208 
1209 bool BinaryContext::registerFragment(BinaryFunction &TargetFunction,
1210                                      BinaryFunction &Function) const {
1211   assert(TargetFunction.isFragment() && "TargetFunction must be a fragment");
1212   if (TargetFunction.isChildOf(Function))
1213     return true;
1214   TargetFunction.addParentFragment(Function);
1215   Function.addFragment(TargetFunction);
1216   if (!HasRelocations) {
1217     TargetFunction.setSimple(false);
1218     Function.setSimple(false);
1219   }
1220   if (opts::Verbosity >= 1) {
1221     this->outs() << "BOLT-INFO: marking " << TargetFunction
1222                  << " as a fragment of " << Function << '\n';
1223   }
1224   return true;
1225 }
1226 
1227 void BinaryContext::addAdrpAddRelocAArch64(BinaryFunction &BF,
1228                                            MCInst &LoadLowBits,
1229                                            MCInst &LoadHiBits,
1230                                            uint64_t Target) {
1231   const MCSymbol *TargetSymbol;
1232   uint64_t Addend = 0;
1233   std::tie(TargetSymbol, Addend) = handleAddressRef(Target, BF,
1234                                                     /*IsPCRel*/ true);
1235   int64_t Val;
1236   MIB->replaceImmWithSymbolRef(LoadHiBits, TargetSymbol, Addend, Ctx.get(), Val,
1237                                ELF::R_AARCH64_ADR_PREL_PG_HI21);
1238   MIB->replaceImmWithSymbolRef(LoadLowBits, TargetSymbol, Addend, Ctx.get(),
1239                                Val, ELF::R_AARCH64_ADD_ABS_LO12_NC);
1240 }
1241 
1242 bool BinaryContext::handleAArch64Veneer(uint64_t Address, bool MatchOnly) {
1243   BinaryFunction *TargetFunction = getBinaryFunctionContainingAddress(Address);
1244   if (TargetFunction)
1245     return false;
1246 
1247   ErrorOr<BinarySection &> Section = getSectionForAddress(Address);
1248   assert(Section && "cannot get section for referenced address");
1249   if (!Section->isText())
1250     return false;
1251 
1252   bool Ret = false;
1253   StringRef SectionContents = Section->getContents();
1254   uint64_t Offset = Address - Section->getAddress();
1255   const uint64_t MaxSize = SectionContents.size() - Offset;
1256   const uint8_t *Bytes =
1257       reinterpret_cast<const uint8_t *>(SectionContents.data());
1258   ArrayRef<uint8_t> Data(Bytes + Offset, MaxSize);
1259 
1260   auto matchVeneer = [&](BinaryFunction::InstrMapType &Instructions,
1261                          MCInst &Instruction, uint64_t Offset,
1262                          uint64_t AbsoluteInstrAddr,
1263                          uint64_t TotalSize) -> bool {
1264     MCInst *TargetHiBits, *TargetLowBits;
1265     uint64_t TargetAddress, Count;
1266     Count = MIB->matchLinkerVeneer(Instructions.begin(), Instructions.end(),
1267                                    AbsoluteInstrAddr, Instruction, TargetHiBits,
1268                                    TargetLowBits, TargetAddress);
1269     if (!Count)
1270       return false;
1271 
1272     if (MatchOnly)
1273       return true;
1274 
1275     // NOTE The target symbol was created during disassemble's
1276     // handleExternalReference
1277     const MCSymbol *VeneerSymbol = getOrCreateGlobalSymbol(Address, "FUNCat");
1278     BinaryFunction *Veneer = createBinaryFunction(VeneerSymbol->getName().str(),
1279                                                   *Section, Address, TotalSize);
1280     addAdrpAddRelocAArch64(*Veneer, *TargetLowBits, *TargetHiBits,
1281                            TargetAddress);
1282     MIB->addAnnotation(Instruction, "AArch64Veneer", true);
1283     Veneer->addInstruction(Offset, std::move(Instruction));
1284     --Count;
1285     for (auto It = Instructions.rbegin(); Count != 0; ++It, --Count) {
1286       MIB->addAnnotation(It->second, "AArch64Veneer", true);
1287       Veneer->addInstruction(It->first, std::move(It->second));
1288     }
1289 
1290     Veneer->getOrCreateLocalLabel(Address);
1291     Veneer->setMaxSize(TotalSize);
1292     Veneer->updateState(BinaryFunction::State::Disassembled);
1293     LLVM_DEBUG(dbgs() << "BOLT-DEBUG: handling veneer function at 0x" << Address
1294                       << "\n");
1295     return true;
1296   };
1297 
1298   uint64_t Size = 0, TotalSize = 0;
1299   BinaryFunction::InstrMapType VeneerInstructions;
1300   for (Offset = 0; Offset < MaxSize; Offset += Size) {
1301     MCInst Instruction;
1302     const uint64_t AbsoluteInstrAddr = Address + Offset;
1303     if (!SymbolicDisAsm->getInstruction(Instruction, Size, Data.slice(Offset),
1304                                         AbsoluteInstrAddr, nulls()))
1305       break;
1306 
1307     TotalSize += Size;
1308     if (MIB->isBranch(Instruction)) {
1309       Ret = matchVeneer(VeneerInstructions, Instruction, Offset,
1310                         AbsoluteInstrAddr, TotalSize);
1311       break;
1312     }
1313 
1314     VeneerInstructions.emplace(Offset, std::move(Instruction));
1315   }
1316 
1317   return Ret;
1318 }
1319 
1320 void BinaryContext::processInterproceduralReferences() {
1321   for (const std::pair<BinaryFunction *, uint64_t> &It :
1322        InterproceduralReferences) {
1323     BinaryFunction &Function = *It.first;
1324     uint64_t Address = It.second;
1325     if (!Address || Function.isIgnored())
1326       continue;
1327 
1328     BinaryFunction *TargetFunction =
1329         getBinaryFunctionContainingAddress(Address);
1330     if (&Function == TargetFunction)
1331       continue;
1332 
1333     if (TargetFunction) {
1334       if (TargetFunction->isFragment() &&
1335           !TargetFunction->isChildOf(Function)) {
1336         this->errs()
1337             << "BOLT-WARNING: interprocedural reference between unrelated "
1338                "fragments: "
1339             << Function.getPrintName() << " and "
1340             << TargetFunction->getPrintName() << '\n';
1341       }
1342       if (uint64_t Offset = Address - TargetFunction->getAddress())
1343         TargetFunction->addEntryPointAtOffset(Offset);
1344 
1345       continue;
1346     }
1347 
1348     // Check if address falls in function padding space - this could be
1349     // unmarked data in code. In this case adjust the padding space size.
1350     ErrorOr<BinarySection &> Section = getSectionForAddress(Address);
1351     assert(Section && "cannot get section for referenced address");
1352 
1353     if (!Section->isText())
1354       continue;
1355 
1356     // PLT requires special handling and could be ignored in this context.
1357     StringRef SectionName = Section->getName();
1358     if (SectionName == ".plt" || SectionName == ".plt.got")
1359       continue;
1360 
1361     // Check if it is aarch64 veneer written at Address
1362     if (isAArch64() && handleAArch64Veneer(Address))
1363       continue;
1364 
1365     if (opts::processAllFunctions()) {
1366       this->errs() << "BOLT-ERROR: cannot process binaries with unmarked "
1367                    << "object in code at address 0x"
1368                    << Twine::utohexstr(Address) << " belonging to section "
1369                    << SectionName << " in current mode\n";
1370       exit(1);
1371     }
1372 
1373     TargetFunction = getBinaryFunctionContainingAddress(Address,
1374                                                         /*CheckPastEnd=*/false,
1375                                                         /*UseMaxSize=*/true);
1376     // We are not going to overwrite non-simple functions, but for simple
1377     // ones - adjust the padding size.
1378     if (TargetFunction && TargetFunction->isSimple()) {
1379       this->errs()
1380           << "BOLT-WARNING: function " << *TargetFunction
1381           << " has an object detected in a padding region at address 0x"
1382           << Twine::utohexstr(Address) << '\n';
1383       TargetFunction->setMaxSize(TargetFunction->getSize());
1384     }
1385   }
1386 
1387   InterproceduralReferences.clear();
1388 }
1389 
1390 void BinaryContext::postProcessSymbolTable() {
1391   fixBinaryDataHoles();
1392   bool Valid = true;
1393   for (auto &Entry : BinaryDataMap) {
1394     BinaryData *BD = Entry.second;
1395     if ((BD->getName().starts_with("SYMBOLat") ||
1396          BD->getName().starts_with("DATAat")) &&
1397         !BD->getParent() && !BD->getSize() && !BD->isAbsolute() &&
1398         BD->getSection()) {
1399       this->errs() << "BOLT-WARNING: zero-sized top level symbol: " << *BD
1400                    << "\n";
1401       Valid = false;
1402     }
1403   }
1404   assert(Valid);
1405   (void)Valid;
1406   generateSymbolHashes();
1407 }
1408 
1409 void BinaryContext::foldFunction(BinaryFunction &ChildBF,
1410                                  BinaryFunction &ParentBF) {
1411   assert(!ChildBF.isMultiEntry() && !ParentBF.isMultiEntry() &&
1412          "cannot merge functions with multiple entry points");
1413 
1414   std::unique_lock<llvm::sys::RWMutex> WriteCtxLock(CtxMutex, std::defer_lock);
1415   std::unique_lock<llvm::sys::RWMutex> WriteSymbolMapLock(
1416       SymbolToFunctionMapMutex, std::defer_lock);
1417 
1418   const StringRef ChildName = ChildBF.getOneName();
1419 
1420   // Move symbols over and update bookkeeping info.
1421   for (MCSymbol *Symbol : ChildBF.getSymbols()) {
1422     ParentBF.getSymbols().push_back(Symbol);
1423     WriteSymbolMapLock.lock();
1424     SymbolToFunctionMap[Symbol] = &ParentBF;
1425     WriteSymbolMapLock.unlock();
1426     // NB: there's no need to update BinaryDataMap and GlobalSymbols.
1427   }
1428   ChildBF.getSymbols().clear();
1429 
1430   // Move other names the child function is known under.
1431   llvm::move(ChildBF.Aliases, std::back_inserter(ParentBF.Aliases));
1432   ChildBF.Aliases.clear();
1433 
1434   if (HasRelocations) {
1435     // Merge execution counts of ChildBF into those of ParentBF.
1436     // Without relocations, we cannot reliably merge profiles as both functions
1437     // continue to exist and either one can be executed.
1438     ChildBF.mergeProfileDataInto(ParentBF);
1439 
1440     std::shared_lock<llvm::sys::RWMutex> ReadBfsLock(BinaryFunctionsMutex,
1441                                                      std::defer_lock);
1442     std::unique_lock<llvm::sys::RWMutex> WriteBfsLock(BinaryFunctionsMutex,
1443                                                       std::defer_lock);
1444     // Remove ChildBF from the global set of functions in relocs mode.
1445     ReadBfsLock.lock();
1446     auto FI = BinaryFunctions.find(ChildBF.getAddress());
1447     ReadBfsLock.unlock();
1448 
1449     assert(FI != BinaryFunctions.end() && "function not found");
1450     assert(&ChildBF == &FI->second && "function mismatch");
1451 
1452     WriteBfsLock.lock();
1453     ChildBF.clearDisasmState();
1454     FI = BinaryFunctions.erase(FI);
1455     WriteBfsLock.unlock();
1456 
1457   } else {
1458     // In non-relocation mode we keep the function, but rename it.
1459     std::string NewName = "__ICF_" + ChildName.str();
1460 
1461     WriteCtxLock.lock();
1462     ChildBF.getSymbols().push_back(Ctx->getOrCreateSymbol(NewName));
1463     WriteCtxLock.unlock();
1464 
1465     ChildBF.setFolded(&ParentBF);
1466   }
1467 
1468   ParentBF.setHasFunctionsFoldedInto();
1469 }
1470 
1471 void BinaryContext::fixBinaryDataHoles() {
1472   assert(validateObjectNesting() && "object nesting inconsistency detected");
1473 
1474   for (BinarySection &Section : allocatableSections()) {
1475     std::vector<std::pair<uint64_t, uint64_t>> Holes;
1476 
1477     auto isNotHole = [&Section](const binary_data_iterator &Itr) {
1478       BinaryData *BD = Itr->second;
1479       bool isHole = (!BD->getParent() && !BD->getSize() && BD->isObject() &&
1480                      (BD->getName().starts_with("SYMBOLat0x") ||
1481                       BD->getName().starts_with("DATAat0x") ||
1482                       BD->getName().starts_with("ANONYMOUS")));
1483       return !isHole && BD->getSection() == Section && !BD->getParent();
1484     };
1485 
1486     auto BDStart = BinaryDataMap.begin();
1487     auto BDEnd = BinaryDataMap.end();
1488     auto Itr = FilteredBinaryDataIterator(isNotHole, BDStart, BDEnd);
1489     auto End = FilteredBinaryDataIterator(isNotHole, BDEnd, BDEnd);
1490 
1491     uint64_t EndAddress = Section.getAddress();
1492 
1493     while (Itr != End) {
1494       if (Itr->second->getAddress() > EndAddress) {
1495         uint64_t Gap = Itr->second->getAddress() - EndAddress;
1496         Holes.emplace_back(EndAddress, Gap);
1497       }
1498       EndAddress = Itr->second->getEndAddress();
1499       ++Itr;
1500     }
1501 
1502     if (EndAddress < Section.getEndAddress())
1503       Holes.emplace_back(EndAddress, Section.getEndAddress() - EndAddress);
1504 
1505     // If there is already a symbol at the start of the hole, grow that symbol
1506     // to cover the rest.  Otherwise, create a new symbol to cover the hole.
1507     for (std::pair<uint64_t, uint64_t> &Hole : Holes) {
1508       BinaryData *BD = getBinaryDataAtAddress(Hole.first);
1509       if (BD) {
1510         // BD->getSection() can be != Section if there are sections that
1511         // overlap.  In this case it is probably safe to just skip the holes
1512         // since the overlapping section will not(?) have any symbols in it.
1513         if (BD->getSection() == Section)
1514           setBinaryDataSize(Hole.first, Hole.second);
1515       } else {
1516         getOrCreateGlobalSymbol(Hole.first, "HOLEat", Hole.second, 1);
1517       }
1518     }
1519   }
1520 
1521   assert(validateObjectNesting() && "object nesting inconsistency detected");
1522   assert(validateHoles() && "top level hole detected in object map");
1523 }
1524 
1525 void BinaryContext::printGlobalSymbols(raw_ostream &OS) const {
1526   const BinarySection *CurrentSection = nullptr;
1527   bool FirstSection = true;
1528 
1529   for (auto &Entry : BinaryDataMap) {
1530     const BinaryData *BD = Entry.second;
1531     const BinarySection &Section = BD->getSection();
1532     if (FirstSection || Section != *CurrentSection) {
1533       uint64_t Address, Size;
1534       StringRef Name = Section.getName();
1535       if (Section) {
1536         Address = Section.getAddress();
1537         Size = Section.getSize();
1538       } else {
1539         Address = BD->getAddress();
1540         Size = BD->getSize();
1541       }
1542       OS << "BOLT-INFO: Section " << Name << ", "
1543          << "0x" + Twine::utohexstr(Address) << ":"
1544          << "0x" + Twine::utohexstr(Address + Size) << "/" << Size << "\n";
1545       CurrentSection = &Section;
1546       FirstSection = false;
1547     }
1548 
1549     OS << "BOLT-INFO: ";
1550     const BinaryData *P = BD->getParent();
1551     while (P) {
1552       OS << "  ";
1553       P = P->getParent();
1554     }
1555     OS << *BD << "\n";
1556   }
1557 }
1558 
1559 Expected<unsigned> BinaryContext::getDwarfFile(
1560     StringRef Directory, StringRef FileName, unsigned FileNumber,
1561     std::optional<MD5::MD5Result> Checksum, std::optional<StringRef> Source,
1562     unsigned CUID, unsigned DWARFVersion) {
1563   DwarfLineTable &Table = DwarfLineTablesCUMap[CUID];
1564   return Table.tryGetFile(Directory, FileName, Checksum, Source, DWARFVersion,
1565                           FileNumber);
1566 }
1567 
1568 unsigned BinaryContext::addDebugFilenameToUnit(const uint32_t DestCUID,
1569                                                const uint32_t SrcCUID,
1570                                                unsigned FileIndex) {
1571   DWARFCompileUnit *SrcUnit = DwCtx->getCompileUnitForOffset(SrcCUID);
1572   const DWARFDebugLine::LineTable *LineTable =
1573       DwCtx->getLineTableForUnit(SrcUnit);
1574   const std::vector<DWARFDebugLine::FileNameEntry> &FileNames =
1575       LineTable->Prologue.FileNames;
1576   // Dir indexes start at 1, as DWARF file numbers, and a dir index 0
1577   // means empty dir.
1578   assert(FileIndex > 0 && FileIndex <= FileNames.size() &&
1579          "FileIndex out of range for the compilation unit.");
1580   StringRef Dir = "";
1581   if (FileNames[FileIndex - 1].DirIdx != 0) {
1582     if (std::optional<const char *> DirName = dwarf::toString(
1583             LineTable->Prologue
1584                 .IncludeDirectories[FileNames[FileIndex - 1].DirIdx - 1])) {
1585       Dir = *DirName;
1586     }
1587   }
1588   StringRef FileName = "";
1589   if (std::optional<const char *> FName =
1590           dwarf::toString(FileNames[FileIndex - 1].Name))
1591     FileName = *FName;
1592   assert(FileName != "");
1593   DWARFCompileUnit *DstUnit = DwCtx->getCompileUnitForOffset(DestCUID);
1594   return cantFail(getDwarfFile(Dir, FileName, 0, std::nullopt, std::nullopt,
1595                                DestCUID, DstUnit->getVersion()));
1596 }
1597 
1598 std::vector<BinaryFunction *> BinaryContext::getSortedFunctions() {
1599   std::vector<BinaryFunction *> SortedFunctions(BinaryFunctions.size());
1600   llvm::transform(llvm::make_second_range(BinaryFunctions),
1601                   SortedFunctions.begin(),
1602                   [](BinaryFunction &BF) { return &BF; });
1603 
1604   llvm::stable_sort(SortedFunctions,
1605                     [](const BinaryFunction *A, const BinaryFunction *B) {
1606                       if (A->hasValidIndex() && B->hasValidIndex()) {
1607                         return A->getIndex() < B->getIndex();
1608                       }
1609                       return A->hasValidIndex();
1610                     });
1611   return SortedFunctions;
1612 }
1613 
1614 std::vector<BinaryFunction *> BinaryContext::getAllBinaryFunctions() {
1615   std::vector<BinaryFunction *> AllFunctions;
1616   AllFunctions.reserve(BinaryFunctions.size() + InjectedBinaryFunctions.size());
1617   llvm::transform(llvm::make_second_range(BinaryFunctions),
1618                   std::back_inserter(AllFunctions),
1619                   [](BinaryFunction &BF) { return &BF; });
1620   llvm::copy(InjectedBinaryFunctions, std::back_inserter(AllFunctions));
1621 
1622   return AllFunctions;
1623 }
1624 
1625 std::optional<DWARFUnit *> BinaryContext::getDWOCU(uint64_t DWOId) {
1626   auto Iter = DWOCUs.find(DWOId);
1627   if (Iter == DWOCUs.end())
1628     return std::nullopt;
1629 
1630   return Iter->second;
1631 }
1632 
1633 DWARFContext *BinaryContext::getDWOContext() const {
1634   if (DWOCUs.empty())
1635     return nullptr;
1636   return &DWOCUs.begin()->second->getContext();
1637 }
1638 
1639 /// Handles DWO sections that can either be in .o, .dwo or .dwp files.
1640 void BinaryContext::preprocessDWODebugInfo() {
1641   for (const std::unique_ptr<DWARFUnit> &CU : DwCtx->compile_units()) {
1642     DWARFUnit *const DwarfUnit = CU.get();
1643     if (std::optional<uint64_t> DWOId = DwarfUnit->getDWOId()) {
1644       std::string DWOName = dwarf::toString(
1645           DwarfUnit->getUnitDIE().find(
1646               {dwarf::DW_AT_dwo_name, dwarf::DW_AT_GNU_dwo_name}),
1647           "");
1648       SmallString<16> AbsolutePath;
1649       if (!opts::CompDirOverride.empty()) {
1650         sys::path::append(AbsolutePath, opts::CompDirOverride);
1651         sys::path::append(AbsolutePath, DWOName);
1652       }
1653       DWARFUnit *DWOCU =
1654           DwarfUnit->getNonSkeletonUnitDIE(false, AbsolutePath).getDwarfUnit();
1655       if (!DWOCU->isDWOUnit()) {
1656         this->outs()
1657             << "BOLT-WARNING: Debug Fission: DWO debug information for "
1658             << DWOName
1659             << " was not retrieved and won't be updated. Please check "
1660                "relative path.\n";
1661         continue;
1662       }
1663       DWOCUs[*DWOId] = DWOCU;
1664     }
1665   }
1666   if (!DWOCUs.empty())
1667     this->outs() << "BOLT-INFO: processing split DWARF\n";
1668 }
1669 
1670 void BinaryContext::preprocessDebugInfo() {
1671   struct CURange {
1672     uint64_t LowPC;
1673     uint64_t HighPC;
1674     DWARFUnit *Unit;
1675 
1676     bool operator<(const CURange &Other) const { return LowPC < Other.LowPC; }
1677   };
1678 
1679   // Building a map of address ranges to CUs similar to .debug_aranges and use
1680   // it to assign CU to functions.
1681   std::vector<CURange> AllRanges;
1682   AllRanges.reserve(DwCtx->getNumCompileUnits());
1683   for (const std::unique_ptr<DWARFUnit> &CU : DwCtx->compile_units()) {
1684     Expected<DWARFAddressRangesVector> RangesOrError =
1685         CU->getUnitDIE().getAddressRanges();
1686     if (!RangesOrError) {
1687       consumeError(RangesOrError.takeError());
1688       continue;
1689     }
1690     for (DWARFAddressRange &Range : *RangesOrError) {
1691       // Parts of the debug info could be invalidated due to corresponding code
1692       // being removed from the binary by the linker. Hence we check if the
1693       // address is a valid one.
1694       if (containsAddress(Range.LowPC))
1695         AllRanges.emplace_back(CURange{Range.LowPC, Range.HighPC, CU.get()});
1696     }
1697 
1698     ContainsDwarf5 |= CU->getVersion() >= 5;
1699     ContainsDwarfLegacy |= CU->getVersion() < 5;
1700   }
1701 
1702   llvm::sort(AllRanges);
1703   for (auto &KV : BinaryFunctions) {
1704     const uint64_t FunctionAddress = KV.first;
1705     BinaryFunction &Function = KV.second;
1706 
1707     auto It = llvm::partition_point(
1708         AllRanges, [=](CURange R) { return R.HighPC <= FunctionAddress; });
1709     if (It != AllRanges.end() && It->LowPC <= FunctionAddress)
1710       Function.setDWARFUnit(It->Unit);
1711   }
1712 
1713   // Discover units with debug info that needs to be updated.
1714   for (const auto &KV : BinaryFunctions) {
1715     const BinaryFunction &BF = KV.second;
1716     if (shouldEmit(BF) && BF.getDWARFUnit())
1717       ProcessedCUs.insert(BF.getDWARFUnit());
1718   }
1719 
1720   // Clear debug info for functions from units that we are not going to process.
1721   for (auto &KV : BinaryFunctions) {
1722     BinaryFunction &BF = KV.second;
1723     if (BF.getDWARFUnit() && !ProcessedCUs.count(BF.getDWARFUnit()))
1724       BF.setDWARFUnit(nullptr);
1725   }
1726 
1727   if (opts::Verbosity >= 1) {
1728     this->outs() << "BOLT-INFO: " << ProcessedCUs.size() << " out of "
1729                  << DwCtx->getNumCompileUnits() << " CUs will be updated\n";
1730   }
1731 
1732   preprocessDWODebugInfo();
1733 
1734   // Populate MCContext with DWARF files from all units.
1735   StringRef GlobalPrefix = AsmInfo->getPrivateGlobalPrefix();
1736   for (const std::unique_ptr<DWARFUnit> &CU : DwCtx->compile_units()) {
1737     const uint64_t CUID = CU->getOffset();
1738     DwarfLineTable &BinaryLineTable = getDwarfLineTable(CUID);
1739     BinaryLineTable.setLabel(Ctx->getOrCreateSymbol(
1740         GlobalPrefix + "line_table_start" + Twine(CUID)));
1741 
1742     if (!ProcessedCUs.count(CU.get()))
1743       continue;
1744 
1745     const DWARFDebugLine::LineTable *LineTable =
1746         DwCtx->getLineTableForUnit(CU.get());
1747     const std::vector<DWARFDebugLine::FileNameEntry> &FileNames =
1748         LineTable->Prologue.FileNames;
1749 
1750     uint16_t DwarfVersion = LineTable->Prologue.getVersion();
1751     if (DwarfVersion >= 5) {
1752       std::optional<MD5::MD5Result> Checksum;
1753       if (LineTable->Prologue.ContentTypes.HasMD5)
1754         Checksum = LineTable->Prologue.FileNames[0].Checksum;
1755       std::optional<const char *> Name =
1756           dwarf::toString(CU->getUnitDIE().find(dwarf::DW_AT_name), nullptr);
1757       if (std::optional<uint64_t> DWOID = CU->getDWOId()) {
1758         auto Iter = DWOCUs.find(*DWOID);
1759         assert(Iter != DWOCUs.end() && "DWO CU was not found.");
1760         Name = dwarf::toString(
1761             Iter->second->getUnitDIE().find(dwarf::DW_AT_name), nullptr);
1762       }
1763       BinaryLineTable.setRootFile(CU->getCompilationDir(), *Name, Checksum,
1764                                   std::nullopt);
1765     }
1766 
1767     BinaryLineTable.setDwarfVersion(DwarfVersion);
1768 
1769     // Assign a unique label to every line table, one per CU.
1770     // Make sure empty debug line tables are registered too.
1771     if (FileNames.empty()) {
1772       cantFail(getDwarfFile("", "<unknown>", 0, std::nullopt, std::nullopt,
1773                             CUID, DwarfVersion));
1774       continue;
1775     }
1776     const uint32_t Offset = DwarfVersion < 5 ? 1 : 0;
1777     for (size_t I = 0, Size = FileNames.size(); I != Size; ++I) {
1778       // Dir indexes start at 1, as DWARF file numbers, and a dir index 0
1779       // means empty dir.
1780       StringRef Dir = "";
1781       if (FileNames[I].DirIdx != 0 || DwarfVersion >= 5)
1782         if (std::optional<const char *> DirName = dwarf::toString(
1783                 LineTable->Prologue
1784                     .IncludeDirectories[FileNames[I].DirIdx - Offset]))
1785           Dir = *DirName;
1786       StringRef FileName = "";
1787       if (std::optional<const char *> FName =
1788               dwarf::toString(FileNames[I].Name))
1789         FileName = *FName;
1790       assert(FileName != "");
1791       std::optional<MD5::MD5Result> Checksum;
1792       if (DwarfVersion >= 5 && LineTable->Prologue.ContentTypes.HasMD5)
1793         Checksum = LineTable->Prologue.FileNames[I].Checksum;
1794       cantFail(getDwarfFile(Dir, FileName, 0, Checksum, std::nullopt, CUID,
1795                             DwarfVersion));
1796     }
1797   }
1798 }
1799 
1800 bool BinaryContext::shouldEmit(const BinaryFunction &Function) const {
1801   if (Function.isPseudo())
1802     return false;
1803 
1804   if (opts::processAllFunctions())
1805     return true;
1806 
1807   if (Function.isIgnored())
1808     return false;
1809 
1810   // In relocation mode we will emit non-simple functions with CFG.
1811   // If the function does not have a CFG it should be marked as ignored.
1812   return HasRelocations || Function.isSimple();
1813 }
1814 
1815 void BinaryContext::dump(const MCInst &Inst) const {
1816   if (LLVM_UNLIKELY(!InstPrinter)) {
1817     dbgs() << "Cannot dump for InstPrinter is not initialized.\n";
1818     return;
1819   }
1820   InstPrinter->printInst(&Inst, 0, "", *STI, dbgs());
1821   dbgs() << "\n";
1822 }
1823 
1824 void BinaryContext::printCFI(raw_ostream &OS, const MCCFIInstruction &Inst) {
1825   uint32_t Operation = Inst.getOperation();
1826   switch (Operation) {
1827   case MCCFIInstruction::OpSameValue:
1828     OS << "OpSameValue Reg" << Inst.getRegister();
1829     break;
1830   case MCCFIInstruction::OpRememberState:
1831     OS << "OpRememberState";
1832     break;
1833   case MCCFIInstruction::OpRestoreState:
1834     OS << "OpRestoreState";
1835     break;
1836   case MCCFIInstruction::OpOffset:
1837     OS << "OpOffset Reg" << Inst.getRegister() << " " << Inst.getOffset();
1838     break;
1839   case MCCFIInstruction::OpDefCfaRegister:
1840     OS << "OpDefCfaRegister Reg" << Inst.getRegister();
1841     break;
1842   case MCCFIInstruction::OpDefCfaOffset:
1843     OS << "OpDefCfaOffset " << Inst.getOffset();
1844     break;
1845   case MCCFIInstruction::OpDefCfa:
1846     OS << "OpDefCfa Reg" << Inst.getRegister() << " " << Inst.getOffset();
1847     break;
1848   case MCCFIInstruction::OpRelOffset:
1849     OS << "OpRelOffset Reg" << Inst.getRegister() << " " << Inst.getOffset();
1850     break;
1851   case MCCFIInstruction::OpAdjustCfaOffset:
1852     OS << "OfAdjustCfaOffset " << Inst.getOffset();
1853     break;
1854   case MCCFIInstruction::OpEscape:
1855     OS << "OpEscape";
1856     break;
1857   case MCCFIInstruction::OpRestore:
1858     OS << "OpRestore Reg" << Inst.getRegister();
1859     break;
1860   case MCCFIInstruction::OpUndefined:
1861     OS << "OpUndefined Reg" << Inst.getRegister();
1862     break;
1863   case MCCFIInstruction::OpRegister:
1864     OS << "OpRegister Reg" << Inst.getRegister() << " Reg"
1865        << Inst.getRegister2();
1866     break;
1867   case MCCFIInstruction::OpWindowSave:
1868     OS << "OpWindowSave";
1869     break;
1870   case MCCFIInstruction::OpGnuArgsSize:
1871     OS << "OpGnuArgsSize";
1872     break;
1873   default:
1874     OS << "Op#" << Operation;
1875     break;
1876   }
1877 }
1878 
1879 MarkerSymType BinaryContext::getMarkerType(const SymbolRef &Symbol) const {
1880   // For aarch64 and riscv, the ABI defines mapping symbols so we identify data
1881   // in the code section (see IHI0056B). $x identifies a symbol starting code or
1882   // the end of a data chunk inside code, $d identifies start of data.
1883   if ((!isAArch64() && !isRISCV()) || ELFSymbolRef(Symbol).getSize())
1884     return MarkerSymType::NONE;
1885 
1886   Expected<StringRef> NameOrError = Symbol.getName();
1887   Expected<object::SymbolRef::Type> TypeOrError = Symbol.getType();
1888 
1889   if (!TypeOrError || !NameOrError)
1890     return MarkerSymType::NONE;
1891 
1892   if (*TypeOrError != SymbolRef::ST_Unknown)
1893     return MarkerSymType::NONE;
1894 
1895   if (*NameOrError == "$x" || NameOrError->starts_with("$x."))
1896     return MarkerSymType::CODE;
1897 
1898   // $x<ISA>
1899   if (isRISCV() && NameOrError->starts_with("$x"))
1900     return MarkerSymType::CODE;
1901 
1902   if (*NameOrError == "$d" || NameOrError->starts_with("$d."))
1903     return MarkerSymType::DATA;
1904 
1905   return MarkerSymType::NONE;
1906 }
1907 
1908 bool BinaryContext::isMarker(const SymbolRef &Symbol) const {
1909   return getMarkerType(Symbol) != MarkerSymType::NONE;
1910 }
1911 
1912 static void printDebugInfo(raw_ostream &OS, const MCInst &Instruction,
1913                            const BinaryFunction *Function,
1914                            DWARFContext *DwCtx) {
1915   DebugLineTableRowRef RowRef =
1916       DebugLineTableRowRef::fromSMLoc(Instruction.getLoc());
1917   if (RowRef == DebugLineTableRowRef::NULL_ROW)
1918     return;
1919 
1920   const DWARFDebugLine::LineTable *LineTable;
1921   if (Function && Function->getDWARFUnit() &&
1922       Function->getDWARFUnit()->getOffset() == RowRef.DwCompileUnitIndex) {
1923     LineTable = Function->getDWARFLineTable();
1924   } else {
1925     LineTable = DwCtx->getLineTableForUnit(
1926         DwCtx->getCompileUnitForOffset(RowRef.DwCompileUnitIndex));
1927   }
1928   assert(LineTable && "line table expected for instruction with debug info");
1929 
1930   const DWARFDebugLine::Row &Row = LineTable->Rows[RowRef.RowIndex - 1];
1931   StringRef FileName = "";
1932   if (std::optional<const char *> FName =
1933           dwarf::toString(LineTable->Prologue.FileNames[Row.File - 1].Name))
1934     FileName = *FName;
1935   OS << " # debug line " << FileName << ":" << Row.Line;
1936   if (Row.Column)
1937     OS << ":" << Row.Column;
1938   if (Row.Discriminator)
1939     OS << " discriminator:" << Row.Discriminator;
1940 }
1941 
1942 void BinaryContext::printInstruction(raw_ostream &OS, const MCInst &Instruction,
1943                                      uint64_t Offset,
1944                                      const BinaryFunction *Function,
1945                                      bool PrintMCInst, bool PrintMemData,
1946                                      bool PrintRelocations,
1947                                      StringRef Endl) const {
1948   OS << format("    %08" PRIx64 ": ", Offset);
1949   if (MIB->isCFI(Instruction)) {
1950     uint32_t Offset = Instruction.getOperand(0).getImm();
1951     OS << "\t!CFI\t$" << Offset << "\t; ";
1952     if (Function)
1953       printCFI(OS, *Function->getCFIFor(Instruction));
1954     OS << Endl;
1955     return;
1956   }
1957   if (std::optional<uint32_t> DynamicID =
1958           MIB->getDynamicBranchID(Instruction)) {
1959     OS << "\tjit\t" << MIB->getTargetSymbol(Instruction)->getName()
1960        << " # ID: " << DynamicID;
1961   } else {
1962     InstPrinter->printInst(&Instruction, 0, "", *STI, OS);
1963   }
1964   if (MIB->isCall(Instruction)) {
1965     if (MIB->isTailCall(Instruction))
1966       OS << " # TAILCALL ";
1967     if (MIB->isInvoke(Instruction)) {
1968       const std::optional<MCPlus::MCLandingPad> EHInfo =
1969           MIB->getEHInfo(Instruction);
1970       OS << " # handler: ";
1971       if (EHInfo->first)
1972         OS << *EHInfo->first;
1973       else
1974         OS << '0';
1975       OS << "; action: " << EHInfo->second;
1976       const int64_t GnuArgsSize = MIB->getGnuArgsSize(Instruction);
1977       if (GnuArgsSize >= 0)
1978         OS << "; GNU_args_size = " << GnuArgsSize;
1979     }
1980   } else if (MIB->isIndirectBranch(Instruction)) {
1981     if (uint64_t JTAddress = MIB->getJumpTable(Instruction)) {
1982       OS << " # JUMPTABLE @0x" << Twine::utohexstr(JTAddress);
1983     } else {
1984       OS << " # UNKNOWN CONTROL FLOW";
1985     }
1986   }
1987   if (std::optional<uint32_t> Offset = MIB->getOffset(Instruction))
1988     OS << " # Offset: " << *Offset;
1989   if (std::optional<uint32_t> Size = MIB->getSize(Instruction))
1990     OS << " # Size: " << *Size;
1991   if (MCSymbol *Label = MIB->getInstLabel(Instruction))
1992     OS << " # Label: " << *Label;
1993 
1994   MIB->printAnnotations(Instruction, OS);
1995 
1996   if (opts::PrintDebugInfo)
1997     printDebugInfo(OS, Instruction, Function, DwCtx.get());
1998 
1999   if ((opts::PrintRelocations || PrintRelocations) && Function) {
2000     const uint64_t Size = computeCodeSize(&Instruction, &Instruction + 1);
2001     Function->printRelocations(OS, Offset, Size);
2002   }
2003 
2004   OS << Endl;
2005 
2006   if (PrintMCInst) {
2007     Instruction.dump_pretty(OS, InstPrinter.get());
2008     OS << Endl;
2009   }
2010 }
2011 
2012 std::optional<uint64_t>
2013 BinaryContext::getBaseAddressForMapping(uint64_t MMapAddress,
2014                                         uint64_t FileOffset) const {
2015   // Find a segment with a matching file offset.
2016   for (auto &KV : SegmentMapInfo) {
2017     const SegmentInfo &SegInfo = KV.second;
2018     // FileOffset is got from perf event,
2019     // and it is equal to alignDown(SegInfo.FileOffset, pagesize).
2020     // If the pagesize is not equal to SegInfo.Alignment.
2021     // FileOffset and SegInfo.FileOffset should be aligned first,
2022     // and then judge whether they are equal.
2023     if (alignDown(SegInfo.FileOffset, SegInfo.Alignment) ==
2024         alignDown(FileOffset, SegInfo.Alignment)) {
2025       // The function's offset from base address in VAS is aligned by pagesize
2026       // instead of SegInfo.Alignment. Pagesize can't be got from perf events.
2027       // However, The ELF document says that SegInfo.FileOffset should equal
2028       // to SegInfo.Address, modulo the pagesize.
2029       // Reference: https://refspecs.linuxfoundation.org/elf/elf.pdf
2030 
2031       // So alignDown(SegInfo.Address, pagesize) can be calculated by:
2032       // alignDown(SegInfo.Address, pagesize)
2033       //   = SegInfo.Address - (SegInfo.Address % pagesize)
2034       //   = SegInfo.Address - (SegInfo.FileOffset % pagesize)
2035       //   = SegInfo.Address - SegInfo.FileOffset +
2036       //     alignDown(SegInfo.FileOffset, pagesize)
2037       //   = SegInfo.Address - SegInfo.FileOffset + FileOffset
2038       return MMapAddress - (SegInfo.Address - SegInfo.FileOffset + FileOffset);
2039     }
2040   }
2041 
2042   return std::nullopt;
2043 }
2044 
2045 ErrorOr<BinarySection &> BinaryContext::getSectionForAddress(uint64_t Address) {
2046   auto SI = AddressToSection.upper_bound(Address);
2047   if (SI != AddressToSection.begin()) {
2048     --SI;
2049     uint64_t UpperBound = SI->first + SI->second->getSize();
2050     if (!SI->second->getSize())
2051       UpperBound += 1;
2052     if (UpperBound > Address)
2053       return *SI->second;
2054   }
2055   return std::make_error_code(std::errc::bad_address);
2056 }
2057 
2058 ErrorOr<StringRef>
2059 BinaryContext::getSectionNameForAddress(uint64_t Address) const {
2060   if (ErrorOr<const BinarySection &> Section = getSectionForAddress(Address))
2061     return Section->getName();
2062   return std::make_error_code(std::errc::bad_address);
2063 }
2064 
2065 BinarySection &BinaryContext::registerSection(BinarySection *Section) {
2066   auto Res = Sections.insert(Section);
2067   (void)Res;
2068   assert(Res.second && "can't register the same section twice.");
2069 
2070   // Only register allocatable sections in the AddressToSection map.
2071   if (Section->isAllocatable() && Section->getAddress())
2072     AddressToSection.insert(std::make_pair(Section->getAddress(), Section));
2073   NameToSection.insert(
2074       std::make_pair(std::string(Section->getName()), Section));
2075   if (Section->hasSectionRef())
2076     SectionRefToBinarySection.insert(
2077         std::make_pair(Section->getSectionRef(), Section));
2078 
2079   LLVM_DEBUG(dbgs() << "BOLT-DEBUG: registering " << *Section << "\n");
2080   return *Section;
2081 }
2082 
2083 BinarySection &BinaryContext::registerSection(SectionRef Section) {
2084   return registerSection(new BinarySection(*this, Section));
2085 }
2086 
2087 BinarySection &
2088 BinaryContext::registerSection(const Twine &SectionName,
2089                                const BinarySection &OriginalSection) {
2090   return registerSection(
2091       new BinarySection(*this, SectionName, OriginalSection));
2092 }
2093 
2094 BinarySection &
2095 BinaryContext::registerOrUpdateSection(const Twine &Name, unsigned ELFType,
2096                                        unsigned ELFFlags, uint8_t *Data,
2097                                        uint64_t Size, unsigned Alignment) {
2098   auto NamedSections = getSectionByName(Name);
2099   if (NamedSections.begin() != NamedSections.end()) {
2100     assert(std::next(NamedSections.begin()) == NamedSections.end() &&
2101            "can only update unique sections");
2102     BinarySection *Section = NamedSections.begin()->second;
2103 
2104     LLVM_DEBUG(dbgs() << "BOLT-DEBUG: updating " << *Section << " -> ");
2105     const bool Flag = Section->isAllocatable();
2106     (void)Flag;
2107     Section->update(Data, Size, Alignment, ELFType, ELFFlags);
2108     LLVM_DEBUG(dbgs() << *Section << "\n");
2109     // FIXME: Fix section flags/attributes for MachO.
2110     if (isELF())
2111       assert(Flag == Section->isAllocatable() &&
2112              "can't change section allocation status");
2113     return *Section;
2114   }
2115 
2116   return registerSection(
2117       new BinarySection(*this, Name, Data, Size, Alignment, ELFType, ELFFlags));
2118 }
2119 
2120 void BinaryContext::deregisterSectionName(const BinarySection &Section) {
2121   auto NameRange = NameToSection.equal_range(Section.getName().str());
2122   while (NameRange.first != NameRange.second) {
2123     if (NameRange.first->second == &Section) {
2124       NameToSection.erase(NameRange.first);
2125       break;
2126     }
2127     ++NameRange.first;
2128   }
2129 }
2130 
2131 void BinaryContext::deregisterUnusedSections() {
2132   ErrorOr<BinarySection &> AbsSection = getUniqueSectionByName("<absolute>");
2133   for (auto SI = Sections.begin(); SI != Sections.end();) {
2134     BinarySection *Section = *SI;
2135     // We check getOutputData() instead of getOutputSize() because sometimes
2136     // zero-sized .text.cold sections are allocated.
2137     if (Section->hasSectionRef() || Section->getOutputData() ||
2138         (AbsSection && Section == &AbsSection.get())) {
2139       ++SI;
2140       continue;
2141     }
2142 
2143     LLVM_DEBUG(dbgs() << "LLVM-DEBUG: deregistering " << Section->getName()
2144                       << '\n';);
2145     deregisterSectionName(*Section);
2146     SI = Sections.erase(SI);
2147     delete Section;
2148   }
2149 }
2150 
2151 bool BinaryContext::deregisterSection(BinarySection &Section) {
2152   BinarySection *SectionPtr = &Section;
2153   auto Itr = Sections.find(SectionPtr);
2154   if (Itr != Sections.end()) {
2155     auto Range = AddressToSection.equal_range(SectionPtr->getAddress());
2156     while (Range.first != Range.second) {
2157       if (Range.first->second == SectionPtr) {
2158         AddressToSection.erase(Range.first);
2159         break;
2160       }
2161       ++Range.first;
2162     }
2163 
2164     deregisterSectionName(*SectionPtr);
2165     Sections.erase(Itr);
2166     delete SectionPtr;
2167     return true;
2168   }
2169   return false;
2170 }
2171 
2172 void BinaryContext::renameSection(BinarySection &Section,
2173                                   const Twine &NewName) {
2174   auto Itr = Sections.find(&Section);
2175   assert(Itr != Sections.end() && "Section must exist to be renamed.");
2176   Sections.erase(Itr);
2177 
2178   deregisterSectionName(Section);
2179 
2180   Section.Name = NewName.str();
2181   Section.setOutputName(Section.Name);
2182 
2183   NameToSection.insert(std::make_pair(Section.Name, &Section));
2184 
2185   // Reinsert with the new name.
2186   Sections.insert(&Section);
2187 }
2188 
2189 void BinaryContext::printSections(raw_ostream &OS) const {
2190   for (BinarySection *const &Section : Sections)
2191     OS << "BOLT-INFO: " << *Section << "\n";
2192 }
2193 
2194 BinarySection &BinaryContext::absoluteSection() {
2195   if (ErrorOr<BinarySection &> Section = getUniqueSectionByName("<absolute>"))
2196     return *Section;
2197   return registerOrUpdateSection("<absolute>", ELF::SHT_NULL, 0u);
2198 }
2199 
2200 ErrorOr<uint64_t> BinaryContext::getUnsignedValueAtAddress(uint64_t Address,
2201                                                            size_t Size) const {
2202   const ErrorOr<const BinarySection &> Section = getSectionForAddress(Address);
2203   if (!Section)
2204     return std::make_error_code(std::errc::bad_address);
2205 
2206   if (Section->isVirtual())
2207     return 0;
2208 
2209   DataExtractor DE(Section->getContents(), AsmInfo->isLittleEndian(),
2210                    AsmInfo->getCodePointerSize());
2211   auto ValueOffset = static_cast<uint64_t>(Address - Section->getAddress());
2212   return DE.getUnsigned(&ValueOffset, Size);
2213 }
2214 
2215 ErrorOr<uint64_t> BinaryContext::getSignedValueAtAddress(uint64_t Address,
2216                                                          size_t Size) const {
2217   const ErrorOr<const BinarySection &> Section = getSectionForAddress(Address);
2218   if (!Section)
2219     return std::make_error_code(std::errc::bad_address);
2220 
2221   if (Section->isVirtual())
2222     return 0;
2223 
2224   DataExtractor DE(Section->getContents(), AsmInfo->isLittleEndian(),
2225                    AsmInfo->getCodePointerSize());
2226   auto ValueOffset = static_cast<uint64_t>(Address - Section->getAddress());
2227   return DE.getSigned(&ValueOffset, Size);
2228 }
2229 
2230 void BinaryContext::addRelocation(uint64_t Address, MCSymbol *Symbol,
2231                                   uint64_t Type, uint64_t Addend,
2232                                   uint64_t Value) {
2233   ErrorOr<BinarySection &> Section = getSectionForAddress(Address);
2234   assert(Section && "cannot find section for address");
2235   Section->addRelocation(Address - Section->getAddress(), Symbol, Type, Addend,
2236                          Value);
2237 }
2238 
2239 void BinaryContext::addDynamicRelocation(uint64_t Address, MCSymbol *Symbol,
2240                                          uint64_t Type, uint64_t Addend,
2241                                          uint64_t Value) {
2242   ErrorOr<BinarySection &> Section = getSectionForAddress(Address);
2243   assert(Section && "cannot find section for address");
2244   Section->addDynamicRelocation(Address - Section->getAddress(), Symbol, Type,
2245                                 Addend, Value);
2246 }
2247 
2248 bool BinaryContext::removeRelocationAt(uint64_t Address) {
2249   ErrorOr<BinarySection &> Section = getSectionForAddress(Address);
2250   assert(Section && "cannot find section for address");
2251   return Section->removeRelocationAt(Address - Section->getAddress());
2252 }
2253 
2254 const Relocation *BinaryContext::getRelocationAt(uint64_t Address) const {
2255   ErrorOr<const BinarySection &> Section = getSectionForAddress(Address);
2256   if (!Section)
2257     return nullptr;
2258 
2259   return Section->getRelocationAt(Address - Section->getAddress());
2260 }
2261 
2262 const Relocation *
2263 BinaryContext::getDynamicRelocationAt(uint64_t Address) const {
2264   ErrorOr<const BinarySection &> Section = getSectionForAddress(Address);
2265   if (!Section)
2266     return nullptr;
2267 
2268   return Section->getDynamicRelocationAt(Address - Section->getAddress());
2269 }
2270 
2271 void BinaryContext::markAmbiguousRelocations(BinaryData &BD,
2272                                              const uint64_t Address) {
2273   auto setImmovable = [&](BinaryData &BD) {
2274     BinaryData *Root = BD.getAtomicRoot();
2275     LLVM_DEBUG(if (Root->isMoveable()) {
2276       dbgs() << "BOLT-DEBUG: setting " << *Root << " as immovable "
2277              << "due to ambiguous relocation referencing 0x"
2278              << Twine::utohexstr(Address) << '\n';
2279     });
2280     Root->setIsMoveable(false);
2281   };
2282 
2283   if (Address == BD.getAddress()) {
2284     setImmovable(BD);
2285 
2286     // Set previous symbol as immovable
2287     BinaryData *Prev = getBinaryDataContainingAddress(Address - 1);
2288     if (Prev && Prev->getEndAddress() == BD.getAddress())
2289       setImmovable(*Prev);
2290   }
2291 
2292   if (Address == BD.getEndAddress()) {
2293     setImmovable(BD);
2294 
2295     // Set next symbol as immovable
2296     BinaryData *Next = getBinaryDataContainingAddress(BD.getEndAddress());
2297     if (Next && Next->getAddress() == BD.getEndAddress())
2298       setImmovable(*Next);
2299   }
2300 }
2301 
2302 BinaryFunction *BinaryContext::getFunctionForSymbol(const MCSymbol *Symbol,
2303                                                     uint64_t *EntryDesc) {
2304   std::shared_lock<llvm::sys::RWMutex> Lock(SymbolToFunctionMapMutex);
2305   auto BFI = SymbolToFunctionMap.find(Symbol);
2306   if (BFI == SymbolToFunctionMap.end())
2307     return nullptr;
2308 
2309   BinaryFunction *BF = BFI->second;
2310   if (EntryDesc)
2311     *EntryDesc = BF->getEntryIDForSymbol(Symbol);
2312 
2313   return BF;
2314 }
2315 
2316 std::string
2317 BinaryContext::generateBugReportMessage(StringRef Message,
2318                                         const BinaryFunction &Function) const {
2319   std::string Msg;
2320   raw_string_ostream SS(Msg);
2321   SS << "=======================================\n";
2322   SS << "BOLT is unable to proceed because it couldn't properly understand "
2323         "this function.\n";
2324   SS << "If you are running the most recent version of BOLT, you may "
2325         "want to "
2326         "report this and paste this dump.\nPlease check that there is no "
2327         "sensitive contents being shared in this dump.\n";
2328   SS << "\nOffending function: " << Function.getPrintName() << "\n\n";
2329   ScopedPrinter SP(SS);
2330   SP.printBinaryBlock("Function contents", *Function.getData());
2331   SS << "\n";
2332   const_cast<BinaryFunction &>(Function).print(SS, "");
2333   SS << "ERROR: " << Message;
2334   SS << "\n=======================================\n";
2335   return Msg;
2336 }
2337 
2338 BinaryFunction *
2339 BinaryContext::createInjectedBinaryFunction(const std::string &Name,
2340                                             bool IsSimple) {
2341   InjectedBinaryFunctions.push_back(new BinaryFunction(Name, *this, IsSimple));
2342   BinaryFunction *BF = InjectedBinaryFunctions.back();
2343   setSymbolToFunctionMap(BF->getSymbol(), BF);
2344   BF->CurrentState = BinaryFunction::State::CFG;
2345   return BF;
2346 }
2347 
// Measure the size of the code that would be emitted for \p BF by emitting it
// into a temporary object streamer backed by a local MCContext.
//
// \p BF          function to measure; its basic blocks get their output
//                start/end offsets updated as a side effect.
// \p FixBranches when true, BF.fixBranches() is called before emission.
//
// Returns {HotSize, ColdSize}: the size of the main fragment and the sum of
// the sizes of all split fragments.
std::pair<size_t, size_t>
BinaryContext::calculateEmittedSize(BinaryFunction &BF, bool FixBranches) {
  // Adjust branch instruction to match the current layout.
  if (FixBranches)
    BF.fixBranches();

  // Create local MC context to isolate the effect of ephemeral code emission.
  IndependentCodeEmitter MCEInstance = createIndependentMCCodeEmitter();
  MCContext *LocalCtx = MCEInstance.LocalCtx.get();
  // MAB is held as a raw pointer only until the streamer is created below,
  // where it is wrapped into a unique_ptr and handed over.
  MCAsmBackend *MAB =
      TheTarget->createMCAsmBackend(*STI, *MRI, MCTargetOptions());

  // Scratch buffer that backs the object writer's output stream.
  SmallString<256> Code;
  raw_svector_ostream VecOS(Code);

  std::unique_ptr<MCObjectWriter> OW = MAB->createObjectWriter(VecOS);
  // The streamer receives the backend, the object writer and the code emitter
  // (released from MCEInstance) as unique_ptr arguments.
  std::unique_ptr<MCStreamer> Streamer(TheTarget->createMCObjectStreamer(
      *TheTriple, *LocalCtx, std::unique_ptr<MCAsmBackend>(MAB), std::move(OW),
      std::unique_ptr<MCCodeEmitter>(MCEInstance.MCE.release()), *STI,
      /*RelaxAll=*/false,
      /*IncrementalLinkerCompatible=*/false,
      /*DWARFMustBeAtTheEnd=*/false));

  Streamer->initSections(false, *STI);

  // The main fragment is emitted into the local text section.
  MCSection *Section = MCEInstance.LocalMOFI->getTextSection();
  Section->setHasInstructions(true);

  // Create symbols in the LocalCtx so that they get destroyed with it.
  MCSymbol *StartLabel = LocalCtx->createTempSymbol();
  MCSymbol *EndLabel = LocalCtx->createTempSymbol();

  // Emit the main fragment bracketed by StartLabel/EndLabel.
  Streamer->switchSection(Section);
  Streamer->emitLabel(StartLabel);
  emitFunctionBody(*Streamer, BF, BF.getLayout().getMainFragment(),
                   /*EmitCodeOnly=*/true);
  Streamer->emitLabel(EndLabel);

  // Emit every split fragment into its own section, bracketed by a pair of
  // temporary labels that are remembered in SplitLabels for size computation.
  using LabelRange = std::pair<const MCSymbol *, const MCSymbol *>;
  SmallVector<LabelRange> SplitLabels;
  for (FunctionFragment &FF : BF.getLayout().getSplitFragments()) {
    MCSymbol *const SplitStartLabel = LocalCtx->createTempSymbol();
    MCSymbol *const SplitEndLabel = LocalCtx->createTempSymbol();
    SplitLabels.emplace_back(SplitStartLabel, SplitEndLabel);

    MCSectionELF *const SplitSection = LocalCtx->getELFSection(
        BF.getCodeSectionName(FF.getFragmentNum()), ELF::SHT_PROGBITS,
        ELF::SHF_EXECINSTR | ELF::SHF_ALLOC);
    SplitSection->setHasInstructions(true);
    Streamer->switchSection(SplitSection);

    Streamer->emitLabel(SplitStartLabel);
    emitFunctionBody(*Streamer, BF, FF, /*EmitCodeOnly=*/true);
    Streamer->emitLabel(SplitEndLabel);
    // To avoid calling MCObjectStreamer::flushPendingLabels() which is
    // private
    Streamer->emitBytes(StringRef(""));
    // Return to the main section before handling the next fragment.
    Streamer->switchSection(Section);
  }

  // To avoid calling MCObjectStreamer::flushPendingLabels() which is private or
  // MCStreamer::Finish(), which does more than we want
  Streamer->emitBytes(StringRef(""));

  // Lay out the emitted code so that symbol offsets can be queried.
  MCAssembler &Assembler =
      static_cast<MCObjectStreamer *>(Streamer.get())->getAssembler();
  MCAsmLayout Layout(Assembler);
  Assembler.layout(Layout);

  // Obtain fragment sizes.
  // FragmentSizes holds the main fragment size first, followed by the split
  // fragment sizes in the order the split fragments were emitted above.
  std::vector<uint64_t> FragmentSizes;
  // Main fragment size.
  const uint64_t HotSize =
      Layout.getSymbolOffset(*EndLabel) - Layout.getSymbolOffset(*StartLabel);
  FragmentSizes.push_back(HotSize);
  // Split fragment sizes.
  uint64_t ColdSize = 0;
  for (const auto &Labels : SplitLabels) {
    uint64_t Size = Layout.getSymbolOffset(*Labels.second) -
                    Layout.getSymbolOffset(*Labels.first);
    FragmentSizes.push_back(Size);
    ColdSize += Size;
  }

  // Populate new start and end offsets of each basic block.
  // NOTE(review): indexing FragmentSizes by FragmentIndex assumes that
  // fragments() yields the main fragment followed by the split fragments in
  // the same order as getSplitFragments() -- confirm against FunctionLayout.
  uint64_t FragmentIndex = 0;
  for (FunctionFragment &FF : BF.getLayout().fragments()) {
    BinaryBasicBlock *PrevBB = nullptr;
    for (BinaryBasicBlock *BB : FF) {
      const uint64_t BBStartOffset = Layout.getSymbolOffset(*(BB->getLabel()));
      BB->setOutputStartAddress(BBStartOffset);
      // A block ends where the next block of the same fragment starts.
      if (PrevBB)
        PrevBB->setOutputEndAddress(BBStartOffset);
      PrevBB = BB;
    }
    // The last block of a fragment ends at the fragment's size.
    if (PrevBB)
      PrevBB->setOutputEndAddress(FragmentSizes[FragmentIndex]);
    FragmentIndex++;
  }

  // Clean-up the effect of the code emission.
  // Every symbol known to the assembler is reset to undefined and
  // unregistered; presumably so that symbols shared with later emissions
  // (e.g. basic block labels) can be emitted again -- TODO confirm.
  for (const MCSymbol &Symbol : Assembler.symbols()) {
    MCSymbol *MutableSymbol = const_cast<MCSymbol *>(&Symbol);
    MutableSymbol->setUndefined();
    MutableSymbol->setIsRegistered(false);
  }

  return std::make_pair(HotSize, ColdSize);
}
2457 
2458 bool BinaryContext::validateInstructionEncoding(
2459     ArrayRef<uint8_t> InputSequence) const {
2460   MCInst Inst;
2461   uint64_t InstSize;
2462   DisAsm->getInstruction(Inst, InstSize, InputSequence, 0, nulls());
2463   assert(InstSize == InputSequence.size() &&
2464          "Disassembled instruction size does not match the sequence.");
2465 
2466   SmallString<256> Code;
2467   SmallVector<MCFixup, 4> Fixups;
2468 
2469   MCE->encodeInstruction(Inst, Code, Fixups, *STI);
2470   auto OutputSequence = ArrayRef<uint8_t>((uint8_t *)Code.data(), Code.size());
2471   if (InputSequence != OutputSequence) {
2472     if (opts::Verbosity > 1) {
2473       this->errs() << "BOLT-WARNING: mismatched encoding detected\n"
2474                    << "      input: " << InputSequence << '\n'
2475                    << "     output: " << OutputSequence << '\n';
2476     }
2477     return false;
2478   }
2479 
2480   return true;
2481 }
2482 
2483 uint64_t BinaryContext::getHotThreshold() const {
2484   static uint64_t Threshold = 0;
2485   if (Threshold == 0) {
2486     Threshold = std::max(
2487         (uint64_t)opts::ExecutionCountThreshold,
2488         NumProfiledFuncs ? SumExecutionCount / (2 * NumProfiledFuncs) : 1);
2489   }
2490   return Threshold;
2491 }
2492 
2493 BinaryFunction *BinaryContext::getBinaryFunctionContainingAddress(
2494     uint64_t Address, bool CheckPastEnd, bool UseMaxSize) {
2495   auto FI = BinaryFunctions.upper_bound(Address);
2496   if (FI == BinaryFunctions.begin())
2497     return nullptr;
2498   --FI;
2499 
2500   const uint64_t UsedSize =
2501       UseMaxSize ? FI->second.getMaxSize() : FI->second.getSize();
2502 
2503   if (Address >= FI->first + UsedSize + (CheckPastEnd ? 1 : 0))
2504     return nullptr;
2505 
2506   return &FI->second;
2507 }
2508 
2509 BinaryFunction *BinaryContext::getBinaryFunctionAtAddress(uint64_t Address) {
2510   // First, try to find a function starting at the given address. If the
2511   // function was folded, this will get us the original folded function if it
2512   // wasn't removed from the list, e.g. in non-relocation mode.
2513   auto BFI = BinaryFunctions.find(Address);
2514   if (BFI != BinaryFunctions.end())
2515     return &BFI->second;
2516 
2517   // We might have folded the function matching the object at the given
2518   // address. In such case, we look for a function matching the symbol
2519   // registered at the original address. The new function (the one that the
2520   // original was folded into) will hold the symbol.
2521   if (const BinaryData *BD = getBinaryDataAtAddress(Address)) {
2522     uint64_t EntryID = 0;
2523     BinaryFunction *BF = getFunctionForSymbol(BD->getSymbol(), &EntryID);
2524     if (BF && EntryID == 0)
2525       return BF;
2526   }
2527   return nullptr;
2528 }
2529 
2530 DebugAddressRangesVector BinaryContext::translateModuleAddressRanges(
2531     const DWARFAddressRangesVector &InputRanges) const {
2532   DebugAddressRangesVector OutputRanges;
2533 
2534   for (const DWARFAddressRange Range : InputRanges) {
2535     auto BFI = BinaryFunctions.lower_bound(Range.LowPC);
2536     while (BFI != BinaryFunctions.end()) {
2537       const BinaryFunction &Function = BFI->second;
2538       if (Function.getAddress() >= Range.HighPC)
2539         break;
2540       const DebugAddressRangesVector FunctionRanges =
2541           Function.getOutputAddressRanges();
2542       llvm::move(FunctionRanges, std::back_inserter(OutputRanges));
2543       std::advance(BFI, 1);
2544     }
2545   }
2546 
2547   return OutputRanges;
2548 }
2549 
2550 } // namespace bolt
2551 } // namespace llvm
2552