xref: /llvm-project/bolt/lib/Core/BinaryContext.cpp (revision 32d2473a5dba417eb8d34146575289e4e53c91fa)
1 //===- bolt/Core/BinaryContext.cpp - Low-level context --------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements the BinaryContext class.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "bolt/Core/BinaryContext.h"
14 #include "bolt/Core/BinaryEmitter.h"
15 #include "bolt/Core/BinaryFunction.h"
16 #include "bolt/Utils/CommandLineOpts.h"
17 #include "bolt/Utils/NameResolver.h"
18 #include "bolt/Utils/Utils.h"
19 #include "llvm/ADT/Twine.h"
20 #include "llvm/DebugInfo/DWARF/DWARFCompileUnit.h"
21 #include "llvm/DebugInfo/DWARF/DWARFFormValue.h"
22 #include "llvm/DebugInfo/DWARF/DWARFUnit.h"
23 #include "llvm/MC/MCAsmLayout.h"
24 #include "llvm/MC/MCAssembler.h"
25 #include "llvm/MC/MCContext.h"
26 #include "llvm/MC/MCDisassembler/MCDisassembler.h"
27 #include "llvm/MC/MCInstPrinter.h"
28 #include "llvm/MC/MCObjectStreamer.h"
29 #include "llvm/MC/MCObjectWriter.h"
30 #include "llvm/MC/MCRegisterInfo.h"
31 #include "llvm/MC/MCSectionELF.h"
32 #include "llvm/MC/MCStreamer.h"
33 #include "llvm/MC/MCSubtargetInfo.h"
34 #include "llvm/MC/MCSymbol.h"
35 #include "llvm/Support/CommandLine.h"
36 #include "llvm/Support/Error.h"
37 #include "llvm/Support/Regex.h"
38 #include <algorithm>
39 #include <functional>
40 #include <iterator>
41 #include <unordered_set>
42 
43 using namespace llvm;
44 
45 #undef  DEBUG_TYPE
46 #define DEBUG_TYPE "bolt"
47 
48 namespace opts {
49 
50 cl::opt<bool>
51 NoHugePages("no-huge-pages",
52   cl::desc("use regular size pages for code alignment"),
53   cl::ZeroOrMore,
54   cl::Hidden,
55   cl::cat(BoltCategory));
56 
57 static cl::opt<bool>
58 PrintDebugInfo("print-debug-info",
59   cl::desc("print debug info when printing functions"),
60   cl::Hidden,
61   cl::ZeroOrMore,
62   cl::cat(BoltCategory));
63 
64 cl::opt<bool>
65 PrintRelocations("print-relocations",
66   cl::desc("print relocations when printing functions/objects"),
67   cl::Hidden,
68   cl::ZeroOrMore,
69   cl::cat(BoltCategory));
70 
71 static cl::opt<bool>
72 PrintMemData("print-mem-data",
73   cl::desc("print memory data annotations when printing functions"),
74   cl::Hidden,
75   cl::ZeroOrMore,
76   cl::cat(BoltCategory));
77 
78 } // namespace opts
79 
80 namespace llvm {
81 namespace bolt {
82 
83 BinaryContext::BinaryContext(std::unique_ptr<MCContext> Ctx,
84                              std::unique_ptr<DWARFContext> DwCtx,
85                              std::unique_ptr<Triple> TheTriple,
86                              const Target *TheTarget, std::string TripleName,
87                              std::unique_ptr<MCCodeEmitter> MCE,
88                              std::unique_ptr<MCObjectFileInfo> MOFI,
89                              std::unique_ptr<const MCAsmInfo> AsmInfo,
90                              std::unique_ptr<const MCInstrInfo> MII,
91                              std::unique_ptr<const MCSubtargetInfo> STI,
92                              std::unique_ptr<MCInstPrinter> InstPrinter,
93                              std::unique_ptr<const MCInstrAnalysis> MIA,
94                              std::unique_ptr<MCPlusBuilder> MIB,
95                              std::unique_ptr<const MCRegisterInfo> MRI,
96                              std::unique_ptr<MCDisassembler> DisAsm)
97     : Ctx(std::move(Ctx)), DwCtx(std::move(DwCtx)),
98       TheTriple(std::move(TheTriple)), TheTarget(TheTarget),
99       TripleName(TripleName), MCE(std::move(MCE)), MOFI(std::move(MOFI)),
100       AsmInfo(std::move(AsmInfo)), MII(std::move(MII)), STI(std::move(STI)),
101       InstPrinter(std::move(InstPrinter)), MIA(std::move(MIA)),
102       MIB(std::move(MIB)), MRI(std::move(MRI)), DisAsm(std::move(DisAsm)) {
103   Relocation::Arch = this->TheTriple->getArch();
104   PageAlign = opts::NoHugePages ? RegularPageSize : HugePageSize;
105 }
106 
107 BinaryContext::~BinaryContext() {
108   for (BinarySection *Section : Sections)
109     delete Section;
110   for (BinaryFunction *InjectedFunction : InjectedBinaryFunctions)
111     delete InjectedFunction;
112   for (std::pair<const uint64_t, JumpTable *> JTI : JumpTables)
113     delete JTI.second;
114   clearBinaryData();
115 }
116 
117 /// Create BinaryContext for a given architecture \p ArchName and
118 /// triple \p TripleName.
119 Expected<std::unique_ptr<BinaryContext>>
120 BinaryContext::createBinaryContext(const ObjectFile *File, bool IsPIC,
121                                    std::unique_ptr<DWARFContext> DwCtx) {
122   StringRef ArchName = "";
123   StringRef FeaturesStr = "";
124   switch (File->getArch()) {
125   case llvm::Triple::x86_64:
126     ArchName = "x86-64";
127     FeaturesStr = "+nopl";
128     break;
129   case llvm::Triple::aarch64:
130     ArchName = "aarch64";
131     FeaturesStr = "+fp-armv8,+neon,+crypto,+dotprod,+crc,+lse,+ras,+rdm,"
132                   "+fullfp16,+spe,+fuse-aes,+rcpc";
133     break;
134   default:
135     return createStringError(std::errc::not_supported,
136                              "BOLT-ERROR: Unrecognized machine in ELF file");
137   }
138 
139   auto TheTriple = std::make_unique<Triple>(File->makeTriple());
140   const std::string TripleName = TheTriple->str();
141 
142   std::string Error;
143   const Target *TheTarget =
144       TargetRegistry::lookupTarget(std::string(ArchName), *TheTriple, Error);
145   if (!TheTarget)
146     return createStringError(make_error_code(std::errc::not_supported),
147                              Twine("BOLT-ERROR: ", Error));
148 
149   std::unique_ptr<const MCRegisterInfo> MRI(
150       TheTarget->createMCRegInfo(TripleName));
151   if (!MRI)
152     return createStringError(
153         make_error_code(std::errc::not_supported),
154         Twine("BOLT-ERROR: no register info for target ", TripleName));
155 
156   // Set up disassembler.
157   std::unique_ptr<const MCAsmInfo> AsmInfo(
158       TheTarget->createMCAsmInfo(*MRI, TripleName, MCTargetOptions()));
159   if (!AsmInfo)
160     return createStringError(
161         make_error_code(std::errc::not_supported),
162         Twine("BOLT-ERROR: no assembly info for target ", TripleName));
163 
164   std::unique_ptr<const MCSubtargetInfo> STI(
165       TheTarget->createMCSubtargetInfo(TripleName, "", FeaturesStr));
166   if (!STI)
167     return createStringError(
168         make_error_code(std::errc::not_supported),
169         Twine("BOLT-ERROR: no subtarget info for target ", TripleName));
170 
171   std::unique_ptr<const MCInstrInfo> MII(TheTarget->createMCInstrInfo());
172   if (!MII)
173     return createStringError(
174         make_error_code(std::errc::not_supported),
175         Twine("BOLT-ERROR: no instruction info for target ", TripleName));
176 
177   std::unique_ptr<MCContext> Ctx(
178       new MCContext(*TheTriple, AsmInfo.get(), MRI.get(), STI.get()));
179   std::unique_ptr<MCObjectFileInfo> MOFI(
180       TheTarget->createMCObjectFileInfo(*Ctx, IsPIC));
181   Ctx->setObjectFileInfo(MOFI.get());
182   // We do not support X86 Large code model. Change this in the future.
183   bool Large = false;
184   if (TheTriple->getArch() == llvm::Triple::aarch64)
185     Large = true;
186   unsigned LSDAEncoding =
187       Large ? dwarf::DW_EH_PE_absptr : dwarf::DW_EH_PE_udata4;
188   unsigned TTypeEncoding =
189       Large ? dwarf::DW_EH_PE_absptr : dwarf::DW_EH_PE_udata4;
190   if (IsPIC) {
191     LSDAEncoding = dwarf::DW_EH_PE_pcrel |
192                    (Large ? dwarf::DW_EH_PE_sdata8 : dwarf::DW_EH_PE_sdata4);
193     TTypeEncoding = dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel |
194                     (Large ? dwarf::DW_EH_PE_sdata8 : dwarf::DW_EH_PE_sdata4);
195   }
196 
197   std::unique_ptr<MCDisassembler> DisAsm(
198       TheTarget->createMCDisassembler(*STI, *Ctx));
199 
200   if (!DisAsm)
201     return createStringError(
202         make_error_code(std::errc::not_supported),
203         Twine("BOLT-ERROR: no disassembler info for target ", TripleName));
204 
205   std::unique_ptr<const MCInstrAnalysis> MIA(
206       TheTarget->createMCInstrAnalysis(MII.get()));
207   if (!MIA)
208     return createStringError(
209         make_error_code(std::errc::not_supported),
210         Twine("BOLT-ERROR: failed to create instruction analysis for target ",
211               TripleName));
212 
213   int AsmPrinterVariant = AsmInfo->getAssemblerDialect();
214   std::unique_ptr<MCInstPrinter> InstructionPrinter(
215       TheTarget->createMCInstPrinter(*TheTriple, AsmPrinterVariant, *AsmInfo,
216                                      *MII, *MRI));
217   if (!InstructionPrinter)
218     return createStringError(
219         make_error_code(std::errc::not_supported),
220         Twine("BOLT-ERROR: no instruction printer for target ", TripleName));
221   InstructionPrinter->setPrintImmHex(true);
222 
223   std::unique_ptr<MCCodeEmitter> MCE(
224       TheTarget->createMCCodeEmitter(*MII, *Ctx));
225 
226   // Make sure we don't miss any output on core dumps.
227   outs().SetUnbuffered();
228   errs().SetUnbuffered();
229   dbgs().SetUnbuffered();
230 
231   auto BC = std::make_unique<BinaryContext>(
232       std::move(Ctx), std::move(DwCtx), std::move(TheTriple), TheTarget,
233       std::string(TripleName), std::move(MCE), std::move(MOFI),
234       std::move(AsmInfo), std::move(MII), std::move(STI),
235       std::move(InstructionPrinter), std::move(MIA), nullptr, std::move(MRI),
236       std::move(DisAsm));
237 
238   BC->TTypeEncoding = TTypeEncoding;
239   BC->LSDAEncoding = LSDAEncoding;
240 
241   BC->MAB = std::unique_ptr<MCAsmBackend>(
242       BC->TheTarget->createMCAsmBackend(*BC->STI, *BC->MRI, MCTargetOptions()));
243 
244   BC->setFilename(File->getFileName());
245 
246   BC->HasFixedLoadAddress = !IsPIC;
247 
248   return BC;
249 }
250 
251 bool BinaryContext::forceSymbolRelocations(StringRef SymbolName) const {
252   if (opts::HotText &&
253       (SymbolName == "__hot_start" || SymbolName == "__hot_end"))
254     return true;
255 
256   if (opts::HotData &&
257       (SymbolName == "__hot_data_start" || SymbolName == "__hot_data_end"))
258     return true;
259 
260   if (SymbolName == "_end")
261     return true;
262 
263   return false;
264 }
265 
266 std::unique_ptr<MCObjectWriter>
267 BinaryContext::createObjectWriter(raw_pwrite_stream &OS) {
268   return MAB->createObjectWriter(OS);
269 }
270 
271 bool BinaryContext::validateObjectNesting() const {
272   auto Itr = BinaryDataMap.begin();
273   auto End = BinaryDataMap.end();
274   bool Valid = true;
275   while (Itr != End) {
276     auto Next = std::next(Itr);
277     while (Next != End &&
278            Itr->second->getSection() == Next->second->getSection() &&
279            Itr->second->containsRange(Next->second->getAddress(),
280                                       Next->second->getSize())) {
281       if (Next->second->Parent != Itr->second) {
282         errs() << "BOLT-WARNING: object nesting incorrect for:\n"
283                << "BOLT-WARNING:  " << *Itr->second << "\n"
284                << "BOLT-WARNING:  " << *Next->second << "\n";
285         Valid = false;
286       }
287       ++Next;
288     }
289     Itr = Next;
290   }
291   return Valid;
292 }
293 
294 bool BinaryContext::validateHoles() const {
295   bool Valid = true;
296   for (BinarySection &Section : sections()) {
297     for (const Relocation &Rel : Section.relocations()) {
298       uint64_t RelAddr = Rel.Offset + Section.getAddress();
299       const BinaryData *BD = getBinaryDataContainingAddress(RelAddr);
300       if (!BD) {
301         errs() << "BOLT-WARNING: no BinaryData found for relocation at address"
302                << " 0x" << Twine::utohexstr(RelAddr) << " in "
303                << Section.getName() << "\n";
304         Valid = false;
305       } else if (!BD->getAtomicRoot()) {
306         errs() << "BOLT-WARNING: no atomic BinaryData found for relocation at "
307                << "address 0x" << Twine::utohexstr(RelAddr) << " in "
308                << Section.getName() << "\n";
309         Valid = false;
310       }
311     }
312   }
313   return Valid;
314 }
315 
316 void BinaryContext::updateObjectNesting(BinaryDataMapType::iterator GAI) {
317   const uint64_t Address = GAI->second->getAddress();
318   const uint64_t Size = GAI->second->getSize();
319 
320   auto fixParents = [&](BinaryDataMapType::iterator Itr,
321                         BinaryData *NewParent) {
322     BinaryData *OldParent = Itr->second->Parent;
323     Itr->second->Parent = NewParent;
324     ++Itr;
325     while (Itr != BinaryDataMap.end() && OldParent &&
326            Itr->second->Parent == OldParent) {
327       Itr->second->Parent = NewParent;
328       ++Itr;
329     }
330   };
331 
332   // Check if the previous symbol contains the newly added symbol.
333   if (GAI != BinaryDataMap.begin()) {
334     BinaryData *Prev = std::prev(GAI)->second;
335     while (Prev) {
336       if (Prev->getSection() == GAI->second->getSection() &&
337           Prev->containsRange(Address, Size)) {
338         fixParents(GAI, Prev);
339       } else {
340         fixParents(GAI, nullptr);
341       }
342       Prev = Prev->Parent;
343     }
344   }
345 
346   // Check if the newly added symbol contains any subsequent symbols.
347   if (Size != 0) {
348     BinaryData *BD = GAI->second->Parent ? GAI->second->Parent : GAI->second;
349     auto Itr = std::next(GAI);
350     while (
351         Itr != BinaryDataMap.end() &&
352         BD->containsRange(Itr->second->getAddress(), Itr->second->getSize())) {
353       Itr->second->Parent = BD;
354       ++Itr;
355     }
356   }
357 }
358 
359 iterator_range<BinaryContext::binary_data_iterator>
360 BinaryContext::getSubBinaryData(BinaryData *BD) {
361   auto Start = std::next(BinaryDataMap.find(BD->getAddress()));
362   auto End = Start;
363   while (End != BinaryDataMap.end() && BD->isAncestorOf(End->second))
364     ++End;
365   return make_range(Start, End);
366 }
367 
368 std::pair<const MCSymbol *, uint64_t>
369 BinaryContext::handleAddressRef(uint64_t Address, BinaryFunction &BF,
370                                 bool IsPCRel) {
371   uint64_t Addend = 0;
372 
373   if (isAArch64()) {
374     // Check if this is an access to a constant island and create bookkeeping
375     // to keep track of it and emit it later as part of this function.
376     if (MCSymbol *IslandSym = BF.getOrCreateIslandAccess(Address))
377       return std::make_pair(IslandSym, Addend);
378 
379     // Detect custom code written in assembly that refers to arbitrary
380     // constant islands from other functions. Write this reference so we
381     // can pull this constant island and emit it as part of this function
382     // too.
383     auto IslandIter = AddressToConstantIslandMap.lower_bound(Address);
384     if (IslandIter != AddressToConstantIslandMap.end()) {
385       if (MCSymbol *IslandSym =
386               IslandIter->second->getOrCreateProxyIslandAccess(Address, BF)) {
387         BF.createIslandDependency(IslandSym, IslandIter->second);
388         return std::make_pair(IslandSym, Addend);
389       }
390     }
391   }
392 
393   // Note that the address does not necessarily have to reside inside
394   // a section, it could be an absolute address too.
395   ErrorOr<BinarySection &> Section = getSectionForAddress(Address);
396   if (Section && Section->isText()) {
397     if (BF.containsAddress(Address, /*UseMaxSize=*/isAArch64())) {
398       if (Address != BF.getAddress()) {
399         // The address could potentially escape. Mark it as another entry
400         // point into the function.
401         if (opts::Verbosity >= 1) {
402           outs() << "BOLT-INFO: potentially escaped address 0x"
403                  << Twine::utohexstr(Address) << " in function " << BF << '\n';
404         }
405         BF.HasInternalLabelReference = true;
406         return std::make_pair(
407             BF.addEntryPointAtOffset(Address - BF.getAddress()), Addend);
408       }
409     } else {
410       BF.InterproceduralReferences.insert(Address);
411     }
412   }
413 
414   // With relocations, catch jump table references outside of the basic block
415   // containing the indirect jump.
416   if (HasRelocations) {
417     const MemoryContentsType MemType = analyzeMemoryAt(Address, BF);
418     if (MemType == MemoryContentsType::POSSIBLE_PIC_JUMP_TABLE && IsPCRel) {
419       const MCSymbol *Symbol =
420           getOrCreateJumpTable(BF, Address, JumpTable::JTT_PIC);
421 
422       return std::make_pair(Symbol, Addend);
423     }
424   }
425 
426   if (BinaryData *BD = getBinaryDataContainingAddress(Address))
427     return std::make_pair(BD->getSymbol(), Address - BD->getAddress());
428 
429   // TODO: use DWARF info to get size/alignment here?
430   MCSymbol *TargetSymbol = getOrCreateGlobalSymbol(Address, "DATAat");
431   LLVM_DEBUG(dbgs() << "Created symbol " << TargetSymbol->getName() << '\n');
432   return std::make_pair(TargetSymbol, Addend);
433 }
434 
435 MemoryContentsType BinaryContext::analyzeMemoryAt(uint64_t Address,
436                                                   BinaryFunction &BF) {
437   if (!isX86())
438     return MemoryContentsType::UNKNOWN;
439 
440   ErrorOr<BinarySection &> Section = getSectionForAddress(Address);
441   if (!Section) {
442     // No section - possibly an absolute address. Since we don't allow
443     // internal function addresses to escape the function scope - we
444     // consider it a tail call.
445     if (opts::Verbosity > 1) {
446       errs() << "BOLT-WARNING: no section for address 0x"
447              << Twine::utohexstr(Address) << " referenced from function " << BF
448              << '\n';
449     }
450     return MemoryContentsType::UNKNOWN;
451   }
452 
453   if (Section->isVirtual()) {
454     // The contents are filled at runtime.
455     return MemoryContentsType::UNKNOWN;
456   }
457 
458   // No support for jump tables in code yet.
459   if (Section->isText())
460     return MemoryContentsType::UNKNOWN;
461 
462   // Start with checking for PIC jump table. We expect non-PIC jump tables
463   // to have high 32 bits set to 0.
464   if (analyzeJumpTable(Address, JumpTable::JTT_PIC, BF))
465     return MemoryContentsType::POSSIBLE_PIC_JUMP_TABLE;
466 
467   if (analyzeJumpTable(Address, JumpTable::JTT_NORMAL, BF))
468     return MemoryContentsType::POSSIBLE_JUMP_TABLE;
469 
470   return MemoryContentsType::UNKNOWN;
471 }
472 
473 /// Check if <fragment restored name> == <parent restored name>.cold(.\d+)?
474 bool isPotentialFragmentByName(BinaryFunction &Fragment,
475                                BinaryFunction &Parent) {
476   for (StringRef Name : Parent.getNames()) {
477     std::string NamePrefix = Regex::escape(NameResolver::restore(Name));
478     std::string NameRegex = Twine(NamePrefix, "\\.cold(\\.[0-9]+)?").str();
479     if (Fragment.hasRestoredNameRegex(NameRegex))
480       return true;
481   }
482   return false;
483 }
484 
485 bool BinaryContext::analyzeJumpTable(const uint64_t Address,
486                                      const JumpTable::JumpTableType Type,
487                                      BinaryFunction &BF,
488                                      const uint64_t NextJTAddress,
489                                      JumpTable::OffsetsType *Offsets) {
490   // Is one of the targets __builtin_unreachable?
491   bool HasUnreachable = false;
492 
493   // Number of targets other than __builtin_unreachable.
494   uint64_t NumRealEntries = 0;
495 
496   constexpr uint64_t INVALID_OFFSET = std::numeric_limits<uint64_t>::max();
497   auto addOffset = [&](uint64_t Offset) {
498     if (Offsets)
499       Offsets->emplace_back(Offset);
500   };
501 
502   auto doesBelongToFunction = [&](const uint64_t Addr,
503                                   BinaryFunction *TargetBF) -> bool {
504     if (BF.containsAddress(Addr))
505       return true;
506     // Nothing to do if we failed to identify the containing function.
507     if (!TargetBF)
508       return false;
509     // Case 1: check if BF is a fragment and TargetBF is its parent.
510     if (BF.isFragment()) {
511       // Parent function may or may not be already registered.
512       // Set parent link based on function name matching heuristic.
513       return registerFragment(BF, *TargetBF);
514     }
515     // Case 2: check if TargetBF is a fragment and BF is its parent.
516     return TargetBF->isFragment() && registerFragment(*TargetBF, BF);
517   };
518 
519   ErrorOr<BinarySection &> Section = getSectionForAddress(Address);
520   if (!Section)
521     return false;
522 
523   // The upper bound is defined by containing object, section limits, and
524   // the next jump table in memory.
525   uint64_t UpperBound = Section->getEndAddress();
526   const BinaryData *JumpTableBD = getBinaryDataAtAddress(Address);
527   if (JumpTableBD && JumpTableBD->getSize()) {
528     assert(JumpTableBD->getEndAddress() <= UpperBound &&
529            "data object cannot cross a section boundary");
530     UpperBound = JumpTableBD->getEndAddress();
531   }
532   if (NextJTAddress)
533     UpperBound = std::min(NextJTAddress, UpperBound);
534 
535   LLVM_DEBUG(dbgs() << "BOLT-DEBUG: analyzeJumpTable in " << BF.getPrintName()
536                     << '\n');
537   const uint64_t EntrySize = getJumpTableEntrySize(Type);
538   for (uint64_t EntryAddress = Address; EntryAddress <= UpperBound - EntrySize;
539        EntryAddress += EntrySize) {
540     LLVM_DEBUG(dbgs() << "  * Checking 0x" << Twine::utohexstr(EntryAddress)
541                       << " -> ");
542     // Check if there's a proper relocation against the jump table entry.
543     if (HasRelocations) {
544       if (Type == JumpTable::JTT_PIC &&
545           !DataPCRelocations.count(EntryAddress)) {
546         LLVM_DEBUG(
547             dbgs() << "FAIL: JTT_PIC table, no relocation for this address\n");
548         break;
549       }
550       if (Type == JumpTable::JTT_NORMAL && !getRelocationAt(EntryAddress)) {
551         LLVM_DEBUG(
552             dbgs()
553             << "FAIL: JTT_NORMAL table, no relocation for this address\n");
554         break;
555       }
556     }
557 
558     const uint64_t Value =
559         (Type == JumpTable::JTT_PIC)
560             ? Address + *getSignedValueAtAddress(EntryAddress, EntrySize)
561             : *getPointerAtAddress(EntryAddress);
562 
563     // __builtin_unreachable() case.
564     if (Value == BF.getAddress() + BF.getSize()) {
565       addOffset(Value - BF.getAddress());
566       HasUnreachable = true;
567       LLVM_DEBUG(dbgs() << "OK: __builtin_unreachable\n");
568       continue;
569     }
570 
571     // Function or one of its fragments.
572     BinaryFunction *TargetBF = getBinaryFunctionContainingAddress(Value);
573 
574     // We assume that a jump table cannot have function start as an entry.
575     if (!doesBelongToFunction(Value, TargetBF) || Value == BF.getAddress()) {
576       LLVM_DEBUG({
577         if (!BF.containsAddress(Value)) {
578           dbgs() << "FAIL: function doesn't contain this address\n";
579           if (TargetBF) {
580             dbgs() << "  ! function containing this address: "
581                    << TargetBF->getPrintName() << '\n';
582             if (TargetBF->isFragment())
583               dbgs() << "  ! is a fragment\n";
584             for (BinaryFunction *TargetParent : TargetBF->ParentFragments)
585               dbgs() << "  ! its parent is "
586                      << (TargetParent ? TargetParent->getPrintName() : "(none)")
587                      << '\n';
588           }
589         }
590         if (Value == BF.getAddress())
591           dbgs() << "FAIL: jump table cannot have function start as an entry\n";
592       });
593       break;
594     }
595 
596     // Check there's an instruction at this offset.
597     if (TargetBF->getState() == BinaryFunction::State::Disassembled &&
598         !TargetBF->getInstructionAtOffset(Value - TargetBF->getAddress())) {
599       LLVM_DEBUG(dbgs() << "FAIL: no instruction at this offset\n");
600       break;
601     }
602 
603     ++NumRealEntries;
604 
605     if (TargetBF == &BF) {
606       // Address inside the function.
607       addOffset(Value - TargetBF->getAddress());
608       LLVM_DEBUG(dbgs() << "OK: real entry\n");
609     } else {
610       // Address in split fragment.
611       BF.setHasSplitJumpTable(true);
612       // Add invalid offset for proper identification of jump table size.
613       addOffset(INVALID_OFFSET);
614       LLVM_DEBUG(dbgs() << "OK: address in split fragment "
615                         << TargetBF->getPrintName() << '\n');
616     }
617   }
618 
619   // It's a jump table if the number of real entries is more than 1, or there's
620   // one real entry and "unreachable" targets. If there are only multiple
621   // "unreachable" targets, then it's not a jump table.
622   return NumRealEntries + HasUnreachable >= 2;
623 }
624 
625 void BinaryContext::populateJumpTables() {
626   LLVM_DEBUG(dbgs() << "DataPCRelocations: " << DataPCRelocations.size()
627                     << '\n');
628   for (auto JTI = JumpTables.begin(), JTE = JumpTables.end(); JTI != JTE;
629        ++JTI) {
630     JumpTable *JT = JTI->second;
631     BinaryFunction &BF = *JT->Parent;
632 
633     if (!BF.isSimple())
634       continue;
635 
636     uint64_t NextJTAddress = 0;
637     auto NextJTI = std::next(JTI);
638     if (NextJTI != JTE)
639       NextJTAddress = NextJTI->second->getAddress();
640 
641     const bool Success = analyzeJumpTable(JT->getAddress(), JT->Type, BF,
642                                           NextJTAddress, &JT->OffsetEntries);
643     if (!Success) {
644       dbgs() << "failed to analyze jump table in function " << BF << '\n';
645       JT->print(dbgs());
646       if (NextJTI != JTE) {
647         dbgs() << "next jump table at 0x"
648                << Twine::utohexstr(NextJTI->second->getAddress())
649                << " belongs to function " << *NextJTI->second->Parent << '\n';
650         NextJTI->second->print(dbgs());
651       }
652       llvm_unreachable("jump table heuristic failure");
653     }
654 
655     for (uint64_t EntryOffset : JT->OffsetEntries) {
656       if (EntryOffset == BF.getSize())
657         BF.IgnoredBranches.emplace_back(EntryOffset, BF.getSize());
658       else
659         BF.registerReferencedOffset(EntryOffset);
660     }
661 
662     // In strict mode, erase PC-relative relocation record. Later we check that
663     // all such records are erased and thus have been accounted for.
664     if (opts::StrictMode && JT->Type == JumpTable::JTT_PIC) {
665       for (uint64_t Address = JT->getAddress();
666            Address < JT->getAddress() + JT->getSize();
667            Address += JT->EntrySize) {
668         DataPCRelocations.erase(DataPCRelocations.find(Address));
669       }
670     }
671 
672     // Mark to skip the function and all its fragments.
673     if (BF.hasSplitJumpTable())
674       FragmentsToSkip.push_back(&BF);
675   }
676 
677   if (opts::StrictMode && DataPCRelocations.size()) {
678     LLVM_DEBUG({
679       dbgs() << DataPCRelocations.size()
680              << " unclaimed PC-relative relocations left in data:\n";
681       for (uint64_t Reloc : DataPCRelocations)
682         dbgs() << Twine::utohexstr(Reloc) << '\n';
683     });
684     assert(0 && "unclaimed PC-relative relocations left in data\n");
685   }
686   clearList(DataPCRelocations);
687 }
688 
689 void BinaryContext::skipMarkedFragments() {
690   // Unique functions in the vector.
691   std::unordered_set<BinaryFunction *> UniqueFunctions(FragmentsToSkip.begin(),
692                                                        FragmentsToSkip.end());
693   // Copy the functions back to FragmentsToSkip.
694   FragmentsToSkip.assign(UniqueFunctions.begin(), UniqueFunctions.end());
695   auto addToWorklist = [&](BinaryFunction *Function) -> void {
696     if (UniqueFunctions.count(Function))
697       return;
698     FragmentsToSkip.push_back(Function);
699     UniqueFunctions.insert(Function);
700   };
701   // Functions containing split jump tables need to be skipped with all
702   // fragments (transitively).
703   for (size_t I = 0; I != FragmentsToSkip.size(); I++) {
704     BinaryFunction *BF = FragmentsToSkip[I];
705     assert(UniqueFunctions.count(BF) &&
706            "internal error in traversing function fragments");
707     if (opts::Verbosity >= 1)
708       errs() << "BOLT-WARNING: Ignoring " << BF->getPrintName() << '\n';
709     BF->setIgnored();
710     std::for_each(BF->Fragments.begin(), BF->Fragments.end(), addToWorklist);
711     std::for_each(BF->ParentFragments.begin(), BF->ParentFragments.end(),
712                   addToWorklist);
713   }
714   if (!FragmentsToSkip.empty())
715     errs() << "BOLT-WARNING: ignored " << FragmentsToSkip.size() << " function"
716            << (FragmentsToSkip.size() == 1 ? "" : "s")
717            << " due to cold fragments\n";
718   FragmentsToSkip.clear();
719 }
720 
721 MCSymbol *BinaryContext::getOrCreateGlobalSymbol(uint64_t Address, Twine Prefix,
722                                                  uint64_t Size,
723                                                  uint16_t Alignment,
724                                                  unsigned Flags) {
725   auto Itr = BinaryDataMap.find(Address);
726   if (Itr != BinaryDataMap.end()) {
727     assert(Itr->second->getSize() == Size || !Size);
728     return Itr->second->getSymbol();
729   }
730 
731   std::string Name = (Prefix + "0x" + Twine::utohexstr(Address)).str();
732   assert(!GlobalSymbols.count(Name) && "created name is not unique");
733   return registerNameAtAddress(Name, Address, Size, Alignment, Flags);
734 }
735 
736 MCSymbol *BinaryContext::getOrCreateUndefinedGlobalSymbol(StringRef Name) {
737   return Ctx->getOrCreateSymbol(Name);
738 }
739 
740 BinaryFunction *BinaryContext::createBinaryFunction(
741     const std::string &Name, BinarySection &Section, uint64_t Address,
742     uint64_t Size, uint64_t SymbolSize, uint16_t Alignment) {
743   auto Result = BinaryFunctions.emplace(
744       Address, BinaryFunction(Name, Section, Address, Size, *this));
745   assert(Result.second == true && "unexpected duplicate function");
746   BinaryFunction *BF = &Result.first->second;
747   registerNameAtAddress(Name, Address, SymbolSize ? SymbolSize : Size,
748                         Alignment);
749   setSymbolToFunctionMap(BF->getSymbol(), BF);
750   return BF;
751 }
752 
753 const MCSymbol *
754 BinaryContext::getOrCreateJumpTable(BinaryFunction &Function, uint64_t Address,
755                                     JumpTable::JumpTableType Type) {
756   if (JumpTable *JT = getJumpTableContainingAddress(Address)) {
757     assert(JT->Type == Type && "jump table types have to match");
758     assert(JT->Parent == &Function &&
759            "cannot re-use jump table of a different function");
760     assert(Address == JT->getAddress() && "unexpected non-empty jump table");
761 
762     return JT->getFirstLabel();
763   }
764 
765   // Re-use the existing symbol if possible.
766   MCSymbol *JTLabel = nullptr;
767   if (BinaryData *Object = getBinaryDataAtAddress(Address)) {
768     if (!isInternalSymbolName(Object->getSymbol()->getName()))
769       JTLabel = Object->getSymbol();
770   }
771 
772   const uint64_t EntrySize = getJumpTableEntrySize(Type);
773   if (!JTLabel) {
774     const std::string JumpTableName = generateJumpTableName(Function, Address);
775     JTLabel = registerNameAtAddress(JumpTableName, Address, 0, EntrySize);
776   }
777 
778   LLVM_DEBUG(dbgs() << "BOLT-DEBUG: creating jump table " << JTLabel->getName()
779                     << " in function " << Function << '\n');
780 
781   JumpTable *JT = new JumpTable(*JTLabel, Address, EntrySize, Type,
782                                 JumpTable::LabelMapType{{0, JTLabel}}, Function,
783                                 *getSectionForAddress(Address));
784   JumpTables.emplace(Address, JT);
785 
786   // Duplicate the entry for the parent function for easy access.
787   Function.JumpTables.emplace(Address, JT);
788 
789   return JTLabel;
790 }
791 
792 std::pair<uint64_t, const MCSymbol *>
793 BinaryContext::duplicateJumpTable(BinaryFunction &Function, JumpTable *JT,
794                                   const MCSymbol *OldLabel) {
795   auto L = scopeLock();
796   unsigned Offset = 0;
797   bool Found = false;
798   for (std::pair<const unsigned, MCSymbol *> Elmt : JT->Labels) {
799     if (Elmt.second != OldLabel)
800       continue;
801     Offset = Elmt.first;
802     Found = true;
803     break;
804   }
805   assert(Found && "Label not found");
806   MCSymbol *NewLabel = Ctx->createNamedTempSymbol("duplicatedJT");
807   JumpTable *NewJT =
808       new JumpTable(*NewLabel, JT->getAddress(), JT->EntrySize, JT->Type,
809                     JumpTable::LabelMapType{{Offset, NewLabel}}, Function,
810                     *getSectionForAddress(JT->getAddress()));
811   NewJT->Entries = JT->Entries;
812   NewJT->Counts = JT->Counts;
813   uint64_t JumpTableID = ++DuplicatedJumpTables;
814   // Invert it to differentiate from regular jump tables whose IDs are their
815   // addresses in the input binary memory space
816   JumpTableID = ~JumpTableID;
817   JumpTables.emplace(JumpTableID, NewJT);
818   Function.JumpTables.emplace(JumpTableID, NewJT);
819   return std::make_pair(JumpTableID, NewLabel);
820 }
821 
822 std::string BinaryContext::generateJumpTableName(const BinaryFunction &BF,
823                                                  uint64_t Address) {
824   size_t Id;
825   uint64_t Offset = 0;
826   if (const JumpTable *JT = BF.getJumpTableContainingAddress(Address)) {
827     Offset = Address - JT->getAddress();
828     auto Itr = JT->Labels.find(Offset);
829     if (Itr != JT->Labels.end())
830       return std::string(Itr->second->getName());
831     Id = JumpTableIds.at(JT->getAddress());
832   } else {
833     Id = JumpTableIds[Address] = BF.JumpTables.size();
834   }
835   return ("JUMP_TABLE/" + BF.getOneName().str() + "." + std::to_string(Id) +
836           (Offset ? ("." + std::to_string(Offset)) : ""));
837 }
838 
839 bool BinaryContext::hasValidCodePadding(const BinaryFunction &BF) {
840   // FIXME: aarch64 support is missing.
841   if (!isX86())
842     return true;
843 
844   if (BF.getSize() == BF.getMaxSize())
845     return true;
846 
847   ErrorOr<ArrayRef<unsigned char>> FunctionData = BF.getData();
848   assert(FunctionData && "cannot get function as data");
849 
850   uint64_t Offset = BF.getSize();
851   MCInst Instr;
852   uint64_t InstrSize = 0;
853   uint64_t InstrAddress = BF.getAddress() + Offset;
854   using std::placeholders::_1;
855 
856   // Skip instructions that satisfy the predicate condition.
857   auto skipInstructions = [&](std::function<bool(const MCInst &)> Predicate) {
858     const uint64_t StartOffset = Offset;
859     for (; Offset < BF.getMaxSize();
860          Offset += InstrSize, InstrAddress += InstrSize) {
861       if (!DisAsm->getInstruction(Instr, InstrSize, FunctionData->slice(Offset),
862                                   InstrAddress, nulls()))
863         break;
864       if (!Predicate(Instr))
865         break;
866     }
867 
868     return Offset - StartOffset;
869   };
870 
871   // Skip a sequence of zero bytes.
872   auto skipZeros = [&]() {
873     const uint64_t StartOffset = Offset;
874     for (; Offset < BF.getMaxSize(); ++Offset)
875       if ((*FunctionData)[Offset] != 0)
876         break;
877 
878     return Offset - StartOffset;
879   };
880 
881   // Accept the whole padding area filled with breakpoints.
882   auto isBreakpoint = std::bind(&MCPlusBuilder::isBreakpoint, MIB.get(), _1);
883   if (skipInstructions(isBreakpoint) && Offset == BF.getMaxSize())
884     return true;
885 
886   auto isNoop = std::bind(&MCPlusBuilder::isNoop, MIB.get(), _1);
887 
888   // Some functions have a jump to the next function or to the padding area
889   // inserted after the body.
890   auto isSkipJump = [&](const MCInst &Instr) {
891     uint64_t TargetAddress = 0;
892     if (MIB->isUnconditionalBranch(Instr) &&
893         MIB->evaluateBranch(Instr, InstrAddress, InstrSize, TargetAddress)) {
894       if (TargetAddress >= InstrAddress + InstrSize &&
895           TargetAddress <= BF.getAddress() + BF.getMaxSize()) {
896         return true;
897       }
898     }
899     return false;
900   };
901 
902   // Skip over nops, jumps, and zero padding. Allow interleaving (this happens).
903   while (skipInstructions(isNoop) || skipInstructions(isSkipJump) ||
904          skipZeros())
905     ;
906 
907   if (Offset == BF.getMaxSize())
908     return true;
909 
910   if (opts::Verbosity >= 1) {
911     errs() << "BOLT-WARNING: bad padding at address 0x"
912            << Twine::utohexstr(BF.getAddress() + BF.getSize())
913            << " starting at offset " << (Offset - BF.getSize())
914            << " in function " << BF << '\n'
915            << FunctionData->slice(BF.getSize(), BF.getMaxSize() - BF.getSize())
916            << '\n';
917   }
918 
919   return false;
920 }
921 
922 void BinaryContext::adjustCodePadding() {
923   for (auto &BFI : BinaryFunctions) {
924     BinaryFunction &BF = BFI.second;
925     if (!shouldEmit(BF))
926       continue;
927 
928     if (!hasValidCodePadding(BF)) {
929       if (HasRelocations) {
930         if (opts::Verbosity >= 1) {
931           outs() << "BOLT-INFO: function " << BF
932                  << " has invalid padding. Ignoring the function.\n";
933         }
934         BF.setIgnored();
935       } else {
936         BF.setMaxSize(BF.getSize());
937       }
938     }
939   }
940 }
941 
942 MCSymbol *BinaryContext::registerNameAtAddress(StringRef Name, uint64_t Address,
943                                                uint64_t Size,
944                                                uint16_t Alignment,
945                                                unsigned Flags) {
946   // Register the name with MCContext.
947   MCSymbol *Symbol = Ctx->getOrCreateSymbol(Name);
948 
949   auto GAI = BinaryDataMap.find(Address);
950   BinaryData *BD;
951   if (GAI == BinaryDataMap.end()) {
952     ErrorOr<BinarySection &> SectionOrErr = getSectionForAddress(Address);
953     BinarySection &Section =
954         SectionOrErr ? SectionOrErr.get() : absoluteSection();
955     BD = new BinaryData(*Symbol, Address, Size, Alignment ? Alignment : 1,
956                         Section, Flags);
957     GAI = BinaryDataMap.emplace(Address, BD).first;
958     GlobalSymbols[Name] = BD;
959     updateObjectNesting(GAI);
960   } else {
961     BD = GAI->second;
962     if (!BD->hasName(Name)) {
963       GlobalSymbols[Name] = BD;
964       BD->Symbols.push_back(Symbol);
965     }
966   }
967 
968   return Symbol;
969 }
970 
971 const BinaryData *
972 BinaryContext::getBinaryDataContainingAddressImpl(uint64_t Address) const {
973   auto NI = BinaryDataMap.lower_bound(Address);
974   auto End = BinaryDataMap.end();
975   if ((NI != End && Address == NI->first) ||
976       ((NI != BinaryDataMap.begin()) && (NI-- != BinaryDataMap.begin()))) {
977     if (NI->second->containsAddress(Address))
978       return NI->second;
979 
980     // If this is a sub-symbol, see if a parent data contains the address.
981     const BinaryData *BD = NI->second->getParent();
982     while (BD) {
983       if (BD->containsAddress(Address))
984         return BD;
985       BD = BD->getParent();
986     }
987   }
988   return nullptr;
989 }
990 
991 bool BinaryContext::setBinaryDataSize(uint64_t Address, uint64_t Size) {
992   auto NI = BinaryDataMap.find(Address);
993   assert(NI != BinaryDataMap.end());
994   if (NI == BinaryDataMap.end())
995     return false;
996   // TODO: it's possible that a jump table starts at the same address
997   // as a larger blob of private data.  When we set the size of the
998   // jump table, it might be smaller than the total blob size.  In this
999   // case we just leave the original size since (currently) it won't really
1000   // affect anything.
1001   assert((!NI->second->Size || NI->second->Size == Size ||
1002           (NI->second->isJumpTable() && NI->second->Size > Size)) &&
1003          "can't change the size of a symbol that has already had its "
1004          "size set");
1005   if (!NI->second->Size) {
1006     NI->second->Size = Size;
1007     updateObjectNesting(NI);
1008     return true;
1009   }
1010   return false;
1011 }
1012 
1013 void BinaryContext::generateSymbolHashes() {
1014   auto isPadding = [](const BinaryData &BD) {
1015     StringRef Contents = BD.getSection().getContents();
1016     StringRef SymData = Contents.substr(BD.getOffset(), BD.getSize());
1017     return (BD.getName().startswith("HOLEat") ||
1018             SymData.find_first_not_of(0) == StringRef::npos);
1019   };
1020 
1021   uint64_t NumCollisions = 0;
1022   for (auto &Entry : BinaryDataMap) {
1023     BinaryData &BD = *Entry.second;
1024     StringRef Name = BD.getName();
1025 
1026     if (!isInternalSymbolName(Name))
1027       continue;
1028 
1029     // First check if a non-anonymous alias exists and move it to the front.
1030     if (BD.getSymbols().size() > 1) {
1031       auto Itr = std::find_if(BD.getSymbols().begin(), BD.getSymbols().end(),
1032                               [&](const MCSymbol *Symbol) {
1033                                 return !isInternalSymbolName(Symbol->getName());
1034                               });
1035       if (Itr != BD.getSymbols().end()) {
1036         size_t Idx = std::distance(BD.getSymbols().begin(), Itr);
1037         std::swap(BD.getSymbols()[0], BD.getSymbols()[Idx]);
1038         continue;
1039       }
1040     }
1041 
1042     // We have to skip 0 size symbols since they will all collide.
1043     if (BD.getSize() == 0) {
1044       continue;
1045     }
1046 
1047     const uint64_t Hash = BD.getSection().hash(BD);
1048     const size_t Idx = Name.find("0x");
1049     std::string NewName =
1050         (Twine(Name.substr(0, Idx)) + "_" + Twine::utohexstr(Hash)).str();
1051     if (getBinaryDataByName(NewName)) {
1052       // Ignore collisions for symbols that appear to be padding
1053       // (i.e. all zeros or a "hole")
1054       if (!isPadding(BD)) {
1055         if (opts::Verbosity) {
1056           errs() << "BOLT-WARNING: collision detected when hashing " << BD
1057                  << " with new name (" << NewName << "), skipping.\n";
1058         }
1059         ++NumCollisions;
1060       }
1061       continue;
1062     }
1063     BD.Symbols.insert(BD.Symbols.begin(), Ctx->getOrCreateSymbol(NewName));
1064     GlobalSymbols[NewName] = &BD;
1065   }
1066   if (NumCollisions) {
1067     errs() << "BOLT-WARNING: " << NumCollisions
1068            << " collisions detected while hashing binary objects";
1069     if (!opts::Verbosity)
1070       errs() << ". Use -v=1 to see the list.";
1071     errs() << '\n';
1072   }
1073 }
1074 
1075 bool BinaryContext::registerFragment(BinaryFunction &TargetFunction,
1076                                      BinaryFunction &Function) const {
1077   if (!isPotentialFragmentByName(TargetFunction, Function))
1078     return false;
1079   assert(TargetFunction.isFragment() && "TargetFunction must be a fragment");
1080   if (TargetFunction.isParentFragment(&Function))
1081     return true;
1082   TargetFunction.addParentFragment(Function);
1083   Function.addFragment(TargetFunction);
1084   if (!HasRelocations) {
1085     TargetFunction.setSimple(false);
1086     Function.setSimple(false);
1087   }
1088   if (opts::Verbosity >= 1) {
1089     outs() << "BOLT-INFO: marking " << TargetFunction << " as a fragment of "
1090            << Function << '\n';
1091   }
1092   return true;
1093 }
1094 
1095 void BinaryContext::processInterproceduralReferences(BinaryFunction &Function) {
1096   for (uint64_t Address : Function.InterproceduralReferences) {
1097     if (!Address)
1098       continue;
1099 
1100     BinaryFunction *TargetFunction =
1101         getBinaryFunctionContainingAddress(Address);
1102     if (&Function == TargetFunction)
1103       continue;
1104 
1105     if (TargetFunction) {
1106       if (TargetFunction->IsFragment &&
1107           !registerFragment(*TargetFunction, Function)) {
1108         errs() << "BOLT-WARNING: interprocedural reference between unrelated "
1109                   "fragments: "
1110                << Function.getPrintName() << " and "
1111                << TargetFunction->getPrintName() << '\n';
1112       }
1113       if (uint64_t Offset = Address - TargetFunction->getAddress())
1114         TargetFunction->addEntryPointAtOffset(Offset);
1115 
1116       continue;
1117     }
1118 
1119     // Check if address falls in function padding space - this could be
1120     // unmarked data in code. In this case adjust the padding space size.
1121     ErrorOr<BinarySection &> Section = getSectionForAddress(Address);
1122     assert(Section && "cannot get section for referenced address");
1123 
1124     if (!Section->isText())
1125       continue;
1126 
1127     // PLT requires special handling and could be ignored in this context.
1128     StringRef SectionName = Section->getName();
1129     if (SectionName == ".plt" || SectionName == ".plt.got")
1130       continue;
1131 
1132     if (opts::processAllFunctions()) {
1133       errs() << "BOLT-ERROR: cannot process binaries with unmarked "
1134              << "object in code at address 0x" << Twine::utohexstr(Address)
1135              << " belonging to section " << SectionName << " in current mode\n";
1136       exit(1);
1137     }
1138 
1139     TargetFunction = getBinaryFunctionContainingAddress(Address,
1140                                                         /*CheckPastEnd=*/false,
1141                                                         /*UseMaxSize=*/true);
1142     // We are not going to overwrite non-simple functions, but for simple
1143     // ones - adjust the padding size.
1144     if (TargetFunction && TargetFunction->isSimple()) {
1145       errs() << "BOLT-WARNING: function " << *TargetFunction
1146              << " has an object detected in a padding region at address 0x"
1147              << Twine::utohexstr(Address) << '\n';
1148       TargetFunction->setMaxSize(TargetFunction->getSize());
1149     }
1150   }
1151 
1152   clearList(Function.InterproceduralReferences);
1153 }
1154 
1155 void BinaryContext::postProcessSymbolTable() {
1156   fixBinaryDataHoles();
1157   bool Valid = true;
1158   for (auto &Entry : BinaryDataMap) {
1159     BinaryData *BD = Entry.second;
1160     if ((BD->getName().startswith("SYMBOLat") ||
1161          BD->getName().startswith("DATAat")) &&
1162         !BD->getParent() && !BD->getSize() && !BD->isAbsolute() &&
1163         BD->getSection()) {
1164       errs() << "BOLT-WARNING: zero-sized top level symbol: " << *BD << "\n";
1165       Valid = false;
1166     }
1167   }
1168   assert(Valid);
1169   generateSymbolHashes();
1170 }
1171 
1172 void BinaryContext::foldFunction(BinaryFunction &ChildBF,
1173                                  BinaryFunction &ParentBF) {
1174   assert(!ChildBF.isMultiEntry() && !ParentBF.isMultiEntry() &&
1175          "cannot merge functions with multiple entry points");
1176 
1177   std::unique_lock<std::shared_timed_mutex> WriteCtxLock(CtxMutex,
1178                                                          std::defer_lock);
1179   std::unique_lock<std::shared_timed_mutex> WriteSymbolMapLock(
1180       SymbolToFunctionMapMutex, std::defer_lock);
1181 
1182   const StringRef ChildName = ChildBF.getOneName();
1183 
1184   // Move symbols over and update bookkeeping info.
1185   for (MCSymbol *Symbol : ChildBF.getSymbols()) {
1186     ParentBF.getSymbols().push_back(Symbol);
1187     WriteSymbolMapLock.lock();
1188     SymbolToFunctionMap[Symbol] = &ParentBF;
1189     WriteSymbolMapLock.unlock();
1190     // NB: there's no need to update BinaryDataMap and GlobalSymbols.
1191   }
1192   ChildBF.getSymbols().clear();
1193 
1194   // Move other names the child function is known under.
1195   std::move(ChildBF.Aliases.begin(), ChildBF.Aliases.end(),
1196             std::back_inserter(ParentBF.Aliases));
1197   ChildBF.Aliases.clear();
1198 
1199   if (HasRelocations) {
1200     // Merge execution counts of ChildBF into those of ParentBF.
1201     // Without relocations, we cannot reliably merge profiles as both functions
1202     // continue to exist and either one can be executed.
1203     ChildBF.mergeProfileDataInto(ParentBF);
1204 
1205     std::shared_lock<std::shared_timed_mutex> ReadBfsLock(BinaryFunctionsMutex,
1206                                                           std::defer_lock);
1207     std::unique_lock<std::shared_timed_mutex> WriteBfsLock(BinaryFunctionsMutex,
1208                                                            std::defer_lock);
1209     // Remove ChildBF from the global set of functions in relocs mode.
1210     ReadBfsLock.lock();
1211     auto FI = BinaryFunctions.find(ChildBF.getAddress());
1212     ReadBfsLock.unlock();
1213 
1214     assert(FI != BinaryFunctions.end() && "function not found");
1215     assert(&ChildBF == &FI->second && "function mismatch");
1216 
1217     WriteBfsLock.lock();
1218     ChildBF.clearDisasmState();
1219     FI = BinaryFunctions.erase(FI);
1220     WriteBfsLock.unlock();
1221 
1222   } else {
1223     // In non-relocation mode we keep the function, but rename it.
1224     std::string NewName = "__ICF_" + ChildName.str();
1225 
1226     WriteCtxLock.lock();
1227     ChildBF.getSymbols().push_back(Ctx->getOrCreateSymbol(NewName));
1228     WriteCtxLock.unlock();
1229 
1230     ChildBF.setFolded(&ParentBF);
1231   }
1232 }
1233 
1234 void BinaryContext::fixBinaryDataHoles() {
1235   assert(validateObjectNesting() && "object nesting inconsitency detected");
1236 
1237   for (BinarySection &Section : allocatableSections()) {
1238     std::vector<std::pair<uint64_t, uint64_t>> Holes;
1239 
1240     auto isNotHole = [&Section](const binary_data_iterator &Itr) {
1241       BinaryData *BD = Itr->second;
1242       bool isHole = (!BD->getParent() && !BD->getSize() && BD->isObject() &&
1243                      (BD->getName().startswith("SYMBOLat0x") ||
1244                       BD->getName().startswith("DATAat0x") ||
1245                       BD->getName().startswith("ANONYMOUS")));
1246       return !isHole && BD->getSection() == Section && !BD->getParent();
1247     };
1248 
1249     auto BDStart = BinaryDataMap.begin();
1250     auto BDEnd = BinaryDataMap.end();
1251     auto Itr = FilteredBinaryDataIterator(isNotHole, BDStart, BDEnd);
1252     auto End = FilteredBinaryDataIterator(isNotHole, BDEnd, BDEnd);
1253 
1254     uint64_t EndAddress = Section.getAddress();
1255 
1256     while (Itr != End) {
1257       if (Itr->second->getAddress() > EndAddress) {
1258         uint64_t Gap = Itr->second->getAddress() - EndAddress;
1259         Holes.emplace_back(EndAddress, Gap);
1260       }
1261       EndAddress = Itr->second->getEndAddress();
1262       ++Itr;
1263     }
1264 
1265     if (EndAddress < Section.getEndAddress())
1266       Holes.emplace_back(EndAddress, Section.getEndAddress() - EndAddress);
1267 
1268     // If there is already a symbol at the start of the hole, grow that symbol
1269     // to cover the rest.  Otherwise, create a new symbol to cover the hole.
1270     for (std::pair<uint64_t, uint64_t> &Hole : Holes) {
1271       BinaryData *BD = getBinaryDataAtAddress(Hole.first);
1272       if (BD) {
1273         // BD->getSection() can be != Section if there are sections that
1274         // overlap.  In this case it is probably safe to just skip the holes
1275         // since the overlapping section will not(?) have any symbols in it.
1276         if (BD->getSection() == Section)
1277           setBinaryDataSize(Hole.first, Hole.second);
1278       } else {
1279         getOrCreateGlobalSymbol(Hole.first, "HOLEat", Hole.second, 1);
1280       }
1281     }
1282   }
1283 
1284   assert(validateObjectNesting() && "object nesting inconsitency detected");
1285   assert(validateHoles() && "top level hole detected in object map");
1286 }
1287 
1288 void BinaryContext::printGlobalSymbols(raw_ostream &OS) const {
1289   const BinarySection *CurrentSection = nullptr;
1290   bool FirstSection = true;
1291 
1292   for (auto &Entry : BinaryDataMap) {
1293     const BinaryData *BD = Entry.second;
1294     const BinarySection &Section = BD->getSection();
1295     if (FirstSection || Section != *CurrentSection) {
1296       uint64_t Address, Size;
1297       StringRef Name = Section.getName();
1298       if (Section) {
1299         Address = Section.getAddress();
1300         Size = Section.getSize();
1301       } else {
1302         Address = BD->getAddress();
1303         Size = BD->getSize();
1304       }
1305       OS << "BOLT-INFO: Section " << Name << ", "
1306          << "0x" + Twine::utohexstr(Address) << ":"
1307          << "0x" + Twine::utohexstr(Address + Size) << "/" << Size << "\n";
1308       CurrentSection = &Section;
1309       FirstSection = false;
1310     }
1311 
1312     OS << "BOLT-INFO: ";
1313     const BinaryData *P = BD->getParent();
1314     while (P) {
1315       OS << "  ";
1316       P = P->getParent();
1317     }
1318     OS << *BD << "\n";
1319   }
1320 }
1321 
1322 Expected<unsigned>
1323 BinaryContext::getDwarfFile(StringRef Directory, StringRef FileName,
1324                             unsigned FileNumber,
1325                             Optional<MD5::MD5Result> Checksum,
1326                             Optional<StringRef> Source, unsigned CUID) {
1327   DwarfLineTable &Table = DwarfLineTablesCUMap[CUID];
1328   return Table.tryGetFile(Directory, FileName, Checksum, Source,
1329                           Ctx->getDwarfVersion(), FileNumber);
1330 }
1331 
1332 unsigned BinaryContext::addDebugFilenameToUnit(const uint32_t DestCUID,
1333                                                const uint32_t SrcCUID,
1334                                                unsigned FileIndex) {
1335   DWARFCompileUnit *SrcUnit = DwCtx->getCompileUnitForOffset(SrcCUID);
1336   const DWARFDebugLine::LineTable *LineTable =
1337       DwCtx->getLineTableForUnit(SrcUnit);
1338   const std::vector<DWARFDebugLine::FileNameEntry> &FileNames =
1339       LineTable->Prologue.FileNames;
1340   // Dir indexes start at 1, as DWARF file numbers, and a dir index 0
1341   // means empty dir.
1342   assert(FileIndex > 0 && FileIndex <= FileNames.size() &&
1343          "FileIndex out of range for the compilation unit.");
1344   StringRef Dir = "";
1345   if (FileNames[FileIndex - 1].DirIdx != 0) {
1346     if (Optional<const char *> DirName = dwarf::toString(
1347             LineTable->Prologue
1348                 .IncludeDirectories[FileNames[FileIndex - 1].DirIdx - 1])) {
1349       Dir = *DirName;
1350     }
1351   }
1352   StringRef FileName = "";
1353   if (Optional<const char *> FName =
1354           dwarf::toString(FileNames[FileIndex - 1].Name))
1355     FileName = *FName;
1356   assert(FileName != "");
1357   return cantFail(getDwarfFile(Dir, FileName, 0, None, None, DestCUID));
1358 }
1359 
1360 std::vector<BinaryFunction *> BinaryContext::getSortedFunctions() {
1361   std::vector<BinaryFunction *> SortedFunctions(BinaryFunctions.size());
1362   std::transform(BinaryFunctions.begin(), BinaryFunctions.end(),
1363                  SortedFunctions.begin(),
1364                  [](std::pair<const uint64_t, BinaryFunction> &BFI) {
1365                    return &BFI.second;
1366                  });
1367 
1368   std::stable_sort(SortedFunctions.begin(), SortedFunctions.end(),
1369                    [](const BinaryFunction *A, const BinaryFunction *B) {
1370                      if (A->hasValidIndex() && B->hasValidIndex()) {
1371                        return A->getIndex() < B->getIndex();
1372                      }
1373                      return A->hasValidIndex();
1374                    });
1375   return SortedFunctions;
1376 }
1377 
1378 std::vector<BinaryFunction *> BinaryContext::getAllBinaryFunctions() {
1379   std::vector<BinaryFunction *> AllFunctions;
1380   AllFunctions.reserve(BinaryFunctions.size() + InjectedBinaryFunctions.size());
1381   std::transform(BinaryFunctions.begin(), BinaryFunctions.end(),
1382                  std::back_inserter(AllFunctions),
1383                  [](std::pair<const uint64_t, BinaryFunction> &BFI) {
1384                    return &BFI.second;
1385                  });
1386   std::copy(InjectedBinaryFunctions.begin(), InjectedBinaryFunctions.end(),
1387             std::back_inserter(AllFunctions));
1388 
1389   return AllFunctions;
1390 }
1391 
1392 Optional<DWARFUnit *> BinaryContext::getDWOCU(uint64_t DWOId) {
1393   auto Iter = DWOCUs.find(DWOId);
1394   if (Iter == DWOCUs.end())
1395     return None;
1396 
1397   return Iter->second;
1398 }
1399 
1400 DWARFContext *BinaryContext::getDWOContext() {
1401   if (DWOCUs.empty())
1402     return nullptr;
1403   return &DWOCUs.begin()->second->getContext();
1404 }
1405 
1406 /// Handles DWO sections that can either be in .o, .dwo or .dwp files.
1407 void BinaryContext::preprocessDWODebugInfo() {
1408   for (const std::unique_ptr<DWARFUnit> &CU : DwCtx->compile_units()) {
1409     DWARFUnit *const DwarfUnit = CU.get();
1410     if (llvm::Optional<uint64_t> DWOId = DwarfUnit->getDWOId()) {
1411       DWARFUnit *DWOCU = DwarfUnit->getNonSkeletonUnitDIE(false).getDwarfUnit();
1412       if (!DWOCU->isDWOUnit()) {
1413         std::string DWOName = dwarf::toString(
1414             DwarfUnit->getUnitDIE().find(
1415                 {dwarf::DW_AT_dwo_name, dwarf::DW_AT_GNU_dwo_name}),
1416             "");
1417         outs() << "BOLT-WARNING: Debug Fission: DWO debug information for "
1418                << DWOName
1419                << " was not retrieved and won't be updated. Please check "
1420                   "relative path.\n";
1421         continue;
1422       }
1423       DWOCUs[*DWOId] = DWOCU;
1424     }
1425   }
1426 }
1427 
1428 void BinaryContext::preprocessDebugInfo() {
1429   struct CURange {
1430     uint64_t LowPC;
1431     uint64_t HighPC;
1432     DWARFUnit *Unit;
1433 
1434     bool operator<(const CURange &Other) const { return LowPC < Other.LowPC; }
1435   };
1436 
1437   // Building a map of address ranges to CUs similar to .debug_aranges and use
1438   // it to assign CU to functions.
1439   std::vector<CURange> AllRanges;
1440   AllRanges.reserve(DwCtx->getNumCompileUnits());
1441   for (const std::unique_ptr<DWARFUnit> &CU : DwCtx->compile_units()) {
1442     Expected<DWARFAddressRangesVector> RangesOrError =
1443         CU->getUnitDIE().getAddressRanges();
1444     if (!RangesOrError) {
1445       consumeError(RangesOrError.takeError());
1446       continue;
1447     }
1448     for (DWARFAddressRange &Range : *RangesOrError) {
1449       // Parts of the debug info could be invalidated due to corresponding code
1450       // being removed from the binary by the linker. Hence we check if the
1451       // address is a valid one.
1452       if (containsAddress(Range.LowPC))
1453         AllRanges.emplace_back(CURange{Range.LowPC, Range.HighPC, CU.get()});
1454     }
1455   }
1456 
1457   std::sort(AllRanges.begin(), AllRanges.end());
1458   for (auto &KV : BinaryFunctions) {
1459     const uint64_t FunctionAddress = KV.first;
1460     BinaryFunction &Function = KV.second;
1461 
1462     auto It = std::partition_point(
1463         AllRanges.begin(), AllRanges.end(),
1464         [=](CURange R) { return R.HighPC <= FunctionAddress; });
1465     if (It != AllRanges.end() && It->LowPC <= FunctionAddress) {
1466       Function.setDWARFUnit(It->Unit);
1467     }
1468   }
1469 
1470   // Discover units with debug info that needs to be updated.
1471   for (const auto &KV : BinaryFunctions) {
1472     const BinaryFunction &BF = KV.second;
1473     if (shouldEmit(BF) && BF.getDWARFUnit())
1474       ProcessedCUs.insert(BF.getDWARFUnit());
1475   }
1476 
1477   // Clear debug info for functions from units that we are not going to process.
1478   for (auto &KV : BinaryFunctions) {
1479     BinaryFunction &BF = KV.second;
1480     if (BF.getDWARFUnit() && !ProcessedCUs.count(BF.getDWARFUnit()))
1481       BF.setDWARFUnit(nullptr);
1482   }
1483 
1484   if (opts::Verbosity >= 1) {
1485     outs() << "BOLT-INFO: " << ProcessedCUs.size() << " out of "
1486            << DwCtx->getNumCompileUnits() << " CUs will be updated\n";
1487   }
1488 
1489   // Populate MCContext with DWARF files from all units.
1490   StringRef GlobalPrefix = AsmInfo->getPrivateGlobalPrefix();
1491   for (const std::unique_ptr<DWARFUnit> &CU : DwCtx->compile_units()) {
1492     const uint64_t CUID = CU->getOffset();
1493     getDwarfLineTable(CUID).setLabel(Ctx->getOrCreateSymbol(
1494         GlobalPrefix + "line_table_start" + Twine(CUID)));
1495 
1496     if (!ProcessedCUs.count(CU.get()))
1497       continue;
1498 
1499     const DWARFDebugLine::LineTable *LineTable =
1500         DwCtx->getLineTableForUnit(CU.get());
1501     const std::vector<DWARFDebugLine::FileNameEntry> &FileNames =
1502         LineTable->Prologue.FileNames;
1503 
1504     // Assign a unique label to every line table, one per CU.
1505     // Make sure empty debug line tables are registered too.
1506     if (FileNames.empty()) {
1507       cantFail(getDwarfFile("", "<unknown>", 0, None, None, CUID));
1508       continue;
1509     }
1510     for (size_t I = 0, Size = FileNames.size(); I != Size; ++I) {
1511       // Dir indexes start at 1, as DWARF file numbers, and a dir index 0
1512       // means empty dir.
1513       StringRef Dir = "";
1514       if (FileNames[I].DirIdx != 0)
1515         if (Optional<const char *> DirName = dwarf::toString(
1516                 LineTable->Prologue
1517                     .IncludeDirectories[FileNames[I].DirIdx - 1]))
1518           Dir = *DirName;
1519       StringRef FileName = "";
1520       if (Optional<const char *> FName = dwarf::toString(FileNames[I].Name))
1521         FileName = *FName;
1522       assert(FileName != "");
1523       cantFail(getDwarfFile(Dir, FileName, 0, None, None, CUID));
1524     }
1525   }
1526 
1527   preprocessDWODebugInfo();
1528 }
1529 
1530 bool BinaryContext::shouldEmit(const BinaryFunction &Function) const {
1531   if (opts::processAllFunctions())
1532     return true;
1533 
1534   if (Function.isIgnored())
1535     return false;
1536 
1537   // In relocation mode we will emit non-simple functions with CFG.
1538   // If the function does not have a CFG it should be marked as ignored.
1539   return HasRelocations || Function.isSimple();
1540 }
1541 
1542 void BinaryContext::printCFI(raw_ostream &OS, const MCCFIInstruction &Inst) {
1543   uint32_t Operation = Inst.getOperation();
1544   switch (Operation) {
1545   case MCCFIInstruction::OpSameValue:
1546     OS << "OpSameValue Reg" << Inst.getRegister();
1547     break;
1548   case MCCFIInstruction::OpRememberState:
1549     OS << "OpRememberState";
1550     break;
1551   case MCCFIInstruction::OpRestoreState:
1552     OS << "OpRestoreState";
1553     break;
1554   case MCCFIInstruction::OpOffset:
1555     OS << "OpOffset Reg" << Inst.getRegister() << " " << Inst.getOffset();
1556     break;
1557   case MCCFIInstruction::OpDefCfaRegister:
1558     OS << "OpDefCfaRegister Reg" << Inst.getRegister();
1559     break;
1560   case MCCFIInstruction::OpDefCfaOffset:
1561     OS << "OpDefCfaOffset " << Inst.getOffset();
1562     break;
1563   case MCCFIInstruction::OpDefCfa:
1564     OS << "OpDefCfa Reg" << Inst.getRegister() << " " << Inst.getOffset();
1565     break;
1566   case MCCFIInstruction::OpRelOffset:
1567     OS << "OpRelOffset Reg" << Inst.getRegister() << " " << Inst.getOffset();
1568     break;
1569   case MCCFIInstruction::OpAdjustCfaOffset:
1570     OS << "OfAdjustCfaOffset " << Inst.getOffset();
1571     break;
1572   case MCCFIInstruction::OpEscape:
1573     OS << "OpEscape";
1574     break;
1575   case MCCFIInstruction::OpRestore:
1576     OS << "OpRestore Reg" << Inst.getRegister();
1577     break;
1578   case MCCFIInstruction::OpUndefined:
1579     OS << "OpUndefined Reg" << Inst.getRegister();
1580     break;
1581   case MCCFIInstruction::OpRegister:
1582     OS << "OpRegister Reg" << Inst.getRegister() << " Reg"
1583        << Inst.getRegister2();
1584     break;
1585   case MCCFIInstruction::OpWindowSave:
1586     OS << "OpWindowSave";
1587     break;
1588   case MCCFIInstruction::OpGnuArgsSize:
1589     OS << "OpGnuArgsSize";
1590     break;
1591   default:
1592     OS << "Op#" << Operation;
1593     break;
1594   }
1595 }
1596 
1597 void BinaryContext::printInstruction(raw_ostream &OS, const MCInst &Instruction,
1598                                      uint64_t Offset,
1599                                      const BinaryFunction *Function,
1600                                      bool PrintMCInst, bool PrintMemData,
1601                                      bool PrintRelocations) const {
1602   if (MIB->isEHLabel(Instruction)) {
1603     OS << "  EH_LABEL: " << *MIB->getTargetSymbol(Instruction) << '\n';
1604     return;
1605   }
1606   OS << format("    %08" PRIx64 ": ", Offset);
1607   if (MIB->isCFI(Instruction)) {
1608     uint32_t Offset = Instruction.getOperand(0).getImm();
1609     OS << "\t!CFI\t$" << Offset << "\t; ";
1610     if (Function)
1611       printCFI(OS, *Function->getCFIFor(Instruction));
1612     OS << "\n";
1613     return;
1614   }
1615   InstPrinter->printInst(&Instruction, 0, "", *STI, OS);
1616   if (MIB->isCall(Instruction)) {
1617     if (MIB->isTailCall(Instruction))
1618       OS << " # TAILCALL ";
1619     if (MIB->isInvoke(Instruction)) {
1620       const Optional<MCPlus::MCLandingPad> EHInfo = MIB->getEHInfo(Instruction);
1621       OS << " # handler: ";
1622       if (EHInfo->first)
1623         OS << *EHInfo->first;
1624       else
1625         OS << '0';
1626       OS << "; action: " << EHInfo->second;
1627       const int64_t GnuArgsSize = MIB->getGnuArgsSize(Instruction);
1628       if (GnuArgsSize >= 0)
1629         OS << "; GNU_args_size = " << GnuArgsSize;
1630     }
1631   } else if (MIB->isIndirectBranch(Instruction)) {
1632     if (uint64_t JTAddress = MIB->getJumpTable(Instruction)) {
1633       OS << " # JUMPTABLE @0x" << Twine::utohexstr(JTAddress);
1634     } else {
1635       OS << " # UNKNOWN CONTROL FLOW";
1636     }
1637   }
1638   if (Optional<uint32_t> Offset = MIB->getOffset(Instruction))
1639     OS << " # Offset: " << *Offset;
1640 
1641   MIB->printAnnotations(Instruction, OS);
1642 
1643   if (opts::PrintDebugInfo) {
1644     DebugLineTableRowRef RowRef =
1645         DebugLineTableRowRef::fromSMLoc(Instruction.getLoc());
1646     if (RowRef != DebugLineTableRowRef::NULL_ROW) {
1647       const DWARFDebugLine::LineTable *LineTable;
1648       if (Function && Function->getDWARFUnit() &&
1649           Function->getDWARFUnit()->getOffset() == RowRef.DwCompileUnitIndex) {
1650         LineTable = Function->getDWARFLineTable();
1651       } else {
1652         LineTable = DwCtx->getLineTableForUnit(
1653             DwCtx->getCompileUnitForOffset(RowRef.DwCompileUnitIndex));
1654       }
1655       assert(LineTable &&
1656              "line table expected for instruction with debug info");
1657 
1658       const DWARFDebugLine::Row &Row = LineTable->Rows[RowRef.RowIndex - 1];
1659       StringRef FileName = "";
1660       if (Optional<const char *> FName =
1661               dwarf::toString(LineTable->Prologue.FileNames[Row.File - 1].Name))
1662         FileName = *FName;
1663       OS << " # debug line " << FileName << ":" << Row.Line;
1664       if (Row.Column)
1665         OS << ":" << Row.Column;
1666       if (Row.Discriminator)
1667         OS << " discriminator:" << Row.Discriminator;
1668     }
1669   }
1670 
1671   if ((opts::PrintRelocations || PrintRelocations) && Function) {
1672     const uint64_t Size = computeCodeSize(&Instruction, &Instruction + 1);
1673     Function->printRelocations(OS, Offset, Size);
1674   }
1675 
1676   OS << "\n";
1677 
1678   if (PrintMCInst) {
1679     Instruction.dump_pretty(OS, InstPrinter.get());
1680     OS << "\n";
1681   }
1682 }
1683 
1684 ErrorOr<BinarySection &> BinaryContext::getSectionForAddress(uint64_t Address) {
1685   auto SI = AddressToSection.upper_bound(Address);
1686   if (SI != AddressToSection.begin()) {
1687     --SI;
1688     uint64_t UpperBound = SI->first + SI->second->getSize();
1689     if (!SI->second->getSize())
1690       UpperBound += 1;
1691     if (UpperBound > Address)
1692       return *SI->second;
1693   }
1694   return std::make_error_code(std::errc::bad_address);
1695 }
1696 
1697 ErrorOr<StringRef>
1698 BinaryContext::getSectionNameForAddress(uint64_t Address) const {
1699   if (ErrorOr<const BinarySection &> Section = getSectionForAddress(Address))
1700     return Section->getName();
1701   return std::make_error_code(std::errc::bad_address);
1702 }
1703 
1704 BinarySection &BinaryContext::registerSection(BinarySection *Section) {
1705   auto Res = Sections.insert(Section);
1706   (void)Res;
1707   assert(Res.second && "can't register the same section twice.");
1708 
1709   // Only register allocatable sections in the AddressToSection map.
1710   if (Section->isAllocatable() && Section->getAddress())
1711     AddressToSection.insert(std::make_pair(Section->getAddress(), Section));
1712   NameToSection.insert(
1713       std::make_pair(std::string(Section->getName()), Section));
1714   LLVM_DEBUG(dbgs() << "BOLT-DEBUG: registering " << *Section << "\n");
1715   return *Section;
1716 }
1717 
1718 BinarySection &BinaryContext::registerSection(SectionRef Section) {
1719   return registerSection(new BinarySection(*this, Section));
1720 }
1721 
1722 BinarySection &
1723 BinaryContext::registerSection(StringRef SectionName,
1724                                const BinarySection &OriginalSection) {
1725   return registerSection(
1726       new BinarySection(*this, SectionName, OriginalSection));
1727 }
1728 
1729 BinarySection &
1730 BinaryContext::registerOrUpdateSection(StringRef Name, unsigned ELFType,
1731                                        unsigned ELFFlags, uint8_t *Data,
1732                                        uint64_t Size, unsigned Alignment) {
1733   auto NamedSections = getSectionByName(Name);
1734   if (NamedSections.begin() != NamedSections.end()) {
1735     assert(std::next(NamedSections.begin()) == NamedSections.end() &&
1736            "can only update unique sections");
1737     BinarySection *Section = NamedSections.begin()->second;
1738 
1739     LLVM_DEBUG(dbgs() << "BOLT-DEBUG: updating " << *Section << " -> ");
1740     const bool Flag = Section->isAllocatable();
1741     (void)Flag;
1742     Section->update(Data, Size, Alignment, ELFType, ELFFlags);
1743     LLVM_DEBUG(dbgs() << *Section << "\n");
1744     // FIXME: Fix section flags/attributes for MachO.
1745     if (isELF())
1746       assert(Flag == Section->isAllocatable() &&
1747              "can't change section allocation status");
1748     return *Section;
1749   }
1750 
1751   return registerSection(
1752       new BinarySection(*this, Name, Data, Size, Alignment, ELFType, ELFFlags));
1753 }
1754 
1755 bool BinaryContext::deregisterSection(BinarySection &Section) {
1756   BinarySection *SectionPtr = &Section;
1757   auto Itr = Sections.find(SectionPtr);
1758   if (Itr != Sections.end()) {
1759     auto Range = AddressToSection.equal_range(SectionPtr->getAddress());
1760     while (Range.first != Range.second) {
1761       if (Range.first->second == SectionPtr) {
1762         AddressToSection.erase(Range.first);
1763         break;
1764       }
1765       ++Range.first;
1766     }
1767 
1768     auto NameRange =
1769         NameToSection.equal_range(std::string(SectionPtr->getName()));
1770     while (NameRange.first != NameRange.second) {
1771       if (NameRange.first->second == SectionPtr) {
1772         NameToSection.erase(NameRange.first);
1773         break;
1774       }
1775       ++NameRange.first;
1776     }
1777 
1778     Sections.erase(Itr);
1779     delete SectionPtr;
1780     return true;
1781   }
1782   return false;
1783 }
1784 
1785 void BinaryContext::printSections(raw_ostream &OS) const {
1786   for (BinarySection *const &Section : Sections)
1787     OS << "BOLT-INFO: " << *Section << "\n";
1788 }
1789 
1790 BinarySection &BinaryContext::absoluteSection() {
1791   if (ErrorOr<BinarySection &> Section = getUniqueSectionByName("<absolute>"))
1792     return *Section;
1793   return registerOrUpdateSection("<absolute>", ELF::SHT_NULL, 0u);
1794 }
1795 
1796 ErrorOr<uint64_t> BinaryContext::getUnsignedValueAtAddress(uint64_t Address,
1797                                                            size_t Size) const {
1798   const ErrorOr<const BinarySection &> Section = getSectionForAddress(Address);
1799   if (!Section)
1800     return std::make_error_code(std::errc::bad_address);
1801 
1802   if (Section->isVirtual())
1803     return 0;
1804 
1805   DataExtractor DE(Section->getContents(), AsmInfo->isLittleEndian(),
1806                    AsmInfo->getCodePointerSize());
1807   auto ValueOffset = static_cast<uint64_t>(Address - Section->getAddress());
1808   return DE.getUnsigned(&ValueOffset, Size);
1809 }
1810 
1811 ErrorOr<uint64_t> BinaryContext::getSignedValueAtAddress(uint64_t Address,
1812                                                          size_t Size) const {
1813   const ErrorOr<const BinarySection &> Section = getSectionForAddress(Address);
1814   if (!Section)
1815     return std::make_error_code(std::errc::bad_address);
1816 
1817   if (Section->isVirtual())
1818     return 0;
1819 
1820   DataExtractor DE(Section->getContents(), AsmInfo->isLittleEndian(),
1821                    AsmInfo->getCodePointerSize());
1822   auto ValueOffset = static_cast<uint64_t>(Address - Section->getAddress());
1823   return DE.getSigned(&ValueOffset, Size);
1824 }
1825 
1826 void BinaryContext::addRelocation(uint64_t Address, MCSymbol *Symbol,
1827                                   uint64_t Type, uint64_t Addend,
1828                                   uint64_t Value) {
1829   ErrorOr<BinarySection &> Section = getSectionForAddress(Address);
1830   assert(Section && "cannot find section for address");
1831   Section->addRelocation(Address - Section->getAddress(), Symbol, Type, Addend,
1832                          Value);
1833 }
1834 
1835 void BinaryContext::addDynamicRelocation(uint64_t Address, MCSymbol *Symbol,
1836                                          uint64_t Type, uint64_t Addend,
1837                                          uint64_t Value) {
1838   ErrorOr<BinarySection &> Section = getSectionForAddress(Address);
1839   assert(Section && "cannot find section for address");
1840   Section->addDynamicRelocation(Address - Section->getAddress(), Symbol, Type,
1841                                 Addend, Value);
1842 }
1843 
1844 bool BinaryContext::removeRelocationAt(uint64_t Address) {
1845   ErrorOr<BinarySection &> Section = getSectionForAddress(Address);
1846   assert(Section && "cannot find section for address");
1847   return Section->removeRelocationAt(Address - Section->getAddress());
1848 }
1849 
1850 const Relocation *BinaryContext::getRelocationAt(uint64_t Address) {
1851   ErrorOr<BinarySection &> Section = getSectionForAddress(Address);
1852   if (!Section)
1853     return nullptr;
1854 
1855   return Section->getRelocationAt(Address - Section->getAddress());
1856 }
1857 
1858 const Relocation *BinaryContext::getDynamicRelocationAt(uint64_t Address) {
1859   ErrorOr<BinarySection &> Section = getSectionForAddress(Address);
1860   if (!Section)
1861     return nullptr;
1862 
1863   return Section->getDynamicRelocationAt(Address - Section->getAddress());
1864 }
1865 
1866 void BinaryContext::markAmbiguousRelocations(BinaryData &BD,
1867                                              const uint64_t Address) {
1868   auto setImmovable = [&](BinaryData &BD) {
1869     BinaryData *Root = BD.getAtomicRoot();
1870     LLVM_DEBUG(if (Root->isMoveable()) {
1871       dbgs() << "BOLT-DEBUG: setting " << *Root << " as immovable "
1872              << "due to ambiguous relocation referencing 0x"
1873              << Twine::utohexstr(Address) << '\n';
1874     });
1875     Root->setIsMoveable(false);
1876   };
1877 
1878   if (Address == BD.getAddress()) {
1879     setImmovable(BD);
1880 
1881     // Set previous symbol as immovable
1882     BinaryData *Prev = getBinaryDataContainingAddress(Address - 1);
1883     if (Prev && Prev->getEndAddress() == BD.getAddress())
1884       setImmovable(*Prev);
1885   }
1886 
1887   if (Address == BD.getEndAddress()) {
1888     setImmovable(BD);
1889 
1890     // Set next symbol as immovable
1891     BinaryData *Next = getBinaryDataContainingAddress(BD.getEndAddress());
1892     if (Next && Next->getAddress() == BD.getEndAddress())
1893       setImmovable(*Next);
1894   }
1895 }
1896 
1897 BinaryFunction *BinaryContext::getFunctionForSymbol(const MCSymbol *Symbol,
1898                                                     uint64_t *EntryDesc) {
1899   std::shared_lock<std::shared_timed_mutex> Lock(SymbolToFunctionMapMutex);
1900   auto BFI = SymbolToFunctionMap.find(Symbol);
1901   if (BFI == SymbolToFunctionMap.end())
1902     return nullptr;
1903 
1904   BinaryFunction *BF = BFI->second;
1905   if (EntryDesc)
1906     *EntryDesc = BF->getEntryIDForSymbol(Symbol);
1907 
1908   return BF;
1909 }
1910 
1911 void BinaryContext::exitWithBugReport(StringRef Message,
1912                                       const BinaryFunction &Function) const {
1913   errs() << "=======================================\n";
1914   errs() << "BOLT is unable to proceed because it couldn't properly understand "
1915             "this function.\n";
1916   errs() << "If you are running the most recent version of BOLT, you may "
1917             "want to "
1918             "report this and paste this dump.\nPlease check that there is no "
1919             "sensitive contents being shared in this dump.\n";
1920   errs() << "\nOffending function: " << Function.getPrintName() << "\n\n";
1921   ScopedPrinter SP(errs());
1922   SP.printBinaryBlock("Function contents", *Function.getData());
1923   errs() << "\n";
1924   Function.dump();
1925   errs() << "ERROR: " << Message;
1926   errs() << "\n=======================================\n";
1927   exit(1);
1928 }
1929 
1930 BinaryFunction *
1931 BinaryContext::createInjectedBinaryFunction(const std::string &Name,
1932                                             bool IsSimple) {
1933   InjectedBinaryFunctions.push_back(new BinaryFunction(Name, *this, IsSimple));
1934   BinaryFunction *BF = InjectedBinaryFunctions.back();
1935   setSymbolToFunctionMap(BF->getSymbol(), BF);
1936   BF->CurrentState = BinaryFunction::State::CFG;
1937   return BF;
1938 }
1939 
1940 std::pair<size_t, size_t>
1941 BinaryContext::calculateEmittedSize(BinaryFunction &BF, bool FixBranches) {
1942   // Adjust branch instruction to match the current layout.
1943   if (FixBranches)
1944     BF.fixBranches();
1945 
1946   // Create local MC context to isolate the effect of ephemeral code emission.
1947   IndependentCodeEmitter MCEInstance = createIndependentMCCodeEmitter();
1948   MCContext *LocalCtx = MCEInstance.LocalCtx.get();
1949   MCAsmBackend *MAB =
1950       TheTarget->createMCAsmBackend(*STI, *MRI, MCTargetOptions());
1951 
1952   SmallString<256> Code;
1953   raw_svector_ostream VecOS(Code);
1954 
1955   std::unique_ptr<MCObjectWriter> OW = MAB->createObjectWriter(VecOS);
1956   std::unique_ptr<MCStreamer> Streamer(TheTarget->createMCObjectStreamer(
1957       *TheTriple, *LocalCtx, std::unique_ptr<MCAsmBackend>(MAB), std::move(OW),
1958       std::unique_ptr<MCCodeEmitter>(MCEInstance.MCE.release()), *STI,
1959       /*RelaxAll=*/false,
1960       /*IncrementalLinkerCompatible=*/false,
1961       /*DWARFMustBeAtTheEnd=*/false));
1962 
1963   Streamer->initSections(false, *STI);
1964 
1965   MCSection *Section = MCEInstance.LocalMOFI->getTextSection();
1966   Section->setHasInstructions(true);
1967 
1968   // Create symbols in the LocalCtx so that they get destroyed with it.
1969   MCSymbol *StartLabel = LocalCtx->createTempSymbol();
1970   MCSymbol *EndLabel = LocalCtx->createTempSymbol();
1971   MCSymbol *ColdStartLabel = LocalCtx->createTempSymbol();
1972   MCSymbol *ColdEndLabel = LocalCtx->createTempSymbol();
1973 
1974   Streamer->SwitchSection(Section);
1975   Streamer->emitLabel(StartLabel);
1976   emitFunctionBody(*Streamer, BF, /*EmitColdPart=*/false,
1977                    /*EmitCodeOnly=*/true);
1978   Streamer->emitLabel(EndLabel);
1979 
1980   if (BF.isSplit()) {
1981     MCSectionELF *ColdSection =
1982         LocalCtx->getELFSection(BF.getColdCodeSectionName(), ELF::SHT_PROGBITS,
1983                                 ELF::SHF_EXECINSTR | ELF::SHF_ALLOC);
1984     ColdSection->setHasInstructions(true);
1985 
1986     Streamer->SwitchSection(ColdSection);
1987     Streamer->emitLabel(ColdStartLabel);
1988     emitFunctionBody(*Streamer, BF, /*EmitColdPart=*/true,
1989                      /*EmitCodeOnly=*/true);
1990     Streamer->emitLabel(ColdEndLabel);
1991     // To avoid calling MCObjectStreamer::flushPendingLabels() which is private
1992     Streamer->emitBytes(StringRef(""));
1993     Streamer->SwitchSection(Section);
1994   }
1995 
1996   // To avoid calling MCObjectStreamer::flushPendingLabels() which is private or
1997   // MCStreamer::Finish(), which does more than we want
1998   Streamer->emitBytes(StringRef(""));
1999 
2000   MCAssembler &Assembler =
2001       static_cast<MCObjectStreamer *>(Streamer.get())->getAssembler();
2002   MCAsmLayout Layout(Assembler);
2003   Assembler.layout(Layout);
2004 
2005   const uint64_t HotSize =
2006       Layout.getSymbolOffset(*EndLabel) - Layout.getSymbolOffset(*StartLabel);
2007   const uint64_t ColdSize = BF.isSplit()
2008                                 ? Layout.getSymbolOffset(*ColdEndLabel) -
2009                                       Layout.getSymbolOffset(*ColdStartLabel)
2010                                 : 0ULL;
2011 
2012   // Clean-up the effect of the code emission.
2013   for (const MCSymbol &Symbol : Assembler.symbols()) {
2014     MCSymbol *MutableSymbol = const_cast<MCSymbol *>(&Symbol);
2015     MutableSymbol->setUndefined();
2016     MutableSymbol->setIsRegistered(false);
2017   }
2018 
2019   return std::make_pair(HotSize, ColdSize);
2020 }
2021 
2022 bool BinaryContext::validateEncoding(const MCInst &Inst,
2023                                      ArrayRef<uint8_t> InputEncoding) const {
2024   SmallString<256> Code;
2025   SmallVector<MCFixup, 4> Fixups;
2026   raw_svector_ostream VecOS(Code);
2027 
2028   MCE->encodeInstruction(Inst, VecOS, Fixups, *STI);
2029   auto EncodedData = ArrayRef<uint8_t>((uint8_t *)Code.data(), Code.size());
2030   if (InputEncoding != EncodedData) {
2031     if (opts::Verbosity > 1) {
2032       errs() << "BOLT-WARNING: mismatched encoding detected\n"
2033              << "      input: " << InputEncoding << '\n'
2034              << "     output: " << EncodedData << '\n';
2035     }
2036     return false;
2037   }
2038 
2039   return true;
2040 }
2041 
2042 uint64_t BinaryContext::getHotThreshold() const {
2043   static uint64_t Threshold = 0;
2044   if (Threshold == 0) {
2045     Threshold = std::max(
2046         (uint64_t)opts::ExecutionCountThreshold,
2047         NumProfiledFuncs ? SumExecutionCount / (2 * NumProfiledFuncs) : 1);
2048   }
2049   return Threshold;
2050 }
2051 
2052 BinaryFunction *BinaryContext::getBinaryFunctionContainingAddress(
2053     uint64_t Address, bool CheckPastEnd, bool UseMaxSize) {
2054   auto FI = BinaryFunctions.upper_bound(Address);
2055   if (FI == BinaryFunctions.begin())
2056     return nullptr;
2057   --FI;
2058 
2059   const uint64_t UsedSize =
2060       UseMaxSize ? FI->second.getMaxSize() : FI->second.getSize();
2061 
2062   if (Address >= FI->first + UsedSize + (CheckPastEnd ? 1 : 0))
2063     return nullptr;
2064 
2065   return &FI->second;
2066 }
2067 
2068 BinaryFunction *BinaryContext::getBinaryFunctionAtAddress(uint64_t Address) {
2069   // First, try to find a function starting at the given address. If the
2070   // function was folded, this will get us the original folded function if it
2071   // wasn't removed from the list, e.g. in non-relocation mode.
2072   auto BFI = BinaryFunctions.find(Address);
2073   if (BFI != BinaryFunctions.end())
2074     return &BFI->second;
2075 
2076   // We might have folded the function matching the object at the given
2077   // address. In such case, we look for a function matching the symbol
2078   // registered at the original address. The new function (the one that the
2079   // original was folded into) will hold the symbol.
2080   if (const BinaryData *BD = getBinaryDataAtAddress(Address)) {
2081     uint64_t EntryID = 0;
2082     BinaryFunction *BF = getFunctionForSymbol(BD->getSymbol(), &EntryID);
2083     if (BF && EntryID == 0)
2084       return BF;
2085   }
2086   return nullptr;
2087 }
2088 
2089 DebugAddressRangesVector BinaryContext::translateModuleAddressRanges(
2090     const DWARFAddressRangesVector &InputRanges) const {
2091   DebugAddressRangesVector OutputRanges;
2092 
2093   for (const DWARFAddressRange Range : InputRanges) {
2094     auto BFI = BinaryFunctions.lower_bound(Range.LowPC);
2095     while (BFI != BinaryFunctions.end()) {
2096       const BinaryFunction &Function = BFI->second;
2097       if (Function.getAddress() >= Range.HighPC)
2098         break;
2099       const DebugAddressRangesVector FunctionRanges =
2100           Function.getOutputAddressRanges();
2101       std::move(std::begin(FunctionRanges), std::end(FunctionRanges),
2102                 std::back_inserter(OutputRanges));
2103       std::advance(BFI, 1);
2104     }
2105   }
2106 
2107   return OutputRanges;
2108 }
2109 
2110 } // namespace bolt
2111 } // namespace llvm
2112