xref: /llvm-project/bolt/lib/Core/BinaryContext.cpp (revision 641e92d46b2aae6bd086444409397ecf25cf4283)
1 //===- bolt/Core/BinaryContext.cpp - Low-level context --------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements the BinaryContext class.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "bolt/Core/BinaryContext.h"
14 #include "bolt/Core/BinaryEmitter.h"
15 #include "bolt/Core/BinaryFunction.h"
16 #include "bolt/Utils/CommandLineOpts.h"
17 #include "bolt/Utils/NameResolver.h"
18 #include "bolt/Utils/Utils.h"
19 #include "llvm/ADT/Twine.h"
20 #include "llvm/DebugInfo/DWARF/DWARFFormValue.h"
21 #include "llvm/DebugInfo/DWARF/DWARFUnit.h"
22 #include "llvm/MC/MCAsmLayout.h"
23 #include "llvm/MC/MCAssembler.h"
24 #include "llvm/MC/MCContext.h"
25 #include "llvm/MC/MCDisassembler/MCDisassembler.h"
26 #include "llvm/MC/MCInstPrinter.h"
27 #include "llvm/MC/MCObjectStreamer.h"
28 #include "llvm/MC/MCObjectWriter.h"
29 #include "llvm/MC/MCRegisterInfo.h"
30 #include "llvm/MC/MCSectionELF.h"
31 #include "llvm/MC/MCStreamer.h"
32 #include "llvm/MC/MCSubtargetInfo.h"
33 #include "llvm/MC/MCSymbol.h"
34 #include "llvm/Support/CommandLine.h"
35 #include "llvm/Support/Regex.h"
36 #include <algorithm>
37 #include <functional>
38 #include <iterator>
39 #include <unordered_set>
40 
41 using namespace llvm;
42 
43 #undef  DEBUG_TYPE
44 #define DEBUG_TYPE "bolt"
45 
46 namespace opts {
47 
48 cl::opt<bool>
49 NoHugePages("no-huge-pages",
50   cl::desc("use regular size pages for code alignment"),
51   cl::ZeroOrMore,
52   cl::Hidden,
53   cl::cat(BoltCategory));
54 
55 static cl::opt<bool>
56 PrintDebugInfo("print-debug-info",
57   cl::desc("print debug info when printing functions"),
58   cl::Hidden,
59   cl::ZeroOrMore,
60   cl::cat(BoltCategory));
61 
62 cl::opt<bool>
63 PrintRelocations("print-relocations",
64   cl::desc("print relocations when printing functions/objects"),
65   cl::Hidden,
66   cl::ZeroOrMore,
67   cl::cat(BoltCategory));
68 
69 static cl::opt<bool>
70 PrintMemData("print-mem-data",
71   cl::desc("print memory data annotations when printing functions"),
72   cl::Hidden,
73   cl::ZeroOrMore,
74   cl::cat(BoltCategory));
75 
76 } // namespace opts
77 
78 namespace llvm {
79 namespace bolt {
80 
81 BinaryContext::BinaryContext(std::unique_ptr<MCContext> Ctx,
82                              std::unique_ptr<DWARFContext> DwCtx,
83                              std::unique_ptr<Triple> TheTriple,
84                              const Target *TheTarget, std::string TripleName,
85                              std::unique_ptr<MCCodeEmitter> MCE,
86                              std::unique_ptr<MCObjectFileInfo> MOFI,
87                              std::unique_ptr<const MCAsmInfo> AsmInfo,
88                              std::unique_ptr<const MCInstrInfo> MII,
89                              std::unique_ptr<const MCSubtargetInfo> STI,
90                              std::unique_ptr<MCInstPrinter> InstPrinter,
91                              std::unique_ptr<const MCInstrAnalysis> MIA,
92                              std::unique_ptr<MCPlusBuilder> MIB,
93                              std::unique_ptr<const MCRegisterInfo> MRI,
94                              std::unique_ptr<MCDisassembler> DisAsm)
95     : Ctx(std::move(Ctx)), DwCtx(std::move(DwCtx)),
96       TheTriple(std::move(TheTriple)), TheTarget(TheTarget),
97       TripleName(TripleName), MCE(std::move(MCE)), MOFI(std::move(MOFI)),
98       AsmInfo(std::move(AsmInfo)), MII(std::move(MII)), STI(std::move(STI)),
99       InstPrinter(std::move(InstPrinter)), MIA(std::move(MIA)),
100       MIB(std::move(MIB)), MRI(std::move(MRI)), DisAsm(std::move(DisAsm)) {
101   Relocation::Arch = this->TheTriple->getArch();
102   PageAlign = opts::NoHugePages ? RegularPageSize : HugePageSize;
103 }
104 
105 BinaryContext::~BinaryContext() {
106   for (BinarySection *Section : Sections)
107     delete Section;
108   for (BinaryFunction *InjectedFunction : InjectedBinaryFunctions)
109     delete InjectedFunction;
110   for (std::pair<const uint64_t, JumpTable *> JTI : JumpTables)
111     delete JTI.second;
112   clearBinaryData();
113 }
114 
115 /// Create BinaryContext for a given architecture \p ArchName and
116 /// triple \p TripleName.
117 std::unique_ptr<BinaryContext>
118 BinaryContext::createBinaryContext(const ObjectFile *File, bool IsPIC,
119                                    std::unique_ptr<DWARFContext> DwCtx) {
120   StringRef ArchName = "";
121   StringRef FeaturesStr = "";
122   switch (File->getArch()) {
123   case llvm::Triple::x86_64:
124     ArchName = "x86-64";
125     FeaturesStr = "+nopl";
126     break;
127   case llvm::Triple::aarch64:
128     ArchName = "aarch64";
129     FeaturesStr = "+fp-armv8,+neon,+crypto,+dotprod,+crc,+lse,+ras,+rdm,"
130                   "+fullfp16,+spe,+fuse-aes,+rcpc";
131     break;
132   default:
133     errs() << "BOLT-ERROR: Unrecognized machine in ELF file.\n";
134     return nullptr;
135   }
136 
137   auto TheTriple = std::make_unique<Triple>(File->makeTriple());
138   const std::string TripleName = TheTriple->str();
139 
140   std::string Error;
141   const Target *TheTarget =
142       TargetRegistry::lookupTarget(std::string(ArchName), *TheTriple, Error);
143   if (!TheTarget) {
144     errs() << "BOLT-ERROR: " << Error;
145     return nullptr;
146   }
147 
148   std::unique_ptr<const MCRegisterInfo> MRI(
149       TheTarget->createMCRegInfo(TripleName));
150   if (!MRI) {
151     errs() << "BOLT-ERROR: no register info for target " << TripleName << "\n";
152     return nullptr;
153   }
154 
155   // Set up disassembler.
156   std::unique_ptr<const MCAsmInfo> AsmInfo(
157       TheTarget->createMCAsmInfo(*MRI, TripleName, MCTargetOptions()));
158   if (!AsmInfo) {
159     errs() << "BOLT-ERROR: no assembly info for target " << TripleName << "\n";
160     return nullptr;
161   }
162 
163   std::unique_ptr<const MCSubtargetInfo> STI(
164       TheTarget->createMCSubtargetInfo(TripleName, "", FeaturesStr));
165   if (!STI) {
166     errs() << "BOLT-ERROR: no subtarget info for target " << TripleName << "\n";
167     return nullptr;
168   }
169 
170   std::unique_ptr<const MCInstrInfo> MII(TheTarget->createMCInstrInfo());
171   if (!MII) {
172     errs() << "BOLT-ERROR: no instruction info for target " << TripleName
173            << "\n";
174     return nullptr;
175   }
176 
177   std::unique_ptr<MCContext> Ctx(
178       new MCContext(*TheTriple, AsmInfo.get(), MRI.get(), STI.get()));
179   std::unique_ptr<MCObjectFileInfo> MOFI(
180       TheTarget->createMCObjectFileInfo(*Ctx, IsPIC));
181   Ctx->setObjectFileInfo(MOFI.get());
182   // We do not support X86 Large code model. Change this in the future.
183   bool Large = false;
184   if (TheTriple->getArch() == llvm::Triple::aarch64)
185     Large = true;
186   unsigned LSDAEncoding =
187       Large ? dwarf::DW_EH_PE_absptr : dwarf::DW_EH_PE_udata4;
188   unsigned TTypeEncoding =
189       Large ? dwarf::DW_EH_PE_absptr : dwarf::DW_EH_PE_udata4;
190   if (IsPIC) {
191     LSDAEncoding = dwarf::DW_EH_PE_pcrel |
192                    (Large ? dwarf::DW_EH_PE_sdata8 : dwarf::DW_EH_PE_sdata4);
193     TTypeEncoding = dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel |
194                     (Large ? dwarf::DW_EH_PE_sdata8 : dwarf::DW_EH_PE_sdata4);
195   }
196 
197   std::unique_ptr<MCDisassembler> DisAsm(
198       TheTarget->createMCDisassembler(*STI, *Ctx));
199 
200   if (!DisAsm) {
201     errs() << "BOLT-ERROR: no disassembler for target " << TripleName << "\n";
202     return nullptr;
203   }
204 
205   std::unique_ptr<const MCInstrAnalysis> MIA(
206       TheTarget->createMCInstrAnalysis(MII.get()));
207   if (!MIA) {
208     errs() << "BOLT-ERROR: failed to create instruction analysis for target"
209            << TripleName << "\n";
210     return nullptr;
211   }
212 
213   int AsmPrinterVariant = AsmInfo->getAssemblerDialect();
214   std::unique_ptr<MCInstPrinter> InstructionPrinter(
215       TheTarget->createMCInstPrinter(*TheTriple, AsmPrinterVariant, *AsmInfo,
216                                      *MII, *MRI));
217   if (!InstructionPrinter) {
218     errs() << "BOLT-ERROR: no instruction printer for target " << TripleName
219            << '\n';
220     return nullptr;
221   }
222   InstructionPrinter->setPrintImmHex(true);
223 
224   std::unique_ptr<MCCodeEmitter> MCE(
225       TheTarget->createMCCodeEmitter(*MII, *MRI, *Ctx));
226 
227   // Make sure we don't miss any output on core dumps.
228   outs().SetUnbuffered();
229   errs().SetUnbuffered();
230   dbgs().SetUnbuffered();
231 
232   auto BC = std::make_unique<BinaryContext>(
233       std::move(Ctx), std::move(DwCtx), std::move(TheTriple), TheTarget,
234       std::string(TripleName), std::move(MCE), std::move(MOFI),
235       std::move(AsmInfo), std::move(MII), std::move(STI),
236       std::move(InstructionPrinter), std::move(MIA), nullptr, std::move(MRI),
237       std::move(DisAsm));
238 
239   BC->TTypeEncoding = TTypeEncoding;
240   BC->LSDAEncoding = LSDAEncoding;
241 
242   BC->MAB = std::unique_ptr<MCAsmBackend>(
243       BC->TheTarget->createMCAsmBackend(*BC->STI, *BC->MRI, MCTargetOptions()));
244 
245   BC->setFilename(File->getFileName());
246 
247   BC->HasFixedLoadAddress = !IsPIC;
248 
249   return BC;
250 }
251 
252 bool BinaryContext::forceSymbolRelocations(StringRef SymbolName) const {
253   if (opts::HotText &&
254       (SymbolName == "__hot_start" || SymbolName == "__hot_end"))
255     return true;
256 
257   if (opts::HotData &&
258       (SymbolName == "__hot_data_start" || SymbolName == "__hot_data_end"))
259     return true;
260 
261   if (SymbolName == "_end")
262     return true;
263 
264   return false;
265 }
266 
267 std::unique_ptr<MCObjectWriter>
268 BinaryContext::createObjectWriter(raw_pwrite_stream &OS) {
269   return MAB->createObjectWriter(OS);
270 }
271 
272 bool BinaryContext::validateObjectNesting() const {
273   auto Itr = BinaryDataMap.begin();
274   auto End = BinaryDataMap.end();
275   bool Valid = true;
276   while (Itr != End) {
277     auto Next = std::next(Itr);
278     while (Next != End &&
279            Itr->second->getSection() == Next->second->getSection() &&
280            Itr->second->containsRange(Next->second->getAddress(),
281                                       Next->second->getSize())) {
282       if (Next->second->Parent != Itr->second) {
283         errs() << "BOLT-WARNING: object nesting incorrect for:\n"
284                << "BOLT-WARNING:  " << *Itr->second << "\n"
285                << "BOLT-WARNING:  " << *Next->second << "\n";
286         Valid = false;
287       }
288       ++Next;
289     }
290     Itr = Next;
291   }
292   return Valid;
293 }
294 
295 bool BinaryContext::validateHoles() const {
296   bool Valid = true;
297   for (BinarySection &Section : sections()) {
298     for (const Relocation &Rel : Section.relocations()) {
299       uint64_t RelAddr = Rel.Offset + Section.getAddress();
300       const BinaryData *BD = getBinaryDataContainingAddress(RelAddr);
301       if (!BD) {
302         errs() << "BOLT-WARNING: no BinaryData found for relocation at address"
303                << " 0x" << Twine::utohexstr(RelAddr) << " in "
304                << Section.getName() << "\n";
305         Valid = false;
306       } else if (!BD->getAtomicRoot()) {
307         errs() << "BOLT-WARNING: no atomic BinaryData found for relocation at "
308                << "address 0x" << Twine::utohexstr(RelAddr) << " in "
309                << Section.getName() << "\n";
310         Valid = false;
311       }
312     }
313   }
314   return Valid;
315 }
316 
317 void BinaryContext::updateObjectNesting(BinaryDataMapType::iterator GAI) {
318   const uint64_t Address = GAI->second->getAddress();
319   const uint64_t Size = GAI->second->getSize();
320 
321   auto fixParents = [&](BinaryDataMapType::iterator Itr,
322                         BinaryData *NewParent) {
323     BinaryData *OldParent = Itr->second->Parent;
324     Itr->second->Parent = NewParent;
325     ++Itr;
326     while (Itr != BinaryDataMap.end() && OldParent &&
327            Itr->second->Parent == OldParent) {
328       Itr->second->Parent = NewParent;
329       ++Itr;
330     }
331   };
332 
333   // Check if the previous symbol contains the newly added symbol.
334   if (GAI != BinaryDataMap.begin()) {
335     BinaryData *Prev = std::prev(GAI)->second;
336     while (Prev) {
337       if (Prev->getSection() == GAI->second->getSection() &&
338           Prev->containsRange(Address, Size)) {
339         fixParents(GAI, Prev);
340       } else {
341         fixParents(GAI, nullptr);
342       }
343       Prev = Prev->Parent;
344     }
345   }
346 
347   // Check if the newly added symbol contains any subsequent symbols.
348   if (Size != 0) {
349     BinaryData *BD = GAI->second->Parent ? GAI->second->Parent : GAI->second;
350     auto Itr = std::next(GAI);
351     while (
352         Itr != BinaryDataMap.end() &&
353         BD->containsRange(Itr->second->getAddress(), Itr->second->getSize())) {
354       Itr->second->Parent = BD;
355       ++Itr;
356     }
357   }
358 }
359 
360 iterator_range<BinaryContext::binary_data_iterator>
361 BinaryContext::getSubBinaryData(BinaryData *BD) {
362   auto Start = std::next(BinaryDataMap.find(BD->getAddress()));
363   auto End = Start;
364   while (End != BinaryDataMap.end() && BD->isAncestorOf(End->second))
365     ++End;
366   return make_range(Start, End);
367 }
368 
369 std::pair<const MCSymbol *, uint64_t>
370 BinaryContext::handleAddressRef(uint64_t Address, BinaryFunction &BF,
371                                 bool IsPCRel) {
372   uint64_t Addend = 0;
373 
374   if (isAArch64()) {
375     // Check if this is an access to a constant island and create bookkeeping
376     // to keep track of it and emit it later as part of this function.
377     if (MCSymbol *IslandSym = BF.getOrCreateIslandAccess(Address))
378       return std::make_pair(IslandSym, Addend);
379 
380     // Detect custom code written in assembly that refers to arbitrary
381     // constant islands from other functions. Write this reference so we
382     // can pull this constant island and emit it as part of this function
383     // too.
384     auto IslandIter = AddressToConstantIslandMap.lower_bound(Address);
385     if (IslandIter != AddressToConstantIslandMap.end()) {
386       if (MCSymbol *IslandSym =
387               IslandIter->second->getOrCreateProxyIslandAccess(Address, BF)) {
388         BF.createIslandDependency(IslandSym, IslandIter->second);
389         return std::make_pair(IslandSym, Addend);
390       }
391     }
392   }
393 
394   // Note that the address does not necessarily have to reside inside
395   // a section, it could be an absolute address too.
396   ErrorOr<BinarySection &> Section = getSectionForAddress(Address);
397   if (Section && Section->isText()) {
398     if (BF.containsAddress(Address, /*UseMaxSize=*/isAArch64())) {
399       if (Address != BF.getAddress()) {
400         // The address could potentially escape. Mark it as another entry
401         // point into the function.
402         if (opts::Verbosity >= 1) {
403           outs() << "BOLT-INFO: potentially escaped address 0x"
404                  << Twine::utohexstr(Address) << " in function " << BF << '\n';
405         }
406         BF.HasInternalLabelReference = true;
407         return std::make_pair(
408             BF.addEntryPointAtOffset(Address - BF.getAddress()), Addend);
409       }
410     } else {
411       BF.InterproceduralReferences.insert(Address);
412     }
413   }
414 
415   // With relocations, catch jump table references outside of the basic block
416   // containing the indirect jump.
417   if (HasRelocations) {
418     const MemoryContentsType MemType = analyzeMemoryAt(Address, BF);
419     if (MemType == MemoryContentsType::POSSIBLE_PIC_JUMP_TABLE && IsPCRel) {
420       const MCSymbol *Symbol =
421           getOrCreateJumpTable(BF, Address, JumpTable::JTT_PIC);
422 
423       return std::make_pair(Symbol, Addend);
424     }
425   }
426 
427   if (BinaryData *BD = getBinaryDataContainingAddress(Address))
428     return std::make_pair(BD->getSymbol(), Address - BD->getAddress());
429 
430   // TODO: use DWARF info to get size/alignment here?
431   MCSymbol *TargetSymbol = getOrCreateGlobalSymbol(Address, "DATAat");
432   LLVM_DEBUG(dbgs() << "Created symbol " << TargetSymbol->getName() << '\n');
433   return std::make_pair(TargetSymbol, Addend);
434 }
435 
436 MemoryContentsType BinaryContext::analyzeMemoryAt(uint64_t Address,
437                                                   BinaryFunction &BF) {
438   if (!isX86())
439     return MemoryContentsType::UNKNOWN;
440 
441   ErrorOr<BinarySection &> Section = getSectionForAddress(Address);
442   if (!Section) {
443     // No section - possibly an absolute address. Since we don't allow
444     // internal function addresses to escape the function scope - we
445     // consider it a tail call.
446     if (opts::Verbosity > 1) {
447       errs() << "BOLT-WARNING: no section for address 0x"
448              << Twine::utohexstr(Address) << " referenced from function " << BF
449              << '\n';
450     }
451     return MemoryContentsType::UNKNOWN;
452   }
453 
454   if (Section->isVirtual()) {
455     // The contents are filled at runtime.
456     return MemoryContentsType::UNKNOWN;
457   }
458 
459   // No support for jump tables in code yet.
460   if (Section->isText())
461     return MemoryContentsType::UNKNOWN;
462 
463   // Start with checking for PIC jump table. We expect non-PIC jump tables
464   // to have high 32 bits set to 0.
465   if (analyzeJumpTable(Address, JumpTable::JTT_PIC, BF))
466     return MemoryContentsType::POSSIBLE_PIC_JUMP_TABLE;
467 
468   if (analyzeJumpTable(Address, JumpTable::JTT_NORMAL, BF))
469     return MemoryContentsType::POSSIBLE_JUMP_TABLE;
470 
471   return MemoryContentsType::UNKNOWN;
472 }
473 
474 /// Check if <fragment restored name> == <parent restored name>.cold(.\d+)?
475 bool isPotentialFragmentByName(BinaryFunction &Fragment,
476                                BinaryFunction &Parent) {
477   for (StringRef Name : Parent.getNames()) {
478     std::string NamePrefix = Regex::escape(NameResolver::restore(Name));
479     std::string NameRegex = Twine(NamePrefix, "\\.cold(\\.[0-9]+)?").str();
480     if (Fragment.hasRestoredNameRegex(NameRegex))
481       return true;
482   }
483   return false;
484 }
485 
486 bool BinaryContext::analyzeJumpTable(const uint64_t Address,
487                                      const JumpTable::JumpTableType Type,
488                                      BinaryFunction &BF,
489                                      const uint64_t NextJTAddress,
490                                      JumpTable::OffsetsType *Offsets) {
491   // Is one of the targets __builtin_unreachable?
492   bool HasUnreachable = false;
493 
494   // Number of targets other than __builtin_unreachable.
495   uint64_t NumRealEntries = 0;
496 
497   constexpr uint64_t INVALID_OFFSET = std::numeric_limits<uint64_t>::max();
498   auto addOffset = [&](uint64_t Offset) {
499     if (Offsets)
500       Offsets->emplace_back(Offset);
501   };
502 
503   auto doesBelongToFunction = [&](const uint64_t Addr,
504                                   BinaryFunction *TargetBF) -> bool {
505     if (BF.containsAddress(Addr))
506       return true;
507     // Nothing to do if we failed to identify the containing function.
508     if (!TargetBF)
509       return false;
510     // Case 1: check if BF is a fragment and TargetBF is its parent.
511     if (BF.isFragment()) {
512       // Parent function may or may not be already registered.
513       // Set parent link based on function name matching heuristic.
514       return registerFragment(BF, *TargetBF);
515     }
516     // Case 2: check if TargetBF is a fragment and BF is its parent.
517     return TargetBF->isFragment() && registerFragment(*TargetBF, BF);
518   };
519 
520   ErrorOr<BinarySection &> Section = getSectionForAddress(Address);
521   if (!Section)
522     return false;
523 
524   // The upper bound is defined by containing object, section limits, and
525   // the next jump table in memory.
526   uint64_t UpperBound = Section->getEndAddress();
527   const BinaryData *JumpTableBD = getBinaryDataAtAddress(Address);
528   if (JumpTableBD && JumpTableBD->getSize()) {
529     assert(JumpTableBD->getEndAddress() <= UpperBound &&
530            "data object cannot cross a section boundary");
531     UpperBound = JumpTableBD->getEndAddress();
532   }
533   if (NextJTAddress)
534     UpperBound = std::min(NextJTAddress, UpperBound);
535 
536   LLVM_DEBUG(dbgs() << "BOLT-DEBUG: analyzeJumpTable in " << BF.getPrintName()
537                     << '\n');
538   const uint64_t EntrySize = getJumpTableEntrySize(Type);
539   for (uint64_t EntryAddress = Address; EntryAddress <= UpperBound - EntrySize;
540        EntryAddress += EntrySize) {
541     LLVM_DEBUG(dbgs() << "  * Checking 0x" << Twine::utohexstr(EntryAddress)
542                       << " -> ");
543     // Check if there's a proper relocation against the jump table entry.
544     if (HasRelocations) {
545       if (Type == JumpTable::JTT_PIC &&
546           !DataPCRelocations.count(EntryAddress)) {
547         LLVM_DEBUG(
548             dbgs() << "FAIL: JTT_PIC table, no relocation for this address\n");
549         break;
550       }
551       if (Type == JumpTable::JTT_NORMAL && !getRelocationAt(EntryAddress)) {
552         LLVM_DEBUG(
553             dbgs()
554             << "FAIL: JTT_NORMAL table, no relocation for this address\n");
555         break;
556       }
557     }
558 
559     const uint64_t Value =
560         (Type == JumpTable::JTT_PIC)
561             ? Address + *getSignedValueAtAddress(EntryAddress, EntrySize)
562             : *getPointerAtAddress(EntryAddress);
563 
564     // __builtin_unreachable() case.
565     if (Value == BF.getAddress() + BF.getSize()) {
566       addOffset(Value - BF.getAddress());
567       HasUnreachable = true;
568       LLVM_DEBUG(dbgs() << "OK: __builtin_unreachable\n");
569       continue;
570     }
571 
572     // Function or one of its fragments.
573     BinaryFunction *TargetBF = getBinaryFunctionContainingAddress(Value);
574 
575     // We assume that a jump table cannot have function start as an entry.
576     if (!doesBelongToFunction(Value, TargetBF) || Value == BF.getAddress()) {
577       LLVM_DEBUG({
578         if (!BF.containsAddress(Value)) {
579           dbgs() << "FAIL: function doesn't contain this address\n";
580           if (TargetBF) {
581             dbgs() << "  ! function containing this address: "
582                    << TargetBF->getPrintName() << '\n';
583             if (TargetBF->isFragment())
584               dbgs() << "  ! is a fragment\n";
585             for (BinaryFunction *TargetParent : TargetBF->ParentFragments)
586               dbgs() << "  ! its parent is "
587                      << (TargetParent ? TargetParent->getPrintName() : "(none)")
588                      << '\n';
589           }
590         }
591         if (Value == BF.getAddress())
592           dbgs() << "FAIL: jump table cannot have function start as an entry\n";
593       });
594       break;
595     }
596 
597     // Check there's an instruction at this offset.
598     if (TargetBF->getState() == BinaryFunction::State::Disassembled &&
599         !TargetBF->getInstructionAtOffset(Value - TargetBF->getAddress())) {
600       LLVM_DEBUG(dbgs() << "FAIL: no instruction at this offset\n");
601       break;
602     }
603 
604     ++NumRealEntries;
605 
606     if (TargetBF == &BF) {
607       // Address inside the function.
608       addOffset(Value - TargetBF->getAddress());
609       LLVM_DEBUG(dbgs() << "OK: real entry\n");
610     } else {
611       // Address in split fragment.
612       BF.setHasSplitJumpTable(true);
613       // Add invalid offset for proper identification of jump table size.
614       addOffset(INVALID_OFFSET);
615       LLVM_DEBUG(dbgs() << "OK: address in split fragment "
616                         << TargetBF->getPrintName() << '\n');
617     }
618   }
619 
620   // It's a jump table if the number of real entries is more than 1, or there's
621   // one real entry and "unreachable" targets. If there are only multiple
622   // "unreachable" targets, then it's not a jump table.
623   return NumRealEntries + HasUnreachable >= 2;
624 }
625 
626 void BinaryContext::populateJumpTables() {
627   LLVM_DEBUG(dbgs() << "DataPCRelocations: " << DataPCRelocations.size()
628                     << '\n');
629   for (auto JTI = JumpTables.begin(), JTE = JumpTables.end(); JTI != JTE;
630        ++JTI) {
631     JumpTable *JT = JTI->second;
632     BinaryFunction &BF = *JT->Parent;
633 
634     if (!BF.isSimple())
635       continue;
636 
637     uint64_t NextJTAddress = 0;
638     auto NextJTI = std::next(JTI);
639     if (NextJTI != JTE)
640       NextJTAddress = NextJTI->second->getAddress();
641 
642     const bool Success = analyzeJumpTable(JT->getAddress(), JT->Type, BF,
643                                           NextJTAddress, &JT->OffsetEntries);
644     if (!Success) {
645       dbgs() << "failed to analyze jump table in function " << BF << '\n';
646       JT->print(dbgs());
647       if (NextJTI != JTE) {
648         dbgs() << "next jump table at 0x"
649                << Twine::utohexstr(NextJTI->second->getAddress())
650                << " belongs to function " << *NextJTI->second->Parent << '\n';
651         NextJTI->second->print(dbgs());
652       }
653       llvm_unreachable("jump table heuristic failure");
654     }
655 
656     for (uint64_t EntryOffset : JT->OffsetEntries) {
657       if (EntryOffset == BF.getSize())
658         BF.IgnoredBranches.emplace_back(EntryOffset, BF.getSize());
659       else
660         BF.registerReferencedOffset(EntryOffset);
661     }
662 
663     // In strict mode, erase PC-relative relocation record. Later we check that
664     // all such records are erased and thus have been accounted for.
665     if (opts::StrictMode && JT->Type == JumpTable::JTT_PIC) {
666       for (uint64_t Address = JT->getAddress();
667            Address < JT->getAddress() + JT->getSize();
668            Address += JT->EntrySize) {
669         DataPCRelocations.erase(DataPCRelocations.find(Address));
670       }
671     }
672 
673     // Mark to skip the function and all its fragments.
674     if (BF.hasSplitJumpTable())
675       FragmentsToSkip.push_back(&BF);
676   }
677 
678   if (opts::StrictMode && DataPCRelocations.size()) {
679     LLVM_DEBUG({
680       dbgs() << DataPCRelocations.size()
681              << " unclaimed PC-relative relocations left in data:\n";
682       for (uint64_t Reloc : DataPCRelocations)
683         dbgs() << Twine::utohexstr(Reloc) << '\n';
684     });
685     assert(0 && "unclaimed PC-relative relocations left in data\n");
686   }
687   clearList(DataPCRelocations);
688 }
689 
690 void BinaryContext::skipMarkedFragments() {
691   // Unique functions in the vector.
692   std::unordered_set<BinaryFunction *> UniqueFunctions(FragmentsToSkip.begin(),
693                                                        FragmentsToSkip.end());
694   // Copy the functions back to FragmentsToSkip.
695   FragmentsToSkip.assign(UniqueFunctions.begin(), UniqueFunctions.end());
696   auto addToWorklist = [&](BinaryFunction *Function) -> void {
697     if (UniqueFunctions.count(Function))
698       return;
699     FragmentsToSkip.push_back(Function);
700     UniqueFunctions.insert(Function);
701   };
702   // Functions containing split jump tables need to be skipped with all
703   // fragments (transitively).
704   for (size_t I = 0; I != FragmentsToSkip.size(); I++) {
705     BinaryFunction *BF = FragmentsToSkip[I];
706     assert(UniqueFunctions.count(BF) &&
707            "internal error in traversing function fragments");
708     if (opts::Verbosity >= 1)
709       errs() << "BOLT-WARNING: Ignoring " << BF->getPrintName() << '\n';
710     BF->setIgnored();
711     std::for_each(BF->Fragments.begin(), BF->Fragments.end(), addToWorklist);
712     std::for_each(BF->ParentFragments.begin(), BF->ParentFragments.end(),
713                   addToWorklist);
714   }
715   if (!FragmentsToSkip.empty())
716     errs() << "BOLT-WARNING: ignored " << FragmentsToSkip.size() << " function"
717            << (FragmentsToSkip.size() == 1 ? "" : "s")
718            << " due to cold fragments\n";
719   FragmentsToSkip.clear();
720 }
721 
722 MCSymbol *BinaryContext::getOrCreateGlobalSymbol(uint64_t Address, Twine Prefix,
723                                                  uint64_t Size,
724                                                  uint16_t Alignment,
725                                                  unsigned Flags) {
726   auto Itr = BinaryDataMap.find(Address);
727   if (Itr != BinaryDataMap.end()) {
728     assert(Itr->second->getSize() == Size || !Size);
729     return Itr->second->getSymbol();
730   }
731 
732   std::string Name = (Prefix + "0x" + Twine::utohexstr(Address)).str();
733   assert(!GlobalSymbols.count(Name) && "created name is not unique");
734   return registerNameAtAddress(Name, Address, Size, Alignment, Flags);
735 }
736 
737 MCSymbol *BinaryContext::getOrCreateUndefinedGlobalSymbol(StringRef Name) {
738   return Ctx->getOrCreateSymbol(Name);
739 }
740 
741 BinaryFunction *BinaryContext::createBinaryFunction(
742     const std::string &Name, BinarySection &Section, uint64_t Address,
743     uint64_t Size, uint64_t SymbolSize, uint16_t Alignment) {
744   auto Result = BinaryFunctions.emplace(
745       Address, BinaryFunction(Name, Section, Address, Size, *this));
746   assert(Result.second == true && "unexpected duplicate function");
747   BinaryFunction *BF = &Result.first->second;
748   registerNameAtAddress(Name, Address, SymbolSize ? SymbolSize : Size,
749                         Alignment);
750   setSymbolToFunctionMap(BF->getSymbol(), BF);
751   return BF;
752 }
753 
754 const MCSymbol *
755 BinaryContext::getOrCreateJumpTable(BinaryFunction &Function, uint64_t Address,
756                                     JumpTable::JumpTableType Type) {
757   if (JumpTable *JT = getJumpTableContainingAddress(Address)) {
758     assert(JT->Type == Type && "jump table types have to match");
759     assert(JT->Parent == &Function &&
760            "cannot re-use jump table of a different function");
761     assert(Address == JT->getAddress() && "unexpected non-empty jump table");
762 
763     return JT->getFirstLabel();
764   }
765 
766   // Re-use the existing symbol if possible.
767   MCSymbol *JTLabel = nullptr;
768   if (BinaryData *Object = getBinaryDataAtAddress(Address)) {
769     if (!isInternalSymbolName(Object->getSymbol()->getName()))
770       JTLabel = Object->getSymbol();
771   }
772 
773   const uint64_t EntrySize = getJumpTableEntrySize(Type);
774   if (!JTLabel) {
775     const std::string JumpTableName = generateJumpTableName(Function, Address);
776     JTLabel = registerNameAtAddress(JumpTableName, Address, 0, EntrySize);
777   }
778 
779   LLVM_DEBUG(dbgs() << "BOLT-DEBUG: creating jump table " << JTLabel->getName()
780                     << " in function " << Function << '\n');
781 
782   JumpTable *JT = new JumpTable(*JTLabel, Address, EntrySize, Type,
783                                 JumpTable::LabelMapType{{0, JTLabel}}, Function,
784                                 *getSectionForAddress(Address));
785   JumpTables.emplace(Address, JT);
786 
787   // Duplicate the entry for the parent function for easy access.
788   Function.JumpTables.emplace(Address, JT);
789 
790   return JTLabel;
791 }
792 
793 std::pair<uint64_t, const MCSymbol *>
794 BinaryContext::duplicateJumpTable(BinaryFunction &Function, JumpTable *JT,
795                                   const MCSymbol *OldLabel) {
796   auto L = scopeLock();
797   unsigned Offset = 0;
798   bool Found = false;
799   for (std::pair<const unsigned, MCSymbol *> Elmt : JT->Labels) {
800     if (Elmt.second != OldLabel)
801       continue;
802     Offset = Elmt.first;
803     Found = true;
804     break;
805   }
806   assert(Found && "Label not found");
807   MCSymbol *NewLabel = Ctx->createNamedTempSymbol("duplicatedJT");
808   JumpTable *NewJT =
809       new JumpTable(*NewLabel, JT->getAddress(), JT->EntrySize, JT->Type,
810                     JumpTable::LabelMapType{{Offset, NewLabel}}, Function,
811                     *getSectionForAddress(JT->getAddress()));
812   NewJT->Entries = JT->Entries;
813   NewJT->Counts = JT->Counts;
814   uint64_t JumpTableID = ++DuplicatedJumpTables;
815   // Invert it to differentiate from regular jump tables whose IDs are their
816   // addresses in the input binary memory space
817   JumpTableID = ~JumpTableID;
818   JumpTables.emplace(JumpTableID, NewJT);
819   Function.JumpTables.emplace(JumpTableID, NewJT);
820   return std::make_pair(JumpTableID, NewLabel);
821 }
822 
823 std::string BinaryContext::generateJumpTableName(const BinaryFunction &BF,
824                                                  uint64_t Address) {
825   size_t Id;
826   uint64_t Offset = 0;
827   if (const JumpTable *JT = BF.getJumpTableContainingAddress(Address)) {
828     Offset = Address - JT->getAddress();
829     auto Itr = JT->Labels.find(Offset);
830     if (Itr != JT->Labels.end())
831       return std::string(Itr->second->getName());
832     Id = JumpTableIds.at(JT->getAddress());
833   } else {
834     Id = JumpTableIds[Address] = BF.JumpTables.size();
835   }
836   return ("JUMP_TABLE/" + BF.getOneName().str() + "." + std::to_string(Id) +
837           (Offset ? ("." + std::to_string(Offset)) : ""));
838 }
839 
840 bool BinaryContext::hasValidCodePadding(const BinaryFunction &BF) {
841   // FIXME: aarch64 support is missing.
842   if (!isX86())
843     return true;
844 
845   if (BF.getSize() == BF.getMaxSize())
846     return true;
847 
848   ErrorOr<ArrayRef<unsigned char>> FunctionData = BF.getData();
849   assert(FunctionData && "cannot get function as data");
850 
851   uint64_t Offset = BF.getSize();
852   MCInst Instr;
853   uint64_t InstrSize = 0;
854   uint64_t InstrAddress = BF.getAddress() + Offset;
855   using std::placeholders::_1;
856 
857   // Skip instructions that satisfy the predicate condition.
858   auto skipInstructions = [&](std::function<bool(const MCInst &)> Predicate) {
859     const uint64_t StartOffset = Offset;
860     for (; Offset < BF.getMaxSize();
861          Offset += InstrSize, InstrAddress += InstrSize) {
862       if (!DisAsm->getInstruction(Instr, InstrSize, FunctionData->slice(Offset),
863                                   InstrAddress, nulls()))
864         break;
865       if (!Predicate(Instr))
866         break;
867     }
868 
869     return Offset - StartOffset;
870   };
871 
872   // Skip a sequence of zero bytes.
873   auto skipZeros = [&]() {
874     const uint64_t StartOffset = Offset;
875     for (; Offset < BF.getMaxSize(); ++Offset)
876       if ((*FunctionData)[Offset] != 0)
877         break;
878 
879     return Offset - StartOffset;
880   };
881 
882   // Accept the whole padding area filled with breakpoints.
883   auto isBreakpoint = std::bind(&MCPlusBuilder::isBreakpoint, MIB.get(), _1);
884   if (skipInstructions(isBreakpoint) && Offset == BF.getMaxSize())
885     return true;
886 
887   auto isNoop = std::bind(&MCPlusBuilder::isNoop, MIB.get(), _1);
888 
889   // Some functions have a jump to the next function or to the padding area
890   // inserted after the body.
891   auto isSkipJump = [&](const MCInst &Instr) {
892     uint64_t TargetAddress = 0;
893     if (MIB->isUnconditionalBranch(Instr) &&
894         MIB->evaluateBranch(Instr, InstrAddress, InstrSize, TargetAddress)) {
895       if (TargetAddress >= InstrAddress + InstrSize &&
896           TargetAddress <= BF.getAddress() + BF.getMaxSize()) {
897         return true;
898       }
899     }
900     return false;
901   };
902 
903   // Skip over nops, jumps, and zero padding. Allow interleaving (this happens).
904   while (skipInstructions(isNoop) || skipInstructions(isSkipJump) ||
905          skipZeros())
906     ;
907 
908   if (Offset == BF.getMaxSize())
909     return true;
910 
911   if (opts::Verbosity >= 1) {
912     errs() << "BOLT-WARNING: bad padding at address 0x"
913            << Twine::utohexstr(BF.getAddress() + BF.getSize())
914            << " starting at offset " << (Offset - BF.getSize())
915            << " in function " << BF << '\n'
916            << FunctionData->slice(BF.getSize(), BF.getMaxSize() - BF.getSize())
917            << '\n';
918   }
919 
920   return false;
921 }
922 
923 void BinaryContext::adjustCodePadding() {
924   for (auto &BFI : BinaryFunctions) {
925     BinaryFunction &BF = BFI.second;
926     if (!shouldEmit(BF))
927       continue;
928 
929     if (!hasValidCodePadding(BF)) {
930       if (HasRelocations) {
931         if (opts::Verbosity >= 1) {
932           outs() << "BOLT-INFO: function " << BF
933                  << " has invalid padding. Ignoring the function.\n";
934         }
935         BF.setIgnored();
936       } else {
937         BF.setMaxSize(BF.getSize());
938       }
939     }
940   }
941 }
942 
943 MCSymbol *BinaryContext::registerNameAtAddress(StringRef Name, uint64_t Address,
944                                                uint64_t Size,
945                                                uint16_t Alignment,
946                                                unsigned Flags) {
947   // Register the name with MCContext.
948   MCSymbol *Symbol = Ctx->getOrCreateSymbol(Name);
949 
950   auto GAI = BinaryDataMap.find(Address);
951   BinaryData *BD;
952   if (GAI == BinaryDataMap.end()) {
953     ErrorOr<BinarySection &> SectionOrErr = getSectionForAddress(Address);
954     BinarySection &Section =
955         SectionOrErr ? SectionOrErr.get() : absoluteSection();
956     BD = new BinaryData(*Symbol, Address, Size, Alignment ? Alignment : 1,
957                         Section, Flags);
958     GAI = BinaryDataMap.emplace(Address, BD).first;
959     GlobalSymbols[Name] = BD;
960     updateObjectNesting(GAI);
961   } else {
962     BD = GAI->second;
963     if (!BD->hasName(Name)) {
964       GlobalSymbols[Name] = BD;
965       BD->Symbols.push_back(Symbol);
966     }
967   }
968 
969   return Symbol;
970 }
971 
972 const BinaryData *
973 BinaryContext::getBinaryDataContainingAddressImpl(uint64_t Address) const {
974   auto NI = BinaryDataMap.lower_bound(Address);
975   auto End = BinaryDataMap.end();
976   if ((NI != End && Address == NI->first) ||
977       ((NI != BinaryDataMap.begin()) && (NI-- != BinaryDataMap.begin()))) {
978     if (NI->second->containsAddress(Address))
979       return NI->second;
980 
981     // If this is a sub-symbol, see if a parent data contains the address.
982     const BinaryData *BD = NI->second->getParent();
983     while (BD) {
984       if (BD->containsAddress(Address))
985         return BD;
986       BD = BD->getParent();
987     }
988   }
989   return nullptr;
990 }
991 
992 bool BinaryContext::setBinaryDataSize(uint64_t Address, uint64_t Size) {
993   auto NI = BinaryDataMap.find(Address);
994   assert(NI != BinaryDataMap.end());
995   if (NI == BinaryDataMap.end())
996     return false;
997   // TODO: it's possible that a jump table starts at the same address
998   // as a larger blob of private data.  When we set the size of the
999   // jump table, it might be smaller than the total blob size.  In this
1000   // case we just leave the original size since (currently) it won't really
1001   // affect anything.
1002   assert((!NI->second->Size || NI->second->Size == Size ||
1003           (NI->second->isJumpTable() && NI->second->Size > Size)) &&
1004          "can't change the size of a symbol that has already had its "
1005          "size set");
1006   if (!NI->second->Size) {
1007     NI->second->Size = Size;
1008     updateObjectNesting(NI);
1009     return true;
1010   }
1011   return false;
1012 }
1013 
1014 void BinaryContext::generateSymbolHashes() {
1015   auto isPadding = [](const BinaryData &BD) {
1016     StringRef Contents = BD.getSection().getContents();
1017     StringRef SymData = Contents.substr(BD.getOffset(), BD.getSize());
1018     return (BD.getName().startswith("HOLEat") ||
1019             SymData.find_first_not_of(0) == StringRef::npos);
1020   };
1021 
1022   uint64_t NumCollisions = 0;
1023   for (auto &Entry : BinaryDataMap) {
1024     BinaryData &BD = *Entry.second;
1025     StringRef Name = BD.getName();
1026 
1027     if (!isInternalSymbolName(Name))
1028       continue;
1029 
1030     // First check if a non-anonymous alias exists and move it to the front.
1031     if (BD.getSymbols().size() > 1) {
1032       auto Itr = std::find_if(BD.getSymbols().begin(), BD.getSymbols().end(),
1033                               [&](const MCSymbol *Symbol) {
1034                                 return !isInternalSymbolName(Symbol->getName());
1035                               });
1036       if (Itr != BD.getSymbols().end()) {
1037         size_t Idx = std::distance(BD.getSymbols().begin(), Itr);
1038         std::swap(BD.getSymbols()[0], BD.getSymbols()[Idx]);
1039         continue;
1040       }
1041     }
1042 
1043     // We have to skip 0 size symbols since they will all collide.
1044     if (BD.getSize() == 0) {
1045       continue;
1046     }
1047 
1048     const uint64_t Hash = BD.getSection().hash(BD);
1049     const size_t Idx = Name.find("0x");
1050     std::string NewName =
1051         (Twine(Name.substr(0, Idx)) + "_" + Twine::utohexstr(Hash)).str();
1052     if (getBinaryDataByName(NewName)) {
1053       // Ignore collisions for symbols that appear to be padding
1054       // (i.e. all zeros or a "hole")
1055       if (!isPadding(BD)) {
1056         if (opts::Verbosity) {
1057           errs() << "BOLT-WARNING: collision detected when hashing " << BD
1058                  << " with new name (" << NewName << "), skipping.\n";
1059         }
1060         ++NumCollisions;
1061       }
1062       continue;
1063     }
1064     BD.Symbols.insert(BD.Symbols.begin(), Ctx->getOrCreateSymbol(NewName));
1065     GlobalSymbols[NewName] = &BD;
1066   }
1067   if (NumCollisions) {
1068     errs() << "BOLT-WARNING: " << NumCollisions
1069            << " collisions detected while hashing binary objects";
1070     if (!opts::Verbosity)
1071       errs() << ". Use -v=1 to see the list.";
1072     errs() << '\n';
1073   }
1074 }
1075 
1076 bool BinaryContext::registerFragment(BinaryFunction &TargetFunction,
1077                                      BinaryFunction &Function) const {
1078   if (!isPotentialFragmentByName(TargetFunction, Function))
1079     return false;
1080   assert(TargetFunction.isFragment() && "TargetFunction must be a fragment");
1081   if (TargetFunction.isParentFragment(&Function))
1082     return true;
1083   TargetFunction.addParentFragment(Function);
1084   Function.addFragment(TargetFunction);
1085   if (!HasRelocations) {
1086     TargetFunction.setSimple(false);
1087     Function.setSimple(false);
1088   }
1089   if (opts::Verbosity >= 1) {
1090     outs() << "BOLT-INFO: marking " << TargetFunction << " as a fragment of "
1091            << Function << '\n';
1092   }
1093   return true;
1094 }
1095 
1096 void BinaryContext::processInterproceduralReferences(BinaryFunction &Function) {
1097   for (uint64_t Address : Function.InterproceduralReferences) {
1098     if (!Address)
1099       continue;
1100 
1101     BinaryFunction *TargetFunction =
1102         getBinaryFunctionContainingAddress(Address);
1103     if (&Function == TargetFunction)
1104       continue;
1105 
1106     if (TargetFunction) {
1107       if (TargetFunction->IsFragment &&
1108           !registerFragment(*TargetFunction, Function)) {
1109         errs() << "BOLT-WARNING: interprocedural reference between unrelated "
1110                   "fragments: "
1111                << Function.getPrintName() << " and "
1112                << TargetFunction->getPrintName() << '\n';
1113       }
1114       if (uint64_t Offset = Address - TargetFunction->getAddress())
1115         TargetFunction->addEntryPointAtOffset(Offset);
1116 
1117       continue;
1118     }
1119 
1120     // Check if address falls in function padding space - this could be
1121     // unmarked data in code. In this case adjust the padding space size.
1122     ErrorOr<BinarySection &> Section = getSectionForAddress(Address);
1123     assert(Section && "cannot get section for referenced address");
1124 
1125     if (!Section->isText())
1126       continue;
1127 
1128     // PLT requires special handling and could be ignored in this context.
1129     StringRef SectionName = Section->getName();
1130     if (SectionName == ".plt" || SectionName == ".plt.got")
1131       continue;
1132 
1133     if (opts::processAllFunctions()) {
1134       errs() << "BOLT-ERROR: cannot process binaries with unmarked "
1135              << "object in code at address 0x" << Twine::utohexstr(Address)
1136              << " belonging to section " << SectionName << " in current mode\n";
1137       exit(1);
1138     }
1139 
1140     TargetFunction = getBinaryFunctionContainingAddress(Address,
1141                                                         /*CheckPastEnd=*/false,
1142                                                         /*UseMaxSize=*/true);
1143     // We are not going to overwrite non-simple functions, but for simple
1144     // ones - adjust the padding size.
1145     if (TargetFunction && TargetFunction->isSimple()) {
1146       errs() << "BOLT-WARNING: function " << *TargetFunction
1147              << " has an object detected in a padding region at address 0x"
1148              << Twine::utohexstr(Address) << '\n';
1149       TargetFunction->setMaxSize(TargetFunction->getSize());
1150     }
1151   }
1152 
1153   clearList(Function.InterproceduralReferences);
1154 }
1155 
1156 void BinaryContext::postProcessSymbolTable() {
1157   fixBinaryDataHoles();
1158   bool Valid = true;
1159   for (auto &Entry : BinaryDataMap) {
1160     BinaryData *BD = Entry.second;
1161     if ((BD->getName().startswith("SYMBOLat") ||
1162          BD->getName().startswith("DATAat")) &&
1163         !BD->getParent() && !BD->getSize() && !BD->isAbsolute() &&
1164         BD->getSection()) {
1165       errs() << "BOLT-WARNING: zero-sized top level symbol: " << *BD << "\n";
1166       Valid = false;
1167     }
1168   }
1169   assert(Valid);
1170   generateSymbolHashes();
1171 }
1172 
1173 void BinaryContext::foldFunction(BinaryFunction &ChildBF,
1174                                  BinaryFunction &ParentBF) {
1175   assert(!ChildBF.isMultiEntry() && !ParentBF.isMultiEntry() &&
1176          "cannot merge functions with multiple entry points");
1177 
1178   std::unique_lock<std::shared_timed_mutex> WriteCtxLock(CtxMutex,
1179                                                          std::defer_lock);
1180   std::unique_lock<std::shared_timed_mutex> WriteSymbolMapLock(
1181       SymbolToFunctionMapMutex, std::defer_lock);
1182 
1183   const StringRef ChildName = ChildBF.getOneName();
1184 
1185   // Move symbols over and update bookkeeping info.
1186   for (MCSymbol *Symbol : ChildBF.getSymbols()) {
1187     ParentBF.getSymbols().push_back(Symbol);
1188     WriteSymbolMapLock.lock();
1189     SymbolToFunctionMap[Symbol] = &ParentBF;
1190     WriteSymbolMapLock.unlock();
1191     // NB: there's no need to update BinaryDataMap and GlobalSymbols.
1192   }
1193   ChildBF.getSymbols().clear();
1194 
1195   // Move other names the child function is known under.
1196   std::move(ChildBF.Aliases.begin(), ChildBF.Aliases.end(),
1197             std::back_inserter(ParentBF.Aliases));
1198   ChildBF.Aliases.clear();
1199 
1200   if (HasRelocations) {
1201     // Merge execution counts of ChildBF into those of ParentBF.
1202     // Without relocations, we cannot reliably merge profiles as both functions
1203     // continue to exist and either one can be executed.
1204     ChildBF.mergeProfileDataInto(ParentBF);
1205 
1206     std::shared_lock<std::shared_timed_mutex> ReadBfsLock(BinaryFunctionsMutex,
1207                                                           std::defer_lock);
1208     std::unique_lock<std::shared_timed_mutex> WriteBfsLock(BinaryFunctionsMutex,
1209                                                            std::defer_lock);
1210     // Remove ChildBF from the global set of functions in relocs mode.
1211     ReadBfsLock.lock();
1212     auto FI = BinaryFunctions.find(ChildBF.getAddress());
1213     ReadBfsLock.unlock();
1214 
1215     assert(FI != BinaryFunctions.end() && "function not found");
1216     assert(&ChildBF == &FI->second && "function mismatch");
1217 
1218     WriteBfsLock.lock();
1219     ChildBF.clearDisasmState();
1220     FI = BinaryFunctions.erase(FI);
1221     WriteBfsLock.unlock();
1222 
1223   } else {
1224     // In non-relocation mode we keep the function, but rename it.
1225     std::string NewName = "__ICF_" + ChildName.str();
1226 
1227     WriteCtxLock.lock();
1228     ChildBF.getSymbols().push_back(Ctx->getOrCreateSymbol(NewName));
1229     WriteCtxLock.unlock();
1230 
1231     ChildBF.setFolded(&ParentBF);
1232   }
1233 }
1234 
1235 void BinaryContext::fixBinaryDataHoles() {
1236   assert(validateObjectNesting() && "object nesting inconsitency detected");
1237 
1238   for (BinarySection &Section : allocatableSections()) {
1239     std::vector<std::pair<uint64_t, uint64_t>> Holes;
1240 
1241     auto isNotHole = [&Section](const binary_data_iterator &Itr) {
1242       BinaryData *BD = Itr->second;
1243       bool isHole = (!BD->getParent() && !BD->getSize() && BD->isObject() &&
1244                      (BD->getName().startswith("SYMBOLat0x") ||
1245                       BD->getName().startswith("DATAat0x") ||
1246                       BD->getName().startswith("ANONYMOUS")));
1247       return !isHole && BD->getSection() == Section && !BD->getParent();
1248     };
1249 
1250     auto BDStart = BinaryDataMap.begin();
1251     auto BDEnd = BinaryDataMap.end();
1252     auto Itr = FilteredBinaryDataIterator(isNotHole, BDStart, BDEnd);
1253     auto End = FilteredBinaryDataIterator(isNotHole, BDEnd, BDEnd);
1254 
1255     uint64_t EndAddress = Section.getAddress();
1256 
1257     while (Itr != End) {
1258       if (Itr->second->getAddress() > EndAddress) {
1259         uint64_t Gap = Itr->second->getAddress() - EndAddress;
1260         Holes.emplace_back(EndAddress, Gap);
1261       }
1262       EndAddress = Itr->second->getEndAddress();
1263       ++Itr;
1264     }
1265 
1266     if (EndAddress < Section.getEndAddress())
1267       Holes.emplace_back(EndAddress, Section.getEndAddress() - EndAddress);
1268 
1269     // If there is already a symbol at the start of the hole, grow that symbol
1270     // to cover the rest.  Otherwise, create a new symbol to cover the hole.
1271     for (std::pair<uint64_t, uint64_t> &Hole : Holes) {
1272       BinaryData *BD = getBinaryDataAtAddress(Hole.first);
1273       if (BD) {
1274         // BD->getSection() can be != Section if there are sections that
1275         // overlap.  In this case it is probably safe to just skip the holes
1276         // since the overlapping section will not(?) have any symbols in it.
1277         if (BD->getSection() == Section)
1278           setBinaryDataSize(Hole.first, Hole.second);
1279       } else {
1280         getOrCreateGlobalSymbol(Hole.first, "HOLEat", Hole.second, 1);
1281       }
1282     }
1283   }
1284 
1285   assert(validateObjectNesting() && "object nesting inconsitency detected");
1286   assert(validateHoles() && "top level hole detected in object map");
1287 }
1288 
1289 void BinaryContext::printGlobalSymbols(raw_ostream &OS) const {
1290   const BinarySection *CurrentSection = nullptr;
1291   bool FirstSection = true;
1292 
1293   for (auto &Entry : BinaryDataMap) {
1294     const BinaryData *BD = Entry.second;
1295     const BinarySection &Section = BD->getSection();
1296     if (FirstSection || Section != *CurrentSection) {
1297       uint64_t Address, Size;
1298       StringRef Name = Section.getName();
1299       if (Section) {
1300         Address = Section.getAddress();
1301         Size = Section.getSize();
1302       } else {
1303         Address = BD->getAddress();
1304         Size = BD->getSize();
1305       }
1306       OS << "BOLT-INFO: Section " << Name << ", "
1307          << "0x" + Twine::utohexstr(Address) << ":"
1308          << "0x" + Twine::utohexstr(Address + Size) << "/" << Size << "\n";
1309       CurrentSection = &Section;
1310       FirstSection = false;
1311     }
1312 
1313     OS << "BOLT-INFO: ";
1314     const BinaryData *P = BD->getParent();
1315     while (P) {
1316       OS << "  ";
1317       P = P->getParent();
1318     }
1319     OS << *BD << "\n";
1320   }
1321 }
1322 
1323 Expected<unsigned>
1324 BinaryContext::getDwarfFile(StringRef Directory, StringRef FileName,
1325                             unsigned FileNumber,
1326                             Optional<MD5::MD5Result> Checksum,
1327                             Optional<StringRef> Source, unsigned CUID) {
1328   DwarfLineTable &Table = DwarfLineTablesCUMap[CUID];
1329   return Table.tryGetFile(Directory, FileName, Checksum, Source,
1330                           Ctx->getDwarfVersion(), FileNumber);
1331 }
1332 
1333 unsigned BinaryContext::addDebugFilenameToUnit(const uint32_t DestCUID,
1334                                                const uint32_t SrcCUID,
1335                                                unsigned FileIndex) {
1336   DWARFCompileUnit *SrcUnit = DwCtx->getCompileUnitForOffset(SrcCUID);
1337   const DWARFDebugLine::LineTable *LineTable =
1338       DwCtx->getLineTableForUnit(SrcUnit);
1339   const std::vector<DWARFDebugLine::FileNameEntry> &FileNames =
1340       LineTable->Prologue.FileNames;
1341   // Dir indexes start at 1, as DWARF file numbers, and a dir index 0
1342   // means empty dir.
1343   assert(FileIndex > 0 && FileIndex <= FileNames.size() &&
1344          "FileIndex out of range for the compilation unit.");
1345   StringRef Dir = "";
1346   if (FileNames[FileIndex - 1].DirIdx != 0) {
1347     if (Optional<const char *> DirName = dwarf::toString(
1348             LineTable->Prologue
1349                 .IncludeDirectories[FileNames[FileIndex - 1].DirIdx - 1])) {
1350       Dir = *DirName;
1351     }
1352   }
1353   StringRef FileName = "";
1354   if (Optional<const char *> FName =
1355           dwarf::toString(FileNames[FileIndex - 1].Name))
1356     FileName = *FName;
1357   assert(FileName != "");
1358   return cantFail(getDwarfFile(Dir, FileName, 0, None, None, DestCUID));
1359 }
1360 
1361 std::vector<BinaryFunction *> BinaryContext::getSortedFunctions() {
1362   std::vector<BinaryFunction *> SortedFunctions(BinaryFunctions.size());
1363   std::transform(BinaryFunctions.begin(), BinaryFunctions.end(),
1364                  SortedFunctions.begin(),
1365                  [](std::pair<const uint64_t, BinaryFunction> &BFI) {
1366                    return &BFI.second;
1367                  });
1368 
1369   std::stable_sort(SortedFunctions.begin(), SortedFunctions.end(),
1370                    [](const BinaryFunction *A, const BinaryFunction *B) {
1371                      if (A->hasValidIndex() && B->hasValidIndex()) {
1372                        return A->getIndex() < B->getIndex();
1373                      }
1374                      return A->hasValidIndex();
1375                    });
1376   return SortedFunctions;
1377 }
1378 
1379 std::vector<BinaryFunction *> BinaryContext::getAllBinaryFunctions() {
1380   std::vector<BinaryFunction *> AllFunctions;
1381   AllFunctions.reserve(BinaryFunctions.size() + InjectedBinaryFunctions.size());
1382   std::transform(BinaryFunctions.begin(), BinaryFunctions.end(),
1383                  std::back_inserter(AllFunctions),
1384                  [](std::pair<const uint64_t, BinaryFunction> &BFI) {
1385                    return &BFI.second;
1386                  });
1387   std::copy(InjectedBinaryFunctions.begin(), InjectedBinaryFunctions.end(),
1388             std::back_inserter(AllFunctions));
1389 
1390   return AllFunctions;
1391 }
1392 
1393 Optional<DWARFUnit *> BinaryContext::getDWOCU(uint64_t DWOId) {
1394   auto Iter = DWOCUs.find(DWOId);
1395   if (Iter == DWOCUs.end())
1396     return None;
1397 
1398   return Iter->second;
1399 }
1400 
1401 DWARFContext *BinaryContext::getDWOContext() {
1402   if (DWOCUs.empty())
1403     return nullptr;
1404   return &DWOCUs.begin()->second->getContext();
1405 }
1406 
1407 /// Handles DWO sections that can either be in .o, .dwo or .dwp files.
1408 void BinaryContext::preprocessDWODebugInfo() {
1409   for (const std::unique_ptr<DWARFUnit> &CU : DwCtx->compile_units()) {
1410     DWARFUnit *const DwarfUnit = CU.get();
1411     if (llvm::Optional<uint64_t> DWOId = DwarfUnit->getDWOId()) {
1412       DWARFUnit *DWOCU = DwarfUnit->getNonSkeletonUnitDIE(false).getDwarfUnit();
1413       if (!DWOCU->isDWOUnit()) {
1414         std::string DWOName = dwarf::toString(
1415             DwarfUnit->getUnitDIE().find(
1416                 {dwarf::DW_AT_dwo_name, dwarf::DW_AT_GNU_dwo_name}),
1417             "");
1418         outs() << "BOLT-WARNING: Debug Fission: DWO debug information for "
1419                << DWOName
1420                << " was not retrieved and won't be updated. Please check "
1421                   "relative path.\n";
1422         continue;
1423       }
1424       DWOCUs[*DWOId] = DWOCU;
1425     }
1426   }
1427 }
1428 
1429 void BinaryContext::preprocessDebugInfo() {
1430   struct CURange {
1431     uint64_t LowPC;
1432     uint64_t HighPC;
1433     DWARFUnit *Unit;
1434 
1435     bool operator<(const CURange &Other) const { return LowPC < Other.LowPC; }
1436   };
1437 
1438   // Building a map of address ranges to CUs similar to .debug_aranges and use
1439   // it to assign CU to functions.
1440   std::vector<CURange> AllRanges;
1441   AllRanges.reserve(DwCtx->getNumCompileUnits());
1442   for (const std::unique_ptr<DWARFUnit> &CU : DwCtx->compile_units()) {
1443     Expected<DWARFAddressRangesVector> RangesOrError =
1444         CU->getUnitDIE().getAddressRanges();
1445     if (!RangesOrError) {
1446       consumeError(RangesOrError.takeError());
1447       continue;
1448     }
1449     for (DWARFAddressRange &Range : *RangesOrError) {
1450       // Parts of the debug info could be invalidated due to corresponding code
1451       // being removed from the binary by the linker. Hence we check if the
1452       // address is a valid one.
1453       if (containsAddress(Range.LowPC))
1454         AllRanges.emplace_back(CURange{Range.LowPC, Range.HighPC, CU.get()});
1455     }
1456   }
1457 
1458   std::sort(AllRanges.begin(), AllRanges.end());
1459   for (auto &KV : BinaryFunctions) {
1460     const uint64_t FunctionAddress = KV.first;
1461     BinaryFunction &Function = KV.second;
1462 
1463     auto It = std::partition_point(
1464         AllRanges.begin(), AllRanges.end(),
1465         [=](CURange R) { return R.HighPC <= FunctionAddress; });
1466     if (It != AllRanges.end() && It->LowPC <= FunctionAddress) {
1467       Function.setDWARFUnit(It->Unit);
1468     }
1469   }
1470 
1471   // Discover units with debug info that needs to be updated.
1472   for (const auto &KV : BinaryFunctions) {
1473     const BinaryFunction &BF = KV.second;
1474     if (shouldEmit(BF) && BF.getDWARFUnit())
1475       ProcessedCUs.insert(BF.getDWARFUnit());
1476   }
1477 
1478   // Clear debug info for functions from units that we are not going to process.
1479   for (auto &KV : BinaryFunctions) {
1480     BinaryFunction &BF = KV.second;
1481     if (BF.getDWARFUnit() && !ProcessedCUs.count(BF.getDWARFUnit()))
1482       BF.setDWARFUnit(nullptr);
1483   }
1484 
1485   if (opts::Verbosity >= 1) {
1486     outs() << "BOLT-INFO: " << ProcessedCUs.size() << " out of "
1487            << DwCtx->getNumCompileUnits() << " CUs will be updated\n";
1488   }
1489 
1490   // Populate MCContext with DWARF files from all units.
1491   StringRef GlobalPrefix = AsmInfo->getPrivateGlobalPrefix();
1492   for (const std::unique_ptr<DWARFUnit> &CU : DwCtx->compile_units()) {
1493     const uint64_t CUID = CU->getOffset();
1494     getDwarfLineTable(CUID).setLabel(Ctx->getOrCreateSymbol(
1495         GlobalPrefix + "line_table_start" + Twine(CUID)));
1496 
1497     if (!ProcessedCUs.count(CU.get()))
1498       continue;
1499 
1500     const DWARFDebugLine::LineTable *LineTable =
1501         DwCtx->getLineTableForUnit(CU.get());
1502     const std::vector<DWARFDebugLine::FileNameEntry> &FileNames =
1503         LineTable->Prologue.FileNames;
1504 
1505     // Assign a unique label to every line table, one per CU.
1506     // Make sure empty debug line tables are registered too.
1507     if (FileNames.empty()) {
1508       cantFail(getDwarfFile("", "<unknown>", 0, None, None, CUID));
1509       continue;
1510     }
1511     for (size_t I = 0, Size = FileNames.size(); I != Size; ++I) {
1512       // Dir indexes start at 1, as DWARF file numbers, and a dir index 0
1513       // means empty dir.
1514       StringRef Dir = "";
1515       if (FileNames[I].DirIdx != 0)
1516         if (Optional<const char *> DirName = dwarf::toString(
1517                 LineTable->Prologue
1518                     .IncludeDirectories[FileNames[I].DirIdx - 1]))
1519           Dir = *DirName;
1520       StringRef FileName = "";
1521       if (Optional<const char *> FName = dwarf::toString(FileNames[I].Name))
1522         FileName = *FName;
1523       assert(FileName != "");
1524       cantFail(getDwarfFile(Dir, FileName, 0, None, None, CUID));
1525     }
1526   }
1527 
1528   preprocessDWODebugInfo();
1529 }
1530 
1531 bool BinaryContext::shouldEmit(const BinaryFunction &Function) const {
1532   if (opts::processAllFunctions())
1533     return true;
1534 
1535   if (Function.isIgnored())
1536     return false;
1537 
1538   // In relocation mode we will emit non-simple functions with CFG.
1539   // If the function does not have a CFG it should be marked as ignored.
1540   return HasRelocations || Function.isSimple();
1541 }
1542 
1543 void BinaryContext::printCFI(raw_ostream &OS, const MCCFIInstruction &Inst) {
1544   uint32_t Operation = Inst.getOperation();
1545   switch (Operation) {
1546   case MCCFIInstruction::OpSameValue:
1547     OS << "OpSameValue Reg" << Inst.getRegister();
1548     break;
1549   case MCCFIInstruction::OpRememberState:
1550     OS << "OpRememberState";
1551     break;
1552   case MCCFIInstruction::OpRestoreState:
1553     OS << "OpRestoreState";
1554     break;
1555   case MCCFIInstruction::OpOffset:
1556     OS << "OpOffset Reg" << Inst.getRegister() << " " << Inst.getOffset();
1557     break;
1558   case MCCFIInstruction::OpDefCfaRegister:
1559     OS << "OpDefCfaRegister Reg" << Inst.getRegister();
1560     break;
1561   case MCCFIInstruction::OpDefCfaOffset:
1562     OS << "OpDefCfaOffset " << Inst.getOffset();
1563     break;
1564   case MCCFIInstruction::OpDefCfa:
1565     OS << "OpDefCfa Reg" << Inst.getRegister() << " " << Inst.getOffset();
1566     break;
1567   case MCCFIInstruction::OpRelOffset:
1568     OS << "OpRelOffset Reg" << Inst.getRegister() << " " << Inst.getOffset();
1569     break;
1570   case MCCFIInstruction::OpAdjustCfaOffset:
1571     OS << "OfAdjustCfaOffset " << Inst.getOffset();
1572     break;
1573   case MCCFIInstruction::OpEscape:
1574     OS << "OpEscape";
1575     break;
1576   case MCCFIInstruction::OpRestore:
1577     OS << "OpRestore Reg" << Inst.getRegister();
1578     break;
1579   case MCCFIInstruction::OpUndefined:
1580     OS << "OpUndefined Reg" << Inst.getRegister();
1581     break;
1582   case MCCFIInstruction::OpRegister:
1583     OS << "OpRegister Reg" << Inst.getRegister() << " Reg"
1584        << Inst.getRegister2();
1585     break;
1586   case MCCFIInstruction::OpWindowSave:
1587     OS << "OpWindowSave";
1588     break;
1589   case MCCFIInstruction::OpGnuArgsSize:
1590     OS << "OpGnuArgsSize";
1591     break;
1592   default:
1593     OS << "Op#" << Operation;
1594     break;
1595   }
1596 }
1597 
1598 void BinaryContext::printInstruction(raw_ostream &OS, const MCInst &Instruction,
1599                                      uint64_t Offset,
1600                                      const BinaryFunction *Function,
1601                                      bool PrintMCInst, bool PrintMemData,
1602                                      bool PrintRelocations) const {
1603   if (MIB->isEHLabel(Instruction)) {
1604     OS << "  EH_LABEL: " << *MIB->getTargetSymbol(Instruction) << '\n';
1605     return;
1606   }
1607   OS << format("    %08" PRIx64 ": ", Offset);
1608   if (MIB->isCFI(Instruction)) {
1609     uint32_t Offset = Instruction.getOperand(0).getImm();
1610     OS << "\t!CFI\t$" << Offset << "\t; ";
1611     if (Function)
1612       printCFI(OS, *Function->getCFIFor(Instruction));
1613     OS << "\n";
1614     return;
1615   }
1616   InstPrinter->printInst(&Instruction, 0, "", *STI, OS);
1617   if (MIB->isCall(Instruction)) {
1618     if (MIB->isTailCall(Instruction))
1619       OS << " # TAILCALL ";
1620     if (MIB->isInvoke(Instruction)) {
1621       const Optional<MCPlus::MCLandingPad> EHInfo = MIB->getEHInfo(Instruction);
1622       OS << " # handler: ";
1623       if (EHInfo->first)
1624         OS << *EHInfo->first;
1625       else
1626         OS << '0';
1627       OS << "; action: " << EHInfo->second;
1628       const int64_t GnuArgsSize = MIB->getGnuArgsSize(Instruction);
1629       if (GnuArgsSize >= 0)
1630         OS << "; GNU_args_size = " << GnuArgsSize;
1631     }
1632   } else if (MIB->isIndirectBranch(Instruction)) {
1633     if (uint64_t JTAddress = MIB->getJumpTable(Instruction)) {
1634       OS << " # JUMPTABLE @0x" << Twine::utohexstr(JTAddress);
1635     } else {
1636       OS << " # UNKNOWN CONTROL FLOW";
1637     }
1638   }
1639   if (Optional<uint32_t> Offset = MIB->getOffset(Instruction))
1640     OS << " # Offset: " << *Offset;
1641 
1642   MIB->printAnnotations(Instruction, OS);
1643 
1644   if (opts::PrintDebugInfo) {
1645     DebugLineTableRowRef RowRef =
1646         DebugLineTableRowRef::fromSMLoc(Instruction.getLoc());
1647     if (RowRef != DebugLineTableRowRef::NULL_ROW) {
1648       const DWARFDebugLine::LineTable *LineTable;
1649       if (Function && Function->getDWARFUnit() &&
1650           Function->getDWARFUnit()->getOffset() == RowRef.DwCompileUnitIndex) {
1651         LineTable = Function->getDWARFLineTable();
1652       } else {
1653         LineTable = DwCtx->getLineTableForUnit(
1654             DwCtx->getCompileUnitForOffset(RowRef.DwCompileUnitIndex));
1655       }
1656       assert(LineTable &&
1657              "line table expected for instruction with debug info");
1658 
1659       const DWARFDebugLine::Row &Row = LineTable->Rows[RowRef.RowIndex - 1];
1660       StringRef FileName = "";
1661       if (Optional<const char *> FName =
1662               dwarf::toString(LineTable->Prologue.FileNames[Row.File - 1].Name))
1663         FileName = *FName;
1664       OS << " # debug line " << FileName << ":" << Row.Line;
1665       if (Row.Column)
1666         OS << ":" << Row.Column;
1667       if (Row.Discriminator)
1668         OS << " discriminator:" << Row.Discriminator;
1669     }
1670   }
1671 
1672   if ((opts::PrintRelocations || PrintRelocations) && Function) {
1673     const uint64_t Size = computeCodeSize(&Instruction, &Instruction + 1);
1674     Function->printRelocations(OS, Offset, Size);
1675   }
1676 
1677   OS << "\n";
1678 
1679   if (PrintMCInst) {
1680     Instruction.dump_pretty(OS, InstPrinter.get());
1681     OS << "\n";
1682   }
1683 }
1684 
1685 ErrorOr<BinarySection &> BinaryContext::getSectionForAddress(uint64_t Address) {
1686   auto SI = AddressToSection.upper_bound(Address);
1687   if (SI != AddressToSection.begin()) {
1688     --SI;
1689     uint64_t UpperBound = SI->first + SI->second->getSize();
1690     if (!SI->second->getSize())
1691       UpperBound += 1;
1692     if (UpperBound > Address)
1693       return *SI->second;
1694   }
1695   return std::make_error_code(std::errc::bad_address);
1696 }
1697 
1698 ErrorOr<StringRef>
1699 BinaryContext::getSectionNameForAddress(uint64_t Address) const {
1700   if (ErrorOr<const BinarySection &> Section = getSectionForAddress(Address))
1701     return Section->getName();
1702   return std::make_error_code(std::errc::bad_address);
1703 }
1704 
1705 BinarySection &BinaryContext::registerSection(BinarySection *Section) {
1706   auto Res = Sections.insert(Section);
1707   (void)Res;
1708   assert(Res.second && "can't register the same section twice.");
1709 
1710   // Only register allocatable sections in the AddressToSection map.
1711   if (Section->isAllocatable() && Section->getAddress())
1712     AddressToSection.insert(std::make_pair(Section->getAddress(), Section));
1713   NameToSection.insert(
1714       std::make_pair(std::string(Section->getName()), Section));
1715   LLVM_DEBUG(dbgs() << "BOLT-DEBUG: registering " << *Section << "\n");
1716   return *Section;
1717 }
1718 
1719 BinarySection &BinaryContext::registerSection(SectionRef Section) {
1720   return registerSection(new BinarySection(*this, Section));
1721 }
1722 
1723 BinarySection &
1724 BinaryContext::registerSection(StringRef SectionName,
1725                                const BinarySection &OriginalSection) {
1726   return registerSection(
1727       new BinarySection(*this, SectionName, OriginalSection));
1728 }
1729 
1730 BinarySection &
1731 BinaryContext::registerOrUpdateSection(StringRef Name, unsigned ELFType,
1732                                        unsigned ELFFlags, uint8_t *Data,
1733                                        uint64_t Size, unsigned Alignment) {
1734   auto NamedSections = getSectionByName(Name);
1735   if (NamedSections.begin() != NamedSections.end()) {
1736     assert(std::next(NamedSections.begin()) == NamedSections.end() &&
1737            "can only update unique sections");
1738     BinarySection *Section = NamedSections.begin()->second;
1739 
1740     LLVM_DEBUG(dbgs() << "BOLT-DEBUG: updating " << *Section << " -> ");
1741     const bool Flag = Section->isAllocatable();
1742     (void)Flag;
1743     Section->update(Data, Size, Alignment, ELFType, ELFFlags);
1744     LLVM_DEBUG(dbgs() << *Section << "\n");
1745     // FIXME: Fix section flags/attributes for MachO.
1746     if (isELF())
1747       assert(Flag == Section->isAllocatable() &&
1748              "can't change section allocation status");
1749     return *Section;
1750   }
1751 
1752   return registerSection(
1753       new BinarySection(*this, Name, Data, Size, Alignment, ELFType, ELFFlags));
1754 }
1755 
1756 bool BinaryContext::deregisterSection(BinarySection &Section) {
1757   BinarySection *SectionPtr = &Section;
1758   auto Itr = Sections.find(SectionPtr);
1759   if (Itr != Sections.end()) {
1760     auto Range = AddressToSection.equal_range(SectionPtr->getAddress());
1761     while (Range.first != Range.second) {
1762       if (Range.first->second == SectionPtr) {
1763         AddressToSection.erase(Range.first);
1764         break;
1765       }
1766       ++Range.first;
1767     }
1768 
1769     auto NameRange =
1770         NameToSection.equal_range(std::string(SectionPtr->getName()));
1771     while (NameRange.first != NameRange.second) {
1772       if (NameRange.first->second == SectionPtr) {
1773         NameToSection.erase(NameRange.first);
1774         break;
1775       }
1776       ++NameRange.first;
1777     }
1778 
1779     Sections.erase(Itr);
1780     delete SectionPtr;
1781     return true;
1782   }
1783   return false;
1784 }
1785 
1786 void BinaryContext::printSections(raw_ostream &OS) const {
1787   for (BinarySection *const &Section : Sections)
1788     OS << "BOLT-INFO: " << *Section << "\n";
1789 }
1790 
1791 BinarySection &BinaryContext::absoluteSection() {
1792   if (ErrorOr<BinarySection &> Section = getUniqueSectionByName("<absolute>"))
1793     return *Section;
1794   return registerOrUpdateSection("<absolute>", ELF::SHT_NULL, 0u);
1795 }
1796 
1797 ErrorOr<uint64_t> BinaryContext::getUnsignedValueAtAddress(uint64_t Address,
1798                                                            size_t Size) const {
1799   const ErrorOr<const BinarySection &> Section = getSectionForAddress(Address);
1800   if (!Section)
1801     return std::make_error_code(std::errc::bad_address);
1802 
1803   if (Section->isVirtual())
1804     return 0;
1805 
1806   DataExtractor DE(Section->getContents(), AsmInfo->isLittleEndian(),
1807                    AsmInfo->getCodePointerSize());
1808   auto ValueOffset = static_cast<uint64_t>(Address - Section->getAddress());
1809   return DE.getUnsigned(&ValueOffset, Size);
1810 }
1811 
1812 ErrorOr<uint64_t> BinaryContext::getSignedValueAtAddress(uint64_t Address,
1813                                                          size_t Size) const {
1814   const ErrorOr<const BinarySection &> Section = getSectionForAddress(Address);
1815   if (!Section)
1816     return std::make_error_code(std::errc::bad_address);
1817 
1818   if (Section->isVirtual())
1819     return 0;
1820 
1821   DataExtractor DE(Section->getContents(), AsmInfo->isLittleEndian(),
1822                    AsmInfo->getCodePointerSize());
1823   auto ValueOffset = static_cast<uint64_t>(Address - Section->getAddress());
1824   return DE.getSigned(&ValueOffset, Size);
1825 }
1826 
1827 void BinaryContext::addRelocation(uint64_t Address, MCSymbol *Symbol,
1828                                   uint64_t Type, uint64_t Addend,
1829                                   uint64_t Value) {
1830   ErrorOr<BinarySection &> Section = getSectionForAddress(Address);
1831   assert(Section && "cannot find section for address");
1832   Section->addRelocation(Address - Section->getAddress(), Symbol, Type, Addend,
1833                          Value);
1834 }
1835 
1836 void BinaryContext::addDynamicRelocation(uint64_t Address, MCSymbol *Symbol,
1837                                          uint64_t Type, uint64_t Addend,
1838                                          uint64_t Value) {
1839   ErrorOr<BinarySection &> Section = getSectionForAddress(Address);
1840   assert(Section && "cannot find section for address");
1841   Section->addDynamicRelocation(Address - Section->getAddress(), Symbol, Type,
1842                                 Addend, Value);
1843 }
1844 
1845 bool BinaryContext::removeRelocationAt(uint64_t Address) {
1846   ErrorOr<BinarySection &> Section = getSectionForAddress(Address);
1847   assert(Section && "cannot find section for address");
1848   return Section->removeRelocationAt(Address - Section->getAddress());
1849 }
1850 
1851 const Relocation *BinaryContext::getRelocationAt(uint64_t Address) {
1852   ErrorOr<BinarySection &> Section = getSectionForAddress(Address);
1853   if (!Section)
1854     return nullptr;
1855 
1856   return Section->getRelocationAt(Address - Section->getAddress());
1857 }
1858 
1859 const Relocation *BinaryContext::getDynamicRelocationAt(uint64_t Address) {
1860   ErrorOr<BinarySection &> Section = getSectionForAddress(Address);
1861   if (!Section)
1862     return nullptr;
1863 
1864   return Section->getDynamicRelocationAt(Address - Section->getAddress());
1865 }
1866 
1867 void BinaryContext::markAmbiguousRelocations(BinaryData &BD,
1868                                              const uint64_t Address) {
1869   auto setImmovable = [&](BinaryData &BD) {
1870     BinaryData *Root = BD.getAtomicRoot();
1871     LLVM_DEBUG(if (Root->isMoveable()) {
1872       dbgs() << "BOLT-DEBUG: setting " << *Root << " as immovable "
1873              << "due to ambiguous relocation referencing 0x"
1874              << Twine::utohexstr(Address) << '\n';
1875     });
1876     Root->setIsMoveable(false);
1877   };
1878 
1879   if (Address == BD.getAddress()) {
1880     setImmovable(BD);
1881 
1882     // Set previous symbol as immovable
1883     BinaryData *Prev = getBinaryDataContainingAddress(Address - 1);
1884     if (Prev && Prev->getEndAddress() == BD.getAddress())
1885       setImmovable(*Prev);
1886   }
1887 
1888   if (Address == BD.getEndAddress()) {
1889     setImmovable(BD);
1890 
1891     // Set next symbol as immovable
1892     BinaryData *Next = getBinaryDataContainingAddress(BD.getEndAddress());
1893     if (Next && Next->getAddress() == BD.getEndAddress())
1894       setImmovable(*Next);
1895   }
1896 }
1897 
1898 BinaryFunction *BinaryContext::getFunctionForSymbol(const MCSymbol *Symbol,
1899                                                     uint64_t *EntryDesc) {
1900   std::shared_lock<std::shared_timed_mutex> Lock(SymbolToFunctionMapMutex);
1901   auto BFI = SymbolToFunctionMap.find(Symbol);
1902   if (BFI == SymbolToFunctionMap.end())
1903     return nullptr;
1904 
1905   BinaryFunction *BF = BFI->second;
1906   if (EntryDesc)
1907     *EntryDesc = BF->getEntryIDForSymbol(Symbol);
1908 
1909   return BF;
1910 }
1911 
1912 void BinaryContext::exitWithBugReport(StringRef Message,
1913                                       const BinaryFunction &Function) const {
1914   errs() << "=======================================\n";
1915   errs() << "BOLT is unable to proceed because it couldn't properly understand "
1916             "this function.\n";
1917   errs() << "If you are running the most recent version of BOLT, you may "
1918             "want to "
1919             "report this and paste this dump.\nPlease check that there is no "
1920             "sensitive contents being shared in this dump.\n";
1921   errs() << "\nOffending function: " << Function.getPrintName() << "\n\n";
1922   ScopedPrinter SP(errs());
1923   SP.printBinaryBlock("Function contents", *Function.getData());
1924   errs() << "\n";
1925   Function.dump();
1926   errs() << "ERROR: " << Message;
1927   errs() << "\n=======================================\n";
1928   exit(1);
1929 }
1930 
1931 BinaryFunction *
1932 BinaryContext::createInjectedBinaryFunction(const std::string &Name,
1933                                             bool IsSimple) {
1934   InjectedBinaryFunctions.push_back(new BinaryFunction(Name, *this, IsSimple));
1935   BinaryFunction *BF = InjectedBinaryFunctions.back();
1936   setSymbolToFunctionMap(BF->getSymbol(), BF);
1937   BF->CurrentState = BinaryFunction::State::CFG;
1938   return BF;
1939 }
1940 
1941 std::pair<size_t, size_t>
1942 BinaryContext::calculateEmittedSize(BinaryFunction &BF, bool FixBranches) {
1943   // Adjust branch instruction to match the current layout.
1944   if (FixBranches)
1945     BF.fixBranches();
1946 
1947   // Create local MC context to isolate the effect of ephemeral code emission.
1948   IndependentCodeEmitter MCEInstance = createIndependentMCCodeEmitter();
1949   MCContext *LocalCtx = MCEInstance.LocalCtx.get();
1950   MCAsmBackend *MAB =
1951       TheTarget->createMCAsmBackend(*STI, *MRI, MCTargetOptions());
1952 
1953   SmallString<256> Code;
1954   raw_svector_ostream VecOS(Code);
1955 
1956   std::unique_ptr<MCObjectWriter> OW = MAB->createObjectWriter(VecOS);
1957   std::unique_ptr<MCStreamer> Streamer(TheTarget->createMCObjectStreamer(
1958       *TheTriple, *LocalCtx, std::unique_ptr<MCAsmBackend>(MAB), std::move(OW),
1959       std::unique_ptr<MCCodeEmitter>(MCEInstance.MCE.release()), *STI,
1960       /*RelaxAll=*/false,
1961       /*IncrementalLinkerCompatible=*/false,
1962       /*DWARFMustBeAtTheEnd=*/false));
1963 
1964   Streamer->initSections(false, *STI);
1965 
1966   MCSection *Section = MCEInstance.LocalMOFI->getTextSection();
1967   Section->setHasInstructions(true);
1968 
1969   // Create symbols in the LocalCtx so that they get destroyed with it.
1970   MCSymbol *StartLabel = LocalCtx->createTempSymbol();
1971   MCSymbol *EndLabel = LocalCtx->createTempSymbol();
1972   MCSymbol *ColdStartLabel = LocalCtx->createTempSymbol();
1973   MCSymbol *ColdEndLabel = LocalCtx->createTempSymbol();
1974 
1975   Streamer->SwitchSection(Section);
1976   Streamer->emitLabel(StartLabel);
1977   emitFunctionBody(*Streamer, BF, /*EmitColdPart=*/false,
1978                    /*EmitCodeOnly=*/true);
1979   Streamer->emitLabel(EndLabel);
1980 
1981   if (BF.isSplit()) {
1982     MCSectionELF *ColdSection =
1983         LocalCtx->getELFSection(BF.getColdCodeSectionName(), ELF::SHT_PROGBITS,
1984                                 ELF::SHF_EXECINSTR | ELF::SHF_ALLOC);
1985     ColdSection->setHasInstructions(true);
1986 
1987     Streamer->SwitchSection(ColdSection);
1988     Streamer->emitLabel(ColdStartLabel);
1989     emitFunctionBody(*Streamer, BF, /*EmitColdPart=*/true,
1990                      /*EmitCodeOnly=*/true);
1991     Streamer->emitLabel(ColdEndLabel);
1992     // To avoid calling MCObjectStreamer::flushPendingLabels() which is private
1993     Streamer->emitBytes(StringRef(""));
1994     Streamer->SwitchSection(Section);
1995   }
1996 
1997   // To avoid calling MCObjectStreamer::flushPendingLabels() which is private or
1998   // MCStreamer::Finish(), which does more than we want
1999   Streamer->emitBytes(StringRef(""));
2000 
2001   MCAssembler &Assembler =
2002       static_cast<MCObjectStreamer *>(Streamer.get())->getAssembler();
2003   MCAsmLayout Layout(Assembler);
2004   Assembler.layout(Layout);
2005 
2006   const uint64_t HotSize =
2007       Layout.getSymbolOffset(*EndLabel) - Layout.getSymbolOffset(*StartLabel);
2008   const uint64_t ColdSize = BF.isSplit()
2009                                 ? Layout.getSymbolOffset(*ColdEndLabel) -
2010                                       Layout.getSymbolOffset(*ColdStartLabel)
2011                                 : 0ULL;
2012 
2013   // Clean-up the effect of the code emission.
2014   for (const MCSymbol &Symbol : Assembler.symbols()) {
2015     MCSymbol *MutableSymbol = const_cast<MCSymbol *>(&Symbol);
2016     MutableSymbol->setUndefined();
2017     MutableSymbol->setIsRegistered(false);
2018   }
2019 
2020   return std::make_pair(HotSize, ColdSize);
2021 }
2022 
2023 bool BinaryContext::validateEncoding(const MCInst &Inst,
2024                                      ArrayRef<uint8_t> InputEncoding) const {
2025   SmallString<256> Code;
2026   SmallVector<MCFixup, 4> Fixups;
2027   raw_svector_ostream VecOS(Code);
2028 
2029   MCE->encodeInstruction(Inst, VecOS, Fixups, *STI);
2030   auto EncodedData = ArrayRef<uint8_t>((uint8_t *)Code.data(), Code.size());
2031   if (InputEncoding != EncodedData) {
2032     if (opts::Verbosity > 1) {
2033       errs() << "BOLT-WARNING: mismatched encoding detected\n"
2034              << "      input: " << InputEncoding << '\n'
2035              << "     output: " << EncodedData << '\n';
2036     }
2037     return false;
2038   }
2039 
2040   return true;
2041 }
2042 
2043 uint64_t BinaryContext::getHotThreshold() const {
2044   static uint64_t Threshold = 0;
2045   if (Threshold == 0) {
2046     Threshold = std::max(
2047         (uint64_t)opts::ExecutionCountThreshold,
2048         NumProfiledFuncs ? SumExecutionCount / (2 * NumProfiledFuncs) : 1);
2049   }
2050   return Threshold;
2051 }
2052 
2053 BinaryFunction *BinaryContext::getBinaryFunctionContainingAddress(
2054     uint64_t Address, bool CheckPastEnd, bool UseMaxSize) {
2055   auto FI = BinaryFunctions.upper_bound(Address);
2056   if (FI == BinaryFunctions.begin())
2057     return nullptr;
2058   --FI;
2059 
2060   const uint64_t UsedSize =
2061       UseMaxSize ? FI->second.getMaxSize() : FI->second.getSize();
2062 
2063   if (Address >= FI->first + UsedSize + (CheckPastEnd ? 1 : 0))
2064     return nullptr;
2065 
2066   return &FI->second;
2067 }
2068 
2069 BinaryFunction *BinaryContext::getBinaryFunctionAtAddress(uint64_t Address) {
2070   // First, try to find a function starting at the given address. If the
2071   // function was folded, this will get us the original folded function if it
2072   // wasn't removed from the list, e.g. in non-relocation mode.
2073   auto BFI = BinaryFunctions.find(Address);
2074   if (BFI != BinaryFunctions.end())
2075     return &BFI->second;
2076 
2077   // We might have folded the function matching the object at the given
2078   // address. In such case, we look for a function matching the symbol
2079   // registered at the original address. The new function (the one that the
2080   // original was folded into) will hold the symbol.
2081   if (const BinaryData *BD = getBinaryDataAtAddress(Address)) {
2082     uint64_t EntryID = 0;
2083     BinaryFunction *BF = getFunctionForSymbol(BD->getSymbol(), &EntryID);
2084     if (BF && EntryID == 0)
2085       return BF;
2086   }
2087   return nullptr;
2088 }
2089 
2090 DebugAddressRangesVector BinaryContext::translateModuleAddressRanges(
2091     const DWARFAddressRangesVector &InputRanges) const {
2092   DebugAddressRangesVector OutputRanges;
2093 
2094   for (const DWARFAddressRange Range : InputRanges) {
2095     auto BFI = BinaryFunctions.lower_bound(Range.LowPC);
2096     while (BFI != BinaryFunctions.end()) {
2097       const BinaryFunction &Function = BFI->second;
2098       if (Function.getAddress() >= Range.HighPC)
2099         break;
2100       const DebugAddressRangesVector FunctionRanges =
2101           Function.getOutputAddressRanges();
2102       std::move(std::begin(FunctionRanges), std::end(FunctionRanges),
2103                 std::back_inserter(OutputRanges));
2104       std::advance(BFI, 1);
2105     }
2106   }
2107 
2108   return OutputRanges;
2109 }
2110 
2111 } // namespace bolt
2112 } // namespace llvm
2113