xref: /llvm-project/bolt/lib/Core/BinaryContext.cpp (revision a9cd49d50e88aa142e6d57de90321d8810e1027f)
1 //===- bolt/Core/BinaryContext.cpp - Low-level context --------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements the BinaryContext class.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "bolt/Core/BinaryContext.h"
14 #include "bolt/Core/BinaryEmitter.h"
15 #include "bolt/Core/BinaryFunction.h"
16 #include "bolt/Utils/CommandLineOpts.h"
17 #include "bolt/Utils/NameResolver.h"
18 #include "bolt/Utils/Utils.h"
19 #include "llvm/ADT/Twine.h"
20 #include "llvm/DebugInfo/DWARF/DWARFFormValue.h"
21 #include "llvm/DebugInfo/DWARF/DWARFUnit.h"
22 #include "llvm/MC/MCAsmLayout.h"
23 #include "llvm/MC/MCAssembler.h"
24 #include "llvm/MC/MCContext.h"
25 #include "llvm/MC/MCDisassembler/MCDisassembler.h"
26 #include "llvm/MC/MCInstPrinter.h"
27 #include "llvm/MC/MCObjectStreamer.h"
28 #include "llvm/MC/MCObjectWriter.h"
29 #include "llvm/MC/MCSectionELF.h"
30 #include "llvm/MC/MCStreamer.h"
31 #include "llvm/MC/MCSymbol.h"
32 #include "llvm/Support/CommandLine.h"
33 #include "llvm/Support/Regex.h"
34 #include <algorithm>
35 #include <functional>
36 #include <iterator>
37 #include <unordered_set>
38 
39 using namespace llvm;
40 
41 #undef  DEBUG_TYPE
42 #define DEBUG_TYPE "bolt"
43 
44 namespace opts {
45 
46 cl::opt<bool>
47 NoHugePages("no-huge-pages",
48   cl::desc("use regular size pages for code alignment"),
49   cl::ZeroOrMore,
50   cl::Hidden,
51   cl::cat(BoltCategory));
52 
53 static cl::opt<bool>
54 PrintDebugInfo("print-debug-info",
55   cl::desc("print debug info when printing functions"),
56   cl::Hidden,
57   cl::ZeroOrMore,
58   cl::cat(BoltCategory));
59 
60 cl::opt<bool>
61 PrintRelocations("print-relocations",
62   cl::desc("print relocations when printing functions/objects"),
63   cl::Hidden,
64   cl::ZeroOrMore,
65   cl::cat(BoltCategory));
66 
67 static cl::opt<bool>
68 PrintMemData("print-mem-data",
69   cl::desc("print memory data annotations when printing functions"),
70   cl::Hidden,
71   cl::ZeroOrMore,
72   cl::cat(BoltCategory));
73 
74 } // namespace opts
75 
76 namespace llvm {
77 namespace bolt {
78 
79 BinaryContext::BinaryContext(std::unique_ptr<MCContext> Ctx,
80                              std::unique_ptr<DWARFContext> DwCtx,
81                              std::unique_ptr<Triple> TheTriple,
82                              const Target *TheTarget, std::string TripleName,
83                              std::unique_ptr<MCCodeEmitter> MCE,
84                              std::unique_ptr<MCObjectFileInfo> MOFI,
85                              std::unique_ptr<const MCAsmInfo> AsmInfo,
86                              std::unique_ptr<const MCInstrInfo> MII,
87                              std::unique_ptr<const MCSubtargetInfo> STI,
88                              std::unique_ptr<MCInstPrinter> InstPrinter,
89                              std::unique_ptr<const MCInstrAnalysis> MIA,
90                              std::unique_ptr<MCPlusBuilder> MIB,
91                              std::unique_ptr<const MCRegisterInfo> MRI,
92                              std::unique_ptr<MCDisassembler> DisAsm)
93     : Ctx(std::move(Ctx)), DwCtx(std::move(DwCtx)),
94       TheTriple(std::move(TheTriple)), TheTarget(TheTarget),
95       TripleName(TripleName), MCE(std::move(MCE)), MOFI(std::move(MOFI)),
96       AsmInfo(std::move(AsmInfo)), MII(std::move(MII)), STI(std::move(STI)),
97       InstPrinter(std::move(InstPrinter)), MIA(std::move(MIA)),
98       MIB(std::move(MIB)), MRI(std::move(MRI)), DisAsm(std::move(DisAsm)) {
99   Relocation::Arch = this->TheTriple->getArch();
100   PageAlign = opts::NoHugePages ? RegularPageSize : HugePageSize;
101 }
102 
103 BinaryContext::~BinaryContext() {
104   for (BinarySection *Section : Sections)
105     delete Section;
106   for (BinaryFunction *InjectedFunction : InjectedBinaryFunctions)
107     delete InjectedFunction;
108   for (std::pair<const uint64_t, JumpTable *> JTI : JumpTables)
109     delete JTI.second;
110   clearBinaryData();
111 }
112 
113 /// Create BinaryContext for a given architecture \p ArchName and
114 /// triple \p TripleName.
115 std::unique_ptr<BinaryContext>
116 BinaryContext::createBinaryContext(const ObjectFile *File, bool IsPIC,
117                                    std::unique_ptr<DWARFContext> DwCtx) {
118   StringRef ArchName = "";
119   StringRef FeaturesStr = "";
120   switch (File->getArch()) {
121   case llvm::Triple::x86_64:
122     ArchName = "x86-64";
123     FeaturesStr = "+nopl";
124     break;
125   case llvm::Triple::aarch64:
126     ArchName = "aarch64";
127     FeaturesStr = "+fp-armv8,+neon,+crypto,+dotprod,+crc,+lse,+ras,+rdm,"
128                   "+fullfp16,+spe,+fuse-aes,+rcpc";
129     break;
130   default:
131     errs() << "BOLT-ERROR: Unrecognized machine in ELF file.\n";
132     return nullptr;
133   }
134 
135   auto TheTriple = std::make_unique<Triple>(File->makeTriple());
136   const std::string TripleName = TheTriple->str();
137 
138   std::string Error;
139   const Target *TheTarget =
140       TargetRegistry::lookupTarget(std::string(ArchName), *TheTriple, Error);
141   if (!TheTarget) {
142     errs() << "BOLT-ERROR: " << Error;
143     return nullptr;
144   }
145 
146   std::unique_ptr<const MCRegisterInfo> MRI(
147       TheTarget->createMCRegInfo(TripleName));
148   if (!MRI) {
149     errs() << "BOLT-ERROR: no register info for target " << TripleName << "\n";
150     return nullptr;
151   }
152 
153   // Set up disassembler.
154   std::unique_ptr<const MCAsmInfo> AsmInfo(
155       TheTarget->createMCAsmInfo(*MRI, TripleName, MCTargetOptions()));
156   if (!AsmInfo) {
157     errs() << "BOLT-ERROR: no assembly info for target " << TripleName << "\n";
158     return nullptr;
159   }
160 
161   std::unique_ptr<const MCSubtargetInfo> STI(
162       TheTarget->createMCSubtargetInfo(TripleName, "", FeaturesStr));
163   if (!STI) {
164     errs() << "BOLT-ERROR: no subtarget info for target " << TripleName << "\n";
165     return nullptr;
166   }
167 
168   std::unique_ptr<const MCInstrInfo> MII(TheTarget->createMCInstrInfo());
169   if (!MII) {
170     errs() << "BOLT-ERROR: no instruction info for target " << TripleName
171            << "\n";
172     return nullptr;
173   }
174 
175   std::unique_ptr<MCContext> Ctx(
176       new MCContext(*TheTriple, AsmInfo.get(), MRI.get(), STI.get()));
177   std::unique_ptr<MCObjectFileInfo> MOFI(
178       TheTarget->createMCObjectFileInfo(*Ctx, IsPIC));
179   Ctx->setObjectFileInfo(MOFI.get());
180   // We do not support X86 Large code model. Change this in the future.
181   bool Large = false;
182   if (TheTriple->getArch() == llvm::Triple::aarch64)
183     Large = true;
184   unsigned LSDAEncoding =
185       Large ? dwarf::DW_EH_PE_absptr : dwarf::DW_EH_PE_udata4;
186   unsigned TTypeEncoding =
187       Large ? dwarf::DW_EH_PE_absptr : dwarf::DW_EH_PE_udata4;
188   if (IsPIC) {
189     LSDAEncoding = dwarf::DW_EH_PE_pcrel |
190                    (Large ? dwarf::DW_EH_PE_sdata8 : dwarf::DW_EH_PE_sdata4);
191     TTypeEncoding = dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel |
192                     (Large ? dwarf::DW_EH_PE_sdata8 : dwarf::DW_EH_PE_sdata4);
193   }
194 
195   std::unique_ptr<MCDisassembler> DisAsm(
196       TheTarget->createMCDisassembler(*STI, *Ctx));
197 
198   if (!DisAsm) {
199     errs() << "BOLT-ERROR: no disassembler for target " << TripleName << "\n";
200     return nullptr;
201   }
202 
203   std::unique_ptr<const MCInstrAnalysis> MIA(
204       TheTarget->createMCInstrAnalysis(MII.get()));
205   if (!MIA) {
206     errs() << "BOLT-ERROR: failed to create instruction analysis for target"
207            << TripleName << "\n";
208     return nullptr;
209   }
210 
211   int AsmPrinterVariant = AsmInfo->getAssemblerDialect();
212   std::unique_ptr<MCInstPrinter> InstructionPrinter(
213       TheTarget->createMCInstPrinter(*TheTriple, AsmPrinterVariant, *AsmInfo,
214                                      *MII, *MRI));
215   if (!InstructionPrinter) {
216     errs() << "BOLT-ERROR: no instruction printer for target " << TripleName
217            << '\n';
218     return nullptr;
219   }
220   InstructionPrinter->setPrintImmHex(true);
221 
222   std::unique_ptr<MCCodeEmitter> MCE(
223       TheTarget->createMCCodeEmitter(*MII, *MRI, *Ctx));
224 
225   // Make sure we don't miss any output on core dumps.
226   outs().SetUnbuffered();
227   errs().SetUnbuffered();
228   dbgs().SetUnbuffered();
229 
230   auto BC = std::make_unique<BinaryContext>(
231       std::move(Ctx), std::move(DwCtx), std::move(TheTriple), TheTarget,
232       std::string(TripleName), std::move(MCE), std::move(MOFI),
233       std::move(AsmInfo), std::move(MII), std::move(STI),
234       std::move(InstructionPrinter), std::move(MIA), nullptr, std::move(MRI),
235       std::move(DisAsm));
236 
237   BC->TTypeEncoding = TTypeEncoding;
238   BC->LSDAEncoding = LSDAEncoding;
239 
240   BC->MAB = std::unique_ptr<MCAsmBackend>(
241       BC->TheTarget->createMCAsmBackend(*BC->STI, *BC->MRI, MCTargetOptions()));
242 
243   BC->setFilename(File->getFileName());
244 
245   BC->HasFixedLoadAddress = !IsPIC;
246 
247   return BC;
248 }
249 
250 bool BinaryContext::forceSymbolRelocations(StringRef SymbolName) const {
251   if (opts::HotText &&
252       (SymbolName == "__hot_start" || SymbolName == "__hot_end"))
253     return true;
254 
255   if (opts::HotData &&
256       (SymbolName == "__hot_data_start" || SymbolName == "__hot_data_end"))
257     return true;
258 
259   if (SymbolName == "_end")
260     return true;
261 
262   return false;
263 }
264 
265 std::unique_ptr<MCObjectWriter>
266 BinaryContext::createObjectWriter(raw_pwrite_stream &OS) {
267   return MAB->createObjectWriter(OS);
268 }
269 
270 bool BinaryContext::validateObjectNesting() const {
271   auto Itr = BinaryDataMap.begin();
272   auto End = BinaryDataMap.end();
273   bool Valid = true;
274   while (Itr != End) {
275     auto Next = std::next(Itr);
276     while (Next != End &&
277            Itr->second->getSection() == Next->second->getSection() &&
278            Itr->second->containsRange(Next->second->getAddress(),
279                                       Next->second->getSize())) {
280       if (Next->second->Parent != Itr->second) {
281         errs() << "BOLT-WARNING: object nesting incorrect for:\n"
282                << "BOLT-WARNING:  " << *Itr->second << "\n"
283                << "BOLT-WARNING:  " << *Next->second << "\n";
284         Valid = false;
285       }
286       ++Next;
287     }
288     Itr = Next;
289   }
290   return Valid;
291 }
292 
293 bool BinaryContext::validateHoles() const {
294   bool Valid = true;
295   for (BinarySection &Section : sections()) {
296     for (const Relocation &Rel : Section.relocations()) {
297       uint64_t RelAddr = Rel.Offset + Section.getAddress();
298       const BinaryData *BD = getBinaryDataContainingAddress(RelAddr);
299       if (!BD) {
300         errs() << "BOLT-WARNING: no BinaryData found for relocation at address"
301                << " 0x" << Twine::utohexstr(RelAddr) << " in "
302                << Section.getName() << "\n";
303         Valid = false;
304       } else if (!BD->getAtomicRoot()) {
305         errs() << "BOLT-WARNING: no atomic BinaryData found for relocation at "
306                << "address 0x" << Twine::utohexstr(RelAddr) << " in "
307                << Section.getName() << "\n";
308         Valid = false;
309       }
310     }
311   }
312   return Valid;
313 }
314 
315 void BinaryContext::updateObjectNesting(BinaryDataMapType::iterator GAI) {
316   const uint64_t Address = GAI->second->getAddress();
317   const uint64_t Size = GAI->second->getSize();
318 
319   auto fixParents = [&](BinaryDataMapType::iterator Itr,
320                         BinaryData *NewParent) {
321     BinaryData *OldParent = Itr->second->Parent;
322     Itr->second->Parent = NewParent;
323     ++Itr;
324     while (Itr != BinaryDataMap.end() && OldParent &&
325            Itr->second->Parent == OldParent) {
326       Itr->second->Parent = NewParent;
327       ++Itr;
328     }
329   };
330 
331   // Check if the previous symbol contains the newly added symbol.
332   if (GAI != BinaryDataMap.begin()) {
333     BinaryData *Prev = std::prev(GAI)->second;
334     while (Prev) {
335       if (Prev->getSection() == GAI->second->getSection() &&
336           Prev->containsRange(Address, Size)) {
337         fixParents(GAI, Prev);
338       } else {
339         fixParents(GAI, nullptr);
340       }
341       Prev = Prev->Parent;
342     }
343   }
344 
345   // Check if the newly added symbol contains any subsequent symbols.
346   if (Size != 0) {
347     BinaryData *BD = GAI->second->Parent ? GAI->second->Parent : GAI->second;
348     auto Itr = std::next(GAI);
349     while (
350         Itr != BinaryDataMap.end() &&
351         BD->containsRange(Itr->second->getAddress(), Itr->second->getSize())) {
352       Itr->second->Parent = BD;
353       ++Itr;
354     }
355   }
356 }
357 
358 iterator_range<BinaryContext::binary_data_iterator>
359 BinaryContext::getSubBinaryData(BinaryData *BD) {
360   auto Start = std::next(BinaryDataMap.find(BD->getAddress()));
361   auto End = Start;
362   while (End != BinaryDataMap.end() && BD->isAncestorOf(End->second))
363     ++End;
364   return make_range(Start, End);
365 }
366 
367 std::pair<const MCSymbol *, uint64_t>
368 BinaryContext::handleAddressRef(uint64_t Address, BinaryFunction &BF,
369                                 bool IsPCRel) {
370   uint64_t Addend = 0;
371 
372   if (isAArch64()) {
373     // Check if this is an access to a constant island and create bookkeeping
374     // to keep track of it and emit it later as part of this function.
375     if (MCSymbol *IslandSym = BF.getOrCreateIslandAccess(Address))
376       return std::make_pair(IslandSym, Addend);
377 
378     // Detect custom code written in assembly that refers to arbitrary
379     // constant islands from other functions. Write this reference so we
380     // can pull this constant island and emit it as part of this function
381     // too.
382     auto IslandIter = AddressToConstantIslandMap.lower_bound(Address);
383     if (IslandIter != AddressToConstantIslandMap.end()) {
384       if (MCSymbol *IslandSym =
385               IslandIter->second->getOrCreateProxyIslandAccess(Address, BF)) {
386         BF.createIslandDependency(IslandSym, IslandIter->second);
387         return std::make_pair(IslandSym, Addend);
388       }
389     }
390   }
391 
392   // Note that the address does not necessarily have to reside inside
393   // a section, it could be an absolute address too.
394   ErrorOr<BinarySection &> Section = getSectionForAddress(Address);
395   if (Section && Section->isText()) {
396     if (BF.containsAddress(Address, /*UseMaxSize=*/isAArch64())) {
397       if (Address != BF.getAddress()) {
398         // The address could potentially escape. Mark it as another entry
399         // point into the function.
400         if (opts::Verbosity >= 1) {
401           outs() << "BOLT-INFO: potentially escaped address 0x"
402                  << Twine::utohexstr(Address) << " in function " << BF << '\n';
403         }
404         BF.HasInternalLabelReference = true;
405         return std::make_pair(
406             BF.addEntryPointAtOffset(Address - BF.getAddress()), Addend);
407       }
408     } else {
409       BF.InterproceduralReferences.insert(Address);
410     }
411   }
412 
413   // With relocations, catch jump table references outside of the basic block
414   // containing the indirect jump.
415   if (HasRelocations) {
416     const MemoryContentsType MemType = analyzeMemoryAt(Address, BF);
417     if (MemType == MemoryContentsType::POSSIBLE_PIC_JUMP_TABLE && IsPCRel) {
418       const MCSymbol *Symbol =
419           getOrCreateJumpTable(BF, Address, JumpTable::JTT_PIC);
420 
421       return std::make_pair(Symbol, Addend);
422     }
423   }
424 
425   if (BinaryData *BD = getBinaryDataContainingAddress(Address))
426     return std::make_pair(BD->getSymbol(), Address - BD->getAddress());
427 
428   // TODO: use DWARF info to get size/alignment here?
429   MCSymbol *TargetSymbol = getOrCreateGlobalSymbol(Address, "DATAat");
430   LLVM_DEBUG(dbgs() << "Created symbol " << TargetSymbol->getName() << '\n');
431   return std::make_pair(TargetSymbol, Addend);
432 }
433 
434 MemoryContentsType BinaryContext::analyzeMemoryAt(uint64_t Address,
435                                                   BinaryFunction &BF) {
436   if (!isX86())
437     return MemoryContentsType::UNKNOWN;
438 
439   ErrorOr<BinarySection &> Section = getSectionForAddress(Address);
440   if (!Section) {
441     // No section - possibly an absolute address. Since we don't allow
442     // internal function addresses to escape the function scope - we
443     // consider it a tail call.
444     if (opts::Verbosity > 1) {
445       errs() << "BOLT-WARNING: no section for address 0x"
446              << Twine::utohexstr(Address) << " referenced from function " << BF
447              << '\n';
448     }
449     return MemoryContentsType::UNKNOWN;
450   }
451 
452   if (Section->isVirtual()) {
453     // The contents are filled at runtime.
454     return MemoryContentsType::UNKNOWN;
455   }
456 
457   // No support for jump tables in code yet.
458   if (Section->isText())
459     return MemoryContentsType::UNKNOWN;
460 
461   // Start with checking for PIC jump table. We expect non-PIC jump tables
462   // to have high 32 bits set to 0.
463   if (analyzeJumpTable(Address, JumpTable::JTT_PIC, BF))
464     return MemoryContentsType::POSSIBLE_PIC_JUMP_TABLE;
465 
466   if (analyzeJumpTable(Address, JumpTable::JTT_NORMAL, BF))
467     return MemoryContentsType::POSSIBLE_JUMP_TABLE;
468 
469   return MemoryContentsType::UNKNOWN;
470 }
471 
472 /// Check if <fragment restored name> == <parent restored name>.cold(.\d+)?
473 bool isPotentialFragmentByName(BinaryFunction &Fragment,
474                                BinaryFunction &Parent) {
475   for (StringRef Name : Parent.getNames()) {
476     std::string NamePrefix = Regex::escape(NameResolver::restore(Name));
477     std::string NameRegex = Twine(NamePrefix, "\\.cold(\\.[0-9]+)?").str();
478     if (Fragment.hasRestoredNameRegex(NameRegex))
479       return true;
480   }
481   return false;
482 }
483 
484 bool BinaryContext::analyzeJumpTable(const uint64_t Address,
485                                      const JumpTable::JumpTableType Type,
486                                      BinaryFunction &BF,
487                                      const uint64_t NextJTAddress,
488                                      JumpTable::OffsetsType *Offsets) {
489   // Is one of the targets __builtin_unreachable?
490   bool HasUnreachable = false;
491 
492   // Number of targets other than __builtin_unreachable.
493   uint64_t NumRealEntries = 0;
494 
495   constexpr uint64_t INVALID_OFFSET = std::numeric_limits<uint64_t>::max();
496   auto addOffset = [&](uint64_t Offset) {
497     if (Offsets)
498       Offsets->emplace_back(Offset);
499   };
500 
501   auto doesBelongToFunction = [&](const uint64_t Addr,
502                                   BinaryFunction *TargetBF) -> bool {
503     if (BF.containsAddress(Addr))
504       return true;
505     // Nothing to do if we failed to identify the containing function.
506     if (!TargetBF)
507       return false;
508     // Case 1: check if BF is a fragment and TargetBF is its parent.
509     if (BF.isFragment()) {
510       // Parent function may or may not be already registered.
511       // Set parent link based on function name matching heuristic.
512       return registerFragment(BF, *TargetBF);
513     }
514     // Case 2: check if TargetBF is a fragment and BF is its parent.
515     return TargetBF->isFragment() && registerFragment(*TargetBF, BF);
516   };
517 
518   ErrorOr<BinarySection &> Section = getSectionForAddress(Address);
519   if (!Section)
520     return false;
521 
522   // The upper bound is defined by containing object, section limits, and
523   // the next jump table in memory.
524   uint64_t UpperBound = Section->getEndAddress();
525   const BinaryData *JumpTableBD = getBinaryDataAtAddress(Address);
526   if (JumpTableBD && JumpTableBD->getSize()) {
527     assert(JumpTableBD->getEndAddress() <= UpperBound &&
528            "data object cannot cross a section boundary");
529     UpperBound = JumpTableBD->getEndAddress();
530   }
531   if (NextJTAddress)
532     UpperBound = std::min(NextJTAddress, UpperBound);
533 
534   LLVM_DEBUG(dbgs() << "BOLT-DEBUG: analyzeJumpTable in " << BF.getPrintName()
535                     << '\n');
536   const uint64_t EntrySize = getJumpTableEntrySize(Type);
537   for (uint64_t EntryAddress = Address; EntryAddress <= UpperBound - EntrySize;
538        EntryAddress += EntrySize) {
539     LLVM_DEBUG(dbgs() << "  * Checking 0x" << Twine::utohexstr(EntryAddress)
540                       << " -> ");
541     // Check if there's a proper relocation against the jump table entry.
542     if (HasRelocations) {
543       if (Type == JumpTable::JTT_PIC &&
544           !DataPCRelocations.count(EntryAddress)) {
545         LLVM_DEBUG(
546             dbgs() << "FAIL: JTT_PIC table, no relocation for this address\n");
547         break;
548       }
549       if (Type == JumpTable::JTT_NORMAL && !getRelocationAt(EntryAddress)) {
550         LLVM_DEBUG(
551             dbgs()
552             << "FAIL: JTT_NORMAL table, no relocation for this address\n");
553         break;
554       }
555     }
556 
557     const uint64_t Value =
558         (Type == JumpTable::JTT_PIC)
559             ? Address + *getSignedValueAtAddress(EntryAddress, EntrySize)
560             : *getPointerAtAddress(EntryAddress);
561 
562     // __builtin_unreachable() case.
563     if (Value == BF.getAddress() + BF.getSize()) {
564       addOffset(Value - BF.getAddress());
565       HasUnreachable = true;
566       LLVM_DEBUG(dbgs() << "OK: __builtin_unreachable\n");
567       continue;
568     }
569 
570     // Function or one of its fragments.
571     BinaryFunction *TargetBF = getBinaryFunctionContainingAddress(Value);
572 
573     // We assume that a jump table cannot have function start as an entry.
574     if (!doesBelongToFunction(Value, TargetBF) || Value == BF.getAddress()) {
575       LLVM_DEBUG({
576         if (!BF.containsAddress(Value)) {
577           dbgs() << "FAIL: function doesn't contain this address\n";
578           if (TargetBF) {
579             dbgs() << "  ! function containing this address: "
580                    << TargetBF->getPrintName() << '\n';
581             if (TargetBF->isFragment())
582               dbgs() << "  ! is a fragment\n";
583             for (BinaryFunction *TargetParent : TargetBF->ParentFragments)
584               dbgs() << "  ! its parent is "
585                      << (TargetParent ? TargetParent->getPrintName() : "(none)")
586                      << '\n';
587           }
588         }
589         if (Value == BF.getAddress())
590           dbgs() << "FAIL: jump table cannot have function start as an entry\n";
591       });
592       break;
593     }
594 
595     // Check there's an instruction at this offset.
596     if (TargetBF->getState() == BinaryFunction::State::Disassembled &&
597         !TargetBF->getInstructionAtOffset(Value - TargetBF->getAddress())) {
598       LLVM_DEBUG(dbgs() << "FAIL: no instruction at this offset\n");
599       break;
600     }
601 
602     ++NumRealEntries;
603 
604     if (TargetBF == &BF) {
605       // Address inside the function.
606       addOffset(Value - TargetBF->getAddress());
607       LLVM_DEBUG(dbgs() << "OK: real entry\n");
608     } else {
609       // Address in split fragment.
610       BF.setHasSplitJumpTable(true);
611       // Add invalid offset for proper identification of jump table size.
612       addOffset(INVALID_OFFSET);
613       LLVM_DEBUG(dbgs() << "OK: address in split fragment "
614                         << TargetBF->getPrintName() << '\n');
615     }
616   }
617 
618   // It's a jump table if the number of real entries is more than 1, or there's
619   // one real entry and "unreachable" targets. If there are only multiple
620   // "unreachable" targets, then it's not a jump table.
621   return NumRealEntries + HasUnreachable >= 2;
622 }
623 
624 void BinaryContext::populateJumpTables() {
625   LLVM_DEBUG(dbgs() << "DataPCRelocations: " << DataPCRelocations.size()
626                     << '\n');
627   for (auto JTI = JumpTables.begin(), JTE = JumpTables.end(); JTI != JTE;
628        ++JTI) {
629     JumpTable *JT = JTI->second;
630     BinaryFunction &BF = *JT->Parent;
631 
632     if (!BF.isSimple())
633       continue;
634 
635     uint64_t NextJTAddress = 0;
636     auto NextJTI = std::next(JTI);
637     if (NextJTI != JTE)
638       NextJTAddress = NextJTI->second->getAddress();
639 
640     const bool Success = analyzeJumpTable(JT->getAddress(), JT->Type, BF,
641                                           NextJTAddress, &JT->OffsetEntries);
642     if (!Success) {
643       dbgs() << "failed to analyze jump table in function " << BF << '\n';
644       JT->print(dbgs());
645       if (NextJTI != JTE) {
646         dbgs() << "next jump table at 0x"
647                << Twine::utohexstr(NextJTI->second->getAddress())
648                << " belongs to function " << *NextJTI->second->Parent << '\n';
649         NextJTI->second->print(dbgs());
650       }
651       llvm_unreachable("jump table heuristic failure");
652     }
653 
654     for (uint64_t EntryOffset : JT->OffsetEntries) {
655       if (EntryOffset == BF.getSize())
656         BF.IgnoredBranches.emplace_back(EntryOffset, BF.getSize());
657       else
658         BF.registerReferencedOffset(EntryOffset);
659     }
660 
661     // In strict mode, erase PC-relative relocation record. Later we check that
662     // all such records are erased and thus have been accounted for.
663     if (opts::StrictMode && JT->Type == JumpTable::JTT_PIC) {
664       for (uint64_t Address = JT->getAddress();
665            Address < JT->getAddress() + JT->getSize();
666            Address += JT->EntrySize) {
667         DataPCRelocations.erase(DataPCRelocations.find(Address));
668       }
669     }
670 
671     // Mark to skip the function and all its fragments.
672     if (BF.hasSplitJumpTable())
673       FragmentsToSkip.push_back(&BF);
674   }
675 
676   if (opts::StrictMode && DataPCRelocations.size()) {
677     LLVM_DEBUG({
678       dbgs() << DataPCRelocations.size()
679              << " unclaimed PC-relative relocations left in data:\n";
680       for (uint64_t Reloc : DataPCRelocations)
681         dbgs() << Twine::utohexstr(Reloc) << '\n';
682     });
683     assert(0 && "unclaimed PC-relative relocations left in data\n");
684   }
685   clearList(DataPCRelocations);
686 }
687 
688 void BinaryContext::skipMarkedFragments() {
689   // Unique functions in the vector.
690   std::unordered_set<BinaryFunction *> UniqueFunctions(FragmentsToSkip.begin(),
691                                                        FragmentsToSkip.end());
692   // Copy the functions back to FragmentsToSkip.
693   FragmentsToSkip.assign(UniqueFunctions.begin(), UniqueFunctions.end());
694   auto addToWorklist = [&](BinaryFunction *Function) -> void {
695     if (UniqueFunctions.count(Function))
696       return;
697     FragmentsToSkip.push_back(Function);
698     UniqueFunctions.insert(Function);
699   };
700   // Functions containing split jump tables need to be skipped with all
701   // fragments (transitively).
702   for (size_t I = 0; I != FragmentsToSkip.size(); I++) {
703     BinaryFunction *BF = FragmentsToSkip[I];
704     assert(UniqueFunctions.count(BF) &&
705            "internal error in traversing function fragments");
706     if (opts::Verbosity >= 1)
707       errs() << "BOLT-WARNING: Ignoring " << BF->getPrintName() << '\n';
708     BF->setIgnored();
709     std::for_each(BF->Fragments.begin(), BF->Fragments.end(), addToWorklist);
710     std::for_each(BF->ParentFragments.begin(), BF->ParentFragments.end(),
711                   addToWorklist);
712   }
713   errs() << "BOLT-WARNING: Ignored " << FragmentsToSkip.size() << " functions "
714          << "due to cold fragments.\n";
715   FragmentsToSkip.clear();
716 }
717 
718 MCSymbol *BinaryContext::getOrCreateGlobalSymbol(uint64_t Address, Twine Prefix,
719                                                  uint64_t Size,
720                                                  uint16_t Alignment,
721                                                  unsigned Flags) {
722   auto Itr = BinaryDataMap.find(Address);
723   if (Itr != BinaryDataMap.end()) {
724     assert(Itr->second->getSize() == Size || !Size);
725     return Itr->second->getSymbol();
726   }
727 
728   std::string Name = (Prefix + "0x" + Twine::utohexstr(Address)).str();
729   assert(!GlobalSymbols.count(Name) && "created name is not unique");
730   return registerNameAtAddress(Name, Address, Size, Alignment, Flags);
731 }
732 
733 MCSymbol *BinaryContext::getOrCreateUndefinedGlobalSymbol(StringRef Name) {
734   return Ctx->getOrCreateSymbol(Name);
735 }
736 
737 BinaryFunction *BinaryContext::createBinaryFunction(
738     const std::string &Name, BinarySection &Section, uint64_t Address,
739     uint64_t Size, uint64_t SymbolSize, uint16_t Alignment) {
740   auto Result = BinaryFunctions.emplace(
741       Address, BinaryFunction(Name, Section, Address, Size, *this));
742   assert(Result.second == true && "unexpected duplicate function");
743   BinaryFunction *BF = &Result.first->second;
744   registerNameAtAddress(Name, Address, SymbolSize ? SymbolSize : Size,
745                         Alignment);
746   setSymbolToFunctionMap(BF->getSymbol(), BF);
747   return BF;
748 }
749 
750 const MCSymbol *
751 BinaryContext::getOrCreateJumpTable(BinaryFunction &Function, uint64_t Address,
752                                     JumpTable::JumpTableType Type) {
753   if (JumpTable *JT = getJumpTableContainingAddress(Address)) {
754     assert(JT->Type == Type && "jump table types have to match");
755     assert(JT->Parent == &Function &&
756            "cannot re-use jump table of a different function");
757     assert(Address == JT->getAddress() && "unexpected non-empty jump table");
758 
759     return JT->getFirstLabel();
760   }
761 
762   // Re-use the existing symbol if possible.
763   MCSymbol *JTLabel = nullptr;
764   if (BinaryData *Object = getBinaryDataAtAddress(Address)) {
765     if (!isInternalSymbolName(Object->getSymbol()->getName()))
766       JTLabel = Object->getSymbol();
767   }
768 
769   const uint64_t EntrySize = getJumpTableEntrySize(Type);
770   if (!JTLabel) {
771     const std::string JumpTableName = generateJumpTableName(Function, Address);
772     JTLabel = registerNameAtAddress(JumpTableName, Address, 0, EntrySize);
773   }
774 
775   LLVM_DEBUG(dbgs() << "BOLT-DEBUG: creating jump table " << JTLabel->getName()
776                     << " in function " << Function << '\n');
777 
778   JumpTable *JT = new JumpTable(*JTLabel, Address, EntrySize, Type,
779                                 JumpTable::LabelMapType{{0, JTLabel}}, Function,
780                                 *getSectionForAddress(Address));
781   JumpTables.emplace(Address, JT);
782 
783   // Duplicate the entry for the parent function for easy access.
784   Function.JumpTables.emplace(Address, JT);
785 
786   return JTLabel;
787 }
788 
789 std::pair<uint64_t, const MCSymbol *>
790 BinaryContext::duplicateJumpTable(BinaryFunction &Function, JumpTable *JT,
791                                   const MCSymbol *OldLabel) {
792   auto L = scopeLock();
793   unsigned Offset = 0;
794   bool Found = false;
795   for (std::pair<const unsigned, MCSymbol *> Elmt : JT->Labels) {
796     if (Elmt.second != OldLabel)
797       continue;
798     Offset = Elmt.first;
799     Found = true;
800     break;
801   }
802   assert(Found && "Label not found");
803   MCSymbol *NewLabel = Ctx->createNamedTempSymbol("duplicatedJT");
804   JumpTable *NewJT =
805       new JumpTable(*NewLabel, JT->getAddress(), JT->EntrySize, JT->Type,
806                     JumpTable::LabelMapType{{Offset, NewLabel}}, Function,
807                     *getSectionForAddress(JT->getAddress()));
808   NewJT->Entries = JT->Entries;
809   NewJT->Counts = JT->Counts;
810   uint64_t JumpTableID = ++DuplicatedJumpTables;
811   // Invert it to differentiate from regular jump tables whose IDs are their
812   // addresses in the input binary memory space
813   JumpTableID = ~JumpTableID;
814   JumpTables.emplace(JumpTableID, NewJT);
815   Function.JumpTables.emplace(JumpTableID, NewJT);
816   return std::make_pair(JumpTableID, NewLabel);
817 }
818 
819 std::string BinaryContext::generateJumpTableName(const BinaryFunction &BF,
820                                                  uint64_t Address) {
821   size_t Id;
822   uint64_t Offset = 0;
823   if (const JumpTable *JT = BF.getJumpTableContainingAddress(Address)) {
824     Offset = Address - JT->getAddress();
825     auto Itr = JT->Labels.find(Offset);
826     if (Itr != JT->Labels.end())
827       return std::string(Itr->second->getName());
828     Id = JumpTableIds.at(JT->getAddress());
829   } else {
830     Id = JumpTableIds[Address] = BF.JumpTables.size();
831   }
832   return ("JUMP_TABLE/" + BF.getOneName().str() + "." + std::to_string(Id) +
833           (Offset ? ("." + std::to_string(Offset)) : ""));
834 }
835 
836 bool BinaryContext::hasValidCodePadding(const BinaryFunction &BF) {
837   // FIXME: aarch64 support is missing.
838   if (!isX86())
839     return true;
840 
841   if (BF.getSize() == BF.getMaxSize())
842     return true;
843 
844   ErrorOr<ArrayRef<unsigned char>> FunctionData = BF.getData();
845   assert(FunctionData && "cannot get function as data");
846 
847   uint64_t Offset = BF.getSize();
848   MCInst Instr;
849   uint64_t InstrSize = 0;
850   uint64_t InstrAddress = BF.getAddress() + Offset;
851   using std::placeholders::_1;
852 
853   // Skip instructions that satisfy the predicate condition.
854   auto skipInstructions = [&](std::function<bool(const MCInst &)> Predicate) {
855     const uint64_t StartOffset = Offset;
856     for (; Offset < BF.getMaxSize();
857          Offset += InstrSize, InstrAddress += InstrSize) {
858       if (!DisAsm->getInstruction(Instr, InstrSize, FunctionData->slice(Offset),
859                                   InstrAddress, nulls()))
860         break;
861       if (!Predicate(Instr))
862         break;
863     }
864 
865     return Offset - StartOffset;
866   };
867 
868   // Skip a sequence of zero bytes.
869   auto skipZeros = [&]() {
870     const uint64_t StartOffset = Offset;
871     for (; Offset < BF.getMaxSize(); ++Offset)
872       if ((*FunctionData)[Offset] != 0)
873         break;
874 
875     return Offset - StartOffset;
876   };
877 
878   // Accept the whole padding area filled with breakpoints.
879   auto isBreakpoint = std::bind(&MCPlusBuilder::isBreakpoint, MIB.get(), _1);
880   if (skipInstructions(isBreakpoint) && Offset == BF.getMaxSize())
881     return true;
882 
883   auto isNoop = std::bind(&MCPlusBuilder::isNoop, MIB.get(), _1);
884 
885   // Some functions have a jump to the next function or to the padding area
886   // inserted after the body.
887   auto isSkipJump = [&](const MCInst &Instr) {
888     uint64_t TargetAddress = 0;
889     if (MIB->isUnconditionalBranch(Instr) &&
890         MIB->evaluateBranch(Instr, InstrAddress, InstrSize, TargetAddress)) {
891       if (TargetAddress >= InstrAddress + InstrSize &&
892           TargetAddress <= BF.getAddress() + BF.getMaxSize()) {
893         return true;
894       }
895     }
896     return false;
897   };
898 
899   // Skip over nops, jumps, and zero padding. Allow interleaving (this happens).
900   while (skipInstructions(isNoop) || skipInstructions(isSkipJump) ||
901          skipZeros())
902     ;
903 
904   if (Offset == BF.getMaxSize())
905     return true;
906 
907   if (opts::Verbosity >= 1) {
908     errs() << "BOLT-WARNING: bad padding at address 0x"
909            << Twine::utohexstr(BF.getAddress() + BF.getSize())
910            << " starting at offset " << (Offset - BF.getSize())
911            << " in function " << BF << '\n'
912            << FunctionData->slice(BF.getSize(), BF.getMaxSize() - BF.getSize())
913            << '\n';
914   }
915 
916   return false;
917 }
918 
919 void BinaryContext::adjustCodePadding() {
920   for (auto &BFI : BinaryFunctions) {
921     BinaryFunction &BF = BFI.second;
922     if (!shouldEmit(BF))
923       continue;
924 
925     if (!hasValidCodePadding(BF)) {
926       if (HasRelocations) {
927         if (opts::Verbosity >= 1) {
928           outs() << "BOLT-INFO: function " << BF
929                  << " has invalid padding. Ignoring the function.\n";
930         }
931         BF.setIgnored();
932       } else {
933         BF.setMaxSize(BF.getSize());
934       }
935     }
936   }
937 }
938 
939 MCSymbol *BinaryContext::registerNameAtAddress(StringRef Name, uint64_t Address,
940                                                uint64_t Size,
941                                                uint16_t Alignment,
942                                                unsigned Flags) {
943   // Register the name with MCContext.
944   MCSymbol *Symbol = Ctx->getOrCreateSymbol(Name);
945 
946   auto GAI = BinaryDataMap.find(Address);
947   BinaryData *BD;
948   if (GAI == BinaryDataMap.end()) {
949     ErrorOr<BinarySection &> SectionOrErr = getSectionForAddress(Address);
950     BinarySection &Section =
951         SectionOrErr ? SectionOrErr.get() : absoluteSection();
952     BD = new BinaryData(*Symbol, Address, Size, Alignment ? Alignment : 1,
953                         Section, Flags);
954     GAI = BinaryDataMap.emplace(Address, BD).first;
955     GlobalSymbols[Name] = BD;
956     updateObjectNesting(GAI);
957   } else {
958     BD = GAI->second;
959     if (!BD->hasName(Name)) {
960       GlobalSymbols[Name] = BD;
961       BD->Symbols.push_back(Symbol);
962     }
963   }
964 
965   return Symbol;
966 }
967 
968 const BinaryData *
969 BinaryContext::getBinaryDataContainingAddressImpl(uint64_t Address) const {
970   auto NI = BinaryDataMap.lower_bound(Address);
971   auto End = BinaryDataMap.end();
972   if ((NI != End && Address == NI->first) ||
973       ((NI != BinaryDataMap.begin()) && (NI-- != BinaryDataMap.begin()))) {
974     if (NI->second->containsAddress(Address))
975       return NI->second;
976 
977     // If this is a sub-symbol, see if a parent data contains the address.
978     const BinaryData *BD = NI->second->getParent();
979     while (BD) {
980       if (BD->containsAddress(Address))
981         return BD;
982       BD = BD->getParent();
983     }
984   }
985   return nullptr;
986 }
987 
988 bool BinaryContext::setBinaryDataSize(uint64_t Address, uint64_t Size) {
989   auto NI = BinaryDataMap.find(Address);
990   assert(NI != BinaryDataMap.end());
991   if (NI == BinaryDataMap.end())
992     return false;
993   // TODO: it's possible that a jump table starts at the same address
994   // as a larger blob of private data.  When we set the size of the
995   // jump table, it might be smaller than the total blob size.  In this
996   // case we just leave the original size since (currently) it won't really
997   // affect anything.
998   assert((!NI->second->Size || NI->second->Size == Size ||
999           (NI->second->isJumpTable() && NI->second->Size > Size)) &&
1000          "can't change the size of a symbol that has already had its "
1001          "size set");
1002   if (!NI->second->Size) {
1003     NI->second->Size = Size;
1004     updateObjectNesting(NI);
1005     return true;
1006   }
1007   return false;
1008 }
1009 
1010 void BinaryContext::generateSymbolHashes() {
1011   auto isPadding = [](const BinaryData &BD) {
1012     StringRef Contents = BD.getSection().getContents();
1013     StringRef SymData = Contents.substr(BD.getOffset(), BD.getSize());
1014     return (BD.getName().startswith("HOLEat") ||
1015             SymData.find_first_not_of(0) == StringRef::npos);
1016   };
1017 
1018   uint64_t NumCollisions = 0;
1019   for (auto &Entry : BinaryDataMap) {
1020     BinaryData &BD = *Entry.second;
1021     StringRef Name = BD.getName();
1022 
1023     if (!isInternalSymbolName(Name))
1024       continue;
1025 
1026     // First check if a non-anonymous alias exists and move it to the front.
1027     if (BD.getSymbols().size() > 1) {
1028       auto Itr = std::find_if(BD.getSymbols().begin(), BD.getSymbols().end(),
1029                               [&](const MCSymbol *Symbol) {
1030                                 return !isInternalSymbolName(Symbol->getName());
1031                               });
1032       if (Itr != BD.getSymbols().end()) {
1033         size_t Idx = std::distance(BD.getSymbols().begin(), Itr);
1034         std::swap(BD.getSymbols()[0], BD.getSymbols()[Idx]);
1035         continue;
1036       }
1037     }
1038 
1039     // We have to skip 0 size symbols since they will all collide.
1040     if (BD.getSize() == 0) {
1041       continue;
1042     }
1043 
1044     const uint64_t Hash = BD.getSection().hash(BD);
1045     const size_t Idx = Name.find("0x");
1046     std::string NewName =
1047         (Twine(Name.substr(0, Idx)) + "_" + Twine::utohexstr(Hash)).str();
1048     if (getBinaryDataByName(NewName)) {
1049       // Ignore collisions for symbols that appear to be padding
1050       // (i.e. all zeros or a "hole")
1051       if (!isPadding(BD)) {
1052         if (opts::Verbosity) {
1053           errs() << "BOLT-WARNING: collision detected when hashing " << BD
1054                  << " with new name (" << NewName << "), skipping.\n";
1055         }
1056         ++NumCollisions;
1057       }
1058       continue;
1059     }
1060     BD.Symbols.insert(BD.Symbols.begin(), Ctx->getOrCreateSymbol(NewName));
1061     GlobalSymbols[NewName] = &BD;
1062   }
1063   if (NumCollisions) {
1064     errs() << "BOLT-WARNING: " << NumCollisions
1065            << " collisions detected while hashing binary objects";
1066     if (!opts::Verbosity)
1067       errs() << ". Use -v=1 to see the list.";
1068     errs() << '\n';
1069   }
1070 }
1071 
1072 bool BinaryContext::registerFragment(BinaryFunction &TargetFunction,
1073                                      BinaryFunction &Function) const {
1074   if (!isPotentialFragmentByName(TargetFunction, Function))
1075     return false;
1076   assert(TargetFunction.isFragment() && "TargetFunction must be a fragment");
1077   if (TargetFunction.isParentFragment(&Function))
1078     return true;
1079   TargetFunction.addParentFragment(Function);
1080   Function.addFragment(TargetFunction);
1081   if (!HasRelocations) {
1082     TargetFunction.setSimple(false);
1083     Function.setSimple(false);
1084   }
1085   if (opts::Verbosity >= 1) {
1086     outs() << "BOLT-INFO: marking " << TargetFunction << " as a fragment of "
1087            << Function << '\n';
1088   }
1089   return true;
1090 }
1091 
1092 void BinaryContext::processInterproceduralReferences(BinaryFunction &Function) {
1093   for (uint64_t Address : Function.InterproceduralReferences) {
1094     if (!Address)
1095       continue;
1096 
1097     BinaryFunction *TargetFunction =
1098         getBinaryFunctionContainingAddress(Address);
1099     if (&Function == TargetFunction)
1100       continue;
1101 
1102     if (TargetFunction) {
1103       if (TargetFunction->IsFragment &&
1104           !registerFragment(*TargetFunction, Function)) {
1105         errs() << "BOLT-WARNING: interprocedural reference between unrelated "
1106                   "fragments: "
1107                << Function.getPrintName() << " and "
1108                << TargetFunction->getPrintName() << '\n';
1109       }
1110       if (uint64_t Offset = Address - TargetFunction->getAddress())
1111         TargetFunction->addEntryPointAtOffset(Offset);
1112 
1113       continue;
1114     }
1115 
1116     // Check if address falls in function padding space - this could be
1117     // unmarked data in code. In this case adjust the padding space size.
1118     ErrorOr<BinarySection &> Section = getSectionForAddress(Address);
1119     assert(Section && "cannot get section for referenced address");
1120 
1121     if (!Section->isText())
1122       continue;
1123 
1124     // PLT requires special handling and could be ignored in this context.
1125     StringRef SectionName = Section->getName();
1126     if (SectionName == ".plt" || SectionName == ".plt.got")
1127       continue;
1128 
1129     if (opts::processAllFunctions()) {
1130       errs() << "BOLT-ERROR: cannot process binaries with unmarked "
1131              << "object in code at address 0x" << Twine::utohexstr(Address)
1132              << " belonging to section " << SectionName << " in current mode\n";
1133       exit(1);
1134     }
1135 
1136     TargetFunction = getBinaryFunctionContainingAddress(Address,
1137                                                         /*CheckPastEnd=*/false,
1138                                                         /*UseMaxSize=*/true);
1139     // We are not going to overwrite non-simple functions, but for simple
1140     // ones - adjust the padding size.
1141     if (TargetFunction && TargetFunction->isSimple()) {
1142       errs() << "BOLT-WARNING: function " << *TargetFunction
1143              << " has an object detected in a padding region at address 0x"
1144              << Twine::utohexstr(Address) << '\n';
1145       TargetFunction->setMaxSize(TargetFunction->getSize());
1146     }
1147   }
1148 
1149   clearList(Function.InterproceduralReferences);
1150 }
1151 
1152 void BinaryContext::postProcessSymbolTable() {
1153   fixBinaryDataHoles();
1154   bool Valid = true;
1155   for (auto &Entry : BinaryDataMap) {
1156     BinaryData *BD = Entry.second;
1157     if ((BD->getName().startswith("SYMBOLat") ||
1158          BD->getName().startswith("DATAat")) &&
1159         !BD->getParent() && !BD->getSize() && !BD->isAbsolute() &&
1160         BD->getSection()) {
1161       errs() << "BOLT-WARNING: zero-sized top level symbol: " << *BD << "\n";
1162       Valid = false;
1163     }
1164   }
1165   assert(Valid);
1166   generateSymbolHashes();
1167 }
1168 
1169 void BinaryContext::foldFunction(BinaryFunction &ChildBF,
1170                                  BinaryFunction &ParentBF) {
1171   assert(!ChildBF.isMultiEntry() && !ParentBF.isMultiEntry() &&
1172          "cannot merge functions with multiple entry points");
1173 
1174   std::unique_lock<std::shared_timed_mutex> WriteCtxLock(CtxMutex,
1175                                                          std::defer_lock);
1176   std::unique_lock<std::shared_timed_mutex> WriteSymbolMapLock(
1177       SymbolToFunctionMapMutex, std::defer_lock);
1178 
1179   const StringRef ChildName = ChildBF.getOneName();
1180 
1181   // Move symbols over and update bookkeeping info.
1182   for (MCSymbol *Symbol : ChildBF.getSymbols()) {
1183     ParentBF.getSymbols().push_back(Symbol);
1184     WriteSymbolMapLock.lock();
1185     SymbolToFunctionMap[Symbol] = &ParentBF;
1186     WriteSymbolMapLock.unlock();
1187     // NB: there's no need to update BinaryDataMap and GlobalSymbols.
1188   }
1189   ChildBF.getSymbols().clear();
1190 
1191   // Move other names the child function is known under.
1192   std::move(ChildBF.Aliases.begin(), ChildBF.Aliases.end(),
1193             std::back_inserter(ParentBF.Aliases));
1194   ChildBF.Aliases.clear();
1195 
1196   if (HasRelocations) {
1197     // Merge execution counts of ChildBF into those of ParentBF.
1198     // Without relocations, we cannot reliably merge profiles as both functions
1199     // continue to exist and either one can be executed.
1200     ChildBF.mergeProfileDataInto(ParentBF);
1201 
1202     std::shared_lock<std::shared_timed_mutex> ReadBfsLock(BinaryFunctionsMutex,
1203                                                           std::defer_lock);
1204     std::unique_lock<std::shared_timed_mutex> WriteBfsLock(BinaryFunctionsMutex,
1205                                                            std::defer_lock);
1206     // Remove ChildBF from the global set of functions in relocs mode.
1207     ReadBfsLock.lock();
1208     auto FI = BinaryFunctions.find(ChildBF.getAddress());
1209     ReadBfsLock.unlock();
1210 
1211     assert(FI != BinaryFunctions.end() && "function not found");
1212     assert(&ChildBF == &FI->second && "function mismatch");
1213 
1214     WriteBfsLock.lock();
1215     ChildBF.clearDisasmState();
1216     FI = BinaryFunctions.erase(FI);
1217     WriteBfsLock.unlock();
1218 
1219   } else {
1220     // In non-relocation mode we keep the function, but rename it.
1221     std::string NewName = "__ICF_" + ChildName.str();
1222 
1223     WriteCtxLock.lock();
1224     ChildBF.getSymbols().push_back(Ctx->getOrCreateSymbol(NewName));
1225     WriteCtxLock.unlock();
1226 
1227     ChildBF.setFolded(&ParentBF);
1228   }
1229 }
1230 
1231 void BinaryContext::fixBinaryDataHoles() {
1232   assert(validateObjectNesting() && "object nesting inconsitency detected");
1233 
1234   for (BinarySection &Section : allocatableSections()) {
1235     std::vector<std::pair<uint64_t, uint64_t>> Holes;
1236 
1237     auto isNotHole = [&Section](const binary_data_iterator &Itr) {
1238       BinaryData *BD = Itr->second;
1239       bool isHole = (!BD->getParent() && !BD->getSize() && BD->isObject() &&
1240                      (BD->getName().startswith("SYMBOLat0x") ||
1241                       BD->getName().startswith("DATAat0x") ||
1242                       BD->getName().startswith("ANONYMOUS")));
1243       return !isHole && BD->getSection() == Section && !BD->getParent();
1244     };
1245 
1246     auto BDStart = BinaryDataMap.begin();
1247     auto BDEnd = BinaryDataMap.end();
1248     auto Itr = FilteredBinaryDataIterator(isNotHole, BDStart, BDEnd);
1249     auto End = FilteredBinaryDataIterator(isNotHole, BDEnd, BDEnd);
1250 
1251     uint64_t EndAddress = Section.getAddress();
1252 
1253     while (Itr != End) {
1254       if (Itr->second->getAddress() > EndAddress) {
1255         uint64_t Gap = Itr->second->getAddress() - EndAddress;
1256         Holes.emplace_back(EndAddress, Gap);
1257       }
1258       EndAddress = Itr->second->getEndAddress();
1259       ++Itr;
1260     }
1261 
1262     if (EndAddress < Section.getEndAddress())
1263       Holes.emplace_back(EndAddress, Section.getEndAddress() - EndAddress);
1264 
1265     // If there is already a symbol at the start of the hole, grow that symbol
1266     // to cover the rest.  Otherwise, create a new symbol to cover the hole.
1267     for (std::pair<uint64_t, uint64_t> &Hole : Holes) {
1268       BinaryData *BD = getBinaryDataAtAddress(Hole.first);
1269       if (BD) {
1270         // BD->getSection() can be != Section if there are sections that
1271         // overlap.  In this case it is probably safe to just skip the holes
1272         // since the overlapping section will not(?) have any symbols in it.
1273         if (BD->getSection() == Section)
1274           setBinaryDataSize(Hole.first, Hole.second);
1275       } else {
1276         getOrCreateGlobalSymbol(Hole.first, "HOLEat", Hole.second, 1);
1277       }
1278     }
1279   }
1280 
1281   assert(validateObjectNesting() && "object nesting inconsitency detected");
1282   assert(validateHoles() && "top level hole detected in object map");
1283 }
1284 
1285 void BinaryContext::printGlobalSymbols(raw_ostream &OS) const {
1286   const BinarySection *CurrentSection = nullptr;
1287   bool FirstSection = true;
1288 
1289   for (auto &Entry : BinaryDataMap) {
1290     const BinaryData *BD = Entry.second;
1291     const BinarySection &Section = BD->getSection();
1292     if (FirstSection || Section != *CurrentSection) {
1293       uint64_t Address, Size;
1294       StringRef Name = Section.getName();
1295       if (Section) {
1296         Address = Section.getAddress();
1297         Size = Section.getSize();
1298       } else {
1299         Address = BD->getAddress();
1300         Size = BD->getSize();
1301       }
1302       OS << "BOLT-INFO: Section " << Name << ", "
1303          << "0x" + Twine::utohexstr(Address) << ":"
1304          << "0x" + Twine::utohexstr(Address + Size) << "/" << Size << "\n";
1305       CurrentSection = &Section;
1306       FirstSection = false;
1307     }
1308 
1309     OS << "BOLT-INFO: ";
1310     const BinaryData *P = BD->getParent();
1311     while (P) {
1312       OS << "  ";
1313       P = P->getParent();
1314     }
1315     OS << *BD << "\n";
1316   }
1317 }
1318 
1319 Expected<unsigned>
1320 BinaryContext::getDwarfFile(StringRef Directory, StringRef FileName,
1321                             unsigned FileNumber,
1322                             Optional<MD5::MD5Result> Checksum,
1323                             Optional<StringRef> Source, unsigned CUID) {
1324   DwarfLineTable &Table = DwarfLineTablesCUMap[CUID];
1325   return Table.tryGetFile(Directory, FileName, Checksum, Source,
1326                           Ctx->getDwarfVersion(), FileNumber);
1327 }
1328 
1329 unsigned BinaryContext::addDebugFilenameToUnit(const uint32_t DestCUID,
1330                                                const uint32_t SrcCUID,
1331                                                unsigned FileIndex) {
1332   DWARFCompileUnit *SrcUnit = DwCtx->getCompileUnitForOffset(SrcCUID);
1333   const DWARFDebugLine::LineTable *LineTable =
1334       DwCtx->getLineTableForUnit(SrcUnit);
1335   const std::vector<DWARFDebugLine::FileNameEntry> &FileNames =
1336       LineTable->Prologue.FileNames;
1337   // Dir indexes start at 1, as DWARF file numbers, and a dir index 0
1338   // means empty dir.
1339   assert(FileIndex > 0 && FileIndex <= FileNames.size() &&
1340          "FileIndex out of range for the compilation unit.");
1341   StringRef Dir = "";
1342   if (FileNames[FileIndex - 1].DirIdx != 0) {
1343     if (Optional<const char *> DirName = dwarf::toString(
1344             LineTable->Prologue
1345                 .IncludeDirectories[FileNames[FileIndex - 1].DirIdx - 1])) {
1346       Dir = *DirName;
1347     }
1348   }
1349   StringRef FileName = "";
1350   if (Optional<const char *> FName =
1351           dwarf::toString(FileNames[FileIndex - 1].Name))
1352     FileName = *FName;
1353   assert(FileName != "");
1354   return cantFail(getDwarfFile(Dir, FileName, 0, None, None, DestCUID));
1355 }
1356 
1357 std::vector<BinaryFunction *> BinaryContext::getSortedFunctions() {
1358   std::vector<BinaryFunction *> SortedFunctions(BinaryFunctions.size());
1359   std::transform(BinaryFunctions.begin(), BinaryFunctions.end(),
1360                  SortedFunctions.begin(),
1361                  [](std::pair<const uint64_t, BinaryFunction> &BFI) {
1362                    return &BFI.second;
1363                  });
1364 
1365   std::stable_sort(SortedFunctions.begin(), SortedFunctions.end(),
1366                    [](const BinaryFunction *A, const BinaryFunction *B) {
1367                      if (A->hasValidIndex() && B->hasValidIndex()) {
1368                        return A->getIndex() < B->getIndex();
1369                      }
1370                      return A->hasValidIndex();
1371                    });
1372   return SortedFunctions;
1373 }
1374 
1375 std::vector<BinaryFunction *> BinaryContext::getAllBinaryFunctions() {
1376   std::vector<BinaryFunction *> AllFunctions;
1377   AllFunctions.reserve(BinaryFunctions.size() + InjectedBinaryFunctions.size());
1378   std::transform(BinaryFunctions.begin(), BinaryFunctions.end(),
1379                  std::back_inserter(AllFunctions),
1380                  [](std::pair<const uint64_t, BinaryFunction> &BFI) {
1381                    return &BFI.second;
1382                  });
1383   std::copy(InjectedBinaryFunctions.begin(), InjectedBinaryFunctions.end(),
1384             std::back_inserter(AllFunctions));
1385 
1386   return AllFunctions;
1387 }
1388 
1389 Optional<DWARFUnit *> BinaryContext::getDWOCU(uint64_t DWOId) {
1390   auto Iter = DWOCUs.find(DWOId);
1391   if (Iter == DWOCUs.end())
1392     return None;
1393 
1394   return Iter->second;
1395 }
1396 
1397 DWARFContext *BinaryContext::getDWOContext() {
1398   if (DWOCUs.empty())
1399     return nullptr;
1400   return &DWOCUs.begin()->second->getContext();
1401 }
1402 
1403 /// Handles DWO sections that can either be in .o, .dwo or .dwp files.
1404 void BinaryContext::preprocessDWODebugInfo() {
1405   for (const std::unique_ptr<DWARFUnit> &CU : DwCtx->compile_units()) {
1406     DWARFUnit *const DwarfUnit = CU.get();
1407     if (llvm::Optional<uint64_t> DWOId = DwarfUnit->getDWOId()) {
1408       DWARFUnit *DWOCU = DwarfUnit->getNonSkeletonUnitDIE(false).getDwarfUnit();
1409       if (!DWOCU->isDWOUnit()) {
1410         std::string DWOName = dwarf::toString(
1411             DwarfUnit->getUnitDIE().find(
1412                 {dwarf::DW_AT_dwo_name, dwarf::DW_AT_GNU_dwo_name}),
1413             "");
1414         outs() << "BOLT-WARNING: Debug Fission: DWO debug information for "
1415                << DWOName
1416                << " was not retrieved and won't be updated. Please check "
1417                   "relative path.\n";
1418         continue;
1419       }
1420       DWOCUs[*DWOId] = DWOCU;
1421     }
1422   }
1423 }
1424 
1425 void BinaryContext::preprocessDebugInfo() {
1426   struct CURange {
1427     uint64_t LowPC;
1428     uint64_t HighPC;
1429     DWARFUnit *Unit;
1430 
1431     bool operator<(const CURange &Other) const { return LowPC < Other.LowPC; }
1432   };
1433 
1434   // Building a map of address ranges to CUs similar to .debug_aranges and use
1435   // it to assign CU to functions.
1436   std::vector<CURange> AllRanges;
1437   AllRanges.reserve(DwCtx->getNumCompileUnits());
1438   for (const std::unique_ptr<DWARFUnit> &CU : DwCtx->compile_units()) {
1439     Expected<DWARFAddressRangesVector> RangesOrError =
1440         CU->getUnitDIE().getAddressRanges();
1441     if (!RangesOrError) {
1442       consumeError(RangesOrError.takeError());
1443       continue;
1444     }
1445     for (DWARFAddressRange &Range : *RangesOrError) {
1446       // Parts of the debug info could be invalidated due to corresponding code
1447       // being removed from the binary by the linker. Hence we check if the
1448       // address is a valid one.
1449       if (containsAddress(Range.LowPC))
1450         AllRanges.emplace_back(CURange{Range.LowPC, Range.HighPC, CU.get()});
1451     }
1452   }
1453 
1454   std::sort(AllRanges.begin(), AllRanges.end());
1455   for (auto &KV : BinaryFunctions) {
1456     const uint64_t FunctionAddress = KV.first;
1457     BinaryFunction &Function = KV.second;
1458 
1459     auto It = std::partition_point(
1460         AllRanges.begin(), AllRanges.end(),
1461         [=](CURange R) { return R.HighPC <= FunctionAddress; });
1462     if (It != AllRanges.end() && It->LowPC <= FunctionAddress) {
1463       Function.setDWARFUnit(It->Unit);
1464     }
1465   }
1466 
1467   // Discover units with debug info that needs to be updated.
1468   for (const auto &KV : BinaryFunctions) {
1469     const BinaryFunction &BF = KV.second;
1470     if (shouldEmit(BF) && BF.getDWARFUnit())
1471       ProcessedCUs.insert(BF.getDWARFUnit());
1472   }
1473 
1474   // Clear debug info for functions from units that we are not going to process.
1475   for (auto &KV : BinaryFunctions) {
1476     BinaryFunction &BF = KV.second;
1477     if (BF.getDWARFUnit() && !ProcessedCUs.count(BF.getDWARFUnit()))
1478       BF.setDWARFUnit(nullptr);
1479   }
1480 
1481   if (opts::Verbosity >= 1) {
1482     outs() << "BOLT-INFO: " << ProcessedCUs.size() << " out of "
1483            << DwCtx->getNumCompileUnits() << " CUs will be updated\n";
1484   }
1485 
1486   // Populate MCContext with DWARF files from all units.
1487   StringRef GlobalPrefix = AsmInfo->getPrivateGlobalPrefix();
1488   for (const std::unique_ptr<DWARFUnit> &CU : DwCtx->compile_units()) {
1489     const uint64_t CUID = CU->getOffset();
1490     getDwarfLineTable(CUID).setLabel(Ctx->getOrCreateSymbol(
1491         GlobalPrefix + "line_table_start" + Twine(CUID)));
1492 
1493     if (!ProcessedCUs.count(CU.get()))
1494       continue;
1495 
1496     const DWARFDebugLine::LineTable *LineTable =
1497         DwCtx->getLineTableForUnit(CU.get());
1498     const std::vector<DWARFDebugLine::FileNameEntry> &FileNames =
1499         LineTable->Prologue.FileNames;
1500 
1501     // Assign a unique label to every line table, one per CU.
1502     // Make sure empty debug line tables are registered too.
1503     if (FileNames.empty()) {
1504       cantFail(getDwarfFile("", "<unknown>", 0, None, None, CUID));
1505       continue;
1506     }
1507     for (size_t I = 0, Size = FileNames.size(); I != Size; ++I) {
1508       // Dir indexes start at 1, as DWARF file numbers, and a dir index 0
1509       // means empty dir.
1510       StringRef Dir = "";
1511       if (FileNames[I].DirIdx != 0)
1512         if (Optional<const char *> DirName = dwarf::toString(
1513                 LineTable->Prologue
1514                     .IncludeDirectories[FileNames[I].DirIdx - 1]))
1515           Dir = *DirName;
1516       StringRef FileName = "";
1517       if (Optional<const char *> FName = dwarf::toString(FileNames[I].Name))
1518         FileName = *FName;
1519       assert(FileName != "");
1520       cantFail(getDwarfFile(Dir, FileName, 0, None, None, CUID));
1521     }
1522   }
1523 
1524   preprocessDWODebugInfo();
1525 }
1526 
1527 bool BinaryContext::shouldEmit(const BinaryFunction &Function) const {
1528   if (opts::processAllFunctions())
1529     return true;
1530 
1531   if (Function.isIgnored())
1532     return false;
1533 
1534   // In relocation mode we will emit non-simple functions with CFG.
1535   // If the function does not have a CFG it should be marked as ignored.
1536   return HasRelocations || Function.isSimple();
1537 }
1538 
1539 void BinaryContext::printCFI(raw_ostream &OS, const MCCFIInstruction &Inst) {
1540   uint32_t Operation = Inst.getOperation();
1541   switch (Operation) {
1542   case MCCFIInstruction::OpSameValue:
1543     OS << "OpSameValue Reg" << Inst.getRegister();
1544     break;
1545   case MCCFIInstruction::OpRememberState:
1546     OS << "OpRememberState";
1547     break;
1548   case MCCFIInstruction::OpRestoreState:
1549     OS << "OpRestoreState";
1550     break;
1551   case MCCFIInstruction::OpOffset:
1552     OS << "OpOffset Reg" << Inst.getRegister() << " " << Inst.getOffset();
1553     break;
1554   case MCCFIInstruction::OpDefCfaRegister:
1555     OS << "OpDefCfaRegister Reg" << Inst.getRegister();
1556     break;
1557   case MCCFIInstruction::OpDefCfaOffset:
1558     OS << "OpDefCfaOffset " << Inst.getOffset();
1559     break;
1560   case MCCFIInstruction::OpDefCfa:
1561     OS << "OpDefCfa Reg" << Inst.getRegister() << " " << Inst.getOffset();
1562     break;
1563   case MCCFIInstruction::OpRelOffset:
1564     OS << "OpRelOffset Reg" << Inst.getRegister() << " " << Inst.getOffset();
1565     break;
1566   case MCCFIInstruction::OpAdjustCfaOffset:
1567     OS << "OfAdjustCfaOffset " << Inst.getOffset();
1568     break;
1569   case MCCFIInstruction::OpEscape:
1570     OS << "OpEscape";
1571     break;
1572   case MCCFIInstruction::OpRestore:
1573     OS << "OpRestore Reg" << Inst.getRegister();
1574     break;
1575   case MCCFIInstruction::OpUndefined:
1576     OS << "OpUndefined Reg" << Inst.getRegister();
1577     break;
1578   case MCCFIInstruction::OpRegister:
1579     OS << "OpRegister Reg" << Inst.getRegister() << " Reg"
1580        << Inst.getRegister2();
1581     break;
1582   case MCCFIInstruction::OpWindowSave:
1583     OS << "OpWindowSave";
1584     break;
1585   case MCCFIInstruction::OpGnuArgsSize:
1586     OS << "OpGnuArgsSize";
1587     break;
1588   default:
1589     OS << "Op#" << Operation;
1590     break;
1591   }
1592 }
1593 
1594 void BinaryContext::printInstruction(raw_ostream &OS, const MCInst &Instruction,
1595                                      uint64_t Offset,
1596                                      const BinaryFunction *Function,
1597                                      bool PrintMCInst, bool PrintMemData,
1598                                      bool PrintRelocations) const {
1599   if (MIB->isEHLabel(Instruction)) {
1600     OS << "  EH_LABEL: " << *MIB->getTargetSymbol(Instruction) << '\n';
1601     return;
1602   }
1603   OS << format("    %08" PRIx64 ": ", Offset);
1604   if (MIB->isCFI(Instruction)) {
1605     uint32_t Offset = Instruction.getOperand(0).getImm();
1606     OS << "\t!CFI\t$" << Offset << "\t; ";
1607     if (Function)
1608       printCFI(OS, *Function->getCFIFor(Instruction));
1609     OS << "\n";
1610     return;
1611   }
1612   InstPrinter->printInst(&Instruction, 0, "", *STI, OS);
1613   if (MIB->isCall(Instruction)) {
1614     if (MIB->isTailCall(Instruction))
1615       OS << " # TAILCALL ";
1616     if (MIB->isInvoke(Instruction)) {
1617       const Optional<MCPlus::MCLandingPad> EHInfo = MIB->getEHInfo(Instruction);
1618       OS << " # handler: ";
1619       if (EHInfo->first)
1620         OS << *EHInfo->first;
1621       else
1622         OS << '0';
1623       OS << "; action: " << EHInfo->second;
1624       const int64_t GnuArgsSize = MIB->getGnuArgsSize(Instruction);
1625       if (GnuArgsSize >= 0)
1626         OS << "; GNU_args_size = " << GnuArgsSize;
1627     }
1628   } else if (MIB->isIndirectBranch(Instruction)) {
1629     if (uint64_t JTAddress = MIB->getJumpTable(Instruction)) {
1630       OS << " # JUMPTABLE @0x" << Twine::utohexstr(JTAddress);
1631     } else {
1632       OS << " # UNKNOWN CONTROL FLOW";
1633     }
1634   }
1635   if (Optional<uint32_t> Offset = MIB->getOffset(Instruction))
1636     OS << " # Offset: " << *Offset;
1637 
1638   MIB->printAnnotations(Instruction, OS);
1639 
1640   if (opts::PrintDebugInfo) {
1641     DebugLineTableRowRef RowRef =
1642         DebugLineTableRowRef::fromSMLoc(Instruction.getLoc());
1643     if (RowRef != DebugLineTableRowRef::NULL_ROW) {
1644       const DWARFDebugLine::LineTable *LineTable;
1645       if (Function && Function->getDWARFUnit() &&
1646           Function->getDWARFUnit()->getOffset() == RowRef.DwCompileUnitIndex) {
1647         LineTable = Function->getDWARFLineTable();
1648       } else {
1649         LineTable = DwCtx->getLineTableForUnit(
1650             DwCtx->getCompileUnitForOffset(RowRef.DwCompileUnitIndex));
1651       }
1652       assert(LineTable &&
1653              "line table expected for instruction with debug info");
1654 
1655       const DWARFDebugLine::Row &Row = LineTable->Rows[RowRef.RowIndex - 1];
1656       StringRef FileName = "";
1657       if (Optional<const char *> FName =
1658               dwarf::toString(LineTable->Prologue.FileNames[Row.File - 1].Name))
1659         FileName = *FName;
1660       OS << " # debug line " << FileName << ":" << Row.Line;
1661       if (Row.Column)
1662         OS << ":" << Row.Column;
1663       if (Row.Discriminator)
1664         OS << " discriminator:" << Row.Discriminator;
1665     }
1666   }
1667 
1668   if ((opts::PrintRelocations || PrintRelocations) && Function) {
1669     const uint64_t Size = computeCodeSize(&Instruction, &Instruction + 1);
1670     Function->printRelocations(OS, Offset, Size);
1671   }
1672 
1673   OS << "\n";
1674 
1675   if (PrintMCInst) {
1676     Instruction.dump_pretty(OS, InstPrinter.get());
1677     OS << "\n";
1678   }
1679 }
1680 
1681 ErrorOr<BinarySection &> BinaryContext::getSectionForAddress(uint64_t Address) {
1682   auto SI = AddressToSection.upper_bound(Address);
1683   if (SI != AddressToSection.begin()) {
1684     --SI;
1685     uint64_t UpperBound = SI->first + SI->second->getSize();
1686     if (!SI->second->getSize())
1687       UpperBound += 1;
1688     if (UpperBound > Address)
1689       return *SI->second;
1690   }
1691   return std::make_error_code(std::errc::bad_address);
1692 }
1693 
1694 ErrorOr<StringRef>
1695 BinaryContext::getSectionNameForAddress(uint64_t Address) const {
1696   if (ErrorOr<const BinarySection &> Section = getSectionForAddress(Address))
1697     return Section->getName();
1698   return std::make_error_code(std::errc::bad_address);
1699 }
1700 
1701 BinarySection &BinaryContext::registerSection(BinarySection *Section) {
1702   auto Res = Sections.insert(Section);
1703   (void)Res;
1704   assert(Res.second && "can't register the same section twice.");
1705 
1706   // Only register allocatable sections in the AddressToSection map.
1707   if (Section->isAllocatable() && Section->getAddress())
1708     AddressToSection.insert(std::make_pair(Section->getAddress(), Section));
1709   NameToSection.insert(
1710       std::make_pair(std::string(Section->getName()), Section));
1711   LLVM_DEBUG(dbgs() << "BOLT-DEBUG: registering " << *Section << "\n");
1712   return *Section;
1713 }
1714 
1715 BinarySection &BinaryContext::registerSection(SectionRef Section) {
1716   return registerSection(new BinarySection(*this, Section));
1717 }
1718 
1719 BinarySection &
1720 BinaryContext::registerSection(StringRef SectionName,
1721                                const BinarySection &OriginalSection) {
1722   return registerSection(
1723       new BinarySection(*this, SectionName, OriginalSection));
1724 }
1725 
1726 BinarySection &
1727 BinaryContext::registerOrUpdateSection(StringRef Name, unsigned ELFType,
1728                                        unsigned ELFFlags, uint8_t *Data,
1729                                        uint64_t Size, unsigned Alignment) {
1730   auto NamedSections = getSectionByName(Name);
1731   if (NamedSections.begin() != NamedSections.end()) {
1732     assert(std::next(NamedSections.begin()) == NamedSections.end() &&
1733            "can only update unique sections");
1734     BinarySection *Section = NamedSections.begin()->second;
1735 
1736     LLVM_DEBUG(dbgs() << "BOLT-DEBUG: updating " << *Section << " -> ");
1737     const bool Flag = Section->isAllocatable();
1738     (void)Flag;
1739     Section->update(Data, Size, Alignment, ELFType, ELFFlags);
1740     LLVM_DEBUG(dbgs() << *Section << "\n");
1741     // FIXME: Fix section flags/attributes for MachO.
1742     if (isELF())
1743       assert(Flag == Section->isAllocatable() &&
1744              "can't change section allocation status");
1745     return *Section;
1746   }
1747 
1748   return registerSection(
1749       new BinarySection(*this, Name, Data, Size, Alignment, ELFType, ELFFlags));
1750 }
1751 
1752 bool BinaryContext::deregisterSection(BinarySection &Section) {
1753   BinarySection *SectionPtr = &Section;
1754   auto Itr = Sections.find(SectionPtr);
1755   if (Itr != Sections.end()) {
1756     auto Range = AddressToSection.equal_range(SectionPtr->getAddress());
1757     while (Range.first != Range.second) {
1758       if (Range.first->second == SectionPtr) {
1759         AddressToSection.erase(Range.first);
1760         break;
1761       }
1762       ++Range.first;
1763     }
1764 
1765     auto NameRange =
1766         NameToSection.equal_range(std::string(SectionPtr->getName()));
1767     while (NameRange.first != NameRange.second) {
1768       if (NameRange.first->second == SectionPtr) {
1769         NameToSection.erase(NameRange.first);
1770         break;
1771       }
1772       ++NameRange.first;
1773     }
1774 
1775     Sections.erase(Itr);
1776     delete SectionPtr;
1777     return true;
1778   }
1779   return false;
1780 }
1781 
1782 void BinaryContext::printSections(raw_ostream &OS) const {
1783   for (BinarySection *const &Section : Sections)
1784     OS << "BOLT-INFO: " << *Section << "\n";
1785 }
1786 
1787 BinarySection &BinaryContext::absoluteSection() {
1788   if (ErrorOr<BinarySection &> Section = getUniqueSectionByName("<absolute>"))
1789     return *Section;
1790   return registerOrUpdateSection("<absolute>", ELF::SHT_NULL, 0u);
1791 }
1792 
1793 ErrorOr<uint64_t> BinaryContext::getUnsignedValueAtAddress(uint64_t Address,
1794                                                            size_t Size) const {
1795   const ErrorOr<const BinarySection &> Section = getSectionForAddress(Address);
1796   if (!Section)
1797     return std::make_error_code(std::errc::bad_address);
1798 
1799   if (Section->isVirtual())
1800     return 0;
1801 
1802   DataExtractor DE(Section->getContents(), AsmInfo->isLittleEndian(),
1803                    AsmInfo->getCodePointerSize());
1804   auto ValueOffset = static_cast<uint64_t>(Address - Section->getAddress());
1805   return DE.getUnsigned(&ValueOffset, Size);
1806 }
1807 
1808 ErrorOr<uint64_t> BinaryContext::getSignedValueAtAddress(uint64_t Address,
1809                                                          size_t Size) const {
1810   const ErrorOr<const BinarySection &> Section = getSectionForAddress(Address);
1811   if (!Section)
1812     return std::make_error_code(std::errc::bad_address);
1813 
1814   if (Section->isVirtual())
1815     return 0;
1816 
1817   DataExtractor DE(Section->getContents(), AsmInfo->isLittleEndian(),
1818                    AsmInfo->getCodePointerSize());
1819   auto ValueOffset = static_cast<uint64_t>(Address - Section->getAddress());
1820   return DE.getSigned(&ValueOffset, Size);
1821 }
1822 
1823 void BinaryContext::addRelocation(uint64_t Address, MCSymbol *Symbol,
1824                                   uint64_t Type, uint64_t Addend,
1825                                   uint64_t Value) {
1826   ErrorOr<BinarySection &> Section = getSectionForAddress(Address);
1827   assert(Section && "cannot find section for address");
1828   Section->addRelocation(Address - Section->getAddress(), Symbol, Type, Addend,
1829                          Value);
1830 }
1831 
1832 void BinaryContext::addDynamicRelocation(uint64_t Address, MCSymbol *Symbol,
1833                                          uint64_t Type, uint64_t Addend,
1834                                          uint64_t Value) {
1835   ErrorOr<BinarySection &> Section = getSectionForAddress(Address);
1836   assert(Section && "cannot find section for address");
1837   Section->addDynamicRelocation(Address - Section->getAddress(), Symbol, Type,
1838                                 Addend, Value);
1839 }
1840 
1841 bool BinaryContext::removeRelocationAt(uint64_t Address) {
1842   ErrorOr<BinarySection &> Section = getSectionForAddress(Address);
1843   assert(Section && "cannot find section for address");
1844   return Section->removeRelocationAt(Address - Section->getAddress());
1845 }
1846 
1847 const Relocation *BinaryContext::getRelocationAt(uint64_t Address) {
1848   ErrorOr<BinarySection &> Section = getSectionForAddress(Address);
1849   if (!Section)
1850     return nullptr;
1851 
1852   return Section->getRelocationAt(Address - Section->getAddress());
1853 }
1854 
1855 const Relocation *BinaryContext::getDynamicRelocationAt(uint64_t Address) {
1856   ErrorOr<BinarySection &> Section = getSectionForAddress(Address);
1857   if (!Section)
1858     return nullptr;
1859 
1860   return Section->getDynamicRelocationAt(Address - Section->getAddress());
1861 }
1862 
1863 void BinaryContext::markAmbiguousRelocations(BinaryData &BD,
1864                                              const uint64_t Address) {
1865   auto setImmovable = [&](BinaryData &BD) {
1866     BinaryData *Root = BD.getAtomicRoot();
1867     LLVM_DEBUG(if (Root->isMoveable()) {
1868       dbgs() << "BOLT-DEBUG: setting " << *Root << " as immovable "
1869              << "due to ambiguous relocation referencing 0x"
1870              << Twine::utohexstr(Address) << '\n';
1871     });
1872     Root->setIsMoveable(false);
1873   };
1874 
1875   if (Address == BD.getAddress()) {
1876     setImmovable(BD);
1877 
1878     // Set previous symbol as immovable
1879     BinaryData *Prev = getBinaryDataContainingAddress(Address - 1);
1880     if (Prev && Prev->getEndAddress() == BD.getAddress())
1881       setImmovable(*Prev);
1882   }
1883 
1884   if (Address == BD.getEndAddress()) {
1885     setImmovable(BD);
1886 
1887     // Set next symbol as immovable
1888     BinaryData *Next = getBinaryDataContainingAddress(BD.getEndAddress());
1889     if (Next && Next->getAddress() == BD.getEndAddress())
1890       setImmovable(*Next);
1891   }
1892 }
1893 
1894 BinaryFunction *BinaryContext::getFunctionForSymbol(const MCSymbol *Symbol,
1895                                                     uint64_t *EntryDesc) {
1896   std::shared_lock<std::shared_timed_mutex> Lock(SymbolToFunctionMapMutex);
1897   auto BFI = SymbolToFunctionMap.find(Symbol);
1898   if (BFI == SymbolToFunctionMap.end())
1899     return nullptr;
1900 
1901   BinaryFunction *BF = BFI->second;
1902   if (EntryDesc)
1903     *EntryDesc = BF->getEntryIDForSymbol(Symbol);
1904 
1905   return BF;
1906 }
1907 
1908 void BinaryContext::exitWithBugReport(StringRef Message,
1909                                       const BinaryFunction &Function) const {
1910   errs() << "=======================================\n";
1911   errs() << "BOLT is unable to proceed because it couldn't properly understand "
1912             "this function.\n";
1913   errs() << "If you are running the most recent version of BOLT, you may "
1914             "want to "
1915             "report this and paste this dump.\nPlease check that there is no "
1916             "sensitive contents being shared in this dump.\n";
1917   errs() << "\nOffending function: " << Function.getPrintName() << "\n\n";
1918   ScopedPrinter SP(errs());
1919   SP.printBinaryBlock("Function contents", *Function.getData());
1920   errs() << "\n";
1921   Function.dump();
1922   errs() << "ERROR: " << Message;
1923   errs() << "\n=======================================\n";
1924   exit(1);
1925 }
1926 
1927 BinaryFunction *
1928 BinaryContext::createInjectedBinaryFunction(const std::string &Name,
1929                                             bool IsSimple) {
1930   InjectedBinaryFunctions.push_back(new BinaryFunction(Name, *this, IsSimple));
1931   BinaryFunction *BF = InjectedBinaryFunctions.back();
1932   setSymbolToFunctionMap(BF->getSymbol(), BF);
1933   BF->CurrentState = BinaryFunction::State::CFG;
1934   return BF;
1935 }
1936 
1937 std::pair<size_t, size_t>
1938 BinaryContext::calculateEmittedSize(BinaryFunction &BF, bool FixBranches) {
1939   // Adjust branch instruction to match the current layout.
1940   if (FixBranches)
1941     BF.fixBranches();
1942 
1943   // Create local MC context to isolate the effect of ephemeral code emission.
1944   IndependentCodeEmitter MCEInstance = createIndependentMCCodeEmitter();
1945   MCContext *LocalCtx = MCEInstance.LocalCtx.get();
1946   MCAsmBackend *MAB =
1947       TheTarget->createMCAsmBackend(*STI, *MRI, MCTargetOptions());
1948 
1949   SmallString<256> Code;
1950   raw_svector_ostream VecOS(Code);
1951 
1952   std::unique_ptr<MCObjectWriter> OW = MAB->createObjectWriter(VecOS);
1953   std::unique_ptr<MCStreamer> Streamer(TheTarget->createMCObjectStreamer(
1954       *TheTriple, *LocalCtx, std::unique_ptr<MCAsmBackend>(MAB), std::move(OW),
1955       std::unique_ptr<MCCodeEmitter>(MCEInstance.MCE.release()), *STI,
1956       /*RelaxAll=*/false,
1957       /*IncrementalLinkerCompatible=*/false,
1958       /*DWARFMustBeAtTheEnd=*/false));
1959 
1960   Streamer->initSections(false, *STI);
1961 
1962   MCSection *Section = MCEInstance.LocalMOFI->getTextSection();
1963   Section->setHasInstructions(true);
1964 
1965   // Create symbols in the LocalCtx so that they get destroyed with it.
1966   MCSymbol *StartLabel = LocalCtx->createTempSymbol();
1967   MCSymbol *EndLabel = LocalCtx->createTempSymbol();
1968   MCSymbol *ColdStartLabel = LocalCtx->createTempSymbol();
1969   MCSymbol *ColdEndLabel = LocalCtx->createTempSymbol();
1970 
1971   Streamer->SwitchSection(Section);
1972   Streamer->emitLabel(StartLabel);
1973   emitFunctionBody(*Streamer, BF, /*EmitColdPart=*/false,
1974                    /*EmitCodeOnly=*/true);
1975   Streamer->emitLabel(EndLabel);
1976 
1977   if (BF.isSplit()) {
1978     MCSectionELF *ColdSection =
1979         LocalCtx->getELFSection(BF.getColdCodeSectionName(), ELF::SHT_PROGBITS,
1980                                 ELF::SHF_EXECINSTR | ELF::SHF_ALLOC);
1981     ColdSection->setHasInstructions(true);
1982 
1983     Streamer->SwitchSection(ColdSection);
1984     Streamer->emitLabel(ColdStartLabel);
1985     emitFunctionBody(*Streamer, BF, /*EmitColdPart=*/true,
1986                      /*EmitCodeOnly=*/true);
1987     Streamer->emitLabel(ColdEndLabel);
1988     // To avoid calling MCObjectStreamer::flushPendingLabels() which is private
1989     Streamer->emitBytes(StringRef(""));
1990     Streamer->SwitchSection(Section);
1991   }
1992 
1993   // To avoid calling MCObjectStreamer::flushPendingLabels() which is private or
1994   // MCStreamer::Finish(), which does more than we want
1995   Streamer->emitBytes(StringRef(""));
1996 
1997   MCAssembler &Assembler =
1998       static_cast<MCObjectStreamer *>(Streamer.get())->getAssembler();
1999   MCAsmLayout Layout(Assembler);
2000   Assembler.layout(Layout);
2001 
2002   const uint64_t HotSize =
2003       Layout.getSymbolOffset(*EndLabel) - Layout.getSymbolOffset(*StartLabel);
2004   const uint64_t ColdSize = BF.isSplit()
2005                                 ? Layout.getSymbolOffset(*ColdEndLabel) -
2006                                       Layout.getSymbolOffset(*ColdStartLabel)
2007                                 : 0ULL;
2008 
2009   // Clean-up the effect of the code emission.
2010   for (const MCSymbol &Symbol : Assembler.symbols()) {
2011     MCSymbol *MutableSymbol = const_cast<MCSymbol *>(&Symbol);
2012     MutableSymbol->setUndefined();
2013     MutableSymbol->setIsRegistered(false);
2014   }
2015 
2016   return std::make_pair(HotSize, ColdSize);
2017 }
2018 
2019 bool BinaryContext::validateEncoding(const MCInst &Inst,
2020                                      ArrayRef<uint8_t> InputEncoding) const {
2021   SmallString<256> Code;
2022   SmallVector<MCFixup, 4> Fixups;
2023   raw_svector_ostream VecOS(Code);
2024 
2025   MCE->encodeInstruction(Inst, VecOS, Fixups, *STI);
2026   auto EncodedData = ArrayRef<uint8_t>((uint8_t *)Code.data(), Code.size());
2027   if (InputEncoding != EncodedData) {
2028     if (opts::Verbosity > 1) {
2029       errs() << "BOLT-WARNING: mismatched encoding detected\n"
2030              << "      input: " << InputEncoding << '\n'
2031              << "     output: " << EncodedData << '\n';
2032     }
2033     return false;
2034   }
2035 
2036   return true;
2037 }
2038 
2039 uint64_t BinaryContext::getHotThreshold() const {
2040   static uint64_t Threshold = 0;
2041   if (Threshold == 0) {
2042     Threshold = std::max(
2043         (uint64_t)opts::ExecutionCountThreshold,
2044         NumProfiledFuncs ? SumExecutionCount / (2 * NumProfiledFuncs) : 1);
2045   }
2046   return Threshold;
2047 }
2048 
2049 BinaryFunction *BinaryContext::getBinaryFunctionContainingAddress(
2050     uint64_t Address, bool CheckPastEnd, bool UseMaxSize) {
2051   auto FI = BinaryFunctions.upper_bound(Address);
2052   if (FI == BinaryFunctions.begin())
2053     return nullptr;
2054   --FI;
2055 
2056   const uint64_t UsedSize =
2057       UseMaxSize ? FI->second.getMaxSize() : FI->second.getSize();
2058 
2059   if (Address >= FI->first + UsedSize + (CheckPastEnd ? 1 : 0))
2060     return nullptr;
2061 
2062   return &FI->second;
2063 }
2064 
2065 BinaryFunction *BinaryContext::getBinaryFunctionAtAddress(uint64_t Address) {
2066   // First, try to find a function starting at the given address. If the
2067   // function was folded, this will get us the original folded function if it
2068   // wasn't removed from the list, e.g. in non-relocation mode.
2069   auto BFI = BinaryFunctions.find(Address);
2070   if (BFI != BinaryFunctions.end())
2071     return &BFI->second;
2072 
2073   // We might have folded the function matching the object at the given
2074   // address. In such case, we look for a function matching the symbol
2075   // registered at the original address. The new function (the one that the
2076   // original was folded into) will hold the symbol.
2077   if (const BinaryData *BD = getBinaryDataAtAddress(Address)) {
2078     uint64_t EntryID = 0;
2079     BinaryFunction *BF = getFunctionForSymbol(BD->getSymbol(), &EntryID);
2080     if (BF && EntryID == 0)
2081       return BF;
2082   }
2083   return nullptr;
2084 }
2085 
2086 DebugAddressRangesVector BinaryContext::translateModuleAddressRanges(
2087     const DWARFAddressRangesVector &InputRanges) const {
2088   DebugAddressRangesVector OutputRanges;
2089 
2090   for (const DWARFAddressRange Range : InputRanges) {
2091     auto BFI = BinaryFunctions.lower_bound(Range.LowPC);
2092     while (BFI != BinaryFunctions.end()) {
2093       const BinaryFunction &Function = BFI->second;
2094       if (Function.getAddress() >= Range.HighPC)
2095         break;
2096       const DebugAddressRangesVector FunctionRanges =
2097           Function.getOutputAddressRanges();
2098       std::move(std::begin(FunctionRanges), std::end(FunctionRanges),
2099                 std::back_inserter(OutputRanges));
2100       std::advance(BFI, 1);
2101     }
2102   }
2103 
2104   return OutputRanges;
2105 }
2106 
2107 } // namespace bolt
2108 } // namespace llvm
2109