xref: /llvm-project/bolt/lib/Core/BinaryContext.cpp (revision e290133c76edb35483cf3280ef59c207af4e29f4)
1 //===- bolt/Core/BinaryContext.cpp - Low-level context --------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements the BinaryContext class.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "bolt/Core/BinaryContext.h"
14 #include "bolt/Core/BinaryEmitter.h"
15 #include "bolt/Core/BinaryFunction.h"
16 #include "bolt/Utils/CommandLineOpts.h"
17 #include "bolt/Utils/NameResolver.h"
18 #include "bolt/Utils/Utils.h"
19 #include "llvm/ADT/Twine.h"
20 #include "llvm/DebugInfo/DWARF/DWARFCompileUnit.h"
21 #include "llvm/DebugInfo/DWARF/DWARFFormValue.h"
22 #include "llvm/DebugInfo/DWARF/DWARFUnit.h"
23 #include "llvm/MC/MCAsmLayout.h"
24 #include "llvm/MC/MCAssembler.h"
25 #include "llvm/MC/MCContext.h"
26 #include "llvm/MC/MCDisassembler/MCDisassembler.h"
27 #include "llvm/MC/MCInstPrinter.h"
28 #include "llvm/MC/MCObjectStreamer.h"
29 #include "llvm/MC/MCObjectWriter.h"
30 #include "llvm/MC/MCRegisterInfo.h"
31 #include "llvm/MC/MCSectionELF.h"
32 #include "llvm/MC/MCStreamer.h"
33 #include "llvm/MC/MCSubtargetInfo.h"
34 #include "llvm/MC/MCSymbol.h"
35 #include "llvm/Support/CommandLine.h"
36 #include "llvm/Support/Error.h"
37 #include "llvm/Support/Regex.h"
38 #include <algorithm>
39 #include <functional>
40 #include <iterator>
41 #include <unordered_set>
42 
43 using namespace llvm;
44 
45 #undef  DEBUG_TYPE
46 #define DEBUG_TYPE "bolt"
47 
48 namespace opts {
49 
50 cl::opt<bool>
51 NoHugePages("no-huge-pages",
52   cl::desc("use regular size pages for code alignment"),
53   cl::ZeroOrMore,
54   cl::Hidden,
55   cl::cat(BoltCategory));
56 
57 static cl::opt<bool>
58 PrintDebugInfo("print-debug-info",
59   cl::desc("print debug info when printing functions"),
60   cl::Hidden,
61   cl::ZeroOrMore,
62   cl::cat(BoltCategory));
63 
64 cl::opt<bool>
65 PrintRelocations("print-relocations",
66   cl::desc("print relocations when printing functions/objects"),
67   cl::Hidden,
68   cl::ZeroOrMore,
69   cl::cat(BoltCategory));
70 
71 static cl::opt<bool>
72 PrintMemData("print-mem-data",
73   cl::desc("print memory data annotations when printing functions"),
74   cl::Hidden,
75   cl::ZeroOrMore,
76   cl::cat(BoltCategory));
77 
78 } // namespace opts
79 
80 namespace llvm {
81 namespace bolt {
82 
83 BinaryContext::BinaryContext(std::unique_ptr<MCContext> Ctx,
84                              std::unique_ptr<DWARFContext> DwCtx,
85                              std::unique_ptr<Triple> TheTriple,
86                              const Target *TheTarget, std::string TripleName,
87                              std::unique_ptr<MCCodeEmitter> MCE,
88                              std::unique_ptr<MCObjectFileInfo> MOFI,
89                              std::unique_ptr<const MCAsmInfo> AsmInfo,
90                              std::unique_ptr<const MCInstrInfo> MII,
91                              std::unique_ptr<const MCSubtargetInfo> STI,
92                              std::unique_ptr<MCInstPrinter> InstPrinter,
93                              std::unique_ptr<const MCInstrAnalysis> MIA,
94                              std::unique_ptr<MCPlusBuilder> MIB,
95                              std::unique_ptr<const MCRegisterInfo> MRI,
96                              std::unique_ptr<MCDisassembler> DisAsm)
97     : Ctx(std::move(Ctx)), DwCtx(std::move(DwCtx)),
98       TheTriple(std::move(TheTriple)), TheTarget(TheTarget),
99       TripleName(TripleName), MCE(std::move(MCE)), MOFI(std::move(MOFI)),
100       AsmInfo(std::move(AsmInfo)), MII(std::move(MII)), STI(std::move(STI)),
101       InstPrinter(std::move(InstPrinter)), MIA(std::move(MIA)),
102       MIB(std::move(MIB)), MRI(std::move(MRI)), DisAsm(std::move(DisAsm)) {
103   Relocation::Arch = this->TheTriple->getArch();
104   RegularPageSize = isAArch64() ? RegularPageSizeAArch64 : RegularPageSizeX86;
105   PageAlign = opts::NoHugePages ? RegularPageSize : HugePageSize;
106 }
107 
108 BinaryContext::~BinaryContext() {
109   for (BinarySection *Section : Sections)
110     delete Section;
111   for (BinaryFunction *InjectedFunction : InjectedBinaryFunctions)
112     delete InjectedFunction;
113   for (std::pair<const uint64_t, JumpTable *> JTI : JumpTables)
114     delete JTI.second;
115   clearBinaryData();
116 }
117 
118 /// Create BinaryContext for a given architecture \p ArchName and
119 /// triple \p TripleName.
120 Expected<std::unique_ptr<BinaryContext>>
121 BinaryContext::createBinaryContext(const ObjectFile *File, bool IsPIC,
122                                    std::unique_ptr<DWARFContext> DwCtx) {
123   StringRef ArchName = "";
124   StringRef FeaturesStr = "";
125   switch (File->getArch()) {
126   case llvm::Triple::x86_64:
127     ArchName = "x86-64";
128     FeaturesStr = "+nopl";
129     break;
130   case llvm::Triple::aarch64:
131     ArchName = "aarch64";
132     FeaturesStr = "+fp-armv8,+neon,+crypto,+dotprod,+crc,+lse,+ras,+rdm,"
133                   "+fullfp16,+spe,+fuse-aes,+rcpc";
134     break;
135   default:
136     return createStringError(std::errc::not_supported,
137                              "BOLT-ERROR: Unrecognized machine in ELF file");
138   }
139 
140   auto TheTriple = std::make_unique<Triple>(File->makeTriple());
141   const std::string TripleName = TheTriple->str();
142 
143   std::string Error;
144   const Target *TheTarget =
145       TargetRegistry::lookupTarget(std::string(ArchName), *TheTriple, Error);
146   if (!TheTarget)
147     return createStringError(make_error_code(std::errc::not_supported),
148                              Twine("BOLT-ERROR: ", Error));
149 
150   std::unique_ptr<const MCRegisterInfo> MRI(
151       TheTarget->createMCRegInfo(TripleName));
152   if (!MRI)
153     return createStringError(
154         make_error_code(std::errc::not_supported),
155         Twine("BOLT-ERROR: no register info for target ", TripleName));
156 
157   // Set up disassembler.
158   std::unique_ptr<MCAsmInfo> AsmInfo(
159       TheTarget->createMCAsmInfo(*MRI, TripleName, MCTargetOptions()));
160   if (!AsmInfo)
161     return createStringError(
162         make_error_code(std::errc::not_supported),
163         Twine("BOLT-ERROR: no assembly info for target ", TripleName));
164   // BOLT creates "func@PLT" symbols for PLT entries. In function assembly dump
165   // we want to emit such names as using @PLT without double quotes to convey
166   // variant kind to the assembler. BOLT doesn't rely on the linker so we can
167   // override the default AsmInfo behavior to emit names the way we want.
168   AsmInfo->setAllowAtInName(true);
169 
170   std::unique_ptr<const MCSubtargetInfo> STI(
171       TheTarget->createMCSubtargetInfo(TripleName, "", FeaturesStr));
172   if (!STI)
173     return createStringError(
174         make_error_code(std::errc::not_supported),
175         Twine("BOLT-ERROR: no subtarget info for target ", TripleName));
176 
177   std::unique_ptr<const MCInstrInfo> MII(TheTarget->createMCInstrInfo());
178   if (!MII)
179     return createStringError(
180         make_error_code(std::errc::not_supported),
181         Twine("BOLT-ERROR: no instruction info for target ", TripleName));
182 
183   std::unique_ptr<MCContext> Ctx(
184       new MCContext(*TheTriple, AsmInfo.get(), MRI.get(), STI.get()));
185   std::unique_ptr<MCObjectFileInfo> MOFI(
186       TheTarget->createMCObjectFileInfo(*Ctx, IsPIC));
187   Ctx->setObjectFileInfo(MOFI.get());
188   // We do not support X86 Large code model. Change this in the future.
189   bool Large = false;
190   if (TheTriple->getArch() == llvm::Triple::aarch64)
191     Large = true;
192   unsigned LSDAEncoding =
193       Large ? dwarf::DW_EH_PE_absptr : dwarf::DW_EH_PE_udata4;
194   unsigned TTypeEncoding =
195       Large ? dwarf::DW_EH_PE_absptr : dwarf::DW_EH_PE_udata4;
196   if (IsPIC) {
197     LSDAEncoding = dwarf::DW_EH_PE_pcrel |
198                    (Large ? dwarf::DW_EH_PE_sdata8 : dwarf::DW_EH_PE_sdata4);
199     TTypeEncoding = dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel |
200                     (Large ? dwarf::DW_EH_PE_sdata8 : dwarf::DW_EH_PE_sdata4);
201   }
202 
203   std::unique_ptr<MCDisassembler> DisAsm(
204       TheTarget->createMCDisassembler(*STI, *Ctx));
205 
206   if (!DisAsm)
207     return createStringError(
208         make_error_code(std::errc::not_supported),
209         Twine("BOLT-ERROR: no disassembler info for target ", TripleName));
210 
211   std::unique_ptr<const MCInstrAnalysis> MIA(
212       TheTarget->createMCInstrAnalysis(MII.get()));
213   if (!MIA)
214     return createStringError(
215         make_error_code(std::errc::not_supported),
216         Twine("BOLT-ERROR: failed to create instruction analysis for target ",
217               TripleName));
218 
219   int AsmPrinterVariant = AsmInfo->getAssemblerDialect();
220   std::unique_ptr<MCInstPrinter> InstructionPrinter(
221       TheTarget->createMCInstPrinter(*TheTriple, AsmPrinterVariant, *AsmInfo,
222                                      *MII, *MRI));
223   if (!InstructionPrinter)
224     return createStringError(
225         make_error_code(std::errc::not_supported),
226         Twine("BOLT-ERROR: no instruction printer for target ", TripleName));
227   InstructionPrinter->setPrintImmHex(true);
228 
229   std::unique_ptr<MCCodeEmitter> MCE(
230       TheTarget->createMCCodeEmitter(*MII, *Ctx));
231 
232   // Make sure we don't miss any output on core dumps.
233   outs().SetUnbuffered();
234   errs().SetUnbuffered();
235   dbgs().SetUnbuffered();
236 
237   auto BC = std::make_unique<BinaryContext>(
238       std::move(Ctx), std::move(DwCtx), std::move(TheTriple), TheTarget,
239       std::string(TripleName), std::move(MCE), std::move(MOFI),
240       std::move(AsmInfo), std::move(MII), std::move(STI),
241       std::move(InstructionPrinter), std::move(MIA), nullptr, std::move(MRI),
242       std::move(DisAsm));
243 
244   BC->TTypeEncoding = TTypeEncoding;
245   BC->LSDAEncoding = LSDAEncoding;
246 
247   BC->MAB = std::unique_ptr<MCAsmBackend>(
248       BC->TheTarget->createMCAsmBackend(*BC->STI, *BC->MRI, MCTargetOptions()));
249 
250   BC->setFilename(File->getFileName());
251 
252   BC->HasFixedLoadAddress = !IsPIC;
253 
254   BC->SymbolicDisAsm = std::unique_ptr<MCDisassembler>(
255       BC->TheTarget->createMCDisassembler(*BC->STI, *BC->Ctx));
256 
257   if (!BC->SymbolicDisAsm)
258     return createStringError(
259         make_error_code(std::errc::not_supported),
260         Twine("BOLT-ERROR: no disassembler info for target ", TripleName));
261 
262   return std::move(BC);
263 }
264 
265 bool BinaryContext::forceSymbolRelocations(StringRef SymbolName) const {
266   if (opts::HotText &&
267       (SymbolName == "__hot_start" || SymbolName == "__hot_end"))
268     return true;
269 
270   if (opts::HotData &&
271       (SymbolName == "__hot_data_start" || SymbolName == "__hot_data_end"))
272     return true;
273 
274   if (SymbolName == "_end")
275     return true;
276 
277   return false;
278 }
279 
280 std::unique_ptr<MCObjectWriter>
281 BinaryContext::createObjectWriter(raw_pwrite_stream &OS) {
282   return MAB->createObjectWriter(OS);
283 }
284 
285 bool BinaryContext::validateObjectNesting() const {
286   auto Itr = BinaryDataMap.begin();
287   auto End = BinaryDataMap.end();
288   bool Valid = true;
289   while (Itr != End) {
290     auto Next = std::next(Itr);
291     while (Next != End &&
292            Itr->second->getSection() == Next->second->getSection() &&
293            Itr->second->containsRange(Next->second->getAddress(),
294                                       Next->second->getSize())) {
295       if (Next->second->Parent != Itr->second) {
296         errs() << "BOLT-WARNING: object nesting incorrect for:\n"
297                << "BOLT-WARNING:  " << *Itr->second << "\n"
298                << "BOLT-WARNING:  " << *Next->second << "\n";
299         Valid = false;
300       }
301       ++Next;
302     }
303     Itr = Next;
304   }
305   return Valid;
306 }
307 
308 bool BinaryContext::validateHoles() const {
309   bool Valid = true;
310   for (BinarySection &Section : sections()) {
311     for (const Relocation &Rel : Section.relocations()) {
312       uint64_t RelAddr = Rel.Offset + Section.getAddress();
313       const BinaryData *BD = getBinaryDataContainingAddress(RelAddr);
314       if (!BD) {
315         errs() << "BOLT-WARNING: no BinaryData found for relocation at address"
316                << " 0x" << Twine::utohexstr(RelAddr) << " in "
317                << Section.getName() << "\n";
318         Valid = false;
319       } else if (!BD->getAtomicRoot()) {
320         errs() << "BOLT-WARNING: no atomic BinaryData found for relocation at "
321                << "address 0x" << Twine::utohexstr(RelAddr) << " in "
322                << Section.getName() << "\n";
323         Valid = false;
324       }
325     }
326   }
327   return Valid;
328 }
329 
330 void BinaryContext::updateObjectNesting(BinaryDataMapType::iterator GAI) {
331   const uint64_t Address = GAI->second->getAddress();
332   const uint64_t Size = GAI->second->getSize();
333 
334   auto fixParents = [&](BinaryDataMapType::iterator Itr,
335                         BinaryData *NewParent) {
336     BinaryData *OldParent = Itr->second->Parent;
337     Itr->second->Parent = NewParent;
338     ++Itr;
339     while (Itr != BinaryDataMap.end() && OldParent &&
340            Itr->second->Parent == OldParent) {
341       Itr->second->Parent = NewParent;
342       ++Itr;
343     }
344   };
345 
346   // Check if the previous symbol contains the newly added symbol.
347   if (GAI != BinaryDataMap.begin()) {
348     BinaryData *Prev = std::prev(GAI)->second;
349     while (Prev) {
350       if (Prev->getSection() == GAI->second->getSection() &&
351           Prev->containsRange(Address, Size)) {
352         fixParents(GAI, Prev);
353       } else {
354         fixParents(GAI, nullptr);
355       }
356       Prev = Prev->Parent;
357     }
358   }
359 
360   // Check if the newly added symbol contains any subsequent symbols.
361   if (Size != 0) {
362     BinaryData *BD = GAI->second->Parent ? GAI->second->Parent : GAI->second;
363     auto Itr = std::next(GAI);
364     while (
365         Itr != BinaryDataMap.end() &&
366         BD->containsRange(Itr->second->getAddress(), Itr->second->getSize())) {
367       Itr->second->Parent = BD;
368       ++Itr;
369     }
370   }
371 }
372 
373 iterator_range<BinaryContext::binary_data_iterator>
374 BinaryContext::getSubBinaryData(BinaryData *BD) {
375   auto Start = std::next(BinaryDataMap.find(BD->getAddress()));
376   auto End = Start;
377   while (End != BinaryDataMap.end() && BD->isAncestorOf(End->second))
378     ++End;
379   return make_range(Start, End);
380 }
381 
382 std::pair<const MCSymbol *, uint64_t>
383 BinaryContext::handleAddressRef(uint64_t Address, BinaryFunction &BF,
384                                 bool IsPCRel) {
385   uint64_t Addend = 0;
386 
387   if (isAArch64()) {
388     // Check if this is an access to a constant island and create bookkeeping
389     // to keep track of it and emit it later as part of this function.
390     if (MCSymbol *IslandSym = BF.getOrCreateIslandAccess(Address))
391       return std::make_pair(IslandSym, Addend);
392 
393     // Detect custom code written in assembly that refers to arbitrary
394     // constant islands from other functions. Write this reference so we
395     // can pull this constant island and emit it as part of this function
396     // too.
397     auto IslandIter = AddressToConstantIslandMap.lower_bound(Address);
398     if (IslandIter != AddressToConstantIslandMap.end()) {
399       if (MCSymbol *IslandSym =
400               IslandIter->second->getOrCreateProxyIslandAccess(Address, BF)) {
401         BF.createIslandDependency(IslandSym, IslandIter->second);
402         return std::make_pair(IslandSym, Addend);
403       }
404     }
405   }
406 
407   // Note that the address does not necessarily have to reside inside
408   // a section, it could be an absolute address too.
409   ErrorOr<BinarySection &> Section = getSectionForAddress(Address);
410   if (Section && Section->isText()) {
411     if (BF.containsAddress(Address, /*UseMaxSize=*/isAArch64())) {
412       if (Address != BF.getAddress()) {
413         // The address could potentially escape. Mark it as another entry
414         // point into the function.
415         if (opts::Verbosity >= 1) {
416           outs() << "BOLT-INFO: potentially escaped address 0x"
417                  << Twine::utohexstr(Address) << " in function " << BF << '\n';
418         }
419         BF.HasInternalLabelReference = true;
420         return std::make_pair(
421             BF.addEntryPointAtOffset(Address - BF.getAddress()), Addend);
422       }
423     } else {
424       BF.InterproceduralReferences.insert(Address);
425     }
426   }
427 
428   // With relocations, catch jump table references outside of the basic block
429   // containing the indirect jump.
430   if (HasRelocations) {
431     const MemoryContentsType MemType = analyzeMemoryAt(Address, BF);
432     if (MemType == MemoryContentsType::POSSIBLE_PIC_JUMP_TABLE && IsPCRel) {
433       const MCSymbol *Symbol =
434           getOrCreateJumpTable(BF, Address, JumpTable::JTT_PIC);
435 
436       return std::make_pair(Symbol, Addend);
437     }
438   }
439 
440   if (BinaryData *BD = getBinaryDataContainingAddress(Address))
441     return std::make_pair(BD->getSymbol(), Address - BD->getAddress());
442 
443   // TODO: use DWARF info to get size/alignment here?
444   MCSymbol *TargetSymbol = getOrCreateGlobalSymbol(Address, "DATAat");
445   LLVM_DEBUG(dbgs() << "Created symbol " << TargetSymbol->getName() << '\n');
446   return std::make_pair(TargetSymbol, Addend);
447 }
448 
449 MemoryContentsType BinaryContext::analyzeMemoryAt(uint64_t Address,
450                                                   BinaryFunction &BF) {
451   if (!isX86())
452     return MemoryContentsType::UNKNOWN;
453 
454   ErrorOr<BinarySection &> Section = getSectionForAddress(Address);
455   if (!Section) {
456     // No section - possibly an absolute address. Since we don't allow
457     // internal function addresses to escape the function scope - we
458     // consider it a tail call.
459     if (opts::Verbosity > 1) {
460       errs() << "BOLT-WARNING: no section for address 0x"
461              << Twine::utohexstr(Address) << " referenced from function " << BF
462              << '\n';
463     }
464     return MemoryContentsType::UNKNOWN;
465   }
466 
467   if (Section->isVirtual()) {
468     // The contents are filled at runtime.
469     return MemoryContentsType::UNKNOWN;
470   }
471 
472   // No support for jump tables in code yet.
473   if (Section->isText())
474     return MemoryContentsType::UNKNOWN;
475 
476   // Start with checking for PIC jump table. We expect non-PIC jump tables
477   // to have high 32 bits set to 0.
478   if (analyzeJumpTable(Address, JumpTable::JTT_PIC, BF))
479     return MemoryContentsType::POSSIBLE_PIC_JUMP_TABLE;
480 
481   if (analyzeJumpTable(Address, JumpTable::JTT_NORMAL, BF))
482     return MemoryContentsType::POSSIBLE_JUMP_TABLE;
483 
484   return MemoryContentsType::UNKNOWN;
485 }
486 
487 /// Check if <fragment restored name> == <parent restored name>.cold(.\d+)?
488 bool isPotentialFragmentByName(BinaryFunction &Fragment,
489                                BinaryFunction &Parent) {
490   for (StringRef Name : Parent.getNames()) {
491     std::string NamePrefix = Regex::escape(NameResolver::restore(Name));
492     std::string NameRegex = Twine(NamePrefix, "\\.cold(\\.[0-9]+)?").str();
493     if (Fragment.hasRestoredNameRegex(NameRegex))
494       return true;
495   }
496   return false;
497 }
498 
499 bool BinaryContext::analyzeJumpTable(const uint64_t Address,
500                                      const JumpTable::JumpTableType Type,
501                                      BinaryFunction &BF,
502                                      const uint64_t NextJTAddress,
503                                      JumpTable::OffsetsType *Offsets) {
504   // Is one of the targets __builtin_unreachable?
505   bool HasUnreachable = false;
506 
507   // Number of targets other than __builtin_unreachable.
508   uint64_t NumRealEntries = 0;
509 
510   constexpr uint64_t INVALID_OFFSET = std::numeric_limits<uint64_t>::max();
511   auto addOffset = [&](uint64_t Offset) {
512     if (Offsets)
513       Offsets->emplace_back(Offset);
514   };
515 
516   auto doesBelongToFunction = [&](const uint64_t Addr,
517                                   BinaryFunction *TargetBF) -> bool {
518     if (BF.containsAddress(Addr))
519       return true;
520     // Nothing to do if we failed to identify the containing function.
521     if (!TargetBF)
522       return false;
523     // Case 1: check if BF is a fragment and TargetBF is its parent.
524     if (BF.isFragment()) {
525       // Parent function may or may not be already registered.
526       // Set parent link based on function name matching heuristic.
527       return registerFragment(BF, *TargetBF);
528     }
529     // Case 2: check if TargetBF is a fragment and BF is its parent.
530     return TargetBF->isFragment() && registerFragment(*TargetBF, BF);
531   };
532 
533   ErrorOr<BinarySection &> Section = getSectionForAddress(Address);
534   if (!Section)
535     return false;
536 
537   // The upper bound is defined by containing object, section limits, and
538   // the next jump table in memory.
539   uint64_t UpperBound = Section->getEndAddress();
540   const BinaryData *JumpTableBD = getBinaryDataAtAddress(Address);
541   if (JumpTableBD && JumpTableBD->getSize()) {
542     assert(JumpTableBD->getEndAddress() <= UpperBound &&
543            "data object cannot cross a section boundary");
544     UpperBound = JumpTableBD->getEndAddress();
545   }
546   if (NextJTAddress)
547     UpperBound = std::min(NextJTAddress, UpperBound);
548 
549   LLVM_DEBUG(dbgs() << "BOLT-DEBUG: analyzeJumpTable in " << BF.getPrintName()
550                     << '\n');
551   const uint64_t EntrySize = getJumpTableEntrySize(Type);
552   for (uint64_t EntryAddress = Address; EntryAddress <= UpperBound - EntrySize;
553        EntryAddress += EntrySize) {
554     LLVM_DEBUG(dbgs() << "  * Checking 0x" << Twine::utohexstr(EntryAddress)
555                       << " -> ");
556     // Check if there's a proper relocation against the jump table entry.
557     if (HasRelocations) {
558       if (Type == JumpTable::JTT_PIC &&
559           !DataPCRelocations.count(EntryAddress)) {
560         LLVM_DEBUG(
561             dbgs() << "FAIL: JTT_PIC table, no relocation for this address\n");
562         break;
563       }
564       if (Type == JumpTable::JTT_NORMAL && !getRelocationAt(EntryAddress)) {
565         LLVM_DEBUG(
566             dbgs()
567             << "FAIL: JTT_NORMAL table, no relocation for this address\n");
568         break;
569       }
570     }
571 
572     const uint64_t Value =
573         (Type == JumpTable::JTT_PIC)
574             ? Address + *getSignedValueAtAddress(EntryAddress, EntrySize)
575             : *getPointerAtAddress(EntryAddress);
576 
577     // __builtin_unreachable() case.
578     if (Value == BF.getAddress() + BF.getSize()) {
579       addOffset(Value - BF.getAddress());
580       HasUnreachable = true;
581       LLVM_DEBUG(dbgs() << "OK: __builtin_unreachable\n");
582       continue;
583     }
584 
585     // Function or one of its fragments.
586     BinaryFunction *TargetBF = getBinaryFunctionContainingAddress(Value);
587 
588     // We assume that a jump table cannot have function start as an entry.
589     if (!doesBelongToFunction(Value, TargetBF) || Value == BF.getAddress()) {
590       LLVM_DEBUG({
591         if (!BF.containsAddress(Value)) {
592           dbgs() << "FAIL: function doesn't contain this address\n";
593           if (TargetBF) {
594             dbgs() << "  ! function containing this address: "
595                    << TargetBF->getPrintName() << '\n';
596             if (TargetBF->isFragment())
597               dbgs() << "  ! is a fragment\n";
598             for (BinaryFunction *TargetParent : TargetBF->ParentFragments)
599               dbgs() << "  ! its parent is "
600                      << (TargetParent ? TargetParent->getPrintName() : "(none)")
601                      << '\n';
602           }
603         }
604         if (Value == BF.getAddress())
605           dbgs() << "FAIL: jump table cannot have function start as an entry\n";
606       });
607       break;
608     }
609 
610     // Check there's an instruction at this offset.
611     if (TargetBF->getState() == BinaryFunction::State::Disassembled &&
612         !TargetBF->getInstructionAtOffset(Value - TargetBF->getAddress())) {
613       LLVM_DEBUG(dbgs() << "FAIL: no instruction at this offset\n");
614       break;
615     }
616 
617     ++NumRealEntries;
618 
619     if (TargetBF == &BF) {
620       // Address inside the function.
621       addOffset(Value - TargetBF->getAddress());
622       LLVM_DEBUG(dbgs() << "OK: real entry\n");
623     } else {
624       // Address in split fragment.
625       BF.setHasSplitJumpTable(true);
626       // Add invalid offset for proper identification of jump table size.
627       addOffset(INVALID_OFFSET);
628       LLVM_DEBUG(dbgs() << "OK: address in split fragment "
629                         << TargetBF->getPrintName() << '\n');
630     }
631   }
632 
633   // It's a jump table if the number of real entries is more than 1, or there's
634   // one real entry and "unreachable" targets. If there are only multiple
635   // "unreachable" targets, then it's not a jump table.
636   return NumRealEntries + HasUnreachable >= 2;
637 }
638 
639 void BinaryContext::populateJumpTables() {
640   LLVM_DEBUG(dbgs() << "DataPCRelocations: " << DataPCRelocations.size()
641                     << '\n');
642   for (auto JTI = JumpTables.begin(), JTE = JumpTables.end(); JTI != JTE;
643        ++JTI) {
644     JumpTable *JT = JTI->second;
645     BinaryFunction &BF = *JT->Parent;
646 
647     if (!BF.isSimple())
648       continue;
649 
650     uint64_t NextJTAddress = 0;
651     auto NextJTI = std::next(JTI);
652     if (NextJTI != JTE)
653       NextJTAddress = NextJTI->second->getAddress();
654 
655     const bool Success = analyzeJumpTable(JT->getAddress(), JT->Type, BF,
656                                           NextJTAddress, &JT->OffsetEntries);
657     if (!Success) {
658       dbgs() << "failed to analyze jump table in function " << BF << '\n';
659       JT->print(dbgs());
660       if (NextJTI != JTE) {
661         dbgs() << "next jump table at 0x"
662                << Twine::utohexstr(NextJTI->second->getAddress())
663                << " belongs to function " << *NextJTI->second->Parent << '\n';
664         NextJTI->second->print(dbgs());
665       }
666       llvm_unreachable("jump table heuristic failure");
667     }
668 
669     for (uint64_t EntryOffset : JT->OffsetEntries) {
670       if (EntryOffset == BF.getSize())
671         BF.IgnoredBranches.emplace_back(EntryOffset, BF.getSize());
672       else
673         BF.registerReferencedOffset(EntryOffset);
674     }
675 
676     // In strict mode, erase PC-relative relocation record. Later we check that
677     // all such records are erased and thus have been accounted for.
678     if (opts::StrictMode && JT->Type == JumpTable::JTT_PIC) {
679       for (uint64_t Address = JT->getAddress();
680            Address < JT->getAddress() + JT->getSize();
681            Address += JT->EntrySize) {
682         DataPCRelocations.erase(DataPCRelocations.find(Address));
683       }
684     }
685 
686     // Mark to skip the function and all its fragments.
687     if (BF.hasSplitJumpTable())
688       FragmentsToSkip.push_back(&BF);
689   }
690 
691   if (opts::StrictMode && DataPCRelocations.size()) {
692     LLVM_DEBUG({
693       dbgs() << DataPCRelocations.size()
694              << " unclaimed PC-relative relocations left in data:\n";
695       for (uint64_t Reloc : DataPCRelocations)
696         dbgs() << Twine::utohexstr(Reloc) << '\n';
697     });
698     assert(0 && "unclaimed PC-relative relocations left in data\n");
699   }
700   clearList(DataPCRelocations);
701 }
702 
703 void BinaryContext::skipMarkedFragments() {
704   // Unique functions in the vector.
705   std::unordered_set<BinaryFunction *> UniqueFunctions(FragmentsToSkip.begin(),
706                                                        FragmentsToSkip.end());
707   // Copy the functions back to FragmentsToSkip.
708   FragmentsToSkip.assign(UniqueFunctions.begin(), UniqueFunctions.end());
709   auto addToWorklist = [&](BinaryFunction *Function) -> void {
710     if (UniqueFunctions.count(Function))
711       return;
712     FragmentsToSkip.push_back(Function);
713     UniqueFunctions.insert(Function);
714   };
715   // Functions containing split jump tables need to be skipped with all
716   // fragments (transitively).
717   for (size_t I = 0; I != FragmentsToSkip.size(); I++) {
718     BinaryFunction *BF = FragmentsToSkip[I];
719     assert(UniqueFunctions.count(BF) &&
720            "internal error in traversing function fragments");
721     if (opts::Verbosity >= 1)
722       errs() << "BOLT-WARNING: Ignoring " << BF->getPrintName() << '\n';
723     BF->setIgnored();
724     std::for_each(BF->Fragments.begin(), BF->Fragments.end(), addToWorklist);
725     std::for_each(BF->ParentFragments.begin(), BF->ParentFragments.end(),
726                   addToWorklist);
727   }
728   if (!FragmentsToSkip.empty())
729     errs() << "BOLT-WARNING: ignored " << FragmentsToSkip.size() << " function"
730            << (FragmentsToSkip.size() == 1 ? "" : "s")
731            << " due to cold fragments\n";
732   FragmentsToSkip.clear();
733 }
734 
735 MCSymbol *BinaryContext::getOrCreateGlobalSymbol(uint64_t Address, Twine Prefix,
736                                                  uint64_t Size,
737                                                  uint16_t Alignment,
738                                                  unsigned Flags) {
739   auto Itr = BinaryDataMap.find(Address);
740   if (Itr != BinaryDataMap.end()) {
741     assert(Itr->second->getSize() == Size || !Size);
742     return Itr->second->getSymbol();
743   }
744 
745   std::string Name = (Prefix + "0x" + Twine::utohexstr(Address)).str();
746   assert(!GlobalSymbols.count(Name) && "created name is not unique");
747   return registerNameAtAddress(Name, Address, Size, Alignment, Flags);
748 }
749 
750 MCSymbol *BinaryContext::getOrCreateUndefinedGlobalSymbol(StringRef Name) {
751   return Ctx->getOrCreateSymbol(Name);
752 }
753 
754 BinaryFunction *BinaryContext::createBinaryFunction(
755     const std::string &Name, BinarySection &Section, uint64_t Address,
756     uint64_t Size, uint64_t SymbolSize, uint16_t Alignment) {
757   auto Result = BinaryFunctions.emplace(
758       Address, BinaryFunction(Name, Section, Address, Size, *this));
759   assert(Result.second == true && "unexpected duplicate function");
760   BinaryFunction *BF = &Result.first->second;
761   registerNameAtAddress(Name, Address, SymbolSize ? SymbolSize : Size,
762                         Alignment);
763   setSymbolToFunctionMap(BF->getSymbol(), BF);
764   return BF;
765 }
766 
767 const MCSymbol *
768 BinaryContext::getOrCreateJumpTable(BinaryFunction &Function, uint64_t Address,
769                                     JumpTable::JumpTableType Type) {
770   if (JumpTable *JT = getJumpTableContainingAddress(Address)) {
771     assert(JT->Type == Type && "jump table types have to match");
772     assert(JT->Parent == &Function &&
773            "cannot re-use jump table of a different function");
774     assert(Address == JT->getAddress() && "unexpected non-empty jump table");
775 
776     return JT->getFirstLabel();
777   }
778 
779   // Re-use the existing symbol if possible.
780   MCSymbol *JTLabel = nullptr;
781   if (BinaryData *Object = getBinaryDataAtAddress(Address)) {
782     if (!isInternalSymbolName(Object->getSymbol()->getName()))
783       JTLabel = Object->getSymbol();
784   }
785 
786   const uint64_t EntrySize = getJumpTableEntrySize(Type);
787   if (!JTLabel) {
788     const std::string JumpTableName = generateJumpTableName(Function, Address);
789     JTLabel = registerNameAtAddress(JumpTableName, Address, 0, EntrySize);
790   }
791 
792   LLVM_DEBUG(dbgs() << "BOLT-DEBUG: creating jump table " << JTLabel->getName()
793                     << " in function " << Function << '\n');
794 
795   JumpTable *JT = new JumpTable(*JTLabel, Address, EntrySize, Type,
796                                 JumpTable::LabelMapType{{0, JTLabel}}, Function,
797                                 *getSectionForAddress(Address));
798   JumpTables.emplace(Address, JT);
799 
800   // Duplicate the entry for the parent function for easy access.
801   Function.JumpTables.emplace(Address, JT);
802 
803   return JTLabel;
804 }
805 
806 std::pair<uint64_t, const MCSymbol *>
807 BinaryContext::duplicateJumpTable(BinaryFunction &Function, JumpTable *JT,
808                                   const MCSymbol *OldLabel) {
809   auto L = scopeLock();
810   unsigned Offset = 0;
811   bool Found = false;
812   for (std::pair<const unsigned, MCSymbol *> Elmt : JT->Labels) {
813     if (Elmt.second != OldLabel)
814       continue;
815     Offset = Elmt.first;
816     Found = true;
817     break;
818   }
819   assert(Found && "Label not found");
820   (void)Found;
821   MCSymbol *NewLabel = Ctx->createNamedTempSymbol("duplicatedJT");
822   JumpTable *NewJT =
823       new JumpTable(*NewLabel, JT->getAddress(), JT->EntrySize, JT->Type,
824                     JumpTable::LabelMapType{{Offset, NewLabel}}, Function,
825                     *getSectionForAddress(JT->getAddress()));
826   NewJT->Entries = JT->Entries;
827   NewJT->Counts = JT->Counts;
828   uint64_t JumpTableID = ++DuplicatedJumpTables;
829   // Invert it to differentiate from regular jump tables whose IDs are their
830   // addresses in the input binary memory space
831   JumpTableID = ~JumpTableID;
832   JumpTables.emplace(JumpTableID, NewJT);
833   Function.JumpTables.emplace(JumpTableID, NewJT);
834   return std::make_pair(JumpTableID, NewLabel);
835 }
836 
837 std::string BinaryContext::generateJumpTableName(const BinaryFunction &BF,
838                                                  uint64_t Address) {
839   size_t Id;
840   uint64_t Offset = 0;
841   if (const JumpTable *JT = BF.getJumpTableContainingAddress(Address)) {
842     Offset = Address - JT->getAddress();
843     auto Itr = JT->Labels.find(Offset);
844     if (Itr != JT->Labels.end())
845       return std::string(Itr->second->getName());
846     Id = JumpTableIds.at(JT->getAddress());
847   } else {
848     Id = JumpTableIds[Address] = BF.JumpTables.size();
849   }
850   return ("JUMP_TABLE/" + BF.getOneName().str() + "." + std::to_string(Id) +
851           (Offset ? ("." + std::to_string(Offset)) : ""));
852 }
853 
854 bool BinaryContext::hasValidCodePadding(const BinaryFunction &BF) {
855   // FIXME: aarch64 support is missing.
856   if (!isX86())
857     return true;
858 
859   if (BF.getSize() == BF.getMaxSize())
860     return true;
861 
862   ErrorOr<ArrayRef<unsigned char>> FunctionData = BF.getData();
863   assert(FunctionData && "cannot get function as data");
864 
865   uint64_t Offset = BF.getSize();
866   MCInst Instr;
867   uint64_t InstrSize = 0;
868   uint64_t InstrAddress = BF.getAddress() + Offset;
869   using std::placeholders::_1;
870 
871   // Skip instructions that satisfy the predicate condition.
872   auto skipInstructions = [&](std::function<bool(const MCInst &)> Predicate) {
873     const uint64_t StartOffset = Offset;
874     for (; Offset < BF.getMaxSize();
875          Offset += InstrSize, InstrAddress += InstrSize) {
876       if (!DisAsm->getInstruction(Instr, InstrSize, FunctionData->slice(Offset),
877                                   InstrAddress, nulls()))
878         break;
879       if (!Predicate(Instr))
880         break;
881     }
882 
883     return Offset - StartOffset;
884   };
885 
886   // Skip a sequence of zero bytes.
887   auto skipZeros = [&]() {
888     const uint64_t StartOffset = Offset;
889     for (; Offset < BF.getMaxSize(); ++Offset)
890       if ((*FunctionData)[Offset] != 0)
891         break;
892 
893     return Offset - StartOffset;
894   };
895 
896   // Accept the whole padding area filled with breakpoints.
897   auto isBreakpoint = std::bind(&MCPlusBuilder::isBreakpoint, MIB.get(), _1);
898   if (skipInstructions(isBreakpoint) && Offset == BF.getMaxSize())
899     return true;
900 
901   auto isNoop = std::bind(&MCPlusBuilder::isNoop, MIB.get(), _1);
902 
903   // Some functions have a jump to the next function or to the padding area
904   // inserted after the body.
905   auto isSkipJump = [&](const MCInst &Instr) {
906     uint64_t TargetAddress = 0;
907     if (MIB->isUnconditionalBranch(Instr) &&
908         MIB->evaluateBranch(Instr, InstrAddress, InstrSize, TargetAddress)) {
909       if (TargetAddress >= InstrAddress + InstrSize &&
910           TargetAddress <= BF.getAddress() + BF.getMaxSize()) {
911         return true;
912       }
913     }
914     return false;
915   };
916 
917   // Skip over nops, jumps, and zero padding. Allow interleaving (this happens).
918   while (skipInstructions(isNoop) || skipInstructions(isSkipJump) ||
919          skipZeros())
920     ;
921 
922   if (Offset == BF.getMaxSize())
923     return true;
924 
925   if (opts::Verbosity >= 1) {
926     errs() << "BOLT-WARNING: bad padding at address 0x"
927            << Twine::utohexstr(BF.getAddress() + BF.getSize())
928            << " starting at offset " << (Offset - BF.getSize())
929            << " in function " << BF << '\n'
930            << FunctionData->slice(BF.getSize(), BF.getMaxSize() - BF.getSize())
931            << '\n';
932   }
933 
934   return false;
935 }
936 
937 void BinaryContext::adjustCodePadding() {
938   for (auto &BFI : BinaryFunctions) {
939     BinaryFunction &BF = BFI.second;
940     if (!shouldEmit(BF))
941       continue;
942 
943     if (!hasValidCodePadding(BF)) {
944       if (HasRelocations) {
945         if (opts::Verbosity >= 1) {
946           outs() << "BOLT-INFO: function " << BF
947                  << " has invalid padding. Ignoring the function.\n";
948         }
949         BF.setIgnored();
950       } else {
951         BF.setMaxSize(BF.getSize());
952       }
953     }
954   }
955 }
956 
957 MCSymbol *BinaryContext::registerNameAtAddress(StringRef Name, uint64_t Address,
958                                                uint64_t Size,
959                                                uint16_t Alignment,
960                                                unsigned Flags) {
961   // Register the name with MCContext.
962   MCSymbol *Symbol = Ctx->getOrCreateSymbol(Name);
963 
964   auto GAI = BinaryDataMap.find(Address);
965   BinaryData *BD;
966   if (GAI == BinaryDataMap.end()) {
967     ErrorOr<BinarySection &> SectionOrErr = getSectionForAddress(Address);
968     BinarySection &Section =
969         SectionOrErr ? SectionOrErr.get() : absoluteSection();
970     BD = new BinaryData(*Symbol, Address, Size, Alignment ? Alignment : 1,
971                         Section, Flags);
972     GAI = BinaryDataMap.emplace(Address, BD).first;
973     GlobalSymbols[Name] = BD;
974     updateObjectNesting(GAI);
975   } else {
976     BD = GAI->second;
977     if (!BD->hasName(Name)) {
978       GlobalSymbols[Name] = BD;
979       BD->Symbols.push_back(Symbol);
980     }
981   }
982 
983   return Symbol;
984 }
985 
986 const BinaryData *
987 BinaryContext::getBinaryDataContainingAddressImpl(uint64_t Address) const {
988   auto NI = BinaryDataMap.lower_bound(Address);
989   auto End = BinaryDataMap.end();
990   if ((NI != End && Address == NI->first) ||
991       ((NI != BinaryDataMap.begin()) && (NI-- != BinaryDataMap.begin()))) {
992     if (NI->second->containsAddress(Address))
993       return NI->second;
994 
995     // If this is a sub-symbol, see if a parent data contains the address.
996     const BinaryData *BD = NI->second->getParent();
997     while (BD) {
998       if (BD->containsAddress(Address))
999         return BD;
1000       BD = BD->getParent();
1001     }
1002   }
1003   return nullptr;
1004 }
1005 
1006 bool BinaryContext::setBinaryDataSize(uint64_t Address, uint64_t Size) {
1007   auto NI = BinaryDataMap.find(Address);
1008   assert(NI != BinaryDataMap.end());
1009   if (NI == BinaryDataMap.end())
1010     return false;
1011   // TODO: it's possible that a jump table starts at the same address
1012   // as a larger blob of private data.  When we set the size of the
1013   // jump table, it might be smaller than the total blob size.  In this
1014   // case we just leave the original size since (currently) it won't really
1015   // affect anything.
1016   assert((!NI->second->Size || NI->second->Size == Size ||
1017           (NI->second->isJumpTable() && NI->second->Size > Size)) &&
1018          "can't change the size of a symbol that has already had its "
1019          "size set");
1020   if (!NI->second->Size) {
1021     NI->second->Size = Size;
1022     updateObjectNesting(NI);
1023     return true;
1024   }
1025   return false;
1026 }
1027 
1028 void BinaryContext::generateSymbolHashes() {
1029   auto isPadding = [](const BinaryData &BD) {
1030     StringRef Contents = BD.getSection().getContents();
1031     StringRef SymData = Contents.substr(BD.getOffset(), BD.getSize());
1032     return (BD.getName().startswith("HOLEat") ||
1033             SymData.find_first_not_of(0) == StringRef::npos);
1034   };
1035 
1036   uint64_t NumCollisions = 0;
1037   for (auto &Entry : BinaryDataMap) {
1038     BinaryData &BD = *Entry.second;
1039     StringRef Name = BD.getName();
1040 
1041     if (!isInternalSymbolName(Name))
1042       continue;
1043 
1044     // First check if a non-anonymous alias exists and move it to the front.
1045     if (BD.getSymbols().size() > 1) {
1046       auto Itr = std::find_if(BD.getSymbols().begin(), BD.getSymbols().end(),
1047                               [&](const MCSymbol *Symbol) {
1048                                 return !isInternalSymbolName(Symbol->getName());
1049                               });
1050       if (Itr != BD.getSymbols().end()) {
1051         size_t Idx = std::distance(BD.getSymbols().begin(), Itr);
1052         std::swap(BD.getSymbols()[0], BD.getSymbols()[Idx]);
1053         continue;
1054       }
1055     }
1056 
1057     // We have to skip 0 size symbols since they will all collide.
1058     if (BD.getSize() == 0) {
1059       continue;
1060     }
1061 
1062     const uint64_t Hash = BD.getSection().hash(BD);
1063     const size_t Idx = Name.find("0x");
1064     std::string NewName =
1065         (Twine(Name.substr(0, Idx)) + "_" + Twine::utohexstr(Hash)).str();
1066     if (getBinaryDataByName(NewName)) {
1067       // Ignore collisions for symbols that appear to be padding
1068       // (i.e. all zeros or a "hole")
1069       if (!isPadding(BD)) {
1070         if (opts::Verbosity) {
1071           errs() << "BOLT-WARNING: collision detected when hashing " << BD
1072                  << " with new name (" << NewName << "), skipping.\n";
1073         }
1074         ++NumCollisions;
1075       }
1076       continue;
1077     }
1078     BD.Symbols.insert(BD.Symbols.begin(), Ctx->getOrCreateSymbol(NewName));
1079     GlobalSymbols[NewName] = &BD;
1080   }
1081   if (NumCollisions) {
1082     errs() << "BOLT-WARNING: " << NumCollisions
1083            << " collisions detected while hashing binary objects";
1084     if (!opts::Verbosity)
1085       errs() << ". Use -v=1 to see the list.";
1086     errs() << '\n';
1087   }
1088 }
1089 
1090 bool BinaryContext::registerFragment(BinaryFunction &TargetFunction,
1091                                      BinaryFunction &Function) const {
1092   if (!isPotentialFragmentByName(TargetFunction, Function))
1093     return false;
1094   assert(TargetFunction.isFragment() && "TargetFunction must be a fragment");
1095   if (TargetFunction.isParentFragment(&Function))
1096     return true;
1097   TargetFunction.addParentFragment(Function);
1098   Function.addFragment(TargetFunction);
1099   if (!HasRelocations) {
1100     TargetFunction.setSimple(false);
1101     Function.setSimple(false);
1102   }
1103   if (opts::Verbosity >= 1) {
1104     outs() << "BOLT-INFO: marking " << TargetFunction << " as a fragment of "
1105            << Function << '\n';
1106   }
1107   return true;
1108 }
1109 
1110 void BinaryContext::processInterproceduralReferences(BinaryFunction &Function) {
1111   for (uint64_t Address : Function.InterproceduralReferences) {
1112     if (!Address)
1113       continue;
1114 
1115     BinaryFunction *TargetFunction =
1116         getBinaryFunctionContainingAddress(Address);
1117     if (&Function == TargetFunction)
1118       continue;
1119 
1120     if (TargetFunction) {
1121       if (TargetFunction->IsFragment &&
1122           !registerFragment(*TargetFunction, Function)) {
1123         errs() << "BOLT-WARNING: interprocedural reference between unrelated "
1124                   "fragments: "
1125                << Function.getPrintName() << " and "
1126                << TargetFunction->getPrintName() << '\n';
1127       }
1128       if (uint64_t Offset = Address - TargetFunction->getAddress())
1129         TargetFunction->addEntryPointAtOffset(Offset);
1130 
1131       continue;
1132     }
1133 
1134     // Check if address falls in function padding space - this could be
1135     // unmarked data in code. In this case adjust the padding space size.
1136     ErrorOr<BinarySection &> Section = getSectionForAddress(Address);
1137     assert(Section && "cannot get section for referenced address");
1138 
1139     if (!Section->isText())
1140       continue;
1141 
1142     // PLT requires special handling and could be ignored in this context.
1143     StringRef SectionName = Section->getName();
1144     if (SectionName == ".plt" || SectionName == ".plt.got")
1145       continue;
1146 
1147     if (opts::processAllFunctions()) {
1148       errs() << "BOLT-ERROR: cannot process binaries with unmarked "
1149              << "object in code at address 0x" << Twine::utohexstr(Address)
1150              << " belonging to section " << SectionName << " in current mode\n";
1151       exit(1);
1152     }
1153 
1154     TargetFunction = getBinaryFunctionContainingAddress(Address,
1155                                                         /*CheckPastEnd=*/false,
1156                                                         /*UseMaxSize=*/true);
1157     // We are not going to overwrite non-simple functions, but for simple
1158     // ones - adjust the padding size.
1159     if (TargetFunction && TargetFunction->isSimple()) {
1160       errs() << "BOLT-WARNING: function " << *TargetFunction
1161              << " has an object detected in a padding region at address 0x"
1162              << Twine::utohexstr(Address) << '\n';
1163       TargetFunction->setMaxSize(TargetFunction->getSize());
1164     }
1165   }
1166 
1167   clearList(Function.InterproceduralReferences);
1168 }
1169 
1170 void BinaryContext::postProcessSymbolTable() {
1171   fixBinaryDataHoles();
1172   bool Valid = true;
1173   for (auto &Entry : BinaryDataMap) {
1174     BinaryData *BD = Entry.second;
1175     if ((BD->getName().startswith("SYMBOLat") ||
1176          BD->getName().startswith("DATAat")) &&
1177         !BD->getParent() && !BD->getSize() && !BD->isAbsolute() &&
1178         BD->getSection()) {
1179       errs() << "BOLT-WARNING: zero-sized top level symbol: " << *BD << "\n";
1180       Valid = false;
1181     }
1182   }
1183   assert(Valid);
1184   (void)Valid;
1185   generateSymbolHashes();
1186 }
1187 
1188 void BinaryContext::foldFunction(BinaryFunction &ChildBF,
1189                                  BinaryFunction &ParentBF) {
1190   assert(!ChildBF.isMultiEntry() && !ParentBF.isMultiEntry() &&
1191          "cannot merge functions with multiple entry points");
1192 
1193   std::unique_lock<std::shared_timed_mutex> WriteCtxLock(CtxMutex,
1194                                                          std::defer_lock);
1195   std::unique_lock<std::shared_timed_mutex> WriteSymbolMapLock(
1196       SymbolToFunctionMapMutex, std::defer_lock);
1197 
1198   const StringRef ChildName = ChildBF.getOneName();
1199 
1200   // Move symbols over and update bookkeeping info.
1201   for (MCSymbol *Symbol : ChildBF.getSymbols()) {
1202     ParentBF.getSymbols().push_back(Symbol);
1203     WriteSymbolMapLock.lock();
1204     SymbolToFunctionMap[Symbol] = &ParentBF;
1205     WriteSymbolMapLock.unlock();
1206     // NB: there's no need to update BinaryDataMap and GlobalSymbols.
1207   }
1208   ChildBF.getSymbols().clear();
1209 
1210   // Move other names the child function is known under.
1211   std::move(ChildBF.Aliases.begin(), ChildBF.Aliases.end(),
1212             std::back_inserter(ParentBF.Aliases));
1213   ChildBF.Aliases.clear();
1214 
1215   if (HasRelocations) {
1216     // Merge execution counts of ChildBF into those of ParentBF.
1217     // Without relocations, we cannot reliably merge profiles as both functions
1218     // continue to exist and either one can be executed.
1219     ChildBF.mergeProfileDataInto(ParentBF);
1220 
1221     std::shared_lock<std::shared_timed_mutex> ReadBfsLock(BinaryFunctionsMutex,
1222                                                           std::defer_lock);
1223     std::unique_lock<std::shared_timed_mutex> WriteBfsLock(BinaryFunctionsMutex,
1224                                                            std::defer_lock);
1225     // Remove ChildBF from the global set of functions in relocs mode.
1226     ReadBfsLock.lock();
1227     auto FI = BinaryFunctions.find(ChildBF.getAddress());
1228     ReadBfsLock.unlock();
1229 
1230     assert(FI != BinaryFunctions.end() && "function not found");
1231     assert(&ChildBF == &FI->second && "function mismatch");
1232 
1233     WriteBfsLock.lock();
1234     ChildBF.clearDisasmState();
1235     FI = BinaryFunctions.erase(FI);
1236     WriteBfsLock.unlock();
1237 
1238   } else {
1239     // In non-relocation mode we keep the function, but rename it.
1240     std::string NewName = "__ICF_" + ChildName.str();
1241 
1242     WriteCtxLock.lock();
1243     ChildBF.getSymbols().push_back(Ctx->getOrCreateSymbol(NewName));
1244     WriteCtxLock.unlock();
1245 
1246     ChildBF.setFolded(&ParentBF);
1247   }
1248 }
1249 
1250 void BinaryContext::fixBinaryDataHoles() {
1251   assert(validateObjectNesting() && "object nesting inconsitency detected");
1252 
1253   for (BinarySection &Section : allocatableSections()) {
1254     std::vector<std::pair<uint64_t, uint64_t>> Holes;
1255 
1256     auto isNotHole = [&Section](const binary_data_iterator &Itr) {
1257       BinaryData *BD = Itr->second;
1258       bool isHole = (!BD->getParent() && !BD->getSize() && BD->isObject() &&
1259                      (BD->getName().startswith("SYMBOLat0x") ||
1260                       BD->getName().startswith("DATAat0x") ||
1261                       BD->getName().startswith("ANONYMOUS")));
1262       return !isHole && BD->getSection() == Section && !BD->getParent();
1263     };
1264 
1265     auto BDStart = BinaryDataMap.begin();
1266     auto BDEnd = BinaryDataMap.end();
1267     auto Itr = FilteredBinaryDataIterator(isNotHole, BDStart, BDEnd);
1268     auto End = FilteredBinaryDataIterator(isNotHole, BDEnd, BDEnd);
1269 
1270     uint64_t EndAddress = Section.getAddress();
1271 
1272     while (Itr != End) {
1273       if (Itr->second->getAddress() > EndAddress) {
1274         uint64_t Gap = Itr->second->getAddress() - EndAddress;
1275         Holes.emplace_back(EndAddress, Gap);
1276       }
1277       EndAddress = Itr->second->getEndAddress();
1278       ++Itr;
1279     }
1280 
1281     if (EndAddress < Section.getEndAddress())
1282       Holes.emplace_back(EndAddress, Section.getEndAddress() - EndAddress);
1283 
1284     // If there is already a symbol at the start of the hole, grow that symbol
1285     // to cover the rest.  Otherwise, create a new symbol to cover the hole.
1286     for (std::pair<uint64_t, uint64_t> &Hole : Holes) {
1287       BinaryData *BD = getBinaryDataAtAddress(Hole.first);
1288       if (BD) {
1289         // BD->getSection() can be != Section if there are sections that
1290         // overlap.  In this case it is probably safe to just skip the holes
1291         // since the overlapping section will not(?) have any symbols in it.
1292         if (BD->getSection() == Section)
1293           setBinaryDataSize(Hole.first, Hole.second);
1294       } else {
1295         getOrCreateGlobalSymbol(Hole.first, "HOLEat", Hole.second, 1);
1296       }
1297     }
1298   }
1299 
1300   assert(validateObjectNesting() && "object nesting inconsitency detected");
1301   assert(validateHoles() && "top level hole detected in object map");
1302 }
1303 
1304 void BinaryContext::printGlobalSymbols(raw_ostream &OS) const {
1305   const BinarySection *CurrentSection = nullptr;
1306   bool FirstSection = true;
1307 
1308   for (auto &Entry : BinaryDataMap) {
1309     const BinaryData *BD = Entry.second;
1310     const BinarySection &Section = BD->getSection();
1311     if (FirstSection || Section != *CurrentSection) {
1312       uint64_t Address, Size;
1313       StringRef Name = Section.getName();
1314       if (Section) {
1315         Address = Section.getAddress();
1316         Size = Section.getSize();
1317       } else {
1318         Address = BD->getAddress();
1319         Size = BD->getSize();
1320       }
1321       OS << "BOLT-INFO: Section " << Name << ", "
1322          << "0x" + Twine::utohexstr(Address) << ":"
1323          << "0x" + Twine::utohexstr(Address + Size) << "/" << Size << "\n";
1324       CurrentSection = &Section;
1325       FirstSection = false;
1326     }
1327 
1328     OS << "BOLT-INFO: ";
1329     const BinaryData *P = BD->getParent();
1330     while (P) {
1331       OS << "  ";
1332       P = P->getParent();
1333     }
1334     OS << *BD << "\n";
1335   }
1336 }
1337 
1338 Expected<unsigned> BinaryContext::getDwarfFile(
1339     StringRef Directory, StringRef FileName, unsigned FileNumber,
1340     Optional<MD5::MD5Result> Checksum, Optional<StringRef> Source,
1341     unsigned CUID, unsigned DWARFVersion) {
1342   DwarfLineTable &Table = DwarfLineTablesCUMap[CUID];
1343   return Table.tryGetFile(Directory, FileName, Checksum, Source, DWARFVersion,
1344                           FileNumber);
1345 }
1346 
1347 unsigned BinaryContext::addDebugFilenameToUnit(const uint32_t DestCUID,
1348                                                const uint32_t SrcCUID,
1349                                                unsigned FileIndex) {
1350   DWARFCompileUnit *SrcUnit = DwCtx->getCompileUnitForOffset(SrcCUID);
1351   const DWARFDebugLine::LineTable *LineTable =
1352       DwCtx->getLineTableForUnit(SrcUnit);
1353   const std::vector<DWARFDebugLine::FileNameEntry> &FileNames =
1354       LineTable->Prologue.FileNames;
1355   // Dir indexes start at 1, as DWARF file numbers, and a dir index 0
1356   // means empty dir.
1357   assert(FileIndex > 0 && FileIndex <= FileNames.size() &&
1358          "FileIndex out of range for the compilation unit.");
1359   StringRef Dir = "";
1360   if (FileNames[FileIndex - 1].DirIdx != 0) {
1361     if (Optional<const char *> DirName = dwarf::toString(
1362             LineTable->Prologue
1363                 .IncludeDirectories[FileNames[FileIndex - 1].DirIdx - 1])) {
1364       Dir = *DirName;
1365     }
1366   }
1367   StringRef FileName = "";
1368   if (Optional<const char *> FName =
1369           dwarf::toString(FileNames[FileIndex - 1].Name))
1370     FileName = *FName;
1371   assert(FileName != "");
1372   DWARFCompileUnit *DstUnit = DwCtx->getCompileUnitForOffset(DestCUID);
1373   return cantFail(getDwarfFile(Dir, FileName, 0, None, None, DestCUID,
1374                                DstUnit->getVersion()));
1375 }
1376 
1377 std::vector<BinaryFunction *> BinaryContext::getSortedFunctions() {
1378   std::vector<BinaryFunction *> SortedFunctions(BinaryFunctions.size());
1379   std::transform(BinaryFunctions.begin(), BinaryFunctions.end(),
1380                  SortedFunctions.begin(),
1381                  [](std::pair<const uint64_t, BinaryFunction> &BFI) {
1382                    return &BFI.second;
1383                  });
1384 
1385   std::stable_sort(SortedFunctions.begin(), SortedFunctions.end(),
1386                    [](const BinaryFunction *A, const BinaryFunction *B) {
1387                      if (A->hasValidIndex() && B->hasValidIndex()) {
1388                        return A->getIndex() < B->getIndex();
1389                      }
1390                      return A->hasValidIndex();
1391                    });
1392   return SortedFunctions;
1393 }
1394 
1395 std::vector<BinaryFunction *> BinaryContext::getAllBinaryFunctions() {
1396   std::vector<BinaryFunction *> AllFunctions;
1397   AllFunctions.reserve(BinaryFunctions.size() + InjectedBinaryFunctions.size());
1398   std::transform(BinaryFunctions.begin(), BinaryFunctions.end(),
1399                  std::back_inserter(AllFunctions),
1400                  [](std::pair<const uint64_t, BinaryFunction> &BFI) {
1401                    return &BFI.second;
1402                  });
1403   std::copy(InjectedBinaryFunctions.begin(), InjectedBinaryFunctions.end(),
1404             std::back_inserter(AllFunctions));
1405 
1406   return AllFunctions;
1407 }
1408 
1409 Optional<DWARFUnit *> BinaryContext::getDWOCU(uint64_t DWOId) {
1410   auto Iter = DWOCUs.find(DWOId);
1411   if (Iter == DWOCUs.end())
1412     return None;
1413 
1414   return Iter->second;
1415 }
1416 
1417 DWARFContext *BinaryContext::getDWOContext() {
1418   if (DWOCUs.empty())
1419     return nullptr;
1420   return &DWOCUs.begin()->second->getContext();
1421 }
1422 
1423 /// Handles DWO sections that can either be in .o, .dwo or .dwp files.
1424 void BinaryContext::preprocessDWODebugInfo() {
1425   for (const std::unique_ptr<DWARFUnit> &CU : DwCtx->compile_units()) {
1426     DWARFUnit *const DwarfUnit = CU.get();
1427     if (llvm::Optional<uint64_t> DWOId = DwarfUnit->getDWOId()) {
1428       DWARFUnit *DWOCU = DwarfUnit->getNonSkeletonUnitDIE(false).getDwarfUnit();
1429       if (!DWOCU->isDWOUnit()) {
1430         std::string DWOName = dwarf::toString(
1431             DwarfUnit->getUnitDIE().find(
1432                 {dwarf::DW_AT_dwo_name, dwarf::DW_AT_GNU_dwo_name}),
1433             "");
1434         outs() << "BOLT-WARNING: Debug Fission: DWO debug information for "
1435                << DWOName
1436                << " was not retrieved and won't be updated. Please check "
1437                   "relative path.\n";
1438         continue;
1439       }
1440       DWOCUs[*DWOId] = DWOCU;
1441     }
1442   }
1443 }
1444 
1445 void BinaryContext::preprocessDebugInfo() {
1446   struct CURange {
1447     uint64_t LowPC;
1448     uint64_t HighPC;
1449     DWARFUnit *Unit;
1450 
1451     bool operator<(const CURange &Other) const { return LowPC < Other.LowPC; }
1452   };
1453 
1454   // Building a map of address ranges to CUs similar to .debug_aranges and use
1455   // it to assign CU to functions.
1456   std::vector<CURange> AllRanges;
1457   AllRanges.reserve(DwCtx->getNumCompileUnits());
1458   for (const std::unique_ptr<DWARFUnit> &CU : DwCtx->compile_units()) {
1459     Expected<DWARFAddressRangesVector> RangesOrError =
1460         CU->getUnitDIE().getAddressRanges();
1461     if (!RangesOrError) {
1462       consumeError(RangesOrError.takeError());
1463       continue;
1464     }
1465     for (DWARFAddressRange &Range : *RangesOrError) {
1466       // Parts of the debug info could be invalidated due to corresponding code
1467       // being removed from the binary by the linker. Hence we check if the
1468       // address is a valid one.
1469       if (containsAddress(Range.LowPC))
1470         AllRanges.emplace_back(CURange{Range.LowPC, Range.HighPC, CU.get()});
1471     }
1472 
1473     ContainsDwarf5 |= CU->getVersion() >= 5;
1474     ContainsDwarfLegacy |= CU->getVersion() < 5;
1475   }
1476 
1477   if (ContainsDwarf5 && ContainsDwarfLegacy)
1478     llvm::errs() << "BOLT-WARNING: BOLT does not support mix mode binary with "
1479                     "DWARF5 and DWARF{2,3,4}.\n";
1480 
1481   std::sort(AllRanges.begin(), AllRanges.end());
1482   for (auto &KV : BinaryFunctions) {
1483     const uint64_t FunctionAddress = KV.first;
1484     BinaryFunction &Function = KV.second;
1485 
1486     auto It = std::partition_point(
1487         AllRanges.begin(), AllRanges.end(),
1488         [=](CURange R) { return R.HighPC <= FunctionAddress; });
1489     if (It != AllRanges.end() && It->LowPC <= FunctionAddress) {
1490       Function.setDWARFUnit(It->Unit);
1491     }
1492   }
1493 
1494   // Discover units with debug info that needs to be updated.
1495   for (const auto &KV : BinaryFunctions) {
1496     const BinaryFunction &BF = KV.second;
1497     if (shouldEmit(BF) && BF.getDWARFUnit())
1498       ProcessedCUs.insert(BF.getDWARFUnit());
1499   }
1500 
1501   // Clear debug info for functions from units that we are not going to process.
1502   for (auto &KV : BinaryFunctions) {
1503     BinaryFunction &BF = KV.second;
1504     if (BF.getDWARFUnit() && !ProcessedCUs.count(BF.getDWARFUnit()))
1505       BF.setDWARFUnit(nullptr);
1506   }
1507 
1508   if (opts::Verbosity >= 1) {
1509     outs() << "BOLT-INFO: " << ProcessedCUs.size() << " out of "
1510            << DwCtx->getNumCompileUnits() << " CUs will be updated\n";
1511   }
1512 
1513   preprocessDWODebugInfo();
1514 
1515   // Populate MCContext with DWARF files from all units.
1516   StringRef GlobalPrefix = AsmInfo->getPrivateGlobalPrefix();
1517   for (const std::unique_ptr<DWARFUnit> &CU : DwCtx->compile_units()) {
1518     const uint64_t CUID = CU->getOffset();
1519     DwarfLineTable &BinaryLineTable = getDwarfLineTable(CUID);
1520     BinaryLineTable.setLabel(Ctx->getOrCreateSymbol(
1521         GlobalPrefix + "line_table_start" + Twine(CUID)));
1522 
1523     if (!ProcessedCUs.count(CU.get()))
1524       continue;
1525 
1526     const DWARFDebugLine::LineTable *LineTable =
1527         DwCtx->getLineTableForUnit(CU.get());
1528     const std::vector<DWARFDebugLine::FileNameEntry> &FileNames =
1529         LineTable->Prologue.FileNames;
1530 
1531     uint16_t DwarfVersion = LineTable->Prologue.getVersion();
1532     if (DwarfVersion >= 5) {
1533       Optional<MD5::MD5Result> Checksum = None;
1534       if (LineTable->Prologue.ContentTypes.HasMD5)
1535         Checksum = LineTable->Prologue.FileNames[0].Checksum;
1536       Optional<const char *> Name =
1537           dwarf::toString(CU->getUnitDIE().find(dwarf::DW_AT_name), nullptr);
1538       if (Optional<uint64_t> DWOID = CU->getDWOId()) {
1539         auto Iter = DWOCUs.find(*DWOID);
1540         assert(Iter != DWOCUs.end() && "DWO CU was not found.");
1541         Name = dwarf::toString(
1542             Iter->second->getUnitDIE().find(dwarf::DW_AT_name), nullptr);
1543       }
1544       BinaryLineTable.setRootFile(CU->getCompilationDir(), *Name, Checksum,
1545                                   None);
1546     }
1547 
1548     BinaryLineTable.setDwarfVersion(DwarfVersion);
1549 
1550     // Assign a unique label to every line table, one per CU.
1551     // Make sure empty debug line tables are registered too.
1552     if (FileNames.empty()) {
1553       cantFail(
1554           getDwarfFile("", "<unknown>", 0, None, None, CUID, DwarfVersion));
1555       continue;
1556     }
1557     const uint32_t Offset = DwarfVersion < 5 ? 1 : 0;
1558     for (size_t I = 0, Size = FileNames.size(); I != Size; ++I) {
1559       // Dir indexes start at 1, as DWARF file numbers, and a dir index 0
1560       // means empty dir.
1561       StringRef Dir = "";
1562       if (FileNames[I].DirIdx != 0 || DwarfVersion >= 5)
1563         if (Optional<const char *> DirName = dwarf::toString(
1564                 LineTable->Prologue
1565                     .IncludeDirectories[FileNames[I].DirIdx - Offset]))
1566           Dir = *DirName;
1567       StringRef FileName = "";
1568       if (Optional<const char *> FName = dwarf::toString(FileNames[I].Name))
1569         FileName = *FName;
1570       assert(FileName != "");
1571       Optional<MD5::MD5Result> Checksum = None;
1572       if (DwarfVersion >= 5 && LineTable->Prologue.ContentTypes.HasMD5)
1573         Checksum = LineTable->Prologue.FileNames[I].Checksum;
1574       cantFail(
1575           getDwarfFile(Dir, FileName, 0, Checksum, None, CUID, DwarfVersion));
1576     }
1577   }
1578 }
1579 
1580 bool BinaryContext::shouldEmit(const BinaryFunction &Function) const {
1581   if (Function.isPseudo())
1582     return false;
1583 
1584   if (opts::processAllFunctions())
1585     return true;
1586 
1587   if (Function.isIgnored())
1588     return false;
1589 
1590   // In relocation mode we will emit non-simple functions with CFG.
1591   // If the function does not have a CFG it should be marked as ignored.
1592   return HasRelocations || Function.isSimple();
1593 }
1594 
1595 void BinaryContext::printCFI(raw_ostream &OS, const MCCFIInstruction &Inst) {
1596   uint32_t Operation = Inst.getOperation();
1597   switch (Operation) {
1598   case MCCFIInstruction::OpSameValue:
1599     OS << "OpSameValue Reg" << Inst.getRegister();
1600     break;
1601   case MCCFIInstruction::OpRememberState:
1602     OS << "OpRememberState";
1603     break;
1604   case MCCFIInstruction::OpRestoreState:
1605     OS << "OpRestoreState";
1606     break;
1607   case MCCFIInstruction::OpOffset:
1608     OS << "OpOffset Reg" << Inst.getRegister() << " " << Inst.getOffset();
1609     break;
1610   case MCCFIInstruction::OpDefCfaRegister:
1611     OS << "OpDefCfaRegister Reg" << Inst.getRegister();
1612     break;
1613   case MCCFIInstruction::OpDefCfaOffset:
1614     OS << "OpDefCfaOffset " << Inst.getOffset();
1615     break;
1616   case MCCFIInstruction::OpDefCfa:
1617     OS << "OpDefCfa Reg" << Inst.getRegister() << " " << Inst.getOffset();
1618     break;
1619   case MCCFIInstruction::OpRelOffset:
1620     OS << "OpRelOffset Reg" << Inst.getRegister() << " " << Inst.getOffset();
1621     break;
1622   case MCCFIInstruction::OpAdjustCfaOffset:
1623     OS << "OfAdjustCfaOffset " << Inst.getOffset();
1624     break;
1625   case MCCFIInstruction::OpEscape:
1626     OS << "OpEscape";
1627     break;
1628   case MCCFIInstruction::OpRestore:
1629     OS << "OpRestore Reg" << Inst.getRegister();
1630     break;
1631   case MCCFIInstruction::OpUndefined:
1632     OS << "OpUndefined Reg" << Inst.getRegister();
1633     break;
1634   case MCCFIInstruction::OpRegister:
1635     OS << "OpRegister Reg" << Inst.getRegister() << " Reg"
1636        << Inst.getRegister2();
1637     break;
1638   case MCCFIInstruction::OpWindowSave:
1639     OS << "OpWindowSave";
1640     break;
1641   case MCCFIInstruction::OpGnuArgsSize:
1642     OS << "OpGnuArgsSize";
1643     break;
1644   default:
1645     OS << "Op#" << Operation;
1646     break;
1647   }
1648 }
1649 
1650 MarkerSymType BinaryContext::getMarkerType(const SymbolRef &Symbol) const {
1651   // For aarch64, the ABI defines mapping symbols so we identify data in the
1652   // code section (see IHI0056B). $x identifies a symbol starting code or the
1653   // end of a data chunk inside code, $d indentifies start of data.
1654   if (!isAArch64() || ELFSymbolRef(Symbol).getSize())
1655     return MarkerSymType::NONE;
1656 
1657   Expected<StringRef> NameOrError = Symbol.getName();
1658   Expected<object::SymbolRef::Type> TypeOrError = Symbol.getType();
1659 
1660   if (!TypeOrError || !NameOrError)
1661     return MarkerSymType::NONE;
1662 
1663   if (*TypeOrError != SymbolRef::ST_Unknown)
1664     return MarkerSymType::NONE;
1665 
1666   if (*NameOrError == "$x" || NameOrError->startswith("$x."))
1667     return MarkerSymType::CODE;
1668 
1669   if (*NameOrError == "$d" || NameOrError->startswith("$d."))
1670     return MarkerSymType::DATA;
1671 
1672   return MarkerSymType::NONE;
1673 }
1674 
1675 bool BinaryContext::isMarker(const SymbolRef &Symbol) const {
1676   return getMarkerType(Symbol) != MarkerSymType::NONE;
1677 }
1678 
1679 void BinaryContext::printInstruction(raw_ostream &OS, const MCInst &Instruction,
1680                                      uint64_t Offset,
1681                                      const BinaryFunction *Function,
1682                                      bool PrintMCInst, bool PrintMemData,
1683                                      bool PrintRelocations,
1684                                      StringRef Endl) const {
1685   if (MIB->isEHLabel(Instruction)) {
1686     OS << "  EH_LABEL: " << *MIB->getTargetSymbol(Instruction) << Endl;
1687     return;
1688   }
1689   OS << format("    %08" PRIx64 ": ", Offset);
1690   if (MIB->isCFI(Instruction)) {
1691     uint32_t Offset = Instruction.getOperand(0).getImm();
1692     OS << "\t!CFI\t$" << Offset << "\t; ";
1693     if (Function)
1694       printCFI(OS, *Function->getCFIFor(Instruction));
1695     OS << Endl;
1696     return;
1697   }
1698   InstPrinter->printInst(&Instruction, 0, "", *STI, OS);
1699   if (MIB->isCall(Instruction)) {
1700     if (MIB->isTailCall(Instruction))
1701       OS << " # TAILCALL ";
1702     if (MIB->isInvoke(Instruction)) {
1703       const Optional<MCPlus::MCLandingPad> EHInfo = MIB->getEHInfo(Instruction);
1704       OS << " # handler: ";
1705       if (EHInfo->first)
1706         OS << *EHInfo->first;
1707       else
1708         OS << '0';
1709       OS << "; action: " << EHInfo->second;
1710       const int64_t GnuArgsSize = MIB->getGnuArgsSize(Instruction);
1711       if (GnuArgsSize >= 0)
1712         OS << "; GNU_args_size = " << GnuArgsSize;
1713     }
1714   } else if (MIB->isIndirectBranch(Instruction)) {
1715     if (uint64_t JTAddress = MIB->getJumpTable(Instruction)) {
1716       OS << " # JUMPTABLE @0x" << Twine::utohexstr(JTAddress);
1717     } else {
1718       OS << " # UNKNOWN CONTROL FLOW";
1719     }
1720   }
1721   if (Optional<uint32_t> Offset = MIB->getOffset(Instruction))
1722     OS << " # Offset: " << *Offset;
1723 
1724   MIB->printAnnotations(Instruction, OS);
1725 
1726   if (opts::PrintDebugInfo) {
1727     DebugLineTableRowRef RowRef =
1728         DebugLineTableRowRef::fromSMLoc(Instruction.getLoc());
1729     if (RowRef != DebugLineTableRowRef::NULL_ROW) {
1730       const DWARFDebugLine::LineTable *LineTable;
1731       if (Function && Function->getDWARFUnit() &&
1732           Function->getDWARFUnit()->getOffset() == RowRef.DwCompileUnitIndex) {
1733         LineTable = Function->getDWARFLineTable();
1734       } else {
1735         LineTable = DwCtx->getLineTableForUnit(
1736             DwCtx->getCompileUnitForOffset(RowRef.DwCompileUnitIndex));
1737       }
1738       assert(LineTable &&
1739              "line table expected for instruction with debug info");
1740 
1741       const DWARFDebugLine::Row &Row = LineTable->Rows[RowRef.RowIndex - 1];
1742       StringRef FileName = "";
1743       if (Optional<const char *> FName =
1744               dwarf::toString(LineTable->Prologue.FileNames[Row.File - 1].Name))
1745         FileName = *FName;
1746       OS << " # debug line " << FileName << ":" << Row.Line;
1747       if (Row.Column)
1748         OS << ":" << Row.Column;
1749       if (Row.Discriminator)
1750         OS << " discriminator:" << Row.Discriminator;
1751     }
1752   }
1753 
1754   if ((opts::PrintRelocations || PrintRelocations) && Function) {
1755     const uint64_t Size = computeCodeSize(&Instruction, &Instruction + 1);
1756     Function->printRelocations(OS, Offset, Size);
1757   }
1758 
1759   OS << Endl;
1760 
1761   if (PrintMCInst) {
1762     Instruction.dump_pretty(OS, InstPrinter.get());
1763     OS << Endl;
1764   }
1765 }
1766 
1767 Optional<uint64_t>
1768 BinaryContext::getBaseAddressForMapping(uint64_t MMapAddress,
1769                                         uint64_t FileOffset) const {
1770   // Find a segment with a matching file offset.
1771   for (auto &KV : SegmentMapInfo) {
1772     const SegmentInfo &SegInfo = KV.second;
1773     if (alignDown(SegInfo.FileOffset, SegInfo.Alignment) == FileOffset) {
1774       // Use segment's aligned memory offset to calculate the base address.
1775       const uint64_t MemOffset = alignDown(SegInfo.Address, SegInfo.Alignment);
1776       return MMapAddress - MemOffset;
1777     }
1778   }
1779 
1780   return NoneType();
1781 }
1782 
1783 ErrorOr<BinarySection &> BinaryContext::getSectionForAddress(uint64_t Address) {
1784   auto SI = AddressToSection.upper_bound(Address);
1785   if (SI != AddressToSection.begin()) {
1786     --SI;
1787     uint64_t UpperBound = SI->first + SI->second->getSize();
1788     if (!SI->second->getSize())
1789       UpperBound += 1;
1790     if (UpperBound > Address)
1791       return *SI->second;
1792   }
1793   return std::make_error_code(std::errc::bad_address);
1794 }
1795 
1796 ErrorOr<StringRef>
1797 BinaryContext::getSectionNameForAddress(uint64_t Address) const {
1798   if (ErrorOr<const BinarySection &> Section = getSectionForAddress(Address))
1799     return Section->getName();
1800   return std::make_error_code(std::errc::bad_address);
1801 }
1802 
1803 BinarySection &BinaryContext::registerSection(BinarySection *Section) {
1804   auto Res = Sections.insert(Section);
1805   (void)Res;
1806   assert(Res.second && "can't register the same section twice.");
1807 
1808   // Only register allocatable sections in the AddressToSection map.
1809   if (Section->isAllocatable() && Section->getAddress())
1810     AddressToSection.insert(std::make_pair(Section->getAddress(), Section));
1811   NameToSection.insert(
1812       std::make_pair(std::string(Section->getName()), Section));
1813   LLVM_DEBUG(dbgs() << "BOLT-DEBUG: registering " << *Section << "\n");
1814   return *Section;
1815 }
1816 
1817 BinarySection &BinaryContext::registerSection(SectionRef Section) {
1818   return registerSection(new BinarySection(*this, Section));
1819 }
1820 
1821 BinarySection &
1822 BinaryContext::registerSection(StringRef SectionName,
1823                                const BinarySection &OriginalSection) {
1824   return registerSection(
1825       new BinarySection(*this, SectionName, OriginalSection));
1826 }
1827 
1828 BinarySection &
1829 BinaryContext::registerOrUpdateSection(StringRef Name, unsigned ELFType,
1830                                        unsigned ELFFlags, uint8_t *Data,
1831                                        uint64_t Size, unsigned Alignment) {
1832   auto NamedSections = getSectionByName(Name);
1833   if (NamedSections.begin() != NamedSections.end()) {
1834     assert(std::next(NamedSections.begin()) == NamedSections.end() &&
1835            "can only update unique sections");
1836     BinarySection *Section = NamedSections.begin()->second;
1837 
1838     LLVM_DEBUG(dbgs() << "BOLT-DEBUG: updating " << *Section << " -> ");
1839     const bool Flag = Section->isAllocatable();
1840     (void)Flag;
1841     Section->update(Data, Size, Alignment, ELFType, ELFFlags);
1842     LLVM_DEBUG(dbgs() << *Section << "\n");
1843     // FIXME: Fix section flags/attributes for MachO.
1844     if (isELF())
1845       assert(Flag == Section->isAllocatable() &&
1846              "can't change section allocation status");
1847     return *Section;
1848   }
1849 
1850   return registerSection(
1851       new BinarySection(*this, Name, Data, Size, Alignment, ELFType, ELFFlags));
1852 }
1853 
1854 bool BinaryContext::deregisterSection(BinarySection &Section) {
1855   BinarySection *SectionPtr = &Section;
1856   auto Itr = Sections.find(SectionPtr);
1857   if (Itr != Sections.end()) {
1858     auto Range = AddressToSection.equal_range(SectionPtr->getAddress());
1859     while (Range.first != Range.second) {
1860       if (Range.first->second == SectionPtr) {
1861         AddressToSection.erase(Range.first);
1862         break;
1863       }
1864       ++Range.first;
1865     }
1866 
1867     auto NameRange =
1868         NameToSection.equal_range(std::string(SectionPtr->getName()));
1869     while (NameRange.first != NameRange.second) {
1870       if (NameRange.first->second == SectionPtr) {
1871         NameToSection.erase(NameRange.first);
1872         break;
1873       }
1874       ++NameRange.first;
1875     }
1876 
1877     Sections.erase(Itr);
1878     delete SectionPtr;
1879     return true;
1880   }
1881   return false;
1882 }
1883 
1884 void BinaryContext::printSections(raw_ostream &OS) const {
1885   for (BinarySection *const &Section : Sections)
1886     OS << "BOLT-INFO: " << *Section << "\n";
1887 }
1888 
1889 BinarySection &BinaryContext::absoluteSection() {
1890   if (ErrorOr<BinarySection &> Section = getUniqueSectionByName("<absolute>"))
1891     return *Section;
1892   return registerOrUpdateSection("<absolute>", ELF::SHT_NULL, 0u);
1893 }
1894 
1895 ErrorOr<uint64_t> BinaryContext::getUnsignedValueAtAddress(uint64_t Address,
1896                                                            size_t Size) const {
1897   const ErrorOr<const BinarySection &> Section = getSectionForAddress(Address);
1898   if (!Section)
1899     return std::make_error_code(std::errc::bad_address);
1900 
1901   if (Section->isVirtual())
1902     return 0;
1903 
1904   DataExtractor DE(Section->getContents(), AsmInfo->isLittleEndian(),
1905                    AsmInfo->getCodePointerSize());
1906   auto ValueOffset = static_cast<uint64_t>(Address - Section->getAddress());
1907   return DE.getUnsigned(&ValueOffset, Size);
1908 }
1909 
1910 ErrorOr<uint64_t> BinaryContext::getSignedValueAtAddress(uint64_t Address,
1911                                                          size_t Size) const {
1912   const ErrorOr<const BinarySection &> Section = getSectionForAddress(Address);
1913   if (!Section)
1914     return std::make_error_code(std::errc::bad_address);
1915 
1916   if (Section->isVirtual())
1917     return 0;
1918 
1919   DataExtractor DE(Section->getContents(), AsmInfo->isLittleEndian(),
1920                    AsmInfo->getCodePointerSize());
1921   auto ValueOffset = static_cast<uint64_t>(Address - Section->getAddress());
1922   return DE.getSigned(&ValueOffset, Size);
1923 }
1924 
1925 void BinaryContext::addRelocation(uint64_t Address, MCSymbol *Symbol,
1926                                   uint64_t Type, uint64_t Addend,
1927                                   uint64_t Value) {
1928   ErrorOr<BinarySection &> Section = getSectionForAddress(Address);
1929   assert(Section && "cannot find section for address");
1930   Section->addRelocation(Address - Section->getAddress(), Symbol, Type, Addend,
1931                          Value);
1932 }
1933 
1934 void BinaryContext::addDynamicRelocation(uint64_t Address, MCSymbol *Symbol,
1935                                          uint64_t Type, uint64_t Addend,
1936                                          uint64_t Value) {
1937   ErrorOr<BinarySection &> Section = getSectionForAddress(Address);
1938   assert(Section && "cannot find section for address");
1939   Section->addDynamicRelocation(Address - Section->getAddress(), Symbol, Type,
1940                                 Addend, Value);
1941 }
1942 
1943 bool BinaryContext::removeRelocationAt(uint64_t Address) {
1944   ErrorOr<BinarySection &> Section = getSectionForAddress(Address);
1945   assert(Section && "cannot find section for address");
1946   return Section->removeRelocationAt(Address - Section->getAddress());
1947 }
1948 
1949 const Relocation *BinaryContext::getRelocationAt(uint64_t Address) {
1950   ErrorOr<BinarySection &> Section = getSectionForAddress(Address);
1951   if (!Section)
1952     return nullptr;
1953 
1954   return Section->getRelocationAt(Address - Section->getAddress());
1955 }
1956 
1957 const Relocation *BinaryContext::getDynamicRelocationAt(uint64_t Address) {
1958   ErrorOr<BinarySection &> Section = getSectionForAddress(Address);
1959   if (!Section)
1960     return nullptr;
1961 
1962   return Section->getDynamicRelocationAt(Address - Section->getAddress());
1963 }
1964 
1965 void BinaryContext::markAmbiguousRelocations(BinaryData &BD,
1966                                              const uint64_t Address) {
1967   auto setImmovable = [&](BinaryData &BD) {
1968     BinaryData *Root = BD.getAtomicRoot();
1969     LLVM_DEBUG(if (Root->isMoveable()) {
1970       dbgs() << "BOLT-DEBUG: setting " << *Root << " as immovable "
1971              << "due to ambiguous relocation referencing 0x"
1972              << Twine::utohexstr(Address) << '\n';
1973     });
1974     Root->setIsMoveable(false);
1975   };
1976 
1977   if (Address == BD.getAddress()) {
1978     setImmovable(BD);
1979 
1980     // Set previous symbol as immovable
1981     BinaryData *Prev = getBinaryDataContainingAddress(Address - 1);
1982     if (Prev && Prev->getEndAddress() == BD.getAddress())
1983       setImmovable(*Prev);
1984   }
1985 
1986   if (Address == BD.getEndAddress()) {
1987     setImmovable(BD);
1988 
1989     // Set next symbol as immovable
1990     BinaryData *Next = getBinaryDataContainingAddress(BD.getEndAddress());
1991     if (Next && Next->getAddress() == BD.getEndAddress())
1992       setImmovable(*Next);
1993   }
1994 }
1995 
1996 BinaryFunction *BinaryContext::getFunctionForSymbol(const MCSymbol *Symbol,
1997                                                     uint64_t *EntryDesc) {
1998   std::shared_lock<std::shared_timed_mutex> Lock(SymbolToFunctionMapMutex);
1999   auto BFI = SymbolToFunctionMap.find(Symbol);
2000   if (BFI == SymbolToFunctionMap.end())
2001     return nullptr;
2002 
2003   BinaryFunction *BF = BFI->second;
2004   if (EntryDesc)
2005     *EntryDesc = BF->getEntryIDForSymbol(Symbol);
2006 
2007   return BF;
2008 }
2009 
2010 void BinaryContext::exitWithBugReport(StringRef Message,
2011                                       const BinaryFunction &Function) const {
2012   errs() << "=======================================\n";
2013   errs() << "BOLT is unable to proceed because it couldn't properly understand "
2014             "this function.\n";
2015   errs() << "If you are running the most recent version of BOLT, you may "
2016             "want to "
2017             "report this and paste this dump.\nPlease check that there is no "
2018             "sensitive contents being shared in this dump.\n";
2019   errs() << "\nOffending function: " << Function.getPrintName() << "\n\n";
2020   ScopedPrinter SP(errs());
2021   SP.printBinaryBlock("Function contents", *Function.getData());
2022   errs() << "\n";
2023   Function.dump();
2024   errs() << "ERROR: " << Message;
2025   errs() << "\n=======================================\n";
2026   exit(1);
2027 }
2028 
2029 BinaryFunction *
2030 BinaryContext::createInjectedBinaryFunction(const std::string &Name,
2031                                             bool IsSimple) {
2032   InjectedBinaryFunctions.push_back(new BinaryFunction(Name, *this, IsSimple));
2033   BinaryFunction *BF = InjectedBinaryFunctions.back();
2034   setSymbolToFunctionMap(BF->getSymbol(), BF);
2035   BF->CurrentState = BinaryFunction::State::CFG;
2036   return BF;
2037 }
2038 
2039 std::pair<size_t, size_t>
2040 BinaryContext::calculateEmittedSize(BinaryFunction &BF, bool FixBranches) {
2041   // Adjust branch instruction to match the current layout.
2042   if (FixBranches)
2043     BF.fixBranches();
2044 
2045   // Create local MC context to isolate the effect of ephemeral code emission.
2046   IndependentCodeEmitter MCEInstance = createIndependentMCCodeEmitter();
2047   MCContext *LocalCtx = MCEInstance.LocalCtx.get();
2048   MCAsmBackend *MAB =
2049       TheTarget->createMCAsmBackend(*STI, *MRI, MCTargetOptions());
2050 
2051   SmallString<256> Code;
2052   raw_svector_ostream VecOS(Code);
2053 
2054   std::unique_ptr<MCObjectWriter> OW = MAB->createObjectWriter(VecOS);
2055   std::unique_ptr<MCStreamer> Streamer(TheTarget->createMCObjectStreamer(
2056       *TheTriple, *LocalCtx, std::unique_ptr<MCAsmBackend>(MAB), std::move(OW),
2057       std::unique_ptr<MCCodeEmitter>(MCEInstance.MCE.release()), *STI,
2058       /*RelaxAll=*/false,
2059       /*IncrementalLinkerCompatible=*/false,
2060       /*DWARFMustBeAtTheEnd=*/false));
2061 
2062   Streamer->initSections(false, *STI);
2063 
2064   MCSection *Section = MCEInstance.LocalMOFI->getTextSection();
2065   Section->setHasInstructions(true);
2066 
2067   // Create symbols in the LocalCtx so that they get destroyed with it.
2068   MCSymbol *StartLabel = LocalCtx->createTempSymbol();
2069   MCSymbol *EndLabel = LocalCtx->createTempSymbol();
2070   MCSymbol *ColdStartLabel = LocalCtx->createTempSymbol();
2071   MCSymbol *ColdEndLabel = LocalCtx->createTempSymbol();
2072 
2073   Streamer->SwitchSection(Section);
2074   Streamer->emitLabel(StartLabel);
2075   emitFunctionBody(*Streamer, BF, /*EmitColdPart=*/false,
2076                    /*EmitCodeOnly=*/true);
2077   Streamer->emitLabel(EndLabel);
2078 
2079   if (BF.isSplit()) {
2080     MCSectionELF *ColdSection =
2081         LocalCtx->getELFSection(BF.getColdCodeSectionName(), ELF::SHT_PROGBITS,
2082                                 ELF::SHF_EXECINSTR | ELF::SHF_ALLOC);
2083     ColdSection->setHasInstructions(true);
2084 
2085     Streamer->SwitchSection(ColdSection);
2086     Streamer->emitLabel(ColdStartLabel);
2087     emitFunctionBody(*Streamer, BF, /*EmitColdPart=*/true,
2088                      /*EmitCodeOnly=*/true);
2089     Streamer->emitLabel(ColdEndLabel);
2090     // To avoid calling MCObjectStreamer::flushPendingLabels() which is private
2091     Streamer->emitBytes(StringRef(""));
2092     Streamer->SwitchSection(Section);
2093   }
2094 
2095   // To avoid calling MCObjectStreamer::flushPendingLabels() which is private or
2096   // MCStreamer::Finish(), which does more than we want
2097   Streamer->emitBytes(StringRef(""));
2098 
2099   MCAssembler &Assembler =
2100       static_cast<MCObjectStreamer *>(Streamer.get())->getAssembler();
2101   MCAsmLayout Layout(Assembler);
2102   Assembler.layout(Layout);
2103 
2104   const uint64_t HotSize =
2105       Layout.getSymbolOffset(*EndLabel) - Layout.getSymbolOffset(*StartLabel);
2106   const uint64_t ColdSize = BF.isSplit()
2107                                 ? Layout.getSymbolOffset(*ColdEndLabel) -
2108                                       Layout.getSymbolOffset(*ColdStartLabel)
2109                                 : 0ULL;
2110 
2111   // Clean-up the effect of the code emission.
2112   for (const MCSymbol &Symbol : Assembler.symbols()) {
2113     MCSymbol *MutableSymbol = const_cast<MCSymbol *>(&Symbol);
2114     MutableSymbol->setUndefined();
2115     MutableSymbol->setIsRegistered(false);
2116   }
2117 
2118   return std::make_pair(HotSize, ColdSize);
2119 }
2120 
2121 bool BinaryContext::validateEncoding(const MCInst &Inst,
2122                                      ArrayRef<uint8_t> InputEncoding) const {
2123   SmallString<256> Code;
2124   SmallVector<MCFixup, 4> Fixups;
2125   raw_svector_ostream VecOS(Code);
2126 
2127   MCE->encodeInstruction(Inst, VecOS, Fixups, *STI);
2128   auto EncodedData = ArrayRef<uint8_t>((uint8_t *)Code.data(), Code.size());
2129   if (InputEncoding != EncodedData) {
2130     if (opts::Verbosity > 1) {
2131       errs() << "BOLT-WARNING: mismatched encoding detected\n"
2132              << "      input: " << InputEncoding << '\n'
2133              << "     output: " << EncodedData << '\n';
2134     }
2135     return false;
2136   }
2137 
2138   return true;
2139 }
2140 
2141 uint64_t BinaryContext::getHotThreshold() const {
2142   static uint64_t Threshold = 0;
2143   if (Threshold == 0) {
2144     Threshold = std::max(
2145         (uint64_t)opts::ExecutionCountThreshold,
2146         NumProfiledFuncs ? SumExecutionCount / (2 * NumProfiledFuncs) : 1);
2147   }
2148   return Threshold;
2149 }
2150 
2151 BinaryFunction *BinaryContext::getBinaryFunctionContainingAddress(
2152     uint64_t Address, bool CheckPastEnd, bool UseMaxSize) {
2153   auto FI = BinaryFunctions.upper_bound(Address);
2154   if (FI == BinaryFunctions.begin())
2155     return nullptr;
2156   --FI;
2157 
2158   const uint64_t UsedSize =
2159       UseMaxSize ? FI->second.getMaxSize() : FI->second.getSize();
2160 
2161   if (Address >= FI->first + UsedSize + (CheckPastEnd ? 1 : 0))
2162     return nullptr;
2163 
2164   return &FI->second;
2165 }
2166 
2167 BinaryFunction *BinaryContext::getBinaryFunctionAtAddress(uint64_t Address) {
2168   // First, try to find a function starting at the given address. If the
2169   // function was folded, this will get us the original folded function if it
2170   // wasn't removed from the list, e.g. in non-relocation mode.
2171   auto BFI = BinaryFunctions.find(Address);
2172   if (BFI != BinaryFunctions.end())
2173     return &BFI->second;
2174 
2175   // We might have folded the function matching the object at the given
2176   // address. In such case, we look for a function matching the symbol
2177   // registered at the original address. The new function (the one that the
2178   // original was folded into) will hold the symbol.
2179   if (const BinaryData *BD = getBinaryDataAtAddress(Address)) {
2180     uint64_t EntryID = 0;
2181     BinaryFunction *BF = getFunctionForSymbol(BD->getSymbol(), &EntryID);
2182     if (BF && EntryID == 0)
2183       return BF;
2184   }
2185   return nullptr;
2186 }
2187 
2188 DebugAddressRangesVector BinaryContext::translateModuleAddressRanges(
2189     const DWARFAddressRangesVector &InputRanges) const {
2190   DebugAddressRangesVector OutputRanges;
2191 
2192   for (const DWARFAddressRange Range : InputRanges) {
2193     auto BFI = BinaryFunctions.lower_bound(Range.LowPC);
2194     while (BFI != BinaryFunctions.end()) {
2195       const BinaryFunction &Function = BFI->second;
2196       if (Function.getAddress() >= Range.HighPC)
2197         break;
2198       const DebugAddressRangesVector FunctionRanges =
2199           Function.getOutputAddressRanges();
2200       std::move(std::begin(FunctionRanges), std::end(FunctionRanges),
2201                 std::back_inserter(OutputRanges));
2202       std::advance(BFI, 1);
2203     }
2204   }
2205 
2206   return OutputRanges;
2207 }
2208 
2209 } // namespace bolt
2210 } // namespace llvm
2211