xref: /llvm-project/bolt/lib/Core/BinaryContext.cpp (revision ba1ac98c62e847551bd7ed7d48d811228f7f4619)
1 //===- bolt/Core/BinaryContext.cpp - Low-level context --------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements the BinaryContext class.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "bolt/Core/BinaryContext.h"
14 #include "bolt/Core/BinaryEmitter.h"
15 #include "bolt/Core/BinaryFunction.h"
16 #include "bolt/Utils/CommandLineOpts.h"
17 #include "bolt/Utils/NameResolver.h"
18 #include "bolt/Utils/Utils.h"
19 #include "llvm/ADT/Twine.h"
20 #include "llvm/DebugInfo/DWARF/DWARFCompileUnit.h"
21 #include "llvm/DebugInfo/DWARF/DWARFFormValue.h"
22 #include "llvm/DebugInfo/DWARF/DWARFUnit.h"
23 #include "llvm/MC/MCAsmLayout.h"
24 #include "llvm/MC/MCAssembler.h"
25 #include "llvm/MC/MCContext.h"
26 #include "llvm/MC/MCDisassembler/MCDisassembler.h"
27 #include "llvm/MC/MCInstPrinter.h"
28 #include "llvm/MC/MCObjectStreamer.h"
29 #include "llvm/MC/MCObjectWriter.h"
30 #include "llvm/MC/MCRegisterInfo.h"
31 #include "llvm/MC/MCSectionELF.h"
32 #include "llvm/MC/MCStreamer.h"
33 #include "llvm/MC/MCSubtargetInfo.h"
34 #include "llvm/MC/MCSymbol.h"
35 #include "llvm/Support/CommandLine.h"
36 #include "llvm/Support/Error.h"
37 #include "llvm/Support/Regex.h"
38 #include <algorithm>
39 #include <functional>
40 #include <iterator>
41 #include <unordered_set>
42 
43 using namespace llvm;
44 
45 #undef  DEBUG_TYPE
46 #define DEBUG_TYPE "bolt"
47 
48 namespace opts {
49 
50 cl::opt<bool>
51 NoHugePages("no-huge-pages",
52   cl::desc("use regular size pages for code alignment"),
53   cl::ZeroOrMore,
54   cl::Hidden,
55   cl::cat(BoltCategory));
56 
57 static cl::opt<bool>
58 PrintDebugInfo("print-debug-info",
59   cl::desc("print debug info when printing functions"),
60   cl::Hidden,
61   cl::ZeroOrMore,
62   cl::cat(BoltCategory));
63 
64 cl::opt<bool>
65 PrintRelocations("print-relocations",
66   cl::desc("print relocations when printing functions/objects"),
67   cl::Hidden,
68   cl::ZeroOrMore,
69   cl::cat(BoltCategory));
70 
71 static cl::opt<bool>
72 PrintMemData("print-mem-data",
73   cl::desc("print memory data annotations when printing functions"),
74   cl::Hidden,
75   cl::ZeroOrMore,
76   cl::cat(BoltCategory));
77 
78 } // namespace opts
79 
80 namespace llvm {
81 namespace bolt {
82 
83 BinaryContext::BinaryContext(std::unique_ptr<MCContext> Ctx,
84                              std::unique_ptr<DWARFContext> DwCtx,
85                              std::unique_ptr<Triple> TheTriple,
86                              const Target *TheTarget, std::string TripleName,
87                              std::unique_ptr<MCCodeEmitter> MCE,
88                              std::unique_ptr<MCObjectFileInfo> MOFI,
89                              std::unique_ptr<const MCAsmInfo> AsmInfo,
90                              std::unique_ptr<const MCInstrInfo> MII,
91                              std::unique_ptr<const MCSubtargetInfo> STI,
92                              std::unique_ptr<MCInstPrinter> InstPrinter,
93                              std::unique_ptr<const MCInstrAnalysis> MIA,
94                              std::unique_ptr<MCPlusBuilder> MIB,
95                              std::unique_ptr<const MCRegisterInfo> MRI,
96                              std::unique_ptr<MCDisassembler> DisAsm)
97     : Ctx(std::move(Ctx)), DwCtx(std::move(DwCtx)),
98       TheTriple(std::move(TheTriple)), TheTarget(TheTarget),
99       TripleName(TripleName), MCE(std::move(MCE)), MOFI(std::move(MOFI)),
100       AsmInfo(std::move(AsmInfo)), MII(std::move(MII)), STI(std::move(STI)),
101       InstPrinter(std::move(InstPrinter)), MIA(std::move(MIA)),
102       MIB(std::move(MIB)), MRI(std::move(MRI)), DisAsm(std::move(DisAsm)) {
103   Relocation::Arch = this->TheTriple->getArch();
104   RegularPageSize = isAArch64() ? RegularPageSizeAArch64 : RegularPageSizeX86;
105   PageAlign = opts::NoHugePages ? RegularPageSize : HugePageSize;
106 }
107 
108 BinaryContext::~BinaryContext() {
109   for (BinarySection *Section : Sections)
110     delete Section;
111   for (BinaryFunction *InjectedFunction : InjectedBinaryFunctions)
112     delete InjectedFunction;
113   for (std::pair<const uint64_t, JumpTable *> JTI : JumpTables)
114     delete JTI.second;
115   clearBinaryData();
116 }
117 
118 /// Create BinaryContext for a given architecture \p ArchName and
119 /// triple \p TripleName.
120 Expected<std::unique_ptr<BinaryContext>>
121 BinaryContext::createBinaryContext(const ObjectFile *File, bool IsPIC,
122                                    std::unique_ptr<DWARFContext> DwCtx) {
123   StringRef ArchName = "";
124   StringRef FeaturesStr = "";
125   switch (File->getArch()) {
126   case llvm::Triple::x86_64:
127     ArchName = "x86-64";
128     FeaturesStr = "+nopl";
129     break;
130   case llvm::Triple::aarch64:
131     ArchName = "aarch64";
132     FeaturesStr = "+fp-armv8,+neon,+crypto,+dotprod,+crc,+lse,+ras,+rdm,"
133                   "+fullfp16,+spe,+fuse-aes,+rcpc";
134     break;
135   default:
136     return createStringError(std::errc::not_supported,
137                              "BOLT-ERROR: Unrecognized machine in ELF file");
138   }
139 
140   auto TheTriple = std::make_unique<Triple>(File->makeTriple());
141   const std::string TripleName = TheTriple->str();
142 
143   std::string Error;
144   const Target *TheTarget =
145       TargetRegistry::lookupTarget(std::string(ArchName), *TheTriple, Error);
146   if (!TheTarget)
147     return createStringError(make_error_code(std::errc::not_supported),
148                              Twine("BOLT-ERROR: ", Error));
149 
150   std::unique_ptr<const MCRegisterInfo> MRI(
151       TheTarget->createMCRegInfo(TripleName));
152   if (!MRI)
153     return createStringError(
154         make_error_code(std::errc::not_supported),
155         Twine("BOLT-ERROR: no register info for target ", TripleName));
156 
157   // Set up disassembler.
158   std::unique_ptr<MCAsmInfo> AsmInfo(
159       TheTarget->createMCAsmInfo(*MRI, TripleName, MCTargetOptions()));
160   if (!AsmInfo)
161     return createStringError(
162         make_error_code(std::errc::not_supported),
163         Twine("BOLT-ERROR: no assembly info for target ", TripleName));
164   // BOLT creates "func@PLT" symbols for PLT entries. In function assembly dump
165   // we want to emit such names as using @PLT without double quotes to convey
166   // variant kind to the assembler. BOLT doesn't rely on the linker so we can
167   // override the default AsmInfo behavior to emit names the way we want.
168   AsmInfo->setAllowAtInName(true);
169 
170   std::unique_ptr<const MCSubtargetInfo> STI(
171       TheTarget->createMCSubtargetInfo(TripleName, "", FeaturesStr));
172   if (!STI)
173     return createStringError(
174         make_error_code(std::errc::not_supported),
175         Twine("BOLT-ERROR: no subtarget info for target ", TripleName));
176 
177   std::unique_ptr<const MCInstrInfo> MII(TheTarget->createMCInstrInfo());
178   if (!MII)
179     return createStringError(
180         make_error_code(std::errc::not_supported),
181         Twine("BOLT-ERROR: no instruction info for target ", TripleName));
182 
183   std::unique_ptr<MCContext> Ctx(
184       new MCContext(*TheTriple, AsmInfo.get(), MRI.get(), STI.get()));
185   std::unique_ptr<MCObjectFileInfo> MOFI(
186       TheTarget->createMCObjectFileInfo(*Ctx, IsPIC));
187   Ctx->setObjectFileInfo(MOFI.get());
188   // We do not support X86 Large code model. Change this in the future.
189   bool Large = false;
190   if (TheTriple->getArch() == llvm::Triple::aarch64)
191     Large = true;
192   unsigned LSDAEncoding =
193       Large ? dwarf::DW_EH_PE_absptr : dwarf::DW_EH_PE_udata4;
194   unsigned TTypeEncoding =
195       Large ? dwarf::DW_EH_PE_absptr : dwarf::DW_EH_PE_udata4;
196   if (IsPIC) {
197     LSDAEncoding = dwarf::DW_EH_PE_pcrel |
198                    (Large ? dwarf::DW_EH_PE_sdata8 : dwarf::DW_EH_PE_sdata4);
199     TTypeEncoding = dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel |
200                     (Large ? dwarf::DW_EH_PE_sdata8 : dwarf::DW_EH_PE_sdata4);
201   }
202 
203   std::unique_ptr<MCDisassembler> DisAsm(
204       TheTarget->createMCDisassembler(*STI, *Ctx));
205 
206   if (!DisAsm)
207     return createStringError(
208         make_error_code(std::errc::not_supported),
209         Twine("BOLT-ERROR: no disassembler info for target ", TripleName));
210 
211   std::unique_ptr<const MCInstrAnalysis> MIA(
212       TheTarget->createMCInstrAnalysis(MII.get()));
213   if (!MIA)
214     return createStringError(
215         make_error_code(std::errc::not_supported),
216         Twine("BOLT-ERROR: failed to create instruction analysis for target ",
217               TripleName));
218 
219   int AsmPrinterVariant = AsmInfo->getAssemblerDialect();
220   std::unique_ptr<MCInstPrinter> InstructionPrinter(
221       TheTarget->createMCInstPrinter(*TheTriple, AsmPrinterVariant, *AsmInfo,
222                                      *MII, *MRI));
223   if (!InstructionPrinter)
224     return createStringError(
225         make_error_code(std::errc::not_supported),
226         Twine("BOLT-ERROR: no instruction printer for target ", TripleName));
227   InstructionPrinter->setPrintImmHex(true);
228 
229   std::unique_ptr<MCCodeEmitter> MCE(
230       TheTarget->createMCCodeEmitter(*MII, *Ctx));
231 
232   // Make sure we don't miss any output on core dumps.
233   outs().SetUnbuffered();
234   errs().SetUnbuffered();
235   dbgs().SetUnbuffered();
236 
237   auto BC = std::make_unique<BinaryContext>(
238       std::move(Ctx), std::move(DwCtx), std::move(TheTriple), TheTarget,
239       std::string(TripleName), std::move(MCE), std::move(MOFI),
240       std::move(AsmInfo), std::move(MII), std::move(STI),
241       std::move(InstructionPrinter), std::move(MIA), nullptr, std::move(MRI),
242       std::move(DisAsm));
243 
244   BC->TTypeEncoding = TTypeEncoding;
245   BC->LSDAEncoding = LSDAEncoding;
246 
247   BC->MAB = std::unique_ptr<MCAsmBackend>(
248       BC->TheTarget->createMCAsmBackend(*BC->STI, *BC->MRI, MCTargetOptions()));
249 
250   BC->setFilename(File->getFileName());
251 
252   BC->HasFixedLoadAddress = !IsPIC;
253 
254   return std::move(BC);
255 }
256 
257 bool BinaryContext::forceSymbolRelocations(StringRef SymbolName) const {
258   if (opts::HotText &&
259       (SymbolName == "__hot_start" || SymbolName == "__hot_end"))
260     return true;
261 
262   if (opts::HotData &&
263       (SymbolName == "__hot_data_start" || SymbolName == "__hot_data_end"))
264     return true;
265 
266   if (SymbolName == "_end")
267     return true;
268 
269   return false;
270 }
271 
272 std::unique_ptr<MCObjectWriter>
273 BinaryContext::createObjectWriter(raw_pwrite_stream &OS) {
274   return MAB->createObjectWriter(OS);
275 }
276 
277 bool BinaryContext::validateObjectNesting() const {
278   auto Itr = BinaryDataMap.begin();
279   auto End = BinaryDataMap.end();
280   bool Valid = true;
281   while (Itr != End) {
282     auto Next = std::next(Itr);
283     while (Next != End &&
284            Itr->second->getSection() == Next->second->getSection() &&
285            Itr->second->containsRange(Next->second->getAddress(),
286                                       Next->second->getSize())) {
287       if (Next->second->Parent != Itr->second) {
288         errs() << "BOLT-WARNING: object nesting incorrect for:\n"
289                << "BOLT-WARNING:  " << *Itr->second << "\n"
290                << "BOLT-WARNING:  " << *Next->second << "\n";
291         Valid = false;
292       }
293       ++Next;
294     }
295     Itr = Next;
296   }
297   return Valid;
298 }
299 
300 bool BinaryContext::validateHoles() const {
301   bool Valid = true;
302   for (BinarySection &Section : sections()) {
303     for (const Relocation &Rel : Section.relocations()) {
304       uint64_t RelAddr = Rel.Offset + Section.getAddress();
305       const BinaryData *BD = getBinaryDataContainingAddress(RelAddr);
306       if (!BD) {
307         errs() << "BOLT-WARNING: no BinaryData found for relocation at address"
308                << " 0x" << Twine::utohexstr(RelAddr) << " in "
309                << Section.getName() << "\n";
310         Valid = false;
311       } else if (!BD->getAtomicRoot()) {
312         errs() << "BOLT-WARNING: no atomic BinaryData found for relocation at "
313                << "address 0x" << Twine::utohexstr(RelAddr) << " in "
314                << Section.getName() << "\n";
315         Valid = false;
316       }
317     }
318   }
319   return Valid;
320 }
321 
322 void BinaryContext::updateObjectNesting(BinaryDataMapType::iterator GAI) {
323   const uint64_t Address = GAI->second->getAddress();
324   const uint64_t Size = GAI->second->getSize();
325 
326   auto fixParents = [&](BinaryDataMapType::iterator Itr,
327                         BinaryData *NewParent) {
328     BinaryData *OldParent = Itr->second->Parent;
329     Itr->second->Parent = NewParent;
330     ++Itr;
331     while (Itr != BinaryDataMap.end() && OldParent &&
332            Itr->second->Parent == OldParent) {
333       Itr->second->Parent = NewParent;
334       ++Itr;
335     }
336   };
337 
338   // Check if the previous symbol contains the newly added symbol.
339   if (GAI != BinaryDataMap.begin()) {
340     BinaryData *Prev = std::prev(GAI)->second;
341     while (Prev) {
342       if (Prev->getSection() == GAI->second->getSection() &&
343           Prev->containsRange(Address, Size)) {
344         fixParents(GAI, Prev);
345       } else {
346         fixParents(GAI, nullptr);
347       }
348       Prev = Prev->Parent;
349     }
350   }
351 
352   // Check if the newly added symbol contains any subsequent symbols.
353   if (Size != 0) {
354     BinaryData *BD = GAI->second->Parent ? GAI->second->Parent : GAI->second;
355     auto Itr = std::next(GAI);
356     while (
357         Itr != BinaryDataMap.end() &&
358         BD->containsRange(Itr->second->getAddress(), Itr->second->getSize())) {
359       Itr->second->Parent = BD;
360       ++Itr;
361     }
362   }
363 }
364 
365 iterator_range<BinaryContext::binary_data_iterator>
366 BinaryContext::getSubBinaryData(BinaryData *BD) {
367   auto Start = std::next(BinaryDataMap.find(BD->getAddress()));
368   auto End = Start;
369   while (End != BinaryDataMap.end() && BD->isAncestorOf(End->second))
370     ++End;
371   return make_range(Start, End);
372 }
373 
374 std::pair<const MCSymbol *, uint64_t>
375 BinaryContext::handleAddressRef(uint64_t Address, BinaryFunction &BF,
376                                 bool IsPCRel) {
377   uint64_t Addend = 0;
378 
379   if (isAArch64()) {
380     // Check if this is an access to a constant island and create bookkeeping
381     // to keep track of it and emit it later as part of this function.
382     if (MCSymbol *IslandSym = BF.getOrCreateIslandAccess(Address))
383       return std::make_pair(IslandSym, Addend);
384 
385     // Detect custom code written in assembly that refers to arbitrary
386     // constant islands from other functions. Write this reference so we
387     // can pull this constant island and emit it as part of this function
388     // too.
389     auto IslandIter = AddressToConstantIslandMap.lower_bound(Address);
390     if (IslandIter != AddressToConstantIslandMap.end()) {
391       if (MCSymbol *IslandSym =
392               IslandIter->second->getOrCreateProxyIslandAccess(Address, BF)) {
393         BF.createIslandDependency(IslandSym, IslandIter->second);
394         return std::make_pair(IslandSym, Addend);
395       }
396     }
397   }
398 
399   // Note that the address does not necessarily have to reside inside
400   // a section, it could be an absolute address too.
401   ErrorOr<BinarySection &> Section = getSectionForAddress(Address);
402   if (Section && Section->isText()) {
403     if (BF.containsAddress(Address, /*UseMaxSize=*/isAArch64())) {
404       if (Address != BF.getAddress()) {
405         // The address could potentially escape. Mark it as another entry
406         // point into the function.
407         if (opts::Verbosity >= 1) {
408           outs() << "BOLT-INFO: potentially escaped address 0x"
409                  << Twine::utohexstr(Address) << " in function " << BF << '\n';
410         }
411         BF.HasInternalLabelReference = true;
412         return std::make_pair(
413             BF.addEntryPointAtOffset(Address - BF.getAddress()), Addend);
414       }
415     } else {
416       BF.InterproceduralReferences.insert(Address);
417     }
418   }
419 
420   // With relocations, catch jump table references outside of the basic block
421   // containing the indirect jump.
422   if (HasRelocations) {
423     const MemoryContentsType MemType = analyzeMemoryAt(Address, BF);
424     if (MemType == MemoryContentsType::POSSIBLE_PIC_JUMP_TABLE && IsPCRel) {
425       const MCSymbol *Symbol =
426           getOrCreateJumpTable(BF, Address, JumpTable::JTT_PIC);
427 
428       return std::make_pair(Symbol, Addend);
429     }
430   }
431 
432   if (BinaryData *BD = getBinaryDataContainingAddress(Address))
433     return std::make_pair(BD->getSymbol(), Address - BD->getAddress());
434 
435   // TODO: use DWARF info to get size/alignment here?
436   MCSymbol *TargetSymbol = getOrCreateGlobalSymbol(Address, "DATAat");
437   LLVM_DEBUG(dbgs() << "Created symbol " << TargetSymbol->getName() << '\n');
438   return std::make_pair(TargetSymbol, Addend);
439 }
440 
441 MemoryContentsType BinaryContext::analyzeMemoryAt(uint64_t Address,
442                                                   BinaryFunction &BF) {
443   if (!isX86())
444     return MemoryContentsType::UNKNOWN;
445 
446   ErrorOr<BinarySection &> Section = getSectionForAddress(Address);
447   if (!Section) {
448     // No section - possibly an absolute address. Since we don't allow
449     // internal function addresses to escape the function scope - we
450     // consider it a tail call.
451     if (opts::Verbosity > 1) {
452       errs() << "BOLT-WARNING: no section for address 0x"
453              << Twine::utohexstr(Address) << " referenced from function " << BF
454              << '\n';
455     }
456     return MemoryContentsType::UNKNOWN;
457   }
458 
459   if (Section->isVirtual()) {
460     // The contents are filled at runtime.
461     return MemoryContentsType::UNKNOWN;
462   }
463 
464   // No support for jump tables in code yet.
465   if (Section->isText())
466     return MemoryContentsType::UNKNOWN;
467 
468   // Start with checking for PIC jump table. We expect non-PIC jump tables
469   // to have high 32 bits set to 0.
470   if (analyzeJumpTable(Address, JumpTable::JTT_PIC, BF))
471     return MemoryContentsType::POSSIBLE_PIC_JUMP_TABLE;
472 
473   if (analyzeJumpTable(Address, JumpTable::JTT_NORMAL, BF))
474     return MemoryContentsType::POSSIBLE_JUMP_TABLE;
475 
476   return MemoryContentsType::UNKNOWN;
477 }
478 
479 /// Check if <fragment restored name> == <parent restored name>.cold(.\d+)?
480 bool isPotentialFragmentByName(BinaryFunction &Fragment,
481                                BinaryFunction &Parent) {
482   for (StringRef Name : Parent.getNames()) {
483     std::string NamePrefix = Regex::escape(NameResolver::restore(Name));
484     std::string NameRegex = Twine(NamePrefix, "\\.cold(\\.[0-9]+)?").str();
485     if (Fragment.hasRestoredNameRegex(NameRegex))
486       return true;
487   }
488   return false;
489 }
490 
491 bool BinaryContext::analyzeJumpTable(const uint64_t Address,
492                                      const JumpTable::JumpTableType Type,
493                                      BinaryFunction &BF,
494                                      const uint64_t NextJTAddress,
495                                      JumpTable::OffsetsType *Offsets) {
496   // Is one of the targets __builtin_unreachable?
497   bool HasUnreachable = false;
498 
499   // Number of targets other than __builtin_unreachable.
500   uint64_t NumRealEntries = 0;
501 
502   constexpr uint64_t INVALID_OFFSET = std::numeric_limits<uint64_t>::max();
503   auto addOffset = [&](uint64_t Offset) {
504     if (Offsets)
505       Offsets->emplace_back(Offset);
506   };
507 
508   auto doesBelongToFunction = [&](const uint64_t Addr,
509                                   BinaryFunction *TargetBF) -> bool {
510     if (BF.containsAddress(Addr))
511       return true;
512     // Nothing to do if we failed to identify the containing function.
513     if (!TargetBF)
514       return false;
515     // Case 1: check if BF is a fragment and TargetBF is its parent.
516     if (BF.isFragment()) {
517       // Parent function may or may not be already registered.
518       // Set parent link based on function name matching heuristic.
519       return registerFragment(BF, *TargetBF);
520     }
521     // Case 2: check if TargetBF is a fragment and BF is its parent.
522     return TargetBF->isFragment() && registerFragment(*TargetBF, BF);
523   };
524 
525   ErrorOr<BinarySection &> Section = getSectionForAddress(Address);
526   if (!Section)
527     return false;
528 
529   // The upper bound is defined by containing object, section limits, and
530   // the next jump table in memory.
531   uint64_t UpperBound = Section->getEndAddress();
532   const BinaryData *JumpTableBD = getBinaryDataAtAddress(Address);
533   if (JumpTableBD && JumpTableBD->getSize()) {
534     assert(JumpTableBD->getEndAddress() <= UpperBound &&
535            "data object cannot cross a section boundary");
536     UpperBound = JumpTableBD->getEndAddress();
537   }
538   if (NextJTAddress)
539     UpperBound = std::min(NextJTAddress, UpperBound);
540 
541   LLVM_DEBUG(dbgs() << "BOLT-DEBUG: analyzeJumpTable in " << BF.getPrintName()
542                     << '\n');
543   const uint64_t EntrySize = getJumpTableEntrySize(Type);
544   for (uint64_t EntryAddress = Address; EntryAddress <= UpperBound - EntrySize;
545        EntryAddress += EntrySize) {
546     LLVM_DEBUG(dbgs() << "  * Checking 0x" << Twine::utohexstr(EntryAddress)
547                       << " -> ");
548     // Check if there's a proper relocation against the jump table entry.
549     if (HasRelocations) {
550       if (Type == JumpTable::JTT_PIC &&
551           !DataPCRelocations.count(EntryAddress)) {
552         LLVM_DEBUG(
553             dbgs() << "FAIL: JTT_PIC table, no relocation for this address\n");
554         break;
555       }
556       if (Type == JumpTable::JTT_NORMAL && !getRelocationAt(EntryAddress)) {
557         LLVM_DEBUG(
558             dbgs()
559             << "FAIL: JTT_NORMAL table, no relocation for this address\n");
560         break;
561       }
562     }
563 
564     const uint64_t Value =
565         (Type == JumpTable::JTT_PIC)
566             ? Address + *getSignedValueAtAddress(EntryAddress, EntrySize)
567             : *getPointerAtAddress(EntryAddress);
568 
569     // __builtin_unreachable() case.
570     if (Value == BF.getAddress() + BF.getSize()) {
571       addOffset(Value - BF.getAddress());
572       HasUnreachable = true;
573       LLVM_DEBUG(dbgs() << "OK: __builtin_unreachable\n");
574       continue;
575     }
576 
577     // Function or one of its fragments.
578     BinaryFunction *TargetBF = getBinaryFunctionContainingAddress(Value);
579 
580     // We assume that a jump table cannot have function start as an entry.
581     if (!doesBelongToFunction(Value, TargetBF) || Value == BF.getAddress()) {
582       LLVM_DEBUG({
583         if (!BF.containsAddress(Value)) {
584           dbgs() << "FAIL: function doesn't contain this address\n";
585           if (TargetBF) {
586             dbgs() << "  ! function containing this address: "
587                    << TargetBF->getPrintName() << '\n';
588             if (TargetBF->isFragment())
589               dbgs() << "  ! is a fragment\n";
590             for (BinaryFunction *TargetParent : TargetBF->ParentFragments)
591               dbgs() << "  ! its parent is "
592                      << (TargetParent ? TargetParent->getPrintName() : "(none)")
593                      << '\n';
594           }
595         }
596         if (Value == BF.getAddress())
597           dbgs() << "FAIL: jump table cannot have function start as an entry\n";
598       });
599       break;
600     }
601 
602     // Check there's an instruction at this offset.
603     if (TargetBF->getState() == BinaryFunction::State::Disassembled &&
604         !TargetBF->getInstructionAtOffset(Value - TargetBF->getAddress())) {
605       LLVM_DEBUG(dbgs() << "FAIL: no instruction at this offset\n");
606       break;
607     }
608 
609     ++NumRealEntries;
610 
611     if (TargetBF == &BF) {
612       // Address inside the function.
613       addOffset(Value - TargetBF->getAddress());
614       LLVM_DEBUG(dbgs() << "OK: real entry\n");
615     } else {
616       // Address in split fragment.
617       BF.setHasSplitJumpTable(true);
618       // Add invalid offset for proper identification of jump table size.
619       addOffset(INVALID_OFFSET);
620       LLVM_DEBUG(dbgs() << "OK: address in split fragment "
621                         << TargetBF->getPrintName() << '\n');
622     }
623   }
624 
625   // It's a jump table if the number of real entries is more than 1, or there's
626   // one real entry and "unreachable" targets. If there are only multiple
627   // "unreachable" targets, then it's not a jump table.
628   return NumRealEntries + HasUnreachable >= 2;
629 }
630 
631 void BinaryContext::populateJumpTables() {
632   LLVM_DEBUG(dbgs() << "DataPCRelocations: " << DataPCRelocations.size()
633                     << '\n');
634   for (auto JTI = JumpTables.begin(), JTE = JumpTables.end(); JTI != JTE;
635        ++JTI) {
636     JumpTable *JT = JTI->second;
637     BinaryFunction &BF = *JT->Parent;
638 
639     if (!BF.isSimple())
640       continue;
641 
642     uint64_t NextJTAddress = 0;
643     auto NextJTI = std::next(JTI);
644     if (NextJTI != JTE)
645       NextJTAddress = NextJTI->second->getAddress();
646 
647     const bool Success = analyzeJumpTable(JT->getAddress(), JT->Type, BF,
648                                           NextJTAddress, &JT->OffsetEntries);
649     if (!Success) {
650       dbgs() << "failed to analyze jump table in function " << BF << '\n';
651       JT->print(dbgs());
652       if (NextJTI != JTE) {
653         dbgs() << "next jump table at 0x"
654                << Twine::utohexstr(NextJTI->second->getAddress())
655                << " belongs to function " << *NextJTI->second->Parent << '\n';
656         NextJTI->second->print(dbgs());
657       }
658       llvm_unreachable("jump table heuristic failure");
659     }
660 
661     for (uint64_t EntryOffset : JT->OffsetEntries) {
662       if (EntryOffset == BF.getSize())
663         BF.IgnoredBranches.emplace_back(EntryOffset, BF.getSize());
664       else
665         BF.registerReferencedOffset(EntryOffset);
666     }
667 
668     // In strict mode, erase PC-relative relocation record. Later we check that
669     // all such records are erased and thus have been accounted for.
670     if (opts::StrictMode && JT->Type == JumpTable::JTT_PIC) {
671       for (uint64_t Address = JT->getAddress();
672            Address < JT->getAddress() + JT->getSize();
673            Address += JT->EntrySize) {
674         DataPCRelocations.erase(DataPCRelocations.find(Address));
675       }
676     }
677 
678     // Mark to skip the function and all its fragments.
679     if (BF.hasSplitJumpTable())
680       FragmentsToSkip.push_back(&BF);
681   }
682 
683   if (opts::StrictMode && DataPCRelocations.size()) {
684     LLVM_DEBUG({
685       dbgs() << DataPCRelocations.size()
686              << " unclaimed PC-relative relocations left in data:\n";
687       for (uint64_t Reloc : DataPCRelocations)
688         dbgs() << Twine::utohexstr(Reloc) << '\n';
689     });
690     assert(0 && "unclaimed PC-relative relocations left in data\n");
691   }
692   clearList(DataPCRelocations);
693 }
694 
695 void BinaryContext::skipMarkedFragments() {
696   // Unique functions in the vector.
697   std::unordered_set<BinaryFunction *> UniqueFunctions(FragmentsToSkip.begin(),
698                                                        FragmentsToSkip.end());
699   // Copy the functions back to FragmentsToSkip.
700   FragmentsToSkip.assign(UniqueFunctions.begin(), UniqueFunctions.end());
701   auto addToWorklist = [&](BinaryFunction *Function) -> void {
702     if (UniqueFunctions.count(Function))
703       return;
704     FragmentsToSkip.push_back(Function);
705     UniqueFunctions.insert(Function);
706   };
707   // Functions containing split jump tables need to be skipped with all
708   // fragments (transitively).
709   for (size_t I = 0; I != FragmentsToSkip.size(); I++) {
710     BinaryFunction *BF = FragmentsToSkip[I];
711     assert(UniqueFunctions.count(BF) &&
712            "internal error in traversing function fragments");
713     if (opts::Verbosity >= 1)
714       errs() << "BOLT-WARNING: Ignoring " << BF->getPrintName() << '\n';
715     BF->setIgnored();
716     std::for_each(BF->Fragments.begin(), BF->Fragments.end(), addToWorklist);
717     std::for_each(BF->ParentFragments.begin(), BF->ParentFragments.end(),
718                   addToWorklist);
719   }
720   if (!FragmentsToSkip.empty())
721     errs() << "BOLT-WARNING: ignored " << FragmentsToSkip.size() << " function"
722            << (FragmentsToSkip.size() == 1 ? "" : "s")
723            << " due to cold fragments\n";
724   FragmentsToSkip.clear();
725 }
726 
727 MCSymbol *BinaryContext::getOrCreateGlobalSymbol(uint64_t Address, Twine Prefix,
728                                                  uint64_t Size,
729                                                  uint16_t Alignment,
730                                                  unsigned Flags) {
731   auto Itr = BinaryDataMap.find(Address);
732   if (Itr != BinaryDataMap.end()) {
733     assert(Itr->second->getSize() == Size || !Size);
734     return Itr->second->getSymbol();
735   }
736 
737   std::string Name = (Prefix + "0x" + Twine::utohexstr(Address)).str();
738   assert(!GlobalSymbols.count(Name) && "created name is not unique");
739   return registerNameAtAddress(Name, Address, Size, Alignment, Flags);
740 }
741 
742 MCSymbol *BinaryContext::getOrCreateUndefinedGlobalSymbol(StringRef Name) {
743   return Ctx->getOrCreateSymbol(Name);
744 }
745 
746 BinaryFunction *BinaryContext::createBinaryFunction(
747     const std::string &Name, BinarySection &Section, uint64_t Address,
748     uint64_t Size, uint64_t SymbolSize, uint16_t Alignment) {
749   auto Result = BinaryFunctions.emplace(
750       Address, BinaryFunction(Name, Section, Address, Size, *this));
751   assert(Result.second == true && "unexpected duplicate function");
752   BinaryFunction *BF = &Result.first->second;
753   registerNameAtAddress(Name, Address, SymbolSize ? SymbolSize : Size,
754                         Alignment);
755   setSymbolToFunctionMap(BF->getSymbol(), BF);
756   return BF;
757 }
758 
759 const MCSymbol *
760 BinaryContext::getOrCreateJumpTable(BinaryFunction &Function, uint64_t Address,
761                                     JumpTable::JumpTableType Type) {
762   if (JumpTable *JT = getJumpTableContainingAddress(Address)) {
763     assert(JT->Type == Type && "jump table types have to match");
764     assert(JT->Parent == &Function &&
765            "cannot re-use jump table of a different function");
766     assert(Address == JT->getAddress() && "unexpected non-empty jump table");
767 
768     return JT->getFirstLabel();
769   }
770 
771   // Re-use the existing symbol if possible.
772   MCSymbol *JTLabel = nullptr;
773   if (BinaryData *Object = getBinaryDataAtAddress(Address)) {
774     if (!isInternalSymbolName(Object->getSymbol()->getName()))
775       JTLabel = Object->getSymbol();
776   }
777 
778   const uint64_t EntrySize = getJumpTableEntrySize(Type);
779   if (!JTLabel) {
780     const std::string JumpTableName = generateJumpTableName(Function, Address);
781     JTLabel = registerNameAtAddress(JumpTableName, Address, 0, EntrySize);
782   }
783 
784   LLVM_DEBUG(dbgs() << "BOLT-DEBUG: creating jump table " << JTLabel->getName()
785                     << " in function " << Function << '\n');
786 
787   JumpTable *JT = new JumpTable(*JTLabel, Address, EntrySize, Type,
788                                 JumpTable::LabelMapType{{0, JTLabel}}, Function,
789                                 *getSectionForAddress(Address));
790   JumpTables.emplace(Address, JT);
791 
792   // Duplicate the entry for the parent function for easy access.
793   Function.JumpTables.emplace(Address, JT);
794 
795   return JTLabel;
796 }
797 
798 std::pair<uint64_t, const MCSymbol *>
799 BinaryContext::duplicateJumpTable(BinaryFunction &Function, JumpTable *JT,
800                                   const MCSymbol *OldLabel) {
801   auto L = scopeLock();
802   unsigned Offset = 0;
803   bool Found = false;
804   for (std::pair<const unsigned, MCSymbol *> Elmt : JT->Labels) {
805     if (Elmt.second != OldLabel)
806       continue;
807     Offset = Elmt.first;
808     Found = true;
809     break;
810   }
811   assert(Found && "Label not found");
812   MCSymbol *NewLabel = Ctx->createNamedTempSymbol("duplicatedJT");
813   JumpTable *NewJT =
814       new JumpTable(*NewLabel, JT->getAddress(), JT->EntrySize, JT->Type,
815                     JumpTable::LabelMapType{{Offset, NewLabel}}, Function,
816                     *getSectionForAddress(JT->getAddress()));
817   NewJT->Entries = JT->Entries;
818   NewJT->Counts = JT->Counts;
819   uint64_t JumpTableID = ++DuplicatedJumpTables;
820   // Invert it to differentiate from regular jump tables whose IDs are their
821   // addresses in the input binary memory space
822   JumpTableID = ~JumpTableID;
823   JumpTables.emplace(JumpTableID, NewJT);
824   Function.JumpTables.emplace(JumpTableID, NewJT);
825   return std::make_pair(JumpTableID, NewLabel);
826 }
827 
828 std::string BinaryContext::generateJumpTableName(const BinaryFunction &BF,
829                                                  uint64_t Address) {
830   size_t Id;
831   uint64_t Offset = 0;
832   if (const JumpTable *JT = BF.getJumpTableContainingAddress(Address)) {
833     Offset = Address - JT->getAddress();
834     auto Itr = JT->Labels.find(Offset);
835     if (Itr != JT->Labels.end())
836       return std::string(Itr->second->getName());
837     Id = JumpTableIds.at(JT->getAddress());
838   } else {
839     Id = JumpTableIds[Address] = BF.JumpTables.size();
840   }
841   return ("JUMP_TABLE/" + BF.getOneName().str() + "." + std::to_string(Id) +
842           (Offset ? ("." + std::to_string(Offset)) : ""));
843 }
844 
845 bool BinaryContext::hasValidCodePadding(const BinaryFunction &BF) {
846   // FIXME: aarch64 support is missing.
847   if (!isX86())
848     return true;
849 
850   if (BF.getSize() == BF.getMaxSize())
851     return true;
852 
853   ErrorOr<ArrayRef<unsigned char>> FunctionData = BF.getData();
854   assert(FunctionData && "cannot get function as data");
855 
856   uint64_t Offset = BF.getSize();
857   MCInst Instr;
858   uint64_t InstrSize = 0;
859   uint64_t InstrAddress = BF.getAddress() + Offset;
860   using std::placeholders::_1;
861 
862   // Skip instructions that satisfy the predicate condition.
863   auto skipInstructions = [&](std::function<bool(const MCInst &)> Predicate) {
864     const uint64_t StartOffset = Offset;
865     for (; Offset < BF.getMaxSize();
866          Offset += InstrSize, InstrAddress += InstrSize) {
867       if (!DisAsm->getInstruction(Instr, InstrSize, FunctionData->slice(Offset),
868                                   InstrAddress, nulls()))
869         break;
870       if (!Predicate(Instr))
871         break;
872     }
873 
874     return Offset - StartOffset;
875   };
876 
877   // Skip a sequence of zero bytes.
878   auto skipZeros = [&]() {
879     const uint64_t StartOffset = Offset;
880     for (; Offset < BF.getMaxSize(); ++Offset)
881       if ((*FunctionData)[Offset] != 0)
882         break;
883 
884     return Offset - StartOffset;
885   };
886 
887   // Accept the whole padding area filled with breakpoints.
888   auto isBreakpoint = std::bind(&MCPlusBuilder::isBreakpoint, MIB.get(), _1);
889   if (skipInstructions(isBreakpoint) && Offset == BF.getMaxSize())
890     return true;
891 
892   auto isNoop = std::bind(&MCPlusBuilder::isNoop, MIB.get(), _1);
893 
894   // Some functions have a jump to the next function or to the padding area
895   // inserted after the body.
896   auto isSkipJump = [&](const MCInst &Instr) {
897     uint64_t TargetAddress = 0;
898     if (MIB->isUnconditionalBranch(Instr) &&
899         MIB->evaluateBranch(Instr, InstrAddress, InstrSize, TargetAddress)) {
900       if (TargetAddress >= InstrAddress + InstrSize &&
901           TargetAddress <= BF.getAddress() + BF.getMaxSize()) {
902         return true;
903       }
904     }
905     return false;
906   };
907 
908   // Skip over nops, jumps, and zero padding. Allow interleaving (this happens).
909   while (skipInstructions(isNoop) || skipInstructions(isSkipJump) ||
910          skipZeros())
911     ;
912 
913   if (Offset == BF.getMaxSize())
914     return true;
915 
916   if (opts::Verbosity >= 1) {
917     errs() << "BOLT-WARNING: bad padding at address 0x"
918            << Twine::utohexstr(BF.getAddress() + BF.getSize())
919            << " starting at offset " << (Offset - BF.getSize())
920            << " in function " << BF << '\n'
921            << FunctionData->slice(BF.getSize(), BF.getMaxSize() - BF.getSize())
922            << '\n';
923   }
924 
925   return false;
926 }
927 
928 void BinaryContext::adjustCodePadding() {
929   for (auto &BFI : BinaryFunctions) {
930     BinaryFunction &BF = BFI.second;
931     if (!shouldEmit(BF))
932       continue;
933 
934     if (!hasValidCodePadding(BF)) {
935       if (HasRelocations) {
936         if (opts::Verbosity >= 1) {
937           outs() << "BOLT-INFO: function " << BF
938                  << " has invalid padding. Ignoring the function.\n";
939         }
940         BF.setIgnored();
941       } else {
942         BF.setMaxSize(BF.getSize());
943       }
944     }
945   }
946 }
947 
948 MCSymbol *BinaryContext::registerNameAtAddress(StringRef Name, uint64_t Address,
949                                                uint64_t Size,
950                                                uint16_t Alignment,
951                                                unsigned Flags) {
952   // Register the name with MCContext.
953   MCSymbol *Symbol = Ctx->getOrCreateSymbol(Name);
954 
955   auto GAI = BinaryDataMap.find(Address);
956   BinaryData *BD;
957   if (GAI == BinaryDataMap.end()) {
958     ErrorOr<BinarySection &> SectionOrErr = getSectionForAddress(Address);
959     BinarySection &Section =
960         SectionOrErr ? SectionOrErr.get() : absoluteSection();
961     BD = new BinaryData(*Symbol, Address, Size, Alignment ? Alignment : 1,
962                         Section, Flags);
963     GAI = BinaryDataMap.emplace(Address, BD).first;
964     GlobalSymbols[Name] = BD;
965     updateObjectNesting(GAI);
966   } else {
967     BD = GAI->second;
968     if (!BD->hasName(Name)) {
969       GlobalSymbols[Name] = BD;
970       BD->Symbols.push_back(Symbol);
971     }
972   }
973 
974   return Symbol;
975 }
976 
977 const BinaryData *
978 BinaryContext::getBinaryDataContainingAddressImpl(uint64_t Address) const {
979   auto NI = BinaryDataMap.lower_bound(Address);
980   auto End = BinaryDataMap.end();
981   if ((NI != End && Address == NI->first) ||
982       ((NI != BinaryDataMap.begin()) && (NI-- != BinaryDataMap.begin()))) {
983     if (NI->second->containsAddress(Address))
984       return NI->second;
985 
986     // If this is a sub-symbol, see if a parent data contains the address.
987     const BinaryData *BD = NI->second->getParent();
988     while (BD) {
989       if (BD->containsAddress(Address))
990         return BD;
991       BD = BD->getParent();
992     }
993   }
994   return nullptr;
995 }
996 
997 bool BinaryContext::setBinaryDataSize(uint64_t Address, uint64_t Size) {
998   auto NI = BinaryDataMap.find(Address);
999   assert(NI != BinaryDataMap.end());
1000   if (NI == BinaryDataMap.end())
1001     return false;
1002   // TODO: it's possible that a jump table starts at the same address
1003   // as a larger blob of private data.  When we set the size of the
1004   // jump table, it might be smaller than the total blob size.  In this
1005   // case we just leave the original size since (currently) it won't really
1006   // affect anything.
1007   assert((!NI->second->Size || NI->second->Size == Size ||
1008           (NI->second->isJumpTable() && NI->second->Size > Size)) &&
1009          "can't change the size of a symbol that has already had its "
1010          "size set");
1011   if (!NI->second->Size) {
1012     NI->second->Size = Size;
1013     updateObjectNesting(NI);
1014     return true;
1015   }
1016   return false;
1017 }
1018 
1019 void BinaryContext::generateSymbolHashes() {
1020   auto isPadding = [](const BinaryData &BD) {
1021     StringRef Contents = BD.getSection().getContents();
1022     StringRef SymData = Contents.substr(BD.getOffset(), BD.getSize());
1023     return (BD.getName().startswith("HOLEat") ||
1024             SymData.find_first_not_of(0) == StringRef::npos);
1025   };
1026 
1027   uint64_t NumCollisions = 0;
1028   for (auto &Entry : BinaryDataMap) {
1029     BinaryData &BD = *Entry.second;
1030     StringRef Name = BD.getName();
1031 
1032     if (!isInternalSymbolName(Name))
1033       continue;
1034 
1035     // First check if a non-anonymous alias exists and move it to the front.
1036     if (BD.getSymbols().size() > 1) {
1037       auto Itr = std::find_if(BD.getSymbols().begin(), BD.getSymbols().end(),
1038                               [&](const MCSymbol *Symbol) {
1039                                 return !isInternalSymbolName(Symbol->getName());
1040                               });
1041       if (Itr != BD.getSymbols().end()) {
1042         size_t Idx = std::distance(BD.getSymbols().begin(), Itr);
1043         std::swap(BD.getSymbols()[0], BD.getSymbols()[Idx]);
1044         continue;
1045       }
1046     }
1047 
1048     // We have to skip 0 size symbols since they will all collide.
1049     if (BD.getSize() == 0) {
1050       continue;
1051     }
1052 
1053     const uint64_t Hash = BD.getSection().hash(BD);
1054     const size_t Idx = Name.find("0x");
1055     std::string NewName =
1056         (Twine(Name.substr(0, Idx)) + "_" + Twine::utohexstr(Hash)).str();
1057     if (getBinaryDataByName(NewName)) {
1058       // Ignore collisions for symbols that appear to be padding
1059       // (i.e. all zeros or a "hole")
1060       if (!isPadding(BD)) {
1061         if (opts::Verbosity) {
1062           errs() << "BOLT-WARNING: collision detected when hashing " << BD
1063                  << " with new name (" << NewName << "), skipping.\n";
1064         }
1065         ++NumCollisions;
1066       }
1067       continue;
1068     }
1069     BD.Symbols.insert(BD.Symbols.begin(), Ctx->getOrCreateSymbol(NewName));
1070     GlobalSymbols[NewName] = &BD;
1071   }
1072   if (NumCollisions) {
1073     errs() << "BOLT-WARNING: " << NumCollisions
1074            << " collisions detected while hashing binary objects";
1075     if (!opts::Verbosity)
1076       errs() << ". Use -v=1 to see the list.";
1077     errs() << '\n';
1078   }
1079 }
1080 
1081 bool BinaryContext::registerFragment(BinaryFunction &TargetFunction,
1082                                      BinaryFunction &Function) const {
1083   if (!isPotentialFragmentByName(TargetFunction, Function))
1084     return false;
1085   assert(TargetFunction.isFragment() && "TargetFunction must be a fragment");
1086   if (TargetFunction.isParentFragment(&Function))
1087     return true;
1088   TargetFunction.addParentFragment(Function);
1089   Function.addFragment(TargetFunction);
1090   if (!HasRelocations) {
1091     TargetFunction.setSimple(false);
1092     Function.setSimple(false);
1093   }
1094   if (opts::Verbosity >= 1) {
1095     outs() << "BOLT-INFO: marking " << TargetFunction << " as a fragment of "
1096            << Function << '\n';
1097   }
1098   return true;
1099 }
1100 
1101 void BinaryContext::processInterproceduralReferences(BinaryFunction &Function) {
1102   for (uint64_t Address : Function.InterproceduralReferences) {
1103     if (!Address)
1104       continue;
1105 
1106     BinaryFunction *TargetFunction =
1107         getBinaryFunctionContainingAddress(Address);
1108     if (&Function == TargetFunction)
1109       continue;
1110 
1111     if (TargetFunction) {
1112       if (TargetFunction->IsFragment &&
1113           !registerFragment(*TargetFunction, Function)) {
1114         errs() << "BOLT-WARNING: interprocedural reference between unrelated "
1115                   "fragments: "
1116                << Function.getPrintName() << " and "
1117                << TargetFunction->getPrintName() << '\n';
1118       }
1119       if (uint64_t Offset = Address - TargetFunction->getAddress())
1120         TargetFunction->addEntryPointAtOffset(Offset);
1121 
1122       continue;
1123     }
1124 
1125     // Check if address falls in function padding space - this could be
1126     // unmarked data in code. In this case adjust the padding space size.
1127     ErrorOr<BinarySection &> Section = getSectionForAddress(Address);
1128     assert(Section && "cannot get section for referenced address");
1129 
1130     if (!Section->isText())
1131       continue;
1132 
1133     // PLT requires special handling and could be ignored in this context.
1134     StringRef SectionName = Section->getName();
1135     if (SectionName == ".plt" || SectionName == ".plt.got")
1136       continue;
1137 
1138     if (opts::processAllFunctions()) {
1139       errs() << "BOLT-ERROR: cannot process binaries with unmarked "
1140              << "object in code at address 0x" << Twine::utohexstr(Address)
1141              << " belonging to section " << SectionName << " in current mode\n";
1142       exit(1);
1143     }
1144 
1145     TargetFunction = getBinaryFunctionContainingAddress(Address,
1146                                                         /*CheckPastEnd=*/false,
1147                                                         /*UseMaxSize=*/true);
1148     // We are not going to overwrite non-simple functions, but for simple
1149     // ones - adjust the padding size.
1150     if (TargetFunction && TargetFunction->isSimple()) {
1151       errs() << "BOLT-WARNING: function " << *TargetFunction
1152              << " has an object detected in a padding region at address 0x"
1153              << Twine::utohexstr(Address) << '\n';
1154       TargetFunction->setMaxSize(TargetFunction->getSize());
1155     }
1156   }
1157 
1158   clearList(Function.InterproceduralReferences);
1159 }
1160 
1161 void BinaryContext::postProcessSymbolTable() {
1162   fixBinaryDataHoles();
1163   bool Valid = true;
1164   for (auto &Entry : BinaryDataMap) {
1165     BinaryData *BD = Entry.second;
1166     if ((BD->getName().startswith("SYMBOLat") ||
1167          BD->getName().startswith("DATAat")) &&
1168         !BD->getParent() && !BD->getSize() && !BD->isAbsolute() &&
1169         BD->getSection()) {
1170       errs() << "BOLT-WARNING: zero-sized top level symbol: " << *BD << "\n";
1171       Valid = false;
1172     }
1173   }
1174   assert(Valid);
1175   generateSymbolHashes();
1176 }
1177 
1178 void BinaryContext::foldFunction(BinaryFunction &ChildBF,
1179                                  BinaryFunction &ParentBF) {
1180   assert(!ChildBF.isMultiEntry() && !ParentBF.isMultiEntry() &&
1181          "cannot merge functions with multiple entry points");
1182 
1183   std::unique_lock<std::shared_timed_mutex> WriteCtxLock(CtxMutex,
1184                                                          std::defer_lock);
1185   std::unique_lock<std::shared_timed_mutex> WriteSymbolMapLock(
1186       SymbolToFunctionMapMutex, std::defer_lock);
1187 
1188   const StringRef ChildName = ChildBF.getOneName();
1189 
1190   // Move symbols over and update bookkeeping info.
1191   for (MCSymbol *Symbol : ChildBF.getSymbols()) {
1192     ParentBF.getSymbols().push_back(Symbol);
1193     WriteSymbolMapLock.lock();
1194     SymbolToFunctionMap[Symbol] = &ParentBF;
1195     WriteSymbolMapLock.unlock();
1196     // NB: there's no need to update BinaryDataMap and GlobalSymbols.
1197   }
1198   ChildBF.getSymbols().clear();
1199 
1200   // Move other names the child function is known under.
1201   std::move(ChildBF.Aliases.begin(), ChildBF.Aliases.end(),
1202             std::back_inserter(ParentBF.Aliases));
1203   ChildBF.Aliases.clear();
1204 
1205   if (HasRelocations) {
1206     // Merge execution counts of ChildBF into those of ParentBF.
1207     // Without relocations, we cannot reliably merge profiles as both functions
1208     // continue to exist and either one can be executed.
1209     ChildBF.mergeProfileDataInto(ParentBF);
1210 
1211     std::shared_lock<std::shared_timed_mutex> ReadBfsLock(BinaryFunctionsMutex,
1212                                                           std::defer_lock);
1213     std::unique_lock<std::shared_timed_mutex> WriteBfsLock(BinaryFunctionsMutex,
1214                                                            std::defer_lock);
1215     // Remove ChildBF from the global set of functions in relocs mode.
1216     ReadBfsLock.lock();
1217     auto FI = BinaryFunctions.find(ChildBF.getAddress());
1218     ReadBfsLock.unlock();
1219 
1220     assert(FI != BinaryFunctions.end() && "function not found");
1221     assert(&ChildBF == &FI->second && "function mismatch");
1222 
1223     WriteBfsLock.lock();
1224     ChildBF.clearDisasmState();
1225     FI = BinaryFunctions.erase(FI);
1226     WriteBfsLock.unlock();
1227 
1228   } else {
1229     // In non-relocation mode we keep the function, but rename it.
1230     std::string NewName = "__ICF_" + ChildName.str();
1231 
1232     WriteCtxLock.lock();
1233     ChildBF.getSymbols().push_back(Ctx->getOrCreateSymbol(NewName));
1234     WriteCtxLock.unlock();
1235 
1236     ChildBF.setFolded(&ParentBF);
1237   }
1238 }
1239 
1240 void BinaryContext::fixBinaryDataHoles() {
1241   assert(validateObjectNesting() && "object nesting inconsitency detected");
1242 
1243   for (BinarySection &Section : allocatableSections()) {
1244     std::vector<std::pair<uint64_t, uint64_t>> Holes;
1245 
1246     auto isNotHole = [&Section](const binary_data_iterator &Itr) {
1247       BinaryData *BD = Itr->second;
1248       bool isHole = (!BD->getParent() && !BD->getSize() && BD->isObject() &&
1249                      (BD->getName().startswith("SYMBOLat0x") ||
1250                       BD->getName().startswith("DATAat0x") ||
1251                       BD->getName().startswith("ANONYMOUS")));
1252       return !isHole && BD->getSection() == Section && !BD->getParent();
1253     };
1254 
1255     auto BDStart = BinaryDataMap.begin();
1256     auto BDEnd = BinaryDataMap.end();
1257     auto Itr = FilteredBinaryDataIterator(isNotHole, BDStart, BDEnd);
1258     auto End = FilteredBinaryDataIterator(isNotHole, BDEnd, BDEnd);
1259 
1260     uint64_t EndAddress = Section.getAddress();
1261 
1262     while (Itr != End) {
1263       if (Itr->second->getAddress() > EndAddress) {
1264         uint64_t Gap = Itr->second->getAddress() - EndAddress;
1265         Holes.emplace_back(EndAddress, Gap);
1266       }
1267       EndAddress = Itr->second->getEndAddress();
1268       ++Itr;
1269     }
1270 
1271     if (EndAddress < Section.getEndAddress())
1272       Holes.emplace_back(EndAddress, Section.getEndAddress() - EndAddress);
1273 
1274     // If there is already a symbol at the start of the hole, grow that symbol
1275     // to cover the rest.  Otherwise, create a new symbol to cover the hole.
1276     for (std::pair<uint64_t, uint64_t> &Hole : Holes) {
1277       BinaryData *BD = getBinaryDataAtAddress(Hole.first);
1278       if (BD) {
1279         // BD->getSection() can be != Section if there are sections that
1280         // overlap.  In this case it is probably safe to just skip the holes
1281         // since the overlapping section will not(?) have any symbols in it.
1282         if (BD->getSection() == Section)
1283           setBinaryDataSize(Hole.first, Hole.second);
1284       } else {
1285         getOrCreateGlobalSymbol(Hole.first, "HOLEat", Hole.second, 1);
1286       }
1287     }
1288   }
1289 
1290   assert(validateObjectNesting() && "object nesting inconsitency detected");
1291   assert(validateHoles() && "top level hole detected in object map");
1292 }
1293 
1294 void BinaryContext::printGlobalSymbols(raw_ostream &OS) const {
1295   const BinarySection *CurrentSection = nullptr;
1296   bool FirstSection = true;
1297 
1298   for (auto &Entry : BinaryDataMap) {
1299     const BinaryData *BD = Entry.second;
1300     const BinarySection &Section = BD->getSection();
1301     if (FirstSection || Section != *CurrentSection) {
1302       uint64_t Address, Size;
1303       StringRef Name = Section.getName();
1304       if (Section) {
1305         Address = Section.getAddress();
1306         Size = Section.getSize();
1307       } else {
1308         Address = BD->getAddress();
1309         Size = BD->getSize();
1310       }
1311       OS << "BOLT-INFO: Section " << Name << ", "
1312          << "0x" + Twine::utohexstr(Address) << ":"
1313          << "0x" + Twine::utohexstr(Address + Size) << "/" << Size << "\n";
1314       CurrentSection = &Section;
1315       FirstSection = false;
1316     }
1317 
1318     OS << "BOLT-INFO: ";
1319     const BinaryData *P = BD->getParent();
1320     while (P) {
1321       OS << "  ";
1322       P = P->getParent();
1323     }
1324     OS << *BD << "\n";
1325   }
1326 }
1327 
1328 Expected<unsigned> BinaryContext::getDwarfFile(
1329     StringRef Directory, StringRef FileName, unsigned FileNumber,
1330     Optional<MD5::MD5Result> Checksum, Optional<StringRef> Source,
1331     unsigned CUID, unsigned DWARFVersion) {
1332   DwarfLineTable &Table = DwarfLineTablesCUMap[CUID];
1333   return Table.tryGetFile(Directory, FileName, Checksum, Source, DWARFVersion,
1334                           FileNumber);
1335 }
1336 
1337 unsigned BinaryContext::addDebugFilenameToUnit(const uint32_t DestCUID,
1338                                                const uint32_t SrcCUID,
1339                                                unsigned FileIndex) {
1340   DWARFCompileUnit *SrcUnit = DwCtx->getCompileUnitForOffset(SrcCUID);
1341   const DWARFDebugLine::LineTable *LineTable =
1342       DwCtx->getLineTableForUnit(SrcUnit);
1343   const std::vector<DWARFDebugLine::FileNameEntry> &FileNames =
1344       LineTable->Prologue.FileNames;
1345   // Dir indexes start at 1, as DWARF file numbers, and a dir index 0
1346   // means empty dir.
1347   assert(FileIndex > 0 && FileIndex <= FileNames.size() &&
1348          "FileIndex out of range for the compilation unit.");
1349   StringRef Dir = "";
1350   if (FileNames[FileIndex - 1].DirIdx != 0) {
1351     if (Optional<const char *> DirName = dwarf::toString(
1352             LineTable->Prologue
1353                 .IncludeDirectories[FileNames[FileIndex - 1].DirIdx - 1])) {
1354       Dir = *DirName;
1355     }
1356   }
1357   StringRef FileName = "";
1358   if (Optional<const char *> FName =
1359           dwarf::toString(FileNames[FileIndex - 1].Name))
1360     FileName = *FName;
1361   assert(FileName != "");
1362   DWARFCompileUnit *DstUnit = DwCtx->getCompileUnitForOffset(DestCUID);
1363   return cantFail(getDwarfFile(Dir, FileName, 0, None, None, DestCUID,
1364                                DstUnit->getVersion()));
1365 }
1366 
1367 std::vector<BinaryFunction *> BinaryContext::getSortedFunctions() {
1368   std::vector<BinaryFunction *> SortedFunctions(BinaryFunctions.size());
1369   std::transform(BinaryFunctions.begin(), BinaryFunctions.end(),
1370                  SortedFunctions.begin(),
1371                  [](std::pair<const uint64_t, BinaryFunction> &BFI) {
1372                    return &BFI.second;
1373                  });
1374 
1375   std::stable_sort(SortedFunctions.begin(), SortedFunctions.end(),
1376                    [](const BinaryFunction *A, const BinaryFunction *B) {
1377                      if (A->hasValidIndex() && B->hasValidIndex()) {
1378                        return A->getIndex() < B->getIndex();
1379                      }
1380                      return A->hasValidIndex();
1381                    });
1382   return SortedFunctions;
1383 }
1384 
1385 std::vector<BinaryFunction *> BinaryContext::getAllBinaryFunctions() {
1386   std::vector<BinaryFunction *> AllFunctions;
1387   AllFunctions.reserve(BinaryFunctions.size() + InjectedBinaryFunctions.size());
1388   std::transform(BinaryFunctions.begin(), BinaryFunctions.end(),
1389                  std::back_inserter(AllFunctions),
1390                  [](std::pair<const uint64_t, BinaryFunction> &BFI) {
1391                    return &BFI.second;
1392                  });
1393   std::copy(InjectedBinaryFunctions.begin(), InjectedBinaryFunctions.end(),
1394             std::back_inserter(AllFunctions));
1395 
1396   return AllFunctions;
1397 }
1398 
1399 Optional<DWARFUnit *> BinaryContext::getDWOCU(uint64_t DWOId) {
1400   auto Iter = DWOCUs.find(DWOId);
1401   if (Iter == DWOCUs.end())
1402     return None;
1403 
1404   return Iter->second;
1405 }
1406 
1407 DWARFContext *BinaryContext::getDWOContext() {
1408   if (DWOCUs.empty())
1409     return nullptr;
1410   return &DWOCUs.begin()->second->getContext();
1411 }
1412 
1413 /// Handles DWO sections that can either be in .o, .dwo or .dwp files.
1414 void BinaryContext::preprocessDWODebugInfo() {
1415   for (const std::unique_ptr<DWARFUnit> &CU : DwCtx->compile_units()) {
1416     DWARFUnit *const DwarfUnit = CU.get();
1417     if (llvm::Optional<uint64_t> DWOId = DwarfUnit->getDWOId()) {
1418       DWARFUnit *DWOCU = DwarfUnit->getNonSkeletonUnitDIE(false).getDwarfUnit();
1419       if (!DWOCU->isDWOUnit()) {
1420         std::string DWOName = dwarf::toString(
1421             DwarfUnit->getUnitDIE().find(
1422                 {dwarf::DW_AT_dwo_name, dwarf::DW_AT_GNU_dwo_name}),
1423             "");
1424         outs() << "BOLT-WARNING: Debug Fission: DWO debug information for "
1425                << DWOName
1426                << " was not retrieved and won't be updated. Please check "
1427                   "relative path.\n";
1428         continue;
1429       }
1430       DWOCUs[*DWOId] = DWOCU;
1431     }
1432   }
1433 }
1434 
1435 void BinaryContext::preprocessDebugInfo() {
1436   struct CURange {
1437     uint64_t LowPC;
1438     uint64_t HighPC;
1439     DWARFUnit *Unit;
1440 
1441     bool operator<(const CURange &Other) const { return LowPC < Other.LowPC; }
1442   };
1443 
1444   // Building a map of address ranges to CUs similar to .debug_aranges and use
1445   // it to assign CU to functions.
1446   std::vector<CURange> AllRanges;
1447   AllRanges.reserve(DwCtx->getNumCompileUnits());
1448   for (const std::unique_ptr<DWARFUnit> &CU : DwCtx->compile_units()) {
1449     Expected<DWARFAddressRangesVector> RangesOrError =
1450         CU->getUnitDIE().getAddressRanges();
1451     if (!RangesOrError) {
1452       consumeError(RangesOrError.takeError());
1453       continue;
1454     }
1455     for (DWARFAddressRange &Range : *RangesOrError) {
1456       // Parts of the debug info could be invalidated due to corresponding code
1457       // being removed from the binary by the linker. Hence we check if the
1458       // address is a valid one.
1459       if (containsAddress(Range.LowPC))
1460         AllRanges.emplace_back(CURange{Range.LowPC, Range.HighPC, CU.get()});
1461     }
1462 
1463     ContainsDwarf5 |= CU->getVersion() >= 5;
1464     ContainsDwarfLegacy |= CU->getVersion() < 5;
1465   }
1466 
1467   if (ContainsDwarf5 && ContainsDwarfLegacy)
1468     llvm::errs() << "BOLT-WARNING: BOLT does not support mix mode binary with "
1469                     "DWARF5 and DWARF{2,3,4}.\n";
1470 
1471   std::sort(AllRanges.begin(), AllRanges.end());
1472   for (auto &KV : BinaryFunctions) {
1473     const uint64_t FunctionAddress = KV.first;
1474     BinaryFunction &Function = KV.second;
1475 
1476     auto It = std::partition_point(
1477         AllRanges.begin(), AllRanges.end(),
1478         [=](CURange R) { return R.HighPC <= FunctionAddress; });
1479     if (It != AllRanges.end() && It->LowPC <= FunctionAddress) {
1480       Function.setDWARFUnit(It->Unit);
1481     }
1482   }
1483 
1484   // Discover units with debug info that needs to be updated.
1485   for (const auto &KV : BinaryFunctions) {
1486     const BinaryFunction &BF = KV.second;
1487     if (shouldEmit(BF) && BF.getDWARFUnit())
1488       ProcessedCUs.insert(BF.getDWARFUnit());
1489   }
1490 
1491   // Clear debug info for functions from units that we are not going to process.
1492   for (auto &KV : BinaryFunctions) {
1493     BinaryFunction &BF = KV.second;
1494     if (BF.getDWARFUnit() && !ProcessedCUs.count(BF.getDWARFUnit()))
1495       BF.setDWARFUnit(nullptr);
1496   }
1497 
1498   if (opts::Verbosity >= 1) {
1499     outs() << "BOLT-INFO: " << ProcessedCUs.size() << " out of "
1500            << DwCtx->getNumCompileUnits() << " CUs will be updated\n";
1501   }
1502 
1503   preprocessDWODebugInfo();
1504 
1505   // Populate MCContext with DWARF files from all units.
1506   StringRef GlobalPrefix = AsmInfo->getPrivateGlobalPrefix();
1507   for (const std::unique_ptr<DWARFUnit> &CU : DwCtx->compile_units()) {
1508     const uint64_t CUID = CU->getOffset();
1509     DwarfLineTable &BinaryLineTable = getDwarfLineTable(CUID);
1510     BinaryLineTable.setLabel(Ctx->getOrCreateSymbol(
1511         GlobalPrefix + "line_table_start" + Twine(CUID)));
1512 
1513     if (!ProcessedCUs.count(CU.get()))
1514       continue;
1515 
1516     const DWARFDebugLine::LineTable *LineTable =
1517         DwCtx->getLineTableForUnit(CU.get());
1518     const std::vector<DWARFDebugLine::FileNameEntry> &FileNames =
1519         LineTable->Prologue.FileNames;
1520 
1521     uint16_t DwarfVersion = LineTable->Prologue.getVersion();
1522     if (DwarfVersion >= 5) {
1523       Optional<MD5::MD5Result> Checksum = None;
1524       if (LineTable->Prologue.ContentTypes.HasMD5)
1525         Checksum = LineTable->Prologue.FileNames[0].Checksum;
1526       Optional<const char *> Name =
1527           dwarf::toString(CU->getUnitDIE().find(dwarf::DW_AT_name), nullptr);
1528       if (Optional<uint64_t> DWOID = CU->getDWOId()) {
1529         auto Iter = DWOCUs.find(*DWOID);
1530         assert(Iter != DWOCUs.end() && "DWO CU was not found.");
1531         Name = dwarf::toString(
1532             Iter->second->getUnitDIE().find(dwarf::DW_AT_name), nullptr);
1533       }
1534       BinaryLineTable.setRootFile(CU->getCompilationDir(), *Name, Checksum,
1535                                   None);
1536     }
1537 
1538     BinaryLineTable.setDwarfVersion(DwarfVersion);
1539 
1540     // Assign a unique label to every line table, one per CU.
1541     // Make sure empty debug line tables are registered too.
1542     if (FileNames.empty()) {
1543       cantFail(
1544           getDwarfFile("", "<unknown>", 0, None, None, CUID, DwarfVersion));
1545       continue;
1546     }
1547     const uint32_t Offset = DwarfVersion < 5 ? 1 : 0;
1548     for (size_t I = 0, Size = FileNames.size(); I != Size; ++I) {
1549       // Dir indexes start at 1, as DWARF file numbers, and a dir index 0
1550       // means empty dir.
1551       StringRef Dir = "";
1552       if (FileNames[I].DirIdx != 0 || DwarfVersion >= 5)
1553         if (Optional<const char *> DirName = dwarf::toString(
1554                 LineTable->Prologue
1555                     .IncludeDirectories[FileNames[I].DirIdx - Offset]))
1556           Dir = *DirName;
1557       StringRef FileName = "";
1558       if (Optional<const char *> FName = dwarf::toString(FileNames[I].Name))
1559         FileName = *FName;
1560       assert(FileName != "");
1561       Optional<MD5::MD5Result> Checksum = None;
1562       if (DwarfVersion >= 5 && LineTable->Prologue.ContentTypes.HasMD5)
1563         Checksum = LineTable->Prologue.FileNames[I].Checksum;
1564       cantFail(
1565           getDwarfFile(Dir, FileName, 0, Checksum, None, CUID, DwarfVersion));
1566     }
1567   }
1568 }
1569 
1570 bool BinaryContext::shouldEmit(const BinaryFunction &Function) const {
1571   if (Function.isPseudo())
1572     return false;
1573 
1574   if (opts::processAllFunctions())
1575     return true;
1576 
1577   if (Function.isIgnored())
1578     return false;
1579 
1580   // In relocation mode we will emit non-simple functions with CFG.
1581   // If the function does not have a CFG it should be marked as ignored.
1582   return HasRelocations || Function.isSimple();
1583 }
1584 
1585 void BinaryContext::printCFI(raw_ostream &OS, const MCCFIInstruction &Inst) {
1586   uint32_t Operation = Inst.getOperation();
1587   switch (Operation) {
1588   case MCCFIInstruction::OpSameValue:
1589     OS << "OpSameValue Reg" << Inst.getRegister();
1590     break;
1591   case MCCFIInstruction::OpRememberState:
1592     OS << "OpRememberState";
1593     break;
1594   case MCCFIInstruction::OpRestoreState:
1595     OS << "OpRestoreState";
1596     break;
1597   case MCCFIInstruction::OpOffset:
1598     OS << "OpOffset Reg" << Inst.getRegister() << " " << Inst.getOffset();
1599     break;
1600   case MCCFIInstruction::OpDefCfaRegister:
1601     OS << "OpDefCfaRegister Reg" << Inst.getRegister();
1602     break;
1603   case MCCFIInstruction::OpDefCfaOffset:
1604     OS << "OpDefCfaOffset " << Inst.getOffset();
1605     break;
1606   case MCCFIInstruction::OpDefCfa:
1607     OS << "OpDefCfa Reg" << Inst.getRegister() << " " << Inst.getOffset();
1608     break;
1609   case MCCFIInstruction::OpRelOffset:
1610     OS << "OpRelOffset Reg" << Inst.getRegister() << " " << Inst.getOffset();
1611     break;
1612   case MCCFIInstruction::OpAdjustCfaOffset:
1613     OS << "OfAdjustCfaOffset " << Inst.getOffset();
1614     break;
1615   case MCCFIInstruction::OpEscape:
1616     OS << "OpEscape";
1617     break;
1618   case MCCFIInstruction::OpRestore:
1619     OS << "OpRestore Reg" << Inst.getRegister();
1620     break;
1621   case MCCFIInstruction::OpUndefined:
1622     OS << "OpUndefined Reg" << Inst.getRegister();
1623     break;
1624   case MCCFIInstruction::OpRegister:
1625     OS << "OpRegister Reg" << Inst.getRegister() << " Reg"
1626        << Inst.getRegister2();
1627     break;
1628   case MCCFIInstruction::OpWindowSave:
1629     OS << "OpWindowSave";
1630     break;
1631   case MCCFIInstruction::OpGnuArgsSize:
1632     OS << "OpGnuArgsSize";
1633     break;
1634   default:
1635     OS << "Op#" << Operation;
1636     break;
1637   }
1638 }
1639 
1640 void BinaryContext::printInstruction(raw_ostream &OS, const MCInst &Instruction,
1641                                      uint64_t Offset,
1642                                      const BinaryFunction *Function,
1643                                      bool PrintMCInst, bool PrintMemData,
1644                                      bool PrintRelocations) const {
1645   if (MIB->isEHLabel(Instruction)) {
1646     OS << "  EH_LABEL: " << *MIB->getTargetSymbol(Instruction) << '\n';
1647     return;
1648   }
1649   OS << format("    %08" PRIx64 ": ", Offset);
1650   if (MIB->isCFI(Instruction)) {
1651     uint32_t Offset = Instruction.getOperand(0).getImm();
1652     OS << "\t!CFI\t$" << Offset << "\t; ";
1653     if (Function)
1654       printCFI(OS, *Function->getCFIFor(Instruction));
1655     OS << "\n";
1656     return;
1657   }
1658   InstPrinter->printInst(&Instruction, 0, "", *STI, OS);
1659   if (MIB->isCall(Instruction)) {
1660     if (MIB->isTailCall(Instruction))
1661       OS << " # TAILCALL ";
1662     if (MIB->isInvoke(Instruction)) {
1663       const Optional<MCPlus::MCLandingPad> EHInfo = MIB->getEHInfo(Instruction);
1664       OS << " # handler: ";
1665       if (EHInfo->first)
1666         OS << *EHInfo->first;
1667       else
1668         OS << '0';
1669       OS << "; action: " << EHInfo->second;
1670       const int64_t GnuArgsSize = MIB->getGnuArgsSize(Instruction);
1671       if (GnuArgsSize >= 0)
1672         OS << "; GNU_args_size = " << GnuArgsSize;
1673     }
1674   } else if (MIB->isIndirectBranch(Instruction)) {
1675     if (uint64_t JTAddress = MIB->getJumpTable(Instruction)) {
1676       OS << " # JUMPTABLE @0x" << Twine::utohexstr(JTAddress);
1677     } else {
1678       OS << " # UNKNOWN CONTROL FLOW";
1679     }
1680   }
1681   if (Optional<uint32_t> Offset = MIB->getOffset(Instruction))
1682     OS << " # Offset: " << *Offset;
1683 
1684   MIB->printAnnotations(Instruction, OS);
1685 
1686   if (opts::PrintDebugInfo) {
1687     DebugLineTableRowRef RowRef =
1688         DebugLineTableRowRef::fromSMLoc(Instruction.getLoc());
1689     if (RowRef != DebugLineTableRowRef::NULL_ROW) {
1690       const DWARFDebugLine::LineTable *LineTable;
1691       if (Function && Function->getDWARFUnit() &&
1692           Function->getDWARFUnit()->getOffset() == RowRef.DwCompileUnitIndex) {
1693         LineTable = Function->getDWARFLineTable();
1694       } else {
1695         LineTable = DwCtx->getLineTableForUnit(
1696             DwCtx->getCompileUnitForOffset(RowRef.DwCompileUnitIndex));
1697       }
1698       assert(LineTable &&
1699              "line table expected for instruction with debug info");
1700 
1701       const DWARFDebugLine::Row &Row = LineTable->Rows[RowRef.RowIndex - 1];
1702       StringRef FileName = "";
1703       if (Optional<const char *> FName =
1704               dwarf::toString(LineTable->Prologue.FileNames[Row.File - 1].Name))
1705         FileName = *FName;
1706       OS << " # debug line " << FileName << ":" << Row.Line;
1707       if (Row.Column)
1708         OS << ":" << Row.Column;
1709       if (Row.Discriminator)
1710         OS << " discriminator:" << Row.Discriminator;
1711     }
1712   }
1713 
1714   if ((opts::PrintRelocations || PrintRelocations) && Function) {
1715     const uint64_t Size = computeCodeSize(&Instruction, &Instruction + 1);
1716     Function->printRelocations(OS, Offset, Size);
1717   }
1718 
1719   OS << "\n";
1720 
1721   if (PrintMCInst) {
1722     Instruction.dump_pretty(OS, InstPrinter.get());
1723     OS << "\n";
1724   }
1725 }
1726 
1727 Optional<uint64_t>
1728 BinaryContext::getBaseAddressForMapping(uint64_t MMapAddress,
1729                                         uint64_t FileOffset) const {
1730   // Find a segment with a matching file offset.
1731   for (auto &KV : SegmentMapInfo) {
1732     const SegmentInfo &SegInfo = KV.second;
1733     if (alignDown(SegInfo.FileOffset, SegInfo.Alignment) == FileOffset) {
1734       // Use segment's aligned memory offset to calculate the base address.
1735       const uint64_t MemOffset = alignDown(SegInfo.Address, SegInfo.Alignment);
1736       return MMapAddress - MemOffset;
1737     }
1738   }
1739 
1740   return NoneType();
1741 }
1742 
1743 ErrorOr<BinarySection &> BinaryContext::getSectionForAddress(uint64_t Address) {
1744   auto SI = AddressToSection.upper_bound(Address);
1745   if (SI != AddressToSection.begin()) {
1746     --SI;
1747     uint64_t UpperBound = SI->first + SI->second->getSize();
1748     if (!SI->second->getSize())
1749       UpperBound += 1;
1750     if (UpperBound > Address)
1751       return *SI->second;
1752   }
1753   return std::make_error_code(std::errc::bad_address);
1754 }
1755 
1756 ErrorOr<StringRef>
1757 BinaryContext::getSectionNameForAddress(uint64_t Address) const {
1758   if (ErrorOr<const BinarySection &> Section = getSectionForAddress(Address))
1759     return Section->getName();
1760   return std::make_error_code(std::errc::bad_address);
1761 }
1762 
1763 BinarySection &BinaryContext::registerSection(BinarySection *Section) {
1764   auto Res = Sections.insert(Section);
1765   (void)Res;
1766   assert(Res.second && "can't register the same section twice.");
1767 
1768   // Only register allocatable sections in the AddressToSection map.
1769   if (Section->isAllocatable() && Section->getAddress())
1770     AddressToSection.insert(std::make_pair(Section->getAddress(), Section));
1771   NameToSection.insert(
1772       std::make_pair(std::string(Section->getName()), Section));
1773   LLVM_DEBUG(dbgs() << "BOLT-DEBUG: registering " << *Section << "\n");
1774   return *Section;
1775 }
1776 
1777 BinarySection &BinaryContext::registerSection(SectionRef Section) {
1778   return registerSection(new BinarySection(*this, Section));
1779 }
1780 
1781 BinarySection &
1782 BinaryContext::registerSection(StringRef SectionName,
1783                                const BinarySection &OriginalSection) {
1784   return registerSection(
1785       new BinarySection(*this, SectionName, OriginalSection));
1786 }
1787 
1788 BinarySection &
1789 BinaryContext::registerOrUpdateSection(StringRef Name, unsigned ELFType,
1790                                        unsigned ELFFlags, uint8_t *Data,
1791                                        uint64_t Size, unsigned Alignment) {
1792   auto NamedSections = getSectionByName(Name);
1793   if (NamedSections.begin() != NamedSections.end()) {
1794     assert(std::next(NamedSections.begin()) == NamedSections.end() &&
1795            "can only update unique sections");
1796     BinarySection *Section = NamedSections.begin()->second;
1797 
1798     LLVM_DEBUG(dbgs() << "BOLT-DEBUG: updating " << *Section << " -> ");
1799     const bool Flag = Section->isAllocatable();
1800     (void)Flag;
1801     Section->update(Data, Size, Alignment, ELFType, ELFFlags);
1802     LLVM_DEBUG(dbgs() << *Section << "\n");
1803     // FIXME: Fix section flags/attributes for MachO.
1804     if (isELF())
1805       assert(Flag == Section->isAllocatable() &&
1806              "can't change section allocation status");
1807     return *Section;
1808   }
1809 
1810   return registerSection(
1811       new BinarySection(*this, Name, Data, Size, Alignment, ELFType, ELFFlags));
1812 }
1813 
1814 bool BinaryContext::deregisterSection(BinarySection &Section) {
1815   BinarySection *SectionPtr = &Section;
1816   auto Itr = Sections.find(SectionPtr);
1817   if (Itr != Sections.end()) {
1818     auto Range = AddressToSection.equal_range(SectionPtr->getAddress());
1819     while (Range.first != Range.second) {
1820       if (Range.first->second == SectionPtr) {
1821         AddressToSection.erase(Range.first);
1822         break;
1823       }
1824       ++Range.first;
1825     }
1826 
1827     auto NameRange =
1828         NameToSection.equal_range(std::string(SectionPtr->getName()));
1829     while (NameRange.first != NameRange.second) {
1830       if (NameRange.first->second == SectionPtr) {
1831         NameToSection.erase(NameRange.first);
1832         break;
1833       }
1834       ++NameRange.first;
1835     }
1836 
1837     Sections.erase(Itr);
1838     delete SectionPtr;
1839     return true;
1840   }
1841   return false;
1842 }
1843 
1844 void BinaryContext::printSections(raw_ostream &OS) const {
1845   for (BinarySection *const &Section : Sections)
1846     OS << "BOLT-INFO: " << *Section << "\n";
1847 }
1848 
1849 BinarySection &BinaryContext::absoluteSection() {
1850   if (ErrorOr<BinarySection &> Section = getUniqueSectionByName("<absolute>"))
1851     return *Section;
1852   return registerOrUpdateSection("<absolute>", ELF::SHT_NULL, 0u);
1853 }
1854 
1855 ErrorOr<uint64_t> BinaryContext::getUnsignedValueAtAddress(uint64_t Address,
1856                                                            size_t Size) const {
1857   const ErrorOr<const BinarySection &> Section = getSectionForAddress(Address);
1858   if (!Section)
1859     return std::make_error_code(std::errc::bad_address);
1860 
1861   if (Section->isVirtual())
1862     return 0;
1863 
1864   DataExtractor DE(Section->getContents(), AsmInfo->isLittleEndian(),
1865                    AsmInfo->getCodePointerSize());
1866   auto ValueOffset = static_cast<uint64_t>(Address - Section->getAddress());
1867   return DE.getUnsigned(&ValueOffset, Size);
1868 }
1869 
1870 ErrorOr<uint64_t> BinaryContext::getSignedValueAtAddress(uint64_t Address,
1871                                                          size_t Size) const {
1872   const ErrorOr<const BinarySection &> Section = getSectionForAddress(Address);
1873   if (!Section)
1874     return std::make_error_code(std::errc::bad_address);
1875 
1876   if (Section->isVirtual())
1877     return 0;
1878 
1879   DataExtractor DE(Section->getContents(), AsmInfo->isLittleEndian(),
1880                    AsmInfo->getCodePointerSize());
1881   auto ValueOffset = static_cast<uint64_t>(Address - Section->getAddress());
1882   return DE.getSigned(&ValueOffset, Size);
1883 }
1884 
1885 void BinaryContext::addRelocation(uint64_t Address, MCSymbol *Symbol,
1886                                   uint64_t Type, uint64_t Addend,
1887                                   uint64_t Value) {
1888   ErrorOr<BinarySection &> Section = getSectionForAddress(Address);
1889   assert(Section && "cannot find section for address");
1890   Section->addRelocation(Address - Section->getAddress(), Symbol, Type, Addend,
1891                          Value);
1892 }
1893 
1894 void BinaryContext::addDynamicRelocation(uint64_t Address, MCSymbol *Symbol,
1895                                          uint64_t Type, uint64_t Addend,
1896                                          uint64_t Value) {
1897   ErrorOr<BinarySection &> Section = getSectionForAddress(Address);
1898   assert(Section && "cannot find section for address");
1899   Section->addDynamicRelocation(Address - Section->getAddress(), Symbol, Type,
1900                                 Addend, Value);
1901 }
1902 
1903 bool BinaryContext::removeRelocationAt(uint64_t Address) {
1904   ErrorOr<BinarySection &> Section = getSectionForAddress(Address);
1905   assert(Section && "cannot find section for address");
1906   return Section->removeRelocationAt(Address - Section->getAddress());
1907 }
1908 
1909 const Relocation *BinaryContext::getRelocationAt(uint64_t Address) {
1910   ErrorOr<BinarySection &> Section = getSectionForAddress(Address);
1911   if (!Section)
1912     return nullptr;
1913 
1914   return Section->getRelocationAt(Address - Section->getAddress());
1915 }
1916 
1917 const Relocation *BinaryContext::getDynamicRelocationAt(uint64_t Address) {
1918   ErrorOr<BinarySection &> Section = getSectionForAddress(Address);
1919   if (!Section)
1920     return nullptr;
1921 
1922   return Section->getDynamicRelocationAt(Address - Section->getAddress());
1923 }
1924 
1925 void BinaryContext::markAmbiguousRelocations(BinaryData &BD,
1926                                              const uint64_t Address) {
1927   auto setImmovable = [&](BinaryData &BD) {
1928     BinaryData *Root = BD.getAtomicRoot();
1929     LLVM_DEBUG(if (Root->isMoveable()) {
1930       dbgs() << "BOLT-DEBUG: setting " << *Root << " as immovable "
1931              << "due to ambiguous relocation referencing 0x"
1932              << Twine::utohexstr(Address) << '\n';
1933     });
1934     Root->setIsMoveable(false);
1935   };
1936 
1937   if (Address == BD.getAddress()) {
1938     setImmovable(BD);
1939 
1940     // Set previous symbol as immovable
1941     BinaryData *Prev = getBinaryDataContainingAddress(Address - 1);
1942     if (Prev && Prev->getEndAddress() == BD.getAddress())
1943       setImmovable(*Prev);
1944   }
1945 
1946   if (Address == BD.getEndAddress()) {
1947     setImmovable(BD);
1948 
1949     // Set next symbol as immovable
1950     BinaryData *Next = getBinaryDataContainingAddress(BD.getEndAddress());
1951     if (Next && Next->getAddress() == BD.getEndAddress())
1952       setImmovable(*Next);
1953   }
1954 }
1955 
1956 BinaryFunction *BinaryContext::getFunctionForSymbol(const MCSymbol *Symbol,
1957                                                     uint64_t *EntryDesc) {
1958   std::shared_lock<std::shared_timed_mutex> Lock(SymbolToFunctionMapMutex);
1959   auto BFI = SymbolToFunctionMap.find(Symbol);
1960   if (BFI == SymbolToFunctionMap.end())
1961     return nullptr;
1962 
1963   BinaryFunction *BF = BFI->second;
1964   if (EntryDesc)
1965     *EntryDesc = BF->getEntryIDForSymbol(Symbol);
1966 
1967   return BF;
1968 }
1969 
1970 void BinaryContext::exitWithBugReport(StringRef Message,
1971                                       const BinaryFunction &Function) const {
1972   errs() << "=======================================\n";
1973   errs() << "BOLT is unable to proceed because it couldn't properly understand "
1974             "this function.\n";
1975   errs() << "If you are running the most recent version of BOLT, you may "
1976             "want to "
1977             "report this and paste this dump.\nPlease check that there is no "
1978             "sensitive contents being shared in this dump.\n";
1979   errs() << "\nOffending function: " << Function.getPrintName() << "\n\n";
1980   ScopedPrinter SP(errs());
1981   SP.printBinaryBlock("Function contents", *Function.getData());
1982   errs() << "\n";
1983   Function.dump();
1984   errs() << "ERROR: " << Message;
1985   errs() << "\n=======================================\n";
1986   exit(1);
1987 }
1988 
1989 BinaryFunction *
1990 BinaryContext::createInjectedBinaryFunction(const std::string &Name,
1991                                             bool IsSimple) {
1992   InjectedBinaryFunctions.push_back(new BinaryFunction(Name, *this, IsSimple));
1993   BinaryFunction *BF = InjectedBinaryFunctions.back();
1994   setSymbolToFunctionMap(BF->getSymbol(), BF);
1995   BF->CurrentState = BinaryFunction::State::CFG;
1996   return BF;
1997 }
1998 
1999 std::pair<size_t, size_t>
2000 BinaryContext::calculateEmittedSize(BinaryFunction &BF, bool FixBranches) {
2001   // Adjust branch instruction to match the current layout.
2002   if (FixBranches)
2003     BF.fixBranches();
2004 
2005   // Create local MC context to isolate the effect of ephemeral code emission.
2006   IndependentCodeEmitter MCEInstance = createIndependentMCCodeEmitter();
2007   MCContext *LocalCtx = MCEInstance.LocalCtx.get();
2008   MCAsmBackend *MAB =
2009       TheTarget->createMCAsmBackend(*STI, *MRI, MCTargetOptions());
2010 
2011   SmallString<256> Code;
2012   raw_svector_ostream VecOS(Code);
2013 
2014   std::unique_ptr<MCObjectWriter> OW = MAB->createObjectWriter(VecOS);
2015   std::unique_ptr<MCStreamer> Streamer(TheTarget->createMCObjectStreamer(
2016       *TheTriple, *LocalCtx, std::unique_ptr<MCAsmBackend>(MAB), std::move(OW),
2017       std::unique_ptr<MCCodeEmitter>(MCEInstance.MCE.release()), *STI,
2018       /*RelaxAll=*/false,
2019       /*IncrementalLinkerCompatible=*/false,
2020       /*DWARFMustBeAtTheEnd=*/false));
2021 
2022   Streamer->initSections(false, *STI);
2023 
2024   MCSection *Section = MCEInstance.LocalMOFI->getTextSection();
2025   Section->setHasInstructions(true);
2026 
2027   // Create symbols in the LocalCtx so that they get destroyed with it.
2028   MCSymbol *StartLabel = LocalCtx->createTempSymbol();
2029   MCSymbol *EndLabel = LocalCtx->createTempSymbol();
2030   MCSymbol *ColdStartLabel = LocalCtx->createTempSymbol();
2031   MCSymbol *ColdEndLabel = LocalCtx->createTempSymbol();
2032 
2033   Streamer->SwitchSection(Section);
2034   Streamer->emitLabel(StartLabel);
2035   emitFunctionBody(*Streamer, BF, /*EmitColdPart=*/false,
2036                    /*EmitCodeOnly=*/true);
2037   Streamer->emitLabel(EndLabel);
2038 
2039   if (BF.isSplit()) {
2040     MCSectionELF *ColdSection =
2041         LocalCtx->getELFSection(BF.getColdCodeSectionName(), ELF::SHT_PROGBITS,
2042                                 ELF::SHF_EXECINSTR | ELF::SHF_ALLOC);
2043     ColdSection->setHasInstructions(true);
2044 
2045     Streamer->SwitchSection(ColdSection);
2046     Streamer->emitLabel(ColdStartLabel);
2047     emitFunctionBody(*Streamer, BF, /*EmitColdPart=*/true,
2048                      /*EmitCodeOnly=*/true);
2049     Streamer->emitLabel(ColdEndLabel);
2050     // To avoid calling MCObjectStreamer::flushPendingLabels() which is private
2051     Streamer->emitBytes(StringRef(""));
2052     Streamer->SwitchSection(Section);
2053   }
2054 
2055   // To avoid calling MCObjectStreamer::flushPendingLabels() which is private or
2056   // MCStreamer::Finish(), which does more than we want
2057   Streamer->emitBytes(StringRef(""));
2058 
2059   MCAssembler &Assembler =
2060       static_cast<MCObjectStreamer *>(Streamer.get())->getAssembler();
2061   MCAsmLayout Layout(Assembler);
2062   Assembler.layout(Layout);
2063 
2064   const uint64_t HotSize =
2065       Layout.getSymbolOffset(*EndLabel) - Layout.getSymbolOffset(*StartLabel);
2066   const uint64_t ColdSize = BF.isSplit()
2067                                 ? Layout.getSymbolOffset(*ColdEndLabel) -
2068                                       Layout.getSymbolOffset(*ColdStartLabel)
2069                                 : 0ULL;
2070 
2071   // Clean-up the effect of the code emission.
2072   for (const MCSymbol &Symbol : Assembler.symbols()) {
2073     MCSymbol *MutableSymbol = const_cast<MCSymbol *>(&Symbol);
2074     MutableSymbol->setUndefined();
2075     MutableSymbol->setIsRegistered(false);
2076   }
2077 
2078   return std::make_pair(HotSize, ColdSize);
2079 }
2080 
2081 bool BinaryContext::validateEncoding(const MCInst &Inst,
2082                                      ArrayRef<uint8_t> InputEncoding) const {
2083   SmallString<256> Code;
2084   SmallVector<MCFixup, 4> Fixups;
2085   raw_svector_ostream VecOS(Code);
2086 
2087   MCE->encodeInstruction(Inst, VecOS, Fixups, *STI);
2088   auto EncodedData = ArrayRef<uint8_t>((uint8_t *)Code.data(), Code.size());
2089   if (InputEncoding != EncodedData) {
2090     if (opts::Verbosity > 1) {
2091       errs() << "BOLT-WARNING: mismatched encoding detected\n"
2092              << "      input: " << InputEncoding << '\n'
2093              << "     output: " << EncodedData << '\n';
2094     }
2095     return false;
2096   }
2097 
2098   return true;
2099 }
2100 
2101 uint64_t BinaryContext::getHotThreshold() const {
2102   static uint64_t Threshold = 0;
2103   if (Threshold == 0) {
2104     Threshold = std::max(
2105         (uint64_t)opts::ExecutionCountThreshold,
2106         NumProfiledFuncs ? SumExecutionCount / (2 * NumProfiledFuncs) : 1);
2107   }
2108   return Threshold;
2109 }
2110 
2111 BinaryFunction *BinaryContext::getBinaryFunctionContainingAddress(
2112     uint64_t Address, bool CheckPastEnd, bool UseMaxSize) {
2113   auto FI = BinaryFunctions.upper_bound(Address);
2114   if (FI == BinaryFunctions.begin())
2115     return nullptr;
2116   --FI;
2117 
2118   const uint64_t UsedSize =
2119       UseMaxSize ? FI->second.getMaxSize() : FI->second.getSize();
2120 
2121   if (Address >= FI->first + UsedSize + (CheckPastEnd ? 1 : 0))
2122     return nullptr;
2123 
2124   return &FI->second;
2125 }
2126 
2127 BinaryFunction *BinaryContext::getBinaryFunctionAtAddress(uint64_t Address) {
2128   // First, try to find a function starting at the given address. If the
2129   // function was folded, this will get us the original folded function if it
2130   // wasn't removed from the list, e.g. in non-relocation mode.
2131   auto BFI = BinaryFunctions.find(Address);
2132   if (BFI != BinaryFunctions.end())
2133     return &BFI->second;
2134 
2135   // We might have folded the function matching the object at the given
2136   // address. In such case, we look for a function matching the symbol
2137   // registered at the original address. The new function (the one that the
2138   // original was folded into) will hold the symbol.
2139   if (const BinaryData *BD = getBinaryDataAtAddress(Address)) {
2140     uint64_t EntryID = 0;
2141     BinaryFunction *BF = getFunctionForSymbol(BD->getSymbol(), &EntryID);
2142     if (BF && EntryID == 0)
2143       return BF;
2144   }
2145   return nullptr;
2146 }
2147 
2148 DebugAddressRangesVector BinaryContext::translateModuleAddressRanges(
2149     const DWARFAddressRangesVector &InputRanges) const {
2150   DebugAddressRangesVector OutputRanges;
2151 
2152   for (const DWARFAddressRange Range : InputRanges) {
2153     auto BFI = BinaryFunctions.lower_bound(Range.LowPC);
2154     while (BFI != BinaryFunctions.end()) {
2155       const BinaryFunction &Function = BFI->second;
2156       if (Function.getAddress() >= Range.HighPC)
2157         break;
2158       const DebugAddressRangesVector FunctionRanges =
2159           Function.getOutputAddressRanges();
2160       std::move(std::begin(FunctionRanges), std::end(FunctionRanges),
2161                 std::back_inserter(OutputRanges));
2162       std::advance(BFI, 1);
2163     }
2164   }
2165 
2166   return OutputRanges;
2167 }
2168 
2169 } // namespace bolt
2170 } // namespace llvm
2171