xref: /llvm-project/bolt/lib/Core/BinaryContext.cpp (revision a34c753fe709a624f5b087397fb05adeac2311e4)
1 //===--- BinaryContext.cpp  - Interface for machine-level context ---------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 //===----------------------------------------------------------------------===//
10 
11 #include "bolt/Core/BinaryContext.h"
12 #include "bolt/Core/BinaryEmitter.h"
13 #include "bolt/Core/BinaryFunction.h"
14 #include "bolt/Utils/CommandLineOpts.h"
15 #include "bolt/Utils/NameResolver.h"
16 #include "bolt/Utils/Utils.h"
17 #include "llvm/ADT/Twine.h"
18 #include "llvm/DebugInfo/DWARF/DWARFFormValue.h"
19 #include "llvm/DebugInfo/DWARF/DWARFUnit.h"
20 #include "llvm/MC/MCAsmLayout.h"
21 #include "llvm/MC/MCAssembler.h"
22 #include "llvm/MC/MCContext.h"
23 #include "llvm/MC/MCDisassembler/MCDisassembler.h"
24 #include "llvm/MC/MCInstPrinter.h"
25 #include "llvm/MC/MCObjectStreamer.h"
26 #include "llvm/MC/MCObjectWriter.h"
27 #include "llvm/MC/MCSectionELF.h"
28 #include "llvm/MC/MCStreamer.h"
29 #include "llvm/MC/MCSymbol.h"
30 #include "llvm/Support/CommandLine.h"
31 #include "llvm/Support/Regex.h"
32 #include <functional>
33 #include <iterator>
34 
35 using namespace llvm;
36 
37 #undef  DEBUG_TYPE
38 #define DEBUG_TYPE "bolt"
39 
40 namespace opts {
41 
42 cl::opt<bool>
43 NoHugePages("no-huge-pages",
44   cl::desc("use regular size pages for code alignment"),
45   cl::ZeroOrMore,
46   cl::Hidden,
47   cl::cat(BoltCategory));
48 
49 static cl::opt<bool>
50 PrintDebugInfo("print-debug-info",
51   cl::desc("print debug info when printing functions"),
52   cl::Hidden,
53   cl::ZeroOrMore,
54   cl::cat(BoltCategory));
55 
56 cl::opt<bool>
57 PrintRelocations("print-relocations",
58   cl::desc("print relocations when printing functions/objects"),
59   cl::Hidden,
60   cl::ZeroOrMore,
61   cl::cat(BoltCategory));
62 
63 static cl::opt<bool>
64 PrintMemData("print-mem-data",
65   cl::desc("print memory data annotations when printing functions"),
66   cl::Hidden,
67   cl::ZeroOrMore,
68   cl::cat(BoltCategory));
69 
70 } // namespace opts
71 
72 namespace llvm {
73 namespace bolt {
74 
75 BinaryContext::BinaryContext(std::unique_ptr<MCContext> Ctx,
76                              std::unique_ptr<DWARFContext> DwCtx,
77                              std::unique_ptr<Triple> TheTriple,
78                              const Target *TheTarget,
79                              std::string TripleName,
80                              std::unique_ptr<MCCodeEmitter> MCE,
81                              std::unique_ptr<MCObjectFileInfo> MOFI,
82                              std::unique_ptr<const MCAsmInfo> AsmInfo,
83                              std::unique_ptr<const MCInstrInfo> MII,
84                              std::unique_ptr<const MCSubtargetInfo> STI,
85                              std::unique_ptr<MCInstPrinter> InstPrinter,
86                              std::unique_ptr<const MCInstrAnalysis> MIA,
87                              std::unique_ptr<MCPlusBuilder> MIB,
88                              std::unique_ptr<const MCRegisterInfo> MRI,
89                              std::unique_ptr<MCDisassembler> DisAsm)
90     : Ctx(std::move(Ctx)),
91       DwCtx(std::move(DwCtx)),
92       TheTriple(std::move(TheTriple)),
93       TheTarget(TheTarget),
94       TripleName(TripleName),
95       MCE(std::move(MCE)),
96       MOFI(std::move(MOFI)),
97       AsmInfo(std::move(AsmInfo)),
98       MII(std::move(MII)),
99       STI(std::move(STI)),
100       InstPrinter(std::move(InstPrinter)),
101       MIA(std::move(MIA)),
102       MIB(std::move(MIB)),
103       MRI(std::move(MRI)),
104       DisAsm(std::move(DisAsm)) {
105   Relocation::Arch = this->TheTriple->getArch();
106   PageAlign = opts::NoHugePages ? RegularPageSize : HugePageSize;
107 }
108 
109 BinaryContext::~BinaryContext() {
110   for (BinarySection *Section : Sections) {
111     delete Section;
112   }
113   for (BinaryFunction *InjectedFunction : InjectedBinaryFunctions) {
114     delete InjectedFunction;
115   }
116   for (std::pair<const uint64_t, JumpTable *> JTI : JumpTables) {
117     delete JTI.second;
118   }
119   clearBinaryData();
120 }
121 
122 /// Create BinaryContext for a given architecture \p ArchName and
123 /// triple \p TripleName.
124 std::unique_ptr<BinaryContext>
125 BinaryContext::createBinaryContext(const ObjectFile *File, bool IsPIC,
126                                    std::unique_ptr<DWARFContext> DwCtx) {
127   StringRef ArchName = "";
128   StringRef FeaturesStr = "";
129   switch (File->getArch()) {
130   case llvm::Triple::x86_64:
131     ArchName = "x86-64";
132     FeaturesStr = "+nopl";
133     break;
134   case llvm::Triple::aarch64:
135     ArchName = "aarch64";
136     FeaturesStr = "+fp-armv8,+neon,+crypto,+dotprod,+crc,+lse,+ras,+rdm,"
137                   "+fullfp16,+spe,+fuse-aes,+rcpc";
138     break;
139   default:
140     errs() << "BOLT-ERROR: Unrecognized machine in ELF file.\n";
141     return nullptr;
142   }
143 
144   auto TheTriple = std::make_unique<Triple>(File->makeTriple());
145   const std::string TripleName = TheTriple->str();
146 
147   std::string Error;
148   const Target *TheTarget =
149       TargetRegistry::lookupTarget(std::string(ArchName), *TheTriple, Error);
150   if (!TheTarget) {
151     errs() << "BOLT-ERROR: " << Error;
152     return nullptr;
153   }
154 
155   std::unique_ptr<const MCRegisterInfo> MRI(
156       TheTarget->createMCRegInfo(TripleName));
157   if (!MRI) {
158     errs() << "BOLT-ERROR: no register info for target " << TripleName << "\n";
159     return nullptr;
160   }
161 
162   // Set up disassembler.
163   std::unique_ptr<const MCAsmInfo> AsmInfo(
164       TheTarget->createMCAsmInfo(*MRI, TripleName, MCTargetOptions()));
165   if (!AsmInfo) {
166     errs() << "BOLT-ERROR: no assembly info for target " << TripleName << "\n";
167     return nullptr;
168   }
169 
170   std::unique_ptr<const MCSubtargetInfo> STI(
171       TheTarget->createMCSubtargetInfo(TripleName, "", FeaturesStr));
172   if (!STI) {
173     errs() << "BOLT-ERROR: no subtarget info for target " << TripleName << "\n";
174     return nullptr;
175   }
176 
177   std::unique_ptr<const MCInstrInfo> MII(TheTarget->createMCInstrInfo());
178   if (!MII) {
179     errs() << "BOLT-ERROR: no instruction info for target " << TripleName
180            << "\n";
181     return nullptr;
182   }
183 
184   std::unique_ptr<MCContext> Ctx(
185       new MCContext(*TheTriple, AsmInfo.get(), MRI.get(), STI.get()));
186   std::unique_ptr<MCObjectFileInfo> MOFI(
187       TheTarget->createMCObjectFileInfo(*Ctx, IsPIC));
188   Ctx->setObjectFileInfo(MOFI.get());
189   // We do not support X86 Large code model. Change this in the future.
190   bool Large = false;
191   if (TheTriple->getArch() == llvm::Triple::aarch64)
192     Large = true;
193   unsigned LSDAEncoding =
194       Large ? dwarf::DW_EH_PE_absptr : dwarf::DW_EH_PE_udata4;
195   unsigned TTypeEncoding =
196       Large ? dwarf::DW_EH_PE_absptr : dwarf::DW_EH_PE_udata4;
197   if (IsPIC) {
198     LSDAEncoding = dwarf::DW_EH_PE_pcrel |
199                    (Large ? dwarf::DW_EH_PE_sdata8 : dwarf::DW_EH_PE_sdata4);
200     TTypeEncoding = dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel |
201                     (Large ? dwarf::DW_EH_PE_sdata8 : dwarf::DW_EH_PE_sdata4);
202   }
203 
204   std::unique_ptr<MCDisassembler> DisAsm(
205       TheTarget->createMCDisassembler(*STI, *Ctx));
206 
207   if (!DisAsm) {
208     errs() << "BOLT-ERROR: no disassembler for target " << TripleName << "\n";
209     return nullptr;
210   }
211 
212   std::unique_ptr<const MCInstrAnalysis> MIA(
213       TheTarget->createMCInstrAnalysis(MII.get()));
214   if (!MIA) {
215     errs() << "BOLT-ERROR: failed to create instruction analysis for target"
216            << TripleName << "\n";
217     return nullptr;
218   }
219 
220   int AsmPrinterVariant = AsmInfo->getAssemblerDialect();
221   std::unique_ptr<MCInstPrinter> InstructionPrinter(
222       TheTarget->createMCInstPrinter(*TheTriple, AsmPrinterVariant, *AsmInfo,
223                                      *MII, *MRI));
224   if (!InstructionPrinter) {
225     errs() << "BOLT-ERROR: no instruction printer for target " << TripleName
226            << '\n';
227     return nullptr;
228   }
229   InstructionPrinter->setPrintImmHex(true);
230 
231   std::unique_ptr<MCCodeEmitter> MCE(
232       TheTarget->createMCCodeEmitter(*MII, *MRI, *Ctx));
233 
234   // Make sure we don't miss any output on core dumps.
235   outs().SetUnbuffered();
236   errs().SetUnbuffered();
237   dbgs().SetUnbuffered();
238 
239   auto BC = std::make_unique<BinaryContext>(
240       std::move(Ctx), std::move(DwCtx), std::move(TheTriple), TheTarget,
241       std::string(TripleName), std::move(MCE), std::move(MOFI),
242       std::move(AsmInfo), std::move(MII), std::move(STI),
243       std::move(InstructionPrinter), std::move(MIA), nullptr,
244       std::move(MRI), std::move(DisAsm));
245 
246   BC->TTypeEncoding = TTypeEncoding;
247   BC->LSDAEncoding = LSDAEncoding;
248 
249   BC->MAB = std::unique_ptr<MCAsmBackend>(
250       BC->TheTarget->createMCAsmBackend(*BC->STI, *BC->MRI, MCTargetOptions()));
251 
252   BC->setFilename(File->getFileName());
253 
254   BC->HasFixedLoadAddress = !IsPIC;
255 
256   return BC;
257 }
258 
259 bool BinaryContext::forceSymbolRelocations(StringRef SymbolName) const {
260   if (opts::HotText && (SymbolName == "__hot_start" ||
261                         SymbolName == "__hot_end"))
262     return true;
263 
264   if (opts::HotData && (SymbolName == "__hot_data_start" ||
265                         SymbolName == "__hot_data_end"))
266     return true;
267 
268   if (SymbolName == "_end")
269     return true;
270 
271   return false;
272 }
273 
274 std::unique_ptr<MCObjectWriter>
275 BinaryContext::createObjectWriter(raw_pwrite_stream &OS) {
276   return MAB->createObjectWriter(OS);
277 }
278 
279 bool BinaryContext::validateObjectNesting() const {
280   auto Itr = BinaryDataMap.begin();
281   auto End = BinaryDataMap.end();
282   bool Valid = true;
283   while (Itr != End) {
284     auto Next = std::next(Itr);
285     while (Next != End &&
286            Itr->second->getSection() == Next->second->getSection() &&
287            Itr->second->containsRange(Next->second->getAddress(),
288                                       Next->second->getSize())) {
289       if (Next->second->Parent != Itr->second) {
290         errs() << "BOLT-WARNING: object nesting incorrect for:\n"
291                << "BOLT-WARNING:  " << *Itr->second << "\n"
292                << "BOLT-WARNING:  " << *Next->second << "\n";
293         Valid = false;
294       }
295       ++Next;
296     }
297     Itr = Next;
298   }
299   return Valid;
300 }
301 
302 bool BinaryContext::validateHoles() const {
303   bool Valid = true;
304   for (BinarySection &Section : sections()) {
305     for (const Relocation &Rel : Section.relocations()) {
306       uint64_t RelAddr = Rel.Offset + Section.getAddress();
307       const BinaryData *BD = getBinaryDataContainingAddress(RelAddr);
308       if (!BD) {
309         errs() << "BOLT-WARNING: no BinaryData found for relocation at address"
310                << " 0x" << Twine::utohexstr(RelAddr) << " in "
311                << Section.getName() << "\n";
312         Valid = false;
313       } else if (!BD->getAtomicRoot()) {
314         errs() << "BOLT-WARNING: no atomic BinaryData found for relocation at "
315                << "address 0x" << Twine::utohexstr(RelAddr) << " in "
316                << Section.getName() << "\n";
317         Valid = false;
318       }
319     }
320   }
321   return Valid;
322 }
323 
324 void BinaryContext::updateObjectNesting(BinaryDataMapType::iterator GAI) {
325   const uint64_t Address = GAI->second->getAddress();
326   const uint64_t Size = GAI->second->getSize();
327 
328   auto fixParents =
329     [&](BinaryDataMapType::iterator Itr, BinaryData *NewParent) {
330     BinaryData *OldParent = Itr->second->Parent;
331     Itr->second->Parent = NewParent;
332     ++Itr;
333     while (Itr != BinaryDataMap.end() && OldParent &&
334            Itr->second->Parent == OldParent) {
335       Itr->second->Parent = NewParent;
336       ++Itr;
337     }
338   };
339 
340   // Check if the previous symbol contains the newly added symbol.
341   if (GAI != BinaryDataMap.begin()) {
342     BinaryData *Prev = std::prev(GAI)->second;
343     while (Prev) {
344       if (Prev->getSection() == GAI->second->getSection() &&
345           Prev->containsRange(Address, Size)) {
346         fixParents(GAI, Prev);
347       } else {
348         fixParents(GAI, nullptr);
349       }
350       Prev = Prev->Parent;
351     }
352   }
353 
354   // Check if the newly added symbol contains any subsequent symbols.
355   if (Size != 0) {
356     BinaryData *BD = GAI->second->Parent ? GAI->second->Parent : GAI->second;
357     auto Itr = std::next(GAI);
358     while (Itr != BinaryDataMap.end() &&
359            BD->containsRange(Itr->second->getAddress(),
360                              Itr->second->getSize())) {
361       Itr->second->Parent = BD;
362       ++Itr;
363     }
364   }
365 }
366 
367 iterator_range<BinaryContext::binary_data_iterator>
368 BinaryContext::getSubBinaryData(BinaryData *BD) {
369   auto Start = std::next(BinaryDataMap.find(BD->getAddress()));
370   auto End = Start;
371   while (End != BinaryDataMap.end() &&
372          BD->isAncestorOf(End->second)) {
373     ++End;
374   }
375   return make_range(Start, End);
376 }
377 
378 std::pair<const MCSymbol *, uint64_t>
379 BinaryContext::handleAddressRef(uint64_t Address, BinaryFunction &BF,
380                                 bool IsPCRel) {
381   uint64_t Addend = 0;
382 
383   if (isAArch64()) {
384     // Check if this is an access to a constant island and create bookkeeping
385     // to keep track of it and emit it later as part of this function.
386     if (MCSymbol *IslandSym = BF.getOrCreateIslandAccess(Address))
387       return std::make_pair(IslandSym, Addend);
388 
389     // Detect custom code written in assembly that refers to arbitrary
390     // constant islands from other functions. Write this reference so we
391     // can pull this constant island and emit it as part of this function
392     // too.
393     auto IslandIter = AddressToConstantIslandMap.lower_bound(Address);
394     if (IslandIter != AddressToConstantIslandMap.end()) {
395       if (MCSymbol *IslandSym =
396               IslandIter->second->getOrCreateProxyIslandAccess(Address, BF)) {
397         BF.createIslandDependency(IslandSym, IslandIter->second);
398         return std::make_pair(IslandSym, Addend);
399       }
400     }
401   }
402 
403   // Note that the address does not necessarily have to reside inside
404   // a section, it could be an absolute address too.
405   ErrorOr<BinarySection &> Section = getSectionForAddress(Address);
406   if (Section && Section->isText()) {
407     if (BF.containsAddress(Address, /*UseMaxSize=*/ isAArch64())) {
408       if (Address != BF.getAddress()) {
409         // The address could potentially escape. Mark it as another entry
410         // point into the function.
411         if (opts::Verbosity >= 1) {
412           outs() << "BOLT-INFO: potentially escaped address 0x"
413                  << Twine::utohexstr(Address) << " in function "
414                  << BF << '\n';
415         }
416         BF.HasInternalLabelReference = true;
417         return std::make_pair(
418                   BF.addEntryPointAtOffset(Address - BF.getAddress()),
419                   Addend);
420       }
421     } else {
422       BF.InterproceduralReferences.insert(Address);
423     }
424   }
425 
426   // With relocations, catch jump table references outside of the basic block
427   // containing the indirect jump.
428   if (HasRelocations) {
429     const MemoryContentsType MemType = analyzeMemoryAt(Address, BF);
430     if (MemType == MemoryContentsType::POSSIBLE_PIC_JUMP_TABLE && IsPCRel) {
431       const MCSymbol *Symbol =
432         getOrCreateJumpTable(BF, Address, JumpTable::JTT_PIC);
433 
434       return std::make_pair(Symbol, Addend);
435     }
436   }
437 
438   if (BinaryData *BD = getBinaryDataContainingAddress(Address)) {
439     return std::make_pair(BD->getSymbol(), Address - BD->getAddress());
440   }
441 
442   // TODO: use DWARF info to get size/alignment here?
443   MCSymbol *TargetSymbol = getOrCreateGlobalSymbol(Address, "DATAat");
444   LLVM_DEBUG(dbgs() << "Created symbol " << TargetSymbol->getName() << '\n');
445   return std::make_pair(TargetSymbol, Addend);
446 }
447 
448 MemoryContentsType
449 BinaryContext::analyzeMemoryAt(uint64_t Address, BinaryFunction &BF) {
450   if (!isX86())
451     return MemoryContentsType::UNKNOWN;
452 
453   ErrorOr<BinarySection &> Section = getSectionForAddress(Address);
454   if (!Section) {
455     // No section - possibly an absolute address. Since we don't allow
456     // internal function addresses to escape the function scope - we
457     // consider it a tail call.
458     if (opts::Verbosity > 1) {
459       errs() << "BOLT-WARNING: no section for address 0x"
460              << Twine::utohexstr(Address) << " referenced from function "
461              << BF << '\n';
462     }
463     return MemoryContentsType::UNKNOWN;
464   }
465 
466   if (Section->isVirtual()) {
467     // The contents are filled at runtime.
468     return MemoryContentsType::UNKNOWN;
469   }
470 
471   // No support for jump tables in code yet.
472   if (Section->isText())
473     return MemoryContentsType::UNKNOWN;
474 
475   // Start with checking for PIC jump table. We expect non-PIC jump tables
476   // to have high 32 bits set to 0.
477   if (analyzeJumpTable(Address, JumpTable::JTT_PIC, BF))
478     return MemoryContentsType::POSSIBLE_PIC_JUMP_TABLE;
479 
480   if (analyzeJumpTable(Address, JumpTable::JTT_NORMAL, BF))
481     return MemoryContentsType::POSSIBLE_JUMP_TABLE;
482 
483   return MemoryContentsType::UNKNOWN;
484 }
485 
486 bool BinaryContext::analyzeJumpTable(const uint64_t Address,
487                                      const JumpTable::JumpTableType Type,
488                                      BinaryFunction &BF,
489                                      const uint64_t NextJTAddress,
490                                      JumpTable::OffsetsType *Offsets) {
491   // Is one of the targets __builtin_unreachable?
492   bool HasUnreachable = false;
493 
494   // Number of targets other than __builtin_unreachable.
495   uint64_t NumRealEntries = 0;
496 
497   constexpr uint64_t INVALID_OFFSET = std::numeric_limits<uint64_t>::max();
498   auto addOffset = [&](uint64_t Offset) {
499     if (Offsets)
500       Offsets->emplace_back(Offset);
501   };
502 
503   auto isFragment = [](BinaryFunction &Fragment,
504                        BinaryFunction &Parent) -> bool {
505     // Check if <fragment restored name> == <parent restored name>.cold(.\d+)?
506     for (StringRef BFName : Parent.getNames()) {
507       std::string BFNamePrefix = Regex::escape(NameResolver::restore(BFName));
508       std::string BFNameRegex =
509           Twine(BFNamePrefix, "\\.cold(\\.[0-9]+)?").str();
510       if (Fragment.hasRestoredNameRegex(BFNameRegex))
511         return true;
512     }
513     return false;
514   };
515 
516   auto doesBelongToFunction = [&](const uint64_t Addr,
517                                   BinaryFunction *TargetBF) -> bool {
518     if (BF.containsAddress(Addr))
519       return true;
520     // Nothing to do if we failed to identify the containing function.
521     if (!TargetBF)
522       return false;
523     // Case 1: check if BF is a fragment and TargetBF is its parent.
524     if (BF.isFragment()) {
525       // BF is a fragment, but parent function is not registered.
526       // This means there's no direct jump between parent and fragment.
527       // Set parent link here in jump table analysis, based on function name
528       // matching heuristic.
529       if (!BF.getParentFragment() && isFragment(BF, *TargetBF))
530         registerFragment(BF, *TargetBF);
531       return BF.getParentFragment() == TargetBF;
532     }
533     // Case 2: check if TargetBF is a fragment and BF is its parent.
534     if (TargetBF->isFragment()) {
535       // TargetBF is a fragment, but parent function is not registered.
536       // This means there's no direct jump between parent and fragment.
537       // Set parent link here in jump table analysis, based on function name
538       // matching heuristic.
539       if (!TargetBF->getParentFragment() && isFragment(*TargetBF, BF))
540         registerFragment(*TargetBF, BF);
541       return TargetBF->getParentFragment() == &BF;
542     }
543     return false;
544   };
545 
546   ErrorOr<BinarySection &> Section = getSectionForAddress(Address);
547   if (!Section)
548     return false;
549 
550   // The upper bound is defined by containing object, section limits, and
551   // the next jump table in memory.
552   uint64_t UpperBound = Section->getEndAddress();
553   const BinaryData *JumpTableBD = getBinaryDataAtAddress(Address);
554   if (JumpTableBD && JumpTableBD->getSize()) {
555     assert(JumpTableBD->getEndAddress() <= UpperBound &&
556            "data object cannot cross a section boundary");
557     UpperBound = JumpTableBD->getEndAddress();
558   }
559   if (NextJTAddress) {
560     UpperBound = std::min(NextJTAddress, UpperBound);
561   }
562 
563   LLVM_DEBUG(dbgs() << "BOLT-DEBUG: analyzeJumpTable in " << BF.getPrintName()
564                     << '\n');
565   const uint64_t EntrySize = getJumpTableEntrySize(Type);
566   for (uint64_t EntryAddress = Address; EntryAddress <= UpperBound - EntrySize;
567        EntryAddress += EntrySize) {
568     LLVM_DEBUG(dbgs() << "  * Checking 0x" << Twine::utohexstr(EntryAddress)
569                       << " -> ");
570     // Check if there's a proper relocation against the jump table entry.
571     if (HasRelocations) {
572       if (Type == JumpTable::JTT_PIC &&
573           !DataPCRelocations.count(EntryAddress)) {
574         LLVM_DEBUG(
575             dbgs() << "FAIL: JTT_PIC table, no relocation for this address\n");
576         break;
577       }
578       if (Type == JumpTable::JTT_NORMAL && !getRelocationAt(EntryAddress)) {
579         LLVM_DEBUG(
580             dbgs()
581             << "FAIL: JTT_NORMAL table, no relocation for this address\n");
582         break;
583       }
584     }
585 
586     const uint64_t Value = (Type == JumpTable::JTT_PIC)
587       ? Address + *getSignedValueAtAddress(EntryAddress, EntrySize)
588       : *getPointerAtAddress(EntryAddress);
589 
590     // __builtin_unreachable() case.
591     if (Value == BF.getAddress() + BF.getSize()) {
592       addOffset(Value - BF.getAddress());
593       HasUnreachable = true;
594       LLVM_DEBUG(dbgs() << "OK: __builtin_unreachable\n");
595       continue;
596     }
597 
598     // Function or one of its fragments.
599     BinaryFunction *TargetBF = getBinaryFunctionContainingAddress(Value);
600 
601     // We assume that a jump table cannot have function start as an entry.
602     if (!doesBelongToFunction(Value, TargetBF) || Value == BF.getAddress()) {
603       LLVM_DEBUG({
604         if (!BF.containsAddress(Value)) {
605           dbgs() << "FAIL: function doesn't contain this address\n";
606           if (TargetBF) {
607             dbgs() << "  ! function containing this address: "
608                    << TargetBF->getPrintName() << '\n';
609             if (TargetBF->isFragment())
610               dbgs() << "  ! is a fragment\n";
611             BinaryFunction *TargetParent = TargetBF->getParentFragment();
612             dbgs() << "  ! its parent is "
613                    << (TargetParent ? TargetParent->getPrintName() : "(none)")
614                    << '\n';
615           }
616         }
617         if (Value == BF.getAddress())
618           dbgs() << "FAIL: jump table cannot have function start as an entry\n";
619       });
620       break;
621     }
622 
623     // Check there's an instruction at this offset.
624     if (TargetBF->getState() == BinaryFunction::State::Disassembled &&
625         !TargetBF->getInstructionAtOffset(Value - TargetBF->getAddress())) {
626       LLVM_DEBUG(dbgs() << "FAIL: no instruction at this offset\n");
627       break;
628     }
629 
630     ++NumRealEntries;
631 
632     if (TargetBF == &BF) {
633       // Address inside the function.
634       addOffset(Value - TargetBF->getAddress());
635       LLVM_DEBUG(dbgs() << "OK: real entry\n");
636     } else {
637       // Address in split fragment.
638       BF.setHasSplitJumpTable(true);
639       // Add invalid offset for proper identification of jump table size.
640       addOffset(INVALID_OFFSET);
641       LLVM_DEBUG(dbgs() << "OK: address in split fragment\n");
642     }
643   }
644 
645   // It's a jump table if the number of real entries is more than 1, or there's
646   // one real entry and "unreachable" targets. If there are only multiple
647   // "unreachable" targets, then it's not a jump table.
648   return NumRealEntries + HasUnreachable >= 2;
649 }
650 
651 void BinaryContext::populateJumpTables() {
652   std::vector<BinaryFunction *> FuncsToSkip;
653   LLVM_DEBUG(dbgs() << "DataPCRelocations: " << DataPCRelocations.size()
654                     << '\n');
655   for (auto JTI = JumpTables.begin(), JTE = JumpTables.end(); JTI != JTE;
656        ++JTI) {
657     JumpTable *JT = JTI->second;
658     BinaryFunction &BF = *JT->Parent;
659 
660     if (!BF.isSimple())
661       continue;
662 
663     uint64_t NextJTAddress = 0;
664     auto NextJTI = std::next(JTI);
665     if (NextJTI != JTE) {
666       NextJTAddress = NextJTI->second->getAddress();
667     }
668 
669     const bool Success = analyzeJumpTable(JT->getAddress(), JT->Type, BF,
670                                           NextJTAddress, &JT->OffsetEntries);
671     if (!Success) {
672       dbgs() << "failed to analyze jump table in function " << BF << '\n';
673       JT->print(dbgs());
674       if (NextJTI != JTE) {
675         dbgs() << "next jump table at 0x"
676                << Twine::utohexstr(NextJTI->second->getAddress())
677                << " belongs to function " << *NextJTI->second->Parent << '\n';
678         NextJTI->second->print(dbgs());
679       }
680       llvm_unreachable("jump table heuristic failure");
681     }
682 
683     for (uint64_t EntryOffset : JT->OffsetEntries) {
684       if (EntryOffset == BF.getSize())
685         BF.IgnoredBranches.emplace_back(EntryOffset, BF.getSize());
686       else
687         BF.registerReferencedOffset(EntryOffset);
688     }
689 
690     // In strict mode, erase PC-relative relocation record. Later we check that
691     // all such records are erased and thus have been accounted for.
692     if (opts::StrictMode && JT->Type == JumpTable::JTT_PIC) {
693       for (uint64_t Address = JT->getAddress();
694            Address < JT->getAddress() + JT->getSize();
695            Address += JT->EntrySize) {
696         DataPCRelocations.erase(DataPCRelocations.find(Address));
697       }
698     }
699 
700     // Mark to skip the function and all its fragments.
701     if (BF.hasSplitJumpTable())
702       FuncsToSkip.push_back(&BF);
703   }
704 
705   if (opts::StrictMode && DataPCRelocations.size()) {
706     LLVM_DEBUG({
707       dbgs() << DataPCRelocations.size()
708              << " unclaimed PC-relative relocations left in data:\n";
709       for (uint64_t Reloc : DataPCRelocations)
710         dbgs() << Twine::utohexstr(Reloc) << '\n';
711     });
712     assert(0 && "unclaimed PC-relative relocations left in data\n");
713   }
714   clearList(DataPCRelocations);
715   // Functions containing split jump tables need to be skipped with all
716   // fragments.
717   for (BinaryFunction *BF : FuncsToSkip) {
718     BinaryFunction *ParentBF =
719         const_cast<BinaryFunction *>(BF->getTopmostFragment());
720     LLVM_DEBUG(dbgs() << "Skipping " << ParentBF->getPrintName()
721                       << " family\n");
722     ParentBF->setIgnored();
723     ParentBF->ignoreFragments();
724   }
725 }
726 
727 MCSymbol *BinaryContext::getOrCreateGlobalSymbol(uint64_t Address,
728                                                  Twine Prefix,
729                                                  uint64_t Size,
730                                                  uint16_t Alignment,
731                                                  unsigned Flags) {
732   auto Itr = BinaryDataMap.find(Address);
733   if (Itr != BinaryDataMap.end()) {
734     assert(Itr->second->getSize() == Size || !Size);
735     return Itr->second->getSymbol();
736   }
737 
738   std::string Name = (Prefix + "0x" + Twine::utohexstr(Address)).str();
739   assert(!GlobalSymbols.count(Name) && "created name is not unique");
740   return registerNameAtAddress(Name, Address, Size, Alignment, Flags);
741 }
742 
743 MCSymbol *BinaryContext::getOrCreateUndefinedGlobalSymbol(StringRef Name) {
744   return Ctx->getOrCreateSymbol(Name);
745 }
746 
747 BinaryFunction *BinaryContext::createBinaryFunction(
748     const std::string &Name, BinarySection &Section, uint64_t Address,
749     uint64_t Size, uint64_t SymbolSize, uint16_t Alignment) {
750   auto Result = BinaryFunctions.emplace(
751       Address, BinaryFunction(Name, Section, Address, Size, *this));
752   assert(Result.second == true && "unexpected duplicate function");
753   BinaryFunction *BF = &Result.first->second;
754   registerNameAtAddress(Name, Address, SymbolSize ? SymbolSize : Size,
755                         Alignment);
756   setSymbolToFunctionMap(BF->getSymbol(), BF);
757   return BF;
758 }
759 
760 const MCSymbol *
761 BinaryContext::getOrCreateJumpTable(BinaryFunction &Function, uint64_t Address,
762                                     JumpTable::JumpTableType Type) {
763   if (JumpTable *JT = getJumpTableContainingAddress(Address)) {
764     assert(JT->Type == Type && "jump table types have to match");
765     assert(JT->Parent == &Function &&
766            "cannot re-use jump table of a different function");
767     assert(Address == JT->getAddress() && "unexpected non-empty jump table");
768 
769     return JT->getFirstLabel();
770   }
771 
772   // Re-use the existing symbol if possible.
773   MCSymbol *JTLabel = nullptr;
774   if (BinaryData *Object = getBinaryDataAtAddress(Address)) {
775     if (!isInternalSymbolName(Object->getSymbol()->getName()))
776       JTLabel = Object->getSymbol();
777   }
778 
779   const uint64_t EntrySize = getJumpTableEntrySize(Type);
780   if (!JTLabel) {
781     const std::string JumpTableName = generateJumpTableName(Function, Address);
782     JTLabel = registerNameAtAddress(JumpTableName, Address, 0, EntrySize);
783   }
784 
785   LLVM_DEBUG(dbgs() << "BOLT-DEBUG: creating jump table " << JTLabel->getName()
786                     << " in function " << Function << '\n');
787 
788   JumpTable *JT = new JumpTable(*JTLabel, Address, EntrySize, Type,
789                                 JumpTable::LabelMapType{{0, JTLabel}}, Function,
790                                 *getSectionForAddress(Address));
791   JumpTables.emplace(Address, JT);
792 
793   // Duplicate the entry for the parent function for easy access.
794   Function.JumpTables.emplace(Address, JT);
795 
796   return JTLabel;
797 }
798 
799 std::pair<uint64_t, const MCSymbol *>
800 BinaryContext::duplicateJumpTable(BinaryFunction &Function, JumpTable *JT,
801                                   const MCSymbol *OldLabel) {
802   auto L = scopeLock();
803   unsigned Offset = 0;
804   bool Found = false;
805   for (std::pair<const unsigned, MCSymbol *> Elmt : JT->Labels) {
806     if (Elmt.second != OldLabel)
807       continue;
808     Offset = Elmt.first;
809     Found = true;
810     break;
811   }
812   assert(Found && "Label not found");
813   MCSymbol *NewLabel = Ctx->createNamedTempSymbol("duplicatedJT");
814   JumpTable *NewJT =
815       new JumpTable(*NewLabel, JT->getAddress(), JT->EntrySize, JT->Type,
816                     JumpTable::LabelMapType{{Offset, NewLabel}}, Function,
817                     *getSectionForAddress(JT->getAddress()));
818   NewJT->Entries = JT->Entries;
819   NewJT->Counts = JT->Counts;
820   uint64_t JumpTableID = ++DuplicatedJumpTables;
821   // Invert it to differentiate from regular jump tables whose IDs are their
822   // addresses in the input binary memory space
823   JumpTableID = ~JumpTableID;
824   JumpTables.emplace(JumpTableID, NewJT);
825   Function.JumpTables.emplace(JumpTableID, NewJT);
826   return std::make_pair(JumpTableID, NewLabel);
827 }
828 
829 std::string BinaryContext::generateJumpTableName(const BinaryFunction &BF,
830                                                  uint64_t Address) {
831   size_t Id;
832   uint64_t Offset = 0;
833   if (const JumpTable *JT = BF.getJumpTableContainingAddress(Address)) {
834     Offset = Address - JT->getAddress();
835     auto Itr = JT->Labels.find(Offset);
836     if (Itr != JT->Labels.end()) {
837       return std::string(Itr->second->getName());
838     }
839     Id = JumpTableIds.at(JT->getAddress());
840   } else {
841     Id = JumpTableIds[Address] = BF.JumpTables.size();
842   }
843   return ("JUMP_TABLE/" + BF.getOneName().str() + "." + std::to_string(Id) +
844           (Offset ? ("." + std::to_string(Offset)) : ""));
845 }
846 
847 bool BinaryContext::hasValidCodePadding(const BinaryFunction &BF) {
848   // FIXME: aarch64 support is missing.
849   if (!isX86())
850     return true;
851 
852   if (BF.getSize() == BF.getMaxSize())
853     return true;
854 
855   ErrorOr<ArrayRef<unsigned char>> FunctionData = BF.getData();
856   assert(FunctionData && "cannot get function as data");
857 
858   uint64_t Offset = BF.getSize();
859   MCInst Instr;
860   uint64_t InstrSize = 0;
861   uint64_t InstrAddress = BF.getAddress() + Offset;
862   using std::placeholders::_1;
863 
864   // Skip instructions that satisfy the predicate condition.
865   auto skipInstructions = [&](std::function<bool(const MCInst &)> Predicate) {
866     const uint64_t StartOffset = Offset;
867     for (; Offset < BF.getMaxSize();
868          Offset += InstrSize, InstrAddress += InstrSize) {
869       if (!DisAsm->getInstruction(Instr,
870                                   InstrSize,
871                                   FunctionData->slice(Offset),
872                                   InstrAddress,
873                                   nulls()))
874         break;
875       if (!Predicate(Instr))
876         break;
877     }
878 
879     return Offset - StartOffset;
880   };
881 
882   // Skip a sequence of zero bytes.
883   auto skipZeros = [&]() {
884     const uint64_t StartOffset = Offset;
885     for (; Offset < BF.getMaxSize(); ++Offset)
886       if ((*FunctionData)[Offset] != 0)
887         break;
888 
889     return Offset - StartOffset;
890   };
891 
892   // Accept the whole padding area filled with breakpoints.
893   auto isBreakpoint = std::bind(&MCPlusBuilder::isBreakpoint, MIB.get(), _1);
894   if (skipInstructions(isBreakpoint) && Offset == BF.getMaxSize())
895     return true;
896 
897   auto isNoop = std::bind(&MCPlusBuilder::isNoop, MIB.get(), _1);
898 
899   // Some functions have a jump to the next function or to the padding area
900   // inserted after the body.
901   auto isSkipJump = [&](const MCInst &Instr) {
902     uint64_t TargetAddress = 0;
903     if (MIB->isUnconditionalBranch(Instr) &&
904         MIB->evaluateBranch(Instr, InstrAddress, InstrSize, TargetAddress)) {
905       if (TargetAddress >= InstrAddress + InstrSize &&
906           TargetAddress <= BF.getAddress() + BF.getMaxSize()) {
907         return true;
908       }
909     }
910     return false;
911   };
912 
913   // Skip over nops, jumps, and zero padding. Allow interleaving (this happens).
914   while (skipInstructions(isNoop) ||
915          skipInstructions(isSkipJump) ||
916          skipZeros())
917     ;
918 
919   if (Offset == BF.getMaxSize())
920     return true;
921 
922   if (opts::Verbosity >= 1) {
923     errs() << "BOLT-WARNING: bad padding at address 0x"
924            << Twine::utohexstr(BF.getAddress() + BF.getSize())
925            << " starting at offset "
926            << (Offset - BF.getSize()) << " in function "
927            << BF << '\n'
928            << FunctionData->slice(BF.getSize(), BF.getMaxSize() - BF.getSize())
929            << '\n';
930   }
931 
932   return false;
933 }
934 
935 void BinaryContext::adjustCodePadding() {
936   for (auto &BFI : BinaryFunctions) {
937     BinaryFunction &BF = BFI.second;
938     if (!shouldEmit(BF))
939       continue;
940 
941     if (!hasValidCodePadding(BF)) {
942       if (HasRelocations) {
943         if (opts::Verbosity >= 1) {
944           outs() << "BOLT-INFO: function " << BF
945                  << " has invalid padding. Ignoring the function.\n";
946         }
947         BF.setIgnored();
948       } else {
949         BF.setMaxSize(BF.getSize());
950       }
951     }
952   }
953 }
954 
955 MCSymbol *BinaryContext::registerNameAtAddress(StringRef Name,
956                                                uint64_t Address,
957                                                uint64_t Size,
958                                                uint16_t Alignment,
959                                                unsigned Flags) {
960   // Register the name with MCContext.
961   MCSymbol *Symbol = Ctx->getOrCreateSymbol(Name);
962 
963   auto GAI = BinaryDataMap.find(Address);
964   BinaryData *BD;
965   if (GAI == BinaryDataMap.end()) {
966     ErrorOr<BinarySection &> SectionOrErr = getSectionForAddress(Address);
967     BinarySection &Section =
968         SectionOrErr ? SectionOrErr.get() : absoluteSection();
969     BD = new BinaryData(*Symbol,
970                         Address,
971                         Size,
972                         Alignment ? Alignment : 1,
973                         Section,
974                         Flags);
975     GAI = BinaryDataMap.emplace(Address, BD).first;
976     GlobalSymbols[Name] = BD;
977     updateObjectNesting(GAI);
978   } else {
979     BD = GAI->second;
980     if (!BD->hasName(Name)) {
981       GlobalSymbols[Name] = BD;
982       BD->Symbols.push_back(Symbol);
983     }
984   }
985 
986   return Symbol;
987 }
988 
989 const BinaryData *
990 BinaryContext::getBinaryDataContainingAddressImpl(uint64_t Address) const {
991   auto NI = BinaryDataMap.lower_bound(Address);
992   auto End = BinaryDataMap.end();
993   if ((NI != End && Address == NI->first) ||
994       ((NI != BinaryDataMap.begin()) && (NI-- != BinaryDataMap.begin()))) {
995     if (NI->second->containsAddress(Address)) {
996       return NI->second;
997     }
998 
999     // If this is a sub-symbol, see if a parent data contains the address.
1000     const BinaryData *BD = NI->second->getParent();
1001     while (BD) {
1002       if (BD->containsAddress(Address))
1003         return BD;
1004       BD = BD->getParent();
1005     }
1006   }
1007   return nullptr;
1008 }
1009 
1010 bool BinaryContext::setBinaryDataSize(uint64_t Address, uint64_t Size) {
1011   auto NI = BinaryDataMap.find(Address);
1012   assert(NI != BinaryDataMap.end());
1013   if (NI == BinaryDataMap.end())
1014     return false;
1015   // TODO: it's possible that a jump table starts at the same address
1016   // as a larger blob of private data.  When we set the size of the
1017   // jump table, it might be smaller than the total blob size.  In this
1018   // case we just leave the original size since (currently) it won't really
1019   // affect anything.  See T26915981.
1020   assert((!NI->second->Size || NI->second->Size == Size ||
1021           (NI->second->isJumpTable() && NI->second->Size > Size)) &&
1022          "can't change the size of a symbol that has already had its "
1023          "size set");
1024   if (!NI->second->Size) {
1025     NI->second->Size = Size;
1026     updateObjectNesting(NI);
1027     return true;
1028   }
1029   return false;
1030 }
1031 
1032 void BinaryContext::generateSymbolHashes() {
1033   auto isPadding = [](const BinaryData &BD) {
1034     StringRef Contents = BD.getSection().getContents();
1035     StringRef SymData = Contents.substr(BD.getOffset(), BD.getSize());
1036     return (BD.getName().startswith("HOLEat") ||
1037             SymData.find_first_not_of(0) == StringRef::npos);
1038   };
1039 
1040   uint64_t NumCollisions = 0;
1041   for (auto &Entry : BinaryDataMap) {
1042     BinaryData &BD = *Entry.second;
1043     StringRef Name = BD.getName();
1044 
1045     if (!isInternalSymbolName(Name))
1046       continue;
1047 
1048     // First check if a non-anonymous alias exists and move it to the front.
1049     if (BD.getSymbols().size() > 1) {
1050       auto Itr = std::find_if(BD.getSymbols().begin(),
1051                               BD.getSymbols().end(),
1052                               [&](const MCSymbol *Symbol) {
1053                                 return !isInternalSymbolName(Symbol->getName());
1054                               });
1055       if (Itr != BD.getSymbols().end()) {
1056         size_t Idx = std::distance(BD.getSymbols().begin(), Itr);
1057         std::swap(BD.getSymbols()[0], BD.getSymbols()[Idx]);
1058         continue;
1059       }
1060     }
1061 
1062     // We have to skip 0 size symbols since they will all collide.
1063     if (BD.getSize() == 0) {
1064       continue;
1065     }
1066 
1067     const uint64_t Hash = BD.getSection().hash(BD);
1068     const size_t Idx = Name.find("0x");
1069     std::string NewName = (Twine(Name.substr(0, Idx)) +
1070                  "_" + Twine::utohexstr(Hash)).str();
1071     if (getBinaryDataByName(NewName)) {
1072       // Ignore collisions for symbols that appear to be padding
1073       // (i.e. all zeros or a "hole")
1074       if (!isPadding(BD)) {
1075         if (opts::Verbosity) {
1076           errs() << "BOLT-WARNING: collision detected when hashing " << BD
1077                  << " with new name (" << NewName << "), skipping.\n";
1078         }
1079         ++NumCollisions;
1080       }
1081       continue;
1082     }
1083     BD.Symbols.insert(BD.Symbols.begin(),
1084                        Ctx->getOrCreateSymbol(NewName));
1085     GlobalSymbols[NewName] = &BD;
1086   }
1087   if (NumCollisions) {
1088     errs() << "BOLT-WARNING: " << NumCollisions
1089            << " collisions detected while hashing binary objects";
1090     if (!opts::Verbosity)
1091       errs() << ". Use -v=1 to see the list.";
1092     errs() << '\n';
1093   }
1094 }
1095 
1096 void BinaryContext::registerFragment(BinaryFunction &TargetFunction,
1097                                      BinaryFunction &Function) const {
1098   // Only a parent function (or a sibling) can reach its fragment.
1099   assert(!Function.IsFragment &&
1100          "only one cold fragment is supported at this time");
1101   if (BinaryFunction *TargetParent = TargetFunction.getParentFragment()) {
1102     assert(TargetParent == &Function && "mismatching parent function");
1103     return;
1104   }
1105   TargetFunction.setParentFragment(Function);
1106   Function.addFragment(TargetFunction);
1107   if (!HasRelocations) {
1108     TargetFunction.setSimple(false);
1109     Function.setSimple(false);
1110   }
1111   if (opts::Verbosity >= 1) {
1112     outs() << "BOLT-INFO: marking " << TargetFunction
1113            << " as a fragment of " << Function << '\n';
1114   }
1115 }
1116 
1117 void BinaryContext::processInterproceduralReferences(BinaryFunction &Function) {
1118   for (uint64_t Address : Function.InterproceduralReferences) {
1119     if (!Address)
1120       continue;
1121 
1122     BinaryFunction *TargetFunction =
1123         getBinaryFunctionContainingAddress(Address);
1124     if (&Function == TargetFunction)
1125       continue;
1126 
1127     if (TargetFunction) {
1128       if (TargetFunction->IsFragment)
1129         registerFragment(*TargetFunction, Function);
1130       if (uint64_t Offset = Address - TargetFunction->getAddress())
1131         TargetFunction->addEntryPointAtOffset(Offset);
1132 
1133       continue;
1134     }
1135 
1136     // Check if address falls in function padding space - this could be
1137     // unmarked data in code. In this case adjust the padding space size.
1138     ErrorOr<BinarySection &> Section = getSectionForAddress(Address);
1139     assert(Section && "cannot get section for referenced address");
1140 
1141     if (!Section->isText())
1142       continue;
1143 
1144     // PLT requires special handling and could be ignored in this context.
1145     StringRef SectionName = Section->getName();
1146     if (SectionName == ".plt" || SectionName == ".plt.got")
1147       continue;
1148 
1149     if (opts::processAllFunctions()) {
1150       errs() << "BOLT-ERROR: cannot process binaries with unmarked "
1151              << "object in code at address 0x"
1152              << Twine::utohexstr(Address) << " belonging to section "
1153              << SectionName << " in current mode\n";
1154       exit(1);
1155     }
1156 
1157     TargetFunction =
1158       getBinaryFunctionContainingAddress(Address,
1159                                          /*CheckPastEnd=*/false,
1160                                          /*UseMaxSize=*/true);
1161     // We are not going to overwrite non-simple functions, but for simple
1162     // ones - adjust the padding size.
1163     if (TargetFunction && TargetFunction->isSimple()) {
1164       errs() << "BOLT-WARNING: function " << *TargetFunction
1165              << " has an object detected in a padding region at address 0x"
1166              << Twine::utohexstr(Address) << '\n';
1167       TargetFunction->setMaxSize(TargetFunction->getSize());
1168     }
1169   }
1170 
1171   clearList(Function.InterproceduralReferences);
1172 }
1173 
1174 void BinaryContext::postProcessSymbolTable() {
1175   fixBinaryDataHoles();
1176   bool Valid = true;
1177   for (auto &Entry : BinaryDataMap) {
1178     BinaryData *BD = Entry.second;
1179     if ((BD->getName().startswith("SYMBOLat") ||
1180          BD->getName().startswith("DATAat")) &&
1181         !BD->getParent() &&
1182         !BD->getSize() &&
1183         !BD->isAbsolute() &&
1184         BD->getSection()) {
1185       errs() << "BOLT-WARNING: zero-sized top level symbol: " << *BD << "\n";
1186       Valid = false;
1187     }
1188   }
1189   assert(Valid);
1190   generateSymbolHashes();
1191 }
1192 
1193 void BinaryContext::foldFunction(BinaryFunction &ChildBF,
1194                                  BinaryFunction &ParentBF) {
1195   assert(!ChildBF.isMultiEntry() && !ParentBF.isMultiEntry() &&
1196          "cannot merge functions with multiple entry points");
1197 
1198   std::unique_lock<std::shared_timed_mutex> WriteCtxLock(CtxMutex,
1199                                                          std::defer_lock);
1200   std::unique_lock<std::shared_timed_mutex> WriteSymbolMapLock(
1201       SymbolToFunctionMapMutex, std::defer_lock);
1202 
1203   const StringRef ChildName = ChildBF.getOneName();
1204 
1205   // Move symbols over and update bookkeeping info.
1206   for (MCSymbol *Symbol : ChildBF.getSymbols()) {
1207     ParentBF.getSymbols().push_back(Symbol);
1208     WriteSymbolMapLock.lock();
1209     SymbolToFunctionMap[Symbol] = &ParentBF;
1210     WriteSymbolMapLock.unlock();
1211     // NB: there's no need to update BinaryDataMap and GlobalSymbols.
1212   }
1213   ChildBF.getSymbols().clear();
1214 
1215   // Move other names the child function is known under.
1216   std::move(ChildBF.Aliases.begin(), ChildBF.Aliases.end(),
1217             std::back_inserter(ParentBF.Aliases));
1218   ChildBF.Aliases.clear();
1219 
1220   if (HasRelocations) {
1221     // Merge execution counts of ChildBF into those of ParentBF.
1222     // Without relocations, we cannot reliably merge profiles as both functions
1223     // continue to exist and either one can be executed.
1224     ChildBF.mergeProfileDataInto(ParentBF);
1225 
1226     std::shared_lock<std::shared_timed_mutex> ReadBfsLock(BinaryFunctionsMutex,
1227                                                           std::defer_lock);
1228     std::unique_lock<std::shared_timed_mutex> WriteBfsLock(BinaryFunctionsMutex,
1229                                                            std::defer_lock);
1230     // Remove ChildBF from the global set of functions in relocs mode.
1231     ReadBfsLock.lock();
1232     auto FI = BinaryFunctions.find(ChildBF.getAddress());
1233     ReadBfsLock.unlock();
1234 
1235     assert(FI != BinaryFunctions.end() && "function not found");
1236     assert(&ChildBF == &FI->second && "function mismatch");
1237 
1238     WriteBfsLock.lock();
1239     ChildBF.clearDisasmState();
1240     FI = BinaryFunctions.erase(FI);
1241     WriteBfsLock.unlock();
1242 
1243   } else {
1244     // In non-relocation mode we keep the function, but rename it.
1245     std::string NewName = "__ICF_" + ChildName.str();
1246 
1247     WriteCtxLock.lock();
1248     ChildBF.getSymbols().push_back(Ctx->getOrCreateSymbol(NewName));
1249     WriteCtxLock.unlock();
1250 
1251     ChildBF.setFolded(&ParentBF);
1252   }
1253 }
1254 
1255 void BinaryContext::fixBinaryDataHoles() {
1256   assert(validateObjectNesting() && "object nesting inconsitency detected");
1257 
1258   for (BinarySection &Section : allocatableSections()) {
1259     std::vector<std::pair<uint64_t, uint64_t>> Holes;
1260 
1261     auto isNotHole = [&Section](const binary_data_iterator &Itr) {
1262       BinaryData *BD = Itr->second;
1263       bool isHole = (!BD->getParent() &&
1264                      !BD->getSize() &&
1265                      BD->isObject() &&
1266                      (BD->getName().startswith("SYMBOLat0x") ||
1267                       BD->getName().startswith("DATAat0x") ||
1268                       BD->getName().startswith("ANONYMOUS")));
1269       return !isHole && BD->getSection() == Section && !BD->getParent();
1270     };
1271 
1272     auto BDStart = BinaryDataMap.begin();
1273     auto BDEnd = BinaryDataMap.end();
1274     auto Itr = FilteredBinaryDataIterator(isNotHole, BDStart, BDEnd);
1275     auto End = FilteredBinaryDataIterator(isNotHole, BDEnd, BDEnd);
1276 
1277     uint64_t EndAddress = Section.getAddress();
1278 
1279     while (Itr != End) {
1280       if (Itr->second->getAddress() > EndAddress) {
1281         uint64_t Gap = Itr->second->getAddress() - EndAddress;
1282         Holes.emplace_back(EndAddress, Gap);
1283       }
1284       EndAddress = Itr->second->getEndAddress();
1285       ++Itr;
1286     }
1287 
1288     if (EndAddress < Section.getEndAddress()) {
1289       Holes.emplace_back(EndAddress, Section.getEndAddress() - EndAddress);
1290     }
1291 
1292     // If there is already a symbol at the start of the hole, grow that symbol
1293     // to cover the rest.  Otherwise, create a new symbol to cover the hole.
1294     for (std::pair<uint64_t, uint64_t> &Hole : Holes) {
1295       BinaryData *BD = getBinaryDataAtAddress(Hole.first);
1296       if (BD) {
1297         // BD->getSection() can be != Section if there are sections that
1298         // overlap.  In this case it is probably safe to just skip the holes
1299         // since the overlapping section will not(?) have any symbols in it.
1300         if (BD->getSection() == Section)
1301           setBinaryDataSize(Hole.first, Hole.second);
1302       } else {
1303         getOrCreateGlobalSymbol(Hole.first, "HOLEat", Hole.second, 1);
1304       }
1305     }
1306   }
1307 
1308   assert(validateObjectNesting() && "object nesting inconsitency detected");
1309   assert(validateHoles() && "top level hole detected in object map");
1310 }
1311 
1312 void BinaryContext::printGlobalSymbols(raw_ostream& OS) const {
1313   const BinarySection* CurrentSection = nullptr;
1314   bool FirstSection = true;
1315 
1316   for (auto &Entry : BinaryDataMap) {
1317     const BinaryData *BD = Entry.second;
1318     const BinarySection &Section = BD->getSection();
1319     if (FirstSection || Section != *CurrentSection) {
1320       uint64_t Address, Size;
1321       StringRef Name = Section.getName();
1322       if (Section) {
1323         Address = Section.getAddress();
1324         Size = Section.getSize();
1325       } else {
1326         Address = BD->getAddress();
1327         Size = BD->getSize();
1328       }
1329       OS << "BOLT-INFO: Section " << Name << ", "
1330          << "0x" + Twine::utohexstr(Address) << ":"
1331          << "0x" + Twine::utohexstr(Address + Size) << "/"
1332          << Size << "\n";
1333       CurrentSection = &Section;
1334       FirstSection = false;
1335     }
1336 
1337     OS << "BOLT-INFO: ";
1338     const BinaryData *P = BD->getParent();
1339     while (P) {
1340       OS << "  ";
1341       P = P->getParent();
1342     }
1343     OS << *BD << "\n";
1344   }
1345 }
1346 
1347 Expected<unsigned>
1348 BinaryContext::getDwarfFile(StringRef Directory, StringRef FileName,
1349                             unsigned FileNumber,
1350                             Optional<MD5::MD5Result> Checksum,
1351                             Optional<StringRef> Source, unsigned CUID) {
1352   DwarfLineTable &Table = DwarfLineTablesCUMap[CUID];
1353   return Table.tryGetFile(Directory, FileName, Checksum, Source,
1354                           Ctx->getDwarfVersion(), FileNumber);
1355 }
1356 
1357 unsigned BinaryContext::addDebugFilenameToUnit(const uint32_t DestCUID,
1358                                                const uint32_t SrcCUID,
1359                                                unsigned FileIndex) {
1360   DWARFCompileUnit *SrcUnit = DwCtx->getCompileUnitForOffset(SrcCUID);
1361   const DWARFDebugLine::LineTable *LineTable =
1362       DwCtx->getLineTableForUnit(SrcUnit);
1363   const std::vector<DWARFDebugLine::FileNameEntry> &FileNames =
1364       LineTable->Prologue.FileNames;
1365   // Dir indexes start at 1, as DWARF file numbers, and a dir index 0
1366   // means empty dir.
1367   assert(FileIndex > 0 && FileIndex <= FileNames.size() &&
1368          "FileIndex out of range for the compilation unit.");
1369   StringRef Dir = "";
1370   if (FileNames[FileIndex - 1].DirIdx != 0) {
1371     if (Optional<const char *> DirName = dwarf::toString(
1372             LineTable->Prologue
1373                 .IncludeDirectories[FileNames[FileIndex - 1].DirIdx - 1])) {
1374       Dir = *DirName;
1375     }
1376   }
1377   StringRef FileName = "";
1378   if (Optional<const char *> FName =
1379           dwarf::toString(FileNames[FileIndex - 1].Name))
1380     FileName = *FName;
1381   assert(FileName != "");
1382   return cantFail(getDwarfFile(Dir, FileName, 0, None, None, DestCUID));
1383 }
1384 
1385 std::vector<BinaryFunction *> BinaryContext::getSortedFunctions() {
1386   std::vector<BinaryFunction *> SortedFunctions(BinaryFunctions.size());
1387   std::transform(BinaryFunctions.begin(), BinaryFunctions.end(),
1388                  SortedFunctions.begin(),
1389                  [](std::pair<const uint64_t, BinaryFunction> &BFI) {
1390                    return &BFI.second;
1391                  });
1392 
1393   std::stable_sort(SortedFunctions.begin(), SortedFunctions.end(),
1394                    [] (const BinaryFunction *A, const BinaryFunction *B) {
1395                      if (A->hasValidIndex() && B->hasValidIndex()) {
1396                        return A->getIndex() < B->getIndex();
1397                      }
1398                      return A->hasValidIndex();
1399                    });
1400   return SortedFunctions;
1401 }
1402 
1403 std::vector<BinaryFunction *> BinaryContext::getAllBinaryFunctions() {
1404   std::vector<BinaryFunction *> AllFunctions;
1405   AllFunctions.reserve(BinaryFunctions.size() + InjectedBinaryFunctions.size());
1406   std::transform(BinaryFunctions.begin(), BinaryFunctions.end(),
1407                  std::back_inserter(AllFunctions),
1408                  [](std::pair<const uint64_t, BinaryFunction> &BFI) {
1409                    return &BFI.second;
1410                  });
1411   std::copy(InjectedBinaryFunctions.begin(), InjectedBinaryFunctions.end(),
1412             std::back_inserter(AllFunctions));
1413 
1414   return AllFunctions;
1415 }
1416 
1417 Optional<DWARFUnit *> BinaryContext::getDWOCU(uint64_t DWOId) {
1418   auto Iter = DWOCUs.find(DWOId);
1419   if (Iter == DWOCUs.end())
1420     return None;
1421 
1422   return Iter->second;
1423 }
1424 
1425 DWARFContext *BinaryContext::getDWOContext() {
1426   if (DWOCUs.empty())
1427     return nullptr;
1428   return &DWOCUs.begin()->second->getContext();
1429 }
1430 
1431 /// Handles DWO sections that can either be in .o, .dwo or .dwp files.
1432 void BinaryContext::preprocessDWODebugInfo() {
1433   for (const std::unique_ptr<DWARFUnit> &CU : DwCtx->compile_units()) {
1434     DWARFUnit *const DwarfUnit = CU.get();
1435     if (llvm::Optional<uint64_t> DWOId = DwarfUnit->getDWOId()) {
1436       DWARFUnit *DWOCU = DwarfUnit->getNonSkeletonUnitDIE(false).getDwarfUnit();
1437       if (!DWOCU->isDWOUnit()) {
1438         std::string DWOName = dwarf::toString(
1439             DwarfUnit->getUnitDIE().find(
1440                 {dwarf::DW_AT_dwo_name, dwarf::DW_AT_GNU_dwo_name}),
1441             "");
1442         outs() << "BOLT-WARNING: Debug Fission: DWO debug information for "
1443                << DWOName
1444                << " was not retrieved and won't be updated. Please check "
1445                   "relative path.\n";
1446         continue;
1447       }
1448       DWOCUs[*DWOId] = DWOCU;
1449     }
1450   }
1451 }
1452 
1453 void BinaryContext::preprocessDebugInfo() {
1454   struct CURange {
1455     uint64_t LowPC;
1456     uint64_t HighPC;
1457     DWARFUnit *Unit;
1458 
1459     bool operator<(const CURange &Other) const {
1460       return LowPC < Other.LowPC;
1461     }
1462   };
1463 
1464   // Building a map of address ranges to CUs similar to .debug_aranges and use
1465   // it to assign CU to functions.
1466   std::vector<CURange> AllRanges;
1467   AllRanges.reserve(DwCtx->getNumCompileUnits());
1468   for (const std::unique_ptr<DWARFUnit> &CU : DwCtx->compile_units()) {
1469     Expected<DWARFAddressRangesVector> RangesOrError =
1470         CU->getUnitDIE().getAddressRanges();
1471     if (!RangesOrError) {
1472       consumeError(RangesOrError.takeError());
1473       continue;
1474     }
1475     for (DWARFAddressRange &Range : *RangesOrError) {
1476       // Parts of the debug info could be invalidated due to corresponding code
1477       // being removed from the binary by the linker. Hence we check if the
1478       // address is a valid one.
1479       if (containsAddress(Range.LowPC))
1480         AllRanges.emplace_back(CURange{Range.LowPC, Range.HighPC, CU.get()});
1481     }
1482   }
1483 
1484   std::sort(AllRanges.begin(), AllRanges.end());
1485   for (auto &KV : BinaryFunctions) {
1486     const uint64_t FunctionAddress = KV.first;
1487     BinaryFunction &Function = KV.second;
1488 
1489     auto It = std::partition_point(AllRanges.begin(), AllRanges.end(),
1490         [=](CURange R) { return R.HighPC <= FunctionAddress; });
1491     if (It != AllRanges.end() && It->LowPC <= FunctionAddress) {
1492       Function.setDWARFUnit(It->Unit);
1493     }
1494   }
1495 
1496   // Discover units with debug info that needs to be updated.
1497   for (const auto &KV : BinaryFunctions) {
1498     const BinaryFunction &BF = KV.second;
1499     if (shouldEmit(BF) && BF.getDWARFUnit())
1500       ProcessedCUs.insert(BF.getDWARFUnit());
1501   }
1502 
1503   // Clear debug info for functions from units that we are not going to process.
1504   for (auto &KV : BinaryFunctions) {
1505     BinaryFunction &BF = KV.second;
1506     if (BF.getDWARFUnit() && !ProcessedCUs.count(BF.getDWARFUnit()))
1507       BF.setDWARFUnit(nullptr);
1508   }
1509 
1510   if (opts::Verbosity >= 1) {
1511     outs() << "BOLT-INFO: " << ProcessedCUs.size() << " out of "
1512            << DwCtx->getNumCompileUnits() << " CUs will be updated\n";
1513   }
1514 
1515   // Populate MCContext with DWARF files from all units.
1516   StringRef GlobalPrefix = AsmInfo->getPrivateGlobalPrefix();
1517   for (const std::unique_ptr<DWARFUnit> &CU : DwCtx->compile_units()) {
1518     const uint64_t CUID = CU->getOffset();
1519     getDwarfLineTable(CUID).setLabel(Ctx->getOrCreateSymbol(
1520         GlobalPrefix + "line_table_start" + Twine(CUID)));
1521 
1522     if (!ProcessedCUs.count(CU.get()))
1523       continue;
1524 
1525     const DWARFDebugLine::LineTable *LineTable =
1526         DwCtx->getLineTableForUnit(CU.get());
1527     const std::vector<DWARFDebugLine::FileNameEntry> &FileNames =
1528         LineTable->Prologue.FileNames;
1529 
1530     // Assign a unique label to every line table, one per CU.
1531     // Make sure empty debug line tables are registered too.
1532     if (FileNames.empty()) {
1533       cantFail(getDwarfFile("", "<unknown>", 0, None, None, CUID));
1534       continue;
1535     }
1536     for (size_t I = 0, Size = FileNames.size(); I != Size; ++I) {
1537       // Dir indexes start at 1, as DWARF file numbers, and a dir index 0
1538       // means empty dir.
1539       StringRef Dir = "";
1540       if (FileNames[I].DirIdx != 0)
1541         if (Optional<const char *> DirName = dwarf::toString(
1542                 LineTable->Prologue
1543                     .IncludeDirectories[FileNames[I].DirIdx - 1]))
1544           Dir = *DirName;
1545       StringRef FileName = "";
1546       if (Optional<const char *> FName = dwarf::toString(FileNames[I].Name))
1547         FileName = *FName;
1548       assert(FileName != "");
1549       cantFail(getDwarfFile(Dir, FileName, 0, None, None, CUID));
1550     }
1551   }
1552 
1553   preprocessDWODebugInfo();
1554 }
1555 
1556 bool BinaryContext::shouldEmit(const BinaryFunction &Function) const {
1557   if (opts::processAllFunctions())
1558     return true;
1559 
1560   if (Function.isIgnored())
1561     return false;
1562 
1563   // In relocation mode we will emit non-simple functions with CFG.
1564   // If the function does not have a CFG it should be marked as ignored.
1565   return HasRelocations || Function.isSimple();
1566 }
1567 
1568 void BinaryContext::printCFI(raw_ostream &OS, const MCCFIInstruction &Inst) {
1569   uint32_t Operation = Inst.getOperation();
1570   switch (Operation) {
1571   case MCCFIInstruction::OpSameValue:
1572     OS << "OpSameValue Reg" << Inst.getRegister();
1573     break;
1574   case MCCFIInstruction::OpRememberState:
1575     OS << "OpRememberState";
1576     break;
1577   case MCCFIInstruction::OpRestoreState:
1578     OS << "OpRestoreState";
1579     break;
1580   case MCCFIInstruction::OpOffset:
1581     OS << "OpOffset Reg" << Inst.getRegister() << " " << Inst.getOffset();
1582     break;
1583   case MCCFIInstruction::OpDefCfaRegister:
1584     OS << "OpDefCfaRegister Reg" << Inst.getRegister();
1585     break;
1586   case MCCFIInstruction::OpDefCfaOffset:
1587     OS << "OpDefCfaOffset " << Inst.getOffset();
1588     break;
1589   case MCCFIInstruction::OpDefCfa:
1590     OS << "OpDefCfa Reg" << Inst.getRegister() << " " << Inst.getOffset();
1591     break;
1592   case MCCFIInstruction::OpRelOffset:
1593     OS << "OpRelOffset Reg" << Inst.getRegister() << " " << Inst.getOffset();
1594     break;
1595   case MCCFIInstruction::OpAdjustCfaOffset:
1596     OS << "OfAdjustCfaOffset " << Inst.getOffset();
1597     break;
1598   case MCCFIInstruction::OpEscape:
1599     OS << "OpEscape";
1600     break;
1601   case MCCFIInstruction::OpRestore:
1602     OS << "OpRestore Reg" << Inst.getRegister();
1603     break;
1604   case MCCFIInstruction::OpUndefined:
1605     OS << "OpUndefined Reg" << Inst.getRegister();
1606     break;
1607   case MCCFIInstruction::OpRegister:
1608     OS << "OpRegister Reg" << Inst.getRegister() << " Reg"
1609        << Inst.getRegister2();
1610     break;
1611   case MCCFIInstruction::OpWindowSave:
1612     OS << "OpWindowSave";
1613     break;
1614   case MCCFIInstruction::OpGnuArgsSize:
1615     OS << "OpGnuArgsSize";
1616     break;
1617   default:
1618     OS << "Op#" << Operation;
1619     break;
1620   }
1621 }
1622 
1623 void BinaryContext::printInstruction(raw_ostream &OS,
1624                                      const MCInst &Instruction,
1625                                      uint64_t Offset,
1626                                      const BinaryFunction* Function,
1627                                      bool PrintMCInst,
1628                                      bool PrintMemData,
1629                                      bool PrintRelocations) const {
1630   if (MIB->isEHLabel(Instruction)) {
1631     OS << "  EH_LABEL: " << *MIB->getTargetSymbol(Instruction) << '\n';
1632     return;
1633   }
1634   OS << format("    %08" PRIx64 ": ", Offset);
1635   if (MIB->isCFI(Instruction)) {
1636     uint32_t Offset = Instruction.getOperand(0).getImm();
1637     OS << "\t!CFI\t$" << Offset << "\t; ";
1638     if (Function)
1639       printCFI(OS, *Function->getCFIFor(Instruction));
1640     OS << "\n";
1641     return;
1642   }
1643   InstPrinter->printInst(&Instruction, 0, "", *STI, OS);
1644   if (MIB->isCall(Instruction)) {
1645     if (MIB->isTailCall(Instruction))
1646       OS << " # TAILCALL ";
1647     if (MIB->isInvoke(Instruction)) {
1648       const Optional<MCPlus::MCLandingPad> EHInfo = MIB->getEHInfo(Instruction);
1649       OS << " # handler: ";
1650       if (EHInfo->first)
1651         OS << *EHInfo->first;
1652       else
1653         OS << '0';
1654       OS << "; action: " << EHInfo->second;
1655       const int64_t GnuArgsSize = MIB->getGnuArgsSize(Instruction);
1656       if (GnuArgsSize >= 0)
1657         OS << "; GNU_args_size = " << GnuArgsSize;
1658     }
1659   } else if (MIB->isIndirectBranch(Instruction)) {
1660     if (uint64_t JTAddress = MIB->getJumpTable(Instruction)) {
1661       OS << " # JUMPTABLE @0x" << Twine::utohexstr(JTAddress);
1662     } else {
1663       OS << " # UNKNOWN CONTROL FLOW";
1664     }
1665   }
1666 
1667   MIB->printAnnotations(Instruction, OS);
1668 
1669   if (opts::PrintDebugInfo) {
1670     DebugLineTableRowRef RowRef =
1671         DebugLineTableRowRef::fromSMLoc(Instruction.getLoc());
1672     if (RowRef != DebugLineTableRowRef::NULL_ROW) {
1673       const DWARFDebugLine::LineTable *LineTable;
1674       if (Function && Function->getDWARFUnit() &&
1675           Function->getDWARFUnit()->getOffset() == RowRef.DwCompileUnitIndex) {
1676         LineTable = Function->getDWARFLineTable();
1677       } else {
1678         LineTable = DwCtx->getLineTableForUnit(
1679             DwCtx->getCompileUnitForOffset(RowRef.DwCompileUnitIndex));
1680       }
1681       assert(LineTable &&
1682              "line table expected for instruction with debug info");
1683 
1684       const DWARFDebugLine::Row &Row = LineTable->Rows[RowRef.RowIndex - 1];
1685       StringRef FileName = "";
1686       if (Optional<const char *> FName =
1687               dwarf::toString(LineTable->Prologue.FileNames[Row.File - 1].Name))
1688         FileName = *FName;
1689       OS << " # debug line " << FileName << ":" << Row.Line;
1690       if (Row.Column)
1691         OS << ":" << Row.Column;
1692       if (Row.Discriminator)
1693         OS << " discriminator:" << Row.Discriminator;
1694     }
1695   }
1696 
1697   if ((opts::PrintRelocations || PrintRelocations) && Function) {
1698     const uint64_t Size = computeCodeSize(&Instruction, &Instruction + 1);
1699     Function->printRelocations(OS, Offset, Size);
1700   }
1701 
1702   OS << "\n";
1703 
1704   if (PrintMCInst) {
1705     Instruction.dump_pretty(OS, InstPrinter.get());
1706     OS << "\n";
1707   }
1708 }
1709 
1710 ErrorOr<BinarySection &> BinaryContext::getSectionForAddress(uint64_t Address) {
1711   auto SI = AddressToSection.upper_bound(Address);
1712   if (SI != AddressToSection.begin()) {
1713     --SI;
1714     uint64_t UpperBound = SI->first + SI->second->getSize();
1715     if (!SI->second->getSize())
1716       UpperBound += 1;
1717     if (UpperBound > Address)
1718       return *SI->second;
1719   }
1720   return std::make_error_code(std::errc::bad_address);
1721 }
1722 
1723 ErrorOr<StringRef>
1724 BinaryContext::getSectionNameForAddress(uint64_t Address) const {
1725   if (ErrorOr<const BinarySection &> Section = getSectionForAddress(Address)) {
1726     return Section->getName();
1727   }
1728   return std::make_error_code(std::errc::bad_address);
1729 }
1730 
1731 BinarySection &BinaryContext::registerSection(BinarySection *Section) {
1732   auto Res = Sections.insert(Section);
1733   (void)Res;
1734   assert(Res.second && "can't register the same section twice.");
1735 
1736   // Only register allocatable sections in the AddressToSection map.
1737   if (Section->isAllocatable() && Section->getAddress())
1738     AddressToSection.insert(std::make_pair(Section->getAddress(), Section));
1739   NameToSection.insert(
1740       std::make_pair(std::string(Section->getName()), Section));
1741   LLVM_DEBUG(dbgs() << "BOLT-DEBUG: registering " << *Section << "\n");
1742   return *Section;
1743 }
1744 
1745 BinarySection &BinaryContext::registerSection(SectionRef Section) {
1746   return registerSection(new BinarySection(*this, Section));
1747 }
1748 
1749 BinarySection &
1750 BinaryContext::registerSection(StringRef SectionName,
1751                                const BinarySection &OriginalSection) {
1752   return registerSection(new BinarySection(*this,
1753                                            SectionName,
1754                                            OriginalSection));
1755 }
1756 
1757 BinarySection &BinaryContext::registerOrUpdateSection(StringRef Name,
1758                                                       unsigned ELFType,
1759                                                       unsigned ELFFlags,
1760                                                       uint8_t *Data,
1761                                                       uint64_t Size,
1762                                                       unsigned Alignment) {
1763   auto NamedSections = getSectionByName(Name);
1764   if (NamedSections.begin() != NamedSections.end()) {
1765     assert(std::next(NamedSections.begin()) == NamedSections.end() &&
1766            "can only update unique sections");
1767     BinarySection *Section = NamedSections.begin()->second;
1768 
1769     LLVM_DEBUG(dbgs() << "BOLT-DEBUG: updating " << *Section << " -> ");
1770     const bool Flag = Section->isAllocatable();
1771     (void)Flag;
1772     Section->update(Data, Size, Alignment, ELFType, ELFFlags);
1773     LLVM_DEBUG(dbgs() << *Section << "\n");
1774     // FIXME: Fix section flags/attributes for MachO.
1775     if (isELF())
1776       assert(Flag == Section->isAllocatable() &&
1777              "can't change section allocation status");
1778     return *Section;
1779   }
1780 
1781   return registerSection(new BinarySection(*this, Name, Data, Size, Alignment,
1782                                            ELFType, ELFFlags));
1783 }
1784 
1785 bool BinaryContext::deregisterSection(BinarySection &Section) {
1786   BinarySection *SectionPtr = &Section;
1787   auto Itr = Sections.find(SectionPtr);
1788   if (Itr != Sections.end()) {
1789     auto Range = AddressToSection.equal_range(SectionPtr->getAddress());
1790     while (Range.first != Range.second) {
1791       if (Range.first->second == SectionPtr) {
1792         AddressToSection.erase(Range.first);
1793         break;
1794       }
1795       ++Range.first;
1796     }
1797 
1798     auto NameRange =
1799         NameToSection.equal_range(std::string(SectionPtr->getName()));
1800     while (NameRange.first != NameRange.second) {
1801       if (NameRange.first->second == SectionPtr) {
1802         NameToSection.erase(NameRange.first);
1803         break;
1804       }
1805       ++NameRange.first;
1806     }
1807 
1808     Sections.erase(Itr);
1809     delete SectionPtr;
1810     return true;
1811   }
1812   return false;
1813 }
1814 
1815 void BinaryContext::printSections(raw_ostream &OS) const {
1816   for (BinarySection *const &Section : Sections) {
1817     OS << "BOLT-INFO: " << *Section << "\n";
1818   }
1819 }
1820 
1821 BinarySection &BinaryContext::absoluteSection() {
1822   if (ErrorOr<BinarySection &> Section = getUniqueSectionByName("<absolute>"))
1823     return *Section;
1824   return registerOrUpdateSection("<absolute>", ELF::SHT_NULL, 0u);
1825 }
1826 
1827 ErrorOr<uint64_t>
1828 BinaryContext::getUnsignedValueAtAddress(uint64_t Address,
1829                                          size_t Size) const {
1830   const ErrorOr<const BinarySection &> Section = getSectionForAddress(Address);
1831   if (!Section)
1832     return std::make_error_code(std::errc::bad_address);
1833 
1834   if (Section->isVirtual())
1835     return 0;
1836 
1837   DataExtractor DE(Section->getContents(), AsmInfo->isLittleEndian(),
1838                    AsmInfo->getCodePointerSize());
1839   auto ValueOffset = static_cast<uint64_t>(Address - Section->getAddress());
1840   return DE.getUnsigned(&ValueOffset, Size);
1841 }
1842 
1843 ErrorOr<uint64_t>
1844 BinaryContext::getSignedValueAtAddress(uint64_t Address,
1845                                        size_t Size) const {
1846   const ErrorOr<const BinarySection &> Section = getSectionForAddress(Address);
1847   if (!Section)
1848     return std::make_error_code(std::errc::bad_address);
1849 
1850   if (Section->isVirtual())
1851     return 0;
1852 
1853   DataExtractor DE(Section->getContents(), AsmInfo->isLittleEndian(),
1854                    AsmInfo->getCodePointerSize());
1855   auto ValueOffset = static_cast<uint64_t>(Address - Section->getAddress());
1856   return DE.getSigned(&ValueOffset, Size);
1857 }
1858 
1859 void BinaryContext::addRelocation(uint64_t Address,
1860                                   MCSymbol *Symbol,
1861                                   uint64_t Type,
1862                                   uint64_t Addend,
1863                                   uint64_t Value) {
1864   ErrorOr<BinarySection &> Section = getSectionForAddress(Address);
1865   assert(Section && "cannot find section for address");
1866   Section->addRelocation(Address - Section->getAddress(),
1867                          Symbol,
1868                          Type,
1869                          Addend,
1870                          Value);
1871 }
1872 
1873 void BinaryContext::addDynamicRelocation(uint64_t Address,
1874                                          MCSymbol *Symbol,
1875                                          uint64_t Type,
1876                                          uint64_t Addend,
1877                                          uint64_t Value) {
1878   ErrorOr<BinarySection &> Section = getSectionForAddress(Address);
1879   assert(Section && "cannot find section for address");
1880   Section->addDynamicRelocation(Address - Section->getAddress(),
1881                                 Symbol,
1882                                 Type,
1883                                 Addend,
1884                                 Value);
1885 }
1886 
1887 bool BinaryContext::removeRelocationAt(uint64_t Address) {
1888   ErrorOr<BinarySection &> Section = getSectionForAddress(Address);
1889   assert(Section && "cannot find section for address");
1890   return Section->removeRelocationAt(Address - Section->getAddress());
1891 }
1892 
1893 const Relocation *BinaryContext::getRelocationAt(uint64_t Address) {
1894   ErrorOr<BinarySection &> Section = getSectionForAddress(Address);
1895   if (!Section)
1896     return nullptr;
1897 
1898   return Section->getRelocationAt(Address - Section->getAddress());
1899 }
1900 
1901 const Relocation *BinaryContext::getDynamicRelocationAt(uint64_t Address) {
1902   ErrorOr<BinarySection &> Section = getSectionForAddress(Address);
1903   if (!Section)
1904     return nullptr;
1905 
1906   return Section->getDynamicRelocationAt(Address - Section->getAddress());
1907 }
1908 
1909 void BinaryContext::markAmbiguousRelocations(BinaryData &BD,
1910                                              const uint64_t Address) {
1911   auto setImmovable = [&](BinaryData &BD) {
1912     BinaryData *Root = BD.getAtomicRoot();
1913     LLVM_DEBUG(if (Root->isMoveable()) {
1914       dbgs() << "BOLT-DEBUG: setting " << *Root << " as immovable "
1915              << "due to ambiguous relocation referencing 0x"
1916              << Twine::utohexstr(Address) << '\n';
1917     });
1918     Root->setIsMoveable(false);
1919   };
1920 
1921   if (Address == BD.getAddress()) {
1922     setImmovable(BD);
1923 
1924     // Set previous symbol as immovable
1925     BinaryData *Prev = getBinaryDataContainingAddress(Address - 1);
1926     if (Prev && Prev->getEndAddress() == BD.getAddress())
1927       setImmovable(*Prev);
1928   }
1929 
1930   if (Address == BD.getEndAddress()) {
1931     setImmovable(BD);
1932 
1933     // Set next symbol as immovable
1934     BinaryData *Next = getBinaryDataContainingAddress(BD.getEndAddress());
1935     if (Next && Next->getAddress() == BD.getEndAddress())
1936       setImmovable(*Next);
1937   }
1938 }
1939 
1940 BinaryFunction *BinaryContext::getFunctionForSymbol(const MCSymbol *Symbol,
1941                                                     uint64_t *EntryDesc) {
1942   std::shared_lock<std::shared_timed_mutex> Lock(SymbolToFunctionMapMutex);
1943   auto BFI = SymbolToFunctionMap.find(Symbol);
1944   if (BFI == SymbolToFunctionMap.end())
1945     return nullptr;
1946 
1947   BinaryFunction *BF = BFI->second;
1948   if (EntryDesc)
1949     *EntryDesc = BF->getEntryIDForSymbol(Symbol);
1950 
1951   return BF;
1952 }
1953 
1954 void BinaryContext::exitWithBugReport(StringRef Message,
1955                                       const BinaryFunction &Function) const {
1956   errs() << "=======================================\n";
1957   errs() << "BOLT is unable to proceed because it couldn't properly understand "
1958             "this function.\n";
1959   errs() << "If you are running the most recent version of BOLT, you may "
1960             "want to "
1961             "report this and paste this dump.\nPlease check that there is no "
1962             "sensitive contents being shared in this dump.\n";
1963   errs() << "\nOffending function: " << Function.getPrintName() << "\n\n";
1964   ScopedPrinter SP(errs());
1965   SP.printBinaryBlock("Function contents", *Function.getData());
1966   errs() << "\n";
1967   Function.dump();
1968   errs() << "ERROR: " << Message;
1969   errs() << "\n=======================================\n";
1970   exit(1);
1971 }
1972 
1973 BinaryFunction *
1974 BinaryContext::createInjectedBinaryFunction(const std::string &Name,
1975                                             bool IsSimple) {
1976   InjectedBinaryFunctions.push_back(new BinaryFunction(Name, *this, IsSimple));
1977   BinaryFunction *BF = InjectedBinaryFunctions.back();
1978   setSymbolToFunctionMap(BF->getSymbol(), BF);
1979   BF->CurrentState = BinaryFunction::State::CFG;
1980   return BF;
1981 }
1982 
1983 std::pair<size_t, size_t>
1984 BinaryContext::calculateEmittedSize(BinaryFunction &BF, bool FixBranches) {
1985   // Adjust branch instruction to match the current layout.
1986   if (FixBranches)
1987     BF.fixBranches();
1988 
1989   // Create local MC context to isolate the effect of ephemeral code emission.
1990   IndependentCodeEmitter MCEInstance = createIndependentMCCodeEmitter();
1991   MCContext *LocalCtx = MCEInstance.LocalCtx.get();
1992   MCAsmBackend *MAB =
1993       TheTarget->createMCAsmBackend(*STI, *MRI, MCTargetOptions());
1994 
1995   SmallString<256> Code;
1996   raw_svector_ostream VecOS(Code);
1997 
1998   std::unique_ptr<MCObjectWriter> OW = MAB->createObjectWriter(VecOS);
1999   std::unique_ptr<MCStreamer> Streamer(TheTarget->createMCObjectStreamer(
2000       *TheTriple, *LocalCtx, std::unique_ptr<MCAsmBackend>(MAB), std::move(OW),
2001       std::unique_ptr<MCCodeEmitter>(MCEInstance.MCE.release()), *STI,
2002       /*RelaxAll=*/false,
2003       /*IncrementalLinkerCompatible=*/false,
2004       /*DWARFMustBeAtTheEnd=*/false));
2005 
2006   Streamer->initSections(false, *STI);
2007 
2008   MCSection *Section = MCEInstance.LocalMOFI->getTextSection();
2009   Section->setHasInstructions(true);
2010 
2011   // Create symbols in the LocalCtx so that they get destroyed with it.
2012   MCSymbol *StartLabel = LocalCtx->createTempSymbol();
2013   MCSymbol *EndLabel = LocalCtx->createTempSymbol();
2014   MCSymbol *ColdStartLabel = LocalCtx->createTempSymbol();
2015   MCSymbol *ColdEndLabel = LocalCtx->createTempSymbol();
2016 
2017   Streamer->SwitchSection(Section);
2018   Streamer->emitLabel(StartLabel);
2019   emitFunctionBody(*Streamer, BF, /*EmitColdPart=*/false,
2020                    /*EmitCodeOnly=*/true);
2021   Streamer->emitLabel(EndLabel);
2022 
2023   if (BF.isSplit()) {
2024     MCSectionELF *ColdSection =
2025         LocalCtx->getELFSection(BF.getColdCodeSectionName(), ELF::SHT_PROGBITS,
2026                                 ELF::SHF_EXECINSTR | ELF::SHF_ALLOC);
2027     ColdSection->setHasInstructions(true);
2028 
2029     Streamer->SwitchSection(ColdSection);
2030     Streamer->emitLabel(ColdStartLabel);
2031     emitFunctionBody(*Streamer, BF, /*EmitColdPart=*/true,
2032                      /*EmitCodeOnly=*/true);
2033     Streamer->emitLabel(ColdEndLabel);
2034     // To avoid calling MCObjectStreamer::flushPendingLabels() which is private
2035     Streamer->emitBytes(StringRef(""));
2036     Streamer->SwitchSection(Section);
2037   }
2038 
2039   // To avoid calling MCObjectStreamer::flushPendingLabels() which is private or
2040   // MCStreamer::Finish(), which does more than we want
2041   Streamer->emitBytes(StringRef(""));
2042 
2043   MCAssembler &Assembler =
2044       static_cast<MCObjectStreamer *>(Streamer.get())->getAssembler();
2045   MCAsmLayout Layout(Assembler);
2046   Assembler.layout(Layout);
2047 
2048   const uint64_t HotSize =
2049       Layout.getSymbolOffset(*EndLabel) - Layout.getSymbolOffset(*StartLabel);
2050   const uint64_t ColdSize = BF.isSplit()
2051                                 ? Layout.getSymbolOffset(*ColdEndLabel) -
2052                                       Layout.getSymbolOffset(*ColdStartLabel)
2053                                 : 0ULL;
2054 
2055   // Clean-up the effect of the code emission.
2056   for (const MCSymbol &Symbol : Assembler.symbols()) {
2057     MCSymbol *MutableSymbol = const_cast<MCSymbol *>(&Symbol);
2058     MutableSymbol->setUndefined();
2059     MutableSymbol->setIsRegistered(false);
2060   }
2061 
2062   return std::make_pair(HotSize, ColdSize);
2063 }
2064 
2065 bool BinaryContext::validateEncoding(const MCInst &Inst,
2066                                      ArrayRef<uint8_t> InputEncoding) const {
2067   SmallString<256> Code;
2068   SmallVector<MCFixup, 4> Fixups;
2069   raw_svector_ostream VecOS(Code);
2070 
2071   MCE->encodeInstruction(Inst, VecOS, Fixups, *STI);
2072   auto EncodedData = ArrayRef<uint8_t>((uint8_t *)Code.data(), Code.size());
2073   if (InputEncoding != EncodedData) {
2074     if (opts::Verbosity > 1) {
2075       errs() << "BOLT-WARNING: mismatched encoding detected\n"
2076              << "      input: " << InputEncoding << '\n'
2077              << "     output: " << EncodedData << '\n';
2078     }
2079     return false;
2080   }
2081 
2082   return true;
2083 }
2084 
2085 uint64_t BinaryContext::getHotThreshold() const {
2086   static uint64_t Threshold = 0;
2087   if (Threshold == 0) {
2088     Threshold = std::max((uint64_t)opts::ExecutionCountThreshold,
2089         NumProfiledFuncs ? SumExecutionCount / (2 * NumProfiledFuncs) : 1);
2090   }
2091   return Threshold;
2092 }
2093 
2094 BinaryFunction *
2095 BinaryContext::getBinaryFunctionContainingAddress(uint64_t Address,
2096                                                   bool CheckPastEnd,
2097                                                   bool UseMaxSize) {
2098   auto FI = BinaryFunctions.upper_bound(Address);
2099   if (FI == BinaryFunctions.begin())
2100     return nullptr;
2101   --FI;
2102 
2103   const uint64_t UsedSize =
2104       UseMaxSize ? FI->second.getMaxSize() : FI->second.getSize();
2105 
2106   if (Address >= FI->first + UsedSize + (CheckPastEnd ? 1 : 0))
2107     return nullptr;
2108 
2109   return &FI->second;
2110 }
2111 
2112 BinaryFunction *
2113 BinaryContext::getBinaryFunctionAtAddress(uint64_t Address) {
2114   // First, try to find a function starting at the given address. If the
2115   // function was folded, this will get us the original folded function if it
2116   // wasn't removed from the list, e.g. in non-relocation mode.
2117   auto BFI = BinaryFunctions.find(Address);
2118   if (BFI != BinaryFunctions.end()) {
2119     return &BFI->second;
2120   }
2121 
2122   // We might have folded the function matching the object at the given
2123   // address. In such case, we look for a function matching the symbol
2124   // registered at the original address. The new function (the one that the
2125   // original was folded into) will hold the symbol.
2126   if (const BinaryData *BD = getBinaryDataAtAddress(Address)) {
2127     uint64_t EntryID = 0;
2128     BinaryFunction *BF = getFunctionForSymbol(BD->getSymbol(), &EntryID);
2129     if (BF && EntryID == 0)
2130       return BF;
2131   }
2132   return nullptr;
2133 }
2134 
2135 DebugAddressRangesVector BinaryContext::translateModuleAddressRanges(
2136       const DWARFAddressRangesVector &InputRanges) const {
2137   DebugAddressRangesVector OutputRanges;
2138 
2139   for (const DWARFAddressRange Range : InputRanges) {
2140     auto BFI = BinaryFunctions.lower_bound(Range.LowPC);
2141     while (BFI != BinaryFunctions.end()) {
2142       const BinaryFunction &Function = BFI->second;
2143       if (Function.getAddress() >= Range.HighPC)
2144         break;
2145       const DebugAddressRangesVector FunctionRanges =
2146           Function.getOutputAddressRanges();
2147       std::move(std::begin(FunctionRanges),
2148                 std::end(FunctionRanges),
2149                 std::back_inserter(OutputRanges));
2150       std::advance(BFI, 1);
2151     }
2152   }
2153 
2154   return OutputRanges;
2155 }
2156 
2157 } // namespace bolt
2158 } // namespace llvm
2159