12f09f445SMaksim Panchenko //===- bolt/Core/BinaryContext.cpp - Low-level context --------------------===// 2a34c753fSRafael Auler // 3a34c753fSRafael Auler // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4a34c753fSRafael Auler // See https://llvm.org/LICENSE.txt for license information. 5a34c753fSRafael Auler // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6a34c753fSRafael Auler // 7a34c753fSRafael Auler //===----------------------------------------------------------------------===// 8a34c753fSRafael Auler // 92f09f445SMaksim Panchenko // This file implements the BinaryContext class. 102f09f445SMaksim Panchenko // 11a34c753fSRafael Auler //===----------------------------------------------------------------------===// 12a34c753fSRafael Auler 13a34c753fSRafael Auler #include "bolt/Core/BinaryContext.h" 14a34c753fSRafael Auler #include "bolt/Core/BinaryEmitter.h" 15a34c753fSRafael Auler #include "bolt/Core/BinaryFunction.h" 16a34c753fSRafael Auler #include "bolt/Utils/CommandLineOpts.h" 17a34c753fSRafael Auler #include "bolt/Utils/NameResolver.h" 18a34c753fSRafael Auler #include "bolt/Utils/Utils.h" 19a34c753fSRafael Auler #include "llvm/ADT/Twine.h" 20290e4823Sserge-sans-paille #include "llvm/DebugInfo/DWARF/DWARFCompileUnit.h" 21a34c753fSRafael Auler #include "llvm/DebugInfo/DWARF/DWARFFormValue.h" 22a34c753fSRafael Auler #include "llvm/DebugInfo/DWARF/DWARFUnit.h" 23a34c753fSRafael Auler #include "llvm/MC/MCAsmLayout.h" 24a34c753fSRafael Auler #include "llvm/MC/MCAssembler.h" 25a34c753fSRafael Auler #include "llvm/MC/MCContext.h" 26a34c753fSRafael Auler #include "llvm/MC/MCDisassembler/MCDisassembler.h" 27a34c753fSRafael Auler #include "llvm/MC/MCInstPrinter.h" 28a34c753fSRafael Auler #include "llvm/MC/MCObjectStreamer.h" 29a34c753fSRafael Auler #include "llvm/MC/MCObjectWriter.h" 3057f7c7d9Sserge-sans-paille #include "llvm/MC/MCRegisterInfo.h" 31a34c753fSRafael Auler #include "llvm/MC/MCSectionELF.h" 32a34c753fSRafael Auler #include "llvm/MC/MCStreamer.h" 3357f7c7d9Sserge-sans-paille #include "llvm/MC/MCSubtargetInfo.h" 34a34c753fSRafael Auler #include "llvm/MC/MCSymbol.h" 35a34c753fSRafael Auler #include "llvm/Support/CommandLine.h" 3632d2473aSAmir Ayupov #include "llvm/Support/Error.h" 37a34c753fSRafael Auler #include "llvm/Support/Regex.h" 386aa735ceSAmir Ayupov #include <algorithm> 39a34c753fSRafael Auler #include <functional> 40a34c753fSRafael Auler #include <iterator> 416aa735ceSAmir Ayupov #include <unordered_set> 42a34c753fSRafael Auler 43a34c753fSRafael Auler using namespace llvm; 44a34c753fSRafael Auler 45a34c753fSRafael Auler #undef DEBUG_TYPE 46a34c753fSRafael Auler #define DEBUG_TYPE "bolt" 47a34c753fSRafael Auler 48a34c753fSRafael Auler namespace opts { 49a34c753fSRafael Auler 50b92436efSFangrui Song cl::opt<bool> NoHugePages("no-huge-pages", 51a34c753fSRafael Auler cl::desc("use regular size pages for code alignment"), 52b92436efSFangrui Song cl::Hidden, cl::cat(BoltCategory)); 53a34c753fSRafael Auler 54a34c753fSRafael Auler static cl::opt<bool> 55a34c753fSRafael Auler PrintDebugInfo("print-debug-info", 56a34c753fSRafael Auler cl::desc("print debug info when printing functions"), 57a34c753fSRafael Auler cl::Hidden, 58a34c753fSRafael Auler cl::ZeroOrMore, 59a34c753fSRafael Auler cl::cat(BoltCategory)); 60a34c753fSRafael Auler 61b92436efSFangrui Song cl::opt<bool> PrintRelocations( 62b92436efSFangrui Song "print-relocations", 63b92436efSFangrui Song cl::desc("print relocations when printing functions/objects"), cl::Hidden, 64a34c753fSRafael Auler cl::cat(BoltCategory)); 65a34c753fSRafael Auler 66a34c753fSRafael Auler static cl::opt<bool> 67a34c753fSRafael Auler PrintMemData("print-mem-data", 68a34c753fSRafael Auler cl::desc("print memory data annotations when printing functions"), 69a34c753fSRafael Auler cl::Hidden, 70a34c753fSRafael Auler cl::ZeroOrMore, 71a34c753fSRafael Auler cl::cat(BoltCategory)); 72a34c753fSRafael Auler 73a34c753fSRafael Auler } // namespace opts 74a34c753fSRafael Auler 75a34c753fSRafael Auler namespace llvm { 76a34c753fSRafael Auler namespace bolt { 77a34c753fSRafael Auler 78a34c753fSRafael Auler BinaryContext::BinaryContext(std::unique_ptr<MCContext> Ctx, 79a34c753fSRafael Auler std::unique_ptr<DWARFContext> DwCtx, 80a34c753fSRafael Auler std::unique_ptr<Triple> TheTriple, 8140c2e0faSMaksim Panchenko const Target *TheTarget, std::string TripleName, 82a34c753fSRafael Auler std::unique_ptr<MCCodeEmitter> MCE, 83a34c753fSRafael Auler std::unique_ptr<MCObjectFileInfo> MOFI, 84a34c753fSRafael Auler std::unique_ptr<const MCAsmInfo> AsmInfo, 85a34c753fSRafael Auler std::unique_ptr<const MCInstrInfo> MII, 86a34c753fSRafael Auler std::unique_ptr<const MCSubtargetInfo> STI, 87a34c753fSRafael Auler std::unique_ptr<MCInstPrinter> InstPrinter, 88a34c753fSRafael Auler std::unique_ptr<const MCInstrAnalysis> MIA, 89a34c753fSRafael Auler std::unique_ptr<MCPlusBuilder> MIB, 90a34c753fSRafael Auler std::unique_ptr<const MCRegisterInfo> MRI, 91a34c753fSRafael Auler std::unique_ptr<MCDisassembler> DisAsm) 9240c2e0faSMaksim Panchenko : Ctx(std::move(Ctx)), DwCtx(std::move(DwCtx)), 9340c2e0faSMaksim Panchenko TheTriple(std::move(TheTriple)), TheTarget(TheTarget), 9440c2e0faSMaksim Panchenko TripleName(TripleName), MCE(std::move(MCE)), MOFI(std::move(MOFI)), 9540c2e0faSMaksim Panchenko AsmInfo(std::move(AsmInfo)), MII(std::move(MII)), STI(std::move(STI)), 9640c2e0faSMaksim Panchenko InstPrinter(std::move(InstPrinter)), MIA(std::move(MIA)), 9740c2e0faSMaksim Panchenko MIB(std::move(MIB)), MRI(std::move(MRI)), DisAsm(std::move(DisAsm)) { 98a34c753fSRafael Auler Relocation::Arch = this->TheTriple->getArch(); 99db65429dSElvina Yakubova RegularPageSize = isAArch64() ? RegularPageSizeAArch64 : RegularPageSizeX86; 100a34c753fSRafael Auler PageAlign = opts::NoHugePages ? RegularPageSize : HugePageSize; 101a34c753fSRafael Auler } 102a34c753fSRafael Auler 103a34c753fSRafael Auler BinaryContext::~BinaryContext() { 1043652483cSRafael Auler for (BinarySection *Section : Sections) 105a34c753fSRafael Auler delete Section; 1063652483cSRafael Auler for (BinaryFunction *InjectedFunction : InjectedBinaryFunctions) 107a34c753fSRafael Auler delete InjectedFunction; 1083652483cSRafael Auler for (std::pair<const uint64_t, JumpTable *> JTI : JumpTables) 109a34c753fSRafael Auler delete JTI.second; 110a34c753fSRafael Auler clearBinaryData(); 111a34c753fSRafael Auler } 112a34c753fSRafael Auler 113a34c753fSRafael Auler /// Create BinaryContext for a given architecture \p ArchName and 114a34c753fSRafael Auler /// triple \p TripleName. 11532d2473aSAmir Ayupov Expected<std::unique_ptr<BinaryContext>> 116a34c753fSRafael Auler BinaryContext::createBinaryContext(const ObjectFile *File, bool IsPIC, 117a34c753fSRafael Auler std::unique_ptr<DWARFContext> DwCtx) { 118a34c753fSRafael Auler StringRef ArchName = ""; 119a34c753fSRafael Auler StringRef FeaturesStr = ""; 120a34c753fSRafael Auler switch (File->getArch()) { 121a34c753fSRafael Auler case llvm::Triple::x86_64: 122a34c753fSRafael Auler ArchName = "x86-64"; 123a34c753fSRafael Auler FeaturesStr = "+nopl"; 124a34c753fSRafael Auler break; 125a34c753fSRafael Auler case llvm::Triple::aarch64: 126a34c753fSRafael Auler ArchName = "aarch64"; 12775641678SDenis Revunov FeaturesStr = "+all"; 128a34c753fSRafael Auler break; 129a34c753fSRafael Auler default: 13032d2473aSAmir Ayupov return createStringError(std::errc::not_supported, 13132d2473aSAmir Ayupov "BOLT-ERROR: Unrecognized machine in ELF file"); 132a34c753fSRafael Auler } 133a34c753fSRafael Auler 134a34c753fSRafael Auler auto TheTriple = std::make_unique<Triple>(File->makeTriple()); 135a34c753fSRafael Auler const std::string TripleName = TheTriple->str(); 136a34c753fSRafael Auler 137a34c753fSRafael Auler std::string Error; 138a34c753fSRafael Auler const Target *TheTarget = 139a34c753fSRafael Auler TargetRegistry::lookupTarget(std::string(ArchName), *TheTriple, Error); 14032d2473aSAmir Ayupov if (!TheTarget) 14132d2473aSAmir Ayupov return createStringError(make_error_code(std::errc::not_supported), 14232d2473aSAmir Ayupov Twine("BOLT-ERROR: ", Error)); 143a34c753fSRafael Auler 144a34c753fSRafael Auler std::unique_ptr<const MCRegisterInfo> MRI( 145a34c753fSRafael Auler TheTarget->createMCRegInfo(TripleName)); 14632d2473aSAmir Ayupov if (!MRI) 14732d2473aSAmir Ayupov return createStringError( 14832d2473aSAmir Ayupov make_error_code(std::errc::not_supported), 14932d2473aSAmir Ayupov Twine("BOLT-ERROR: no register info for target ", TripleName)); 150a34c753fSRafael Auler 151a34c753fSRafael Auler // Set up disassembler. 152c31af7cfSAmir Ayupov std::unique_ptr<MCAsmInfo> AsmInfo( 153a34c753fSRafael Auler TheTarget->createMCAsmInfo(*MRI, TripleName, MCTargetOptions())); 15432d2473aSAmir Ayupov if (!AsmInfo) 15532d2473aSAmir Ayupov return createStringError( 15632d2473aSAmir Ayupov make_error_code(std::errc::not_supported), 15732d2473aSAmir Ayupov Twine("BOLT-ERROR: no assembly info for target ", TripleName)); 158c31af7cfSAmir Ayupov // BOLT creates "func@PLT" symbols for PLT entries. In function assembly dump 159c31af7cfSAmir Ayupov // we want to emit such names as using @PLT without double quotes to convey 160c31af7cfSAmir Ayupov // variant kind to the assembler. BOLT doesn't rely on the linker so we can 161c31af7cfSAmir Ayupov // override the default AsmInfo behavior to emit names the way we want. 162c31af7cfSAmir Ayupov AsmInfo->setAllowAtInName(true); 163a34c753fSRafael Auler 164a34c753fSRafael Auler std::unique_ptr<const MCSubtargetInfo> STI( 165a34c753fSRafael Auler TheTarget->createMCSubtargetInfo(TripleName, "", FeaturesStr)); 16632d2473aSAmir Ayupov if (!STI) 16732d2473aSAmir Ayupov return createStringError( 16832d2473aSAmir Ayupov make_error_code(std::errc::not_supported), 16932d2473aSAmir Ayupov Twine("BOLT-ERROR: no subtarget info for target ", TripleName)); 170a34c753fSRafael Auler 171a34c753fSRafael Auler std::unique_ptr<const MCInstrInfo> MII(TheTarget->createMCInstrInfo()); 17232d2473aSAmir Ayupov if (!MII) 17332d2473aSAmir Ayupov return createStringError( 17432d2473aSAmir Ayupov make_error_code(std::errc::not_supported), 17532d2473aSAmir Ayupov Twine("BOLT-ERROR: no instruction info for target ", TripleName)); 176a34c753fSRafael Auler 177a34c753fSRafael Auler std::unique_ptr<MCContext> Ctx( 178a34c753fSRafael Auler new MCContext(*TheTriple, AsmInfo.get(), MRI.get(), STI.get())); 179a34c753fSRafael Auler std::unique_ptr<MCObjectFileInfo> MOFI( 180a34c753fSRafael Auler TheTarget->createMCObjectFileInfo(*Ctx, IsPIC)); 181a34c753fSRafael Auler Ctx->setObjectFileInfo(MOFI.get()); 182a34c753fSRafael Auler // We do not support X86 Large code model. Change this in the future. 183a34c753fSRafael Auler bool Large = false; 184a34c753fSRafael Auler if (TheTriple->getArch() == llvm::Triple::aarch64) 185a34c753fSRafael Auler Large = true; 186a34c753fSRafael Auler unsigned LSDAEncoding = 187a34c753fSRafael Auler Large ? dwarf::DW_EH_PE_absptr : dwarf::DW_EH_PE_udata4; 188a34c753fSRafael Auler unsigned TTypeEncoding = 189a34c753fSRafael Auler Large ? dwarf::DW_EH_PE_absptr : dwarf::DW_EH_PE_udata4; 190a34c753fSRafael Auler if (IsPIC) { 191a34c753fSRafael Auler LSDAEncoding = dwarf::DW_EH_PE_pcrel | 192a34c753fSRafael Auler (Large ? dwarf::DW_EH_PE_sdata8 : dwarf::DW_EH_PE_sdata4); 193a34c753fSRafael Auler TTypeEncoding = dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel | 194a34c753fSRafael Auler (Large ? dwarf::DW_EH_PE_sdata8 : dwarf::DW_EH_PE_sdata4); 195a34c753fSRafael Auler } 196a34c753fSRafael Auler 197a34c753fSRafael Auler std::unique_ptr<MCDisassembler> DisAsm( 198a34c753fSRafael Auler TheTarget->createMCDisassembler(*STI, *Ctx)); 199a34c753fSRafael Auler 20032d2473aSAmir Ayupov if (!DisAsm) 20132d2473aSAmir Ayupov return createStringError( 20232d2473aSAmir Ayupov make_error_code(std::errc::not_supported), 20332d2473aSAmir Ayupov Twine("BOLT-ERROR: no disassembler info for target ", TripleName)); 204a34c753fSRafael Auler 205a34c753fSRafael Auler std::unique_ptr<const MCInstrAnalysis> MIA( 206a34c753fSRafael Auler TheTarget->createMCInstrAnalysis(MII.get())); 20732d2473aSAmir Ayupov if (!MIA) 20832d2473aSAmir Ayupov return createStringError( 20932d2473aSAmir Ayupov make_error_code(std::errc::not_supported), 21032d2473aSAmir Ayupov Twine("BOLT-ERROR: failed to create instruction analysis for target ", 21132d2473aSAmir Ayupov TripleName)); 212a34c753fSRafael Auler 213a34c753fSRafael Auler int AsmPrinterVariant = AsmInfo->getAssemblerDialect(); 214a34c753fSRafael Auler std::unique_ptr<MCInstPrinter> InstructionPrinter( 215a34c753fSRafael Auler TheTarget->createMCInstPrinter(*TheTriple, AsmPrinterVariant, *AsmInfo, 216a34c753fSRafael Auler *MII, *MRI)); 21732d2473aSAmir Ayupov if (!InstructionPrinter) 21832d2473aSAmir Ayupov return createStringError( 21932d2473aSAmir Ayupov make_error_code(std::errc::not_supported), 22032d2473aSAmir Ayupov Twine("BOLT-ERROR: no instruction printer for target ", TripleName)); 221a34c753fSRafael Auler InstructionPrinter->setPrintImmHex(true); 222a34c753fSRafael Auler 223a34c753fSRafael Auler std::unique_ptr<MCCodeEmitter> MCE( 2242aed07e9SShao-Ce SUN TheTarget->createMCCodeEmitter(*MII, *Ctx)); 225a34c753fSRafael Auler 226a34c753fSRafael Auler // Make sure we don't miss any output on core dumps. 227a34c753fSRafael Auler outs().SetUnbuffered(); 228a34c753fSRafael Auler errs().SetUnbuffered(); 229a34c753fSRafael Auler dbgs().SetUnbuffered(); 230a34c753fSRafael Auler 231a34c753fSRafael Auler auto BC = std::make_unique<BinaryContext>( 232a34c753fSRafael Auler std::move(Ctx), std::move(DwCtx), std::move(TheTriple), TheTarget, 233a34c753fSRafael Auler std::string(TripleName), std::move(MCE), std::move(MOFI), 234a34c753fSRafael Auler std::move(AsmInfo), std::move(MII), std::move(STI), 23540c2e0faSMaksim Panchenko std::move(InstructionPrinter), std::move(MIA), nullptr, std::move(MRI), 23640c2e0faSMaksim Panchenko std::move(DisAsm)); 237a34c753fSRafael Auler 238a34c753fSRafael Auler BC->TTypeEncoding = TTypeEncoding; 239a34c753fSRafael Auler BC->LSDAEncoding = LSDAEncoding; 240a34c753fSRafael Auler 241a34c753fSRafael Auler BC->MAB = std::unique_ptr<MCAsmBackend>( 242a34c753fSRafael Auler BC->TheTarget->createMCAsmBackend(*BC->STI, *BC->MRI, MCTargetOptions())); 243a34c753fSRafael Auler 244a34c753fSRafael Auler BC->setFilename(File->getFileName()); 245a34c753fSRafael Auler 246a34c753fSRafael Auler BC->HasFixedLoadAddress = !IsPIC; 247a34c753fSRafael Auler 248e290133cSMaksim Panchenko BC->SymbolicDisAsm = std::unique_ptr<MCDisassembler>( 249e290133cSMaksim Panchenko BC->TheTarget->createMCDisassembler(*BC->STI, *BC->Ctx)); 250e290133cSMaksim Panchenko 251e290133cSMaksim Panchenko if (!BC->SymbolicDisAsm) 252e290133cSMaksim Panchenko return createStringError( 253e290133cSMaksim Panchenko make_error_code(std::errc::not_supported), 254e290133cSMaksim Panchenko Twine("BOLT-ERROR: no disassembler info for target ", TripleName)); 255e290133cSMaksim Panchenko 25663686af1SVladislav Khmelevsky return std::move(BC); 257a34c753fSRafael Auler } 258a34c753fSRafael Auler 259a34c753fSRafael Auler bool BinaryContext::forceSymbolRelocations(StringRef SymbolName) const { 26040c2e0faSMaksim Panchenko if (opts::HotText && 26140c2e0faSMaksim Panchenko (SymbolName == "__hot_start" || SymbolName == "__hot_end")) 262a34c753fSRafael Auler return true; 263a34c753fSRafael Auler 26440c2e0faSMaksim Panchenko if (opts::HotData && 26540c2e0faSMaksim Panchenko (SymbolName == "__hot_data_start" || SymbolName == "__hot_data_end")) 266a34c753fSRafael Auler return true; 267a34c753fSRafael Auler 268a34c753fSRafael Auler if (SymbolName == "_end") 269a34c753fSRafael Auler return true; 270a34c753fSRafael Auler 271a34c753fSRafael Auler return false; 272a34c753fSRafael Auler } 273a34c753fSRafael Auler 274a34c753fSRafael Auler std::unique_ptr<MCObjectWriter> 275a34c753fSRafael Auler BinaryContext::createObjectWriter(raw_pwrite_stream &OS) { 276a34c753fSRafael Auler return MAB->createObjectWriter(OS); 277a34c753fSRafael Auler } 278a34c753fSRafael Auler 279a34c753fSRafael Auler bool BinaryContext::validateObjectNesting() const { 280a34c753fSRafael Auler auto Itr = BinaryDataMap.begin(); 281a34c753fSRafael Auler auto End = BinaryDataMap.end(); 282a34c753fSRafael Auler bool Valid = true; 283a34c753fSRafael Auler while (Itr != End) { 284a34c753fSRafael Auler auto Next = std::next(Itr); 285a34c753fSRafael Auler while (Next != End && 286a34c753fSRafael Auler Itr->second->getSection() == Next->second->getSection() && 287a34c753fSRafael Auler Itr->second->containsRange(Next->second->getAddress(), 288a34c753fSRafael Auler Next->second->getSize())) { 289a34c753fSRafael Auler if (Next->second->Parent != Itr->second) { 290a34c753fSRafael Auler errs() << "BOLT-WARNING: object nesting incorrect for:\n" 291a34c753fSRafael Auler << "BOLT-WARNING: " << *Itr->second << "\n" 292a34c753fSRafael Auler << "BOLT-WARNING: " << *Next->second << "\n"; 293a34c753fSRafael Auler Valid = false; 294a34c753fSRafael Auler } 295a34c753fSRafael Auler ++Next; 296a34c753fSRafael Auler } 297a34c753fSRafael Auler Itr = Next; 298a34c753fSRafael Auler } 299a34c753fSRafael Auler return Valid; 300a34c753fSRafael Auler } 301a34c753fSRafael Auler 302a34c753fSRafael Auler bool BinaryContext::validateHoles() const { 303a34c753fSRafael Auler bool Valid = true; 304a34c753fSRafael Auler for (BinarySection &Section : sections()) { 305a34c753fSRafael Auler for (const Relocation &Rel : Section.relocations()) { 306a34c753fSRafael Auler uint64_t RelAddr = Rel.Offset + Section.getAddress(); 307a34c753fSRafael Auler const BinaryData *BD = getBinaryDataContainingAddress(RelAddr); 308a34c753fSRafael Auler if (!BD) { 309a34c753fSRafael Auler errs() << "BOLT-WARNING: no BinaryData found for relocation at address" 310a34c753fSRafael Auler << " 0x" << Twine::utohexstr(RelAddr) << " in " 311a34c753fSRafael Auler << Section.getName() << "\n"; 312a34c753fSRafael Auler Valid = false; 313a34c753fSRafael Auler } else if (!BD->getAtomicRoot()) { 314a34c753fSRafael Auler errs() << "BOLT-WARNING: no atomic BinaryData found for relocation at " 315a34c753fSRafael Auler << "address 0x" << Twine::utohexstr(RelAddr) << " in " 316a34c753fSRafael Auler << Section.getName() << "\n"; 317a34c753fSRafael Auler Valid = false; 318a34c753fSRafael Auler } 319a34c753fSRafael Auler } 320a34c753fSRafael Auler } 321a34c753fSRafael Auler return Valid; 322a34c753fSRafael Auler } 323a34c753fSRafael Auler 324a34c753fSRafael Auler void BinaryContext::updateObjectNesting(BinaryDataMapType::iterator GAI) { 325a34c753fSRafael Auler const uint64_t Address = GAI->second->getAddress(); 326a34c753fSRafael Auler const uint64_t Size = GAI->second->getSize(); 327a34c753fSRafael Auler 32840c2e0faSMaksim Panchenko auto fixParents = [&](BinaryDataMapType::iterator Itr, 32940c2e0faSMaksim Panchenko BinaryData *NewParent) { 330a34c753fSRafael Auler BinaryData *OldParent = Itr->second->Parent; 331a34c753fSRafael Auler Itr->second->Parent = NewParent; 332a34c753fSRafael Auler ++Itr; 333a34c753fSRafael Auler while (Itr != BinaryDataMap.end() && OldParent && 334a34c753fSRafael Auler Itr->second->Parent == OldParent) { 335a34c753fSRafael Auler Itr->second->Parent = NewParent; 336a34c753fSRafael Auler ++Itr; 337a34c753fSRafael Auler } 338a34c753fSRafael Auler }; 339a34c753fSRafael Auler 340a34c753fSRafael Auler // Check if the previous symbol contains the newly added symbol. 341a34c753fSRafael Auler if (GAI != BinaryDataMap.begin()) { 342a34c753fSRafael Auler BinaryData *Prev = std::prev(GAI)->second; 343a34c753fSRafael Auler while (Prev) { 344a34c753fSRafael Auler if (Prev->getSection() == GAI->second->getSection() && 345a34c753fSRafael Auler Prev->containsRange(Address, Size)) { 346a34c753fSRafael Auler fixParents(GAI, Prev); 347a34c753fSRafael Auler } else { 348a34c753fSRafael Auler fixParents(GAI, nullptr); 349a34c753fSRafael Auler } 350a34c753fSRafael Auler Prev = Prev->Parent; 351a34c753fSRafael Auler } 352a34c753fSRafael Auler } 353a34c753fSRafael Auler 354a34c753fSRafael Auler // Check if the newly added symbol contains any subsequent symbols. 355a34c753fSRafael Auler if (Size != 0) { 356a34c753fSRafael Auler BinaryData *BD = GAI->second->Parent ? GAI->second->Parent : GAI->second; 357a34c753fSRafael Auler auto Itr = std::next(GAI); 35840c2e0faSMaksim Panchenko while ( 35940c2e0faSMaksim Panchenko Itr != BinaryDataMap.end() && 36040c2e0faSMaksim Panchenko BD->containsRange(Itr->second->getAddress(), Itr->second->getSize())) { 361a34c753fSRafael Auler Itr->second->Parent = BD; 362a34c753fSRafael Auler ++Itr; 363a34c753fSRafael Auler } 364a34c753fSRafael Auler } 365a34c753fSRafael Auler } 366a34c753fSRafael Auler 367a34c753fSRafael Auler iterator_range<BinaryContext::binary_data_iterator> 368a34c753fSRafael Auler BinaryContext::getSubBinaryData(BinaryData *BD) { 369a34c753fSRafael Auler auto Start = std::next(BinaryDataMap.find(BD->getAddress())); 370a34c753fSRafael Auler auto End = Start; 3713652483cSRafael Auler while (End != BinaryDataMap.end() && BD->isAncestorOf(End->second)) 372a34c753fSRafael Auler ++End; 373a34c753fSRafael Auler return make_range(Start, End); 374a34c753fSRafael Auler } 375a34c753fSRafael Auler 376a34c753fSRafael Auler std::pair<const MCSymbol *, uint64_t> 377a34c753fSRafael Auler BinaryContext::handleAddressRef(uint64_t Address, BinaryFunction &BF, 378a34c753fSRafael Auler bool IsPCRel) { 379a34c753fSRafael Auler uint64_t Addend = 0; 380a34c753fSRafael Auler 381a34c753fSRafael Auler if (isAArch64()) { 382a34c753fSRafael Auler // Check if this is an access to a constant island and create bookkeeping 383a34c753fSRafael Auler // to keep track of it and emit it later as part of this function. 384a34c753fSRafael Auler if (MCSymbol *IslandSym = BF.getOrCreateIslandAccess(Address)) 385a34c753fSRafael Auler return std::make_pair(IslandSym, Addend); 386a34c753fSRafael Auler 387a34c753fSRafael Auler // Detect custom code written in assembly that refers to arbitrary 388a34c753fSRafael Auler // constant islands from other functions. Write this reference so we 389a34c753fSRafael Auler // can pull this constant island and emit it as part of this function 390a34c753fSRafael Auler // too. 391a34c753fSRafael Auler auto IslandIter = AddressToConstantIslandMap.lower_bound(Address); 392a34c753fSRafael Auler if (IslandIter != AddressToConstantIslandMap.end()) { 393a34c753fSRafael Auler if (MCSymbol *IslandSym = 394a34c753fSRafael Auler IslandIter->second->getOrCreateProxyIslandAccess(Address, BF)) { 395a34c753fSRafael Auler BF.createIslandDependency(IslandSym, IslandIter->second); 396a34c753fSRafael Auler return std::make_pair(IslandSym, Addend); 397a34c753fSRafael Auler } 398a34c753fSRafael Auler } 399a34c753fSRafael Auler } 400a34c753fSRafael Auler 401a34c753fSRafael Auler // Note that the address does not necessarily have to reside inside 402a34c753fSRafael Auler // a section, it could be an absolute address too. 403a34c753fSRafael Auler ErrorOr<BinarySection &> Section = getSectionForAddress(Address); 404a34c753fSRafael Auler if (Section && Section->isText()) { 405a34c753fSRafael Auler if (BF.containsAddress(Address, /*UseMaxSize=*/isAArch64())) { 406a34c753fSRafael Auler if (Address != BF.getAddress()) { 407a34c753fSRafael Auler // The address could potentially escape. Mark it as another entry 408a34c753fSRafael Auler // point into the function. 409a34c753fSRafael Auler if (opts::Verbosity >= 1) { 410a34c753fSRafael Auler outs() << "BOLT-INFO: potentially escaped address 0x" 41140c2e0faSMaksim Panchenko << Twine::utohexstr(Address) << " in function " << BF << '\n'; 412a34c753fSRafael Auler } 413a34c753fSRafael Auler BF.HasInternalLabelReference = true; 414a34c753fSRafael Auler return std::make_pair( 41540c2e0faSMaksim Panchenko BF.addEntryPointAtOffset(Address - BF.getAddress()), Addend); 416a34c753fSRafael Auler } 417a34c753fSRafael Auler } else { 41835efe1d8SVladislav Khmelevsky addInterproceduralReference(&BF, Address); 419a34c753fSRafael Auler } 420a34c753fSRafael Auler } 421a34c753fSRafael Auler 422a34c753fSRafael Auler // With relocations, catch jump table references outside of the basic block 423a34c753fSRafael Auler // containing the indirect jump. 424a34c753fSRafael Auler if (HasRelocations) { 425a34c753fSRafael Auler const MemoryContentsType MemType = analyzeMemoryAt(Address, BF); 426a34c753fSRafael Auler if (MemType == MemoryContentsType::POSSIBLE_PIC_JUMP_TABLE && IsPCRel) { 427a34c753fSRafael Auler const MCSymbol *Symbol = 428a34c753fSRafael Auler getOrCreateJumpTable(BF, Address, JumpTable::JTT_PIC); 429a34c753fSRafael Auler 430a34c753fSRafael Auler return std::make_pair(Symbol, Addend); 431a34c753fSRafael Auler } 432a34c753fSRafael Auler } 433a34c753fSRafael Auler 4343652483cSRafael Auler if (BinaryData *BD = getBinaryDataContainingAddress(Address)) 435a34c753fSRafael Auler return std::make_pair(BD->getSymbol(), Address - BD->getAddress()); 436a34c753fSRafael Auler 437a34c753fSRafael Auler // TODO: use DWARF info to get size/alignment here? 438a34c753fSRafael Auler MCSymbol *TargetSymbol = getOrCreateGlobalSymbol(Address, "DATAat"); 439a34c753fSRafael Auler LLVM_DEBUG(dbgs() << "Created symbol " << TargetSymbol->getName() << '\n'); 440a34c753fSRafael Auler return std::make_pair(TargetSymbol, Addend); 441a34c753fSRafael Auler } 442a34c753fSRafael Auler 44340c2e0faSMaksim Panchenko MemoryContentsType BinaryContext::analyzeMemoryAt(uint64_t Address, 44440c2e0faSMaksim Panchenko BinaryFunction &BF) { 445a34c753fSRafael Auler if (!isX86()) 446a34c753fSRafael Auler return MemoryContentsType::UNKNOWN; 447a34c753fSRafael Auler 448a34c753fSRafael Auler ErrorOr<BinarySection &> Section = getSectionForAddress(Address); 449a34c753fSRafael Auler if (!Section) { 450a34c753fSRafael Auler // No section - possibly an absolute address. Since we don't allow 451a34c753fSRafael Auler // internal function addresses to escape the function scope - we 452a34c753fSRafael Auler // consider it a tail call. 453a34c753fSRafael Auler if (opts::Verbosity > 1) { 454a34c753fSRafael Auler errs() << "BOLT-WARNING: no section for address 0x" 45540c2e0faSMaksim Panchenko << Twine::utohexstr(Address) << " referenced from function " << BF 45640c2e0faSMaksim Panchenko << '\n'; 457a34c753fSRafael Auler } 458a34c753fSRafael Auler return MemoryContentsType::UNKNOWN; 459a34c753fSRafael Auler } 460a34c753fSRafael Auler 461a34c753fSRafael Auler if (Section->isVirtual()) { 462a34c753fSRafael Auler // The contents are filled at runtime. 463a34c753fSRafael Auler return MemoryContentsType::UNKNOWN; 464a34c753fSRafael Auler } 465a34c753fSRafael Auler 466a34c753fSRafael Auler // No support for jump tables in code yet. 467a34c753fSRafael Auler if (Section->isText()) 468a34c753fSRafael Auler return MemoryContentsType::UNKNOWN; 469a34c753fSRafael Auler 470a34c753fSRafael Auler // Start with checking for PIC jump table. We expect non-PIC jump tables 471a34c753fSRafael Auler // to have high 32 bits set to 0. 472a34c753fSRafael Auler if (analyzeJumpTable(Address, JumpTable::JTT_PIC, BF)) 473a34c753fSRafael Auler return MemoryContentsType::POSSIBLE_PIC_JUMP_TABLE; 474a34c753fSRafael Auler 475a34c753fSRafael Auler if (analyzeJumpTable(Address, JumpTable::JTT_NORMAL, BF)) 476a34c753fSRafael Auler return MemoryContentsType::POSSIBLE_JUMP_TABLE; 477a34c753fSRafael Auler 478a34c753fSRafael Auler return MemoryContentsType::UNKNOWN; 479a34c753fSRafael Auler } 480a34c753fSRafael Auler 4816aa735ceSAmir Ayupov /// Check if <fragment restored name> == <parent restored name>.cold(.\d+)? 4826aa735ceSAmir Ayupov bool isPotentialFragmentByName(BinaryFunction &Fragment, 4836aa735ceSAmir Ayupov BinaryFunction &Parent) { 4846aa735ceSAmir Ayupov for (StringRef Name : Parent.getNames()) { 4856aa735ceSAmir Ayupov std::string NamePrefix = Regex::escape(NameResolver::restore(Name)); 4866aa735ceSAmir Ayupov std::string NameRegex = Twine(NamePrefix, "\\.cold(\\.[0-9]+)?").str(); 4876aa735ceSAmir Ayupov if (Fragment.hasRestoredNameRegex(NameRegex)) 4886aa735ceSAmir Ayupov return true; 4896aa735ceSAmir Ayupov } 4906aa735ceSAmir Ayupov return false; 4916aa735ceSAmir Ayupov } 4926aa735ceSAmir Ayupov 49305523dc3SHuan Nguyen bool BinaryContext::analyzeJumpTable( 49405523dc3SHuan Nguyen const uint64_t Address, const JumpTable::JumpTableType Type, 49505523dc3SHuan Nguyen BinaryFunction &BF, const uint64_t NextJTAddress, 49605523dc3SHuan Nguyen JumpTable::AddressesType *EntriesAsAddress) { 497a34c753fSRafael Auler // Is one of the targets __builtin_unreachable? 498a34c753fSRafael Auler bool HasUnreachable = false; 499a34c753fSRafael Auler 500a34c753fSRafael Auler // Number of targets other than __builtin_unreachable. 501a34c753fSRafael Auler uint64_t NumRealEntries = 0; 502a34c753fSRafael Auler 50305523dc3SHuan Nguyen auto addEntryAddress = [&](uint64_t EntryAddress) { 50405523dc3SHuan Nguyen if (EntriesAsAddress) 50505523dc3SHuan Nguyen EntriesAsAddress->emplace_back(EntryAddress); 506a34c753fSRafael Auler }; 507a34c753fSRafael Auler 508a34c753fSRafael Auler auto doesBelongToFunction = [&](const uint64_t Addr, 509a34c753fSRafael Auler BinaryFunction *TargetBF) -> bool { 510a34c753fSRafael Auler if (BF.containsAddress(Addr)) 511a34c753fSRafael Auler return true; 512a34c753fSRafael Auler // Nothing to do if we failed to identify the containing function. 513a34c753fSRafael Auler if (!TargetBF) 514a34c753fSRafael Auler return false; 515a34c753fSRafael Auler // Case 1: check if BF is a fragment and TargetBF is its parent. 516a34c753fSRafael Auler if (BF.isFragment()) { 5176aa735ceSAmir Ayupov // Parent function may or may not be already registered. 5186aa735ceSAmir Ayupov // Set parent link based on function name matching heuristic. 5196aa735ceSAmir Ayupov return registerFragment(BF, *TargetBF); 520a34c753fSRafael Auler } 521a34c753fSRafael Auler // Case 2: check if TargetBF is a fragment and BF is its parent. 5226aa735ceSAmir Ayupov return TargetBF->isFragment() && registerFragment(*TargetBF, BF); 523a34c753fSRafael Auler }; 524a34c753fSRafael Auler 525a34c753fSRafael Auler ErrorOr<BinarySection &> Section = getSectionForAddress(Address); 526a34c753fSRafael Auler if (!Section) 527a34c753fSRafael Auler return false; 528a34c753fSRafael Auler 529a34c753fSRafael Auler // The upper bound is defined by containing object, section limits, and 530a34c753fSRafael Auler // the next jump table in memory. 531a34c753fSRafael Auler uint64_t UpperBound = Section->getEndAddress(); 532a34c753fSRafael Auler const BinaryData *JumpTableBD = getBinaryDataAtAddress(Address); 533a34c753fSRafael Auler if (JumpTableBD && JumpTableBD->getSize()) { 534a34c753fSRafael Auler assert(JumpTableBD->getEndAddress() <= UpperBound && 535a34c753fSRafael Auler "data object cannot cross a section boundary"); 536a34c753fSRafael Auler UpperBound = JumpTableBD->getEndAddress(); 537a34c753fSRafael Auler } 5383652483cSRafael Auler if (NextJTAddress) 539a34c753fSRafael Auler UpperBound = std::min(NextJTAddress, UpperBound); 540a34c753fSRafael Auler 541*556efdbaSAmir Ayupov LLVM_DEBUG({ 542*556efdbaSAmir Ayupov using JTT = JumpTable::JumpTableType; 543*556efdbaSAmir Ayupov dbgs() << formatv("BOLT-DEBUG: analyzeJumpTable @{0:x} in {1}, JTT={2}\n", 544*556efdbaSAmir Ayupov Address, BF.getPrintName(), 545*556efdbaSAmir Ayupov Type == JTT::JTT_PIC ? "PIC" : "Normal"); 546*556efdbaSAmir Ayupov }); 547a34c753fSRafael Auler const uint64_t EntrySize = getJumpTableEntrySize(Type); 548a34c753fSRafael Auler for (uint64_t EntryAddress = Address; EntryAddress <= UpperBound - EntrySize; 549a34c753fSRafael Auler EntryAddress += EntrySize) { 550a34c753fSRafael Auler LLVM_DEBUG(dbgs() << " * Checking 0x" << Twine::utohexstr(EntryAddress) 551a34c753fSRafael Auler << " -> "); 552a34c753fSRafael Auler // Check if there's a proper relocation against the jump table entry. 553a34c753fSRafael Auler if (HasRelocations) { 554a34c753fSRafael Auler if (Type == JumpTable::JTT_PIC && 555a34c753fSRafael Auler !DataPCRelocations.count(EntryAddress)) { 556a34c753fSRafael Auler LLVM_DEBUG( 557a34c753fSRafael Auler dbgs() << "FAIL: JTT_PIC table, no relocation for this address\n"); 558a34c753fSRafael Auler break; 559a34c753fSRafael Auler } 560a34c753fSRafael Auler if (Type == JumpTable::JTT_NORMAL && !getRelocationAt(EntryAddress)) { 561a34c753fSRafael Auler LLVM_DEBUG( 562a34c753fSRafael Auler dbgs() 563a34c753fSRafael Auler << "FAIL: JTT_NORMAL table, no relocation for this address\n"); 564a34c753fSRafael Auler break; 565a34c753fSRafael Auler } 566a34c753fSRafael Auler } 567a34c753fSRafael Auler 56840c2e0faSMaksim Panchenko const uint64_t Value = 56940c2e0faSMaksim Panchenko (Type == JumpTable::JTT_PIC) 570a34c753fSRafael Auler ? Address + *getSignedValueAtAddress(EntryAddress, EntrySize) 571a34c753fSRafael Auler : *getPointerAtAddress(EntryAddress); 572a34c753fSRafael Auler 573a34c753fSRafael Auler // __builtin_unreachable() case. 574a34c753fSRafael Auler if (Value == BF.getAddress() + BF.getSize()) { 57505523dc3SHuan Nguyen addEntryAddress(Value); 576a34c753fSRafael Auler HasUnreachable = true; 577*556efdbaSAmir Ayupov LLVM_DEBUG(dbgs() << formatv("OK: {0:x} __builtin_unreachable\n", Value)); 578a34c753fSRafael Auler continue; 579a34c753fSRafael Auler } 580a34c753fSRafael Auler 581a34c753fSRafael Auler // Function or one of its fragments. 582a34c753fSRafael Auler BinaryFunction *TargetBF = getBinaryFunctionContainingAddress(Value); 583a34c753fSRafael Auler 584a34c753fSRafael Auler // We assume that a jump table cannot have function start as an entry. 585a34c753fSRafael Auler if (!doesBelongToFunction(Value, TargetBF) || Value == BF.getAddress()) { 586a34c753fSRafael Auler LLVM_DEBUG({ 587a34c753fSRafael Auler if (!BF.containsAddress(Value)) { 588a34c753fSRafael Auler dbgs() << "FAIL: function doesn't contain this address\n"; 589a34c753fSRafael Auler if (TargetBF) { 590a34c753fSRafael Auler dbgs() << " ! function containing this address: " 591a34c753fSRafael Auler << TargetBF->getPrintName() << '\n'; 592*556efdbaSAmir Ayupov if (TargetBF->isFragment()) { 593*556efdbaSAmir Ayupov dbgs() << " ! is a fragment"; 594*556efdbaSAmir Ayupov for (BinaryFunction *Parent : TargetBF->ParentFragments) 595*556efdbaSAmir Ayupov dbgs() << ", parent: " << Parent->getPrintName(); 596*556efdbaSAmir Ayupov dbgs() << '\n'; 597*556efdbaSAmir Ayupov } 598a34c753fSRafael Auler } 599a34c753fSRafael Auler } 600a34c753fSRafael Auler if (Value == BF.getAddress()) 601a34c753fSRafael Auler dbgs() << "FAIL: jump table cannot have function start as an entry\n"; 602a34c753fSRafael Auler }); 603a34c753fSRafael Auler break; 604a34c753fSRafael Auler } 605a34c753fSRafael Auler 606a34c753fSRafael Auler // Check there's an instruction at this offset. 607a34c753fSRafael Auler if (TargetBF->getState() == BinaryFunction::State::Disassembled && 608a34c753fSRafael Auler !TargetBF->getInstructionAtOffset(Value - TargetBF->getAddress())) { 609*556efdbaSAmir Ayupov LLVM_DEBUG(dbgs() << formatv("FAIL: no instruction at {0:x}\n", Value)); 610a34c753fSRafael Auler break; 611a34c753fSRafael Auler } 612a34c753fSRafael Auler 613a34c753fSRafael Auler ++NumRealEntries; 614*556efdbaSAmir Ayupov LLVM_DEBUG(dbgs() << formatv("OK: {0:x} real entry\n", Value)); 615a34c753fSRafael Auler 61605523dc3SHuan Nguyen if (TargetBF != &BF) 61705523dc3SHuan Nguyen BF.setHasIndirectTargetToSplitFragment(true); 61805523dc3SHuan Nguyen addEntryAddress(Value); 619a34c753fSRafael Auler } 620a34c753fSRafael Auler 621a34c753fSRafael Auler // It's a jump table if the number of real entries is more than 1, or there's 622a34c753fSRafael Auler // one real entry and "unreachable" targets. If there are only multiple 623a34c753fSRafael Auler // "unreachable" targets, then it's not a jump table. 624a34c753fSRafael Auler return NumRealEntries + HasUnreachable >= 2; 625a34c753fSRafael Auler } 626a34c753fSRafael Auler 627a34c753fSRafael Auler void BinaryContext::populateJumpTables() { 628a34c753fSRafael Auler LLVM_DEBUG(dbgs() << "DataPCRelocations: " << DataPCRelocations.size() 629a34c753fSRafael Auler << '\n'); 630a34c753fSRafael Auler for (auto JTI = JumpTables.begin(), JTE = JumpTables.end(); JTI != JTE; 631a34c753fSRafael Auler ++JTI) { 632a34c753fSRafael Auler JumpTable *JT = JTI->second; 633a34c753fSRafael Auler 63405523dc3SHuan Nguyen bool NonSimpleParent = false; 63505523dc3SHuan Nguyen for (BinaryFunction *BF : JT->Parents) 63605523dc3SHuan Nguyen NonSimpleParent |= !BF->isSimple(); 63705523dc3SHuan Nguyen if (NonSimpleParent) 638a34c753fSRafael Auler continue; 639a34c753fSRafael Auler 640a34c753fSRafael Auler uint64_t NextJTAddress = 0; 641a34c753fSRafael Auler auto NextJTI = std::next(JTI); 6423652483cSRafael Auler if (NextJTI != JTE) 643a34c753fSRafael Auler NextJTAddress = NextJTI->second->getAddress(); 644a34c753fSRafael Auler 64505523dc3SHuan Nguyen const bool Success = 64605523dc3SHuan Nguyen analyzeJumpTable(JT->getAddress(), JT->Type, *(JT->Parents[0]), 64705523dc3SHuan Nguyen NextJTAddress, &JT->EntriesAsAddress); 648a34c753fSRafael Auler if (!Success) { 64905523dc3SHuan Nguyen LLVM_DEBUG(ListSeparator LS; 65005523dc3SHuan Nguyen dbgs() << "failed to analyze jump table in function "; 65105523dc3SHuan Nguyen for (BinaryFunction *Frag 65205523dc3SHuan Nguyen : JT->Parents) dbgs() 65305523dc3SHuan Nguyen << LS << *Frag; 65405523dc3SHuan Nguyen dbgs() << '\n';); 655a34c753fSRafael Auler JT->print(dbgs()); 656a34c753fSRafael Auler if (NextJTI != JTE) { 65705523dc3SHuan Nguyen LLVM_DEBUG(ListSeparator LS; 658a34c753fSRafael Auler dbgs() << "next jump table at 0x" 659a34c753fSRafael Auler << Twine::utohexstr(NextJTI->second->getAddress()) 66005523dc3SHuan Nguyen << " belongs to function "; 66105523dc3SHuan Nguyen for (BinaryFunction *Frag 66205523dc3SHuan Nguyen : NextJTI->second->Parents) dbgs() 66305523dc3SHuan Nguyen << LS << *Frag; 66405523dc3SHuan Nguyen dbgs() << "\n";); 665a34c753fSRafael Auler NextJTI->second->print(dbgs()); 666a34c753fSRafael Auler } 667468d4f6dSAmir Ayupov llvm_unreachable("jump table heuristic failure"); 668a34c753fSRafael Auler } 66905523dc3SHuan Nguyen for (BinaryFunction *Frag : JT->Parents) { 67005523dc3SHuan Nguyen for (uint64_t EntryAddress : JT->EntriesAsAddress) 67105523dc3SHuan Nguyen // if target is builtin_unreachable 67205523dc3SHuan Nguyen if (EntryAddress == Frag->getAddress() + Frag->getSize()) { 67305523dc3SHuan Nguyen Frag->IgnoredBranches.emplace_back(EntryAddress - Frag->getAddress(), 67405523dc3SHuan Nguyen Frag->getSize()); 67505523dc3SHuan Nguyen } else if (EntryAddress >= Frag->getAddress() && 67605523dc3SHuan Nguyen EntryAddress < Frag->getAddress() + Frag->getSize()) { 67705523dc3SHuan Nguyen Frag->registerReferencedOffset(EntryAddress - Frag->getAddress()); 67805523dc3SHuan Nguyen } 679a34c753fSRafael Auler } 680a34c753fSRafael Auler 681a34c753fSRafael Auler // In strict mode, erase PC-relative relocation record. Later we check that 682a34c753fSRafael Auler // all such records are erased and thus have been accounted for. 683a34c753fSRafael Auler if (opts::StrictMode && JT->Type == JumpTable::JTT_PIC) { 684a34c753fSRafael Auler for (uint64_t Address = JT->getAddress(); 685a34c753fSRafael Auler Address < JT->getAddress() + JT->getSize(); 686a34c753fSRafael Auler Address += JT->EntrySize) { 687a34c753fSRafael Auler DataPCRelocations.erase(DataPCRelocations.find(Address)); 688a34c753fSRafael Auler } 689a34c753fSRafael Auler } 690a34c753fSRafael Auler 691a34c753fSRafael Auler // Mark to skip the function and all its fragments. 69205523dc3SHuan Nguyen for (BinaryFunction *Frag : JT->Parents) 69305523dc3SHuan Nguyen if (Frag->hasIndirectTargetToSplitFragment()) 69405523dc3SHuan Nguyen addFragmentsToSkip(Frag); 695a34c753fSRafael Auler } 696a34c753fSRafael Auler 697a34c753fSRafael Auler if (opts::StrictMode && DataPCRelocations.size()) { 698a34c753fSRafael Auler LLVM_DEBUG({ 699a34c753fSRafael Auler dbgs() << DataPCRelocations.size() 700a34c753fSRafael Auler << " unclaimed PC-relative relocations left in data:\n"; 701a34c753fSRafael Auler for (uint64_t Reloc : DataPCRelocations) 702a34c753fSRafael Auler dbgs() << Twine::utohexstr(Reloc) << '\n'; 703a34c753fSRafael Auler }); 704a34c753fSRafael Auler assert(0 && "unclaimed PC-relative relocations left in data\n"); 705a34c753fSRafael Auler } 706a34c753fSRafael Auler clearList(DataPCRelocations); 707a34c753fSRafael Auler } 7086aa735ceSAmir Ayupov 7096aa735ceSAmir Ayupov void BinaryContext::skipMarkedFragments() { 71005523dc3SHuan Nguyen std::vector<BinaryFunction *> FragmentQueue; 71105523dc3SHuan Nguyen // Copy the functions to FragmentQueue. 71205523dc3SHuan Nguyen FragmentQueue.assign(FragmentsToSkip.begin(), FragmentsToSkip.end()); 7136aa735ceSAmir Ayupov auto addToWorklist = [&](BinaryFunction *Function) -> void { 71405523dc3SHuan Nguyen if (FragmentsToSkip.count(Function)) 7156aa735ceSAmir Ayupov return; 71605523dc3SHuan Nguyen FragmentQueue.push_back(Function); 71705523dc3SHuan Nguyen addFragmentsToSkip(Function); 7186aa735ceSAmir Ayupov }; 7196aa735ceSAmir Ayupov // Functions containing split jump tables need to be skipped with all 7206aa735ceSAmir Ayupov // fragments (transitively). 72105523dc3SHuan Nguyen for (size_t I = 0; I != FragmentQueue.size(); I++) { 72205523dc3SHuan Nguyen BinaryFunction *BF = FragmentQueue[I]; 72305523dc3SHuan Nguyen assert(FragmentsToSkip.count(BF) && 7246aa735ceSAmir Ayupov "internal error in traversing function fragments"); 7256aa735ceSAmir Ayupov if (opts::Verbosity >= 1) 7266aa735ceSAmir Ayupov errs() << "BOLT-WARNING: Ignoring " << BF->getPrintName() << '\n'; 72782095bd5SHuan Nguyen BF->setSimple(false); 72805523dc3SHuan Nguyen BF->setHasIndirectTargetToSplitFragment(true); 72982095bd5SHuan Nguyen 730d2c87699SAmir Ayupov llvm::for_each(BF->Fragments, addToWorklist); 731d2c87699SAmir Ayupov llvm::for_each(BF->ParentFragments, addToWorklist); 7326aa735ceSAmir Ayupov } 733641e92d4SMaksim Panchenko if (!FragmentsToSkip.empty()) 73482095bd5SHuan Nguyen errs() << "BOLT-WARNING: skipped " << FragmentsToSkip.size() << " function" 735641e92d4SMaksim Panchenko << (FragmentsToSkip.size() == 1 ? "" : "s") 736641e92d4SMaksim Panchenko << " due to cold fragments\n"; 737a34c753fSRafael Auler } 738a34c753fSRafael Auler 73940c2e0faSMaksim Panchenko MCSymbol *BinaryContext::getOrCreateGlobalSymbol(uint64_t Address, Twine Prefix, 740a34c753fSRafael Auler uint64_t Size, 741a34c753fSRafael Auler uint16_t Alignment, 742a34c753fSRafael Auler unsigned Flags) { 743a34c753fSRafael Auler auto Itr = BinaryDataMap.find(Address); 744a34c753fSRafael Auler if (Itr != BinaryDataMap.end()) { 745a34c753fSRafael Auler assert(Itr->second->getSize() == Size || !Size); 746a34c753fSRafael Auler return Itr->second->getSymbol(); 747a34c753fSRafael Auler } 748a34c753fSRafael Auler 749a34c753fSRafael Auler std::string Name = (Prefix + "0x" + Twine::utohexstr(Address)).str(); 750a34c753fSRafael Auler assert(!GlobalSymbols.count(Name) && "created name is not unique"); 751a34c753fSRafael Auler return registerNameAtAddress(Name, Address, Size, Alignment, Flags); 752a34c753fSRafael Auler } 753a34c753fSRafael Auler 754a34c753fSRafael Auler MCSymbol *BinaryContext::getOrCreateUndefinedGlobalSymbol(StringRef Name) { 755a34c753fSRafael Auler return Ctx->getOrCreateSymbol(Name); 756a34c753fSRafael Auler } 757a34c753fSRafael Auler 758a34c753fSRafael Auler BinaryFunction *BinaryContext::createBinaryFunction( 759a34c753fSRafael Auler const std::string &Name, BinarySection &Section, uint64_t Address, 760a34c753fSRafael Auler uint64_t Size, uint64_t SymbolSize, uint16_t Alignment) { 761a34c753fSRafael Auler auto Result = BinaryFunctions.emplace( 762a34c753fSRafael Auler Address, BinaryFunction(Name, Section, Address, Size, *this)); 763a34c753fSRafael Auler assert(Result.second == true && "unexpected duplicate function"); 764a34c753fSRafael Auler BinaryFunction *BF = &Result.first->second; 765a34c753fSRafael Auler registerNameAtAddress(Name, Address, SymbolSize ? SymbolSize : Size, 766a34c753fSRafael Auler Alignment); 767a34c753fSRafael Auler setSymbolToFunctionMap(BF->getSymbol(), BF); 768a34c753fSRafael Auler return BF; 769a34c753fSRafael Auler } 770a34c753fSRafael Auler 771a34c753fSRafael Auler const MCSymbol * 772a34c753fSRafael Auler BinaryContext::getOrCreateJumpTable(BinaryFunction &Function, uint64_t Address, 773a34c753fSRafael Auler JumpTable::JumpTableType Type) { 77428b1dcb1SHuan Nguyen auto isFragmentOf = [](BinaryFunction *Fragment, BinaryFunction *Parent) { 77528b1dcb1SHuan Nguyen return (Fragment->isFragment() && Fragment->isParentFragment(Parent)); 77628b1dcb1SHuan Nguyen }; 7770c925861SThorsten Schütt (void)isFragmentOf; 77828b1dcb1SHuan Nguyen 77905523dc3SHuan Nguyen // Two fragments of same function access same jump table 780a34c753fSRafael Auler if (JumpTable *JT = getJumpTableContainingAddress(Address)) { 781a34c753fSRafael Auler assert(JT->Type == Type && "jump table types have to match"); 782a34c753fSRafael Auler assert(Address == JT->getAddress() && "unexpected non-empty jump table"); 783a34c753fSRafael Auler 78405523dc3SHuan Nguyen // Prevent associating a jump table to a specific fragment twice. 78505523dc3SHuan Nguyen // This simple check arises from the assumption: no more than 2 fragments. 78605523dc3SHuan Nguyen if (JT->Parents.size() == 1 && JT->Parents[0] != &Function) { 7877651522bSDavid Blaikie assert((isFragmentOf(JT->Parents[0], &Function) || 7887651522bSDavid Blaikie isFragmentOf(&Function, JT->Parents[0])) && 78905523dc3SHuan Nguyen "cannot re-use jump table of a different function"); 79028b1dcb1SHuan Nguyen // Duplicate the entry for the parent function for easy access 79105523dc3SHuan Nguyen JT->Parents.push_back(&Function); 79228b1dcb1SHuan Nguyen if (opts::Verbosity > 2) { 79305523dc3SHuan Nguyen outs() << "BOLT-INFO: Multiple fragments access same jump table: " 79405523dc3SHuan Nguyen << JT->Parents[0]->getPrintName() << "; " 79505523dc3SHuan Nguyen << Function.getPrintName() << "\n"; 79605523dc3SHuan Nguyen JT->print(outs()); 79728b1dcb1SHuan Nguyen } 79828b1dcb1SHuan Nguyen Function.JumpTables.emplace(Address, JT); 79905523dc3SHuan Nguyen JT->Parents[0]->setHasIndirectTargetToSplitFragment(true); 80005523dc3SHuan Nguyen JT->Parents[1]->setHasIndirectTargetToSplitFragment(true); 80128b1dcb1SHuan Nguyen } 80205523dc3SHuan Nguyen 80305523dc3SHuan Nguyen bool IsJumpTableParent = false; 8040c925861SThorsten Schütt (void)IsJumpTableParent; 80505523dc3SHuan Nguyen for (BinaryFunction *Frag : JT->Parents) 80605523dc3SHuan Nguyen if (Frag == &Function) 80705523dc3SHuan Nguyen IsJumpTableParent = true; 80805523dc3SHuan Nguyen assert(IsJumpTableParent && 80905523dc3SHuan Nguyen "cannot re-use jump table of a different function"); 810a34c753fSRafael Auler return JT->getFirstLabel(); 811a34c753fSRafael Auler } 812a34c753fSRafael Auler 813a34c753fSRafael Auler // Re-use the existing symbol if possible. 814a34c753fSRafael Auler MCSymbol *JTLabel = nullptr; 815a34c753fSRafael Auler if (BinaryData *Object = getBinaryDataAtAddress(Address)) { 816a34c753fSRafael Auler if (!isInternalSymbolName(Object->getSymbol()->getName())) 817a34c753fSRafael Auler JTLabel = Object->getSymbol(); 818a34c753fSRafael Auler } 819a34c753fSRafael Auler 820a34c753fSRafael Auler const uint64_t EntrySize = getJumpTableEntrySize(Type); 821a34c753fSRafael Auler if (!JTLabel) { 822a34c753fSRafael Auler const std::string JumpTableName = generateJumpTableName(Function, Address); 823a34c753fSRafael Auler JTLabel = registerNameAtAddress(JumpTableName, Address, 0, EntrySize); 824a34c753fSRafael Auler } 825a34c753fSRafael Auler 826a34c753fSRafael Auler LLVM_DEBUG(dbgs() << "BOLT-DEBUG: creating jump table " << JTLabel->getName() 827a34c753fSRafael Auler << " in function " << Function << '\n'); 828a34c753fSRafael Auler 829a34c753fSRafael Auler JumpTable *JT = new JumpTable(*JTLabel, Address, EntrySize, Type, 83005523dc3SHuan Nguyen JumpTable::LabelMapType{{0, JTLabel}}, 831a34c753fSRafael Auler *getSectionForAddress(Address)); 83205523dc3SHuan Nguyen JT->Parents.push_back(&Function); 83305523dc3SHuan Nguyen if (opts::Verbosity > 2) 83405523dc3SHuan Nguyen JT->print(outs()); 835a34c753fSRafael Auler JumpTables.emplace(Address, JT); 836a34c753fSRafael Auler 837a34c753fSRafael Auler // Duplicate the entry for the parent function for easy access. 838a34c753fSRafael Auler Function.JumpTables.emplace(Address, JT); 839a34c753fSRafael Auler return JTLabel; 840a34c753fSRafael Auler } 841a34c753fSRafael Auler 842a34c753fSRafael Auler std::pair<uint64_t, const MCSymbol *> 843a34c753fSRafael Auler BinaryContext::duplicateJumpTable(BinaryFunction &Function, JumpTable *JT, 844a34c753fSRafael Auler const MCSymbol *OldLabel) { 845a34c753fSRafael Auler auto L = scopeLock(); 846a34c753fSRafael Auler unsigned Offset = 0; 847a34c753fSRafael Auler bool Found = false; 848a34c753fSRafael Auler for (std::pair<const unsigned, MCSymbol *> Elmt : JT->Labels) { 849a34c753fSRafael Auler if (Elmt.second != OldLabel) 850a34c753fSRafael Auler continue; 851a34c753fSRafael Auler Offset = Elmt.first; 852a34c753fSRafael Auler Found = true; 853a34c753fSRafael Auler break; 854a34c753fSRafael Auler } 855a34c753fSRafael Auler assert(Found && "Label not found"); 856c907d6e0SAmir Ayupov (void)Found; 857a34c753fSRafael Auler MCSymbol *NewLabel = Ctx->createNamedTempSymbol("duplicatedJT"); 858a34c753fSRafael Auler JumpTable *NewJT = 859a34c753fSRafael Auler new JumpTable(*NewLabel, JT->getAddress(), JT->EntrySize, JT->Type, 86005523dc3SHuan Nguyen JumpTable::LabelMapType{{Offset, NewLabel}}, 861a34c753fSRafael Auler *getSectionForAddress(JT->getAddress())); 86205523dc3SHuan Nguyen NewJT->Parents = JT->Parents; 863a34c753fSRafael Auler NewJT->Entries = JT->Entries; 864a34c753fSRafael Auler NewJT->Counts = JT->Counts; 865a34c753fSRafael Auler uint64_t JumpTableID = ++DuplicatedJumpTables; 866a34c753fSRafael Auler // Invert it to differentiate from regular jump tables whose IDs are their 867a34c753fSRafael Auler // addresses in the input binary memory space 868a34c753fSRafael Auler JumpTableID = ~JumpTableID; 869a34c753fSRafael Auler JumpTables.emplace(JumpTableID, NewJT); 870a34c753fSRafael Auler Function.JumpTables.emplace(JumpTableID, NewJT); 871a34c753fSRafael Auler return std::make_pair(JumpTableID, NewLabel); 872a34c753fSRafael Auler } 873a34c753fSRafael Auler 874a34c753fSRafael Auler std::string BinaryContext::generateJumpTableName(const BinaryFunction &BF, 875a34c753fSRafael Auler uint64_t Address) { 876a34c753fSRafael Auler size_t Id; 877a34c753fSRafael Auler uint64_t Offset = 0; 878a34c753fSRafael Auler if (const JumpTable *JT = BF.getJumpTableContainingAddress(Address)) { 879a34c753fSRafael Auler Offset = Address - JT->getAddress(); 880a34c753fSRafael Auler auto Itr = JT->Labels.find(Offset); 8813652483cSRafael Auler if (Itr != JT->Labels.end()) 882a34c753fSRafael Auler return std::string(Itr->second->getName()); 883a34c753fSRafael Auler Id = JumpTableIds.at(JT->getAddress()); 884a34c753fSRafael Auler } else { 885a34c753fSRafael Auler Id = JumpTableIds[Address] = BF.JumpTables.size(); 886a34c753fSRafael Auler } 887a34c753fSRafael Auler return ("JUMP_TABLE/" + BF.getOneName().str() + "." + std::to_string(Id) + 888a34c753fSRafael Auler (Offset ? ("." + std::to_string(Offset)) : "")); 889a34c753fSRafael Auler } 890a34c753fSRafael Auler 891a34c753fSRafael Auler bool BinaryContext::hasValidCodePadding(const BinaryFunction &BF) { 892a34c753fSRafael Auler // FIXME: aarch64 support is missing. 893a34c753fSRafael Auler if (!isX86()) 894a34c753fSRafael Auler return true; 895a34c753fSRafael Auler 896a34c753fSRafael Auler if (BF.getSize() == BF.getMaxSize()) 897a34c753fSRafael Auler return true; 898a34c753fSRafael Auler 899a34c753fSRafael Auler ErrorOr<ArrayRef<unsigned char>> FunctionData = BF.getData(); 900a34c753fSRafael Auler assert(FunctionData && "cannot get function as data"); 901a34c753fSRafael Auler 902a34c753fSRafael Auler uint64_t Offset = BF.getSize(); 903a34c753fSRafael Auler MCInst Instr; 904a34c753fSRafael Auler uint64_t InstrSize = 0; 905a34c753fSRafael Auler uint64_t InstrAddress = BF.getAddress() + Offset; 906a34c753fSRafael Auler using std::placeholders::_1; 907a34c753fSRafael Auler 908a34c753fSRafael Auler // Skip instructions that satisfy the predicate condition. 909a34c753fSRafael Auler auto skipInstructions = [&](std::function<bool(const MCInst &)> Predicate) { 910a34c753fSRafael Auler const uint64_t StartOffset = Offset; 911a34c753fSRafael Auler for (; Offset < BF.getMaxSize(); 912a34c753fSRafael Auler Offset += InstrSize, InstrAddress += InstrSize) { 91340c2e0faSMaksim Panchenko if (!DisAsm->getInstruction(Instr, InstrSize, FunctionData->slice(Offset), 91440c2e0faSMaksim Panchenko InstrAddress, nulls())) 915a34c753fSRafael Auler break; 916a34c753fSRafael Auler if (!Predicate(Instr)) 917a34c753fSRafael Auler break; 918a34c753fSRafael Auler } 919a34c753fSRafael Auler 920a34c753fSRafael Auler return Offset - StartOffset; 921a34c753fSRafael Auler }; 922a34c753fSRafael Auler 923a34c753fSRafael Auler // Skip a sequence of zero bytes. 924a34c753fSRafael Auler auto skipZeros = [&]() { 925a34c753fSRafael Auler const uint64_t StartOffset = Offset; 926a34c753fSRafael Auler for (; Offset < BF.getMaxSize(); ++Offset) 927a34c753fSRafael Auler if ((*FunctionData)[Offset] != 0) 928a34c753fSRafael Auler break; 929a34c753fSRafael Auler 930a34c753fSRafael Auler return Offset - StartOffset; 931a34c753fSRafael Auler }; 932a34c753fSRafael Auler 933a34c753fSRafael Auler // Accept the whole padding area filled with breakpoints. 934a34c753fSRafael Auler auto isBreakpoint = std::bind(&MCPlusBuilder::isBreakpoint, MIB.get(), _1); 935a34c753fSRafael Auler if (skipInstructions(isBreakpoint) && Offset == BF.getMaxSize()) 936a34c753fSRafael Auler return true; 937a34c753fSRafael Auler 938a34c753fSRafael Auler auto isNoop = std::bind(&MCPlusBuilder::isNoop, MIB.get(), _1); 939a34c753fSRafael Auler 940a34c753fSRafael Auler // Some functions have a jump to the next function or to the padding area 941a34c753fSRafael Auler // inserted after the body. 942a34c753fSRafael Auler auto isSkipJump = [&](const MCInst &Instr) { 943a34c753fSRafael Auler uint64_t TargetAddress = 0; 944a34c753fSRafael Auler if (MIB->isUnconditionalBranch(Instr) && 945a34c753fSRafael Auler MIB->evaluateBranch(Instr, InstrAddress, InstrSize, TargetAddress)) { 946a34c753fSRafael Auler if (TargetAddress >= InstrAddress + InstrSize && 947a34c753fSRafael Auler TargetAddress <= BF.getAddress() + BF.getMaxSize()) { 948a34c753fSRafael Auler return true; 949a34c753fSRafael Auler } 950a34c753fSRafael Auler } 951a34c753fSRafael Auler return false; 952a34c753fSRafael Auler }; 953a34c753fSRafael Auler 954a34c753fSRafael Auler // Skip over nops, jumps, and zero padding. Allow interleaving (this happens). 95540c2e0faSMaksim Panchenko while (skipInstructions(isNoop) || skipInstructions(isSkipJump) || 956a34c753fSRafael Auler skipZeros()) 957a34c753fSRafael Auler ; 958a34c753fSRafael Auler 959a34c753fSRafael Auler if (Offset == BF.getMaxSize()) 960a34c753fSRafael Auler return true; 961a34c753fSRafael Auler 962a34c753fSRafael Auler if (opts::Verbosity >= 1) { 963a34c753fSRafael Auler errs() << "BOLT-WARNING: bad padding at address 0x" 964a34c753fSRafael Auler << Twine::utohexstr(BF.getAddress() + BF.getSize()) 96540c2e0faSMaksim Panchenko << " starting at offset " << (Offset - BF.getSize()) 96640c2e0faSMaksim Panchenko << " in function " << BF << '\n' 967a34c753fSRafael Auler << FunctionData->slice(BF.getSize(), BF.getMaxSize() - BF.getSize()) 968a34c753fSRafael Auler << '\n'; 969a34c753fSRafael Auler } 970a34c753fSRafael Auler 971a34c753fSRafael Auler return false; 972a34c753fSRafael Auler } 973a34c753fSRafael Auler 974a34c753fSRafael Auler void BinaryContext::adjustCodePadding() { 975a34c753fSRafael Auler for (auto &BFI : BinaryFunctions) { 976a34c753fSRafael Auler BinaryFunction &BF = BFI.second; 977a34c753fSRafael Auler if (!shouldEmit(BF)) 978a34c753fSRafael Auler continue; 979a34c753fSRafael Auler 980a34c753fSRafael Auler if (!hasValidCodePadding(BF)) { 981a34c753fSRafael Auler if (HasRelocations) { 982a34c753fSRafael Auler if (opts::Verbosity >= 1) { 983a34c753fSRafael Auler outs() << "BOLT-INFO: function " << BF 984a34c753fSRafael Auler << " has invalid padding. Ignoring the function.\n"; 985a34c753fSRafael Auler } 986a34c753fSRafael Auler BF.setIgnored(); 987a34c753fSRafael Auler } else { 988a34c753fSRafael Auler BF.setMaxSize(BF.getSize()); 989a34c753fSRafael Auler } 990a34c753fSRafael Auler } 991a34c753fSRafael Auler } 992a34c753fSRafael Auler } 993a34c753fSRafael Auler 99440c2e0faSMaksim Panchenko MCSymbol *BinaryContext::registerNameAtAddress(StringRef Name, uint64_t Address, 995a34c753fSRafael Auler uint64_t Size, 996a34c753fSRafael Auler uint16_t Alignment, 997a34c753fSRafael Auler unsigned Flags) { 998a34c753fSRafael Auler // Register the name with MCContext. 999a34c753fSRafael Auler MCSymbol *Symbol = Ctx->getOrCreateSymbol(Name); 1000a34c753fSRafael Auler 1001a34c753fSRafael Auler auto GAI = BinaryDataMap.find(Address); 1002a34c753fSRafael Auler BinaryData *BD; 1003a34c753fSRafael Auler if (GAI == BinaryDataMap.end()) { 1004a34c753fSRafael Auler ErrorOr<BinarySection &> SectionOrErr = getSectionForAddress(Address); 1005a34c753fSRafael Auler BinarySection &Section = 1006a34c753fSRafael Auler SectionOrErr ? SectionOrErr.get() : absoluteSection(); 100740c2e0faSMaksim Panchenko BD = new BinaryData(*Symbol, Address, Size, Alignment ? Alignment : 1, 100840c2e0faSMaksim Panchenko Section, Flags); 1009a34c753fSRafael Auler GAI = BinaryDataMap.emplace(Address, BD).first; 1010a34c753fSRafael Auler GlobalSymbols[Name] = BD; 1011a34c753fSRafael Auler updateObjectNesting(GAI); 1012a34c753fSRafael Auler } else { 1013a34c753fSRafael Auler BD = GAI->second; 1014a34c753fSRafael Auler if (!BD->hasName(Name)) { 1015a34c753fSRafael Auler GlobalSymbols[Name] = BD; 1016a34c753fSRafael Auler BD->Symbols.push_back(Symbol); 1017a34c753fSRafael Auler } 1018a34c753fSRafael Auler } 1019a34c753fSRafael Auler 1020a34c753fSRafael Auler return Symbol; 1021a34c753fSRafael Auler } 1022a34c753fSRafael Auler 1023a34c753fSRafael Auler const BinaryData * 1024a34c753fSRafael Auler BinaryContext::getBinaryDataContainingAddressImpl(uint64_t Address) const { 1025a34c753fSRafael Auler auto NI = BinaryDataMap.lower_bound(Address); 1026a34c753fSRafael Auler auto End = BinaryDataMap.end(); 1027a34c753fSRafael Auler if ((NI != End && Address == NI->first) || 1028a34c753fSRafael Auler ((NI != BinaryDataMap.begin()) && (NI-- != BinaryDataMap.begin()))) { 10293652483cSRafael Auler if (NI->second->containsAddress(Address)) 1030a34c753fSRafael Auler return NI->second; 1031a34c753fSRafael Auler 1032a34c753fSRafael Auler // If this is a sub-symbol, see if a parent data contains the address. 1033a34c753fSRafael Auler const BinaryData *BD = NI->second->getParent(); 1034a34c753fSRafael Auler while (BD) { 1035a34c753fSRafael Auler if (BD->containsAddress(Address)) 1036a34c753fSRafael Auler return BD; 1037a34c753fSRafael Auler BD = BD->getParent(); 1038a34c753fSRafael Auler } 1039a34c753fSRafael Auler } 1040a34c753fSRafael Auler return nullptr; 1041a34c753fSRafael Auler } 1042a34c753fSRafael Auler 1043a34c753fSRafael Auler bool BinaryContext::setBinaryDataSize(uint64_t Address, uint64_t Size) { 1044a34c753fSRafael Auler auto NI = BinaryDataMap.find(Address); 1045a34c753fSRafael Auler assert(NI != BinaryDataMap.end()); 1046a34c753fSRafael Auler if (NI == BinaryDataMap.end()) 1047a34c753fSRafael Auler return false; 1048a34c753fSRafael Auler // TODO: it's possible that a jump table starts at the same address 1049a34c753fSRafael Auler // as a larger blob of private data. When we set the size of the 1050a34c753fSRafael Auler // jump table, it might be smaller than the total blob size. In this 1051a34c753fSRafael Auler // case we just leave the original size since (currently) it won't really 1052933df2a4SMaksim Panchenko // affect anything. 1053a34c753fSRafael Auler assert((!NI->second->Size || NI->second->Size == Size || 1054a34c753fSRafael Auler (NI->second->isJumpTable() && NI->second->Size > Size)) && 1055a34c753fSRafael Auler "can't change the size of a symbol that has already had its " 1056a34c753fSRafael Auler "size set"); 1057a34c753fSRafael Auler if (!NI->second->Size) { 1058a34c753fSRafael Auler NI->second->Size = Size; 1059a34c753fSRafael Auler updateObjectNesting(NI); 1060a34c753fSRafael Auler return true; 1061a34c753fSRafael Auler } 1062a34c753fSRafael Auler return false; 1063a34c753fSRafael Auler } 1064a34c753fSRafael Auler 1065a34c753fSRafael Auler void BinaryContext::generateSymbolHashes() { 1066a34c753fSRafael Auler auto isPadding = [](const BinaryData &BD) { 1067a34c753fSRafael Auler StringRef Contents = BD.getSection().getContents(); 1068a34c753fSRafael Auler StringRef SymData = Contents.substr(BD.getOffset(), BD.getSize()); 1069a34c753fSRafael Auler return (BD.getName().startswith("HOLEat") || 1070a34c753fSRafael Auler SymData.find_first_not_of(0) == StringRef::npos); 1071a34c753fSRafael Auler }; 1072a34c753fSRafael Auler 1073a34c753fSRafael Auler uint64_t NumCollisions = 0; 1074a34c753fSRafael Auler for (auto &Entry : BinaryDataMap) { 1075a34c753fSRafael Auler BinaryData &BD = *Entry.second; 1076a34c753fSRafael Auler StringRef Name = BD.getName(); 1077a34c753fSRafael Auler 1078a34c753fSRafael Auler if (!isInternalSymbolName(Name)) 1079a34c753fSRafael Auler continue; 1080a34c753fSRafael Auler 1081a34c753fSRafael Auler // First check if a non-anonymous alias exists and move it to the front. 1082a34c753fSRafael Auler if (BD.getSymbols().size() > 1) { 1083d2c87699SAmir Ayupov auto Itr = llvm::find_if(BD.getSymbols(), [&](const MCSymbol *Symbol) { 1084a34c753fSRafael Auler return !isInternalSymbolName(Symbol->getName()); 1085a34c753fSRafael Auler }); 1086a34c753fSRafael Auler if (Itr != BD.getSymbols().end()) { 1087a34c753fSRafael Auler size_t Idx = std::distance(BD.getSymbols().begin(), Itr); 1088a34c753fSRafael Auler std::swap(BD.getSymbols()[0], BD.getSymbols()[Idx]); 1089a34c753fSRafael Auler continue; 1090a34c753fSRafael Auler } 1091a34c753fSRafael Auler } 1092a34c753fSRafael Auler 1093a34c753fSRafael Auler // We have to skip 0 size symbols since they will all collide. 1094a34c753fSRafael Auler if (BD.getSize() == 0) { 1095a34c753fSRafael Auler continue; 1096a34c753fSRafael Auler } 1097a34c753fSRafael Auler 1098a34c753fSRafael Auler const uint64_t Hash = BD.getSection().hash(BD); 1099a34c753fSRafael Auler const size_t Idx = Name.find("0x"); 110040c2e0faSMaksim Panchenko std::string NewName = 110140c2e0faSMaksim Panchenko (Twine(Name.substr(0, Idx)) + "_" + Twine::utohexstr(Hash)).str(); 1102a34c753fSRafael Auler if (getBinaryDataByName(NewName)) { 1103a34c753fSRafael Auler // Ignore collisions for symbols that appear to be padding 1104a34c753fSRafael Auler // (i.e. all zeros or a "hole") 1105a34c753fSRafael Auler if (!isPadding(BD)) { 1106a34c753fSRafael Auler if (opts::Verbosity) { 1107a34c753fSRafael Auler errs() << "BOLT-WARNING: collision detected when hashing " << BD 1108a34c753fSRafael Auler << " with new name (" << NewName << "), skipping.\n"; 1109a34c753fSRafael Auler } 1110a34c753fSRafael Auler ++NumCollisions; 1111a34c753fSRafael Auler } 1112a34c753fSRafael Auler continue; 1113a34c753fSRafael Auler } 111440c2e0faSMaksim Panchenko BD.Symbols.insert(BD.Symbols.begin(), Ctx->getOrCreateSymbol(NewName)); 1115a34c753fSRafael Auler GlobalSymbols[NewName] = &BD; 1116a34c753fSRafael Auler } 1117a34c753fSRafael Auler if (NumCollisions) { 1118a34c753fSRafael Auler errs() << "BOLT-WARNING: " << NumCollisions 1119a34c753fSRafael Auler << " collisions detected while hashing binary objects"; 1120a34c753fSRafael Auler if (!opts::Verbosity) 1121a34c753fSRafael Auler errs() << ". Use -v=1 to see the list."; 1122a34c753fSRafael Auler errs() << '\n'; 1123a34c753fSRafael Auler } 1124a34c753fSRafael Auler } 1125a34c753fSRafael Auler 11266aa735ceSAmir Ayupov bool BinaryContext::registerFragment(BinaryFunction &TargetFunction, 1127a34c753fSRafael Auler BinaryFunction &Function) const { 11286aa735ceSAmir Ayupov if (!isPotentialFragmentByName(TargetFunction, Function)) 11296aa735ceSAmir Ayupov return false; 11306aa735ceSAmir Ayupov assert(TargetFunction.isFragment() && "TargetFunction must be a fragment"); 11316aa735ceSAmir Ayupov if (TargetFunction.isParentFragment(&Function)) 11326aa735ceSAmir Ayupov return true; 11336aa735ceSAmir Ayupov TargetFunction.addParentFragment(Function); 1134a34c753fSRafael Auler Function.addFragment(TargetFunction); 1135a34c753fSRafael Auler if (!HasRelocations) { 1136a34c753fSRafael Auler TargetFunction.setSimple(false); 1137a34c753fSRafael Auler Function.setSimple(false); 1138a34c753fSRafael Auler } 1139a34c753fSRafael Auler if (opts::Verbosity >= 1) { 114040c2e0faSMaksim Panchenko outs() << "BOLT-INFO: marking " << TargetFunction << " as a fragment of " 114140c2e0faSMaksim Panchenko << Function << '\n'; 1142a34c753fSRafael Auler } 11436aa735ceSAmir Ayupov return true; 1144a34c753fSRafael Auler } 1145a34c753fSRafael Auler 114635efe1d8SVladislav Khmelevsky void BinaryContext::addAdrpAddRelocAArch64(BinaryFunction &BF, 114735efe1d8SVladislav Khmelevsky MCInst &LoadLowBits, 114835efe1d8SVladislav Khmelevsky MCInst &LoadHiBits, 114935efe1d8SVladislav Khmelevsky uint64_t Target) { 115035efe1d8SVladislav Khmelevsky const MCSymbol *TargetSymbol; 115135efe1d8SVladislav Khmelevsky uint64_t Addend = 0; 115235efe1d8SVladislav Khmelevsky std::tie(TargetSymbol, Addend) = handleAddressRef(Target, BF, 115335efe1d8SVladislav Khmelevsky /*IsPCRel*/ true); 115435efe1d8SVladislav Khmelevsky int64_t Val; 115535efe1d8SVladislav Khmelevsky MIB->replaceImmWithSymbolRef(LoadHiBits, TargetSymbol, Addend, Ctx.get(), Val, 115635efe1d8SVladislav Khmelevsky ELF::R_AARCH64_ADR_PREL_PG_HI21); 115735efe1d8SVladislav Khmelevsky MIB->replaceImmWithSymbolRef(LoadLowBits, TargetSymbol, Addend, Ctx.get(), 115835efe1d8SVladislav Khmelevsky Val, ELF::R_AARCH64_ADD_ABS_LO12_NC); 115935efe1d8SVladislav Khmelevsky } 116035efe1d8SVladislav Khmelevsky 116135efe1d8SVladislav Khmelevsky bool BinaryContext::handleAArch64Veneer(uint64_t Address, bool MatchOnly) { 116235efe1d8SVladislav Khmelevsky BinaryFunction *TargetFunction = getBinaryFunctionContainingAddress(Address); 116335efe1d8SVladislav Khmelevsky if (TargetFunction) 116435efe1d8SVladislav Khmelevsky return false; 116535efe1d8SVladislav Khmelevsky 116635efe1d8SVladislav Khmelevsky ErrorOr<BinarySection &> Section = getSectionForAddress(Address); 116735efe1d8SVladislav Khmelevsky assert(Section && "cannot get section for referenced address"); 116835efe1d8SVladislav Khmelevsky if (!Section->isText()) 116935efe1d8SVladislav Khmelevsky return false; 117035efe1d8SVladislav Khmelevsky 117135efe1d8SVladislav Khmelevsky bool Ret = false; 117235efe1d8SVladislav Khmelevsky StringRef SectionContents = Section->getContents(); 117335efe1d8SVladislav Khmelevsky uint64_t Offset = Address - Section->getAddress(); 117435efe1d8SVladislav Khmelevsky const uint64_t MaxSize = SectionContents.size() - Offset; 117535efe1d8SVladislav Khmelevsky const uint8_t *Bytes = 117635efe1d8SVladislav Khmelevsky reinterpret_cast<const uint8_t *>(SectionContents.data()); 117735efe1d8SVladislav Khmelevsky ArrayRef<uint8_t> Data(Bytes + Offset, MaxSize); 117835efe1d8SVladislav Khmelevsky 117935efe1d8SVladislav Khmelevsky auto matchVeneer = [&](BinaryFunction::InstrMapType &Instructions, 118035efe1d8SVladislav Khmelevsky MCInst &Instruction, uint64_t Offset, 118135efe1d8SVladislav Khmelevsky uint64_t AbsoluteInstrAddr, 118235efe1d8SVladislav Khmelevsky uint64_t TotalSize) -> bool { 118335efe1d8SVladislav Khmelevsky MCInst *TargetHiBits, *TargetLowBits; 118435efe1d8SVladislav Khmelevsky uint64_t TargetAddress, Count; 118535efe1d8SVladislav Khmelevsky Count = MIB->matchLinkerVeneer(Instructions.begin(), Instructions.end(), 118635efe1d8SVladislav Khmelevsky AbsoluteInstrAddr, Instruction, TargetHiBits, 118735efe1d8SVladislav Khmelevsky TargetLowBits, TargetAddress); 118835efe1d8SVladislav Khmelevsky if (!Count) 118935efe1d8SVladislav Khmelevsky return false; 119035efe1d8SVladislav Khmelevsky 119135efe1d8SVladislav Khmelevsky if (MatchOnly) 119235efe1d8SVladislav Khmelevsky return true; 119335efe1d8SVladislav Khmelevsky 119435efe1d8SVladislav Khmelevsky // NOTE The target symbol was created during disassemble's 119535efe1d8SVladislav Khmelevsky // handleExternalReference 119635efe1d8SVladislav Khmelevsky const MCSymbol *VeneerSymbol = getOrCreateGlobalSymbol(Address, "FUNCat"); 119735efe1d8SVladislav Khmelevsky BinaryFunction *Veneer = createBinaryFunction(VeneerSymbol->getName().str(), 119835efe1d8SVladislav Khmelevsky *Section, Address, TotalSize); 119935efe1d8SVladislav Khmelevsky addAdrpAddRelocAArch64(*Veneer, *TargetLowBits, *TargetHiBits, 120035efe1d8SVladislav Khmelevsky TargetAddress); 120135efe1d8SVladislav Khmelevsky MIB->addAnnotation(Instruction, "AArch64Veneer", true); 120235efe1d8SVladislav Khmelevsky Veneer->addInstruction(Offset, std::move(Instruction)); 120335efe1d8SVladislav Khmelevsky --Count; 120435efe1d8SVladislav Khmelevsky for (auto It = std::prev(Instructions.end()); Count != 0; 120535efe1d8SVladislav Khmelevsky It = std::prev(It), --Count) { 120635efe1d8SVladislav Khmelevsky MIB->addAnnotation(It->second, "AArch64Veneer", true); 120735efe1d8SVladislav Khmelevsky Veneer->addInstruction(It->first, std::move(It->second)); 120835efe1d8SVladislav Khmelevsky } 120935efe1d8SVladislav Khmelevsky 121035efe1d8SVladislav Khmelevsky Veneer->getOrCreateLocalLabel(Address); 121135efe1d8SVladislav Khmelevsky Veneer->setMaxSize(TotalSize); 121235efe1d8SVladislav Khmelevsky Veneer->updateState(BinaryFunction::State::Disassembled); 121335efe1d8SVladislav Khmelevsky LLVM_DEBUG(dbgs() << "BOLT-DEBUG: handling veneer function at 0x" << Address 121435efe1d8SVladislav Khmelevsky << "\n"); 121535efe1d8SVladislav Khmelevsky return true; 121635efe1d8SVladislav Khmelevsky }; 121735efe1d8SVladislav Khmelevsky 121835efe1d8SVladislav Khmelevsky uint64_t Size = 0, TotalSize = 0; 121935efe1d8SVladislav Khmelevsky BinaryFunction::InstrMapType VeneerInstructions; 122035efe1d8SVladislav Khmelevsky for (Offset = 0; Offset < MaxSize; Offset += Size) { 122135efe1d8SVladislav Khmelevsky MCInst Instruction; 122235efe1d8SVladislav Khmelevsky const uint64_t AbsoluteInstrAddr = Address + Offset; 122335efe1d8SVladislav Khmelevsky if (!SymbolicDisAsm->getInstruction(Instruction, Size, Data.slice(Offset), 122435efe1d8SVladislav Khmelevsky AbsoluteInstrAddr, nulls())) 122535efe1d8SVladislav Khmelevsky break; 122635efe1d8SVladislav Khmelevsky 122735efe1d8SVladislav Khmelevsky TotalSize += Size; 122835efe1d8SVladislav Khmelevsky if (MIB->isBranch(Instruction)) { 122935efe1d8SVladislav Khmelevsky Ret = matchVeneer(VeneerInstructions, Instruction, Offset, 123035efe1d8SVladislav Khmelevsky AbsoluteInstrAddr, TotalSize); 123135efe1d8SVladislav Khmelevsky break; 123235efe1d8SVladislav Khmelevsky } 123335efe1d8SVladislav Khmelevsky 123435efe1d8SVladislav Khmelevsky VeneerInstructions.emplace(Offset, std::move(Instruction)); 123535efe1d8SVladislav Khmelevsky } 123635efe1d8SVladislav Khmelevsky 123735efe1d8SVladislav Khmelevsky return Ret; 123835efe1d8SVladislav Khmelevsky } 123935efe1d8SVladislav Khmelevsky 124035efe1d8SVladislav Khmelevsky void BinaryContext::processInterproceduralReferences() { 124135efe1d8SVladislav Khmelevsky for (const std::pair<BinaryFunction *, uint64_t> &It : 124235efe1d8SVladislav Khmelevsky InterproceduralReferences) { 124335efe1d8SVladislav Khmelevsky BinaryFunction &Function = *It.first; 124435efe1d8SVladislav Khmelevsky uint64_t Address = It.second; 124535efe1d8SVladislav Khmelevsky if (!Address || Function.isIgnored()) 1246a34c753fSRafael Auler continue; 1247a34c753fSRafael Auler 1248a34c753fSRafael Auler BinaryFunction *TargetFunction = 1249a34c753fSRafael Auler getBinaryFunctionContainingAddress(Address); 1250a34c753fSRafael Auler if (&Function == TargetFunction) 1251a34c753fSRafael Auler continue; 1252a34c753fSRafael Auler 1253a34c753fSRafael Auler if (TargetFunction) { 125435efe1d8SVladislav Khmelevsky if (TargetFunction->isFragment() && 12556aa735ceSAmir Ayupov !registerFragment(*TargetFunction, Function)) { 12566aa735ceSAmir Ayupov errs() << "BOLT-WARNING: interprocedural reference between unrelated " 12576aa735ceSAmir Ayupov "fragments: " 12586aa735ceSAmir Ayupov << Function.getPrintName() << " and " 12596aa735ceSAmir Ayupov << TargetFunction->getPrintName() << '\n'; 12606aa735ceSAmir Ayupov } 1261a34c753fSRafael Auler if (uint64_t Offset = Address - TargetFunction->getAddress()) 1262a34c753fSRafael Auler TargetFunction->addEntryPointAtOffset(Offset); 1263a34c753fSRafael Auler 1264a34c753fSRafael Auler continue; 1265a34c753fSRafael Auler } 1266a34c753fSRafael Auler 1267a34c753fSRafael Auler // Check if address falls in function padding space - this could be 1268a34c753fSRafael Auler // unmarked data in code. In this case adjust the padding space size. 1269a34c753fSRafael Auler ErrorOr<BinarySection &> Section = getSectionForAddress(Address); 1270a34c753fSRafael Auler assert(Section && "cannot get section for referenced address"); 1271a34c753fSRafael Auler 1272a34c753fSRafael Auler if (!Section->isText()) 1273a34c753fSRafael Auler continue; 1274a34c753fSRafael Auler 1275a34c753fSRafael Auler // PLT requires special handling and could be ignored in this context. 1276a34c753fSRafael Auler StringRef SectionName = Section->getName(); 1277a34c753fSRafael Auler if (SectionName == ".plt" || SectionName == ".plt.got") 1278a34c753fSRafael Auler continue; 1279a34c753fSRafael Auler 128035efe1d8SVladislav Khmelevsky // Check if it is aarch64 veneer written at Address 128135efe1d8SVladislav Khmelevsky if (isAArch64() && handleAArch64Veneer(Address)) 128235efe1d8SVladislav Khmelevsky continue; 128335efe1d8SVladislav Khmelevsky 1284a34c753fSRafael Auler if (opts::processAllFunctions()) { 1285a34c753fSRafael Auler errs() << "BOLT-ERROR: cannot process binaries with unmarked " 128640c2e0faSMaksim Panchenko << "object in code at address 0x" << Twine::utohexstr(Address) 128740c2e0faSMaksim Panchenko << " belonging to section " << SectionName << " in current mode\n"; 1288a34c753fSRafael Auler exit(1); 1289a34c753fSRafael Auler } 1290a34c753fSRafael Auler 129140c2e0faSMaksim Panchenko TargetFunction = getBinaryFunctionContainingAddress(Address, 1292a34c753fSRafael Auler /*CheckPastEnd=*/false, 1293a34c753fSRafael Auler /*UseMaxSize=*/true); 1294a34c753fSRafael Auler // We are not going to overwrite non-simple functions, but for simple 1295a34c753fSRafael Auler // ones - adjust the padding size. 1296a34c753fSRafael Auler if (TargetFunction && TargetFunction->isSimple()) { 1297a34c753fSRafael Auler errs() << "BOLT-WARNING: function " << *TargetFunction 1298a34c753fSRafael Auler << " has an object detected in a padding region at address 0x" 1299a34c753fSRafael Auler << Twine::utohexstr(Address) << '\n'; 1300a34c753fSRafael Auler TargetFunction->setMaxSize(TargetFunction->getSize()); 1301a34c753fSRafael Auler } 1302a34c753fSRafael Auler } 1303a34c753fSRafael Auler 130435efe1d8SVladislav Khmelevsky InterproceduralReferences.clear(); 1305a34c753fSRafael Auler } 1306a34c753fSRafael Auler 1307a34c753fSRafael Auler void BinaryContext::postProcessSymbolTable() { 1308a34c753fSRafael Auler fixBinaryDataHoles(); 1309a34c753fSRafael Auler bool Valid = true; 1310a34c753fSRafael Auler for (auto &Entry : BinaryDataMap) { 1311a34c753fSRafael Auler BinaryData *BD = Entry.second; 1312a34c753fSRafael Auler if ((BD->getName().startswith("SYMBOLat") || 1313a34c753fSRafael Auler BD->getName().startswith("DATAat")) && 131440c2e0faSMaksim Panchenko !BD->getParent() && !BD->getSize() && !BD->isAbsolute() && 1315a34c753fSRafael Auler BD->getSection()) { 1316a34c753fSRafael Auler errs() << "BOLT-WARNING: zero-sized top level symbol: " << *BD << "\n"; 1317a34c753fSRafael Auler Valid = false; 1318a34c753fSRafael Auler } 1319a34c753fSRafael Auler } 1320a34c753fSRafael Auler assert(Valid); 1321c907d6e0SAmir Ayupov (void)Valid; 1322a34c753fSRafael Auler generateSymbolHashes(); 1323a34c753fSRafael Auler } 1324a34c753fSRafael Auler 1325a34c753fSRafael Auler void BinaryContext::foldFunction(BinaryFunction &ChildBF, 1326a34c753fSRafael Auler BinaryFunction &ParentBF) { 1327a34c753fSRafael Auler assert(!ChildBF.isMultiEntry() && !ParentBF.isMultiEntry() && 1328a34c753fSRafael Auler "cannot merge functions with multiple entry points"); 1329a34c753fSRafael Auler 1330a34c753fSRafael Auler std::unique_lock<std::shared_timed_mutex> WriteCtxLock(CtxMutex, 1331a34c753fSRafael Auler std::defer_lock); 1332a34c753fSRafael Auler std::unique_lock<std::shared_timed_mutex> WriteSymbolMapLock( 1333a34c753fSRafael Auler SymbolToFunctionMapMutex, std::defer_lock); 1334a34c753fSRafael Auler 1335a34c753fSRafael Auler const StringRef ChildName = ChildBF.getOneName(); 1336a34c753fSRafael Auler 1337a34c753fSRafael Auler // Move symbols over and update bookkeeping info. 1338a34c753fSRafael Auler for (MCSymbol *Symbol : ChildBF.getSymbols()) { 1339a34c753fSRafael Auler ParentBF.getSymbols().push_back(Symbol); 1340a34c753fSRafael Auler WriteSymbolMapLock.lock(); 1341a34c753fSRafael Auler SymbolToFunctionMap[Symbol] = &ParentBF; 1342a34c753fSRafael Auler WriteSymbolMapLock.unlock(); 1343a34c753fSRafael Auler // NB: there's no need to update BinaryDataMap and GlobalSymbols. 1344a34c753fSRafael Auler } 1345a34c753fSRafael Auler ChildBF.getSymbols().clear(); 1346a34c753fSRafael Auler 1347a34c753fSRafael Auler // Move other names the child function is known under. 1348d2c87699SAmir Ayupov llvm::move(ChildBF.Aliases, std::back_inserter(ParentBF.Aliases)); 1349a34c753fSRafael Auler ChildBF.Aliases.clear(); 1350a34c753fSRafael Auler 1351a34c753fSRafael Auler if (HasRelocations) { 1352a34c753fSRafael Auler // Merge execution counts of ChildBF into those of ParentBF. 1353a34c753fSRafael Auler // Without relocations, we cannot reliably merge profiles as both functions 1354a34c753fSRafael Auler // continue to exist and either one can be executed. 1355a34c753fSRafael Auler ChildBF.mergeProfileDataInto(ParentBF); 1356a34c753fSRafael Auler 1357a34c753fSRafael Auler std::shared_lock<std::shared_timed_mutex> ReadBfsLock(BinaryFunctionsMutex, 1358a34c753fSRafael Auler std::defer_lock); 1359a34c753fSRafael Auler std::unique_lock<std::shared_timed_mutex> WriteBfsLock(BinaryFunctionsMutex, 1360a34c753fSRafael Auler std::defer_lock); 1361a34c753fSRafael Auler // Remove ChildBF from the global set of functions in relocs mode. 1362a34c753fSRafael Auler ReadBfsLock.lock(); 1363a34c753fSRafael Auler auto FI = BinaryFunctions.find(ChildBF.getAddress()); 1364a34c753fSRafael Auler ReadBfsLock.unlock(); 1365a34c753fSRafael Auler 1366a34c753fSRafael Auler assert(FI != BinaryFunctions.end() && "function not found"); 1367a34c753fSRafael Auler assert(&ChildBF == &FI->second && "function mismatch"); 1368a34c753fSRafael Auler 1369a34c753fSRafael Auler WriteBfsLock.lock(); 1370a34c753fSRafael Auler ChildBF.clearDisasmState(); 1371a34c753fSRafael Auler FI = BinaryFunctions.erase(FI); 1372a34c753fSRafael Auler WriteBfsLock.unlock(); 1373a34c753fSRafael Auler 1374a34c753fSRafael Auler } else { 1375a34c753fSRafael Auler // In non-relocation mode we keep the function, but rename it. 1376a34c753fSRafael Auler std::string NewName = "__ICF_" + ChildName.str(); 1377a34c753fSRafael Auler 1378a34c753fSRafael Auler WriteCtxLock.lock(); 1379a34c753fSRafael Auler ChildBF.getSymbols().push_back(Ctx->getOrCreateSymbol(NewName)); 1380a34c753fSRafael Auler WriteCtxLock.unlock(); 1381a34c753fSRafael Auler 1382a34c753fSRafael Auler ChildBF.setFolded(&ParentBF); 1383a34c753fSRafael Auler } 1384a34c753fSRafael Auler } 1385a34c753fSRafael Auler 1386a34c753fSRafael Auler void BinaryContext::fixBinaryDataHoles() { 1387a34c753fSRafael Auler assert(validateObjectNesting() && "object nesting inconsitency detected"); 1388a34c753fSRafael Auler 1389a34c753fSRafael Auler for (BinarySection &Section : allocatableSections()) { 1390a34c753fSRafael Auler std::vector<std::pair<uint64_t, uint64_t>> Holes; 1391a34c753fSRafael Auler 1392a34c753fSRafael Auler auto isNotHole = [&Section](const binary_data_iterator &Itr) { 1393a34c753fSRafael Auler BinaryData *BD = Itr->second; 139440c2e0faSMaksim Panchenko bool isHole = (!BD->getParent() && !BD->getSize() && BD->isObject() && 1395a34c753fSRafael Auler (BD->getName().startswith("SYMBOLat0x") || 1396a34c753fSRafael Auler BD->getName().startswith("DATAat0x") || 1397a34c753fSRafael Auler BD->getName().startswith("ANONYMOUS"))); 1398a34c753fSRafael Auler return !isHole && BD->getSection() == Section && !BD->getParent(); 1399a34c753fSRafael Auler }; 1400a34c753fSRafael Auler 1401a34c753fSRafael Auler auto BDStart = BinaryDataMap.begin(); 1402a34c753fSRafael Auler auto BDEnd = BinaryDataMap.end(); 1403a34c753fSRafael Auler auto Itr = FilteredBinaryDataIterator(isNotHole, BDStart, BDEnd); 1404a34c753fSRafael Auler auto End = FilteredBinaryDataIterator(isNotHole, BDEnd, BDEnd); 1405a34c753fSRafael Auler 1406a34c753fSRafael Auler uint64_t EndAddress = Section.getAddress(); 1407a34c753fSRafael Auler 1408a34c753fSRafael Auler while (Itr != End) { 1409a34c753fSRafael Auler if (Itr->second->getAddress() > EndAddress) { 1410a34c753fSRafael Auler uint64_t Gap = Itr->second->getAddress() - EndAddress; 1411a34c753fSRafael Auler Holes.emplace_back(EndAddress, Gap); 1412a34c753fSRafael Auler } 1413a34c753fSRafael Auler EndAddress = Itr->second->getEndAddress(); 1414a34c753fSRafael Auler ++Itr; 1415a34c753fSRafael Auler } 1416a34c753fSRafael Auler 14173652483cSRafael Auler if (EndAddress < Section.getEndAddress()) 1418a34c753fSRafael Auler Holes.emplace_back(EndAddress, Section.getEndAddress() - EndAddress); 1419a34c753fSRafael Auler 1420a34c753fSRafael Auler // If there is already a symbol at the start of the hole, grow that symbol 1421a34c753fSRafael Auler // to cover the rest. Otherwise, create a new symbol to cover the hole. 1422a34c753fSRafael Auler for (std::pair<uint64_t, uint64_t> &Hole : Holes) { 1423a34c753fSRafael Auler BinaryData *BD = getBinaryDataAtAddress(Hole.first); 1424a34c753fSRafael Auler if (BD) { 1425a34c753fSRafael Auler // BD->getSection() can be != Section if there are sections that 1426a34c753fSRafael Auler // overlap. In this case it is probably safe to just skip the holes 1427a34c753fSRafael Auler // since the overlapping section will not(?) have any symbols in it. 1428a34c753fSRafael Auler if (BD->getSection() == Section) 1429a34c753fSRafael Auler setBinaryDataSize(Hole.first, Hole.second); 1430a34c753fSRafael Auler } else { 1431a34c753fSRafael Auler getOrCreateGlobalSymbol(Hole.first, "HOLEat", Hole.second, 1); 1432a34c753fSRafael Auler } 1433a34c753fSRafael Auler } 1434a34c753fSRafael Auler } 1435a34c753fSRafael Auler 1436a34c753fSRafael Auler assert(validateObjectNesting() && "object nesting inconsitency detected"); 1437a34c753fSRafael Auler assert(validateHoles() && "top level hole detected in object map"); 1438a34c753fSRafael Auler } 1439a34c753fSRafael Auler 1440a34c753fSRafael Auler void BinaryContext::printGlobalSymbols(raw_ostream &OS) const { 1441a34c753fSRafael Auler const BinarySection *CurrentSection = nullptr; 1442a34c753fSRafael Auler bool FirstSection = true; 1443a34c753fSRafael Auler 1444a34c753fSRafael Auler for (auto &Entry : BinaryDataMap) { 1445a34c753fSRafael Auler const BinaryData *BD = Entry.second; 1446a34c753fSRafael Auler const BinarySection &Section = BD->getSection(); 1447a34c753fSRafael Auler if (FirstSection || Section != *CurrentSection) { 1448a34c753fSRafael Auler uint64_t Address, Size; 1449a34c753fSRafael Auler StringRef Name = Section.getName(); 1450a34c753fSRafael Auler if (Section) { 1451a34c753fSRafael Auler Address = Section.getAddress(); 1452a34c753fSRafael Auler Size = Section.getSize(); 1453a34c753fSRafael Auler } else { 1454a34c753fSRafael Auler Address = BD->getAddress(); 1455a34c753fSRafael Auler Size = BD->getSize(); 1456a34c753fSRafael Auler } 1457a34c753fSRafael Auler OS << "BOLT-INFO: Section " << Name << ", " 1458a34c753fSRafael Auler << "0x" + Twine::utohexstr(Address) << ":" 145940c2e0faSMaksim Panchenko << "0x" + Twine::utohexstr(Address + Size) << "/" << Size << "\n"; 1460a34c753fSRafael Auler CurrentSection = &Section; 1461a34c753fSRafael Auler FirstSection = false; 1462a34c753fSRafael Auler } 1463a34c753fSRafael Auler 1464a34c753fSRafael Auler OS << "BOLT-INFO: "; 1465a34c753fSRafael Auler const BinaryData *P = BD->getParent(); 1466a34c753fSRafael Auler while (P) { 1467a34c753fSRafael Auler OS << " "; 1468a34c753fSRafael Auler P = P->getParent(); 1469a34c753fSRafael Auler } 1470a34c753fSRafael Auler OS << *BD << "\n"; 1471a34c753fSRafael Auler } 1472a34c753fSRafael Auler } 1473a34c753fSRafael Auler 1474014cd37fSAlexander Yermolovich Expected<unsigned> BinaryContext::getDwarfFile( 1475014cd37fSAlexander Yermolovich StringRef Directory, StringRef FileName, unsigned FileNumber, 1476014cd37fSAlexander Yermolovich Optional<MD5::MD5Result> Checksum, Optional<StringRef> Source, 1477014cd37fSAlexander Yermolovich unsigned CUID, unsigned DWARFVersion) { 1478a34c753fSRafael Auler DwarfLineTable &Table = DwarfLineTablesCUMap[CUID]; 1479014cd37fSAlexander Yermolovich return Table.tryGetFile(Directory, FileName, Checksum, Source, DWARFVersion, 1480014cd37fSAlexander Yermolovich FileNumber); 1481a34c753fSRafael Auler } 1482a34c753fSRafael Auler 1483a34c753fSRafael Auler unsigned BinaryContext::addDebugFilenameToUnit(const uint32_t DestCUID, 1484a34c753fSRafael Auler const uint32_t SrcCUID, 1485a34c753fSRafael Auler unsigned FileIndex) { 1486a34c753fSRafael Auler DWARFCompileUnit *SrcUnit = DwCtx->getCompileUnitForOffset(SrcCUID); 1487a34c753fSRafael Auler const DWARFDebugLine::LineTable *LineTable = 1488a34c753fSRafael Auler DwCtx->getLineTableForUnit(SrcUnit); 1489a34c753fSRafael Auler const std::vector<DWARFDebugLine::FileNameEntry> &FileNames = 1490a34c753fSRafael Auler LineTable->Prologue.FileNames; 1491a34c753fSRafael Auler // Dir indexes start at 1, as DWARF file numbers, and a dir index 0 1492a34c753fSRafael Auler // means empty dir. 1493a34c753fSRafael Auler assert(FileIndex > 0 && FileIndex <= FileNames.size() && 1494a34c753fSRafael Auler "FileIndex out of range for the compilation unit."); 1495a34c753fSRafael Auler StringRef Dir = ""; 1496a34c753fSRafael Auler if (FileNames[FileIndex - 1].DirIdx != 0) { 1497a34c753fSRafael Auler if (Optional<const char *> DirName = dwarf::toString( 1498a34c753fSRafael Auler LineTable->Prologue 1499a34c753fSRafael Auler .IncludeDirectories[FileNames[FileIndex - 1].DirIdx - 1])) { 1500a34c753fSRafael Auler Dir = *DirName; 1501a34c753fSRafael Auler } 1502a34c753fSRafael Auler } 1503a34c753fSRafael Auler StringRef FileName = ""; 1504a34c753fSRafael Auler if (Optional<const char *> FName = 1505a34c753fSRafael Auler dwarf::toString(FileNames[FileIndex - 1].Name)) 1506a34c753fSRafael Auler FileName = *FName; 1507a34c753fSRafael Auler assert(FileName != ""); 1508014cd37fSAlexander Yermolovich DWARFCompileUnit *DstUnit = DwCtx->getCompileUnitForOffset(DestCUID); 1509014cd37fSAlexander Yermolovich return cantFail(getDwarfFile(Dir, FileName, 0, None, None, DestCUID, 1510014cd37fSAlexander Yermolovich DstUnit->getVersion())); 1511a34c753fSRafael Auler } 1512a34c753fSRafael Auler 1513a34c753fSRafael Auler std::vector<BinaryFunction *> BinaryContext::getSortedFunctions() { 1514a34c753fSRafael Auler std::vector<BinaryFunction *> SortedFunctions(BinaryFunctions.size()); 1515d2c87699SAmir Ayupov llvm::transform(BinaryFunctions, SortedFunctions.begin(), 1516a34c753fSRafael Auler [](std::pair<const uint64_t, BinaryFunction> &BFI) { 1517a34c753fSRafael Auler return &BFI.second; 1518a34c753fSRafael Auler }); 1519a34c753fSRafael Auler 1520d2c87699SAmir Ayupov llvm::stable_sort(SortedFunctions, 1521a34c753fSRafael Auler [](const BinaryFunction *A, const BinaryFunction *B) { 1522a34c753fSRafael Auler if (A->hasValidIndex() && B->hasValidIndex()) { 1523a34c753fSRafael Auler return A->getIndex() < B->getIndex(); 1524a34c753fSRafael Auler } 1525a34c753fSRafael Auler return A->hasValidIndex(); 1526a34c753fSRafael Auler }); 1527a34c753fSRafael Auler return SortedFunctions; 1528a34c753fSRafael Auler } 1529a34c753fSRafael Auler 1530a34c753fSRafael Auler std::vector<BinaryFunction *> BinaryContext::getAllBinaryFunctions() { 1531a34c753fSRafael Auler std::vector<BinaryFunction *> AllFunctions; 1532a34c753fSRafael Auler AllFunctions.reserve(BinaryFunctions.size() + InjectedBinaryFunctions.size()); 1533d2c87699SAmir Ayupov llvm::transform(BinaryFunctions, std::back_inserter(AllFunctions), 1534a34c753fSRafael Auler [](std::pair<const uint64_t, BinaryFunction> &BFI) { 1535a34c753fSRafael Auler return &BFI.second; 1536a34c753fSRafael Auler }); 1537d2c87699SAmir Ayupov llvm::copy(InjectedBinaryFunctions, std::back_inserter(AllFunctions)); 1538a34c753fSRafael Auler 1539a34c753fSRafael Auler return AllFunctions; 1540a34c753fSRafael Auler } 1541a34c753fSRafael Auler 1542a34c753fSRafael Auler Optional<DWARFUnit *> BinaryContext::getDWOCU(uint64_t DWOId) { 1543a34c753fSRafael Auler auto Iter = DWOCUs.find(DWOId); 1544a34c753fSRafael Auler if (Iter == DWOCUs.end()) 1545a34c753fSRafael Auler return None; 1546a34c753fSRafael Auler 1547a34c753fSRafael Auler return Iter->second; 1548a34c753fSRafael Auler } 1549a34c753fSRafael Auler 15507dee646bSAmir Ayupov DWARFContext *BinaryContext::getDWOContext() const { 1551a34c753fSRafael Auler if (DWOCUs.empty()) 1552a34c753fSRafael Auler return nullptr; 1553a34c753fSRafael Auler return &DWOCUs.begin()->second->getContext(); 1554a34c753fSRafael Auler } 1555a34c753fSRafael Auler 1556a34c753fSRafael Auler /// Handles DWO sections that can either be in .o, .dwo or .dwp files. 1557a34c753fSRafael Auler void BinaryContext::preprocessDWODebugInfo() { 1558a34c753fSRafael Auler for (const std::unique_ptr<DWARFUnit> &CU : DwCtx->compile_units()) { 1559a34c753fSRafael Auler DWARFUnit *const DwarfUnit = CU.get(); 1560a34c753fSRafael Auler if (llvm::Optional<uint64_t> DWOId = DwarfUnit->getDWOId()) { 1561a34c753fSRafael Auler DWARFUnit *DWOCU = DwarfUnit->getNonSkeletonUnitDIE(false).getDwarfUnit(); 1562a34c753fSRafael Auler if (!DWOCU->isDWOUnit()) { 1563a34c753fSRafael Auler std::string DWOName = dwarf::toString( 1564a34c753fSRafael Auler DwarfUnit->getUnitDIE().find( 1565a34c753fSRafael Auler {dwarf::DW_AT_dwo_name, dwarf::DW_AT_GNU_dwo_name}), 1566a34c753fSRafael Auler ""); 1567a34c753fSRafael Auler outs() << "BOLT-WARNING: Debug Fission: DWO debug information for " 1568a34c753fSRafael Auler << DWOName 1569a34c753fSRafael Auler << " was not retrieved and won't be updated. Please check " 1570a34c753fSRafael Auler "relative path.\n"; 1571a34c753fSRafael Auler continue; 1572a34c753fSRafael Auler } 1573a34c753fSRafael Auler DWOCUs[*DWOId] = DWOCU; 1574a34c753fSRafael Auler } 1575a34c753fSRafael Auler } 1576a34c753fSRafael Auler } 1577a34c753fSRafael Auler 1578a34c753fSRafael Auler void BinaryContext::preprocessDebugInfo() { 1579a34c753fSRafael Auler struct CURange { 1580a34c753fSRafael Auler uint64_t LowPC; 1581a34c753fSRafael Auler uint64_t HighPC; 1582a34c753fSRafael Auler DWARFUnit *Unit; 1583a34c753fSRafael Auler 158440c2e0faSMaksim Panchenko bool operator<(const CURange &Other) const { return LowPC < Other.LowPC; } 1585a34c753fSRafael Auler }; 1586a34c753fSRafael Auler 1587a34c753fSRafael Auler // Building a map of address ranges to CUs similar to .debug_aranges and use 1588a34c753fSRafael Auler // it to assign CU to functions. 1589a34c753fSRafael Auler std::vector<CURange> AllRanges; 1590a34c753fSRafael Auler AllRanges.reserve(DwCtx->getNumCompileUnits()); 1591a34c753fSRafael Auler for (const std::unique_ptr<DWARFUnit> &CU : DwCtx->compile_units()) { 1592a34c753fSRafael Auler Expected<DWARFAddressRangesVector> RangesOrError = 1593a34c753fSRafael Auler CU->getUnitDIE().getAddressRanges(); 1594a34c753fSRafael Auler if (!RangesOrError) { 1595a34c753fSRafael Auler consumeError(RangesOrError.takeError()); 1596a34c753fSRafael Auler continue; 1597a34c753fSRafael Auler } 1598a34c753fSRafael Auler for (DWARFAddressRange &Range : *RangesOrError) { 1599a34c753fSRafael Auler // Parts of the debug info could be invalidated due to corresponding code 1600a34c753fSRafael Auler // being removed from the binary by the linker. Hence we check if the 1601a34c753fSRafael Auler // address is a valid one. 1602a34c753fSRafael Auler if (containsAddress(Range.LowPC)) 1603a34c753fSRafael Auler AllRanges.emplace_back(CURange{Range.LowPC, Range.HighPC, CU.get()}); 1604a34c753fSRafael Auler } 1605014cd37fSAlexander Yermolovich 1606014cd37fSAlexander Yermolovich ContainsDwarf5 |= CU->getVersion() >= 5; 1607014cd37fSAlexander Yermolovich ContainsDwarfLegacy |= CU->getVersion() < 5; 1608a34c753fSRafael Auler } 1609a34c753fSRafael Auler 1610d2c87699SAmir Ayupov llvm::sort(AllRanges); 1611a34c753fSRafael Auler for (auto &KV : BinaryFunctions) { 1612a34c753fSRafael Auler const uint64_t FunctionAddress = KV.first; 1613a34c753fSRafael Auler BinaryFunction &Function = KV.second; 1614a34c753fSRafael Auler 1615d2c87699SAmir Ayupov auto It = llvm::partition_point( 1616d2c87699SAmir Ayupov AllRanges, [=](CURange R) { return R.HighPC <= FunctionAddress; }); 1617d2c87699SAmir Ayupov if (It != AllRanges.end() && It->LowPC <= FunctionAddress) 1618a34c753fSRafael Auler Function.setDWARFUnit(It->Unit); 1619a34c753fSRafael Auler } 1620a34c753fSRafael Auler 1621a34c753fSRafael Auler // Discover units with debug info that needs to be updated. 1622a34c753fSRafael Auler for (const auto &KV : BinaryFunctions) { 1623a34c753fSRafael Auler const BinaryFunction &BF = KV.second; 1624a34c753fSRafael Auler if (shouldEmit(BF) && BF.getDWARFUnit()) 1625a34c753fSRafael Auler ProcessedCUs.insert(BF.getDWARFUnit()); 1626a34c753fSRafael Auler } 1627a34c753fSRafael Auler 1628a34c753fSRafael Auler // Clear debug info for functions from units that we are not going to process. 1629a34c753fSRafael Auler for (auto &KV : BinaryFunctions) { 1630a34c753fSRafael Auler BinaryFunction &BF = KV.second; 1631a34c753fSRafael Auler if (BF.getDWARFUnit() && !ProcessedCUs.count(BF.getDWARFUnit())) 1632a34c753fSRafael Auler BF.setDWARFUnit(nullptr); 1633a34c753fSRafael Auler } 1634a34c753fSRafael Auler 1635a34c753fSRafael Auler if (opts::Verbosity >= 1) { 1636a34c753fSRafael Auler outs() << "BOLT-INFO: " << ProcessedCUs.size() << " out of " 1637a34c753fSRafael Auler << DwCtx->getNumCompileUnits() << " CUs will be updated\n"; 1638a34c753fSRafael Auler } 1639a34c753fSRafael Auler 1640ba1ac98cSAlexander Yermolovich preprocessDWODebugInfo(); 1641ba1ac98cSAlexander Yermolovich 1642a34c753fSRafael Auler // Populate MCContext with DWARF files from all units. 1643a34c753fSRafael Auler StringRef GlobalPrefix = AsmInfo->getPrivateGlobalPrefix(); 1644a34c753fSRafael Auler for (const std::unique_ptr<DWARFUnit> &CU : DwCtx->compile_units()) { 1645a34c753fSRafael Auler const uint64_t CUID = CU->getOffset(); 1646014cd37fSAlexander Yermolovich DwarfLineTable &BinaryLineTable = getDwarfLineTable(CUID); 1647014cd37fSAlexander Yermolovich BinaryLineTable.setLabel(Ctx->getOrCreateSymbol( 1648a34c753fSRafael Auler GlobalPrefix + "line_table_start" + Twine(CUID))); 1649a34c753fSRafael Auler 1650a34c753fSRafael Auler if (!ProcessedCUs.count(CU.get())) 1651a34c753fSRafael Auler continue; 1652a34c753fSRafael Auler 1653a34c753fSRafael Auler const DWARFDebugLine::LineTable *LineTable = 1654a34c753fSRafael Auler DwCtx->getLineTableForUnit(CU.get()); 1655a34c753fSRafael Auler const std::vector<DWARFDebugLine::FileNameEntry> &FileNames = 1656a34c753fSRafael Auler LineTable->Prologue.FileNames; 1657a34c753fSRafael Auler 1658014cd37fSAlexander Yermolovich uint16_t DwarfVersion = LineTable->Prologue.getVersion(); 1659014cd37fSAlexander Yermolovich if (DwarfVersion >= 5) { 1660014cd37fSAlexander Yermolovich Optional<MD5::MD5Result> Checksum = None; 1661014cd37fSAlexander Yermolovich if (LineTable->Prologue.ContentTypes.HasMD5) 1662014cd37fSAlexander Yermolovich Checksum = LineTable->Prologue.FileNames[0].Checksum; 1663ba1ac98cSAlexander Yermolovich Optional<const char *> Name = 1664ba1ac98cSAlexander Yermolovich dwarf::toString(CU->getUnitDIE().find(dwarf::DW_AT_name), nullptr); 1665ba1ac98cSAlexander Yermolovich if (Optional<uint64_t> DWOID = CU->getDWOId()) { 1666ba1ac98cSAlexander Yermolovich auto Iter = DWOCUs.find(*DWOID); 1667ba1ac98cSAlexander Yermolovich assert(Iter != DWOCUs.end() && "DWO CU was not found."); 1668ba1ac98cSAlexander Yermolovich Name = dwarf::toString( 1669ba1ac98cSAlexander Yermolovich Iter->second->getUnitDIE().find(dwarf::DW_AT_name), nullptr); 1670ba1ac98cSAlexander Yermolovich } 1671ba1ac98cSAlexander Yermolovich BinaryLineTable.setRootFile(CU->getCompilationDir(), *Name, Checksum, 1672ba1ac98cSAlexander Yermolovich None); 1673014cd37fSAlexander Yermolovich } 1674014cd37fSAlexander Yermolovich 1675014cd37fSAlexander Yermolovich BinaryLineTable.setDwarfVersion(DwarfVersion); 1676014cd37fSAlexander Yermolovich 1677a34c753fSRafael Auler // Assign a unique label to every line table, one per CU. 1678a34c753fSRafael Auler // Make sure empty debug line tables are registered too. 1679a34c753fSRafael Auler if (FileNames.empty()) { 1680014cd37fSAlexander Yermolovich cantFail( 1681014cd37fSAlexander Yermolovich getDwarfFile("", "<unknown>", 0, None, None, CUID, DwarfVersion)); 1682a34c753fSRafael Auler continue; 1683a34c753fSRafael Auler } 1684014cd37fSAlexander Yermolovich const uint32_t Offset = DwarfVersion < 5 ? 1 : 0; 1685a34c753fSRafael Auler for (size_t I = 0, Size = FileNames.size(); I != Size; ++I) { 1686a34c753fSRafael Auler // Dir indexes start at 1, as DWARF file numbers, and a dir index 0 1687a34c753fSRafael Auler // means empty dir. 1688a34c753fSRafael Auler StringRef Dir = ""; 1689014cd37fSAlexander Yermolovich if (FileNames[I].DirIdx != 0 || DwarfVersion >= 5) 1690a34c753fSRafael Auler if (Optional<const char *> DirName = dwarf::toString( 1691a34c753fSRafael Auler LineTable->Prologue 1692014cd37fSAlexander Yermolovich .IncludeDirectories[FileNames[I].DirIdx - Offset])) 1693a34c753fSRafael Auler Dir = *DirName; 1694a34c753fSRafael Auler StringRef FileName = ""; 1695a34c753fSRafael Auler if (Optional<const char *> FName = dwarf::toString(FileNames[I].Name)) 1696a34c753fSRafael Auler FileName = *FName; 1697a34c753fSRafael Auler assert(FileName != ""); 1698014cd37fSAlexander Yermolovich Optional<MD5::MD5Result> Checksum = None; 1699014cd37fSAlexander Yermolovich if (DwarfVersion >= 5 && LineTable->Prologue.ContentTypes.HasMD5) 1700014cd37fSAlexander Yermolovich Checksum = LineTable->Prologue.FileNames[I].Checksum; 1701014cd37fSAlexander Yermolovich cantFail( 1702014cd37fSAlexander Yermolovich getDwarfFile(Dir, FileName, 0, Checksum, None, CUID, DwarfVersion)); 1703a34c753fSRafael Auler } 1704a34c753fSRafael Auler } 1705a34c753fSRafael Auler } 1706a34c753fSRafael Auler 1707a34c753fSRafael Auler bool BinaryContext::shouldEmit(const BinaryFunction &Function) const { 17084c14519eSVladislav Khmelevsky if (Function.isPseudo()) 17094c14519eSVladislav Khmelevsky return false; 17104c14519eSVladislav Khmelevsky 1711a34c753fSRafael Auler if (opts::processAllFunctions()) 1712a34c753fSRafael Auler return true; 1713a34c753fSRafael Auler 1714a34c753fSRafael Auler if (Function.isIgnored()) 1715a34c753fSRafael Auler return false; 1716a34c753fSRafael Auler 1717a34c753fSRafael Auler // In relocation mode we will emit non-simple functions with CFG. 1718a34c753fSRafael Auler // If the function does not have a CFG it should be marked as ignored. 1719a34c753fSRafael Auler return HasRelocations || Function.isSimple(); 1720a34c753fSRafael Auler } 1721a34c753fSRafael Auler 1722a34c753fSRafael Auler void BinaryContext::printCFI(raw_ostream &OS, const MCCFIInstruction &Inst) { 1723a34c753fSRafael Auler uint32_t Operation = Inst.getOperation(); 1724a34c753fSRafael Auler switch (Operation) { 1725a34c753fSRafael Auler case MCCFIInstruction::OpSameValue: 1726a34c753fSRafael Auler OS << "OpSameValue Reg" << Inst.getRegister(); 1727a34c753fSRafael Auler break; 1728a34c753fSRafael Auler case MCCFIInstruction::OpRememberState: 1729a34c753fSRafael Auler OS << "OpRememberState"; 1730a34c753fSRafael Auler break; 1731a34c753fSRafael Auler case MCCFIInstruction::OpRestoreState: 1732a34c753fSRafael Auler OS << "OpRestoreState"; 1733a34c753fSRafael Auler break; 1734a34c753fSRafael Auler case MCCFIInstruction::OpOffset: 1735a34c753fSRafael Auler OS << "OpOffset Reg" << Inst.getRegister() << " " << Inst.getOffset(); 1736a34c753fSRafael Auler break; 1737a34c753fSRafael Auler case MCCFIInstruction::OpDefCfaRegister: 1738a34c753fSRafael Auler OS << "OpDefCfaRegister Reg" << Inst.getRegister(); 1739a34c753fSRafael Auler break; 1740a34c753fSRafael Auler case MCCFIInstruction::OpDefCfaOffset: 1741a34c753fSRafael Auler OS << "OpDefCfaOffset " << Inst.getOffset(); 1742a34c753fSRafael Auler break; 1743a34c753fSRafael Auler case MCCFIInstruction::OpDefCfa: 1744a34c753fSRafael Auler OS << "OpDefCfa Reg" << Inst.getRegister() << " " << Inst.getOffset(); 1745a34c753fSRafael Auler break; 1746a34c753fSRafael Auler case MCCFIInstruction::OpRelOffset: 1747a34c753fSRafael Auler OS << "OpRelOffset Reg" << Inst.getRegister() << " " << Inst.getOffset(); 1748a34c753fSRafael Auler break; 1749a34c753fSRafael Auler case MCCFIInstruction::OpAdjustCfaOffset: 1750a34c753fSRafael Auler OS << "OfAdjustCfaOffset " << Inst.getOffset(); 1751a34c753fSRafael Auler break; 1752a34c753fSRafael Auler case MCCFIInstruction::OpEscape: 1753a34c753fSRafael Auler OS << "OpEscape"; 1754a34c753fSRafael Auler break; 1755a34c753fSRafael Auler case MCCFIInstruction::OpRestore: 1756a34c753fSRafael Auler OS << "OpRestore Reg" << Inst.getRegister(); 1757a34c753fSRafael Auler break; 1758a34c753fSRafael Auler case MCCFIInstruction::OpUndefined: 1759a34c753fSRafael Auler OS << "OpUndefined Reg" << Inst.getRegister(); 1760a34c753fSRafael Auler break; 1761a34c753fSRafael Auler case MCCFIInstruction::OpRegister: 1762a34c753fSRafael Auler OS << "OpRegister Reg" << Inst.getRegister() << " Reg" 1763a34c753fSRafael Auler << Inst.getRegister2(); 1764a34c753fSRafael Auler break; 1765a34c753fSRafael Auler case MCCFIInstruction::OpWindowSave: 1766a34c753fSRafael Auler OS << "OpWindowSave"; 1767a34c753fSRafael Auler break; 1768a34c753fSRafael Auler case MCCFIInstruction::OpGnuArgsSize: 1769a34c753fSRafael Auler OS << "OpGnuArgsSize"; 1770a34c753fSRafael Auler break; 1771a34c753fSRafael Auler default: 1772a34c753fSRafael Auler OS << "Op#" << Operation; 1773a34c753fSRafael Auler break; 1774a34c753fSRafael Auler } 1775a34c753fSRafael Auler } 1776a34c753fSRafael Auler 17778579db96SDenis Revunov MarkerSymType BinaryContext::getMarkerType(const SymbolRef &Symbol) const { 17788579db96SDenis Revunov // For aarch64, the ABI defines mapping symbols so we identify data in the 17798579db96SDenis Revunov // code section (see IHI0056B). $x identifies a symbol starting code or the 17808579db96SDenis Revunov // end of a data chunk inside code, $d indentifies start of data. 17818579db96SDenis Revunov if (!isAArch64() || ELFSymbolRef(Symbol).getSize()) 17828579db96SDenis Revunov return MarkerSymType::NONE; 17838579db96SDenis Revunov 17848579db96SDenis Revunov Expected<StringRef> NameOrError = Symbol.getName(); 17858579db96SDenis Revunov Expected<object::SymbolRef::Type> TypeOrError = Symbol.getType(); 17868579db96SDenis Revunov 17878579db96SDenis Revunov if (!TypeOrError || !NameOrError) 17888579db96SDenis Revunov return MarkerSymType::NONE; 17898579db96SDenis Revunov 17908579db96SDenis Revunov if (*TypeOrError != SymbolRef::ST_Unknown) 17918579db96SDenis Revunov return MarkerSymType::NONE; 17928579db96SDenis Revunov 17938579db96SDenis Revunov if (*NameOrError == "$x" || NameOrError->startswith("$x.")) 17948579db96SDenis Revunov return MarkerSymType::CODE; 17958579db96SDenis Revunov 17968579db96SDenis Revunov if (*NameOrError == "$d" || NameOrError->startswith("$d.")) 17978579db96SDenis Revunov return MarkerSymType::DATA; 17988579db96SDenis Revunov 17998579db96SDenis Revunov return MarkerSymType::NONE; 18008579db96SDenis Revunov } 18018579db96SDenis Revunov 18028579db96SDenis Revunov bool BinaryContext::isMarker(const SymbolRef &Symbol) const { 18038579db96SDenis Revunov return getMarkerType(Symbol) != MarkerSymType::NONE; 18048579db96SDenis Revunov } 18058579db96SDenis Revunov 18067dee646bSAmir Ayupov static void printDebugInfo(raw_ostream &OS, const MCInst &Instruction, 18077dee646bSAmir Ayupov const BinaryFunction *Function, 18087dee646bSAmir Ayupov DWARFContext *DwCtx) { 18097dee646bSAmir Ayupov DebugLineTableRowRef RowRef = 18107dee646bSAmir Ayupov DebugLineTableRowRef::fromSMLoc(Instruction.getLoc()); 18117dee646bSAmir Ayupov if (RowRef == DebugLineTableRowRef::NULL_ROW) 18127dee646bSAmir Ayupov return; 18137dee646bSAmir Ayupov 18147dee646bSAmir Ayupov const DWARFDebugLine::LineTable *LineTable; 18157dee646bSAmir Ayupov if (Function && Function->getDWARFUnit() && 18167dee646bSAmir Ayupov Function->getDWARFUnit()->getOffset() == RowRef.DwCompileUnitIndex) { 18177dee646bSAmir Ayupov LineTable = Function->getDWARFLineTable(); 18187dee646bSAmir Ayupov } else { 18197dee646bSAmir Ayupov LineTable = DwCtx->getLineTableForUnit( 18207dee646bSAmir Ayupov DwCtx->getCompileUnitForOffset(RowRef.DwCompileUnitIndex)); 18217dee646bSAmir Ayupov } 18227dee646bSAmir Ayupov assert(LineTable && "line table expected for instruction with debug info"); 18237dee646bSAmir Ayupov 18247dee646bSAmir Ayupov const DWARFDebugLine::Row &Row = LineTable->Rows[RowRef.RowIndex - 1]; 18257dee646bSAmir Ayupov StringRef FileName = ""; 18267dee646bSAmir Ayupov if (Optional<const char *> FName = 18277dee646bSAmir Ayupov dwarf::toString(LineTable->Prologue.FileNames[Row.File - 1].Name)) 18287dee646bSAmir Ayupov FileName = *FName; 18297dee646bSAmir Ayupov OS << " # debug line " << FileName << ":" << Row.Line; 18307dee646bSAmir Ayupov if (Row.Column) 18317dee646bSAmir Ayupov OS << ":" << Row.Column; 18327dee646bSAmir Ayupov if (Row.Discriminator) 18337dee646bSAmir Ayupov OS << " discriminator:" << Row.Discriminator; 18347dee646bSAmir Ayupov } 18357dee646bSAmir Ayupov 183640c2e0faSMaksim Panchenko void BinaryContext::printInstruction(raw_ostream &OS, const MCInst &Instruction, 1837a34c753fSRafael Auler uint64_t Offset, 1838a34c753fSRafael Auler const BinaryFunction *Function, 183940c2e0faSMaksim Panchenko bool PrintMCInst, bool PrintMemData, 184069f87b6cSAmir Ayupov bool PrintRelocations, 184169f87b6cSAmir Ayupov StringRef Endl) const { 1842a34c753fSRafael Auler if (MIB->isEHLabel(Instruction)) { 184369f87b6cSAmir Ayupov OS << " EH_LABEL: " << *MIB->getTargetSymbol(Instruction) << Endl; 1844a34c753fSRafael Auler return; 1845a34c753fSRafael Auler } 1846a34c753fSRafael Auler OS << format(" %08" PRIx64 ": ", Offset); 1847a34c753fSRafael Auler if (MIB->isCFI(Instruction)) { 1848a34c753fSRafael Auler uint32_t Offset = Instruction.getOperand(0).getImm(); 1849a34c753fSRafael Auler OS << "\t!CFI\t$" << Offset << "\t; "; 1850a34c753fSRafael Auler if (Function) 1851a34c753fSRafael Auler printCFI(OS, *Function->getCFIFor(Instruction)); 185269f87b6cSAmir Ayupov OS << Endl; 1853a34c753fSRafael Auler return; 1854a34c753fSRafael Auler } 1855a34c753fSRafael Auler InstPrinter->printInst(&Instruction, 0, "", *STI, OS); 1856a34c753fSRafael Auler if (MIB->isCall(Instruction)) { 1857a34c753fSRafael Auler if (MIB->isTailCall(Instruction)) 1858a34c753fSRafael Auler OS << " # TAILCALL "; 1859a34c753fSRafael Auler if (MIB->isInvoke(Instruction)) { 1860a34c753fSRafael Auler const Optional<MCPlus::MCLandingPad> EHInfo = MIB->getEHInfo(Instruction); 1861a34c753fSRafael Auler OS << " # handler: "; 1862a34c753fSRafael Auler if (EHInfo->first) 1863a34c753fSRafael Auler OS << *EHInfo->first; 1864a34c753fSRafael Auler else 1865a34c753fSRafael Auler OS << '0'; 1866a34c753fSRafael Auler OS << "; action: " << EHInfo->second; 1867a34c753fSRafael Auler const int64_t GnuArgsSize = MIB->getGnuArgsSize(Instruction); 1868a34c753fSRafael Auler if (GnuArgsSize >= 0) 1869a34c753fSRafael Auler OS << "; GNU_args_size = " << GnuArgsSize; 1870a34c753fSRafael Auler } 1871a34c753fSRafael Auler } else if (MIB->isIndirectBranch(Instruction)) { 1872a34c753fSRafael Auler if (uint64_t JTAddress = MIB->getJumpTable(Instruction)) { 1873a34c753fSRafael Auler OS << " # JUMPTABLE @0x" << Twine::utohexstr(JTAddress); 1874a34c753fSRafael Auler } else { 1875a34c753fSRafael Auler OS << " # UNKNOWN CONTROL FLOW"; 1876a34c753fSRafael Auler } 1877a34c753fSRafael Auler } 1878a9cd49d5SAmir Ayupov if (Optional<uint32_t> Offset = MIB->getOffset(Instruction)) 1879a9cd49d5SAmir Ayupov OS << " # Offset: " << *Offset; 1880a34c753fSRafael Auler 1881a34c753fSRafael Auler MIB->printAnnotations(Instruction, OS); 1882a34c753fSRafael Auler 18837dee646bSAmir Ayupov if (opts::PrintDebugInfo) 18847dee646bSAmir Ayupov printDebugInfo(OS, Instruction, Function, DwCtx.get()); 1885a34c753fSRafael Auler 1886a34c753fSRafael Auler if ((opts::PrintRelocations || PrintRelocations) && Function) { 1887a34c753fSRafael Auler const uint64_t Size = computeCodeSize(&Instruction, &Instruction + 1); 1888a34c753fSRafael Auler Function->printRelocations(OS, Offset, Size); 1889a34c753fSRafael Auler } 1890a34c753fSRafael Auler 189169f87b6cSAmir Ayupov OS << Endl; 1892a34c753fSRafael Auler 1893a34c753fSRafael Auler if (PrintMCInst) { 1894a34c753fSRafael Auler Instruction.dump_pretty(OS, InstPrinter.get()); 189569f87b6cSAmir Ayupov OS << Endl; 1896a34c753fSRafael Auler } 1897a34c753fSRafael Auler } 1898a34c753fSRafael Auler 189977b75ca5SMaksim Panchenko Optional<uint64_t> 190077b75ca5SMaksim Panchenko BinaryContext::getBaseAddressForMapping(uint64_t MMapAddress, 190177b75ca5SMaksim Panchenko uint64_t FileOffset) const { 190277b75ca5SMaksim Panchenko // Find a segment with a matching file offset. 190377b75ca5SMaksim Panchenko for (auto &KV : SegmentMapInfo) { 190477b75ca5SMaksim Panchenko const SegmentInfo &SegInfo = KV.second; 190577b75ca5SMaksim Panchenko if (alignDown(SegInfo.FileOffset, SegInfo.Alignment) == FileOffset) { 190677b75ca5SMaksim Panchenko // Use segment's aligned memory offset to calculate the base address. 190777b75ca5SMaksim Panchenko const uint64_t MemOffset = alignDown(SegInfo.Address, SegInfo.Alignment); 190877b75ca5SMaksim Panchenko return MMapAddress - MemOffset; 190977b75ca5SMaksim Panchenko } 191077b75ca5SMaksim Panchenko } 191177b75ca5SMaksim Panchenko 191277b75ca5SMaksim Panchenko return NoneType(); 191377b75ca5SMaksim Panchenko } 191477b75ca5SMaksim Panchenko 1915a34c753fSRafael Auler ErrorOr<BinarySection &> BinaryContext::getSectionForAddress(uint64_t Address) { 1916a34c753fSRafael Auler auto SI = AddressToSection.upper_bound(Address); 1917a34c753fSRafael Auler if (SI != AddressToSection.begin()) { 1918a34c753fSRafael Auler --SI; 1919a34c753fSRafael Auler uint64_t UpperBound = SI->first + SI->second->getSize(); 1920a34c753fSRafael Auler if (!SI->second->getSize()) 1921a34c753fSRafael Auler UpperBound += 1; 1922a34c753fSRafael Auler if (UpperBound > Address) 1923a34c753fSRafael Auler return *SI->second; 1924a34c753fSRafael Auler } 1925a34c753fSRafael Auler return std::make_error_code(std::errc::bad_address); 1926a34c753fSRafael Auler } 1927a34c753fSRafael Auler 1928a34c753fSRafael Auler ErrorOr<StringRef> 1929a34c753fSRafael Auler BinaryContext::getSectionNameForAddress(uint64_t Address) const { 19303652483cSRafael Auler if (ErrorOr<const BinarySection &> Section = getSectionForAddress(Address)) 1931a34c753fSRafael Auler return Section->getName(); 1932a34c753fSRafael Auler return std::make_error_code(std::errc::bad_address); 1933a34c753fSRafael Auler } 1934a34c753fSRafael Auler 1935a34c753fSRafael Auler BinarySection &BinaryContext::registerSection(BinarySection *Section) { 1936a34c753fSRafael Auler auto Res = Sections.insert(Section); 1937a34c753fSRafael Auler (void)Res; 1938a34c753fSRafael Auler assert(Res.second && "can't register the same section twice."); 1939a34c753fSRafael Auler 1940a34c753fSRafael Auler // Only register allocatable sections in the AddressToSection map. 1941a34c753fSRafael Auler if (Section->isAllocatable() && Section->getAddress()) 1942a34c753fSRafael Auler AddressToSection.insert(std::make_pair(Section->getAddress(), Section)); 1943a34c753fSRafael Auler NameToSection.insert( 1944a34c753fSRafael Auler std::make_pair(std::string(Section->getName()), Section)); 1945a34c753fSRafael Auler LLVM_DEBUG(dbgs() << "BOLT-DEBUG: registering " << *Section << "\n"); 1946a34c753fSRafael Auler return *Section; 1947a34c753fSRafael Auler } 1948a34c753fSRafael Auler 1949a34c753fSRafael Auler BinarySection &BinaryContext::registerSection(SectionRef Section) { 1950a34c753fSRafael Auler return registerSection(new BinarySection(*this, Section)); 1951a34c753fSRafael Auler } 1952a34c753fSRafael Auler 1953a34c753fSRafael Auler BinarySection & 1954a34c753fSRafael Auler BinaryContext::registerSection(StringRef SectionName, 1955a34c753fSRafael Auler const BinarySection &OriginalSection) { 195640c2e0faSMaksim Panchenko return registerSection( 195740c2e0faSMaksim Panchenko new BinarySection(*this, SectionName, OriginalSection)); 1958a34c753fSRafael Auler } 1959a34c753fSRafael Auler 196040c2e0faSMaksim Panchenko BinarySection & 196140c2e0faSMaksim Panchenko BinaryContext::registerOrUpdateSection(StringRef Name, unsigned ELFType, 196240c2e0faSMaksim Panchenko unsigned ELFFlags, uint8_t *Data, 196340c2e0faSMaksim Panchenko uint64_t Size, unsigned Alignment) { 1964a34c753fSRafael Auler auto NamedSections = getSectionByName(Name); 1965a34c753fSRafael Auler if (NamedSections.begin() != NamedSections.end()) { 1966a34c753fSRafael Auler assert(std::next(NamedSections.begin()) == NamedSections.end() && 1967a34c753fSRafael Auler "can only update unique sections"); 1968a34c753fSRafael Auler BinarySection *Section = NamedSections.begin()->second; 1969a34c753fSRafael Auler 1970a34c753fSRafael Auler LLVM_DEBUG(dbgs() << "BOLT-DEBUG: updating " << *Section << " -> "); 1971a34c753fSRafael Auler const bool Flag = Section->isAllocatable(); 1972a34c753fSRafael Auler (void)Flag; 1973a34c753fSRafael Auler Section->update(Data, Size, Alignment, ELFType, ELFFlags); 1974a34c753fSRafael Auler LLVM_DEBUG(dbgs() << *Section << "\n"); 1975a34c753fSRafael Auler // FIXME: Fix section flags/attributes for MachO. 1976a34c753fSRafael Auler if (isELF()) 1977a34c753fSRafael Auler assert(Flag == Section->isAllocatable() && 1978a34c753fSRafael Auler "can't change section allocation status"); 1979a34c753fSRafael Auler return *Section; 1980a34c753fSRafael Auler } 1981a34c753fSRafael Auler 198240c2e0faSMaksim Panchenko return registerSection( 198340c2e0faSMaksim Panchenko new BinarySection(*this, Name, Data, Size, Alignment, ELFType, ELFFlags)); 1984a34c753fSRafael Auler } 1985a34c753fSRafael Auler 1986a34c753fSRafael Auler bool BinaryContext::deregisterSection(BinarySection &Section) { 1987a34c753fSRafael Auler BinarySection *SectionPtr = &Section; 1988a34c753fSRafael Auler auto Itr = Sections.find(SectionPtr); 1989a34c753fSRafael Auler if (Itr != Sections.end()) { 1990a34c753fSRafael Auler auto Range = AddressToSection.equal_range(SectionPtr->getAddress()); 1991a34c753fSRafael Auler while (Range.first != Range.second) { 1992a34c753fSRafael Auler if (Range.first->second == SectionPtr) { 1993a34c753fSRafael Auler AddressToSection.erase(Range.first); 1994a34c753fSRafael Auler break; 1995a34c753fSRafael Auler } 1996a34c753fSRafael Auler ++Range.first; 1997a34c753fSRafael Auler } 1998a34c753fSRafael Auler 1999a34c753fSRafael Auler auto NameRange = 2000a34c753fSRafael Auler NameToSection.equal_range(std::string(SectionPtr->getName())); 2001a34c753fSRafael Auler while (NameRange.first != NameRange.second) { 2002a34c753fSRafael Auler if (NameRange.first->second == SectionPtr) { 2003a34c753fSRafael Auler NameToSection.erase(NameRange.first); 2004a34c753fSRafael Auler break; 2005a34c753fSRafael Auler } 2006a34c753fSRafael Auler ++NameRange.first; 2007a34c753fSRafael Auler } 2008a34c753fSRafael Auler 2009a34c753fSRafael Auler Sections.erase(Itr); 2010a34c753fSRafael Auler delete SectionPtr; 2011a34c753fSRafael Auler return true; 2012a34c753fSRafael Auler } 2013a34c753fSRafael Auler return false; 2014a34c753fSRafael Auler } 2015a34c753fSRafael Auler 2016a34c753fSRafael Auler void BinaryContext::printSections(raw_ostream &OS) const { 20173652483cSRafael Auler for (BinarySection *const &Section : Sections) 2018a34c753fSRafael Auler OS << "BOLT-INFO: " << *Section << "\n"; 2019a34c753fSRafael Auler } 2020a34c753fSRafael Auler 2021a34c753fSRafael Auler BinarySection &BinaryContext::absoluteSection() { 2022a34c753fSRafael Auler if (ErrorOr<BinarySection &> Section = getUniqueSectionByName("<absolute>")) 2023a34c753fSRafael Auler return *Section; 2024a34c753fSRafael Auler return registerOrUpdateSection("<absolute>", ELF::SHT_NULL, 0u); 2025a34c753fSRafael Auler } 2026a34c753fSRafael Auler 202740c2e0faSMaksim Panchenko ErrorOr<uint64_t> BinaryContext::getUnsignedValueAtAddress(uint64_t Address, 2028a34c753fSRafael Auler size_t Size) const { 2029a34c753fSRafael Auler const ErrorOr<const BinarySection &> Section = getSectionForAddress(Address); 2030a34c753fSRafael Auler if (!Section) 2031a34c753fSRafael Auler return std::make_error_code(std::errc::bad_address); 2032a34c753fSRafael Auler 2033a34c753fSRafael Auler if (Section->isVirtual()) 2034a34c753fSRafael Auler return 0; 2035a34c753fSRafael Auler 2036a34c753fSRafael Auler DataExtractor DE(Section->getContents(), AsmInfo->isLittleEndian(), 2037a34c753fSRafael Auler AsmInfo->getCodePointerSize()); 2038a34c753fSRafael Auler auto ValueOffset = static_cast<uint64_t>(Address - Section->getAddress()); 2039a34c753fSRafael Auler return DE.getUnsigned(&ValueOffset, Size); 2040a34c753fSRafael Auler } 2041a34c753fSRafael Auler 204240c2e0faSMaksim Panchenko ErrorOr<uint64_t> BinaryContext::getSignedValueAtAddress(uint64_t Address, 2043a34c753fSRafael Auler size_t Size) const { 2044a34c753fSRafael Auler const ErrorOr<const BinarySection &> Section = getSectionForAddress(Address); 2045a34c753fSRafael Auler if (!Section) 2046a34c753fSRafael Auler return std::make_error_code(std::errc::bad_address); 2047a34c753fSRafael Auler 2048a34c753fSRafael Auler if (Section->isVirtual()) 2049a34c753fSRafael Auler return 0; 2050a34c753fSRafael Auler 2051a34c753fSRafael Auler DataExtractor DE(Section->getContents(), AsmInfo->isLittleEndian(), 2052a34c753fSRafael Auler AsmInfo->getCodePointerSize()); 2053a34c753fSRafael Auler auto ValueOffset = static_cast<uint64_t>(Address - Section->getAddress()); 2054a34c753fSRafael Auler return DE.getSigned(&ValueOffset, Size); 2055a34c753fSRafael Auler } 2056a34c753fSRafael Auler 205740c2e0faSMaksim Panchenko void BinaryContext::addRelocation(uint64_t Address, MCSymbol *Symbol, 205840c2e0faSMaksim Panchenko uint64_t Type, uint64_t Addend, 2059a34c753fSRafael Auler uint64_t Value) { 2060a34c753fSRafael Auler ErrorOr<BinarySection &> Section = getSectionForAddress(Address); 2061a34c753fSRafael Auler assert(Section && "cannot find section for address"); 206240c2e0faSMaksim Panchenko Section->addRelocation(Address - Section->getAddress(), Symbol, Type, Addend, 2063a34c753fSRafael Auler Value); 2064a34c753fSRafael Auler } 2065a34c753fSRafael Auler 206640c2e0faSMaksim Panchenko void BinaryContext::addDynamicRelocation(uint64_t Address, MCSymbol *Symbol, 206740c2e0faSMaksim Panchenko uint64_t Type, uint64_t Addend, 2068a34c753fSRafael Auler uint64_t Value) { 2069a34c753fSRafael Auler ErrorOr<BinarySection &> Section = getSectionForAddress(Address); 2070a34c753fSRafael Auler assert(Section && "cannot find section for address"); 207140c2e0faSMaksim Panchenko Section->addDynamicRelocation(Address - Section->getAddress(), Symbol, Type, 207240c2e0faSMaksim Panchenko Addend, Value); 2073a34c753fSRafael Auler } 2074a34c753fSRafael Auler 2075a34c753fSRafael Auler bool BinaryContext::removeRelocationAt(uint64_t Address) { 2076a34c753fSRafael Auler ErrorOr<BinarySection &> Section = getSectionForAddress(Address); 2077a34c753fSRafael Auler assert(Section && "cannot find section for address"); 2078a34c753fSRafael Auler return Section->removeRelocationAt(Address - Section->getAddress()); 2079a34c753fSRafael Auler } 2080a34c753fSRafael Auler 2081a34c753fSRafael Auler const Relocation *BinaryContext::getRelocationAt(uint64_t Address) { 2082a34c753fSRafael Auler ErrorOr<BinarySection &> Section = getSectionForAddress(Address); 2083a34c753fSRafael Auler if (!Section) 2084a34c753fSRafael Auler return nullptr; 2085a34c753fSRafael Auler 2086a34c753fSRafael Auler return Section->getRelocationAt(Address - Section->getAddress()); 2087a34c753fSRafael Auler } 2088a34c753fSRafael Auler 2089a34c753fSRafael Auler const Relocation *BinaryContext::getDynamicRelocationAt(uint64_t Address) { 2090a34c753fSRafael Auler ErrorOr<BinarySection &> Section = getSectionForAddress(Address); 2091a34c753fSRafael Auler if (!Section) 2092a34c753fSRafael Auler return nullptr; 2093a34c753fSRafael Auler 2094a34c753fSRafael Auler return Section->getDynamicRelocationAt(Address - Section->getAddress()); 2095a34c753fSRafael Auler } 2096a34c753fSRafael Auler 2097a34c753fSRafael Auler void BinaryContext::markAmbiguousRelocations(BinaryData &BD, 2098a34c753fSRafael Auler const uint64_t Address) { 2099a34c753fSRafael Auler auto setImmovable = [&](BinaryData &BD) { 2100a34c753fSRafael Auler BinaryData *Root = BD.getAtomicRoot(); 2101a34c753fSRafael Auler LLVM_DEBUG(if (Root->isMoveable()) { 2102a34c753fSRafael Auler dbgs() << "BOLT-DEBUG: setting " << *Root << " as immovable " 2103a34c753fSRafael Auler << "due to ambiguous relocation referencing 0x" 2104a34c753fSRafael Auler << Twine::utohexstr(Address) << '\n'; 2105a34c753fSRafael Auler }); 2106a34c753fSRafael Auler Root->setIsMoveable(false); 2107a34c753fSRafael Auler }; 2108a34c753fSRafael Auler 2109a34c753fSRafael Auler if (Address == BD.getAddress()) { 2110a34c753fSRafael Auler setImmovable(BD); 2111a34c753fSRafael Auler 2112a34c753fSRafael Auler // Set previous symbol as immovable 2113a34c753fSRafael Auler BinaryData *Prev = getBinaryDataContainingAddress(Address - 1); 2114a34c753fSRafael Auler if (Prev && Prev->getEndAddress() == BD.getAddress()) 2115a34c753fSRafael Auler setImmovable(*Prev); 2116a34c753fSRafael Auler } 2117a34c753fSRafael Auler 2118a34c753fSRafael Auler if (Address == BD.getEndAddress()) { 2119a34c753fSRafael Auler setImmovable(BD); 2120a34c753fSRafael Auler 2121a34c753fSRafael Auler // Set next symbol as immovable 2122a34c753fSRafael Auler BinaryData *Next = getBinaryDataContainingAddress(BD.getEndAddress()); 2123a34c753fSRafael Auler if (Next && Next->getAddress() == BD.getEndAddress()) 2124a34c753fSRafael Auler setImmovable(*Next); 2125a34c753fSRafael Auler } 2126a34c753fSRafael Auler } 2127a34c753fSRafael Auler 2128a34c753fSRafael Auler BinaryFunction *BinaryContext::getFunctionForSymbol(const MCSymbol *Symbol, 2129a34c753fSRafael Auler uint64_t *EntryDesc) { 2130a34c753fSRafael Auler std::shared_lock<std::shared_timed_mutex> Lock(SymbolToFunctionMapMutex); 2131a34c753fSRafael Auler auto BFI = SymbolToFunctionMap.find(Symbol); 2132a34c753fSRafael Auler if (BFI == SymbolToFunctionMap.end()) 2133a34c753fSRafael Auler return nullptr; 2134a34c753fSRafael Auler 2135a34c753fSRafael Auler BinaryFunction *BF = BFI->second; 2136a34c753fSRafael Auler if (EntryDesc) 2137a34c753fSRafael Auler *EntryDesc = BF->getEntryIDForSymbol(Symbol); 2138a34c753fSRafael Auler 2139a34c753fSRafael Auler return BF; 2140a34c753fSRafael Auler } 2141a34c753fSRafael Auler 2142a34c753fSRafael Auler void BinaryContext::exitWithBugReport(StringRef Message, 2143a34c753fSRafael Auler const BinaryFunction &Function) const { 2144a34c753fSRafael Auler errs() << "=======================================\n"; 2145a34c753fSRafael Auler errs() << "BOLT is unable to proceed because it couldn't properly understand " 2146a34c753fSRafael Auler "this function.\n"; 2147a34c753fSRafael Auler errs() << "If you are running the most recent version of BOLT, you may " 2148a34c753fSRafael Auler "want to " 2149a34c753fSRafael Auler "report this and paste this dump.\nPlease check that there is no " 2150a34c753fSRafael Auler "sensitive contents being shared in this dump.\n"; 2151a34c753fSRafael Auler errs() << "\nOffending function: " << Function.getPrintName() << "\n\n"; 2152a34c753fSRafael Auler ScopedPrinter SP(errs()); 2153a34c753fSRafael Auler SP.printBinaryBlock("Function contents", *Function.getData()); 2154a34c753fSRafael Auler errs() << "\n"; 2155a34c753fSRafael Auler Function.dump(); 2156a34c753fSRafael Auler errs() << "ERROR: " << Message; 2157a34c753fSRafael Auler errs() << "\n=======================================\n"; 2158a34c753fSRafael Auler exit(1); 2159a34c753fSRafael Auler } 2160a34c753fSRafael Auler 2161a34c753fSRafael Auler BinaryFunction * 2162a34c753fSRafael Auler BinaryContext::createInjectedBinaryFunction(const std::string &Name, 2163a34c753fSRafael Auler bool IsSimple) { 2164a34c753fSRafael Auler InjectedBinaryFunctions.push_back(new BinaryFunction(Name, *this, IsSimple)); 2165a34c753fSRafael Auler BinaryFunction *BF = InjectedBinaryFunctions.back(); 2166a34c753fSRafael Auler setSymbolToFunctionMap(BF->getSymbol(), BF); 2167a34c753fSRafael Auler BF->CurrentState = BinaryFunction::State::CFG; 2168a34c753fSRafael Auler return BF; 2169a34c753fSRafael Auler } 2170a34c753fSRafael Auler 2171a34c753fSRafael Auler std::pair<size_t, size_t> 2172a34c753fSRafael Auler BinaryContext::calculateEmittedSize(BinaryFunction &BF, bool FixBranches) { 2173a34c753fSRafael Auler // Adjust branch instruction to match the current layout. 2174a34c753fSRafael Auler if (FixBranches) 2175a34c753fSRafael Auler BF.fixBranches(); 2176a34c753fSRafael Auler 2177a34c753fSRafael Auler // Create local MC context to isolate the effect of ephemeral code emission. 2178a34c753fSRafael Auler IndependentCodeEmitter MCEInstance = createIndependentMCCodeEmitter(); 2179a34c753fSRafael Auler MCContext *LocalCtx = MCEInstance.LocalCtx.get(); 2180a34c753fSRafael Auler MCAsmBackend *MAB = 2181a34c753fSRafael Auler TheTarget->createMCAsmBackend(*STI, *MRI, MCTargetOptions()); 2182a34c753fSRafael Auler 2183a34c753fSRafael Auler SmallString<256> Code; 2184a34c753fSRafael Auler raw_svector_ostream VecOS(Code); 2185a34c753fSRafael Auler 2186a34c753fSRafael Auler std::unique_ptr<MCObjectWriter> OW = MAB->createObjectWriter(VecOS); 2187a34c753fSRafael Auler std::unique_ptr<MCStreamer> Streamer(TheTarget->createMCObjectStreamer( 2188a34c753fSRafael Auler *TheTriple, *LocalCtx, std::unique_ptr<MCAsmBackend>(MAB), std::move(OW), 2189a34c753fSRafael Auler std::unique_ptr<MCCodeEmitter>(MCEInstance.MCE.release()), *STI, 2190a34c753fSRafael Auler /*RelaxAll=*/false, 2191a34c753fSRafael Auler /*IncrementalLinkerCompatible=*/false, 2192a34c753fSRafael Auler /*DWARFMustBeAtTheEnd=*/false)); 2193a34c753fSRafael Auler 2194a34c753fSRafael Auler Streamer->initSections(false, *STI); 2195a34c753fSRafael Auler 2196a34c753fSRafael Auler MCSection *Section = MCEInstance.LocalMOFI->getTextSection(); 2197a34c753fSRafael Auler Section->setHasInstructions(true); 2198a34c753fSRafael Auler 2199a34c753fSRafael Auler // Create symbols in the LocalCtx so that they get destroyed with it. 2200a34c753fSRafael Auler MCSymbol *StartLabel = LocalCtx->createTempSymbol(); 2201a34c753fSRafael Auler MCSymbol *EndLabel = LocalCtx->createTempSymbol(); 2202a34c753fSRafael Auler MCSymbol *ColdStartLabel = LocalCtx->createTempSymbol(); 2203a34c753fSRafael Auler MCSymbol *ColdEndLabel = LocalCtx->createTempSymbol(); 2204a34c753fSRafael Auler 2205adf4142fSFangrui Song Streamer->switchSection(Section); 2206a34c753fSRafael Auler Streamer->emitLabel(StartLabel); 2207a34c753fSRafael Auler emitFunctionBody(*Streamer, BF, /*EmitColdPart=*/false, 2208a34c753fSRafael Auler /*EmitCodeOnly=*/true); 2209a34c753fSRafael Auler Streamer->emitLabel(EndLabel); 2210a34c753fSRafael Auler 2211a34c753fSRafael Auler if (BF.isSplit()) { 2212a34c753fSRafael Auler MCSectionELF *ColdSection = 2213a34c753fSRafael Auler LocalCtx->getELFSection(BF.getColdCodeSectionName(), ELF::SHT_PROGBITS, 2214a34c753fSRafael Auler ELF::SHF_EXECINSTR | ELF::SHF_ALLOC); 2215a34c753fSRafael Auler ColdSection->setHasInstructions(true); 2216a34c753fSRafael Auler 2217adf4142fSFangrui Song Streamer->switchSection(ColdSection); 2218a34c753fSRafael Auler Streamer->emitLabel(ColdStartLabel); 2219a34c753fSRafael Auler emitFunctionBody(*Streamer, BF, /*EmitColdPart=*/true, 2220a34c753fSRafael Auler /*EmitCodeOnly=*/true); 2221a34c753fSRafael Auler Streamer->emitLabel(ColdEndLabel); 2222a34c753fSRafael Auler // To avoid calling MCObjectStreamer::flushPendingLabels() which is private 2223a34c753fSRafael Auler Streamer->emitBytes(StringRef("")); 2224adf4142fSFangrui Song Streamer->switchSection(Section); 2225a34c753fSRafael Auler } 2226a34c753fSRafael Auler 2227a34c753fSRafael Auler // To avoid calling MCObjectStreamer::flushPendingLabels() which is private or 2228a34c753fSRafael Auler // MCStreamer::Finish(), which does more than we want 2229a34c753fSRafael Auler Streamer->emitBytes(StringRef("")); 2230a34c753fSRafael Auler 2231a34c753fSRafael Auler MCAssembler &Assembler = 2232a34c753fSRafael Auler static_cast<MCObjectStreamer *>(Streamer.get())->getAssembler(); 2233a34c753fSRafael Auler MCAsmLayout Layout(Assembler); 2234a34c753fSRafael Auler Assembler.layout(Layout); 2235a34c753fSRafael Auler 2236a34c753fSRafael Auler const uint64_t HotSize = 2237a34c753fSRafael Auler Layout.getSymbolOffset(*EndLabel) - Layout.getSymbolOffset(*StartLabel); 2238a34c753fSRafael Auler const uint64_t ColdSize = BF.isSplit() 2239a34c753fSRafael Auler ? Layout.getSymbolOffset(*ColdEndLabel) - 2240a34c753fSRafael Auler Layout.getSymbolOffset(*ColdStartLabel) 2241a34c753fSRafael Auler : 0ULL; 2242a34c753fSRafael Auler 2243a34c753fSRafael Auler // Clean-up the effect of the code emission. 2244a34c753fSRafael Auler for (const MCSymbol &Symbol : Assembler.symbols()) { 2245a34c753fSRafael Auler MCSymbol *MutableSymbol = const_cast<MCSymbol *>(&Symbol); 2246a34c753fSRafael Auler MutableSymbol->setUndefined(); 2247a34c753fSRafael Auler MutableSymbol->setIsRegistered(false); 2248a34c753fSRafael Auler } 2249a34c753fSRafael Auler 2250a34c753fSRafael Auler return std::make_pair(HotSize, ColdSize); 2251a34c753fSRafael Auler } 2252a34c753fSRafael Auler 2253a34c753fSRafael Auler bool BinaryContext::validateEncoding(const MCInst &Inst, 2254a34c753fSRafael Auler ArrayRef<uint8_t> InputEncoding) const { 2255a34c753fSRafael Auler SmallString<256> Code; 2256a34c753fSRafael Auler SmallVector<MCFixup, 4> Fixups; 2257a34c753fSRafael Auler raw_svector_ostream VecOS(Code); 2258a34c753fSRafael Auler 2259a34c753fSRafael Auler MCE->encodeInstruction(Inst, VecOS, Fixups, *STI); 2260a34c753fSRafael Auler auto EncodedData = ArrayRef<uint8_t>((uint8_t *)Code.data(), Code.size()); 2261a34c753fSRafael Auler if (InputEncoding != EncodedData) { 2262a34c753fSRafael Auler if (opts::Verbosity > 1) { 2263a34c753fSRafael Auler errs() << "BOLT-WARNING: mismatched encoding detected\n" 2264a34c753fSRafael Auler << " input: " << InputEncoding << '\n' 2265a34c753fSRafael Auler << " output: " << EncodedData << '\n'; 2266a34c753fSRafael Auler } 2267a34c753fSRafael Auler return false; 2268a34c753fSRafael Auler } 2269a34c753fSRafael Auler 2270a34c753fSRafael Auler return true; 2271a34c753fSRafael Auler } 2272a34c753fSRafael Auler 2273a34c753fSRafael Auler uint64_t BinaryContext::getHotThreshold() const { 2274a34c753fSRafael Auler static uint64_t Threshold = 0; 2275a34c753fSRafael Auler if (Threshold == 0) { 227640c2e0faSMaksim Panchenko Threshold = std::max( 227740c2e0faSMaksim Panchenko (uint64_t)opts::ExecutionCountThreshold, 2278a34c753fSRafael Auler NumProfiledFuncs ? SumExecutionCount / (2 * NumProfiledFuncs) : 1); 2279a34c753fSRafael Auler } 2280a34c753fSRafael Auler return Threshold; 2281a34c753fSRafael Auler } 2282a34c753fSRafael Auler 228340c2e0faSMaksim Panchenko BinaryFunction *BinaryContext::getBinaryFunctionContainingAddress( 228440c2e0faSMaksim Panchenko uint64_t Address, bool CheckPastEnd, bool UseMaxSize) { 2285a34c753fSRafael Auler auto FI = BinaryFunctions.upper_bound(Address); 2286a34c753fSRafael Auler if (FI == BinaryFunctions.begin()) 2287a34c753fSRafael Auler return nullptr; 2288a34c753fSRafael Auler --FI; 2289a34c753fSRafael Auler 2290a34c753fSRafael Auler const uint64_t UsedSize = 2291a34c753fSRafael Auler UseMaxSize ? FI->second.getMaxSize() : FI->second.getSize(); 2292a34c753fSRafael Auler 2293a34c753fSRafael Auler if (Address >= FI->first + UsedSize + (CheckPastEnd ? 1 : 0)) 2294a34c753fSRafael Auler return nullptr; 2295a34c753fSRafael Auler 2296a34c753fSRafael Auler return &FI->second; 2297a34c753fSRafael Auler } 2298a34c753fSRafael Auler 229940c2e0faSMaksim Panchenko BinaryFunction *BinaryContext::getBinaryFunctionAtAddress(uint64_t Address) { 2300a34c753fSRafael Auler // First, try to find a function starting at the given address. If the 2301a34c753fSRafael Auler // function was folded, this will get us the original folded function if it 2302a34c753fSRafael Auler // wasn't removed from the list, e.g. in non-relocation mode. 2303a34c753fSRafael Auler auto BFI = BinaryFunctions.find(Address); 23043652483cSRafael Auler if (BFI != BinaryFunctions.end()) 2305a34c753fSRafael Auler return &BFI->second; 2306a34c753fSRafael Auler 2307a34c753fSRafael Auler // We might have folded the function matching the object at the given 2308a34c753fSRafael Auler // address. In such case, we look for a function matching the symbol 2309a34c753fSRafael Auler // registered at the original address. The new function (the one that the 2310a34c753fSRafael Auler // original was folded into) will hold the symbol. 2311a34c753fSRafael Auler if (const BinaryData *BD = getBinaryDataAtAddress(Address)) { 2312a34c753fSRafael Auler uint64_t EntryID = 0; 2313a34c753fSRafael Auler BinaryFunction *BF = getFunctionForSymbol(BD->getSymbol(), &EntryID); 2314a34c753fSRafael Auler if (BF && EntryID == 0) 2315a34c753fSRafael Auler return BF; 2316a34c753fSRafael Auler } 2317a34c753fSRafael Auler return nullptr; 2318a34c753fSRafael Auler } 2319a34c753fSRafael Auler 2320a34c753fSRafael Auler DebugAddressRangesVector BinaryContext::translateModuleAddressRanges( 2321a34c753fSRafael Auler const DWARFAddressRangesVector &InputRanges) const { 2322a34c753fSRafael Auler DebugAddressRangesVector OutputRanges; 2323a34c753fSRafael Auler 2324a34c753fSRafael Auler for (const DWARFAddressRange Range : InputRanges) { 2325a34c753fSRafael Auler auto BFI = BinaryFunctions.lower_bound(Range.LowPC); 2326a34c753fSRafael Auler while (BFI != BinaryFunctions.end()) { 2327a34c753fSRafael Auler const BinaryFunction &Function = BFI->second; 2328a34c753fSRafael Auler if (Function.getAddress() >= Range.HighPC) 2329a34c753fSRafael Auler break; 2330a34c753fSRafael Auler const DebugAddressRangesVector FunctionRanges = 2331a34c753fSRafael Auler Function.getOutputAddressRanges(); 2332d2c87699SAmir Ayupov llvm::move(FunctionRanges, std::back_inserter(OutputRanges)); 2333a34c753fSRafael Auler std::advance(BFI, 1); 2334a34c753fSRafael Auler } 2335a34c753fSRafael Auler } 2336a34c753fSRafael Auler 2337a34c753fSRafael Auler return OutputRanges; 2338a34c753fSRafael Auler } 2339a34c753fSRafael Auler 2340a34c753fSRafael Auler } // namespace bolt 2341a34c753fSRafael Auler } // namespace llvm 2342