12f09f445SMaksim Panchenko //===- bolt/Core/BinaryContext.cpp - Low-level context --------------------===// 2a34c753fSRafael Auler // 3a34c753fSRafael Auler // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4a34c753fSRafael Auler // See https://llvm.org/LICENSE.txt for license information. 5a34c753fSRafael Auler // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6a34c753fSRafael Auler // 7a34c753fSRafael Auler //===----------------------------------------------------------------------===// 8a34c753fSRafael Auler // 92f09f445SMaksim Panchenko // This file implements the BinaryContext class. 102f09f445SMaksim Panchenko // 11a34c753fSRafael Auler //===----------------------------------------------------------------------===// 12a34c753fSRafael Auler 13a34c753fSRafael Auler #include "bolt/Core/BinaryContext.h" 14a34c753fSRafael Auler #include "bolt/Core/BinaryEmitter.h" 15a34c753fSRafael Auler #include "bolt/Core/BinaryFunction.h" 16a34c753fSRafael Auler #include "bolt/Utils/CommandLineOpts.h" 17a34c753fSRafael Auler #include "bolt/Utils/NameResolver.h" 18a34c753fSRafael Auler #include "bolt/Utils/Utils.h" 1972e5b14fSAmir Ayupov #include "llvm/ADT/STLExtras.h" 20a34c753fSRafael Auler #include "llvm/ADT/Twine.h" 21290e4823Sserge-sans-paille #include "llvm/DebugInfo/DWARF/DWARFCompileUnit.h" 22a34c753fSRafael Auler #include "llvm/DebugInfo/DWARF/DWARFFormValue.h" 23a34c753fSRafael Auler #include "llvm/DebugInfo/DWARF/DWARFUnit.h" 24a34c753fSRafael Auler #include "llvm/MC/MCAsmLayout.h" 25a34c753fSRafael Auler #include "llvm/MC/MCAssembler.h" 26a34c753fSRafael Auler #include "llvm/MC/MCContext.h" 27a34c753fSRafael Auler #include "llvm/MC/MCDisassembler/MCDisassembler.h" 28a34c753fSRafael Auler #include "llvm/MC/MCInstPrinter.h" 29a34c753fSRafael Auler #include "llvm/MC/MCObjectStreamer.h" 30a34c753fSRafael Auler #include "llvm/MC/MCObjectWriter.h" 3157f7c7d9Sserge-sans-paille #include "llvm/MC/MCRegisterInfo.h" 32a34c753fSRafael Auler #include "llvm/MC/MCSectionELF.h" 33a34c753fSRafael Auler #include "llvm/MC/MCStreamer.h" 3457f7c7d9Sserge-sans-paille #include "llvm/MC/MCSubtargetInfo.h" 35a34c753fSRafael Auler #include "llvm/MC/MCSymbol.h" 36a34c753fSRafael Auler #include "llvm/Support/CommandLine.h" 3732d2473aSAmir Ayupov #include "llvm/Support/Error.h" 38a34c753fSRafael Auler #include "llvm/Support/Regex.h" 396aa735ceSAmir Ayupov #include <algorithm> 40a34c753fSRafael Auler #include <functional> 41a34c753fSRafael Auler #include <iterator> 42275e075cSFabian Parzefall #include <numeric> 436aa735ceSAmir Ayupov #include <unordered_set> 44a34c753fSRafael Auler 45a34c753fSRafael Auler using namespace llvm; 46a34c753fSRafael Auler 47a34c753fSRafael Auler #undef DEBUG_TYPE 48a34c753fSRafael Auler #define DEBUG_TYPE "bolt" 49a34c753fSRafael Auler 50a34c753fSRafael Auler namespace opts { 51a34c753fSRafael Auler 52b92436efSFangrui Song cl::opt<bool> NoHugePages("no-huge-pages", 53a34c753fSRafael Auler cl::desc("use regular size pages for code alignment"), 54b92436efSFangrui Song cl::Hidden, cl::cat(BoltCategory)); 55a34c753fSRafael Auler 56a34c753fSRafael Auler static cl::opt<bool> 57a34c753fSRafael Auler PrintDebugInfo("print-debug-info", 58a34c753fSRafael Auler cl::desc("print debug info when printing functions"), 59a34c753fSRafael Auler cl::Hidden, 60a34c753fSRafael Auler cl::ZeroOrMore, 61a34c753fSRafael Auler cl::cat(BoltCategory)); 62a34c753fSRafael Auler 63b92436efSFangrui Song cl::opt<bool> PrintRelocations( 64b92436efSFangrui Song "print-relocations", 65b92436efSFangrui Song cl::desc("print relocations when printing functions/objects"), cl::Hidden, 66a34c753fSRafael Auler cl::cat(BoltCategory)); 67a34c753fSRafael Auler 68a34c753fSRafael Auler static cl::opt<bool> 69a34c753fSRafael Auler PrintMemData("print-mem-data", 70a34c753fSRafael Auler cl::desc("print memory data annotations when printing functions"), 71a34c753fSRafael Auler cl::Hidden, 72a34c753fSRafael Auler cl::ZeroOrMore, 73a34c753fSRafael Auler cl::cat(BoltCategory)); 74a34c753fSRafael Auler 75a34c753fSRafael Auler } // namespace opts 76a34c753fSRafael Auler 77a34c753fSRafael Auler namespace llvm { 78a34c753fSRafael Auler namespace bolt { 79a34c753fSRafael Auler 80a34c753fSRafael Auler BinaryContext::BinaryContext(std::unique_ptr<MCContext> Ctx, 81a34c753fSRafael Auler std::unique_ptr<DWARFContext> DwCtx, 82a34c753fSRafael Auler std::unique_ptr<Triple> TheTriple, 8340c2e0faSMaksim Panchenko const Target *TheTarget, std::string TripleName, 84a34c753fSRafael Auler std::unique_ptr<MCCodeEmitter> MCE, 85a34c753fSRafael Auler std::unique_ptr<MCObjectFileInfo> MOFI, 86a34c753fSRafael Auler std::unique_ptr<const MCAsmInfo> AsmInfo, 87a34c753fSRafael Auler std::unique_ptr<const MCInstrInfo> MII, 88a34c753fSRafael Auler std::unique_ptr<const MCSubtargetInfo> STI, 89a34c753fSRafael Auler std::unique_ptr<MCInstPrinter> InstPrinter, 90a34c753fSRafael Auler std::unique_ptr<const MCInstrAnalysis> MIA, 91a34c753fSRafael Auler std::unique_ptr<MCPlusBuilder> MIB, 92a34c753fSRafael Auler std::unique_ptr<const MCRegisterInfo> MRI, 93a34c753fSRafael Auler std::unique_ptr<MCDisassembler> DisAsm) 9440c2e0faSMaksim Panchenko : Ctx(std::move(Ctx)), DwCtx(std::move(DwCtx)), 9540c2e0faSMaksim Panchenko TheTriple(std::move(TheTriple)), TheTarget(TheTarget), 9640c2e0faSMaksim Panchenko TripleName(TripleName), MCE(std::move(MCE)), MOFI(std::move(MOFI)), 9740c2e0faSMaksim Panchenko AsmInfo(std::move(AsmInfo)), MII(std::move(MII)), STI(std::move(STI)), 9840c2e0faSMaksim Panchenko InstPrinter(std::move(InstPrinter)), MIA(std::move(MIA)), 9940c2e0faSMaksim Panchenko MIB(std::move(MIB)), MRI(std::move(MRI)), DisAsm(std::move(DisAsm)) { 100a34c753fSRafael Auler Relocation::Arch = this->TheTriple->getArch(); 101db65429dSElvina Yakubova RegularPageSize = isAArch64() ? RegularPageSizeAArch64 : RegularPageSizeX86; 102a34c753fSRafael Auler PageAlign = opts::NoHugePages ? RegularPageSize : HugePageSize; 103a34c753fSRafael Auler } 104a34c753fSRafael Auler 105a34c753fSRafael Auler BinaryContext::~BinaryContext() { 1063652483cSRafael Auler for (BinarySection *Section : Sections) 107a34c753fSRafael Auler delete Section; 1083652483cSRafael Auler for (BinaryFunction *InjectedFunction : InjectedBinaryFunctions) 109a34c753fSRafael Auler delete InjectedFunction; 1103652483cSRafael Auler for (std::pair<const uint64_t, JumpTable *> JTI : JumpTables) 111a34c753fSRafael Auler delete JTI.second; 112a34c753fSRafael Auler clearBinaryData(); 113a34c753fSRafael Auler } 114a34c753fSRafael Auler 115a34c753fSRafael Auler /// Create BinaryContext for a given architecture \p ArchName and 116a34c753fSRafael Auler /// triple \p TripleName. 11732d2473aSAmir Ayupov Expected<std::unique_ptr<BinaryContext>> 118a34c753fSRafael Auler BinaryContext::createBinaryContext(const ObjectFile *File, bool IsPIC, 119a34c753fSRafael Auler std::unique_ptr<DWARFContext> DwCtx) { 120a34c753fSRafael Auler StringRef ArchName = ""; 121a34c753fSRafael Auler StringRef FeaturesStr = ""; 122a34c753fSRafael Auler switch (File->getArch()) { 123a34c753fSRafael Auler case llvm::Triple::x86_64: 124a34c753fSRafael Auler ArchName = "x86-64"; 125a34c753fSRafael Auler FeaturesStr = "+nopl"; 126a34c753fSRafael Auler break; 127a34c753fSRafael Auler case llvm::Triple::aarch64: 128a34c753fSRafael Auler ArchName = "aarch64"; 12975641678SDenis Revunov FeaturesStr = "+all"; 130a34c753fSRafael Auler break; 131*f8730293SJob Noorman case llvm::Triple::riscv64: 132*f8730293SJob Noorman ArchName = "riscv64"; 133*f8730293SJob Noorman // RV64GC 134*f8730293SJob Noorman FeaturesStr = "+m,+a,+f,+d,+zicsr,+zifencei,+c"; 135*f8730293SJob Noorman break; 136a34c753fSRafael Auler default: 13732d2473aSAmir Ayupov return createStringError(std::errc::not_supported, 13832d2473aSAmir Ayupov "BOLT-ERROR: Unrecognized machine in ELF file"); 139a34c753fSRafael Auler } 140a34c753fSRafael Auler 141a34c753fSRafael Auler auto TheTriple = std::make_unique<Triple>(File->makeTriple()); 142a34c753fSRafael Auler const std::string TripleName = TheTriple->str(); 143a34c753fSRafael Auler 144a34c753fSRafael Auler std::string Error; 145a34c753fSRafael Auler const Target *TheTarget = 146a34c753fSRafael Auler TargetRegistry::lookupTarget(std::string(ArchName), *TheTriple, Error); 14732d2473aSAmir Ayupov if (!TheTarget) 14832d2473aSAmir Ayupov return createStringError(make_error_code(std::errc::not_supported), 14932d2473aSAmir Ayupov Twine("BOLT-ERROR: ", Error)); 150a34c753fSRafael Auler 151a34c753fSRafael Auler std::unique_ptr<const MCRegisterInfo> MRI( 152a34c753fSRafael Auler TheTarget->createMCRegInfo(TripleName)); 15332d2473aSAmir Ayupov if (!MRI) 15432d2473aSAmir Ayupov return createStringError( 15532d2473aSAmir Ayupov make_error_code(std::errc::not_supported), 15632d2473aSAmir Ayupov Twine("BOLT-ERROR: no register info for target ", TripleName)); 157a34c753fSRafael Auler 158a34c753fSRafael Auler // Set up disassembler. 159c31af7cfSAmir Ayupov std::unique_ptr<MCAsmInfo> AsmInfo( 160a34c753fSRafael Auler TheTarget->createMCAsmInfo(*MRI, TripleName, MCTargetOptions())); 16132d2473aSAmir Ayupov if (!AsmInfo) 16232d2473aSAmir Ayupov return createStringError( 16332d2473aSAmir Ayupov make_error_code(std::errc::not_supported), 16432d2473aSAmir Ayupov Twine("BOLT-ERROR: no assembly info for target ", TripleName)); 165c31af7cfSAmir Ayupov // BOLT creates "func@PLT" symbols for PLT entries. In function assembly dump 166c31af7cfSAmir Ayupov // we want to emit such names as using @PLT without double quotes to convey 167c31af7cfSAmir Ayupov // variant kind to the assembler. BOLT doesn't rely on the linker so we can 168c31af7cfSAmir Ayupov // override the default AsmInfo behavior to emit names the way we want. 169c31af7cfSAmir Ayupov AsmInfo->setAllowAtInName(true); 170a34c753fSRafael Auler 171a34c753fSRafael Auler std::unique_ptr<const MCSubtargetInfo> STI( 172a34c753fSRafael Auler TheTarget->createMCSubtargetInfo(TripleName, "", FeaturesStr)); 17332d2473aSAmir Ayupov if (!STI) 17432d2473aSAmir Ayupov return createStringError( 17532d2473aSAmir Ayupov make_error_code(std::errc::not_supported), 17632d2473aSAmir Ayupov Twine("BOLT-ERROR: no subtarget info for target ", TripleName)); 177a34c753fSRafael Auler 178a34c753fSRafael Auler std::unique_ptr<const MCInstrInfo> MII(TheTarget->createMCInstrInfo()); 17932d2473aSAmir Ayupov if (!MII) 18032d2473aSAmir Ayupov return createStringError( 18132d2473aSAmir Ayupov make_error_code(std::errc::not_supported), 18232d2473aSAmir Ayupov Twine("BOLT-ERROR: no instruction info for target ", TripleName)); 183a34c753fSRafael Auler 184a34c753fSRafael Auler std::unique_ptr<MCContext> Ctx( 185a34c753fSRafael Auler new MCContext(*TheTriple, AsmInfo.get(), MRI.get(), STI.get())); 186a34c753fSRafael Auler std::unique_ptr<MCObjectFileInfo> MOFI( 187a34c753fSRafael Auler TheTarget->createMCObjectFileInfo(*Ctx, IsPIC)); 188a34c753fSRafael Auler Ctx->setObjectFileInfo(MOFI.get()); 189a34c753fSRafael Auler // We do not support X86 Large code model. Change this in the future. 190a34c753fSRafael Auler bool Large = false; 191a34c753fSRafael Auler if (TheTriple->getArch() == llvm::Triple::aarch64) 192a34c753fSRafael Auler Large = true; 193a34c753fSRafael Auler unsigned LSDAEncoding = 194a34c753fSRafael Auler Large ? dwarf::DW_EH_PE_absptr : dwarf::DW_EH_PE_udata4; 195a34c753fSRafael Auler if (IsPIC) { 196a34c753fSRafael Auler LSDAEncoding = dwarf::DW_EH_PE_pcrel | 197a34c753fSRafael Auler (Large ? dwarf::DW_EH_PE_sdata8 : dwarf::DW_EH_PE_sdata4); 198a34c753fSRafael Auler } 199a34c753fSRafael Auler 200a34c753fSRafael Auler std::unique_ptr<MCDisassembler> DisAsm( 201a34c753fSRafael Auler TheTarget->createMCDisassembler(*STI, *Ctx)); 202a34c753fSRafael Auler 20332d2473aSAmir Ayupov if (!DisAsm) 20432d2473aSAmir Ayupov return createStringError( 20532d2473aSAmir Ayupov make_error_code(std::errc::not_supported), 20632d2473aSAmir Ayupov Twine("BOLT-ERROR: no disassembler info for target ", TripleName)); 207a34c753fSRafael Auler 208a34c753fSRafael Auler std::unique_ptr<const MCInstrAnalysis> MIA( 209a34c753fSRafael Auler TheTarget->createMCInstrAnalysis(MII.get())); 21032d2473aSAmir Ayupov if (!MIA) 21132d2473aSAmir Ayupov return createStringError( 21232d2473aSAmir Ayupov make_error_code(std::errc::not_supported), 21332d2473aSAmir Ayupov Twine("BOLT-ERROR: failed to create instruction analysis for target ", 21432d2473aSAmir Ayupov TripleName)); 215a34c753fSRafael Auler 216a34c753fSRafael Auler int AsmPrinterVariant = AsmInfo->getAssemblerDialect(); 217a34c753fSRafael Auler std::unique_ptr<MCInstPrinter> InstructionPrinter( 218a34c753fSRafael Auler TheTarget->createMCInstPrinter(*TheTriple, AsmPrinterVariant, *AsmInfo, 219a34c753fSRafael Auler *MII, *MRI)); 22032d2473aSAmir Ayupov if (!InstructionPrinter) 22132d2473aSAmir Ayupov return createStringError( 22232d2473aSAmir Ayupov make_error_code(std::errc::not_supported), 22332d2473aSAmir Ayupov Twine("BOLT-ERROR: no instruction printer for target ", TripleName)); 224a34c753fSRafael Auler InstructionPrinter->setPrintImmHex(true); 225a34c753fSRafael Auler 226a34c753fSRafael Auler std::unique_ptr<MCCodeEmitter> MCE( 2272aed07e9SShao-Ce SUN TheTarget->createMCCodeEmitter(*MII, *Ctx)); 228a34c753fSRafael Auler 229a34c753fSRafael Auler // Make sure we don't miss any output on core dumps. 230a34c753fSRafael Auler outs().SetUnbuffered(); 231a34c753fSRafael Auler errs().SetUnbuffered(); 232a34c753fSRafael Auler dbgs().SetUnbuffered(); 233a34c753fSRafael Auler 234a34c753fSRafael Auler auto BC = std::make_unique<BinaryContext>( 235a34c753fSRafael Auler std::move(Ctx), std::move(DwCtx), std::move(TheTriple), TheTarget, 236a34c753fSRafael Auler std::string(TripleName), std::move(MCE), std::move(MOFI), 237a34c753fSRafael Auler std::move(AsmInfo), std::move(MII), std::move(STI), 23840c2e0faSMaksim Panchenko std::move(InstructionPrinter), std::move(MIA), nullptr, std::move(MRI), 23940c2e0faSMaksim Panchenko std::move(DisAsm)); 240a34c753fSRafael Auler 241a34c753fSRafael Auler BC->LSDAEncoding = LSDAEncoding; 242a34c753fSRafael Auler 243a34c753fSRafael Auler BC->MAB = std::unique_ptr<MCAsmBackend>( 244a34c753fSRafael Auler BC->TheTarget->createMCAsmBackend(*BC->STI, *BC->MRI, MCTargetOptions())); 245a34c753fSRafael Auler 246a34c753fSRafael Auler BC->setFilename(File->getFileName()); 247a34c753fSRafael Auler 248a34c753fSRafael Auler BC->HasFixedLoadAddress = !IsPIC; 249a34c753fSRafael Auler 250e290133cSMaksim Panchenko BC->SymbolicDisAsm = std::unique_ptr<MCDisassembler>( 251e290133cSMaksim Panchenko BC->TheTarget->createMCDisassembler(*BC->STI, *BC->Ctx)); 252e290133cSMaksim Panchenko 253e290133cSMaksim Panchenko if (!BC->SymbolicDisAsm) 254e290133cSMaksim Panchenko return createStringError( 255e290133cSMaksim Panchenko make_error_code(std::errc::not_supported), 256e290133cSMaksim Panchenko Twine("BOLT-ERROR: no disassembler info for target ", TripleName)); 257e290133cSMaksim Panchenko 25863686af1SVladislav Khmelevsky return std::move(BC); 259a34c753fSRafael Auler } 260a34c753fSRafael Auler 261a34c753fSRafael Auler bool BinaryContext::forceSymbolRelocations(StringRef SymbolName) const { 26240c2e0faSMaksim Panchenko if (opts::HotText && 26340c2e0faSMaksim Panchenko (SymbolName == "__hot_start" || SymbolName == "__hot_end")) 264a34c753fSRafael Auler return true; 265a34c753fSRafael Auler 26640c2e0faSMaksim Panchenko if (opts::HotData && 26740c2e0faSMaksim Panchenko (SymbolName == "__hot_data_start" || SymbolName == "__hot_data_end")) 268a34c753fSRafael Auler return true; 269a34c753fSRafael Auler 270a34c753fSRafael Auler if (SymbolName == "_end") 271a34c753fSRafael Auler return true; 272a34c753fSRafael Auler 273a34c753fSRafael Auler return false; 274a34c753fSRafael Auler } 275a34c753fSRafael Auler 276a34c753fSRafael Auler std::unique_ptr<MCObjectWriter> 277a34c753fSRafael Auler BinaryContext::createObjectWriter(raw_pwrite_stream &OS) { 278a34c753fSRafael Auler return MAB->createObjectWriter(OS); 279a34c753fSRafael Auler } 280a34c753fSRafael Auler 281a34c753fSRafael Auler bool BinaryContext::validateObjectNesting() const { 282a34c753fSRafael Auler auto Itr = BinaryDataMap.begin(); 283a34c753fSRafael Auler auto End = BinaryDataMap.end(); 284a34c753fSRafael Auler bool Valid = true; 285a34c753fSRafael Auler while (Itr != End) { 286a34c753fSRafael Auler auto Next = std::next(Itr); 287a34c753fSRafael Auler while (Next != End && 288a34c753fSRafael Auler Itr->second->getSection() == Next->second->getSection() && 289a34c753fSRafael Auler Itr->second->containsRange(Next->second->getAddress(), 290a34c753fSRafael Auler Next->second->getSize())) { 291a34c753fSRafael Auler if (Next->second->Parent != Itr->second) { 292a34c753fSRafael Auler errs() << "BOLT-WARNING: object nesting incorrect for:\n" 293a34c753fSRafael Auler << "BOLT-WARNING: " << *Itr->second << "\n" 294a34c753fSRafael Auler << "BOLT-WARNING: " << *Next->second << "\n"; 295a34c753fSRafael Auler Valid = false; 296a34c753fSRafael Auler } 297a34c753fSRafael Auler ++Next; 298a34c753fSRafael Auler } 299a34c753fSRafael Auler Itr = Next; 300a34c753fSRafael Auler } 301a34c753fSRafael Auler return Valid; 302a34c753fSRafael Auler } 303a34c753fSRafael Auler 304a34c753fSRafael Auler bool BinaryContext::validateHoles() const { 305a34c753fSRafael Auler bool Valid = true; 306a34c753fSRafael Auler for (BinarySection &Section : sections()) { 307a34c753fSRafael Auler for (const Relocation &Rel : Section.relocations()) { 308a34c753fSRafael Auler uint64_t RelAddr = Rel.Offset + Section.getAddress(); 309a34c753fSRafael Auler const BinaryData *BD = getBinaryDataContainingAddress(RelAddr); 310a34c753fSRafael Auler if (!BD) { 311a34c753fSRafael Auler errs() << "BOLT-WARNING: no BinaryData found for relocation at address" 312a34c753fSRafael Auler << " 0x" << Twine::utohexstr(RelAddr) << " in " 313a34c753fSRafael Auler << Section.getName() << "\n"; 314a34c753fSRafael Auler Valid = false; 315a34c753fSRafael Auler } else if (!BD->getAtomicRoot()) { 316a34c753fSRafael Auler errs() << "BOLT-WARNING: no atomic BinaryData found for relocation at " 317a34c753fSRafael Auler << "address 0x" << Twine::utohexstr(RelAddr) << " in " 318a34c753fSRafael Auler << Section.getName() << "\n"; 319a34c753fSRafael Auler Valid = false; 320a34c753fSRafael Auler } 321a34c753fSRafael Auler } 322a34c753fSRafael Auler } 323a34c753fSRafael Auler return Valid; 324a34c753fSRafael Auler } 325a34c753fSRafael Auler 326a34c753fSRafael Auler void BinaryContext::updateObjectNesting(BinaryDataMapType::iterator GAI) { 327a34c753fSRafael Auler const uint64_t Address = GAI->second->getAddress(); 328a34c753fSRafael Auler const uint64_t Size = GAI->second->getSize(); 329a34c753fSRafael Auler 33040c2e0faSMaksim Panchenko auto fixParents = [&](BinaryDataMapType::iterator Itr, 33140c2e0faSMaksim Panchenko BinaryData *NewParent) { 332a34c753fSRafael Auler BinaryData *OldParent = Itr->second->Parent; 333a34c753fSRafael Auler Itr->second->Parent = NewParent; 334a34c753fSRafael Auler ++Itr; 335a34c753fSRafael Auler while (Itr != BinaryDataMap.end() && OldParent && 336a34c753fSRafael Auler Itr->second->Parent == OldParent) { 337a34c753fSRafael Auler Itr->second->Parent = NewParent; 338a34c753fSRafael Auler ++Itr; 339a34c753fSRafael Auler } 340a34c753fSRafael Auler }; 341a34c753fSRafael Auler 342a34c753fSRafael Auler // Check if the previous symbol contains the newly added symbol. 343a34c753fSRafael Auler if (GAI != BinaryDataMap.begin()) { 344a34c753fSRafael Auler BinaryData *Prev = std::prev(GAI)->second; 345a34c753fSRafael Auler while (Prev) { 346a34c753fSRafael Auler if (Prev->getSection() == GAI->second->getSection() && 347a34c753fSRafael Auler Prev->containsRange(Address, Size)) { 348a34c753fSRafael Auler fixParents(GAI, Prev); 349a34c753fSRafael Auler } else { 350a34c753fSRafael Auler fixParents(GAI, nullptr); 351a34c753fSRafael Auler } 352a34c753fSRafael Auler Prev = Prev->Parent; 353a34c753fSRafael Auler } 354a34c753fSRafael Auler } 355a34c753fSRafael Auler 356a34c753fSRafael Auler // Check if the newly added symbol contains any subsequent symbols. 357a34c753fSRafael Auler if (Size != 0) { 358a34c753fSRafael Auler BinaryData *BD = GAI->second->Parent ? GAI->second->Parent : GAI->second; 359a34c753fSRafael Auler auto Itr = std::next(GAI); 36040c2e0faSMaksim Panchenko while ( 36140c2e0faSMaksim Panchenko Itr != BinaryDataMap.end() && 36240c2e0faSMaksim Panchenko BD->containsRange(Itr->second->getAddress(), Itr->second->getSize())) { 363a34c753fSRafael Auler Itr->second->Parent = BD; 364a34c753fSRafael Auler ++Itr; 365a34c753fSRafael Auler } 366a34c753fSRafael Auler } 367a34c753fSRafael Auler } 368a34c753fSRafael Auler 369a34c753fSRafael Auler iterator_range<BinaryContext::binary_data_iterator> 370a34c753fSRafael Auler BinaryContext::getSubBinaryData(BinaryData *BD) { 371a34c753fSRafael Auler auto Start = std::next(BinaryDataMap.find(BD->getAddress())); 372a34c753fSRafael Auler auto End = Start; 3733652483cSRafael Auler while (End != BinaryDataMap.end() && BD->isAncestorOf(End->second)) 374a34c753fSRafael Auler ++End; 375a34c753fSRafael Auler return make_range(Start, End); 376a34c753fSRafael Auler } 377a34c753fSRafael Auler 378a34c753fSRafael Auler std::pair<const MCSymbol *, uint64_t> 379a34c753fSRafael Auler BinaryContext::handleAddressRef(uint64_t Address, BinaryFunction &BF, 380a34c753fSRafael Auler bool IsPCRel) { 381a34c753fSRafael Auler if (isAArch64()) { 382a34c753fSRafael Auler // Check if this is an access to a constant island and create bookkeeping 383a34c753fSRafael Auler // to keep track of it and emit it later as part of this function. 384a34c753fSRafael Auler if (MCSymbol *IslandSym = BF.getOrCreateIslandAccess(Address)) 3858d1fc45dSRafael Auler return std::make_pair(IslandSym, 0); 386a34c753fSRafael Auler 387a34c753fSRafael Auler // Detect custom code written in assembly that refers to arbitrary 388a34c753fSRafael Auler // constant islands from other functions. Write this reference so we 389a34c753fSRafael Auler // can pull this constant island and emit it as part of this function 390a34c753fSRafael Auler // too. 391a34c753fSRafael Auler auto IslandIter = AddressToConstantIslandMap.lower_bound(Address); 3926040415eSDenis Revunov 3936040415eSDenis Revunov if (IslandIter != AddressToConstantIslandMap.begin() && 3946040415eSDenis Revunov (IslandIter == AddressToConstantIslandMap.end() || 3956040415eSDenis Revunov IslandIter->first > Address)) 3966040415eSDenis Revunov --IslandIter; 3976040415eSDenis Revunov 398a34c753fSRafael Auler if (IslandIter != AddressToConstantIslandMap.end()) { 3997117af52SVladislav Khmelevsky // Fall-back to referencing the original constant island in the presence 4007117af52SVladislav Khmelevsky // of dynamic relocs, as we currently do not support cloning them. 4017117af52SVladislav Khmelevsky // Notice: we might fail to link because of this, if the original constant 4027117af52SVladislav Khmelevsky // island we are referring would be emitted too far away. 4037117af52SVladislav Khmelevsky if (IslandIter->second->hasDynamicRelocationAtIsland()) { 4047117af52SVladislav Khmelevsky MCSymbol *IslandSym = 4057117af52SVladislav Khmelevsky IslandIter->second->getOrCreateIslandAccess(Address); 4067117af52SVladislav Khmelevsky if (IslandSym) 4077117af52SVladislav Khmelevsky return std::make_pair(IslandSym, 0); 4087117af52SVladislav Khmelevsky } else if (MCSymbol *IslandSym = 4097117af52SVladislav Khmelevsky IslandIter->second->getOrCreateProxyIslandAccess(Address, 4107117af52SVladislav Khmelevsky BF)) { 411a34c753fSRafael Auler BF.createIslandDependency(IslandSym, IslandIter->second); 4128d1fc45dSRafael Auler return std::make_pair(IslandSym, 0); 413a34c753fSRafael Auler } 414a34c753fSRafael Auler } 415a34c753fSRafael Auler } 416a34c753fSRafael Auler 417a34c753fSRafael Auler // Note that the address does not necessarily have to reside inside 418a34c753fSRafael Auler // a section, it could be an absolute address too. 419a34c753fSRafael Auler ErrorOr<BinarySection &> Section = getSectionForAddress(Address); 420a34c753fSRafael Auler if (Section && Section->isText()) { 421a34c753fSRafael Auler if (BF.containsAddress(Address, /*UseMaxSize=*/isAArch64())) { 422a34c753fSRafael Auler if (Address != BF.getAddress()) { 423a34c753fSRafael Auler // The address could potentially escape. Mark it as another entry 424a34c753fSRafael Auler // point into the function. 425a34c753fSRafael Auler if (opts::Verbosity >= 1) { 426a34c753fSRafael Auler outs() << "BOLT-INFO: potentially escaped address 0x" 42740c2e0faSMaksim Panchenko << Twine::utohexstr(Address) << " in function " << BF << '\n'; 428a34c753fSRafael Auler } 429a34c753fSRafael Auler BF.HasInternalLabelReference = true; 430a34c753fSRafael Auler return std::make_pair( 4318d1fc45dSRafael Auler BF.addEntryPointAtOffset(Address - BF.getAddress()), 0); 432a34c753fSRafael Auler } 433a34c753fSRafael Auler } else { 43435efe1d8SVladislav Khmelevsky addInterproceduralReference(&BF, Address); 435a34c753fSRafael Auler } 436a34c753fSRafael Auler } 437a34c753fSRafael Auler 438a34c753fSRafael Auler // With relocations, catch jump table references outside of the basic block 439a34c753fSRafael Auler // containing the indirect jump. 440a34c753fSRafael Auler if (HasRelocations) { 441a34c753fSRafael Auler const MemoryContentsType MemType = analyzeMemoryAt(Address, BF); 442a34c753fSRafael Auler if (MemType == MemoryContentsType::POSSIBLE_PIC_JUMP_TABLE && IsPCRel) { 443a34c753fSRafael Auler const MCSymbol *Symbol = 444a34c753fSRafael Auler getOrCreateJumpTable(BF, Address, JumpTable::JTT_PIC); 445a34c753fSRafael Auler 4468d1fc45dSRafael Auler return std::make_pair(Symbol, 0); 447a34c753fSRafael Auler } 448a34c753fSRafael Auler } 449a34c753fSRafael Auler 4503652483cSRafael Auler if (BinaryData *BD = getBinaryDataContainingAddress(Address)) 451a34c753fSRafael Auler return std::make_pair(BD->getSymbol(), Address - BD->getAddress()); 452a34c753fSRafael Auler 453a34c753fSRafael Auler // TODO: use DWARF info to get size/alignment here? 454a34c753fSRafael Auler MCSymbol *TargetSymbol = getOrCreateGlobalSymbol(Address, "DATAat"); 455a34c753fSRafael Auler LLVM_DEBUG(dbgs() << "Created symbol " << TargetSymbol->getName() << '\n'); 4568d1fc45dSRafael Auler return std::make_pair(TargetSymbol, 0); 457a34c753fSRafael Auler } 458a34c753fSRafael Auler 45940c2e0faSMaksim Panchenko MemoryContentsType BinaryContext::analyzeMemoryAt(uint64_t Address, 46040c2e0faSMaksim Panchenko BinaryFunction &BF) { 461a34c753fSRafael Auler if (!isX86()) 462a34c753fSRafael Auler return MemoryContentsType::UNKNOWN; 463a34c753fSRafael Auler 464a34c753fSRafael Auler ErrorOr<BinarySection &> Section = getSectionForAddress(Address); 465a34c753fSRafael Auler if (!Section) { 466a34c753fSRafael Auler // No section - possibly an absolute address. Since we don't allow 467a34c753fSRafael Auler // internal function addresses to escape the function scope - we 468a34c753fSRafael Auler // consider it a tail call. 469a34c753fSRafael Auler if (opts::Verbosity > 1) { 470a34c753fSRafael Auler errs() << "BOLT-WARNING: no section for address 0x" 47140c2e0faSMaksim Panchenko << Twine::utohexstr(Address) << " referenced from function " << BF 47240c2e0faSMaksim Panchenko << '\n'; 473a34c753fSRafael Auler } 474a34c753fSRafael Auler return MemoryContentsType::UNKNOWN; 475a34c753fSRafael Auler } 476a34c753fSRafael Auler 477a34c753fSRafael Auler if (Section->isVirtual()) { 478a34c753fSRafael Auler // The contents are filled at runtime. 479a34c753fSRafael Auler return MemoryContentsType::UNKNOWN; 480a34c753fSRafael Auler } 481a34c753fSRafael Auler 482a34c753fSRafael Auler // No support for jump tables in code yet. 483a34c753fSRafael Auler if (Section->isText()) 484a34c753fSRafael Auler return MemoryContentsType::UNKNOWN; 485a34c753fSRafael Auler 486a34c753fSRafael Auler // Start with checking for PIC jump table. We expect non-PIC jump tables 487a34c753fSRafael Auler // to have high 32 bits set to 0. 488a34c753fSRafael Auler if (analyzeJumpTable(Address, JumpTable::JTT_PIC, BF)) 489a34c753fSRafael Auler return MemoryContentsType::POSSIBLE_PIC_JUMP_TABLE; 490a34c753fSRafael Auler 491a34c753fSRafael Auler if (analyzeJumpTable(Address, JumpTable::JTT_NORMAL, BF)) 492a34c753fSRafael Auler return MemoryContentsType::POSSIBLE_JUMP_TABLE; 493a34c753fSRafael Auler 494a34c753fSRafael Auler return MemoryContentsType::UNKNOWN; 495a34c753fSRafael Auler } 496a34c753fSRafael Auler 49708ab4fafSAmir Ayupov bool BinaryContext::analyzeJumpTable(const uint64_t Address, 49808ab4fafSAmir Ayupov const JumpTable::JumpTableType Type, 49908ab4fafSAmir Ayupov const BinaryFunction &BF, 50008ab4fafSAmir Ayupov const uint64_t NextJTAddress, 50108ab4fafSAmir Ayupov JumpTable::AddressesType *EntriesAsAddress, 50208ab4fafSAmir Ayupov bool *HasEntryInFragment) const { 503a34c753fSRafael Auler // Is one of the targets __builtin_unreachable? 504a34c753fSRafael Auler bool HasUnreachable = false; 505a34c753fSRafael Auler 506a34c753fSRafael Auler // Number of targets other than __builtin_unreachable. 507a34c753fSRafael Auler uint64_t NumRealEntries = 0; 508a34c753fSRafael Auler 50905523dc3SHuan Nguyen auto addEntryAddress = [&](uint64_t EntryAddress) { 51005523dc3SHuan Nguyen if (EntriesAsAddress) 51105523dc3SHuan Nguyen EntriesAsAddress->emplace_back(EntryAddress); 512a34c753fSRafael Auler }; 513a34c753fSRafael Auler 51408ab4fafSAmir Ayupov ErrorOr<const BinarySection &> Section = getSectionForAddress(Address); 515a34c753fSRafael Auler if (!Section) 516a34c753fSRafael Auler return false; 517a34c753fSRafael Auler 518a34c753fSRafael Auler // The upper bound is defined by containing object, section limits, and 519a34c753fSRafael Auler // the next jump table in memory. 520a34c753fSRafael Auler uint64_t UpperBound = Section->getEndAddress(); 521a34c753fSRafael Auler const BinaryData *JumpTableBD = getBinaryDataAtAddress(Address); 522a34c753fSRafael Auler if (JumpTableBD && JumpTableBD->getSize()) { 523a34c753fSRafael Auler assert(JumpTableBD->getEndAddress() <= UpperBound && 524a34c753fSRafael Auler "data object cannot cross a section boundary"); 525a34c753fSRafael Auler UpperBound = JumpTableBD->getEndAddress(); 526a34c753fSRafael Auler } 5273652483cSRafael Auler if (NextJTAddress) 528a34c753fSRafael Auler UpperBound = std::min(NextJTAddress, UpperBound); 529a34c753fSRafael Auler 530556efdbaSAmir Ayupov LLVM_DEBUG({ 531556efdbaSAmir Ayupov using JTT = JumpTable::JumpTableType; 532556efdbaSAmir Ayupov dbgs() << formatv("BOLT-DEBUG: analyzeJumpTable @{0:x} in {1}, JTT={2}\n", 533556efdbaSAmir Ayupov Address, BF.getPrintName(), 534556efdbaSAmir Ayupov Type == JTT::JTT_PIC ? "PIC" : "Normal"); 535556efdbaSAmir Ayupov }); 536a34c753fSRafael Auler const uint64_t EntrySize = getJumpTableEntrySize(Type); 537a34c753fSRafael Auler for (uint64_t EntryAddress = Address; EntryAddress <= UpperBound - EntrySize; 538a34c753fSRafael Auler EntryAddress += EntrySize) { 539a34c753fSRafael Auler LLVM_DEBUG(dbgs() << " * Checking 0x" << Twine::utohexstr(EntryAddress) 540a34c753fSRafael Auler << " -> "); 541a34c753fSRafael Auler // Check if there's a proper relocation against the jump table entry. 542a34c753fSRafael Auler if (HasRelocations) { 543a34c753fSRafael Auler if (Type == JumpTable::JTT_PIC && 544a34c753fSRafael Auler !DataPCRelocations.count(EntryAddress)) { 545a34c753fSRafael Auler LLVM_DEBUG( 546a34c753fSRafael Auler dbgs() << "FAIL: JTT_PIC table, no relocation for this address\n"); 547a34c753fSRafael Auler break; 548a34c753fSRafael Auler } 549a34c753fSRafael Auler if (Type == JumpTable::JTT_NORMAL && !getRelocationAt(EntryAddress)) { 550a34c753fSRafael Auler LLVM_DEBUG( 551a34c753fSRafael Auler dbgs() 552a34c753fSRafael Auler << "FAIL: JTT_NORMAL table, no relocation for this address\n"); 553a34c753fSRafael Auler break; 554a34c753fSRafael Auler } 555a34c753fSRafael Auler } 556a34c753fSRafael Auler 55740c2e0faSMaksim Panchenko const uint64_t Value = 55840c2e0faSMaksim Panchenko (Type == JumpTable::JTT_PIC) 559a34c753fSRafael Auler ? Address + *getSignedValueAtAddress(EntryAddress, EntrySize) 560a34c753fSRafael Auler : *getPointerAtAddress(EntryAddress); 561a34c753fSRafael Auler 562a34c753fSRafael Auler // __builtin_unreachable() case. 563a34c753fSRafael Auler if (Value == BF.getAddress() + BF.getSize()) { 56405523dc3SHuan Nguyen addEntryAddress(Value); 565a34c753fSRafael Auler HasUnreachable = true; 566556efdbaSAmir Ayupov LLVM_DEBUG(dbgs() << formatv("OK: {0:x} __builtin_unreachable\n", Value)); 567a34c753fSRafael Auler continue; 568a34c753fSRafael Auler } 569a34c753fSRafael Auler 570a34c753fSRafael Auler // Function or one of its fragments. 57108ab4fafSAmir Ayupov const BinaryFunction *TargetBF = getBinaryFunctionContainingAddress(Value); 572a34c753fSRafael Auler 573068e9889SAmir Ayupov bool DoesBelongToFunction = BF.containsAddress(Value) || 574068e9889SAmir Ayupov (TargetBF && TargetBF->isParentOrChildOf(BF)); 575068e9889SAmir Ayupov 576a34c753fSRafael Auler // We assume that a jump table cannot have function start as an entry. 577068e9889SAmir Ayupov if (!DoesBelongToFunction || Value == BF.getAddress()) { 578a34c753fSRafael Auler LLVM_DEBUG({ 579a34c753fSRafael Auler if (!BF.containsAddress(Value)) { 580a34c753fSRafael Auler dbgs() << "FAIL: function doesn't contain this address\n"; 581a34c753fSRafael Auler if (TargetBF) { 582a34c753fSRafael Auler dbgs() << " ! function containing this address: " 583a34c753fSRafael Auler << TargetBF->getPrintName() << '\n'; 584556efdbaSAmir Ayupov if (TargetBF->isFragment()) { 585556efdbaSAmir Ayupov dbgs() << " ! is a fragment"; 586556efdbaSAmir Ayupov for (BinaryFunction *Parent : TargetBF->ParentFragments) 587556efdbaSAmir Ayupov dbgs() << ", parent: " << Parent->getPrintName(); 588556efdbaSAmir Ayupov dbgs() << '\n'; 589556efdbaSAmir Ayupov } 590a34c753fSRafael Auler } 591a34c753fSRafael Auler } 592a34c753fSRafael Auler if (Value == BF.getAddress()) 593a34c753fSRafael Auler dbgs() << "FAIL: jump table cannot have function start as an entry\n"; 594a34c753fSRafael Auler }); 595a34c753fSRafael Auler break; 596a34c753fSRafael Auler } 597a34c753fSRafael Auler 598a34c753fSRafael Auler // Check there's an instruction at this offset. 599a34c753fSRafael Auler if (TargetBF->getState() == BinaryFunction::State::Disassembled && 600a34c753fSRafael Auler !TargetBF->getInstructionAtOffset(Value - TargetBF->getAddress())) { 601556efdbaSAmir Ayupov LLVM_DEBUG(dbgs() << formatv("FAIL: no instruction at {0:x}\n", Value)); 602a34c753fSRafael Auler break; 603a34c753fSRafael Auler } 604a34c753fSRafael Auler 605a34c753fSRafael Auler ++NumRealEntries; 606556efdbaSAmir Ayupov LLVM_DEBUG(dbgs() << formatv("OK: {0:x} real entry\n", Value)); 607a34c753fSRafael Auler 60808ab4fafSAmir Ayupov if (TargetBF != &BF && HasEntryInFragment) 60908ab4fafSAmir Ayupov *HasEntryInFragment = true; 61005523dc3SHuan Nguyen addEntryAddress(Value); 611a34c753fSRafael Auler } 612a34c753fSRafael Auler 613a34c753fSRafael Auler // It's a jump table if the number of real entries is more than 1, or there's 614a34c753fSRafael Auler // one real entry and "unreachable" targets. If there are only multiple 615a34c753fSRafael Auler // "unreachable" targets, then it's not a jump table. 616a34c753fSRafael Auler return NumRealEntries + HasUnreachable >= 2; 617a34c753fSRafael Auler } 618a34c753fSRafael Auler 619a34c753fSRafael Auler void BinaryContext::populateJumpTables() { 620a34c753fSRafael Auler LLVM_DEBUG(dbgs() << "DataPCRelocations: " << DataPCRelocations.size() 621a34c753fSRafael Auler << '\n'); 622a34c753fSRafael Auler for (auto JTI = JumpTables.begin(), JTE = JumpTables.end(); JTI != JTE; 623a34c753fSRafael Auler ++JTI) { 624a34c753fSRafael Auler JumpTable *JT = JTI->second; 625a34c753fSRafael Auler 62605523dc3SHuan Nguyen bool NonSimpleParent = false; 62705523dc3SHuan Nguyen for (BinaryFunction *BF : JT->Parents) 62805523dc3SHuan Nguyen NonSimpleParent |= !BF->isSimple(); 62905523dc3SHuan Nguyen if (NonSimpleParent) 630a34c753fSRafael Auler continue; 631a34c753fSRafael Auler 632a34c753fSRafael Auler uint64_t NextJTAddress = 0; 633a34c753fSRafael Auler auto NextJTI = std::next(JTI); 6343652483cSRafael Auler if (NextJTI != JTE) 635a34c753fSRafael Auler NextJTAddress = NextJTI->second->getAddress(); 636a34c753fSRafael Auler 63705523dc3SHuan Nguyen const bool Success = 63805523dc3SHuan Nguyen analyzeJumpTable(JT->getAddress(), JT->Type, *(JT->Parents[0]), 63908ab4fafSAmir Ayupov NextJTAddress, &JT->EntriesAsAddress, &JT->IsSplit); 640a34c753fSRafael Auler if (!Success) { 641055f9f6dSAmir Ayupov LLVM_DEBUG({ 642055f9f6dSAmir Ayupov dbgs() << "failed to analyze "; 643a34c753fSRafael Auler JT->print(dbgs()); 644a34c753fSRafael Auler if (NextJTI != JTE) { 645055f9f6dSAmir Ayupov dbgs() << "next "; 646a34c753fSRafael Auler NextJTI->second->print(dbgs()); 647a34c753fSRafael Auler } 648055f9f6dSAmir Ayupov }); 649468d4f6dSAmir Ayupov llvm_unreachable("jump table heuristic failure"); 650a34c753fSRafael Auler } 65105523dc3SHuan Nguyen for (BinaryFunction *Frag : JT->Parents) { 65208ab4fafSAmir Ayupov if (JT->IsSplit) 65308ab4fafSAmir Ayupov Frag->setHasIndirectTargetToSplitFragment(true); 65405523dc3SHuan Nguyen for (uint64_t EntryAddress : JT->EntriesAsAddress) 65505523dc3SHuan Nguyen // if target is builtin_unreachable 65605523dc3SHuan Nguyen if (EntryAddress == Frag->getAddress() + Frag->getSize()) { 65705523dc3SHuan Nguyen Frag->IgnoredBranches.emplace_back(EntryAddress - Frag->getAddress(), 65805523dc3SHuan Nguyen Frag->getSize()); 65905523dc3SHuan Nguyen } else if (EntryAddress >= Frag->getAddress() && 66005523dc3SHuan Nguyen EntryAddress < Frag->getAddress() + Frag->getSize()) { 66105523dc3SHuan Nguyen Frag->registerReferencedOffset(EntryAddress - Frag->getAddress()); 66205523dc3SHuan Nguyen } 663a34c753fSRafael Auler } 664a34c753fSRafael Auler 665a34c753fSRafael Auler // In strict mode, erase PC-relative relocation record. Later we check that 666a34c753fSRafael Auler // all such records are erased and thus have been accounted for. 667a34c753fSRafael Auler if (opts::StrictMode && JT->Type == JumpTable::JTT_PIC) { 668a34c753fSRafael Auler for (uint64_t Address = JT->getAddress(); 669a34c753fSRafael Auler Address < JT->getAddress() + JT->getSize(); 670a34c753fSRafael Auler Address += JT->EntrySize) { 671a34c753fSRafael Auler DataPCRelocations.erase(DataPCRelocations.find(Address)); 672a34c753fSRafael Auler } 673a34c753fSRafael Auler } 674a34c753fSRafael Auler 675a34c753fSRafael Auler // Mark to skip the function and all its fragments. 67605523dc3SHuan Nguyen for (BinaryFunction *Frag : JT->Parents) 67705523dc3SHuan Nguyen if (Frag->hasIndirectTargetToSplitFragment()) 67805523dc3SHuan Nguyen addFragmentsToSkip(Frag); 679a34c753fSRafael Auler } 680a34c753fSRafael Auler 681a34c753fSRafael Auler if (opts::StrictMode && DataPCRelocations.size()) { 682a34c753fSRafael Auler LLVM_DEBUG({ 683a34c753fSRafael Auler dbgs() << DataPCRelocations.size() 684a34c753fSRafael Auler << " unclaimed PC-relative relocations left in data:\n"; 685a34c753fSRafael Auler for (uint64_t Reloc : DataPCRelocations) 686a34c753fSRafael Auler dbgs() << Twine::utohexstr(Reloc) << '\n'; 687a34c753fSRafael Auler }); 688a34c753fSRafael Auler assert(0 && "unclaimed PC-relative relocations left in data\n"); 689a34c753fSRafael Auler } 690a34c753fSRafael Auler clearList(DataPCRelocations); 691a34c753fSRafael Auler } 6926aa735ceSAmir Ayupov 6936aa735ceSAmir Ayupov void BinaryContext::skipMarkedFragments() { 69405523dc3SHuan Nguyen std::vector<BinaryFunction *> FragmentQueue; 69505523dc3SHuan Nguyen // Copy the functions to FragmentQueue. 69605523dc3SHuan Nguyen FragmentQueue.assign(FragmentsToSkip.begin(), FragmentsToSkip.end()); 6976aa735ceSAmir Ayupov auto addToWorklist = [&](BinaryFunction *Function) -> void { 69805523dc3SHuan Nguyen if (FragmentsToSkip.count(Function)) 6996aa735ceSAmir Ayupov return; 70005523dc3SHuan Nguyen FragmentQueue.push_back(Function); 70105523dc3SHuan Nguyen addFragmentsToSkip(Function); 7026aa735ceSAmir Ayupov }; 7036aa735ceSAmir Ayupov // Functions containing split jump tables need to be skipped with all 7046aa735ceSAmir Ayupov // fragments (transitively). 70505523dc3SHuan Nguyen for (size_t I = 0; I != FragmentQueue.size(); I++) { 70605523dc3SHuan Nguyen BinaryFunction *BF = FragmentQueue[I]; 70705523dc3SHuan Nguyen assert(FragmentsToSkip.count(BF) && 7086aa735ceSAmir Ayupov "internal error in traversing function fragments"); 7096aa735ceSAmir Ayupov if (opts::Verbosity >= 1) 7106aa735ceSAmir Ayupov errs() << "BOLT-WARNING: Ignoring " << BF->getPrintName() << '\n'; 71182095bd5SHuan Nguyen BF->setSimple(false); 71205523dc3SHuan Nguyen BF->setHasIndirectTargetToSplitFragment(true); 71382095bd5SHuan Nguyen 714d2c87699SAmir Ayupov llvm::for_each(BF->Fragments, addToWorklist); 715d2c87699SAmir Ayupov llvm::for_each(BF->ParentFragments, addToWorklist); 7166aa735ceSAmir Ayupov } 717641e92d4SMaksim Panchenko if (!FragmentsToSkip.empty()) 71882095bd5SHuan Nguyen errs() << "BOLT-WARNING: skipped " << FragmentsToSkip.size() << " function" 719641e92d4SMaksim Panchenko << (FragmentsToSkip.size() == 1 ? "" : "s") 720641e92d4SMaksim Panchenko << " due to cold fragments\n"; 721a34c753fSRafael Auler } 722a34c753fSRafael Auler 72340c2e0faSMaksim Panchenko MCSymbol *BinaryContext::getOrCreateGlobalSymbol(uint64_t Address, Twine Prefix, 724a34c753fSRafael Auler uint64_t Size, 725a34c753fSRafael Auler uint16_t Alignment, 726a34c753fSRafael Auler unsigned Flags) { 727a34c753fSRafael Auler auto Itr = BinaryDataMap.find(Address); 728a34c753fSRafael Auler if (Itr != BinaryDataMap.end()) { 729a34c753fSRafael Auler assert(Itr->second->getSize() == Size || !Size); 730a34c753fSRafael Auler return Itr->second->getSymbol(); 731a34c753fSRafael Auler } 732a34c753fSRafael Auler 733a34c753fSRafael Auler std::string Name = (Prefix + "0x" + Twine::utohexstr(Address)).str(); 734a34c753fSRafael Auler assert(!GlobalSymbols.count(Name) && "created name is not unique"); 735a34c753fSRafael Auler return registerNameAtAddress(Name, Address, Size, Alignment, Flags); 736a34c753fSRafael Auler } 737a34c753fSRafael Auler 738a34c753fSRafael Auler MCSymbol *BinaryContext::getOrCreateUndefinedGlobalSymbol(StringRef Name) { 739a34c753fSRafael Auler return Ctx->getOrCreateSymbol(Name); 740a34c753fSRafael Auler } 741a34c753fSRafael Auler 742a34c753fSRafael Auler BinaryFunction *BinaryContext::createBinaryFunction( 743a34c753fSRafael Auler const std::string &Name, BinarySection &Section, uint64_t Address, 744a34c753fSRafael Auler uint64_t Size, uint64_t SymbolSize, uint16_t Alignment) { 745a34c753fSRafael Auler auto Result = BinaryFunctions.emplace( 746a34c753fSRafael Auler Address, BinaryFunction(Name, Section, Address, Size, *this)); 747a34c753fSRafael Auler assert(Result.second == true && "unexpected duplicate function"); 748a34c753fSRafael Auler BinaryFunction *BF = &Result.first->second; 749a34c753fSRafael Auler registerNameAtAddress(Name, Address, SymbolSize ? SymbolSize : Size, 750a34c753fSRafael Auler Alignment); 751a34c753fSRafael Auler setSymbolToFunctionMap(BF->getSymbol(), BF); 752a34c753fSRafael Auler return BF; 753a34c753fSRafael Auler } 754a34c753fSRafael Auler 755a34c753fSRafael Auler const MCSymbol * 756a34c753fSRafael Auler BinaryContext::getOrCreateJumpTable(BinaryFunction &Function, uint64_t Address, 757a34c753fSRafael Auler JumpTable::JumpTableType Type) { 75805523dc3SHuan Nguyen // Two fragments of same function access same jump table 759a34c753fSRafael Auler if (JumpTable *JT = getJumpTableContainingAddress(Address)) { 760a34c753fSRafael Auler assert(JT->Type == Type && "jump table types have to match"); 761a34c753fSRafael Auler assert(Address == JT->getAddress() && "unexpected non-empty jump table"); 762a34c753fSRafael Auler 76305523dc3SHuan Nguyen // Prevent associating a jump table to a specific fragment twice. 76405523dc3SHuan Nguyen // This simple check arises from the assumption: no more than 2 fragments. 76505523dc3SHuan Nguyen if (JT->Parents.size() == 1 && JT->Parents[0] != &Function) { 766068e9889SAmir Ayupov assert(JT->Parents[0]->isParentOrChildOf(Function) && 76705523dc3SHuan Nguyen "cannot re-use jump table of a different function"); 76828b1dcb1SHuan Nguyen // Duplicate the entry for the parent function for easy access 76905523dc3SHuan Nguyen JT->Parents.push_back(&Function); 77028b1dcb1SHuan Nguyen if (opts::Verbosity > 2) { 77105523dc3SHuan Nguyen outs() << "BOLT-INFO: Multiple fragments access same jump table: " 77205523dc3SHuan Nguyen << JT->Parents[0]->getPrintName() << "; " 77305523dc3SHuan Nguyen << Function.getPrintName() << "\n"; 77405523dc3SHuan Nguyen JT->print(outs()); 77528b1dcb1SHuan Nguyen } 77628b1dcb1SHuan Nguyen Function.JumpTables.emplace(Address, JT); 77705523dc3SHuan Nguyen JT->Parents[0]->setHasIndirectTargetToSplitFragment(true); 77805523dc3SHuan Nguyen JT->Parents[1]->setHasIndirectTargetToSplitFragment(true); 77928b1dcb1SHuan Nguyen } 78005523dc3SHuan Nguyen 78105523dc3SHuan Nguyen bool IsJumpTableParent = false; 7820c925861SThorsten Schütt (void)IsJumpTableParent; 78305523dc3SHuan Nguyen for (BinaryFunction *Frag : JT->Parents) 78405523dc3SHuan Nguyen if (Frag == &Function) 78505523dc3SHuan Nguyen IsJumpTableParent = true; 78605523dc3SHuan Nguyen assert(IsJumpTableParent && 78705523dc3SHuan Nguyen "cannot re-use jump table of a different function"); 788a34c753fSRafael Auler return JT->getFirstLabel(); 789a34c753fSRafael Auler } 790a34c753fSRafael Auler 791a34c753fSRafael Auler // Re-use the existing symbol if possible. 792a34c753fSRafael Auler MCSymbol *JTLabel = nullptr; 793a34c753fSRafael Auler if (BinaryData *Object = getBinaryDataAtAddress(Address)) { 794a34c753fSRafael Auler if (!isInternalSymbolName(Object->getSymbol()->getName())) 795a34c753fSRafael Auler JTLabel = Object->getSymbol(); 796a34c753fSRafael Auler } 797a34c753fSRafael Auler 798a34c753fSRafael Auler const uint64_t EntrySize = getJumpTableEntrySize(Type); 799a34c753fSRafael Auler if (!JTLabel) { 800a34c753fSRafael Auler const std::string JumpTableName = generateJumpTableName(Function, Address); 801a34c753fSRafael Auler JTLabel = registerNameAtAddress(JumpTableName, Address, 0, EntrySize); 802a34c753fSRafael Auler } 803a34c753fSRafael Auler 804a34c753fSRafael Auler LLVM_DEBUG(dbgs() << "BOLT-DEBUG: creating jump table " << JTLabel->getName() 805a34c753fSRafael Auler << " in function " << Function << '\n'); 806a34c753fSRafael Auler 807a34c753fSRafael Auler JumpTable *JT = new JumpTable(*JTLabel, Address, EntrySize, Type, 80805523dc3SHuan Nguyen JumpTable::LabelMapType{{0, JTLabel}}, 809a34c753fSRafael Auler *getSectionForAddress(Address)); 81005523dc3SHuan Nguyen JT->Parents.push_back(&Function); 81105523dc3SHuan Nguyen if (opts::Verbosity > 2) 81205523dc3SHuan Nguyen JT->print(outs()); 813a34c753fSRafael Auler JumpTables.emplace(Address, JT); 814a34c753fSRafael Auler 815a34c753fSRafael Auler // Duplicate the entry for the parent function for easy access. 816a34c753fSRafael Auler Function.JumpTables.emplace(Address, JT); 817a34c753fSRafael Auler return JTLabel; 818a34c753fSRafael Auler } 819a34c753fSRafael Auler 820a34c753fSRafael Auler std::pair<uint64_t, const MCSymbol *> 821a34c753fSRafael Auler BinaryContext::duplicateJumpTable(BinaryFunction &Function, JumpTable *JT, 822a34c753fSRafael Auler const MCSymbol *OldLabel) { 823a34c753fSRafael Auler auto L = scopeLock(); 824a34c753fSRafael Auler unsigned Offset = 0; 825a34c753fSRafael Auler bool Found = false; 826a34c753fSRafael Auler for (std::pair<const unsigned, MCSymbol *> Elmt : JT->Labels) { 827a34c753fSRafael Auler if (Elmt.second != OldLabel) 828a34c753fSRafael Auler continue; 829a34c753fSRafael Auler Offset = Elmt.first; 830a34c753fSRafael Auler Found = true; 831a34c753fSRafael Auler break; 832a34c753fSRafael Auler } 833a34c753fSRafael Auler assert(Found && "Label not found"); 834c907d6e0SAmir Ayupov (void)Found; 835a34c753fSRafael Auler MCSymbol *NewLabel = Ctx->createNamedTempSymbol("duplicatedJT"); 836a34c753fSRafael Auler JumpTable *NewJT = 837a34c753fSRafael Auler new JumpTable(*NewLabel, JT->getAddress(), JT->EntrySize, JT->Type, 83805523dc3SHuan Nguyen JumpTable::LabelMapType{{Offset, NewLabel}}, 839a34c753fSRafael Auler *getSectionForAddress(JT->getAddress())); 84005523dc3SHuan Nguyen NewJT->Parents = JT->Parents; 841a34c753fSRafael Auler NewJT->Entries = JT->Entries; 842a34c753fSRafael Auler NewJT->Counts = JT->Counts; 843a34c753fSRafael Auler uint64_t JumpTableID = ++DuplicatedJumpTables; 844a34c753fSRafael Auler // Invert it to differentiate from regular jump tables whose IDs are their 845a34c753fSRafael Auler // addresses in the input binary memory space 846a34c753fSRafael Auler JumpTableID = ~JumpTableID; 847a34c753fSRafael Auler JumpTables.emplace(JumpTableID, NewJT); 848a34c753fSRafael Auler Function.JumpTables.emplace(JumpTableID, NewJT); 849a34c753fSRafael Auler return std::make_pair(JumpTableID, NewLabel); 850a34c753fSRafael Auler } 851a34c753fSRafael Auler 852a34c753fSRafael Auler std::string BinaryContext::generateJumpTableName(const BinaryFunction &BF, 853a34c753fSRafael Auler uint64_t Address) { 854a34c753fSRafael Auler size_t Id; 855a34c753fSRafael Auler uint64_t Offset = 0; 856a34c753fSRafael Auler if (const JumpTable *JT = BF.getJumpTableContainingAddress(Address)) { 857a34c753fSRafael Auler Offset = Address - JT->getAddress(); 858a34c753fSRafael Auler auto Itr = JT->Labels.find(Offset); 8593652483cSRafael Auler if (Itr != JT->Labels.end()) 860a34c753fSRafael Auler return std::string(Itr->second->getName()); 861a34c753fSRafael Auler Id = JumpTableIds.at(JT->getAddress()); 862a34c753fSRafael Auler } else { 863a34c753fSRafael Auler Id = JumpTableIds[Address] = BF.JumpTables.size(); 864a34c753fSRafael Auler } 865a34c753fSRafael Auler return ("JUMP_TABLE/" + BF.getOneName().str() + "." + std::to_string(Id) + 866a34c753fSRafael Auler (Offset ? ("." + std::to_string(Offset)) : "")); 867a34c753fSRafael Auler } 868a34c753fSRafael Auler 869a34c753fSRafael Auler bool BinaryContext::hasValidCodePadding(const BinaryFunction &BF) { 870a34c753fSRafael Auler // FIXME: aarch64 support is missing. 871a34c753fSRafael Auler if (!isX86()) 872a34c753fSRafael Auler return true; 873a34c753fSRafael Auler 874a34c753fSRafael Auler if (BF.getSize() == BF.getMaxSize()) 875a34c753fSRafael Auler return true; 876a34c753fSRafael Auler 877a34c753fSRafael Auler ErrorOr<ArrayRef<unsigned char>> FunctionData = BF.getData(); 878a34c753fSRafael Auler assert(FunctionData && "cannot get function as data"); 879a34c753fSRafael Auler 880a34c753fSRafael Auler uint64_t Offset = BF.getSize(); 881a34c753fSRafael Auler MCInst Instr; 882a34c753fSRafael Auler uint64_t InstrSize = 0; 883a34c753fSRafael Auler uint64_t InstrAddress = BF.getAddress() + Offset; 884a34c753fSRafael Auler using std::placeholders::_1; 885a34c753fSRafael Auler 886a34c753fSRafael Auler // Skip instructions that satisfy the predicate condition. 887a34c753fSRafael Auler auto skipInstructions = [&](std::function<bool(const MCInst &)> Predicate) { 888a34c753fSRafael Auler const uint64_t StartOffset = Offset; 889a34c753fSRafael Auler for (; Offset < BF.getMaxSize(); 890a34c753fSRafael Auler Offset += InstrSize, InstrAddress += InstrSize) { 89140c2e0faSMaksim Panchenko if (!DisAsm->getInstruction(Instr, InstrSize, FunctionData->slice(Offset), 89240c2e0faSMaksim Panchenko InstrAddress, nulls())) 893a34c753fSRafael Auler break; 894a34c753fSRafael Auler if (!Predicate(Instr)) 895a34c753fSRafael Auler break; 896a34c753fSRafael Auler } 897a34c753fSRafael Auler 898a34c753fSRafael Auler return Offset - StartOffset; 899a34c753fSRafael Auler }; 900a34c753fSRafael Auler 901a34c753fSRafael Auler // Skip a sequence of zero bytes. 902a34c753fSRafael Auler auto skipZeros = [&]() { 903a34c753fSRafael Auler const uint64_t StartOffset = Offset; 904a34c753fSRafael Auler for (; Offset < BF.getMaxSize(); ++Offset) 905a34c753fSRafael Auler if ((*FunctionData)[Offset] != 0) 906a34c753fSRafael Auler break; 907a34c753fSRafael Auler 908a34c753fSRafael Auler return Offset - StartOffset; 909a34c753fSRafael Auler }; 910a34c753fSRafael Auler 911a34c753fSRafael Auler // Accept the whole padding area filled with breakpoints. 912a34c753fSRafael Auler auto isBreakpoint = std::bind(&MCPlusBuilder::isBreakpoint, MIB.get(), _1); 913a34c753fSRafael Auler if (skipInstructions(isBreakpoint) && Offset == BF.getMaxSize()) 914a34c753fSRafael Auler return true; 915a34c753fSRafael Auler 916a34c753fSRafael Auler auto isNoop = std::bind(&MCPlusBuilder::isNoop, MIB.get(), _1); 917a34c753fSRafael Auler 918a34c753fSRafael Auler // Some functions have a jump to the next function or to the padding area 919a34c753fSRafael Auler // inserted after the body. 920a34c753fSRafael Auler auto isSkipJump = [&](const MCInst &Instr) { 921a34c753fSRafael Auler uint64_t TargetAddress = 0; 922a34c753fSRafael Auler if (MIB->isUnconditionalBranch(Instr) && 923a34c753fSRafael Auler MIB->evaluateBranch(Instr, InstrAddress, InstrSize, TargetAddress)) { 924a34c753fSRafael Auler if (TargetAddress >= InstrAddress + InstrSize && 925a34c753fSRafael Auler TargetAddress <= BF.getAddress() + BF.getMaxSize()) { 926a34c753fSRafael Auler return true; 927a34c753fSRafael Auler } 928a34c753fSRafael Auler } 929a34c753fSRafael Auler return false; 930a34c753fSRafael Auler }; 931a34c753fSRafael Auler 932a34c753fSRafael Auler // Skip over nops, jumps, and zero padding. Allow interleaving (this happens). 93340c2e0faSMaksim Panchenko while (skipInstructions(isNoop) || skipInstructions(isSkipJump) || 934a34c753fSRafael Auler skipZeros()) 935a34c753fSRafael Auler ; 936a34c753fSRafael Auler 937a34c753fSRafael Auler if (Offset == BF.getMaxSize()) 938a34c753fSRafael Auler return true; 939a34c753fSRafael Auler 940a34c753fSRafael Auler if (opts::Verbosity >= 1) { 941a34c753fSRafael Auler errs() << "BOLT-WARNING: bad padding at address 0x" 942a34c753fSRafael Auler << Twine::utohexstr(BF.getAddress() + BF.getSize()) 94340c2e0faSMaksim Panchenko << " starting at offset " << (Offset - BF.getSize()) 94440c2e0faSMaksim Panchenko << " in function " << BF << '\n' 945a34c753fSRafael Auler << FunctionData->slice(BF.getSize(), BF.getMaxSize() - BF.getSize()) 946a34c753fSRafael Auler << '\n'; 947a34c753fSRafael Auler } 948a34c753fSRafael Auler 949a34c753fSRafael Auler return false; 950a34c753fSRafael Auler } 951a34c753fSRafael Auler 952a34c753fSRafael Auler void BinaryContext::adjustCodePadding() { 953a34c753fSRafael Auler for (auto &BFI : BinaryFunctions) { 954a34c753fSRafael Auler BinaryFunction &BF = BFI.second; 955a34c753fSRafael Auler if (!shouldEmit(BF)) 956a34c753fSRafael Auler continue; 957a34c753fSRafael Auler 958a34c753fSRafael Auler if (!hasValidCodePadding(BF)) { 959a34c753fSRafael Auler if (HasRelocations) { 960a34c753fSRafael Auler if (opts::Verbosity >= 1) { 961a34c753fSRafael Auler outs() << "BOLT-INFO: function " << BF 962a34c753fSRafael Auler << " has invalid padding. Ignoring the function.\n"; 963a34c753fSRafael Auler } 964a34c753fSRafael Auler BF.setIgnored(); 965a34c753fSRafael Auler } else { 966a34c753fSRafael Auler BF.setMaxSize(BF.getSize()); 967a34c753fSRafael Auler } 968a34c753fSRafael Auler } 969a34c753fSRafael Auler } 970a34c753fSRafael Auler } 971a34c753fSRafael Auler 97240c2e0faSMaksim Panchenko MCSymbol *BinaryContext::registerNameAtAddress(StringRef Name, uint64_t Address, 973a34c753fSRafael Auler uint64_t Size, 974a34c753fSRafael Auler uint16_t Alignment, 975a34c753fSRafael Auler unsigned Flags) { 976a34c753fSRafael Auler // Register the name with MCContext. 977a34c753fSRafael Auler MCSymbol *Symbol = Ctx->getOrCreateSymbol(Name); 978a34c753fSRafael Auler 979a34c753fSRafael Auler auto GAI = BinaryDataMap.find(Address); 980a34c753fSRafael Auler BinaryData *BD; 981a34c753fSRafael Auler if (GAI == BinaryDataMap.end()) { 982a34c753fSRafael Auler ErrorOr<BinarySection &> SectionOrErr = getSectionForAddress(Address); 983a34c753fSRafael Auler BinarySection &Section = 984a34c753fSRafael Auler SectionOrErr ? SectionOrErr.get() : absoluteSection(); 98540c2e0faSMaksim Panchenko BD = new BinaryData(*Symbol, Address, Size, Alignment ? Alignment : 1, 98640c2e0faSMaksim Panchenko Section, Flags); 987a34c753fSRafael Auler GAI = BinaryDataMap.emplace(Address, BD).first; 988a34c753fSRafael Auler GlobalSymbols[Name] = BD; 989a34c753fSRafael Auler updateObjectNesting(GAI); 990a34c753fSRafael Auler } else { 991a34c753fSRafael Auler BD = GAI->second; 992a34c753fSRafael Auler if (!BD->hasName(Name)) { 993a34c753fSRafael Auler GlobalSymbols[Name] = BD; 994a34c753fSRafael Auler BD->Symbols.push_back(Symbol); 995a34c753fSRafael Auler } 996a34c753fSRafael Auler } 997a34c753fSRafael Auler 998a34c753fSRafael Auler return Symbol; 999a34c753fSRafael Auler } 1000a34c753fSRafael Auler 1001a34c753fSRafael Auler const BinaryData * 1002a34c753fSRafael Auler BinaryContext::getBinaryDataContainingAddressImpl(uint64_t Address) const { 1003a34c753fSRafael Auler auto NI = BinaryDataMap.lower_bound(Address); 1004a34c753fSRafael Auler auto End = BinaryDataMap.end(); 1005a34c753fSRafael Auler if ((NI != End && Address == NI->first) || 1006a34c753fSRafael Auler ((NI != BinaryDataMap.begin()) && (NI-- != BinaryDataMap.begin()))) { 10073652483cSRafael Auler if (NI->second->containsAddress(Address)) 1008a34c753fSRafael Auler return NI->second; 1009a34c753fSRafael Auler 1010a34c753fSRafael Auler // If this is a sub-symbol, see if a parent data contains the address. 1011a34c753fSRafael Auler const BinaryData *BD = NI->second->getParent(); 1012a34c753fSRafael Auler while (BD) { 1013a34c753fSRafael Auler if (BD->containsAddress(Address)) 1014a34c753fSRafael Auler return BD; 1015a34c753fSRafael Auler BD = BD->getParent(); 1016a34c753fSRafael Auler } 1017a34c753fSRafael Auler } 1018a34c753fSRafael Auler return nullptr; 1019a34c753fSRafael Auler } 1020a34c753fSRafael Auler 1021a34c753fSRafael Auler bool BinaryContext::setBinaryDataSize(uint64_t Address, uint64_t Size) { 1022a34c753fSRafael Auler auto NI = BinaryDataMap.find(Address); 1023a34c753fSRafael Auler assert(NI != BinaryDataMap.end()); 1024a34c753fSRafael Auler if (NI == BinaryDataMap.end()) 1025a34c753fSRafael Auler return false; 1026a34c753fSRafael Auler // TODO: it's possible that a jump table starts at the same address 1027a34c753fSRafael Auler // as a larger blob of private data. When we set the size of the 1028a34c753fSRafael Auler // jump table, it might be smaller than the total blob size. In this 1029a34c753fSRafael Auler // case we just leave the original size since (currently) it won't really 1030933df2a4SMaksim Panchenko // affect anything. 1031a34c753fSRafael Auler assert((!NI->second->Size || NI->second->Size == Size || 1032a34c753fSRafael Auler (NI->second->isJumpTable() && NI->second->Size > Size)) && 1033a34c753fSRafael Auler "can't change the size of a symbol that has already had its " 1034a34c753fSRafael Auler "size set"); 1035a34c753fSRafael Auler if (!NI->second->Size) { 1036a34c753fSRafael Auler NI->second->Size = Size; 1037a34c753fSRafael Auler updateObjectNesting(NI); 1038a34c753fSRafael Auler return true; 1039a34c753fSRafael Auler } 1040a34c753fSRafael Auler return false; 1041a34c753fSRafael Auler } 1042a34c753fSRafael Auler 1043a34c753fSRafael Auler void BinaryContext::generateSymbolHashes() { 1044a34c753fSRafael Auler auto isPadding = [](const BinaryData &BD) { 1045a34c753fSRafael Auler StringRef Contents = BD.getSection().getContents(); 1046a34c753fSRafael Auler StringRef SymData = Contents.substr(BD.getOffset(), BD.getSize()); 1047a34c753fSRafael Auler return (BD.getName().startswith("HOLEat") || 1048a34c753fSRafael Auler SymData.find_first_not_of(0) == StringRef::npos); 1049a34c753fSRafael Auler }; 1050a34c753fSRafael Auler 1051a34c753fSRafael Auler uint64_t NumCollisions = 0; 1052a34c753fSRafael Auler for (auto &Entry : BinaryDataMap) { 1053a34c753fSRafael Auler BinaryData &BD = *Entry.second; 1054a34c753fSRafael Auler StringRef Name = BD.getName(); 1055a34c753fSRafael Auler 1056a34c753fSRafael Auler if (!isInternalSymbolName(Name)) 1057a34c753fSRafael Auler continue; 1058a34c753fSRafael Auler 1059a34c753fSRafael Auler // First check if a non-anonymous alias exists and move it to the front. 1060a34c753fSRafael Auler if (BD.getSymbols().size() > 1) { 1061d2c87699SAmir Ayupov auto Itr = llvm::find_if(BD.getSymbols(), [&](const MCSymbol *Symbol) { 1062a34c753fSRafael Auler return !isInternalSymbolName(Symbol->getName()); 1063a34c753fSRafael Auler }); 1064a34c753fSRafael Auler if (Itr != BD.getSymbols().end()) { 1065a34c753fSRafael Auler size_t Idx = std::distance(BD.getSymbols().begin(), Itr); 1066a34c753fSRafael Auler std::swap(BD.getSymbols()[0], BD.getSymbols()[Idx]); 1067a34c753fSRafael Auler continue; 1068a34c753fSRafael Auler } 1069a34c753fSRafael Auler } 1070a34c753fSRafael Auler 1071a34c753fSRafael Auler // We have to skip 0 size symbols since they will all collide. 1072a34c753fSRafael Auler if (BD.getSize() == 0) { 1073a34c753fSRafael Auler continue; 1074a34c753fSRafael Auler } 1075a34c753fSRafael Auler 1076a34c753fSRafael Auler const uint64_t Hash = BD.getSection().hash(BD); 1077a34c753fSRafael Auler const size_t Idx = Name.find("0x"); 107840c2e0faSMaksim Panchenko std::string NewName = 107940c2e0faSMaksim Panchenko (Twine(Name.substr(0, Idx)) + "_" + Twine::utohexstr(Hash)).str(); 1080a34c753fSRafael Auler if (getBinaryDataByName(NewName)) { 1081a34c753fSRafael Auler // Ignore collisions for symbols that appear to be padding 1082a34c753fSRafael Auler // (i.e. all zeros or a "hole") 1083a34c753fSRafael Auler if (!isPadding(BD)) { 1084a34c753fSRafael Auler if (opts::Verbosity) { 1085a34c753fSRafael Auler errs() << "BOLT-WARNING: collision detected when hashing " << BD 1086a34c753fSRafael Auler << " with new name (" << NewName << "), skipping.\n"; 1087a34c753fSRafael Auler } 1088a34c753fSRafael Auler ++NumCollisions; 1089a34c753fSRafael Auler } 1090a34c753fSRafael Auler continue; 1091a34c753fSRafael Auler } 109240c2e0faSMaksim Panchenko BD.Symbols.insert(BD.Symbols.begin(), Ctx->getOrCreateSymbol(NewName)); 1093a34c753fSRafael Auler GlobalSymbols[NewName] = &BD; 1094a34c753fSRafael Auler } 1095a34c753fSRafael Auler if (NumCollisions) { 1096a34c753fSRafael Auler errs() << "BOLT-WARNING: " << NumCollisions 1097a34c753fSRafael Auler << " collisions detected while hashing binary objects"; 1098a34c753fSRafael Auler if (!opts::Verbosity) 1099a34c753fSRafael Auler errs() << ". Use -v=1 to see the list."; 1100a34c753fSRafael Auler errs() << '\n'; 1101a34c753fSRafael Auler } 1102a34c753fSRafael Auler } 1103a34c753fSRafael Auler 11046aa735ceSAmir Ayupov bool BinaryContext::registerFragment(BinaryFunction &TargetFunction, 1105a34c753fSRafael Auler BinaryFunction &Function) const { 11066aa735ceSAmir Ayupov assert(TargetFunction.isFragment() && "TargetFunction must be a fragment"); 1107e88122f5SAmir Ayupov if (TargetFunction.isChildOf(Function)) 11086aa735ceSAmir Ayupov return true; 11096aa735ceSAmir Ayupov TargetFunction.addParentFragment(Function); 1110a34c753fSRafael Auler Function.addFragment(TargetFunction); 1111a34c753fSRafael Auler if (!HasRelocations) { 1112a34c753fSRafael Auler TargetFunction.setSimple(false); 1113a34c753fSRafael Auler Function.setSimple(false); 1114a34c753fSRafael Auler } 1115a34c753fSRafael Auler if (opts::Verbosity >= 1) { 111640c2e0faSMaksim Panchenko outs() << "BOLT-INFO: marking " << TargetFunction << " as a fragment of " 111740c2e0faSMaksim Panchenko << Function << '\n'; 1118a34c753fSRafael Auler } 11196aa735ceSAmir Ayupov return true; 1120a34c753fSRafael Auler } 1121a34c753fSRafael Auler 112235efe1d8SVladislav Khmelevsky void BinaryContext::addAdrpAddRelocAArch64(BinaryFunction &BF, 112335efe1d8SVladislav Khmelevsky MCInst &LoadLowBits, 112435efe1d8SVladislav Khmelevsky MCInst &LoadHiBits, 112535efe1d8SVladislav Khmelevsky uint64_t Target) { 112635efe1d8SVladislav Khmelevsky const MCSymbol *TargetSymbol; 112735efe1d8SVladislav Khmelevsky uint64_t Addend = 0; 112835efe1d8SVladislav Khmelevsky std::tie(TargetSymbol, Addend) = handleAddressRef(Target, BF, 112935efe1d8SVladislav Khmelevsky /*IsPCRel*/ true); 113035efe1d8SVladislav Khmelevsky int64_t Val; 113135efe1d8SVladislav Khmelevsky MIB->replaceImmWithSymbolRef(LoadHiBits, TargetSymbol, Addend, Ctx.get(), Val, 113235efe1d8SVladislav Khmelevsky ELF::R_AARCH64_ADR_PREL_PG_HI21); 113335efe1d8SVladislav Khmelevsky MIB->replaceImmWithSymbolRef(LoadLowBits, TargetSymbol, Addend, Ctx.get(), 113435efe1d8SVladislav Khmelevsky Val, ELF::R_AARCH64_ADD_ABS_LO12_NC); 113535efe1d8SVladislav Khmelevsky } 113635efe1d8SVladislav Khmelevsky 113735efe1d8SVladislav Khmelevsky bool BinaryContext::handleAArch64Veneer(uint64_t Address, bool MatchOnly) { 113835efe1d8SVladislav Khmelevsky BinaryFunction *TargetFunction = getBinaryFunctionContainingAddress(Address); 113935efe1d8SVladislav Khmelevsky if (TargetFunction) 114035efe1d8SVladislav Khmelevsky return false; 114135efe1d8SVladislav Khmelevsky 114235efe1d8SVladislav Khmelevsky ErrorOr<BinarySection &> Section = getSectionForAddress(Address); 114335efe1d8SVladislav Khmelevsky assert(Section && "cannot get section for referenced address"); 114435efe1d8SVladislav Khmelevsky if (!Section->isText()) 114535efe1d8SVladislav Khmelevsky return false; 114635efe1d8SVladislav Khmelevsky 114735efe1d8SVladislav Khmelevsky bool Ret = false; 114835efe1d8SVladislav Khmelevsky StringRef SectionContents = Section->getContents(); 114935efe1d8SVladislav Khmelevsky uint64_t Offset = Address - Section->getAddress(); 115035efe1d8SVladislav Khmelevsky const uint64_t MaxSize = SectionContents.size() - Offset; 115135efe1d8SVladislav Khmelevsky const uint8_t *Bytes = 115235efe1d8SVladislav Khmelevsky reinterpret_cast<const uint8_t *>(SectionContents.data()); 115335efe1d8SVladislav Khmelevsky ArrayRef<uint8_t> Data(Bytes + Offset, MaxSize); 115435efe1d8SVladislav Khmelevsky 115535efe1d8SVladislav Khmelevsky auto matchVeneer = [&](BinaryFunction::InstrMapType &Instructions, 115635efe1d8SVladislav Khmelevsky MCInst &Instruction, uint64_t Offset, 115735efe1d8SVladislav Khmelevsky uint64_t AbsoluteInstrAddr, 115835efe1d8SVladislav Khmelevsky uint64_t TotalSize) -> bool { 115935efe1d8SVladislav Khmelevsky MCInst *TargetHiBits, *TargetLowBits; 116035efe1d8SVladislav Khmelevsky uint64_t TargetAddress, Count; 116135efe1d8SVladislav Khmelevsky Count = MIB->matchLinkerVeneer(Instructions.begin(), Instructions.end(), 116235efe1d8SVladislav Khmelevsky AbsoluteInstrAddr, Instruction, TargetHiBits, 116335efe1d8SVladislav Khmelevsky TargetLowBits, TargetAddress); 116435efe1d8SVladislav Khmelevsky if (!Count) 116535efe1d8SVladislav Khmelevsky return false; 116635efe1d8SVladislav Khmelevsky 116735efe1d8SVladislav Khmelevsky if (MatchOnly) 116835efe1d8SVladislav Khmelevsky return true; 116935efe1d8SVladislav Khmelevsky 117035efe1d8SVladislav Khmelevsky // NOTE The target symbol was created during disassemble's 117135efe1d8SVladislav Khmelevsky // handleExternalReference 117235efe1d8SVladislav Khmelevsky const MCSymbol *VeneerSymbol = getOrCreateGlobalSymbol(Address, "FUNCat"); 117335efe1d8SVladislav Khmelevsky BinaryFunction *Veneer = createBinaryFunction(VeneerSymbol->getName().str(), 117435efe1d8SVladislav Khmelevsky *Section, Address, TotalSize); 117535efe1d8SVladislav Khmelevsky addAdrpAddRelocAArch64(*Veneer, *TargetLowBits, *TargetHiBits, 117635efe1d8SVladislav Khmelevsky TargetAddress); 117735efe1d8SVladislav Khmelevsky MIB->addAnnotation(Instruction, "AArch64Veneer", true); 117835efe1d8SVladislav Khmelevsky Veneer->addInstruction(Offset, std::move(Instruction)); 117935efe1d8SVladislav Khmelevsky --Count; 1180f65e8c3cSNico Weber for (auto It = Instructions.rbegin(); Count != 0; ++It, --Count) { 118135efe1d8SVladislav Khmelevsky MIB->addAnnotation(It->second, "AArch64Veneer", true); 118235efe1d8SVladislav Khmelevsky Veneer->addInstruction(It->first, std::move(It->second)); 118335efe1d8SVladislav Khmelevsky } 118435efe1d8SVladislav Khmelevsky 118535efe1d8SVladislav Khmelevsky Veneer->getOrCreateLocalLabel(Address); 118635efe1d8SVladislav Khmelevsky Veneer->setMaxSize(TotalSize); 118735efe1d8SVladislav Khmelevsky Veneer->updateState(BinaryFunction::State::Disassembled); 118835efe1d8SVladislav Khmelevsky LLVM_DEBUG(dbgs() << "BOLT-DEBUG: handling veneer function at 0x" << Address 118935efe1d8SVladislav Khmelevsky << "\n"); 119035efe1d8SVladislav Khmelevsky return true; 119135efe1d8SVladislav Khmelevsky }; 119235efe1d8SVladislav Khmelevsky 119335efe1d8SVladislav Khmelevsky uint64_t Size = 0, TotalSize = 0; 119435efe1d8SVladislav Khmelevsky BinaryFunction::InstrMapType VeneerInstructions; 119535efe1d8SVladislav Khmelevsky for (Offset = 0; Offset < MaxSize; Offset += Size) { 119635efe1d8SVladislav Khmelevsky MCInst Instruction; 119735efe1d8SVladislav Khmelevsky const uint64_t AbsoluteInstrAddr = Address + Offset; 119835efe1d8SVladislav Khmelevsky if (!SymbolicDisAsm->getInstruction(Instruction, Size, Data.slice(Offset), 119935efe1d8SVladislav Khmelevsky AbsoluteInstrAddr, nulls())) 120035efe1d8SVladislav Khmelevsky break; 120135efe1d8SVladislav Khmelevsky 120235efe1d8SVladislav Khmelevsky TotalSize += Size; 120335efe1d8SVladislav Khmelevsky if (MIB->isBranch(Instruction)) { 120435efe1d8SVladislav Khmelevsky Ret = matchVeneer(VeneerInstructions, Instruction, Offset, 120535efe1d8SVladislav Khmelevsky AbsoluteInstrAddr, TotalSize); 120635efe1d8SVladislav Khmelevsky break; 120735efe1d8SVladislav Khmelevsky } 120835efe1d8SVladislav Khmelevsky 120935efe1d8SVladislav Khmelevsky VeneerInstructions.emplace(Offset, std::move(Instruction)); 121035efe1d8SVladislav Khmelevsky } 121135efe1d8SVladislav Khmelevsky 121235efe1d8SVladislav Khmelevsky return Ret; 121335efe1d8SVladislav Khmelevsky } 121435efe1d8SVladislav Khmelevsky 121535efe1d8SVladislav Khmelevsky void BinaryContext::processInterproceduralReferences() { 121635efe1d8SVladislav Khmelevsky for (const std::pair<BinaryFunction *, uint64_t> &It : 121735efe1d8SVladislav Khmelevsky InterproceduralReferences) { 121835efe1d8SVladislav Khmelevsky BinaryFunction &Function = *It.first; 121935efe1d8SVladislav Khmelevsky uint64_t Address = It.second; 122035efe1d8SVladislav Khmelevsky if (!Address || Function.isIgnored()) 1221a34c753fSRafael Auler continue; 1222a34c753fSRafael Auler 1223a34c753fSRafael Auler BinaryFunction *TargetFunction = 1224a34c753fSRafael Auler getBinaryFunctionContainingAddress(Address); 1225a34c753fSRafael Auler if (&Function == TargetFunction) 1226a34c753fSRafael Auler continue; 1227a34c753fSRafael Auler 1228a34c753fSRafael Auler if (TargetFunction) { 122935efe1d8SVladislav Khmelevsky if (TargetFunction->isFragment() && 1230e88122f5SAmir Ayupov !TargetFunction->isChildOf(Function)) { 12316aa735ceSAmir Ayupov errs() << "BOLT-WARNING: interprocedural reference between unrelated " 12326aa735ceSAmir Ayupov "fragments: " 12336aa735ceSAmir Ayupov << Function.getPrintName() << " and " 12346aa735ceSAmir Ayupov << TargetFunction->getPrintName() << '\n'; 12356aa735ceSAmir Ayupov } 1236a34c753fSRafael Auler if (uint64_t Offset = Address - TargetFunction->getAddress()) 1237a34c753fSRafael Auler TargetFunction->addEntryPointAtOffset(Offset); 1238a34c753fSRafael Auler 1239a34c753fSRafael Auler continue; 1240a34c753fSRafael Auler } 1241a34c753fSRafael Auler 1242a34c753fSRafael Auler // Check if address falls in function padding space - this could be 1243a34c753fSRafael Auler // unmarked data in code. In this case adjust the padding space size. 1244a34c753fSRafael Auler ErrorOr<BinarySection &> Section = getSectionForAddress(Address); 1245a34c753fSRafael Auler assert(Section && "cannot get section for referenced address"); 1246a34c753fSRafael Auler 1247a34c753fSRafael Auler if (!Section->isText()) 1248a34c753fSRafael Auler continue; 1249a34c753fSRafael Auler 1250a34c753fSRafael Auler // PLT requires special handling and could be ignored in this context. 1251a34c753fSRafael Auler StringRef SectionName = Section->getName(); 1252a34c753fSRafael Auler if (SectionName == ".plt" || SectionName == ".plt.got") 1253a34c753fSRafael Auler continue; 1254a34c753fSRafael Auler 125535efe1d8SVladislav Khmelevsky // Check if it is aarch64 veneer written at Address 125635efe1d8SVladislav Khmelevsky if (isAArch64() && handleAArch64Veneer(Address)) 125735efe1d8SVladislav Khmelevsky continue; 125835efe1d8SVladislav Khmelevsky 1259a34c753fSRafael Auler if (opts::processAllFunctions()) { 1260a34c753fSRafael Auler errs() << "BOLT-ERROR: cannot process binaries with unmarked " 126140c2e0faSMaksim Panchenko << "object in code at address 0x" << Twine::utohexstr(Address) 126240c2e0faSMaksim Panchenko << " belonging to section " << SectionName << " in current mode\n"; 1263a34c753fSRafael Auler exit(1); 1264a34c753fSRafael Auler } 1265a34c753fSRafael Auler 126640c2e0faSMaksim Panchenko TargetFunction = getBinaryFunctionContainingAddress(Address, 1267a34c753fSRafael Auler /*CheckPastEnd=*/false, 1268a34c753fSRafael Auler /*UseMaxSize=*/true); 1269a34c753fSRafael Auler // We are not going to overwrite non-simple functions, but for simple 1270a34c753fSRafael Auler // ones - adjust the padding size. 1271a34c753fSRafael Auler if (TargetFunction && TargetFunction->isSimple()) { 1272a34c753fSRafael Auler errs() << "BOLT-WARNING: function " << *TargetFunction 1273a34c753fSRafael Auler << " has an object detected in a padding region at address 0x" 1274a34c753fSRafael Auler << Twine::utohexstr(Address) << '\n'; 1275a34c753fSRafael Auler TargetFunction->setMaxSize(TargetFunction->getSize()); 1276a34c753fSRafael Auler } 1277a34c753fSRafael Auler } 1278a34c753fSRafael Auler 127935efe1d8SVladislav Khmelevsky InterproceduralReferences.clear(); 1280a34c753fSRafael Auler } 1281a34c753fSRafael Auler 1282a34c753fSRafael Auler void BinaryContext::postProcessSymbolTable() { 1283a34c753fSRafael Auler fixBinaryDataHoles(); 1284a34c753fSRafael Auler bool Valid = true; 1285a34c753fSRafael Auler for (auto &Entry : BinaryDataMap) { 1286a34c753fSRafael Auler BinaryData *BD = Entry.second; 1287a34c753fSRafael Auler if ((BD->getName().startswith("SYMBOLat") || 1288a34c753fSRafael Auler BD->getName().startswith("DATAat")) && 128940c2e0faSMaksim Panchenko !BD->getParent() && !BD->getSize() && !BD->isAbsolute() && 1290a34c753fSRafael Auler BD->getSection()) { 1291a34c753fSRafael Auler errs() << "BOLT-WARNING: zero-sized top level symbol: " << *BD << "\n"; 1292a34c753fSRafael Auler Valid = false; 1293a34c753fSRafael Auler } 1294a34c753fSRafael Auler } 1295a34c753fSRafael Auler assert(Valid); 1296c907d6e0SAmir Ayupov (void)Valid; 1297a34c753fSRafael Auler generateSymbolHashes(); 1298a34c753fSRafael Auler } 1299a34c753fSRafael Auler 1300a34c753fSRafael Auler void BinaryContext::foldFunction(BinaryFunction &ChildBF, 1301a34c753fSRafael Auler BinaryFunction &ParentBF) { 1302a34c753fSRafael Auler assert(!ChildBF.isMultiEntry() && !ParentBF.isMultiEntry() && 1303a34c753fSRafael Auler "cannot merge functions with multiple entry points"); 1304a34c753fSRafael Auler 1305e8ce5f1eSNico Weber std::unique_lock<llvm::sys::RWMutex> WriteCtxLock(CtxMutex, std::defer_lock); 1306e8ce5f1eSNico Weber std::unique_lock<llvm::sys::RWMutex> WriteSymbolMapLock( 1307a34c753fSRafael Auler SymbolToFunctionMapMutex, std::defer_lock); 1308a34c753fSRafael Auler 1309a34c753fSRafael Auler const StringRef ChildName = ChildBF.getOneName(); 1310a34c753fSRafael Auler 1311a34c753fSRafael Auler // Move symbols over and update bookkeeping info. 1312a34c753fSRafael Auler for (MCSymbol *Symbol : ChildBF.getSymbols()) { 1313a34c753fSRafael Auler ParentBF.getSymbols().push_back(Symbol); 1314a34c753fSRafael Auler WriteSymbolMapLock.lock(); 1315a34c753fSRafael Auler SymbolToFunctionMap[Symbol] = &ParentBF; 1316a34c753fSRafael Auler WriteSymbolMapLock.unlock(); 1317a34c753fSRafael Auler // NB: there's no need to update BinaryDataMap and GlobalSymbols. 1318a34c753fSRafael Auler } 1319a34c753fSRafael Auler ChildBF.getSymbols().clear(); 1320a34c753fSRafael Auler 1321a34c753fSRafael Auler // Move other names the child function is known under. 1322d2c87699SAmir Ayupov llvm::move(ChildBF.Aliases, std::back_inserter(ParentBF.Aliases)); 1323a34c753fSRafael Auler ChildBF.Aliases.clear(); 1324a34c753fSRafael Auler 1325a34c753fSRafael Auler if (HasRelocations) { 1326a34c753fSRafael Auler // Merge execution counts of ChildBF into those of ParentBF. 1327a34c753fSRafael Auler // Without relocations, we cannot reliably merge profiles as both functions 1328a34c753fSRafael Auler // continue to exist and either one can be executed. 1329a34c753fSRafael Auler ChildBF.mergeProfileDataInto(ParentBF); 1330a34c753fSRafael Auler 1331e8ce5f1eSNico Weber std::shared_lock<llvm::sys::RWMutex> ReadBfsLock(BinaryFunctionsMutex, 1332a34c753fSRafael Auler std::defer_lock); 1333e8ce5f1eSNico Weber std::unique_lock<llvm::sys::RWMutex> WriteBfsLock(BinaryFunctionsMutex, 1334a34c753fSRafael Auler std::defer_lock); 1335a34c753fSRafael Auler // Remove ChildBF from the global set of functions in relocs mode. 1336a34c753fSRafael Auler ReadBfsLock.lock(); 1337a34c753fSRafael Auler auto FI = BinaryFunctions.find(ChildBF.getAddress()); 1338a34c753fSRafael Auler ReadBfsLock.unlock(); 1339a34c753fSRafael Auler 1340a34c753fSRafael Auler assert(FI != BinaryFunctions.end() && "function not found"); 1341a34c753fSRafael Auler assert(&ChildBF == &FI->second && "function mismatch"); 1342a34c753fSRafael Auler 1343a34c753fSRafael Auler WriteBfsLock.lock(); 1344a34c753fSRafael Auler ChildBF.clearDisasmState(); 1345a34c753fSRafael Auler FI = BinaryFunctions.erase(FI); 1346a34c753fSRafael Auler WriteBfsLock.unlock(); 1347a34c753fSRafael Auler 1348a34c753fSRafael Auler } else { 1349a34c753fSRafael Auler // In non-relocation mode we keep the function, but rename it. 1350a34c753fSRafael Auler std::string NewName = "__ICF_" + ChildName.str(); 1351a34c753fSRafael Auler 1352a34c753fSRafael Auler WriteCtxLock.lock(); 1353a34c753fSRafael Auler ChildBF.getSymbols().push_back(Ctx->getOrCreateSymbol(NewName)); 1354a34c753fSRafael Auler WriteCtxLock.unlock(); 1355a34c753fSRafael Auler 1356a34c753fSRafael Auler ChildBF.setFolded(&ParentBF); 1357a34c753fSRafael Auler } 135803e94f66SMaksim Panchenko 135903e94f66SMaksim Panchenko ParentBF.setHasFunctionsFoldedInto(); 1360a34c753fSRafael Auler } 1361a34c753fSRafael Auler 1362a34c753fSRafael Auler void BinaryContext::fixBinaryDataHoles() { 1363a34c753fSRafael Auler assert(validateObjectNesting() && "object nesting inconsitency detected"); 1364a34c753fSRafael Auler 1365a34c753fSRafael Auler for (BinarySection &Section : allocatableSections()) { 1366a34c753fSRafael Auler std::vector<std::pair<uint64_t, uint64_t>> Holes; 1367a34c753fSRafael Auler 1368a34c753fSRafael Auler auto isNotHole = [&Section](const binary_data_iterator &Itr) { 1369a34c753fSRafael Auler BinaryData *BD = Itr->second; 137040c2e0faSMaksim Panchenko bool isHole = (!BD->getParent() && !BD->getSize() && BD->isObject() && 1371a34c753fSRafael Auler (BD->getName().startswith("SYMBOLat0x") || 1372a34c753fSRafael Auler BD->getName().startswith("DATAat0x") || 1373a34c753fSRafael Auler BD->getName().startswith("ANONYMOUS"))); 1374a34c753fSRafael Auler return !isHole && BD->getSection() == Section && !BD->getParent(); 1375a34c753fSRafael Auler }; 1376a34c753fSRafael Auler 1377a34c753fSRafael Auler auto BDStart = BinaryDataMap.begin(); 1378a34c753fSRafael Auler auto BDEnd = BinaryDataMap.end(); 1379a34c753fSRafael Auler auto Itr = FilteredBinaryDataIterator(isNotHole, BDStart, BDEnd); 1380a34c753fSRafael Auler auto End = FilteredBinaryDataIterator(isNotHole, BDEnd, BDEnd); 1381a34c753fSRafael Auler 1382a34c753fSRafael Auler uint64_t EndAddress = Section.getAddress(); 1383a34c753fSRafael Auler 1384a34c753fSRafael Auler while (Itr != End) { 1385a34c753fSRafael Auler if (Itr->second->getAddress() > EndAddress) { 1386a34c753fSRafael Auler uint64_t Gap = Itr->second->getAddress() - EndAddress; 1387a34c753fSRafael Auler Holes.emplace_back(EndAddress, Gap); 1388a34c753fSRafael Auler } 1389a34c753fSRafael Auler EndAddress = Itr->second->getEndAddress(); 1390a34c753fSRafael Auler ++Itr; 1391a34c753fSRafael Auler } 1392a34c753fSRafael Auler 13933652483cSRafael Auler if (EndAddress < Section.getEndAddress()) 1394a34c753fSRafael Auler Holes.emplace_back(EndAddress, Section.getEndAddress() - EndAddress); 1395a34c753fSRafael Auler 1396a34c753fSRafael Auler // If there is already a symbol at the start of the hole, grow that symbol 1397a34c753fSRafael Auler // to cover the rest. Otherwise, create a new symbol to cover the hole. 1398a34c753fSRafael Auler for (std::pair<uint64_t, uint64_t> &Hole : Holes) { 1399a34c753fSRafael Auler BinaryData *BD = getBinaryDataAtAddress(Hole.first); 1400a34c753fSRafael Auler if (BD) { 1401a34c753fSRafael Auler // BD->getSection() can be != Section if there are sections that 1402a34c753fSRafael Auler // overlap. In this case it is probably safe to just skip the holes 1403a34c753fSRafael Auler // since the overlapping section will not(?) have any symbols in it. 1404a34c753fSRafael Auler if (BD->getSection() == Section) 1405a34c753fSRafael Auler setBinaryDataSize(Hole.first, Hole.second); 1406a34c753fSRafael Auler } else { 1407a34c753fSRafael Auler getOrCreateGlobalSymbol(Hole.first, "HOLEat", Hole.second, 1); 1408a34c753fSRafael Auler } 1409a34c753fSRafael Auler } 1410a34c753fSRafael Auler } 1411a34c753fSRafael Auler 1412a34c753fSRafael Auler assert(validateObjectNesting() && "object nesting inconsitency detected"); 1413a34c753fSRafael Auler assert(validateHoles() && "top level hole detected in object map"); 1414a34c753fSRafael Auler } 1415a34c753fSRafael Auler 1416a34c753fSRafael Auler void BinaryContext::printGlobalSymbols(raw_ostream &OS) const { 1417a34c753fSRafael Auler const BinarySection *CurrentSection = nullptr; 1418a34c753fSRafael Auler bool FirstSection = true; 1419a34c753fSRafael Auler 1420a34c753fSRafael Auler for (auto &Entry : BinaryDataMap) { 1421a34c753fSRafael Auler const BinaryData *BD = Entry.second; 1422a34c753fSRafael Auler const BinarySection &Section = BD->getSection(); 1423a34c753fSRafael Auler if (FirstSection || Section != *CurrentSection) { 1424a34c753fSRafael Auler uint64_t Address, Size; 1425a34c753fSRafael Auler StringRef Name = Section.getName(); 1426a34c753fSRafael Auler if (Section) { 1427a34c753fSRafael Auler Address = Section.getAddress(); 1428a34c753fSRafael Auler Size = Section.getSize(); 1429a34c753fSRafael Auler } else { 1430a34c753fSRafael Auler Address = BD->getAddress(); 1431a34c753fSRafael Auler Size = BD->getSize(); 1432a34c753fSRafael Auler } 1433a34c753fSRafael Auler OS << "BOLT-INFO: Section " << Name << ", " 1434a34c753fSRafael Auler << "0x" + Twine::utohexstr(Address) << ":" 143540c2e0faSMaksim Panchenko << "0x" + Twine::utohexstr(Address + Size) << "/" << Size << "\n"; 1436a34c753fSRafael Auler CurrentSection = &Section; 1437a34c753fSRafael Auler FirstSection = false; 1438a34c753fSRafael Auler } 1439a34c753fSRafael Auler 1440a34c753fSRafael Auler OS << "BOLT-INFO: "; 1441a34c753fSRafael Auler const BinaryData *P = BD->getParent(); 1442a34c753fSRafael Auler while (P) { 1443a34c753fSRafael Auler OS << " "; 1444a34c753fSRafael Auler P = P->getParent(); 1445a34c753fSRafael Auler } 1446a34c753fSRafael Auler OS << *BD << "\n"; 1447a34c753fSRafael Auler } 1448a34c753fSRafael Auler } 1449a34c753fSRafael Auler 1450014cd37fSAlexander Yermolovich Expected<unsigned> BinaryContext::getDwarfFile( 1451014cd37fSAlexander Yermolovich StringRef Directory, StringRef FileName, unsigned FileNumber, 1452f4c16c44SFangrui Song std::optional<MD5::MD5Result> Checksum, std::optional<StringRef> Source, 1453014cd37fSAlexander Yermolovich unsigned CUID, unsigned DWARFVersion) { 1454a34c753fSRafael Auler DwarfLineTable &Table = DwarfLineTablesCUMap[CUID]; 1455014cd37fSAlexander Yermolovich return Table.tryGetFile(Directory, FileName, Checksum, Source, DWARFVersion, 1456014cd37fSAlexander Yermolovich FileNumber); 1457a34c753fSRafael Auler } 1458a34c753fSRafael Auler 1459a34c753fSRafael Auler unsigned BinaryContext::addDebugFilenameToUnit(const uint32_t DestCUID, 1460a34c753fSRafael Auler const uint32_t SrcCUID, 1461a34c753fSRafael Auler unsigned FileIndex) { 1462a34c753fSRafael Auler DWARFCompileUnit *SrcUnit = DwCtx->getCompileUnitForOffset(SrcCUID); 1463a34c753fSRafael Auler const DWARFDebugLine::LineTable *LineTable = 1464a34c753fSRafael Auler DwCtx->getLineTableForUnit(SrcUnit); 1465a34c753fSRafael Auler const std::vector<DWARFDebugLine::FileNameEntry> &FileNames = 1466a34c753fSRafael Auler LineTable->Prologue.FileNames; 1467a34c753fSRafael Auler // Dir indexes start at 1, as DWARF file numbers, and a dir index 0 1468a34c753fSRafael Auler // means empty dir. 1469a34c753fSRafael Auler assert(FileIndex > 0 && FileIndex <= FileNames.size() && 1470a34c753fSRafael Auler "FileIndex out of range for the compilation unit."); 1471a34c753fSRafael Auler StringRef Dir = ""; 1472a34c753fSRafael Auler if (FileNames[FileIndex - 1].DirIdx != 0) { 147389fab98eSFangrui Song if (std::optional<const char *> DirName = dwarf::toString( 1474a34c753fSRafael Auler LineTable->Prologue 1475a34c753fSRafael Auler .IncludeDirectories[FileNames[FileIndex - 1].DirIdx - 1])) { 1476a34c753fSRafael Auler Dir = *DirName; 1477a34c753fSRafael Auler } 1478a34c753fSRafael Auler } 1479a34c753fSRafael Auler StringRef FileName = ""; 148089fab98eSFangrui Song if (std::optional<const char *> FName = 1481a34c753fSRafael Auler dwarf::toString(FileNames[FileIndex - 1].Name)) 1482a34c753fSRafael Auler FileName = *FName; 1483a34c753fSRafael Auler assert(FileName != ""); 1484014cd37fSAlexander Yermolovich DWARFCompileUnit *DstUnit = DwCtx->getCompileUnitForOffset(DestCUID); 1485e324a80fSKazu Hirata return cantFail(getDwarfFile(Dir, FileName, 0, std::nullopt, std::nullopt, 1486e324a80fSKazu Hirata DestCUID, DstUnit->getVersion())); 1487a34c753fSRafael Auler } 1488a34c753fSRafael Auler 1489a34c753fSRafael Auler std::vector<BinaryFunction *> BinaryContext::getSortedFunctions() { 1490a34c753fSRafael Auler std::vector<BinaryFunction *> SortedFunctions(BinaryFunctions.size()); 149172e5b14fSAmir Ayupov llvm::transform(llvm::make_second_range(BinaryFunctions), 149272e5b14fSAmir Ayupov SortedFunctions.begin(), 149372e5b14fSAmir Ayupov [](BinaryFunction &BF) { return &BF; }); 1494a34c753fSRafael Auler 1495d2c87699SAmir Ayupov llvm::stable_sort(SortedFunctions, 1496a34c753fSRafael Auler [](const BinaryFunction *A, const BinaryFunction *B) { 1497a34c753fSRafael Auler if (A->hasValidIndex() && B->hasValidIndex()) { 1498a34c753fSRafael Auler return A->getIndex() < B->getIndex(); 1499a34c753fSRafael Auler } 1500a34c753fSRafael Auler return A->hasValidIndex(); 1501a34c753fSRafael Auler }); 1502a34c753fSRafael Auler return SortedFunctions; 1503a34c753fSRafael Auler } 1504a34c753fSRafael Auler 1505a34c753fSRafael Auler std::vector<BinaryFunction *> BinaryContext::getAllBinaryFunctions() { 1506a34c753fSRafael Auler std::vector<BinaryFunction *> AllFunctions; 1507a34c753fSRafael Auler AllFunctions.reserve(BinaryFunctions.size() + InjectedBinaryFunctions.size()); 150872e5b14fSAmir Ayupov llvm::transform(llvm::make_second_range(BinaryFunctions), 150972e5b14fSAmir Ayupov std::back_inserter(AllFunctions), 151072e5b14fSAmir Ayupov [](BinaryFunction &BF) { return &BF; }); 1511d2c87699SAmir Ayupov llvm::copy(InjectedBinaryFunctions, std::back_inserter(AllFunctions)); 1512a34c753fSRafael Auler 1513a34c753fSRafael Auler return AllFunctions; 1514a34c753fSRafael Auler } 1515a34c753fSRafael Auler 1516e8f5743eSAmir Ayupov std::optional<DWARFUnit *> BinaryContext::getDWOCU(uint64_t DWOId) { 1517a34c753fSRafael Auler auto Iter = DWOCUs.find(DWOId); 1518a34c753fSRafael Auler if (Iter == DWOCUs.end()) 1519e324a80fSKazu Hirata return std::nullopt; 1520a34c753fSRafael Auler 1521a34c753fSRafael Auler return Iter->second; 1522a34c753fSRafael Auler } 1523a34c753fSRafael Auler 15247dee646bSAmir Ayupov DWARFContext *BinaryContext::getDWOContext() const { 1525a34c753fSRafael Auler if (DWOCUs.empty()) 1526a34c753fSRafael Auler return nullptr; 1527a34c753fSRafael Auler return &DWOCUs.begin()->second->getContext(); 1528a34c753fSRafael Auler } 1529a34c753fSRafael Auler 1530a34c753fSRafael Auler /// Handles DWO sections that can either be in .o, .dwo or .dwp files. 1531a34c753fSRafael Auler void BinaryContext::preprocessDWODebugInfo() { 1532a34c753fSRafael Auler for (const std::unique_ptr<DWARFUnit> &CU : DwCtx->compile_units()) { 1533a34c753fSRafael Auler DWARFUnit *const DwarfUnit = CU.get(); 153489fab98eSFangrui Song if (std::optional<uint64_t> DWOId = DwarfUnit->getDWOId()) { 1535a34c753fSRafael Auler DWARFUnit *DWOCU = DwarfUnit->getNonSkeletonUnitDIE(false).getDwarfUnit(); 1536a34c753fSRafael Auler if (!DWOCU->isDWOUnit()) { 1537a34c753fSRafael Auler std::string DWOName = dwarf::toString( 1538a34c753fSRafael Auler DwarfUnit->getUnitDIE().find( 1539a34c753fSRafael Auler {dwarf::DW_AT_dwo_name, dwarf::DW_AT_GNU_dwo_name}), 1540a34c753fSRafael Auler ""); 1541a34c753fSRafael Auler outs() << "BOLT-WARNING: Debug Fission: DWO debug information for " 1542a34c753fSRafael Auler << DWOName 1543a34c753fSRafael Auler << " was not retrieved and won't be updated. Please check " 1544a34c753fSRafael Auler "relative path.\n"; 1545a34c753fSRafael Auler continue; 1546a34c753fSRafael Auler } 1547a34c753fSRafael Auler DWOCUs[*DWOId] = DWOCU; 1548a34c753fSRafael Auler } 1549a34c753fSRafael Auler } 1550864133c5SAlexander Yermolovich if (!DWOCUs.empty()) 1551864133c5SAlexander Yermolovich outs() << "BOLT-INFO: processing split DWARF\n"; 1552a34c753fSRafael Auler } 1553a34c753fSRafael Auler 1554a34c753fSRafael Auler void BinaryContext::preprocessDebugInfo() { 1555a34c753fSRafael Auler struct CURange { 1556a34c753fSRafael Auler uint64_t LowPC; 1557a34c753fSRafael Auler uint64_t HighPC; 1558a34c753fSRafael Auler DWARFUnit *Unit; 1559a34c753fSRafael Auler 156040c2e0faSMaksim Panchenko bool operator<(const CURange &Other) const { return LowPC < Other.LowPC; } 1561a34c753fSRafael Auler }; 1562a34c753fSRafael Auler 1563a34c753fSRafael Auler // Building a map of address ranges to CUs similar to .debug_aranges and use 1564a34c753fSRafael Auler // it to assign CU to functions. 1565a34c753fSRafael Auler std::vector<CURange> AllRanges; 1566a34c753fSRafael Auler AllRanges.reserve(DwCtx->getNumCompileUnits()); 1567a34c753fSRafael Auler for (const std::unique_ptr<DWARFUnit> &CU : DwCtx->compile_units()) { 1568a34c753fSRafael Auler Expected<DWARFAddressRangesVector> RangesOrError = 1569a34c753fSRafael Auler CU->getUnitDIE().getAddressRanges(); 1570a34c753fSRafael Auler if (!RangesOrError) { 1571a34c753fSRafael Auler consumeError(RangesOrError.takeError()); 1572a34c753fSRafael Auler continue; 1573a34c753fSRafael Auler } 1574a34c753fSRafael Auler for (DWARFAddressRange &Range : *RangesOrError) { 1575a34c753fSRafael Auler // Parts of the debug info could be invalidated due to corresponding code 1576a34c753fSRafael Auler // being removed from the binary by the linker. Hence we check if the 1577a34c753fSRafael Auler // address is a valid one. 1578a34c753fSRafael Auler if (containsAddress(Range.LowPC)) 1579a34c753fSRafael Auler AllRanges.emplace_back(CURange{Range.LowPC, Range.HighPC, CU.get()}); 1580a34c753fSRafael Auler } 1581014cd37fSAlexander Yermolovich 1582014cd37fSAlexander Yermolovich ContainsDwarf5 |= CU->getVersion() >= 5; 1583014cd37fSAlexander Yermolovich ContainsDwarfLegacy |= CU->getVersion() < 5; 1584a34c753fSRafael Auler } 1585a34c753fSRafael Auler 1586d2c87699SAmir Ayupov llvm::sort(AllRanges); 1587a34c753fSRafael Auler for (auto &KV : BinaryFunctions) { 1588a34c753fSRafael Auler const uint64_t FunctionAddress = KV.first; 1589a34c753fSRafael Auler BinaryFunction &Function = KV.second; 1590a34c753fSRafael Auler 1591d2c87699SAmir Ayupov auto It = llvm::partition_point( 1592d2c87699SAmir Ayupov AllRanges, [=](CURange R) { return R.HighPC <= FunctionAddress; }); 1593d2c87699SAmir Ayupov if (It != AllRanges.end() && It->LowPC <= FunctionAddress) 1594a34c753fSRafael Auler Function.setDWARFUnit(It->Unit); 1595a34c753fSRafael Auler } 1596a34c753fSRafael Auler 1597a34c753fSRafael Auler // Discover units with debug info that needs to be updated. 1598a34c753fSRafael Auler for (const auto &KV : BinaryFunctions) { 1599a34c753fSRafael Auler const BinaryFunction &BF = KV.second; 1600a34c753fSRafael Auler if (shouldEmit(BF) && BF.getDWARFUnit()) 1601a34c753fSRafael Auler ProcessedCUs.insert(BF.getDWARFUnit()); 1602a34c753fSRafael Auler } 1603a34c753fSRafael Auler 1604a34c753fSRafael Auler // Clear debug info for functions from units that we are not going to process. 1605a34c753fSRafael Auler for (auto &KV : BinaryFunctions) { 1606a34c753fSRafael Auler BinaryFunction &BF = KV.second; 1607a34c753fSRafael Auler if (BF.getDWARFUnit() && !ProcessedCUs.count(BF.getDWARFUnit())) 1608a34c753fSRafael Auler BF.setDWARFUnit(nullptr); 1609a34c753fSRafael Auler } 1610a34c753fSRafael Auler 1611a34c753fSRafael Auler if (opts::Verbosity >= 1) { 1612a34c753fSRafael Auler outs() << "BOLT-INFO: " << ProcessedCUs.size() << " out of " 1613a34c753fSRafael Auler << DwCtx->getNumCompileUnits() << " CUs will be updated\n"; 1614a34c753fSRafael Auler } 1615a34c753fSRafael Auler 1616ba1ac98cSAlexander Yermolovich preprocessDWODebugInfo(); 1617ba1ac98cSAlexander Yermolovich 1618a34c753fSRafael Auler // Populate MCContext with DWARF files from all units. 1619a34c753fSRafael Auler StringRef GlobalPrefix = AsmInfo->getPrivateGlobalPrefix(); 1620a34c753fSRafael Auler for (const std::unique_ptr<DWARFUnit> &CU : DwCtx->compile_units()) { 1621a34c753fSRafael Auler const uint64_t CUID = CU->getOffset(); 1622014cd37fSAlexander Yermolovich DwarfLineTable &BinaryLineTable = getDwarfLineTable(CUID); 1623014cd37fSAlexander Yermolovich BinaryLineTable.setLabel(Ctx->getOrCreateSymbol( 1624a34c753fSRafael Auler GlobalPrefix + "line_table_start" + Twine(CUID))); 1625a34c753fSRafael Auler 1626a34c753fSRafael Auler if (!ProcessedCUs.count(CU.get())) 1627a34c753fSRafael Auler continue; 1628a34c753fSRafael Auler 1629a34c753fSRafael Auler const DWARFDebugLine::LineTable *LineTable = 1630a34c753fSRafael Auler DwCtx->getLineTableForUnit(CU.get()); 1631a34c753fSRafael Auler const std::vector<DWARFDebugLine::FileNameEntry> &FileNames = 1632a34c753fSRafael Auler LineTable->Prologue.FileNames; 1633a34c753fSRafael Auler 1634014cd37fSAlexander Yermolovich uint16_t DwarfVersion = LineTable->Prologue.getVersion(); 1635014cd37fSAlexander Yermolovich if (DwarfVersion >= 5) { 1636f4c16c44SFangrui Song std::optional<MD5::MD5Result> Checksum; 1637014cd37fSAlexander Yermolovich if (LineTable->Prologue.ContentTypes.HasMD5) 1638014cd37fSAlexander Yermolovich Checksum = LineTable->Prologue.FileNames[0].Checksum; 163989fab98eSFangrui Song std::optional<const char *> Name = 1640ba1ac98cSAlexander Yermolovich dwarf::toString(CU->getUnitDIE().find(dwarf::DW_AT_name), nullptr); 164189fab98eSFangrui Song if (std::optional<uint64_t> DWOID = CU->getDWOId()) { 1642ba1ac98cSAlexander Yermolovich auto Iter = DWOCUs.find(*DWOID); 1643ba1ac98cSAlexander Yermolovich assert(Iter != DWOCUs.end() && "DWO CU was not found."); 1644ba1ac98cSAlexander Yermolovich Name = dwarf::toString( 1645ba1ac98cSAlexander Yermolovich Iter->second->getUnitDIE().find(dwarf::DW_AT_name), nullptr); 1646ba1ac98cSAlexander Yermolovich } 1647ba1ac98cSAlexander Yermolovich BinaryLineTable.setRootFile(CU->getCompilationDir(), *Name, Checksum, 1648e324a80fSKazu Hirata std::nullopt); 1649014cd37fSAlexander Yermolovich } 1650014cd37fSAlexander Yermolovich 1651014cd37fSAlexander Yermolovich BinaryLineTable.setDwarfVersion(DwarfVersion); 1652014cd37fSAlexander Yermolovich 1653a34c753fSRafael Auler // Assign a unique label to every line table, one per CU. 1654a34c753fSRafael Auler // Make sure empty debug line tables are registered too. 1655a34c753fSRafael Auler if (FileNames.empty()) { 1656e324a80fSKazu Hirata cantFail(getDwarfFile("", "<unknown>", 0, std::nullopt, std::nullopt, 1657e324a80fSKazu Hirata CUID, DwarfVersion)); 1658a34c753fSRafael Auler continue; 1659a34c753fSRafael Auler } 1660014cd37fSAlexander Yermolovich const uint32_t Offset = DwarfVersion < 5 ? 1 : 0; 1661a34c753fSRafael Auler for (size_t I = 0, Size = FileNames.size(); I != Size; ++I) { 1662a34c753fSRafael Auler // Dir indexes start at 1, as DWARF file numbers, and a dir index 0 1663a34c753fSRafael Auler // means empty dir. 1664a34c753fSRafael Auler StringRef Dir = ""; 1665014cd37fSAlexander Yermolovich if (FileNames[I].DirIdx != 0 || DwarfVersion >= 5) 166689fab98eSFangrui Song if (std::optional<const char *> DirName = dwarf::toString( 1667a34c753fSRafael Auler LineTable->Prologue 1668014cd37fSAlexander Yermolovich .IncludeDirectories[FileNames[I].DirIdx - Offset])) 1669a34c753fSRafael Auler Dir = *DirName; 1670a34c753fSRafael Auler StringRef FileName = ""; 167189fab98eSFangrui Song if (std::optional<const char *> FName = 167289fab98eSFangrui Song dwarf::toString(FileNames[I].Name)) 1673a34c753fSRafael Auler FileName = *FName; 1674a34c753fSRafael Auler assert(FileName != ""); 1675f4c16c44SFangrui Song std::optional<MD5::MD5Result> Checksum; 1676014cd37fSAlexander Yermolovich if (DwarfVersion >= 5 && LineTable->Prologue.ContentTypes.HasMD5) 1677014cd37fSAlexander Yermolovich Checksum = LineTable->Prologue.FileNames[I].Checksum; 1678e324a80fSKazu Hirata cantFail(getDwarfFile(Dir, FileName, 0, Checksum, std::nullopt, CUID, 1679e324a80fSKazu Hirata DwarfVersion)); 1680a34c753fSRafael Auler } 1681a34c753fSRafael Auler } 1682a34c753fSRafael Auler } 1683a34c753fSRafael Auler 1684a34c753fSRafael Auler bool BinaryContext::shouldEmit(const BinaryFunction &Function) const { 16854c14519eSVladislav Khmelevsky if (Function.isPseudo()) 16864c14519eSVladislav Khmelevsky return false; 16874c14519eSVladislav Khmelevsky 1688a34c753fSRafael Auler if (opts::processAllFunctions()) 1689a34c753fSRafael Auler return true; 1690a34c753fSRafael Auler 1691a34c753fSRafael Auler if (Function.isIgnored()) 1692a34c753fSRafael Auler return false; 1693a34c753fSRafael Auler 1694a34c753fSRafael Auler // In relocation mode we will emit non-simple functions with CFG. 1695a34c753fSRafael Auler // If the function does not have a CFG it should be marked as ignored. 1696a34c753fSRafael Auler return HasRelocations || Function.isSimple(); 1697a34c753fSRafael Auler } 1698a34c753fSRafael Auler 1699a34c753fSRafael Auler void BinaryContext::printCFI(raw_ostream &OS, const MCCFIInstruction &Inst) { 1700a34c753fSRafael Auler uint32_t Operation = Inst.getOperation(); 1701a34c753fSRafael Auler switch (Operation) { 1702a34c753fSRafael Auler case MCCFIInstruction::OpSameValue: 1703a34c753fSRafael Auler OS << "OpSameValue Reg" << Inst.getRegister(); 1704a34c753fSRafael Auler break; 1705a34c753fSRafael Auler case MCCFIInstruction::OpRememberState: 1706a34c753fSRafael Auler OS << "OpRememberState"; 1707a34c753fSRafael Auler break; 1708a34c753fSRafael Auler case MCCFIInstruction::OpRestoreState: 1709a34c753fSRafael Auler OS << "OpRestoreState"; 1710a34c753fSRafael Auler break; 1711a34c753fSRafael Auler case MCCFIInstruction::OpOffset: 1712a34c753fSRafael Auler OS << "OpOffset Reg" << Inst.getRegister() << " " << Inst.getOffset(); 1713a34c753fSRafael Auler break; 1714a34c753fSRafael Auler case MCCFIInstruction::OpDefCfaRegister: 1715a34c753fSRafael Auler OS << "OpDefCfaRegister Reg" << Inst.getRegister(); 1716a34c753fSRafael Auler break; 1717a34c753fSRafael Auler case MCCFIInstruction::OpDefCfaOffset: 1718a34c753fSRafael Auler OS << "OpDefCfaOffset " << Inst.getOffset(); 1719a34c753fSRafael Auler break; 1720a34c753fSRafael Auler case MCCFIInstruction::OpDefCfa: 1721a34c753fSRafael Auler OS << "OpDefCfa Reg" << Inst.getRegister() << " " << Inst.getOffset(); 1722a34c753fSRafael Auler break; 1723a34c753fSRafael Auler case MCCFIInstruction::OpRelOffset: 1724a34c753fSRafael Auler OS << "OpRelOffset Reg" << Inst.getRegister() << " " << Inst.getOffset(); 1725a34c753fSRafael Auler break; 1726a34c753fSRafael Auler case MCCFIInstruction::OpAdjustCfaOffset: 1727a34c753fSRafael Auler OS << "OfAdjustCfaOffset " << Inst.getOffset(); 1728a34c753fSRafael Auler break; 1729a34c753fSRafael Auler case MCCFIInstruction::OpEscape: 1730a34c753fSRafael Auler OS << "OpEscape"; 1731a34c753fSRafael Auler break; 1732a34c753fSRafael Auler case MCCFIInstruction::OpRestore: 1733a34c753fSRafael Auler OS << "OpRestore Reg" << Inst.getRegister(); 1734a34c753fSRafael Auler break; 1735a34c753fSRafael Auler case MCCFIInstruction::OpUndefined: 1736a34c753fSRafael Auler OS << "OpUndefined Reg" << Inst.getRegister(); 1737a34c753fSRafael Auler break; 1738a34c753fSRafael Auler case MCCFIInstruction::OpRegister: 1739a34c753fSRafael Auler OS << "OpRegister Reg" << Inst.getRegister() << " Reg" 1740a34c753fSRafael Auler << Inst.getRegister2(); 1741a34c753fSRafael Auler break; 1742a34c753fSRafael Auler case MCCFIInstruction::OpWindowSave: 1743a34c753fSRafael Auler OS << "OpWindowSave"; 1744a34c753fSRafael Auler break; 1745a34c753fSRafael Auler case MCCFIInstruction::OpGnuArgsSize: 1746a34c753fSRafael Auler OS << "OpGnuArgsSize"; 1747a34c753fSRafael Auler break; 1748a34c753fSRafael Auler default: 1749a34c753fSRafael Auler OS << "Op#" << Operation; 1750a34c753fSRafael Auler break; 1751a34c753fSRafael Auler } 1752a34c753fSRafael Auler } 1753a34c753fSRafael Auler 17548579db96SDenis Revunov MarkerSymType BinaryContext::getMarkerType(const SymbolRef &Symbol) const { 17558579db96SDenis Revunov // For aarch64, the ABI defines mapping symbols so we identify data in the 17568579db96SDenis Revunov // code section (see IHI0056B). $x identifies a symbol starting code or the 17578579db96SDenis Revunov // end of a data chunk inside code, $d indentifies start of data. 17588579db96SDenis Revunov if (!isAArch64() || ELFSymbolRef(Symbol).getSize()) 17598579db96SDenis Revunov return MarkerSymType::NONE; 17608579db96SDenis Revunov 17618579db96SDenis Revunov Expected<StringRef> NameOrError = Symbol.getName(); 17628579db96SDenis Revunov Expected<object::SymbolRef::Type> TypeOrError = Symbol.getType(); 17638579db96SDenis Revunov 17648579db96SDenis Revunov if (!TypeOrError || !NameOrError) 17658579db96SDenis Revunov return MarkerSymType::NONE; 17668579db96SDenis Revunov 17678579db96SDenis Revunov if (*TypeOrError != SymbolRef::ST_Unknown) 17688579db96SDenis Revunov return MarkerSymType::NONE; 17698579db96SDenis Revunov 17708579db96SDenis Revunov if (*NameOrError == "$x" || NameOrError->startswith("$x.")) 17718579db96SDenis Revunov return MarkerSymType::CODE; 17728579db96SDenis Revunov 17738579db96SDenis Revunov if (*NameOrError == "$d" || NameOrError->startswith("$d.")) 17748579db96SDenis Revunov return MarkerSymType::DATA; 17758579db96SDenis Revunov 17768579db96SDenis Revunov return MarkerSymType::NONE; 17778579db96SDenis Revunov } 17788579db96SDenis Revunov 17798579db96SDenis Revunov bool BinaryContext::isMarker(const SymbolRef &Symbol) const { 17808579db96SDenis Revunov return getMarkerType(Symbol) != MarkerSymType::NONE; 17818579db96SDenis Revunov } 17828579db96SDenis Revunov 17837dee646bSAmir Ayupov static void printDebugInfo(raw_ostream &OS, const MCInst &Instruction, 17847dee646bSAmir Ayupov const BinaryFunction *Function, 17857dee646bSAmir Ayupov DWARFContext *DwCtx) { 17867dee646bSAmir Ayupov DebugLineTableRowRef RowRef = 17877dee646bSAmir Ayupov DebugLineTableRowRef::fromSMLoc(Instruction.getLoc()); 17887dee646bSAmir Ayupov if (RowRef == DebugLineTableRowRef::NULL_ROW) 17897dee646bSAmir Ayupov return; 17907dee646bSAmir Ayupov 17917dee646bSAmir Ayupov const DWARFDebugLine::LineTable *LineTable; 17927dee646bSAmir Ayupov if (Function && Function->getDWARFUnit() && 17937dee646bSAmir Ayupov Function->getDWARFUnit()->getOffset() == RowRef.DwCompileUnitIndex) { 17947dee646bSAmir Ayupov LineTable = Function->getDWARFLineTable(); 17957dee646bSAmir Ayupov } else { 17967dee646bSAmir Ayupov LineTable = DwCtx->getLineTableForUnit( 17977dee646bSAmir Ayupov DwCtx->getCompileUnitForOffset(RowRef.DwCompileUnitIndex)); 17987dee646bSAmir Ayupov } 17997dee646bSAmir Ayupov assert(LineTable && "line table expected for instruction with debug info"); 18007dee646bSAmir Ayupov 18017dee646bSAmir Ayupov const DWARFDebugLine::Row &Row = LineTable->Rows[RowRef.RowIndex - 1]; 18027dee646bSAmir Ayupov StringRef FileName = ""; 180389fab98eSFangrui Song if (std::optional<const char *> FName = 18047dee646bSAmir Ayupov dwarf::toString(LineTable->Prologue.FileNames[Row.File - 1].Name)) 18057dee646bSAmir Ayupov FileName = *FName; 18067dee646bSAmir Ayupov OS << " # debug line " << FileName << ":" << Row.Line; 18077dee646bSAmir Ayupov if (Row.Column) 18087dee646bSAmir Ayupov OS << ":" << Row.Column; 18097dee646bSAmir Ayupov if (Row.Discriminator) 18107dee646bSAmir Ayupov OS << " discriminator:" << Row.Discriminator; 18117dee646bSAmir Ayupov } 18127dee646bSAmir Ayupov 181340c2e0faSMaksim Panchenko void BinaryContext::printInstruction(raw_ostream &OS, const MCInst &Instruction, 1814a34c753fSRafael Auler uint64_t Offset, 1815a34c753fSRafael Auler const BinaryFunction *Function, 181640c2e0faSMaksim Panchenko bool PrintMCInst, bool PrintMemData, 181769f87b6cSAmir Ayupov bool PrintRelocations, 181869f87b6cSAmir Ayupov StringRef Endl) const { 1819a34c753fSRafael Auler if (MIB->isEHLabel(Instruction)) { 182069f87b6cSAmir Ayupov OS << " EH_LABEL: " << *MIB->getTargetSymbol(Instruction) << Endl; 1821a34c753fSRafael Auler return; 1822a34c753fSRafael Auler } 1823a34c753fSRafael Auler OS << format(" %08" PRIx64 ": ", Offset); 1824a34c753fSRafael Auler if (MIB->isCFI(Instruction)) { 1825a34c753fSRafael Auler uint32_t Offset = Instruction.getOperand(0).getImm(); 1826a34c753fSRafael Auler OS << "\t!CFI\t$" << Offset << "\t; "; 1827a34c753fSRafael Auler if (Function) 1828a34c753fSRafael Auler printCFI(OS, *Function->getCFIFor(Instruction)); 182969f87b6cSAmir Ayupov OS << Endl; 1830a34c753fSRafael Auler return; 1831a34c753fSRafael Auler } 1832a34c753fSRafael Auler InstPrinter->printInst(&Instruction, 0, "", *STI, OS); 1833a34c753fSRafael Auler if (MIB->isCall(Instruction)) { 1834a34c753fSRafael Auler if (MIB->isTailCall(Instruction)) 1835a34c753fSRafael Auler OS << " # TAILCALL "; 1836a34c753fSRafael Auler if (MIB->isInvoke(Instruction)) { 18372563fd63SAmir Ayupov const std::optional<MCPlus::MCLandingPad> EHInfo = 18382563fd63SAmir Ayupov MIB->getEHInfo(Instruction); 1839a34c753fSRafael Auler OS << " # handler: "; 1840a34c753fSRafael Auler if (EHInfo->first) 1841a34c753fSRafael Auler OS << *EHInfo->first; 1842a34c753fSRafael Auler else 1843a34c753fSRafael Auler OS << '0'; 1844a34c753fSRafael Auler OS << "; action: " << EHInfo->second; 1845a34c753fSRafael Auler const int64_t GnuArgsSize = MIB->getGnuArgsSize(Instruction); 1846a34c753fSRafael Auler if (GnuArgsSize >= 0) 1847a34c753fSRafael Auler OS << "; GNU_args_size = " << GnuArgsSize; 1848a34c753fSRafael Auler } 1849a34c753fSRafael Auler } else if (MIB->isIndirectBranch(Instruction)) { 1850a34c753fSRafael Auler if (uint64_t JTAddress = MIB->getJumpTable(Instruction)) { 1851a34c753fSRafael Auler OS << " # JUMPTABLE @0x" << Twine::utohexstr(JTAddress); 1852a34c753fSRafael Auler } else { 1853a34c753fSRafael Auler OS << " # UNKNOWN CONTROL FLOW"; 1854a34c753fSRafael Auler } 1855a34c753fSRafael Auler } 18562563fd63SAmir Ayupov if (std::optional<uint32_t> Offset = MIB->getOffset(Instruction)) 1857a9cd49d5SAmir Ayupov OS << " # Offset: " << *Offset; 1858a34c753fSRafael Auler 1859a34c753fSRafael Auler MIB->printAnnotations(Instruction, OS); 1860a34c753fSRafael Auler 18617dee646bSAmir Ayupov if (opts::PrintDebugInfo) 18627dee646bSAmir Ayupov printDebugInfo(OS, Instruction, Function, DwCtx.get()); 1863a34c753fSRafael Auler 1864a34c753fSRafael Auler if ((opts::PrintRelocations || PrintRelocations) && Function) { 1865a34c753fSRafael Auler const uint64_t Size = computeCodeSize(&Instruction, &Instruction + 1); 1866a34c753fSRafael Auler Function->printRelocations(OS, Offset, Size); 1867a34c753fSRafael Auler } 1868a34c753fSRafael Auler 186969f87b6cSAmir Ayupov OS << Endl; 1870a34c753fSRafael Auler 1871a34c753fSRafael Auler if (PrintMCInst) { 1872a34c753fSRafael Auler Instruction.dump_pretty(OS, InstPrinter.get()); 187369f87b6cSAmir Ayupov OS << Endl; 1874a34c753fSRafael Auler } 1875a34c753fSRafael Auler } 1876a34c753fSRafael Auler 1877e8f5743eSAmir Ayupov std::optional<uint64_t> 187877b75ca5SMaksim Panchenko BinaryContext::getBaseAddressForMapping(uint64_t MMapAddress, 187977b75ca5SMaksim Panchenko uint64_t FileOffset) const { 188077b75ca5SMaksim Panchenko // Find a segment with a matching file offset. 188177b75ca5SMaksim Panchenko for (auto &KV : SegmentMapInfo) { 188277b75ca5SMaksim Panchenko const SegmentInfo &SegInfo = KV.second; 188377b75ca5SMaksim Panchenko if (alignDown(SegInfo.FileOffset, SegInfo.Alignment) == FileOffset) { 188477b75ca5SMaksim Panchenko // Use segment's aligned memory offset to calculate the base address. 188577b75ca5SMaksim Panchenko const uint64_t MemOffset = alignDown(SegInfo.Address, SegInfo.Alignment); 188677b75ca5SMaksim Panchenko return MMapAddress - MemOffset; 188777b75ca5SMaksim Panchenko } 188877b75ca5SMaksim Panchenko } 188977b75ca5SMaksim Panchenko 1890e324a80fSKazu Hirata return std::nullopt; 189177b75ca5SMaksim Panchenko } 189277b75ca5SMaksim Panchenko 1893a34c753fSRafael Auler ErrorOr<BinarySection &> BinaryContext::getSectionForAddress(uint64_t Address) { 1894a34c753fSRafael Auler auto SI = AddressToSection.upper_bound(Address); 1895a34c753fSRafael Auler if (SI != AddressToSection.begin()) { 1896a34c753fSRafael Auler --SI; 1897a34c753fSRafael Auler uint64_t UpperBound = SI->first + SI->second->getSize(); 1898a34c753fSRafael Auler if (!SI->second->getSize()) 1899a34c753fSRafael Auler UpperBound += 1; 1900a34c753fSRafael Auler if (UpperBound > Address) 1901a34c753fSRafael Auler return *SI->second; 1902a34c753fSRafael Auler } 1903a34c753fSRafael Auler return std::make_error_code(std::errc::bad_address); 1904a34c753fSRafael Auler } 1905a34c753fSRafael Auler 1906a34c753fSRafael Auler ErrorOr<StringRef> 1907a34c753fSRafael Auler BinaryContext::getSectionNameForAddress(uint64_t Address) const { 19083652483cSRafael Auler if (ErrorOr<const BinarySection &> Section = getSectionForAddress(Address)) 1909a34c753fSRafael Auler return Section->getName(); 1910a34c753fSRafael Auler return std::make_error_code(std::errc::bad_address); 1911a34c753fSRafael Auler } 1912a34c753fSRafael Auler 1913a34c753fSRafael Auler BinarySection &BinaryContext::registerSection(BinarySection *Section) { 1914a34c753fSRafael Auler auto Res = Sections.insert(Section); 1915a34c753fSRafael Auler (void)Res; 1916a34c753fSRafael Auler assert(Res.second && "can't register the same section twice."); 1917a34c753fSRafael Auler 1918a34c753fSRafael Auler // Only register allocatable sections in the AddressToSection map. 1919a34c753fSRafael Auler if (Section->isAllocatable() && Section->getAddress()) 1920a34c753fSRafael Auler AddressToSection.insert(std::make_pair(Section->getAddress(), Section)); 1921a34c753fSRafael Auler NameToSection.insert( 1922a34c753fSRafael Auler std::make_pair(std::string(Section->getName()), Section)); 19234d3a0cadSMaksim Panchenko if (Section->hasSectionRef()) 19244d3a0cadSMaksim Panchenko SectionRefToBinarySection.insert( 19254d3a0cadSMaksim Panchenko std::make_pair(Section->getSectionRef(), Section)); 19264d3a0cadSMaksim Panchenko 1927a34c753fSRafael Auler LLVM_DEBUG(dbgs() << "BOLT-DEBUG: registering " << *Section << "\n"); 1928a34c753fSRafael Auler return *Section; 1929a34c753fSRafael Auler } 1930a34c753fSRafael Auler 1931a34c753fSRafael Auler BinarySection &BinaryContext::registerSection(SectionRef Section) { 1932a34c753fSRafael Auler return registerSection(new BinarySection(*this, Section)); 1933a34c753fSRafael Auler } 1934a34c753fSRafael Auler 1935a34c753fSRafael Auler BinarySection & 19364d3a0cadSMaksim Panchenko BinaryContext::registerSection(const Twine &SectionName, 1937a34c753fSRafael Auler const BinarySection &OriginalSection) { 193840c2e0faSMaksim Panchenko return registerSection( 193940c2e0faSMaksim Panchenko new BinarySection(*this, SectionName, OriginalSection)); 1940a34c753fSRafael Auler } 1941a34c753fSRafael Auler 194240c2e0faSMaksim Panchenko BinarySection & 19434d3a0cadSMaksim Panchenko BinaryContext::registerOrUpdateSection(const Twine &Name, unsigned ELFType, 194440c2e0faSMaksim Panchenko unsigned ELFFlags, uint8_t *Data, 194540c2e0faSMaksim Panchenko uint64_t Size, unsigned Alignment) { 1946a34c753fSRafael Auler auto NamedSections = getSectionByName(Name); 1947a34c753fSRafael Auler if (NamedSections.begin() != NamedSections.end()) { 1948a34c753fSRafael Auler assert(std::next(NamedSections.begin()) == NamedSections.end() && 1949a34c753fSRafael Auler "can only update unique sections"); 1950a34c753fSRafael Auler BinarySection *Section = NamedSections.begin()->second; 1951a34c753fSRafael Auler 1952a34c753fSRafael Auler LLVM_DEBUG(dbgs() << "BOLT-DEBUG: updating " << *Section << " -> "); 1953a34c753fSRafael Auler const bool Flag = Section->isAllocatable(); 1954a34c753fSRafael Auler (void)Flag; 1955a34c753fSRafael Auler Section->update(Data, Size, Alignment, ELFType, ELFFlags); 1956a34c753fSRafael Auler LLVM_DEBUG(dbgs() << *Section << "\n"); 1957a34c753fSRafael Auler // FIXME: Fix section flags/attributes for MachO. 1958a34c753fSRafael Auler if (isELF()) 1959a34c753fSRafael Auler assert(Flag == Section->isAllocatable() && 1960a34c753fSRafael Auler "can't change section allocation status"); 1961a34c753fSRafael Auler return *Section; 1962a34c753fSRafael Auler } 1963a34c753fSRafael Auler 196440c2e0faSMaksim Panchenko return registerSection( 196540c2e0faSMaksim Panchenko new BinarySection(*this, Name, Data, Size, Alignment, ELFType, ELFFlags)); 1966a34c753fSRafael Auler } 1967a34c753fSRafael Auler 19684d3a0cadSMaksim Panchenko void BinaryContext::deregisterSectionName(const BinarySection &Section) { 19694d3a0cadSMaksim Panchenko auto NameRange = NameToSection.equal_range(Section.getName().str()); 19704d3a0cadSMaksim Panchenko while (NameRange.first != NameRange.second) { 19714d3a0cadSMaksim Panchenko if (NameRange.first->second == &Section) { 19724d3a0cadSMaksim Panchenko NameToSection.erase(NameRange.first); 19734d3a0cadSMaksim Panchenko break; 19744d3a0cadSMaksim Panchenko } 19754d3a0cadSMaksim Panchenko ++NameRange.first; 19764d3a0cadSMaksim Panchenko } 19774d3a0cadSMaksim Panchenko } 19784d3a0cadSMaksim Panchenko 19794d3a0cadSMaksim Panchenko void BinaryContext::deregisterUnusedSections() { 19804d3a0cadSMaksim Panchenko ErrorOr<BinarySection &> AbsSection = getUniqueSectionByName("<absolute>"); 19814d3a0cadSMaksim Panchenko for (auto SI = Sections.begin(); SI != Sections.end();) { 19824d3a0cadSMaksim Panchenko BinarySection *Section = *SI; 198305634f73SJob Noorman // We check getOutputData() instead of getOutputSize() because sometimes 198405634f73SJob Noorman // zero-sized .text.cold sections are allocated. 198505634f73SJob Noorman if (Section->hasSectionRef() || Section->getOutputData() || 19864d3a0cadSMaksim Panchenko (AbsSection && Section == &AbsSection.get())) { 19874d3a0cadSMaksim Panchenko ++SI; 19884d3a0cadSMaksim Panchenko continue; 19894d3a0cadSMaksim Panchenko } 19904d3a0cadSMaksim Panchenko 19914d3a0cadSMaksim Panchenko LLVM_DEBUG(dbgs() << "LLVM-DEBUG: deregistering " << Section->getName() 19924d3a0cadSMaksim Panchenko << '\n';); 19934d3a0cadSMaksim Panchenko deregisterSectionName(*Section); 19944d3a0cadSMaksim Panchenko SI = Sections.erase(SI); 19954d3a0cadSMaksim Panchenko delete Section; 19964d3a0cadSMaksim Panchenko } 19974d3a0cadSMaksim Panchenko } 19984d3a0cadSMaksim Panchenko 1999a34c753fSRafael Auler bool BinaryContext::deregisterSection(BinarySection &Section) { 2000a34c753fSRafael Auler BinarySection *SectionPtr = &Section; 2001a34c753fSRafael Auler auto Itr = Sections.find(SectionPtr); 2002a34c753fSRafael Auler if (Itr != Sections.end()) { 2003a34c753fSRafael Auler auto Range = AddressToSection.equal_range(SectionPtr->getAddress()); 2004a34c753fSRafael Auler while (Range.first != Range.second) { 2005a34c753fSRafael Auler if (Range.first->second == SectionPtr) { 2006a34c753fSRafael Auler AddressToSection.erase(Range.first); 2007a34c753fSRafael Auler break; 2008a34c753fSRafael Auler } 2009a34c753fSRafael Auler ++Range.first; 2010a34c753fSRafael Auler } 2011a34c753fSRafael Auler 20124d3a0cadSMaksim Panchenko deregisterSectionName(*SectionPtr); 2013a34c753fSRafael Auler Sections.erase(Itr); 2014a34c753fSRafael Auler delete SectionPtr; 2015a34c753fSRafael Auler return true; 2016a34c753fSRafael Auler } 2017a34c753fSRafael Auler return false; 2018a34c753fSRafael Auler } 2019a34c753fSRafael Auler 20204d3a0cadSMaksim Panchenko void BinaryContext::renameSection(BinarySection &Section, 20214d3a0cadSMaksim Panchenko const Twine &NewName) { 20224d3a0cadSMaksim Panchenko auto Itr = Sections.find(&Section); 20234d3a0cadSMaksim Panchenko assert(Itr != Sections.end() && "Section must exist to be renamed."); 20244d3a0cadSMaksim Panchenko Sections.erase(Itr); 20254d3a0cadSMaksim Panchenko 20264d3a0cadSMaksim Panchenko deregisterSectionName(Section); 20274d3a0cadSMaksim Panchenko 20284d3a0cadSMaksim Panchenko Section.Name = NewName.str(); 2029c92ff2a3Srevunov.denis@huawei.com Section.setOutputName(Section.Name); 20304d3a0cadSMaksim Panchenko 2031c92ff2a3Srevunov.denis@huawei.com NameToSection.insert(std::make_pair(Section.Name, &Section)); 20324d3a0cadSMaksim Panchenko 20334d3a0cadSMaksim Panchenko // Reinsert with the new name. 20344d3a0cadSMaksim Panchenko Sections.insert(&Section); 20354d3a0cadSMaksim Panchenko } 20364d3a0cadSMaksim Panchenko 2037a34c753fSRafael Auler void BinaryContext::printSections(raw_ostream &OS) const { 20383652483cSRafael Auler for (BinarySection *const &Section : Sections) 2039a34c753fSRafael Auler OS << "BOLT-INFO: " << *Section << "\n"; 2040a34c753fSRafael Auler } 2041a34c753fSRafael Auler 2042a34c753fSRafael Auler BinarySection &BinaryContext::absoluteSection() { 2043a34c753fSRafael Auler if (ErrorOr<BinarySection &> Section = getUniqueSectionByName("<absolute>")) 2044a34c753fSRafael Auler return *Section; 2045a34c753fSRafael Auler return registerOrUpdateSection("<absolute>", ELF::SHT_NULL, 0u); 2046a34c753fSRafael Auler } 2047a34c753fSRafael Auler 204840c2e0faSMaksim Panchenko ErrorOr<uint64_t> BinaryContext::getUnsignedValueAtAddress(uint64_t Address, 2049a34c753fSRafael Auler size_t Size) const { 2050a34c753fSRafael Auler const ErrorOr<const BinarySection &> Section = getSectionForAddress(Address); 2051a34c753fSRafael Auler if (!Section) 2052a34c753fSRafael Auler return std::make_error_code(std::errc::bad_address); 2053a34c753fSRafael Auler 2054a34c753fSRafael Auler if (Section->isVirtual()) 2055a34c753fSRafael Auler return 0; 2056a34c753fSRafael Auler 2057a34c753fSRafael Auler DataExtractor DE(Section->getContents(), AsmInfo->isLittleEndian(), 2058a34c753fSRafael Auler AsmInfo->getCodePointerSize()); 2059a34c753fSRafael Auler auto ValueOffset = static_cast<uint64_t>(Address - Section->getAddress()); 2060a34c753fSRafael Auler return DE.getUnsigned(&ValueOffset, Size); 2061a34c753fSRafael Auler } 2062a34c753fSRafael Auler 206340c2e0faSMaksim Panchenko ErrorOr<uint64_t> BinaryContext::getSignedValueAtAddress(uint64_t Address, 2064a34c753fSRafael Auler size_t Size) const { 2065a34c753fSRafael Auler const ErrorOr<const BinarySection &> Section = getSectionForAddress(Address); 2066a34c753fSRafael Auler if (!Section) 2067a34c753fSRafael Auler return std::make_error_code(std::errc::bad_address); 2068a34c753fSRafael Auler 2069a34c753fSRafael Auler if (Section->isVirtual()) 2070a34c753fSRafael Auler return 0; 2071a34c753fSRafael Auler 2072a34c753fSRafael Auler DataExtractor DE(Section->getContents(), AsmInfo->isLittleEndian(), 2073a34c753fSRafael Auler AsmInfo->getCodePointerSize()); 2074a34c753fSRafael Auler auto ValueOffset = static_cast<uint64_t>(Address - Section->getAddress()); 2075a34c753fSRafael Auler return DE.getSigned(&ValueOffset, Size); 2076a34c753fSRafael Auler } 2077a34c753fSRafael Auler 207840c2e0faSMaksim Panchenko void BinaryContext::addRelocation(uint64_t Address, MCSymbol *Symbol, 207940c2e0faSMaksim Panchenko uint64_t Type, uint64_t Addend, 2080a34c753fSRafael Auler uint64_t Value) { 2081a34c753fSRafael Auler ErrorOr<BinarySection &> Section = getSectionForAddress(Address); 2082a34c753fSRafael Auler assert(Section && "cannot find section for address"); 208340c2e0faSMaksim Panchenko Section->addRelocation(Address - Section->getAddress(), Symbol, Type, Addend, 2084a34c753fSRafael Auler Value); 2085a34c753fSRafael Auler } 2086a34c753fSRafael Auler 208740c2e0faSMaksim Panchenko void BinaryContext::addDynamicRelocation(uint64_t Address, MCSymbol *Symbol, 208840c2e0faSMaksim Panchenko uint64_t Type, uint64_t Addend, 2089a34c753fSRafael Auler uint64_t Value) { 2090a34c753fSRafael Auler ErrorOr<BinarySection &> Section = getSectionForAddress(Address); 2091a34c753fSRafael Auler assert(Section && "cannot find section for address"); 209240c2e0faSMaksim Panchenko Section->addDynamicRelocation(Address - Section->getAddress(), Symbol, Type, 209340c2e0faSMaksim Panchenko Addend, Value); 2094a34c753fSRafael Auler } 2095a34c753fSRafael Auler 2096a34c753fSRafael Auler bool BinaryContext::removeRelocationAt(uint64_t Address) { 2097a34c753fSRafael Auler ErrorOr<BinarySection &> Section = getSectionForAddress(Address); 2098a34c753fSRafael Auler assert(Section && "cannot find section for address"); 2099a34c753fSRafael Auler return Section->removeRelocationAt(Address - Section->getAddress()); 2100a34c753fSRafael Auler } 2101a34c753fSRafael Auler 210208ab4fafSAmir Ayupov const Relocation *BinaryContext::getRelocationAt(uint64_t Address) const { 210308ab4fafSAmir Ayupov ErrorOr<const BinarySection &> Section = getSectionForAddress(Address); 2104a34c753fSRafael Auler if (!Section) 2105a34c753fSRafael Auler return nullptr; 2106a34c753fSRafael Auler 2107a34c753fSRafael Auler return Section->getRelocationAt(Address - Section->getAddress()); 2108a34c753fSRafael Auler } 2109a34c753fSRafael Auler 2110702fe36bSAmir Ayupov const Relocation * 2111702fe36bSAmir Ayupov BinaryContext::getDynamicRelocationAt(uint64_t Address) const { 2112702fe36bSAmir Ayupov ErrorOr<const BinarySection &> Section = getSectionForAddress(Address); 2113a34c753fSRafael Auler if (!Section) 2114a34c753fSRafael Auler return nullptr; 2115a34c753fSRafael Auler 2116a34c753fSRafael Auler return Section->getDynamicRelocationAt(Address - Section->getAddress()); 2117a34c753fSRafael Auler } 2118a34c753fSRafael Auler 2119a34c753fSRafael Auler void BinaryContext::markAmbiguousRelocations(BinaryData &BD, 2120a34c753fSRafael Auler const uint64_t Address) { 2121a34c753fSRafael Auler auto setImmovable = [&](BinaryData &BD) { 2122a34c753fSRafael Auler BinaryData *Root = BD.getAtomicRoot(); 2123a34c753fSRafael Auler LLVM_DEBUG(if (Root->isMoveable()) { 2124a34c753fSRafael Auler dbgs() << "BOLT-DEBUG: setting " << *Root << " as immovable " 2125a34c753fSRafael Auler << "due to ambiguous relocation referencing 0x" 2126a34c753fSRafael Auler << Twine::utohexstr(Address) << '\n'; 2127a34c753fSRafael Auler }); 2128a34c753fSRafael Auler Root->setIsMoveable(false); 2129a34c753fSRafael Auler }; 2130a34c753fSRafael Auler 2131a34c753fSRafael Auler if (Address == BD.getAddress()) { 2132a34c753fSRafael Auler setImmovable(BD); 2133a34c753fSRafael Auler 2134a34c753fSRafael Auler // Set previous symbol as immovable 2135a34c753fSRafael Auler BinaryData *Prev = getBinaryDataContainingAddress(Address - 1); 2136a34c753fSRafael Auler if (Prev && Prev->getEndAddress() == BD.getAddress()) 2137a34c753fSRafael Auler setImmovable(*Prev); 2138a34c753fSRafael Auler } 2139a34c753fSRafael Auler 2140a34c753fSRafael Auler if (Address == BD.getEndAddress()) { 2141a34c753fSRafael Auler setImmovable(BD); 2142a34c753fSRafael Auler 2143a34c753fSRafael Auler // Set next symbol as immovable 2144a34c753fSRafael Auler BinaryData *Next = getBinaryDataContainingAddress(BD.getEndAddress()); 2145a34c753fSRafael Auler if (Next && Next->getAddress() == BD.getEndAddress()) 2146a34c753fSRafael Auler setImmovable(*Next); 2147a34c753fSRafael Auler } 2148a34c753fSRafael Auler } 2149a34c753fSRafael Auler 2150a34c753fSRafael Auler BinaryFunction *BinaryContext::getFunctionForSymbol(const MCSymbol *Symbol, 2151a34c753fSRafael Auler uint64_t *EntryDesc) { 2152e8ce5f1eSNico Weber std::shared_lock<llvm::sys::RWMutex> Lock(SymbolToFunctionMapMutex); 2153a34c753fSRafael Auler auto BFI = SymbolToFunctionMap.find(Symbol); 2154a34c753fSRafael Auler if (BFI == SymbolToFunctionMap.end()) 2155a34c753fSRafael Auler return nullptr; 2156a34c753fSRafael Auler 2157a34c753fSRafael Auler BinaryFunction *BF = BFI->second; 2158a34c753fSRafael Auler if (EntryDesc) 2159a34c753fSRafael Auler *EntryDesc = BF->getEntryIDForSymbol(Symbol); 2160a34c753fSRafael Auler 2161a34c753fSRafael Auler return BF; 2162a34c753fSRafael Auler } 2163a34c753fSRafael Auler 2164a34c753fSRafael Auler void BinaryContext::exitWithBugReport(StringRef Message, 2165a34c753fSRafael Auler const BinaryFunction &Function) const { 2166a34c753fSRafael Auler errs() << "=======================================\n"; 2167a34c753fSRafael Auler errs() << "BOLT is unable to proceed because it couldn't properly understand " 2168a34c753fSRafael Auler "this function.\n"; 2169a34c753fSRafael Auler errs() << "If you are running the most recent version of BOLT, you may " 2170a34c753fSRafael Auler "want to " 2171a34c753fSRafael Auler "report this and paste this dump.\nPlease check that there is no " 2172a34c753fSRafael Auler "sensitive contents being shared in this dump.\n"; 2173a34c753fSRafael Auler errs() << "\nOffending function: " << Function.getPrintName() << "\n\n"; 2174a34c753fSRafael Auler ScopedPrinter SP(errs()); 2175a34c753fSRafael Auler SP.printBinaryBlock("Function contents", *Function.getData()); 2176a34c753fSRafael Auler errs() << "\n"; 2177a34c753fSRafael Auler Function.dump(); 2178a34c753fSRafael Auler errs() << "ERROR: " << Message; 2179a34c753fSRafael Auler errs() << "\n=======================================\n"; 2180a34c753fSRafael Auler exit(1); 2181a34c753fSRafael Auler } 2182a34c753fSRafael Auler 2183a34c753fSRafael Auler BinaryFunction * 2184a34c753fSRafael Auler BinaryContext::createInjectedBinaryFunction(const std::string &Name, 2185a34c753fSRafael Auler bool IsSimple) { 2186a34c753fSRafael Auler InjectedBinaryFunctions.push_back(new BinaryFunction(Name, *this, IsSimple)); 2187a34c753fSRafael Auler BinaryFunction *BF = InjectedBinaryFunctions.back(); 2188a34c753fSRafael Auler setSymbolToFunctionMap(BF->getSymbol(), BF); 2189a34c753fSRafael Auler BF->CurrentState = BinaryFunction::State::CFG; 2190a34c753fSRafael Auler return BF; 2191a34c753fSRafael Auler } 2192a34c753fSRafael Auler 2193a34c753fSRafael Auler std::pair<size_t, size_t> 2194a34c753fSRafael Auler BinaryContext::calculateEmittedSize(BinaryFunction &BF, bool FixBranches) { 2195a34c753fSRafael Auler // Adjust branch instruction to match the current layout. 2196a34c753fSRafael Auler if (FixBranches) 2197a34c753fSRafael Auler BF.fixBranches(); 2198a34c753fSRafael Auler 2199a34c753fSRafael Auler // Create local MC context to isolate the effect of ephemeral code emission. 2200a34c753fSRafael Auler IndependentCodeEmitter MCEInstance = createIndependentMCCodeEmitter(); 2201a34c753fSRafael Auler MCContext *LocalCtx = MCEInstance.LocalCtx.get(); 2202a34c753fSRafael Auler MCAsmBackend *MAB = 2203a34c753fSRafael Auler TheTarget->createMCAsmBackend(*STI, *MRI, MCTargetOptions()); 2204a34c753fSRafael Auler 2205a34c753fSRafael Auler SmallString<256> Code; 2206a34c753fSRafael Auler raw_svector_ostream VecOS(Code); 2207a34c753fSRafael Auler 2208a34c753fSRafael Auler std::unique_ptr<MCObjectWriter> OW = MAB->createObjectWriter(VecOS); 2209a34c753fSRafael Auler std::unique_ptr<MCStreamer> Streamer(TheTarget->createMCObjectStreamer( 2210a34c753fSRafael Auler *TheTriple, *LocalCtx, std::unique_ptr<MCAsmBackend>(MAB), std::move(OW), 2211a34c753fSRafael Auler std::unique_ptr<MCCodeEmitter>(MCEInstance.MCE.release()), *STI, 2212a34c753fSRafael Auler /*RelaxAll=*/false, 2213a34c753fSRafael Auler /*IncrementalLinkerCompatible=*/false, 2214a34c753fSRafael Auler /*DWARFMustBeAtTheEnd=*/false)); 2215a34c753fSRafael Auler 2216a34c753fSRafael Auler Streamer->initSections(false, *STI); 2217a34c753fSRafael Auler 2218a34c753fSRafael Auler MCSection *Section = MCEInstance.LocalMOFI->getTextSection(); 2219a34c753fSRafael Auler Section->setHasInstructions(true); 2220a34c753fSRafael Auler 2221a34c753fSRafael Auler // Create symbols in the LocalCtx so that they get destroyed with it. 2222a34c753fSRafael Auler MCSymbol *StartLabel = LocalCtx->createTempSymbol(); 2223a34c753fSRafael Auler MCSymbol *EndLabel = LocalCtx->createTempSymbol(); 2224a34c753fSRafael Auler 2225adf4142fSFangrui Song Streamer->switchSection(Section); 2226a34c753fSRafael Auler Streamer->emitLabel(StartLabel); 2227275e075cSFabian Parzefall emitFunctionBody(*Streamer, BF, BF.getLayout().getMainFragment(), 2228a34c753fSRafael Auler /*EmitCodeOnly=*/true); 2229a34c753fSRafael Auler Streamer->emitLabel(EndLabel); 2230a34c753fSRafael Auler 2231275e075cSFabian Parzefall using LabelRange = std::pair<const MCSymbol *, const MCSymbol *>; 2232275e075cSFabian Parzefall SmallVector<LabelRange> SplitLabels; 223307f63b0aSFabian Parzefall for (FunctionFragment &FF : BF.getLayout().getSplitFragments()) { 2234275e075cSFabian Parzefall MCSymbol *const SplitStartLabel = LocalCtx->createTempSymbol(); 2235275e075cSFabian Parzefall MCSymbol *const SplitEndLabel = LocalCtx->createTempSymbol(); 2236275e075cSFabian Parzefall SplitLabels.emplace_back(SplitStartLabel, SplitEndLabel); 2237a34c753fSRafael Auler 2238275e075cSFabian Parzefall MCSectionELF *const SplitSection = LocalCtx->getELFSection( 22390f74d191SFabian Parzefall BF.getCodeSectionName(FF.getFragmentNum()), ELF::SHT_PROGBITS, 2240275e075cSFabian Parzefall ELF::SHF_EXECINSTR | ELF::SHF_ALLOC); 2241275e075cSFabian Parzefall SplitSection->setHasInstructions(true); 2242275e075cSFabian Parzefall Streamer->switchSection(SplitSection); 2243275e075cSFabian Parzefall 2244275e075cSFabian Parzefall Streamer->emitLabel(SplitStartLabel); 2245275e075cSFabian Parzefall emitFunctionBody(*Streamer, BF, FF, /*EmitCodeOnly=*/true); 2246275e075cSFabian Parzefall Streamer->emitLabel(SplitEndLabel); 2247275e075cSFabian Parzefall // To avoid calling MCObjectStreamer::flushPendingLabels() which is 2248275e075cSFabian Parzefall // private 2249a34c753fSRafael Auler Streamer->emitBytes(StringRef("")); 2250adf4142fSFangrui Song Streamer->switchSection(Section); 2251a34c753fSRafael Auler } 2252a34c753fSRafael Auler 2253a34c753fSRafael Auler // To avoid calling MCObjectStreamer::flushPendingLabels() which is private or 2254a34c753fSRafael Auler // MCStreamer::Finish(), which does more than we want 2255a34c753fSRafael Auler Streamer->emitBytes(StringRef("")); 2256a34c753fSRafael Auler 2257a34c753fSRafael Auler MCAssembler &Assembler = 2258a34c753fSRafael Auler static_cast<MCObjectStreamer *>(Streamer.get())->getAssembler(); 2259a34c753fSRafael Auler MCAsmLayout Layout(Assembler); 2260a34c753fSRafael Auler Assembler.layout(Layout); 2261a34c753fSRafael Auler 2262a34c753fSRafael Auler const uint64_t HotSize = 2263a34c753fSRafael Auler Layout.getSymbolOffset(*EndLabel) - Layout.getSymbolOffset(*StartLabel); 2264275e075cSFabian Parzefall const uint64_t ColdSize = 2265275e075cSFabian Parzefall std::accumulate(SplitLabels.begin(), SplitLabels.end(), 0ULL, 2266275e075cSFabian Parzefall [&](const uint64_t Accu, const LabelRange &Labels) { 2267275e075cSFabian Parzefall return Accu + Layout.getSymbolOffset(*Labels.second) - 2268275e075cSFabian Parzefall Layout.getSymbolOffset(*Labels.first); 2269275e075cSFabian Parzefall }); 2270a34c753fSRafael Auler 2271a34c753fSRafael Auler // Clean-up the effect of the code emission. 2272a34c753fSRafael Auler for (const MCSymbol &Symbol : Assembler.symbols()) { 2273a34c753fSRafael Auler MCSymbol *MutableSymbol = const_cast<MCSymbol *>(&Symbol); 2274a34c753fSRafael Auler MutableSymbol->setUndefined(); 2275a34c753fSRafael Auler MutableSymbol->setIsRegistered(false); 2276a34c753fSRafael Auler } 2277a34c753fSRafael Auler 2278a34c753fSRafael Auler return std::make_pair(HotSize, ColdSize); 2279a34c753fSRafael Auler } 2280a34c753fSRafael Auler 2281bcc4c909SMaksim Panchenko bool BinaryContext::validateInstructionEncoding( 2282bcc4c909SMaksim Panchenko ArrayRef<uint8_t> InputSequence) const { 2283bcc4c909SMaksim Panchenko MCInst Inst; 2284bcc4c909SMaksim Panchenko uint64_t InstSize; 2285bcc4c909SMaksim Panchenko DisAsm->getInstruction(Inst, InstSize, InputSequence, 0, nulls()); 2286bcc4c909SMaksim Panchenko assert(InstSize == InputSequence.size() && 2287bcc4c909SMaksim Panchenko "Disassembled instruction size does not match the sequence."); 2288bcc4c909SMaksim Panchenko 2289a34c753fSRafael Auler SmallString<256> Code; 2290a34c753fSRafael Auler SmallVector<MCFixup, 4> Fixups; 2291a34c753fSRafael Auler 22920c049ea6SAlexis Engelke MCE->encodeInstruction(Inst, Code, Fixups, *STI); 2293bcc4c909SMaksim Panchenko auto OutputSequence = ArrayRef<uint8_t>((uint8_t *)Code.data(), Code.size()); 2294bcc4c909SMaksim Panchenko if (InputSequence != OutputSequence) { 2295a34c753fSRafael Auler if (opts::Verbosity > 1) { 2296a34c753fSRafael Auler errs() << "BOLT-WARNING: mismatched encoding detected\n" 2297bcc4c909SMaksim Panchenko << " input: " << InputSequence << '\n' 2298bcc4c909SMaksim Panchenko << " output: " << OutputSequence << '\n'; 2299a34c753fSRafael Auler } 2300a34c753fSRafael Auler return false; 2301a34c753fSRafael Auler } 2302a34c753fSRafael Auler 2303a34c753fSRafael Auler return true; 2304a34c753fSRafael Auler } 2305a34c753fSRafael Auler 2306a34c753fSRafael Auler uint64_t BinaryContext::getHotThreshold() const { 2307a34c753fSRafael Auler static uint64_t Threshold = 0; 2308a34c753fSRafael Auler if (Threshold == 0) { 230940c2e0faSMaksim Panchenko Threshold = std::max( 231040c2e0faSMaksim Panchenko (uint64_t)opts::ExecutionCountThreshold, 2311a34c753fSRafael Auler NumProfiledFuncs ? SumExecutionCount / (2 * NumProfiledFuncs) : 1); 2312a34c753fSRafael Auler } 2313a34c753fSRafael Auler return Threshold; 2314a34c753fSRafael Auler } 2315a34c753fSRafael Auler 231640c2e0faSMaksim Panchenko BinaryFunction *BinaryContext::getBinaryFunctionContainingAddress( 231740c2e0faSMaksim Panchenko uint64_t Address, bool CheckPastEnd, bool UseMaxSize) { 2318a34c753fSRafael Auler auto FI = BinaryFunctions.upper_bound(Address); 2319a34c753fSRafael Auler if (FI == BinaryFunctions.begin()) 2320a34c753fSRafael Auler return nullptr; 2321a34c753fSRafael Auler --FI; 2322a34c753fSRafael Auler 2323a34c753fSRafael Auler const uint64_t UsedSize = 2324a34c753fSRafael Auler UseMaxSize ? FI->second.getMaxSize() : FI->second.getSize(); 2325a34c753fSRafael Auler 2326a34c753fSRafael Auler if (Address >= FI->first + UsedSize + (CheckPastEnd ? 1 : 0)) 2327a34c753fSRafael Auler return nullptr; 2328a34c753fSRafael Auler 2329a34c753fSRafael Auler return &FI->second; 2330a34c753fSRafael Auler } 2331a34c753fSRafael Auler 233240c2e0faSMaksim Panchenko BinaryFunction *BinaryContext::getBinaryFunctionAtAddress(uint64_t Address) { 2333a34c753fSRafael Auler // First, try to find a function starting at the given address. If the 2334a34c753fSRafael Auler // function was folded, this will get us the original folded function if it 2335a34c753fSRafael Auler // wasn't removed from the list, e.g. in non-relocation mode. 2336a34c753fSRafael Auler auto BFI = BinaryFunctions.find(Address); 23373652483cSRafael Auler if (BFI != BinaryFunctions.end()) 2338a34c753fSRafael Auler return &BFI->second; 2339a34c753fSRafael Auler 2340a34c753fSRafael Auler // We might have folded the function matching the object at the given 2341a34c753fSRafael Auler // address. In such case, we look for a function matching the symbol 2342a34c753fSRafael Auler // registered at the original address. The new function (the one that the 2343a34c753fSRafael Auler // original was folded into) will hold the symbol. 2344a34c753fSRafael Auler if (const BinaryData *BD = getBinaryDataAtAddress(Address)) { 2345a34c753fSRafael Auler uint64_t EntryID = 0; 2346a34c753fSRafael Auler BinaryFunction *BF = getFunctionForSymbol(BD->getSymbol(), &EntryID); 2347a34c753fSRafael Auler if (BF && EntryID == 0) 2348a34c753fSRafael Auler return BF; 2349a34c753fSRafael Auler } 2350a34c753fSRafael Auler return nullptr; 2351a34c753fSRafael Auler } 2352a34c753fSRafael Auler 2353a34c753fSRafael Auler DebugAddressRangesVector BinaryContext::translateModuleAddressRanges( 2354a34c753fSRafael Auler const DWARFAddressRangesVector &InputRanges) const { 2355a34c753fSRafael Auler DebugAddressRangesVector OutputRanges; 2356a34c753fSRafael Auler 2357a34c753fSRafael Auler for (const DWARFAddressRange Range : InputRanges) { 2358a34c753fSRafael Auler auto BFI = BinaryFunctions.lower_bound(Range.LowPC); 2359a34c753fSRafael Auler while (BFI != BinaryFunctions.end()) { 2360a34c753fSRafael Auler const BinaryFunction &Function = BFI->second; 2361a34c753fSRafael Auler if (Function.getAddress() >= Range.HighPC) 2362a34c753fSRafael Auler break; 2363a34c753fSRafael Auler const DebugAddressRangesVector FunctionRanges = 2364a34c753fSRafael Auler Function.getOutputAddressRanges(); 2365d2c87699SAmir Ayupov llvm::move(FunctionRanges, std::back_inserter(OutputRanges)); 2366a34c753fSRafael Auler std::advance(BFI, 1); 2367a34c753fSRafael Auler } 2368a34c753fSRafael Auler } 2369a34c753fSRafael Auler 2370a34c753fSRafael Auler return OutputRanges; 2371a34c753fSRafael Auler } 2372a34c753fSRafael Auler 2373a34c753fSRafael Auler } // namespace bolt 2374a34c753fSRafael Auler } // namespace llvm 2375