12f09f445SMaksim Panchenko //===- bolt/Core/BinaryContext.cpp - Low-level context --------------------===// 2a34c753fSRafael Auler // 3a34c753fSRafael Auler // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4a34c753fSRafael Auler // See https://llvm.org/LICENSE.txt for license information. 5a34c753fSRafael Auler // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6a34c753fSRafael Auler // 7a34c753fSRafael Auler //===----------------------------------------------------------------------===// 8a34c753fSRafael Auler // 92f09f445SMaksim Panchenko // This file implements the BinaryContext class. 102f09f445SMaksim Panchenko // 11a34c753fSRafael Auler //===----------------------------------------------------------------------===// 12a34c753fSRafael Auler 13a34c753fSRafael Auler #include "bolt/Core/BinaryContext.h" 14a34c753fSRafael Auler #include "bolt/Core/BinaryEmitter.h" 15a34c753fSRafael Auler #include "bolt/Core/BinaryFunction.h" 16a34c753fSRafael Auler #include "bolt/Utils/CommandLineOpts.h" 17a34c753fSRafael Auler #include "bolt/Utils/Utils.h" 1872e5b14fSAmir Ayupov #include "llvm/ADT/STLExtras.h" 19a34c753fSRafael Auler #include "llvm/ADT/Twine.h" 20290e4823Sserge-sans-paille #include "llvm/DebugInfo/DWARF/DWARFCompileUnit.h" 21a34c753fSRafael Auler #include "llvm/DebugInfo/DWARF/DWARFFormValue.h" 22a34c753fSRafael Auler #include "llvm/DebugInfo/DWARF/DWARFUnit.h" 23a34c753fSRafael Auler #include "llvm/MC/MCAssembler.h" 24a34c753fSRafael Auler #include "llvm/MC/MCContext.h" 25a34c753fSRafael Auler #include "llvm/MC/MCDisassembler/MCDisassembler.h" 26a34c753fSRafael Auler #include "llvm/MC/MCInstPrinter.h" 27a34c753fSRafael Auler #include "llvm/MC/MCObjectStreamer.h" 28a34c753fSRafael Auler #include "llvm/MC/MCObjectWriter.h" 2957f7c7d9Sserge-sans-paille #include "llvm/MC/MCRegisterInfo.h" 30a34c753fSRafael Auler #include "llvm/MC/MCSectionELF.h" 31a34c753fSRafael Auler #include "llvm/MC/MCStreamer.h" 3257f7c7d9Sserge-sans-paille #include "llvm/MC/MCSubtargetInfo.h" 33a34c753fSRafael Auler #include "llvm/MC/MCSymbol.h" 34a34c753fSRafael Auler #include "llvm/Support/CommandLine.h" 3532d2473aSAmir Ayupov #include "llvm/Support/Error.h" 36a34c753fSRafael Auler #include "llvm/Support/Regex.h" 376aa735ceSAmir Ayupov #include <algorithm> 38a34c753fSRafael Auler #include <functional> 39a34c753fSRafael Auler #include <iterator> 406aa735ceSAmir Ayupov #include <unordered_set> 41a34c753fSRafael Auler 42a34c753fSRafael Auler using namespace llvm; 43a34c753fSRafael Auler 44a34c753fSRafael Auler #undef DEBUG_TYPE 45a34c753fSRafael Auler #define DEBUG_TYPE "bolt" 46a34c753fSRafael Auler 47a34c753fSRafael Auler namespace opts { 48a34c753fSRafael Auler 49b92436efSFangrui Song cl::opt<bool> NoHugePages("no-huge-pages", 50a34c753fSRafael Auler cl::desc("use regular size pages for code alignment"), 51b92436efSFangrui Song cl::Hidden, cl::cat(BoltCategory)); 52a34c753fSRafael Auler 53a34c753fSRafael Auler static cl::opt<bool> 54a34c753fSRafael Auler PrintDebugInfo("print-debug-info", 55a34c753fSRafael Auler cl::desc("print debug info when printing functions"), 56a34c753fSRafael Auler cl::Hidden, 57a34c753fSRafael Auler cl::ZeroOrMore, 58a34c753fSRafael Auler cl::cat(BoltCategory)); 59a34c753fSRafael Auler 60b92436efSFangrui Song cl::opt<bool> PrintRelocations( 61b92436efSFangrui Song "print-relocations", 62b92436efSFangrui Song cl::desc("print relocations when printing functions/objects"), cl::Hidden, 63a34c753fSRafael Auler cl::cat(BoltCategory)); 64a34c753fSRafael Auler 65a34c753fSRafael Auler static cl::opt<bool> 66a34c753fSRafael Auler PrintMemData("print-mem-data", 67a34c753fSRafael Auler cl::desc("print memory data annotations when printing functions"), 68a34c753fSRafael Auler cl::Hidden, 69a34c753fSRafael Auler cl::ZeroOrMore, 70a34c753fSRafael Auler cl::cat(BoltCategory)); 71a34c753fSRafael Auler 727d272722SAlexander Yermolovich cl::opt<std::string> CompDirOverride( 737d272722SAlexander Yermolovich "comp-dir-override", 74d251a328SJordan Brantner cl::desc("overrides DW_AT_comp_dir, and provides an alternative base " 757d272722SAlexander Yermolovich "location, which is used with DW_AT_dwo_name to construct a path " 767d272722SAlexander Yermolovich "to *.dwo files."), 777d272722SAlexander Yermolovich cl::Hidden, cl::init(""), cl::cat(BoltCategory)); 78a34c753fSRafael Auler } // namespace opts 79a34c753fSRafael Auler 80a34c753fSRafael Auler namespace llvm { 81a34c753fSRafael Auler namespace bolt { 82a34c753fSRafael Auler 83fa7dd491SAmir Ayupov char BOLTError::ID = 0; 84fa7dd491SAmir Ayupov 85fa7dd491SAmir Ayupov BOLTError::BOLTError(bool IsFatal, const Twine &S) 86fa7dd491SAmir Ayupov : IsFatal(IsFatal), Msg(S.str()) {} 87fa7dd491SAmir Ayupov 88fa7dd491SAmir Ayupov void BOLTError::log(raw_ostream &OS) const { 89fa7dd491SAmir Ayupov if (IsFatal) 90fa7dd491SAmir Ayupov OS << "FATAL "; 91fa7dd491SAmir Ayupov StringRef ErrMsg = StringRef(Msg); 92fa7dd491SAmir Ayupov // Prepend our error prefix if it is missing 93fa7dd491SAmir Ayupov if (ErrMsg.empty()) { 94fa7dd491SAmir Ayupov OS << "BOLT-ERROR\n"; 95fa7dd491SAmir Ayupov } else { 96fa7dd491SAmir Ayupov if (!ErrMsg.starts_with("BOLT-ERROR")) 97fa7dd491SAmir Ayupov OS << "BOLT-ERROR: "; 98fa7dd491SAmir Ayupov OS << ErrMsg << "\n"; 99fa7dd491SAmir Ayupov } 100fa7dd491SAmir Ayupov } 101fa7dd491SAmir Ayupov 102fa7dd491SAmir Ayupov std::error_code BOLTError::convertToErrorCode() const { 103fa7dd491SAmir Ayupov return inconvertibleErrorCode(); 104fa7dd491SAmir Ayupov } 105fa7dd491SAmir Ayupov 106fa7dd491SAmir Ayupov Error createNonFatalBOLTError(const Twine &S) { 107fa7dd491SAmir Ayupov return make_error<BOLTError>(/*IsFatal*/ false, S); 108fa7dd491SAmir Ayupov } 109fa7dd491SAmir Ayupov 110fa7dd491SAmir Ayupov Error createFatalBOLTError(const Twine &S) { 111fa7dd491SAmir Ayupov return make_error<BOLTError>(/*IsFatal*/ true, S); 112fa7dd491SAmir Ayupov } 113fa7dd491SAmir Ayupov 11452cf0711SAmir Ayupov void BinaryContext::logBOLTErrorsAndQuitOnFatal(Error E) { 11552cf0711SAmir Ayupov handleAllErrors(Error(std::move(E)), [&](const BOLTError &E) { 11652cf0711SAmir Ayupov if (!E.getMessage().empty()) 11752cf0711SAmir Ayupov E.log(this->errs()); 11852cf0711SAmir Ayupov if (E.isFatal()) 11952cf0711SAmir Ayupov exit(1); 12052cf0711SAmir Ayupov }); 12152cf0711SAmir Ayupov } 12252cf0711SAmir Ayupov 123a34c753fSRafael Auler BinaryContext::BinaryContext(std::unique_ptr<MCContext> Ctx, 124a34c753fSRafael Auler std::unique_ptr<DWARFContext> DwCtx, 125a34c753fSRafael Auler std::unique_ptr<Triple> TheTriple, 1262ccf7ed2SJared Wyles std::shared_ptr<orc::SymbolStringPool> SSP, 12740c2e0faSMaksim Panchenko const Target *TheTarget, std::string TripleName, 128a34c753fSRafael Auler std::unique_ptr<MCCodeEmitter> MCE, 129a34c753fSRafael Auler std::unique_ptr<MCObjectFileInfo> MOFI, 130a34c753fSRafael Auler std::unique_ptr<const MCAsmInfo> AsmInfo, 131a34c753fSRafael Auler std::unique_ptr<const MCInstrInfo> MII, 132a34c753fSRafael Auler std::unique_ptr<const MCSubtargetInfo> STI, 133a34c753fSRafael Auler std::unique_ptr<MCInstPrinter> InstPrinter, 134a34c753fSRafael Auler std::unique_ptr<const MCInstrAnalysis> MIA, 135a34c753fSRafael Auler std::unique_ptr<MCPlusBuilder> MIB, 136a34c753fSRafael Auler std::unique_ptr<const MCRegisterInfo> MRI, 13752cf0711SAmir Ayupov std::unique_ptr<MCDisassembler> DisAsm, 13852cf0711SAmir Ayupov JournalingStreams Logger) 13940c2e0faSMaksim Panchenko : Ctx(std::move(Ctx)), DwCtx(std::move(DwCtx)), 1402ccf7ed2SJared Wyles TheTriple(std::move(TheTriple)), SSP(std::move(SSP)), 1412ccf7ed2SJared Wyles TheTarget(TheTarget), TripleName(TripleName), MCE(std::move(MCE)), 1422ccf7ed2SJared Wyles MOFI(std::move(MOFI)), AsmInfo(std::move(AsmInfo)), MII(std::move(MII)), 1432ccf7ed2SJared Wyles STI(std::move(STI)), InstPrinter(std::move(InstPrinter)), 1442ccf7ed2SJared Wyles MIA(std::move(MIA)), MIB(std::move(MIB)), MRI(std::move(MRI)), 1452ccf7ed2SJared Wyles DisAsm(std::move(DisAsm)), Logger(Logger), InitialDynoStats(isAArch64()) { 146db65429dSElvina Yakubova RegularPageSize = isAArch64() ? RegularPageSizeAArch64 : RegularPageSizeX86; 147a34c753fSRafael Auler PageAlign = opts::NoHugePages ? RegularPageSize : HugePageSize; 148a34c753fSRafael Auler } 149a34c753fSRafael Auler 150a34c753fSRafael Auler BinaryContext::~BinaryContext() { 1513652483cSRafael Auler for (BinarySection *Section : Sections) 152a34c753fSRafael Auler delete Section; 1533652483cSRafael Auler for (BinaryFunction *InjectedFunction : InjectedBinaryFunctions) 154a34c753fSRafael Auler delete InjectedFunction; 1553652483cSRafael Auler for (std::pair<const uint64_t, JumpTable *> JTI : JumpTables) 156a34c753fSRafael Auler delete JTI.second; 157a34c753fSRafael Auler clearBinaryData(); 158a34c753fSRafael Auler } 159a34c753fSRafael Auler 160a34c753fSRafael Auler /// Create BinaryContext for a given architecture \p ArchName and 161a34c753fSRafael Auler /// triple \p TripleName. 162c0febca3SAmir Ayupov Expected<std::unique_ptr<BinaryContext>> BinaryContext::createBinaryContext( 1632ccf7ed2SJared Wyles Triple TheTriple, std::shared_ptr<orc::SymbolStringPool> SSP, 1642ccf7ed2SJared Wyles StringRef InputFileName, SubtargetFeatures *Features, bool IsPIC, 1652ccf7ed2SJared Wyles std::unique_ptr<DWARFContext> DwCtx, JournalingStreams Logger) { 166a34c753fSRafael Auler StringRef ArchName = ""; 16786bc4867SJob Noorman std::string FeaturesStr = ""; 168c0febca3SAmir Ayupov switch (TheTriple.getArch()) { 169a34c753fSRafael Auler case llvm::Triple::x86_64: 170c0febca3SAmir Ayupov if (Features) 171c0febca3SAmir Ayupov return createFatalBOLTError( 172c0febca3SAmir Ayupov "x86_64 target does not use SubtargetFeatures"); 173a34c753fSRafael Auler ArchName = "x86-64"; 174a34c753fSRafael Auler FeaturesStr = "+nopl"; 175a34c753fSRafael Auler break; 176a34c753fSRafael Auler case llvm::Triple::aarch64: 177c0febca3SAmir Ayupov if (Features) 178c0febca3SAmir Ayupov return createFatalBOLTError( 179c0febca3SAmir Ayupov "AArch64 target does not use SubtargetFeatures"); 180a34c753fSRafael Auler ArchName = "aarch64"; 18175641678SDenis Revunov FeaturesStr = "+all"; 182a34c753fSRafael Auler break; 18386bc4867SJob Noorman case llvm::Triple::riscv64: { 184f8730293SJob Noorman ArchName = "riscv64"; 185c0febca3SAmir Ayupov if (!Features) 186c0febca3SAmir Ayupov return createFatalBOLTError("RISCV target needs SubtargetFeatures"); 18786bc4867SJob Noorman // We rely on relaxation for some transformations (e.g., promoting all calls 18886bc4867SJob Noorman // to PseudoCALL and then making JITLink relax them). Since the relax 18986bc4867SJob Noorman // feature is not stored in the object file, we manually enable it. 19086bc4867SJob Noorman Features->AddFeature("relax"); 19186bc4867SJob Noorman FeaturesStr = Features->getString(); 192f8730293SJob Noorman break; 19386bc4867SJob Noorman } 194a34c753fSRafael Auler default: 19532d2473aSAmir Ayupov return createStringError(std::errc::not_supported, 19632d2473aSAmir Ayupov "BOLT-ERROR: Unrecognized machine in ELF file"); 197a34c753fSRafael Auler } 198a34c753fSRafael Auler 199c0febca3SAmir Ayupov const std::string TripleName = TheTriple.str(); 200a34c753fSRafael Auler 201a34c753fSRafael Auler std::string Error; 202a34c753fSRafael Auler const Target *TheTarget = 203c0febca3SAmir Ayupov TargetRegistry::lookupTarget(std::string(ArchName), TheTriple, Error); 20432d2473aSAmir Ayupov if (!TheTarget) 20532d2473aSAmir Ayupov return createStringError(make_error_code(std::errc::not_supported), 20632d2473aSAmir Ayupov Twine("BOLT-ERROR: ", Error)); 207a34c753fSRafael Auler 208a34c753fSRafael Auler std::unique_ptr<const MCRegisterInfo> MRI( 209a34c753fSRafael Auler TheTarget->createMCRegInfo(TripleName)); 21032d2473aSAmir Ayupov if (!MRI) 21132d2473aSAmir Ayupov return createStringError( 21232d2473aSAmir Ayupov make_error_code(std::errc::not_supported), 21332d2473aSAmir Ayupov Twine("BOLT-ERROR: no register info for target ", TripleName)); 214a34c753fSRafael Auler 215a34c753fSRafael Auler // Set up disassembler. 216c31af7cfSAmir Ayupov std::unique_ptr<MCAsmInfo> AsmInfo( 217a34c753fSRafael Auler TheTarget->createMCAsmInfo(*MRI, TripleName, MCTargetOptions())); 21832d2473aSAmir Ayupov if (!AsmInfo) 21932d2473aSAmir Ayupov return createStringError( 22032d2473aSAmir Ayupov make_error_code(std::errc::not_supported), 22132d2473aSAmir Ayupov Twine("BOLT-ERROR: no assembly info for target ", TripleName)); 222c31af7cfSAmir Ayupov // BOLT creates "func@PLT" symbols for PLT entries. In function assembly dump 223c31af7cfSAmir Ayupov // we want to emit such names as using @PLT without double quotes to convey 224c31af7cfSAmir Ayupov // variant kind to the assembler. BOLT doesn't rely on the linker so we can 225c31af7cfSAmir Ayupov // override the default AsmInfo behavior to emit names the way we want. 226c31af7cfSAmir Ayupov AsmInfo->setAllowAtInName(true); 227a34c753fSRafael Auler 228a34c753fSRafael Auler std::unique_ptr<const MCSubtargetInfo> STI( 229a34c753fSRafael Auler TheTarget->createMCSubtargetInfo(TripleName, "", FeaturesStr)); 23032d2473aSAmir Ayupov if (!STI) 23132d2473aSAmir Ayupov return createStringError( 23232d2473aSAmir Ayupov make_error_code(std::errc::not_supported), 23332d2473aSAmir Ayupov Twine("BOLT-ERROR: no subtarget info for target ", TripleName)); 234a34c753fSRafael Auler 235a34c753fSRafael Auler std::unique_ptr<const MCInstrInfo> MII(TheTarget->createMCInstrInfo()); 23632d2473aSAmir Ayupov if (!MII) 23732d2473aSAmir Ayupov return createStringError( 23832d2473aSAmir Ayupov make_error_code(std::errc::not_supported), 23932d2473aSAmir Ayupov Twine("BOLT-ERROR: no instruction info for target ", TripleName)); 240a34c753fSRafael Auler 241a34c753fSRafael Auler std::unique_ptr<MCContext> Ctx( 242c0febca3SAmir Ayupov new MCContext(TheTriple, AsmInfo.get(), MRI.get(), STI.get())); 243a34c753fSRafael Auler std::unique_ptr<MCObjectFileInfo> MOFI( 244a34c753fSRafael Auler TheTarget->createMCObjectFileInfo(*Ctx, IsPIC)); 245a34c753fSRafael Auler Ctx->setObjectFileInfo(MOFI.get()); 246a34c753fSRafael Auler // We do not support X86 Large code model. Change this in the future. 247a34c753fSRafael Auler bool Large = false; 248c0febca3SAmir Ayupov if (TheTriple.getArch() == llvm::Triple::aarch64) 249a34c753fSRafael Auler Large = true; 250a34c753fSRafael Auler unsigned LSDAEncoding = 251a34c753fSRafael Auler Large ? dwarf::DW_EH_PE_absptr : dwarf::DW_EH_PE_udata4; 252a34c753fSRafael Auler if (IsPIC) { 253a34c753fSRafael Auler LSDAEncoding = dwarf::DW_EH_PE_pcrel | 254a34c753fSRafael Auler (Large ? dwarf::DW_EH_PE_sdata8 : dwarf::DW_EH_PE_sdata4); 255a34c753fSRafael Auler } 256a34c753fSRafael Auler 257a34c753fSRafael Auler std::unique_ptr<MCDisassembler> DisAsm( 258a34c753fSRafael Auler TheTarget->createMCDisassembler(*STI, *Ctx)); 259a34c753fSRafael Auler 26032d2473aSAmir Ayupov if (!DisAsm) 26132d2473aSAmir Ayupov return createStringError( 26232d2473aSAmir Ayupov make_error_code(std::errc::not_supported), 26332d2473aSAmir Ayupov Twine("BOLT-ERROR: no disassembler info for target ", TripleName)); 264a34c753fSRafael Auler 265a34c753fSRafael Auler std::unique_ptr<const MCInstrAnalysis> MIA( 266a34c753fSRafael Auler TheTarget->createMCInstrAnalysis(MII.get())); 26732d2473aSAmir Ayupov if (!MIA) 26832d2473aSAmir Ayupov return createStringError( 26932d2473aSAmir Ayupov make_error_code(std::errc::not_supported), 27032d2473aSAmir Ayupov Twine("BOLT-ERROR: failed to create instruction analysis for target ", 27132d2473aSAmir Ayupov TripleName)); 272a34c753fSRafael Auler 273a34c753fSRafael Auler int AsmPrinterVariant = AsmInfo->getAssemblerDialect(); 274a34c753fSRafael Auler std::unique_ptr<MCInstPrinter> InstructionPrinter( 275c0febca3SAmir Ayupov TheTarget->createMCInstPrinter(TheTriple, AsmPrinterVariant, *AsmInfo, 276a34c753fSRafael Auler *MII, *MRI)); 27732d2473aSAmir Ayupov if (!InstructionPrinter) 27832d2473aSAmir Ayupov return createStringError( 27932d2473aSAmir Ayupov make_error_code(std::errc::not_supported), 28032d2473aSAmir Ayupov Twine("BOLT-ERROR: no instruction printer for target ", TripleName)); 281a34c753fSRafael Auler InstructionPrinter->setPrintImmHex(true); 282a34c753fSRafael Auler 283a34c753fSRafael Auler std::unique_ptr<MCCodeEmitter> MCE( 2842aed07e9SShao-Ce SUN TheTarget->createMCCodeEmitter(*MII, *Ctx)); 285a34c753fSRafael Auler 286a34c753fSRafael Auler auto BC = std::make_unique<BinaryContext>( 287c0febca3SAmir Ayupov std::move(Ctx), std::move(DwCtx), std::make_unique<Triple>(TheTriple), 2882ccf7ed2SJared Wyles std::move(SSP), TheTarget, std::string(TripleName), std::move(MCE), 2892ccf7ed2SJared Wyles std::move(MOFI), std::move(AsmInfo), std::move(MII), std::move(STI), 29040c2e0faSMaksim Panchenko std::move(InstructionPrinter), std::move(MIA), nullptr, std::move(MRI), 29152cf0711SAmir Ayupov std::move(DisAsm), Logger); 292a34c753fSRafael Auler 293a34c753fSRafael Auler BC->LSDAEncoding = LSDAEncoding; 294a34c753fSRafael Auler 295a34c753fSRafael Auler BC->MAB = std::unique_ptr<MCAsmBackend>( 296a34c753fSRafael Auler BC->TheTarget->createMCAsmBackend(*BC->STI, *BC->MRI, MCTargetOptions())); 297a34c753fSRafael Auler 298c0febca3SAmir Ayupov BC->setFilename(InputFileName); 299a34c753fSRafael Auler 300a34c753fSRafael Auler BC->HasFixedLoadAddress = !IsPIC; 301a34c753fSRafael Auler 302e290133cSMaksim Panchenko BC->SymbolicDisAsm = std::unique_ptr<MCDisassembler>( 303e290133cSMaksim Panchenko BC->TheTarget->createMCDisassembler(*BC->STI, *BC->Ctx)); 304e290133cSMaksim Panchenko 305e290133cSMaksim Panchenko if (!BC->SymbolicDisAsm) 306e290133cSMaksim Panchenko return createStringError( 307e290133cSMaksim Panchenko make_error_code(std::errc::not_supported), 308e290133cSMaksim Panchenko Twine("BOLT-ERROR: no disassembler info for target ", TripleName)); 309e290133cSMaksim Panchenko 31063686af1SVladislav Khmelevsky return std::move(BC); 311a34c753fSRafael Auler } 312a34c753fSRafael Auler 313a34c753fSRafael Auler bool BinaryContext::forceSymbolRelocations(StringRef SymbolName) const { 31440c2e0faSMaksim Panchenko if (opts::HotText && 31540c2e0faSMaksim Panchenko (SymbolName == "__hot_start" || SymbolName == "__hot_end")) 316a34c753fSRafael Auler return true; 317a34c753fSRafael Auler 31840c2e0faSMaksim Panchenko if (opts::HotData && 31940c2e0faSMaksim Panchenko (SymbolName == "__hot_data_start" || SymbolName == "__hot_data_end")) 320a34c753fSRafael Auler return true; 321a34c753fSRafael Auler 322a34c753fSRafael Auler if (SymbolName == "_end") 323a34c753fSRafael Auler return true; 324a34c753fSRafael Auler 325a34c753fSRafael Auler return false; 326a34c753fSRafael Auler } 327a34c753fSRafael Auler 328a34c753fSRafael Auler std::unique_ptr<MCObjectWriter> 329a34c753fSRafael Auler BinaryContext::createObjectWriter(raw_pwrite_stream &OS) { 330a34c753fSRafael Auler return MAB->createObjectWriter(OS); 331a34c753fSRafael Auler } 332a34c753fSRafael Auler 333a34c753fSRafael Auler bool BinaryContext::validateObjectNesting() const { 334a34c753fSRafael Auler auto Itr = BinaryDataMap.begin(); 335a34c753fSRafael Auler auto End = BinaryDataMap.end(); 336a34c753fSRafael Auler bool Valid = true; 337a34c753fSRafael Auler while (Itr != End) { 338a34c753fSRafael Auler auto Next = std::next(Itr); 339a34c753fSRafael Auler while (Next != End && 340a34c753fSRafael Auler Itr->second->getSection() == Next->second->getSection() && 341a34c753fSRafael Auler Itr->second->containsRange(Next->second->getAddress(), 342a34c753fSRafael Auler Next->second->getSize())) { 343a34c753fSRafael Auler if (Next->second->Parent != Itr->second) { 34452cf0711SAmir Ayupov this->errs() << "BOLT-WARNING: object nesting incorrect for:\n" 345a34c753fSRafael Auler << "BOLT-WARNING: " << *Itr->second << "\n" 346a34c753fSRafael Auler << "BOLT-WARNING: " << *Next->second << "\n"; 347a34c753fSRafael Auler Valid = false; 348a34c753fSRafael Auler } 349a34c753fSRafael Auler ++Next; 350a34c753fSRafael Auler } 351a34c753fSRafael Auler Itr = Next; 352a34c753fSRafael Auler } 353a34c753fSRafael Auler return Valid; 354a34c753fSRafael Auler } 355a34c753fSRafael Auler 356a34c753fSRafael Auler bool BinaryContext::validateHoles() const { 357a34c753fSRafael Auler bool Valid = true; 358a34c753fSRafael Auler for (BinarySection &Section : sections()) { 359a34c753fSRafael Auler for (const Relocation &Rel : Section.relocations()) { 360a34c753fSRafael Auler uint64_t RelAddr = Rel.Offset + Section.getAddress(); 361a34c753fSRafael Auler const BinaryData *BD = getBinaryDataContainingAddress(RelAddr); 362a34c753fSRafael Auler if (!BD) { 36352cf0711SAmir Ayupov this->errs() 36452cf0711SAmir Ayupov << "BOLT-WARNING: no BinaryData found for relocation at address" 36552cf0711SAmir Ayupov << " 0x" << Twine::utohexstr(RelAddr) << " in " << Section.getName() 36652cf0711SAmir Ayupov << "\n"; 367a34c753fSRafael Auler Valid = false; 368a34c753fSRafael Auler } else if (!BD->getAtomicRoot()) { 36952cf0711SAmir Ayupov this->errs() 37052cf0711SAmir Ayupov << "BOLT-WARNING: no atomic BinaryData found for relocation at " 371a34c753fSRafael Auler << "address 0x" << Twine::utohexstr(RelAddr) << " in " 372a34c753fSRafael Auler << Section.getName() << "\n"; 373a34c753fSRafael Auler Valid = false; 374a34c753fSRafael Auler } 375a34c753fSRafael Auler } 376a34c753fSRafael Auler } 377a34c753fSRafael Auler return Valid; 378a34c753fSRafael Auler } 379a34c753fSRafael Auler 380a34c753fSRafael Auler void BinaryContext::updateObjectNesting(BinaryDataMapType::iterator GAI) { 381a34c753fSRafael Auler const uint64_t Address = GAI->second->getAddress(); 382a34c753fSRafael Auler const uint64_t Size = GAI->second->getSize(); 383a34c753fSRafael Auler 38440c2e0faSMaksim Panchenko auto fixParents = [&](BinaryDataMapType::iterator Itr, 38540c2e0faSMaksim Panchenko BinaryData *NewParent) { 386a34c753fSRafael Auler BinaryData *OldParent = Itr->second->Parent; 387a34c753fSRafael Auler Itr->second->Parent = NewParent; 388a34c753fSRafael Auler ++Itr; 389a34c753fSRafael Auler while (Itr != BinaryDataMap.end() && OldParent && 390a34c753fSRafael Auler Itr->second->Parent == OldParent) { 391a34c753fSRafael Auler Itr->second->Parent = NewParent; 392a34c753fSRafael Auler ++Itr; 393a34c753fSRafael Auler } 394a34c753fSRafael Auler }; 395a34c753fSRafael Auler 396a34c753fSRafael Auler // Check if the previous symbol contains the newly added symbol. 397a34c753fSRafael Auler if (GAI != BinaryDataMap.begin()) { 398a34c753fSRafael Auler BinaryData *Prev = std::prev(GAI)->second; 399a34c753fSRafael Auler while (Prev) { 400a34c753fSRafael Auler if (Prev->getSection() == GAI->second->getSection() && 401a34c753fSRafael Auler Prev->containsRange(Address, Size)) { 402a34c753fSRafael Auler fixParents(GAI, Prev); 403a34c753fSRafael Auler } else { 404a34c753fSRafael Auler fixParents(GAI, nullptr); 405a34c753fSRafael Auler } 406a34c753fSRafael Auler Prev = Prev->Parent; 407a34c753fSRafael Auler } 408a34c753fSRafael Auler } 409a34c753fSRafael Auler 410a34c753fSRafael Auler // Check if the newly added symbol contains any subsequent symbols. 411a34c753fSRafael Auler if (Size != 0) { 412a34c753fSRafael Auler BinaryData *BD = GAI->second->Parent ? GAI->second->Parent : GAI->second; 413a34c753fSRafael Auler auto Itr = std::next(GAI); 41440c2e0faSMaksim Panchenko while ( 41540c2e0faSMaksim Panchenko Itr != BinaryDataMap.end() && 41640c2e0faSMaksim Panchenko BD->containsRange(Itr->second->getAddress(), Itr->second->getSize())) { 417a34c753fSRafael Auler Itr->second->Parent = BD; 418a34c753fSRafael Auler ++Itr; 419a34c753fSRafael Auler } 420a34c753fSRafael Auler } 421a34c753fSRafael Auler } 422a34c753fSRafael Auler 423a34c753fSRafael Auler iterator_range<BinaryContext::binary_data_iterator> 424a34c753fSRafael Auler BinaryContext::getSubBinaryData(BinaryData *BD) { 425a34c753fSRafael Auler auto Start = std::next(BinaryDataMap.find(BD->getAddress())); 426a34c753fSRafael Auler auto End = Start; 4273652483cSRafael Auler while (End != BinaryDataMap.end() && BD->isAncestorOf(End->second)) 428a34c753fSRafael Auler ++End; 429a34c753fSRafael Auler return make_range(Start, End); 430a34c753fSRafael Auler } 431a34c753fSRafael Auler 432a34c753fSRafael Auler std::pair<const MCSymbol *, uint64_t> 433a34c753fSRafael Auler BinaryContext::handleAddressRef(uint64_t Address, BinaryFunction &BF, 434a34c753fSRafael Auler bool IsPCRel) { 435a34c753fSRafael Auler if (isAArch64()) { 436a34c753fSRafael Auler // Check if this is an access to a constant island and create bookkeeping 437a34c753fSRafael Auler // to keep track of it and emit it later as part of this function. 438a34c753fSRafael Auler if (MCSymbol *IslandSym = BF.getOrCreateIslandAccess(Address)) 4398d1fc45dSRafael Auler return std::make_pair(IslandSym, 0); 440a34c753fSRafael Auler 441a34c753fSRafael Auler // Detect custom code written in assembly that refers to arbitrary 442a34c753fSRafael Auler // constant islands from other functions. Write this reference so we 443a34c753fSRafael Auler // can pull this constant island and emit it as part of this function 444a34c753fSRafael Auler // too. 445a34c753fSRafael Auler auto IslandIter = AddressToConstantIslandMap.lower_bound(Address); 4466040415eSDenis Revunov 4476040415eSDenis Revunov if (IslandIter != AddressToConstantIslandMap.begin() && 4486040415eSDenis Revunov (IslandIter == AddressToConstantIslandMap.end() || 4496040415eSDenis Revunov IslandIter->first > Address)) 4506040415eSDenis Revunov --IslandIter; 4516040415eSDenis Revunov 452a34c753fSRafael Auler if (IslandIter != AddressToConstantIslandMap.end()) { 4537117af52SVladislav Khmelevsky // Fall-back to referencing the original constant island in the presence 4547117af52SVladislav Khmelevsky // of dynamic relocs, as we currently do not support cloning them. 4557117af52SVladislav Khmelevsky // Notice: we might fail to link because of this, if the original constant 4567117af52SVladislav Khmelevsky // island we are referring would be emitted too far away. 4577117af52SVladislav Khmelevsky if (IslandIter->second->hasDynamicRelocationAtIsland()) { 4587117af52SVladislav Khmelevsky MCSymbol *IslandSym = 4597117af52SVladislav Khmelevsky IslandIter->second->getOrCreateIslandAccess(Address); 4607117af52SVladislav Khmelevsky if (IslandSym) 4617117af52SVladislav Khmelevsky return std::make_pair(IslandSym, 0); 4627117af52SVladislav Khmelevsky } else if (MCSymbol *IslandSym = 4637117af52SVladislav Khmelevsky IslandIter->second->getOrCreateProxyIslandAccess(Address, 4647117af52SVladislav Khmelevsky BF)) { 465a34c753fSRafael Auler BF.createIslandDependency(IslandSym, IslandIter->second); 4668d1fc45dSRafael Auler return std::make_pair(IslandSym, 0); 467a34c753fSRafael Auler } 468a34c753fSRafael Auler } 469a34c753fSRafael Auler } 470a34c753fSRafael Auler 471a34c753fSRafael Auler // Note that the address does not necessarily have to reside inside 472a34c753fSRafael Auler // a section, it could be an absolute address too. 473a34c753fSRafael Auler ErrorOr<BinarySection &> Section = getSectionForAddress(Address); 474a34c753fSRafael Auler if (Section && Section->isText()) { 475a34c753fSRafael Auler if (BF.containsAddress(Address, /*UseMaxSize=*/isAArch64())) { 476a34c753fSRafael Auler if (Address != BF.getAddress()) { 477a34c753fSRafael Auler // The address could potentially escape. Mark it as another entry 478a34c753fSRafael Auler // point into the function. 479a34c753fSRafael Auler if (opts::Verbosity >= 1) { 48052cf0711SAmir Ayupov this->outs() << "BOLT-INFO: potentially escaped address 0x" 48152cf0711SAmir Ayupov << Twine::utohexstr(Address) << " in function " << BF 48252cf0711SAmir Ayupov << '\n'; 483a34c753fSRafael Auler } 484a34c753fSRafael Auler BF.HasInternalLabelReference = true; 485a34c753fSRafael Auler return std::make_pair( 4868d1fc45dSRafael Auler BF.addEntryPointAtOffset(Address - BF.getAddress()), 0); 487a34c753fSRafael Auler } 488a34c753fSRafael Auler } else { 48935efe1d8SVladislav Khmelevsky addInterproceduralReference(&BF, Address); 490a34c753fSRafael Auler } 491a34c753fSRafael Auler } 492a34c753fSRafael Auler 493a34c753fSRafael Auler // With relocations, catch jump table references outside of the basic block 494a34c753fSRafael Auler // containing the indirect jump. 495a34c753fSRafael Auler if (HasRelocations) { 496a34c753fSRafael Auler const MemoryContentsType MemType = analyzeMemoryAt(Address, BF); 497a34c753fSRafael Auler if (MemType == MemoryContentsType::POSSIBLE_PIC_JUMP_TABLE && IsPCRel) { 498a34c753fSRafael Auler const MCSymbol *Symbol = 499a34c753fSRafael Auler getOrCreateJumpTable(BF, Address, JumpTable::JTT_PIC); 500a34c753fSRafael Auler 5018d1fc45dSRafael Auler return std::make_pair(Symbol, 0); 502a34c753fSRafael Auler } 503a34c753fSRafael Auler } 504a34c753fSRafael Auler 5053652483cSRafael Auler if (BinaryData *BD = getBinaryDataContainingAddress(Address)) 506a34c753fSRafael Auler return std::make_pair(BD->getSymbol(), Address - BD->getAddress()); 507a34c753fSRafael Auler 508a34c753fSRafael Auler // TODO: use DWARF info to get size/alignment here? 509a34c753fSRafael Auler MCSymbol *TargetSymbol = getOrCreateGlobalSymbol(Address, "DATAat"); 510a34c753fSRafael Auler LLVM_DEBUG(dbgs() << "Created symbol " << TargetSymbol->getName() << '\n'); 5118d1fc45dSRafael Auler return std::make_pair(TargetSymbol, 0); 512a34c753fSRafael Auler } 513a34c753fSRafael Auler 51440c2e0faSMaksim Panchenko MemoryContentsType BinaryContext::analyzeMemoryAt(uint64_t Address, 51540c2e0faSMaksim Panchenko BinaryFunction &BF) { 516a34c753fSRafael Auler if (!isX86()) 517a34c753fSRafael Auler return MemoryContentsType::UNKNOWN; 518a34c753fSRafael Auler 519a34c753fSRafael Auler ErrorOr<BinarySection &> Section = getSectionForAddress(Address); 520a34c753fSRafael Auler if (!Section) { 521a34c753fSRafael Auler // No section - possibly an absolute address. Since we don't allow 522a34c753fSRafael Auler // internal function addresses to escape the function scope - we 523a34c753fSRafael Auler // consider it a tail call. 524a34c753fSRafael Auler if (opts::Verbosity > 1) { 52552cf0711SAmir Ayupov this->errs() << "BOLT-WARNING: no section for address 0x" 52652cf0711SAmir Ayupov << Twine::utohexstr(Address) << " referenced from function " 52752cf0711SAmir Ayupov << BF << '\n'; 528a34c753fSRafael Auler } 529a34c753fSRafael Auler return MemoryContentsType::UNKNOWN; 530a34c753fSRafael Auler } 531a34c753fSRafael Auler 532a34c753fSRafael Auler if (Section->isVirtual()) { 533a34c753fSRafael Auler // The contents are filled at runtime. 534a34c753fSRafael Auler return MemoryContentsType::UNKNOWN; 535a34c753fSRafael Auler } 536a34c753fSRafael Auler 537a34c753fSRafael Auler // No support for jump tables in code yet. 538a34c753fSRafael Auler if (Section->isText()) 539a34c753fSRafael Auler return MemoryContentsType::UNKNOWN; 540a34c753fSRafael Auler 541a34c753fSRafael Auler // Start with checking for PIC jump table. We expect non-PIC jump tables 542a34c753fSRafael Auler // to have high 32 bits set to 0. 543a34c753fSRafael Auler if (analyzeJumpTable(Address, JumpTable::JTT_PIC, BF)) 544a34c753fSRafael Auler return MemoryContentsType::POSSIBLE_PIC_JUMP_TABLE; 545a34c753fSRafael Auler 546a34c753fSRafael Auler if (analyzeJumpTable(Address, JumpTable::JTT_NORMAL, BF)) 547a34c753fSRafael Auler return MemoryContentsType::POSSIBLE_JUMP_TABLE; 548a34c753fSRafael Auler 549a34c753fSRafael Auler return MemoryContentsType::UNKNOWN; 550a34c753fSRafael Auler } 551a34c753fSRafael Auler 55208ab4fafSAmir Ayupov bool BinaryContext::analyzeJumpTable(const uint64_t Address, 55308ab4fafSAmir Ayupov const JumpTable::JumpTableType Type, 55408ab4fafSAmir Ayupov const BinaryFunction &BF, 55508ab4fafSAmir Ayupov const uint64_t NextJTAddress, 55608ab4fafSAmir Ayupov JumpTable::AddressesType *EntriesAsAddress, 55708ab4fafSAmir Ayupov bool *HasEntryInFragment) const { 55843d0891dSMaksim Panchenko // Target address of __builtin_unreachable. 55943d0891dSMaksim Panchenko const uint64_t UnreachableAddress = BF.getAddress() + BF.getSize(); 56043d0891dSMaksim Panchenko 561a34c753fSRafael Auler // Is one of the targets __builtin_unreachable? 562a34c753fSRafael Auler bool HasUnreachable = false; 563a34c753fSRafael Auler 5641e4ee588SMaksim Panchenko // Does one of the entries match function start address? 5651e4ee588SMaksim Panchenko bool HasStartAsEntry = false; 5661e4ee588SMaksim Panchenko 567a34c753fSRafael Auler // Number of targets other than __builtin_unreachable. 568a34c753fSRafael Auler uint64_t NumRealEntries = 0; 569a34c753fSRafael Auler 57043d0891dSMaksim Panchenko // Size of the jump table without trailing __builtin_unreachable entries. 57143d0891dSMaksim Panchenko size_t TrimmedSize = 0; 57243d0891dSMaksim Panchenko 57343d0891dSMaksim Panchenko auto addEntryAddress = [&](uint64_t EntryAddress, bool Unreachable = false) { 57443d0891dSMaksim Panchenko if (!EntriesAsAddress) 57543d0891dSMaksim Panchenko return; 57605523dc3SHuan Nguyen EntriesAsAddress->emplace_back(EntryAddress); 57743d0891dSMaksim Panchenko if (!Unreachable) 57843d0891dSMaksim Panchenko TrimmedSize = EntriesAsAddress->size(); 579a34c753fSRafael Auler }; 580a34c753fSRafael Auler 58108ab4fafSAmir Ayupov ErrorOr<const BinarySection &> Section = getSectionForAddress(Address); 582a34c753fSRafael Auler if (!Section) 583a34c753fSRafael Auler return false; 584a34c753fSRafael Auler 585a34c753fSRafael Auler // The upper bound is defined by containing object, section limits, and 586a34c753fSRafael Auler // the next jump table in memory. 587a34c753fSRafael Auler uint64_t UpperBound = Section->getEndAddress(); 588a34c753fSRafael Auler const BinaryData *JumpTableBD = getBinaryDataAtAddress(Address); 589a34c753fSRafael Auler if (JumpTableBD && JumpTableBD->getSize()) { 590a34c753fSRafael Auler assert(JumpTableBD->getEndAddress() <= UpperBound && 591a34c753fSRafael Auler "data object cannot cross a section boundary"); 592a34c753fSRafael Auler UpperBound = JumpTableBD->getEndAddress(); 593a34c753fSRafael Auler } 5943652483cSRafael Auler if (NextJTAddress) 595a34c753fSRafael Auler UpperBound = std::min(NextJTAddress, UpperBound); 596a34c753fSRafael Auler 597556efdbaSAmir Ayupov LLVM_DEBUG({ 598556efdbaSAmir Ayupov using JTT = JumpTable::JumpTableType; 599556efdbaSAmir Ayupov dbgs() << formatv("BOLT-DEBUG: analyzeJumpTable @{0:x} in {1}, JTT={2}\n", 600556efdbaSAmir Ayupov Address, BF.getPrintName(), 601556efdbaSAmir Ayupov Type == JTT::JTT_PIC ? "PIC" : "Normal"); 602556efdbaSAmir Ayupov }); 603a34c753fSRafael Auler const uint64_t EntrySize = getJumpTableEntrySize(Type); 604a34c753fSRafael Auler for (uint64_t EntryAddress = Address; EntryAddress <= UpperBound - EntrySize; 605a34c753fSRafael Auler EntryAddress += EntrySize) { 606a34c753fSRafael Auler LLVM_DEBUG(dbgs() << " * Checking 0x" << Twine::utohexstr(EntryAddress) 607a34c753fSRafael Auler << " -> "); 608a34c753fSRafael Auler // Check if there's a proper relocation against the jump table entry. 609a34c753fSRafael Auler if (HasRelocations) { 610a34c753fSRafael Auler if (Type == JumpTable::JTT_PIC && 611a34c753fSRafael Auler !DataPCRelocations.count(EntryAddress)) { 612a34c753fSRafael Auler LLVM_DEBUG( 613a34c753fSRafael Auler dbgs() << "FAIL: JTT_PIC table, no relocation for this address\n"); 614a34c753fSRafael Auler break; 615a34c753fSRafael Auler } 616a34c753fSRafael Auler if (Type == JumpTable::JTT_NORMAL && !getRelocationAt(EntryAddress)) { 617a34c753fSRafael Auler LLVM_DEBUG( 618a34c753fSRafael Auler dbgs() 619a34c753fSRafael Auler << "FAIL: JTT_NORMAL table, no relocation for this address\n"); 620a34c753fSRafael Auler break; 621a34c753fSRafael Auler } 622a34c753fSRafael Auler } 623a34c753fSRafael Auler 62440c2e0faSMaksim Panchenko const uint64_t Value = 62540c2e0faSMaksim Panchenko (Type == JumpTable::JTT_PIC) 626a34c753fSRafael Auler ? Address + *getSignedValueAtAddress(EntryAddress, EntrySize) 627a34c753fSRafael Auler : *getPointerAtAddress(EntryAddress); 628a34c753fSRafael Auler 629a34c753fSRafael Auler // __builtin_unreachable() case. 63043d0891dSMaksim Panchenko if (Value == UnreachableAddress) { 63143d0891dSMaksim Panchenko addEntryAddress(Value, /*Unreachable*/ true); 632a34c753fSRafael Auler HasUnreachable = true; 633556efdbaSAmir Ayupov LLVM_DEBUG(dbgs() << formatv("OK: {0:x} __builtin_unreachable\n", Value)); 634a34c753fSRafael Auler continue; 635a34c753fSRafael Auler } 636a34c753fSRafael Auler 6371e4ee588SMaksim Panchenko // Function start is another special case. It is allowed in the jump table, 6381e4ee588SMaksim Panchenko // but we need at least one another regular entry to distinguish the table 6391e4ee588SMaksim Panchenko // from, e.g. a function pointer array. 6401e4ee588SMaksim Panchenko if (Value == BF.getAddress()) { 6411e4ee588SMaksim Panchenko HasStartAsEntry = true; 6421e4ee588SMaksim Panchenko addEntryAddress(Value); 6431e4ee588SMaksim Panchenko continue; 6441e4ee588SMaksim Panchenko } 6451e4ee588SMaksim Panchenko 646a34c753fSRafael Auler // Function or one of its fragments. 64708ab4fafSAmir Ayupov const BinaryFunction *TargetBF = getBinaryFunctionContainingAddress(Value); 6481e4ee588SMaksim Panchenko const bool DoesBelongToFunction = 6491e4ee588SMaksim Panchenko BF.containsAddress(Value) || 65083ea7ce3SAmir Ayupov (TargetBF && areRelatedFragments(TargetBF, &BF)); 6511e4ee588SMaksim Panchenko if (!DoesBelongToFunction) { 652a34c753fSRafael Auler LLVM_DEBUG({ 653a34c753fSRafael Auler if (!BF.containsAddress(Value)) { 654a34c753fSRafael Auler dbgs() << "FAIL: function doesn't contain this address\n"; 655a34c753fSRafael Auler if (TargetBF) { 656a34c753fSRafael Auler dbgs() << " ! function containing this address: " 657a34c753fSRafael Auler << TargetBF->getPrintName() << '\n'; 658556efdbaSAmir Ayupov if (TargetBF->isFragment()) { 659556efdbaSAmir Ayupov dbgs() << " ! is a fragment"; 660556efdbaSAmir Ayupov for (BinaryFunction *Parent : TargetBF->ParentFragments) 661556efdbaSAmir Ayupov dbgs() << ", parent: " << Parent->getPrintName(); 662556efdbaSAmir Ayupov dbgs() << '\n'; 663556efdbaSAmir Ayupov } 664a34c753fSRafael Auler } 665a34c753fSRafael Auler } 666a34c753fSRafael Auler }); 667a34c753fSRafael Auler break; 668a34c753fSRafael Auler } 669a34c753fSRafael Auler 670a34c753fSRafael Auler // Check there's an instruction at this offset. 671a34c753fSRafael Auler if (TargetBF->getState() == BinaryFunction::State::Disassembled && 672a34c753fSRafael Auler !TargetBF->getInstructionAtOffset(Value - TargetBF->getAddress())) { 673556efdbaSAmir Ayupov LLVM_DEBUG(dbgs() << formatv("FAIL: no instruction at {0:x}\n", Value)); 674a34c753fSRafael Auler break; 675a34c753fSRafael Auler } 676a34c753fSRafael Auler 677a34c753fSRafael Auler ++NumRealEntries; 678556efdbaSAmir Ayupov LLVM_DEBUG(dbgs() << formatv("OK: {0:x} real entry\n", Value)); 679a34c753fSRafael Auler 68008ab4fafSAmir Ayupov if (TargetBF != &BF && HasEntryInFragment) 68108ab4fafSAmir Ayupov *HasEntryInFragment = true; 68205523dc3SHuan Nguyen addEntryAddress(Value); 683a34c753fSRafael Auler } 684a34c753fSRafael Auler 68543d0891dSMaksim Panchenko // Trim direct/normal jump table to exclude trailing unreachable entries that 68643d0891dSMaksim Panchenko // can collide with a function address. 68743d0891dSMaksim Panchenko if (Type == JumpTable::JTT_NORMAL && EntriesAsAddress && 68843d0891dSMaksim Panchenko TrimmedSize != EntriesAsAddress->size() && 68943d0891dSMaksim Panchenko getBinaryFunctionAtAddress(UnreachableAddress)) 69043d0891dSMaksim Panchenko EntriesAsAddress->resize(TrimmedSize); 69143d0891dSMaksim Panchenko 692a34c753fSRafael Auler // It's a jump table if the number of real entries is more than 1, or there's 6931e4ee588SMaksim Panchenko // one real entry and one or more special targets. If there are only multiple 6941e4ee588SMaksim Panchenko // special targets, then it's not a jump table. 6951e4ee588SMaksim Panchenko return NumRealEntries + (HasUnreachable || HasStartAsEntry) >= 2; 696a34c753fSRafael Auler } 697a34c753fSRafael Auler 698a34c753fSRafael Auler void BinaryContext::populateJumpTables() { 699a34c753fSRafael Auler LLVM_DEBUG(dbgs() << "DataPCRelocations: " << DataPCRelocations.size() 700a34c753fSRafael Auler << '\n'); 701a34c753fSRafael Auler for (auto JTI = JumpTables.begin(), JTE = JumpTables.end(); JTI != JTE; 702a34c753fSRafael Auler ++JTI) { 703a34c753fSRafael Auler JumpTable *JT = JTI->second; 704a34c753fSRafael Auler 70505523dc3SHuan Nguyen bool NonSimpleParent = false; 70605523dc3SHuan Nguyen for (BinaryFunction *BF : JT->Parents) 70705523dc3SHuan Nguyen NonSimpleParent |= !BF->isSimple(); 70805523dc3SHuan Nguyen if (NonSimpleParent) 709a34c753fSRafael Auler continue; 710a34c753fSRafael Auler 711a34c753fSRafael Auler uint64_t NextJTAddress = 0; 712a34c753fSRafael Auler auto NextJTI = std::next(JTI); 7133652483cSRafael Auler if (NextJTI != JTE) 714a34c753fSRafael Auler NextJTAddress = NextJTI->second->getAddress(); 715a34c753fSRafael Auler 71605523dc3SHuan Nguyen const bool Success = 71705523dc3SHuan Nguyen analyzeJumpTable(JT->getAddress(), JT->Type, *(JT->Parents[0]), 71808ab4fafSAmir Ayupov NextJTAddress, &JT->EntriesAsAddress, &JT->IsSplit); 719a34c753fSRafael Auler if (!Success) { 720055f9f6dSAmir Ayupov LLVM_DEBUG({ 721055f9f6dSAmir Ayupov dbgs() << "failed to analyze "; 722a34c753fSRafael Auler JT->print(dbgs()); 723a34c753fSRafael Auler if (NextJTI != JTE) { 724055f9f6dSAmir Ayupov dbgs() << "next "; 725a34c753fSRafael Auler NextJTI->second->print(dbgs()); 726a34c753fSRafael Auler } 727055f9f6dSAmir Ayupov }); 728468d4f6dSAmir Ayupov llvm_unreachable("jump table heuristic failure"); 729a34c753fSRafael Auler } 73005523dc3SHuan Nguyen for (BinaryFunction *Frag : JT->Parents) { 73108ab4fafSAmir Ayupov if (JT->IsSplit) 73208ab4fafSAmir Ayupov Frag->setHasIndirectTargetToSplitFragment(true); 73305523dc3SHuan Nguyen for (uint64_t EntryAddress : JT->EntriesAsAddress) 73405523dc3SHuan Nguyen // if target is builtin_unreachable 73505523dc3SHuan Nguyen if (EntryAddress == Frag->getAddress() + Frag->getSize()) { 73605523dc3SHuan Nguyen Frag->IgnoredBranches.emplace_back(EntryAddress - Frag->getAddress(), 73705523dc3SHuan Nguyen Frag->getSize()); 73805523dc3SHuan Nguyen } else if (EntryAddress >= Frag->getAddress() && 73905523dc3SHuan Nguyen EntryAddress < Frag->getAddress() + Frag->getSize()) { 74005523dc3SHuan Nguyen Frag->registerReferencedOffset(EntryAddress - Frag->getAddress()); 74105523dc3SHuan Nguyen } 742a34c753fSRafael Auler } 743a34c753fSRafael Auler 744a34c753fSRafael Auler // In strict mode, erase PC-relative relocation record. Later we check that 745a34c753fSRafael Auler // all such records are erased and thus have been accounted for. 746a34c753fSRafael Auler if (opts::StrictMode && JT->Type == JumpTable::JTT_PIC) { 747a34c753fSRafael Auler for (uint64_t Address = JT->getAddress(); 748a34c753fSRafael Auler Address < JT->getAddress() + JT->getSize(); 749a34c753fSRafael Auler Address += JT->EntrySize) { 750a34c753fSRafael Auler DataPCRelocations.erase(DataPCRelocations.find(Address)); 751a34c753fSRafael Auler } 752a34c753fSRafael Auler } 753a34c753fSRafael Auler 754a34c753fSRafael Auler // Mark to skip the function and all its fragments. 75505523dc3SHuan Nguyen for (BinaryFunction *Frag : JT->Parents) 75605523dc3SHuan Nguyen if (Frag->hasIndirectTargetToSplitFragment()) 75705523dc3SHuan Nguyen addFragmentsToSkip(Frag); 758a34c753fSRafael Auler } 759a34c753fSRafael Auler 760a34c753fSRafael Auler if (opts::StrictMode && DataPCRelocations.size()) { 761a34c753fSRafael Auler LLVM_DEBUG({ 762a34c753fSRafael Auler dbgs() << DataPCRelocations.size() 763a34c753fSRafael Auler << " unclaimed PC-relative relocations left in data:\n"; 764a34c753fSRafael Auler for (uint64_t Reloc : DataPCRelocations) 765a34c753fSRafael Auler dbgs() << Twine::utohexstr(Reloc) << '\n'; 766a34c753fSRafael Auler }); 767a34c753fSRafael Auler assert(0 && "unclaimed PC-relative relocations left in data\n"); 768a34c753fSRafael Auler } 769a34c753fSRafael Auler clearList(DataPCRelocations); 770a34c753fSRafael Auler } 7716aa735ceSAmir Ayupov 7726aa735ceSAmir Ayupov void BinaryContext::skipMarkedFragments() { 77305523dc3SHuan Nguyen std::vector<BinaryFunction *> FragmentQueue; 77405523dc3SHuan Nguyen // Copy the functions to FragmentQueue. 77505523dc3SHuan Nguyen FragmentQueue.assign(FragmentsToSkip.begin(), FragmentsToSkip.end()); 7766aa735ceSAmir Ayupov auto addToWorklist = [&](BinaryFunction *Function) -> void { 77705523dc3SHuan Nguyen if (FragmentsToSkip.count(Function)) 7786aa735ceSAmir Ayupov return; 77905523dc3SHuan Nguyen FragmentQueue.push_back(Function); 78005523dc3SHuan Nguyen addFragmentsToSkip(Function); 7816aa735ceSAmir Ayupov }; 7826aa735ceSAmir Ayupov // Functions containing split jump tables need to be skipped with all 7836aa735ceSAmir Ayupov // fragments (transitively). 78405523dc3SHuan Nguyen for (size_t I = 0; I != FragmentQueue.size(); I++) { 78505523dc3SHuan Nguyen BinaryFunction *BF = FragmentQueue[I]; 78605523dc3SHuan Nguyen assert(FragmentsToSkip.count(BF) && 7876aa735ceSAmir Ayupov "internal error in traversing function fragments"); 7886aa735ceSAmir Ayupov if (opts::Verbosity >= 1) 78952cf0711SAmir Ayupov this->errs() << "BOLT-WARNING: Ignoring " << BF->getPrintName() << '\n'; 79082095bd5SHuan Nguyen BF->setSimple(false); 79105523dc3SHuan Nguyen BF->setHasIndirectTargetToSplitFragment(true); 79282095bd5SHuan Nguyen 793d2c87699SAmir Ayupov llvm::for_each(BF->Fragments, addToWorklist); 794d2c87699SAmir Ayupov llvm::for_each(BF->ParentFragments, addToWorklist); 7956aa735ceSAmir Ayupov } 796641e92d4SMaksim Panchenko if (!FragmentsToSkip.empty()) 79752cf0711SAmir Ayupov this->errs() << "BOLT-WARNING: skipped " << FragmentsToSkip.size() 79852cf0711SAmir Ayupov << " function" << (FragmentsToSkip.size() == 1 ? "" : "s") 799641e92d4SMaksim Panchenko << " due to cold fragments\n"; 800a34c753fSRafael Auler } 801a34c753fSRafael Auler 80240c2e0faSMaksim Panchenko MCSymbol *BinaryContext::getOrCreateGlobalSymbol(uint64_t Address, Twine Prefix, 803a34c753fSRafael Auler uint64_t Size, 804a34c753fSRafael Auler uint16_t Alignment, 805a34c753fSRafael Auler unsigned Flags) { 806a34c753fSRafael Auler auto Itr = BinaryDataMap.find(Address); 807a34c753fSRafael Auler if (Itr != BinaryDataMap.end()) { 808a34c753fSRafael Auler assert(Itr->second->getSize() == Size || !Size); 809a34c753fSRafael Auler return Itr->second->getSymbol(); 810a34c753fSRafael Auler } 811a34c753fSRafael Auler 812a34c753fSRafael Auler std::string Name = (Prefix + "0x" + Twine::utohexstr(Address)).str(); 813a34c753fSRafael Auler assert(!GlobalSymbols.count(Name) && "created name is not unique"); 814a34c753fSRafael Auler return registerNameAtAddress(Name, Address, Size, Alignment, Flags); 815a34c753fSRafael Auler } 816a34c753fSRafael Auler 817a34c753fSRafael Auler MCSymbol *BinaryContext::getOrCreateUndefinedGlobalSymbol(StringRef Name) { 818a34c753fSRafael Auler return Ctx->getOrCreateSymbol(Name); 819a34c753fSRafael Auler } 820a34c753fSRafael Auler 821a34c753fSRafael Auler BinaryFunction *BinaryContext::createBinaryFunction( 822a34c753fSRafael Auler const std::string &Name, BinarySection &Section, uint64_t Address, 823a34c753fSRafael Auler uint64_t Size, uint64_t SymbolSize, uint16_t Alignment) { 824a34c753fSRafael Auler auto Result = BinaryFunctions.emplace( 825a34c753fSRafael Auler Address, BinaryFunction(Name, Section, Address, Size, *this)); 826a34c753fSRafael Auler assert(Result.second == true && "unexpected duplicate function"); 827a34c753fSRafael Auler BinaryFunction *BF = &Result.first->second; 828a34c753fSRafael Auler registerNameAtAddress(Name, Address, SymbolSize ? SymbolSize : Size, 829a34c753fSRafael Auler Alignment); 830a34c753fSRafael Auler setSymbolToFunctionMap(BF->getSymbol(), BF); 831a34c753fSRafael Auler return BF; 832a34c753fSRafael Auler } 833a34c753fSRafael Auler 834a34c753fSRafael Auler const MCSymbol * 835a34c753fSRafael Auler BinaryContext::getOrCreateJumpTable(BinaryFunction &Function, uint64_t Address, 836a34c753fSRafael Auler JumpTable::JumpTableType Type) { 83705523dc3SHuan Nguyen // Two fragments of same function access same jump table 838a34c753fSRafael Auler if (JumpTable *JT = getJumpTableContainingAddress(Address)) { 839a34c753fSRafael Auler assert(JT->Type == Type && "jump table types have to match"); 840a34c753fSRafael Auler assert(Address == JT->getAddress() && "unexpected non-empty jump table"); 841a34c753fSRafael Auler 84205523dc3SHuan Nguyen // Prevent associating a jump table to a specific fragment twice. 8439d2dd009SAmir Ayupov if (!llvm::is_contained(JT->Parents, &Function)) { 8449d2dd009SAmir Ayupov assert(llvm::all_of(JT->Parents, 8459d2dd009SAmir Ayupov [&](const BinaryFunction *BF) { 8469d2dd009SAmir Ayupov return areRelatedFragments(&Function, BF); 8479d2dd009SAmir Ayupov }) && 84805523dc3SHuan Nguyen "cannot re-use jump table of a different function"); 84928b1dcb1SHuan Nguyen // Duplicate the entry for the parent function for easy access 85005523dc3SHuan Nguyen JT->Parents.push_back(&Function); 85128b1dcb1SHuan Nguyen if (opts::Verbosity > 2) { 85252cf0711SAmir Ayupov this->outs() << "BOLT-INFO: Multiple fragments access same jump table: " 85305523dc3SHuan Nguyen << JT->Parents[0]->getPrintName() << "; " 85405523dc3SHuan Nguyen << Function.getPrintName() << "\n"; 85552cf0711SAmir Ayupov JT->print(this->outs()); 85628b1dcb1SHuan Nguyen } 85728b1dcb1SHuan Nguyen Function.JumpTables.emplace(Address, JT); 8589d2dd009SAmir Ayupov for (BinaryFunction *Parent : JT->Parents) 8599d2dd009SAmir Ayupov Parent->setHasIndirectTargetToSplitFragment(true); 86028b1dcb1SHuan Nguyen } 86105523dc3SHuan Nguyen 86205523dc3SHuan Nguyen bool IsJumpTableParent = false; 8630c925861SThorsten Schütt (void)IsJumpTableParent; 86405523dc3SHuan Nguyen for (BinaryFunction *Frag : JT->Parents) 86505523dc3SHuan Nguyen if (Frag == &Function) 86605523dc3SHuan Nguyen IsJumpTableParent = true; 86705523dc3SHuan Nguyen assert(IsJumpTableParent && 86805523dc3SHuan Nguyen "cannot re-use jump table of a different function"); 869a34c753fSRafael Auler return JT->getFirstLabel(); 870a34c753fSRafael Auler } 871a34c753fSRafael Auler 872a34c753fSRafael Auler // Re-use the existing symbol if possible. 873a34c753fSRafael Auler MCSymbol *JTLabel = nullptr; 874a34c753fSRafael Auler if (BinaryData *Object = getBinaryDataAtAddress(Address)) { 875a34c753fSRafael Auler if (!isInternalSymbolName(Object->getSymbol()->getName())) 876a34c753fSRafael Auler JTLabel = Object->getSymbol(); 877a34c753fSRafael Auler } 878a34c753fSRafael Auler 879a34c753fSRafael Auler const uint64_t EntrySize = getJumpTableEntrySize(Type); 880a34c753fSRafael Auler if (!JTLabel) { 881a34c753fSRafael Auler const std::string JumpTableName = generateJumpTableName(Function, Address); 882a34c753fSRafael Auler JTLabel = registerNameAtAddress(JumpTableName, Address, 0, EntrySize); 883a34c753fSRafael Auler } 884a34c753fSRafael Auler 885a34c753fSRafael Auler LLVM_DEBUG(dbgs() << "BOLT-DEBUG: creating jump table " << JTLabel->getName() 886a34c753fSRafael Auler << " in function " << Function << '\n'); 887a34c753fSRafael Auler 888a34c753fSRafael Auler JumpTable *JT = new JumpTable(*JTLabel, Address, EntrySize, Type, 88905523dc3SHuan Nguyen JumpTable::LabelMapType{{0, JTLabel}}, 890a34c753fSRafael Auler *getSectionForAddress(Address)); 89105523dc3SHuan Nguyen JT->Parents.push_back(&Function); 89205523dc3SHuan Nguyen if (opts::Verbosity > 2) 89352cf0711SAmir Ayupov JT->print(this->outs()); 894a34c753fSRafael Auler JumpTables.emplace(Address, JT); 895a34c753fSRafael Auler 896a34c753fSRafael Auler // Duplicate the entry for the parent function for easy access. 897a34c753fSRafael Auler Function.JumpTables.emplace(Address, JT); 898a34c753fSRafael Auler return JTLabel; 899a34c753fSRafael Auler } 900a34c753fSRafael Auler 901a34c753fSRafael Auler std::pair<uint64_t, const MCSymbol *> 902a34c753fSRafael Auler BinaryContext::duplicateJumpTable(BinaryFunction &Function, JumpTable *JT, 903a34c753fSRafael Auler const MCSymbol *OldLabel) { 904a34c753fSRafael Auler auto L = scopeLock(); 905a34c753fSRafael Auler unsigned Offset = 0; 906a34c753fSRafael Auler bool Found = false; 907a34c753fSRafael Auler for (std::pair<const unsigned, MCSymbol *> Elmt : JT->Labels) { 908a34c753fSRafael Auler if (Elmt.second != OldLabel) 909a34c753fSRafael Auler continue; 910a34c753fSRafael Auler Offset = Elmt.first; 911a34c753fSRafael Auler Found = true; 912a34c753fSRafael Auler break; 913a34c753fSRafael Auler } 914a34c753fSRafael Auler assert(Found && "Label not found"); 915c907d6e0SAmir Ayupov (void)Found; 916a34c753fSRafael Auler MCSymbol *NewLabel = Ctx->createNamedTempSymbol("duplicatedJT"); 917a34c753fSRafael Auler JumpTable *NewJT = 918a34c753fSRafael Auler new JumpTable(*NewLabel, JT->getAddress(), JT->EntrySize, JT->Type, 91905523dc3SHuan Nguyen JumpTable::LabelMapType{{Offset, NewLabel}}, 920a34c753fSRafael Auler *getSectionForAddress(JT->getAddress())); 92105523dc3SHuan Nguyen NewJT->Parents = JT->Parents; 922a34c753fSRafael Auler NewJT->Entries = JT->Entries; 923a34c753fSRafael Auler NewJT->Counts = JT->Counts; 924a34c753fSRafael Auler uint64_t JumpTableID = ++DuplicatedJumpTables; 925a34c753fSRafael Auler // Invert it to differentiate from regular jump tables whose IDs are their 926a34c753fSRafael Auler // addresses in the input binary memory space 927a34c753fSRafael Auler JumpTableID = ~JumpTableID; 928a34c753fSRafael Auler JumpTables.emplace(JumpTableID, NewJT); 929a34c753fSRafael Auler Function.JumpTables.emplace(JumpTableID, NewJT); 930a34c753fSRafael Auler return std::make_pair(JumpTableID, NewLabel); 931a34c753fSRafael Auler } 932a34c753fSRafael Auler 933a34c753fSRafael Auler std::string BinaryContext::generateJumpTableName(const BinaryFunction &BF, 934a34c753fSRafael Auler uint64_t Address) { 935a34c753fSRafael Auler size_t Id; 936a34c753fSRafael Auler uint64_t Offset = 0; 937a34c753fSRafael Auler if (const JumpTable *JT = BF.getJumpTableContainingAddress(Address)) { 938a34c753fSRafael Auler Offset = Address - JT->getAddress(); 939c8fc234eSshaw young auto JTLabelsIt = JT->Labels.find(Offset); 940c8fc234eSshaw young if (JTLabelsIt != JT->Labels.end()) 941c8fc234eSshaw young return std::string(JTLabelsIt->second->getName()); 942c8fc234eSshaw young 943c8fc234eSshaw young auto JTIdsIt = JumpTableIds.find(JT->getAddress()); 944c8fc234eSshaw young assert(JTIdsIt != JumpTableIds.end()); 945c8fc234eSshaw young Id = JTIdsIt->second; 946a34c753fSRafael Auler } else { 947a34c753fSRafael Auler Id = JumpTableIds[Address] = BF.JumpTables.size(); 948a34c753fSRafael Auler } 949a34c753fSRafael Auler return ("JUMP_TABLE/" + BF.getOneName().str() + "." + std::to_string(Id) + 950a34c753fSRafael Auler (Offset ? ("." + std::to_string(Offset)) : "")); 951a34c753fSRafael Auler } 952a34c753fSRafael Auler 953a34c753fSRafael Auler bool BinaryContext::hasValidCodePadding(const BinaryFunction &BF) { 954a34c753fSRafael Auler // FIXME: aarch64 support is missing. 955a34c753fSRafael Auler if (!isX86()) 956a34c753fSRafael Auler return true; 957a34c753fSRafael Auler 958a34c753fSRafael Auler if (BF.getSize() == BF.getMaxSize()) 959a34c753fSRafael Auler return true; 960a34c753fSRafael Auler 961a34c753fSRafael Auler ErrorOr<ArrayRef<unsigned char>> FunctionData = BF.getData(); 962a34c753fSRafael Auler assert(FunctionData && "cannot get function as data"); 963a34c753fSRafael Auler 964a34c753fSRafael Auler uint64_t Offset = BF.getSize(); 965a34c753fSRafael Auler MCInst Instr; 966a34c753fSRafael Auler uint64_t InstrSize = 0; 967a34c753fSRafael Auler uint64_t InstrAddress = BF.getAddress() + Offset; 968a34c753fSRafael Auler using std::placeholders::_1; 969a34c753fSRafael Auler 970a34c753fSRafael Auler // Skip instructions that satisfy the predicate condition. 971a34c753fSRafael Auler auto skipInstructions = [&](std::function<bool(const MCInst &)> Predicate) { 972a34c753fSRafael Auler const uint64_t StartOffset = Offset; 973a34c753fSRafael Auler for (; Offset < BF.getMaxSize(); 974a34c753fSRafael Auler Offset += InstrSize, InstrAddress += InstrSize) { 97540c2e0faSMaksim Panchenko if (!DisAsm->getInstruction(Instr, InstrSize, FunctionData->slice(Offset), 97640c2e0faSMaksim Panchenko InstrAddress, nulls())) 977a34c753fSRafael Auler break; 978a34c753fSRafael Auler if (!Predicate(Instr)) 979a34c753fSRafael Auler break; 980a34c753fSRafael Auler } 981a34c753fSRafael Auler 982a34c753fSRafael Auler return Offset - StartOffset; 983a34c753fSRafael Auler }; 984a34c753fSRafael Auler 985a34c753fSRafael Auler // Skip a sequence of zero bytes. 986a34c753fSRafael Auler auto skipZeros = [&]() { 987a34c753fSRafael Auler const uint64_t StartOffset = Offset; 988a34c753fSRafael Auler for (; Offset < BF.getMaxSize(); ++Offset) 989a34c753fSRafael Auler if ((*FunctionData)[Offset] != 0) 990a34c753fSRafael Auler break; 991a34c753fSRafael Auler 992a34c753fSRafael Auler return Offset - StartOffset; 993a34c753fSRafael Auler }; 994a34c753fSRafael Auler 995a34c753fSRafael Auler // Accept the whole padding area filled with breakpoints. 996a34c753fSRafael Auler auto isBreakpoint = std::bind(&MCPlusBuilder::isBreakpoint, MIB.get(), _1); 997a34c753fSRafael Auler if (skipInstructions(isBreakpoint) && Offset == BF.getMaxSize()) 998a34c753fSRafael Auler return true; 999a34c753fSRafael Auler 1000a34c753fSRafael Auler auto isNoop = std::bind(&MCPlusBuilder::isNoop, MIB.get(), _1); 1001a34c753fSRafael Auler 1002a34c753fSRafael Auler // Some functions have a jump to the next function or to the padding area 1003a34c753fSRafael Auler // inserted after the body. 1004a34c753fSRafael Auler auto isSkipJump = [&](const MCInst &Instr) { 1005a34c753fSRafael Auler uint64_t TargetAddress = 0; 1006a34c753fSRafael Auler if (MIB->isUnconditionalBranch(Instr) && 1007a34c753fSRafael Auler MIB->evaluateBranch(Instr, InstrAddress, InstrSize, TargetAddress)) { 1008a34c753fSRafael Auler if (TargetAddress >= InstrAddress + InstrSize && 1009a34c753fSRafael Auler TargetAddress <= BF.getAddress() + BF.getMaxSize()) { 1010a34c753fSRafael Auler return true; 1011a34c753fSRafael Auler } 1012a34c753fSRafael Auler } 1013a34c753fSRafael Auler return false; 1014a34c753fSRafael Auler }; 1015a34c753fSRafael Auler 1016a34c753fSRafael Auler // Skip over nops, jumps, and zero padding. Allow interleaving (this happens). 101740c2e0faSMaksim Panchenko while (skipInstructions(isNoop) || skipInstructions(isSkipJump) || 1018a34c753fSRafael Auler skipZeros()) 1019a34c753fSRafael Auler ; 1020a34c753fSRafael Auler 1021a34c753fSRafael Auler if (Offset == BF.getMaxSize()) 1022a34c753fSRafael Auler return true; 1023a34c753fSRafael Auler 1024a34c753fSRafael Auler if (opts::Verbosity >= 1) { 102552cf0711SAmir Ayupov this->errs() << "BOLT-WARNING: bad padding at address 0x" 1026a34c753fSRafael Auler << Twine::utohexstr(BF.getAddress() + BF.getSize()) 102740c2e0faSMaksim Panchenko << " starting at offset " << (Offset - BF.getSize()) 102840c2e0faSMaksim Panchenko << " in function " << BF << '\n' 102952cf0711SAmir Ayupov << FunctionData->slice(BF.getSize(), 103052cf0711SAmir Ayupov BF.getMaxSize() - BF.getSize()) 1031a34c753fSRafael Auler << '\n'; 1032a34c753fSRafael Auler } 1033a34c753fSRafael Auler 1034a34c753fSRafael Auler return false; 1035a34c753fSRafael Auler } 1036a34c753fSRafael Auler 1037a34c753fSRafael Auler void BinaryContext::adjustCodePadding() { 1038a34c753fSRafael Auler for (auto &BFI : BinaryFunctions) { 1039a34c753fSRafael Auler BinaryFunction &BF = BFI.second; 1040a34c753fSRafael Auler if (!shouldEmit(BF)) 1041a34c753fSRafael Auler continue; 1042a34c753fSRafael Auler 1043a34c753fSRafael Auler if (!hasValidCodePadding(BF)) { 1044a34c753fSRafael Auler if (HasRelocations) { 1045a34c753fSRafael Auler if (opts::Verbosity >= 1) { 104652cf0711SAmir Ayupov this->outs() << "BOLT-INFO: function " << BF 1047a34c753fSRafael Auler << " has invalid padding. Ignoring the function.\n"; 1048a34c753fSRafael Auler } 1049a34c753fSRafael Auler BF.setIgnored(); 1050a34c753fSRafael Auler } else { 1051a34c753fSRafael Auler BF.setMaxSize(BF.getSize()); 1052a34c753fSRafael Auler } 1053a34c753fSRafael Auler } 1054a34c753fSRafael Auler } 1055a34c753fSRafael Auler } 1056a34c753fSRafael Auler 105740c2e0faSMaksim Panchenko MCSymbol *BinaryContext::registerNameAtAddress(StringRef Name, uint64_t Address, 1058a34c753fSRafael Auler uint64_t Size, 1059a34c753fSRafael Auler uint16_t Alignment, 1060e49549ffSDavide Italiano unsigned Flags) { 1061a34c753fSRafael Auler // Register the name with MCContext. 1062a34c753fSRafael Auler MCSymbol *Symbol = Ctx->getOrCreateSymbol(Name); 1063a34c753fSRafael Auler 1064a34c753fSRafael Auler auto GAI = BinaryDataMap.find(Address); 1065e49549ffSDavide Italiano BinaryData *BD; 1066a34c753fSRafael Auler if (GAI == BinaryDataMap.end()) { 1067a34c753fSRafael Auler ErrorOr<BinarySection &> SectionOrErr = getSectionForAddress(Address); 1068e49549ffSDavide Italiano BinarySection &Section = 1069e49549ffSDavide Italiano SectionOrErr ? SectionOrErr.get() : absoluteSection(); 107040c2e0faSMaksim Panchenko BD = new BinaryData(*Symbol, Address, Size, Alignment ? Alignment : 1, 1071e49549ffSDavide Italiano Section, Flags); 1072a34c753fSRafael Auler GAI = BinaryDataMap.emplace(Address, BD).first; 1073a34c753fSRafael Auler GlobalSymbols[Name] = BD; 1074a34c753fSRafael Auler updateObjectNesting(GAI); 1075a34c753fSRafael Auler } else { 1076a34c753fSRafael Auler BD = GAI->second; 1077a34c753fSRafael Auler if (!BD->hasName(Name)) { 1078a34c753fSRafael Auler GlobalSymbols[Name] = BD; 1079*6e8a1a45SFranklin BD->updateSize(Size); 1080a34c753fSRafael Auler BD->Symbols.push_back(Symbol); 1081a34c753fSRafael Auler } 1082a34c753fSRafael Auler } 1083a34c753fSRafael Auler 1084a34c753fSRafael Auler return Symbol; 1085a34c753fSRafael Auler } 1086a34c753fSRafael Auler 1087a34c753fSRafael Auler const BinaryData * 1088a34c753fSRafael Auler BinaryContext::getBinaryDataContainingAddressImpl(uint64_t Address) const { 1089a34c753fSRafael Auler auto NI = BinaryDataMap.lower_bound(Address); 1090a34c753fSRafael Auler auto End = BinaryDataMap.end(); 1091a34c753fSRafael Auler if ((NI != End && Address == NI->first) || 1092a34c753fSRafael Auler ((NI != BinaryDataMap.begin()) && (NI-- != BinaryDataMap.begin()))) { 10933652483cSRafael Auler if (NI->second->containsAddress(Address)) 1094a34c753fSRafael Auler return NI->second; 1095a34c753fSRafael Auler 1096a34c753fSRafael Auler // If this is a sub-symbol, see if a parent data contains the address. 1097a34c753fSRafael Auler const BinaryData *BD = NI->second->getParent(); 1098a34c753fSRafael Auler while (BD) { 1099a34c753fSRafael Auler if (BD->containsAddress(Address)) 1100a34c753fSRafael Auler return BD; 1101a34c753fSRafael Auler BD = BD->getParent(); 1102a34c753fSRafael Auler } 1103a34c753fSRafael Auler } 1104a34c753fSRafael Auler return nullptr; 1105a34c753fSRafael Auler } 1106a34c753fSRafael Auler 1107853e126cSRafael Auler BinaryData *BinaryContext::getGOTSymbol() { 1108853e126cSRafael Auler // First tries to find a global symbol with that name 1109853e126cSRafael Auler BinaryData *GOTSymBD = getBinaryDataByName("_GLOBAL_OFFSET_TABLE_"); 1110853e126cSRafael Auler if (GOTSymBD) 1111853e126cSRafael Auler return GOTSymBD; 1112853e126cSRafael Auler 1113853e126cSRafael Auler // This symbol might be hidden from run-time link, so fetch the local 1114853e126cSRafael Auler // definition if available. 1115853e126cSRafael Auler GOTSymBD = getBinaryDataByName("_GLOBAL_OFFSET_TABLE_/1"); 1116853e126cSRafael Auler if (!GOTSymBD) 1117853e126cSRafael Auler return nullptr; 1118853e126cSRafael Auler 1119853e126cSRafael Auler // If the local symbol is not unique, fail 1120853e126cSRafael Auler unsigned Index = 2; 1121853e126cSRafael Auler SmallString<30> Storage; 1122853e126cSRafael Auler while (const BinaryData *BD = 1123853e126cSRafael Auler getBinaryDataByName(Twine("_GLOBAL_OFFSET_TABLE_/") 1124853e126cSRafael Auler .concat(Twine(Index++)) 1125853e126cSRafael Auler .toStringRef(Storage))) 1126853e126cSRafael Auler if (BD->getAddress() != GOTSymBD->getAddress()) 1127853e126cSRafael Auler return nullptr; 1128853e126cSRafael Auler 1129853e126cSRafael Auler return GOTSymBD; 1130853e126cSRafael Auler } 1131853e126cSRafael Auler 1132a34c753fSRafael Auler bool BinaryContext::setBinaryDataSize(uint64_t Address, uint64_t Size) { 1133a34c753fSRafael Auler auto NI = BinaryDataMap.find(Address); 1134a34c753fSRafael Auler assert(NI != BinaryDataMap.end()); 1135a34c753fSRafael Auler if (NI == BinaryDataMap.end()) 1136a34c753fSRafael Auler return false; 1137a34c753fSRafael Auler // TODO: it's possible that a jump table starts at the same address 1138a34c753fSRafael Auler // as a larger blob of private data. When we set the size of the 1139a34c753fSRafael Auler // jump table, it might be smaller than the total blob size. In this 1140a34c753fSRafael Auler // case we just leave the original size since (currently) it won't really 1141933df2a4SMaksim Panchenko // affect anything. 1142a34c753fSRafael Auler assert((!NI->second->Size || NI->second->Size == Size || 1143a34c753fSRafael Auler (NI->second->isJumpTable() && NI->second->Size > Size)) && 1144a34c753fSRafael Auler "can't change the size of a symbol that has already had its " 1145a34c753fSRafael Auler "size set"); 1146a34c753fSRafael Auler if (!NI->second->Size) { 1147a34c753fSRafael Auler NI->second->Size = Size; 1148a34c753fSRafael Auler updateObjectNesting(NI); 1149a34c753fSRafael Auler return true; 1150a34c753fSRafael Auler } 1151a34c753fSRafael Auler return false; 1152a34c753fSRafael Auler } 1153a34c753fSRafael Auler 1154a34c753fSRafael Auler void BinaryContext::generateSymbolHashes() { 1155a34c753fSRafael Auler auto isPadding = [](const BinaryData &BD) { 1156a34c753fSRafael Auler StringRef Contents = BD.getSection().getContents(); 1157a34c753fSRafael Auler StringRef SymData = Contents.substr(BD.getOffset(), BD.getSize()); 1158ad8fd5b1SKazu Hirata return (BD.getName().starts_with("HOLEat") || 1159a34c753fSRafael Auler SymData.find_first_not_of(0) == StringRef::npos); 1160a34c753fSRafael Auler }; 1161a34c753fSRafael Auler 1162a34c753fSRafael Auler uint64_t NumCollisions = 0; 1163a34c753fSRafael Auler for (auto &Entry : BinaryDataMap) { 1164a34c753fSRafael Auler BinaryData &BD = *Entry.second; 1165a34c753fSRafael Auler StringRef Name = BD.getName(); 1166a34c753fSRafael Auler 1167a34c753fSRafael Auler if (!isInternalSymbolName(Name)) 1168a34c753fSRafael Auler continue; 1169a34c753fSRafael Auler 1170a34c753fSRafael Auler // First check if a non-anonymous alias exists and move it to the front. 1171a34c753fSRafael Auler if (BD.getSymbols().size() > 1) { 1172d2c87699SAmir Ayupov auto Itr = llvm::find_if(BD.getSymbols(), [&](const MCSymbol *Symbol) { 1173a34c753fSRafael Auler return !isInternalSymbolName(Symbol->getName()); 1174a34c753fSRafael Auler }); 1175a34c753fSRafael Auler if (Itr != BD.getSymbols().end()) { 1176a34c753fSRafael Auler size_t Idx = std::distance(BD.getSymbols().begin(), Itr); 1177a34c753fSRafael Auler std::swap(BD.getSymbols()[0], BD.getSymbols()[Idx]); 1178a34c753fSRafael Auler continue; 1179a34c753fSRafael Auler } 1180a34c753fSRafael Auler } 1181a34c753fSRafael Auler 1182a34c753fSRafael Auler // We have to skip 0 size symbols since they will all collide. 1183a34c753fSRafael Auler if (BD.getSize() == 0) { 1184a34c753fSRafael Auler continue; 1185a34c753fSRafael Auler } 1186a34c753fSRafael Auler 1187a34c753fSRafael Auler const uint64_t Hash = BD.getSection().hash(BD); 1188a34c753fSRafael Auler const size_t Idx = Name.find("0x"); 118940c2e0faSMaksim Panchenko std::string NewName = 119040c2e0faSMaksim Panchenko (Twine(Name.substr(0, Idx)) + "_" + Twine::utohexstr(Hash)).str(); 1191a34c753fSRafael Auler if (getBinaryDataByName(NewName)) { 1192a34c753fSRafael Auler // Ignore collisions for symbols that appear to be padding 1193a34c753fSRafael Auler // (i.e. all zeros or a "hole") 1194a34c753fSRafael Auler if (!isPadding(BD)) { 1195a34c753fSRafael Auler if (opts::Verbosity) { 119652cf0711SAmir Ayupov this->errs() << "BOLT-WARNING: collision detected when hashing " << BD 1197a34c753fSRafael Auler << " with new name (" << NewName << "), skipping.\n"; 1198a34c753fSRafael Auler } 1199a34c753fSRafael Auler ++NumCollisions; 1200a34c753fSRafael Auler } 1201a34c753fSRafael Auler continue; 1202a34c753fSRafael Auler } 120340c2e0faSMaksim Panchenko BD.Symbols.insert(BD.Symbols.begin(), Ctx->getOrCreateSymbol(NewName)); 1204a34c753fSRafael Auler GlobalSymbols[NewName] = &BD; 1205a34c753fSRafael Auler } 1206a34c753fSRafael Auler if (NumCollisions) { 120752cf0711SAmir Ayupov this->errs() << "BOLT-WARNING: " << NumCollisions 1208a34c753fSRafael Auler << " collisions detected while hashing binary objects"; 1209a34c753fSRafael Auler if (!opts::Verbosity) 121052cf0711SAmir Ayupov this->errs() << ". Use -v=1 to see the list."; 121152cf0711SAmir Ayupov this->errs() << '\n'; 1212a34c753fSRafael Auler } 1213a34c753fSRafael Auler } 1214a34c753fSRafael Auler 12156aa735ceSAmir Ayupov bool BinaryContext::registerFragment(BinaryFunction &TargetFunction, 121683ea7ce3SAmir Ayupov BinaryFunction &Function) { 12176aa735ceSAmir Ayupov assert(TargetFunction.isFragment() && "TargetFunction must be a fragment"); 1218e88122f5SAmir Ayupov if (TargetFunction.isChildOf(Function)) 12196aa735ceSAmir Ayupov return true; 12206aa735ceSAmir Ayupov TargetFunction.addParentFragment(Function); 1221a34c753fSRafael Auler Function.addFragment(TargetFunction); 122283ea7ce3SAmir Ayupov FragmentClasses.unionSets(&TargetFunction, &Function); 1223a34c753fSRafael Auler if (!HasRelocations) { 1224a34c753fSRafael Auler TargetFunction.setSimple(false); 1225a34c753fSRafael Auler Function.setSimple(false); 1226a34c753fSRafael Auler } 1227a34c753fSRafael Auler if (opts::Verbosity >= 1) { 122852cf0711SAmir Ayupov this->outs() << "BOLT-INFO: marking " << TargetFunction 122952cf0711SAmir Ayupov << " as a fragment of " << Function << '\n'; 1230a34c753fSRafael Auler } 12316aa735ceSAmir Ayupov return true; 1232a34c753fSRafael Auler } 1233a34c753fSRafael Auler 123435efe1d8SVladislav Khmelevsky void BinaryContext::addAdrpAddRelocAArch64(BinaryFunction &BF, 123535efe1d8SVladislav Khmelevsky MCInst &LoadLowBits, 123635efe1d8SVladislav Khmelevsky MCInst &LoadHiBits, 123735efe1d8SVladislav Khmelevsky uint64_t Target) { 123835efe1d8SVladislav Khmelevsky const MCSymbol *TargetSymbol; 123935efe1d8SVladislav Khmelevsky uint64_t Addend = 0; 124035efe1d8SVladislav Khmelevsky std::tie(TargetSymbol, Addend) = handleAddressRef(Target, BF, 124135efe1d8SVladislav Khmelevsky /*IsPCRel*/ true); 124235efe1d8SVladislav Khmelevsky int64_t Val; 124335efe1d8SVladislav Khmelevsky MIB->replaceImmWithSymbolRef(LoadHiBits, TargetSymbol, Addend, Ctx.get(), Val, 124435efe1d8SVladislav Khmelevsky ELF::R_AARCH64_ADR_PREL_PG_HI21); 124535efe1d8SVladislav Khmelevsky MIB->replaceImmWithSymbolRef(LoadLowBits, TargetSymbol, Addend, Ctx.get(), 124635efe1d8SVladislav Khmelevsky Val, ELF::R_AARCH64_ADD_ABS_LO12_NC); 124735efe1d8SVladislav Khmelevsky } 124835efe1d8SVladislav Khmelevsky 124935efe1d8SVladislav Khmelevsky bool BinaryContext::handleAArch64Veneer(uint64_t Address, bool MatchOnly) { 125035efe1d8SVladislav Khmelevsky BinaryFunction *TargetFunction = getBinaryFunctionContainingAddress(Address); 125135efe1d8SVladislav Khmelevsky if (TargetFunction) 125235efe1d8SVladislav Khmelevsky return false; 125335efe1d8SVladislav Khmelevsky 125435efe1d8SVladislav Khmelevsky ErrorOr<BinarySection &> Section = getSectionForAddress(Address); 125535efe1d8SVladislav Khmelevsky assert(Section && "cannot get section for referenced address"); 125635efe1d8SVladislav Khmelevsky if (!Section->isText()) 125735efe1d8SVladislav Khmelevsky return false; 125835efe1d8SVladislav Khmelevsky 125935efe1d8SVladislav Khmelevsky bool Ret = false; 126035efe1d8SVladislav Khmelevsky StringRef SectionContents = Section->getContents(); 126135efe1d8SVladislav Khmelevsky uint64_t Offset = Address - Section->getAddress(); 126235efe1d8SVladislav Khmelevsky const uint64_t MaxSize = SectionContents.size() - Offset; 126335efe1d8SVladislav Khmelevsky const uint8_t *Bytes = 126435efe1d8SVladislav Khmelevsky reinterpret_cast<const uint8_t *>(SectionContents.data()); 126535efe1d8SVladislav Khmelevsky ArrayRef<uint8_t> Data(Bytes + Offset, MaxSize); 126635efe1d8SVladislav Khmelevsky 126735efe1d8SVladislav Khmelevsky auto matchVeneer = [&](BinaryFunction::InstrMapType &Instructions, 126835efe1d8SVladislav Khmelevsky MCInst &Instruction, uint64_t Offset, 126935efe1d8SVladislav Khmelevsky uint64_t AbsoluteInstrAddr, 127035efe1d8SVladislav Khmelevsky uint64_t TotalSize) -> bool { 127135efe1d8SVladislav Khmelevsky MCInst *TargetHiBits, *TargetLowBits; 127235efe1d8SVladislav Khmelevsky uint64_t TargetAddress, Count; 127335efe1d8SVladislav Khmelevsky Count = MIB->matchLinkerVeneer(Instructions.begin(), Instructions.end(), 127435efe1d8SVladislav Khmelevsky AbsoluteInstrAddr, Instruction, TargetHiBits, 127535efe1d8SVladislav Khmelevsky TargetLowBits, TargetAddress); 127635efe1d8SVladislav Khmelevsky if (!Count) 127735efe1d8SVladislav Khmelevsky return false; 127835efe1d8SVladislav Khmelevsky 127935efe1d8SVladislav Khmelevsky if (MatchOnly) 128035efe1d8SVladislav Khmelevsky return true; 128135efe1d8SVladislav Khmelevsky 128235efe1d8SVladislav Khmelevsky // NOTE The target symbol was created during disassemble's 128335efe1d8SVladislav Khmelevsky // handleExternalReference 128435efe1d8SVladislav Khmelevsky const MCSymbol *VeneerSymbol = getOrCreateGlobalSymbol(Address, "FUNCat"); 128535efe1d8SVladislav Khmelevsky BinaryFunction *Veneer = createBinaryFunction(VeneerSymbol->getName().str(), 128635efe1d8SVladislav Khmelevsky *Section, Address, TotalSize); 128735efe1d8SVladislav Khmelevsky addAdrpAddRelocAArch64(*Veneer, *TargetLowBits, *TargetHiBits, 128835efe1d8SVladislav Khmelevsky TargetAddress); 128935efe1d8SVladislav Khmelevsky MIB->addAnnotation(Instruction, "AArch64Veneer", true); 129035efe1d8SVladislav Khmelevsky Veneer->addInstruction(Offset, std::move(Instruction)); 129135efe1d8SVladislav Khmelevsky --Count; 1292f65e8c3cSNico Weber for (auto It = Instructions.rbegin(); Count != 0; ++It, --Count) { 129335efe1d8SVladislav Khmelevsky MIB->addAnnotation(It->second, "AArch64Veneer", true); 129435efe1d8SVladislav Khmelevsky Veneer->addInstruction(It->first, std::move(It->second)); 129535efe1d8SVladislav Khmelevsky } 129635efe1d8SVladislav Khmelevsky 129735efe1d8SVladislav Khmelevsky Veneer->getOrCreateLocalLabel(Address); 129835efe1d8SVladislav Khmelevsky Veneer->setMaxSize(TotalSize); 129935efe1d8SVladislav Khmelevsky Veneer->updateState(BinaryFunction::State::Disassembled); 1300c3bbc3a5Ssinan LLVM_DEBUG(dbgs() << "BOLT-DEBUG: handling veneer function at 0x" 1301c3bbc3a5Ssinan << Twine::utohexstr(Address) << "\n"); 130235efe1d8SVladislav Khmelevsky return true; 130335efe1d8SVladislav Khmelevsky }; 130435efe1d8SVladislav Khmelevsky 130535efe1d8SVladislav Khmelevsky uint64_t Size = 0, TotalSize = 0; 130635efe1d8SVladislav Khmelevsky BinaryFunction::InstrMapType VeneerInstructions; 130735efe1d8SVladislav Khmelevsky for (Offset = 0; Offset < MaxSize; Offset += Size) { 130835efe1d8SVladislav Khmelevsky MCInst Instruction; 130935efe1d8SVladislav Khmelevsky const uint64_t AbsoluteInstrAddr = Address + Offset; 131035efe1d8SVladislav Khmelevsky if (!SymbolicDisAsm->getInstruction(Instruction, Size, Data.slice(Offset), 131135efe1d8SVladislav Khmelevsky AbsoluteInstrAddr, nulls())) 131235efe1d8SVladislav Khmelevsky break; 131335efe1d8SVladislav Khmelevsky 131435efe1d8SVladislav Khmelevsky TotalSize += Size; 131535efe1d8SVladislav Khmelevsky if (MIB->isBranch(Instruction)) { 131635efe1d8SVladislav Khmelevsky Ret = matchVeneer(VeneerInstructions, Instruction, Offset, 131735efe1d8SVladislav Khmelevsky AbsoluteInstrAddr, TotalSize); 131835efe1d8SVladislav Khmelevsky break; 131935efe1d8SVladislav Khmelevsky } 132035efe1d8SVladislav Khmelevsky 132135efe1d8SVladislav Khmelevsky VeneerInstructions.emplace(Offset, std::move(Instruction)); 132235efe1d8SVladislav Khmelevsky } 132335efe1d8SVladislav Khmelevsky 132435efe1d8SVladislav Khmelevsky return Ret; 132535efe1d8SVladislav Khmelevsky } 132635efe1d8SVladislav Khmelevsky 132735efe1d8SVladislav Khmelevsky void BinaryContext::processInterproceduralReferences() { 132835efe1d8SVladislav Khmelevsky for (const std::pair<BinaryFunction *, uint64_t> &It : 132935efe1d8SVladislav Khmelevsky InterproceduralReferences) { 133035efe1d8SVladislav Khmelevsky BinaryFunction &Function = *It.first; 133135efe1d8SVladislav Khmelevsky uint64_t Address = It.second; 1332935b946bSAmir Ayupov // Process interprocedural references from ignored functions in BAT mode 1333935b946bSAmir Ayupov // (non-simple in non-relocation mode) to properly register entry points 1334935b946bSAmir Ayupov if (!Address || (Function.isIgnored() && !HasBATSection)) 1335a34c753fSRafael Auler continue; 1336a34c753fSRafael Auler 1337a34c753fSRafael Auler BinaryFunction *TargetFunction = 1338a34c753fSRafael Auler getBinaryFunctionContainingAddress(Address); 1339a34c753fSRafael Auler if (&Function == TargetFunction) 1340a34c753fSRafael Auler continue; 1341a34c753fSRafael Auler 1342a34c753fSRafael Auler if (TargetFunction) { 134335efe1d8SVladislav Khmelevsky if (TargetFunction->isFragment() && 134483ea7ce3SAmir Ayupov !areRelatedFragments(TargetFunction, &Function)) { 134552cf0711SAmir Ayupov this->errs() 134652cf0711SAmir Ayupov << "BOLT-WARNING: interprocedural reference between unrelated " 13476aa735ceSAmir Ayupov "fragments: " 13486aa735ceSAmir Ayupov << Function.getPrintName() << " and " 13496aa735ceSAmir Ayupov << TargetFunction->getPrintName() << '\n'; 13506aa735ceSAmir Ayupov } 1351a34c753fSRafael Auler if (uint64_t Offset = Address - TargetFunction->getAddress()) 1352a34c753fSRafael Auler TargetFunction->addEntryPointAtOffset(Offset); 1353a34c753fSRafael Auler 1354a34c753fSRafael Auler continue; 1355a34c753fSRafael Auler } 1356a34c753fSRafael Auler 1357a34c753fSRafael Auler // Check if address falls in function padding space - this could be 1358a34c753fSRafael Auler // unmarked data in code. In this case adjust the padding space size. 1359a34c753fSRafael Auler ErrorOr<BinarySection &> Section = getSectionForAddress(Address); 1360a34c753fSRafael Auler assert(Section && "cannot get section for referenced address"); 1361a34c753fSRafael Auler 1362a34c753fSRafael Auler if (!Section->isText()) 1363a34c753fSRafael Auler continue; 1364a34c753fSRafael Auler 1365a34c753fSRafael Auler // PLT requires special handling and could be ignored in this context. 1366a34c753fSRafael Auler StringRef SectionName = Section->getName(); 1367a34c753fSRafael Auler if (SectionName == ".plt" || SectionName == ".plt.got") 1368a34c753fSRafael Auler continue; 1369a34c753fSRafael Auler 137035efe1d8SVladislav Khmelevsky // Check if it is aarch64 veneer written at Address 137135efe1d8SVladislav Khmelevsky if (isAArch64() && handleAArch64Veneer(Address)) 137235efe1d8SVladislav Khmelevsky continue; 137335efe1d8SVladislav Khmelevsky 1374a34c753fSRafael Auler if (opts::processAllFunctions()) { 137552cf0711SAmir Ayupov this->errs() << "BOLT-ERROR: cannot process binaries with unmarked " 137652cf0711SAmir Ayupov << "object in code at address 0x" 137752cf0711SAmir Ayupov << Twine::utohexstr(Address) << " belonging to section " 137852cf0711SAmir Ayupov << SectionName << " in current mode\n"; 1379a34c753fSRafael Auler exit(1); 1380a34c753fSRafael Auler } 1381a34c753fSRafael Auler 138240c2e0faSMaksim Panchenko TargetFunction = getBinaryFunctionContainingAddress(Address, 1383a34c753fSRafael Auler /*CheckPastEnd=*/false, 1384a34c753fSRafael Auler /*UseMaxSize=*/true); 1385a34c753fSRafael Auler // We are not going to overwrite non-simple functions, but for simple 1386a34c753fSRafael Auler // ones - adjust the padding size. 1387a34c753fSRafael Auler if (TargetFunction && TargetFunction->isSimple()) { 138852cf0711SAmir Ayupov this->errs() 138952cf0711SAmir Ayupov << "BOLT-WARNING: function " << *TargetFunction 1390a34c753fSRafael Auler << " has an object detected in a padding region at address 0x" 1391a34c753fSRafael Auler << Twine::utohexstr(Address) << '\n'; 1392a34c753fSRafael Auler TargetFunction->setMaxSize(TargetFunction->getSize()); 1393a34c753fSRafael Auler } 1394a34c753fSRafael Auler } 1395a34c753fSRafael Auler 139635efe1d8SVladislav Khmelevsky InterproceduralReferences.clear(); 1397a34c753fSRafael Auler } 1398a34c753fSRafael Auler 1399a34c753fSRafael Auler void BinaryContext::postProcessSymbolTable() { 1400a34c753fSRafael Auler fixBinaryDataHoles(); 1401a34c753fSRafael Auler bool Valid = true; 1402a34c753fSRafael Auler for (auto &Entry : BinaryDataMap) { 1403a34c753fSRafael Auler BinaryData *BD = Entry.second; 1404ad8fd5b1SKazu Hirata if ((BD->getName().starts_with("SYMBOLat") || 1405ad8fd5b1SKazu Hirata BD->getName().starts_with("DATAat")) && 140640c2e0faSMaksim Panchenko !BD->getParent() && !BD->getSize() && !BD->isAbsolute() && 1407e49549ffSDavide Italiano BD->getSection()) { 140852cf0711SAmir Ayupov this->errs() << "BOLT-WARNING: zero-sized top level symbol: " << *BD 140952cf0711SAmir Ayupov << "\n"; 1410a34c753fSRafael Auler Valid = false; 1411a34c753fSRafael Auler } 1412a34c753fSRafael Auler } 1413a34c753fSRafael Auler assert(Valid); 1414c907d6e0SAmir Ayupov (void)Valid; 1415a34c753fSRafael Auler generateSymbolHashes(); 1416a34c753fSRafael Auler } 1417a34c753fSRafael Auler 1418a34c753fSRafael Auler void BinaryContext::foldFunction(BinaryFunction &ChildBF, 1419a34c753fSRafael Auler BinaryFunction &ParentBF) { 1420a34c753fSRafael Auler assert(!ChildBF.isMultiEntry() && !ParentBF.isMultiEntry() && 1421a34c753fSRafael Auler "cannot merge functions with multiple entry points"); 1422a34c753fSRafael Auler 1423e8ce5f1eSNico Weber std::unique_lock<llvm::sys::RWMutex> WriteCtxLock(CtxMutex, std::defer_lock); 1424e8ce5f1eSNico Weber std::unique_lock<llvm::sys::RWMutex> WriteSymbolMapLock( 1425a34c753fSRafael Auler SymbolToFunctionMapMutex, std::defer_lock); 1426a34c753fSRafael Auler 1427a34c753fSRafael Auler const StringRef ChildName = ChildBF.getOneName(); 1428a34c753fSRafael Auler 1429a34c753fSRafael Auler // Move symbols over and update bookkeeping info. 1430a34c753fSRafael Auler for (MCSymbol *Symbol : ChildBF.getSymbols()) { 1431a34c753fSRafael Auler ParentBF.getSymbols().push_back(Symbol); 1432a34c753fSRafael Auler WriteSymbolMapLock.lock(); 1433a34c753fSRafael Auler SymbolToFunctionMap[Symbol] = &ParentBF; 1434a34c753fSRafael Auler WriteSymbolMapLock.unlock(); 1435a34c753fSRafael Auler // NB: there's no need to update BinaryDataMap and GlobalSymbols. 1436a34c753fSRafael Auler } 1437a34c753fSRafael Auler ChildBF.getSymbols().clear(); 1438a34c753fSRafael Auler 1439a34c753fSRafael Auler // Move other names the child function is known under. 1440d2c87699SAmir Ayupov llvm::move(ChildBF.Aliases, std::back_inserter(ParentBF.Aliases)); 1441a34c753fSRafael Auler ChildBF.Aliases.clear(); 1442a34c753fSRafael Auler 1443a34c753fSRafael Auler if (HasRelocations) { 1444a34c753fSRafael Auler // Merge execution counts of ChildBF into those of ParentBF. 1445a34c753fSRafael Auler // Without relocations, we cannot reliably merge profiles as both functions 1446a34c753fSRafael Auler // continue to exist and either one can be executed. 1447a34c753fSRafael Auler ChildBF.mergeProfileDataInto(ParentBF); 1448a34c753fSRafael Auler 1449e8ce5f1eSNico Weber std::shared_lock<llvm::sys::RWMutex> ReadBfsLock(BinaryFunctionsMutex, 1450a34c753fSRafael Auler std::defer_lock); 1451e8ce5f1eSNico Weber std::unique_lock<llvm::sys::RWMutex> WriteBfsLock(BinaryFunctionsMutex, 1452a34c753fSRafael Auler std::defer_lock); 1453a34c753fSRafael Auler // Remove ChildBF from the global set of functions in relocs mode. 1454a34c753fSRafael Auler ReadBfsLock.lock(); 1455a34c753fSRafael Auler auto FI = BinaryFunctions.find(ChildBF.getAddress()); 1456a34c753fSRafael Auler ReadBfsLock.unlock(); 1457a34c753fSRafael Auler 1458a34c753fSRafael Auler assert(FI != BinaryFunctions.end() && "function not found"); 1459a34c753fSRafael Auler assert(&ChildBF == &FI->second && "function mismatch"); 1460a34c753fSRafael Auler 1461a34c753fSRafael Auler WriteBfsLock.lock(); 1462a34c753fSRafael Auler ChildBF.clearDisasmState(); 1463a34c753fSRafael Auler FI = BinaryFunctions.erase(FI); 1464a34c753fSRafael Auler WriteBfsLock.unlock(); 1465a34c753fSRafael Auler 1466a34c753fSRafael Auler } else { 1467a34c753fSRafael Auler // In non-relocation mode we keep the function, but rename it. 1468a34c753fSRafael Auler std::string NewName = "__ICF_" + ChildName.str(); 1469a34c753fSRafael Auler 1470a34c753fSRafael Auler WriteCtxLock.lock(); 1471a34c753fSRafael Auler ChildBF.getSymbols().push_back(Ctx->getOrCreateSymbol(NewName)); 1472a34c753fSRafael Auler WriteCtxLock.unlock(); 1473a34c753fSRafael Auler 1474a34c753fSRafael Auler ChildBF.setFolded(&ParentBF); 1475a34c753fSRafael Auler } 147603e94f66SMaksim Panchenko 147703e94f66SMaksim Panchenko ParentBF.setHasFunctionsFoldedInto(); 1478a34c753fSRafael Auler } 1479a34c753fSRafael Auler 1480a34c753fSRafael Auler void BinaryContext::fixBinaryDataHoles() { 14811a2f8336Sspaette assert(validateObjectNesting() && "object nesting inconsistency detected"); 1482a34c753fSRafael Auler 1483a34c753fSRafael Auler for (BinarySection &Section : allocatableSections()) { 1484a34c753fSRafael Auler std::vector<std::pair<uint64_t, uint64_t>> Holes; 1485a34c753fSRafael Auler 1486a34c753fSRafael Auler auto isNotHole = [&Section](const binary_data_iterator &Itr) { 1487a34c753fSRafael Auler BinaryData *BD = Itr->second; 148840c2e0faSMaksim Panchenko bool isHole = (!BD->getParent() && !BD->getSize() && BD->isObject() && 1489ad8fd5b1SKazu Hirata (BD->getName().starts_with("SYMBOLat0x") || 1490ad8fd5b1SKazu Hirata BD->getName().starts_with("DATAat0x") || 1491ad8fd5b1SKazu Hirata BD->getName().starts_with("ANONYMOUS"))); 1492a34c753fSRafael Auler return !isHole && BD->getSection() == Section && !BD->getParent(); 1493a34c753fSRafael Auler }; 1494a34c753fSRafael Auler 1495a34c753fSRafael Auler auto BDStart = BinaryDataMap.begin(); 1496a34c753fSRafael Auler auto BDEnd = BinaryDataMap.end(); 1497a34c753fSRafael Auler auto Itr = FilteredBinaryDataIterator(isNotHole, BDStart, BDEnd); 1498a34c753fSRafael Auler auto End = FilteredBinaryDataIterator(isNotHole, BDEnd, BDEnd); 1499a34c753fSRafael Auler 1500a34c753fSRafael Auler uint64_t EndAddress = Section.getAddress(); 1501a34c753fSRafael Auler 1502a34c753fSRafael Auler while (Itr != End) { 1503a34c753fSRafael Auler if (Itr->second->getAddress() > EndAddress) { 1504a34c753fSRafael Auler uint64_t Gap = Itr->second->getAddress() - EndAddress; 1505a34c753fSRafael Auler Holes.emplace_back(EndAddress, Gap); 1506a34c753fSRafael Auler } 1507a34c753fSRafael Auler EndAddress = Itr->second->getEndAddress(); 1508a34c753fSRafael Auler ++Itr; 1509a34c753fSRafael Auler } 1510a34c753fSRafael Auler 15113652483cSRafael Auler if (EndAddress < Section.getEndAddress()) 1512a34c753fSRafael Auler Holes.emplace_back(EndAddress, Section.getEndAddress() - EndAddress); 1513a34c753fSRafael Auler 1514a34c753fSRafael Auler // If there is already a symbol at the start of the hole, grow that symbol 1515a34c753fSRafael Auler // to cover the rest. Otherwise, create a new symbol to cover the hole. 1516a34c753fSRafael Auler for (std::pair<uint64_t, uint64_t> &Hole : Holes) { 1517a34c753fSRafael Auler BinaryData *BD = getBinaryDataAtAddress(Hole.first); 1518a34c753fSRafael Auler if (BD) { 1519a34c753fSRafael Auler // BD->getSection() can be != Section if there are sections that 1520a34c753fSRafael Auler // overlap. In this case it is probably safe to just skip the holes 1521a34c753fSRafael Auler // since the overlapping section will not(?) have any symbols in it. 1522a34c753fSRafael Auler if (BD->getSection() == Section) 1523a34c753fSRafael Auler setBinaryDataSize(Hole.first, Hole.second); 1524a34c753fSRafael Auler } else { 1525a34c753fSRafael Auler getOrCreateGlobalSymbol(Hole.first, "HOLEat", Hole.second, 1); 1526a34c753fSRafael Auler } 1527a34c753fSRafael Auler } 1528a34c753fSRafael Auler } 1529a34c753fSRafael Auler 15301a2f8336Sspaette assert(validateObjectNesting() && "object nesting inconsistency detected"); 1531a34c753fSRafael Auler assert(validateHoles() && "top level hole detected in object map"); 1532a34c753fSRafael Auler } 1533a34c753fSRafael Auler 1534a34c753fSRafael Auler void BinaryContext::printGlobalSymbols(raw_ostream &OS) const { 1535a34c753fSRafael Auler const BinarySection *CurrentSection = nullptr; 1536a34c753fSRafael Auler bool FirstSection = true; 1537a34c753fSRafael Auler 1538a34c753fSRafael Auler for (auto &Entry : BinaryDataMap) { 1539a34c753fSRafael Auler const BinaryData *BD = Entry.second; 1540a34c753fSRafael Auler const BinarySection &Section = BD->getSection(); 1541a34c753fSRafael Auler if (FirstSection || Section != *CurrentSection) { 1542a34c753fSRafael Auler uint64_t Address, Size; 1543a34c753fSRafael Auler StringRef Name = Section.getName(); 1544a34c753fSRafael Auler if (Section) { 1545a34c753fSRafael Auler Address = Section.getAddress(); 1546a34c753fSRafael Auler Size = Section.getSize(); 1547a34c753fSRafael Auler } else { 1548a34c753fSRafael Auler Address = BD->getAddress(); 1549a34c753fSRafael Auler Size = BD->getSize(); 1550a34c753fSRafael Auler } 1551a34c753fSRafael Auler OS << "BOLT-INFO: Section " << Name << ", " 1552a34c753fSRafael Auler << "0x" + Twine::utohexstr(Address) << ":" 155340c2e0faSMaksim Panchenko << "0x" + Twine::utohexstr(Address + Size) << "/" << Size << "\n"; 1554a34c753fSRafael Auler CurrentSection = &Section; 1555a34c753fSRafael Auler FirstSection = false; 1556a34c753fSRafael Auler } 1557a34c753fSRafael Auler 1558a34c753fSRafael Auler OS << "BOLT-INFO: "; 1559a34c753fSRafael Auler const BinaryData *P = BD->getParent(); 1560a34c753fSRafael Auler while (P) { 1561a34c753fSRafael Auler OS << " "; 1562a34c753fSRafael Auler P = P->getParent(); 1563a34c753fSRafael Auler } 1564a34c753fSRafael Auler OS << *BD << "\n"; 1565a34c753fSRafael Auler } 1566a34c753fSRafael Auler } 1567a34c753fSRafael Auler 1568014cd37fSAlexander Yermolovich Expected<unsigned> BinaryContext::getDwarfFile( 1569014cd37fSAlexander Yermolovich StringRef Directory, StringRef FileName, unsigned FileNumber, 1570f4c16c44SFangrui Song std::optional<MD5::MD5Result> Checksum, std::optional<StringRef> Source, 1571014cd37fSAlexander Yermolovich unsigned CUID, unsigned DWARFVersion) { 1572a34c753fSRafael Auler DwarfLineTable &Table = DwarfLineTablesCUMap[CUID]; 1573014cd37fSAlexander Yermolovich return Table.tryGetFile(Directory, FileName, Checksum, Source, DWARFVersion, 1574014cd37fSAlexander Yermolovich FileNumber); 1575a34c753fSRafael Auler } 1576a34c753fSRafael Auler 1577a34c753fSRafael Auler unsigned BinaryContext::addDebugFilenameToUnit(const uint32_t DestCUID, 1578a34c753fSRafael Auler const uint32_t SrcCUID, 1579a34c753fSRafael Auler unsigned FileIndex) { 1580a34c753fSRafael Auler DWARFCompileUnit *SrcUnit = DwCtx->getCompileUnitForOffset(SrcCUID); 1581a34c753fSRafael Auler const DWARFDebugLine::LineTable *LineTable = 1582a34c753fSRafael Auler DwCtx->getLineTableForUnit(SrcUnit); 1583a34c753fSRafael Auler const std::vector<DWARFDebugLine::FileNameEntry> &FileNames = 1584a34c753fSRafael Auler LineTable->Prologue.FileNames; 1585a34c753fSRafael Auler // Dir indexes start at 1, as DWARF file numbers, and a dir index 0 1586a34c753fSRafael Auler // means empty dir. 1587a34c753fSRafael Auler assert(FileIndex > 0 && FileIndex <= FileNames.size() && 1588a34c753fSRafael Auler "FileIndex out of range for the compilation unit."); 1589a34c753fSRafael Auler StringRef Dir = ""; 1590a34c753fSRafael Auler if (FileNames[FileIndex - 1].DirIdx != 0) { 159189fab98eSFangrui Song if (std::optional<const char *> DirName = dwarf::toString( 1592a34c753fSRafael Auler LineTable->Prologue 1593a34c753fSRafael Auler .IncludeDirectories[FileNames[FileIndex - 1].DirIdx - 1])) { 1594a34c753fSRafael Auler Dir = *DirName; 1595a34c753fSRafael Auler } 1596a34c753fSRafael Auler } 1597a34c753fSRafael Auler StringRef FileName = ""; 159889fab98eSFangrui Song if (std::optional<const char *> FName = 1599a34c753fSRafael Auler dwarf::toString(FileNames[FileIndex - 1].Name)) 1600a34c753fSRafael Auler FileName = *FName; 1601a34c753fSRafael Auler assert(FileName != ""); 1602014cd37fSAlexander Yermolovich DWARFCompileUnit *DstUnit = DwCtx->getCompileUnitForOffset(DestCUID); 1603e324a80fSKazu Hirata return cantFail(getDwarfFile(Dir, FileName, 0, std::nullopt, std::nullopt, 1604e324a80fSKazu Hirata DestCUID, DstUnit->getVersion())); 1605a34c753fSRafael Auler } 1606a34c753fSRafael Auler 1607a34c753fSRafael Auler std::vector<BinaryFunction *> BinaryContext::getSortedFunctions() { 1608a34c753fSRafael Auler std::vector<BinaryFunction *> SortedFunctions(BinaryFunctions.size()); 160972e5b14fSAmir Ayupov llvm::transform(llvm::make_second_range(BinaryFunctions), 161072e5b14fSAmir Ayupov SortedFunctions.begin(), 161172e5b14fSAmir Ayupov [](BinaryFunction &BF) { return &BF; }); 1612a34c753fSRafael Auler 16134d2bc0adSEnna1 llvm::stable_sort(SortedFunctions, compareBinaryFunctionByIndex); 1614a34c753fSRafael Auler return SortedFunctions; 1615a34c753fSRafael Auler } 1616a34c753fSRafael Auler 1617a34c753fSRafael Auler std::vector<BinaryFunction *> BinaryContext::getAllBinaryFunctions() { 1618a34c753fSRafael Auler std::vector<BinaryFunction *> AllFunctions; 1619a34c753fSRafael Auler AllFunctions.reserve(BinaryFunctions.size() + InjectedBinaryFunctions.size()); 162072e5b14fSAmir Ayupov llvm::transform(llvm::make_second_range(BinaryFunctions), 162172e5b14fSAmir Ayupov std::back_inserter(AllFunctions), 162272e5b14fSAmir Ayupov [](BinaryFunction &BF) { return &BF; }); 1623d2c87699SAmir Ayupov llvm::copy(InjectedBinaryFunctions, std::back_inserter(AllFunctions)); 1624a34c753fSRafael Auler 1625a34c753fSRafael Auler return AllFunctions; 1626a34c753fSRafael Auler } 1627a34c753fSRafael Auler 1628e8f5743eSAmir Ayupov std::optional<DWARFUnit *> BinaryContext::getDWOCU(uint64_t DWOId) { 1629a34c753fSRafael Auler auto Iter = DWOCUs.find(DWOId); 1630a34c753fSRafael Auler if (Iter == DWOCUs.end()) 1631e324a80fSKazu Hirata return std::nullopt; 1632a34c753fSRafael Auler 1633a34c753fSRafael Auler return Iter->second; 1634a34c753fSRafael Auler } 1635a34c753fSRafael Auler 16367dee646bSAmir Ayupov DWARFContext *BinaryContext::getDWOContext() const { 1637a34c753fSRafael Auler if (DWOCUs.empty()) 1638a34c753fSRafael Auler return nullptr; 1639a34c753fSRafael Auler return &DWOCUs.begin()->second->getContext(); 1640a34c753fSRafael Auler } 1641a34c753fSRafael Auler 1642a34c753fSRafael Auler /// Handles DWO sections that can either be in .o, .dwo or .dwp files. 1643a34c753fSRafael Auler void BinaryContext::preprocessDWODebugInfo() { 1644a34c753fSRafael Auler for (const std::unique_ptr<DWARFUnit> &CU : DwCtx->compile_units()) { 1645a34c753fSRafael Auler DWARFUnit *const DwarfUnit = CU.get(); 164689fab98eSFangrui Song if (std::optional<uint64_t> DWOId = DwarfUnit->getDWOId()) { 1647a34c753fSRafael Auler std::string DWOName = dwarf::toString( 1648a34c753fSRafael Auler DwarfUnit->getUnitDIE().find( 1649a34c753fSRafael Auler {dwarf::DW_AT_dwo_name, dwarf::DW_AT_GNU_dwo_name}), 1650a34c753fSRafael Auler ""); 16517d272722SAlexander Yermolovich SmallString<16> AbsolutePath; 16527d272722SAlexander Yermolovich if (!opts::CompDirOverride.empty()) { 16537d272722SAlexander Yermolovich sys::path::append(AbsolutePath, opts::CompDirOverride); 16547d272722SAlexander Yermolovich sys::path::append(AbsolutePath, DWOName); 16557d272722SAlexander Yermolovich } 16567d272722SAlexander Yermolovich DWARFUnit *DWOCU = 16577d272722SAlexander Yermolovich DwarfUnit->getNonSkeletonUnitDIE(false, AbsolutePath).getDwarfUnit(); 16587d272722SAlexander Yermolovich if (!DWOCU->isDWOUnit()) { 165952cf0711SAmir Ayupov this->outs() 166052cf0711SAmir Ayupov << "BOLT-WARNING: Debug Fission: DWO debug information for " 1661a34c753fSRafael Auler << DWOName 1662a34c753fSRafael Auler << " was not retrieved and won't be updated. Please check " 1663a34c753fSRafael Auler "relative path.\n"; 1664a34c753fSRafael Auler continue; 1665a34c753fSRafael Auler } 1666a34c753fSRafael Auler DWOCUs[*DWOId] = DWOCU; 1667a34c753fSRafael Auler } 1668a34c753fSRafael Auler } 1669864133c5SAlexander Yermolovich if (!DWOCUs.empty()) 167052cf0711SAmir Ayupov this->outs() << "BOLT-INFO: processing split DWARF\n"; 1671a34c753fSRafael Auler } 1672a34c753fSRafael Auler 1673a34c753fSRafael Auler void BinaryContext::preprocessDebugInfo() { 1674a34c753fSRafael Auler struct CURange { 1675a34c753fSRafael Auler uint64_t LowPC; 1676a34c753fSRafael Auler uint64_t HighPC; 1677a34c753fSRafael Auler DWARFUnit *Unit; 1678a34c753fSRafael Auler 167940c2e0faSMaksim Panchenko bool operator<(const CURange &Other) const { return LowPC < Other.LowPC; } 1680a34c753fSRafael Auler }; 1681a34c753fSRafael Auler 1682a34c753fSRafael Auler // Building a map of address ranges to CUs similar to .debug_aranges and use 1683a34c753fSRafael Auler // it to assign CU to functions. 1684a34c753fSRafael Auler std::vector<CURange> AllRanges; 1685a34c753fSRafael Auler AllRanges.reserve(DwCtx->getNumCompileUnits()); 1686a34c753fSRafael Auler for (const std::unique_ptr<DWARFUnit> &CU : DwCtx->compile_units()) { 1687a34c753fSRafael Auler Expected<DWARFAddressRangesVector> RangesOrError = 1688a34c753fSRafael Auler CU->getUnitDIE().getAddressRanges(); 1689a34c753fSRafael Auler if (!RangesOrError) { 1690a34c753fSRafael Auler consumeError(RangesOrError.takeError()); 1691a34c753fSRafael Auler continue; 1692a34c753fSRafael Auler } 1693a34c753fSRafael Auler for (DWARFAddressRange &Range : *RangesOrError) { 1694a34c753fSRafael Auler // Parts of the debug info could be invalidated due to corresponding code 1695a34c753fSRafael Auler // being removed from the binary by the linker. Hence we check if the 1696a34c753fSRafael Auler // address is a valid one. 1697a34c753fSRafael Auler if (containsAddress(Range.LowPC)) 1698a34c753fSRafael Auler AllRanges.emplace_back(CURange{Range.LowPC, Range.HighPC, CU.get()}); 1699a34c753fSRafael Auler } 1700014cd37fSAlexander Yermolovich 1701014cd37fSAlexander Yermolovich ContainsDwarf5 |= CU->getVersion() >= 5; 1702014cd37fSAlexander Yermolovich ContainsDwarfLegacy |= CU->getVersion() < 5; 1703a34c753fSRafael Auler } 1704a34c753fSRafael Auler 1705d2c87699SAmir Ayupov llvm::sort(AllRanges); 1706a34c753fSRafael Auler for (auto &KV : BinaryFunctions) { 1707a34c753fSRafael Auler const uint64_t FunctionAddress = KV.first; 1708a34c753fSRafael Auler BinaryFunction &Function = KV.second; 1709a34c753fSRafael Auler 1710d2c87699SAmir Ayupov auto It = llvm::partition_point( 1711d2c87699SAmir Ayupov AllRanges, [=](CURange R) { return R.HighPC <= FunctionAddress; }); 1712d2c87699SAmir Ayupov if (It != AllRanges.end() && It->LowPC <= FunctionAddress) 1713a34c753fSRafael Auler Function.setDWARFUnit(It->Unit); 1714a34c753fSRafael Auler } 1715a34c753fSRafael Auler 1716a34c753fSRafael Auler // Discover units with debug info that needs to be updated. 1717a34c753fSRafael Auler for (const auto &KV : BinaryFunctions) { 1718a34c753fSRafael Auler const BinaryFunction &BF = KV.second; 1719a34c753fSRafael Auler if (shouldEmit(BF) && BF.getDWARFUnit()) 1720a34c753fSRafael Auler ProcessedCUs.insert(BF.getDWARFUnit()); 1721a34c753fSRafael Auler } 1722a34c753fSRafael Auler 1723a34c753fSRafael Auler // Clear debug info for functions from units that we are not going to process. 1724a34c753fSRafael Auler for (auto &KV : BinaryFunctions) { 1725a34c753fSRafael Auler BinaryFunction &BF = KV.second; 1726a34c753fSRafael Auler if (BF.getDWARFUnit() && !ProcessedCUs.count(BF.getDWARFUnit())) 1727a34c753fSRafael Auler BF.setDWARFUnit(nullptr); 1728a34c753fSRafael Auler } 1729a34c753fSRafael Auler 1730a34c753fSRafael Auler if (opts::Verbosity >= 1) { 173152cf0711SAmir Ayupov this->outs() << "BOLT-INFO: " << ProcessedCUs.size() << " out of " 1732a34c753fSRafael Auler << DwCtx->getNumCompileUnits() << " CUs will be updated\n"; 1733a34c753fSRafael Auler } 1734a34c753fSRafael Auler 1735ba1ac98cSAlexander Yermolovich preprocessDWODebugInfo(); 1736ba1ac98cSAlexander Yermolovich 1737a34c753fSRafael Auler // Populate MCContext with DWARF files from all units. 1738a34c753fSRafael Auler StringRef GlobalPrefix = AsmInfo->getPrivateGlobalPrefix(); 1739a34c753fSRafael Auler for (const std::unique_ptr<DWARFUnit> &CU : DwCtx->compile_units()) { 1740a34c753fSRafael Auler const uint64_t CUID = CU->getOffset(); 1741014cd37fSAlexander Yermolovich DwarfLineTable &BinaryLineTable = getDwarfLineTable(CUID); 1742014cd37fSAlexander Yermolovich BinaryLineTable.setLabel(Ctx->getOrCreateSymbol( 1743a34c753fSRafael Auler GlobalPrefix + "line_table_start" + Twine(CUID))); 1744a34c753fSRafael Auler 1745a34c753fSRafael Auler if (!ProcessedCUs.count(CU.get())) 1746a34c753fSRafael Auler continue; 1747a34c753fSRafael Auler 1748a34c753fSRafael Auler const DWARFDebugLine::LineTable *LineTable = 1749a34c753fSRafael Auler DwCtx->getLineTableForUnit(CU.get()); 1750a34c753fSRafael Auler const std::vector<DWARFDebugLine::FileNameEntry> &FileNames = 1751a34c753fSRafael Auler LineTable->Prologue.FileNames; 1752a34c753fSRafael Auler 1753014cd37fSAlexander Yermolovich uint16_t DwarfVersion = LineTable->Prologue.getVersion(); 1754014cd37fSAlexander Yermolovich if (DwarfVersion >= 5) { 1755f4c16c44SFangrui Song std::optional<MD5::MD5Result> Checksum; 1756014cd37fSAlexander Yermolovich if (LineTable->Prologue.ContentTypes.HasMD5) 1757014cd37fSAlexander Yermolovich Checksum = LineTable->Prologue.FileNames[0].Checksum; 175889fab98eSFangrui Song std::optional<const char *> Name = 1759ba1ac98cSAlexander Yermolovich dwarf::toString(CU->getUnitDIE().find(dwarf::DW_AT_name), nullptr); 176089fab98eSFangrui Song if (std::optional<uint64_t> DWOID = CU->getDWOId()) { 1761ba1ac98cSAlexander Yermolovich auto Iter = DWOCUs.find(*DWOID); 1762ba1ac98cSAlexander Yermolovich assert(Iter != DWOCUs.end() && "DWO CU was not found."); 1763ba1ac98cSAlexander Yermolovich Name = dwarf::toString( 1764ba1ac98cSAlexander Yermolovich Iter->second->getUnitDIE().find(dwarf::DW_AT_name), nullptr); 1765ba1ac98cSAlexander Yermolovich } 1766ba1ac98cSAlexander Yermolovich BinaryLineTable.setRootFile(CU->getCompilationDir(), *Name, Checksum, 1767e324a80fSKazu Hirata std::nullopt); 1768014cd37fSAlexander Yermolovich } 1769014cd37fSAlexander Yermolovich 1770014cd37fSAlexander Yermolovich BinaryLineTable.setDwarfVersion(DwarfVersion); 1771014cd37fSAlexander Yermolovich 1772a34c753fSRafael Auler // Assign a unique label to every line table, one per CU. 1773a34c753fSRafael Auler // Make sure empty debug line tables are registered too. 1774a34c753fSRafael Auler if (FileNames.empty()) { 1775e324a80fSKazu Hirata cantFail(getDwarfFile("", "<unknown>", 0, std::nullopt, std::nullopt, 1776e324a80fSKazu Hirata CUID, DwarfVersion)); 1777a34c753fSRafael Auler continue; 1778a34c753fSRafael Auler } 1779014cd37fSAlexander Yermolovich const uint32_t Offset = DwarfVersion < 5 ? 1 : 0; 1780a34c753fSRafael Auler for (size_t I = 0, Size = FileNames.size(); I != Size; ++I) { 1781a34c753fSRafael Auler // Dir indexes start at 1, as DWARF file numbers, and a dir index 0 1782a34c753fSRafael Auler // means empty dir. 1783a34c753fSRafael Auler StringRef Dir = ""; 1784014cd37fSAlexander Yermolovich if (FileNames[I].DirIdx != 0 || DwarfVersion >= 5) 178589fab98eSFangrui Song if (std::optional<const char *> DirName = dwarf::toString( 1786a34c753fSRafael Auler LineTable->Prologue 1787014cd37fSAlexander Yermolovich .IncludeDirectories[FileNames[I].DirIdx - Offset])) 1788a34c753fSRafael Auler Dir = *DirName; 1789a34c753fSRafael Auler StringRef FileName = ""; 179089fab98eSFangrui Song if (std::optional<const char *> FName = 179189fab98eSFangrui Song dwarf::toString(FileNames[I].Name)) 1792a34c753fSRafael Auler FileName = *FName; 1793a34c753fSRafael Auler assert(FileName != ""); 1794f4c16c44SFangrui Song std::optional<MD5::MD5Result> Checksum; 1795014cd37fSAlexander Yermolovich if (DwarfVersion >= 5 && LineTable->Prologue.ContentTypes.HasMD5) 1796014cd37fSAlexander Yermolovich Checksum = LineTable->Prologue.FileNames[I].Checksum; 1797e324a80fSKazu Hirata cantFail(getDwarfFile(Dir, FileName, 0, Checksum, std::nullopt, CUID, 1798e324a80fSKazu Hirata DwarfVersion)); 1799a34c753fSRafael Auler } 1800a34c753fSRafael Auler } 1801a34c753fSRafael Auler } 1802a34c753fSRafael Auler 1803a34c753fSRafael Auler bool BinaryContext::shouldEmit(const BinaryFunction &Function) const { 18044c14519eSVladislav Khmelevsky if (Function.isPseudo()) 18054c14519eSVladislav Khmelevsky return false; 18064c14519eSVladislav Khmelevsky 1807a34c753fSRafael Auler if (opts::processAllFunctions()) 1808a34c753fSRafael Auler return true; 1809a34c753fSRafael Auler 1810a34c753fSRafael Auler if (Function.isIgnored()) 1811a34c753fSRafael Auler return false; 1812a34c753fSRafael Auler 1813a34c753fSRafael Auler // In relocation mode we will emit non-simple functions with CFG. 1814a34c753fSRafael Auler // If the function does not have a CFG it should be marked as ignored. 1815a34c753fSRafael Auler return HasRelocations || Function.isSimple(); 1816a34c753fSRafael Auler } 1817a34c753fSRafael Auler 1818f3e54f2fSllongint void BinaryContext::dump(const MCInst &Inst) const { 1819f3e54f2fSllongint if (LLVM_UNLIKELY(!InstPrinter)) { 1820f3e54f2fSllongint dbgs() << "Cannot dump for InstPrinter is not initialized.\n"; 1821f3e54f2fSllongint return; 1822f3e54f2fSllongint } 1823f3e54f2fSllongint InstPrinter->printInst(&Inst, 0, "", *STI, dbgs()); 1824f3e54f2fSllongint dbgs() << "\n"; 1825f3e54f2fSllongint } 1826f3e54f2fSllongint 1827a34c753fSRafael Auler void BinaryContext::printCFI(raw_ostream &OS, const MCCFIInstruction &Inst) { 1828a34c753fSRafael Auler uint32_t Operation = Inst.getOperation(); 1829a34c753fSRafael Auler switch (Operation) { 1830a34c753fSRafael Auler case MCCFIInstruction::OpSameValue: 1831a34c753fSRafael Auler OS << "OpSameValue Reg" << Inst.getRegister(); 1832a34c753fSRafael Auler break; 1833a34c753fSRafael Auler case MCCFIInstruction::OpRememberState: 1834a34c753fSRafael Auler OS << "OpRememberState"; 1835a34c753fSRafael Auler break; 1836a34c753fSRafael Auler case MCCFIInstruction::OpRestoreState: 1837a34c753fSRafael Auler OS << "OpRestoreState"; 1838a34c753fSRafael Auler break; 1839a34c753fSRafael Auler case MCCFIInstruction::OpOffset: 1840a34c753fSRafael Auler OS << "OpOffset Reg" << Inst.getRegister() << " " << Inst.getOffset(); 1841a34c753fSRafael Auler break; 1842a34c753fSRafael Auler case MCCFIInstruction::OpDefCfaRegister: 1843a34c753fSRafael Auler OS << "OpDefCfaRegister Reg" << Inst.getRegister(); 1844a34c753fSRafael Auler break; 1845a34c753fSRafael Auler case MCCFIInstruction::OpDefCfaOffset: 1846a34c753fSRafael Auler OS << "OpDefCfaOffset " << Inst.getOffset(); 1847a34c753fSRafael Auler break; 1848a34c753fSRafael Auler case MCCFIInstruction::OpDefCfa: 1849a34c753fSRafael Auler OS << "OpDefCfa Reg" << Inst.getRegister() << " " << Inst.getOffset(); 1850a34c753fSRafael Auler break; 1851a34c753fSRafael Auler case MCCFIInstruction::OpRelOffset: 1852a34c753fSRafael Auler OS << "OpRelOffset Reg" << Inst.getRegister() << " " << Inst.getOffset(); 1853a34c753fSRafael Auler break; 1854a34c753fSRafael Auler case MCCFIInstruction::OpAdjustCfaOffset: 1855a34c753fSRafael Auler OS << "OfAdjustCfaOffset " << Inst.getOffset(); 1856a34c753fSRafael Auler break; 1857a34c753fSRafael Auler case MCCFIInstruction::OpEscape: 1858a34c753fSRafael Auler OS << "OpEscape"; 1859a34c753fSRafael Auler break; 1860a34c753fSRafael Auler case MCCFIInstruction::OpRestore: 1861a34c753fSRafael Auler OS << "OpRestore Reg" << Inst.getRegister(); 1862a34c753fSRafael Auler break; 1863a34c753fSRafael Auler case MCCFIInstruction::OpUndefined: 1864a34c753fSRafael Auler OS << "OpUndefined Reg" << Inst.getRegister(); 1865a34c753fSRafael Auler break; 1866a34c753fSRafael Auler case MCCFIInstruction::OpRegister: 1867a34c753fSRafael Auler OS << "OpRegister Reg" << Inst.getRegister() << " Reg" 1868a34c753fSRafael Auler << Inst.getRegister2(); 1869a34c753fSRafael Auler break; 1870a34c753fSRafael Auler case MCCFIInstruction::OpWindowSave: 1871a34c753fSRafael Auler OS << "OpWindowSave"; 1872a34c753fSRafael Auler break; 1873a34c753fSRafael Auler case MCCFIInstruction::OpGnuArgsSize: 1874a34c753fSRafael Auler OS << "OpGnuArgsSize"; 1875a34c753fSRafael Auler break; 1876a34c753fSRafael Auler default: 1877a34c753fSRafael Auler OS << "Op#" << Operation; 1878a34c753fSRafael Auler break; 1879a34c753fSRafael Auler } 1880a34c753fSRafael Auler } 1881a34c753fSRafael Auler 18828579db96SDenis Revunov MarkerSymType BinaryContext::getMarkerType(const SymbolRef &Symbol) const { 1883fc395884SJob Noorman // For aarch64 and riscv, the ABI defines mapping symbols so we identify data 1884fc395884SJob Noorman // in the code section (see IHI0056B). $x identifies a symbol starting code or 18851a2f8336Sspaette // the end of a data chunk inside code, $d identifies start of data. 1886603fa4c6SNathan Sidwell if (isX86() || ELFSymbolRef(Symbol).getSize()) 18878579db96SDenis Revunov return MarkerSymType::NONE; 18888579db96SDenis Revunov 18898579db96SDenis Revunov Expected<StringRef> NameOrError = Symbol.getName(); 18908579db96SDenis Revunov Expected<object::SymbolRef::Type> TypeOrError = Symbol.getType(); 18918579db96SDenis Revunov 18928579db96SDenis Revunov if (!TypeOrError || !NameOrError) 18938579db96SDenis Revunov return MarkerSymType::NONE; 18948579db96SDenis Revunov 18958579db96SDenis Revunov if (*TypeOrError != SymbolRef::ST_Unknown) 18968579db96SDenis Revunov return MarkerSymType::NONE; 18978579db96SDenis Revunov 1898ad8fd5b1SKazu Hirata if (*NameOrError == "$x" || NameOrError->starts_with("$x.")) 18998579db96SDenis Revunov return MarkerSymType::CODE; 19008579db96SDenis Revunov 1901c6f065d9SJob Noorman // $x<ISA> 1902ad8fd5b1SKazu Hirata if (isRISCV() && NameOrError->starts_with("$x")) 1903c6f065d9SJob Noorman return MarkerSymType::CODE; 1904c6f065d9SJob Noorman 1905ad8fd5b1SKazu Hirata if (*NameOrError == "$d" || NameOrError->starts_with("$d.")) 19068579db96SDenis Revunov return MarkerSymType::DATA; 19078579db96SDenis Revunov 19088579db96SDenis Revunov return MarkerSymType::NONE; 19098579db96SDenis Revunov } 19108579db96SDenis Revunov 19118579db96SDenis Revunov bool BinaryContext::isMarker(const SymbolRef &Symbol) const { 19128579db96SDenis Revunov return getMarkerType(Symbol) != MarkerSymType::NONE; 19138579db96SDenis Revunov } 19148579db96SDenis Revunov 19157dee646bSAmir Ayupov static void printDebugInfo(raw_ostream &OS, const MCInst &Instruction, 19167dee646bSAmir Ayupov const BinaryFunction *Function, 19177dee646bSAmir Ayupov DWARFContext *DwCtx) { 19187dee646bSAmir Ayupov DebugLineTableRowRef RowRef = 19197dee646bSAmir Ayupov DebugLineTableRowRef::fromSMLoc(Instruction.getLoc()); 19207dee646bSAmir Ayupov if (RowRef == DebugLineTableRowRef::NULL_ROW) 19217dee646bSAmir Ayupov return; 19227dee646bSAmir Ayupov 19237dee646bSAmir Ayupov const DWARFDebugLine::LineTable *LineTable; 19247dee646bSAmir Ayupov if (Function && Function->getDWARFUnit() && 19257dee646bSAmir Ayupov Function->getDWARFUnit()->getOffset() == RowRef.DwCompileUnitIndex) { 19267dee646bSAmir Ayupov LineTable = Function->getDWARFLineTable(); 19277dee646bSAmir Ayupov } else { 19287dee646bSAmir Ayupov LineTable = DwCtx->getLineTableForUnit( 19297dee646bSAmir Ayupov DwCtx->getCompileUnitForOffset(RowRef.DwCompileUnitIndex)); 19307dee646bSAmir Ayupov } 19317dee646bSAmir Ayupov assert(LineTable && "line table expected for instruction with debug info"); 19327dee646bSAmir Ayupov 19337dee646bSAmir Ayupov const DWARFDebugLine::Row &Row = LineTable->Rows[RowRef.RowIndex - 1]; 19347dee646bSAmir Ayupov StringRef FileName = ""; 193589fab98eSFangrui Song if (std::optional<const char *> FName = 19367dee646bSAmir Ayupov dwarf::toString(LineTable->Prologue.FileNames[Row.File - 1].Name)) 19377dee646bSAmir Ayupov FileName = *FName; 19387dee646bSAmir Ayupov OS << " # debug line " << FileName << ":" << Row.Line; 19397dee646bSAmir Ayupov if (Row.Column) 19407dee646bSAmir Ayupov OS << ":" << Row.Column; 19417dee646bSAmir Ayupov if (Row.Discriminator) 19427dee646bSAmir Ayupov OS << " discriminator:" << Row.Discriminator; 19437dee646bSAmir Ayupov } 19447dee646bSAmir Ayupov 194540c2e0faSMaksim Panchenko void BinaryContext::printInstruction(raw_ostream &OS, const MCInst &Instruction, 1946a34c753fSRafael Auler uint64_t Offset, 1947a34c753fSRafael Auler const BinaryFunction *Function, 194840c2e0faSMaksim Panchenko bool PrintMCInst, bool PrintMemData, 194969f87b6cSAmir Ayupov bool PrintRelocations, 195069f87b6cSAmir Ayupov StringRef Endl) const { 1951a34c753fSRafael Auler OS << format(" %08" PRIx64 ": ", Offset); 1952a34c753fSRafael Auler if (MIB->isCFI(Instruction)) { 1953a34c753fSRafael Auler uint32_t Offset = Instruction.getOperand(0).getImm(); 1954a34c753fSRafael Auler OS << "\t!CFI\t$" << Offset << "\t; "; 1955a34c753fSRafael Auler if (Function) 1956a34c753fSRafael Auler printCFI(OS, *Function->getCFIFor(Instruction)); 195769f87b6cSAmir Ayupov OS << Endl; 1958a34c753fSRafael Auler return; 1959a34c753fSRafael Auler } 19606b1cf004SMaksim Panchenko if (std::optional<uint32_t> DynamicID = 19616b1cf004SMaksim Panchenko MIB->getDynamicBranchID(Instruction)) { 19626b1cf004SMaksim Panchenko OS << "\tjit\t" << MIB->getTargetSymbol(Instruction)->getName() 19636b1cf004SMaksim Panchenko << " # ID: " << DynamicID; 19646b1cf004SMaksim Panchenko } else { 19654111841fSKristof Beyls // If there are annotations on the instruction, the MCInstPrinter will fail 19664111841fSKristof Beyls // to print the preferred alias as it only does so when the number of 19674111841fSKristof Beyls // operands is as expected. See 19684111841fSKristof Beyls // https://github.com/llvm/llvm-project/blob/782f1a0d895646c364a53f9dcdd6d4ec1f3e5ea0/llvm/lib/MC/MCInstPrinter.cpp#L142 19694111841fSKristof Beyls // Therefore, create a temporary copy of the Inst from which the annotations 19704111841fSKristof Beyls // are removed, and print that Inst. 19714111841fSKristof Beyls MCInst InstNoAnnot = Instruction; 19724111841fSKristof Beyls MIB->stripAnnotations(InstNoAnnot); 19734111841fSKristof Beyls InstPrinter->printInst(&InstNoAnnot, 0, "", *STI, OS); 19746b1cf004SMaksim Panchenko } 1975a34c753fSRafael Auler if (MIB->isCall(Instruction)) { 1976a34c753fSRafael Auler if (MIB->isTailCall(Instruction)) 1977a34c753fSRafael Auler OS << " # TAILCALL "; 1978a34c753fSRafael Auler if (MIB->isInvoke(Instruction)) { 19792563fd63SAmir Ayupov const std::optional<MCPlus::MCLandingPad> EHInfo = 19802563fd63SAmir Ayupov MIB->getEHInfo(Instruction); 1981a34c753fSRafael Auler OS << " # handler: "; 1982a34c753fSRafael Auler if (EHInfo->first) 1983a34c753fSRafael Auler OS << *EHInfo->first; 1984a34c753fSRafael Auler else 1985a34c753fSRafael Auler OS << '0'; 1986a34c753fSRafael Auler OS << "; action: " << EHInfo->second; 1987a34c753fSRafael Auler const int64_t GnuArgsSize = MIB->getGnuArgsSize(Instruction); 1988a34c753fSRafael Auler if (GnuArgsSize >= 0) 1989a34c753fSRafael Auler OS << "; GNU_args_size = " << GnuArgsSize; 1990a34c753fSRafael Auler } 1991a34c753fSRafael Auler } else if (MIB->isIndirectBranch(Instruction)) { 1992a34c753fSRafael Auler if (uint64_t JTAddress = MIB->getJumpTable(Instruction)) { 1993a34c753fSRafael Auler OS << " # JUMPTABLE @0x" << Twine::utohexstr(JTAddress); 1994a34c753fSRafael Auler } else { 1995a34c753fSRafael Auler OS << " # UNKNOWN CONTROL FLOW"; 1996a34c753fSRafael Auler } 1997a34c753fSRafael Auler } 19982563fd63SAmir Ayupov if (std::optional<uint32_t> Offset = MIB->getOffset(Instruction)) 1999a9cd49d5SAmir Ayupov OS << " # Offset: " << *Offset; 20002db9b6a9SMaksim Panchenko if (std::optional<uint32_t> Size = MIB->getSize(Instruction)) 20012db9b6a9SMaksim Panchenko OS << " # Size: " << *Size; 20027c206c78SMaksim Panchenko if (MCSymbol *Label = MIB->getInstLabel(Instruction)) 2003e28c393bSmaksfb OS << " # Label: " << *Label; 2004a34c753fSRafael Auler 2005a34c753fSRafael Auler MIB->printAnnotations(Instruction, OS); 2006a34c753fSRafael Auler 20077dee646bSAmir Ayupov if (opts::PrintDebugInfo) 20087dee646bSAmir Ayupov printDebugInfo(OS, Instruction, Function, DwCtx.get()); 2009a34c753fSRafael Auler 2010a34c753fSRafael Auler if ((opts::PrintRelocations || PrintRelocations) && Function) { 2011a34c753fSRafael Auler const uint64_t Size = computeCodeSize(&Instruction, &Instruction + 1); 2012a34c753fSRafael Auler Function->printRelocations(OS, Offset, Size); 2013a34c753fSRafael Auler } 2014a34c753fSRafael Auler 201569f87b6cSAmir Ayupov OS << Endl; 2016a34c753fSRafael Auler 2017a34c753fSRafael Auler if (PrintMCInst) { 2018a34c753fSRafael Auler Instruction.dump_pretty(OS, InstPrinter.get()); 201969f87b6cSAmir Ayupov OS << Endl; 2020a34c753fSRafael Auler } 2021a34c753fSRafael Auler } 2022a34c753fSRafael Auler 2023e8f5743eSAmir Ayupov std::optional<uint64_t> 202477b75ca5SMaksim Panchenko BinaryContext::getBaseAddressForMapping(uint64_t MMapAddress, 202577b75ca5SMaksim Panchenko uint64_t FileOffset) const { 202677b75ca5SMaksim Panchenko // Find a segment with a matching file offset. 202777b75ca5SMaksim Panchenko for (auto &KV : SegmentMapInfo) { 202877b75ca5SMaksim Panchenko const SegmentInfo &SegInfo = KV.second; 20296d216fb7SKristof Beyls // Only consider executable segments. 20306d216fb7SKristof Beyls if (!SegInfo.IsExecutable) 20316d216fb7SKristof Beyls continue; 2032ae51ec84SJohnLee1243 // FileOffset is got from perf event, 2033ae51ec84SJohnLee1243 // and it is equal to alignDown(SegInfo.FileOffset, pagesize). 2034ae51ec84SJohnLee1243 // If the pagesize is not equal to SegInfo.Alignment. 2035ae51ec84SJohnLee1243 // FileOffset and SegInfo.FileOffset should be aligned first, 2036ae51ec84SJohnLee1243 // and then judge whether they are equal. 2037ae51ec84SJohnLee1243 if (alignDown(SegInfo.FileOffset, SegInfo.Alignment) == 2038ae51ec84SJohnLee1243 alignDown(FileOffset, SegInfo.Alignment)) { 2039ae51ec84SJohnLee1243 // The function's offset from base address in VAS is aligned by pagesize 2040ae51ec84SJohnLee1243 // instead of SegInfo.Alignment. Pagesize can't be got from perf events. 2041ae51ec84SJohnLee1243 // However, The ELF document says that SegInfo.FileOffset should equal 2042ae51ec84SJohnLee1243 // to SegInfo.Address, modulo the pagesize. 2043ae51ec84SJohnLee1243 // Reference: https://refspecs.linuxfoundation.org/elf/elf.pdf 2044ae51ec84SJohnLee1243 2045ae51ec84SJohnLee1243 // So alignDown(SegInfo.Address, pagesize) can be calculated by: 2046ae51ec84SJohnLee1243 // alignDown(SegInfo.Address, pagesize) 2047ae51ec84SJohnLee1243 // = SegInfo.Address - (SegInfo.Address % pagesize) 2048ae51ec84SJohnLee1243 // = SegInfo.Address - (SegInfo.FileOffset % pagesize) 2049ae51ec84SJohnLee1243 // = SegInfo.Address - SegInfo.FileOffset + 2050ae51ec84SJohnLee1243 // alignDown(SegInfo.FileOffset, pagesize) 2051ae51ec84SJohnLee1243 // = SegInfo.Address - SegInfo.FileOffset + FileOffset 2052ae51ec84SJohnLee1243 return MMapAddress - (SegInfo.Address - SegInfo.FileOffset + FileOffset); 205377b75ca5SMaksim Panchenko } 205477b75ca5SMaksim Panchenko } 205577b75ca5SMaksim Panchenko 2056e324a80fSKazu Hirata return std::nullopt; 205777b75ca5SMaksim Panchenko } 205877b75ca5SMaksim Panchenko 2059a34c753fSRafael Auler ErrorOr<BinarySection &> BinaryContext::getSectionForAddress(uint64_t Address) { 2060a34c753fSRafael Auler auto SI = AddressToSection.upper_bound(Address); 2061a34c753fSRafael Auler if (SI != AddressToSection.begin()) { 2062a34c753fSRafael Auler --SI; 2063a34c753fSRafael Auler uint64_t UpperBound = SI->first + SI->second->getSize(); 2064a34c753fSRafael Auler if (!SI->second->getSize()) 2065a34c753fSRafael Auler UpperBound += 1; 2066a34c753fSRafael Auler if (UpperBound > Address) 2067a34c753fSRafael Auler return *SI->second; 2068a34c753fSRafael Auler } 2069a34c753fSRafael Auler return std::make_error_code(std::errc::bad_address); 2070a34c753fSRafael Auler } 2071a34c753fSRafael Auler 2072a34c753fSRafael Auler ErrorOr<StringRef> 2073a34c753fSRafael Auler BinaryContext::getSectionNameForAddress(uint64_t Address) const { 20743652483cSRafael Auler if (ErrorOr<const BinarySection &> Section = getSectionForAddress(Address)) 2075a34c753fSRafael Auler return Section->getName(); 2076a34c753fSRafael Auler return std::make_error_code(std::errc::bad_address); 2077a34c753fSRafael Auler } 2078a34c753fSRafael Auler 2079a34c753fSRafael Auler BinarySection &BinaryContext::registerSection(BinarySection *Section) { 2080a34c753fSRafael Auler auto Res = Sections.insert(Section); 2081a34c753fSRafael Auler (void)Res; 2082a34c753fSRafael Auler assert(Res.second && "can't register the same section twice."); 2083a34c753fSRafael Auler 2084a34c753fSRafael Auler // Only register allocatable sections in the AddressToSection map. 2085a34c753fSRafael Auler if (Section->isAllocatable() && Section->getAddress()) 2086a34c753fSRafael Auler AddressToSection.insert(std::make_pair(Section->getAddress(), Section)); 2087a34c753fSRafael Auler NameToSection.insert( 2088a34c753fSRafael Auler std::make_pair(std::string(Section->getName()), Section)); 20894d3a0cadSMaksim Panchenko if (Section->hasSectionRef()) 20904d3a0cadSMaksim Panchenko SectionRefToBinarySection.insert( 20914d3a0cadSMaksim Panchenko std::make_pair(Section->getSectionRef(), Section)); 20924d3a0cadSMaksim Panchenko 2093a34c753fSRafael Auler LLVM_DEBUG(dbgs() << "BOLT-DEBUG: registering " << *Section << "\n"); 2094a34c753fSRafael Auler return *Section; 2095a34c753fSRafael Auler } 2096a34c753fSRafael Auler 2097a34c753fSRafael Auler BinarySection &BinaryContext::registerSection(SectionRef Section) { 2098a34c753fSRafael Auler return registerSection(new BinarySection(*this, Section)); 2099a34c753fSRafael Auler } 2100a34c753fSRafael Auler 2101a34c753fSRafael Auler BinarySection & 21024d3a0cadSMaksim Panchenko BinaryContext::registerSection(const Twine &SectionName, 2103a34c753fSRafael Auler const BinarySection &OriginalSection) { 210440c2e0faSMaksim Panchenko return registerSection( 210540c2e0faSMaksim Panchenko new BinarySection(*this, SectionName, OriginalSection)); 2106a34c753fSRafael Auler } 2107a34c753fSRafael Auler 210840c2e0faSMaksim Panchenko BinarySection & 21094d3a0cadSMaksim Panchenko BinaryContext::registerOrUpdateSection(const Twine &Name, unsigned ELFType, 211040c2e0faSMaksim Panchenko unsigned ELFFlags, uint8_t *Data, 211140c2e0faSMaksim Panchenko uint64_t Size, unsigned Alignment) { 2112a34c753fSRafael Auler auto NamedSections = getSectionByName(Name); 2113a34c753fSRafael Auler if (NamedSections.begin() != NamedSections.end()) { 2114a34c753fSRafael Auler assert(std::next(NamedSections.begin()) == NamedSections.end() && 2115a34c753fSRafael Auler "can only update unique sections"); 2116a34c753fSRafael Auler BinarySection *Section = NamedSections.begin()->second; 2117a34c753fSRafael Auler 2118a34c753fSRafael Auler LLVM_DEBUG(dbgs() << "BOLT-DEBUG: updating " << *Section << " -> "); 2119a34c753fSRafael Auler const bool Flag = Section->isAllocatable(); 2120a34c753fSRafael Auler (void)Flag; 2121a34c753fSRafael Auler Section->update(Data, Size, Alignment, ELFType, ELFFlags); 2122a34c753fSRafael Auler LLVM_DEBUG(dbgs() << *Section << "\n"); 2123a34c753fSRafael Auler // FIXME: Fix section flags/attributes for MachO. 2124a34c753fSRafael Auler if (isELF()) 2125a34c753fSRafael Auler assert(Flag == Section->isAllocatable() && 2126a34c753fSRafael Auler "can't change section allocation status"); 2127a34c753fSRafael Auler return *Section; 2128a34c753fSRafael Auler } 2129a34c753fSRafael Auler 213040c2e0faSMaksim Panchenko return registerSection( 213140c2e0faSMaksim Panchenko new BinarySection(*this, Name, Data, Size, Alignment, ELFType, ELFFlags)); 2132a34c753fSRafael Auler } 2133a34c753fSRafael Auler 21344d3a0cadSMaksim Panchenko void BinaryContext::deregisterSectionName(const BinarySection &Section) { 21354d3a0cadSMaksim Panchenko auto NameRange = NameToSection.equal_range(Section.getName().str()); 21364d3a0cadSMaksim Panchenko while (NameRange.first != NameRange.second) { 21374d3a0cadSMaksim Panchenko if (NameRange.first->second == &Section) { 21384d3a0cadSMaksim Panchenko NameToSection.erase(NameRange.first); 21394d3a0cadSMaksim Panchenko break; 21404d3a0cadSMaksim Panchenko } 21414d3a0cadSMaksim Panchenko ++NameRange.first; 21424d3a0cadSMaksim Panchenko } 21434d3a0cadSMaksim Panchenko } 21444d3a0cadSMaksim Panchenko 21454d3a0cadSMaksim Panchenko void BinaryContext::deregisterUnusedSections() { 21464d3a0cadSMaksim Panchenko ErrorOr<BinarySection &> AbsSection = getUniqueSectionByName("<absolute>"); 21474d3a0cadSMaksim Panchenko for (auto SI = Sections.begin(); SI != Sections.end();) { 21484d3a0cadSMaksim Panchenko BinarySection *Section = *SI; 214905634f73SJob Noorman // We check getOutputData() instead of getOutputSize() because sometimes 215005634f73SJob Noorman // zero-sized .text.cold sections are allocated. 215105634f73SJob Noorman if (Section->hasSectionRef() || Section->getOutputData() || 21524d3a0cadSMaksim Panchenko (AbsSection && Section == &AbsSection.get())) { 21534d3a0cadSMaksim Panchenko ++SI; 21544d3a0cadSMaksim Panchenko continue; 21554d3a0cadSMaksim Panchenko } 21564d3a0cadSMaksim Panchenko 21574d3a0cadSMaksim Panchenko LLVM_DEBUG(dbgs() << "LLVM-DEBUG: deregistering " << Section->getName() 21584d3a0cadSMaksim Panchenko << '\n';); 21594d3a0cadSMaksim Panchenko deregisterSectionName(*Section); 21604d3a0cadSMaksim Panchenko SI = Sections.erase(SI); 21614d3a0cadSMaksim Panchenko delete Section; 21624d3a0cadSMaksim Panchenko } 21634d3a0cadSMaksim Panchenko } 21644d3a0cadSMaksim Panchenko 2165a34c753fSRafael Auler bool BinaryContext::deregisterSection(BinarySection &Section) { 2166a34c753fSRafael Auler BinarySection *SectionPtr = &Section; 2167a34c753fSRafael Auler auto Itr = Sections.find(SectionPtr); 2168a34c753fSRafael Auler if (Itr != Sections.end()) { 2169a34c753fSRafael Auler auto Range = AddressToSection.equal_range(SectionPtr->getAddress()); 2170a34c753fSRafael Auler while (Range.first != Range.second) { 2171a34c753fSRafael Auler if (Range.first->second == SectionPtr) { 2172a34c753fSRafael Auler AddressToSection.erase(Range.first); 2173a34c753fSRafael Auler break; 2174a34c753fSRafael Auler } 2175a34c753fSRafael Auler ++Range.first; 2176a34c753fSRafael Auler } 2177a34c753fSRafael Auler 21784d3a0cadSMaksim Panchenko deregisterSectionName(*SectionPtr); 2179a34c753fSRafael Auler Sections.erase(Itr); 2180a34c753fSRafael Auler delete SectionPtr; 2181a34c753fSRafael Auler return true; 2182a34c753fSRafael Auler } 2183a34c753fSRafael Auler return false; 2184a34c753fSRafael Auler } 2185a34c753fSRafael Auler 21864d3a0cadSMaksim Panchenko void BinaryContext::renameSection(BinarySection &Section, 21874d3a0cadSMaksim Panchenko const Twine &NewName) { 21884d3a0cadSMaksim Panchenko auto Itr = Sections.find(&Section); 21894d3a0cadSMaksim Panchenko assert(Itr != Sections.end() && "Section must exist to be renamed."); 21904d3a0cadSMaksim Panchenko Sections.erase(Itr); 21914d3a0cadSMaksim Panchenko 21924d3a0cadSMaksim Panchenko deregisterSectionName(Section); 21934d3a0cadSMaksim Panchenko 21944d3a0cadSMaksim Panchenko Section.Name = NewName.str(); 2195c92ff2a3Srevunov.denis@huawei.com Section.setOutputName(Section.Name); 21964d3a0cadSMaksim Panchenko 2197c92ff2a3Srevunov.denis@huawei.com NameToSection.insert(std::make_pair(Section.Name, &Section)); 21984d3a0cadSMaksim Panchenko 21994d3a0cadSMaksim Panchenko // Reinsert with the new name. 22004d3a0cadSMaksim Panchenko Sections.insert(&Section); 22014d3a0cadSMaksim Panchenko } 22024d3a0cadSMaksim Panchenko 2203a34c753fSRafael Auler void BinaryContext::printSections(raw_ostream &OS) const { 22043652483cSRafael Auler for (BinarySection *const &Section : Sections) 2205a34c753fSRafael Auler OS << "BOLT-INFO: " << *Section << "\n"; 2206a34c753fSRafael Auler } 2207a34c753fSRafael Auler 2208a34c753fSRafael Auler BinarySection &BinaryContext::absoluteSection() { 2209a34c753fSRafael Auler if (ErrorOr<BinarySection &> Section = getUniqueSectionByName("<absolute>")) 2210a34c753fSRafael Auler return *Section; 2211a34c753fSRafael Auler return registerOrUpdateSection("<absolute>", ELF::SHT_NULL, 0u); 2212a34c753fSRafael Auler } 2213a34c753fSRafael Auler 221440c2e0faSMaksim Panchenko ErrorOr<uint64_t> BinaryContext::getUnsignedValueAtAddress(uint64_t Address, 2215a34c753fSRafael Auler size_t Size) const { 2216a34c753fSRafael Auler const ErrorOr<const BinarySection &> Section = getSectionForAddress(Address); 2217a34c753fSRafael Auler if (!Section) 2218a34c753fSRafael Auler return std::make_error_code(std::errc::bad_address); 2219a34c753fSRafael Auler 2220a34c753fSRafael Auler if (Section->isVirtual()) 2221a34c753fSRafael Auler return 0; 2222a34c753fSRafael Auler 2223a34c753fSRafael Auler DataExtractor DE(Section->getContents(), AsmInfo->isLittleEndian(), 2224a34c753fSRafael Auler AsmInfo->getCodePointerSize()); 2225a34c753fSRafael Auler auto ValueOffset = static_cast<uint64_t>(Address - Section->getAddress()); 2226a34c753fSRafael Auler return DE.getUnsigned(&ValueOffset, Size); 2227a34c753fSRafael Auler } 2228a34c753fSRafael Auler 2229c460e454SAmir Ayupov ErrorOr<int64_t> BinaryContext::getSignedValueAtAddress(uint64_t Address, 2230a34c753fSRafael Auler size_t Size) const { 2231a34c753fSRafael Auler const ErrorOr<const BinarySection &> Section = getSectionForAddress(Address); 2232a34c753fSRafael Auler if (!Section) 2233a34c753fSRafael Auler return std::make_error_code(std::errc::bad_address); 2234a34c753fSRafael Auler 2235a34c753fSRafael Auler if (Section->isVirtual()) 2236a34c753fSRafael Auler return 0; 2237a34c753fSRafael Auler 2238a34c753fSRafael Auler DataExtractor DE(Section->getContents(), AsmInfo->isLittleEndian(), 2239a34c753fSRafael Auler AsmInfo->getCodePointerSize()); 2240a34c753fSRafael Auler auto ValueOffset = static_cast<uint64_t>(Address - Section->getAddress()); 2241a34c753fSRafael Auler return DE.getSigned(&ValueOffset, Size); 2242a34c753fSRafael Auler } 2243a34c753fSRafael Auler 224440c2e0faSMaksim Panchenko void BinaryContext::addRelocation(uint64_t Address, MCSymbol *Symbol, 224540c2e0faSMaksim Panchenko uint64_t Type, uint64_t Addend, 2246a34c753fSRafael Auler uint64_t Value) { 2247a34c753fSRafael Auler ErrorOr<BinarySection &> Section = getSectionForAddress(Address); 2248a34c753fSRafael Auler assert(Section && "cannot find section for address"); 224940c2e0faSMaksim Panchenko Section->addRelocation(Address - Section->getAddress(), Symbol, Type, Addend, 2250a34c753fSRafael Auler Value); 2251a34c753fSRafael Auler } 2252a34c753fSRafael Auler 225340c2e0faSMaksim Panchenko void BinaryContext::addDynamicRelocation(uint64_t Address, MCSymbol *Symbol, 225440c2e0faSMaksim Panchenko uint64_t Type, uint64_t Addend, 2255a34c753fSRafael Auler uint64_t Value) { 2256a34c753fSRafael Auler ErrorOr<BinarySection &> Section = getSectionForAddress(Address); 2257a34c753fSRafael Auler assert(Section && "cannot find section for address"); 225840c2e0faSMaksim Panchenko Section->addDynamicRelocation(Address - Section->getAddress(), Symbol, Type, 225940c2e0faSMaksim Panchenko Addend, Value); 2260a34c753fSRafael Auler } 2261a34c753fSRafael Auler 2262a34c753fSRafael Auler bool BinaryContext::removeRelocationAt(uint64_t Address) { 2263a34c753fSRafael Auler ErrorOr<BinarySection &> Section = getSectionForAddress(Address); 2264a34c753fSRafael Auler assert(Section && "cannot find section for address"); 2265a34c753fSRafael Auler return Section->removeRelocationAt(Address - Section->getAddress()); 2266a34c753fSRafael Auler } 2267a34c753fSRafael Auler 226808ab4fafSAmir Ayupov const Relocation *BinaryContext::getRelocationAt(uint64_t Address) const { 226908ab4fafSAmir Ayupov ErrorOr<const BinarySection &> Section = getSectionForAddress(Address); 2270a34c753fSRafael Auler if (!Section) 2271a34c753fSRafael Auler return nullptr; 2272a34c753fSRafael Auler 2273a34c753fSRafael Auler return Section->getRelocationAt(Address - Section->getAddress()); 2274a34c753fSRafael Auler } 2275a34c753fSRafael Auler 2276702fe36bSAmir Ayupov const Relocation * 2277702fe36bSAmir Ayupov BinaryContext::getDynamicRelocationAt(uint64_t Address) const { 2278702fe36bSAmir Ayupov ErrorOr<const BinarySection &> Section = getSectionForAddress(Address); 2279a34c753fSRafael Auler if (!Section) 2280a34c753fSRafael Auler return nullptr; 2281a34c753fSRafael Auler 2282a34c753fSRafael Auler return Section->getDynamicRelocationAt(Address - Section->getAddress()); 2283a34c753fSRafael Auler } 2284a34c753fSRafael Auler 2285a34c753fSRafael Auler void BinaryContext::markAmbiguousRelocations(BinaryData &BD, 2286a34c753fSRafael Auler const uint64_t Address) { 2287a34c753fSRafael Auler auto setImmovable = [&](BinaryData &BD) { 2288a34c753fSRafael Auler BinaryData *Root = BD.getAtomicRoot(); 2289a34c753fSRafael Auler LLVM_DEBUG(if (Root->isMoveable()) { 2290a34c753fSRafael Auler dbgs() << "BOLT-DEBUG: setting " << *Root << " as immovable " 2291a34c753fSRafael Auler << "due to ambiguous relocation referencing 0x" 2292a34c753fSRafael Auler << Twine::utohexstr(Address) << '\n'; 2293a34c753fSRafael Auler }); 2294a34c753fSRafael Auler Root->setIsMoveable(false); 2295a34c753fSRafael Auler }; 2296a34c753fSRafael Auler 2297a34c753fSRafael Auler if (Address == BD.getAddress()) { 2298a34c753fSRafael Auler setImmovable(BD); 2299a34c753fSRafael Auler 2300a34c753fSRafael Auler // Set previous symbol as immovable 2301a34c753fSRafael Auler BinaryData *Prev = getBinaryDataContainingAddress(Address - 1); 2302a34c753fSRafael Auler if (Prev && Prev->getEndAddress() == BD.getAddress()) 2303a34c753fSRafael Auler setImmovable(*Prev); 2304a34c753fSRafael Auler } 2305a34c753fSRafael Auler 2306a34c753fSRafael Auler if (Address == BD.getEndAddress()) { 2307a34c753fSRafael Auler setImmovable(BD); 2308a34c753fSRafael Auler 2309a34c753fSRafael Auler // Set next symbol as immovable 2310a34c753fSRafael Auler BinaryData *Next = getBinaryDataContainingAddress(BD.getEndAddress()); 2311a34c753fSRafael Auler if (Next && Next->getAddress() == BD.getEndAddress()) 2312a34c753fSRafael Auler setImmovable(*Next); 2313a34c753fSRafael Auler } 2314a34c753fSRafael Auler } 2315a34c753fSRafael Auler 2316a34c753fSRafael Auler BinaryFunction *BinaryContext::getFunctionForSymbol(const MCSymbol *Symbol, 2317a34c753fSRafael Auler uint64_t *EntryDesc) { 2318e8ce5f1eSNico Weber std::shared_lock<llvm::sys::RWMutex> Lock(SymbolToFunctionMapMutex); 2319a34c753fSRafael Auler auto BFI = SymbolToFunctionMap.find(Symbol); 2320a34c753fSRafael Auler if (BFI == SymbolToFunctionMap.end()) 2321a34c753fSRafael Auler return nullptr; 2322a34c753fSRafael Auler 2323a34c753fSRafael Auler BinaryFunction *BF = BFI->second; 2324a34c753fSRafael Auler if (EntryDesc) 2325a34c753fSRafael Auler *EntryDesc = BF->getEntryIDForSymbol(Symbol); 2326a34c753fSRafael Auler 2327a34c753fSRafael Auler return BF; 2328a34c753fSRafael Auler } 2329a34c753fSRafael Auler 233052cf0711SAmir Ayupov std::string 233152cf0711SAmir Ayupov BinaryContext::generateBugReportMessage(StringRef Message, 2332a34c753fSRafael Auler const BinaryFunction &Function) const { 233352cf0711SAmir Ayupov std::string Msg; 233452cf0711SAmir Ayupov raw_string_ostream SS(Msg); 233552cf0711SAmir Ayupov SS << "=======================================\n"; 233652cf0711SAmir Ayupov SS << "BOLT is unable to proceed because it couldn't properly understand " 2337a34c753fSRafael Auler "this function.\n"; 233852cf0711SAmir Ayupov SS << "If you are running the most recent version of BOLT, you may " 2339a34c753fSRafael Auler "want to " 2340a34c753fSRafael Auler "report this and paste this dump.\nPlease check that there is no " 2341a34c753fSRafael Auler "sensitive contents being shared in this dump.\n"; 234252cf0711SAmir Ayupov SS << "\nOffending function: " << Function.getPrintName() << "\n\n"; 234352cf0711SAmir Ayupov ScopedPrinter SP(SS); 2344a34c753fSRafael Auler SP.printBinaryBlock("Function contents", *Function.getData()); 234552cf0711SAmir Ayupov SS << "\n"; 234652cf0711SAmir Ayupov const_cast<BinaryFunction &>(Function).print(SS, ""); 234752cf0711SAmir Ayupov SS << "ERROR: " << Message; 234852cf0711SAmir Ayupov SS << "\n=======================================\n"; 234952cf0711SAmir Ayupov return Msg; 2350a34c753fSRafael Auler } 2351a34c753fSRafael Auler 2352a34c753fSRafael Auler BinaryFunction * 2353a34c753fSRafael Auler BinaryContext::createInjectedBinaryFunction(const std::string &Name, 2354a34c753fSRafael Auler bool IsSimple) { 2355a34c753fSRafael Auler InjectedBinaryFunctions.push_back(new BinaryFunction(Name, *this, IsSimple)); 2356a34c753fSRafael Auler BinaryFunction *BF = InjectedBinaryFunctions.back(); 2357a34c753fSRafael Auler setSymbolToFunctionMap(BF->getSymbol(), BF); 2358a34c753fSRafael Auler BF->CurrentState = BinaryFunction::State::CFG; 2359a34c753fSRafael Auler return BF; 2360a34c753fSRafael Auler } 2361a34c753fSRafael Auler 2362a34c753fSRafael Auler std::pair<size_t, size_t> 2363a34c753fSRafael Auler BinaryContext::calculateEmittedSize(BinaryFunction &BF, bool FixBranches) { 2364a34c753fSRafael Auler // Adjust branch instruction to match the current layout. 2365a34c753fSRafael Auler if (FixBranches) 2366a34c753fSRafael Auler BF.fixBranches(); 2367a34c753fSRafael Auler 2368a34c753fSRafael Auler // Create local MC context to isolate the effect of ephemeral code emission. 2369a34c753fSRafael Auler IndependentCodeEmitter MCEInstance = createIndependentMCCodeEmitter(); 2370a34c753fSRafael Auler MCContext *LocalCtx = MCEInstance.LocalCtx.get(); 2371a34c753fSRafael Auler MCAsmBackend *MAB = 2372a34c753fSRafael Auler TheTarget->createMCAsmBackend(*STI, *MRI, MCTargetOptions()); 2373a34c753fSRafael Auler 2374a34c753fSRafael Auler SmallString<256> Code; 2375a34c753fSRafael Auler raw_svector_ostream VecOS(Code); 2376a34c753fSRafael Auler 2377a34c753fSRafael Auler std::unique_ptr<MCObjectWriter> OW = MAB->createObjectWriter(VecOS); 2378a34c753fSRafael Auler std::unique_ptr<MCStreamer> Streamer(TheTarget->createMCObjectStreamer( 2379a34c753fSRafael Auler *TheTriple, *LocalCtx, std::unique_ptr<MCAsmBackend>(MAB), std::move(OW), 238086e21e1aSFangrui Song std::unique_ptr<MCCodeEmitter>(MCEInstance.MCE.release()), *STI)); 2381a34c753fSRafael Auler 2382a34c753fSRafael Auler Streamer->initSections(false, *STI); 2383a34c753fSRafael Auler 2384a34c753fSRafael Auler MCSection *Section = MCEInstance.LocalMOFI->getTextSection(); 2385a34c753fSRafael Auler Section->setHasInstructions(true); 2386a34c753fSRafael Auler 2387a34c753fSRafael Auler // Create symbols in the LocalCtx so that they get destroyed with it. 2388a34c753fSRafael Auler MCSymbol *StartLabel = LocalCtx->createTempSymbol(); 2389a34c753fSRafael Auler MCSymbol *EndLabel = LocalCtx->createTempSymbol(); 2390a34c753fSRafael Auler 2391adf4142fSFangrui Song Streamer->switchSection(Section); 2392a34c753fSRafael Auler Streamer->emitLabel(StartLabel); 2393275e075cSFabian Parzefall emitFunctionBody(*Streamer, BF, BF.getLayout().getMainFragment(), 2394a34c753fSRafael Auler /*EmitCodeOnly=*/true); 2395a34c753fSRafael Auler Streamer->emitLabel(EndLabel); 2396a34c753fSRafael Auler 2397275e075cSFabian Parzefall using LabelRange = std::pair<const MCSymbol *, const MCSymbol *>; 2398275e075cSFabian Parzefall SmallVector<LabelRange> SplitLabels; 239907f63b0aSFabian Parzefall for (FunctionFragment &FF : BF.getLayout().getSplitFragments()) { 2400275e075cSFabian Parzefall MCSymbol *const SplitStartLabel = LocalCtx->createTempSymbol(); 2401275e075cSFabian Parzefall MCSymbol *const SplitEndLabel = LocalCtx->createTempSymbol(); 2402275e075cSFabian Parzefall SplitLabels.emplace_back(SplitStartLabel, SplitEndLabel); 2403a34c753fSRafael Auler 2404275e075cSFabian Parzefall MCSectionELF *const SplitSection = LocalCtx->getELFSection( 24050f74d191SFabian Parzefall BF.getCodeSectionName(FF.getFragmentNum()), ELF::SHT_PROGBITS, 2406275e075cSFabian Parzefall ELF::SHF_EXECINSTR | ELF::SHF_ALLOC); 2407275e075cSFabian Parzefall SplitSection->setHasInstructions(true); 2408275e075cSFabian Parzefall Streamer->switchSection(SplitSection); 2409275e075cSFabian Parzefall 2410275e075cSFabian Parzefall Streamer->emitLabel(SplitStartLabel); 2411275e075cSFabian Parzefall emitFunctionBody(*Streamer, BF, FF, /*EmitCodeOnly=*/true); 2412275e075cSFabian Parzefall Streamer->emitLabel(SplitEndLabel); 2413a34c753fSRafael Auler } 2414a34c753fSRafael Auler 2415a34c753fSRafael Auler MCAssembler &Assembler = 2416a34c753fSRafael Auler static_cast<MCObjectStreamer *>(Streamer.get())->getAssembler(); 241735668e2cSFangrui Song Assembler.layout(); 2418a34c753fSRafael Auler 2419d333c0e0SShatianWang // Obtain fragment sizes. 2420d333c0e0SShatianWang std::vector<uint64_t> FragmentSizes; 2421d333c0e0SShatianWang // Main fragment size. 2422dbf12b2fSFangrui Song const uint64_t HotSize = Assembler.getSymbolOffset(*EndLabel) - 2423dbf12b2fSFangrui Song Assembler.getSymbolOffset(*StartLabel); 2424d333c0e0SShatianWang FragmentSizes.push_back(HotSize); 2425d333c0e0SShatianWang // Split fragment sizes. 2426d333c0e0SShatianWang uint64_t ColdSize = 0; 2427d333c0e0SShatianWang for (const auto &Labels : SplitLabels) { 2428dbf12b2fSFangrui Song uint64_t Size = Assembler.getSymbolOffset(*Labels.second) - 2429dbf12b2fSFangrui Song Assembler.getSymbolOffset(*Labels.first); 2430d333c0e0SShatianWang FragmentSizes.push_back(Size); 2431d333c0e0SShatianWang ColdSize += Size; 2432d333c0e0SShatianWang } 2433d333c0e0SShatianWang 2434d333c0e0SShatianWang // Populate new start and end offsets of each basic block. 2435d333c0e0SShatianWang uint64_t FragmentIndex = 0; 2436d333c0e0SShatianWang for (FunctionFragment &FF : BF.getLayout().fragments()) { 2437d333c0e0SShatianWang BinaryBasicBlock *PrevBB = nullptr; 2438d333c0e0SShatianWang for (BinaryBasicBlock *BB : FF) { 2439dbf12b2fSFangrui Song const uint64_t BBStartOffset = 2440dbf12b2fSFangrui Song Assembler.getSymbolOffset(*(BB->getLabel())); 2441d333c0e0SShatianWang BB->setOutputStartAddress(BBStartOffset); 2442d333c0e0SShatianWang if (PrevBB) 2443d333c0e0SShatianWang PrevBB->setOutputEndAddress(BBStartOffset); 2444d333c0e0SShatianWang PrevBB = BB; 2445d333c0e0SShatianWang } 2446d333c0e0SShatianWang if (PrevBB) 2447d333c0e0SShatianWang PrevBB->setOutputEndAddress(FragmentSizes[FragmentIndex]); 2448d333c0e0SShatianWang FragmentIndex++; 2449d333c0e0SShatianWang } 2450a34c753fSRafael Auler 2451a34c753fSRafael Auler // Clean-up the effect of the code emission. 2452a34c753fSRafael Auler for (const MCSymbol &Symbol : Assembler.symbols()) { 2453a34c753fSRafael Auler MCSymbol *MutableSymbol = const_cast<MCSymbol *>(&Symbol); 2454a34c753fSRafael Auler MutableSymbol->setUndefined(); 2455a34c753fSRafael Auler MutableSymbol->setIsRegistered(false); 2456a34c753fSRafael Auler } 2457a34c753fSRafael Auler 2458a34c753fSRafael Auler return std::make_pair(HotSize, ColdSize); 2459a34c753fSRafael Auler } 2460a34c753fSRafael Auler 2461bcc4c909SMaksim Panchenko bool BinaryContext::validateInstructionEncoding( 2462bcc4c909SMaksim Panchenko ArrayRef<uint8_t> InputSequence) const { 2463bcc4c909SMaksim Panchenko MCInst Inst; 2464bcc4c909SMaksim Panchenko uint64_t InstSize; 2465bcc4c909SMaksim Panchenko DisAsm->getInstruction(Inst, InstSize, InputSequence, 0, nulls()); 2466bcc4c909SMaksim Panchenko assert(InstSize == InputSequence.size() && 2467bcc4c909SMaksim Panchenko "Disassembled instruction size does not match the sequence."); 2468bcc4c909SMaksim Panchenko 2469a34c753fSRafael Auler SmallString<256> Code; 2470a34c753fSRafael Auler SmallVector<MCFixup, 4> Fixups; 2471a34c753fSRafael Auler 24720c049ea6SAlexis Engelke MCE->encodeInstruction(Inst, Code, Fixups, *STI); 2473bcc4c909SMaksim Panchenko auto OutputSequence = ArrayRef<uint8_t>((uint8_t *)Code.data(), Code.size()); 2474bcc4c909SMaksim Panchenko if (InputSequence != OutputSequence) { 2475a34c753fSRafael Auler if (opts::Verbosity > 1) { 247652cf0711SAmir Ayupov this->errs() << "BOLT-WARNING: mismatched encoding detected\n" 2477bcc4c909SMaksim Panchenko << " input: " << InputSequence << '\n' 2478bcc4c909SMaksim Panchenko << " output: " << OutputSequence << '\n'; 2479a34c753fSRafael Auler } 2480a34c753fSRafael Auler return false; 2481a34c753fSRafael Auler } 2482a34c753fSRafael Auler 2483a34c753fSRafael Auler return true; 2484a34c753fSRafael Auler } 2485a34c753fSRafael Auler 2486a34c753fSRafael Auler uint64_t BinaryContext::getHotThreshold() const { 2487a34c753fSRafael Auler static uint64_t Threshold = 0; 2488a34c753fSRafael Auler if (Threshold == 0) { 248940c2e0faSMaksim Panchenko Threshold = std::max( 249040c2e0faSMaksim Panchenko (uint64_t)opts::ExecutionCountThreshold, 2491a34c753fSRafael Auler NumProfiledFuncs ? SumExecutionCount / (2 * NumProfiledFuncs) : 1); 2492a34c753fSRafael Auler } 2493a34c753fSRafael Auler return Threshold; 2494a34c753fSRafael Auler } 2495a34c753fSRafael Auler 249640c2e0faSMaksim Panchenko BinaryFunction *BinaryContext::getBinaryFunctionContainingAddress( 249740c2e0faSMaksim Panchenko uint64_t Address, bool CheckPastEnd, bool UseMaxSize) { 2498a34c753fSRafael Auler auto FI = BinaryFunctions.upper_bound(Address); 2499a34c753fSRafael Auler if (FI == BinaryFunctions.begin()) 2500a34c753fSRafael Auler return nullptr; 2501a34c753fSRafael Auler --FI; 2502a34c753fSRafael Auler 2503a34c753fSRafael Auler const uint64_t UsedSize = 2504a34c753fSRafael Auler UseMaxSize ? FI->second.getMaxSize() : FI->second.getSize(); 2505a34c753fSRafael Auler 2506a34c753fSRafael Auler if (Address >= FI->first + UsedSize + (CheckPastEnd ? 1 : 0)) 2507a34c753fSRafael Auler return nullptr; 2508a34c753fSRafael Auler 2509a34c753fSRafael Auler return &FI->second; 2510a34c753fSRafael Auler } 2511a34c753fSRafael Auler 251240c2e0faSMaksim Panchenko BinaryFunction *BinaryContext::getBinaryFunctionAtAddress(uint64_t Address) { 2513a34c753fSRafael Auler // First, try to find a function starting at the given address. If the 2514a34c753fSRafael Auler // function was folded, this will get us the original folded function if it 2515a34c753fSRafael Auler // wasn't removed from the list, e.g. in non-relocation mode. 2516a34c753fSRafael Auler auto BFI = BinaryFunctions.find(Address); 25173652483cSRafael Auler if (BFI != BinaryFunctions.end()) 2518a34c753fSRafael Auler return &BFI->second; 2519a34c753fSRafael Auler 2520a34c753fSRafael Auler // We might have folded the function matching the object at the given 2521a34c753fSRafael Auler // address. In such case, we look for a function matching the symbol 2522a34c753fSRafael Auler // registered at the original address. The new function (the one that the 2523a34c753fSRafael Auler // original was folded into) will hold the symbol. 2524a34c753fSRafael Auler if (const BinaryData *BD = getBinaryDataAtAddress(Address)) { 2525a34c753fSRafael Auler uint64_t EntryID = 0; 2526a34c753fSRafael Auler BinaryFunction *BF = getFunctionForSymbol(BD->getSymbol(), &EntryID); 2527a34c753fSRafael Auler if (BF && EntryID == 0) 2528a34c753fSRafael Auler return BF; 2529a34c753fSRafael Auler } 2530a34c753fSRafael Auler return nullptr; 2531a34c753fSRafael Auler } 2532a34c753fSRafael Auler 25333023b15fSAmir Ayupov /// Deregister JumpTable registered at a given \p Address and delete it. 25343023b15fSAmir Ayupov void BinaryContext::deleteJumpTable(uint64_t Address) { 25353023b15fSAmir Ayupov assert(JumpTables.count(Address) && "Must have a jump table at address"); 25363023b15fSAmir Ayupov JumpTable *JT = JumpTables.at(Address); 25373023b15fSAmir Ayupov for (BinaryFunction *Parent : JT->Parents) 25383023b15fSAmir Ayupov Parent->JumpTables.erase(Address); 25393023b15fSAmir Ayupov JumpTables.erase(Address); 25403023b15fSAmir Ayupov delete JT; 25413023b15fSAmir Ayupov } 25423023b15fSAmir Ayupov 2543a34c753fSRafael Auler DebugAddressRangesVector BinaryContext::translateModuleAddressRanges( 2544a34c753fSRafael Auler const DWARFAddressRangesVector &InputRanges) const { 2545a34c753fSRafael Auler DebugAddressRangesVector OutputRanges; 2546a34c753fSRafael Auler 2547a34c753fSRafael Auler for (const DWARFAddressRange Range : InputRanges) { 2548a34c753fSRafael Auler auto BFI = BinaryFunctions.lower_bound(Range.LowPC); 2549a34c753fSRafael Auler while (BFI != BinaryFunctions.end()) { 2550a34c753fSRafael Auler const BinaryFunction &Function = BFI->second; 2551a34c753fSRafael Auler if (Function.getAddress() >= Range.HighPC) 2552a34c753fSRafael Auler break; 2553a34c753fSRafael Auler const DebugAddressRangesVector FunctionRanges = 2554a34c753fSRafael Auler Function.getOutputAddressRanges(); 2555d2c87699SAmir Ayupov llvm::move(FunctionRanges, std::back_inserter(OutputRanges)); 2556a34c753fSRafael Auler std::advance(BFI, 1); 2557a34c753fSRafael Auler } 2558a34c753fSRafael Auler } 2559a34c753fSRafael Auler 2560a34c753fSRafael Auler return OutputRanges; 2561a34c753fSRafael Auler } 2562a34c753fSRafael Auler 2563a34c753fSRafael Auler } // namespace bolt 2564a34c753fSRafael Auler } // namespace llvm 2565