12f09f445SMaksim Panchenko //===- bolt/Rewrite/RewriteInstance.cpp - ELF rewriter --------------------===// 2a34c753fSRafael Auler // 3a34c753fSRafael Auler // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4a34c753fSRafael Auler // See https://llvm.org/LICENSE.txt for license information. 5a34c753fSRafael Auler // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6a34c753fSRafael Auler // 7a34c753fSRafael Auler //===----------------------------------------------------------------------===// 8a34c753fSRafael Auler 9a34c753fSRafael Auler #include "bolt/Rewrite/RewriteInstance.h" 1023c8d382SJob Noorman #include "bolt/Core/AddressMap.h" 11a34c753fSRafael Auler #include "bolt/Core/BinaryContext.h" 12a34c753fSRafael Auler #include "bolt/Core/BinaryEmitter.h" 13a34c753fSRafael Auler #include "bolt/Core/BinaryFunction.h" 14a34c753fSRafael Auler #include "bolt/Core/DebugData.h" 15a34c753fSRafael Auler #include "bolt/Core/Exceptions.h" 16275e075cSFabian Parzefall #include "bolt/Core/FunctionLayout.h" 17a34c753fSRafael Auler #include "bolt/Core/MCPlusBuilder.h" 18a34c753fSRafael Auler #include "bolt/Core/ParallelUtilities.h" 19a34c753fSRafael Auler #include "bolt/Core/Relocation.h" 201529ec08SAmir Ayupov #include "bolt/Passes/BinaryPasses.h" 21a34c753fSRafael Auler #include "bolt/Passes/CacheMetrics.h" 22*3c357a49SAlexander Yermolovich #include "bolt/Passes/IdenticalCodeFolding.h" 23a34c753fSRafael Auler #include "bolt/Passes/ReorderFunctions.h" 24a34c753fSRafael Auler #include "bolt/Profile/BoltAddressTranslation.h" 25a34c753fSRafael Auler #include "bolt/Profile/DataAggregator.h" 26a34c753fSRafael Auler #include "bolt/Profile/DataReader.h" 27a34c753fSRafael Auler #include "bolt/Profile/YAMLProfileReader.h" 28a34c753fSRafael Auler #include "bolt/Profile/YAMLProfileWriter.h" 29a34c753fSRafael Auler #include "bolt/Rewrite/BinaryPassManager.h" 30a34c753fSRafael Auler #include "bolt/Rewrite/DWARFRewriter.h" 31a34c753fSRafael Auler #include "bolt/Rewrite/ExecutableFileMemoryManager.h" 3205634f73SJob Noorman #include "bolt/Rewrite/JITLinkLinker.h" 33c9b1f062SMaksim Panchenko #include "bolt/Rewrite/MetadataRewriters.h" 34a34c753fSRafael Auler #include "bolt/RuntimeLibs/HugifyRuntimeLibrary.h" 35a34c753fSRafael Auler #include "bolt/RuntimeLibs/InstrumentationRuntimeLibrary.h" 36a34c753fSRafael Auler #include "bolt/Utils/CommandLineOpts.h" 37a34c753fSRafael Auler #include "bolt/Utils/Utils.h" 3882ef86c1SAmir Ayupov #include "llvm/ADT/AddressRanges.h" 39f119a248SAmir Ayupov #include "llvm/ADT/STLExtras.h" 40a34c753fSRafael Auler #include "llvm/DebugInfo/DWARF/DWARFContext.h" 41290e4823Sserge-sans-paille #include "llvm/DebugInfo/DWARF/DWARFDebugFrame.h" 42a34c753fSRafael Auler #include "llvm/MC/MCAsmBackend.h" 43a34c753fSRafael Auler #include "llvm/MC/MCAsmInfo.h" 44a34c753fSRafael Auler #include "llvm/MC/MCDisassembler/MCDisassembler.h" 45a34c753fSRafael Auler #include "llvm/MC/MCObjectStreamer.h" 46a34c753fSRafael Auler #include "llvm/MC/MCStreamer.h" 47a34c753fSRafael Auler #include "llvm/MC/MCSymbol.h" 48a34c753fSRafael Auler #include "llvm/MC/TargetRegistry.h" 49a34c753fSRafael Auler #include "llvm/Object/ObjectFile.h" 50a34c753fSRafael Auler #include "llvm/Support/Alignment.h" 51a34c753fSRafael Auler #include "llvm/Support/Casting.h" 52a34c753fSRafael Auler #include "llvm/Support/CommandLine.h" 53a34c753fSRafael Auler #include "llvm/Support/DataExtractor.h" 54a34c753fSRafael Auler #include "llvm/Support/Errc.h" 5532d2473aSAmir Ayupov #include "llvm/Support/Error.h" 56a34c753fSRafael Auler #include "llvm/Support/FileSystem.h" 57a34c753fSRafael Auler #include "llvm/Support/ManagedStatic.h" 58a34c753fSRafael Auler #include "llvm/Support/Timer.h" 59a34c753fSRafael Auler #include "llvm/Support/ToolOutputFile.h" 60a34c753fSRafael Auler #include "llvm/Support/raw_ostream.h" 61a34c753fSRafael Auler #include <algorithm> 62a34c753fSRafael Auler #include <fstream> 6332d2473aSAmir Ayupov #include <memory> 64e8d6c537SKazu Hirata #include <optional> 65a34c753fSRafael Auler #include <system_error> 66a34c753fSRafael Auler 67a34c753fSRafael Auler #undef DEBUG_TYPE 68a34c753fSRafael Auler #define DEBUG_TYPE "bolt" 69a34c753fSRafael Auler 70a34c753fSRafael Auler using namespace llvm; 71a34c753fSRafael Auler using namespace object; 72a34c753fSRafael Auler using namespace bolt; 73a34c753fSRafael Auler 74a34c753fSRafael Auler extern cl::opt<uint32_t> X86AlignBranchBoundary; 75a34c753fSRafael Auler extern cl::opt<bool> X86AlignBranchWithin32BBoundaries; 76a34c753fSRafael Auler 77a34c753fSRafael Auler namespace opts { 78a34c753fSRafael Auler 79a34c753fSRafael Auler extern cl::list<std::string> HotTextMoveSections; 80a34c753fSRafael Auler extern cl::opt<bool> Hugify; 81a34c753fSRafael Auler extern cl::opt<bool> Instrument; 8251268a57SMaksim Panchenko extern cl::opt<bool> KeepNops; 8349fdbbcfSShaw Young extern cl::opt<bool> Lite; 84a34c753fSRafael Auler extern cl::list<std::string> ReorderData; 85a34c753fSRafael Auler extern cl::opt<bolt::ReorderFunctions::ReorderType> ReorderFunctions; 867de82ca3SMaksim Panchenko extern cl::opt<bool> TerminalTrap; 87a34c753fSRafael Auler extern cl::opt<bool> TimeBuild; 8896378b3dSshaw young extern cl::opt<bool> TimeRewrite; 89*3c357a49SAlexander Yermolovich extern cl::opt<bolt::IdenticalCodeFolding::ICFLevel, false, 90*3c357a49SAlexander Yermolovich llvm::bolt::DeprecatedICFNumericOptionParser> 91*3c357a49SAlexander Yermolovich ICF; 92a34c753fSRafael Auler 93c8482da7SAmir Ayupov cl::opt<bool> AllowStripped("allow-stripped", 94c8482da7SAmir Ayupov cl::desc("allow processing of stripped binaries"), 95c8482da7SAmir Ayupov cl::Hidden, cl::cat(BoltCategory)); 96c8482da7SAmir Ayupov 97b92436efSFangrui Song static cl::opt<bool> ForceToDataRelocations( 98b92436efSFangrui Song "force-data-relocations", 99a34c753fSRafael Auler cl::desc("force relocations to data sections to always be processed"), 100b92436efSFangrui Song 101b92436efSFangrui Song cl::Hidden, cl::cat(BoltCategory)); 102a34c753fSRafael Auler 103a34c753fSRafael Auler cl::opt<std::string> 104a34c753fSRafael Auler BoltID("bolt-id", 105a34c753fSRafael Auler cl::desc("add any string to tag this execution in the " 106a34c753fSRafael Auler "output binary via bolt info section"), 107a34c753fSRafael Auler cl::cat(BoltCategory)); 108a34c753fSRafael Auler 1096333e5ddSAmir Ayupov cl::opt<bool> DumpDotAll( 1106333e5ddSAmir Ayupov "dump-dot-all", 1116333e5ddSAmir Ayupov cl::desc("dump function CFGs to graphviz format after each stage;" 1126333e5ddSAmir Ayupov "enable '-print-loops' for color-coded blocks"), 113b92436efSFangrui Song cl::Hidden, cl::cat(BoltCategory)); 114a34c753fSRafael Auler 115a34c753fSRafael Auler static cl::list<std::string> 116a34c753fSRafael Auler ForceFunctionNames("funcs", 117a34c753fSRafael Auler cl::CommaSeparated, 118a34c753fSRafael Auler cl::desc("limit optimizations to functions from the list"), 119a34c753fSRafael Auler cl::value_desc("func1,func2,func3,..."), 120a34c753fSRafael Auler cl::Hidden, 121a34c753fSRafael Auler cl::cat(BoltCategory)); 122a34c753fSRafael Auler 123a34c753fSRafael Auler static cl::opt<std::string> 124a34c753fSRafael Auler FunctionNamesFile("funcs-file", 125a34c753fSRafael Auler cl::desc("file with list of functions to optimize"), 126a34c753fSRafael Auler cl::Hidden, 127a34c753fSRafael Auler cl::cat(BoltCategory)); 128a34c753fSRafael Auler 129d474dbdfSAmir Ayupov static cl::list<std::string> ForceFunctionNamesNR( 130d474dbdfSAmir Ayupov "funcs-no-regex", cl::CommaSeparated, 131d474dbdfSAmir Ayupov cl::desc("limit optimizations to functions from the list (non-regex)"), 132d474dbdfSAmir Ayupov cl::value_desc("func1,func2,func3,..."), cl::Hidden, cl::cat(BoltCategory)); 133d474dbdfSAmir Ayupov 134d474dbdfSAmir Ayupov static cl::opt<std::string> FunctionNamesFileNR( 135d474dbdfSAmir Ayupov "funcs-file-no-regex", 136d474dbdfSAmir Ayupov cl::desc("file with list of functions to optimize (non-regex)"), cl::Hidden, 137d474dbdfSAmir Ayupov cl::cat(BoltCategory)); 138d474dbdfSAmir Ayupov 139a34c753fSRafael Auler cl::opt<bool> 140a34c753fSRafael Auler KeepTmp("keep-tmp", 141a34c753fSRafael Auler cl::desc("preserve intermediate .o file"), 142a34c753fSRafael Auler cl::Hidden, 143a34c753fSRafael Auler cl::cat(BoltCategory)); 144a34c753fSRafael Auler 145a34c753fSRafael Auler static cl::opt<unsigned> 146a34c753fSRafael Auler LiteThresholdPct("lite-threshold-pct", 147a34c753fSRafael Auler cl::desc("threshold (in percent) for selecting functions to process in lite " 148a34c753fSRafael Auler "mode. Higher threshold means fewer functions to process. E.g " 149a34c753fSRafael Auler "threshold of 90 means only top 10 percent of functions with " 150a34c753fSRafael Auler "profile will be processed."), 151a34c753fSRafael Auler cl::init(0), 152a34c753fSRafael Auler cl::ZeroOrMore, 153a34c753fSRafael Auler cl::Hidden, 154a34c753fSRafael Auler cl::cat(BoltOptCategory)); 155a34c753fSRafael Auler 156b92436efSFangrui Song static cl::opt<unsigned> LiteThresholdCount( 157b92436efSFangrui Song "lite-threshold-count", 158a34c753fSRafael Auler cl::desc("similar to '-lite-threshold-pct' but specify threshold using " 159a34c753fSRafael Auler "absolute function call count. I.e. limit processing to functions " 160a34c753fSRafael Auler "executed at least the specified number of times."), 161b92436efSFangrui Song cl::init(0), cl::Hidden, cl::cat(BoltOptCategory)); 162a34c753fSRafael Auler 163a34c753fSRafael Auler static cl::opt<unsigned> 164a34c753fSRafael Auler MaxFunctions("max-funcs", 165b92436efSFangrui Song cl::desc("maximum number of functions to process"), cl::Hidden, 166a34c753fSRafael Auler cl::cat(BoltCategory)); 167a34c753fSRafael Auler 168b92436efSFangrui Song static cl::opt<unsigned> MaxDataRelocations( 169b92436efSFangrui Song "max-data-relocations", 170b92436efSFangrui Song cl::desc("maximum number of data relocations to process"), cl::Hidden, 171a34c753fSRafael Auler cl::cat(BoltCategory)); 172a34c753fSRafael Auler 173b92436efSFangrui Song cl::opt<bool> PrintAll("print-all", 174b92436efSFangrui Song cl::desc("print functions after each stage"), cl::Hidden, 175a34c753fSRafael Auler cl::cat(BoltCategory)); 176a34c753fSRafael Auler 177fd49cc87SAmir Ayupov cl::opt<bool> PrintProfile("print-profile", 178fd49cc87SAmir Ayupov cl::desc("print functions after attaching profile"), 179fd49cc87SAmir Ayupov cl::Hidden, cl::cat(BoltCategory)); 180fd49cc87SAmir Ayupov 181b92436efSFangrui Song cl::opt<bool> PrintCFG("print-cfg", 182a34c753fSRafael Auler cl::desc("print functions after CFG construction"), 183b92436efSFangrui Song cl::Hidden, cl::cat(BoltCategory)); 184a34c753fSRafael Auler 185a34c753fSRafael Auler cl::opt<bool> PrintDisasm("print-disasm", 186a34c753fSRafael Auler cl::desc("print function after disassembly"), 187b92436efSFangrui Song cl::Hidden, cl::cat(BoltCategory)); 188a34c753fSRafael Auler 189a34c753fSRafael Auler static cl::opt<bool> 190a34c753fSRafael Auler PrintGlobals("print-globals", 191b92436efSFangrui Song cl::desc("print global symbols after disassembly"), cl::Hidden, 192a34c753fSRafael Auler cl::cat(BoltCategory)); 193a34c753fSRafael Auler 194a34c753fSRafael Auler extern cl::opt<bool> PrintSections; 195a34c753fSRafael Auler 196b92436efSFangrui Song static cl::opt<bool> PrintLoopInfo("print-loops", 197a34c753fSRafael Auler cl::desc("print loop related information"), 198b92436efSFangrui Song cl::Hidden, cl::cat(BoltCategory)); 199a34c753fSRafael Auler 200b92436efSFangrui Song static cl::opt<cl::boolOrDefault> RelocationMode( 201b92436efSFangrui Song "relocs", cl::desc("use relocations in the binary (default=autodetect)"), 202a34c753fSRafael Auler cl::cat(BoltCategory)); 203a34c753fSRafael Auler 20462806811SAmir Ayupov extern cl::opt<std::string> SaveProfile; 205a34c753fSRafael Auler 206a34c753fSRafael Auler static cl::list<std::string> 207a34c753fSRafael Auler SkipFunctionNames("skip-funcs", 208a34c753fSRafael Auler cl::CommaSeparated, 209a34c753fSRafael Auler cl::desc("list of functions to skip"), 210a34c753fSRafael Auler cl::value_desc("func1,func2,func3,..."), 211a34c753fSRafael Auler cl::Hidden, 212a34c753fSRafael Auler cl::cat(BoltCategory)); 213a34c753fSRafael Auler 214a34c753fSRafael Auler static cl::opt<std::string> 215a34c753fSRafael Auler SkipFunctionNamesFile("skip-funcs-file", 216a34c753fSRafael Auler cl::desc("file with list of functions to skip"), 217a34c753fSRafael Auler cl::Hidden, 218a34c753fSRafael Auler cl::cat(BoltCategory)); 219a34c753fSRafael Auler 220a34c753fSRafael Auler cl::opt<bool> 221a34c753fSRafael Auler TrapOldCode("trap-old-code", 222a34c753fSRafael Auler cl::desc("insert traps in old function bodies (relocation mode)"), 223a34c753fSRafael Auler cl::Hidden, 224a34c753fSRafael Auler cl::cat(BoltCategory)); 225a34c753fSRafael Auler 226a34c753fSRafael Auler static cl::opt<std::string> DWPPathName("dwp", 227a34c753fSRafael Auler cl::desc("Path and name to DWP file."), 22836c7d79dSFangrui Song cl::Hidden, cl::init(""), 22936c7d79dSFangrui Song cl::cat(BoltCategory)); 230a34c753fSRafael Auler 231a34c753fSRafael Auler static cl::opt<bool> 232a34c753fSRafael Auler UseGnuStack("use-gnu-stack", 233a34c753fSRafael Auler cl::desc("use GNU_STACK program header for new segment (workaround for " 234a34c753fSRafael Auler "issues with strip/objcopy)"), 235a34c753fSRafael Auler cl::ZeroOrMore, 236a34c753fSRafael Auler cl::cat(BoltCategory)); 237a34c753fSRafael Auler 23832c9d5efSAmir Ayupov static cl::opt<bool> 239a34c753fSRafael Auler SequentialDisassembly("sequential-disassembly", 240a34c753fSRafael Auler cl::desc("performs disassembly sequentially"), 241a34c753fSRafael Auler cl::init(false), 242a34c753fSRafael Auler cl::cat(BoltOptCategory)); 243a34c753fSRafael Auler 244b92436efSFangrui Song static cl::opt<bool> WriteBoltInfoSection( 245b92436efSFangrui Song "bolt-info", cl::desc("write bolt info section in the output binary"), 246b92436efSFangrui Song cl::init(true), cl::Hidden, cl::cat(BoltOutputCategory)); 247a34c753fSRafael Auler 248a34c753fSRafael Auler } // namespace opts 249a34c753fSRafael Auler 25043dce27cSMaksim Panchenko // FIXME: implement a better way to mark sections for replacement. 251a34c753fSRafael Auler constexpr const char *RewriteInstance::SectionsToOverwrite[]; 252a34c753fSRafael Auler std::vector<std::string> RewriteInstance::DebugSectionsToOverwrite = { 253014cd37fSAlexander Yermolovich ".debug_abbrev", ".debug_aranges", ".debug_line", ".debug_line_str", 254014cd37fSAlexander Yermolovich ".debug_loc", ".debug_loclists", ".debug_ranges", ".debug_rnglists", 25587fb0ea2SRui Zhong ".gdb_index", ".debug_addr", ".debug_abbrev", ".debug_info", 25687fb0ea2SRui Zhong ".debug_types", ".pseudo_probe"}; 257a34c753fSRafael Auler 258a34c753fSRafael Auler const char RewriteInstance::TimerGroupName[] = "rewrite"; 259a34c753fSRafael Auler const char RewriteInstance::TimerGroupDesc[] = "Rewrite passes"; 260a34c753fSRafael Auler 261a34c753fSRafael Auler namespace llvm { 262a34c753fSRafael Auler namespace bolt { 263a34c753fSRafael Auler 264a34c753fSRafael Auler extern const char *BoltRevision; 265a34c753fSRafael Auler 266c0febca3SAmir Ayupov // Weird location for createMCPlusBuilder, but this is here to avoid a 267c0febca3SAmir Ayupov // cyclic dependency of libCore (its natural place) and libTarget. libRewrite 268c0febca3SAmir Ayupov // can depend on libTarget, but not libCore. Since libRewrite is the only 269c0febca3SAmir Ayupov // user of this function, we define it here. 270a34c753fSRafael Auler MCPlusBuilder *createMCPlusBuilder(const Triple::ArchType Arch, 271a34c753fSRafael Auler const MCInstrAnalysis *Analysis, 272a34c753fSRafael Auler const MCInstrInfo *Info, 2738fb83bf5SJob Noorman const MCRegisterInfo *RegInfo, 2748fb83bf5SJob Noorman const MCSubtargetInfo *STI) { 275a34c753fSRafael Auler #ifdef X86_AVAILABLE 276a34c753fSRafael Auler if (Arch == Triple::x86_64) 2778fb83bf5SJob Noorman return createX86MCPlusBuilder(Analysis, Info, RegInfo, STI); 278a34c753fSRafael Auler #endif 279a34c753fSRafael Auler 280a34c753fSRafael Auler #ifdef AARCH64_AVAILABLE 281a34c753fSRafael Auler if (Arch == Triple::aarch64) 2828fb83bf5SJob Noorman return createAArch64MCPlusBuilder(Analysis, Info, RegInfo, STI); 283a34c753fSRafael Auler #endif 284a34c753fSRafael Auler 285f8730293SJob Noorman #ifdef RISCV_AVAILABLE 286f8730293SJob Noorman if (Arch == Triple::riscv64) 2878fb83bf5SJob Noorman return createRISCVMCPlusBuilder(Analysis, Info, RegInfo, STI); 288f8730293SJob Noorman #endif 289f8730293SJob Noorman 290a34c753fSRafael Auler llvm_unreachable("architecture unsupported by MCPlusBuilder"); 291a34c753fSRafael Auler } 292a34c753fSRafael Auler 29320e9d4caSVladislav Khmelevsky } // namespace bolt 29420e9d4caSVladislav Khmelevsky } // namespace llvm 29520e9d4caSVladislav Khmelevsky 2969c92b023SNathan Sidwell using ELF64LEPhdrTy = ELF64LEFile::Elf_Phdr; 2979c92b023SNathan Sidwell 29820e9d4caSVladislav Khmelevsky namespace { 29920e9d4caSVladislav Khmelevsky 30020e9d4caSVladislav Khmelevsky bool refersToReorderedSection(ErrorOr<BinarySection &> Section) { 30112b29900SKazu Hirata return llvm::any_of(opts::ReorderData, [&](const std::string &SectionName) { 30212b29900SKazu Hirata return Section && Section->getName() == SectionName; 30320e9d4caSVladislav Khmelevsky }); 30420e9d4caSVladislav Khmelevsky } 30520e9d4caSVladislav Khmelevsky 306a34c753fSRafael Auler } // anonymous namespace 307a34c753fSRafael Auler 30832d2473aSAmir Ayupov Expected<std::unique_ptr<RewriteInstance>> 30916492a61SAmir Ayupov RewriteInstance::create(ELFObjectFileBase *File, const int Argc, 31052cf0711SAmir Ayupov const char *const *Argv, StringRef ToolPath, 31152cf0711SAmir Ayupov raw_ostream &Stdout, raw_ostream &Stderr) { 31232d2473aSAmir Ayupov Error Err = Error::success(); 31352cf0711SAmir Ayupov auto RI = std::make_unique<RewriteInstance>(File, Argc, Argv, ToolPath, 31452cf0711SAmir Ayupov Stdout, Stderr, Err); 31532d2473aSAmir Ayupov if (Err) 31632d2473aSAmir Ayupov return std::move(Err); 31763686af1SVladislav Khmelevsky return std::move(RI); 31832d2473aSAmir Ayupov } 31932d2473aSAmir Ayupov 320a34c753fSRafael Auler RewriteInstance::RewriteInstance(ELFObjectFileBase *File, const int Argc, 32132d2473aSAmir Ayupov const char *const *Argv, StringRef ToolPath, 32252cf0711SAmir Ayupov raw_ostream &Stdout, raw_ostream &Stderr, 32332d2473aSAmir Ayupov Error &Err) 324a34c753fSRafael Auler : InputFile(File), Argc(Argc), Argv(Argv), ToolPath(ToolPath), 325a34c753fSRafael Auler SHStrTab(StringTableBuilder::ELF) { 32632d2473aSAmir Ayupov ErrorAsOutParameter EAO(&Err); 327a34c753fSRafael Auler auto ELF64LEFile = dyn_cast<ELF64LEObjectFile>(InputFile); 328a34c753fSRafael Auler if (!ELF64LEFile) { 32932d2473aSAmir Ayupov Err = createStringError(errc::not_supported, 33032d2473aSAmir Ayupov "Only 64-bit LE ELF binaries are supported"); 33132d2473aSAmir Ayupov return; 332a34c753fSRafael Auler } 333a34c753fSRafael Auler 334a34c753fSRafael Auler bool IsPIC = false; 335a34c753fSRafael Auler const ELFFile<ELF64LE> &Obj = ELF64LEFile->getELFFile(); 336a34c753fSRafael Auler if (Obj.getHeader().e_type != ELF::ET_EXEC) { 33752cf0711SAmir Ayupov Stdout << "BOLT-INFO: shared object or position-independent executable " 338a34c753fSRafael Auler "detected\n"; 339a34c753fSRafael Auler IsPIC = true; 340a34c753fSRafael Auler } 341a34c753fSRafael Auler 34252cf0711SAmir Ayupov // Make sure we don't miss any output on core dumps. 34352cf0711SAmir Ayupov Stdout.SetUnbuffered(); 34452cf0711SAmir Ayupov Stderr.SetUnbuffered(); 34552cf0711SAmir Ayupov LLVM_DEBUG(dbgs().SetUnbuffered()); 34652cf0711SAmir Ayupov 347c0febca3SAmir Ayupov // Read RISCV subtarget features from input file 348c0febca3SAmir Ayupov std::unique_ptr<SubtargetFeatures> Features; 349c0febca3SAmir Ayupov Triple TheTriple = File->makeTriple(); 350c0febca3SAmir Ayupov if (TheTriple.getArch() == llvm::Triple::riscv64) { 351c0febca3SAmir Ayupov Expected<SubtargetFeatures> FeaturesOrErr = File->getFeatures(); 352c0febca3SAmir Ayupov if (auto E = FeaturesOrErr.takeError()) { 353c0febca3SAmir Ayupov Err = std::move(E); 354c0febca3SAmir Ayupov return; 355c0febca3SAmir Ayupov } else { 356c0febca3SAmir Ayupov Features.reset(new SubtargetFeatures(*FeaturesOrErr)); 357c0febca3SAmir Ayupov } 358c0febca3SAmir Ayupov } 359c0febca3SAmir Ayupov 36062e894e0SSayhaan Siddiqui Relocation::Arch = TheTriple.getArch(); 36132d2473aSAmir Ayupov auto BCOrErr = BinaryContext::createBinaryContext( 3622ccf7ed2SJared Wyles TheTriple, std::make_shared<orc::SymbolStringPool>(), File->getFileName(), 3632ccf7ed2SJared Wyles Features.get(), IsPIC, 364a34c753fSRafael Auler DWARFContext::create(*File, DWARFContext::ProcessDebugRelocations::Ignore, 365a34c753fSRafael Auler nullptr, opts::DWPPathName, 366a34c753fSRafael Auler WithColor::defaultErrorHandler, 36752cf0711SAmir Ayupov WithColor::defaultWarningHandler), 36852cf0711SAmir Ayupov JournalingStreams{Stdout, Stderr}); 36932d2473aSAmir Ayupov if (Error E = BCOrErr.takeError()) { 37032d2473aSAmir Ayupov Err = std::move(E); 37132d2473aSAmir Ayupov return; 37232d2473aSAmir Ayupov } 37332d2473aSAmir Ayupov BC = std::move(BCOrErr.get()); 3748fb83bf5SJob Noorman BC->initializeTarget(std::unique_ptr<MCPlusBuilder>( 3758fb83bf5SJob Noorman createMCPlusBuilder(BC->TheTriple->getArch(), BC->MIA.get(), 3768fb83bf5SJob Noorman BC->MII.get(), BC->MRI.get(), BC->STI.get()))); 377a34c753fSRafael Auler 378fc0ced73SRafael Auler BAT = std::make_unique<BoltAddressTranslation>(); 379a34c753fSRafael Auler 380a34c753fSRafael Auler if (opts::UpdateDebugSections) 381a34c753fSRafael Auler DebugInfoRewriter = std::make_unique<DWARFRewriter>(*BC); 382a34c753fSRafael Auler 383ee0e9ccbSMaksim Panchenko if (opts::Instrument) 384a34c753fSRafael Auler BC->setRuntimeLibrary(std::make_unique<InstrumentationRuntimeLibrary>()); 385ee0e9ccbSMaksim Panchenko else if (opts::Hugify) 386a34c753fSRafael Auler BC->setRuntimeLibrary(std::make_unique<HugifyRuntimeLibrary>()); 387a34c753fSRafael Auler } 388a34c753fSRafael Auler 389a34c753fSRafael Auler RewriteInstance::~RewriteInstance() {} 390a34c753fSRafael Auler 391a34c753fSRafael Auler Error RewriteInstance::setProfile(StringRef Filename) { 392a34c753fSRafael Auler if (!sys::fs::exists(Filename)) 393a34c753fSRafael Auler return errorCodeToError(make_error_code(errc::no_such_file_or_directory)); 394a34c753fSRafael Auler 395a34c753fSRafael Auler if (ProfileReader) { 396a34c753fSRafael Auler // Already exists 39740c2e0faSMaksim Panchenko return make_error<StringError>(Twine("multiple profiles specified: ") + 39840c2e0faSMaksim Panchenko ProfileReader->getFilename() + " and " + 39940c2e0faSMaksim Panchenko Filename, 40040c2e0faSMaksim Panchenko inconvertibleErrorCode()); 401a34c753fSRafael Auler } 402a34c753fSRafael Auler 403a34c753fSRafael Auler // Spawn a profile reader based on file contents. 404ee0e9ccbSMaksim Panchenko if (DataAggregator::checkPerfDataMagic(Filename)) 405a34c753fSRafael Auler ProfileReader = std::make_unique<DataAggregator>(Filename); 406ee0e9ccbSMaksim Panchenko else if (YAMLProfileReader::isYAML(Filename)) 407a34c753fSRafael Auler ProfileReader = std::make_unique<YAMLProfileReader>(Filename); 408ee0e9ccbSMaksim Panchenko else 409a34c753fSRafael Auler ProfileReader = std::make_unique<DataReader>(Filename); 410a34c753fSRafael Auler 411a34c753fSRafael Auler return Error::success(); 412a34c753fSRafael Auler } 413a34c753fSRafael Auler 414a34c753fSRafael Auler /// Return true if the function \p BF should be disassembled. 415a34c753fSRafael Auler static bool shouldDisassemble(const BinaryFunction &BF) { 416a34c753fSRafael Auler if (BF.isPseudo()) 417a34c753fSRafael Auler return false; 418a34c753fSRafael Auler 419a34c753fSRafael Auler if (opts::processAllFunctions()) 420a34c753fSRafael Auler return true; 421a34c753fSRafael Auler 422a34c753fSRafael Auler return !BF.isIgnored(); 423a34c753fSRafael Auler } 424a34c753fSRafael Auler 42582ef86c1SAmir Ayupov // Return if a section stored in the image falls into a segment address space. 42682ef86c1SAmir Ayupov // If not, Set \p Overlap to true if there's a partial overlap. 42782ef86c1SAmir Ayupov template <class ELFT> 42882ef86c1SAmir Ayupov static bool checkOffsets(const typename ELFT::Phdr &Phdr, 42982ef86c1SAmir Ayupov const typename ELFT::Shdr &Sec, bool &Overlap) { 43082ef86c1SAmir Ayupov // SHT_NOBITS sections don't need to have an offset inside the segment. 43182ef86c1SAmir Ayupov if (Sec.sh_type == ELF::SHT_NOBITS) 43282ef86c1SAmir Ayupov return true; 43382ef86c1SAmir Ayupov 43482ef86c1SAmir Ayupov // Only non-empty sections can be at the end of a segment. 4358fd02d54SAmir Ayupov uint64_t SectionSize = Sec.sh_size ? Sec.sh_size : 1ull; 4368fd02d54SAmir Ayupov AddressRange SectionAddressRange((uint64_t)Sec.sh_offset, 4378fd02d54SAmir Ayupov Sec.sh_offset + SectionSize); 43882ef86c1SAmir Ayupov AddressRange SegmentAddressRange(Phdr.p_offset, 43982ef86c1SAmir Ayupov Phdr.p_offset + Phdr.p_filesz); 44082ef86c1SAmir Ayupov if (SegmentAddressRange.contains(SectionAddressRange)) 44182ef86c1SAmir Ayupov return true; 44282ef86c1SAmir Ayupov 44382ef86c1SAmir Ayupov Overlap = SegmentAddressRange.intersects(SectionAddressRange); 44482ef86c1SAmir Ayupov return false; 44582ef86c1SAmir Ayupov } 44682ef86c1SAmir Ayupov 44782ef86c1SAmir Ayupov // Check that an allocatable section belongs to a virtual address 44882ef86c1SAmir Ayupov // space of a segment. 44982ef86c1SAmir Ayupov template <class ELFT> 45082ef86c1SAmir Ayupov static bool checkVMA(const typename ELFT::Phdr &Phdr, 45182ef86c1SAmir Ayupov const typename ELFT::Shdr &Sec, bool &Overlap) { 45282ef86c1SAmir Ayupov // Only non-empty sections can be at the end of a segment. 4538fd02d54SAmir Ayupov uint64_t SectionSize = Sec.sh_size ? Sec.sh_size : 1ull; 4548fd02d54SAmir Ayupov AddressRange SectionAddressRange((uint64_t)Sec.sh_addr, 4558fd02d54SAmir Ayupov Sec.sh_addr + SectionSize); 45682ef86c1SAmir Ayupov AddressRange SegmentAddressRange(Phdr.p_vaddr, Phdr.p_vaddr + Phdr.p_memsz); 45782ef86c1SAmir Ayupov 45882ef86c1SAmir Ayupov if (SegmentAddressRange.contains(SectionAddressRange)) 45982ef86c1SAmir Ayupov return true; 46082ef86c1SAmir Ayupov Overlap = SegmentAddressRange.intersects(SectionAddressRange); 46182ef86c1SAmir Ayupov return false; 46282ef86c1SAmir Ayupov } 46382ef86c1SAmir Ayupov 46482ef86c1SAmir Ayupov void RewriteInstance::markGnuRelroSections() { 46582ef86c1SAmir Ayupov using ELFT = ELF64LE; 46682ef86c1SAmir Ayupov using ELFShdrTy = typename ELFObjectFile<ELFT>::Elf_Shdr; 46782ef86c1SAmir Ayupov auto ELF64LEFile = cast<ELF64LEObjectFile>(InputFile); 46882ef86c1SAmir Ayupov const ELFFile<ELFT> &Obj = ELF64LEFile->getELFFile(); 46982ef86c1SAmir Ayupov 47082ef86c1SAmir Ayupov auto handleSection = [&](const ELFT::Phdr &Phdr, SectionRef SecRef) { 47182ef86c1SAmir Ayupov BinarySection *BinarySection = BC->getSectionForSectionRef(SecRef); 47282ef86c1SAmir Ayupov // If the section is non-allocatable, ignore it for GNU_RELRO purposes: 47382ef86c1SAmir Ayupov // it can't be made read-only after runtime relocations processing. 47482ef86c1SAmir Ayupov if (!BinarySection || !BinarySection->isAllocatable()) 47582ef86c1SAmir Ayupov return; 47682ef86c1SAmir Ayupov const ELFShdrTy *Sec = cantFail(Obj.getSection(SecRef.getIndex())); 47782ef86c1SAmir Ayupov bool ImageOverlap{false}, VMAOverlap{false}; 47882ef86c1SAmir Ayupov bool ImageContains = checkOffsets<ELFT>(Phdr, *Sec, ImageOverlap); 47982ef86c1SAmir Ayupov bool VMAContains = checkVMA<ELFT>(Phdr, *Sec, VMAOverlap); 48082ef86c1SAmir Ayupov if (ImageOverlap) { 48182ef86c1SAmir Ayupov if (opts::Verbosity >= 1) 48252cf0711SAmir Ayupov BC->errs() << "BOLT-WARNING: GNU_RELRO segment has partial file offset " 48352cf0711SAmir Ayupov << "overlap with section " << BinarySection->getName() 48452cf0711SAmir Ayupov << '\n'; 48582ef86c1SAmir Ayupov return; 48682ef86c1SAmir Ayupov } 48782ef86c1SAmir Ayupov if (VMAOverlap) { 48882ef86c1SAmir Ayupov if (opts::Verbosity >= 1) 48952cf0711SAmir Ayupov BC->errs() << "BOLT-WARNING: GNU_RELRO segment has partial VMA overlap " 49082ef86c1SAmir Ayupov << "with section " << BinarySection->getName() << '\n'; 49182ef86c1SAmir Ayupov return; 49282ef86c1SAmir Ayupov } 49382ef86c1SAmir Ayupov if (!ImageContains || !VMAContains) 49482ef86c1SAmir Ayupov return; 49582ef86c1SAmir Ayupov BinarySection->setRelro(); 49682ef86c1SAmir Ayupov if (opts::Verbosity >= 1) 49752cf0711SAmir Ayupov BC->outs() << "BOLT-INFO: marking " << BinarySection->getName() 49882ef86c1SAmir Ayupov << " as GNU_RELRO\n"; 49982ef86c1SAmir Ayupov }; 50082ef86c1SAmir Ayupov 50182ef86c1SAmir Ayupov for (const ELFT::Phdr &Phdr : cantFail(Obj.program_headers())) 50282ef86c1SAmir Ayupov if (Phdr.p_type == ELF::PT_GNU_RELRO) 50382ef86c1SAmir Ayupov for (SectionRef SecRef : InputFile->sections()) 50482ef86c1SAmir Ayupov handleSection(Phdr, SecRef); 50582ef86c1SAmir Ayupov } 50682ef86c1SAmir Ayupov 507af6e66f4SAmir Ayupov Error RewriteInstance::discoverStorage() { 508a34c753fSRafael Auler NamedRegionTimer T("discoverStorage", "discover storage", TimerGroupName, 509a34c753fSRafael Auler TimerGroupDesc, opts::TimeRewrite); 510a34c753fSRafael Auler 511ffb42e31SNathan Sidwell auto ELF64LEFile = cast<ELF64LEObjectFile>(InputFile); 512a34c753fSRafael Auler const ELFFile<ELF64LE> &Obj = ELF64LEFile->getELFFile(); 513a34c753fSRafael Auler 514a34c753fSRafael Auler BC->StartFunctionAddress = Obj.getHeader().e_entry; 515a34c753fSRafael Auler 516a34c753fSRafael Auler NextAvailableAddress = 0; 517a34c753fSRafael Auler uint64_t NextAvailableOffset = 0; 518af6e66f4SAmir Ayupov Expected<ELF64LE::PhdrRange> PHsOrErr = Obj.program_headers(); 519af6e66f4SAmir Ayupov if (Error E = PHsOrErr.takeError()) 520af6e66f4SAmir Ayupov return E; 521af6e66f4SAmir Ayupov 522af6e66f4SAmir Ayupov ELF64LE::PhdrRange PHs = PHsOrErr.get(); 523a34c753fSRafael Auler for (const ELF64LE::Phdr &Phdr : PHs) { 524a34c753fSRafael Auler switch (Phdr.p_type) { 525a34c753fSRafael Auler case ELF::PT_LOAD: 526a34c753fSRafael Auler BC->FirstAllocAddress = std::min(BC->FirstAllocAddress, 527a34c753fSRafael Auler static_cast<uint64_t>(Phdr.p_vaddr)); 528a34c753fSRafael Auler NextAvailableAddress = std::max(NextAvailableAddress, 529a34c753fSRafael Auler Phdr.p_vaddr + Phdr.p_memsz); 530a34c753fSRafael Auler NextAvailableOffset = std::max(NextAvailableOffset, 531a34c753fSRafael Auler Phdr.p_offset + Phdr.p_filesz); 532a34c753fSRafael Auler 5336d216fb7SKristof Beyls BC->SegmentMapInfo[Phdr.p_vaddr] = SegmentInfo{ 5346d216fb7SKristof Beyls Phdr.p_vaddr, Phdr.p_memsz, Phdr.p_offset, 5356d216fb7SKristof Beyls Phdr.p_filesz, Phdr.p_align, ((Phdr.p_flags & ELF::PF_X) != 0)}; 5362abcbbd9SMaksim Panchenko if (BC->TheTriple->getArch() == llvm::Triple::x86_64 && 5372abcbbd9SMaksim Panchenko Phdr.p_vaddr >= BinaryContext::KernelStartX86_64) 5382abcbbd9SMaksim Panchenko BC->IsLinuxKernel = true; 539a34c753fSRafael Auler break; 540a34c753fSRafael Auler case ELF::PT_INTERP: 541a34c753fSRafael Auler BC->HasInterpHeader = true; 542a34c753fSRafael Auler break; 543a34c753fSRafael Auler } 544a34c753fSRafael Auler } 545a34c753fSRafael Auler 5462abcbbd9SMaksim Panchenko if (BC->IsLinuxKernel) 54752cf0711SAmir Ayupov BC->outs() << "BOLT-INFO: Linux kernel binary detected\n"; 5482abcbbd9SMaksim Panchenko 549a34c753fSRafael Auler for (const SectionRef &Section : InputFile->sections()) { 550af6e66f4SAmir Ayupov Expected<StringRef> SectionNameOrErr = Section.getName(); 551af6e66f4SAmir Ayupov if (Error E = SectionNameOrErr.takeError()) 552af6e66f4SAmir Ayupov return E; 553af6e66f4SAmir Ayupov StringRef SectionName = SectionNameOrErr.get(); 554364963a0SNathan Sidwell if (SectionName == BC->getMainCodeSectionName()) { 555a34c753fSRafael Auler BC->OldTextSectionAddress = Section.getAddress(); 556a34c753fSRafael Auler BC->OldTextSectionSize = Section.getSize(); 557a34c753fSRafael Auler 558af6e66f4SAmir Ayupov Expected<StringRef> SectionContentsOrErr = Section.getContents(); 559af6e66f4SAmir Ayupov if (Error E = SectionContentsOrErr.takeError()) 560af6e66f4SAmir Ayupov return E; 561af6e66f4SAmir Ayupov StringRef SectionContents = SectionContentsOrErr.get(); 562a34c753fSRafael Auler BC->OldTextSectionOffset = 563a34c753fSRafael Auler SectionContents.data() - InputFile->getData().data(); 564a34c753fSRafael Auler } 565a34c753fSRafael Auler 566a34c753fSRafael Auler if (!opts::HeatmapMode && 567a34c753fSRafael Auler !(opts::AggregateOnly && BAT->enabledFor(InputFile)) && 568ad8fd5b1SKazu Hirata (SectionName.starts_with(getOrgSecPrefix()) || 569af6e66f4SAmir Ayupov SectionName == getBOLTTextSectionName())) 570af6e66f4SAmir Ayupov return createStringError( 571af6e66f4SAmir Ayupov errc::function_not_supported, 572af6e66f4SAmir Ayupov "BOLT-ERROR: input file was processed by BOLT. Cannot re-optimize"); 573a34c753fSRafael Auler } 574a34c753fSRafael Auler 575af6e66f4SAmir Ayupov if (!NextAvailableAddress || !NextAvailableOffset) 576af6e66f4SAmir Ayupov return createStringError(errc::executable_format_error, 577a34c753fSRafael Auler "no PT_LOAD pheader seen"); 578a34c753fSRafael Auler 57952cf0711SAmir Ayupov BC->outs() << "BOLT-INFO: first alloc address is 0x" 580a34c753fSRafael Auler << Twine::utohexstr(BC->FirstAllocAddress) << '\n'; 581a34c753fSRafael Auler 582a34c753fSRafael Auler FirstNonAllocatableOffset = NextAvailableOffset; 583a34c753fSRafael Auler 584a34c753fSRafael Auler NextAvailableAddress = alignTo(NextAvailableAddress, BC->PageAlign); 585a34c753fSRafael Auler NextAvailableOffset = alignTo(NextAvailableOffset, BC->PageAlign); 586a34c753fSRafael Auler 5871fb18619SAlexey Moksyakov // Hugify: Additional huge page from left side due to 5881fb18619SAlexey Moksyakov // weird ASLR mapping addresses (4KB aligned) 5891fb18619SAlexey Moksyakov if (opts::Hugify && !BC->HasFixedLoadAddress) 5901fb18619SAlexey Moksyakov NextAvailableAddress += BC->PageAlign; 5911fb18619SAlexey Moksyakov 5922abcbbd9SMaksim Panchenko if (!opts::UseGnuStack && !BC->IsLinuxKernel) { 593a34c753fSRafael Auler // This is where the black magic happens. Creating PHDR table in a segment 594a34c753fSRafael Auler // other than that containing ELF header is tricky. Some loaders and/or 595a34c753fSRafael Auler // parts of loaders will apply e_phoff from ELF header assuming both are in 596a34c753fSRafael Auler // the same segment, while others will do the proper calculation. 597a34c753fSRafael Auler // We create the new PHDR table in such a way that both of the methods 598a34c753fSRafael Auler // of loading and locating the table work. There's a slight file size 599a34c753fSRafael Auler // overhead because of that. 600a34c753fSRafael Auler // 601a34c753fSRafael Auler // NB: bfd's strip command cannot do the above and will corrupt the 602a34c753fSRafael Auler // binary during the process of stripping non-allocatable sections. 603ee0e9ccbSMaksim Panchenko if (NextAvailableOffset <= NextAvailableAddress - BC->FirstAllocAddress) 604a34c753fSRafael Auler NextAvailableOffset = NextAvailableAddress - BC->FirstAllocAddress; 605ee0e9ccbSMaksim Panchenko else 606a34c753fSRafael Auler NextAvailableAddress = NextAvailableOffset + BC->FirstAllocAddress; 607ee0e9ccbSMaksim Panchenko 60840c2e0faSMaksim Panchenko assert(NextAvailableOffset == 60940c2e0faSMaksim Panchenko NextAvailableAddress - BC->FirstAllocAddress && 61040c2e0faSMaksim Panchenko "PHDR table address calculation error"); 611a34c753fSRafael Auler 61252cf0711SAmir Ayupov BC->outs() << "BOLT-INFO: creating new program header table at address 0x" 613a34c753fSRafael Auler << Twine::utohexstr(NextAvailableAddress) << ", offset 0x" 614a34c753fSRafael Auler << Twine::utohexstr(NextAvailableOffset) << '\n'; 615a34c753fSRafael Auler 616a34c753fSRafael Auler PHDRTableAddress = NextAvailableAddress; 617a34c753fSRafael Auler PHDRTableOffset = NextAvailableOffset; 618a34c753fSRafael Auler 619a34c753fSRafael Auler // Reserve space for 3 extra pheaders. 620a34c753fSRafael Auler unsigned Phnum = Obj.getHeader().e_phnum; 621a34c753fSRafael Auler Phnum += 3; 622a34c753fSRafael Auler 623a34c753fSRafael Auler NextAvailableAddress += Phnum * sizeof(ELF64LEPhdrTy); 624a34c753fSRafael Auler NextAvailableOffset += Phnum * sizeof(ELF64LEPhdrTy); 625a34c753fSRafael Auler } 626a34c753fSRafael Auler 627a34c753fSRafael Auler // Align at cache line. 628a34c753fSRafael Auler NextAvailableAddress = alignTo(NextAvailableAddress, 64); 629a34c753fSRafael Auler NextAvailableOffset = alignTo(NextAvailableOffset, 64); 630a34c753fSRafael Auler 631a34c753fSRafael Auler NewTextSegmentAddress = NextAvailableAddress; 632a34c753fSRafael Auler NewTextSegmentOffset = NextAvailableOffset; 633a34c753fSRafael Auler BC->LayoutStartAddress = NextAvailableAddress; 634a34c753fSRafael Auler 635a34c753fSRafael Auler // Tools such as objcopy can strip section contents but leave header 636a34c753fSRafael Auler // entries. Check that at least .text is mapped in the file. 637af6e66f4SAmir Ayupov if (!getFileOffsetForAddress(BC->OldTextSectionAddress)) 638af6e66f4SAmir Ayupov return createStringError(errc::executable_format_error, 639af6e66f4SAmir Ayupov "BOLT-ERROR: input binary is not a valid ELF " 640af6e66f4SAmir Ayupov "executable as its text section is not " 641af6e66f4SAmir Ayupov "mapped to a valid segment"); 642af6e66f4SAmir Ayupov return Error::success(); 643a34c753fSRafael Auler } 644a34c753fSRafael Auler 645af6e66f4SAmir Ayupov Error RewriteInstance::run() { 646af6e66f4SAmir Ayupov assert(BC && "failed to create a binary context"); 647a34c753fSRafael Auler 64852cf0711SAmir Ayupov BC->outs() << "BOLT-INFO: Target architecture: " 649a34c753fSRafael Auler << Triple::getArchTypeName( 650a34c753fSRafael Auler (llvm::Triple::ArchType)InputFile->getArch()) 651a34c753fSRafael Auler << "\n"; 65252cf0711SAmir Ayupov BC->outs() << "BOLT-INFO: BOLT version: " << BoltRevision << "\n"; 653a34c753fSRafael Auler 654af6e66f4SAmir Ayupov if (Error E = discoverStorage()) 655af6e66f4SAmir Ayupov return E; 656ced5472eSAmir Ayupov if (Error E = readSpecialSections()) 657ced5472eSAmir Ayupov return E; 658a34c753fSRafael Auler adjustCommandLineOptions(); 659a34c753fSRafael Auler discoverFileObjects(); 660a34c753fSRafael Auler 66196b5e092SJob Noorman if (opts::Instrument && !BC->IsStaticExecutable) 66296b5e092SJob Noorman if (Error E = discoverRtFiniAddress()) 66396b5e092SJob Noorman return E; 66496b5e092SJob Noorman 665a34c753fSRafael Auler preprocessProfileData(); 666a34c753fSRafael Auler 667a34c753fSRafael Auler // Skip disassembling if we have a translation table and we are running an 668a34c753fSRafael Auler // aggregation job. 669a34c753fSRafael Auler if (opts::AggregateOnly && BAT->enabledFor(InputFile)) { 67062806811SAmir Ayupov // YAML profile in BAT mode requires CFG for .bolt.org.text functions 67162806811SAmir Ayupov if (!opts::SaveProfile.empty() || 67262806811SAmir Ayupov opts::ProfileFormat == opts::ProfileFormatKind::PF_YAML) { 67362806811SAmir Ayupov selectFunctionsToProcess(); 67462806811SAmir Ayupov disassembleFunctions(); 6759b007a19SAmir Ayupov processMetadataPreCFG(); 67662806811SAmir Ayupov buildFunctionsCFG(); 67762806811SAmir Ayupov } 678a34c753fSRafael Auler processProfileData(); 679af6e66f4SAmir Ayupov return Error::success(); 680a34c753fSRafael Auler } 681a34c753fSRafael Auler 682a34c753fSRafael Auler selectFunctionsToProcess(); 683a34c753fSRafael Auler 684a34c753fSRafael Auler readDebugInfo(); 685a34c753fSRafael Auler 686a34c753fSRafael Auler disassembleFunctions(); 687a34c753fSRafael Auler 688c9b1f062SMaksim Panchenko processMetadataPreCFG(); 689a34c753fSRafael Auler 690a34c753fSRafael Auler buildFunctionsCFG(); 691a34c753fSRafael Auler 692a34c753fSRafael Auler processProfileData(); 693a34c753fSRafael Auler 694d2c9a19dSAmir Ayupov // Save input binary metadata if BAT section needs to be emitted 695d2c9a19dSAmir Ayupov if (opts::EnableBAT) 696d2c9a19dSAmir Ayupov BAT->saveMetadata(*BC); 697d2c9a19dSAmir Ayupov 698a34c753fSRafael Auler postProcessFunctions(); 699a34c753fSRafael Auler 700dd630d83SMaksim Panchenko processMetadataPostCFG(); 701dd630d83SMaksim Panchenko 702a34c753fSRafael Auler if (opts::DiffOnly) 703af6e66f4SAmir Ayupov return Error::success(); 704a34c753fSRafael Auler 705ceb7214bSKristof Beyls if (opts::BinaryAnalysisMode) { 706ceb7214bSKristof Beyls runBinaryAnalyses(); 707ceb7214bSKristof Beyls return Error::success(); 708ceb7214bSKristof Beyls } 709ceb7214bSKristof Beyls 7104d3a0cadSMaksim Panchenko preregisterSections(); 7114d3a0cadSMaksim Panchenko 712a34c753fSRafael Auler runOptimizationPasses(); 713a34c753fSRafael Auler 714aa1968c2SMaksim Panchenko finalizeMetadataPreEmit(); 715aa1968c2SMaksim Panchenko 716a34c753fSRafael Auler emitAndLink(); 717a34c753fSRafael Auler 718a34c753fSRafael Auler updateMetadata(); 719a34c753fSRafael Auler 72096b5e092SJob Noorman if (opts::Instrument && !BC->IsStaticExecutable) 72196b5e092SJob Noorman updateRtFiniReloc(); 72296b5e092SJob Noorman 723a693ae53SMaksim Panchenko if (opts::OutputFilename == "/dev/null") { 72452cf0711SAmir Ayupov BC->outs() << "BOLT-INFO: skipping writing final binary to disk\n"; 725af6e66f4SAmir Ayupov return Error::success(); 726a693ae53SMaksim Panchenko } else if (BC->IsLinuxKernel) { 72752cf0711SAmir Ayupov BC->errs() << "BOLT-WARNING: Linux kernel support is experimental\n"; 728a34c753fSRafael Auler } 729a34c753fSRafael Auler 730a34c753fSRafael Auler // Rewrite allocatable contents and copy non-allocatable parts with mods. 731a34c753fSRafael Auler rewriteFile(); 732af6e66f4SAmir Ayupov return Error::success(); 733a34c753fSRafael Auler } 734a34c753fSRafael Auler 735a34c753fSRafael Auler void RewriteInstance::discoverFileObjects() { 736a34c753fSRafael Auler NamedRegionTimer T("discoverFileObjects", "discover file objects", 737a34c753fSRafael Auler TimerGroupName, TimerGroupDesc, opts::TimeRewrite); 738a34c753fSRafael Auler 739a34c753fSRafael Auler // For local symbols we want to keep track of associated FILE symbol name for 740a34c753fSRafael Auler // disambiguation by combined name. 741a34c753fSRafael Auler StringRef FileSymbolName; 742a34c753fSRafael Auler bool SeenFileName = false; 743a34c753fSRafael Auler struct SymbolRefHash { 744a34c753fSRafael Auler size_t operator()(SymbolRef const &S) const { 745a34c753fSRafael Auler return std::hash<decltype(DataRefImpl::p)>{}(S.getRawDataRefImpl().p); 746a34c753fSRafael Auler } 747a34c753fSRafael Auler }; 748a34c753fSRafael Auler std::unordered_map<SymbolRef, StringRef, SymbolRefHash> SymbolToFileName; 749a34c753fSRafael Auler for (const ELFSymbolRef &Symbol : InputFile->symbols()) { 750a34c753fSRafael Auler Expected<StringRef> NameOrError = Symbol.getName(); 751ad8fd5b1SKazu Hirata if (NameOrError && NameOrError->starts_with("__asan_init")) { 75252cf0711SAmir Ayupov BC->errs() 75352cf0711SAmir Ayupov << "BOLT-ERROR: input file was compiled or linked with sanitizer " 754a34c753fSRafael Auler "support. Cannot optimize.\n"; 755a34c753fSRafael Auler exit(1); 756a34c753fSRafael Auler } 757ad8fd5b1SKazu Hirata if (NameOrError && NameOrError->starts_with("__llvm_coverage_mapping")) { 75852cf0711SAmir Ayupov BC->errs() 75952cf0711SAmir Ayupov << "BOLT-ERROR: input file was compiled or linked with coverage " 760a34c753fSRafael Auler "support. Cannot optimize.\n"; 761a34c753fSRafael Auler exit(1); 762a34c753fSRafael Auler } 763a34c753fSRafael Auler 764a34c753fSRafael Auler if (cantFail(Symbol.getFlags()) & SymbolRef::SF_Undefined) 765a34c753fSRafael Auler continue; 766a34c753fSRafael Auler 767a34c753fSRafael Auler if (cantFail(Symbol.getType()) == SymbolRef::ST_File) { 768a1e9608bSAmir Ayupov FileSymbols.emplace_back(Symbol); 769a34c753fSRafael Auler StringRef Name = 770a34c753fSRafael Auler cantFail(std::move(NameOrError), "cannot get symbol name for file"); 771a34c753fSRafael Auler // Ignore Clang LTO artificial FILE symbol as it is not always generated, 772a34c753fSRafael Auler // and this uncertainty is causing havoc in function name matching. 773a34c753fSRafael Auler if (Name == "ld-temp.o") 774a34c753fSRafael Auler continue; 775a34c753fSRafael Auler FileSymbolName = Name; 776a34c753fSRafael Auler SeenFileName = true; 777a34c753fSRafael Auler continue; 778a34c753fSRafael Auler } 779a34c753fSRafael Auler if (!FileSymbolName.empty() && 780ee0e9ccbSMaksim Panchenko !(cantFail(Symbol.getFlags()) & SymbolRef::SF_Global)) 781a34c753fSRafael Auler SymbolToFileName[Symbol] = FileSymbolName; 782a34c753fSRafael Auler } 783a34c753fSRafael Auler 784a34c753fSRafael Auler // Sort symbols in the file by value. Ignore symbols from non-allocatable 7851e9b006aSMaksim Panchenko // sections. We memoize getAddress(), as it has rather high overhead. 7861e9b006aSMaksim Panchenko struct SymbolInfo { 7871e9b006aSMaksim Panchenko uint64_t Address; 7881e9b006aSMaksim Panchenko SymbolRef Symbol; 7891e9b006aSMaksim Panchenko }; 7901e9b006aSMaksim Panchenko std::vector<SymbolInfo> SortedSymbols; 791a34c753fSRafael Auler auto isSymbolInMemory = [this](const SymbolRef &Sym) { 792a34c753fSRafael Auler if (cantFail(Sym.getType()) == SymbolRef::ST_File) 793a34c753fSRafael Auler return false; 794a34c753fSRafael Auler if (cantFail(Sym.getFlags()) & SymbolRef::SF_Absolute) 795a34c753fSRafael Auler return true; 796a34c753fSRafael Auler if (cantFail(Sym.getFlags()) & SymbolRef::SF_Undefined) 797a34c753fSRafael Auler return false; 798a34c753fSRafael Auler BinarySection Section(*BC, *cantFail(Sym.getSection())); 799a34c753fSRafael Auler return Section.isAllocatable(); 800a34c753fSRafael Auler }; 80116cd5cdfSJacob Bramley auto checkSymbolInSection = [this](const SymbolInfo &S) { 80216cd5cdfSJacob Bramley // Sometimes, we encounter symbols with addresses outside their section. If 80316cd5cdfSJacob Bramley // such symbols happen to fall into another section, they can interfere with 80416cd5cdfSJacob Bramley // disassembly. Notably, this occurs with AArch64 marker symbols ($d and $t) 80516cd5cdfSJacob Bramley // that belong to .eh_frame, but end up pointing into .text. 80616cd5cdfSJacob Bramley // As a workaround, we ignore all symbols that lie outside their sections. 80716cd5cdfSJacob Bramley auto Section = cantFail(S.Symbol.getSection()); 80816cd5cdfSJacob Bramley 80916cd5cdfSJacob Bramley // Accept all absolute symbols. 81016cd5cdfSJacob Bramley if (Section == InputFile->section_end()) 81116cd5cdfSJacob Bramley return true; 81216cd5cdfSJacob Bramley 81316cd5cdfSJacob Bramley uint64_t SecStart = Section->getAddress(); 81416cd5cdfSJacob Bramley uint64_t SecEnd = SecStart + Section->getSize(); 81516cd5cdfSJacob Bramley uint64_t SymEnd = S.Address + ELFSymbolRef(S.Symbol).getSize(); 81616cd5cdfSJacob Bramley if (S.Address >= SecStart && SymEnd <= SecEnd) 81716cd5cdfSJacob Bramley return true; 81816cd5cdfSJacob Bramley 81916cd5cdfSJacob Bramley auto SymType = cantFail(S.Symbol.getType()); 82016cd5cdfSJacob Bramley // Skip warnings for common benign cases. 82116cd5cdfSJacob Bramley if (opts::Verbosity < 1 && SymType == SymbolRef::ST_Other) 82216cd5cdfSJacob Bramley return false; // E.g. ELF::STT_TLS. 82316cd5cdfSJacob Bramley 82416cd5cdfSJacob Bramley auto SymName = S.Symbol.getName(); 82516cd5cdfSJacob Bramley auto SecName = cantFail(S.Symbol.getSection())->getName(); 82616cd5cdfSJacob Bramley BC->errs() << "BOLT-WARNING: ignoring symbol " 82716cd5cdfSJacob Bramley << (SymName ? *SymName : "[unnamed]") << " at 0x" 82816cd5cdfSJacob Bramley << Twine::utohexstr(S.Address) << ", which lies outside " 82916cd5cdfSJacob Bramley << (SecName ? *SecName : "[unnamed]") << "\n"; 83016cd5cdfSJacob Bramley 83116cd5cdfSJacob Bramley return false; 83216cd5cdfSJacob Bramley }; 8331e9b006aSMaksim Panchenko for (const SymbolRef &Symbol : InputFile->symbols()) 83416cd5cdfSJacob Bramley if (isSymbolInMemory(Symbol)) { 83516cd5cdfSJacob Bramley SymbolInfo SymInfo{cantFail(Symbol.getAddress()), Symbol}; 83616cd5cdfSJacob Bramley if (checkSymbolInSection(SymInfo)) 83716cd5cdfSJacob Bramley SortedSymbols.push_back(SymInfo); 83816cd5cdfSJacob Bramley } 839a34c753fSRafael Auler 8401e9b006aSMaksim Panchenko auto CompareSymbols = [this](const SymbolInfo &A, const SymbolInfo &B) { 8411e9b006aSMaksim Panchenko if (A.Address != B.Address) 8421e9b006aSMaksim Panchenko return A.Address < B.Address; 8431e9b006aSMaksim Panchenko 8441e9b006aSMaksim Panchenko const bool AMarker = BC->isMarker(A.Symbol); 8451e9b006aSMaksim Panchenko const bool BMarker = BC->isMarker(B.Symbol); 8468579db96SDenis Revunov if (AMarker || BMarker) { 8478579db96SDenis Revunov return AMarker && !BMarker; 8488579db96SDenis Revunov } 8498579db96SDenis Revunov 8501e9b006aSMaksim Panchenko const auto AType = cantFail(A.Symbol.getType()); 8511e9b006aSMaksim Panchenko const auto BType = cantFail(B.Symbol.getType()); 85240c2e0faSMaksim Panchenko if (AType == SymbolRef::ST_Function && BType != SymbolRef::ST_Function) 853a34c753fSRafael Auler return true; 85440c2e0faSMaksim Panchenko if (BType == SymbolRef::ST_Debug && AType != SymbolRef::ST_Debug) 855a34c753fSRafael Auler return true; 856a34c753fSRafael Auler 857a34c753fSRafael Auler return false; 8588579db96SDenis Revunov }; 8591e9b006aSMaksim Panchenko llvm::stable_sort(SortedSymbols, CompareSymbols); 8608579db96SDenis Revunov 8611e9b006aSMaksim Panchenko auto LastSymbol = SortedSymbols.end(); 8621e9b006aSMaksim Panchenko if (!SortedSymbols.empty()) 86377c19773SAmir Ayupov --LastSymbol; 864a34c753fSRafael Auler 865a34c753fSRafael Auler // For aarch64, the ABI defines mapping symbols so we identify data in the 866a34c753fSRafael Auler // code section (see IHI0056B). $d identifies data contents. 8678579db96SDenis Revunov // Compilers usually merge multiple data objects in a single $d-$x interval, 8688579db96SDenis Revunov // but we need every data object to be marked with $d. Because of that we 8698579db96SDenis Revunov // create a vector of MarkerSyms with all locations of data objects. 8708579db96SDenis Revunov 8718579db96SDenis Revunov struct MarkerSym { 8728579db96SDenis Revunov uint64_t Address; 8738579db96SDenis Revunov MarkerSymType Type; 8748579db96SDenis Revunov }; 8758579db96SDenis Revunov 8768579db96SDenis Revunov std::vector<MarkerSym> SortedMarkerSymbols; 8771e9b006aSMaksim Panchenko auto addExtraDataMarkerPerSymbol = [&]() { 8788579db96SDenis Revunov bool IsData = false; 8798579db96SDenis Revunov uint64_t LastAddr = 0; 8801e9b006aSMaksim Panchenko for (const auto &SymInfo : SortedSymbols) { 8811e9b006aSMaksim Panchenko if (LastAddr == SymInfo.Address) // don't repeat markers 8828579db96SDenis Revunov continue; 8838579db96SDenis Revunov 8841e9b006aSMaksim Panchenko MarkerSymType MarkerType = BC->getMarkerType(SymInfo.Symbol); 8858579db96SDenis Revunov if (MarkerType != MarkerSymType::NONE) { 8861e9b006aSMaksim Panchenko SortedMarkerSymbols.push_back(MarkerSym{SymInfo.Address, MarkerType}); 8871e9b006aSMaksim Panchenko LastAddr = SymInfo.Address; 8888579db96SDenis Revunov IsData = MarkerType == MarkerSymType::DATA; 8898579db96SDenis Revunov continue; 8908579db96SDenis Revunov } 8918579db96SDenis Revunov 8928579db96SDenis Revunov if (IsData) { 8931e9b006aSMaksim Panchenko SortedMarkerSymbols.push_back({SymInfo.Address, MarkerSymType::DATA}); 8941e9b006aSMaksim Panchenko LastAddr = SymInfo.Address; 8958579db96SDenis Revunov } 8968579db96SDenis Revunov } 8978579db96SDenis Revunov }; 8988579db96SDenis Revunov 899fc395884SJob Noorman if (BC->isAArch64() || BC->isRISCV()) { 9001e9b006aSMaksim Panchenko addExtraDataMarkerPerSymbol(); 901a34c753fSRafael Auler LastSymbol = std::stable_partition( 9021e9b006aSMaksim Panchenko SortedSymbols.begin(), SortedSymbols.end(), 9031e9b006aSMaksim Panchenko [this](const SymbolInfo &S) { return !BC->isMarker(S.Symbol); }); 9041e9b006aSMaksim Panchenko if (!SortedSymbols.empty()) 905a34c753fSRafael Auler --LastSymbol; 906a34c753fSRafael Auler } 907a34c753fSRafael Auler 908a34c753fSRafael Auler BinaryFunction *PreviousFunction = nullptr; 909a34c753fSRafael Auler unsigned AnonymousId = 0; 910a34c753fSRafael Auler 9111e9b006aSMaksim Panchenko const auto SortedSymbolsEnd = 9121e9b006aSMaksim Panchenko LastSymbol == SortedSymbols.end() ? LastSymbol : std::next(LastSymbol); 9131e9b006aSMaksim Panchenko for (auto Iter = SortedSymbols.begin(); Iter != SortedSymbolsEnd; ++Iter) { 9141e9b006aSMaksim Panchenko const SymbolRef &Symbol = Iter->Symbol; 9159b4328fbSMaksim Panchenko const uint64_t SymbolAddress = Iter->Address; 9169b4328fbSMaksim Panchenko const auto SymbolFlags = cantFail(Symbol.getFlags()); 917a34c753fSRafael Auler const SymbolRef::Type SymbolType = cantFail(Symbol.getType()); 918a34c753fSRafael Auler 919a34c753fSRafael Auler if (SymbolType == SymbolRef::ST_File) 920a34c753fSRafael Auler continue; 921a34c753fSRafael Auler 922a34c753fSRafael Auler StringRef SymName = cantFail(Symbol.getName(), "cannot get symbol name"); 9239b4328fbSMaksim Panchenko if (SymbolAddress == 0) { 924a34c753fSRafael Auler if (opts::Verbosity >= 1 && SymbolType == SymbolRef::ST_Function) 92552cf0711SAmir Ayupov BC->errs() << "BOLT-WARNING: function with 0 address seen\n"; 926a34c753fSRafael Auler continue; 927a34c753fSRafael Auler } 928a34c753fSRafael Auler 929a34c753fSRafael Auler // Ignore input hot markers 930ee0e9ccbSMaksim Panchenko if (SymName == "__hot_start" || SymName == "__hot_end") 931a34c753fSRafael Auler continue; 932a34c753fSRafael Auler 9333fe50b6dSAmir Ayupov FileSymRefs.emplace(SymbolAddress, Symbol); 934a34c753fSRafael Auler 935a34c753fSRafael Auler // Skip section symbols that will be registered by disassemblePLT(). 9369b4328fbSMaksim Panchenko if (SymbolType == SymbolRef::ST_Debug) { 9379b4328fbSMaksim Panchenko ErrorOr<BinarySection &> BSection = 9389b4328fbSMaksim Panchenko BC->getSectionForAddress(SymbolAddress); 939a34c753fSRafael Auler if (BSection && getPLTSectionInfo(BSection->getName())) 940a34c753fSRafael Auler continue; 941a34c753fSRafael Auler } 942a34c753fSRafael Auler 943a34c753fSRafael Auler /// It is possible we are seeing a globalized local. LLVM might treat it as 944a34c753fSRafael Auler /// a local if it has a "private global" prefix, e.g. ".L". Thus we have to 945a34c753fSRafael Auler /// change the prefix to enforce global scope of the symbol. 946ad8fd5b1SKazu Hirata std::string Name = 947ad8fd5b1SKazu Hirata SymName.starts_with(BC->AsmInfo->getPrivateGlobalPrefix()) 948a34c753fSRafael Auler ? "PG" + std::string(SymName) 949a34c753fSRafael Auler : std::string(SymName); 950a34c753fSRafael Auler 951a34c753fSRafael Auler // Disambiguate all local symbols before adding to symbol table. 952a34c753fSRafael Auler // Since we don't know if we will see a global with the same name, 953a34c753fSRafael Auler // always modify the local name. 954a34c753fSRafael Auler // 955a34c753fSRafael Auler // NOTE: the naming convention for local symbols should match 956a34c753fSRafael Auler // the one we use for profile data. 957a34c753fSRafael Auler std::string UniqueName; 958a34c753fSRafael Auler std::string AlternativeName; 959a34c753fSRafael Auler if (Name.empty()) { 960a34c753fSRafael Auler UniqueName = "ANONYMOUS." + std::to_string(AnonymousId++); 9619b4328fbSMaksim Panchenko } else if (SymbolFlags & SymbolRef::SF_Global) { 962c0d954a0SRafael Auler if (const BinaryData *BD = BC->getBinaryDataByName(Name)) { 963c0d954a0SRafael Auler if (BD->getSize() == ELFSymbolRef(Symbol).getSize() && 9649b4328fbSMaksim Panchenko BD->getAddress() == SymbolAddress) { 965c0d954a0SRafael Auler if (opts::Verbosity > 1) 96652cf0711SAmir Ayupov BC->errs() << "BOLT-WARNING: ignoring duplicate global symbol " 96752cf0711SAmir Ayupov << Name << "\n"; 968c0d954a0SRafael Auler // Ignore duplicate entry - possibly a bug in the linker 969c0d954a0SRafael Auler continue; 970c0d954a0SRafael Auler } 97152cf0711SAmir Ayupov BC->errs() << "BOLT-ERROR: bad input binary, global symbol \"" << Name 972c0d954a0SRafael Auler << "\" is not unique\n"; 973c0d954a0SRafael Auler exit(1); 974c0d954a0SRafael Auler } 975a34c753fSRafael Auler UniqueName = Name; 976a34c753fSRafael Auler } else { 977a34c753fSRafael Auler // If we have a local file name, we should create 2 variants for the 978a34c753fSRafael Auler // function name. The reason is that perf profile might have been 979a34c753fSRafael Auler // collected on a binary that did not have the local file name (e.g. as 980a34c753fSRafael Auler // a side effect of stripping debug info from the binary): 981a34c753fSRafael Auler // 982a34c753fSRafael Auler // primary: <function>/<id> 983a34c753fSRafael Auler // alternative: <function>/<file>/<id2> 984a34c753fSRafael Auler // 985a34c753fSRafael Auler // The <id> field is used for disambiguation of local symbols since there 986a34c753fSRafael Auler // could be identical function names coming from identical file names 987a34c753fSRafael Auler // (e.g. from different directories). 988a34c753fSRafael Auler std::string AltPrefix; 989a34c753fSRafael Auler auto SFI = SymbolToFileName.find(Symbol); 990ee0e9ccbSMaksim Panchenko if (SymbolType == SymbolRef::ST_Function && SFI != SymbolToFileName.end()) 991a34c753fSRafael Auler AltPrefix = Name + "/" + std::string(SFI->second); 992a34c753fSRafael Auler 993a34c753fSRafael Auler UniqueName = NR.uniquify(Name); 994a34c753fSRafael Auler if (!AltPrefix.empty()) 995a34c753fSRafael Auler AlternativeName = NR.uniquify(AltPrefix); 996a34c753fSRafael Auler } 997a34c753fSRafael Auler 998a34c753fSRafael Auler uint64_t SymbolSize = ELFSymbolRef(Symbol).getSize(); 999a34c753fSRafael Auler uint64_t SymbolAlignment = Symbol.getAlignment(); 1000a34c753fSRafael Auler 1001e49549ffSDavide Italiano auto registerName = [&](uint64_t FinalSize) { 1002a34c753fSRafael Auler // Register names even if it's not a function, e.g. for an entry point. 10039b4328fbSMaksim Panchenko BC->registerNameAtAddress(UniqueName, SymbolAddress, FinalSize, 1004e49549ffSDavide Italiano SymbolAlignment, SymbolFlags); 1005a34c753fSRafael Auler if (!AlternativeName.empty()) 10069b4328fbSMaksim Panchenko BC->registerNameAtAddress(AlternativeName, SymbolAddress, FinalSize, 1007e49549ffSDavide Italiano SymbolAlignment, SymbolFlags); 1008a34c753fSRafael Auler }; 1009a34c753fSRafael Auler 1010a34c753fSRafael Auler section_iterator Section = 1011a34c753fSRafael Auler cantFail(Symbol.getSection(), "cannot get symbol section"); 1012a34c753fSRafael Auler if (Section == InputFile->section_end()) { 1013f8730293SJob Noorman // Could be an absolute symbol. Used on RISC-V for __global_pointer$ so we 1014f8730293SJob Noorman // need to record it to handle relocations against it. For other instances 1015f8730293SJob Noorman // of absolute symbols, we record for pretty printing. 1016a34c753fSRafael Auler LLVM_DEBUG(if (opts::Verbosity > 1) { 1017a34c753fSRafael Auler dbgs() << "BOLT-INFO: absolute sym " << UniqueName << "\n"; 1018a34c753fSRafael Auler }); 1019a34c753fSRafael Auler registerName(SymbolSize); 1020a34c753fSRafael Auler continue; 1021a34c753fSRafael Auler } 1022a34c753fSRafael Auler 10233a0d894fSMaksim Panchenko if (SymName == getBOLTReservedStart() || SymName == getBOLTReservedEnd()) { 10243a0d894fSMaksim Panchenko registerName(SymbolSize); 10253a0d894fSMaksim Panchenko continue; 10263a0d894fSMaksim Panchenko } 10273a0d894fSMaksim Panchenko 1028a34c753fSRafael Auler LLVM_DEBUG(dbgs() << "BOLT-DEBUG: considering symbol " << UniqueName 1029a34c753fSRafael Auler << " for function\n"); 1030a34c753fSRafael Auler 10319b4328fbSMaksim Panchenko if (SymbolAddress == Section->getAddress() + Section->getSize()) { 103254ab9541SJob Noorman assert(SymbolSize == 0 && 103354ab9541SJob Noorman "unexpect non-zero sized symbol at end of section"); 1034e49549ffSDavide Italiano LLVM_DEBUG( 1035e49549ffSDavide Italiano dbgs() 1036e49549ffSDavide Italiano << "BOLT-DEBUG: rejecting as symbol points to end of its section\n"); 1037e49549ffSDavide Italiano registerName(SymbolSize); 103854ab9541SJob Noorman continue; 103954ab9541SJob Noorman } 104054ab9541SJob Noorman 1041a34c753fSRafael Auler if (!Section->isText()) { 1042a34c753fSRafael Auler assert(SymbolType != SymbolRef::ST_Function && 1043a34c753fSRafael Auler "unexpected function inside non-code section"); 1044a34c753fSRafael Auler LLVM_DEBUG(dbgs() << "BOLT-DEBUG: rejecting as symbol is not in code\n"); 1045a34c753fSRafael Auler registerName(SymbolSize); 1046a34c753fSRafael Auler continue; 1047a34c753fSRafael Auler } 1048a34c753fSRafael Auler 1049a34c753fSRafael Auler // Assembly functions could be ST_NONE with 0 size. Check that the 1050a34c753fSRafael Auler // corresponding section is a code section and they are not inside any 1051a34c753fSRafael Auler // other known function to consider them. 1052a34c753fSRafael Auler // 1053a34c753fSRafael Auler // Sometimes assembly functions are not marked as functions and neither are 1054a34c753fSRafael Auler // their local labels. The only way to tell them apart is to look at 1055a34c753fSRafael Auler // symbol scope - global vs local. 1056a34c753fSRafael Auler if (PreviousFunction && SymbolType != SymbolRef::ST_Function) { 10579b4328fbSMaksim Panchenko if (PreviousFunction->containsAddress(SymbolAddress)) { 1058a34c753fSRafael Auler if (PreviousFunction->isSymbolValidInScope(Symbol, SymbolSize)) { 1059a34c753fSRafael Auler LLVM_DEBUG(dbgs() 1060a34c753fSRafael Auler << "BOLT-DEBUG: symbol is a function local symbol\n"); 10619b4328fbSMaksim Panchenko } else if (SymbolAddress == PreviousFunction->getAddress() && 10629b4328fbSMaksim Panchenko !SymbolSize) { 1063a34c753fSRafael Auler LLVM_DEBUG(dbgs() << "BOLT-DEBUG: ignoring symbol as a marker\n"); 1064a34c753fSRafael Auler } else if (opts::Verbosity > 1) { 106552cf0711SAmir Ayupov BC->errs() << "BOLT-WARNING: symbol " << UniqueName 1066a34c753fSRafael Auler << " seen in the middle of function " << *PreviousFunction 1067a34c753fSRafael Auler << ". Could be a new entry.\n"; 1068a34c753fSRafael Auler } 1069a34c753fSRafael Auler registerName(SymbolSize); 1070a34c753fSRafael Auler continue; 1071a34c753fSRafael Auler } else if (PreviousFunction->getSize() == 0 && 1072a34c753fSRafael Auler PreviousFunction->isSymbolValidInScope(Symbol, SymbolSize)) { 1073a34c753fSRafael Auler LLVM_DEBUG(dbgs() << "BOLT-DEBUG: symbol is a function local symbol\n"); 1074a34c753fSRafael Auler registerName(SymbolSize); 1075a34c753fSRafael Auler continue; 1076a34c753fSRafael Auler } 1077a34c753fSRafael Auler } 1078a34c753fSRafael Auler 10799b4328fbSMaksim Panchenko if (PreviousFunction && PreviousFunction->containsAddress(SymbolAddress) && 10809b4328fbSMaksim Panchenko PreviousFunction->getAddress() != SymbolAddress) { 1081a34c753fSRafael Auler if (PreviousFunction->isSymbolValidInScope(Symbol, SymbolSize)) { 1082ee0e9ccbSMaksim Panchenko if (opts::Verbosity >= 1) 108352cf0711SAmir Ayupov BC->outs() 108452cf0711SAmir Ayupov << "BOLT-INFO: skipping possibly another entry for function " 1085a34c753fSRafael Auler << *PreviousFunction << " : " << UniqueName << '\n'; 1086dc8035bdSMaksim Panchenko registerName(SymbolSize); 1087a34c753fSRafael Auler } else { 108852cf0711SAmir Ayupov BC->outs() << "BOLT-INFO: using " << UniqueName 108952cf0711SAmir Ayupov << " as another entry to " 1090a34c753fSRafael Auler << "function " << *PreviousFunction << '\n'; 1091a34c753fSRafael Auler 1092a34c753fSRafael Auler registerName(0); 1093a34c753fSRafael Auler 10949b4328fbSMaksim Panchenko PreviousFunction->addEntryPointAtOffset(SymbolAddress - 109540c2e0faSMaksim Panchenko PreviousFunction->getAddress()); 1096a34c753fSRafael Auler 1097a34c753fSRafael Auler // Remove the symbol from FileSymRefs so that we can skip it from 1098a34c753fSRafael Auler // in the future. 10993fe50b6dSAmir Ayupov auto SI = llvm::find_if( 11003fe50b6dSAmir Ayupov llvm::make_range(FileSymRefs.equal_range(SymbolAddress)), 11013fe50b6dSAmir Ayupov [&](auto SymIt) { return SymIt.second == Symbol; }); 1102a34c753fSRafael Auler assert(SI != FileSymRefs.end() && "symbol expected to be present"); 1103a34c753fSRafael Auler assert(SI->second == Symbol && "wrong symbol found"); 1104a34c753fSRafael Auler FileSymRefs.erase(SI); 1105a34c753fSRafael Auler } 1106a34c753fSRafael Auler continue; 1107a34c753fSRafael Auler } 1108a34c753fSRafael Auler 1109a34c753fSRafael Auler // Checkout for conflicts with function data from FDEs. 1110a34c753fSRafael Auler bool IsSimple = true; 11119b4328fbSMaksim Panchenko auto FDEI = CFIRdWrt->getFDEs().lower_bound(SymbolAddress); 1112a34c753fSRafael Auler if (FDEI != CFIRdWrt->getFDEs().end()) { 1113a34c753fSRafael Auler const dwarf::FDE &FDE = *FDEI->second; 11149b4328fbSMaksim Panchenko if (FDEI->first != SymbolAddress) { 1115a34c753fSRafael Auler // There's no matching starting address in FDE. Make sure the previous 1116a34c753fSRafael Auler // FDE does not contain this address. 1117a34c753fSRafael Auler if (FDEI != CFIRdWrt->getFDEs().begin()) { 1118a34c753fSRafael Auler --FDEI; 1119a34c753fSRafael Auler const dwarf::FDE &PrevFDE = *FDEI->second; 1120a34c753fSRafael Auler uint64_t PrevStart = PrevFDE.getInitialLocation(); 1121a34c753fSRafael Auler uint64_t PrevLength = PrevFDE.getAddressRange(); 11229b4328fbSMaksim Panchenko if (SymbolAddress > PrevStart && 11239b4328fbSMaksim Panchenko SymbolAddress < PrevStart + PrevLength) { 112452cf0711SAmir Ayupov BC->errs() << "BOLT-ERROR: function " << UniqueName 1125a34c753fSRafael Auler << " is in conflict with FDE [" 1126a34c753fSRafael Auler << Twine::utohexstr(PrevStart) << ", " 1127a34c753fSRafael Auler << Twine::utohexstr(PrevStart + PrevLength) 1128a34c753fSRafael Auler << "). Skipping.\n"; 1129a34c753fSRafael Auler IsSimple = false; 1130a34c753fSRafael Auler } 1131a34c753fSRafael Auler } 1132a34c753fSRafael Auler } else if (FDE.getAddressRange() != SymbolSize) { 1133a34c753fSRafael Auler if (SymbolSize) { 1134a34c753fSRafael Auler // Function addresses match but sizes differ. 113552cf0711SAmir Ayupov BC->errs() << "BOLT-WARNING: sizes differ for function " << UniqueName 1136a34c753fSRafael Auler << ". FDE : " << FDE.getAddressRange() 113752cf0711SAmir Ayupov << "; symbol table : " << SymbolSize 113852cf0711SAmir Ayupov << ". Using max size.\n"; 1139a34c753fSRafael Auler } 1140a34c753fSRafael Auler SymbolSize = std::max(SymbolSize, FDE.getAddressRange()); 11419b4328fbSMaksim Panchenko if (BC->getBinaryDataAtAddress(SymbolAddress)) { 11429b4328fbSMaksim Panchenko BC->setBinaryDataSize(SymbolAddress, SymbolSize); 1143a34c753fSRafael Auler } else { 1144a34c753fSRafael Auler LLVM_DEBUG(dbgs() << "BOLT-DEBUG: No BD @ 0x" 11459b4328fbSMaksim Panchenko << Twine::utohexstr(SymbolAddress) << "\n"); 1146a34c753fSRafael Auler } 1147a34c753fSRafael Auler } 1148a34c753fSRafael Auler } 1149a34c753fSRafael Auler 1150a34c753fSRafael Auler BinaryFunction *BF = nullptr; 1151a34c753fSRafael Auler // Since function may not have yet obtained its real size, do a search 1152a34c753fSRafael Auler // using the list of registered functions instead of calling 1153a34c753fSRafael Auler // getBinaryFunctionAtAddress(). 11549b4328fbSMaksim Panchenko auto BFI = BC->getBinaryFunctions().find(SymbolAddress); 1155a34c753fSRafael Auler if (BFI != BC->getBinaryFunctions().end()) { 1156a34c753fSRafael Auler BF = &BFI->second; 1157a34c753fSRafael Auler // Duplicate the function name. Make sure everything matches before we add 1158a34c753fSRafael Auler // an alternative name. 1159a34c753fSRafael Auler if (SymbolSize != BF->getSize()) { 1160a34c753fSRafael Auler if (opts::Verbosity >= 1) { 1161ee0e9ccbSMaksim Panchenko if (SymbolSize && BF->getSize()) 116252cf0711SAmir Ayupov BC->errs() << "BOLT-WARNING: size mismatch for duplicate entries " 1163a34c753fSRafael Auler << *BF << " and " << UniqueName << '\n'; 116452cf0711SAmir Ayupov BC->outs() << "BOLT-INFO: adjusting size of function " << *BF 116552cf0711SAmir Ayupov << " old " << BF->getSize() << " new " << SymbolSize 116652cf0711SAmir Ayupov << "\n"; 1167a34c753fSRafael Auler } 1168a34c753fSRafael Auler BF->setSize(std::max(SymbolSize, BF->getSize())); 11699b4328fbSMaksim Panchenko BC->setBinaryDataSize(SymbolAddress, BF->getSize()); 1170a34c753fSRafael Auler } 1171a34c753fSRafael Auler BF->addAlternativeName(UniqueName); 1172a34c753fSRafael Auler } else { 11739b4328fbSMaksim Panchenko ErrorOr<BinarySection &> Section = 11749b4328fbSMaksim Panchenko BC->getSectionForAddress(SymbolAddress); 1175a34c753fSRafael Auler // Skip symbols from invalid sections 1176a34c753fSRafael Auler if (!Section) { 117752cf0711SAmir Ayupov BC->errs() << "BOLT-WARNING: " << UniqueName << " (0x" 11789b4328fbSMaksim Panchenko << Twine::utohexstr(SymbolAddress) 11799b4328fbSMaksim Panchenko << ") does not have any section\n"; 1180a34c753fSRafael Auler continue; 1181a34c753fSRafael Auler } 1182a34c753fSRafael Auler 1183a34c753fSRafael Auler // Skip symbols from zero-sized sections. 1184a34c753fSRafael Auler if (!Section->getSize()) 1185a34c753fSRafael Auler continue; 1186a34c753fSRafael Auler 11879b4328fbSMaksim Panchenko BF = BC->createBinaryFunction(UniqueName, *Section, SymbolAddress, 11889b4328fbSMaksim Panchenko SymbolSize); 1189a34c753fSRafael Auler if (!IsSimple) 1190a34c753fSRafael Auler BF->setSimple(false); 1191a34c753fSRafael Auler } 1192c49941bdSAmir Ayupov 1193c49941bdSAmir Ayupov // Check if it's a cold function fragment. 1194e9954ec0SAmir Ayupov if (FunctionFragmentTemplate.match(SymName)) { 1195c49941bdSAmir Ayupov static bool PrintedWarning = false; 1196c49941bdSAmir Ayupov if (!PrintedWarning) { 1197c49941bdSAmir Ayupov PrintedWarning = true; 119852cf0711SAmir Ayupov BC->errs() << "BOLT-WARNING: split function detected on input : " 1199c49941bdSAmir Ayupov << SymName; 1200c49941bdSAmir Ayupov if (BC->HasRelocations) 120152cf0711SAmir Ayupov BC->errs() << ". The support is limited in relocation mode\n"; 12027b72920aSMaksim Panchenko else 120352cf0711SAmir Ayupov BC->errs() << '\n'; 1204c49941bdSAmir Ayupov } 1205c49941bdSAmir Ayupov BC->HasSplitFunctions = true; 1206c49941bdSAmir Ayupov BF->IsFragment = true; 1207c49941bdSAmir Ayupov } 1208c49941bdSAmir Ayupov 1209a34c753fSRafael Auler if (!AlternativeName.empty()) 1210a34c753fSRafael Auler BF->addAlternativeName(AlternativeName); 1211a34c753fSRafael Auler 1212a34c753fSRafael Auler registerName(SymbolSize); 1213a34c753fSRafael Auler PreviousFunction = BF; 1214a34c753fSRafael Auler } 1215a34c753fSRafael Auler 1216a34c753fSRafael Auler // Read dynamic relocation first as their presence affects the way we process 1217a34c753fSRafael Auler // static relocations. E.g. we will ignore a static relocation at an address 1218a34c753fSRafael Auler // that is a subject to dynamic relocation processing. 1219a34c753fSRafael Auler processDynamicRelocations(); 1220a34c753fSRafael Auler 1221a34c753fSRafael Auler // Process PLT section. 1222a34c753fSRafael Auler disassemblePLT(); 1223a34c753fSRafael Auler 1224a34c753fSRafael Auler // See if we missed any functions marked by FDE. 1225a34c753fSRafael Auler for (const auto &FDEI : CFIRdWrt->getFDEs()) { 1226a34c753fSRafael Auler const uint64_t Address = FDEI.first; 1227a34c753fSRafael Auler const dwarf::FDE *FDE = FDEI.second; 1228a34c753fSRafael Auler const BinaryFunction *BF = BC->getBinaryFunctionAtAddress(Address); 1229a34c753fSRafael Auler if (BF) 1230a34c753fSRafael Auler continue; 1231a34c753fSRafael Auler 1232a34c753fSRafael Auler BF = BC->getBinaryFunctionContainingAddress(Address); 1233a34c753fSRafael Auler if (BF) { 123452cf0711SAmir Ayupov BC->errs() << "BOLT-WARNING: FDE [0x" << Twine::utohexstr(Address) 123552cf0711SAmir Ayupov << ", 0x" << Twine::utohexstr(Address + FDE->getAddressRange()) 1236a34c753fSRafael Auler << ") conflicts with function " << *BF << '\n'; 1237a34c753fSRafael Auler continue; 1238a34c753fSRafael Auler } 1239a34c753fSRafael Auler 1240ee0e9ccbSMaksim Panchenko if (opts::Verbosity >= 1) 124152cf0711SAmir Ayupov BC->errs() << "BOLT-WARNING: FDE [0x" << Twine::utohexstr(Address) 124252cf0711SAmir Ayupov << ", 0x" << Twine::utohexstr(Address + FDE->getAddressRange()) 1243a34c753fSRafael Auler << ") has no corresponding symbol table entry\n"; 1244ee0e9ccbSMaksim Panchenko 1245a34c753fSRafael Auler ErrorOr<BinarySection &> Section = BC->getSectionForAddress(Address); 1246a34c753fSRafael Auler assert(Section && "cannot get section for address from FDE"); 1247a34c753fSRafael Auler std::string FunctionName = 1248a34c753fSRafael Auler "__BOLT_FDE_FUNCat" + Twine::utohexstr(Address).str(); 1249a34c753fSRafael Auler BC->createBinaryFunction(FunctionName, *Section, Address, 1250a34c753fSRafael Auler FDE->getAddressRange()); 1251a34c753fSRafael Auler } 1252a34c753fSRafael Auler 1253a34c753fSRafael Auler BC->setHasSymbolsWithFileName(SeenFileName); 1254a34c753fSRafael Auler 1255a34c753fSRafael Auler // Now that all the functions were created - adjust their boundaries. 1256a34c753fSRafael Auler adjustFunctionBoundaries(); 1257a34c753fSRafael Auler 1258a34c753fSRafael Auler // Annotate functions with code/data markers in AArch64 12598579db96SDenis Revunov for (auto ISym = SortedMarkerSymbols.begin(); 12608579db96SDenis Revunov ISym != SortedMarkerSymbols.end(); ++ISym) { 12618579db96SDenis Revunov 12628579db96SDenis Revunov auto *BF = 12638579db96SDenis Revunov BC->getBinaryFunctionContainingAddress(ISym->Address, true, true); 12648579db96SDenis Revunov 1265a34c753fSRafael Auler if (!BF) { 1266a34c753fSRafael Auler // Stray marker 1267a34c753fSRafael Auler continue; 1268a34c753fSRafael Auler } 12698579db96SDenis Revunov const auto EntryOffset = ISym->Address - BF->getAddress(); 12708579db96SDenis Revunov if (ISym->Type == MarkerSymType::CODE) { 1271a34c753fSRafael Auler BF->markCodeAtOffset(EntryOffset); 1272a34c753fSRafael Auler continue; 1273a34c753fSRafael Auler } 12748579db96SDenis Revunov if (ISym->Type == MarkerSymType::DATA) { 1275a34c753fSRafael Auler BF->markDataAtOffset(EntryOffset); 12768579db96SDenis Revunov BC->AddressToConstantIslandMap[ISym->Address] = BF; 1277a34c753fSRafael Auler continue; 1278a34c753fSRafael Auler } 1279a34c753fSRafael Auler llvm_unreachable("Unknown marker"); 1280a34c753fSRafael Auler } 1281a34c753fSRafael Auler 12827117af52SVladislav Khmelevsky if (BC->isAArch64()) { 12837117af52SVladislav Khmelevsky // Check for dynamic relocations that might be contained in 12847117af52SVladislav Khmelevsky // constant islands. 12857117af52SVladislav Khmelevsky for (const BinarySection &Section : BC->allocatableSections()) { 12867117af52SVladislav Khmelevsky const uint64_t SectionAddress = Section.getAddress(); 12877117af52SVladislav Khmelevsky for (const Relocation &Rel : Section.dynamicRelocations()) { 12887117af52SVladislav Khmelevsky const uint64_t RelAddress = SectionAddress + Rel.Offset; 12897117af52SVladislav Khmelevsky BinaryFunction *BF = 12907117af52SVladislav Khmelevsky BC->getBinaryFunctionContainingAddress(RelAddress, 12917117af52SVladislav Khmelevsky /*CheckPastEnd*/ false, 12927117af52SVladislav Khmelevsky /*UseMaxSize*/ true); 12937117af52SVladislav Khmelevsky if (BF) { 12947117af52SVladislav Khmelevsky assert(Rel.isRelative() && "Expected relative relocation for island"); 129552cf0711SAmir Ayupov BC->logBOLTErrorsAndQuitOnFatal( 129652cf0711SAmir Ayupov BF->markIslandDynamicRelocationAtAddress(RelAddress)); 12977117af52SVladislav Khmelevsky } 12987117af52SVladislav Khmelevsky } 12997117af52SVladislav Khmelevsky } 13007117af52SVladislav Khmelevsky } 13017117af52SVladislav Khmelevsky 13022abcbbd9SMaksim Panchenko if (!BC->IsLinuxKernel) { 1303a34c753fSRafael Auler // Read all relocations now that we have binary functions mapped. 1304a34c753fSRafael Auler processRelocations(); 1305a34c753fSRafael Auler } 130638639a81SMaksim Panchenko 1307c49941bdSAmir Ayupov registerFragments(); 1308a1e9608bSAmir Ayupov FileSymbols.clear(); 13093fe50b6dSAmir Ayupov FileSymRefs.clear(); 13109cd218e4SMaksim Panchenko 13119cd218e4SMaksim Panchenko discoverBOLTReserved(); 13129cd218e4SMaksim Panchenko } 13139cd218e4SMaksim Panchenko 13149cd218e4SMaksim Panchenko void RewriteInstance::discoverBOLTReserved() { 13159cd218e4SMaksim Panchenko BinaryData *StartBD = BC->getBinaryDataByName(getBOLTReservedStart()); 13169cd218e4SMaksim Panchenko BinaryData *EndBD = BC->getBinaryDataByName(getBOLTReservedEnd()); 13179cd218e4SMaksim Panchenko if (!StartBD != !EndBD) { 13189cd218e4SMaksim Panchenko BC->errs() << "BOLT-ERROR: one of the symbols is missing from the binary: " 13199cd218e4SMaksim Panchenko << getBOLTReservedStart() << ", " << getBOLTReservedEnd() 13209cd218e4SMaksim Panchenko << '\n'; 13219cd218e4SMaksim Panchenko exit(1); 13229cd218e4SMaksim Panchenko } 13239cd218e4SMaksim Panchenko 13249cd218e4SMaksim Panchenko if (!StartBD) 13259cd218e4SMaksim Panchenko return; 13269cd218e4SMaksim Panchenko 13279cd218e4SMaksim Panchenko if (StartBD->getAddress() >= EndBD->getAddress()) { 13289cd218e4SMaksim Panchenko BC->errs() << "BOLT-ERROR: invalid reserved space boundaries\n"; 13299cd218e4SMaksim Panchenko exit(1); 13309cd218e4SMaksim Panchenko } 13319cd218e4SMaksim Panchenko BC->BOLTReserved = AddressRange(StartBD->getAddress(), EndBD->getAddress()); 13329cd218e4SMaksim Panchenko BC->outs() << "BOLT-INFO: using reserved space for allocating new sections\n"; 13339cd218e4SMaksim Panchenko 13349cd218e4SMaksim Panchenko PHDRTableOffset = 0; 13359cd218e4SMaksim Panchenko PHDRTableAddress = 0; 13369cd218e4SMaksim Panchenko NewTextSegmentAddress = 0; 13379cd218e4SMaksim Panchenko NewTextSegmentOffset = 0; 13389cd218e4SMaksim Panchenko NextAvailableAddress = BC->BOLTReserved.start(); 1339c49941bdSAmir Ayupov } 1340c49941bdSAmir Ayupov 134196b5e092SJob Noorman Error RewriteInstance::discoverRtFiniAddress() { 134296b5e092SJob Noorman // Use DT_FINI if it's available. 134396b5e092SJob Noorman if (BC->FiniAddress) { 134496b5e092SJob Noorman BC->FiniFunctionAddress = BC->FiniAddress; 134596b5e092SJob Noorman return Error::success(); 134696b5e092SJob Noorman } 134796b5e092SJob Noorman 134896b5e092SJob Noorman if (!BC->FiniArrayAddress || !BC->FiniArraySize) { 134996b5e092SJob Noorman return createStringError( 135096b5e092SJob Noorman std::errc::not_supported, 135196b5e092SJob Noorman "Instrumentation needs either DT_FINI or DT_FINI_ARRAY"); 135296b5e092SJob Noorman } 135396b5e092SJob Noorman 135496b5e092SJob Noorman if (*BC->FiniArraySize < BC->AsmInfo->getCodePointerSize()) { 135596b5e092SJob Noorman return createStringError(std::errc::not_supported, 135696b5e092SJob Noorman "Need at least 1 DT_FINI_ARRAY slot"); 135796b5e092SJob Noorman } 135896b5e092SJob Noorman 135996b5e092SJob Noorman ErrorOr<BinarySection &> FiniArraySection = 136096b5e092SJob Noorman BC->getSectionForAddress(*BC->FiniArrayAddress); 136196b5e092SJob Noorman if (auto EC = FiniArraySection.getError()) 136296b5e092SJob Noorman return errorCodeToError(EC); 136396b5e092SJob Noorman 136496b5e092SJob Noorman if (const Relocation *Reloc = FiniArraySection->getDynamicRelocationAt(0)) { 136596b5e092SJob Noorman BC->FiniFunctionAddress = Reloc->Addend; 136696b5e092SJob Noorman return Error::success(); 136796b5e092SJob Noorman } 136896b5e092SJob Noorman 136996b5e092SJob Noorman if (const Relocation *Reloc = FiniArraySection->getRelocationAt(0)) { 137096b5e092SJob Noorman BC->FiniFunctionAddress = Reloc->Value; 137196b5e092SJob Noorman return Error::success(); 137296b5e092SJob Noorman } 137396b5e092SJob Noorman 137496b5e092SJob Noorman return createStringError(std::errc::not_supported, 137596b5e092SJob Noorman "No relocation for first DT_FINI_ARRAY slot"); 137696b5e092SJob Noorman } 137796b5e092SJob Noorman 137896b5e092SJob Noorman void RewriteInstance::updateRtFiniReloc() { 137996b5e092SJob Noorman // Updating DT_FINI is handled by patchELFDynamic. 138096b5e092SJob Noorman if (BC->FiniAddress) 138196b5e092SJob Noorman return; 138296b5e092SJob Noorman 138396b5e092SJob Noorman const RuntimeLibrary *RT = BC->getRuntimeLibrary(); 138496b5e092SJob Noorman if (!RT || !RT->getRuntimeFiniAddress()) 138596b5e092SJob Noorman return; 138696b5e092SJob Noorman 138796b5e092SJob Noorman assert(BC->FiniArrayAddress && BC->FiniArraySize && 138896b5e092SJob Noorman "inconsistent .fini_array state"); 138996b5e092SJob Noorman 139096b5e092SJob Noorman ErrorOr<BinarySection &> FiniArraySection = 139196b5e092SJob Noorman BC->getSectionForAddress(*BC->FiniArrayAddress); 139296b5e092SJob Noorman assert(FiniArraySection && ".fini_array removed"); 139396b5e092SJob Noorman 139496b5e092SJob Noorman if (std::optional<Relocation> Reloc = 139596b5e092SJob Noorman FiniArraySection->takeDynamicRelocationAt(0)) { 139696b5e092SJob Noorman assert(Reloc->Addend == BC->FiniFunctionAddress && 139796b5e092SJob Noorman "inconsistent .fini_array dynamic relocation"); 139896b5e092SJob Noorman Reloc->Addend = RT->getRuntimeFiniAddress(); 139996b5e092SJob Noorman FiniArraySection->addDynamicRelocation(*Reloc); 140096b5e092SJob Noorman } 140196b5e092SJob Noorman 140296b5e092SJob Noorman // Update the static relocation by adding a pending relocation which will get 140396b5e092SJob Noorman // patched when flushPendingRelocations is called in rewriteFile. Note that 140496b5e092SJob Noorman // flushPendingRelocations will calculate the value to patch as 140596b5e092SJob Noorman // "Symbol + Addend". Since we don't have a symbol, just set the addend to the 140696b5e092SJob Noorman // desired value. 140796b5e092SJob Noorman FiniArraySection->addPendingRelocation(Relocation{ 140896b5e092SJob Noorman /*Offset*/ 0, /*Symbol*/ nullptr, /*Type*/ Relocation::getAbs64(), 140996b5e092SJob Noorman /*Addend*/ RT->getRuntimeFiniAddress(), /*Value*/ 0}); 141096b5e092SJob Noorman } 141196b5e092SJob Noorman 1412c49941bdSAmir Ayupov void RewriteInstance::registerFragments() { 1413c49941bdSAmir Ayupov if (!BC->HasSplitFunctions) 1414c49941bdSAmir Ayupov return; 1415c49941bdSAmir Ayupov 1416a1e9608bSAmir Ayupov // Process fragments with ambiguous parents separately as they are typically a 1417a1e9608bSAmir Ayupov // vanishing minority of cases and require expensive symbol table lookups. 1418a1e9608bSAmir Ayupov std::vector<std::pair<StringRef, BinaryFunction *>> AmbiguousFragments; 1419c49941bdSAmir Ayupov for (auto &BFI : BC->getBinaryFunctions()) { 1420c49941bdSAmir Ayupov BinaryFunction &Function = BFI.second; 1421c49941bdSAmir Ayupov if (!Function.isFragment()) 1422c49941bdSAmir Ayupov continue; 1423c49941bdSAmir Ayupov for (StringRef Name : Function.getNames()) { 1424a1e9608bSAmir Ayupov StringRef BaseName = NR.restore(Name); 1425a1e9608bSAmir Ayupov const bool IsGlobal = BaseName == Name; 1426e9954ec0SAmir Ayupov SmallVector<StringRef> Matches; 1427e9954ec0SAmir Ayupov if (!FunctionFragmentTemplate.match(BaseName, &Matches)) 1428c49941bdSAmir Ayupov continue; 1429e9954ec0SAmir Ayupov StringRef ParentName = Matches[1]; 1430c49941bdSAmir Ayupov const BinaryData *BD = BC->getBinaryDataByName(ParentName); 1431a1e9608bSAmir Ayupov const uint64_t NumPossibleLocalParents = 1432a1e9608bSAmir Ayupov NR.getUniquifiedNameCount(ParentName); 1433a1e9608bSAmir Ayupov // The most common case: single local parent fragment. 1434a1e9608bSAmir Ayupov if (!BD && NumPossibleLocalParents == 1) { 1435a1e9608bSAmir Ayupov BD = BC->getBinaryDataByName(NR.getUniqueName(ParentName, 1)); 1436a1e9608bSAmir Ayupov } else if (BD && (!NumPossibleLocalParents || IsGlobal)) { 1437a1e9608bSAmir Ayupov // Global parent and either no local candidates (second most common), or 1438a1e9608bSAmir Ayupov // the fragment is global as well (uncommon). 1439a1e9608bSAmir Ayupov } else { 1440a1e9608bSAmir Ayupov // Any other case: need to disambiguate using FILE symbols. 1441a1e9608bSAmir Ayupov AmbiguousFragments.emplace_back(ParentName, &Function); 1442c49941bdSAmir Ayupov continue; 1443c49941bdSAmir Ayupov } 1444a1e9608bSAmir Ayupov if (BD) { 1445a1e9608bSAmir Ayupov BinaryFunction *BF = BC->getFunctionForSymbol(BD->getSymbol()); 1446a1e9608bSAmir Ayupov if (BF) { 1447c49941bdSAmir Ayupov BC->registerFragment(Function, *BF); 1448a1e9608bSAmir Ayupov continue; 1449c49941bdSAmir Ayupov } 1450a1e9608bSAmir Ayupov } 145152cf0711SAmir Ayupov BC->errs() << "BOLT-ERROR: parent function not found for " << Function 1452c49941bdSAmir Ayupov << '\n'; 1453c49941bdSAmir Ayupov exit(1); 1454c49941bdSAmir Ayupov } 1455c49941bdSAmir Ayupov } 1456a1e9608bSAmir Ayupov 1457a1e9608bSAmir Ayupov if (AmbiguousFragments.empty()) 1458a1e9608bSAmir Ayupov return; 1459a1e9608bSAmir Ayupov 1460a1e9608bSAmir Ayupov if (!BC->hasSymbolsWithFileName()) { 1461a1e9608bSAmir Ayupov BC->errs() << "BOLT-ERROR: input file has split functions but does not " 1462a1e9608bSAmir Ayupov "have FILE symbols. If the binary was stripped, preserve " 1463a79acb0cSAmir Ayupov "FILE symbols with --keep-file-symbols strip option\n"; 1464a1e9608bSAmir Ayupov exit(1); 1465a1e9608bSAmir Ayupov } 1466a1e9608bSAmir Ayupov 1467a1e9608bSAmir Ayupov // The first global symbol is identified by the symbol table sh_info value. 1468a1e9608bSAmir Ayupov // Used as local symbol search stopping point. 1469a1e9608bSAmir Ayupov auto *ELF64LEFile = cast<ELF64LEObjectFile>(InputFile); 1470a1e9608bSAmir Ayupov const ELFFile<ELF64LE> &Obj = ELF64LEFile->getELFFile(); 1471a1e9608bSAmir Ayupov auto *SymTab = llvm::find_if(cantFail(Obj.sections()), [](const auto &Sec) { 1472a1e9608bSAmir Ayupov return Sec.sh_type == ELF::SHT_SYMTAB; 1473a1e9608bSAmir Ayupov }); 1474a1e9608bSAmir Ayupov assert(SymTab); 1475a1e9608bSAmir Ayupov // Symtab sh_info contains the value one greater than the symbol table index 1476a1e9608bSAmir Ayupov // of the last local symbol. 1477a1e9608bSAmir Ayupov ELFSymbolRef LocalSymEnd = ELF64LEFile->toSymbolRef(SymTab, SymTab->sh_info); 1478a1e9608bSAmir Ayupov 147951122fb4SVladislav Khmelevsky for (auto &Fragment : AmbiguousFragments) { 148051122fb4SVladislav Khmelevsky const StringRef &ParentName = Fragment.first; 148151122fb4SVladislav Khmelevsky BinaryFunction *BF = Fragment.second; 1482a1e9608bSAmir Ayupov const uint64_t Address = BF->getAddress(); 1483a1e9608bSAmir Ayupov 1484a1e9608bSAmir Ayupov // Get fragment's own symbol 14853fe50b6dSAmir Ayupov const auto SymIt = llvm::find_if( 14863fe50b6dSAmir Ayupov llvm::make_range(FileSymRefs.equal_range(Address)), [&](auto SI) { 14873fe50b6dSAmir Ayupov StringRef Name = cantFail(SI.second.getName()); 14883fe50b6dSAmir Ayupov return Name.contains(ParentName); 14893fe50b6dSAmir Ayupov }); 1490a1e9608bSAmir Ayupov if (SymIt == FileSymRefs.end()) { 1491a1e9608bSAmir Ayupov BC->errs() 1492a1e9608bSAmir Ayupov << "BOLT-ERROR: symbol lookup failed for function at address 0x" 1493a1e9608bSAmir Ayupov << Twine::utohexstr(Address) << '\n'; 1494a1e9608bSAmir Ayupov exit(1); 1495a1e9608bSAmir Ayupov } 1496a1e9608bSAmir Ayupov 1497a1e9608bSAmir Ayupov // Find containing FILE symbol 1498a1e9608bSAmir Ayupov ELFSymbolRef Symbol = SymIt->second; 1499a1e9608bSAmir Ayupov auto FSI = llvm::upper_bound(FileSymbols, Symbol); 1500a1e9608bSAmir Ayupov if (FSI == FileSymbols.begin()) { 1501a1e9608bSAmir Ayupov BC->errs() << "BOLT-ERROR: owning FILE symbol not found for symbol " 1502a1e9608bSAmir Ayupov << cantFail(Symbol.getName()) << '\n'; 1503a1e9608bSAmir Ayupov exit(1); 1504a1e9608bSAmir Ayupov } 1505a1e9608bSAmir Ayupov 1506a1e9608bSAmir Ayupov ELFSymbolRef StopSymbol = LocalSymEnd; 1507a1e9608bSAmir Ayupov if (FSI != FileSymbols.end()) 1508a1e9608bSAmir Ayupov StopSymbol = *FSI; 1509a1e9608bSAmir Ayupov 1510a1e9608bSAmir Ayupov uint64_t ParentAddress{0}; 1511c4c4e17cSAmir Ayupov 1512c4c4e17cSAmir Ayupov // BOLT split fragment symbols are emitted just before the main function 1513c4c4e17cSAmir Ayupov // symbol. 1514c4c4e17cSAmir Ayupov for (ELFSymbolRef NextSymbol = Symbol; NextSymbol < StopSymbol; 1515c4c4e17cSAmir Ayupov NextSymbol.moveNext()) { 1516c4c4e17cSAmir Ayupov StringRef Name = cantFail(NextSymbol.getName()); 1517c4c4e17cSAmir Ayupov if (Name == ParentName) { 1518c4c4e17cSAmir Ayupov ParentAddress = cantFail(NextSymbol.getValue()); 1519c4c4e17cSAmir Ayupov goto registerParent; 1520c4c4e17cSAmir Ayupov } 1521c4c4e17cSAmir Ayupov if (Name.starts_with(ParentName)) 1522c4c4e17cSAmir Ayupov // With multi-way splitting, there are multiple fragments with different 1523c4c4e17cSAmir Ayupov // suffixes. Parent follows the last fragment. 1524c4c4e17cSAmir Ayupov continue; 1525c4c4e17cSAmir Ayupov break; 1526c4c4e17cSAmir Ayupov } 1527c4c4e17cSAmir Ayupov 1528a1e9608bSAmir Ayupov // Iterate over local file symbols and check symbol names to match parent. 1529a1e9608bSAmir Ayupov for (ELFSymbolRef Symbol(FSI[-1]); Symbol < StopSymbol; Symbol.moveNext()) { 1530a1e9608bSAmir Ayupov if (cantFail(Symbol.getName()) == ParentName) { 1531a1e9608bSAmir Ayupov ParentAddress = cantFail(Symbol.getAddress()); 1532a1e9608bSAmir Ayupov break; 1533a1e9608bSAmir Ayupov } 1534a1e9608bSAmir Ayupov } 1535a1e9608bSAmir Ayupov 1536c4c4e17cSAmir Ayupov registerParent: 1537a1e9608bSAmir Ayupov // No local parent is found, use global parent function. 1538a1e9608bSAmir Ayupov if (!ParentAddress) 1539a1e9608bSAmir Ayupov if (BinaryData *ParentBD = BC->getBinaryDataByName(ParentName)) 1540a1e9608bSAmir Ayupov ParentAddress = ParentBD->getAddress(); 1541a1e9608bSAmir Ayupov 1542a1e9608bSAmir Ayupov if (BinaryFunction *ParentBF = 1543a1e9608bSAmir Ayupov BC->getBinaryFunctionAtAddress(ParentAddress)) { 1544a1e9608bSAmir Ayupov BC->registerFragment(*BF, *ParentBF); 1545a1e9608bSAmir Ayupov continue; 1546a1e9608bSAmir Ayupov } 1547a1e9608bSAmir Ayupov BC->errs() << "BOLT-ERROR: parent function not found for " << *BF << '\n'; 1548a1e9608bSAmir Ayupov exit(1); 1549a1e9608bSAmir Ayupov } 1550a34c753fSRafael Auler } 1551a34c753fSRafael Auler 155200b6efc8SVladislav Khmelevsky void RewriteInstance::createPLTBinaryFunction(uint64_t TargetAddress, 155300b6efc8SVladislav Khmelevsky uint64_t EntryAddress, 155400b6efc8SVladislav Khmelevsky uint64_t EntrySize) { 155500b6efc8SVladislav Khmelevsky if (!TargetAddress) 155600b6efc8SVladislav Khmelevsky return; 155700b6efc8SVladislav Khmelevsky 15588bdbcfe7SVladislav Khmelevsky auto setPLTSymbol = [&](BinaryFunction *BF, StringRef Name) { 15598bdbcfe7SVladislav Khmelevsky const unsigned PtrSize = BC->AsmInfo->getCodePointerSize(); 15608bdbcfe7SVladislav Khmelevsky MCSymbol *TargetSymbol = BC->registerNameAtAddress( 15618bdbcfe7SVladislav Khmelevsky Name.str() + "@GOT", TargetAddress, PtrSize, PtrSize); 15628bdbcfe7SVladislav Khmelevsky BF->setPLTSymbol(TargetSymbol); 15638bdbcfe7SVladislav Khmelevsky }; 15648bdbcfe7SVladislav Khmelevsky 15658bdbcfe7SVladislav Khmelevsky BinaryFunction *BF = BC->getBinaryFunctionAtAddress(EntryAddress); 15668bdbcfe7SVladislav Khmelevsky if (BF && BC->isAArch64()) { 1567e2f1a95fSVladislav Khmelevsky // Handle IFUNC trampoline with symbol 15688bdbcfe7SVladislav Khmelevsky setPLTSymbol(BF, BF->getOneName()); 15698bdbcfe7SVladislav Khmelevsky return; 15708bdbcfe7SVladislav Khmelevsky } 15718bdbcfe7SVladislav Khmelevsky 157200b6efc8SVladislav Khmelevsky const Relocation *Rel = BC->getDynamicRelocationAt(TargetAddress); 1573e2f1a95fSVladislav Khmelevsky if (!Rel) 157400b6efc8SVladislav Khmelevsky return; 157500b6efc8SVladislav Khmelevsky 1576e2f1a95fSVladislav Khmelevsky MCSymbol *Symbol = Rel->Symbol; 1577e2f1a95fSVladislav Khmelevsky if (!Symbol) { 157831ac3d09Ssinan if (BC->isRISCV() || !Rel->Addend || !Rel->isIRelative()) 1579e2f1a95fSVladislav Khmelevsky return; 1580e2f1a95fSVladislav Khmelevsky 1581e2f1a95fSVladislav Khmelevsky // IFUNC trampoline without symbol 1582e2f1a95fSVladislav Khmelevsky BinaryFunction *TargetBF = BC->getBinaryFunctionAtAddress(Rel->Addend); 1583e2f1a95fSVladislav Khmelevsky if (!TargetBF) { 158452cf0711SAmir Ayupov BC->errs() 1585e2f1a95fSVladislav Khmelevsky << "BOLT-WARNING: Expected BF to be presented as IFUNC resolver at " 1586e2f1a95fSVladislav Khmelevsky << Twine::utohexstr(Rel->Addend) << ", skipping\n"; 1587e2f1a95fSVladislav Khmelevsky return; 1588e2f1a95fSVladislav Khmelevsky } 1589e2f1a95fSVladislav Khmelevsky 1590e2f1a95fSVladislav Khmelevsky Symbol = TargetBF->getSymbol(); 1591e2f1a95fSVladislav Khmelevsky } 1592e2f1a95fSVladislav Khmelevsky 159300b6efc8SVladislav Khmelevsky ErrorOr<BinarySection &> Section = BC->getSectionForAddress(EntryAddress); 159400b6efc8SVladislav Khmelevsky assert(Section && "cannot get section for address"); 159520204db5SMaksim Panchenko if (!BF) 1596e2f1a95fSVladislav Khmelevsky BF = BC->createBinaryFunction(Symbol->getName().str() + "@PLT", *Section, 1597e2f1a95fSVladislav Khmelevsky EntryAddress, 0, EntrySize, 15988bdbcfe7SVladislav Khmelevsky Section->getAlignment()); 159920204db5SMaksim Panchenko else 1600e2f1a95fSVladislav Khmelevsky BF->addAlternativeName(Symbol->getName().str() + "@PLT"); 1601e2f1a95fSVladislav Khmelevsky setPLTSymbol(BF, Symbol->getName()); 160200b6efc8SVladislav Khmelevsky } 160300b6efc8SVladislav Khmelevsky 1604bed3608cSAmir Ayupov void RewriteInstance::disassemblePLTInstruction(const BinarySection &Section, 1605bed3608cSAmir Ayupov uint64_t InstrOffset, 1606bed3608cSAmir Ayupov MCInst &Instruction, 1607bed3608cSAmir Ayupov uint64_t &InstrSize) { 160800b6efc8SVladislav Khmelevsky const uint64_t SectionAddress = Section.getAddress(); 160900b6efc8SVladislav Khmelevsky const uint64_t SectionSize = Section.getSize(); 1610a34c753fSRafael Auler StringRef PLTContents = Section.getContents(); 1611a34c753fSRafael Auler ArrayRef<uint8_t> PLTData( 161200b6efc8SVladislav Khmelevsky reinterpret_cast<const uint8_t *>(PLTContents.data()), SectionSize); 1613a34c753fSRafael Auler 161400b6efc8SVladislav Khmelevsky const uint64_t InstrAddr = SectionAddress + InstrOffset; 1615a34c753fSRafael Auler if (!BC->DisAsm->getInstruction(Instruction, InstrSize, 1616a34c753fSRafael Auler PLTData.slice(InstrOffset), InstrAddr, 1617a34c753fSRafael Auler nulls())) { 161852cf0711SAmir Ayupov BC->errs() 161952cf0711SAmir Ayupov << "BOLT-ERROR: unable to disassemble instruction in PLT section " 1620bed3608cSAmir Ayupov << Section.getName() << formatv(" at offset {0:x}\n", InstrOffset); 1621a34c753fSRafael Auler exit(1); 1622a34c753fSRafael Auler } 1623bed3608cSAmir Ayupov } 1624bed3608cSAmir Ayupov 1625bed3608cSAmir Ayupov void RewriteInstance::disassemblePLTSectionAArch64(BinarySection &Section) { 1626bed3608cSAmir Ayupov const uint64_t SectionAddress = Section.getAddress(); 1627bed3608cSAmir Ayupov const uint64_t SectionSize = Section.getSize(); 1628a34c753fSRafael Auler 162900b6efc8SVladislav Khmelevsky uint64_t InstrOffset = 0; 163000b6efc8SVladislav Khmelevsky // Locate new plt entry 163100b6efc8SVladislav Khmelevsky while (InstrOffset < SectionSize) { 163200b6efc8SVladislav Khmelevsky InstructionListType Instructions; 163300b6efc8SVladislav Khmelevsky MCInst Instruction; 163400b6efc8SVladislav Khmelevsky uint64_t EntryOffset = InstrOffset; 163500b6efc8SVladislav Khmelevsky uint64_t EntrySize = 0; 163600b6efc8SVladislav Khmelevsky uint64_t InstrSize; 163700b6efc8SVladislav Khmelevsky // Loop through entry instructions 163800b6efc8SVladislav Khmelevsky while (InstrOffset < SectionSize) { 1639bed3608cSAmir Ayupov disassemblePLTInstruction(Section, InstrOffset, Instruction, InstrSize); 164000b6efc8SVladislav Khmelevsky EntrySize += InstrSize; 164100b6efc8SVladislav Khmelevsky if (!BC->MIB->isIndirectBranch(Instruction)) { 164200b6efc8SVladislav Khmelevsky Instructions.emplace_back(Instruction); 164300b6efc8SVladislav Khmelevsky InstrOffset += InstrSize; 164400b6efc8SVladislav Khmelevsky continue; 164500b6efc8SVladislav Khmelevsky } 164600b6efc8SVladislav Khmelevsky 164700b6efc8SVladislav Khmelevsky const uint64_t EntryAddress = SectionAddress + EntryOffset; 164800b6efc8SVladislav Khmelevsky const uint64_t TargetAddress = BC->MIB->analyzePLTEntry( 164900b6efc8SVladislav Khmelevsky Instruction, Instructions.begin(), Instructions.end(), EntryAddress); 165000b6efc8SVladislav Khmelevsky 165100b6efc8SVladislav Khmelevsky createPLTBinaryFunction(TargetAddress, EntryAddress, EntrySize); 165200b6efc8SVladislav Khmelevsky break; 165300b6efc8SVladislav Khmelevsky } 165400b6efc8SVladislav Khmelevsky 165500b6efc8SVladislav Khmelevsky // Branch instruction 165600b6efc8SVladislav Khmelevsky InstrOffset += InstrSize; 165700b6efc8SVladislav Khmelevsky 165800b6efc8SVladislav Khmelevsky // Skip nops if any 165900b6efc8SVladislav Khmelevsky while (InstrOffset < SectionSize) { 1660bed3608cSAmir Ayupov disassemblePLTInstruction(Section, InstrOffset, Instruction, InstrSize); 166100b6efc8SVladislav Khmelevsky if (!BC->MIB->isNoop(Instruction)) 166200b6efc8SVladislav Khmelevsky break; 166300b6efc8SVladislav Khmelevsky 166400b6efc8SVladislav Khmelevsky InstrOffset += InstrSize; 166500b6efc8SVladislav Khmelevsky } 166600b6efc8SVladislav Khmelevsky } 166700b6efc8SVladislav Khmelevsky } 166800b6efc8SVladislav Khmelevsky 1669f8730293SJob Noorman void RewriteInstance::disassemblePLTSectionRISCV(BinarySection &Section) { 1670f8730293SJob Noorman const uint64_t SectionAddress = Section.getAddress(); 1671f8730293SJob Noorman const uint64_t SectionSize = Section.getSize(); 1672f8730293SJob Noorman StringRef PLTContents = Section.getContents(); 1673f8730293SJob Noorman ArrayRef<uint8_t> PLTData( 1674f8730293SJob Noorman reinterpret_cast<const uint8_t *>(PLTContents.data()), SectionSize); 1675f8730293SJob Noorman 1676f8730293SJob Noorman auto disassembleInstruction = [&](uint64_t InstrOffset, MCInst &Instruction, 1677f8730293SJob Noorman uint64_t &InstrSize) { 1678f8730293SJob Noorman const uint64_t InstrAddr = SectionAddress + InstrOffset; 1679f8730293SJob Noorman if (!BC->DisAsm->getInstruction(Instruction, InstrSize, 1680f8730293SJob Noorman PLTData.slice(InstrOffset), InstrAddr, 1681f8730293SJob Noorman nulls())) { 168252cf0711SAmir Ayupov BC->errs() 168352cf0711SAmir Ayupov << "BOLT-ERROR: unable to disassemble instruction in PLT section " 1684f8730293SJob Noorman << Section.getName() << " at offset 0x" 1685f8730293SJob Noorman << Twine::utohexstr(InstrOffset) << '\n'; 1686f8730293SJob Noorman exit(1); 1687f8730293SJob Noorman } 1688f8730293SJob Noorman }; 1689f8730293SJob Noorman 1690f8730293SJob Noorman // Skip the first special entry since no relocation points to it. 1691f8730293SJob Noorman uint64_t InstrOffset = 32; 1692f8730293SJob Noorman 1693f8730293SJob Noorman while (InstrOffset < SectionSize) { 1694f8730293SJob Noorman InstructionListType Instructions; 1695f8730293SJob Noorman MCInst Instruction; 1696f8730293SJob Noorman const uint64_t EntryOffset = InstrOffset; 1697f8730293SJob Noorman const uint64_t EntrySize = 16; 1698f8730293SJob Noorman uint64_t InstrSize; 1699f8730293SJob Noorman 1700f8730293SJob Noorman while (InstrOffset < EntryOffset + EntrySize) { 1701f8730293SJob Noorman disassembleInstruction(InstrOffset, Instruction, InstrSize); 1702f8730293SJob Noorman Instructions.emplace_back(Instruction); 1703f8730293SJob Noorman InstrOffset += InstrSize; 1704f8730293SJob Noorman } 1705f8730293SJob Noorman 1706f8730293SJob Noorman const uint64_t EntryAddress = SectionAddress + EntryOffset; 1707f8730293SJob Noorman const uint64_t TargetAddress = BC->MIB->analyzePLTEntry( 1708f8730293SJob Noorman Instruction, Instructions.begin(), Instructions.end(), EntryAddress); 1709f8730293SJob Noorman 1710f8730293SJob Noorman createPLTBinaryFunction(TargetAddress, EntryAddress, EntrySize); 1711f8730293SJob Noorman } 1712f8730293SJob Noorman } 1713f8730293SJob Noorman 171400b6efc8SVladislav Khmelevsky void RewriteInstance::disassemblePLTSectionX86(BinarySection &Section, 171500b6efc8SVladislav Khmelevsky uint64_t EntrySize) { 171600b6efc8SVladislav Khmelevsky const uint64_t SectionAddress = Section.getAddress(); 171700b6efc8SVladislav Khmelevsky const uint64_t SectionSize = Section.getSize(); 171800b6efc8SVladislav Khmelevsky 171900b6efc8SVladislav Khmelevsky for (uint64_t EntryOffset = 0; EntryOffset + EntrySize <= SectionSize; 172000b6efc8SVladislav Khmelevsky EntryOffset += EntrySize) { 172100b6efc8SVladislav Khmelevsky MCInst Instruction; 172200b6efc8SVladislav Khmelevsky uint64_t InstrSize, InstrOffset = EntryOffset; 172300b6efc8SVladislav Khmelevsky while (InstrOffset < EntryOffset + EntrySize) { 1724bed3608cSAmir Ayupov disassemblePLTInstruction(Section, InstrOffset, Instruction, InstrSize); 1725a34c753fSRafael Auler // Check if the entry size needs adjustment. 1726a34c753fSRafael Auler if (EntryOffset == 0 && BC->MIB->isTerminateBranch(Instruction) && 1727a34c753fSRafael Auler EntrySize == 8) 1728a34c753fSRafael Auler EntrySize = 16; 1729a34c753fSRafael Auler 1730a34c753fSRafael Auler if (BC->MIB->isIndirectBranch(Instruction)) 1731a34c753fSRafael Auler break; 1732a34c753fSRafael Auler 1733a34c753fSRafael Auler InstrOffset += InstrSize; 1734a34c753fSRafael Auler } 1735a34c753fSRafael Auler 1736a34c753fSRafael Auler if (InstrOffset + InstrSize > EntryOffset + EntrySize) 1737a34c753fSRafael Auler continue; 1738a34c753fSRafael Auler 1739a34c753fSRafael Auler uint64_t TargetAddress; 1740a34c753fSRafael Auler if (!BC->MIB->evaluateMemOperandTarget(Instruction, TargetAddress, 174100b6efc8SVladislav Khmelevsky SectionAddress + InstrOffset, 1742a34c753fSRafael Auler InstrSize)) { 174352cf0711SAmir Ayupov BC->errs() << "BOLT-ERROR: error evaluating PLT instruction at offset 0x" 174400b6efc8SVladislav Khmelevsky << Twine::utohexstr(SectionAddress + InstrOffset) << '\n'; 1745a34c753fSRafael Auler exit(1); 1746a34c753fSRafael Auler } 1747a34c753fSRafael Auler 174800b6efc8SVladislav Khmelevsky createPLTBinaryFunction(TargetAddress, SectionAddress + EntryOffset, 174900b6efc8SVladislav Khmelevsky EntrySize); 1750a34c753fSRafael Auler } 175100b6efc8SVladislav Khmelevsky } 175200b6efc8SVladislav Khmelevsky 175300b6efc8SVladislav Khmelevsky void RewriteInstance::disassemblePLT() { 175400b6efc8SVladislav Khmelevsky auto analyzeOnePLTSection = [&](BinarySection &Section, uint64_t EntrySize) { 175500b6efc8SVladislav Khmelevsky if (BC->isAArch64()) 175600b6efc8SVladislav Khmelevsky return disassemblePLTSectionAArch64(Section); 1757f8730293SJob Noorman if (BC->isRISCV()) 1758f8730293SJob Noorman return disassemblePLTSectionRISCV(Section); 1759603fa4c6SNathan Sidwell if (BC->isX86()) 176000b6efc8SVladislav Khmelevsky return disassemblePLTSectionX86(Section, EntrySize); 1761603fa4c6SNathan Sidwell llvm_unreachable("Unmplemented PLT"); 1762a34c753fSRafael Auler }; 1763a34c753fSRafael Auler 1764a34c753fSRafael Auler for (BinarySection &Section : BC->allocatableSections()) { 1765a34c753fSRafael Auler const PLTSectionInfo *PLTSI = getPLTSectionInfo(Section.getName()); 1766a34c753fSRafael Auler if (!PLTSI) 1767a34c753fSRafael Auler continue; 1768a34c753fSRafael Auler 1769a34c753fSRafael Auler analyzeOnePLTSection(Section, PLTSI->EntrySize); 177020204db5SMaksim Panchenko 177120204db5SMaksim Panchenko BinaryFunction *PltBF; 177220204db5SMaksim Panchenko auto BFIter = BC->getBinaryFunctions().find(Section.getAddress()); 177320204db5SMaksim Panchenko if (BFIter != BC->getBinaryFunctions().end()) { 177420204db5SMaksim Panchenko PltBF = &BFIter->second; 177520204db5SMaksim Panchenko } else { 1776a34c753fSRafael Auler // If we did not register any function at the start of the section, 1777a34c753fSRafael Auler // then it must be a general PLT entry. Add a function at the location. 177820204db5SMaksim Panchenko PltBF = BC->createBinaryFunction( 1779a34c753fSRafael Auler "__BOLT_PSEUDO_" + Section.getName().str(), Section, 1780a34c753fSRafael Auler Section.getAddress(), 0, PLTSI->EntrySize, Section.getAlignment()); 1781a34c753fSRafael Auler } 178220204db5SMaksim Panchenko PltBF->setPseudo(true); 1783a34c753fSRafael Auler } 1784a34c753fSRafael Auler } 1785a34c753fSRafael Auler 1786a34c753fSRafael Auler void RewriteInstance::adjustFunctionBoundaries() { 1787a34c753fSRafael Auler for (auto BFI = BC->getBinaryFunctions().begin(), 1788a34c753fSRafael Auler BFE = BC->getBinaryFunctions().end(); 1789a34c753fSRafael Auler BFI != BFE; ++BFI) { 1790a34c753fSRafael Auler BinaryFunction &Function = BFI->second; 1791a34c753fSRafael Auler const BinaryFunction *NextFunction = nullptr; 1792a34c753fSRafael Auler if (std::next(BFI) != BFE) 1793a34c753fSRafael Auler NextFunction = &std::next(BFI)->second; 1794a34c753fSRafael Auler 1795a34c753fSRafael Auler // Check if there's a symbol or a function with a larger address in the 1796a34c753fSRafael Auler // same section. If there is - it determines the maximum size for the 1797a34c753fSRafael Auler // current function. Otherwise, it is the size of a containing section 1798a34c753fSRafael Auler // the defines it. 1799a34c753fSRafael Auler // 1800a34c753fSRafael Auler // NOTE: ignore some symbols that could be tolerated inside the body 1801a34c753fSRafael Auler // of a function. 1802a34c753fSRafael Auler auto NextSymRefI = FileSymRefs.upper_bound(Function.getAddress()); 1803a34c753fSRafael Auler while (NextSymRefI != FileSymRefs.end()) { 1804a34c753fSRafael Auler SymbolRef &Symbol = NextSymRefI->second; 1805a34c753fSRafael Auler const uint64_t SymbolAddress = NextSymRefI->first; 1806a34c753fSRafael Auler const uint64_t SymbolSize = ELFSymbolRef(Symbol).getSize(); 1807a34c753fSRafael Auler 1808a34c753fSRafael Auler if (NextFunction && SymbolAddress >= NextFunction->getAddress()) 1809a34c753fSRafael Auler break; 1810a34c753fSRafael Auler 1811a34c753fSRafael Auler if (!Function.isSymbolValidInScope(Symbol, SymbolSize)) 1812a34c753fSRafael Auler break; 1813a34c753fSRafael Auler 18141cf2599aSJob Noorman // Skip basic block labels. This happens on RISC-V with linker relaxation 18151cf2599aSJob Noorman // enabled because every branch needs a relocation and corresponding 18161cf2599aSJob Noorman // symbol. We don't want to add such symbols as entry points. 18171cf2599aSJob Noorman const auto PrivateLabelPrefix = BC->AsmInfo->getPrivateLabelPrefix(); 18181cf2599aSJob Noorman if (!PrivateLabelPrefix.empty() && 18191cf2599aSJob Noorman cantFail(Symbol.getName()).starts_with(PrivateLabelPrefix)) { 18201cf2599aSJob Noorman ++NextSymRefI; 18211cf2599aSJob Noorman continue; 18221cf2599aSJob Noorman } 18231cf2599aSJob Noorman 1824a34c753fSRafael Auler // This is potentially another entry point into the function. 1825a34c753fSRafael Auler uint64_t EntryOffset = NextSymRefI->first - Function.getAddress(); 1826a34c753fSRafael Auler LLVM_DEBUG(dbgs() << "BOLT-DEBUG: adding entry point to function " 1827a34c753fSRafael Auler << Function << " at offset 0x" 1828a34c753fSRafael Auler << Twine::utohexstr(EntryOffset) << '\n'); 1829a34c753fSRafael Auler Function.addEntryPointAtOffset(EntryOffset); 1830a34c753fSRafael Auler 1831a34c753fSRafael Auler ++NextSymRefI; 1832a34c753fSRafael Auler } 1833a34c753fSRafael Auler 1834a34c753fSRafael Auler // Function runs at most till the end of the containing section. 1835a34c753fSRafael Auler uint64_t NextObjectAddress = Function.getOriginSection()->getEndAddress(); 1836a34c753fSRafael Auler // Or till the next object marked by a symbol. 1837ee0e9ccbSMaksim Panchenko if (NextSymRefI != FileSymRefs.end()) 1838a34c753fSRafael Auler NextObjectAddress = std::min(NextSymRefI->first, NextObjectAddress); 1839ee0e9ccbSMaksim Panchenko 1840a34c753fSRafael Auler // Or till the next function not marked by a symbol. 1841ee0e9ccbSMaksim Panchenko if (NextFunction) 1842a34c753fSRafael Auler NextObjectAddress = 1843a34c753fSRafael Auler std::min(NextFunction->getAddress(), NextObjectAddress); 1844a34c753fSRafael Auler 1845a34c753fSRafael Auler const uint64_t MaxSize = NextObjectAddress - Function.getAddress(); 1846a34c753fSRafael Auler if (MaxSize < Function.getSize()) { 184752cf0711SAmir Ayupov BC->errs() << "BOLT-ERROR: symbol seen in the middle of the function " 1848a34c753fSRafael Auler << Function << ". Skipping.\n"; 1849a34c753fSRafael Auler Function.setSimple(false); 1850a34c753fSRafael Auler Function.setMaxSize(Function.getSize()); 1851a34c753fSRafael Auler continue; 1852a34c753fSRafael Auler } 1853a34c753fSRafael Auler Function.setMaxSize(MaxSize); 1854a34c753fSRafael Auler if (!Function.getSize() && Function.isSimple()) { 1855a34c753fSRafael Auler // Some assembly functions have their size set to 0, use the max 1856a34c753fSRafael Auler // size as their real size. 1857ee0e9ccbSMaksim Panchenko if (opts::Verbosity >= 1) 185852cf0711SAmir Ayupov BC->outs() << "BOLT-INFO: setting size of function " << Function 185952cf0711SAmir Ayupov << " to " << Function.getMaxSize() << " (was 0)\n"; 1860a34c753fSRafael Auler Function.setSize(Function.getMaxSize()); 1861a34c753fSRafael Auler } 1862a34c753fSRafael Auler } 1863a34c753fSRafael Auler } 1864a34c753fSRafael Auler 1865a34c753fSRafael Auler void RewriteInstance::relocateEHFrameSection() { 18664d3a0cadSMaksim Panchenko assert(EHFrameSection && "Non-empty .eh_frame section expected."); 1867a34c753fSRafael Auler 18684d3a0cadSMaksim Panchenko BinarySection *RelocatedEHFrameSection = 18694d3a0cadSMaksim Panchenko getSection(".relocated" + getEHFrameSectionName()); 18704d3a0cadSMaksim Panchenko assert(RelocatedEHFrameSection && 18714d3a0cadSMaksim Panchenko "Relocated eh_frame section should be preregistered."); 1872a34c753fSRafael Auler DWARFDataExtractor DE(EHFrameSection->getContents(), 1873a34c753fSRafael Auler BC->AsmInfo->isLittleEndian(), 1874a34c753fSRafael Auler BC->AsmInfo->getCodePointerSize()); 1875a34c753fSRafael Auler auto createReloc = [&](uint64_t Value, uint64_t Offset, uint64_t DwarfType) { 1876a34c753fSRafael Auler if (DwarfType == dwarf::DW_EH_PE_omit) 1877a34c753fSRafael Auler return; 1878a34c753fSRafael Auler 1879a34c753fSRafael Auler // Only fix references that are relative to other locations. 1880a34c753fSRafael Auler if (!(DwarfType & dwarf::DW_EH_PE_pcrel) && 1881a34c753fSRafael Auler !(DwarfType & dwarf::DW_EH_PE_textrel) && 1882a34c753fSRafael Auler !(DwarfType & dwarf::DW_EH_PE_funcrel) && 1883ee0e9ccbSMaksim Panchenko !(DwarfType & dwarf::DW_EH_PE_datarel)) 1884a34c753fSRafael Auler return; 1885a34c753fSRafael Auler 1886a34c753fSRafael Auler if (!(DwarfType & dwarf::DW_EH_PE_sdata4)) 1887a34c753fSRafael Auler return; 1888a34c753fSRafael Auler 1889a34c753fSRafael Auler uint64_t RelType; 1890a34c753fSRafael Auler switch (DwarfType & 0x0f) { 1891a34c753fSRafael Auler default: 1892a34c753fSRafael Auler llvm_unreachable("unsupported DWARF encoding type"); 1893a34c753fSRafael Auler case dwarf::DW_EH_PE_sdata4: 1894a34c753fSRafael Auler case dwarf::DW_EH_PE_udata4: 1895a34c753fSRafael Auler RelType = Relocation::getPC32(); 1896a34c753fSRafael Auler Offset -= 4; 1897a34c753fSRafael Auler break; 1898a34c753fSRafael Auler case dwarf::DW_EH_PE_sdata8: 1899a34c753fSRafael Auler case dwarf::DW_EH_PE_udata8: 1900a34c753fSRafael Auler RelType = Relocation::getPC64(); 1901a34c753fSRafael Auler Offset -= 8; 1902a34c753fSRafael Auler break; 1903a34c753fSRafael Auler } 1904a34c753fSRafael Auler 1905a34c753fSRafael Auler // Create a relocation against an absolute value since the goal is to 1906a34c753fSRafael Auler // preserve the contents of the section independent of the new values 1907a34c753fSRafael Auler // of referenced symbols. 19084d3a0cadSMaksim Panchenko RelocatedEHFrameSection->addRelocation(Offset, nullptr, RelType, Value); 1909a34c753fSRafael Auler }; 1910a34c753fSRafael Auler 191140c2e0faSMaksim Panchenko Error E = EHFrameParser::parse(DE, EHFrameSection->getAddress(), createReloc); 1912a34c753fSRafael Auler check_error(std::move(E), "failed to patch EH frame"); 1913a34c753fSRafael Auler } 1914a34c753fSRafael Auler 1915ced5472eSAmir Ayupov Error RewriteInstance::readSpecialSections() { 1916a34c753fSRafael Auler NamedRegionTimer T("readSpecialSections", "read special sections", 1917a34c753fSRafael Auler TimerGroupName, TimerGroupDesc, opts::TimeRewrite); 1918a34c753fSRafael Auler 1919a34c753fSRafael Auler bool HasTextRelocations = false; 1920986362d4SHuan Nguyen bool HasSymbolTable = false; 1921a34c753fSRafael Auler bool HasDebugInfo = false; 1922a34c753fSRafael Auler 1923a34c753fSRafael Auler // Process special sections. 1924a34c753fSRafael Auler for (const SectionRef &Section : InputFile->sections()) { 1925a34c753fSRafael Auler Expected<StringRef> SectionNameOrErr = Section.getName(); 1926a34c753fSRafael Auler check_error(SectionNameOrErr.takeError(), "cannot get section name"); 1927a34c753fSRafael Auler StringRef SectionName = *SectionNameOrErr; 1928a34c753fSRafael Auler 1929ced5472eSAmir Ayupov if (Error E = Section.getContents().takeError()) 1930ced5472eSAmir Ayupov return E; 1931a34c753fSRafael Auler BC->registerSection(Section); 1932a34c753fSRafael Auler LLVM_DEBUG( 1933a34c753fSRafael Auler dbgs() << "BOLT-DEBUG: registering section " << SectionName << " @ 0x" 1934a34c753fSRafael Auler << Twine::utohexstr(Section.getAddress()) << ":0x" 1935a34c753fSRafael Auler << Twine::utohexstr(Section.getAddress() + Section.getSize()) 1936a34c753fSRafael Auler << "\n"); 1937a34c753fSRafael Auler if (isDebugSection(SectionName)) 1938a34c753fSRafael Auler HasDebugInfo = true; 1939a34c753fSRafael Auler } 1940a34c753fSRafael Auler 194182ef86c1SAmir Ayupov // Set IsRelro section attribute based on PT_GNU_RELRO segment. 194282ef86c1SAmir Ayupov markGnuRelroSections(); 194382ef86c1SAmir Ayupov 1944a34c753fSRafael Auler if (HasDebugInfo && !opts::UpdateDebugSections && !opts::AggregateOnly) { 194552cf0711SAmir Ayupov BC->errs() << "BOLT-WARNING: debug info will be stripped from the binary. " 1946a34c753fSRafael Auler "Use -update-debug-sections to keep it.\n"; 1947a34c753fSRafael Auler } 1948a34c753fSRafael Auler 1949364963a0SNathan Sidwell HasTextRelocations = (bool)BC->getUniqueSectionByName( 1950364963a0SNathan Sidwell ".rela" + std::string(BC->getMainCodeSectionName())); 1951986362d4SHuan Nguyen HasSymbolTable = (bool)BC->getUniqueSectionByName(".symtab"); 1952a34c753fSRafael Auler EHFrameSection = BC->getUniqueSectionByName(".eh_frame"); 1953a34c753fSRafael Auler 1954a34c753fSRafael Auler if (ErrorOr<BinarySection &> BATSec = 1955a34c753fSRafael Auler BC->getUniqueSectionByName(BoltAddressTranslation::SECTION_NAME)) { 1956935b946bSAmir Ayupov BC->HasBATSection = true; 1957a34c753fSRafael Auler // Do not read BAT when plotting a heatmap 1958a34c753fSRafael Auler if (!opts::HeatmapMode) { 195952cf0711SAmir Ayupov if (std::error_code EC = BAT->parse(BC->outs(), BATSec->getContents())) { 196052cf0711SAmir Ayupov BC->errs() << "BOLT-ERROR: failed to parse BOLT address translation " 1961a34c753fSRafael Auler "table.\n"; 1962a34c753fSRafael Auler exit(1); 1963a34c753fSRafael Auler } 1964a34c753fSRafael Auler } 1965a34c753fSRafael Auler } 1966a34c753fSRafael Auler 1967a34c753fSRafael Auler if (opts::PrintSections) { 196852cf0711SAmir Ayupov BC->outs() << "BOLT-INFO: Sections from original binary:\n"; 196952cf0711SAmir Ayupov BC->printSections(BC->outs()); 1970a34c753fSRafael Auler } 1971a34c753fSRafael Auler 1972a34c753fSRafael Auler if (opts::RelocationMode == cl::BOU_TRUE && !HasTextRelocations) { 197352cf0711SAmir Ayupov BC->errs() 197452cf0711SAmir Ayupov << "BOLT-ERROR: relocations against code are missing from the input " 1975a34c753fSRafael Auler "file. Cannot proceed in relocations mode (-relocs).\n"; 1976a34c753fSRafael Auler exit(1); 1977a34c753fSRafael Auler } 1978a34c753fSRafael Auler 197940c2e0faSMaksim Panchenko BC->HasRelocations = 198040c2e0faSMaksim Panchenko HasTextRelocations && (opts::RelocationMode != cl::BOU_FALSE); 1981a34c753fSRafael Auler 1982a693ae53SMaksim Panchenko if (BC->IsLinuxKernel && BC->HasRelocations) { 198352cf0711SAmir Ayupov BC->outs() << "BOLT-INFO: disabling relocation mode for Linux kernel\n"; 1984a693ae53SMaksim Panchenko BC->HasRelocations = false; 1985a693ae53SMaksim Panchenko } 1986a693ae53SMaksim Panchenko 1987986362d4SHuan Nguyen BC->IsStripped = !HasSymbolTable; 1988986362d4SHuan Nguyen 1989c8482da7SAmir Ayupov if (BC->IsStripped && !opts::AllowStripped) { 199052cf0711SAmir Ayupov BC->errs() 199152cf0711SAmir Ayupov << "BOLT-ERROR: stripped binaries are not supported. If you know " 1992c8482da7SAmir Ayupov "what you're doing, use --allow-stripped to proceed"; 1993c8482da7SAmir Ayupov exit(1); 1994c8482da7SAmir Ayupov } 1995c8482da7SAmir Ayupov 1996a34c753fSRafael Auler // Force non-relocation mode for heatmap generation 1997ee0e9ccbSMaksim Panchenko if (opts::HeatmapMode) 1998a34c753fSRafael Auler BC->HasRelocations = false; 1999a34c753fSRafael Auler 2000ee0e9ccbSMaksim Panchenko if (BC->HasRelocations) 200152cf0711SAmir Ayupov BC->outs() << "BOLT-INFO: enabling " << (opts::StrictMode ? "strict " : "") 2002a34c753fSRafael Auler << "relocation mode\n"; 2003a34c753fSRafael Auler 2004a34c753fSRafael Auler // Read EH frame for function boundaries info. 2005a34c753fSRafael Auler Expected<const DWARFDebugFrame *> EHFrameOrError = BC->DwCtx->getEHFrame(); 2006a34c753fSRafael Auler if (!EHFrameOrError) 2007a34c753fSRafael Auler report_error("expected valid eh_frame section", EHFrameOrError.takeError()); 200852cf0711SAmir Ayupov CFIRdWrt.reset(new CFIReaderWriter(*BC, *EHFrameOrError.get())); 2009a34c753fSRafael Auler 20108ea59ec6SMaksim Panchenko processSectionMetadata(); 2011a34c753fSRafael Auler 2012a34c753fSRafael Auler // Read .dynamic/PT_DYNAMIC. 20131e016c3bSAmir Ayupov return readELFDynamic(); 2014a34c753fSRafael Auler } 2015a34c753fSRafael Auler 2016a34c753fSRafael Auler void RewriteInstance::adjustCommandLineOptions() { 2017ee0e9ccbSMaksim Panchenko if (BC->isAArch64() && !BC->HasRelocations) 201852cf0711SAmir Ayupov BC->errs() << "BOLT-WARNING: non-relocation mode for AArch64 is not fully " 2019a34c753fSRafael Auler "supported\n"; 2020a34c753fSRafael Auler 2021ee0e9ccbSMaksim Panchenko if (RuntimeLibrary *RtLibrary = BC->getRuntimeLibrary()) 2022a34c753fSRafael Auler RtLibrary->adjustCommandLineOptions(*BC); 2023a34c753fSRafael Auler 2024a34c753fSRafael Auler if (BC->isX86() && BC->MAB->allowAutoPadding()) { 2025a34c753fSRafael Auler if (!BC->HasRelocations) { 202652cf0711SAmir Ayupov BC->errs() 202752cf0711SAmir Ayupov << "BOLT-ERROR: cannot apply mitigations for Intel JCC erratum in " 2028a34c753fSRafael Auler "non-relocation mode\n"; 2029a34c753fSRafael Auler exit(1); 2030a34c753fSRafael Auler } 203152cf0711SAmir Ayupov BC->outs() 203252cf0711SAmir Ayupov << "BOLT-WARNING: using mitigation for Intel JCC erratum, layout " 2033a34c753fSRafael Auler "may take several minutes\n"; 2034a34c753fSRafael Auler } 2035a34c753fSRafael Auler 2036a34c753fSRafael Auler if (opts::SplitEH && !BC->HasRelocations) { 203752cf0711SAmir Ayupov BC->errs() << "BOLT-WARNING: disabling -split-eh in non-relocation mode\n"; 2038a34c753fSRafael Auler opts::SplitEH = false; 2039a34c753fSRafael Auler } 2040a34c753fSRafael Auler 2041a34c753fSRafael Auler if (opts::StrictMode && !BC->HasRelocations) { 204252cf0711SAmir Ayupov BC->errs() 204352cf0711SAmir Ayupov << "BOLT-WARNING: disabling strict mode (-strict) in non-relocation " 2044a34c753fSRafael Auler "mode\n"; 2045a34c753fSRafael Auler opts::StrictMode = false; 2046a34c753fSRafael Auler } 2047a34c753fSRafael Auler 2048a34c753fSRafael Auler if (BC->HasRelocations && opts::AggregateOnly && 2049a34c753fSRafael Auler !opts::StrictMode.getNumOccurrences()) { 205052cf0711SAmir Ayupov BC->outs() << "BOLT-INFO: enabling strict relocation mode for aggregation " 2051a34c753fSRafael Auler "purposes\n"; 2052a34c753fSRafael Auler opts::StrictMode = true; 2053a34c753fSRafael Auler } 2054a34c753fSRafael Auler 2055a34c753fSRafael Auler if (!BC->HasRelocations && 2056a34c753fSRafael Auler opts::ReorderFunctions != ReorderFunctions::RT_NONE) { 205752cf0711SAmir Ayupov BC->errs() << "BOLT-ERROR: function reordering only works when " 2058a34c753fSRafael Auler << "relocations are enabled\n"; 2059a34c753fSRafael Auler exit(1); 2060a34c753fSRafael Auler } 2061a34c753fSRafael Auler 2062*3c357a49SAlexander Yermolovich if (!BC->HasRelocations && 2063*3c357a49SAlexander Yermolovich opts::ICF == IdenticalCodeFolding::ICFLevel::Safe) { 2064*3c357a49SAlexander Yermolovich BC->errs() << "BOLT-ERROR: binary built without relocations. Safe ICF is " 2065*3c357a49SAlexander Yermolovich "not supported\n"; 2066*3c357a49SAlexander Yermolovich exit(1); 2067*3c357a49SAlexander Yermolovich } 2068*3c357a49SAlexander Yermolovich 2069a86dd9aeSDenis Revunov if (opts::Instrument || 2070a86dd9aeSDenis Revunov (opts::ReorderFunctions != ReorderFunctions::RT_NONE && 2071a86dd9aeSDenis Revunov !opts::HotText.getNumOccurrences())) { 2072a34c753fSRafael Auler opts::HotText = true; 2073a34c753fSRafael Auler } else if (opts::HotText && !BC->HasRelocations) { 207452cf0711SAmir Ayupov BC->errs() << "BOLT-WARNING: hot text is disabled in non-relocation mode\n"; 2075a34c753fSRafael Auler opts::HotText = false; 2076a34c753fSRafael Auler } 2077a34c753fSRafael Auler 2078a34c753fSRafael Auler if (opts::HotText && opts::HotTextMoveSections.getNumOccurrences() == 0) { 2079a34c753fSRafael Auler opts::HotTextMoveSections.addValue(".stub"); 2080a34c753fSRafael Auler opts::HotTextMoveSections.addValue(".mover"); 2081a34c753fSRafael Auler opts::HotTextMoveSections.addValue(".never_hugify"); 2082a34c753fSRafael Auler } 2083a34c753fSRafael Auler 2084a34c753fSRafael Auler if (opts::UseOldText && !BC->OldTextSectionAddress) { 208552cf0711SAmir Ayupov BC->errs() 208652cf0711SAmir Ayupov << "BOLT-WARNING: cannot use old .text as the section was not found" 2087a34c753fSRafael Auler "\n"; 2088a34c753fSRafael Auler opts::UseOldText = false; 2089a34c753fSRafael Auler } 2090a34c753fSRafael Auler if (opts::UseOldText && !BC->HasRelocations) { 209152cf0711SAmir Ayupov BC->errs() << "BOLT-WARNING: cannot use old .text in non-relocation mode\n"; 2092a34c753fSRafael Auler opts::UseOldText = false; 2093a34c753fSRafael Auler } 2094a34c753fSRafael Auler 2095ee0e9ccbSMaksim Panchenko if (!opts::AlignText.getNumOccurrences()) 2096a34c753fSRafael Auler opts::AlignText = BC->PageAlign; 2097a34c753fSRafael Auler 209862a289d8SVladislav Khmelevsky if (opts::AlignText < opts::AlignFunctions) 209962a289d8SVladislav Khmelevsky opts::AlignText = (unsigned)opts::AlignFunctions; 210062a289d8SVladislav Khmelevsky 210140c2e0faSMaksim Panchenko if (BC->isX86() && opts::Lite.getNumOccurrences() == 0 && !opts::StrictMode && 2102ee0e9ccbSMaksim Panchenko !opts::UseOldText) 2103a34c753fSRafael Auler opts::Lite = true; 2104a34c753fSRafael Auler 2105a34c753fSRafael Auler if (opts::Lite && opts::UseOldText) { 210652cf0711SAmir Ayupov BC->errs() << "BOLT-WARNING: cannot combine -lite with -use-old-text. " 2107a34c753fSRafael Auler "Disabling -use-old-text.\n"; 2108a34c753fSRafael Auler opts::UseOldText = false; 2109a34c753fSRafael Auler } 2110a34c753fSRafael Auler 2111a34c753fSRafael Auler if (opts::Lite && opts::StrictMode) { 211252cf0711SAmir Ayupov BC->errs() 211352cf0711SAmir Ayupov << "BOLT-ERROR: -strict and -lite cannot be used at the same time\n"; 2114a34c753fSRafael Auler exit(1); 2115a34c753fSRafael Auler } 2116a34c753fSRafael Auler 2117ee0e9ccbSMaksim Panchenko if (opts::Lite) 211852cf0711SAmir Ayupov BC->outs() << "BOLT-INFO: enabling lite mode\n"; 211951268a57SMaksim Panchenko 21207de82ca3SMaksim Panchenko if (BC->IsLinuxKernel) { 21217de82ca3SMaksim Panchenko if (!opts::KeepNops.getNumOccurrences()) 212251268a57SMaksim Panchenko opts::KeepNops = true; 21237de82ca3SMaksim Panchenko 21247de82ca3SMaksim Panchenko // Linux kernel may resume execution after a trap instruction in some cases. 21257de82ca3SMaksim Panchenko if (!opts::TerminalTrap.getNumOccurrences()) 21267de82ca3SMaksim Panchenko opts::TerminalTrap = false; 21277de82ca3SMaksim Panchenko } 2128a34c753fSRafael Auler } 2129a34c753fSRafael Auler 2130a34c753fSRafael Auler namespace { 2131a34c753fSRafael Auler template <typename ELFT> 2132a34c753fSRafael Auler int64_t getRelocationAddend(const ELFObjectFile<ELFT> *Obj, 2133a34c753fSRafael Auler const RelocationRef &RelRef) { 2134a34c753fSRafael Auler using ELFShdrTy = typename ELFT::Shdr; 2135a34c753fSRafael Auler using Elf_Rela = typename ELFT::Rela; 2136a34c753fSRafael Auler int64_t Addend = 0; 2137a34c753fSRafael Auler const ELFFile<ELFT> &EF = Obj->getELFFile(); 2138a34c753fSRafael Auler DataRefImpl Rel = RelRef.getRawDataRefImpl(); 2139a34c753fSRafael Auler const ELFShdrTy *RelocationSection = cantFail(EF.getSection(Rel.d.a)); 2140a34c753fSRafael Auler switch (RelocationSection->sh_type) { 214140c2e0faSMaksim Panchenko default: 214240c2e0faSMaksim Panchenko llvm_unreachable("unexpected relocation section type"); 2143a34c753fSRafael Auler case ELF::SHT_REL: 2144a34c753fSRafael Auler break; 2145a34c753fSRafael Auler case ELF::SHT_RELA: { 2146a34c753fSRafael Auler const Elf_Rela *RelA = Obj->getRela(Rel); 2147a34c753fSRafael Auler Addend = RelA->r_addend; 2148a34c753fSRafael Auler break; 2149a34c753fSRafael Auler } 2150a34c753fSRafael Auler } 2151a34c753fSRafael Auler 2152a34c753fSRafael Auler return Addend; 2153a34c753fSRafael Auler } 2154a34c753fSRafael Auler 2155a34c753fSRafael Auler int64_t getRelocationAddend(const ELFObjectFileBase *Obj, 2156a34c753fSRafael Auler const RelocationRef &Rel) { 21575b9f0309SNathan Sidwell return getRelocationAddend(cast<ELF64LEObjectFile>(Obj), Rel); 2158a34c753fSRafael Auler } 2159228970f6Sspupyrev 2160228970f6Sspupyrev template <typename ELFT> 2161228970f6Sspupyrev uint32_t getRelocationSymbol(const ELFObjectFile<ELFT> *Obj, 2162228970f6Sspupyrev const RelocationRef &RelRef) { 2163228970f6Sspupyrev using ELFShdrTy = typename ELFT::Shdr; 2164228970f6Sspupyrev uint32_t Symbol = 0; 2165228970f6Sspupyrev const ELFFile<ELFT> &EF = Obj->getELFFile(); 2166228970f6Sspupyrev DataRefImpl Rel = RelRef.getRawDataRefImpl(); 2167228970f6Sspupyrev const ELFShdrTy *RelocationSection = cantFail(EF.getSection(Rel.d.a)); 2168228970f6Sspupyrev switch (RelocationSection->sh_type) { 2169228970f6Sspupyrev default: 2170228970f6Sspupyrev llvm_unreachable("unexpected relocation section type"); 2171228970f6Sspupyrev case ELF::SHT_REL: 2172228970f6Sspupyrev Symbol = Obj->getRel(Rel)->getSymbol(EF.isMips64EL()); 2173228970f6Sspupyrev break; 2174228970f6Sspupyrev case ELF::SHT_RELA: 2175228970f6Sspupyrev Symbol = Obj->getRela(Rel)->getSymbol(EF.isMips64EL()); 2176228970f6Sspupyrev break; 2177228970f6Sspupyrev } 2178228970f6Sspupyrev 2179228970f6Sspupyrev return Symbol; 2180228970f6Sspupyrev } 2181228970f6Sspupyrev 2182228970f6Sspupyrev uint32_t getRelocationSymbol(const ELFObjectFileBase *Obj, 2183228970f6Sspupyrev const RelocationRef &Rel) { 21845b9f0309SNathan Sidwell return getRelocationSymbol(cast<ELF64LEObjectFile>(Obj), Rel); 2185228970f6Sspupyrev } 2186a34c753fSRafael Auler } // anonymous namespace 2187a34c753fSRafael Auler 2188a34c753fSRafael Auler bool RewriteInstance::analyzeRelocation( 218917ed8f29SVladislav Khmelevsky const RelocationRef &Rel, uint64_t &RType, std::string &SymbolName, 2190a34c753fSRafael Auler bool &IsSectionRelocation, uint64_t &SymbolAddress, int64_t &Addend, 2191a34c753fSRafael Auler uint64_t &ExtractedValue, bool &Skip) const { 2192a34c753fSRafael Auler Skip = false; 2193a34c753fSRafael Auler if (!Relocation::isSupported(RType)) 2194a34c753fSRafael Auler return false; 2195a34c753fSRafael Auler 21966c8933e1Ssinan auto IsWeakReference = [](const SymbolRef &Symbol) { 21976c8933e1Ssinan Expected<uint32_t> SymFlagsOrErr = Symbol.getFlags(); 21986c8933e1Ssinan if (!SymFlagsOrErr) 21996c8933e1Ssinan return false; 22006c8933e1Ssinan return (*SymFlagsOrErr & SymbolRef::SF_Undefined) && 22016c8933e1Ssinan (*SymFlagsOrErr & SymbolRef::SF_Weak); 22026c8933e1Ssinan }; 22036c8933e1Ssinan 2204a34c753fSRafael Auler const bool IsAArch64 = BC->isAArch64(); 2205a34c753fSRafael Auler 2206a34c753fSRafael Auler const size_t RelSize = Relocation::getSizeForType(RType); 2207a34c753fSRafael Auler 2208a34c753fSRafael Auler ErrorOr<uint64_t> Value = 2209a34c753fSRafael Auler BC->getUnsignedValueAtAddress(Rel.getOffset(), RelSize); 2210a34c753fSRafael Auler assert(Value && "failed to extract relocated value"); 2211a34c753fSRafael Auler if ((Skip = Relocation::skipRelocationProcess(RType, *Value))) 2212a34c753fSRafael Auler return true; 2213a34c753fSRafael Auler 221440c2e0faSMaksim Panchenko ExtractedValue = Relocation::extractValue(RType, *Value, Rel.getOffset()); 2215a34c753fSRafael Auler Addend = getRelocationAddend(InputFile, Rel); 2216a34c753fSRafael Auler 2217a34c753fSRafael Auler const bool IsPCRelative = Relocation::isPCRelative(RType); 2218a34c753fSRafael Auler const uint64_t PCRelOffset = IsPCRelative && !IsAArch64 ? Rel.getOffset() : 0; 2219a34c753fSRafael Auler bool SkipVerification = false; 2220a34c753fSRafael Auler auto SymbolIter = Rel.getSymbol(); 2221a34c753fSRafael Auler if (SymbolIter == InputFile->symbol_end()) { 2222a34c753fSRafael Auler SymbolAddress = ExtractedValue - Addend + PCRelOffset; 2223a34c753fSRafael Auler MCSymbol *RelSymbol = 2224a34c753fSRafael Auler BC->getOrCreateGlobalSymbol(SymbolAddress, "RELSYMat"); 2225a34c753fSRafael Auler SymbolName = std::string(RelSymbol->getName()); 2226a34c753fSRafael Auler IsSectionRelocation = false; 2227a34c753fSRafael Auler } else { 2228a34c753fSRafael Auler const SymbolRef &Symbol = *SymbolIter; 2229a34c753fSRafael Auler SymbolName = std::string(cantFail(Symbol.getName())); 2230a34c753fSRafael Auler SymbolAddress = cantFail(Symbol.getAddress()); 2231a34c753fSRafael Auler SkipVerification = (cantFail(Symbol.getType()) == SymbolRef::ST_Other); 2232a34c753fSRafael Auler // Section symbols are marked as ST_Debug. 2233a34c753fSRafael Auler IsSectionRelocation = (cantFail(Symbol.getType()) == SymbolRef::ST_Debug); 223400b6efc8SVladislav Khmelevsky // Check for PLT entry registered with symbol name 22356c8933e1Ssinan if (!SymbolAddress && !IsWeakReference(Symbol) && 22366c8933e1Ssinan (IsAArch64 || BC->isRISCV())) { 22374956e0e1SVladislav Khmelevsky const BinaryData *BD = BC->getPLTBinaryDataByName(SymbolName); 223800b6efc8SVladislav Khmelevsky SymbolAddress = BD ? BD->getAddress() : 0; 223900b6efc8SVladislav Khmelevsky } 2240a34c753fSRafael Auler } 2241a34c753fSRafael Auler // For PIE or dynamic libs, the linker may choose not to put the relocation 2242a34c753fSRafael Auler // result at the address if it is a X86_64_64 one because it will emit a 2243a34c753fSRafael Auler // dynamic relocation (X86_RELATIVE) for the dynamic linker and loader to 2244a34c753fSRafael Auler // resolve it at run time. The static relocation result goes as the addend 2245a34c753fSRafael Auler // of the dynamic relocation in this case. We can't verify these cases. 2246a34c753fSRafael Auler // FIXME: perhaps we can try to find if it really emitted a corresponding 2247a34c753fSRafael Auler // RELATIVE relocation at this offset with the correct value as the addend. 2248a34c753fSRafael Auler if (!BC->HasFixedLoadAddress && RelSize == 8) 2249a34c753fSRafael Auler SkipVerification = true; 2250a34c753fSRafael Auler 2251a34c753fSRafael Auler if (IsSectionRelocation && !IsAArch64) { 2252a34c753fSRafael Auler ErrorOr<BinarySection &> Section = BC->getSectionForAddress(SymbolAddress); 2253a34c753fSRafael Auler assert(Section && "section expected for section relocation"); 2254a34c753fSRafael Auler SymbolName = "section " + std::string(Section->getName()); 2255a34c753fSRafael Auler // Convert section symbol relocations to regular relocations inside 2256a34c753fSRafael Auler // non-section symbols. 2257a34c753fSRafael Auler if (Section->containsAddress(ExtractedValue) && !IsPCRelative) { 2258a34c753fSRafael Auler SymbolAddress = ExtractedValue; 2259a34c753fSRafael Auler Addend = 0; 2260a34c753fSRafael Auler } else { 2261a34c753fSRafael Auler Addend = ExtractedValue - (SymbolAddress - PCRelOffset); 2262a34c753fSRafael Auler } 2263a34c753fSRafael Auler } 2264a34c753fSRafael Auler 2265a34c753fSRafael Auler // If no symbol has been found or if it is a relocation requiring the 2266a34c753fSRafael Auler // creation of a GOT entry, do not link against the symbol but against 2267a34c753fSRafael Auler // whatever address was extracted from the instruction itself. We are 2268a34c753fSRafael Auler // not creating a GOT entry as this was already processed by the linker. 2269a34c753fSRafael Auler // For GOT relocs, do not subtract addend as the addend does not refer 2270a34c753fSRafael Auler // to this instruction's target, but it refers to the target in the GOT 2271a34c753fSRafael Auler // entry. 2272a34c753fSRafael Auler if (Relocation::isGOT(RType)) { 2273a34c753fSRafael Auler Addend = 0; 2274a34c753fSRafael Auler SymbolAddress = ExtractedValue + PCRelOffset; 2275a34c753fSRafael Auler } else if (Relocation::isTLS(RType)) { 2276a34c753fSRafael Auler SkipVerification = true; 2277a34c753fSRafael Auler } else if (!SymbolAddress) { 2278a34c753fSRafael Auler assert(!IsSectionRelocation); 2279a34c753fSRafael Auler if (ExtractedValue || Addend == 0 || IsPCRelative) { 228040c2e0faSMaksim Panchenko SymbolAddress = 228140c2e0faSMaksim Panchenko truncateToSize(ExtractedValue - Addend + PCRelOffset, RelSize); 2282a34c753fSRafael Auler } else { 2283a34c753fSRafael Auler // This is weird case. The extracted value is zero but the addend is 2284a34c753fSRafael Auler // non-zero and the relocation is not pc-rel. Using the previous logic, 2285a34c753fSRafael Auler // the SymbolAddress would end up as a huge number. Seen in 2286a34c753fSRafael Auler // exceptions_pic.test. 2287a34c753fSRafael Auler LLVM_DEBUG(dbgs() << "BOLT-DEBUG: relocation @ 0x" 2288a34c753fSRafael Auler << Twine::utohexstr(Rel.getOffset()) 2289a34c753fSRafael Auler << " value does not match addend for " 2290a34c753fSRafael Auler << "relocation to undefined symbol.\n"); 2291a34c753fSRafael Auler return true; 2292a34c753fSRafael Auler } 2293a34c753fSRafael Auler } 2294a34c753fSRafael Auler 2295a34c753fSRafael Auler auto verifyExtractedValue = [&]() { 2296a34c753fSRafael Auler if (SkipVerification) 2297a34c753fSRafael Auler return true; 2298a34c753fSRafael Auler 2299f8730293SJob Noorman if (IsAArch64 || BC->isRISCV()) 2300a34c753fSRafael Auler return true; 2301a34c753fSRafael Auler 2302a34c753fSRafael Auler if (SymbolName == "__hot_start" || SymbolName == "__hot_end") 2303a34c753fSRafael Auler return true; 2304a34c753fSRafael Auler 2305a34c753fSRafael Auler if (RType == ELF::R_X86_64_PLT32) 2306a34c753fSRafael Auler return true; 2307a34c753fSRafael Auler 2308a34c753fSRafael Auler return truncateToSize(ExtractedValue, RelSize) == 2309a34c753fSRafael Auler truncateToSize(SymbolAddress + Addend - PCRelOffset, RelSize); 2310a34c753fSRafael Auler }; 2311a34c753fSRafael Auler 2312a34c753fSRafael Auler (void)verifyExtractedValue; 2313a34c753fSRafael Auler assert(verifyExtractedValue() && "mismatched extracted relocation value"); 2314a34c753fSRafael Auler 2315a34c753fSRafael Auler return true; 2316a34c753fSRafael Auler } 2317a34c753fSRafael Auler 2318a34c753fSRafael Auler void RewriteInstance::processDynamicRelocations() { 2319f9bf9f92SVladislav Khmelevsky // Read .relr.dyn section containing compressed R_*_RELATIVE relocations. 2320f9bf9f92SVladislav Khmelevsky if (DynamicRelrSize > 0) { 2321f9bf9f92SVladislav Khmelevsky ErrorOr<BinarySection &> DynamicRelrSectionOrErr = 2322f9bf9f92SVladislav Khmelevsky BC->getSectionForAddress(*DynamicRelrAddress); 2323f9bf9f92SVladislav Khmelevsky if (!DynamicRelrSectionOrErr) 2324f9bf9f92SVladislav Khmelevsky report_error("unable to find section corresponding to DT_RELR", 2325f9bf9f92SVladislav Khmelevsky DynamicRelrSectionOrErr.getError()); 2326f9bf9f92SVladislav Khmelevsky if (DynamicRelrSectionOrErr->getSize() != DynamicRelrSize) 2327f9bf9f92SVladislav Khmelevsky report_error("section size mismatch for DT_RELRSZ", 2328f9bf9f92SVladislav Khmelevsky errc::executable_format_error); 2329f9bf9f92SVladislav Khmelevsky readDynamicRelrRelocations(*DynamicRelrSectionOrErr); 2330f9bf9f92SVladislav Khmelevsky } 2331f9bf9f92SVladislav Khmelevsky 2332a34c753fSRafael Auler // Read relocations for PLT - DT_JMPREL. 2333a34c753fSRafael Auler if (PLTRelocationsSize > 0) { 2334a34c753fSRafael Auler ErrorOr<BinarySection &> PLTRelSectionOrErr = 2335a34c753fSRafael Auler BC->getSectionForAddress(*PLTRelocationsAddress); 2336ee0e9ccbSMaksim Panchenko if (!PLTRelSectionOrErr) 2337a34c753fSRafael Auler report_error("unable to find section corresponding to DT_JMPREL", 2338a34c753fSRafael Auler PLTRelSectionOrErr.getError()); 2339ee0e9ccbSMaksim Panchenko if (PLTRelSectionOrErr->getSize() != PLTRelocationsSize) 2340a34c753fSRafael Auler report_error("section size mismatch for DT_PLTRELSZ", 2341a34c753fSRafael Auler errc::executable_format_error); 2342228970f6Sspupyrev readDynamicRelocations(PLTRelSectionOrErr->getSectionRef(), 2343228970f6Sspupyrev /*IsJmpRel*/ true); 2344a34c753fSRafael Auler } 2345a34c753fSRafael Auler 2346a34c753fSRafael Auler // The rest of dynamic relocations - DT_RELA. 2347cf18f142SVladislav Khmelevsky // The static executable might have .rela.dyn secion and not have PT_DYNAMIC 2348cf18f142SVladislav Khmelevsky if (!DynamicRelocationsSize && BC->IsStaticExecutable) { 2349cf18f142SVladislav Khmelevsky ErrorOr<BinarySection &> DynamicRelSectionOrErr = 2350cf18f142SVladislav Khmelevsky BC->getUniqueSectionByName(getRelaDynSectionName()); 2351cf18f142SVladislav Khmelevsky if (DynamicRelSectionOrErr) { 2352cf18f142SVladislav Khmelevsky DynamicRelocationsAddress = DynamicRelSectionOrErr->getAddress(); 2353cf18f142SVladislav Khmelevsky DynamicRelocationsSize = DynamicRelSectionOrErr->getSize(); 2354cf18f142SVladislav Khmelevsky const SectionRef &SectionRef = DynamicRelSectionOrErr->getSectionRef(); 2355cf18f142SVladislav Khmelevsky DynamicRelativeRelocationsCount = std::distance( 2356cf18f142SVladislav Khmelevsky SectionRef.relocation_begin(), SectionRef.relocation_end()); 2357cf18f142SVladislav Khmelevsky } 2358cf18f142SVladislav Khmelevsky } 2359cf18f142SVladislav Khmelevsky 2360a34c753fSRafael Auler if (DynamicRelocationsSize > 0) { 2361a34c753fSRafael Auler ErrorOr<BinarySection &> DynamicRelSectionOrErr = 2362a34c753fSRafael Auler BC->getSectionForAddress(*DynamicRelocationsAddress); 2363ee0e9ccbSMaksim Panchenko if (!DynamicRelSectionOrErr) 2364a34c753fSRafael Auler report_error("unable to find section corresponding to DT_RELA", 2365a34c753fSRafael Auler DynamicRelSectionOrErr.getError()); 2366f8730293SJob Noorman auto DynamicRelSectionSize = DynamicRelSectionOrErr->getSize(); 2367f8730293SJob Noorman // On RISC-V DT_RELASZ seems to include both .rela.dyn and .rela.plt 2368f8730293SJob Noorman if (DynamicRelocationsSize == DynamicRelSectionSize + PLTRelocationsSize) 2369f8730293SJob Noorman DynamicRelocationsSize = DynamicRelSectionSize; 2370f8730293SJob Noorman if (DynamicRelSectionSize != DynamicRelocationsSize) 2371a34c753fSRafael Auler report_error("section size mismatch for DT_RELASZ", 2372a34c753fSRafael Auler errc::executable_format_error); 2373228970f6Sspupyrev readDynamicRelocations(DynamicRelSectionOrErr->getSectionRef(), 2374228970f6Sspupyrev /*IsJmpRel*/ false); 2375a34c753fSRafael Auler } 2376a34c753fSRafael Auler } 2377a34c753fSRafael Auler 2378a34c753fSRafael Auler void RewriteInstance::processRelocations() { 2379a34c753fSRafael Auler if (!BC->HasRelocations) 2380a34c753fSRafael Auler return; 2381a34c753fSRafael Auler 2382a34c753fSRafael Auler for (const SectionRef &Section : InputFile->sections()) { 23834dd20b07SNathan Sidwell section_iterator SecIter = cantFail(Section.getRelocatedSection()); 23844dd20b07SNathan Sidwell if (SecIter == InputFile->section_end()) 23854dd20b07SNathan Sidwell continue; 23864dd20b07SNathan Sidwell if (BinarySection(*BC, Section).isAllocatable()) 23874dd20b07SNathan Sidwell continue; 23884dd20b07SNathan Sidwell 2389a34c753fSRafael Auler readRelocations(Section); 2390a34c753fSRafael Auler } 2391a34c753fSRafael Auler 2392a34c753fSRafael Auler if (NumFailedRelocations) 239352cf0711SAmir Ayupov BC->errs() << "BOLT-WARNING: Failed to analyze " << NumFailedRelocations 2394a34c753fSRafael Auler << " relocations\n"; 2395a34c753fSRafael Auler } 2396a34c753fSRafael Auler 2397228970f6Sspupyrev void RewriteInstance::readDynamicRelocations(const SectionRef &Section, 2398228970f6Sspupyrev bool IsJmpRel) { 2399a34c753fSRafael Auler assert(BinarySection(*BC, Section).isAllocatable() && "allocatable expected"); 2400a34c753fSRafael Auler 2401a34c753fSRafael Auler LLVM_DEBUG({ 2402a34c753fSRafael Auler StringRef SectionName = cantFail(Section.getName()); 2403a34c753fSRafael Auler dbgs() << "BOLT-DEBUG: reading relocations for section " << SectionName 2404a34c753fSRafael Auler << ":\n"; 2405a34c753fSRafael Auler }); 2406a34c753fSRafael Auler 2407a34c753fSRafael Auler for (const RelocationRef &Rel : Section.relocations()) { 2408228970f6Sspupyrev const uint64_t RType = Rel.getType(); 2409a34c753fSRafael Auler if (Relocation::isNone(RType)) 2410a34c753fSRafael Auler continue; 2411a34c753fSRafael Auler 2412a34c753fSRafael Auler StringRef SymbolName = "<none>"; 2413a34c753fSRafael Auler MCSymbol *Symbol = nullptr; 2414a34c753fSRafael Auler uint64_t SymbolAddress = 0; 2415a34c753fSRafael Auler const uint64_t Addend = getRelocationAddend(InputFile, Rel); 2416a34c753fSRafael Auler 2417a34c753fSRafael Auler symbol_iterator SymbolIter = Rel.getSymbol(); 2418a34c753fSRafael Auler if (SymbolIter != InputFile->symbol_end()) { 2419a34c753fSRafael Auler SymbolName = cantFail(SymbolIter->getName()); 2420a34c753fSRafael Auler BinaryData *BD = BC->getBinaryDataByName(SymbolName); 2421a34c753fSRafael Auler Symbol = BD ? BD->getSymbol() 2422a34c753fSRafael Auler : BC->getOrCreateUndefinedGlobalSymbol(SymbolName); 2423a34c753fSRafael Auler SymbolAddress = cantFail(SymbolIter->getAddress()); 2424a34c753fSRafael Auler (void)SymbolAddress; 2425a34c753fSRafael Auler } 2426a34c753fSRafael Auler 2427a34c753fSRafael Auler LLVM_DEBUG( 2428a34c753fSRafael Auler SmallString<16> TypeName; 2429a34c753fSRafael Auler Rel.getTypeName(TypeName); 2430a34c753fSRafael Auler dbgs() << "BOLT-DEBUG: dynamic relocation at 0x" 2431a34c753fSRafael Auler << Twine::utohexstr(Rel.getOffset()) << " : " << TypeName 2432a34c753fSRafael Auler << " : " << SymbolName << " : " << Twine::utohexstr(SymbolAddress) 2433a34c753fSRafael Auler << " : + 0x" << Twine::utohexstr(Addend) << '\n' 2434a34c753fSRafael Auler ); 2435a34c753fSRafael Auler 2436228970f6Sspupyrev if (IsJmpRel) 2437228970f6Sspupyrev IsJmpRelocation[RType] = true; 2438228970f6Sspupyrev 2439228970f6Sspupyrev if (Symbol) 2440228970f6Sspupyrev SymbolIndex[Symbol] = getRelocationSymbol(InputFile, Rel); 2441228970f6Sspupyrev 2442228970f6Sspupyrev BC->addDynamicRelocation(Rel.getOffset(), Symbol, RType, Addend); 2443a34c753fSRafael Auler } 2444a34c753fSRafael Auler } 2445a34c753fSRafael Auler 2446f9bf9f92SVladislav Khmelevsky void RewriteInstance::readDynamicRelrRelocations(BinarySection &Section) { 2447f9bf9f92SVladislav Khmelevsky assert(Section.isAllocatable() && "allocatable expected"); 2448f9bf9f92SVladislav Khmelevsky 2449f9bf9f92SVladislav Khmelevsky LLVM_DEBUG({ 2450f9bf9f92SVladislav Khmelevsky StringRef SectionName = Section.getName(); 2451f9bf9f92SVladislav Khmelevsky dbgs() << "BOLT-DEBUG: reading relocations in section " << SectionName 2452f9bf9f92SVladislav Khmelevsky << ":\n"; 2453f9bf9f92SVladislav Khmelevsky }); 2454f9bf9f92SVladislav Khmelevsky 2455f9bf9f92SVladislav Khmelevsky const uint64_t RType = Relocation::getRelative(); 2456f9bf9f92SVladislav Khmelevsky const uint8_t PSize = BC->AsmInfo->getCodePointerSize(); 2457f9bf9f92SVladislav Khmelevsky const uint64_t MaxDelta = ((CHAR_BIT * DynamicRelrEntrySize) - 1) * PSize; 2458f9bf9f92SVladislav Khmelevsky 2459f9bf9f92SVladislav Khmelevsky auto ExtractAddendValue = [&](uint64_t Address) -> uint64_t { 2460f9bf9f92SVladislav Khmelevsky ErrorOr<BinarySection &> Section = BC->getSectionForAddress(Address); 2461f9bf9f92SVladislav Khmelevsky assert(Section && "cannot get section for data address from RELR"); 2462f9bf9f92SVladislav Khmelevsky DataExtractor DE = DataExtractor(Section->getContents(), 2463f9bf9f92SVladislav Khmelevsky BC->AsmInfo->isLittleEndian(), PSize); 2464f9bf9f92SVladislav Khmelevsky uint64_t Offset = Address - Section->getAddress(); 2465f9bf9f92SVladislav Khmelevsky return DE.getUnsigned(&Offset, PSize); 2466f9bf9f92SVladislav Khmelevsky }; 2467f9bf9f92SVladislav Khmelevsky 2468f9bf9f92SVladislav Khmelevsky auto AddRelocation = [&](uint64_t Address) { 2469f9bf9f92SVladislav Khmelevsky uint64_t Addend = ExtractAddendValue(Address); 2470f9bf9f92SVladislav Khmelevsky LLVM_DEBUG(dbgs() << "BOLT-DEBUG: R_*_RELATIVE relocation at 0x" 2471f9bf9f92SVladislav Khmelevsky << Twine::utohexstr(Address) << " to 0x" 2472f9bf9f92SVladislav Khmelevsky << Twine::utohexstr(Addend) << '\n';); 2473f9bf9f92SVladislav Khmelevsky BC->addDynamicRelocation(Address, nullptr, RType, Addend); 2474f9bf9f92SVladislav Khmelevsky }; 2475f9bf9f92SVladislav Khmelevsky 2476f9bf9f92SVladislav Khmelevsky DataExtractor DE = DataExtractor(Section.getContents(), 2477f9bf9f92SVladislav Khmelevsky BC->AsmInfo->isLittleEndian(), PSize); 2478f9bf9f92SVladislav Khmelevsky uint64_t Offset = 0, Address = 0; 2479f9bf9f92SVladislav Khmelevsky uint64_t RelrCount = DynamicRelrSize / DynamicRelrEntrySize; 2480f9bf9f92SVladislav Khmelevsky while (RelrCount--) { 2481f9bf9f92SVladislav Khmelevsky assert(DE.isValidOffset(Offset)); 2482f9bf9f92SVladislav Khmelevsky uint64_t Entry = DE.getUnsigned(&Offset, DynamicRelrEntrySize); 2483f9bf9f92SVladislav Khmelevsky if ((Entry & 1) == 0) { 2484f9bf9f92SVladislav Khmelevsky AddRelocation(Entry); 2485f9bf9f92SVladislav Khmelevsky Address = Entry + PSize; 2486f9bf9f92SVladislav Khmelevsky } else { 2487f9bf9f92SVladislav Khmelevsky const uint64_t StartAddress = Address; 2488f9bf9f92SVladislav Khmelevsky while (Entry >>= 1) { 2489f9bf9f92SVladislav Khmelevsky if (Entry & 1) 2490f9bf9f92SVladislav Khmelevsky AddRelocation(Address); 2491f9bf9f92SVladislav Khmelevsky 2492f9bf9f92SVladislav Khmelevsky Address += PSize; 2493f9bf9f92SVladislav Khmelevsky } 2494f9bf9f92SVladislav Khmelevsky 2495f9bf9f92SVladislav Khmelevsky Address = StartAddress + MaxDelta; 2496f9bf9f92SVladislav Khmelevsky } 2497f9bf9f92SVladislav Khmelevsky } 2498f9bf9f92SVladislav Khmelevsky } 2499f9bf9f92SVladislav Khmelevsky 25004ddc9c8eSAmir Ayupov void RewriteInstance::printRelocationInfo(const RelocationRef &Rel, 25014ddc9c8eSAmir Ayupov StringRef SymbolName, 25024ddc9c8eSAmir Ayupov uint64_t SymbolAddress, 25034ddc9c8eSAmir Ayupov uint64_t Addend, 25044ddc9c8eSAmir Ayupov uint64_t ExtractedValue) const { 25054ddc9c8eSAmir Ayupov SmallString<16> TypeName; 25064ddc9c8eSAmir Ayupov Rel.getTypeName(TypeName); 25074ddc9c8eSAmir Ayupov const uint64_t Address = SymbolAddress + Addend; 25084ddc9c8eSAmir Ayupov const uint64_t Offset = Rel.getOffset(); 25094ddc9c8eSAmir Ayupov ErrorOr<BinarySection &> Section = BC->getSectionForAddress(SymbolAddress); 25104ddc9c8eSAmir Ayupov BinaryFunction *Func = 25114ddc9c8eSAmir Ayupov BC->getBinaryFunctionContainingAddress(Offset, false, BC->isAArch64()); 25124ddc9c8eSAmir Ayupov dbgs() << formatv("Relocation: offset = {0:x}; type = {1}; value = {2:x}; ", 25134ddc9c8eSAmir Ayupov Offset, TypeName, ExtractedValue) 25144ddc9c8eSAmir Ayupov << formatv("symbol = {0} ({1}); symbol address = {2:x}; ", SymbolName, 25154ddc9c8eSAmir Ayupov Section ? Section->getName() : "", SymbolAddress) 25164ddc9c8eSAmir Ayupov << formatv("addend = {0:x}; address = {1:x}; in = ", Addend, Address); 25174ddc9c8eSAmir Ayupov if (Func) 25184ddc9c8eSAmir Ayupov dbgs() << Func->getPrintName(); 25194ddc9c8eSAmir Ayupov else 25204ddc9c8eSAmir Ayupov dbgs() << BC->getSectionForAddress(Rel.getOffset())->getName(); 25214ddc9c8eSAmir Ayupov dbgs() << '\n'; 25224ddc9c8eSAmir Ayupov } 25234ddc9c8eSAmir Ayupov 2524a34c753fSRafael Auler void RewriteInstance::readRelocations(const SectionRef &Section) { 2525a34c753fSRafael Auler LLVM_DEBUG({ 2526a34c753fSRafael Auler StringRef SectionName = cantFail(Section.getName()); 2527a34c753fSRafael Auler dbgs() << "BOLT-DEBUG: reading relocations for section " << SectionName 2528a34c753fSRafael Auler << ":\n"; 2529a34c753fSRafael Auler }); 2530a34c753fSRafael Auler if (BinarySection(*BC, Section).isAllocatable()) { 2531a34c753fSRafael Auler LLVM_DEBUG(dbgs() << "BOLT-DEBUG: ignoring runtime relocations\n"); 2532a34c753fSRafael Auler return; 2533a34c753fSRafael Auler } 2534a34c753fSRafael Auler section_iterator SecIter = cantFail(Section.getRelocatedSection()); 2535a34c753fSRafael Auler assert(SecIter != InputFile->section_end() && "relocated section expected"); 2536a34c753fSRafael Auler SectionRef RelocatedSection = *SecIter; 2537a34c753fSRafael Auler 2538a34c753fSRafael Auler StringRef RelocatedSectionName = cantFail(RelocatedSection.getName()); 2539a34c753fSRafael Auler LLVM_DEBUG(dbgs() << "BOLT-DEBUG: relocated section is " 2540a34c753fSRafael Auler << RelocatedSectionName << '\n'); 2541a34c753fSRafael Auler 2542a34c753fSRafael Auler if (!BinarySection(*BC, RelocatedSection).isAllocatable()) { 2543a34c753fSRafael Auler LLVM_DEBUG(dbgs() << "BOLT-DEBUG: ignoring relocations against " 2544a34c753fSRafael Auler << "non-allocatable section\n"); 2545a34c753fSRafael Auler return; 2546a34c753fSRafael Auler } 2547a34c753fSRafael Auler const bool SkipRelocs = StringSwitch<bool>(RelocatedSectionName) 2548a34c753fSRafael Auler .Cases(".plt", ".rela.plt", ".got.plt", 2549a34c753fSRafael Auler ".eh_frame", ".gcc_except_table", true) 2550a34c753fSRafael Auler .Default(false); 2551a34c753fSRafael Auler if (SkipRelocs) { 2552a34c753fSRafael Auler LLVM_DEBUG( 2553a34c753fSRafael Auler dbgs() << "BOLT-DEBUG: ignoring relocations against known section\n"); 2554a34c753fSRafael Auler return; 2555a34c753fSRafael Auler } 2556a34c753fSRafael Auler 255770d0134fSAmir Ayupov for (const RelocationRef &Rel : Section.relocations()) 255870d0134fSAmir Ayupov handleRelocation(RelocatedSection, Rel); 255970d0134fSAmir Ayupov } 256070d0134fSAmir Ayupov 256170d0134fSAmir Ayupov void RewriteInstance::handleRelocation(const SectionRef &RelocatedSection, 256270d0134fSAmir Ayupov const RelocationRef &Rel) { 2563a34c753fSRafael Auler const bool IsAArch64 = BC->isAArch64(); 2564a34c753fSRafael Auler const bool IsFromCode = RelocatedSection.isText(); 2565a34c753fSRafael Auler 2566a34c753fSRafael Auler SmallString<16> TypeName; 2567a34c753fSRafael Auler Rel.getTypeName(TypeName); 2568a34c753fSRafael Auler uint64_t RType = Rel.getType(); 25696e26ffa0SVladislav Khmelevsky if (Relocation::skipRelocationType(RType)) 257070d0134fSAmir Ayupov return; 2571a34c753fSRafael Auler 2572a34c753fSRafael Auler // Adjust the relocation type as the linker might have skewed it. 2573a34c753fSRafael Auler if (BC->isX86() && (RType & ELF::R_X86_64_converted_reloc_bit)) { 2574ee0e9ccbSMaksim Panchenko if (opts::Verbosity >= 1) 2575a34c753fSRafael Auler dbgs() << "BOLT-WARNING: ignoring R_X86_64_converted_reloc_bit\n"; 2576a34c753fSRafael Auler RType &= ~ELF::R_X86_64_converted_reloc_bit; 2577a34c753fSRafael Auler } 2578a34c753fSRafael Auler 2579a34c753fSRafael Auler if (Relocation::isTLS(RType)) { 2580a34c753fSRafael Auler // No special handling required for TLS relocations on X86. 2581a34c753fSRafael Auler if (BC->isX86()) 258270d0134fSAmir Ayupov return; 2583a34c753fSRafael Auler 2584c7d6d622SJob Noorman // The non-got related TLS relocations on AArch64 and RISC-V also could be 2585c7d6d622SJob Noorman // skipped. 2586a34c753fSRafael Auler if (!Relocation::isGOT(RType)) 258770d0134fSAmir Ayupov return; 2588a34c753fSRafael Auler } 2589a34c753fSRafael Auler 25903b1314f4SVladislav Khmelevsky if (!IsAArch64 && BC->getDynamicRelocationAt(Rel.getOffset())) { 2591e3359937SAmir Ayupov LLVM_DEBUG({ 2592e3359937SAmir Ayupov dbgs() << formatv("BOLT-DEBUG: address {0:x} has a ", Rel.getOffset()) 2593e3359937SAmir Ayupov << "dynamic relocation against it. Ignoring static relocation.\n"; 2594e3359937SAmir Ayupov }); 259570d0134fSAmir Ayupov return; 2596a34c753fSRafael Auler } 2597a34c753fSRafael Auler 2598a34c753fSRafael Auler std::string SymbolName; 2599a34c753fSRafael Auler uint64_t SymbolAddress; 2600a34c753fSRafael Auler int64_t Addend; 2601a34c753fSRafael Auler uint64_t ExtractedValue; 2602a34c753fSRafael Auler bool IsSectionRelocation; 2603a34c753fSRafael Auler bool Skip; 2604a34c753fSRafael Auler if (!analyzeRelocation(Rel, RType, SymbolName, IsSectionRelocation, 2605a34c753fSRafael Auler SymbolAddress, Addend, ExtractedValue, Skip)) { 2606e3359937SAmir Ayupov LLVM_DEBUG({ 2607e3359937SAmir Ayupov dbgs() << "BOLT-WARNING: failed to analyze relocation @ offset = " 2608e3359937SAmir Ayupov << formatv("{0:x}; type name = {1}\n", Rel.getOffset(), TypeName); 2609e3359937SAmir Ayupov }); 2610a34c753fSRafael Auler ++NumFailedRelocations; 261170d0134fSAmir Ayupov return; 2612a34c753fSRafael Auler } 2613a34c753fSRafael Auler 2614a34c753fSRafael Auler if (Skip) { 2615e3359937SAmir Ayupov LLVM_DEBUG({ 2616e3359937SAmir Ayupov dbgs() << "BOLT-DEBUG: skipping relocation @ offset = " 2617e3359937SAmir Ayupov << formatv("{0:x}; type name = {1}\n", Rel.getOffset(), TypeName); 2618e3359937SAmir Ayupov }); 261970d0134fSAmir Ayupov return; 2620a34c753fSRafael Auler } 2621a34c753fSRafael Auler 2622a34c753fSRafael Auler const uint64_t Address = SymbolAddress + Addend; 2623a34c753fSRafael Auler 262470d0134fSAmir Ayupov LLVM_DEBUG({ 262570d0134fSAmir Ayupov dbgs() << "BOLT-DEBUG: "; 262670d0134fSAmir Ayupov printRelocationInfo(Rel, SymbolName, SymbolAddress, Addend, ExtractedValue); 262770d0134fSAmir Ayupov }); 2628a34c753fSRafael Auler 2629a34c753fSRafael Auler BinaryFunction *ContainingBF = nullptr; 2630a34c753fSRafael Auler if (IsFromCode) { 2631a34c753fSRafael Auler ContainingBF = 2632a34c753fSRafael Auler BC->getBinaryFunctionContainingAddress(Rel.getOffset(), 2633a34c753fSRafael Auler /*CheckPastEnd*/ false, 2634a34c753fSRafael Auler /*UseMaxSize*/ true); 2635a34c753fSRafael Auler assert(ContainingBF && "cannot find function for address in code"); 2636a34c753fSRafael Auler if (!IsAArch64 && !ContainingBF->containsAddress(Rel.getOffset())) { 2637ee0e9ccbSMaksim Panchenko if (opts::Verbosity >= 1) 263852cf0711SAmir Ayupov BC->outs() << formatv( 263952cf0711SAmir Ayupov "BOLT-INFO: {0} has relocations in padding area\n", *ContainingBF); 2640a34c753fSRafael Auler ContainingBF->setSize(ContainingBF->getMaxSize()); 2641a34c753fSRafael Auler ContainingBF->setSimple(false); 264270d0134fSAmir Ayupov return; 2643a34c753fSRafael Auler } 2644a34c753fSRafael Auler } 2645a34c753fSRafael Auler 26464101aa13SMaksim Panchenko MCSymbol *ReferencedSymbol = nullptr; 2647853e126cSRafael Auler if (!IsSectionRelocation) { 26484101aa13SMaksim Panchenko if (BinaryData *BD = BC->getBinaryDataByName(SymbolName)) 26494101aa13SMaksim Panchenko ReferencedSymbol = BD->getSymbol(); 2650853e126cSRafael Auler else if (BC->isGOTSymbol(SymbolName)) 2651853e126cSRafael Auler if (BinaryData *BD = BC->getGOTSymbol()) 2652853e126cSRafael Auler ReferencedSymbol = BD->getSymbol(); 2653853e126cSRafael Auler } 26544101aa13SMaksim Panchenko 26550776fc32Syavtuk ErrorOr<BinarySection &> ReferencedSection{std::errc::bad_address}; 26560776fc32Syavtuk symbol_iterator SymbolIter = Rel.getSymbol(); 26570776fc32Syavtuk if (SymbolIter != InputFile->symbol_end()) { 26580776fc32Syavtuk SymbolRef Symbol = *SymbolIter; 26590776fc32Syavtuk section_iterator Section = 26600776fc32Syavtuk cantFail(Symbol.getSection(), "cannot get symbol section"); 26610776fc32Syavtuk if (Section != InputFile->section_end()) { 26620776fc32Syavtuk Expected<StringRef> SectionName = Section->getName(); 26630776fc32Syavtuk if (SectionName && !SectionName->empty()) 26640776fc32Syavtuk ReferencedSection = BC->getUniqueSectionByName(*SectionName); 2665097ddd35SVladislav Khmelevsky } else if (BC->isRISCV() && ReferencedSymbol && ContainingBF && 2666f8730293SJob Noorman (cantFail(Symbol.getFlags()) & SymbolRef::SF_Absolute)) { 2667f8730293SJob Noorman // This might be a relocation for an ABS symbols like __global_pointer$ on 2668f8730293SJob Noorman // RISC-V 2669f8730293SJob Noorman ContainingBF->addRelocation(Rel.getOffset(), ReferencedSymbol, 2670f8730293SJob Noorman Rel.getType(), 0, 2671f8730293SJob Noorman cantFail(Symbol.getValue())); 2672f8730293SJob Noorman return; 26730776fc32Syavtuk } 26740776fc32Syavtuk } 26750776fc32Syavtuk 26760776fc32Syavtuk if (!ReferencedSection) 26770776fc32Syavtuk ReferencedSection = BC->getSectionForAddress(SymbolAddress); 267836cb7366SMaksim Panchenko 267936cb7366SMaksim Panchenko const bool IsToCode = ReferencedSection && ReferencedSection->isText(); 268036cb7366SMaksim Panchenko 268136cb7366SMaksim Panchenko // Special handling of PC-relative relocations. 2682603fa4c6SNathan Sidwell if (BC->isX86() && Relocation::isPCRelative(RType)) { 268336cb7366SMaksim Panchenko if (!IsFromCode && IsToCode) { 268436cb7366SMaksim Panchenko // PC-relative relocations from data to code are tricky since the 268536cb7366SMaksim Panchenko // original information is typically lost after linking, even with 268636cb7366SMaksim Panchenko // '--emit-relocs'. Such relocations are normally used by PIC-style 268736cb7366SMaksim Panchenko // jump tables and they reference both the jump table and jump 268836cb7366SMaksim Panchenko // targets by computing the difference between the two. If we blindly 268936cb7366SMaksim Panchenko // apply the relocation, it will appear that it references an arbitrary 269036cb7366SMaksim Panchenko // location in the code, possibly in a different function from the one 269136cb7366SMaksim Panchenko // containing the jump table. 269236cb7366SMaksim Panchenko // 269336cb7366SMaksim Panchenko // For that reason, we only register the fact that there is a 269436cb7366SMaksim Panchenko // PC-relative relocation at a given address against the code. 269536cb7366SMaksim Panchenko // The actual referenced label/address will be determined during jump 269636cb7366SMaksim Panchenko // table analysis. 2697a34c753fSRafael Auler BC->addPCRelativeDataRelocation(Rel.getOffset()); 269836cb7366SMaksim Panchenko } else if (ContainingBF && !IsSectionRelocation && ReferencedSymbol) { 269936cb7366SMaksim Panchenko // If we know the referenced symbol, register the relocation from 270036cb7366SMaksim Panchenko // the code. It's required to properly handle cases where 270136cb7366SMaksim Panchenko // "symbol + addend" references an object different from "symbol". 27024101aa13SMaksim Panchenko ContainingBF->addRelocation(Rel.getOffset(), ReferencedSymbol, RType, 27034101aa13SMaksim Panchenko Addend, ExtractedValue); 270436cb7366SMaksim Panchenko } else { 2705e3359937SAmir Ayupov LLVM_DEBUG({ 2706e3359937SAmir Ayupov dbgs() << "BOLT-DEBUG: not creating PC-relative relocation at" 2707e3359937SAmir Ayupov << formatv("{0:x} for {1}\n", Rel.getOffset(), SymbolName); 2708e3359937SAmir Ayupov }); 270936cb7366SMaksim Panchenko } 271036cb7366SMaksim Panchenko 271170d0134fSAmir Ayupov return; 2712a34c753fSRafael Auler } 2713a34c753fSRafael Auler 2714a34c753fSRafael Auler bool ForceRelocation = BC->forceSymbolRelocations(SymbolName); 2715f8730293SJob Noorman if ((BC->isAArch64() || BC->isRISCV()) && Relocation::isGOT(RType)) 2716a34c753fSRafael Auler ForceRelocation = true; 271736cb7366SMaksim Panchenko 271836cb7366SMaksim Panchenko if (!ReferencedSection && !ForceRelocation) { 271970d0134fSAmir Ayupov LLVM_DEBUG(dbgs() << "BOLT-DEBUG: cannot determine referenced section.\n"); 272070d0134fSAmir Ayupov return; 2721a34c753fSRafael Auler } 2722a34c753fSRafael Auler 2723a34c753fSRafael Auler // Occasionally we may see a reference past the last byte of the function 2724a34c753fSRafael Auler // typically as a result of __builtin_unreachable(). Check it here. 2725a34c753fSRafael Auler BinaryFunction *ReferencedBF = BC->getBinaryFunctionContainingAddress( 2726a34c753fSRafael Auler Address, /*CheckPastEnd*/ true, /*UseMaxSize*/ IsAArch64); 2727a34c753fSRafael Auler 2728a34c753fSRafael Auler if (!IsSectionRelocation) { 2729a34c753fSRafael Auler if (BinaryFunction *BF = 2730a34c753fSRafael Auler BC->getBinaryFunctionContainingAddress(SymbolAddress)) { 2731a34c753fSRafael Auler if (BF != ReferencedBF) { 2732a34c753fSRafael Auler // It's possible we are referencing a function without referencing any 2733a34c753fSRafael Auler // code, e.g. when taking a bitmask action on a function address. 273452cf0711SAmir Ayupov BC->errs() 273552cf0711SAmir Ayupov << "BOLT-WARNING: non-standard function reference (e.g. bitmask)" 2736e3359937SAmir Ayupov << formatv(" detected against function {0} from ", *BF); 2737ee0e9ccbSMaksim Panchenko if (IsFromCode) 273852cf0711SAmir Ayupov BC->errs() << formatv("function {0}\n", *ContainingBF); 2739ee0e9ccbSMaksim Panchenko else 274052cf0711SAmir Ayupov BC->errs() << formatv("data section at {0:x}\n", Rel.getOffset()); 274140c2e0faSMaksim Panchenko LLVM_DEBUG(printRelocationInfo(Rel, SymbolName, SymbolAddress, Addend, 274240c2e0faSMaksim Panchenko ExtractedValue)); 2743a34c753fSRafael Auler ReferencedBF = BF; 2744a34c753fSRafael Auler } 2745a34c753fSRafael Auler } 2746a34c753fSRafael Auler } else if (ReferencedBF) { 274736cb7366SMaksim Panchenko assert(ReferencedSection && "section expected for section relocation"); 274836cb7366SMaksim Panchenko if (*ReferencedBF->getOriginSection() != *ReferencedSection) { 2749a34c753fSRafael Auler LLVM_DEBUG(dbgs() << "BOLT-DEBUG: ignoring false function reference\n"); 2750a34c753fSRafael Auler ReferencedBF = nullptr; 2751a34c753fSRafael Auler } 2752a34c753fSRafael Auler } 2753a34c753fSRafael Auler 2754a34c753fSRafael Auler // Workaround for a member function pointer de-virtualization bug. We check 2755a34c753fSRafael Auler // if a non-pc-relative relocation in the code is pointing to (fptr - 1). 2756a34c753fSRafael Auler if (IsToCode && ContainingBF && !Relocation::isPCRelative(RType) && 2757a34c753fSRafael Auler (!ReferencedBF || (ReferencedBF->getAddress() != Address))) { 2758a34c753fSRafael Auler if (const BinaryFunction *RogueBF = 2759a34c753fSRafael Auler BC->getBinaryFunctionAtAddress(Address + 1)) { 2760a34c753fSRafael Auler // Do an extra check that the function was referenced previously. 2761a34c753fSRafael Auler // It's a linear search, but it should rarely happen. 276243f382a9SAmir Ayupov auto CheckReloc = [&](const Relocation &Rel) { 2763f119a248SAmir Ayupov return Rel.Symbol == RogueBF->getSymbol() && 2764f119a248SAmir Ayupov !Relocation::isPCRelative(Rel.Type); 276543f382a9SAmir Ayupov }; 276643f382a9SAmir Ayupov bool Found = llvm::any_of( 276743f382a9SAmir Ayupov llvm::make_second_range(ContainingBF->Relocations), CheckReloc); 2768a34c753fSRafael Auler 2769a34c753fSRafael Auler if (Found) { 277052cf0711SAmir Ayupov BC->errs() 277152cf0711SAmir Ayupov << "BOLT-WARNING: detected possible compiler de-virtualization " 2772e3359937SAmir Ayupov "bug: -1 addend used with non-pc-relative relocation against " 2773e3359937SAmir Ayupov << formatv("function {0} in function {1}\n", *RogueBF, 2774e3359937SAmir Ayupov *ContainingBF); 277570d0134fSAmir Ayupov return; 2776a34c753fSRafael Auler } 2777a34c753fSRafael Auler } 2778a34c753fSRafael Auler } 2779a34c753fSRafael Auler 2780a34c753fSRafael Auler if (ForceRelocation) { 278117ed8f29SVladislav Khmelevsky std::string Name = 278217ed8f29SVladislav Khmelevsky Relocation::isGOT(RType) ? "__BOLT_got_zero" : SymbolName; 2783a34c753fSRafael Auler ReferencedSymbol = BC->registerNameAtAddress(Name, 0, 0, 0); 2784a34c753fSRafael Auler SymbolAddress = 0; 2785a34c753fSRafael Auler if (Relocation::isGOT(RType)) 2786a34c753fSRafael Auler Addend = Address; 2787a34c753fSRafael Auler LLVM_DEBUG(dbgs() << "BOLT-DEBUG: forcing relocation against symbol " 2788a34c753fSRafael Auler << SymbolName << " with addend " << Addend << '\n'); 2789a34c753fSRafael Auler } else if (ReferencedBF) { 2790a34c753fSRafael Auler ReferencedSymbol = ReferencedBF->getSymbol(); 2791a34c753fSRafael Auler uint64_t RefFunctionOffset = 0; 2792a34c753fSRafael Auler 2793a34c753fSRafael Auler // Adjust the point of reference to a code location inside a function. 2794a34c753fSRafael Auler if (ReferencedBF->containsAddress(Address, /*UseMaxSize = */ true)) { 2795a34c753fSRafael Auler RefFunctionOffset = Address - ReferencedBF->getAddress(); 2796ff5e2babSJob Noorman if (Relocation::isInstructionReference(RType)) { 2797ff5e2babSJob Noorman // Instruction labels are created while disassembling so we just leave 2798ff5e2babSJob Noorman // the symbol empty for now. Since the extracted value is typically 2799ff5e2babSJob Noorman // unrelated to the referenced symbol (e.g., %pcrel_lo in RISC-V 2800ff5e2babSJob Noorman // references an instruction but the patched value references the low 2801ff5e2babSJob Noorman // bits of a data address), we set the extracted value to the symbol 2802ff5e2babSJob Noorman // address in order to be able to correctly reconstruct the reference 2803ff5e2babSJob Noorman // later. 2804ff5e2babSJob Noorman ReferencedSymbol = nullptr; 2805ff5e2babSJob Noorman ExtractedValue = Address; 2806ff5e2babSJob Noorman } else if (RefFunctionOffset) { 2807a34c753fSRafael Auler if (ContainingBF && ContainingBF != ReferencedBF) { 2808a34c753fSRafael Auler ReferencedSymbol = 2809a34c753fSRafael Auler ReferencedBF->addEntryPointAtOffset(RefFunctionOffset); 2810a34c753fSRafael Auler } else { 2811a34c753fSRafael Auler ReferencedSymbol = 2812a34c753fSRafael Auler ReferencedBF->getOrCreateLocalLabel(Address, 2813a34c753fSRafael Auler /*CreatePastEnd =*/true); 281438ba2824SJob Noorman 281538ba2824SJob Noorman // If ContainingBF != nullptr, it equals ReferencedBF (see 281638ba2824SJob Noorman // if-condition above) so we're handling a relocation from a function 281738ba2824SJob Noorman // to itself. RISC-V uses such relocations for branches, for example. 281838ba2824SJob Noorman // These should not be registered as externally references offsets. 281938ba2824SJob Noorman if (!ContainingBF) 2820a34c753fSRafael Auler ReferencedBF->registerReferencedOffset(RefFunctionOffset); 2821a34c753fSRafael Auler } 2822a34c753fSRafael Auler if (opts::Verbosity > 1 && 282369a9bbf1SAmir Ayupov BinarySection(*BC, RelocatedSection).isWritable()) 282452cf0711SAmir Ayupov BC->errs() 282552cf0711SAmir Ayupov << "BOLT-WARNING: writable reference into the middle of the " 2826e3359937SAmir Ayupov << formatv("function {0} detected at address {1:x}\n", 2827e3359937SAmir Ayupov *ReferencedBF, Rel.getOffset()); 2828a34c753fSRafael Auler } 2829a34c753fSRafael Auler SymbolAddress = Address; 2830a34c753fSRafael Auler Addend = 0; 2831a34c753fSRafael Auler } 283270d0134fSAmir Ayupov LLVM_DEBUG({ 2833a34c753fSRafael Auler dbgs() << " referenced function " << *ReferencedBF; 2834a34c753fSRafael Auler if (Address != ReferencedBF->getAddress()) 2835e3359937SAmir Ayupov dbgs() << formatv(" at offset {0:x}", RefFunctionOffset); 283670d0134fSAmir Ayupov dbgs() << '\n'; 283770d0134fSAmir Ayupov }); 2838a34c753fSRafael Auler } else { 2839a34c753fSRafael Auler if (IsToCode && SymbolAddress) { 2840a34c753fSRafael Auler // This can happen e.g. with PIC-style jump tables. 2841a34c753fSRafael Auler LLVM_DEBUG(dbgs() << "BOLT-DEBUG: no corresponding function for " 2842a34c753fSRafael Auler "relocation against code\n"); 2843a34c753fSRafael Auler } 2844a34c753fSRafael Auler 2845a34c753fSRafael Auler // In AArch64 there are zero reasons to keep a reference to the 2846a34c753fSRafael Auler // "original" symbol plus addend. The original symbol is probably just a 2847a34c753fSRafael Auler // section symbol. If we are here, this means we are probably accessing 2848a34c753fSRafael Auler // data, so it is imperative to keep the original address. 2849a34c753fSRafael Auler if (IsAArch64) { 2850e3359937SAmir Ayupov SymbolName = formatv("SYMBOLat{0:x}", Address); 2851a34c753fSRafael Auler SymbolAddress = Address; 2852a34c753fSRafael Auler Addend = 0; 2853a34c753fSRafael Auler } 2854a34c753fSRafael Auler 2855a34c753fSRafael Auler if (BinaryData *BD = BC->getBinaryDataContainingAddress(SymbolAddress)) { 2856a34c753fSRafael Auler // Note: this assertion is trying to check sanity of BinaryData objects 2857a34c753fSRafael Auler // but AArch64 has inferred and incomplete object locations coming from 2858a34c753fSRafael Auler // GOT/TLS or any other non-trivial relocation (that requires creation 2859a34c753fSRafael Auler // of sections and whose symbol address is not really what should be 2860a34c753fSRafael Auler // encoded in the instruction). So we essentially disabled this check 2861a34c753fSRafael Auler // for AArch64 and live with bogus names for objects. 286240c2e0faSMaksim Panchenko assert((IsAArch64 || IsSectionRelocation || 2863a34c753fSRafael Auler BD->nameStartsWith(SymbolName) || 2864a34c753fSRafael Auler BD->nameStartsWith("PG" + SymbolName) || 2865a34c753fSRafael Auler (BD->nameStartsWith("ANONYMOUS") && 2866ad8fd5b1SKazu Hirata (BD->getSectionName().starts_with(".plt") || 2867ad8fd5b1SKazu Hirata BD->getSectionName().ends_with(".plt")))) && 2868e3359937SAmir Ayupov "BOLT symbol names of all non-section relocations must match up " 2869e3359937SAmir Ayupov "with symbol names referenced in the relocation"); 2870a34c753fSRafael Auler 2871ee0e9ccbSMaksim Panchenko if (IsSectionRelocation) 2872a34c753fSRafael Auler BC->markAmbiguousRelocations(*BD, Address); 2873a34c753fSRafael Auler 2874a34c753fSRafael Auler ReferencedSymbol = BD->getSymbol(); 2875a34c753fSRafael Auler Addend += (SymbolAddress - BD->getAddress()); 2876a34c753fSRafael Auler SymbolAddress = BD->getAddress(); 2877a34c753fSRafael Auler assert(Address == SymbolAddress + Addend); 2878a34c753fSRafael Auler } else { 2879a34c753fSRafael Auler // These are mostly local data symbols but undefined symbols 2880a34c753fSRafael Auler // in relocation sections can get through here too, from .plt. 288140c2e0faSMaksim Panchenko assert( 2882f8730293SJob Noorman (IsAArch64 || BC->isRISCV() || IsSectionRelocation || 2883ad8fd5b1SKazu Hirata BC->getSectionNameForAddress(SymbolAddress)->starts_with(".plt")) && 288440c2e0faSMaksim Panchenko "known symbols should not resolve to anonymous locals"); 2885a34c753fSRafael Auler 2886a34c753fSRafael Auler if (IsSectionRelocation) { 288740c2e0faSMaksim Panchenko ReferencedSymbol = 288840c2e0faSMaksim Panchenko BC->getOrCreateGlobalSymbol(SymbolAddress, "SYMBOLat"); 2889a34c753fSRafael Auler } else { 2890a34c753fSRafael Auler SymbolRef Symbol = *Rel.getSymbol(); 2891a34c753fSRafael Auler const uint64_t SymbolSize = 2892a34c753fSRafael Auler IsAArch64 ? 0 : ELFSymbolRef(Symbol).getSize(); 289370d0134fSAmir Ayupov const uint64_t SymbolAlignment = IsAArch64 ? 1 : Symbol.getAlignment(); 2894a34c753fSRafael Auler const uint32_t SymbolFlags = cantFail(Symbol.getFlags()); 2895a34c753fSRafael Auler std::string Name; 2896a34c753fSRafael Auler if (SymbolFlags & SymbolRef::SF_Global) { 2897a34c753fSRafael Auler Name = SymbolName; 2898a34c753fSRafael Auler } else { 289940c2e0faSMaksim Panchenko if (StringRef(SymbolName) 2900ad8fd5b1SKazu Hirata .starts_with(BC->AsmInfo->getPrivateGlobalPrefix())) 2901a34c753fSRafael Auler Name = NR.uniquify("PG" + SymbolName); 2902ee0e9ccbSMaksim Panchenko else 2903a34c753fSRafael Auler Name = NR.uniquify(SymbolName); 2904a34c753fSRafael Auler } 290540c2e0faSMaksim Panchenko ReferencedSymbol = BC->registerNameAtAddress( 290640c2e0faSMaksim Panchenko Name, SymbolAddress, SymbolSize, SymbolAlignment, SymbolFlags); 2907a34c753fSRafael Auler } 2908a34c753fSRafael Auler 2909a34c753fSRafael Auler if (IsSectionRelocation) { 2910a34c753fSRafael Auler BinaryData *BD = BC->getBinaryDataByName(ReferencedSymbol->getName()); 2911a34c753fSRafael Auler BC->markAmbiguousRelocations(*BD, Address); 2912a34c753fSRafael Auler } 2913a34c753fSRafael Auler } 2914a34c753fSRafael Auler } 2915a34c753fSRafael Auler 2916a34c753fSRafael Auler auto checkMaxDataRelocations = [&]() { 2917a34c753fSRafael Auler ++NumDataRelocations; 291843f382a9SAmir Ayupov LLVM_DEBUG(if (opts::MaxDataRelocations && 2919a34c753fSRafael Auler NumDataRelocations + 1 == opts::MaxDataRelocations) { 2920e3359937SAmir Ayupov dbgs() << "BOLT-DEBUG: processing ending on data relocation " 2921e3359937SAmir Ayupov << NumDataRelocations << ": "; 2922a34c753fSRafael Auler printRelocationInfo(Rel, ReferencedSymbol->getName(), SymbolAddress, 2923a34c753fSRafael Auler Addend, ExtractedValue); 292443f382a9SAmir Ayupov }); 2925a34c753fSRafael Auler 2926a34c753fSRafael Auler return (!opts::MaxDataRelocations || 2927a34c753fSRafael Auler NumDataRelocations < opts::MaxDataRelocations); 2928a34c753fSRafael Auler }; 2929a34c753fSRafael Auler 293036cb7366SMaksim Panchenko if ((ReferencedSection && refersToReorderedSection(ReferencedSection)) || 2931b410d24aSJob Noorman (opts::ForceToDataRelocations && checkMaxDataRelocations()) || 2932b410d24aSJob Noorman // RISC-V has ADD/SUB data-to-data relocations 2933b410d24aSJob Noorman BC->isRISCV()) 2934a34c753fSRafael Auler ForceRelocation = true; 2935a34c753fSRafael Auler 2936bd7b170eSNathan Sidwell if (IsFromCode) 293740c2e0faSMaksim Panchenko ContainingBF->addRelocation(Rel.getOffset(), ReferencedSymbol, RType, 293840c2e0faSMaksim Panchenko Addend, ExtractedValue); 2939bd7b170eSNathan Sidwell else if (IsToCode || ForceRelocation) 2940a34c753fSRafael Auler BC->addRelocation(Rel.getOffset(), ReferencedSymbol, RType, Addend, 2941a34c753fSRafael Auler ExtractedValue); 2942bd7b170eSNathan Sidwell else 294370d0134fSAmir Ayupov LLVM_DEBUG(dbgs() << "BOLT-DEBUG: ignoring relocation from data to data\n"); 2944a34c753fSRafael Auler } 2945a34c753fSRafael Auler 2946b5ed375fSPeter Waller static BinaryFunction *getInitFunctionIfStaticBinary(BinaryContext &BC) { 2947b5ed375fSPeter Waller // Workaround for https://github.com/llvm/llvm-project/issues/100096 2948b5ed375fSPeter Waller // ("[BOLT] GOT array pointer incorrectly rewritten"). In aarch64 2949b5ed375fSPeter Waller // static glibc binaries, the .init section's _init function pointer can 2950b5ed375fSPeter Waller // alias with a data pointer for the end of an array. GOT rewriting 2951b5ed375fSPeter Waller // currently can't detect this and updates the data pointer to the 2952b5ed375fSPeter Waller // moved _init, causing a runtime crash. Skipping _init on the other 2953b5ed375fSPeter Waller // hand should be harmless. 2954b5ed375fSPeter Waller if (!BC.IsStaticExecutable) 2955b5ed375fSPeter Waller return nullptr; 2956b5ed375fSPeter Waller const BinaryData *BD = BC.getBinaryDataByName("_init"); 2957b5ed375fSPeter Waller if (!BD || BD->getSectionName() != ".init") 2958b5ed375fSPeter Waller return nullptr; 2959b5ed375fSPeter Waller LLVM_DEBUG(dbgs() << "BOLT-DEBUG: skip _init in for GOT workaround.\n"); 2960b5ed375fSPeter Waller return BC.getBinaryFunctionAtAddress(BD->getAddress()); 2961b5ed375fSPeter Waller } 2962b5ed375fSPeter Waller 2963a34c753fSRafael Auler void RewriteInstance::selectFunctionsToProcess() { 2964a34c753fSRafael Auler // Extend the list of functions to process or skip from a file. 2965a34c753fSRafael Auler auto populateFunctionNames = [](cl::opt<std::string> &FunctionNamesFile, 2966a34c753fSRafael Auler cl::list<std::string> &FunctionNames) { 2967a34c753fSRafael Auler if (FunctionNamesFile.empty()) 2968a34c753fSRafael Auler return; 2969a34c753fSRafael Auler std::ifstream FuncsFile(FunctionNamesFile, std::ios::in); 2970a34c753fSRafael Auler std::string FuncName; 2971ee0e9ccbSMaksim Panchenko while (std::getline(FuncsFile, FuncName)) 2972a34c753fSRafael Auler FunctionNames.push_back(FuncName); 2973a34c753fSRafael Auler }; 2974a34c753fSRafael Auler populateFunctionNames(opts::FunctionNamesFile, opts::ForceFunctionNames); 2975a34c753fSRafael Auler populateFunctionNames(opts::SkipFunctionNamesFile, opts::SkipFunctionNames); 2976d474dbdfSAmir Ayupov populateFunctionNames(opts::FunctionNamesFileNR, opts::ForceFunctionNamesNR); 2977a34c753fSRafael Auler 2978d474dbdfSAmir Ayupov // Make a set of functions to process to speed up lookups. 2979d474dbdfSAmir Ayupov std::unordered_set<std::string> ForceFunctionsNR( 2980d474dbdfSAmir Ayupov opts::ForceFunctionNamesNR.begin(), opts::ForceFunctionNamesNR.end()); 2981d474dbdfSAmir Ayupov 2982d474dbdfSAmir Ayupov if ((!opts::ForceFunctionNames.empty() || 2983d474dbdfSAmir Ayupov !opts::ForceFunctionNamesNR.empty()) && 2984d474dbdfSAmir Ayupov !opts::SkipFunctionNames.empty()) { 298552cf0711SAmir Ayupov BC->errs() 298652cf0711SAmir Ayupov << "BOLT-ERROR: cannot select functions to process and skip at the " 2987a34c753fSRafael Auler "same time. Please use only one type of selection.\n"; 2988a34c753fSRafael Auler exit(1); 2989a34c753fSRafael Auler } 2990a34c753fSRafael Auler 2991a34c753fSRafael Auler uint64_t LiteThresholdExecCount = 0; 2992a34c753fSRafael Auler if (opts::LiteThresholdPct) { 2993a34c753fSRafael Auler if (opts::LiteThresholdPct > 100) 2994a34c753fSRafael Auler opts::LiteThresholdPct = 100; 2995a34c753fSRafael Auler 2996a34c753fSRafael Auler std::vector<const BinaryFunction *> TopFunctions; 2997a34c753fSRafael Auler for (auto &BFI : BC->getBinaryFunctions()) { 2998a34c753fSRafael Auler const BinaryFunction &Function = BFI.second; 2999a34c753fSRafael Auler if (ProfileReader->mayHaveProfileData(Function)) 3000a34c753fSRafael Auler TopFunctions.push_back(&Function); 3001a34c753fSRafael Auler } 3002d2c87699SAmir Ayupov llvm::sort( 3003d2c87699SAmir Ayupov TopFunctions, [](const BinaryFunction *A, const BinaryFunction *B) { 3004d2c87699SAmir Ayupov return A->getKnownExecutionCount() < B->getKnownExecutionCount(); 3005a34c753fSRafael Auler }); 3006a34c753fSRafael Auler 3007a34c753fSRafael Auler size_t Index = TopFunctions.size() * opts::LiteThresholdPct / 100; 3008a34c753fSRafael Auler if (Index) 3009a34c753fSRafael Auler --Index; 3010a34c753fSRafael Auler LiteThresholdExecCount = TopFunctions[Index]->getKnownExecutionCount(); 301152cf0711SAmir Ayupov BC->outs() << "BOLT-INFO: limiting processing to functions with at least " 3012a34c753fSRafael Auler << LiteThresholdExecCount << " invocations\n"; 3013a34c753fSRafael Auler } 301440c2e0faSMaksim Panchenko LiteThresholdExecCount = std::max( 301540c2e0faSMaksim Panchenko LiteThresholdExecCount, static_cast<uint64_t>(opts::LiteThresholdCount)); 3016a34c753fSRafael Auler 3017703d94d8SAmir Ayupov StringSet<> ReorderFunctionsUserSet; 3018287508cdSAmir Ayupov StringSet<> ReorderFunctionsLTOCommonSet; 3019703d94d8SAmir Ayupov if (opts::ReorderFunctions == ReorderFunctions::RT_USER) { 302013d60ce2SAmir Ayupov std::vector<std::string> FunctionNames; 302152cf0711SAmir Ayupov BC->logBOLTErrorsAndQuitOnFatal( 302252cf0711SAmir Ayupov ReorderFunctions::readFunctionOrderFile(FunctionNames)); 302313d60ce2SAmir Ayupov for (const std::string &Function : FunctionNames) { 3024703d94d8SAmir Ayupov ReorderFunctionsUserSet.insert(Function); 3025287508cdSAmir Ayupov if (std::optional<StringRef> LTOCommonName = getLTOCommonName(Function)) 3026287508cdSAmir Ayupov ReorderFunctionsLTOCommonSet.insert(*LTOCommonName); 3027287508cdSAmir Ayupov } 3028703d94d8SAmir Ayupov } 3029703d94d8SAmir Ayupov 3030a34c753fSRafael Auler uint64_t NumFunctionsToProcess = 0; 3031c49941bdSAmir Ayupov auto mustSkip = [&](const BinaryFunction &Function) { 3032c4e60a7fSMaksim Panchenko if (opts::MaxFunctions.getNumOccurrences() && 3033c4e60a7fSMaksim Panchenko NumFunctionsToProcess >= opts::MaxFunctions) 3034c49941bdSAmir Ayupov return true; 3035c49941bdSAmir Ayupov for (std::string &Name : opts::SkipFunctionNames) 3036c49941bdSAmir Ayupov if (Function.hasNameRegex(Name)) 3037c49941bdSAmir Ayupov return true; 3038c49941bdSAmir Ayupov 3039c49941bdSAmir Ayupov return false; 3040c49941bdSAmir Ayupov }; 3041c49941bdSAmir Ayupov 3042c49941bdSAmir Ayupov auto shouldProcess = [&](const BinaryFunction &Function) { 3043c49941bdSAmir Ayupov if (mustSkip(Function)) 3044a34c753fSRafael Auler return false; 3045a34c753fSRafael Auler 3046a34c753fSRafael Auler // If the list is not empty, only process functions from the list. 3047d474dbdfSAmir Ayupov if (!opts::ForceFunctionNames.empty() || !ForceFunctionsNR.empty()) { 3048d474dbdfSAmir Ayupov // Regex check (-funcs and -funcs-file options). 3049ee0e9ccbSMaksim Panchenko for (std::string &Name : opts::ForceFunctionNames) 3050ee0e9ccbSMaksim Panchenko if (Function.hasNameRegex(Name)) 3051a34c753fSRafael Auler return true; 3052ee0e9ccbSMaksim Panchenko 3053d474dbdfSAmir Ayupov // Non-regex check (-funcs-no-regex and -funcs-file-no-regex). 30544a6426a8SAmir Ayupov for (const StringRef Name : Function.getNames()) 30554a6426a8SAmir Ayupov if (ForceFunctionsNR.count(Name.str())) 30564a6426a8SAmir Ayupov return true; 30574a6426a8SAmir Ayupov 30584a6426a8SAmir Ayupov return false; 3059a34c753fSRafael Auler } 3060a34c753fSRafael Auler 3061a34c753fSRafael Auler if (opts::Lite) { 3062703d94d8SAmir Ayupov // Forcibly include functions specified in the -function-order file. 3063703d94d8SAmir Ayupov if (opts::ReorderFunctions == ReorderFunctions::RT_USER) { 30644a6426a8SAmir Ayupov for (const StringRef Name : Function.getNames()) 30654a6426a8SAmir Ayupov if (ReorderFunctionsUserSet.contains(Name)) 3066703d94d8SAmir Ayupov return true; 3067287508cdSAmir Ayupov for (const StringRef Name : Function.getNames()) 3068287508cdSAmir Ayupov if (std::optional<StringRef> LTOCommonName = getLTOCommonName(Name)) 3069287508cdSAmir Ayupov if (ReorderFunctionsLTOCommonSet.contains(*LTOCommonName)) 3070287508cdSAmir Ayupov return true; 3071703d94d8SAmir Ayupov } 3072703d94d8SAmir Ayupov 3073a34c753fSRafael Auler if (ProfileReader && !ProfileReader->mayHaveProfileData(Function)) 3074a34c753fSRafael Auler return false; 3075a34c753fSRafael Auler 3076a34c753fSRafael Auler if (Function.getKnownExecutionCount() < LiteThresholdExecCount) 3077a34c753fSRafael Auler return false; 3078a34c753fSRafael Auler } 3079a34c753fSRafael Auler 3080a34c753fSRafael Auler return true; 3081a34c753fSRafael Auler }; 3082a34c753fSRafael Auler 3083b5ed375fSPeter Waller if (BinaryFunction *Init = getInitFunctionIfStaticBinary(*BC)) 3084b5ed375fSPeter Waller Init->setIgnored(); 3085b5ed375fSPeter Waller 3086a34c753fSRafael Auler for (auto &BFI : BC->getBinaryFunctions()) { 3087a34c753fSRafael Auler BinaryFunction &Function = BFI.second; 3088a34c753fSRafael Auler 3089a34c753fSRafael Auler // Pseudo functions are explicitly marked by us not to be processed. 3090a34c753fSRafael Auler if (Function.isPseudo()) { 3091a34c753fSRafael Auler Function.IsIgnored = true; 3092a34c753fSRafael Auler Function.HasExternalRefRelocations = true; 3093a34c753fSRafael Auler continue; 3094a34c753fSRafael Auler } 3095a34c753fSRafael Auler 3096c49941bdSAmir Ayupov // Decide what to do with fragments after parent functions are processed. 3097c49941bdSAmir Ayupov if (Function.isFragment()) 3098c49941bdSAmir Ayupov continue; 3099c49941bdSAmir Ayupov 3100a34c753fSRafael Auler if (!shouldProcess(Function)) { 3101c49941bdSAmir Ayupov if (opts::Verbosity >= 1) { 310252cf0711SAmir Ayupov BC->outs() << "BOLT-INFO: skipping processing " << Function 3103c49941bdSAmir Ayupov << " per user request\n"; 3104c49941bdSAmir Ayupov } 3105a34c753fSRafael Auler Function.setIgnored(); 3106a34c753fSRafael Auler } else { 3107a34c753fSRafael Auler ++NumFunctionsToProcess; 3108c4e60a7fSMaksim Panchenko if (opts::MaxFunctions.getNumOccurrences() && 3109c4e60a7fSMaksim Panchenko NumFunctionsToProcess == opts::MaxFunctions) 311052cf0711SAmir Ayupov BC->outs() << "BOLT-INFO: processing ending on " << Function << '\n'; 3111a34c753fSRafael Auler } 3112a34c753fSRafael Auler } 3113c49941bdSAmir Ayupov 3114c49941bdSAmir Ayupov if (!BC->HasSplitFunctions) 3115c49941bdSAmir Ayupov return; 3116c49941bdSAmir Ayupov 3117c49941bdSAmir Ayupov // Fragment overrides: 3118c49941bdSAmir Ayupov // - If the fragment must be skipped, then the parent must be skipped as well. 3119c49941bdSAmir Ayupov // Otherwise, fragment should follow the parent function: 3120c49941bdSAmir Ayupov // - if the parent is skipped, skip fragment, 3121c49941bdSAmir Ayupov // - if the parent is processed, process the fragment(s) as well. 3122c49941bdSAmir Ayupov for (auto &BFI : BC->getBinaryFunctions()) { 3123c49941bdSAmir Ayupov BinaryFunction &Function = BFI.second; 3124c49941bdSAmir Ayupov if (!Function.isFragment()) 3125c49941bdSAmir Ayupov continue; 3126c49941bdSAmir Ayupov if (mustSkip(Function)) { 3127c49941bdSAmir Ayupov for (BinaryFunction *Parent : Function.ParentFragments) { 3128c49941bdSAmir Ayupov if (opts::Verbosity >= 1) { 312952cf0711SAmir Ayupov BC->outs() << "BOLT-INFO: skipping processing " << *Parent 3130c49941bdSAmir Ayupov << " together with fragment function\n"; 3131c49941bdSAmir Ayupov } 3132c49941bdSAmir Ayupov Parent->setIgnored(); 3133c49941bdSAmir Ayupov --NumFunctionsToProcess; 3134c49941bdSAmir Ayupov } 3135c49941bdSAmir Ayupov Function.setIgnored(); 3136c49941bdSAmir Ayupov continue; 3137c49941bdSAmir Ayupov } 3138c49941bdSAmir Ayupov 3139c49941bdSAmir Ayupov bool IgnoredParent = 3140c49941bdSAmir Ayupov llvm::any_of(Function.ParentFragments, [&](BinaryFunction *Parent) { 3141c49941bdSAmir Ayupov return Parent->isIgnored(); 3142c49941bdSAmir Ayupov }); 3143c49941bdSAmir Ayupov if (IgnoredParent) { 3144c49941bdSAmir Ayupov if (opts::Verbosity >= 1) { 314552cf0711SAmir Ayupov BC->outs() << "BOLT-INFO: skipping processing " << Function 3146c49941bdSAmir Ayupov << " together with parent function\n"; 3147c49941bdSAmir Ayupov } 3148c49941bdSAmir Ayupov Function.setIgnored(); 3149c49941bdSAmir Ayupov } else { 3150c49941bdSAmir Ayupov ++NumFunctionsToProcess; 3151c49941bdSAmir Ayupov if (opts::Verbosity >= 1) { 315252cf0711SAmir Ayupov BC->outs() << "BOLT-INFO: processing " << Function 3153c49941bdSAmir Ayupov << " as a sibling of non-ignored function\n"; 3154c49941bdSAmir Ayupov } 3155c49941bdSAmir Ayupov if (opts::MaxFunctions && NumFunctionsToProcess == opts::MaxFunctions) 315652cf0711SAmir Ayupov BC->outs() << "BOLT-INFO: processing ending on " << Function << '\n'; 3157c49941bdSAmir Ayupov } 3158c49941bdSAmir Ayupov } 3159a34c753fSRafael Auler } 3160a34c753fSRafael Auler 3161a34c753fSRafael Auler void RewriteInstance::readDebugInfo() { 3162a34c753fSRafael Auler NamedRegionTimer T("readDebugInfo", "read debug info", TimerGroupName, 3163a34c753fSRafael Auler TimerGroupDesc, opts::TimeRewrite); 3164a34c753fSRafael Auler if (!opts::UpdateDebugSections) 3165a34c753fSRafael Auler return; 3166a34c753fSRafael Auler 3167a34c753fSRafael Auler BC->preprocessDebugInfo(); 3168a34c753fSRafael Auler } 3169a34c753fSRafael Auler 3170a34c753fSRafael Auler void RewriteInstance::preprocessProfileData() { 3171a34c753fSRafael Auler if (!ProfileReader) 3172a34c753fSRafael Auler return; 3173a34c753fSRafael Auler 3174a34c753fSRafael Auler NamedRegionTimer T("preprocessprofile", "pre-process profile data", 3175a34c753fSRafael Auler TimerGroupName, TimerGroupDesc, opts::TimeRewrite); 3176a34c753fSRafael Auler 317752cf0711SAmir Ayupov BC->outs() << "BOLT-INFO: pre-processing profile using " 3178a34c753fSRafael Auler << ProfileReader->getReaderName() << '\n'; 3179a34c753fSRafael Auler 3180a34c753fSRafael Auler if (BAT->enabledFor(InputFile)) { 318152cf0711SAmir Ayupov BC->outs() << "BOLT-INFO: profile collection done on a binary already " 3182a34c753fSRafael Auler "processed by BOLT\n"; 3183a34c753fSRafael Auler ProfileReader->setBAT(&*BAT); 3184a34c753fSRafael Auler } 3185a34c753fSRafael Auler 3186a34c753fSRafael Auler if (Error E = ProfileReader->preprocessProfile(*BC.get())) 3187a34c753fSRafael Auler report_error("cannot pre-process profile", std::move(E)); 3188a34c753fSRafael Auler 318983b3e13eSAmir Ayupov if (!BC->hasSymbolsWithFileName() && ProfileReader->hasLocalsWithFileName() && 319083b3e13eSAmir Ayupov !opts::AllowStripped) { 319152cf0711SAmir Ayupov BC->errs() 319252cf0711SAmir Ayupov << "BOLT-ERROR: input binary does not have local file symbols " 3193a34c753fSRafael Auler "but profile data includes function names with embedded file " 3194a34c753fSRafael Auler "names. It appears that the input binary was stripped while a " 319583b3e13eSAmir Ayupov "profiled binary was not. If you know what you are doing and " 319683b3e13eSAmir Ayupov "wish to proceed, use -allow-stripped option.\n"; 3197a34c753fSRafael Auler exit(1); 3198a34c753fSRafael Auler } 3199a34c753fSRafael Auler } 3200a34c753fSRafael Auler 3201c9b1f062SMaksim Panchenko void RewriteInstance::initializeMetadataManager() { 32022abcbbd9SMaksim Panchenko if (BC->IsLinuxKernel) 320338639a81SMaksim Panchenko MetadataManager.registerRewriter(createLinuxKernelRewriter(*BC)); 320438639a81SMaksim Panchenko 32058ea59ec6SMaksim Panchenko MetadataManager.registerRewriter(createBuildIDRewriter(*BC)); 32068ea59ec6SMaksim Panchenko 320743dce27cSMaksim Panchenko MetadataManager.registerRewriter(createPseudoProbeRewriter(*BC)); 320843dce27cSMaksim Panchenko 320998e2d630SMaksim Panchenko MetadataManager.registerRewriter(createSDTRewriter(*BC)); 3210c9b1f062SMaksim Panchenko } 3211c9b1f062SMaksim Panchenko 32128ea59ec6SMaksim Panchenko void RewriteInstance::processSectionMetadata() { 3213fb97b4f9SAmir Ayupov NamedRegionTimer T("processmetadata-section", "process section metadata", 3214fb97b4f9SAmir Ayupov TimerGroupName, TimerGroupDesc, opts::TimeRewrite); 3215c9b1f062SMaksim Panchenko initializeMetadataManager(); 3216c9b1f062SMaksim Panchenko 32178ea59ec6SMaksim Panchenko MetadataManager.runSectionInitializers(); 32188ea59ec6SMaksim Panchenko } 32198ea59ec6SMaksim Panchenko 32208ea59ec6SMaksim Panchenko void RewriteInstance::processMetadataPreCFG() { 3221fb97b4f9SAmir Ayupov NamedRegionTimer T("processmetadata-precfg", "process metadata pre-CFG", 3222fb97b4f9SAmir Ayupov TimerGroupName, TimerGroupDesc, opts::TimeRewrite); 3223c9b1f062SMaksim Panchenko MetadataManager.runInitializersPreCFG(); 3224c9b1f062SMaksim Panchenko 3225c9b1f062SMaksim Panchenko processProfileDataPreCFG(); 3226c9b1f062SMaksim Panchenko } 3227c9b1f062SMaksim Panchenko 3228dd630d83SMaksim Panchenko void RewriteInstance::processMetadataPostCFG() { 3229fb97b4f9SAmir Ayupov NamedRegionTimer T("processmetadata-postcfg", "process metadata post-CFG", 3230fb97b4f9SAmir Ayupov TimerGroupName, TimerGroupDesc, opts::TimeRewrite); 3231dd630d83SMaksim Panchenko MetadataManager.runInitializersPostCFG(); 3232dd630d83SMaksim Panchenko } 3233dd630d83SMaksim Panchenko 3234a34c753fSRafael Auler void RewriteInstance::processProfileDataPreCFG() { 3235a34c753fSRafael Auler if (!ProfileReader) 3236a34c753fSRafael Auler return; 3237a34c753fSRafael Auler 3238a34c753fSRafael Auler NamedRegionTimer T("processprofile-precfg", "process profile data pre-CFG", 3239a34c753fSRafael Auler TimerGroupName, TimerGroupDesc, opts::TimeRewrite); 3240a34c753fSRafael Auler 3241a34c753fSRafael Auler if (Error E = ProfileReader->readProfilePreCFG(*BC.get())) 3242a34c753fSRafael Auler report_error("cannot read profile pre-CFG", std::move(E)); 3243a34c753fSRafael Auler } 3244a34c753fSRafael Auler 3245a34c753fSRafael Auler void RewriteInstance::processProfileData() { 3246a34c753fSRafael Auler if (!ProfileReader) 3247a34c753fSRafael Auler return; 3248a34c753fSRafael Auler 3249a34c753fSRafael Auler NamedRegionTimer T("processprofile", "process profile data", TimerGroupName, 3250a34c753fSRafael Auler TimerGroupDesc, opts::TimeRewrite); 3251a34c753fSRafael Auler 3252a34c753fSRafael Auler if (Error E = ProfileReader->readProfile(*BC.get())) 3253a34c753fSRafael Auler report_error("cannot read profile", std::move(E)); 3254a34c753fSRafael Auler 3255fd49cc87SAmir Ayupov if (opts::PrintProfile || opts::PrintAll) { 3256fd49cc87SAmir Ayupov for (auto &BFI : BC->getBinaryFunctions()) { 3257fd49cc87SAmir Ayupov BinaryFunction &Function = BFI.second; 3258fd49cc87SAmir Ayupov if (Function.empty()) 3259fd49cc87SAmir Ayupov continue; 3260fd49cc87SAmir Ayupov 326152cf0711SAmir Ayupov Function.print(BC->outs(), "after attaching profile"); 3262fd49cc87SAmir Ayupov } 3263fd49cc87SAmir Ayupov } 3264fd49cc87SAmir Ayupov 326562806811SAmir Ayupov if (!opts::SaveProfile.empty() && !BAT->enabledFor(InputFile)) { 3266a34c753fSRafael Auler YAMLProfileWriter PW(opts::SaveProfile); 3267a34c753fSRafael Auler PW.writeProfile(*this); 3268a34c753fSRafael Auler } 326939336fc0SAmir Ayupov if (opts::AggregateOnly && 327062806811SAmir Ayupov opts::ProfileFormat == opts::ProfileFormatKind::PF_YAML && 327162806811SAmir Ayupov !BAT->enabledFor(InputFile)) { 327239336fc0SAmir Ayupov YAMLProfileWriter PW(opts::OutputFilename); 327339336fc0SAmir Ayupov PW.writeProfile(*this); 327439336fc0SAmir Ayupov } 3275a34c753fSRafael Auler 3276a34c753fSRafael Auler // Release memory used by profile reader. 3277a34c753fSRafael Auler ProfileReader.reset(); 3278a34c753fSRafael Auler 32791529ec08SAmir Ayupov if (opts::AggregateOnly) { 32801529ec08SAmir Ayupov PrintProgramStats PPS(&*BAT); 32811529ec08SAmir Ayupov BC->logBOLTErrorsAndQuitOnFatal(PPS.runOnFunctions(*BC)); 32823f51bec4SAmir Ayupov TimerGroup::printAll(outs()); 3283a34c753fSRafael Auler exit(0); 3284a34c753fSRafael Auler } 32851529ec08SAmir Ayupov } 3286a34c753fSRafael Auler 3287a34c753fSRafael Auler void RewriteInstance::disassembleFunctions() { 3288a34c753fSRafael Auler NamedRegionTimer T("disassembleFunctions", "disassemble functions", 3289a34c753fSRafael Auler TimerGroupName, TimerGroupDesc, opts::TimeRewrite); 3290a34c753fSRafael Auler for (auto &BFI : BC->getBinaryFunctions()) { 3291a34c753fSRafael Auler BinaryFunction &Function = BFI.second; 3292a34c753fSRafael Auler 3293a34c753fSRafael Auler ErrorOr<ArrayRef<uint8_t>> FunctionData = Function.getData(); 3294a34c753fSRafael Auler if (!FunctionData) { 329552cf0711SAmir Ayupov BC->errs() << "BOLT-ERROR: corresponding section is non-executable or " 3296a34c753fSRafael Auler << "empty for function " << Function << '\n'; 3297a34c753fSRafael Auler exit(1); 3298a34c753fSRafael Auler } 3299a34c753fSRafael Auler 3300a34c753fSRafael Auler // Treat zero-sized functions as non-simple ones. 3301a34c753fSRafael Auler if (Function.getSize() == 0) { 3302a34c753fSRafael Auler Function.setSimple(false); 3303a34c753fSRafael Auler continue; 3304a34c753fSRafael Auler } 3305a34c753fSRafael Auler 3306a34c753fSRafael Auler // Offset of the function in the file. 3307a34c753fSRafael Auler const auto *FileBegin = 3308a34c753fSRafael Auler reinterpret_cast<const uint8_t *>(InputFile->getData().data()); 3309a34c753fSRafael Auler Function.setFileOffset(FunctionData->begin() - FileBegin); 3310a34c753fSRafael Auler 3311a34c753fSRafael Auler if (!shouldDisassemble(Function)) { 3312a34c753fSRafael Auler NamedRegionTimer T("scan", "scan functions", "buildfuncs", 3313a34c753fSRafael Auler "Scan Binary Functions", opts::TimeBuild); 3314a34c753fSRafael Auler Function.scanExternalRefs(); 3315a34c753fSRafael Auler Function.setSimple(false); 3316a34c753fSRafael Auler continue; 3317a34c753fSRafael Auler } 3318a34c753fSRafael Auler 331913d60ce2SAmir Ayupov bool DisasmFailed{false}; 332013d60ce2SAmir Ayupov handleAllErrors(Function.disassemble(), [&](const BOLTError &E) { 332113d60ce2SAmir Ayupov DisasmFailed = true; 332213d60ce2SAmir Ayupov if (E.isFatal()) { 332352cf0711SAmir Ayupov E.log(BC->errs()); 332413d60ce2SAmir Ayupov exit(1); 332513d60ce2SAmir Ayupov } 332652cf0711SAmir Ayupov if (opts::processAllFunctions()) { 332752cf0711SAmir Ayupov BC->errs() << BC->generateBugReportMessage( 332852cf0711SAmir Ayupov "function cannot be properly disassembled. " 3329a34c753fSRafael Auler "Unable to continue in relocation mode.", 3330a34c753fSRafael Auler Function); 333152cf0711SAmir Ayupov exit(1); 333252cf0711SAmir Ayupov } 3333ee0e9ccbSMaksim Panchenko if (opts::Verbosity >= 1) 333452cf0711SAmir Ayupov BC->outs() << "BOLT-INFO: could not disassemble function " << Function 3335a34c753fSRafael Auler << ". Will ignore.\n"; 3336a34c753fSRafael Auler // Forcefully ignore the function. 3337a34c753fSRafael Auler Function.setIgnored(); 333813d60ce2SAmir Ayupov }); 333913d60ce2SAmir Ayupov 334013d60ce2SAmir Ayupov if (DisasmFailed) 3341a34c753fSRafael Auler continue; 3342a34c753fSRafael Auler 3343a34c753fSRafael Auler if (opts::PrintAll || opts::PrintDisasm) 334452cf0711SAmir Ayupov Function.print(BC->outs(), "after disassembly"); 3345a34c753fSRafael Auler } 3346a34c753fSRafael Auler 334735efe1d8SVladislav Khmelevsky BC->processInterproceduralReferences(); 3348a34c753fSRafael Auler BC->populateJumpTables(); 3349a34c753fSRafael Auler 3350a34c753fSRafael Auler for (auto &BFI : BC->getBinaryFunctions()) { 3351a34c753fSRafael Auler BinaryFunction &Function = BFI.second; 3352a34c753fSRafael Auler 3353a34c753fSRafael Auler if (!shouldDisassemble(Function)) 3354a34c753fSRafael Auler continue; 3355a34c753fSRafael Auler 3356a34c753fSRafael Auler Function.postProcessEntryPoints(); 3357a34c753fSRafael Auler Function.postProcessJumpTables(); 3358a34c753fSRafael Auler } 3359a34c753fSRafael Auler 336005523dc3SHuan Nguyen BC->clearJumpTableTempData(); 3361a34c753fSRafael Auler BC->adjustCodePadding(); 3362a34c753fSRafael Auler 3363a34c753fSRafael Auler for (auto &BFI : BC->getBinaryFunctions()) { 3364a34c753fSRafael Auler BinaryFunction &Function = BFI.second; 3365a34c753fSRafael Auler 3366a34c753fSRafael Auler if (!shouldDisassemble(Function)) 3367a34c753fSRafael Auler continue; 3368a34c753fSRafael Auler 3369a34c753fSRafael Auler if (!Function.isSimple()) { 337082095bd5SHuan Nguyen assert((!BC->HasRelocations || Function.getSize() == 0 || 337105523dc3SHuan Nguyen Function.hasIndirectTargetToSplitFragment()) && 3372a34c753fSRafael Auler "unexpected non-simple function in relocation mode"); 3373a34c753fSRafael Auler continue; 3374a34c753fSRafael Auler } 3375a34c753fSRafael Auler 3376a34c753fSRafael Auler // Fill in CFI information for this function 3377ee0e9ccbSMaksim Panchenko if (!Function.trapsOnEntry() && !CFIRdWrt->fillCFIInfoFor(Function)) { 3378a34c753fSRafael Auler if (BC->HasRelocations) { 337952cf0711SAmir Ayupov BC->errs() << BC->generateBugReportMessage("unable to fill CFI.", 338052cf0711SAmir Ayupov Function); 338152cf0711SAmir Ayupov exit(1); 3382a34c753fSRafael Auler } else { 338352cf0711SAmir Ayupov BC->errs() << "BOLT-WARNING: unable to fill CFI for function " 338452cf0711SAmir Ayupov << Function << ". Skipping.\n"; 3385a34c753fSRafael Auler Function.setSimple(false); 3386a34c753fSRafael Auler continue; 3387a34c753fSRafael Auler } 3388a34c753fSRafael Auler } 3389a34c753fSRafael Auler 3390a34c753fSRafael Auler // Parse LSDA. 3391ae563c91SHuan Nguyen if (Function.getLSDAAddress() != 0 && 3392c5a306f0SVladislav Khmelevsky !BC->getFragmentsToSkip().count(&Function)) { 3393c5a306f0SVladislav Khmelevsky ErrorOr<BinarySection &> LSDASection = 3394c5a306f0SVladislav Khmelevsky BC->getSectionForAddress(Function.getLSDAAddress()); 3395c5a306f0SVladislav Khmelevsky check_error(LSDASection.getError(), "failed to get LSDA section"); 3396c5a306f0SVladislav Khmelevsky ArrayRef<uint8_t> LSDAData = ArrayRef<uint8_t>( 3397c5a306f0SVladislav Khmelevsky LSDASection->getData(), LSDASection->getContents().size()); 339852cf0711SAmir Ayupov BC->logBOLTErrorsAndQuitOnFatal( 339952cf0711SAmir Ayupov Function.parseLSDA(LSDAData, LSDASection->getAddress())); 3400c5a306f0SVladislav Khmelevsky } 3401a34c753fSRafael Auler } 3402a34c753fSRafael Auler } 3403a34c753fSRafael Auler 3404a34c753fSRafael Auler void RewriteInstance::buildFunctionsCFG() { 3405a34c753fSRafael Auler NamedRegionTimer T("buildCFG", "buildCFG", "buildfuncs", 3406a34c753fSRafael Auler "Build Binary Functions", opts::TimeBuild); 3407a34c753fSRafael Auler 3408a34c753fSRafael Auler // Create annotation indices to allow lock-free execution 3409a34c753fSRafael Auler BC->MIB->getOrCreateAnnotationIndex("JTIndexReg"); 3410ccb99dd1SMaksim Panchenko BC->MIB->getOrCreateAnnotationIndex("NOP"); 3411a34c753fSRafael Auler 3412a34c753fSRafael Auler ParallelUtilities::WorkFuncWithAllocTy WorkFun = 3413a34c753fSRafael Auler [&](BinaryFunction &BF, MCPlusBuilder::AllocatorIdTy AllocId) { 341413d60ce2SAmir Ayupov bool HadErrors{false}; 341513d60ce2SAmir Ayupov handleAllErrors(BF.buildCFG(AllocId), [&](const BOLTError &E) { 341613d60ce2SAmir Ayupov if (!E.getMessage().empty()) 341752cf0711SAmir Ayupov E.log(BC->errs()); 341813d60ce2SAmir Ayupov if (E.isFatal()) 341913d60ce2SAmir Ayupov exit(1); 342013d60ce2SAmir Ayupov HadErrors = true; 342113d60ce2SAmir Ayupov }); 342213d60ce2SAmir Ayupov 342313d60ce2SAmir Ayupov if (HadErrors) 3424a34c753fSRafael Auler return; 3425a34c753fSRafael Auler 3426d1638cb0SAmir Ayupov if (opts::PrintAll) { 3427d1638cb0SAmir Ayupov auto L = BC->scopeLock(); 342852cf0711SAmir Ayupov BF.print(BC->outs(), "while building cfg"); 3429d1638cb0SAmir Ayupov } 3430a34c753fSRafael Auler }; 3431a34c753fSRafael Auler 343240c2e0faSMaksim Panchenko ParallelUtilities::PredicateTy SkipPredicate = [&](const BinaryFunction &BF) { 3433a34c753fSRafael Auler return !shouldDisassemble(BF) || !BF.isSimple(); 3434a34c753fSRafael Auler }; 3435a34c753fSRafael Auler 3436a34c753fSRafael Auler ParallelUtilities::runOnEachFunctionWithUniqueAllocId( 3437a34c753fSRafael Auler *BC, ParallelUtilities::SchedulingPolicy::SP_INST_LINEAR, WorkFun, 3438a34c753fSRafael Auler SkipPredicate, "disassembleFunctions-buildCFG", 3439a34c753fSRafael Auler /*ForceSequential*/ opts::SequentialDisassembly || opts::PrintAll); 3440a34c753fSRafael Auler 3441a34c753fSRafael Auler BC->postProcessSymbolTable(); 3442a34c753fSRafael Auler } 3443a34c753fSRafael Auler 3444a34c753fSRafael Auler void RewriteInstance::postProcessFunctions() { 344505523dc3SHuan Nguyen // We mark fragments as non-simple here, not during disassembly, 344605523dc3SHuan Nguyen // So we can build their CFGs. 344705523dc3SHuan Nguyen BC->skipMarkedFragments(); 344805523dc3SHuan Nguyen BC->clearFragmentsToSkip(); 344905523dc3SHuan Nguyen 3450a34c753fSRafael Auler BC->TotalScore = 0; 3451a34c753fSRafael Auler BC->SumExecutionCount = 0; 3452a34c753fSRafael Auler for (auto &BFI : BC->getBinaryFunctions()) { 3453a34c753fSRafael Auler BinaryFunction &Function = BFI.second; 3454a34c753fSRafael Auler 34557117af52SVladislav Khmelevsky // Set function as non-simple if it has dynamic relocations 34567117af52SVladislav Khmelevsky // in constant island, we don't want this function to be optimized 34577117af52SVladislav Khmelevsky // e.g. function splitting is unsupported. 34587117af52SVladislav Khmelevsky if (Function.hasDynamicRelocationAtIsland()) 34597117af52SVladislav Khmelevsky Function.setSimple(false); 34607117af52SVladislav Khmelevsky 3461a34c753fSRafael Auler if (Function.empty()) 3462a34c753fSRafael Auler continue; 3463a34c753fSRafael Auler 3464a34c753fSRafael Auler Function.postProcessCFG(); 3465a34c753fSRafael Auler 3466a34c753fSRafael Auler if (opts::PrintAll || opts::PrintCFG) 346752cf0711SAmir Ayupov Function.print(BC->outs(), "after building cfg"); 3468a34c753fSRafael Auler 3469a34c753fSRafael Auler if (opts::DumpDotAll) 3470a34c753fSRafael Auler Function.dumpGraphForPass("00_build-cfg"); 3471a34c753fSRafael Auler 3472a34c753fSRafael Auler if (opts::PrintLoopInfo) { 3473a34c753fSRafael Auler Function.calculateLoopInfo(); 347452cf0711SAmir Ayupov Function.printLoopInfo(BC->outs()); 3475a34c753fSRafael Auler } 3476a34c753fSRafael Auler 3477a34c753fSRafael Auler BC->TotalScore += Function.getFunctionScore(); 3478a34c753fSRafael Auler BC->SumExecutionCount += Function.getKnownExecutionCount(); 3479a34c753fSRafael Auler } 3480a34c753fSRafael Auler 3481a34c753fSRafael Auler if (opts::PrintGlobals) { 348252cf0711SAmir Ayupov BC->outs() << "BOLT-INFO: Global symbols:\n"; 348352cf0711SAmir Ayupov BC->printGlobalSymbols(BC->outs()); 3484a34c753fSRafael Auler } 3485a34c753fSRafael Auler } 3486a34c753fSRafael Auler 3487a34c753fSRafael Auler void RewriteInstance::runOptimizationPasses() { 3488a34c753fSRafael Auler NamedRegionTimer T("runOptimizationPasses", "run optimization passes", 3489a34c753fSRafael Auler TimerGroupName, TimerGroupDesc, opts::TimeRewrite); 349052cf0711SAmir Ayupov BC->logBOLTErrorsAndQuitOnFatal(BinaryFunctionPassManager::runAllPasses(*BC)); 3491a34c753fSRafael Auler } 3492a34c753fSRafael Auler 3493ceb7214bSKristof Beyls void RewriteInstance::runBinaryAnalyses() {} 3494ceb7214bSKristof Beyls 34954d3a0cadSMaksim Panchenko void RewriteInstance::preregisterSections() { 34964d3a0cadSMaksim Panchenko // Preregister sections before emission to set their order in the output. 34974d3a0cadSMaksim Panchenko const unsigned ROFlags = BinarySection::getFlags(/*IsReadOnly*/ true, 34984d3a0cadSMaksim Panchenko /*IsText*/ false, 34994d3a0cadSMaksim Panchenko /*IsAllocatable*/ true); 35004d3a0cadSMaksim Panchenko if (BinarySection *EHFrameSection = getSection(getEHFrameSectionName())) { 35014d3a0cadSMaksim Panchenko // New .eh_frame. 35024d3a0cadSMaksim Panchenko BC->registerOrUpdateSection(getNewSecPrefix() + getEHFrameSectionName(), 35034d3a0cadSMaksim Panchenko ELF::SHT_PROGBITS, ROFlags); 35044d3a0cadSMaksim Panchenko // Fully register a relocatable copy of the original .eh_frame. 35054d3a0cadSMaksim Panchenko BC->registerSection(".relocated.eh_frame", *EHFrameSection); 35064d3a0cadSMaksim Panchenko } 35074d3a0cadSMaksim Panchenko BC->registerOrUpdateSection(getNewSecPrefix() + ".gcc_except_table", 35084d3a0cadSMaksim Panchenko ELF::SHT_PROGBITS, ROFlags); 35094d3a0cadSMaksim Panchenko BC->registerOrUpdateSection(getNewSecPrefix() + ".rodata", ELF::SHT_PROGBITS, 35104d3a0cadSMaksim Panchenko ROFlags); 35114d3a0cadSMaksim Panchenko BC->registerOrUpdateSection(getNewSecPrefix() + ".rodata.cold", 35124d3a0cadSMaksim Panchenko ELF::SHT_PROGBITS, ROFlags); 35134d3a0cadSMaksim Panchenko } 35144d3a0cadSMaksim Panchenko 3515a34c753fSRafael Auler void RewriteInstance::emitAndLink() { 3516a34c753fSRafael Auler NamedRegionTimer T("emitAndLink", "emit and link", TimerGroupName, 3517a34c753fSRafael Auler TimerGroupDesc, opts::TimeRewrite); 3518a34c753fSRafael Auler 3519f2f1e670SJob Noorman SmallString<0> ObjectBuffer; 3520f2f1e670SJob Noorman raw_svector_ostream OS(ObjectBuffer); 3521a34c753fSRafael Auler 3522a34c753fSRafael Auler // Implicitly MCObjectStreamer takes ownership of MCAsmBackend (MAB) 3523a34c753fSRafael Auler // and MCCodeEmitter (MCE). ~MCObjectStreamer() will delete these 3524a34c753fSRafael Auler // two instances. 3525f2f1e670SJob Noorman std::unique_ptr<MCStreamer> Streamer = BC->createStreamer(OS); 3526a34c753fSRafael Auler 3527a34c753fSRafael Auler if (EHFrameSection) { 3528a34c753fSRafael Auler if (opts::UseOldText || opts::StrictMode) { 3529a34c753fSRafael Auler // The section is going to be regenerated from scratch. 3530a34c753fSRafael Auler // Empty the contents, but keep the section reference. 3531a34c753fSRafael Auler EHFrameSection->clearContents(); 3532a34c753fSRafael Auler } else { 3533a34c753fSRafael Auler // Make .eh_frame relocatable. 3534a34c753fSRafael Auler relocateEHFrameSection(); 3535a34c753fSRafael Auler } 3536a34c753fSRafael Auler } 3537a34c753fSRafael Auler 3538a34c753fSRafael Auler emitBinaryContext(*Streamer, *BC, getOrgSecPrefix()); 3539a34c753fSRafael Auler 354015d82c62SFangrui Song Streamer->finish(); 35419b02dc63SAmir Ayupov if (Streamer->getContext().hadError()) { 354252cf0711SAmir Ayupov BC->errs() << "BOLT-ERROR: Emission failed.\n"; 35439b02dc63SAmir Ayupov exit(1); 35449b02dc63SAmir Ayupov } 3545a34c753fSRafael Auler 3546f2f1e670SJob Noorman if (opts::KeepTmp) { 3547f2f1e670SJob Noorman SmallString<128> OutObjectPath; 3548f2f1e670SJob Noorman sys::fs::getPotentiallyUniqueTempFileName("output", "o", OutObjectPath); 3549f2f1e670SJob Noorman std::error_code EC; 3550f2f1e670SJob Noorman raw_fd_ostream FOS(OutObjectPath, EC); 3551f2f1e670SJob Noorman check_error(EC, "cannot create output object file"); 3552f2f1e670SJob Noorman FOS << ObjectBuffer; 355352cf0711SAmir Ayupov BC->outs() 355452cf0711SAmir Ayupov << "BOLT-INFO: intermediary output object file saved for debugging " 3555f2f1e670SJob Noorman "purposes: " 3556f2f1e670SJob Noorman << OutObjectPath << "\n"; 3557f2f1e670SJob Noorman } 3558f2f1e670SJob Noorman 35594d3a0cadSMaksim Panchenko ErrorOr<BinarySection &> TextSection = 35604d3a0cadSMaksim Panchenko BC->getUniqueSectionByName(BC->getMainCodeSectionName()); 35614d3a0cadSMaksim Panchenko if (BC->HasRelocations && TextSection) 3562364963a0SNathan Sidwell BC->renameSection(*TextSection, 3563364963a0SNathan Sidwell getOrgSecPrefix() + BC->getMainCodeSectionName()); 35644d3a0cadSMaksim Panchenko 3565a34c753fSRafael Auler ////////////////////////////////////////////////////////////////////////////// 3566a34c753fSRafael Auler // Assign addresses to new sections. 3567a34c753fSRafael Auler ////////////////////////////////////////////////////////////////////////////// 3568a34c753fSRafael Auler 3569a34c753fSRafael Auler // Get output object as ObjectFile. 3570a34c753fSRafael Auler std::unique_ptr<MemoryBuffer> ObjectMemBuffer = 3571f2f1e670SJob Noorman MemoryBuffer::getMemBuffer(ObjectBuffer, "in-memory object file", false); 3572a34c753fSRafael Auler 357305634f73SJob Noorman auto EFMM = std::make_unique<ExecutableFileMemoryManager>(*BC); 357405634f73SJob Noorman EFMM->setNewSecPrefix(getNewSecPrefix()); 357505634f73SJob Noorman EFMM->setOrgSecPrefix(getOrgSecPrefix()); 357605634f73SJob Noorman 357705634f73SJob Noorman Linker = std::make_unique<JITLinkLinker>(*BC, std::move(EFMM)); 357805634f73SJob Noorman Linker->loadObject(ObjectMemBuffer->getMemBufferRef(), 357905634f73SJob Noorman [this](auto MapSection) { mapFileSections(MapSection); }); 3580a34c753fSRafael Auler 3581a34c753fSRafael Auler // Update output addresses based on the new section map and 3582a34c753fSRafael Auler // layout. Only do this for the object created by ourselves. 3583475a93a0SJob Noorman updateOutputValues(*Linker); 3584a34c753fSRafael Auler 3585475a93a0SJob Noorman if (opts::UpdateDebugSections) { 3586e3e0df39SFangrui Song DebugInfoRewriter->updateLineTableOffsets( 3587e3e0df39SFangrui Song static_cast<MCObjectStreamer &>(*Streamer).getAssembler()); 3588475a93a0SJob Noorman } 3589a34c753fSRafael Auler 3590ee0e9ccbSMaksim Panchenko if (RuntimeLibrary *RtLibrary = BC->getRuntimeLibrary()) 359105634f73SJob Noorman RtLibrary->link(*BC, ToolPath, *Linker, [this](auto MapSection) { 35924d3a0cadSMaksim Panchenko // Map newly registered sections. 359305634f73SJob Noorman this->mapAllocatableSections(MapSection); 3594a34c753fSRafael Auler }); 3595a34c753fSRafael Auler 3596a34c753fSRafael Auler // Once the code is emitted, we can rename function sections to actual 3597a34c753fSRafael Auler // output sections and de-register sections used for emission. 3598a34c753fSRafael Auler for (BinaryFunction *Function : BC->getAllBinaryFunctions()) { 3599a34c753fSRafael Auler ErrorOr<BinarySection &> Section = Function->getCodeSection(); 360040c2e0faSMaksim Panchenko if (Section && 3601ee0e9ccbSMaksim Panchenko (Function->getImageAddress() == 0 || Function->getImageSize() == 0)) 3602a34c753fSRafael Auler continue; 3603a34c753fSRafael Auler 3604a34c753fSRafael Auler // Restore origin section for functions that were emitted or supposed to 3605a34c753fSRafael Auler // be emitted to patch sections. 3606a34c753fSRafael Auler if (Section) 3607a34c753fSRafael Auler BC->deregisterSection(*Section); 3608a34c753fSRafael Auler assert(Function->getOriginSectionName() && "expected origin section"); 3609275e075cSFabian Parzefall Function->CodeSectionName = Function->getOriginSectionName()->str(); 361007f63b0aSFabian Parzefall for (const FunctionFragment &FF : 3611275e075cSFabian Parzefall Function->getLayout().getSplitFragments()) { 3612275e075cSFabian Parzefall if (ErrorOr<BinarySection &> ColdSection = 36130f74d191SFabian Parzefall Function->getCodeSection(FF.getFragmentNum())) 3614a34c753fSRafael Auler BC->deregisterSection(*ColdSection); 3615a34c753fSRafael Auler } 3616275e075cSFabian Parzefall if (Function->getLayout().isSplit()) 36170f74d191SFabian Parzefall Function->setColdCodeSectionName(getBOLTTextSectionName()); 3618a34c753fSRafael Auler } 3619a34c753fSRafael Auler 3620a34c753fSRafael Auler if (opts::PrintCacheMetrics) { 362152cf0711SAmir Ayupov BC->outs() << "BOLT-INFO: cache metrics after emitting functions:\n"; 362252cf0711SAmir Ayupov CacheMetrics::printAll(BC->outs(), BC->getSortedFunctions()); 3623a34c753fSRafael Auler } 3624a34c753fSRafael Auler } 3625a34c753fSRafael Auler 3626aa1968c2SMaksim Panchenko void RewriteInstance::finalizeMetadataPreEmit() { 3627fb97b4f9SAmir Ayupov NamedRegionTimer T("finalizemetadata-preemit", "finalize metadata pre-emit", 3628fb97b4f9SAmir Ayupov TimerGroupName, TimerGroupDesc, opts::TimeRewrite); 3629aa1968c2SMaksim Panchenko MetadataManager.runFinalizersPreEmit(); 3630aa1968c2SMaksim Panchenko } 3631aa1968c2SMaksim Panchenko 3632a34c753fSRafael Auler void RewriteInstance::updateMetadata() { 3633fb97b4f9SAmir Ayupov NamedRegionTimer T("updatemetadata-postemit", "update metadata post-emit", 3634fb97b4f9SAmir Ayupov TimerGroupName, TimerGroupDesc, opts::TimeRewrite); 3635c9b1f062SMaksim Panchenko MetadataManager.runFinalizersAfterEmit(); 3636c9b1f062SMaksim Panchenko 3637a34c753fSRafael Auler if (opts::UpdateDebugSections) { 3638a34c753fSRafael Auler NamedRegionTimer T("updateDebugInfo", "update debug info", TimerGroupName, 3639a34c753fSRafael Auler TimerGroupDesc, opts::TimeRewrite); 3640a34c753fSRafael Auler DebugInfoRewriter->updateDebugInfo(); 3641a34c753fSRafael Auler } 3642a34c753fSRafael Auler 3643ee0e9ccbSMaksim Panchenko if (opts::WriteBoltInfoSection) 3644a34c753fSRafael Auler addBoltInfoSection(); 3645a34c753fSRafael Auler } 3646a34c753fSRafael Auler 364705634f73SJob Noorman void RewriteInstance::mapFileSections(BOLTLinker::SectionMapper MapSection) { 36484d3a0cadSMaksim Panchenko BC->deregisterUnusedSections(); 36494d3a0cadSMaksim Panchenko 36504d3a0cadSMaksim Panchenko // If no new .eh_frame was written, remove relocated original .eh_frame. 36514d3a0cadSMaksim Panchenko BinarySection *RelocatedEHFrameSection = 36524d3a0cadSMaksim Panchenko getSection(".relocated" + getEHFrameSectionName()); 36534d3a0cadSMaksim Panchenko if (RelocatedEHFrameSection && RelocatedEHFrameSection->hasValidSectionID()) { 36544d3a0cadSMaksim Panchenko BinarySection *NewEHFrameSection = 36554d3a0cadSMaksim Panchenko getSection(getNewSecPrefix() + getEHFrameSectionName()); 36564d3a0cadSMaksim Panchenko if (!NewEHFrameSection || !NewEHFrameSection->isFinalized()) { 365705634f73SJob Noorman // JITLink will still have to process relocations for the section, hence 36584d3a0cadSMaksim Panchenko // we need to assign it the address that wouldn't result in relocation 36594d3a0cadSMaksim Panchenko // processing failure. 366005634f73SJob Noorman MapSection(*RelocatedEHFrameSection, NextAvailableAddress); 36614d3a0cadSMaksim Panchenko BC->deregisterSection(*RelocatedEHFrameSection); 36624d3a0cadSMaksim Panchenko } 36634d3a0cadSMaksim Panchenko } 36644d3a0cadSMaksim Panchenko 366505634f73SJob Noorman mapCodeSections(MapSection); 36664d3a0cadSMaksim Panchenko 36674d3a0cadSMaksim Panchenko // Map the rest of the sections. 366805634f73SJob Noorman mapAllocatableSections(MapSection); 36693a0d894fSMaksim Panchenko 3670ad7ee900SMaksim Panchenko if (!BC->BOLTReserved.empty()) { 3671ad7ee900SMaksim Panchenko const uint64_t AllocatedSize = 3672ad7ee900SMaksim Panchenko NextAvailableAddress - BC->BOLTReserved.start(); 3673ad7ee900SMaksim Panchenko if (BC->BOLTReserved.size() < AllocatedSize) { 3674ad7ee900SMaksim Panchenko BC->errs() << "BOLT-ERROR: reserved space (" << BC->BOLTReserved.size() 3675ad7ee900SMaksim Panchenko << " byte" << (BC->BOLTReserved.size() == 1 ? "" : "s") 36763a0d894fSMaksim Panchenko << ") is smaller than required for new allocations (" 36773a0d894fSMaksim Panchenko << AllocatedSize << " bytes)\n"; 36783a0d894fSMaksim Panchenko exit(1); 36793a0d894fSMaksim Panchenko } 36803a0d894fSMaksim Panchenko } 3681a34c753fSRafael Auler } 3682a34c753fSRafael Auler 368340c2e0faSMaksim Panchenko std::vector<BinarySection *> RewriteInstance::getCodeSections() { 3684a34c753fSRafael Auler std::vector<BinarySection *> CodeSections; 3685ee0e9ccbSMaksim Panchenko for (BinarySection &Section : BC->textSections()) 3686a34c753fSRafael Auler if (Section.hasValidSectionID()) 3687a34c753fSRafael Auler CodeSections.emplace_back(&Section); 3688a34c753fSRafael Auler 3689a34c753fSRafael Auler auto compareSections = [&](const BinarySection *A, const BinarySection *B) { 3690a89c9b35SShatian Wang // If both A and B have names starting with ".text.cold", then 3691a89c9b35SShatian Wang // - if opts::HotFunctionsAtEnd is true, we want order 3692a89c9b35SShatian Wang // ".text.cold.T", ".text.cold.T-1", ... ".text.cold.1", ".text.cold" 3693a89c9b35SShatian Wang // - if opts::HotFunctionsAtEnd is false, we want order 3694a89c9b35SShatian Wang // ".text.cold", ".text.cold.1", ... ".text.cold.T-1", ".text.cold.T" 3695ad8fd5b1SKazu Hirata if (A->getName().starts_with(BC->getColdCodeSectionName()) && 3696ad8fd5b1SKazu Hirata B->getName().starts_with(BC->getColdCodeSectionName())) { 3697a89c9b35SShatian Wang if (A->getName().size() != B->getName().size()) 3698a89c9b35SShatian Wang return (opts::HotFunctionsAtEnd) 3699a89c9b35SShatian Wang ? (A->getName().size() > B->getName().size()) 3700a89c9b35SShatian Wang : (A->getName().size() < B->getName().size()); 3701a89c9b35SShatian Wang return (opts::HotFunctionsAtEnd) ? (A->getName() > B->getName()) 3702a89c9b35SShatian Wang : (A->getName() < B->getName()); 3703a89c9b35SShatian Wang } 3704a89c9b35SShatian Wang 3705a34c753fSRafael Auler // Place movers before anything else. 3706a34c753fSRafael Auler if (A->getName() == BC->getHotTextMoverSectionName()) 3707a34c753fSRafael Auler return true; 3708a34c753fSRafael Auler if (B->getName() == BC->getHotTextMoverSectionName()) 3709a34c753fSRafael Auler return false; 3710a34c753fSRafael Auler 3711c43d0432SShatianWang // Depending on opts::HotFunctionsAtEnd, place main and warm sections in 3712c43d0432SShatianWang // order. 3713c43d0432SShatianWang if (opts::HotFunctionsAtEnd) { 3714c43d0432SShatianWang if (B->getName() == BC->getMainCodeSectionName()) 3715c43d0432SShatianWang return true; 3716c43d0432SShatianWang if (A->getName() == BC->getMainCodeSectionName()) 3717c43d0432SShatianWang return false; 3718c43d0432SShatianWang return (B->getName() == BC->getWarmCodeSectionName()); 3719c43d0432SShatianWang } else { 3720c43d0432SShatianWang if (A->getName() == BC->getMainCodeSectionName()) 3721c43d0432SShatianWang return true; 3722c43d0432SShatianWang if (B->getName() == BC->getMainCodeSectionName()) 3723c43d0432SShatianWang return false; 3724c43d0432SShatianWang return (A->getName() == BC->getWarmCodeSectionName()); 3725c43d0432SShatianWang } 3726a34c753fSRafael Auler }; 3727a34c753fSRafael Auler 3728a34c753fSRafael Auler // Determine the order of sections. 3729d2c87699SAmir Ayupov llvm::stable_sort(CodeSections, compareSections); 3730a34c753fSRafael Auler 3731a34c753fSRafael Auler return CodeSections; 3732a34c753fSRafael Auler } 3733a34c753fSRafael Auler 373405634f73SJob Noorman void RewriteInstance::mapCodeSections(BOLTLinker::SectionMapper MapSection) { 3735a34c753fSRafael Auler if (BC->HasRelocations) { 3736a34c753fSRafael Auler // Map sections for functions with pre-assigned addresses. 3737a34c753fSRafael Auler for (BinaryFunction *InjectedFunction : BC->getInjectedBinaryFunctions()) { 3738a34c753fSRafael Auler const uint64_t OutputAddress = InjectedFunction->getOutputAddress(); 3739a34c753fSRafael Auler if (!OutputAddress) 3740a34c753fSRafael Auler continue; 3741a34c753fSRafael Auler 3742a34c753fSRafael Auler ErrorOr<BinarySection &> FunctionSection = 3743a34c753fSRafael Auler InjectedFunction->getCodeSection(); 3744a34c753fSRafael Auler assert(FunctionSection && "function should have section"); 3745a34c753fSRafael Auler FunctionSection->setOutputAddress(OutputAddress); 374605634f73SJob Noorman MapSection(*FunctionSection, OutputAddress); 3747a34c753fSRafael Auler InjectedFunction->setImageAddress(FunctionSection->getAllocAddress()); 3748a34c753fSRafael Auler InjectedFunction->setImageSize(FunctionSection->getOutputSize()); 3749a34c753fSRafael Auler } 3750a34c753fSRafael Auler 3751a34c753fSRafael Auler // Populate the list of sections to be allocated. 3752a34c753fSRafael Auler std::vector<BinarySection *> CodeSections = getCodeSections(); 3753a34c753fSRafael Auler 3754a34c753fSRafael Auler // Remove sections that were pre-allocated (patch sections). 3755d2c87699SAmir Ayupov llvm::erase_if(CodeSections, [](BinarySection *Section) { 3756a34c753fSRafael Auler return Section->getOutputAddress(); 3757d2c87699SAmir Ayupov }); 3758a34c753fSRafael Auler LLVM_DEBUG(dbgs() << "Code sections in the order of output:\n"; 3759ee0e9ccbSMaksim Panchenko for (const BinarySection *Section : CodeSections) 3760a34c753fSRafael Auler dbgs() << Section->getName() << '\n'; 3761ee0e9ccbSMaksim Panchenko ); 3762a34c753fSRafael Auler 3763a34c753fSRafael Auler uint64_t PaddingSize = 0; // size of padding required at the end 3764a34c753fSRafael Auler 3765a34c753fSRafael Auler // Allocate sections starting at a given Address. 3766a34c753fSRafael Auler auto allocateAt = [&](uint64_t Address) { 3767c43d0432SShatianWang const char *LastNonColdSectionName = BC->HasWarmSection 3768c43d0432SShatianWang ? BC->getWarmCodeSectionName() 3769c43d0432SShatianWang : BC->getMainCodeSectionName(); 3770a34c753fSRafael Auler for (BinarySection *Section : CodeSections) { 3771a34c753fSRafael Auler Address = alignTo(Address, Section->getAlignment()); 3772a34c753fSRafael Auler Section->setOutputAddress(Address); 3773a34c753fSRafael Auler Address += Section->getOutputSize(); 37741fb18619SAlexey Moksyakov 37751fb18619SAlexey Moksyakov // Hugify: Additional huge page from right side due to 37761fb18619SAlexey Moksyakov // weird ASLR mapping addresses (4KB aligned) 37771fb18619SAlexey Moksyakov if (opts::Hugify && !BC->HasFixedLoadAddress && 3778c43d0432SShatianWang Section->getName() == LastNonColdSectionName) 37791fb18619SAlexey Moksyakov Address = alignTo(Address, Section->getAlignment()); 3780a34c753fSRafael Auler } 3781a34c753fSRafael Auler 3782a34c753fSRafael Auler // Make sure we allocate enough space for huge pages. 37833e097fabSMaksim Panchenko ErrorOr<BinarySection &> TextSection = 3784c43d0432SShatianWang BC->getUniqueSectionByName(LastNonColdSectionName); 37853e097fabSMaksim Panchenko if (opts::HotText && TextSection && TextSection->hasValidSectionID()) { 3786a34c753fSRafael Auler uint64_t HotTextEnd = 3787a34c753fSRafael Auler TextSection->getOutputAddress() + TextSection->getOutputSize(); 3788a34c753fSRafael Auler HotTextEnd = alignTo(HotTextEnd, BC->PageAlign); 3789a34c753fSRafael Auler if (HotTextEnd > Address) { 3790a34c753fSRafael Auler PaddingSize = HotTextEnd - Address; 3791a34c753fSRafael Auler Address = HotTextEnd; 3792a34c753fSRafael Auler } 3793a34c753fSRafael Auler } 3794a34c753fSRafael Auler return Address; 3795a34c753fSRafael Auler }; 3796a34c753fSRafael Auler 3797a34c753fSRafael Auler // Check if we can fit code in the original .text 3798a34c753fSRafael Auler bool AllocationDone = false; 3799a34c753fSRafael Auler if (opts::UseOldText) { 3800a34c753fSRafael Auler const uint64_t CodeSize = 3801a34c753fSRafael Auler allocateAt(BC->OldTextSectionAddress) - BC->OldTextSectionAddress; 3802a34c753fSRafael Auler 3803a34c753fSRafael Auler if (CodeSize <= BC->OldTextSectionSize) { 380452cf0711SAmir Ayupov BC->outs() << "BOLT-INFO: using original .text for new code with 0x" 3805a34c753fSRafael Auler << Twine::utohexstr(opts::AlignText) << " alignment\n"; 3806a34c753fSRafael Auler AllocationDone = true; 3807a34c753fSRafael Auler } else { 380852cf0711SAmir Ayupov BC->errs() 380952cf0711SAmir Ayupov << "BOLT-WARNING: original .text too small to fit the new code" 3810a34c753fSRafael Auler << " using 0x" << Twine::utohexstr(opts::AlignText) 381140c2e0faSMaksim Panchenko << " alignment. " << CodeSize << " bytes needed, have " 381240c2e0faSMaksim Panchenko << BC->OldTextSectionSize << " bytes available.\n"; 3813a34c753fSRafael Auler opts::UseOldText = false; 3814a34c753fSRafael Auler } 3815a34c753fSRafael Auler } 3816a34c753fSRafael Auler 3817ee0e9ccbSMaksim Panchenko if (!AllocationDone) 3818a34c753fSRafael Auler NextAvailableAddress = allocateAt(NextAvailableAddress); 3819a34c753fSRafael Auler 3820a34c753fSRafael Auler // Do the mapping for ORC layer based on the allocation. 3821a34c753fSRafael Auler for (BinarySection *Section : CodeSections) { 3822a34c753fSRafael Auler LLVM_DEBUG( 3823a34c753fSRafael Auler dbgs() << "BOLT: mapping " << Section->getName() << " at 0x" 3824a34c753fSRafael Auler << Twine::utohexstr(Section->getAllocAddress()) << " to 0x" 3825a34c753fSRafael Auler << Twine::utohexstr(Section->getOutputAddress()) << '\n'); 382605634f73SJob Noorman MapSection(*Section, Section->getOutputAddress()); 3827a34c753fSRafael Auler Section->setOutputFileOffset( 3828a34c753fSRafael Auler getFileOffsetForAddress(Section->getOutputAddress())); 3829a34c753fSRafael Auler } 3830a34c753fSRafael Auler 3831a34c753fSRafael Auler // Check if we need to insert a padding section for hot text. 3832ee0e9ccbSMaksim Panchenko if (PaddingSize && !opts::UseOldText) 383352cf0711SAmir Ayupov BC->outs() << "BOLT-INFO: padding code to 0x" 3834a34c753fSRafael Auler << Twine::utohexstr(NextAvailableAddress) 3835a34c753fSRafael Auler << " to accommodate hot text\n"; 3836a34c753fSRafael Auler 3837a34c753fSRafael Auler return; 3838a34c753fSRafael Auler } 3839a34c753fSRafael Auler 3840a34c753fSRafael Auler // Processing in non-relocation mode. 3841a34c753fSRafael Auler uint64_t NewTextSectionStartAddress = NextAvailableAddress; 3842a34c753fSRafael Auler 3843a34c753fSRafael Auler for (auto &BFI : BC->getBinaryFunctions()) { 3844a34c753fSRafael Auler BinaryFunction &Function = BFI.second; 3845a34c753fSRafael Auler if (!Function.isEmitted()) 3846a34c753fSRafael Auler continue; 3847a34c753fSRafael Auler 3848a34c753fSRafael Auler ErrorOr<BinarySection &> FuncSection = Function.getCodeSection(); 3849a34c753fSRafael Auler assert(FuncSection && "cannot find section for function"); 3850a34c753fSRafael Auler FuncSection->setOutputAddress(Function.getAddress()); 3851a34c753fSRafael Auler LLVM_DEBUG(dbgs() << "BOLT: mapping 0x" 3852a34c753fSRafael Auler << Twine::utohexstr(FuncSection->getAllocAddress()) 3853a34c753fSRafael Auler << " to 0x" << Twine::utohexstr(Function.getAddress()) 3854a34c753fSRafael Auler << '\n'); 385505634f73SJob Noorman MapSection(*FuncSection, Function.getAddress()); 3856a34c753fSRafael Auler Function.setImageAddress(FuncSection->getAllocAddress()); 3857a34c753fSRafael Auler Function.setImageSize(FuncSection->getOutputSize()); 38581b8e0cf0SMaksim Panchenko assert(Function.getImageSize() <= Function.getMaxSize() && 38591b8e0cf0SMaksim Panchenko "Unexpected large function"); 3860a34c753fSRafael Auler 3861a34c753fSRafael Auler if (!Function.isSplit()) 3862a34c753fSRafael Auler continue; 3863a34c753fSRafael Auler 38649b6e7861SFabian Parzefall assert(Function.getLayout().isHotColdSplit() && 38659b6e7861SFabian Parzefall "Cannot allocate more than two fragments per function in " 38669b6e7861SFabian Parzefall "non-relocation mode."); 38679b6e7861SFabian Parzefall 38689b6e7861SFabian Parzefall FunctionFragment &FF = 38699b6e7861SFabian Parzefall Function.getLayout().getFragment(FragmentNum::cold()); 3870275e075cSFabian Parzefall ErrorOr<BinarySection &> ColdSection = 38710f74d191SFabian Parzefall Function.getCodeSection(FF.getFragmentNum()); 3872a34c753fSRafael Auler assert(ColdSection && "cannot find section for cold part"); 3873a34c753fSRafael Auler // Cold fragments are aligned at 16 bytes. 3874a34c753fSRafael Auler NextAvailableAddress = alignTo(NextAvailableAddress, 16); 38759b6e7861SFabian Parzefall FF.setAddress(NextAvailableAddress); 38769b6e7861SFabian Parzefall FF.setImageAddress(ColdSection->getAllocAddress()); 38779b6e7861SFabian Parzefall FF.setImageSize(ColdSection->getOutputSize()); 38789b6e7861SFabian Parzefall FF.setFileOffset(getFileOffsetForAddress(NextAvailableAddress)); 38799b6e7861SFabian Parzefall ColdSection->setOutputAddress(FF.getAddress()); 3880a34c753fSRafael Auler 38819b6e7861SFabian Parzefall LLVM_DEBUG( 38829b6e7861SFabian Parzefall dbgs() << formatv( 38839b6e7861SFabian Parzefall "BOLT: mapping cold fragment {0:x+} to {1:x+} with size {2:x+}\n", 38849b6e7861SFabian Parzefall FF.getImageAddress(), FF.getAddress(), FF.getImageSize())); 388505634f73SJob Noorman MapSection(*ColdSection, FF.getAddress()); 3886a34c753fSRafael Auler 38879b6e7861SFabian Parzefall NextAvailableAddress += FF.getImageSize(); 3888275e075cSFabian Parzefall } 3889a34c753fSRafael Auler 3890a34c753fSRafael Auler // Add the new text section aggregating all existing code sections. 3891a34c753fSRafael Auler // This is pseudo-section that serves a purpose of creating a corresponding 3892a34c753fSRafael Auler // entry in section header table. 38933a0d894fSMaksim Panchenko const uint64_t NewTextSectionSize = 3894a34c753fSRafael Auler NextAvailableAddress - NewTextSectionStartAddress; 3895a34c753fSRafael Auler if (NewTextSectionSize) { 3896a34c753fSRafael Auler const unsigned Flags = BinarySection::getFlags(/*IsReadOnly=*/true, 3897a34c753fSRafael Auler /*IsText=*/true, 3898a34c753fSRafael Auler /*IsAllocatable=*/true); 3899a34c753fSRafael Auler BinarySection &Section = 3900a34c753fSRafael Auler BC->registerOrUpdateSection(getBOLTTextSectionName(), 3901a34c753fSRafael Auler ELF::SHT_PROGBITS, 3902a34c753fSRafael Auler Flags, 3903a34c753fSRafael Auler /*Data=*/nullptr, 3904a34c753fSRafael Auler NewTextSectionSize, 3905a34c753fSRafael Auler 16); 3906a34c753fSRafael Auler Section.setOutputAddress(NewTextSectionStartAddress); 3907a34c753fSRafael Auler Section.setOutputFileOffset( 3908a34c753fSRafael Auler getFileOffsetForAddress(NewTextSectionStartAddress)); 3909a34c753fSRafael Auler } 3910a34c753fSRafael Auler } 3911a34c753fSRafael Auler 391205634f73SJob Noorman void RewriteInstance::mapAllocatableSections( 391305634f73SJob Noorman BOLTLinker::SectionMapper MapSection) { 391499655322SMaksim Panchenko 391599655322SMaksim Panchenko if (opts::UseOldText || opts::StrictMode) { 391699655322SMaksim Panchenko auto tryRewriteSection = [&](BinarySection &OldSection, 391799655322SMaksim Panchenko BinarySection &NewSection) { 391899655322SMaksim Panchenko if (OldSection.getSize() < NewSection.getOutputSize()) 391999655322SMaksim Panchenko return; 392099655322SMaksim Panchenko 392199655322SMaksim Panchenko BC->outs() << "BOLT-INFO: rewriting " << OldSection.getName() 392299655322SMaksim Panchenko << " in-place\n"; 392399655322SMaksim Panchenko 392499655322SMaksim Panchenko NewSection.setOutputAddress(OldSection.getAddress()); 392599655322SMaksim Panchenko NewSection.setOutputFileOffset(OldSection.getInputFileOffset()); 392699655322SMaksim Panchenko MapSection(NewSection, OldSection.getAddress()); 392799655322SMaksim Panchenko 392899655322SMaksim Panchenko // Pad contents with zeros. 392999655322SMaksim Panchenko NewSection.addPadding(OldSection.getSize() - NewSection.getOutputSize()); 393099655322SMaksim Panchenko 393199655322SMaksim Panchenko // Prevent the original section name from appearing in the section header 393299655322SMaksim Panchenko // table. 393399655322SMaksim Panchenko OldSection.setAnonymous(true); 393499655322SMaksim Panchenko }; 393599655322SMaksim Panchenko 393699655322SMaksim Panchenko if (EHFrameSection) { 393799655322SMaksim Panchenko BinarySection *NewEHFrameSection = 393899655322SMaksim Panchenko getSection(getNewSecPrefix() + getEHFrameSectionName()); 393999655322SMaksim Panchenko assert(NewEHFrameSection && "New contents expected for .eh_frame"); 394099655322SMaksim Panchenko tryRewriteSection(*EHFrameSection, *NewEHFrameSection); 394199655322SMaksim Panchenko } 394299655322SMaksim Panchenko BinarySection *EHSection = getSection(".gcc_except_table"); 394399655322SMaksim Panchenko BinarySection *NewEHSection = 394499655322SMaksim Panchenko getSection(getNewSecPrefix() + ".gcc_except_table"); 394599655322SMaksim Panchenko if (EHSection) { 394699655322SMaksim Panchenko assert(NewEHSection && "New contents expected for .gcc_except_table"); 394799655322SMaksim Panchenko tryRewriteSection(*EHSection, *NewEHSection); 394899655322SMaksim Panchenko } 394999655322SMaksim Panchenko } 395099655322SMaksim Panchenko 39514d3a0cadSMaksim Panchenko // Allocate read-only sections first, then writable sections. 39524d3a0cadSMaksim Panchenko enum : uint8_t { ST_READONLY, ST_READWRITE }; 39534d3a0cadSMaksim Panchenko for (uint8_t SType = ST_READONLY; SType <= ST_READWRITE; ++SType) { 3954207ea5f2SVladislav Khmelevsky const uint64_t LastNextAvailableAddress = NextAvailableAddress; 3955207ea5f2SVladislav Khmelevsky if (SType == ST_READWRITE) { 3956207ea5f2SVladislav Khmelevsky // Align R+W segment to regular page size 3957207ea5f2SVladislav Khmelevsky NextAvailableAddress = alignTo(NextAvailableAddress, BC->RegularPageSize); 3958207ea5f2SVladislav Khmelevsky NewWritableSegmentAddress = NextAvailableAddress; 3959207ea5f2SVladislav Khmelevsky } 3960207ea5f2SVladislav Khmelevsky 3961a34c753fSRafael Auler for (BinarySection &Section : BC->allocatableSections()) { 396223c8d382SJob Noorman if (Section.isLinkOnly()) 396323c8d382SJob Noorman continue; 396423c8d382SJob Noorman 39654d3a0cadSMaksim Panchenko if (!Section.hasValidSectionID()) 3966a34c753fSRafael Auler continue; 39674d3a0cadSMaksim Panchenko 396869a9bbf1SAmir Ayupov if (Section.isWritable() == (SType == ST_READONLY)) 39694d3a0cadSMaksim Panchenko continue; 39704d3a0cadSMaksim Panchenko 39714d3a0cadSMaksim Panchenko if (Section.getOutputAddress()) { 39724d3a0cadSMaksim Panchenko LLVM_DEBUG({ 39734d3a0cadSMaksim Panchenko dbgs() << "BOLT-DEBUG: section " << Section.getName() 39744d3a0cadSMaksim Panchenko << " is already mapped at 0x" 39754d3a0cadSMaksim Panchenko << Twine::utohexstr(Section.getOutputAddress()) << '\n'; 39764d3a0cadSMaksim Panchenko }); 39774d3a0cadSMaksim Panchenko continue; 39784d3a0cadSMaksim Panchenko } 39794d3a0cadSMaksim Panchenko 39804d3a0cadSMaksim Panchenko if (Section.hasSectionRef()) { 39814d3a0cadSMaksim Panchenko LLVM_DEBUG({ 39824d3a0cadSMaksim Panchenko dbgs() << "BOLT-DEBUG: mapping original section " << Section.getName() 39834d3a0cadSMaksim Panchenko << " to 0x" << Twine::utohexstr(Section.getAddress()) << '\n'; 39844d3a0cadSMaksim Panchenko }); 39854d3a0cadSMaksim Panchenko Section.setOutputAddress(Section.getAddress()); 39864d3a0cadSMaksim Panchenko Section.setOutputFileOffset(Section.getInputFileOffset()); 398705634f73SJob Noorman MapSection(Section, Section.getAddress()); 39884d3a0cadSMaksim Panchenko } else { 3989a34c753fSRafael Auler NextAvailableAddress = 3990a34c753fSRafael Auler alignTo(NextAvailableAddress, Section.getAlignment()); 39914d3a0cadSMaksim Panchenko LLVM_DEBUG({ 39924d3a0cadSMaksim Panchenko dbgs() << "BOLT: mapping section " << Section.getName() << " (0x" 39934d3a0cadSMaksim Panchenko << Twine::utohexstr(Section.getAllocAddress()) << ") to 0x" 39944d3a0cadSMaksim Panchenko << Twine::utohexstr(NextAvailableAddress) << ":0x" 39954d3a0cadSMaksim Panchenko << Twine::utohexstr(NextAvailableAddress + 39964d3a0cadSMaksim Panchenko Section.getOutputSize()) 39974d3a0cadSMaksim Panchenko << '\n'; 39984d3a0cadSMaksim Panchenko }); 3999a34c753fSRafael Auler 400005634f73SJob Noorman MapSection(Section, NextAvailableAddress); 40014d3a0cadSMaksim Panchenko Section.setOutputAddress(NextAvailableAddress); 4002a34c753fSRafael Auler Section.setOutputFileOffset( 40034d3a0cadSMaksim Panchenko getFileOffsetForAddress(NextAvailableAddress)); 40044d3a0cadSMaksim Panchenko 40054d3a0cadSMaksim Panchenko NextAvailableAddress += Section.getOutputSize(); 40064d3a0cadSMaksim Panchenko } 40074d3a0cadSMaksim Panchenko } 4008207ea5f2SVladislav Khmelevsky 4009207ea5f2SVladislav Khmelevsky if (SType == ST_READONLY) { 4010207ea5f2SVladislav Khmelevsky if (PHDRTableAddress) { 4011207ea5f2SVladislav Khmelevsky // Segment size includes the size of the PHDR area. 4012207ea5f2SVladislav Khmelevsky NewTextSegmentSize = NextAvailableAddress - PHDRTableAddress; 40133a0d894fSMaksim Panchenko } else if (NewTextSegmentAddress) { 4014207ea5f2SVladislav Khmelevsky // Existing PHDR table would be updated. 4015207ea5f2SVladislav Khmelevsky NewTextSegmentSize = NextAvailableAddress - NewTextSegmentAddress; 4016207ea5f2SVladislav Khmelevsky } 4017207ea5f2SVladislav Khmelevsky } else if (SType == ST_READWRITE) { 4018207ea5f2SVladislav Khmelevsky NewWritableSegmentSize = NextAvailableAddress - NewWritableSegmentAddress; 4019207ea5f2SVladislav Khmelevsky // Restore NextAvailableAddress if no new writable sections 4020207ea5f2SVladislav Khmelevsky if (!NewWritableSegmentSize) 4021207ea5f2SVladislav Khmelevsky NextAvailableAddress = LastNextAvailableAddress; 4022207ea5f2SVladislav Khmelevsky } 4023a34c753fSRafael Auler } 4024a34c753fSRafael Auler } 4025a34c753fSRafael Auler 4026475a93a0SJob Noorman void RewriteInstance::updateOutputValues(const BOLTLinker &Linker) { 40278244ff67Smaksfb if (std::optional<AddressMap> Map = AddressMap::parse(*BC)) 40288244ff67Smaksfb BC->setIOAddressMap(std::move(*Map)); 402923c8d382SJob Noorman 4030ee0e9ccbSMaksim Panchenko for (BinaryFunction *Function : BC->getAllBinaryFunctions()) 4031475a93a0SJob Noorman Function->updateOutputValues(Linker); 4032a34c753fSRafael Auler } 4033a34c753fSRafael Auler 4034a34c753fSRafael Auler void RewriteInstance::patchELFPHDRTable() { 4035ffb42e31SNathan Sidwell auto ELF64LEFile = cast<ELF64LEObjectFile>(InputFile); 4036a34c753fSRafael Auler const ELFFile<ELF64LE> &Obj = ELF64LEFile->getELFFile(); 4037a34c753fSRafael Auler raw_fd_ostream &OS = Out->os(); 4038a34c753fSRafael Auler 4039a34c753fSRafael Auler // Write/re-write program headers. 4040a34c753fSRafael Auler Phnum = Obj.getHeader().e_phnum; 4041a34c753fSRafael Auler if (PHDRTableOffset) { 4042207ea5f2SVladislav Khmelevsky // Writing new pheader table and adding one new entry for R+X segment. 4043207ea5f2SVladislav Khmelevsky Phnum += 1; 4044207ea5f2SVladislav Khmelevsky if (NewWritableSegmentSize) { 4045207ea5f2SVladislav Khmelevsky // Adding one more entry for R+W segment. 4046207ea5f2SVladislav Khmelevsky Phnum += 1; 4047207ea5f2SVladislav Khmelevsky } 4048a34c753fSRafael Auler } else { 4049a34c753fSRafael Auler assert(!PHDRTableAddress && "unexpected address for program header table"); 4050a34c753fSRafael Auler PHDRTableOffset = Obj.getHeader().e_phoff; 4051207ea5f2SVladislav Khmelevsky if (NewWritableSegmentSize) { 40523a0d894fSMaksim Panchenko BC->errs() << "BOLT-ERROR: unable to add writable segment\n"; 4053207ea5f2SVladislav Khmelevsky exit(1); 4054a34c753fSRafael Auler } 4055207ea5f2SVladislav Khmelevsky } 4056207ea5f2SVladislav Khmelevsky 4057207ea5f2SVladislav Khmelevsky // NOTE Currently .eh_frame_hdr appends to the last segment, recalculate 4058207ea5f2SVladislav Khmelevsky // last segments size based on the NextAvailableAddress variable. 4059207ea5f2SVladislav Khmelevsky if (!NewWritableSegmentSize) { 4060207ea5f2SVladislav Khmelevsky if (PHDRTableAddress) 4061207ea5f2SVladislav Khmelevsky NewTextSegmentSize = NextAvailableAddress - PHDRTableAddress; 40623a0d894fSMaksim Panchenko else if (NewTextSegmentAddress) 4063207ea5f2SVladislav Khmelevsky NewTextSegmentSize = NextAvailableAddress - NewTextSegmentAddress; 4064207ea5f2SVladislav Khmelevsky } else { 4065207ea5f2SVladislav Khmelevsky NewWritableSegmentSize = NextAvailableAddress - NewWritableSegmentAddress; 4066207ea5f2SVladislav Khmelevsky } 4067207ea5f2SVladislav Khmelevsky 406849bb9939SMaksim Panchenko const uint64_t SavedPos = OS.tell(); 4069a34c753fSRafael Auler OS.seek(PHDRTableOffset); 4070a34c753fSRafael Auler 4071a34c753fSRafael Auler auto createNewTextPhdr = [&]() { 4072a34c753fSRafael Auler ELF64LEPhdrTy NewPhdr; 4073a34c753fSRafael Auler NewPhdr.p_type = ELF::PT_LOAD; 4074a34c753fSRafael Auler if (PHDRTableAddress) { 4075a34c753fSRafael Auler NewPhdr.p_offset = PHDRTableOffset; 4076a34c753fSRafael Auler NewPhdr.p_vaddr = PHDRTableAddress; 4077a34c753fSRafael Auler NewPhdr.p_paddr = PHDRTableAddress; 4078a34c753fSRafael Auler } else { 4079a34c753fSRafael Auler NewPhdr.p_offset = NewTextSegmentOffset; 4080a34c753fSRafael Auler NewPhdr.p_vaddr = NewTextSegmentAddress; 4081a34c753fSRafael Auler NewPhdr.p_paddr = NewTextSegmentAddress; 4082a34c753fSRafael Auler } 4083a34c753fSRafael Auler NewPhdr.p_filesz = NewTextSegmentSize; 4084a34c753fSRafael Auler NewPhdr.p_memsz = NewTextSegmentSize; 4085a34c753fSRafael Auler NewPhdr.p_flags = ELF::PF_X | ELF::PF_R; 40863ec858bcSMaksim Panchenko if (opts::Instrument) { 4087a34c753fSRafael Auler // FIXME: Currently instrumentation is experimental and the runtime data 40883ec858bcSMaksim Panchenko // is emitted with code, thus everything needs to be writable. 4089a34c753fSRafael Auler NewPhdr.p_flags |= ELF::PF_W; 40903ec858bcSMaksim Panchenko } 4091a34c753fSRafael Auler NewPhdr.p_align = BC->PageAlign; 4092a34c753fSRafael Auler 4093a34c753fSRafael Auler return NewPhdr; 4094a34c753fSRafael Auler }; 4095a34c753fSRafael Auler 40963ec858bcSMaksim Panchenko auto writeNewSegmentPhdrs = [&]() { 40973a0d894fSMaksim Panchenko if (PHDRTableAddress || NewTextSegmentSize) { 40983a0d894fSMaksim Panchenko ELF64LE::Phdr NewPhdr = createNewTextPhdr(); 40993a0d894fSMaksim Panchenko OS.write(reinterpret_cast<const char *>(&NewPhdr), sizeof(NewPhdr)); 41003a0d894fSMaksim Panchenko } 41013ec858bcSMaksim Panchenko 41023ec858bcSMaksim Panchenko if (NewWritableSegmentSize) { 4103207ea5f2SVladislav Khmelevsky ELF64LEPhdrTy NewPhdr; 4104207ea5f2SVladislav Khmelevsky NewPhdr.p_type = ELF::PT_LOAD; 4105207ea5f2SVladislav Khmelevsky NewPhdr.p_offset = getFileOffsetForAddress(NewWritableSegmentAddress); 4106207ea5f2SVladislav Khmelevsky NewPhdr.p_vaddr = NewWritableSegmentAddress; 4107207ea5f2SVladislav Khmelevsky NewPhdr.p_paddr = NewWritableSegmentAddress; 4108207ea5f2SVladislav Khmelevsky NewPhdr.p_filesz = NewWritableSegmentSize; 4109207ea5f2SVladislav Khmelevsky NewPhdr.p_memsz = NewWritableSegmentSize; 4110207ea5f2SVladislav Khmelevsky NewPhdr.p_align = BC->RegularPageSize; 4111207ea5f2SVladislav Khmelevsky NewPhdr.p_flags = ELF::PF_R | ELF::PF_W; 41123ec858bcSMaksim Panchenko OS.write(reinterpret_cast<const char *>(&NewPhdr), sizeof(NewPhdr)); 41133ec858bcSMaksim Panchenko } 4114207ea5f2SVladislav Khmelevsky }; 4115207ea5f2SVladislav Khmelevsky 41163ec858bcSMaksim Panchenko bool ModdedGnuStack = false; 41173ec858bcSMaksim Panchenko bool AddedSegment = false; 41183ec858bcSMaksim Panchenko 4119a34c753fSRafael Auler // Copy existing program headers with modifications. 4120a34c753fSRafael Auler for (const ELF64LE::Phdr &Phdr : cantFail(Obj.program_headers())) { 4121a34c753fSRafael Auler ELF64LE::Phdr NewPhdr = Phdr; 41223ec858bcSMaksim Panchenko switch (Phdr.p_type) { 41233ec858bcSMaksim Panchenko case ELF::PT_PHDR: 41243ec858bcSMaksim Panchenko if (PHDRTableAddress) { 4125a34c753fSRafael Auler NewPhdr.p_offset = PHDRTableOffset; 4126a34c753fSRafael Auler NewPhdr.p_vaddr = PHDRTableAddress; 4127a34c753fSRafael Auler NewPhdr.p_paddr = PHDRTableAddress; 4128a34c753fSRafael Auler NewPhdr.p_filesz = sizeof(NewPhdr) * Phnum; 4129a34c753fSRafael Auler NewPhdr.p_memsz = sizeof(NewPhdr) * Phnum; 41303ec858bcSMaksim Panchenko } 41313ec858bcSMaksim Panchenko break; 41323ec858bcSMaksim Panchenko case ELF::PT_GNU_EH_FRAME: { 413312d322dbSMaksim Panchenko ErrorOr<BinarySection &> EHFrameHdrSec = BC->getUniqueSectionByName( 413412d322dbSMaksim Panchenko getNewSecPrefix() + getEHFrameHdrSectionName()); 413540c2e0faSMaksim Panchenko if (EHFrameHdrSec && EHFrameHdrSec->isAllocatable() && 4136a34c753fSRafael Auler EHFrameHdrSec->isFinalized()) { 4137a34c753fSRafael Auler NewPhdr.p_offset = EHFrameHdrSec->getOutputFileOffset(); 4138a34c753fSRafael Auler NewPhdr.p_vaddr = EHFrameHdrSec->getOutputAddress(); 4139a34c753fSRafael Auler NewPhdr.p_paddr = EHFrameHdrSec->getOutputAddress(); 4140a34c753fSRafael Auler NewPhdr.p_filesz = EHFrameHdrSec->getOutputSize(); 4141a34c753fSRafael Auler NewPhdr.p_memsz = EHFrameHdrSec->getOutputSize(); 4142a34c753fSRafael Auler } 41433ec858bcSMaksim Panchenko break; 41443ec858bcSMaksim Panchenko } 41453ec858bcSMaksim Panchenko case ELF::PT_GNU_STACK: 41463ec858bcSMaksim Panchenko if (opts::UseGnuStack) { 41473ec858bcSMaksim Panchenko // Overwrite the header with the new text segment header. 4148a34c753fSRafael Auler NewPhdr = createNewTextPhdr(); 4149a34c753fSRafael Auler ModdedGnuStack = true; 4150207ea5f2SVladislav Khmelevsky } 41513ec858bcSMaksim Panchenko break; 41523ec858bcSMaksim Panchenko case ELF::PT_DYNAMIC: 41533ec858bcSMaksim Panchenko if (!opts::UseGnuStack) { 41543ec858bcSMaksim Panchenko // Insert new headers before DYNAMIC. 41553ec858bcSMaksim Panchenko writeNewSegmentPhdrs(); 4156a34c753fSRafael Auler AddedSegment = true; 4157a34c753fSRafael Auler } 41583ec858bcSMaksim Panchenko break; 41593ec858bcSMaksim Panchenko } 4160a34c753fSRafael Auler OS.write(reinterpret_cast<const char *>(&NewPhdr), sizeof(NewPhdr)); 4161a34c753fSRafael Auler } 4162a34c753fSRafael Auler 4163a34c753fSRafael Auler if (!opts::UseGnuStack && !AddedSegment) { 41643ec858bcSMaksim Panchenko // Append new headers to the end of the table. 41653ec858bcSMaksim Panchenko writeNewSegmentPhdrs(); 4166a34c753fSRafael Auler } 4167a34c753fSRafael Auler 41683ec858bcSMaksim Panchenko if (opts::UseGnuStack && !ModdedGnuStack) { 41693ec858bcSMaksim Panchenko BC->errs() 41703ec858bcSMaksim Panchenko << "BOLT-ERROR: could not find PT_GNU_STACK program header to modify\n"; 41713ec858bcSMaksim Panchenko exit(1); 41723ec858bcSMaksim Panchenko } 417349bb9939SMaksim Panchenko 417449bb9939SMaksim Panchenko OS.seek(SavedPos); 4175a34c753fSRafael Auler } 4176a34c753fSRafael Auler 4177a34c753fSRafael Auler namespace { 4178a34c753fSRafael Auler 4179a34c753fSRafael Auler /// Write padding to \p OS such that its current \p Offset becomes aligned 4180a34c753fSRafael Auler /// at \p Alignment. Return new (aligned) offset. 418140c2e0faSMaksim Panchenko uint64_t appendPadding(raw_pwrite_stream &OS, uint64_t Offset, 4182a34c753fSRafael Auler uint64_t Alignment) { 4183a34c753fSRafael Auler if (!Alignment) 4184a34c753fSRafael Auler return Offset; 4185a34c753fSRafael Auler 4186a34c753fSRafael Auler const uint64_t PaddingSize = 4187a34c753fSRafael Auler offsetToAlignment(Offset, llvm::Align(Alignment)); 4188a34c753fSRafael Auler for (unsigned I = 0; I < PaddingSize; ++I) 4189a34c753fSRafael Auler OS.write((unsigned char)0); 4190a34c753fSRafael Auler return Offset + PaddingSize; 4191a34c753fSRafael Auler } 4192a34c753fSRafael Auler 4193a34c753fSRafael Auler } 4194a34c753fSRafael Auler 4195a34c753fSRafael Auler void RewriteInstance::rewriteNoteSections() { 4196ffb42e31SNathan Sidwell auto ELF64LEFile = cast<ELF64LEObjectFile>(InputFile); 4197a34c753fSRafael Auler const ELFFile<ELF64LE> &Obj = ELF64LEFile->getELFFile(); 4198a34c753fSRafael Auler raw_fd_ostream &OS = Out->os(); 4199a34c753fSRafael Auler 42003a0d894fSMaksim Panchenko uint64_t NextAvailableOffset = std::max( 42013a0d894fSMaksim Panchenko getFileOffsetForAddress(NextAvailableAddress), FirstNonAllocatableOffset); 4202a34c753fSRafael Auler OS.seek(NextAvailableOffset); 4203a34c753fSRafael Auler 4204a34c753fSRafael Auler // Copy over non-allocatable section contents and update file offsets. 4205a34c753fSRafael Auler for (const ELF64LE::Shdr &Section : cantFail(Obj.sections())) { 4206a34c753fSRafael Auler if (Section.sh_type == ELF::SHT_NULL) 4207a34c753fSRafael Auler continue; 4208a34c753fSRafael Auler if (Section.sh_flags & ELF::SHF_ALLOC) 4209a34c753fSRafael Auler continue; 4210a34c753fSRafael Auler 42114d3a0cadSMaksim Panchenko SectionRef SecRef = ELF64LEFile->toSectionRef(&Section); 42124d3a0cadSMaksim Panchenko BinarySection *BSec = BC->getSectionForSectionRef(SecRef); 42134d3a0cadSMaksim Panchenko assert(BSec && !BSec->isAllocatable() && 42144d3a0cadSMaksim Panchenko "Matching non-allocatable BinarySection should exist."); 42154d3a0cadSMaksim Panchenko 4216a34c753fSRafael Auler StringRef SectionName = 4217a34c753fSRafael Auler cantFail(Obj.getSectionName(Section), "cannot get section name"); 4218a34c753fSRafael Auler if (shouldStrip(Section, SectionName)) 4219a34c753fSRafael Auler continue; 4220a34c753fSRafael Auler 4221a34c753fSRafael Auler // Insert padding as needed. 4222a34c753fSRafael Auler NextAvailableOffset = 4223a34c753fSRafael Auler appendPadding(OS, NextAvailableOffset, Section.sh_addralign); 4224a34c753fSRafael Auler 4225a34c753fSRafael Auler // New section size. 4226a34c753fSRafael Auler uint64_t Size = 0; 4227a34c753fSRafael Auler bool DataWritten = false; 4228a34c753fSRafael Auler // Copy over section contents unless it's one of the sections we overwrite. 4229a34c753fSRafael Auler if (!willOverwriteSection(SectionName)) { 4230a34c753fSRafael Auler Size = Section.sh_size; 42311c2f4bbeSAlexander Yermolovich StringRef Dataref = InputFile->getData().substr(Section.sh_offset, Size); 42321c2f4bbeSAlexander Yermolovich std::string Data; 42334d3a0cadSMaksim Panchenko if (BSec->getPatcher()) { 42341c2f4bbeSAlexander Yermolovich Data = BSec->getPatcher()->patchBinary(Dataref); 42351c2f4bbeSAlexander Yermolovich Dataref = StringRef(Data); 42361c2f4bbeSAlexander Yermolovich } 4237a34c753fSRafael Auler 4238a34c753fSRafael Auler // Section was expanded, so need to treat it as overwrite. 42391c2f4bbeSAlexander Yermolovich if (Size != Dataref.size()) { 42404d3a0cadSMaksim Panchenko BSec = &BC->registerOrUpdateNoteSection( 42411c2f4bbeSAlexander Yermolovich SectionName, copyByteArray(Dataref), Dataref.size()); 4242a34c753fSRafael Auler Size = 0; 4243a34c753fSRafael Auler } else { 42441c2f4bbeSAlexander Yermolovich OS << Dataref; 4245a34c753fSRafael Auler DataWritten = true; 4246a34c753fSRafael Auler 4247a34c753fSRafael Auler // Add padding as the section extension might rely on the alignment. 4248a34c753fSRafael Auler Size = appendPadding(OS, Size, Section.sh_addralign); 4249a34c753fSRafael Auler } 4250a34c753fSRafael Auler } 4251a34c753fSRafael Auler 4252a34c753fSRafael Auler // Perform section post-processing. 4253a34c753fSRafael Auler assert(BSec->getAlignment() <= Section.sh_addralign && 4254a34c753fSRafael Auler "alignment exceeds value in file"); 4255a34c753fSRafael Auler 4256a34c753fSRafael Auler if (BSec->getAllocAddress()) { 4257a34c753fSRafael Auler assert(!DataWritten && "Writing section twice."); 4258c907d6e0SAmir Ayupov (void)DataWritten; 425999655322SMaksim Panchenko Size += BSec->write(OS); 4260a34c753fSRafael Auler } 4261a34c753fSRafael Auler 4262a34c753fSRafael Auler BSec->setOutputFileOffset(NextAvailableOffset); 42634d3a0cadSMaksim Panchenko BSec->flushPendingRelocations(OS, [this](const MCSymbol *S) { 4264a34c753fSRafael Auler return getNewValueForSymbol(S->getName()); 4265a34c753fSRafael Auler }); 4266a34c753fSRafael Auler 42675daf2001SMaksim Panchenko // Section contents are no longer needed, but we need to update the size so 42685daf2001SMaksim Panchenko // that it will be reflected in the section header table. 42695daf2001SMaksim Panchenko BSec->updateContents(nullptr, Size); 4270a34c753fSRafael Auler 4271a34c753fSRafael Auler NextAvailableOffset += Size; 4272a34c753fSRafael Auler } 4273a34c753fSRafael Auler 4274a34c753fSRafael Auler // Write new note sections. 4275a34c753fSRafael Auler for (BinarySection &Section : BC->nonAllocatableSections()) { 4276a34c753fSRafael Auler if (Section.getOutputFileOffset() || !Section.getAllocAddress()) 4277a34c753fSRafael Auler continue; 4278a34c753fSRafael Auler 4279a34c753fSRafael Auler assert(!Section.hasPendingRelocations() && "cannot have pending relocs"); 4280a34c753fSRafael Auler 428140c2e0faSMaksim Panchenko NextAvailableOffset = 428240c2e0faSMaksim Panchenko appendPadding(OS, NextAvailableOffset, Section.getAlignment()); 4283a34c753fSRafael Auler Section.setOutputFileOffset(NextAvailableOffset); 4284a34c753fSRafael Auler 4285a34c753fSRafael Auler LLVM_DEBUG( 4286a34c753fSRafael Auler dbgs() << "BOLT-DEBUG: writing out new section " << Section.getName() 4287a34c753fSRafael Auler << " of size " << Section.getOutputSize() << " at offset 0x" 4288a34c753fSRafael Auler << Twine::utohexstr(Section.getOutputFileOffset()) << '\n'); 4289a34c753fSRafael Auler 429099655322SMaksim Panchenko NextAvailableOffset += Section.write(OS); 4291a34c753fSRafael Auler } 4292a34c753fSRafael Auler } 4293a34c753fSRafael Auler 4294a34c753fSRafael Auler template <typename ELFT> 4295a34c753fSRafael Auler void RewriteInstance::finalizeSectionStringTable(ELFObjectFile<ELFT> *File) { 4296a34c753fSRafael Auler // Pre-populate section header string table. 4297ee0e9ccbSMaksim Panchenko for (const BinarySection &Section : BC->sections()) 42984d3a0cadSMaksim Panchenko if (!Section.isAnonymous()) 42994d3a0cadSMaksim Panchenko SHStrTab.add(Section.getOutputName()); 4300a34c753fSRafael Auler SHStrTab.finalize(); 4301a34c753fSRafael Auler 4302a34c753fSRafael Auler const size_t SHStrTabSize = SHStrTab.getSize(); 4303a34c753fSRafael Auler uint8_t *DataCopy = new uint8_t[SHStrTabSize]; 4304a34c753fSRafael Auler memset(DataCopy, 0, SHStrTabSize); 4305a34c753fSRafael Auler SHStrTab.write(DataCopy); 4306a34c753fSRafael Auler BC->registerOrUpdateNoteSection(".shstrtab", 4307a34c753fSRafael Auler DataCopy, 4308a34c753fSRafael Auler SHStrTabSize, 4309a34c753fSRafael Auler /*Alignment=*/1, 4310a34c753fSRafael Auler /*IsReadOnly=*/true, 4311a34c753fSRafael Auler ELF::SHT_STRTAB); 4312a34c753fSRafael Auler } 4313a34c753fSRafael Auler 4314a34c753fSRafael Auler void RewriteInstance::addBoltInfoSection() { 4315a34c753fSRafael Auler std::string DescStr; 4316a34c753fSRafael Auler raw_string_ostream DescOS(DescStr); 4317a34c753fSRafael Auler 4318a34c753fSRafael Auler DescOS << "BOLT revision: " << BoltRevision << ", " 4319a34c753fSRafael Auler << "command line:"; 4320ee0e9ccbSMaksim Panchenko for (int I = 0; I < Argc; ++I) 4321a34c753fSRafael Auler DescOS << " " << Argv[I]; 4322a34c753fSRafael Auler 4323a34c753fSRafael Auler // Encode as GNU GOLD VERSION so it is easily printable by 'readelf -n' 4324a34c753fSRafael Auler const std::string BoltInfo = 4325a34c753fSRafael Auler BinarySection::encodeELFNote("GNU", DescStr, 4 /*NT_GNU_GOLD_VERSION*/); 4326a34c753fSRafael Auler BC->registerOrUpdateNoteSection(".note.bolt_info", copyByteArray(BoltInfo), 4327a34c753fSRafael Auler BoltInfo.size(), 4328a34c753fSRafael Auler /*Alignment=*/1, 4329a34c753fSRafael Auler /*IsReadOnly=*/true, ELF::SHT_NOTE); 4330a34c753fSRafael Auler } 4331a34c753fSRafael Auler 4332a34c753fSRafael Auler void RewriteInstance::addBATSection() { 4333a34c753fSRafael Auler BC->registerOrUpdateNoteSection(BoltAddressTranslation::SECTION_NAME, nullptr, 4334a34c753fSRafael Auler 0, 4335a34c753fSRafael Auler /*Alignment=*/1, 4336a34c753fSRafael Auler /*IsReadOnly=*/true, ELF::SHT_NOTE); 4337a34c753fSRafael Auler } 4338a34c753fSRafael Auler 4339a34c753fSRafael Auler void RewriteInstance::encodeBATSection() { 4340a34c753fSRafael Auler std::string DescStr; 4341a34c753fSRafael Auler raw_string_ostream DescOS(DescStr); 4342a34c753fSRafael Auler 4343fc0ced73SRafael Auler BAT->write(*BC, DescOS); 4344a34c753fSRafael Auler 4345a34c753fSRafael Auler const std::string BoltInfo = 4346a34c753fSRafael Auler BinarySection::encodeELFNote("BOLT", DescStr, BinarySection::NT_BOLT_BAT); 4347a34c753fSRafael Auler BC->registerOrUpdateNoteSection(BoltAddressTranslation::SECTION_NAME, 4348a34c753fSRafael Auler copyByteArray(BoltInfo), BoltInfo.size(), 4349a34c753fSRafael Auler /*Alignment=*/1, 4350a34c753fSRafael Auler /*IsReadOnly=*/true, ELF::SHT_NOTE); 435152cf0711SAmir Ayupov BC->outs() << "BOLT-INFO: BAT section size (bytes): " << BoltInfo.size() 435252cf0711SAmir Ayupov << '\n'; 4353a34c753fSRafael Auler } 4354a34c753fSRafael Auler 4355a34c753fSRafael Auler template <typename ELFShdrTy> 4356a34c753fSRafael Auler bool RewriteInstance::shouldStrip(const ELFShdrTy &Section, 4357a34c753fSRafael Auler StringRef SectionName) { 4358a34c753fSRafael Auler // Strip non-allocatable relocation sections. 4359a34c753fSRafael Auler if (!(Section.sh_flags & ELF::SHF_ALLOC) && Section.sh_type == ELF::SHT_RELA) 4360a34c753fSRafael Auler return true; 4361a34c753fSRafael Auler 4362a34c753fSRafael Auler // Strip debug sections if not updating them. 4363a34c753fSRafael Auler if (isDebugSection(SectionName) && !opts::UpdateDebugSections) 4364a34c753fSRafael Auler return true; 4365a34c753fSRafael Auler 4366a34c753fSRafael Auler // Strip symtab section if needed 4367a34c753fSRafael Auler if (opts::RemoveSymtab && Section.sh_type == ELF::SHT_SYMTAB) 4368a34c753fSRafael Auler return true; 4369a34c753fSRafael Auler 4370a34c753fSRafael Auler return false; 4371a34c753fSRafael Auler } 4372a34c753fSRafael Auler 437329fe14c7SAmir Ayupov template <typename ELFT> 437429fe14c7SAmir Ayupov std::vector<typename object::ELFObjectFile<ELFT>::Elf_Shdr> 437540c2e0faSMaksim Panchenko RewriteInstance::getOutputSections(ELFObjectFile<ELFT> *File, 437640c2e0faSMaksim Panchenko std::vector<uint32_t> &NewSectionIndex) { 437729fe14c7SAmir Ayupov using ELFShdrTy = typename ELFObjectFile<ELFT>::Elf_Shdr; 4378a34c753fSRafael Auler const ELFFile<ELFT> &Obj = File->getELFFile(); 4379a34c753fSRafael Auler typename ELFT::ShdrRange Sections = cantFail(Obj.sections()); 4380a34c753fSRafael Auler 43814d3a0cadSMaksim Panchenko // Keep track of section header entries attached to the corresponding section. 43824d3a0cadSMaksim Panchenko std::vector<std::pair<BinarySection *, ELFShdrTy>> OutputSections; 4383116e801aSMaksim Panchenko auto addSection = [&](const ELFShdrTy &Section, BinarySection &BinSec) { 4384a34c753fSRafael Auler ELFShdrTy NewSection = Section; 4385116e801aSMaksim Panchenko NewSection.sh_name = SHStrTab.getOffset(BinSec.getOutputName()); 4386116e801aSMaksim Panchenko OutputSections.emplace_back(&BinSec, std::move(NewSection)); 4387a34c753fSRafael Auler }; 4388a34c753fSRafael Auler 4389a34c753fSRafael Auler // Copy over entries for original allocatable sections using modified name. 4390a34c753fSRafael Auler for (const ELFShdrTy &Section : Sections) { 4391a34c753fSRafael Auler // Always ignore this section. 4392a34c753fSRafael Auler if (Section.sh_type == ELF::SHT_NULL) { 43934d3a0cadSMaksim Panchenko OutputSections.emplace_back(nullptr, Section); 4394a34c753fSRafael Auler continue; 4395a34c753fSRafael Auler } 4396a34c753fSRafael Auler 4397a34c753fSRafael Auler if (!(Section.sh_flags & ELF::SHF_ALLOC)) 4398a34c753fSRafael Auler continue; 4399a34c753fSRafael Auler 44004d3a0cadSMaksim Panchenko SectionRef SecRef = File->toSectionRef(&Section); 44014d3a0cadSMaksim Panchenko BinarySection *BinSec = BC->getSectionForSectionRef(SecRef); 44024d3a0cadSMaksim Panchenko assert(BinSec && "Matching BinarySection should exist."); 44034d3a0cadSMaksim Panchenko 440499655322SMaksim Panchenko // Exclude anonymous sections. 440599655322SMaksim Panchenko if (BinSec->isAnonymous()) 440699655322SMaksim Panchenko continue; 440799655322SMaksim Panchenko 4408116e801aSMaksim Panchenko addSection(Section, *BinSec); 4409a34c753fSRafael Auler } 4410a34c753fSRafael Auler 44114d3a0cadSMaksim Panchenko for (BinarySection &Section : BC->allocatableSections()) { 4412a34c753fSRafael Auler if (!Section.isFinalized()) 4413a34c753fSRafael Auler continue; 4414a34c753fSRafael Auler 44154d3a0cadSMaksim Panchenko if (Section.hasSectionRef() || Section.isAnonymous()) { 4416a34c753fSRafael Auler if (opts::Verbosity) 441752cf0711SAmir Ayupov BC->outs() << "BOLT-INFO: not writing section header for section " 44184d3a0cadSMaksim Panchenko << Section.getOutputName() << '\n'; 4419a34c753fSRafael Auler continue; 4420a34c753fSRafael Auler } 4421a34c753fSRafael Auler 4422a34c753fSRafael Auler if (opts::Verbosity >= 1) 442352cf0711SAmir Ayupov BC->outs() << "BOLT-INFO: writing section header for " 44244d3a0cadSMaksim Panchenko << Section.getOutputName() << '\n'; 4425a34c753fSRafael Auler ELFShdrTy NewSection; 4426a34c753fSRafael Auler NewSection.sh_type = ELF::SHT_PROGBITS; 4427a34c753fSRafael Auler NewSection.sh_addr = Section.getOutputAddress(); 4428a34c753fSRafael Auler NewSection.sh_offset = Section.getOutputFileOffset(); 4429a34c753fSRafael Auler NewSection.sh_size = Section.getOutputSize(); 4430a34c753fSRafael Auler NewSection.sh_entsize = 0; 4431a34c753fSRafael Auler NewSection.sh_flags = Section.getELFFlags(); 4432a34c753fSRafael Auler NewSection.sh_link = 0; 4433a34c753fSRafael Auler NewSection.sh_info = 0; 4434a34c753fSRafael Auler NewSection.sh_addralign = Section.getAlignment(); 4435116e801aSMaksim Panchenko addSection(NewSection, Section); 4436a34c753fSRafael Auler } 4437a34c753fSRafael Auler 4438a34c753fSRafael Auler // Sort all allocatable sections by their offset. 44394d3a0cadSMaksim Panchenko llvm::stable_sort(OutputSections, [](const auto &A, const auto &B) { 4440a34c753fSRafael Auler return A.second.sh_offset < B.second.sh_offset; 4441a34c753fSRafael Auler }); 4442a34c753fSRafael Auler 4443a34c753fSRafael Auler // Fix section sizes to prevent overlapping. 4444a34c753fSRafael Auler ELFShdrTy *PrevSection = nullptr; 44454d3a0cadSMaksim Panchenko BinarySection *PrevBinSec = nullptr; 4446a34c753fSRafael Auler for (auto &SectionKV : OutputSections) { 4447a34c753fSRafael Auler ELFShdrTy &Section = SectionKV.second; 4448a34c753fSRafael Auler 44496ec46729SNathan Sidwell // Ignore NOBITS sections as they don't take any space in the file. 4450116e801aSMaksim Panchenko if (Section.sh_type == ELF::SHT_NOBITS) 4451a34c753fSRafael Auler continue; 4452a34c753fSRafael Auler 4453116e801aSMaksim Panchenko // Note that address continuity is not guaranteed as sections could be 4454116e801aSMaksim Panchenko // placed in different loadable segments. 4455a34c753fSRafael Auler if (PrevSection && 4456116e801aSMaksim Panchenko PrevSection->sh_offset + PrevSection->sh_size > Section.sh_offset) { 44576ec46729SNathan Sidwell if (opts::Verbosity > 1) 445852cf0711SAmir Ayupov BC->outs() << "BOLT-INFO: adjusting size for section " 44594d3a0cadSMaksim Panchenko << PrevBinSec->getOutputName() << '\n'; 4460116e801aSMaksim Panchenko PrevSection->sh_size = Section.sh_offset - PrevSection->sh_offset; 4461a34c753fSRafael Auler } 4462a34c753fSRafael Auler 4463a34c753fSRafael Auler PrevSection = &Section; 44644d3a0cadSMaksim Panchenko PrevBinSec = SectionKV.first; 4465a34c753fSRafael Auler } 4466a34c753fSRafael Auler 4467a34c753fSRafael Auler uint64_t LastFileOffset = 0; 4468a34c753fSRafael Auler 4469a34c753fSRafael Auler // Copy over entries for non-allocatable sections performing necessary 4470a34c753fSRafael Auler // adjustments. 4471a34c753fSRafael Auler for (const ELFShdrTy &Section : Sections) { 4472a34c753fSRafael Auler if (Section.sh_type == ELF::SHT_NULL) 4473a34c753fSRafael Auler continue; 4474a34c753fSRafael Auler if (Section.sh_flags & ELF::SHF_ALLOC) 4475a34c753fSRafael Auler continue; 4476a34c753fSRafael Auler 4477a34c753fSRafael Auler StringRef SectionName = 4478a34c753fSRafael Auler cantFail(Obj.getSectionName(Section), "cannot get section name"); 4479a34c753fSRafael Auler 4480a34c753fSRafael Auler if (shouldStrip(Section, SectionName)) 4481a34c753fSRafael Auler continue; 4482a34c753fSRafael Auler 44834d3a0cadSMaksim Panchenko SectionRef SecRef = File->toSectionRef(&Section); 44844d3a0cadSMaksim Panchenko BinarySection *BinSec = BC->getSectionForSectionRef(SecRef); 44854d3a0cadSMaksim Panchenko assert(BinSec && "Matching BinarySection should exist."); 4486a34c753fSRafael Auler 4487a34c753fSRafael Auler ELFShdrTy NewSection = Section; 44884d3a0cadSMaksim Panchenko NewSection.sh_offset = BinSec->getOutputFileOffset(); 44894d3a0cadSMaksim Panchenko NewSection.sh_size = BinSec->getOutputSize(); 4490a34c753fSRafael Auler 4491ee0e9ccbSMaksim Panchenko if (NewSection.sh_type == ELF::SHT_SYMTAB) 4492a34c753fSRafael Auler NewSection.sh_info = NumLocalSymbols; 4493a34c753fSRafael Auler 4494116e801aSMaksim Panchenko addSection(NewSection, *BinSec); 4495a34c753fSRafael Auler 44964d3a0cadSMaksim Panchenko LastFileOffset = BinSec->getOutputFileOffset(); 4497a34c753fSRafael Auler } 4498a34c753fSRafael Auler 4499a34c753fSRafael Auler // Create entries for new non-allocatable sections. 4500a34c753fSRafael Auler for (BinarySection &Section : BC->nonAllocatableSections()) { 4501a34c753fSRafael Auler if (Section.getOutputFileOffset() <= LastFileOffset) 4502a34c753fSRafael Auler continue; 4503a34c753fSRafael Auler 4504ee0e9ccbSMaksim Panchenko if (opts::Verbosity >= 1) 450552cf0711SAmir Ayupov BC->outs() << "BOLT-INFO: writing section header for " 45064d3a0cadSMaksim Panchenko << Section.getOutputName() << '\n'; 4507ee0e9ccbSMaksim Panchenko 4508a34c753fSRafael Auler ELFShdrTy NewSection; 4509a34c753fSRafael Auler NewSection.sh_type = Section.getELFType(); 4510a34c753fSRafael Auler NewSection.sh_addr = 0; 4511a34c753fSRafael Auler NewSection.sh_offset = Section.getOutputFileOffset(); 4512a34c753fSRafael Auler NewSection.sh_size = Section.getOutputSize(); 4513a34c753fSRafael Auler NewSection.sh_entsize = 0; 4514a34c753fSRafael Auler NewSection.sh_flags = Section.getELFFlags(); 4515a34c753fSRafael Auler NewSection.sh_link = 0; 4516a34c753fSRafael Auler NewSection.sh_info = 0; 4517a34c753fSRafael Auler NewSection.sh_addralign = Section.getAlignment(); 4518a34c753fSRafael Auler 4519116e801aSMaksim Panchenko addSection(NewSection, Section); 4520a34c753fSRafael Auler } 4521a34c753fSRafael Auler 4522a34c753fSRafael Auler // Assign indices to sections. 4523a34c753fSRafael Auler std::unordered_map<std::string, uint64_t> NameToIndex; 45244d3a0cadSMaksim Panchenko for (uint32_t Index = 1; Index < OutputSections.size(); ++Index) 45254d3a0cadSMaksim Panchenko OutputSections[Index].first->setIndex(Index); 4526a34c753fSRafael Auler 4527a34c753fSRafael Auler // Update section index mapping 4528a34c753fSRafael Auler NewSectionIndex.clear(); 4529a34c753fSRafael Auler NewSectionIndex.resize(Sections.size(), 0); 4530a34c753fSRafael Auler for (const ELFShdrTy &Section : Sections) { 4531a34c753fSRafael Auler if (Section.sh_type == ELF::SHT_NULL) 4532a34c753fSRafael Auler continue; 4533a34c753fSRafael Auler 4534a34c753fSRafael Auler size_t OrgIndex = std::distance(Sections.begin(), &Section); 45354d3a0cadSMaksim Panchenko 45364d3a0cadSMaksim Panchenko SectionRef SecRef = File->toSectionRef(&Section); 45374d3a0cadSMaksim Panchenko BinarySection *BinSec = BC->getSectionForSectionRef(SecRef); 45384d3a0cadSMaksim Panchenko assert(BinSec && "BinarySection should exist for an input section."); 4539a34c753fSRafael Auler 4540a34c753fSRafael Auler // Some sections are stripped 45414d3a0cadSMaksim Panchenko if (!BinSec->hasValidIndex()) 4542a34c753fSRafael Auler continue; 4543a34c753fSRafael Auler 45444d3a0cadSMaksim Panchenko NewSectionIndex[OrgIndex] = BinSec->getIndex(); 4545a34c753fSRafael Auler } 4546a34c753fSRafael Auler 4547a34c753fSRafael Auler std::vector<ELFShdrTy> SectionsOnly(OutputSections.size()); 454872e5b14fSAmir Ayupov llvm::copy(llvm::make_second_range(OutputSections), SectionsOnly.begin()); 4549a34c753fSRafael Auler 4550a34c753fSRafael Auler return SectionsOnly; 4551a34c753fSRafael Auler } 4552a34c753fSRafael Auler 4553a34c753fSRafael Auler // Rewrite section header table inserting new entries as needed. The sections 4554a34c753fSRafael Auler // header table size itself may affect the offsets of other sections, 4555a34c753fSRafael Auler // so we are placing it at the end of the binary. 4556a34c753fSRafael Auler // 4557a34c753fSRafael Auler // As we rewrite entries we need to track how many sections were inserted 4558a34c753fSRafael Auler // as it changes the sh_link value. We map old indices to new ones for 4559a34c753fSRafael Auler // existing sections. 4560a34c753fSRafael Auler template <typename ELFT> 4561a34c753fSRafael Auler void RewriteInstance::patchELFSectionHeaderTable(ELFObjectFile<ELFT> *File) { 4562a34c753fSRafael Auler using ELFShdrTy = typename ELFObjectFile<ELFT>::Elf_Shdr; 4563a34c753fSRafael Auler using ELFEhdrTy = typename ELFObjectFile<ELFT>::Elf_Ehdr; 4564a34c753fSRafael Auler raw_fd_ostream &OS = Out->os(); 4565a34c753fSRafael Auler const ELFFile<ELFT> &Obj = File->getELFFile(); 4566a34c753fSRafael Auler 45675bed6afcSNathan Sidwell // Mapping from old section indices to new ones 4568a34c753fSRafael Auler std::vector<uint32_t> NewSectionIndex; 4569a34c753fSRafael Auler std::vector<ELFShdrTy> OutputSections = 4570a34c753fSRafael Auler getOutputSections(File, NewSectionIndex); 4571a34c753fSRafael Auler LLVM_DEBUG( 4572a34c753fSRafael Auler dbgs() << "BOLT-DEBUG: old to new section index mapping:\n"; 4573ee0e9ccbSMaksim Panchenko for (uint64_t I = 0; I < NewSectionIndex.size(); ++I) 4574a34c753fSRafael Auler dbgs() << " " << I << " -> " << NewSectionIndex[I] << '\n'; 4575a34c753fSRafael Auler ); 4576a34c753fSRafael Auler 4577f2f04119SNathan Sidwell // Align starting address for section header table. There's no architecutal 4578f2f04119SNathan Sidwell // need to align this, it is just for pleasant human readability. 4579a34c753fSRafael Auler uint64_t SHTOffset = OS.tell(); 4580f2f04119SNathan Sidwell SHTOffset = appendPadding(OS, SHTOffset, 16); 4581a34c753fSRafael Auler 4582a34c753fSRafael Auler // Write all section header entries while patching section references. 4583a34c753fSRafael Auler for (ELFShdrTy &Section : OutputSections) { 4584a34c753fSRafael Auler Section.sh_link = NewSectionIndex[Section.sh_link]; 45855bed6afcSNathan Sidwell if (Section.sh_type == ELF::SHT_REL || Section.sh_type == ELF::SHT_RELA) 4586a34c753fSRafael Auler Section.sh_info = NewSectionIndex[Section.sh_info]; 4587a34c753fSRafael Auler OS.write(reinterpret_cast<const char *>(&Section), sizeof(Section)); 4588a34c753fSRafael Auler } 4589a34c753fSRafael Auler 4590a34c753fSRafael Auler // Fix ELF header. 4591a34c753fSRafael Auler ELFEhdrTy NewEhdr = Obj.getHeader(); 4592a34c753fSRafael Auler 4593a34c753fSRafael Auler if (BC->HasRelocations) { 4594ee0e9ccbSMaksim Panchenko if (RuntimeLibrary *RtLibrary = BC->getRuntimeLibrary()) 4595a34c753fSRafael Auler NewEhdr.e_entry = RtLibrary->getRuntimeStartAddress(); 4596ee0e9ccbSMaksim Panchenko else 4597a34c753fSRafael Auler NewEhdr.e_entry = getNewFunctionAddress(NewEhdr.e_entry); 4598a34c753fSRafael Auler assert((NewEhdr.e_entry || !Obj.getHeader().e_entry) && 4599a34c753fSRafael Auler "cannot find new address for entry point"); 4600a34c753fSRafael Auler } 4601a693ae53SMaksim Panchenko if (PHDRTableOffset) { 4602a34c753fSRafael Auler NewEhdr.e_phoff = PHDRTableOffset; 4603a34c753fSRafael Auler NewEhdr.e_phnum = Phnum; 4604a693ae53SMaksim Panchenko } 4605a34c753fSRafael Auler NewEhdr.e_shoff = SHTOffset; 4606a34c753fSRafael Auler NewEhdr.e_shnum = OutputSections.size(); 4607a34c753fSRafael Auler NewEhdr.e_shstrndx = NewSectionIndex[NewEhdr.e_shstrndx]; 4608a34c753fSRafael Auler OS.pwrite(reinterpret_cast<const char *>(&NewEhdr), sizeof(NewEhdr), 0); 4609a34c753fSRafael Auler } 4610a34c753fSRafael Auler 461129fe14c7SAmir Ayupov template <typename ELFT, typename WriteFuncTy, typename StrTabFuncTy> 4612a34c753fSRafael Auler void RewriteInstance::updateELFSymbolTable( 461329fe14c7SAmir Ayupov ELFObjectFile<ELFT> *File, bool IsDynSym, 461429fe14c7SAmir Ayupov const typename object::ELFObjectFile<ELFT>::Elf_Shdr &SymTabSection, 461529fe14c7SAmir Ayupov const std::vector<uint32_t> &NewSectionIndex, WriteFuncTy Write, 4616a34c753fSRafael Auler StrTabFuncTy AddToStrTab) { 4617a34c753fSRafael Auler const ELFFile<ELFT> &Obj = File->getELFFile(); 4618a34c753fSRafael Auler using ELFSymTy = typename ELFObjectFile<ELFT>::Elf_Sym; 4619a34c753fSRafael Auler 4620a34c753fSRafael Auler StringRef StringSection = 4621a34c753fSRafael Auler cantFail(Obj.getStringTableForSymtab(SymTabSection)); 4622a34c753fSRafael Auler 4623a34c753fSRafael Auler unsigned NumHotTextSymsUpdated = 0; 4624a34c753fSRafael Auler unsigned NumHotDataSymsUpdated = 0; 4625a34c753fSRafael Auler 4626a34c753fSRafael Auler std::map<const BinaryFunction *, uint64_t> IslandSizes; 4627a34c753fSRafael Auler auto getConstantIslandSize = [&IslandSizes](const BinaryFunction &BF) { 4628a34c753fSRafael Auler auto Itr = IslandSizes.find(&BF); 4629a34c753fSRafael Auler if (Itr != IslandSizes.end()) 4630a34c753fSRafael Auler return Itr->second; 4631a34c753fSRafael Auler return IslandSizes[&BF] = BF.estimateConstantIslandSize(); 4632a34c753fSRafael Auler }; 4633a34c753fSRafael Auler 4634a34c753fSRafael Auler // Symbols for the new symbol table. 4635a34c753fSRafael Auler std::vector<ELFSymTy> Symbols; 4636a34c753fSRafael Auler 4637090c92e0SAmir Ayupov bool EmittedColdFileSymbol = false; 4638090c92e0SAmir Ayupov 4639a34c753fSRafael Auler auto getNewSectionIndex = [&](uint32_t OldIndex) { 46408eb68d92SHuan Nguyen // For dynamic symbol table, the section index could be wrong on the input, 46418eb68d92SHuan Nguyen // and its value is ignored by the runtime if it's different from 46428eb68d92SHuan Nguyen // SHN_UNDEF and SHN_ABS. 46438eb68d92SHuan Nguyen // However, we still need to update dynamic symbol table, so return a 46448eb68d92SHuan Nguyen // section index, even though the index is broken. 46458eb68d92SHuan Nguyen if (IsDynSym && OldIndex >= NewSectionIndex.size()) 46468eb68d92SHuan Nguyen return OldIndex; 46478eb68d92SHuan Nguyen 4648a34c753fSRafael Auler assert(OldIndex < NewSectionIndex.size() && "section index out of bounds"); 4649a34c753fSRafael Auler const uint32_t NewIndex = NewSectionIndex[OldIndex]; 4650a34c753fSRafael Auler 4651a34c753fSRafael Auler // We may have stripped the section that dynsym was referencing due to 4652a34c753fSRafael Auler // the linker bug. In that case return the old index avoiding marking 4653a34c753fSRafael Auler // the symbol as undefined. 4654a34c753fSRafael Auler if (IsDynSym && NewIndex != OldIndex && NewIndex == ELF::SHN_UNDEF) 4655a34c753fSRafael Auler return OldIndex; 4656a34c753fSRafael Auler return NewIndex; 4657a34c753fSRafael Auler }; 4658a34c753fSRafael Auler 4659c43d0432SShatianWang // Get the extra symbol name of a split fragment; used in addExtraSymbols. 4660c43d0432SShatianWang auto getSplitSymbolName = [&](const FunctionFragment &FF, 4661c43d0432SShatianWang const ELFSymTy &FunctionSymbol) { 4662c43d0432SShatianWang SmallString<256> SymbolName; 4663c43d0432SShatianWang if (BC->HasWarmSection) 4664c43d0432SShatianWang SymbolName = 4665c43d0432SShatianWang formatv("{0}.{1}", cantFail(FunctionSymbol.getName(StringSection)), 4666c43d0432SShatianWang FF.getFragmentNum() == FragmentNum::warm() ? "warm" : "cold"); 4667c43d0432SShatianWang else 4668c43d0432SShatianWang SymbolName = formatv("{0}.cold.{1}", 4669c43d0432SShatianWang cantFail(FunctionSymbol.getName(StringSection)), 4670c43d0432SShatianWang FF.getFragmentNum().get() - 1); 4671c43d0432SShatianWang return SymbolName; 4672c43d0432SShatianWang }; 4673c43d0432SShatianWang 4674a34c753fSRafael Auler // Add extra symbols for the function. 4675a34c753fSRafael Auler // 4676a34c753fSRafael Auler // Note that addExtraSymbols() could be called multiple times for the same 4677a34c753fSRafael Auler // function with different FunctionSymbol matching the main function entry 4678a34c753fSRafael Auler // point. 4679a34c753fSRafael Auler auto addExtraSymbols = [&](const BinaryFunction &Function, 4680a34c753fSRafael Auler const ELFSymTy &FunctionSymbol) { 4681a34c753fSRafael Auler if (Function.isFolded()) { 4682a34c753fSRafael Auler BinaryFunction *ICFParent = Function.getFoldedIntoFunction(); 4683a34c753fSRafael Auler while (ICFParent->isFolded()) 4684a34c753fSRafael Auler ICFParent = ICFParent->getFoldedIntoFunction(); 4685a34c753fSRafael Auler ELFSymTy ICFSymbol = FunctionSymbol; 4686a34c753fSRafael Auler SmallVector<char, 256> Buf; 4687a34c753fSRafael Auler ICFSymbol.st_name = 4688a34c753fSRafael Auler AddToStrTab(Twine(cantFail(FunctionSymbol.getName(StringSection))) 4689a34c753fSRafael Auler .concat(".icf.0") 4690a34c753fSRafael Auler .toStringRef(Buf)); 4691a34c753fSRafael Auler ICFSymbol.st_value = ICFParent->getOutputAddress(); 4692a34c753fSRafael Auler ICFSymbol.st_size = ICFParent->getOutputSize(); 4693a34c753fSRafael Auler ICFSymbol.st_shndx = ICFParent->getCodeSection()->getIndex(); 4694a34c753fSRafael Auler Symbols.emplace_back(ICFSymbol); 4695a34c753fSRafael Auler } 46969b6e7861SFabian Parzefall if (Function.isSplit()) { 4697090c92e0SAmir Ayupov // Prepend synthetic FILE symbol to prevent local cold fragments from 4698090c92e0SAmir Ayupov // colliding with existing symbols with the same name. 4699090c92e0SAmir Ayupov if (!EmittedColdFileSymbol && 4700090c92e0SAmir Ayupov FunctionSymbol.getBinding() == ELF::STB_GLOBAL) { 4701090c92e0SAmir Ayupov ELFSymTy FileSymbol; 4702090c92e0SAmir Ayupov FileSymbol.st_shndx = ELF::SHN_ABS; 4703090c92e0SAmir Ayupov FileSymbol.st_name = AddToStrTab(getBOLTFileSymbolName()); 4704090c92e0SAmir Ayupov FileSymbol.st_value = 0; 4705090c92e0SAmir Ayupov FileSymbol.st_size = 0; 4706090c92e0SAmir Ayupov FileSymbol.st_other = 0; 4707090c92e0SAmir Ayupov FileSymbol.setBindingAndType(ELF::STB_LOCAL, ELF::STT_FILE); 4708090c92e0SAmir Ayupov Symbols.emplace_back(FileSymbol); 4709090c92e0SAmir Ayupov EmittedColdFileSymbol = true; 4710090c92e0SAmir Ayupov } 471107f63b0aSFabian Parzefall for (const FunctionFragment &FF : 4712275e075cSFabian Parzefall Function.getLayout().getSplitFragments()) { 47139b6e7861SFabian Parzefall if (FF.getAddress()) { 4714a34c753fSRafael Auler ELFSymTy NewColdSym = FunctionSymbol; 4715c43d0432SShatianWang const SmallString<256> SymbolName = 4716c43d0432SShatianWang getSplitSymbolName(FF, FunctionSymbol); 47170f74d191SFabian Parzefall NewColdSym.st_name = AddToStrTab(SymbolName); 4718275e075cSFabian Parzefall NewColdSym.st_shndx = 47190f74d191SFabian Parzefall Function.getCodeSection(FF.getFragmentNum())->getIndex(); 47209b6e7861SFabian Parzefall NewColdSym.st_value = FF.getAddress(); 47219b6e7861SFabian Parzefall NewColdSym.st_size = FF.getImageSize(); 4722a34c753fSRafael Auler NewColdSym.setBindingAndType(ELF::STB_LOCAL, ELF::STT_FUNC); 4723a34c753fSRafael Auler Symbols.emplace_back(NewColdSym); 4724a34c753fSRafael Auler } 4725275e075cSFabian Parzefall } 47269b6e7861SFabian Parzefall } 4727a34c753fSRafael Auler if (Function.hasConstantIsland()) { 4728a34c753fSRafael Auler uint64_t DataMark = Function.getOutputDataAddress(); 4729a34c753fSRafael Auler uint64_t CISize = getConstantIslandSize(Function); 4730a34c753fSRafael Auler uint64_t CodeMark = DataMark + CISize; 4731a34c753fSRafael Auler ELFSymTy DataMarkSym = FunctionSymbol; 4732a34c753fSRafael Auler DataMarkSym.st_name = AddToStrTab("$d"); 4733a34c753fSRafael Auler DataMarkSym.st_value = DataMark; 4734a34c753fSRafael Auler DataMarkSym.st_size = 0; 4735a34c753fSRafael Auler DataMarkSym.setType(ELF::STT_NOTYPE); 4736a34c753fSRafael Auler DataMarkSym.setBinding(ELF::STB_LOCAL); 4737a34c753fSRafael Auler ELFSymTy CodeMarkSym = DataMarkSym; 4738a34c753fSRafael Auler CodeMarkSym.st_name = AddToStrTab("$x"); 4739a34c753fSRafael Auler CodeMarkSym.st_value = CodeMark; 4740a34c753fSRafael Auler Symbols.emplace_back(DataMarkSym); 4741a34c753fSRafael Auler Symbols.emplace_back(CodeMarkSym); 4742a34c753fSRafael Auler } 4743a34c753fSRafael Auler if (Function.hasConstantIsland() && Function.isSplit()) { 4744a34c753fSRafael Auler uint64_t DataMark = Function.getOutputColdDataAddress(); 4745a34c753fSRafael Auler uint64_t CISize = getConstantIslandSize(Function); 4746a34c753fSRafael Auler uint64_t CodeMark = DataMark + CISize; 4747a34c753fSRafael Auler ELFSymTy DataMarkSym = FunctionSymbol; 4748a34c753fSRafael Auler DataMarkSym.st_name = AddToStrTab("$d"); 4749a34c753fSRafael Auler DataMarkSym.st_value = DataMark; 4750a34c753fSRafael Auler DataMarkSym.st_size = 0; 4751a34c753fSRafael Auler DataMarkSym.setType(ELF::STT_NOTYPE); 4752a34c753fSRafael Auler DataMarkSym.setBinding(ELF::STB_LOCAL); 4753a34c753fSRafael Auler ELFSymTy CodeMarkSym = DataMarkSym; 4754a34c753fSRafael Auler CodeMarkSym.st_name = AddToStrTab("$x"); 4755a34c753fSRafael Auler CodeMarkSym.st_value = CodeMark; 4756a34c753fSRafael Auler Symbols.emplace_back(DataMarkSym); 4757a34c753fSRafael Auler Symbols.emplace_back(CodeMarkSym); 4758a34c753fSRafael Auler } 4759a34c753fSRafael Auler }; 4760a34c753fSRafael Auler 4761a34c753fSRafael Auler // For regular (non-dynamic) symbol table, exclude symbols referring 4762a34c753fSRafael Auler // to non-allocatable sections. 4763a34c753fSRafael Auler auto shouldStrip = [&](const ELFSymTy &Symbol) { 4764a34c753fSRafael Auler if (Symbol.isAbsolute() || !Symbol.isDefined()) 4765a34c753fSRafael Auler return false; 4766a34c753fSRafael Auler 4767a34c753fSRafael Auler // If we cannot link the symbol to a section, leave it as is. 4768a34c753fSRafael Auler Expected<const typename ELFT::Shdr *> Section = 4769a34c753fSRafael Auler Obj.getSection(Symbol.st_shndx); 4770a34c753fSRafael Auler if (!Section) 4771a34c753fSRafael Auler return false; 4772a34c753fSRafael Auler 4773a34c753fSRafael Auler // Remove the section symbol iif the corresponding section was stripped. 4774a34c753fSRafael Auler if (Symbol.getType() == ELF::STT_SECTION) { 4775a34c753fSRafael Auler if (!getNewSectionIndex(Symbol.st_shndx)) 4776a34c753fSRafael Auler return true; 4777a34c753fSRafael Auler return false; 4778a34c753fSRafael Auler } 4779a34c753fSRafael Auler 4780a34c753fSRafael Auler // Symbols in non-allocatable sections are typically remnants of relocations 4781a34c753fSRafael Auler // emitted under "-emit-relocs" linker option. Delete those as we delete 4782a34c753fSRafael Auler // relocations against non-allocatable sections. 4783a34c753fSRafael Auler if (!((*Section)->sh_flags & ELF::SHF_ALLOC)) 4784a34c753fSRafael Auler return true; 4785a34c753fSRafael Auler 4786a34c753fSRafael Auler return false; 4787a34c753fSRafael Auler }; 4788a34c753fSRafael Auler 4789a34c753fSRafael Auler for (const ELFSymTy &Symbol : cantFail(Obj.symbols(&SymTabSection))) { 4790a34c753fSRafael Auler // For regular (non-dynamic) symbol table strip unneeded symbols. 4791a34c753fSRafael Auler if (!IsDynSym && shouldStrip(Symbol)) 4792a34c753fSRafael Auler continue; 4793a34c753fSRafael Auler 4794a34c753fSRafael Auler const BinaryFunction *Function = 4795a34c753fSRafael Auler BC->getBinaryFunctionAtAddress(Symbol.st_value); 4796a34c753fSRafael Auler // Ignore false function references, e.g. when the section address matches 4797a34c753fSRafael Auler // the address of the function. 4798a34c753fSRafael Auler if (Function && Symbol.getType() == ELF::STT_SECTION) 4799a34c753fSRafael Auler Function = nullptr; 4800a34c753fSRafael Auler 4801a34c753fSRafael Auler // For non-dynamic symtab, make sure the symbol section matches that of 4802a34c753fSRafael Auler // the function. It can mismatch e.g. if the symbol is a section marker 4803a34c753fSRafael Auler // in which case we treat the symbol separately from the function. 4804a34c753fSRafael Auler // For dynamic symbol table, the section index could be wrong on the input, 4805a34c753fSRafael Auler // and its value is ignored by the runtime if it's different from 4806a34c753fSRafael Auler // SHN_UNDEF and SHN_ABS. 4807a34c753fSRafael Auler if (!IsDynSym && Function && 4808a34c753fSRafael Auler Symbol.st_shndx != 4809a34c753fSRafael Auler Function->getOriginSection()->getSectionRef().getIndex()) 4810a34c753fSRafael Auler Function = nullptr; 4811a34c753fSRafael Auler 4812a34c753fSRafael Auler // Create a new symbol based on the existing symbol. 4813a34c753fSRafael Auler ELFSymTy NewSymbol = Symbol; 4814a34c753fSRafael Auler 4815bb627b0aSAmir Ayupov // Handle special symbols based on their name. 4816bb627b0aSAmir Ayupov Expected<StringRef> SymbolName = Symbol.getName(StringSection); 4817bb627b0aSAmir Ayupov assert(SymbolName && "cannot get symbol name"); 4818bb627b0aSAmir Ayupov 4819bb627b0aSAmir Ayupov auto updateSymbolValue = [&](const StringRef Name, 4820bb627b0aSAmir Ayupov std::optional<uint64_t> Value = std::nullopt) { 4821bb627b0aSAmir Ayupov NewSymbol.st_value = Value ? *Value : getNewValueForSymbol(Name); 4822bb627b0aSAmir Ayupov NewSymbol.st_shndx = ELF::SHN_ABS; 4823bb627b0aSAmir Ayupov BC->outs() << "BOLT-INFO: setting " << Name << " to 0x" 4824bb627b0aSAmir Ayupov << Twine::utohexstr(NewSymbol.st_value) << '\n'; 4825bb627b0aSAmir Ayupov }; 4826bb627b0aSAmir Ayupov 4827bb627b0aSAmir Ayupov if (*SymbolName == "__hot_start" || *SymbolName == "__hot_end") { 4828bb627b0aSAmir Ayupov if (opts::HotText) { 4829bb627b0aSAmir Ayupov updateSymbolValue(*SymbolName); 4830bb627b0aSAmir Ayupov ++NumHotTextSymsUpdated; 4831bb627b0aSAmir Ayupov } 4832bb627b0aSAmir Ayupov goto registerSymbol; 4833bb627b0aSAmir Ayupov } 4834bb627b0aSAmir Ayupov 4835bb627b0aSAmir Ayupov if (*SymbolName == "__hot_data_start" || *SymbolName == "__hot_data_end") { 4836bb627b0aSAmir Ayupov if (opts::HotData) { 4837bb627b0aSAmir Ayupov updateSymbolValue(*SymbolName); 4838bb627b0aSAmir Ayupov ++NumHotDataSymsUpdated; 4839bb627b0aSAmir Ayupov } 4840bb627b0aSAmir Ayupov goto registerSymbol; 4841bb627b0aSAmir Ayupov } 4842bb627b0aSAmir Ayupov 4843bb627b0aSAmir Ayupov if (*SymbolName == "_end") { 4844bb627b0aSAmir Ayupov if (NextAvailableAddress > Symbol.st_value) 4845bb627b0aSAmir Ayupov updateSymbolValue(*SymbolName, NextAvailableAddress); 4846bb627b0aSAmir Ayupov goto registerSymbol; 4847bb627b0aSAmir Ayupov } 4848bb627b0aSAmir Ayupov 4849a34c753fSRafael Auler if (Function) { 4850a34c753fSRafael Auler // If the symbol matched a function that was not emitted, update the 4851a34c753fSRafael Auler // corresponding section index but otherwise leave it unchanged. 4852a34c753fSRafael Auler if (Function->isEmitted()) { 4853a34c753fSRafael Auler NewSymbol.st_value = Function->getOutputAddress(); 4854a34c753fSRafael Auler NewSymbol.st_size = Function->getOutputSize(); 4855a34c753fSRafael Auler NewSymbol.st_shndx = Function->getCodeSection()->getIndex(); 4856a34c753fSRafael Auler } else if (Symbol.st_shndx < ELF::SHN_LORESERVE) { 4857a34c753fSRafael Auler NewSymbol.st_shndx = getNewSectionIndex(Symbol.st_shndx); 4858a34c753fSRafael Auler } 4859a34c753fSRafael Auler 4860a34c753fSRafael Auler // Add new symbols to the symbol table if necessary. 4861a34c753fSRafael Auler if (!IsDynSym) 4862a34c753fSRafael Auler addExtraSymbols(*Function, NewSymbol); 4863a34c753fSRafael Auler } else { 4864a34c753fSRafael Auler // Check if the function symbol matches address inside a function, i.e. 4865a34c753fSRafael Auler // it marks a secondary entry point. 486640c2e0faSMaksim Panchenko Function = 486740c2e0faSMaksim Panchenko (Symbol.getType() == ELF::STT_FUNC) 4868a34c753fSRafael Auler ? BC->getBinaryFunctionContainingAddress(Symbol.st_value, 4869a34c753fSRafael Auler /*CheckPastEnd=*/false, 4870a34c753fSRafael Auler /*UseMaxSize=*/true) 4871a34c753fSRafael Auler : nullptr; 4872a34c753fSRafael Auler 4873a34c753fSRafael Auler if (Function && Function->isEmitted()) { 4874275e075cSFabian Parzefall assert(Function->getLayout().isHotColdSplit() && 4875275e075cSFabian Parzefall "Adding symbols based on cold fragment when there are more than " 4876275e075cSFabian Parzefall "2 fragments"); 4877a34c753fSRafael Auler const uint64_t OutputAddress = 4878a34c753fSRafael Auler Function->translateInputToOutputAddress(Symbol.st_value); 4879a34c753fSRafael Auler 4880a34c753fSRafael Auler NewSymbol.st_value = OutputAddress; 4881a34c753fSRafael Auler // Force secondary entry points to have zero size. 4882a34c753fSRafael Auler NewSymbol.st_size = 0; 48839b6e7861SFabian Parzefall 48849b6e7861SFabian Parzefall // Find fragment containing entrypoint 48859b6e7861SFabian Parzefall FunctionLayout::fragment_const_iterator FF = llvm::find_if( 48869b6e7861SFabian Parzefall Function->getLayout().fragments(), [&](const FunctionFragment &FF) { 48879b6e7861SFabian Parzefall uint64_t Lo = FF.getAddress(); 48889b6e7861SFabian Parzefall uint64_t Hi = Lo + FF.getImageSize(); 48899b6e7861SFabian Parzefall return Lo <= OutputAddress && OutputAddress < Hi; 48909b6e7861SFabian Parzefall }); 48919b6e7861SFabian Parzefall 48929b6e7861SFabian Parzefall if (FF == Function->getLayout().fragment_end()) { 48939b6e7861SFabian Parzefall assert( 48949b6e7861SFabian Parzefall OutputAddress >= Function->getCodeSection()->getOutputAddress() && 48959b6e7861SFabian Parzefall OutputAddress < (Function->getCodeSection()->getOutputAddress() + 48969b6e7861SFabian Parzefall Function->getCodeSection()->getOutputSize()) && 48971a2f8336Sspaette "Cannot locate fragment containing secondary entrypoint"); 48989b6e7861SFabian Parzefall FF = Function->getLayout().fragment_begin(); 48999b6e7861SFabian Parzefall } 49009b6e7861SFabian Parzefall 490140c2e0faSMaksim Panchenko NewSymbol.st_shndx = 49029b6e7861SFabian Parzefall Function->getCodeSection(FF->getFragmentNum())->getIndex(); 4903a34c753fSRafael Auler } else { 4904a34c753fSRafael Auler // Check if the symbol belongs to moved data object and update it. 4905a34c753fSRafael Auler BinaryData *BD = opts::ReorderData.empty() 4906a34c753fSRafael Auler ? nullptr 4907a34c753fSRafael Auler : BC->getBinaryDataAtAddress(Symbol.st_value); 4908a34c753fSRafael Auler if (BD && BD->isMoved() && !BD->isJumpTable()) { 4909a34c753fSRafael Auler assert((!BD->getSize() || !Symbol.st_size || 4910a34c753fSRafael Auler Symbol.st_size == BD->getSize()) && 4911a34c753fSRafael Auler "sizes must match"); 4912a34c753fSRafael Auler 4913a34c753fSRafael Auler BinarySection &OutputSection = BD->getOutputSection(); 4914a34c753fSRafael Auler assert(OutputSection.getIndex()); 4915a34c753fSRafael Auler LLVM_DEBUG(dbgs() 4916a34c753fSRafael Auler << "BOLT-DEBUG: moving " << BD->getName() << " from " 4917a34c753fSRafael Auler << *BC->getSectionNameForAddress(Symbol.st_value) << " (" 4918a34c753fSRafael Auler << Symbol.st_shndx << ") to " << OutputSection.getName() 4919a34c753fSRafael Auler << " (" << OutputSection.getIndex() << ")\n"); 4920a34c753fSRafael Auler NewSymbol.st_shndx = OutputSection.getIndex(); 4921a34c753fSRafael Auler NewSymbol.st_value = BD->getOutputAddress(); 4922a34c753fSRafael Auler } else { 4923a34c753fSRafael Auler // Otherwise just update the section for the symbol. 4924ee0e9ccbSMaksim Panchenko if (Symbol.st_shndx < ELF::SHN_LORESERVE) 4925a34c753fSRafael Auler NewSymbol.st_shndx = getNewSectionIndex(Symbol.st_shndx); 4926a34c753fSRafael Auler } 4927a34c753fSRafael Auler 4928a34c753fSRafael Auler // Detect local syms in the text section that we didn't update 4929a34c753fSRafael Auler // and that were preserved by the linker to support relocations against 4930a34c753fSRafael Auler // .text. Remove them from the symtab. 4931a34c753fSRafael Auler if (Symbol.getType() == ELF::STT_NOTYPE && 493240c2e0faSMaksim Panchenko Symbol.getBinding() == ELF::STB_LOCAL && Symbol.st_size == 0) { 4933a34c753fSRafael Auler if (BC->getBinaryFunctionContainingAddress(Symbol.st_value, 4934a34c753fSRafael Auler /*CheckPastEnd=*/false, 4935a34c753fSRafael Auler /*UseMaxSize=*/true)) { 4936a34c753fSRafael Auler // Can only delete the symbol if not patching. Such symbols should 4937a34c753fSRafael Auler // not exist in the dynamic symbol table. 4938a34c753fSRafael Auler assert(!IsDynSym && "cannot delete symbol"); 4939a34c753fSRafael Auler continue; 4940a34c753fSRafael Auler } 4941a34c753fSRafael Auler } 4942a34c753fSRafael Auler } 4943a34c753fSRafael Auler } 4944a34c753fSRafael Auler 4945bb627b0aSAmir Ayupov registerSymbol: 4946ee0e9ccbSMaksim Panchenko if (IsDynSym) 4947a34c753fSRafael Auler Write((&Symbol - cantFail(Obj.symbols(&SymTabSection)).begin()) * 4948a34c753fSRafael Auler sizeof(ELFSymTy), 4949a34c753fSRafael Auler NewSymbol); 4950ee0e9ccbSMaksim Panchenko else 4951a34c753fSRafael Auler Symbols.emplace_back(NewSymbol); 4952a34c753fSRafael Auler } 4953a34c753fSRafael Auler 4954a34c753fSRafael Auler if (IsDynSym) { 4955a34c753fSRafael Auler assert(Symbols.empty()); 4956a34c753fSRafael Auler return; 4957a34c753fSRafael Auler } 4958a34c753fSRafael Auler 4959a34c753fSRafael Auler // Add symbols of injected functions 4960a34c753fSRafael Auler for (BinaryFunction *Function : BC->getInjectedBinaryFunctions()) { 4961a34c753fSRafael Auler ELFSymTy NewSymbol; 4962a34c753fSRafael Auler BinarySection *OriginSection = Function->getOriginSection(); 496340c2e0faSMaksim Panchenko NewSymbol.st_shndx = 496440c2e0faSMaksim Panchenko OriginSection 496540c2e0faSMaksim Panchenko ? getNewSectionIndex(OriginSection->getSectionRef().getIndex()) 496640c2e0faSMaksim Panchenko : Function->getCodeSection()->getIndex(); 4967a34c753fSRafael Auler NewSymbol.st_value = Function->getOutputAddress(); 4968a34c753fSRafael Auler NewSymbol.st_name = AddToStrTab(Function->getOneName()); 4969a34c753fSRafael Auler NewSymbol.st_size = Function->getOutputSize(); 4970a34c753fSRafael Auler NewSymbol.st_other = 0; 4971a34c753fSRafael Auler NewSymbol.setBindingAndType(ELF::STB_LOCAL, ELF::STT_FUNC); 4972a34c753fSRafael Auler Symbols.emplace_back(NewSymbol); 4973a34c753fSRafael Auler 4974a34c753fSRafael Auler if (Function->isSplit()) { 49750f74d191SFabian Parzefall assert(Function->getLayout().isHotColdSplit() && 49760f74d191SFabian Parzefall "Adding symbols based on cold fragment when there are more than " 49770f74d191SFabian Parzefall "2 fragments"); 4978a34c753fSRafael Auler ELFSymTy NewColdSym = NewSymbol; 4979a34c753fSRafael Auler NewColdSym.setType(ELF::STT_NOTYPE); 4980a34c753fSRafael Auler SmallVector<char, 256> Buf; 4981a34c753fSRafael Auler NewColdSym.st_name = AddToStrTab( 4982a34c753fSRafael Auler Twine(Function->getPrintName()).concat(".cold.0").toStringRef(Buf)); 49839b6e7861SFabian Parzefall const FunctionFragment &ColdFF = 49849b6e7861SFabian Parzefall Function->getLayout().getFragment(FragmentNum::cold()); 49859b6e7861SFabian Parzefall NewColdSym.st_value = ColdFF.getAddress(); 49869b6e7861SFabian Parzefall NewColdSym.st_size = ColdFF.getImageSize(); 4987a34c753fSRafael Auler Symbols.emplace_back(NewColdSym); 4988a34c753fSRafael Auler } 4989a34c753fSRafael Auler } 4990a34c753fSRafael Auler 4991c6731d38SVladislav Khmelevsky auto AddSymbol = [&](const StringRef &Name, uint64_t Address) { 4992c6731d38SVladislav Khmelevsky if (!Address) 4993c6731d38SVladislav Khmelevsky return; 4994a34c753fSRafael Auler 4995a34c753fSRafael Auler ELFSymTy Symbol; 4996c6731d38SVladislav Khmelevsky Symbol.st_value = Address; 4997a34c753fSRafael Auler Symbol.st_shndx = ELF::SHN_ABS; 4998a34c753fSRafael Auler Symbol.st_name = AddToStrTab(Name); 4999a34c753fSRafael Auler Symbol.st_size = 0; 5000a34c753fSRafael Auler Symbol.st_other = 0; 5001a34c753fSRafael Auler Symbol.setBindingAndType(ELF::STB_WEAK, ELF::STT_NOTYPE); 5002a34c753fSRafael Auler 500352cf0711SAmir Ayupov BC->outs() << "BOLT-INFO: setting " << Name << " to 0x" 5004a34c753fSRafael Auler << Twine::utohexstr(Symbol.st_value) << '\n'; 5005a34c753fSRafael Auler 5006a34c753fSRafael Auler Symbols.emplace_back(Symbol); 5007a34c753fSRafael Auler }; 5008a34c753fSRafael Auler 5009c6731d38SVladislav Khmelevsky // Add runtime library start and fini address symbols 5010c6731d38SVladislav Khmelevsky if (RuntimeLibrary *RtLibrary = BC->getRuntimeLibrary()) { 5011c6731d38SVladislav Khmelevsky AddSymbol("__bolt_runtime_start", RtLibrary->getRuntimeStartAddress()); 5012c6731d38SVladislav Khmelevsky AddSymbol("__bolt_runtime_fini", RtLibrary->getRuntimeFiniAddress()); 5013c6731d38SVladislav Khmelevsky } 5014c6731d38SVladislav Khmelevsky 5015c6731d38SVladislav Khmelevsky assert((!NumHotTextSymsUpdated || NumHotTextSymsUpdated == 2) && 5016c6731d38SVladislav Khmelevsky "either none or both __hot_start/__hot_end symbols were expected"); 5017c6731d38SVladislav Khmelevsky assert((!NumHotDataSymsUpdated || NumHotDataSymsUpdated == 2) && 5018c6731d38SVladislav Khmelevsky "either none or both __hot_data_start/__hot_data_end symbols were " 5019c6731d38SVladislav Khmelevsky "expected"); 5020c6731d38SVladislav Khmelevsky 5021c6731d38SVladislav Khmelevsky auto AddEmittedSymbol = [&](const StringRef &Name) { 5022c6731d38SVladislav Khmelevsky AddSymbol(Name, getNewValueForSymbol(Name)); 5023c6731d38SVladislav Khmelevsky }; 5024c6731d38SVladislav Khmelevsky 5025a34c753fSRafael Auler if (opts::HotText && !NumHotTextSymsUpdated) { 5026c6731d38SVladislav Khmelevsky AddEmittedSymbol("__hot_start"); 5027c6731d38SVladislav Khmelevsky AddEmittedSymbol("__hot_end"); 5028a34c753fSRafael Auler } 5029a34c753fSRafael Auler 5030a34c753fSRafael Auler if (opts::HotData && !NumHotDataSymsUpdated) { 5031c6731d38SVladislav Khmelevsky AddEmittedSymbol("__hot_data_start"); 5032c6731d38SVladislav Khmelevsky AddEmittedSymbol("__hot_data_end"); 5033a34c753fSRafael Auler } 5034a34c753fSRafael Auler 5035a34c753fSRafael Auler // Put local symbols at the beginning. 5036d2c87699SAmir Ayupov llvm::stable_sort(Symbols, [](const ELFSymTy &A, const ELFSymTy &B) { 5037d2c87699SAmir Ayupov if (A.getBinding() == ELF::STB_LOCAL && B.getBinding() != ELF::STB_LOCAL) 5038a34c753fSRafael Auler return true; 5039a34c753fSRafael Auler return false; 5040a34c753fSRafael Auler }); 5041a34c753fSRafael Auler 5042ee0e9ccbSMaksim Panchenko for (const ELFSymTy &Symbol : Symbols) 5043a34c753fSRafael Auler Write(0, Symbol); 5044a34c753fSRafael Auler } 5045a34c753fSRafael Auler 5046a34c753fSRafael Auler template <typename ELFT> 5047a34c753fSRafael Auler void RewriteInstance::patchELFSymTabs(ELFObjectFile<ELFT> *File) { 5048a34c753fSRafael Auler const ELFFile<ELFT> &Obj = File->getELFFile(); 5049a34c753fSRafael Auler using ELFShdrTy = typename ELFObjectFile<ELFT>::Elf_Shdr; 5050a34c753fSRafael Auler using ELFSymTy = typename ELFObjectFile<ELFT>::Elf_Sym; 5051a34c753fSRafael Auler 5052a34c753fSRafael Auler // Compute a preview of how section indices will change after rewriting, so 5053a34c753fSRafael Auler // we can properly update the symbol table based on new section indices. 5054a34c753fSRafael Auler std::vector<uint32_t> NewSectionIndex; 5055a34c753fSRafael Auler getOutputSections(File, NewSectionIndex); 5056a34c753fSRafael Auler 5057a34c753fSRafael Auler // Update dynamic symbol table. 5058a34c753fSRafael Auler const ELFShdrTy *DynSymSection = nullptr; 5059a34c753fSRafael Auler for (const ELFShdrTy &Section : cantFail(Obj.sections())) { 5060a34c753fSRafael Auler if (Section.sh_type == ELF::SHT_DYNSYM) { 5061a34c753fSRafael Auler DynSymSection = &Section; 5062a34c753fSRafael Auler break; 5063a34c753fSRafael Auler } 5064a34c753fSRafael Auler } 5065a34c753fSRafael Auler assert((DynSymSection || BC->IsStaticExecutable) && 5066a34c753fSRafael Auler "dynamic symbol table expected"); 5067a34c753fSRafael Auler if (DynSymSection) { 5068a34c753fSRafael Auler updateELFSymbolTable( 5069a34c753fSRafael Auler File, 5070a34c753fSRafael Auler /*IsDynSym=*/true, 5071a34c753fSRafael Auler *DynSymSection, 5072a34c753fSRafael Auler NewSectionIndex, 5073a34c753fSRafael Auler [&](size_t Offset, const ELFSymTy &Sym) { 5074a34c753fSRafael Auler Out->os().pwrite(reinterpret_cast<const char *>(&Sym), 5075a34c753fSRafael Auler sizeof(ELFSymTy), 5076a34c753fSRafael Auler DynSymSection->sh_offset + Offset); 5077a34c753fSRafael Auler }, 5078a34c753fSRafael Auler [](StringRef) -> size_t { return 0; }); 5079a34c753fSRafael Auler } 5080a34c753fSRafael Auler 5081a34c753fSRafael Auler if (opts::RemoveSymtab) 5082a34c753fSRafael Auler return; 5083a34c753fSRafael Auler 5084a34c753fSRafael Auler // (re)create regular symbol table. 5085a34c753fSRafael Auler const ELFShdrTy *SymTabSection = nullptr; 5086a34c753fSRafael Auler for (const ELFShdrTy &Section : cantFail(Obj.sections())) { 5087a34c753fSRafael Auler if (Section.sh_type == ELF::SHT_SYMTAB) { 5088a34c753fSRafael Auler SymTabSection = &Section; 5089a34c753fSRafael Auler break; 5090a34c753fSRafael Auler } 5091a34c753fSRafael Auler } 5092a34c753fSRafael Auler if (!SymTabSection) { 509352cf0711SAmir Ayupov BC->errs() << "BOLT-WARNING: no symbol table found\n"; 5094a34c753fSRafael Auler return; 5095a34c753fSRafael Auler } 5096a34c753fSRafael Auler 5097a34c753fSRafael Auler const ELFShdrTy *StrTabSection = 5098a34c753fSRafael Auler cantFail(Obj.getSection(SymTabSection->sh_link)); 5099a34c753fSRafael Auler std::string NewContents; 5100a34c753fSRafael Auler std::string NewStrTab = std::string( 5101a34c753fSRafael Auler File->getData().substr(StrTabSection->sh_offset, StrTabSection->sh_size)); 5102a34c753fSRafael Auler StringRef SecName = cantFail(Obj.getSectionName(*SymTabSection)); 5103a34c753fSRafael Auler StringRef StrSecName = cantFail(Obj.getSectionName(*StrTabSection)); 5104a34c753fSRafael Auler 5105a34c753fSRafael Auler NumLocalSymbols = 0; 5106a34c753fSRafael Auler updateELFSymbolTable( 5107a34c753fSRafael Auler File, 5108a34c753fSRafael Auler /*IsDynSym=*/false, 5109a34c753fSRafael Auler *SymTabSection, 5110a34c753fSRafael Auler NewSectionIndex, 5111a34c753fSRafael Auler [&](size_t Offset, const ELFSymTy &Sym) { 5112a34c753fSRafael Auler if (Sym.getBinding() == ELF::STB_LOCAL) 5113a34c753fSRafael Auler ++NumLocalSymbols; 5114a34c753fSRafael Auler NewContents.append(reinterpret_cast<const char *>(&Sym), 5115a34c753fSRafael Auler sizeof(ELFSymTy)); 5116a34c753fSRafael Auler }, 5117a34c753fSRafael Auler [&](StringRef Str) { 5118a34c753fSRafael Auler size_t Idx = NewStrTab.size(); 5119a34c753fSRafael Auler NewStrTab.append(NameResolver::restore(Str).str()); 5120a34c753fSRafael Auler NewStrTab.append(1, '\0'); 5121a34c753fSRafael Auler return Idx; 5122a34c753fSRafael Auler }); 5123a34c753fSRafael Auler 5124a34c753fSRafael Auler BC->registerOrUpdateNoteSection(SecName, 5125a34c753fSRafael Auler copyByteArray(NewContents), 5126a34c753fSRafael Auler NewContents.size(), 5127a34c753fSRafael Auler /*Alignment=*/1, 5128a34c753fSRafael Auler /*IsReadOnly=*/true, 5129a34c753fSRafael Auler ELF::SHT_SYMTAB); 5130a34c753fSRafael Auler 5131a34c753fSRafael Auler BC->registerOrUpdateNoteSection(StrSecName, 5132a34c753fSRafael Auler copyByteArray(NewStrTab), 5133a34c753fSRafael Auler NewStrTab.size(), 5134a34c753fSRafael Auler /*Alignment=*/1, 5135a34c753fSRafael Auler /*IsReadOnly=*/true, 5136a34c753fSRafael Auler ELF::SHT_STRTAB); 5137a34c753fSRafael Auler } 5138a34c753fSRafael Auler 5139a34c753fSRafael Auler template <typename ELFT> 5140f9bf9f92SVladislav Khmelevsky void RewriteInstance::patchELFAllocatableRelrSection( 5141f9bf9f92SVladislav Khmelevsky ELFObjectFile<ELFT> *File) { 5142f9bf9f92SVladislav Khmelevsky if (!DynamicRelrAddress) 5143f9bf9f92SVladislav Khmelevsky return; 5144f9bf9f92SVladislav Khmelevsky 5145f9bf9f92SVladislav Khmelevsky raw_fd_ostream &OS = Out->os(); 5146f9bf9f92SVladislav Khmelevsky const uint8_t PSize = BC->AsmInfo->getCodePointerSize(); 5147f9bf9f92SVladislav Khmelevsky const uint64_t MaxDelta = ((CHAR_BIT * DynamicRelrEntrySize) - 1) * PSize; 5148f9bf9f92SVladislav Khmelevsky 51490053cb8eSVladislav Khmelevsky auto FixAddend = [&](const BinarySection &Section, const Relocation &Rel, 51500053cb8eSVladislav Khmelevsky uint64_t FileOffset) { 5151f9bf9f92SVladislav Khmelevsky // Fix relocation symbol value in place if no static relocation found 51520053cb8eSVladislav Khmelevsky // on the same address. We won't check the BF relocations here since it 51530053cb8eSVladislav Khmelevsky // is rare case and no optimization is required. 5154f9bf9f92SVladislav Khmelevsky if (Section.getRelocationAt(Rel.Offset)) 5155f9bf9f92SVladislav Khmelevsky return; 5156f9bf9f92SVladislav Khmelevsky 5157f9bf9f92SVladislav Khmelevsky // No fixup needed if symbol address was not changed 5158f9bf9f92SVladislav Khmelevsky const uint64_t Addend = getNewFunctionOrDataAddress(Rel.Addend); 5159f9bf9f92SVladislav Khmelevsky if (!Addend) 5160f9bf9f92SVladislav Khmelevsky return; 5161f9bf9f92SVladislav Khmelevsky 5162f9bf9f92SVladislav Khmelevsky OS.pwrite(reinterpret_cast<const char *>(&Addend), PSize, FileOffset); 5163f9bf9f92SVladislav Khmelevsky }; 5164f9bf9f92SVladislav Khmelevsky 5165f9bf9f92SVladislav Khmelevsky // Fill new relative relocation offsets set 5166f9bf9f92SVladislav Khmelevsky std::set<uint64_t> RelOffsets; 5167f9bf9f92SVladislav Khmelevsky for (const BinarySection &Section : BC->allocatableSections()) { 5168f9bf9f92SVladislav Khmelevsky const uint64_t SectionInputAddress = Section.getAddress(); 5169f9bf9f92SVladislav Khmelevsky uint64_t SectionAddress = Section.getOutputAddress(); 5170f9bf9f92SVladislav Khmelevsky if (!SectionAddress) 5171f9bf9f92SVladislav Khmelevsky SectionAddress = SectionInputAddress; 5172f9bf9f92SVladislav Khmelevsky 5173f9bf9f92SVladislav Khmelevsky for (const Relocation &Rel : Section.dynamicRelocations()) { 5174f9bf9f92SVladislav Khmelevsky if (!Rel.isRelative()) 5175f9bf9f92SVladislav Khmelevsky continue; 5176f9bf9f92SVladislav Khmelevsky 5177f9bf9f92SVladislav Khmelevsky uint64_t RelOffset = 5178f9bf9f92SVladislav Khmelevsky getNewFunctionOrDataAddress(SectionInputAddress + Rel.Offset); 5179f9bf9f92SVladislav Khmelevsky 5180f9bf9f92SVladislav Khmelevsky RelOffset = RelOffset == 0 ? SectionAddress + Rel.Offset : RelOffset; 5181f9bf9f92SVladislav Khmelevsky assert((RelOffset & 1) == 0 && "Wrong relocation offset"); 5182f9bf9f92SVladislav Khmelevsky RelOffsets.emplace(RelOffset); 51830053cb8eSVladislav Khmelevsky FixAddend(Section, Rel, RelOffset); 5184f9bf9f92SVladislav Khmelevsky } 5185f9bf9f92SVladislav Khmelevsky } 5186f9bf9f92SVladislav Khmelevsky 5187f9bf9f92SVladislav Khmelevsky ErrorOr<BinarySection &> Section = 5188f9bf9f92SVladislav Khmelevsky BC->getSectionForAddress(*DynamicRelrAddress); 5189f9bf9f92SVladislav Khmelevsky assert(Section && "cannot get .relr.dyn section"); 5190f9bf9f92SVladislav Khmelevsky assert(Section->isRelr() && "Expected section to be SHT_RELR type"); 5191f9bf9f92SVladislav Khmelevsky uint64_t RelrDynOffset = Section->getInputFileOffset(); 5192f9bf9f92SVladislav Khmelevsky const uint64_t RelrDynEndOffset = RelrDynOffset + Section->getSize(); 5193f9bf9f92SVladislav Khmelevsky 5194f9bf9f92SVladislav Khmelevsky auto WriteRelr = [&](uint64_t Value) { 5195f9bf9f92SVladislav Khmelevsky if (RelrDynOffset + DynamicRelrEntrySize > RelrDynEndOffset) { 519652cf0711SAmir Ayupov BC->errs() << "BOLT-ERROR: Offset overflow for relr.dyn section\n"; 5197f9bf9f92SVladislav Khmelevsky exit(1); 5198f9bf9f92SVladislav Khmelevsky } 5199f9bf9f92SVladislav Khmelevsky 5200f9bf9f92SVladislav Khmelevsky OS.pwrite(reinterpret_cast<const char *>(&Value), DynamicRelrEntrySize, 5201f9bf9f92SVladislav Khmelevsky RelrDynOffset); 5202f9bf9f92SVladislav Khmelevsky RelrDynOffset += DynamicRelrEntrySize; 5203f9bf9f92SVladislav Khmelevsky }; 5204f9bf9f92SVladislav Khmelevsky 5205f9bf9f92SVladislav Khmelevsky for (auto RelIt = RelOffsets.begin(); RelIt != RelOffsets.end();) { 5206f9bf9f92SVladislav Khmelevsky WriteRelr(*RelIt); 5207f9bf9f92SVladislav Khmelevsky uint64_t Base = *RelIt++ + PSize; 5208f9bf9f92SVladislav Khmelevsky while (1) { 5209f9bf9f92SVladislav Khmelevsky uint64_t Bitmap = 0; 5210f9bf9f92SVladislav Khmelevsky for (; RelIt != RelOffsets.end(); ++RelIt) { 5211f9bf9f92SVladislav Khmelevsky const uint64_t Delta = *RelIt - Base; 5212f9bf9f92SVladislav Khmelevsky if (Delta >= MaxDelta || Delta % PSize) 5213f9bf9f92SVladislav Khmelevsky break; 5214f9bf9f92SVladislav Khmelevsky 5215f9bf9f92SVladislav Khmelevsky Bitmap |= (1ULL << (Delta / PSize)); 5216f9bf9f92SVladislav Khmelevsky } 5217f9bf9f92SVladislav Khmelevsky 5218f9bf9f92SVladislav Khmelevsky if (!Bitmap) 5219f9bf9f92SVladislav Khmelevsky break; 5220f9bf9f92SVladislav Khmelevsky 5221f9bf9f92SVladislav Khmelevsky WriteRelr((Bitmap << 1) | 1); 5222f9bf9f92SVladislav Khmelevsky Base += MaxDelta; 5223f9bf9f92SVladislav Khmelevsky } 5224f9bf9f92SVladislav Khmelevsky } 5225f9bf9f92SVladislav Khmelevsky 5226f9bf9f92SVladislav Khmelevsky // Fill the rest of the section with empty bitmap value 5227f9bf9f92SVladislav Khmelevsky while (RelrDynOffset != RelrDynEndOffset) 5228f9bf9f92SVladislav Khmelevsky WriteRelr(1); 5229f9bf9f92SVladislav Khmelevsky } 5230f9bf9f92SVladislav Khmelevsky 5231f9bf9f92SVladislav Khmelevsky template <typename ELFT> 5232a34c753fSRafael Auler void 5233a34c753fSRafael Auler RewriteInstance::patchELFAllocatableRelaSections(ELFObjectFile<ELFT> *File) { 5234a34c753fSRafael Auler using Elf_Rela = typename ELFT::Rela; 5235a34c753fSRafael Auler raw_fd_ostream &OS = Out->os(); 5236228970f6Sspupyrev const ELFFile<ELFT> &EF = File->getELFFile(); 5237a34c753fSRafael Auler 5238228970f6Sspupyrev uint64_t RelDynOffset = 0, RelDynEndOffset = 0; 5239228970f6Sspupyrev uint64_t RelPltOffset = 0, RelPltEndOffset = 0; 5240228970f6Sspupyrev 5241228970f6Sspupyrev auto setSectionFileOffsets = [&](uint64_t Address, uint64_t &Start, 5242228970f6Sspupyrev uint64_t &End) { 5243228970f6Sspupyrev ErrorOr<BinarySection &> Section = BC->getSectionForAddress(Address); 52447117af52SVladislav Khmelevsky assert(Section && "cannot get relocation section"); 5245228970f6Sspupyrev Start = Section->getInputFileOffset(); 5246228970f6Sspupyrev End = Start + Section->getSize(); 5247228970f6Sspupyrev }; 5248228970f6Sspupyrev 5249228970f6Sspupyrev if (!DynamicRelocationsAddress && !PLTRelocationsAddress) 5250228970f6Sspupyrev return; 5251228970f6Sspupyrev 5252228970f6Sspupyrev if (DynamicRelocationsAddress) 5253228970f6Sspupyrev setSectionFileOffsets(*DynamicRelocationsAddress, RelDynOffset, 5254228970f6Sspupyrev RelDynEndOffset); 5255228970f6Sspupyrev 5256228970f6Sspupyrev if (PLTRelocationsAddress) 5257228970f6Sspupyrev setSectionFileOffsets(*PLTRelocationsAddress, RelPltOffset, 5258228970f6Sspupyrev RelPltEndOffset); 5259228970f6Sspupyrev 5260228970f6Sspupyrev DynamicRelativeRelocationsCount = 0; 5261228970f6Sspupyrev 5262228970f6Sspupyrev auto writeRela = [&OS](const Elf_Rela *RelA, uint64_t &Offset) { 5263228970f6Sspupyrev OS.pwrite(reinterpret_cast<const char *>(RelA), sizeof(*RelA), Offset); 5264228970f6Sspupyrev Offset += sizeof(*RelA); 5265228970f6Sspupyrev }; 5266228970f6Sspupyrev 5267228970f6Sspupyrev auto writeRelocations = [&](bool PatchRelative) { 5268228970f6Sspupyrev for (BinarySection &Section : BC->allocatableSections()) { 52697117af52SVladislav Khmelevsky const uint64_t SectionInputAddress = Section.getAddress(); 52707117af52SVladislav Khmelevsky uint64_t SectionAddress = Section.getOutputAddress(); 52717117af52SVladislav Khmelevsky if (!SectionAddress) 52727117af52SVladislav Khmelevsky SectionAddress = SectionInputAddress; 52737117af52SVladislav Khmelevsky 5274228970f6Sspupyrev for (const Relocation &Rel : Section.dynamicRelocations()) { 5275228970f6Sspupyrev const bool IsRelative = Rel.isRelative(); 5276228970f6Sspupyrev if (PatchRelative != IsRelative) 5277ee0e9ccbSMaksim Panchenko continue; 5278228970f6Sspupyrev 5279228970f6Sspupyrev if (IsRelative) 5280228970f6Sspupyrev ++DynamicRelativeRelocationsCount; 5281228970f6Sspupyrev 5282228970f6Sspupyrev Elf_Rela NewRelA; 5283228970f6Sspupyrev MCSymbol *Symbol = Rel.Symbol; 5284228970f6Sspupyrev uint32_t SymbolIdx = 0; 5285228970f6Sspupyrev uint64_t Addend = Rel.Addend; 52867117af52SVladislav Khmelevsky uint64_t RelOffset = 52877117af52SVladislav Khmelevsky getNewFunctionOrDataAddress(SectionInputAddress + Rel.Offset); 5288228970f6Sspupyrev 52897117af52SVladislav Khmelevsky RelOffset = RelOffset == 0 ? SectionAddress + Rel.Offset : RelOffset; 5290228970f6Sspupyrev if (Rel.Symbol) { 5291228970f6Sspupyrev SymbolIdx = getOutputDynamicSymbolIndex(Symbol); 5292228970f6Sspupyrev } else { 5293228970f6Sspupyrev // Usually this case is used for R_*_(I)RELATIVE relocations 5294228970f6Sspupyrev const uint64_t Address = getNewFunctionOrDataAddress(Addend); 5295228970f6Sspupyrev if (Address) 5296228970f6Sspupyrev Addend = Address; 5297228970f6Sspupyrev } 5298228970f6Sspupyrev 5299228970f6Sspupyrev NewRelA.setSymbolAndType(SymbolIdx, Rel.Type, EF.isMips64EL()); 53007117af52SVladislav Khmelevsky NewRelA.r_offset = RelOffset; 5301228970f6Sspupyrev NewRelA.r_addend = Addend; 5302228970f6Sspupyrev 53034e585e51SKazu Hirata const bool IsJmpRel = IsJmpRelocation.contains(Rel.Type); 5304228970f6Sspupyrev uint64_t &Offset = IsJmpRel ? RelPltOffset : RelDynOffset; 5305228970f6Sspupyrev const uint64_t &EndOffset = 5306228970f6Sspupyrev IsJmpRel ? RelPltEndOffset : RelDynEndOffset; 5307228970f6Sspupyrev if (!Offset || !EndOffset) { 530852cf0711SAmir Ayupov BC->errs() << "BOLT-ERROR: Invalid offsets for dynamic relocation\n"; 5309228970f6Sspupyrev exit(1); 5310228970f6Sspupyrev } 5311228970f6Sspupyrev 5312228970f6Sspupyrev if (Offset + sizeof(NewRelA) > EndOffset) { 531352cf0711SAmir Ayupov BC->errs() << "BOLT-ERROR: Offset overflow for dynamic relocation\n"; 5314228970f6Sspupyrev exit(1); 5315228970f6Sspupyrev } 5316228970f6Sspupyrev 5317228970f6Sspupyrev writeRela(&NewRelA, Offset); 5318a34c753fSRafael Auler } 5319a34c753fSRafael Auler } 5320228970f6Sspupyrev }; 5321228970f6Sspupyrev 5322f9bf9f92SVladislav Khmelevsky // Place R_*_RELATIVE relocations in RELA section if RELR is not presented. 5323f9bf9f92SVladislav Khmelevsky // The dynamic linker expects all R_*_RELATIVE relocations in RELA 5324f9bf9f92SVladislav Khmelevsky // to be emitted first. 5325f9bf9f92SVladislav Khmelevsky if (!DynamicRelrAddress) 5326228970f6Sspupyrev writeRelocations(/* PatchRelative */ true); 5327228970f6Sspupyrev writeRelocations(/* PatchRelative */ false); 5328228970f6Sspupyrev 5329228970f6Sspupyrev auto fillNone = [&](uint64_t &Offset, uint64_t EndOffset) { 5330228970f6Sspupyrev if (!Offset) 5331228970f6Sspupyrev return; 5332228970f6Sspupyrev 5333228970f6Sspupyrev typename ELFObjectFile<ELFT>::Elf_Rela RelA; 5334228970f6Sspupyrev RelA.setSymbolAndType(0, Relocation::getNone(), EF.isMips64EL()); 5335228970f6Sspupyrev RelA.r_offset = 0; 5336228970f6Sspupyrev RelA.r_addend = 0; 5337228970f6Sspupyrev while (Offset < EndOffset) 5338228970f6Sspupyrev writeRela(&RelA, Offset); 5339228970f6Sspupyrev 5340228970f6Sspupyrev assert(Offset == EndOffset && "Unexpected section overflow"); 5341228970f6Sspupyrev }; 5342228970f6Sspupyrev 5343228970f6Sspupyrev // Fill the rest of the sections with R_*_NONE relocations 5344228970f6Sspupyrev fillNone(RelDynOffset, RelDynEndOffset); 5345228970f6Sspupyrev fillNone(RelPltOffset, RelPltEndOffset); 5346a34c753fSRafael Auler } 5347a34c753fSRafael Auler 5348a34c753fSRafael Auler template <typename ELFT> 5349a34c753fSRafael Auler void RewriteInstance::patchELFGOT(ELFObjectFile<ELFT> *File) { 5350a34c753fSRafael Auler raw_fd_ostream &OS = Out->os(); 5351a34c753fSRafael Auler 5352a34c753fSRafael Auler SectionRef GOTSection; 5353a34c753fSRafael Auler for (const SectionRef &Section : File->sections()) { 5354a34c753fSRafael Auler StringRef SectionName = cantFail(Section.getName()); 5355a34c753fSRafael Auler if (SectionName == ".got") { 5356a34c753fSRafael Auler GOTSection = Section; 5357a34c753fSRafael Auler break; 5358a34c753fSRafael Auler } 5359a34c753fSRafael Auler } 5360a34c753fSRafael Auler if (!GOTSection.getObject()) { 5361228970f6Sspupyrev if (!BC->IsStaticExecutable) 536252cf0711SAmir Ayupov BC->errs() << "BOLT-INFO: no .got section found\n"; 5363a34c753fSRafael Auler return; 5364a34c753fSRafael Auler } 5365a34c753fSRafael Auler 5366a34c753fSRafael Auler StringRef GOTContents = cantFail(GOTSection.getContents()); 5367a34c753fSRafael Auler for (const uint64_t *GOTEntry = 5368a34c753fSRafael Auler reinterpret_cast<const uint64_t *>(GOTContents.data()); 5369a34c753fSRafael Auler GOTEntry < reinterpret_cast<const uint64_t *>(GOTContents.data() + 5370a34c753fSRafael Auler GOTContents.size()); 5371a34c753fSRafael Auler ++GOTEntry) { 5372a34c753fSRafael Auler if (uint64_t NewAddress = getNewFunctionAddress(*GOTEntry)) { 5373a34c753fSRafael Auler LLVM_DEBUG(dbgs() << "BOLT-DEBUG: patching GOT entry 0x" 5374a34c753fSRafael Auler << Twine::utohexstr(*GOTEntry) << " with 0x" 5375a34c753fSRafael Auler << Twine::utohexstr(NewAddress) << '\n'); 5376a34c753fSRafael Auler OS.pwrite(reinterpret_cast<const char *>(&NewAddress), sizeof(NewAddress), 537740c2e0faSMaksim Panchenko reinterpret_cast<const char *>(GOTEntry) - 537840c2e0faSMaksim Panchenko File->getData().data()); 5379a34c753fSRafael Auler } 5380a34c753fSRafael Auler } 5381a34c753fSRafael Auler } 5382a34c753fSRafael Auler 5383a34c753fSRafael Auler template <typename ELFT> 5384a34c753fSRafael Auler void RewriteInstance::patchELFDynamic(ELFObjectFile<ELFT> *File) { 5385a34c753fSRafael Auler if (BC->IsStaticExecutable) 5386a34c753fSRafael Auler return; 5387a34c753fSRafael Auler 5388a34c753fSRafael Auler const ELFFile<ELFT> &Obj = File->getELFFile(); 5389a34c753fSRafael Auler raw_fd_ostream &OS = Out->os(); 5390a34c753fSRafael Auler 5391a34c753fSRafael Auler using Elf_Phdr = typename ELFFile<ELFT>::Elf_Phdr; 5392a34c753fSRafael Auler using Elf_Dyn = typename ELFFile<ELFT>::Elf_Dyn; 5393a34c753fSRafael Auler 5394a34c753fSRafael Auler // Locate DYNAMIC by looking through program headers. 5395a34c753fSRafael Auler uint64_t DynamicOffset = 0; 539660db8d9bSKazu Hirata const Elf_Phdr *DynamicPhdr = nullptr; 5397a34c753fSRafael Auler for (const Elf_Phdr &Phdr : cantFail(Obj.program_headers())) { 5398a34c753fSRafael Auler if (Phdr.p_type == ELF::PT_DYNAMIC) { 5399a34c753fSRafael Auler DynamicOffset = Phdr.p_offset; 5400a34c753fSRafael Auler DynamicPhdr = &Phdr; 5401a34c753fSRafael Auler assert(Phdr.p_memsz == Phdr.p_filesz && "dynamic sizes should match"); 5402a34c753fSRafael Auler break; 5403a34c753fSRafael Auler } 5404a34c753fSRafael Auler } 5405a34c753fSRafael Auler assert(DynamicPhdr && "missing dynamic in ELF binary"); 5406a34c753fSRafael Auler 5407a34c753fSRafael Auler bool ZNowSet = false; 5408a34c753fSRafael Auler 5409a34c753fSRafael Auler // Go through all dynamic entries and patch functions addresses with 5410a34c753fSRafael Auler // new ones. 5411a34c753fSRafael Auler typename ELFT::DynRange DynamicEntries = 5412a34c753fSRafael Auler cantFail(Obj.dynamicEntries(), "error accessing dynamic table"); 5413a34c753fSRafael Auler auto DTB = DynamicEntries.begin(); 5414a34c753fSRafael Auler for (const Elf_Dyn &Dyn : DynamicEntries) { 5415a34c753fSRafael Auler Elf_Dyn NewDE = Dyn; 5416a34c753fSRafael Auler bool ShouldPatch = true; 5417a34c753fSRafael Auler switch (Dyn.d_tag) { 5418a34c753fSRafael Auler default: 5419a34c753fSRafael Auler ShouldPatch = false; 5420a34c753fSRafael Auler break; 5421228970f6Sspupyrev case ELF::DT_RELACOUNT: 5422228970f6Sspupyrev NewDE.d_un.d_val = DynamicRelativeRelocationsCount; 5423228970f6Sspupyrev break; 5424a34c753fSRafael Auler case ELF::DT_INIT: 5425df288e84SMaksim Panchenko case ELF::DT_FINI: { 5426a34c753fSRafael Auler if (BC->HasRelocations) { 5427a34c753fSRafael Auler if (uint64_t NewAddress = getNewFunctionAddress(Dyn.getPtr())) { 5428a34c753fSRafael Auler LLVM_DEBUG(dbgs() << "BOLT-DEBUG: patching dynamic entry of type " 5429a34c753fSRafael Auler << Dyn.getTag() << '\n'); 5430a34c753fSRafael Auler NewDE.d_un.d_ptr = NewAddress; 5431a34c753fSRafael Auler } 5432a34c753fSRafael Auler } 5433df288e84SMaksim Panchenko RuntimeLibrary *RtLibrary = BC->getRuntimeLibrary(); 5434df288e84SMaksim Panchenko if (RtLibrary && Dyn.getTag() == ELF::DT_FINI) { 5435ee0e9ccbSMaksim Panchenko if (uint64_t Addr = RtLibrary->getRuntimeFiniAddress()) 5436a34c753fSRafael Auler NewDE.d_un.d_ptr = Addr; 5437a34c753fSRafael Auler } 5438df288e84SMaksim Panchenko if (RtLibrary && Dyn.getTag() == ELF::DT_INIT && !BC->HasInterpHeader) { 5439a34c753fSRafael Auler if (auto Addr = RtLibrary->getRuntimeStartAddress()) { 5440a34c753fSRafael Auler LLVM_DEBUG(dbgs() << "BOLT-DEBUG: Set DT_INIT to 0x" 5441a34c753fSRafael Auler << Twine::utohexstr(Addr) << '\n'); 5442a34c753fSRafael Auler NewDE.d_un.d_ptr = Addr; 5443a34c753fSRafael Auler } 5444a34c753fSRafael Auler } 5445a34c753fSRafael Auler break; 5446df288e84SMaksim Panchenko } 5447a34c753fSRafael Auler case ELF::DT_FLAGS: 5448a34c753fSRafael Auler if (BC->RequiresZNow) { 5449a34c753fSRafael Auler NewDE.d_un.d_val |= ELF::DF_BIND_NOW; 5450a34c753fSRafael Auler ZNowSet = true; 5451a34c753fSRafael Auler } 5452a34c753fSRafael Auler break; 5453a34c753fSRafael Auler case ELF::DT_FLAGS_1: 5454a34c753fSRafael Auler if (BC->RequiresZNow) { 5455a34c753fSRafael Auler NewDE.d_un.d_val |= ELF::DF_1_NOW; 5456a34c753fSRafael Auler ZNowSet = true; 5457a34c753fSRafael Auler } 5458a34c753fSRafael Auler break; 5459a34c753fSRafael Auler } 5460ee0e9ccbSMaksim Panchenko if (ShouldPatch) 5461a34c753fSRafael Auler OS.pwrite(reinterpret_cast<const char *>(&NewDE), sizeof(NewDE), 5462a34c753fSRafael Auler DynamicOffset + (&Dyn - DTB) * sizeof(Dyn)); 5463a34c753fSRafael Auler } 5464a34c753fSRafael Auler 5465a34c753fSRafael Auler if (BC->RequiresZNow && !ZNowSet) { 546652cf0711SAmir Ayupov BC->errs() 546752cf0711SAmir Ayupov << "BOLT-ERROR: output binary requires immediate relocation " 5468a34c753fSRafael Auler "processing which depends on DT_FLAGS or DT_FLAGS_1 presence in " 5469a34c753fSRafael Auler ".dynamic. Please re-link the binary with -znow.\n"; 5470a34c753fSRafael Auler exit(1); 5471a34c753fSRafael Auler } 5472a34c753fSRafael Auler } 5473a34c753fSRafael Auler 5474a34c753fSRafael Auler template <typename ELFT> 54751e016c3bSAmir Ayupov Error RewriteInstance::readELFDynamic(ELFObjectFile<ELFT> *File) { 5476a34c753fSRafael Auler const ELFFile<ELFT> &Obj = File->getELFFile(); 5477a34c753fSRafael Auler 5478a34c753fSRafael Auler using Elf_Phdr = typename ELFFile<ELFT>::Elf_Phdr; 5479a34c753fSRafael Auler using Elf_Dyn = typename ELFFile<ELFT>::Elf_Dyn; 5480a34c753fSRafael Auler 5481a34c753fSRafael Auler // Locate DYNAMIC by looking through program headers. 548260db8d9bSKazu Hirata const Elf_Phdr *DynamicPhdr = nullptr; 5483a34c753fSRafael Auler for (const Elf_Phdr &Phdr : cantFail(Obj.program_headers())) { 5484a34c753fSRafael Auler if (Phdr.p_type == ELF::PT_DYNAMIC) { 5485a34c753fSRafael Auler DynamicPhdr = &Phdr; 5486a34c753fSRafael Auler break; 5487a34c753fSRafael Auler } 5488a34c753fSRafael Auler } 5489a34c753fSRafael Auler 5490a34c753fSRafael Auler if (!DynamicPhdr) { 549152cf0711SAmir Ayupov BC->outs() << "BOLT-INFO: static input executable detected\n"; 5492a34c753fSRafael Auler // TODO: static PIE executable might have dynamic header 5493a34c753fSRafael Auler BC->IsStaticExecutable = true; 54941e016c3bSAmir Ayupov return Error::success(); 5495a34c753fSRafael Auler } 5496a34c753fSRafael Auler 54971e016c3bSAmir Ayupov if (DynamicPhdr->p_memsz != DynamicPhdr->p_filesz) 54981e016c3bSAmir Ayupov return createStringError(errc::executable_format_error, 5499a34c753fSRafael Auler "dynamic section sizes should match"); 5500a34c753fSRafael Auler 5501a34c753fSRafael Auler // Go through all dynamic entries to locate entries of interest. 5502d16bbc53SAmir Ayupov auto DynamicEntriesOrErr = Obj.dynamicEntries(); 5503d16bbc53SAmir Ayupov if (!DynamicEntriesOrErr) 5504d16bbc53SAmir Ayupov return DynamicEntriesOrErr.takeError(); 5505d16bbc53SAmir Ayupov typename ELFT::DynRange DynamicEntries = DynamicEntriesOrErr.get(); 5506a34c753fSRafael Auler 5507a34c753fSRafael Auler for (const Elf_Dyn &Dyn : DynamicEntries) { 5508a34c753fSRafael Auler switch (Dyn.d_tag) { 5509a34c753fSRafael Auler case ELF::DT_INIT: 5510a34c753fSRafael Auler if (!BC->HasInterpHeader) { 5511a34c753fSRafael Auler LLVM_DEBUG(dbgs() << "BOLT-DEBUG: Set start function address\n"); 5512a34c753fSRafael Auler BC->StartFunctionAddress = Dyn.getPtr(); 5513a34c753fSRafael Auler } 5514a34c753fSRafael Auler break; 5515a34c753fSRafael Auler case ELF::DT_FINI: 551696b5e092SJob Noorman BC->FiniAddress = Dyn.getPtr(); 551796b5e092SJob Noorman break; 551896b5e092SJob Noorman case ELF::DT_FINI_ARRAY: 551996b5e092SJob Noorman BC->FiniArrayAddress = Dyn.getPtr(); 552096b5e092SJob Noorman break; 552196b5e092SJob Noorman case ELF::DT_FINI_ARRAYSZ: 552296b5e092SJob Noorman BC->FiniArraySize = Dyn.getPtr(); 5523a34c753fSRafael Auler break; 5524a34c753fSRafael Auler case ELF::DT_RELA: 5525a34c753fSRafael Auler DynamicRelocationsAddress = Dyn.getPtr(); 5526a34c753fSRafael Auler break; 5527a34c753fSRafael Auler case ELF::DT_RELASZ: 5528a34c753fSRafael Auler DynamicRelocationsSize = Dyn.getVal(); 5529a34c753fSRafael Auler break; 5530a34c753fSRafael Auler case ELF::DT_JMPREL: 5531a34c753fSRafael Auler PLTRelocationsAddress = Dyn.getPtr(); 5532a34c753fSRafael Auler break; 5533a34c753fSRafael Auler case ELF::DT_PLTRELSZ: 5534a34c753fSRafael Auler PLTRelocationsSize = Dyn.getVal(); 5535a34c753fSRafael Auler break; 5536228970f6Sspupyrev case ELF::DT_RELACOUNT: 5537228970f6Sspupyrev DynamicRelativeRelocationsCount = Dyn.getVal(); 5538228970f6Sspupyrev break; 5539f9bf9f92SVladislav Khmelevsky case ELF::DT_RELR: 5540f9bf9f92SVladislav Khmelevsky DynamicRelrAddress = Dyn.getPtr(); 5541f9bf9f92SVladislav Khmelevsky break; 5542f9bf9f92SVladislav Khmelevsky case ELF::DT_RELRSZ: 5543f9bf9f92SVladislav Khmelevsky DynamicRelrSize = Dyn.getVal(); 5544f9bf9f92SVladislav Khmelevsky break; 5545f9bf9f92SVladislav Khmelevsky case ELF::DT_RELRENT: 5546f9bf9f92SVladislav Khmelevsky DynamicRelrEntrySize = Dyn.getVal(); 5547f9bf9f92SVladislav Khmelevsky break; 5548a34c753fSRafael Auler } 5549a34c753fSRafael Auler } 5550a34c753fSRafael Auler 5551228970f6Sspupyrev if (!DynamicRelocationsAddress || !DynamicRelocationsSize) { 5552228970f6Sspupyrev DynamicRelocationsAddress.reset(); 5553a34c753fSRafael Auler DynamicRelocationsSize = 0; 5554228970f6Sspupyrev } 5555a34c753fSRafael Auler 5556228970f6Sspupyrev if (!PLTRelocationsAddress || !PLTRelocationsSize) { 5557228970f6Sspupyrev PLTRelocationsAddress.reset(); 5558a34c753fSRafael Auler PLTRelocationsSize = 0; 5559228970f6Sspupyrev } 5560f9bf9f92SVladislav Khmelevsky 5561f9bf9f92SVladislav Khmelevsky if (!DynamicRelrAddress || !DynamicRelrSize) { 5562f9bf9f92SVladislav Khmelevsky DynamicRelrAddress.reset(); 5563f9bf9f92SVladislav Khmelevsky DynamicRelrSize = 0; 5564f9bf9f92SVladislav Khmelevsky } else if (!DynamicRelrEntrySize) { 556552cf0711SAmir Ayupov BC->errs() << "BOLT-ERROR: expected DT_RELRENT to be presented " 5566f9bf9f92SVladislav Khmelevsky << "in DYNAMIC section\n"; 5567f9bf9f92SVladislav Khmelevsky exit(1); 5568f9bf9f92SVladislav Khmelevsky } else if (DynamicRelrSize % DynamicRelrEntrySize) { 556952cf0711SAmir Ayupov BC->errs() << "BOLT-ERROR: expected RELR table size to be divisible " 5570f9bf9f92SVladislav Khmelevsky << "by RELR entry size\n"; 5571f9bf9f92SVladislav Khmelevsky exit(1); 5572f9bf9f92SVladislav Khmelevsky } 5573f9bf9f92SVladislav Khmelevsky 55741e016c3bSAmir Ayupov return Error::success(); 5575729d29e1SVladislav Khmelevsky } 5576a34c753fSRafael Auler 5577a34c753fSRafael Auler uint64_t RewriteInstance::getNewFunctionAddress(uint64_t OldAddress) { 5578a34c753fSRafael Auler const BinaryFunction *Function = BC->getBinaryFunctionAtAddress(OldAddress); 5579a34c753fSRafael Auler if (!Function) 5580a34c753fSRafael Auler return 0; 5581a34c753fSRafael Auler 5582a34c753fSRafael Auler return Function->getOutputAddress(); 5583a34c753fSRafael Auler } 5584a34c753fSRafael Auler 5585228970f6Sspupyrev uint64_t RewriteInstance::getNewFunctionOrDataAddress(uint64_t OldAddress) { 5586228970f6Sspupyrev if (uint64_t Function = getNewFunctionAddress(OldAddress)) 5587228970f6Sspupyrev return Function; 5588228970f6Sspupyrev 5589228970f6Sspupyrev const BinaryData *BD = BC->getBinaryDataAtAddress(OldAddress); 5590228970f6Sspupyrev if (BD && BD->isMoved()) 5591228970f6Sspupyrev return BD->getOutputAddress(); 5592228970f6Sspupyrev 5593418e4b0cSMaksim Panchenko if (const BinaryFunction *BF = 5594418e4b0cSMaksim Panchenko BC->getBinaryFunctionContainingAddress(OldAddress)) { 5595418e4b0cSMaksim Panchenko if (BF->isEmitted()) { 5596734c0488Ssinan // If OldAddress is the another entry point of 5597734c0488Ssinan // the function, then BOLT could get the new address. 5598734c0488Ssinan if (BF->isMultiEntry()) { 5599734c0488Ssinan for (const BinaryBasicBlock &BB : *BF) 5600734c0488Ssinan if (BB.isEntryPoint() && 5601734c0488Ssinan (BF->getAddress() + BB.getOffset()) == OldAddress) 5602734c0488Ssinan return BF->getOutputAddress() + BB.getOffset(); 5603734c0488Ssinan } 5604418e4b0cSMaksim Panchenko BC->errs() << "BOLT-ERROR: unable to get new address corresponding to " 5605418e4b0cSMaksim Panchenko "input address 0x" 5606418e4b0cSMaksim Panchenko << Twine::utohexstr(OldAddress) << " in function " << *BF 5607418e4b0cSMaksim Panchenko << ". Consider adding this function to --skip-funcs=...\n"; 5608418e4b0cSMaksim Panchenko exit(1); 5609418e4b0cSMaksim Panchenko } 5610418e4b0cSMaksim Panchenko } 5611418e4b0cSMaksim Panchenko 5612228970f6Sspupyrev return 0; 5613228970f6Sspupyrev } 5614228970f6Sspupyrev 5615a34c753fSRafael Auler void RewriteInstance::rewriteFile() { 5616a34c753fSRafael Auler std::error_code EC; 5617a34c753fSRafael Auler Out = std::make_unique<ToolOutputFile>(opts::OutputFilename, EC, 5618a34c753fSRafael Auler sys::fs::OF_None); 5619a34c753fSRafael Auler check_error(EC, "cannot create output executable file"); 5620a34c753fSRafael Auler 5621a34c753fSRafael Auler raw_fd_ostream &OS = Out->os(); 5622a34c753fSRafael Auler 5623a34c753fSRafael Auler // Copy allocatable part of the input. 5624a34c753fSRafael Auler OS << InputFile->getData().substr(0, FirstNonAllocatableOffset); 5625a34c753fSRafael Auler 5626a34c753fSRafael Auler auto Streamer = BC->createStreamer(OS); 5627a34c753fSRafael Auler // Make sure output stream has enough reserved space, otherwise 5628a34c753fSRafael Auler // pwrite() will fail. 56293a0d894fSMaksim Panchenko uint64_t Offset = std::max(getFileOffsetForAddress(NextAvailableAddress), 56303a0d894fSMaksim Panchenko FirstNonAllocatableOffset); 56313a0d894fSMaksim Panchenko Offset = OS.seek(Offset); 56323a0d894fSMaksim Panchenko assert((Offset != (uint64_t)-1) && "Error resizing output file"); 5633a34c753fSRafael Auler 5634a34c753fSRafael Auler // Overwrite functions with fixed output address. This is mostly used by 5635a34c753fSRafael Auler // non-relocation mode, with one exception: injected functions are covered 5636a34c753fSRafael Auler // here in both modes. 5637a34c753fSRafael Auler uint64_t CountOverwrittenFunctions = 0; 5638a34c753fSRafael Auler uint64_t OverwrittenScore = 0; 5639a34c753fSRafael Auler for (BinaryFunction *Function : BC->getAllBinaryFunctions()) { 5640a34c753fSRafael Auler if (Function->getImageAddress() == 0 || Function->getImageSize() == 0) 5641a34c753fSRafael Auler continue; 5642a34c753fSRafael Auler 5643b560b87bSMaksim Panchenko assert(Function->getImageSize() <= Function->getMaxSize() && 5644b560b87bSMaksim Panchenko "Unexpected large function"); 5645a34c753fSRafael Auler 56469b6e7861SFabian Parzefall const auto HasAddress = [](const FunctionFragment &FF) { 56479b6e7861SFabian Parzefall return FF.empty() || 56489b6e7861SFabian Parzefall (FF.getImageAddress() != 0 && FF.getImageSize() != 0); 56499b6e7861SFabian Parzefall }; 56509b6e7861SFabian Parzefall const bool SplitFragmentsHaveAddress = 56519b6e7861SFabian Parzefall llvm::all_of(Function->getLayout().getSplitFragments(), HasAddress); 56529b6e7861SFabian Parzefall if (Function->isSplit() && !SplitFragmentsHaveAddress) { 56539b6e7861SFabian Parzefall const auto HasNoAddress = [](const FunctionFragment &FF) { 56549b6e7861SFabian Parzefall return FF.getImageAddress() == 0 && FF.getImageSize() == 0; 56559b6e7861SFabian Parzefall }; 56569b6e7861SFabian Parzefall assert(llvm::all_of(Function->getLayout().getSplitFragments(), 56579b6e7861SFabian Parzefall HasNoAddress) && 56589b6e7861SFabian Parzefall "Some split fragments have an address while others do not"); 5659981fa1c1SKazu Hirata (void)HasNoAddress; 5660a34c753fSRafael Auler continue; 56619b6e7861SFabian Parzefall } 5662a34c753fSRafael Auler 5663a34c753fSRafael Auler OverwrittenScore += Function->getFunctionScore(); 5664c4e60a7fSMaksim Panchenko ++CountOverwrittenFunctions; 5665c4e60a7fSMaksim Panchenko 5666a34c753fSRafael Auler // Overwrite function in the output file. 5667ee0e9ccbSMaksim Panchenko if (opts::Verbosity >= 2) 566852cf0711SAmir Ayupov BC->outs() << "BOLT: rewriting function \"" << *Function << "\"\n"; 5669ee0e9ccbSMaksim Panchenko 5670a34c753fSRafael Auler OS.pwrite(reinterpret_cast<char *>(Function->getImageAddress()), 567140c2e0faSMaksim Panchenko Function->getImageSize(), Function->getFileOffset()); 5672a34c753fSRafael Auler 5673a34c753fSRafael Auler // Write nops at the end of the function. 5674a34c753fSRafael Auler if (Function->getMaxSize() != std::numeric_limits<uint64_t>::max()) { 5675a34c753fSRafael Auler uint64_t Pos = OS.tell(); 5676a34c753fSRafael Auler OS.seek(Function->getFileOffset() + Function->getImageSize()); 56771ebad216SMaksim Panchenko BC->MAB->writeNopData( 56781ebad216SMaksim Panchenko OS, Function->getMaxSize() - Function->getImageSize(), &*BC->STI); 5679a34c753fSRafael Auler 5680a34c753fSRafael Auler OS.seek(Pos); 5681a34c753fSRafael Auler } 5682a34c753fSRafael Auler 5683c4e60a7fSMaksim Panchenko if (!Function->isSplit()) 5684a34c753fSRafael Auler continue; 5685a34c753fSRafael Auler 5686a34c753fSRafael Auler // Write cold part 5687c4e60a7fSMaksim Panchenko if (opts::Verbosity >= 2) { 568852cf0711SAmir Ayupov BC->outs() << formatv("BOLT: rewriting function \"{0}\" (split parts)\n", 56899b6e7861SFabian Parzefall *Function); 5690c4e60a7fSMaksim Panchenko } 5691ee0e9ccbSMaksim Panchenko 56929b6e7861SFabian Parzefall for (const FunctionFragment &FF : 56939b6e7861SFabian Parzefall Function->getLayout().getSplitFragments()) { 56949b6e7861SFabian Parzefall OS.pwrite(reinterpret_cast<char *>(FF.getImageAddress()), 56959b6e7861SFabian Parzefall FF.getImageSize(), FF.getFileOffset()); 56969b6e7861SFabian Parzefall } 5697a34c753fSRafael Auler } 5698a34c753fSRafael Auler 5699a34c753fSRafael Auler // Print function statistics for non-relocation mode. 5700a34c753fSRafael Auler if (!BC->HasRelocations) { 570152cf0711SAmir Ayupov BC->outs() << "BOLT: " << CountOverwrittenFunctions << " out of " 570240c2e0faSMaksim Panchenko << BC->getBinaryFunctions().size() 5703a34c753fSRafael Auler << " functions were overwritten.\n"; 5704a34c753fSRafael Auler if (BC->TotalScore != 0) { 5705a34c753fSRafael Auler double Coverage = OverwrittenScore / (double)BC->TotalScore * 100.0; 570652cf0711SAmir Ayupov BC->outs() << format("BOLT-INFO: rewritten functions cover %.2lf", 570752cf0711SAmir Ayupov Coverage) 5708a34c753fSRafael Auler << "% of the execution count of simple functions of " 5709a34c753fSRafael Auler "this binary\n"; 5710a34c753fSRafael Auler } 5711a34c753fSRafael Auler } 5712a34c753fSRafael Auler 5713a34c753fSRafael Auler if (BC->HasRelocations && opts::TrapOldCode) { 5714a34c753fSRafael Auler uint64_t SavedPos = OS.tell(); 5715a34c753fSRafael Auler // Overwrite function body to make sure we never execute these instructions. 5716a34c753fSRafael Auler for (auto &BFI : BC->getBinaryFunctions()) { 5717a34c753fSRafael Auler BinaryFunction &BF = BFI.second; 5718a34c753fSRafael Auler if (!BF.getFileOffset() || !BF.isEmitted()) 5719a34c753fSRafael Auler continue; 5720a34c753fSRafael Auler OS.seek(BF.getFileOffset()); 572128fd2ca1SDenis Revunov StringRef TrapInstr = BC->MIB->getTrapFillValue(); 572228fd2ca1SDenis Revunov unsigned NInstr = BF.getMaxSize() / TrapInstr.size(); 572328fd2ca1SDenis Revunov for (unsigned I = 0; I < NInstr; ++I) 572428fd2ca1SDenis Revunov OS.write(TrapInstr.data(), TrapInstr.size()); 5725a34c753fSRafael Auler } 5726a34c753fSRafael Auler OS.seek(SavedPos); 5727a34c753fSRafael Auler } 5728a34c753fSRafael Auler 5729a34c753fSRafael Auler // Write all allocatable sections - reloc-mode text is written here as well 5730a34c753fSRafael Auler for (BinarySection &Section : BC->allocatableSections()) { 5731a34c753fSRafael Auler if (!Section.isFinalized() || !Section.getOutputData()) 5732a34c753fSRafael Auler continue; 573323c8d382SJob Noorman if (Section.isLinkOnly()) 573423c8d382SJob Noorman continue; 5735a34c753fSRafael Auler 5736ee0e9ccbSMaksim Panchenko if (opts::Verbosity >= 1) 573752cf0711SAmir Ayupov BC->outs() << "BOLT: writing new section " << Section.getName() 573852cf0711SAmir Ayupov << "\n data at 0x" 573952cf0711SAmir Ayupov << Twine::utohexstr(Section.getAllocAddress()) << "\n of size " 574052cf0711SAmir Ayupov << Section.getOutputSize() << "\n at offset " 574140c2e0faSMaksim Panchenko << Section.getOutputFileOffset() << '\n'; 574299655322SMaksim Panchenko OS.seek(Section.getOutputFileOffset()); 574399655322SMaksim Panchenko Section.write(OS); 5744a34c753fSRafael Auler } 5745a34c753fSRafael Auler 5746ee0e9ccbSMaksim Panchenko for (BinarySection &Section : BC->allocatableSections()) 574740c2e0faSMaksim Panchenko Section.flushPendingRelocations(OS, [this](const MCSymbol *S) { 5748a34c753fSRafael Auler return getNewValueForSymbol(S->getName()); 5749a34c753fSRafael Auler }); 5750a34c753fSRafael Auler 5751a34c753fSRafael Auler // If .eh_frame is present create .eh_frame_hdr. 57524d3a0cadSMaksim Panchenko if (EHFrameSection) 5753a34c753fSRafael Auler writeEHFrameHeader(); 5754a34c753fSRafael Auler 5755a34c753fSRafael Auler // Add BOLT Addresses Translation maps to allow profile collection to 5756a34c753fSRafael Auler // happen in the output binary 5757a34c753fSRafael Auler if (opts::EnableBAT) 5758a34c753fSRafael Auler addBATSection(); 5759a34c753fSRafael Auler 5760a34c753fSRafael Auler // Patch program header table. 5761a693ae53SMaksim Panchenko if (!BC->IsLinuxKernel) 5762a34c753fSRafael Auler patchELFPHDRTable(); 5763a34c753fSRafael Auler 5764a34c753fSRafael Auler // Finalize memory image of section string table. 5765a34c753fSRafael Auler finalizeSectionStringTable(); 5766a34c753fSRafael Auler 5767a34c753fSRafael Auler // Update symbol tables. 5768a34c753fSRafael Auler patchELFSymTabs(); 5769a34c753fSRafael Auler 5770a34c753fSRafael Auler if (opts::EnableBAT) 5771a34c753fSRafael Auler encodeBATSection(); 5772a34c753fSRafael Auler 5773a34c753fSRafael Auler // Copy non-allocatable sections once allocatable part is finished. 5774a34c753fSRafael Auler rewriteNoteSections(); 5775a34c753fSRafael Auler 5776a34c753fSRafael Auler if (BC->HasRelocations) { 5777a34c753fSRafael Auler patchELFAllocatableRelaSections(); 5778f9bf9f92SVladislav Khmelevsky patchELFAllocatableRelrSection(); 5779a34c753fSRafael Auler patchELFGOT(); 5780a34c753fSRafael Auler } 5781a34c753fSRafael Auler 5782228970f6Sspupyrev // Patch dynamic section/segment. 5783228970f6Sspupyrev patchELFDynamic(); 5784228970f6Sspupyrev 5785a34c753fSRafael Auler // Update ELF book-keeping info. 5786a34c753fSRafael Auler patchELFSectionHeaderTable(); 5787a34c753fSRafael Auler 5788a34c753fSRafael Auler if (opts::PrintSections) { 578952cf0711SAmir Ayupov BC->outs() << "BOLT-INFO: Sections after processing:\n"; 579052cf0711SAmir Ayupov BC->printSections(BC->outs()); 5791a34c753fSRafael Auler } 5792a34c753fSRafael Auler 5793a34c753fSRafael Auler Out->keep(); 5794473b9dd4Szhoujiapeng EC = sys::fs::setPermissions( 5795473b9dd4Szhoujiapeng opts::OutputFilename, 5796473b9dd4Szhoujiapeng static_cast<sys::fs::perms>(sys::fs::perms::all_all & 5797473b9dd4Szhoujiapeng ~sys::fs::getUmask())); 5798a34c753fSRafael Auler check_error(EC, "cannot set permissions of output file"); 5799a34c753fSRafael Auler } 5800a34c753fSRafael Auler 5801a34c753fSRafael Auler void RewriteInstance::writeEHFrameHeader() { 58024d3a0cadSMaksim Panchenko BinarySection *NewEHFrameSection = 58034d3a0cadSMaksim Panchenko getSection(getNewSecPrefix() + getEHFrameSectionName()); 58044d3a0cadSMaksim Panchenko 58054d3a0cadSMaksim Panchenko // No need to update the header if no new .eh_frame was created. 58064d3a0cadSMaksim Panchenko if (!NewEHFrameSection) 58074d3a0cadSMaksim Panchenko return; 58084d3a0cadSMaksim Panchenko 5809a34c753fSRafael Auler DWARFDebugFrame NewEHFrame(BC->TheTriple->getArch(), true, 58104d3a0cadSMaksim Panchenko NewEHFrameSection->getOutputAddress()); 5811a34c753fSRafael Auler Error E = NewEHFrame.parse(DWARFDataExtractor( 58124d3a0cadSMaksim Panchenko NewEHFrameSection->getOutputContents(), BC->AsmInfo->isLittleEndian(), 5813a34c753fSRafael Auler BC->AsmInfo->getCodePointerSize())); 5814a34c753fSRafael Auler check_error(std::move(E), "failed to parse EH frame"); 5815a34c753fSRafael Auler 58164d3a0cadSMaksim Panchenko uint64_t RelocatedEHFrameAddress = 0; 58174d3a0cadSMaksim Panchenko StringRef RelocatedEHFrameContents; 58184d3a0cadSMaksim Panchenko BinarySection *RelocatedEHFrameSection = 58194d3a0cadSMaksim Panchenko getSection(".relocated" + getEHFrameSectionName()); 58204d3a0cadSMaksim Panchenko if (RelocatedEHFrameSection) { 58214d3a0cadSMaksim Panchenko RelocatedEHFrameAddress = RelocatedEHFrameSection->getOutputAddress(); 58224d3a0cadSMaksim Panchenko RelocatedEHFrameContents = RelocatedEHFrameSection->getOutputContents(); 5823a34c753fSRafael Auler } 58244d3a0cadSMaksim Panchenko DWARFDebugFrame RelocatedEHFrame(BC->TheTriple->getArch(), true, 58254d3a0cadSMaksim Panchenko RelocatedEHFrameAddress); 58264d3a0cadSMaksim Panchenko Error Er = RelocatedEHFrame.parse(DWARFDataExtractor( 58274d3a0cadSMaksim Panchenko RelocatedEHFrameContents, BC->AsmInfo->isLittleEndian(), 5828a34c753fSRafael Auler BC->AsmInfo->getCodePointerSize())); 5829a34c753fSRafael Auler check_error(std::move(Er), "failed to parse EH frame"); 5830a34c753fSRafael Auler 583112d322dbSMaksim Panchenko LLVM_DEBUG(dbgs() << "BOLT: writing a new " << getEHFrameHdrSectionName() 583212d322dbSMaksim Panchenko << '\n'); 5833a34c753fSRafael Auler 583408ef9396SMaksim Panchenko // Try to overwrite the original .eh_frame_hdr if the size permits. 583508ef9396SMaksim Panchenko uint64_t EHFrameHdrOutputAddress = 0; 583608ef9396SMaksim Panchenko uint64_t EHFrameHdrFileOffset = 0; 583708ef9396SMaksim Panchenko std::vector<char> NewEHFrameHdr; 583808ef9396SMaksim Panchenko BinarySection *OldEHFrameHdrSection = getSection(getEHFrameHdrSectionName()); 583908ef9396SMaksim Panchenko if (OldEHFrameHdrSection) { 584008ef9396SMaksim Panchenko NewEHFrameHdr = CFIRdWrt->generateEHFrameHeader( 584108ef9396SMaksim Panchenko RelocatedEHFrame, NewEHFrame, OldEHFrameHdrSection->getAddress()); 584208ef9396SMaksim Panchenko if (NewEHFrameHdr.size() <= OldEHFrameHdrSection->getSize()) { 584308ef9396SMaksim Panchenko BC->outs() << "BOLT-INFO: rewriting " << getEHFrameHdrSectionName() 584408ef9396SMaksim Panchenko << " in-place\n"; 584508ef9396SMaksim Panchenko EHFrameHdrOutputAddress = OldEHFrameHdrSection->getAddress(); 584608ef9396SMaksim Panchenko EHFrameHdrFileOffset = OldEHFrameHdrSection->getInputFileOffset(); 584708ef9396SMaksim Panchenko } else { 584808ef9396SMaksim Panchenko OldEHFrameHdrSection->setOutputName(getOrgSecPrefix() + 584908ef9396SMaksim Panchenko getEHFrameHdrSectionName()); 585008ef9396SMaksim Panchenko OldEHFrameHdrSection = nullptr; 585108ef9396SMaksim Panchenko } 585208ef9396SMaksim Panchenko } 585308ef9396SMaksim Panchenko 585408ef9396SMaksim Panchenko // If there was not enough space, allocate more memory for .eh_frame_hdr. 585508ef9396SMaksim Panchenko if (!OldEHFrameHdrSection) { 5856a34c753fSRafael Auler NextAvailableAddress = 5857a34c753fSRafael Auler appendPadding(Out->os(), NextAvailableAddress, EHFrameHdrAlign); 5858a34c753fSRafael Auler 585908ef9396SMaksim Panchenko EHFrameHdrOutputAddress = NextAvailableAddress; 586008ef9396SMaksim Panchenko EHFrameHdrFileOffset = getFileOffsetForAddress(NextAvailableAddress); 5861a34c753fSRafael Auler 586208ef9396SMaksim Panchenko NewEHFrameHdr = CFIRdWrt->generateEHFrameHeader( 58631b8e0cf0SMaksim Panchenko RelocatedEHFrame, NewEHFrame, EHFrameHdrOutputAddress); 5864a34c753fSRafael Auler 586508ef9396SMaksim Panchenko NextAvailableAddress += NewEHFrameHdr.size(); 5866ad7ee900SMaksim Panchenko if (!BC->BOLTReserved.empty() && 5867ad7ee900SMaksim Panchenko (NextAvailableAddress > BC->BOLTReserved.end())) { 58683a0d894fSMaksim Panchenko BC->errs() << "BOLT-ERROR: unable to fit " << getEHFrameHdrSectionName() 58693a0d894fSMaksim Panchenko << " into reserved space\n"; 58703a0d894fSMaksim Panchenko exit(1); 58713a0d894fSMaksim Panchenko } 58723a0d894fSMaksim Panchenko 587308ef9396SMaksim Panchenko // Create a new entry in the section header table. 587408ef9396SMaksim Panchenko const unsigned Flags = BinarySection::getFlags(/*IsReadOnly=*/true, 587508ef9396SMaksim Panchenko /*IsText=*/false, 587608ef9396SMaksim Panchenko /*IsAllocatable=*/true); 587708ef9396SMaksim Panchenko BinarySection &EHFrameHdrSec = BC->registerOrUpdateSection( 587808ef9396SMaksim Panchenko getNewSecPrefix() + getEHFrameHdrSectionName(), ELF::SHT_PROGBITS, 587908ef9396SMaksim Panchenko Flags, nullptr, NewEHFrameHdr.size(), /*Alignment=*/1); 588008ef9396SMaksim Panchenko EHFrameHdrSec.setOutputFileOffset(EHFrameHdrFileOffset); 588108ef9396SMaksim Panchenko EHFrameHdrSec.setOutputAddress(EHFrameHdrOutputAddress); 588208ef9396SMaksim Panchenko EHFrameHdrSec.setOutputName(getEHFrameHdrSectionName()); 588308ef9396SMaksim Panchenko } 588408ef9396SMaksim Panchenko 588508ef9396SMaksim Panchenko Out->os().seek(EHFrameHdrFileOffset); 588608ef9396SMaksim Panchenko Out->os().write(NewEHFrameHdr.data(), NewEHFrameHdr.size()); 588708ef9396SMaksim Panchenko 588808ef9396SMaksim Panchenko // Pad the contents if overwriting in-place. 588908ef9396SMaksim Panchenko if (OldEHFrameHdrSection) 589008ef9396SMaksim Panchenko Out->os().write_zeros(OldEHFrameHdrSection->getSize() - 589108ef9396SMaksim Panchenko NewEHFrameHdr.size()); 589208ef9396SMaksim Panchenko 58934d3a0cadSMaksim Panchenko // Merge new .eh_frame with the relocated original so that gdb can locate all 58944d3a0cadSMaksim Panchenko // FDEs. 58954d3a0cadSMaksim Panchenko if (RelocatedEHFrameSection) { 58964d3a0cadSMaksim Panchenko const uint64_t NewEHFrameSectionSize = 58974d3a0cadSMaksim Panchenko RelocatedEHFrameSection->getOutputAddress() + 58984d3a0cadSMaksim Panchenko RelocatedEHFrameSection->getOutputSize() - 58994d3a0cadSMaksim Panchenko NewEHFrameSection->getOutputAddress(); 59004d3a0cadSMaksim Panchenko NewEHFrameSection->updateContents(NewEHFrameSection->getOutputData(), 59014d3a0cadSMaksim Panchenko NewEHFrameSectionSize); 59024d3a0cadSMaksim Panchenko BC->deregisterSection(*RelocatedEHFrameSection); 5903a34c753fSRafael Auler } 5904a34c753fSRafael Auler 5905a34c753fSRafael Auler LLVM_DEBUG(dbgs() << "BOLT-DEBUG: size of .eh_frame after merge is " 59064d3a0cadSMaksim Panchenko << NewEHFrameSection->getOutputSize() << '\n'); 5907a34c753fSRafael Auler } 5908a34c753fSRafael Auler 5909a34c753fSRafael Auler uint64_t RewriteInstance::getNewValueForSymbol(const StringRef Name) { 591005634f73SJob Noorman auto Value = Linker->lookupSymbol(Name); 591105634f73SJob Noorman if (Value) 591205634f73SJob Noorman return *Value; 5913a34c753fSRafael Auler 5914a34c753fSRafael Auler // Return the original value if we haven't emitted the symbol. 5915a34c753fSRafael Auler BinaryData *BD = BC->getBinaryDataByName(Name); 5916a34c753fSRafael Auler if (!BD) 5917a34c753fSRafael Auler return 0; 5918a34c753fSRafael Auler 5919a34c753fSRafael Auler return BD->getAddress(); 5920a34c753fSRafael Auler } 5921a34c753fSRafael Auler 5922a34c753fSRafael Auler uint64_t RewriteInstance::getFileOffsetForAddress(uint64_t Address) const { 5923a34c753fSRafael Auler // Check if it's possibly part of the new segment. 5924ad7ee900SMaksim Panchenko if (NewTextSegmentAddress && Address >= NewTextSegmentAddress) 5925a34c753fSRafael Auler return Address - NewTextSegmentAddress + NewTextSegmentOffset; 5926a34c753fSRafael Auler 5927a34c753fSRafael Auler // Find an existing segment that matches the address. 5928a34c753fSRafael Auler const auto SegmentInfoI = BC->SegmentMapInfo.upper_bound(Address); 5929a34c753fSRafael Auler if (SegmentInfoI == BC->SegmentMapInfo.begin()) 5930a34c753fSRafael Auler return 0; 5931a34c753fSRafael Auler 5932a34c753fSRafael Auler const SegmentInfo &SegmentInfo = std::prev(SegmentInfoI)->second; 5933a34c753fSRafael Auler if (Address < SegmentInfo.Address || 5934a34c753fSRafael Auler Address >= SegmentInfo.Address + SegmentInfo.FileSize) 5935a34c753fSRafael Auler return 0; 5936a34c753fSRafael Auler 5937a34c753fSRafael Auler return SegmentInfo.FileOffset + Address - SegmentInfo.Address; 5938a34c753fSRafael Auler } 5939a34c753fSRafael Auler 5940a34c753fSRafael Auler bool RewriteInstance::willOverwriteSection(StringRef SectionName) { 5941e7541f56SKazu Hirata if (llvm::is_contained(SectionsToOverwrite, SectionName)) 5942a34c753fSRafael Auler return true; 5943e7541f56SKazu Hirata if (llvm::is_contained(DebugSectionsToOverwrite, SectionName)) 5944a34c753fSRafael Auler return true; 5945a34c753fSRafael Auler 5946a34c753fSRafael Auler ErrorOr<BinarySection &> Section = BC->getUniqueSectionByName(SectionName); 5947a34c753fSRafael Auler return Section && Section->isAllocatable() && Section->isFinalized(); 5948a34c753fSRafael Auler } 5949a34c753fSRafael Auler 5950a34c753fSRafael Auler bool RewriteInstance::isDebugSection(StringRef SectionName) { 5951ad8fd5b1SKazu Hirata if (SectionName.starts_with(".debug_") || 5952ad8fd5b1SKazu Hirata SectionName.starts_with(".zdebug_") || SectionName == ".gdb_index" || 5953ad8fd5b1SKazu Hirata SectionName == ".stab" || SectionName == ".stabstr") 5954a34c753fSRafael Auler return true; 5955a34c753fSRafael Auler 5956a34c753fSRafael Auler return false; 5957a34c753fSRafael Auler } 5958