1 //===- bolt/Core/BinaryContext.cpp - Low-level context --------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file implements the BinaryContext class. 10 // 11 //===----------------------------------------------------------------------===// 12 13 #include "bolt/Core/BinaryContext.h" 14 #include "bolt/Core/BinaryEmitter.h" 15 #include "bolt/Core/BinaryFunction.h" 16 #include "bolt/Utils/CommandLineOpts.h" 17 #include "bolt/Utils/NameResolver.h" 18 #include "bolt/Utils/Utils.h" 19 #include "llvm/ADT/STLExtras.h" 20 #include "llvm/ADT/Twine.h" 21 #include "llvm/DebugInfo/DWARF/DWARFCompileUnit.h" 22 #include "llvm/DebugInfo/DWARF/DWARFFormValue.h" 23 #include "llvm/DebugInfo/DWARF/DWARFUnit.h" 24 #include "llvm/MC/MCAsmLayout.h" 25 #include "llvm/MC/MCAssembler.h" 26 #include "llvm/MC/MCContext.h" 27 #include "llvm/MC/MCDisassembler/MCDisassembler.h" 28 #include "llvm/MC/MCInstPrinter.h" 29 #include "llvm/MC/MCObjectStreamer.h" 30 #include "llvm/MC/MCObjectWriter.h" 31 #include "llvm/MC/MCRegisterInfo.h" 32 #include "llvm/MC/MCSectionELF.h" 33 #include "llvm/MC/MCStreamer.h" 34 #include "llvm/MC/MCSubtargetInfo.h" 35 #include "llvm/MC/MCSymbol.h" 36 #include "llvm/Support/CommandLine.h" 37 #include "llvm/Support/Error.h" 38 #include "llvm/Support/Regex.h" 39 #include <algorithm> 40 #include <functional> 41 #include <iterator> 42 #include <numeric> 43 #include <unordered_set> 44 45 using namespace llvm; 46 47 #undef DEBUG_TYPE 48 #define DEBUG_TYPE "bolt" 49 50 namespace opts { 51 52 cl::opt<bool> NoHugePages("no-huge-pages", 53 cl::desc("use regular size pages for code alignment"), 54 cl::Hidden, cl::cat(BoltCategory)); 55 56 static cl::opt<bool> 57 PrintDebugInfo("print-debug-info", 58 cl::desc("print debug info when printing functions"), 59 cl::Hidden, 60 cl::ZeroOrMore, 61 cl::cat(BoltCategory)); 62 63 cl::opt<bool> PrintRelocations( 64 "print-relocations", 65 cl::desc("print relocations when printing functions/objects"), cl::Hidden, 66 cl::cat(BoltCategory)); 67 68 static cl::opt<bool> 69 PrintMemData("print-mem-data", 70 cl::desc("print memory data annotations when printing functions"), 71 cl::Hidden, 72 cl::ZeroOrMore, 73 cl::cat(BoltCategory)); 74 75 } // namespace opts 76 77 namespace llvm { 78 namespace bolt { 79 80 BinaryContext::BinaryContext(std::unique_ptr<MCContext> Ctx, 81 std::unique_ptr<DWARFContext> DwCtx, 82 std::unique_ptr<Triple> TheTriple, 83 const Target *TheTarget, std::string TripleName, 84 std::unique_ptr<MCCodeEmitter> MCE, 85 std::unique_ptr<MCObjectFileInfo> MOFI, 86 std::unique_ptr<const MCAsmInfo> AsmInfo, 87 std::unique_ptr<const MCInstrInfo> MII, 88 std::unique_ptr<const MCSubtargetInfo> STI, 89 std::unique_ptr<MCInstPrinter> InstPrinter, 90 std::unique_ptr<const MCInstrAnalysis> MIA, 91 std::unique_ptr<MCPlusBuilder> MIB, 92 std::unique_ptr<const MCRegisterInfo> MRI, 93 std::unique_ptr<MCDisassembler> DisAsm) 94 : Ctx(std::move(Ctx)), DwCtx(std::move(DwCtx)), 95 TheTriple(std::move(TheTriple)), TheTarget(TheTarget), 96 TripleName(TripleName), MCE(std::move(MCE)), MOFI(std::move(MOFI)), 97 AsmInfo(std::move(AsmInfo)), MII(std::move(MII)), STI(std::move(STI)), 98 InstPrinter(std::move(InstPrinter)), MIA(std::move(MIA)), 99 MIB(std::move(MIB)), MRI(std::move(MRI)), DisAsm(std::move(DisAsm)) { 100 Relocation::Arch = this->TheTriple->getArch(); 101 RegularPageSize = isAArch64() ? RegularPageSizeAArch64 : RegularPageSizeX86; 102 PageAlign = opts::NoHugePages ? RegularPageSize : HugePageSize; 103 } 104 105 BinaryContext::~BinaryContext() { 106 for (BinarySection *Section : Sections) 107 delete Section; 108 for (BinaryFunction *InjectedFunction : InjectedBinaryFunctions) 109 delete InjectedFunction; 110 for (std::pair<const uint64_t, JumpTable *> JTI : JumpTables) 111 delete JTI.second; 112 clearBinaryData(); 113 } 114 115 /// Create BinaryContext for a given architecture \p ArchName and 116 /// triple \p TripleName. 117 Expected<std::unique_ptr<BinaryContext>> 118 BinaryContext::createBinaryContext(const ObjectFile *File, bool IsPIC, 119 std::unique_ptr<DWARFContext> DwCtx) { 120 StringRef ArchName = ""; 121 std::string FeaturesStr = ""; 122 switch (File->getArch()) { 123 case llvm::Triple::x86_64: 124 ArchName = "x86-64"; 125 FeaturesStr = "+nopl"; 126 break; 127 case llvm::Triple::aarch64: 128 ArchName = "aarch64"; 129 FeaturesStr = "+all"; 130 break; 131 case llvm::Triple::riscv64: { 132 ArchName = "riscv64"; 133 Expected<SubtargetFeatures> Features = File->getFeatures(); 134 135 if (auto E = Features.takeError()) 136 return std::move(E); 137 138 // We rely on relaxation for some transformations (e.g., promoting all calls 139 // to PseudoCALL and then making JITLink relax them). Since the relax 140 // feature is not stored in the object file, we manually enable it. 141 Features->AddFeature("relax"); 142 FeaturesStr = Features->getString(); 143 break; 144 } 145 default: 146 return createStringError(std::errc::not_supported, 147 "BOLT-ERROR: Unrecognized machine in ELF file"); 148 } 149 150 auto TheTriple = std::make_unique<Triple>(File->makeTriple()); 151 const std::string TripleName = TheTriple->str(); 152 153 std::string Error; 154 const Target *TheTarget = 155 TargetRegistry::lookupTarget(std::string(ArchName), *TheTriple, Error); 156 if (!TheTarget) 157 return createStringError(make_error_code(std::errc::not_supported), 158 Twine("BOLT-ERROR: ", Error)); 159 160 std::unique_ptr<const MCRegisterInfo> MRI( 161 TheTarget->createMCRegInfo(TripleName)); 162 if (!MRI) 163 return createStringError( 164 make_error_code(std::errc::not_supported), 165 Twine("BOLT-ERROR: no register info for target ", TripleName)); 166 167 // Set up disassembler. 168 std::unique_ptr<MCAsmInfo> AsmInfo( 169 TheTarget->createMCAsmInfo(*MRI, TripleName, MCTargetOptions())); 170 if (!AsmInfo) 171 return createStringError( 172 make_error_code(std::errc::not_supported), 173 Twine("BOLT-ERROR: no assembly info for target ", TripleName)); 174 // BOLT creates "func@PLT" symbols for PLT entries. In function assembly dump 175 // we want to emit such names as using @PLT without double quotes to convey 176 // variant kind to the assembler. BOLT doesn't rely on the linker so we can 177 // override the default AsmInfo behavior to emit names the way we want. 178 AsmInfo->setAllowAtInName(true); 179 180 std::unique_ptr<const MCSubtargetInfo> STI( 181 TheTarget->createMCSubtargetInfo(TripleName, "", FeaturesStr)); 182 if (!STI) 183 return createStringError( 184 make_error_code(std::errc::not_supported), 185 Twine("BOLT-ERROR: no subtarget info for target ", TripleName)); 186 187 std::unique_ptr<const MCInstrInfo> MII(TheTarget->createMCInstrInfo()); 188 if (!MII) 189 return createStringError( 190 make_error_code(std::errc::not_supported), 191 Twine("BOLT-ERROR: no instruction info for target ", TripleName)); 192 193 std::unique_ptr<MCContext> Ctx( 194 new MCContext(*TheTriple, AsmInfo.get(), MRI.get(), STI.get())); 195 std::unique_ptr<MCObjectFileInfo> MOFI( 196 TheTarget->createMCObjectFileInfo(*Ctx, IsPIC)); 197 Ctx->setObjectFileInfo(MOFI.get()); 198 // We do not support X86 Large code model. Change this in the future. 199 bool Large = false; 200 if (TheTriple->getArch() == llvm::Triple::aarch64) 201 Large = true; 202 unsigned LSDAEncoding = 203 Large ? dwarf::DW_EH_PE_absptr : dwarf::DW_EH_PE_udata4; 204 if (IsPIC) { 205 LSDAEncoding = dwarf::DW_EH_PE_pcrel | 206 (Large ? dwarf::DW_EH_PE_sdata8 : dwarf::DW_EH_PE_sdata4); 207 } 208 209 std::unique_ptr<MCDisassembler> DisAsm( 210 TheTarget->createMCDisassembler(*STI, *Ctx)); 211 212 if (!DisAsm) 213 return createStringError( 214 make_error_code(std::errc::not_supported), 215 Twine("BOLT-ERROR: no disassembler info for target ", TripleName)); 216 217 std::unique_ptr<const MCInstrAnalysis> MIA( 218 TheTarget->createMCInstrAnalysis(MII.get())); 219 if (!MIA) 220 return createStringError( 221 make_error_code(std::errc::not_supported), 222 Twine("BOLT-ERROR: failed to create instruction analysis for target ", 223 TripleName)); 224 225 int AsmPrinterVariant = AsmInfo->getAssemblerDialect(); 226 std::unique_ptr<MCInstPrinter> InstructionPrinter( 227 TheTarget->createMCInstPrinter(*TheTriple, AsmPrinterVariant, *AsmInfo, 228 *MII, *MRI)); 229 if (!InstructionPrinter) 230 return createStringError( 231 make_error_code(std::errc::not_supported), 232 Twine("BOLT-ERROR: no instruction printer for target ", TripleName)); 233 InstructionPrinter->setPrintImmHex(true); 234 235 std::unique_ptr<MCCodeEmitter> MCE( 236 TheTarget->createMCCodeEmitter(*MII, *Ctx)); 237 238 // Make sure we don't miss any output on core dumps. 239 outs().SetUnbuffered(); 240 errs().SetUnbuffered(); 241 dbgs().SetUnbuffered(); 242 243 auto BC = std::make_unique<BinaryContext>( 244 std::move(Ctx), std::move(DwCtx), std::move(TheTriple), TheTarget, 245 std::string(TripleName), std::move(MCE), std::move(MOFI), 246 std::move(AsmInfo), std::move(MII), std::move(STI), 247 std::move(InstructionPrinter), std::move(MIA), nullptr, std::move(MRI), 248 std::move(DisAsm)); 249 250 BC->LSDAEncoding = LSDAEncoding; 251 252 BC->MAB = std::unique_ptr<MCAsmBackend>( 253 BC->TheTarget->createMCAsmBackend(*BC->STI, *BC->MRI, MCTargetOptions())); 254 255 BC->setFilename(File->getFileName()); 256 257 BC->HasFixedLoadAddress = !IsPIC; 258 259 BC->SymbolicDisAsm = std::unique_ptr<MCDisassembler>( 260 BC->TheTarget->createMCDisassembler(*BC->STI, *BC->Ctx)); 261 262 if (!BC->SymbolicDisAsm) 263 return createStringError( 264 make_error_code(std::errc::not_supported), 265 Twine("BOLT-ERROR: no disassembler info for target ", TripleName)); 266 267 return std::move(BC); 268 } 269 270 bool BinaryContext::forceSymbolRelocations(StringRef SymbolName) const { 271 if (opts::HotText && 272 (SymbolName == "__hot_start" || SymbolName == "__hot_end")) 273 return true; 274 275 if (opts::HotData && 276 (SymbolName == "__hot_data_start" || SymbolName == "__hot_data_end")) 277 return true; 278 279 if (SymbolName == "_end") 280 return true; 281 282 return false; 283 } 284 285 std::unique_ptr<MCObjectWriter> 286 BinaryContext::createObjectWriter(raw_pwrite_stream &OS) { 287 return MAB->createObjectWriter(OS); 288 } 289 290 bool BinaryContext::validateObjectNesting() const { 291 auto Itr = BinaryDataMap.begin(); 292 auto End = BinaryDataMap.end(); 293 bool Valid = true; 294 while (Itr != End) { 295 auto Next = std::next(Itr); 296 while (Next != End && 297 Itr->second->getSection() == Next->second->getSection() && 298 Itr->second->containsRange(Next->second->getAddress(), 299 Next->second->getSize())) { 300 if (Next->second->Parent != Itr->second) { 301 errs() << "BOLT-WARNING: object nesting incorrect for:\n" 302 << "BOLT-WARNING: " << *Itr->second << "\n" 303 << "BOLT-WARNING: " << *Next->second << "\n"; 304 Valid = false; 305 } 306 ++Next; 307 } 308 Itr = Next; 309 } 310 return Valid; 311 } 312 313 bool BinaryContext::validateHoles() const { 314 bool Valid = true; 315 for (BinarySection &Section : sections()) { 316 for (const Relocation &Rel : Section.relocations()) { 317 uint64_t RelAddr = Rel.Offset + Section.getAddress(); 318 const BinaryData *BD = getBinaryDataContainingAddress(RelAddr); 319 if (!BD) { 320 errs() << "BOLT-WARNING: no BinaryData found for relocation at address" 321 << " 0x" << Twine::utohexstr(RelAddr) << " in " 322 << Section.getName() << "\n"; 323 Valid = false; 324 } else if (!BD->getAtomicRoot()) { 325 errs() << "BOLT-WARNING: no atomic BinaryData found for relocation at " 326 << "address 0x" << Twine::utohexstr(RelAddr) << " in " 327 << Section.getName() << "\n"; 328 Valid = false; 329 } 330 } 331 } 332 return Valid; 333 } 334 335 void BinaryContext::updateObjectNesting(BinaryDataMapType::iterator GAI) { 336 const uint64_t Address = GAI->second->getAddress(); 337 const uint64_t Size = GAI->second->getSize(); 338 339 auto fixParents = [&](BinaryDataMapType::iterator Itr, 340 BinaryData *NewParent) { 341 BinaryData *OldParent = Itr->second->Parent; 342 Itr->second->Parent = NewParent; 343 ++Itr; 344 while (Itr != BinaryDataMap.end() && OldParent && 345 Itr->second->Parent == OldParent) { 346 Itr->second->Parent = NewParent; 347 ++Itr; 348 } 349 }; 350 351 // Check if the previous symbol contains the newly added symbol. 352 if (GAI != BinaryDataMap.begin()) { 353 BinaryData *Prev = std::prev(GAI)->second; 354 while (Prev) { 355 if (Prev->getSection() == GAI->second->getSection() && 356 Prev->containsRange(Address, Size)) { 357 fixParents(GAI, Prev); 358 } else { 359 fixParents(GAI, nullptr); 360 } 361 Prev = Prev->Parent; 362 } 363 } 364 365 // Check if the newly added symbol contains any subsequent symbols. 366 if (Size != 0) { 367 BinaryData *BD = GAI->second->Parent ? GAI->second->Parent : GAI->second; 368 auto Itr = std::next(GAI); 369 while ( 370 Itr != BinaryDataMap.end() && 371 BD->containsRange(Itr->second->getAddress(), Itr->second->getSize())) { 372 Itr->second->Parent = BD; 373 ++Itr; 374 } 375 } 376 } 377 378 iterator_range<BinaryContext::binary_data_iterator> 379 BinaryContext::getSubBinaryData(BinaryData *BD) { 380 auto Start = std::next(BinaryDataMap.find(BD->getAddress())); 381 auto End = Start; 382 while (End != BinaryDataMap.end() && BD->isAncestorOf(End->second)) 383 ++End; 384 return make_range(Start, End); 385 } 386 387 std::pair<const MCSymbol *, uint64_t> 388 BinaryContext::handleAddressRef(uint64_t Address, BinaryFunction &BF, 389 bool IsPCRel) { 390 if (isAArch64()) { 391 // Check if this is an access to a constant island and create bookkeeping 392 // to keep track of it and emit it later as part of this function. 393 if (MCSymbol *IslandSym = BF.getOrCreateIslandAccess(Address)) 394 return std::make_pair(IslandSym, 0); 395 396 // Detect custom code written in assembly that refers to arbitrary 397 // constant islands from other functions. Write this reference so we 398 // can pull this constant island and emit it as part of this function 399 // too. 400 auto IslandIter = AddressToConstantIslandMap.lower_bound(Address); 401 402 if (IslandIter != AddressToConstantIslandMap.begin() && 403 (IslandIter == AddressToConstantIslandMap.end() || 404 IslandIter->first > Address)) 405 --IslandIter; 406 407 if (IslandIter != AddressToConstantIslandMap.end()) { 408 // Fall-back to referencing the original constant island in the presence 409 // of dynamic relocs, as we currently do not support cloning them. 410 // Notice: we might fail to link because of this, if the original constant 411 // island we are referring would be emitted too far away. 412 if (IslandIter->second->hasDynamicRelocationAtIsland()) { 413 MCSymbol *IslandSym = 414 IslandIter->second->getOrCreateIslandAccess(Address); 415 if (IslandSym) 416 return std::make_pair(IslandSym, 0); 417 } else if (MCSymbol *IslandSym = 418 IslandIter->second->getOrCreateProxyIslandAccess(Address, 419 BF)) { 420 BF.createIslandDependency(IslandSym, IslandIter->second); 421 return std::make_pair(IslandSym, 0); 422 } 423 } 424 } 425 426 // Note that the address does not necessarily have to reside inside 427 // a section, it could be an absolute address too. 428 ErrorOr<BinarySection &> Section = getSectionForAddress(Address); 429 if (Section && Section->isText()) { 430 if (BF.containsAddress(Address, /*UseMaxSize=*/isAArch64())) { 431 if (Address != BF.getAddress()) { 432 // The address could potentially escape. Mark it as another entry 433 // point into the function. 434 if (opts::Verbosity >= 1) { 435 outs() << "BOLT-INFO: potentially escaped address 0x" 436 << Twine::utohexstr(Address) << " in function " << BF << '\n'; 437 } 438 BF.HasInternalLabelReference = true; 439 return std::make_pair( 440 BF.addEntryPointAtOffset(Address - BF.getAddress()), 0); 441 } 442 } else { 443 addInterproceduralReference(&BF, Address); 444 } 445 } 446 447 // With relocations, catch jump table references outside of the basic block 448 // containing the indirect jump. 449 if (HasRelocations) { 450 const MemoryContentsType MemType = analyzeMemoryAt(Address, BF); 451 if (MemType == MemoryContentsType::POSSIBLE_PIC_JUMP_TABLE && IsPCRel) { 452 const MCSymbol *Symbol = 453 getOrCreateJumpTable(BF, Address, JumpTable::JTT_PIC); 454 455 return std::make_pair(Symbol, 0); 456 } 457 } 458 459 if (BinaryData *BD = getBinaryDataContainingAddress(Address)) 460 return std::make_pair(BD->getSymbol(), Address - BD->getAddress()); 461 462 // TODO: use DWARF info to get size/alignment here? 463 MCSymbol *TargetSymbol = getOrCreateGlobalSymbol(Address, "DATAat"); 464 LLVM_DEBUG(dbgs() << "Created symbol " << TargetSymbol->getName() << '\n'); 465 return std::make_pair(TargetSymbol, 0); 466 } 467 468 MemoryContentsType BinaryContext::analyzeMemoryAt(uint64_t Address, 469 BinaryFunction &BF) { 470 if (!isX86()) 471 return MemoryContentsType::UNKNOWN; 472 473 ErrorOr<BinarySection &> Section = getSectionForAddress(Address); 474 if (!Section) { 475 // No section - possibly an absolute address. Since we don't allow 476 // internal function addresses to escape the function scope - we 477 // consider it a tail call. 478 if (opts::Verbosity > 1) { 479 errs() << "BOLT-WARNING: no section for address 0x" 480 << Twine::utohexstr(Address) << " referenced from function " << BF 481 << '\n'; 482 } 483 return MemoryContentsType::UNKNOWN; 484 } 485 486 if (Section->isVirtual()) { 487 // The contents are filled at runtime. 488 return MemoryContentsType::UNKNOWN; 489 } 490 491 // No support for jump tables in code yet. 492 if (Section->isText()) 493 return MemoryContentsType::UNKNOWN; 494 495 // Start with checking for PIC jump table. We expect non-PIC jump tables 496 // to have high 32 bits set to 0. 497 if (analyzeJumpTable(Address, JumpTable::JTT_PIC, BF)) 498 return MemoryContentsType::POSSIBLE_PIC_JUMP_TABLE; 499 500 if (analyzeJumpTable(Address, JumpTable::JTT_NORMAL, BF)) 501 return MemoryContentsType::POSSIBLE_JUMP_TABLE; 502 503 return MemoryContentsType::UNKNOWN; 504 } 505 506 bool BinaryContext::analyzeJumpTable(const uint64_t Address, 507 const JumpTable::JumpTableType Type, 508 const BinaryFunction &BF, 509 const uint64_t NextJTAddress, 510 JumpTable::AddressesType *EntriesAsAddress, 511 bool *HasEntryInFragment) const { 512 // Is one of the targets __builtin_unreachable? 513 bool HasUnreachable = false; 514 515 // Does one of the entries match function start address? 516 bool HasStartAsEntry = false; 517 518 // Number of targets other than __builtin_unreachable. 519 uint64_t NumRealEntries = 0; 520 521 auto addEntryAddress = [&](uint64_t EntryAddress) { 522 if (EntriesAsAddress) 523 EntriesAsAddress->emplace_back(EntryAddress); 524 }; 525 526 ErrorOr<const BinarySection &> Section = getSectionForAddress(Address); 527 if (!Section) 528 return false; 529 530 // The upper bound is defined by containing object, section limits, and 531 // the next jump table in memory. 532 uint64_t UpperBound = Section->getEndAddress(); 533 const BinaryData *JumpTableBD = getBinaryDataAtAddress(Address); 534 if (JumpTableBD && JumpTableBD->getSize()) { 535 assert(JumpTableBD->getEndAddress() <= UpperBound && 536 "data object cannot cross a section boundary"); 537 UpperBound = JumpTableBD->getEndAddress(); 538 } 539 if (NextJTAddress) 540 UpperBound = std::min(NextJTAddress, UpperBound); 541 542 LLVM_DEBUG({ 543 using JTT = JumpTable::JumpTableType; 544 dbgs() << formatv("BOLT-DEBUG: analyzeJumpTable @{0:x} in {1}, JTT={2}\n", 545 Address, BF.getPrintName(), 546 Type == JTT::JTT_PIC ? "PIC" : "Normal"); 547 }); 548 const uint64_t EntrySize = getJumpTableEntrySize(Type); 549 for (uint64_t EntryAddress = Address; EntryAddress <= UpperBound - EntrySize; 550 EntryAddress += EntrySize) { 551 LLVM_DEBUG(dbgs() << " * Checking 0x" << Twine::utohexstr(EntryAddress) 552 << " -> "); 553 // Check if there's a proper relocation against the jump table entry. 554 if (HasRelocations) { 555 if (Type == JumpTable::JTT_PIC && 556 !DataPCRelocations.count(EntryAddress)) { 557 LLVM_DEBUG( 558 dbgs() << "FAIL: JTT_PIC table, no relocation for this address\n"); 559 break; 560 } 561 if (Type == JumpTable::JTT_NORMAL && !getRelocationAt(EntryAddress)) { 562 LLVM_DEBUG( 563 dbgs() 564 << "FAIL: JTT_NORMAL table, no relocation for this address\n"); 565 break; 566 } 567 } 568 569 const uint64_t Value = 570 (Type == JumpTable::JTT_PIC) 571 ? Address + *getSignedValueAtAddress(EntryAddress, EntrySize) 572 : *getPointerAtAddress(EntryAddress); 573 574 // __builtin_unreachable() case. 575 if (Value == BF.getAddress() + BF.getSize()) { 576 addEntryAddress(Value); 577 HasUnreachable = true; 578 LLVM_DEBUG(dbgs() << formatv("OK: {0:x} __builtin_unreachable\n", Value)); 579 continue; 580 } 581 582 // Function start is another special case. It is allowed in the jump table, 583 // but we need at least one another regular entry to distinguish the table 584 // from, e.g. a function pointer array. 585 if (Value == BF.getAddress()) { 586 HasStartAsEntry = true; 587 addEntryAddress(Value); 588 continue; 589 } 590 591 // Function or one of its fragments. 592 const BinaryFunction *TargetBF = getBinaryFunctionContainingAddress(Value); 593 const bool DoesBelongToFunction = 594 BF.containsAddress(Value) || 595 (TargetBF && TargetBF->isParentOrChildOf(BF)); 596 if (!DoesBelongToFunction) { 597 LLVM_DEBUG({ 598 if (!BF.containsAddress(Value)) { 599 dbgs() << "FAIL: function doesn't contain this address\n"; 600 if (TargetBF) { 601 dbgs() << " ! function containing this address: " 602 << TargetBF->getPrintName() << '\n'; 603 if (TargetBF->isFragment()) { 604 dbgs() << " ! is a fragment"; 605 for (BinaryFunction *Parent : TargetBF->ParentFragments) 606 dbgs() << ", parent: " << Parent->getPrintName(); 607 dbgs() << '\n'; 608 } 609 } 610 } 611 }); 612 break; 613 } 614 615 // Check there's an instruction at this offset. 616 if (TargetBF->getState() == BinaryFunction::State::Disassembled && 617 !TargetBF->getInstructionAtOffset(Value - TargetBF->getAddress())) { 618 LLVM_DEBUG(dbgs() << formatv("FAIL: no instruction at {0:x}\n", Value)); 619 break; 620 } 621 622 ++NumRealEntries; 623 LLVM_DEBUG(dbgs() << formatv("OK: {0:x} real entry\n", Value)); 624 625 if (TargetBF != &BF && HasEntryInFragment) 626 *HasEntryInFragment = true; 627 addEntryAddress(Value); 628 } 629 630 // It's a jump table if the number of real entries is more than 1, or there's 631 // one real entry and one or more special targets. If there are only multiple 632 // special targets, then it's not a jump table. 633 return NumRealEntries + (HasUnreachable || HasStartAsEntry) >= 2; 634 } 635 636 void BinaryContext::populateJumpTables() { 637 LLVM_DEBUG(dbgs() << "DataPCRelocations: " << DataPCRelocations.size() 638 << '\n'); 639 for (auto JTI = JumpTables.begin(), JTE = JumpTables.end(); JTI != JTE; 640 ++JTI) { 641 JumpTable *JT = JTI->second; 642 643 bool NonSimpleParent = false; 644 for (BinaryFunction *BF : JT->Parents) 645 NonSimpleParent |= !BF->isSimple(); 646 if (NonSimpleParent) 647 continue; 648 649 uint64_t NextJTAddress = 0; 650 auto NextJTI = std::next(JTI); 651 if (NextJTI != JTE) 652 NextJTAddress = NextJTI->second->getAddress(); 653 654 const bool Success = 655 analyzeJumpTable(JT->getAddress(), JT->Type, *(JT->Parents[0]), 656 NextJTAddress, &JT->EntriesAsAddress, &JT->IsSplit); 657 if (!Success) { 658 LLVM_DEBUG({ 659 dbgs() << "failed to analyze "; 660 JT->print(dbgs()); 661 if (NextJTI != JTE) { 662 dbgs() << "next "; 663 NextJTI->second->print(dbgs()); 664 } 665 }); 666 llvm_unreachable("jump table heuristic failure"); 667 } 668 for (BinaryFunction *Frag : JT->Parents) { 669 if (JT->IsSplit) 670 Frag->setHasIndirectTargetToSplitFragment(true); 671 for (uint64_t EntryAddress : JT->EntriesAsAddress) 672 // if target is builtin_unreachable 673 if (EntryAddress == Frag->getAddress() + Frag->getSize()) { 674 Frag->IgnoredBranches.emplace_back(EntryAddress - Frag->getAddress(), 675 Frag->getSize()); 676 } else if (EntryAddress >= Frag->getAddress() && 677 EntryAddress < Frag->getAddress() + Frag->getSize()) { 678 Frag->registerReferencedOffset(EntryAddress - Frag->getAddress()); 679 } 680 } 681 682 // In strict mode, erase PC-relative relocation record. Later we check that 683 // all such records are erased and thus have been accounted for. 684 if (opts::StrictMode && JT->Type == JumpTable::JTT_PIC) { 685 for (uint64_t Address = JT->getAddress(); 686 Address < JT->getAddress() + JT->getSize(); 687 Address += JT->EntrySize) { 688 DataPCRelocations.erase(DataPCRelocations.find(Address)); 689 } 690 } 691 692 // Mark to skip the function and all its fragments. 693 for (BinaryFunction *Frag : JT->Parents) 694 if (Frag->hasIndirectTargetToSplitFragment()) 695 addFragmentsToSkip(Frag); 696 } 697 698 if (opts::StrictMode && DataPCRelocations.size()) { 699 LLVM_DEBUG({ 700 dbgs() << DataPCRelocations.size() 701 << " unclaimed PC-relative relocations left in data:\n"; 702 for (uint64_t Reloc : DataPCRelocations) 703 dbgs() << Twine::utohexstr(Reloc) << '\n'; 704 }); 705 assert(0 && "unclaimed PC-relative relocations left in data\n"); 706 } 707 clearList(DataPCRelocations); 708 } 709 710 void BinaryContext::skipMarkedFragments() { 711 std::vector<BinaryFunction *> FragmentQueue; 712 // Copy the functions to FragmentQueue. 713 FragmentQueue.assign(FragmentsToSkip.begin(), FragmentsToSkip.end()); 714 auto addToWorklist = [&](BinaryFunction *Function) -> void { 715 if (FragmentsToSkip.count(Function)) 716 return; 717 FragmentQueue.push_back(Function); 718 addFragmentsToSkip(Function); 719 }; 720 // Functions containing split jump tables need to be skipped with all 721 // fragments (transitively). 722 for (size_t I = 0; I != FragmentQueue.size(); I++) { 723 BinaryFunction *BF = FragmentQueue[I]; 724 assert(FragmentsToSkip.count(BF) && 725 "internal error in traversing function fragments"); 726 if (opts::Verbosity >= 1) 727 errs() << "BOLT-WARNING: Ignoring " << BF->getPrintName() << '\n'; 728 BF->setSimple(false); 729 BF->setHasIndirectTargetToSplitFragment(true); 730 731 llvm::for_each(BF->Fragments, addToWorklist); 732 llvm::for_each(BF->ParentFragments, addToWorklist); 733 } 734 if (!FragmentsToSkip.empty()) 735 errs() << "BOLT-WARNING: skipped " << FragmentsToSkip.size() << " function" 736 << (FragmentsToSkip.size() == 1 ? "" : "s") 737 << " due to cold fragments\n"; 738 } 739 740 MCSymbol *BinaryContext::getOrCreateGlobalSymbol(uint64_t Address, Twine Prefix, 741 uint64_t Size, 742 uint16_t Alignment, 743 unsigned Flags) { 744 auto Itr = BinaryDataMap.find(Address); 745 if (Itr != BinaryDataMap.end()) { 746 assert(Itr->second->getSize() == Size || !Size); 747 return Itr->second->getSymbol(); 748 } 749 750 std::string Name = (Prefix + "0x" + Twine::utohexstr(Address)).str(); 751 assert(!GlobalSymbols.count(Name) && "created name is not unique"); 752 return registerNameAtAddress(Name, Address, Size, Alignment, Flags); 753 } 754 755 MCSymbol *BinaryContext::getOrCreateUndefinedGlobalSymbol(StringRef Name) { 756 return Ctx->getOrCreateSymbol(Name); 757 } 758 759 BinaryFunction *BinaryContext::createBinaryFunction( 760 const std::string &Name, BinarySection &Section, uint64_t Address, 761 uint64_t Size, uint64_t SymbolSize, uint16_t Alignment) { 762 auto Result = BinaryFunctions.emplace( 763 Address, BinaryFunction(Name, Section, Address, Size, *this)); 764 assert(Result.second == true && "unexpected duplicate function"); 765 BinaryFunction *BF = &Result.first->second; 766 registerNameAtAddress(Name, Address, SymbolSize ? SymbolSize : Size, 767 Alignment); 768 setSymbolToFunctionMap(BF->getSymbol(), BF); 769 return BF; 770 } 771 772 const MCSymbol * 773 BinaryContext::getOrCreateJumpTable(BinaryFunction &Function, uint64_t Address, 774 JumpTable::JumpTableType Type) { 775 // Two fragments of same function access same jump table 776 if (JumpTable *JT = getJumpTableContainingAddress(Address)) { 777 assert(JT->Type == Type && "jump table types have to match"); 778 assert(Address == JT->getAddress() && "unexpected non-empty jump table"); 779 780 // Prevent associating a jump table to a specific fragment twice. 781 // This simple check arises from the assumption: no more than 2 fragments. 782 if (JT->Parents.size() == 1 && JT->Parents[0] != &Function) { 783 assert(JT->Parents[0]->isParentOrChildOf(Function) && 784 "cannot re-use jump table of a different function"); 785 // Duplicate the entry for the parent function for easy access 786 JT->Parents.push_back(&Function); 787 if (opts::Verbosity > 2) { 788 outs() << "BOLT-INFO: Multiple fragments access same jump table: " 789 << JT->Parents[0]->getPrintName() << "; " 790 << Function.getPrintName() << "\n"; 791 JT->print(outs()); 792 } 793 Function.JumpTables.emplace(Address, JT); 794 JT->Parents[0]->setHasIndirectTargetToSplitFragment(true); 795 JT->Parents[1]->setHasIndirectTargetToSplitFragment(true); 796 } 797 798 bool IsJumpTableParent = false; 799 (void)IsJumpTableParent; 800 for (BinaryFunction *Frag : JT->Parents) 801 if (Frag == &Function) 802 IsJumpTableParent = true; 803 assert(IsJumpTableParent && 804 "cannot re-use jump table of a different function"); 805 return JT->getFirstLabel(); 806 } 807 808 // Re-use the existing symbol if possible. 809 MCSymbol *JTLabel = nullptr; 810 if (BinaryData *Object = getBinaryDataAtAddress(Address)) { 811 if (!isInternalSymbolName(Object->getSymbol()->getName())) 812 JTLabel = Object->getSymbol(); 813 } 814 815 const uint64_t EntrySize = getJumpTableEntrySize(Type); 816 if (!JTLabel) { 817 const std::string JumpTableName = generateJumpTableName(Function, Address); 818 JTLabel = registerNameAtAddress(JumpTableName, Address, 0, EntrySize); 819 } 820 821 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: creating jump table " << JTLabel->getName() 822 << " in function " << Function << '\n'); 823 824 JumpTable *JT = new JumpTable(*JTLabel, Address, EntrySize, Type, 825 JumpTable::LabelMapType{{0, JTLabel}}, 826 *getSectionForAddress(Address)); 827 JT->Parents.push_back(&Function); 828 if (opts::Verbosity > 2) 829 JT->print(outs()); 830 JumpTables.emplace(Address, JT); 831 832 // Duplicate the entry for the parent function for easy access. 833 Function.JumpTables.emplace(Address, JT); 834 return JTLabel; 835 } 836 837 std::pair<uint64_t, const MCSymbol *> 838 BinaryContext::duplicateJumpTable(BinaryFunction &Function, JumpTable *JT, 839 const MCSymbol *OldLabel) { 840 auto L = scopeLock(); 841 unsigned Offset = 0; 842 bool Found = false; 843 for (std::pair<const unsigned, MCSymbol *> Elmt : JT->Labels) { 844 if (Elmt.second != OldLabel) 845 continue; 846 Offset = Elmt.first; 847 Found = true; 848 break; 849 } 850 assert(Found && "Label not found"); 851 (void)Found; 852 MCSymbol *NewLabel = Ctx->createNamedTempSymbol("duplicatedJT"); 853 JumpTable *NewJT = 854 new JumpTable(*NewLabel, JT->getAddress(), JT->EntrySize, JT->Type, 855 JumpTable::LabelMapType{{Offset, NewLabel}}, 856 *getSectionForAddress(JT->getAddress())); 857 NewJT->Parents = JT->Parents; 858 NewJT->Entries = JT->Entries; 859 NewJT->Counts = JT->Counts; 860 uint64_t JumpTableID = ++DuplicatedJumpTables; 861 // Invert it to differentiate from regular jump tables whose IDs are their 862 // addresses in the input binary memory space 863 JumpTableID = ~JumpTableID; 864 JumpTables.emplace(JumpTableID, NewJT); 865 Function.JumpTables.emplace(JumpTableID, NewJT); 866 return std::make_pair(JumpTableID, NewLabel); 867 } 868 869 std::string BinaryContext::generateJumpTableName(const BinaryFunction &BF, 870 uint64_t Address) { 871 size_t Id; 872 uint64_t Offset = 0; 873 if (const JumpTable *JT = BF.getJumpTableContainingAddress(Address)) { 874 Offset = Address - JT->getAddress(); 875 auto Itr = JT->Labels.find(Offset); 876 if (Itr != JT->Labels.end()) 877 return std::string(Itr->second->getName()); 878 Id = JumpTableIds.at(JT->getAddress()); 879 } else { 880 Id = JumpTableIds[Address] = BF.JumpTables.size(); 881 } 882 return ("JUMP_TABLE/" + BF.getOneName().str() + "." + std::to_string(Id) + 883 (Offset ? ("." + std::to_string(Offset)) : "")); 884 } 885 886 bool BinaryContext::hasValidCodePadding(const BinaryFunction &BF) { 887 // FIXME: aarch64 support is missing. 888 if (!isX86()) 889 return true; 890 891 if (BF.getSize() == BF.getMaxSize()) 892 return true; 893 894 ErrorOr<ArrayRef<unsigned char>> FunctionData = BF.getData(); 895 assert(FunctionData && "cannot get function as data"); 896 897 uint64_t Offset = BF.getSize(); 898 MCInst Instr; 899 uint64_t InstrSize = 0; 900 uint64_t InstrAddress = BF.getAddress() + Offset; 901 using std::placeholders::_1; 902 903 // Skip instructions that satisfy the predicate condition. 904 auto skipInstructions = [&](std::function<bool(const MCInst &)> Predicate) { 905 const uint64_t StartOffset = Offset; 906 for (; Offset < BF.getMaxSize(); 907 Offset += InstrSize, InstrAddress += InstrSize) { 908 if (!DisAsm->getInstruction(Instr, InstrSize, FunctionData->slice(Offset), 909 InstrAddress, nulls())) 910 break; 911 if (!Predicate(Instr)) 912 break; 913 } 914 915 return Offset - StartOffset; 916 }; 917 918 // Skip a sequence of zero bytes. 919 auto skipZeros = [&]() { 920 const uint64_t StartOffset = Offset; 921 for (; Offset < BF.getMaxSize(); ++Offset) 922 if ((*FunctionData)[Offset] != 0) 923 break; 924 925 return Offset - StartOffset; 926 }; 927 928 // Accept the whole padding area filled with breakpoints. 929 auto isBreakpoint = std::bind(&MCPlusBuilder::isBreakpoint, MIB.get(), _1); 930 if (skipInstructions(isBreakpoint) && Offset == BF.getMaxSize()) 931 return true; 932 933 auto isNoop = std::bind(&MCPlusBuilder::isNoop, MIB.get(), _1); 934 935 // Some functions have a jump to the next function or to the padding area 936 // inserted after the body. 937 auto isSkipJump = [&](const MCInst &Instr) { 938 uint64_t TargetAddress = 0; 939 if (MIB->isUnconditionalBranch(Instr) && 940 MIB->evaluateBranch(Instr, InstrAddress, InstrSize, TargetAddress)) { 941 if (TargetAddress >= InstrAddress + InstrSize && 942 TargetAddress <= BF.getAddress() + BF.getMaxSize()) { 943 return true; 944 } 945 } 946 return false; 947 }; 948 949 // Skip over nops, jumps, and zero padding. Allow interleaving (this happens). 950 while (skipInstructions(isNoop) || skipInstructions(isSkipJump) || 951 skipZeros()) 952 ; 953 954 if (Offset == BF.getMaxSize()) 955 return true; 956 957 if (opts::Verbosity >= 1) { 958 errs() << "BOLT-WARNING: bad padding at address 0x" 959 << Twine::utohexstr(BF.getAddress() + BF.getSize()) 960 << " starting at offset " << (Offset - BF.getSize()) 961 << " in function " << BF << '\n' 962 << FunctionData->slice(BF.getSize(), BF.getMaxSize() - BF.getSize()) 963 << '\n'; 964 } 965 966 return false; 967 } 968 969 void BinaryContext::adjustCodePadding() { 970 for (auto &BFI : BinaryFunctions) { 971 BinaryFunction &BF = BFI.second; 972 if (!shouldEmit(BF)) 973 continue; 974 975 if (!hasValidCodePadding(BF)) { 976 if (HasRelocations) { 977 if (opts::Verbosity >= 1) { 978 outs() << "BOLT-INFO: function " << BF 979 << " has invalid padding. Ignoring the function.\n"; 980 } 981 BF.setIgnored(); 982 } else { 983 BF.setMaxSize(BF.getSize()); 984 } 985 } 986 } 987 } 988 989 MCSymbol *BinaryContext::registerNameAtAddress(StringRef Name, uint64_t Address, 990 uint64_t Size, 991 uint16_t Alignment, 992 unsigned Flags) { 993 // Register the name with MCContext. 994 MCSymbol *Symbol = Ctx->getOrCreateSymbol(Name); 995 996 auto GAI = BinaryDataMap.find(Address); 997 BinaryData *BD; 998 if (GAI == BinaryDataMap.end()) { 999 ErrorOr<BinarySection &> SectionOrErr = getSectionForAddress(Address); 1000 BinarySection &Section = 1001 SectionOrErr ? SectionOrErr.get() : absoluteSection(); 1002 BD = new BinaryData(*Symbol, Address, Size, Alignment ? Alignment : 1, 1003 Section, Flags); 1004 GAI = BinaryDataMap.emplace(Address, BD).first; 1005 GlobalSymbols[Name] = BD; 1006 updateObjectNesting(GAI); 1007 } else { 1008 BD = GAI->second; 1009 if (!BD->hasName(Name)) { 1010 GlobalSymbols[Name] = BD; 1011 BD->Symbols.push_back(Symbol); 1012 } 1013 } 1014 1015 return Symbol; 1016 } 1017 1018 const BinaryData * 1019 BinaryContext::getBinaryDataContainingAddressImpl(uint64_t Address) const { 1020 auto NI = BinaryDataMap.lower_bound(Address); 1021 auto End = BinaryDataMap.end(); 1022 if ((NI != End && Address == NI->first) || 1023 ((NI != BinaryDataMap.begin()) && (NI-- != BinaryDataMap.begin()))) { 1024 if (NI->second->containsAddress(Address)) 1025 return NI->second; 1026 1027 // If this is a sub-symbol, see if a parent data contains the address. 1028 const BinaryData *BD = NI->second->getParent(); 1029 while (BD) { 1030 if (BD->containsAddress(Address)) 1031 return BD; 1032 BD = BD->getParent(); 1033 } 1034 } 1035 return nullptr; 1036 } 1037 1038 BinaryData *BinaryContext::getGOTSymbol() { 1039 // First tries to find a global symbol with that name 1040 BinaryData *GOTSymBD = getBinaryDataByName("_GLOBAL_OFFSET_TABLE_"); 1041 if (GOTSymBD) 1042 return GOTSymBD; 1043 1044 // This symbol might be hidden from run-time link, so fetch the local 1045 // definition if available. 1046 GOTSymBD = getBinaryDataByName("_GLOBAL_OFFSET_TABLE_/1"); 1047 if (!GOTSymBD) 1048 return nullptr; 1049 1050 // If the local symbol is not unique, fail 1051 unsigned Index = 2; 1052 SmallString<30> Storage; 1053 while (const BinaryData *BD = 1054 getBinaryDataByName(Twine("_GLOBAL_OFFSET_TABLE_/") 1055 .concat(Twine(Index++)) 1056 .toStringRef(Storage))) 1057 if (BD->getAddress() != GOTSymBD->getAddress()) 1058 return nullptr; 1059 1060 return GOTSymBD; 1061 } 1062 1063 bool BinaryContext::setBinaryDataSize(uint64_t Address, uint64_t Size) { 1064 auto NI = BinaryDataMap.find(Address); 1065 assert(NI != BinaryDataMap.end()); 1066 if (NI == BinaryDataMap.end()) 1067 return false; 1068 // TODO: it's possible that a jump table starts at the same address 1069 // as a larger blob of private data. When we set the size of the 1070 // jump table, it might be smaller than the total blob size. In this 1071 // case we just leave the original size since (currently) it won't really 1072 // affect anything. 1073 assert((!NI->second->Size || NI->second->Size == Size || 1074 (NI->second->isJumpTable() && NI->second->Size > Size)) && 1075 "can't change the size of a symbol that has already had its " 1076 "size set"); 1077 if (!NI->second->Size) { 1078 NI->second->Size = Size; 1079 updateObjectNesting(NI); 1080 return true; 1081 } 1082 return false; 1083 } 1084 1085 void BinaryContext::generateSymbolHashes() { 1086 auto isPadding = [](const BinaryData &BD) { 1087 StringRef Contents = BD.getSection().getContents(); 1088 StringRef SymData = Contents.substr(BD.getOffset(), BD.getSize()); 1089 return (BD.getName().startswith("HOLEat") || 1090 SymData.find_first_not_of(0) == StringRef::npos); 1091 }; 1092 1093 uint64_t NumCollisions = 0; 1094 for (auto &Entry : BinaryDataMap) { 1095 BinaryData &BD = *Entry.second; 1096 StringRef Name = BD.getName(); 1097 1098 if (!isInternalSymbolName(Name)) 1099 continue; 1100 1101 // First check if a non-anonymous alias exists and move it to the front. 1102 if (BD.getSymbols().size() > 1) { 1103 auto Itr = llvm::find_if(BD.getSymbols(), [&](const MCSymbol *Symbol) { 1104 return !isInternalSymbolName(Symbol->getName()); 1105 }); 1106 if (Itr != BD.getSymbols().end()) { 1107 size_t Idx = std::distance(BD.getSymbols().begin(), Itr); 1108 std::swap(BD.getSymbols()[0], BD.getSymbols()[Idx]); 1109 continue; 1110 } 1111 } 1112 1113 // We have to skip 0 size symbols since they will all collide. 1114 if (BD.getSize() == 0) { 1115 continue; 1116 } 1117 1118 const uint64_t Hash = BD.getSection().hash(BD); 1119 const size_t Idx = Name.find("0x"); 1120 std::string NewName = 1121 (Twine(Name.substr(0, Idx)) + "_" + Twine::utohexstr(Hash)).str(); 1122 if (getBinaryDataByName(NewName)) { 1123 // Ignore collisions for symbols that appear to be padding 1124 // (i.e. all zeros or a "hole") 1125 if (!isPadding(BD)) { 1126 if (opts::Verbosity) { 1127 errs() << "BOLT-WARNING: collision detected when hashing " << BD 1128 << " with new name (" << NewName << "), skipping.\n"; 1129 } 1130 ++NumCollisions; 1131 } 1132 continue; 1133 } 1134 BD.Symbols.insert(BD.Symbols.begin(), Ctx->getOrCreateSymbol(NewName)); 1135 GlobalSymbols[NewName] = &BD; 1136 } 1137 if (NumCollisions) { 1138 errs() << "BOLT-WARNING: " << NumCollisions 1139 << " collisions detected while hashing binary objects"; 1140 if (!opts::Verbosity) 1141 errs() << ". Use -v=1 to see the list."; 1142 errs() << '\n'; 1143 } 1144 } 1145 1146 bool BinaryContext::registerFragment(BinaryFunction &TargetFunction, 1147 BinaryFunction &Function) const { 1148 assert(TargetFunction.isFragment() && "TargetFunction must be a fragment"); 1149 if (TargetFunction.isChildOf(Function)) 1150 return true; 1151 TargetFunction.addParentFragment(Function); 1152 Function.addFragment(TargetFunction); 1153 if (!HasRelocations) { 1154 TargetFunction.setSimple(false); 1155 Function.setSimple(false); 1156 } 1157 if (opts::Verbosity >= 1) { 1158 outs() << "BOLT-INFO: marking " << TargetFunction << " as a fragment of " 1159 << Function << '\n'; 1160 } 1161 return true; 1162 } 1163 1164 void BinaryContext::addAdrpAddRelocAArch64(BinaryFunction &BF, 1165 MCInst &LoadLowBits, 1166 MCInst &LoadHiBits, 1167 uint64_t Target) { 1168 const MCSymbol *TargetSymbol; 1169 uint64_t Addend = 0; 1170 std::tie(TargetSymbol, Addend) = handleAddressRef(Target, BF, 1171 /*IsPCRel*/ true); 1172 int64_t Val; 1173 MIB->replaceImmWithSymbolRef(LoadHiBits, TargetSymbol, Addend, Ctx.get(), Val, 1174 ELF::R_AARCH64_ADR_PREL_PG_HI21); 1175 MIB->replaceImmWithSymbolRef(LoadLowBits, TargetSymbol, Addend, Ctx.get(), 1176 Val, ELF::R_AARCH64_ADD_ABS_LO12_NC); 1177 } 1178 1179 bool BinaryContext::handleAArch64Veneer(uint64_t Address, bool MatchOnly) { 1180 BinaryFunction *TargetFunction = getBinaryFunctionContainingAddress(Address); 1181 if (TargetFunction) 1182 return false; 1183 1184 ErrorOr<BinarySection &> Section = getSectionForAddress(Address); 1185 assert(Section && "cannot get section for referenced address"); 1186 if (!Section->isText()) 1187 return false; 1188 1189 bool Ret = false; 1190 StringRef SectionContents = Section->getContents(); 1191 uint64_t Offset = Address - Section->getAddress(); 1192 const uint64_t MaxSize = SectionContents.size() - Offset; 1193 const uint8_t *Bytes = 1194 reinterpret_cast<const uint8_t *>(SectionContents.data()); 1195 ArrayRef<uint8_t> Data(Bytes + Offset, MaxSize); 1196 1197 auto matchVeneer = [&](BinaryFunction::InstrMapType &Instructions, 1198 MCInst &Instruction, uint64_t Offset, 1199 uint64_t AbsoluteInstrAddr, 1200 uint64_t TotalSize) -> bool { 1201 MCInst *TargetHiBits, *TargetLowBits; 1202 uint64_t TargetAddress, Count; 1203 Count = MIB->matchLinkerVeneer(Instructions.begin(), Instructions.end(), 1204 AbsoluteInstrAddr, Instruction, TargetHiBits, 1205 TargetLowBits, TargetAddress); 1206 if (!Count) 1207 return false; 1208 1209 if (MatchOnly) 1210 return true; 1211 1212 // NOTE The target symbol was created during disassemble's 1213 // handleExternalReference 1214 const MCSymbol *VeneerSymbol = getOrCreateGlobalSymbol(Address, "FUNCat"); 1215 BinaryFunction *Veneer = createBinaryFunction(VeneerSymbol->getName().str(), 1216 *Section, Address, TotalSize); 1217 addAdrpAddRelocAArch64(*Veneer, *TargetLowBits, *TargetHiBits, 1218 TargetAddress); 1219 MIB->addAnnotation(Instruction, "AArch64Veneer", true); 1220 Veneer->addInstruction(Offset, std::move(Instruction)); 1221 --Count; 1222 for (auto It = Instructions.rbegin(); Count != 0; ++It, --Count) { 1223 MIB->addAnnotation(It->second, "AArch64Veneer", true); 1224 Veneer->addInstruction(It->first, std::move(It->second)); 1225 } 1226 1227 Veneer->getOrCreateLocalLabel(Address); 1228 Veneer->setMaxSize(TotalSize); 1229 Veneer->updateState(BinaryFunction::State::Disassembled); 1230 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: handling veneer function at 0x" << Address 1231 << "\n"); 1232 return true; 1233 }; 1234 1235 uint64_t Size = 0, TotalSize = 0; 1236 BinaryFunction::InstrMapType VeneerInstructions; 1237 for (Offset = 0; Offset < MaxSize; Offset += Size) { 1238 MCInst Instruction; 1239 const uint64_t AbsoluteInstrAddr = Address + Offset; 1240 if (!SymbolicDisAsm->getInstruction(Instruction, Size, Data.slice(Offset), 1241 AbsoluteInstrAddr, nulls())) 1242 break; 1243 1244 TotalSize += Size; 1245 if (MIB->isBranch(Instruction)) { 1246 Ret = matchVeneer(VeneerInstructions, Instruction, Offset, 1247 AbsoluteInstrAddr, TotalSize); 1248 break; 1249 } 1250 1251 VeneerInstructions.emplace(Offset, std::move(Instruction)); 1252 } 1253 1254 return Ret; 1255 } 1256 1257 void BinaryContext::processInterproceduralReferences() { 1258 for (const std::pair<BinaryFunction *, uint64_t> &It : 1259 InterproceduralReferences) { 1260 BinaryFunction &Function = *It.first; 1261 uint64_t Address = It.second; 1262 if (!Address || Function.isIgnored()) 1263 continue; 1264 1265 BinaryFunction *TargetFunction = 1266 getBinaryFunctionContainingAddress(Address); 1267 if (&Function == TargetFunction) 1268 continue; 1269 1270 if (TargetFunction) { 1271 if (TargetFunction->isFragment() && 1272 !TargetFunction->isChildOf(Function)) { 1273 errs() << "BOLT-WARNING: interprocedural reference between unrelated " 1274 "fragments: " 1275 << Function.getPrintName() << " and " 1276 << TargetFunction->getPrintName() << '\n'; 1277 } 1278 if (uint64_t Offset = Address - TargetFunction->getAddress()) 1279 TargetFunction->addEntryPointAtOffset(Offset); 1280 1281 continue; 1282 } 1283 1284 // Check if address falls in function padding space - this could be 1285 // unmarked data in code. In this case adjust the padding space size. 1286 ErrorOr<BinarySection &> Section = getSectionForAddress(Address); 1287 assert(Section && "cannot get section for referenced address"); 1288 1289 if (!Section->isText()) 1290 continue; 1291 1292 // PLT requires special handling and could be ignored in this context. 1293 StringRef SectionName = Section->getName(); 1294 if (SectionName == ".plt" || SectionName == ".plt.got") 1295 continue; 1296 1297 // Check if it is aarch64 veneer written at Address 1298 if (isAArch64() && handleAArch64Veneer(Address)) 1299 continue; 1300 1301 if (opts::processAllFunctions()) { 1302 errs() << "BOLT-ERROR: cannot process binaries with unmarked " 1303 << "object in code at address 0x" << Twine::utohexstr(Address) 1304 << " belonging to section " << SectionName << " in current mode\n"; 1305 exit(1); 1306 } 1307 1308 TargetFunction = getBinaryFunctionContainingAddress(Address, 1309 /*CheckPastEnd=*/false, 1310 /*UseMaxSize=*/true); 1311 // We are not going to overwrite non-simple functions, but for simple 1312 // ones - adjust the padding size. 1313 if (TargetFunction && TargetFunction->isSimple()) { 1314 errs() << "BOLT-WARNING: function " << *TargetFunction 1315 << " has an object detected in a padding region at address 0x" 1316 << Twine::utohexstr(Address) << '\n'; 1317 TargetFunction->setMaxSize(TargetFunction->getSize()); 1318 } 1319 } 1320 1321 InterproceduralReferences.clear(); 1322 } 1323 1324 void BinaryContext::postProcessSymbolTable() { 1325 fixBinaryDataHoles(); 1326 bool Valid = true; 1327 for (auto &Entry : BinaryDataMap) { 1328 BinaryData *BD = Entry.second; 1329 if ((BD->getName().startswith("SYMBOLat") || 1330 BD->getName().startswith("DATAat")) && 1331 !BD->getParent() && !BD->getSize() && !BD->isAbsolute() && 1332 BD->getSection()) { 1333 errs() << "BOLT-WARNING: zero-sized top level symbol: " << *BD << "\n"; 1334 Valid = false; 1335 } 1336 } 1337 assert(Valid); 1338 (void)Valid; 1339 generateSymbolHashes(); 1340 } 1341 1342 void BinaryContext::foldFunction(BinaryFunction &ChildBF, 1343 BinaryFunction &ParentBF) { 1344 assert(!ChildBF.isMultiEntry() && !ParentBF.isMultiEntry() && 1345 "cannot merge functions with multiple entry points"); 1346 1347 std::unique_lock<llvm::sys::RWMutex> WriteCtxLock(CtxMutex, std::defer_lock); 1348 std::unique_lock<llvm::sys::RWMutex> WriteSymbolMapLock( 1349 SymbolToFunctionMapMutex, std::defer_lock); 1350 1351 const StringRef ChildName = ChildBF.getOneName(); 1352 1353 // Move symbols over and update bookkeeping info. 1354 for (MCSymbol *Symbol : ChildBF.getSymbols()) { 1355 ParentBF.getSymbols().push_back(Symbol); 1356 WriteSymbolMapLock.lock(); 1357 SymbolToFunctionMap[Symbol] = &ParentBF; 1358 WriteSymbolMapLock.unlock(); 1359 // NB: there's no need to update BinaryDataMap and GlobalSymbols. 1360 } 1361 ChildBF.getSymbols().clear(); 1362 1363 // Move other names the child function is known under. 1364 llvm::move(ChildBF.Aliases, std::back_inserter(ParentBF.Aliases)); 1365 ChildBF.Aliases.clear(); 1366 1367 if (HasRelocations) { 1368 // Merge execution counts of ChildBF into those of ParentBF. 1369 // Without relocations, we cannot reliably merge profiles as both functions 1370 // continue to exist and either one can be executed. 1371 ChildBF.mergeProfileDataInto(ParentBF); 1372 1373 std::shared_lock<llvm::sys::RWMutex> ReadBfsLock(BinaryFunctionsMutex, 1374 std::defer_lock); 1375 std::unique_lock<llvm::sys::RWMutex> WriteBfsLock(BinaryFunctionsMutex, 1376 std::defer_lock); 1377 // Remove ChildBF from the global set of functions in relocs mode. 1378 ReadBfsLock.lock(); 1379 auto FI = BinaryFunctions.find(ChildBF.getAddress()); 1380 ReadBfsLock.unlock(); 1381 1382 assert(FI != BinaryFunctions.end() && "function not found"); 1383 assert(&ChildBF == &FI->second && "function mismatch"); 1384 1385 WriteBfsLock.lock(); 1386 ChildBF.clearDisasmState(); 1387 FI = BinaryFunctions.erase(FI); 1388 WriteBfsLock.unlock(); 1389 1390 } else { 1391 // In non-relocation mode we keep the function, but rename it. 1392 std::string NewName = "__ICF_" + ChildName.str(); 1393 1394 WriteCtxLock.lock(); 1395 ChildBF.getSymbols().push_back(Ctx->getOrCreateSymbol(NewName)); 1396 WriteCtxLock.unlock(); 1397 1398 ChildBF.setFolded(&ParentBF); 1399 } 1400 1401 ParentBF.setHasFunctionsFoldedInto(); 1402 } 1403 1404 void BinaryContext::fixBinaryDataHoles() { 1405 assert(validateObjectNesting() && "object nesting inconsistency detected"); 1406 1407 for (BinarySection &Section : allocatableSections()) { 1408 std::vector<std::pair<uint64_t, uint64_t>> Holes; 1409 1410 auto isNotHole = [&Section](const binary_data_iterator &Itr) { 1411 BinaryData *BD = Itr->second; 1412 bool isHole = (!BD->getParent() && !BD->getSize() && BD->isObject() && 1413 (BD->getName().startswith("SYMBOLat0x") || 1414 BD->getName().startswith("DATAat0x") || 1415 BD->getName().startswith("ANONYMOUS"))); 1416 return !isHole && BD->getSection() == Section && !BD->getParent(); 1417 }; 1418 1419 auto BDStart = BinaryDataMap.begin(); 1420 auto BDEnd = BinaryDataMap.end(); 1421 auto Itr = FilteredBinaryDataIterator(isNotHole, BDStart, BDEnd); 1422 auto End = FilteredBinaryDataIterator(isNotHole, BDEnd, BDEnd); 1423 1424 uint64_t EndAddress = Section.getAddress(); 1425 1426 while (Itr != End) { 1427 if (Itr->second->getAddress() > EndAddress) { 1428 uint64_t Gap = Itr->second->getAddress() - EndAddress; 1429 Holes.emplace_back(EndAddress, Gap); 1430 } 1431 EndAddress = Itr->second->getEndAddress(); 1432 ++Itr; 1433 } 1434 1435 if (EndAddress < Section.getEndAddress()) 1436 Holes.emplace_back(EndAddress, Section.getEndAddress() - EndAddress); 1437 1438 // If there is already a symbol at the start of the hole, grow that symbol 1439 // to cover the rest. Otherwise, create a new symbol to cover the hole. 1440 for (std::pair<uint64_t, uint64_t> &Hole : Holes) { 1441 BinaryData *BD = getBinaryDataAtAddress(Hole.first); 1442 if (BD) { 1443 // BD->getSection() can be != Section if there are sections that 1444 // overlap. In this case it is probably safe to just skip the holes 1445 // since the overlapping section will not(?) have any symbols in it. 1446 if (BD->getSection() == Section) 1447 setBinaryDataSize(Hole.first, Hole.second); 1448 } else { 1449 getOrCreateGlobalSymbol(Hole.first, "HOLEat", Hole.second, 1); 1450 } 1451 } 1452 } 1453 1454 assert(validateObjectNesting() && "object nesting inconsistency detected"); 1455 assert(validateHoles() && "top level hole detected in object map"); 1456 } 1457 1458 void BinaryContext::printGlobalSymbols(raw_ostream &OS) const { 1459 const BinarySection *CurrentSection = nullptr; 1460 bool FirstSection = true; 1461 1462 for (auto &Entry : BinaryDataMap) { 1463 const BinaryData *BD = Entry.second; 1464 const BinarySection &Section = BD->getSection(); 1465 if (FirstSection || Section != *CurrentSection) { 1466 uint64_t Address, Size; 1467 StringRef Name = Section.getName(); 1468 if (Section) { 1469 Address = Section.getAddress(); 1470 Size = Section.getSize(); 1471 } else { 1472 Address = BD->getAddress(); 1473 Size = BD->getSize(); 1474 } 1475 OS << "BOLT-INFO: Section " << Name << ", " 1476 << "0x" + Twine::utohexstr(Address) << ":" 1477 << "0x" + Twine::utohexstr(Address + Size) << "/" << Size << "\n"; 1478 CurrentSection = &Section; 1479 FirstSection = false; 1480 } 1481 1482 OS << "BOLT-INFO: "; 1483 const BinaryData *P = BD->getParent(); 1484 while (P) { 1485 OS << " "; 1486 P = P->getParent(); 1487 } 1488 OS << *BD << "\n"; 1489 } 1490 } 1491 1492 Expected<unsigned> BinaryContext::getDwarfFile( 1493 StringRef Directory, StringRef FileName, unsigned FileNumber, 1494 std::optional<MD5::MD5Result> Checksum, std::optional<StringRef> Source, 1495 unsigned CUID, unsigned DWARFVersion) { 1496 DwarfLineTable &Table = DwarfLineTablesCUMap[CUID]; 1497 return Table.tryGetFile(Directory, FileName, Checksum, Source, DWARFVersion, 1498 FileNumber); 1499 } 1500 1501 unsigned BinaryContext::addDebugFilenameToUnit(const uint32_t DestCUID, 1502 const uint32_t SrcCUID, 1503 unsigned FileIndex) { 1504 DWARFCompileUnit *SrcUnit = DwCtx->getCompileUnitForOffset(SrcCUID); 1505 const DWARFDebugLine::LineTable *LineTable = 1506 DwCtx->getLineTableForUnit(SrcUnit); 1507 const std::vector<DWARFDebugLine::FileNameEntry> &FileNames = 1508 LineTable->Prologue.FileNames; 1509 // Dir indexes start at 1, as DWARF file numbers, and a dir index 0 1510 // means empty dir. 1511 assert(FileIndex > 0 && FileIndex <= FileNames.size() && 1512 "FileIndex out of range for the compilation unit."); 1513 StringRef Dir = ""; 1514 if (FileNames[FileIndex - 1].DirIdx != 0) { 1515 if (std::optional<const char *> DirName = dwarf::toString( 1516 LineTable->Prologue 1517 .IncludeDirectories[FileNames[FileIndex - 1].DirIdx - 1])) { 1518 Dir = *DirName; 1519 } 1520 } 1521 StringRef FileName = ""; 1522 if (std::optional<const char *> FName = 1523 dwarf::toString(FileNames[FileIndex - 1].Name)) 1524 FileName = *FName; 1525 assert(FileName != ""); 1526 DWARFCompileUnit *DstUnit = DwCtx->getCompileUnitForOffset(DestCUID); 1527 return cantFail(getDwarfFile(Dir, FileName, 0, std::nullopt, std::nullopt, 1528 DestCUID, DstUnit->getVersion())); 1529 } 1530 1531 std::vector<BinaryFunction *> BinaryContext::getSortedFunctions() { 1532 std::vector<BinaryFunction *> SortedFunctions(BinaryFunctions.size()); 1533 llvm::transform(llvm::make_second_range(BinaryFunctions), 1534 SortedFunctions.begin(), 1535 [](BinaryFunction &BF) { return &BF; }); 1536 1537 llvm::stable_sort(SortedFunctions, 1538 [](const BinaryFunction *A, const BinaryFunction *B) { 1539 if (A->hasValidIndex() && B->hasValidIndex()) { 1540 return A->getIndex() < B->getIndex(); 1541 } 1542 return A->hasValidIndex(); 1543 }); 1544 return SortedFunctions; 1545 } 1546 1547 std::vector<BinaryFunction *> BinaryContext::getAllBinaryFunctions() { 1548 std::vector<BinaryFunction *> AllFunctions; 1549 AllFunctions.reserve(BinaryFunctions.size() + InjectedBinaryFunctions.size()); 1550 llvm::transform(llvm::make_second_range(BinaryFunctions), 1551 std::back_inserter(AllFunctions), 1552 [](BinaryFunction &BF) { return &BF; }); 1553 llvm::copy(InjectedBinaryFunctions, std::back_inserter(AllFunctions)); 1554 1555 return AllFunctions; 1556 } 1557 1558 std::optional<DWARFUnit *> BinaryContext::getDWOCU(uint64_t DWOId) { 1559 auto Iter = DWOCUs.find(DWOId); 1560 if (Iter == DWOCUs.end()) 1561 return std::nullopt; 1562 1563 return Iter->second; 1564 } 1565 1566 DWARFContext *BinaryContext::getDWOContext() const { 1567 if (DWOCUs.empty()) 1568 return nullptr; 1569 return &DWOCUs.begin()->second->getContext(); 1570 } 1571 1572 /// Handles DWO sections that can either be in .o, .dwo or .dwp files. 1573 void BinaryContext::preprocessDWODebugInfo() { 1574 for (const std::unique_ptr<DWARFUnit> &CU : DwCtx->compile_units()) { 1575 DWARFUnit *const DwarfUnit = CU.get(); 1576 if (std::optional<uint64_t> DWOId = DwarfUnit->getDWOId()) { 1577 DWARFUnit *DWOCU = DwarfUnit->getNonSkeletonUnitDIE(false).getDwarfUnit(); 1578 if (!DWOCU->isDWOUnit()) { 1579 std::string DWOName = dwarf::toString( 1580 DwarfUnit->getUnitDIE().find( 1581 {dwarf::DW_AT_dwo_name, dwarf::DW_AT_GNU_dwo_name}), 1582 ""); 1583 outs() << "BOLT-WARNING: Debug Fission: DWO debug information for " 1584 << DWOName 1585 << " was not retrieved and won't be updated. Please check " 1586 "relative path.\n"; 1587 continue; 1588 } 1589 DWOCUs[*DWOId] = DWOCU; 1590 } 1591 } 1592 if (!DWOCUs.empty()) 1593 outs() << "BOLT-INFO: processing split DWARF\n"; 1594 } 1595 1596 void BinaryContext::preprocessDebugInfo() { 1597 struct CURange { 1598 uint64_t LowPC; 1599 uint64_t HighPC; 1600 DWARFUnit *Unit; 1601 1602 bool operator<(const CURange &Other) const { return LowPC < Other.LowPC; } 1603 }; 1604 1605 // Building a map of address ranges to CUs similar to .debug_aranges and use 1606 // it to assign CU to functions. 1607 std::vector<CURange> AllRanges; 1608 AllRanges.reserve(DwCtx->getNumCompileUnits()); 1609 for (const std::unique_ptr<DWARFUnit> &CU : DwCtx->compile_units()) { 1610 Expected<DWARFAddressRangesVector> RangesOrError = 1611 CU->getUnitDIE().getAddressRanges(); 1612 if (!RangesOrError) { 1613 consumeError(RangesOrError.takeError()); 1614 continue; 1615 } 1616 for (DWARFAddressRange &Range : *RangesOrError) { 1617 // Parts of the debug info could be invalidated due to corresponding code 1618 // being removed from the binary by the linker. Hence we check if the 1619 // address is a valid one. 1620 if (containsAddress(Range.LowPC)) 1621 AllRanges.emplace_back(CURange{Range.LowPC, Range.HighPC, CU.get()}); 1622 } 1623 1624 ContainsDwarf5 |= CU->getVersion() >= 5; 1625 ContainsDwarfLegacy |= CU->getVersion() < 5; 1626 } 1627 1628 llvm::sort(AllRanges); 1629 for (auto &KV : BinaryFunctions) { 1630 const uint64_t FunctionAddress = KV.first; 1631 BinaryFunction &Function = KV.second; 1632 1633 auto It = llvm::partition_point( 1634 AllRanges, [=](CURange R) { return R.HighPC <= FunctionAddress; }); 1635 if (It != AllRanges.end() && It->LowPC <= FunctionAddress) 1636 Function.setDWARFUnit(It->Unit); 1637 } 1638 1639 // Discover units with debug info that needs to be updated. 1640 for (const auto &KV : BinaryFunctions) { 1641 const BinaryFunction &BF = KV.second; 1642 if (shouldEmit(BF) && BF.getDWARFUnit()) 1643 ProcessedCUs.insert(BF.getDWARFUnit()); 1644 } 1645 1646 // Clear debug info for functions from units that we are not going to process. 1647 for (auto &KV : BinaryFunctions) { 1648 BinaryFunction &BF = KV.second; 1649 if (BF.getDWARFUnit() && !ProcessedCUs.count(BF.getDWARFUnit())) 1650 BF.setDWARFUnit(nullptr); 1651 } 1652 1653 if (opts::Verbosity >= 1) { 1654 outs() << "BOLT-INFO: " << ProcessedCUs.size() << " out of " 1655 << DwCtx->getNumCompileUnits() << " CUs will be updated\n"; 1656 } 1657 1658 preprocessDWODebugInfo(); 1659 1660 // Populate MCContext with DWARF files from all units. 1661 StringRef GlobalPrefix = AsmInfo->getPrivateGlobalPrefix(); 1662 for (const std::unique_ptr<DWARFUnit> &CU : DwCtx->compile_units()) { 1663 const uint64_t CUID = CU->getOffset(); 1664 DwarfLineTable &BinaryLineTable = getDwarfLineTable(CUID); 1665 BinaryLineTable.setLabel(Ctx->getOrCreateSymbol( 1666 GlobalPrefix + "line_table_start" + Twine(CUID))); 1667 1668 if (!ProcessedCUs.count(CU.get())) 1669 continue; 1670 1671 const DWARFDebugLine::LineTable *LineTable = 1672 DwCtx->getLineTableForUnit(CU.get()); 1673 const std::vector<DWARFDebugLine::FileNameEntry> &FileNames = 1674 LineTable->Prologue.FileNames; 1675 1676 uint16_t DwarfVersion = LineTable->Prologue.getVersion(); 1677 if (DwarfVersion >= 5) { 1678 std::optional<MD5::MD5Result> Checksum; 1679 if (LineTable->Prologue.ContentTypes.HasMD5) 1680 Checksum = LineTable->Prologue.FileNames[0].Checksum; 1681 std::optional<const char *> Name = 1682 dwarf::toString(CU->getUnitDIE().find(dwarf::DW_AT_name), nullptr); 1683 if (std::optional<uint64_t> DWOID = CU->getDWOId()) { 1684 auto Iter = DWOCUs.find(*DWOID); 1685 assert(Iter != DWOCUs.end() && "DWO CU was not found."); 1686 Name = dwarf::toString( 1687 Iter->second->getUnitDIE().find(dwarf::DW_AT_name), nullptr); 1688 } 1689 BinaryLineTable.setRootFile(CU->getCompilationDir(), *Name, Checksum, 1690 std::nullopt); 1691 } 1692 1693 BinaryLineTable.setDwarfVersion(DwarfVersion); 1694 1695 // Assign a unique label to every line table, one per CU. 1696 // Make sure empty debug line tables are registered too. 1697 if (FileNames.empty()) { 1698 cantFail(getDwarfFile("", "<unknown>", 0, std::nullopt, std::nullopt, 1699 CUID, DwarfVersion)); 1700 continue; 1701 } 1702 const uint32_t Offset = DwarfVersion < 5 ? 1 : 0; 1703 for (size_t I = 0, Size = FileNames.size(); I != Size; ++I) { 1704 // Dir indexes start at 1, as DWARF file numbers, and a dir index 0 1705 // means empty dir. 1706 StringRef Dir = ""; 1707 if (FileNames[I].DirIdx != 0 || DwarfVersion >= 5) 1708 if (std::optional<const char *> DirName = dwarf::toString( 1709 LineTable->Prologue 1710 .IncludeDirectories[FileNames[I].DirIdx - Offset])) 1711 Dir = *DirName; 1712 StringRef FileName = ""; 1713 if (std::optional<const char *> FName = 1714 dwarf::toString(FileNames[I].Name)) 1715 FileName = *FName; 1716 assert(FileName != ""); 1717 std::optional<MD5::MD5Result> Checksum; 1718 if (DwarfVersion >= 5 && LineTable->Prologue.ContentTypes.HasMD5) 1719 Checksum = LineTable->Prologue.FileNames[I].Checksum; 1720 cantFail(getDwarfFile(Dir, FileName, 0, Checksum, std::nullopt, CUID, 1721 DwarfVersion)); 1722 } 1723 } 1724 } 1725 1726 bool BinaryContext::shouldEmit(const BinaryFunction &Function) const { 1727 if (Function.isPseudo()) 1728 return false; 1729 1730 if (opts::processAllFunctions()) 1731 return true; 1732 1733 if (Function.isIgnored()) 1734 return false; 1735 1736 // In relocation mode we will emit non-simple functions with CFG. 1737 // If the function does not have a CFG it should be marked as ignored. 1738 return HasRelocations || Function.isSimple(); 1739 } 1740 1741 void BinaryContext::dump(const MCInst &Inst) const { 1742 if (LLVM_UNLIKELY(!InstPrinter)) { 1743 dbgs() << "Cannot dump for InstPrinter is not initialized.\n"; 1744 return; 1745 } 1746 InstPrinter->printInst(&Inst, 0, "", *STI, dbgs()); 1747 dbgs() << "\n"; 1748 } 1749 1750 void BinaryContext::printCFI(raw_ostream &OS, const MCCFIInstruction &Inst) { 1751 uint32_t Operation = Inst.getOperation(); 1752 switch (Operation) { 1753 case MCCFIInstruction::OpSameValue: 1754 OS << "OpSameValue Reg" << Inst.getRegister(); 1755 break; 1756 case MCCFIInstruction::OpRememberState: 1757 OS << "OpRememberState"; 1758 break; 1759 case MCCFIInstruction::OpRestoreState: 1760 OS << "OpRestoreState"; 1761 break; 1762 case MCCFIInstruction::OpOffset: 1763 OS << "OpOffset Reg" << Inst.getRegister() << " " << Inst.getOffset(); 1764 break; 1765 case MCCFIInstruction::OpDefCfaRegister: 1766 OS << "OpDefCfaRegister Reg" << Inst.getRegister(); 1767 break; 1768 case MCCFIInstruction::OpDefCfaOffset: 1769 OS << "OpDefCfaOffset " << Inst.getOffset(); 1770 break; 1771 case MCCFIInstruction::OpDefCfa: 1772 OS << "OpDefCfa Reg" << Inst.getRegister() << " " << Inst.getOffset(); 1773 break; 1774 case MCCFIInstruction::OpRelOffset: 1775 OS << "OpRelOffset Reg" << Inst.getRegister() << " " << Inst.getOffset(); 1776 break; 1777 case MCCFIInstruction::OpAdjustCfaOffset: 1778 OS << "OfAdjustCfaOffset " << Inst.getOffset(); 1779 break; 1780 case MCCFIInstruction::OpEscape: 1781 OS << "OpEscape"; 1782 break; 1783 case MCCFIInstruction::OpRestore: 1784 OS << "OpRestore Reg" << Inst.getRegister(); 1785 break; 1786 case MCCFIInstruction::OpUndefined: 1787 OS << "OpUndefined Reg" << Inst.getRegister(); 1788 break; 1789 case MCCFIInstruction::OpRegister: 1790 OS << "OpRegister Reg" << Inst.getRegister() << " Reg" 1791 << Inst.getRegister2(); 1792 break; 1793 case MCCFIInstruction::OpWindowSave: 1794 OS << "OpWindowSave"; 1795 break; 1796 case MCCFIInstruction::OpGnuArgsSize: 1797 OS << "OpGnuArgsSize"; 1798 break; 1799 default: 1800 OS << "Op#" << Operation; 1801 break; 1802 } 1803 } 1804 1805 MarkerSymType BinaryContext::getMarkerType(const SymbolRef &Symbol) const { 1806 // For aarch64 and riscv, the ABI defines mapping symbols so we identify data 1807 // in the code section (see IHI0056B). $x identifies a symbol starting code or 1808 // the end of a data chunk inside code, $d identifies start of data. 1809 if ((!isAArch64() && !isRISCV()) || ELFSymbolRef(Symbol).getSize()) 1810 return MarkerSymType::NONE; 1811 1812 Expected<StringRef> NameOrError = Symbol.getName(); 1813 Expected<object::SymbolRef::Type> TypeOrError = Symbol.getType(); 1814 1815 if (!TypeOrError || !NameOrError) 1816 return MarkerSymType::NONE; 1817 1818 if (*TypeOrError != SymbolRef::ST_Unknown) 1819 return MarkerSymType::NONE; 1820 1821 if (*NameOrError == "$x" || NameOrError->startswith("$x.")) 1822 return MarkerSymType::CODE; 1823 1824 // $x<ISA> 1825 if (isRISCV() && NameOrError->startswith("$x")) 1826 return MarkerSymType::CODE; 1827 1828 if (*NameOrError == "$d" || NameOrError->startswith("$d.")) 1829 return MarkerSymType::DATA; 1830 1831 return MarkerSymType::NONE; 1832 } 1833 1834 bool BinaryContext::isMarker(const SymbolRef &Symbol) const { 1835 return getMarkerType(Symbol) != MarkerSymType::NONE; 1836 } 1837 1838 static void printDebugInfo(raw_ostream &OS, const MCInst &Instruction, 1839 const BinaryFunction *Function, 1840 DWARFContext *DwCtx) { 1841 DebugLineTableRowRef RowRef = 1842 DebugLineTableRowRef::fromSMLoc(Instruction.getLoc()); 1843 if (RowRef == DebugLineTableRowRef::NULL_ROW) 1844 return; 1845 1846 const DWARFDebugLine::LineTable *LineTable; 1847 if (Function && Function->getDWARFUnit() && 1848 Function->getDWARFUnit()->getOffset() == RowRef.DwCompileUnitIndex) { 1849 LineTable = Function->getDWARFLineTable(); 1850 } else { 1851 LineTable = DwCtx->getLineTableForUnit( 1852 DwCtx->getCompileUnitForOffset(RowRef.DwCompileUnitIndex)); 1853 } 1854 assert(LineTable && "line table expected for instruction with debug info"); 1855 1856 const DWARFDebugLine::Row &Row = LineTable->Rows[RowRef.RowIndex - 1]; 1857 StringRef FileName = ""; 1858 if (std::optional<const char *> FName = 1859 dwarf::toString(LineTable->Prologue.FileNames[Row.File - 1].Name)) 1860 FileName = *FName; 1861 OS << " # debug line " << FileName << ":" << Row.Line; 1862 if (Row.Column) 1863 OS << ":" << Row.Column; 1864 if (Row.Discriminator) 1865 OS << " discriminator:" << Row.Discriminator; 1866 } 1867 1868 void BinaryContext::printInstruction(raw_ostream &OS, const MCInst &Instruction, 1869 uint64_t Offset, 1870 const BinaryFunction *Function, 1871 bool PrintMCInst, bool PrintMemData, 1872 bool PrintRelocations, 1873 StringRef Endl) const { 1874 OS << format(" %08" PRIx64 ": ", Offset); 1875 if (MIB->isCFI(Instruction)) { 1876 uint32_t Offset = Instruction.getOperand(0).getImm(); 1877 OS << "\t!CFI\t$" << Offset << "\t; "; 1878 if (Function) 1879 printCFI(OS, *Function->getCFIFor(Instruction)); 1880 OS << Endl; 1881 return; 1882 } 1883 InstPrinter->printInst(&Instruction, 0, "", *STI, OS); 1884 if (MIB->isCall(Instruction)) { 1885 if (MIB->isTailCall(Instruction)) 1886 OS << " # TAILCALL "; 1887 if (MIB->isInvoke(Instruction)) { 1888 const std::optional<MCPlus::MCLandingPad> EHInfo = 1889 MIB->getEHInfo(Instruction); 1890 OS << " # handler: "; 1891 if (EHInfo->first) 1892 OS << *EHInfo->first; 1893 else 1894 OS << '0'; 1895 OS << "; action: " << EHInfo->second; 1896 const int64_t GnuArgsSize = MIB->getGnuArgsSize(Instruction); 1897 if (GnuArgsSize >= 0) 1898 OS << "; GNU_args_size = " << GnuArgsSize; 1899 } 1900 } else if (MIB->isIndirectBranch(Instruction)) { 1901 if (uint64_t JTAddress = MIB->getJumpTable(Instruction)) { 1902 OS << " # JUMPTABLE @0x" << Twine::utohexstr(JTAddress); 1903 } else { 1904 OS << " # UNKNOWN CONTROL FLOW"; 1905 } 1906 } 1907 if (std::optional<uint32_t> Offset = MIB->getOffset(Instruction)) 1908 OS << " # Offset: " << *Offset; 1909 if (std::optional<uint32_t> Size = MIB->getSize(Instruction)) 1910 OS << " # Size: " << *Size; 1911 if (MCSymbol *Label = MIB->getLabel(Instruction)) 1912 OS << " # Label: " << *Label; 1913 1914 MIB->printAnnotations(Instruction, OS); 1915 1916 if (opts::PrintDebugInfo) 1917 printDebugInfo(OS, Instruction, Function, DwCtx.get()); 1918 1919 if ((opts::PrintRelocations || PrintRelocations) && Function) { 1920 const uint64_t Size = computeCodeSize(&Instruction, &Instruction + 1); 1921 Function->printRelocations(OS, Offset, Size); 1922 } 1923 1924 OS << Endl; 1925 1926 if (PrintMCInst) { 1927 Instruction.dump_pretty(OS, InstPrinter.get()); 1928 OS << Endl; 1929 } 1930 } 1931 1932 std::optional<uint64_t> 1933 BinaryContext::getBaseAddressForMapping(uint64_t MMapAddress, 1934 uint64_t FileOffset) const { 1935 // Find a segment with a matching file offset. 1936 for (auto &KV : SegmentMapInfo) { 1937 const SegmentInfo &SegInfo = KV.second; 1938 // FileOffset is got from perf event, 1939 // and it is equal to alignDown(SegInfo.FileOffset, pagesize). 1940 // If the pagesize is not equal to SegInfo.Alignment. 1941 // FileOffset and SegInfo.FileOffset should be aligned first, 1942 // and then judge whether they are equal. 1943 if (alignDown(SegInfo.FileOffset, SegInfo.Alignment) == 1944 alignDown(FileOffset, SegInfo.Alignment)) { 1945 // The function's offset from base address in VAS is aligned by pagesize 1946 // instead of SegInfo.Alignment. Pagesize can't be got from perf events. 1947 // However, The ELF document says that SegInfo.FileOffset should equal 1948 // to SegInfo.Address, modulo the pagesize. 1949 // Reference: https://refspecs.linuxfoundation.org/elf/elf.pdf 1950 1951 // So alignDown(SegInfo.Address, pagesize) can be calculated by: 1952 // alignDown(SegInfo.Address, pagesize) 1953 // = SegInfo.Address - (SegInfo.Address % pagesize) 1954 // = SegInfo.Address - (SegInfo.FileOffset % pagesize) 1955 // = SegInfo.Address - SegInfo.FileOffset + 1956 // alignDown(SegInfo.FileOffset, pagesize) 1957 // = SegInfo.Address - SegInfo.FileOffset + FileOffset 1958 return MMapAddress - (SegInfo.Address - SegInfo.FileOffset + FileOffset); 1959 } 1960 } 1961 1962 return std::nullopt; 1963 } 1964 1965 ErrorOr<BinarySection &> BinaryContext::getSectionForAddress(uint64_t Address) { 1966 auto SI = AddressToSection.upper_bound(Address); 1967 if (SI != AddressToSection.begin()) { 1968 --SI; 1969 uint64_t UpperBound = SI->first + SI->second->getSize(); 1970 if (!SI->second->getSize()) 1971 UpperBound += 1; 1972 if (UpperBound > Address) 1973 return *SI->second; 1974 } 1975 return std::make_error_code(std::errc::bad_address); 1976 } 1977 1978 ErrorOr<StringRef> 1979 BinaryContext::getSectionNameForAddress(uint64_t Address) const { 1980 if (ErrorOr<const BinarySection &> Section = getSectionForAddress(Address)) 1981 return Section->getName(); 1982 return std::make_error_code(std::errc::bad_address); 1983 } 1984 1985 BinarySection &BinaryContext::registerSection(BinarySection *Section) { 1986 auto Res = Sections.insert(Section); 1987 (void)Res; 1988 assert(Res.second && "can't register the same section twice."); 1989 1990 // Only register allocatable sections in the AddressToSection map. 1991 if (Section->isAllocatable() && Section->getAddress()) 1992 AddressToSection.insert(std::make_pair(Section->getAddress(), Section)); 1993 NameToSection.insert( 1994 std::make_pair(std::string(Section->getName()), Section)); 1995 if (Section->hasSectionRef()) 1996 SectionRefToBinarySection.insert( 1997 std::make_pair(Section->getSectionRef(), Section)); 1998 1999 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: registering " << *Section << "\n"); 2000 return *Section; 2001 } 2002 2003 BinarySection &BinaryContext::registerSection(SectionRef Section) { 2004 return registerSection(new BinarySection(*this, Section)); 2005 } 2006 2007 BinarySection & 2008 BinaryContext::registerSection(const Twine &SectionName, 2009 const BinarySection &OriginalSection) { 2010 return registerSection( 2011 new BinarySection(*this, SectionName, OriginalSection)); 2012 } 2013 2014 BinarySection & 2015 BinaryContext::registerOrUpdateSection(const Twine &Name, unsigned ELFType, 2016 unsigned ELFFlags, uint8_t *Data, 2017 uint64_t Size, unsigned Alignment) { 2018 auto NamedSections = getSectionByName(Name); 2019 if (NamedSections.begin() != NamedSections.end()) { 2020 assert(std::next(NamedSections.begin()) == NamedSections.end() && 2021 "can only update unique sections"); 2022 BinarySection *Section = NamedSections.begin()->second; 2023 2024 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: updating " << *Section << " -> "); 2025 const bool Flag = Section->isAllocatable(); 2026 (void)Flag; 2027 Section->update(Data, Size, Alignment, ELFType, ELFFlags); 2028 LLVM_DEBUG(dbgs() << *Section << "\n"); 2029 // FIXME: Fix section flags/attributes for MachO. 2030 if (isELF()) 2031 assert(Flag == Section->isAllocatable() && 2032 "can't change section allocation status"); 2033 return *Section; 2034 } 2035 2036 return registerSection( 2037 new BinarySection(*this, Name, Data, Size, Alignment, ELFType, ELFFlags)); 2038 } 2039 2040 void BinaryContext::deregisterSectionName(const BinarySection &Section) { 2041 auto NameRange = NameToSection.equal_range(Section.getName().str()); 2042 while (NameRange.first != NameRange.second) { 2043 if (NameRange.first->second == &Section) { 2044 NameToSection.erase(NameRange.first); 2045 break; 2046 } 2047 ++NameRange.first; 2048 } 2049 } 2050 2051 void BinaryContext::deregisterUnusedSections() { 2052 ErrorOr<BinarySection &> AbsSection = getUniqueSectionByName("<absolute>"); 2053 for (auto SI = Sections.begin(); SI != Sections.end();) { 2054 BinarySection *Section = *SI; 2055 // We check getOutputData() instead of getOutputSize() because sometimes 2056 // zero-sized .text.cold sections are allocated. 2057 if (Section->hasSectionRef() || Section->getOutputData() || 2058 (AbsSection && Section == &AbsSection.get())) { 2059 ++SI; 2060 continue; 2061 } 2062 2063 LLVM_DEBUG(dbgs() << "LLVM-DEBUG: deregistering " << Section->getName() 2064 << '\n';); 2065 deregisterSectionName(*Section); 2066 SI = Sections.erase(SI); 2067 delete Section; 2068 } 2069 } 2070 2071 bool BinaryContext::deregisterSection(BinarySection &Section) { 2072 BinarySection *SectionPtr = &Section; 2073 auto Itr = Sections.find(SectionPtr); 2074 if (Itr != Sections.end()) { 2075 auto Range = AddressToSection.equal_range(SectionPtr->getAddress()); 2076 while (Range.first != Range.second) { 2077 if (Range.first->second == SectionPtr) { 2078 AddressToSection.erase(Range.first); 2079 break; 2080 } 2081 ++Range.first; 2082 } 2083 2084 deregisterSectionName(*SectionPtr); 2085 Sections.erase(Itr); 2086 delete SectionPtr; 2087 return true; 2088 } 2089 return false; 2090 } 2091 2092 void BinaryContext::renameSection(BinarySection &Section, 2093 const Twine &NewName) { 2094 auto Itr = Sections.find(&Section); 2095 assert(Itr != Sections.end() && "Section must exist to be renamed."); 2096 Sections.erase(Itr); 2097 2098 deregisterSectionName(Section); 2099 2100 Section.Name = NewName.str(); 2101 Section.setOutputName(Section.Name); 2102 2103 NameToSection.insert(std::make_pair(Section.Name, &Section)); 2104 2105 // Reinsert with the new name. 2106 Sections.insert(&Section); 2107 } 2108 2109 void BinaryContext::printSections(raw_ostream &OS) const { 2110 for (BinarySection *const &Section : Sections) 2111 OS << "BOLT-INFO: " << *Section << "\n"; 2112 } 2113 2114 BinarySection &BinaryContext::absoluteSection() { 2115 if (ErrorOr<BinarySection &> Section = getUniqueSectionByName("<absolute>")) 2116 return *Section; 2117 return registerOrUpdateSection("<absolute>", ELF::SHT_NULL, 0u); 2118 } 2119 2120 ErrorOr<uint64_t> BinaryContext::getUnsignedValueAtAddress(uint64_t Address, 2121 size_t Size) const { 2122 const ErrorOr<const BinarySection &> Section = getSectionForAddress(Address); 2123 if (!Section) 2124 return std::make_error_code(std::errc::bad_address); 2125 2126 if (Section->isVirtual()) 2127 return 0; 2128 2129 DataExtractor DE(Section->getContents(), AsmInfo->isLittleEndian(), 2130 AsmInfo->getCodePointerSize()); 2131 auto ValueOffset = static_cast<uint64_t>(Address - Section->getAddress()); 2132 return DE.getUnsigned(&ValueOffset, Size); 2133 } 2134 2135 ErrorOr<uint64_t> BinaryContext::getSignedValueAtAddress(uint64_t Address, 2136 size_t Size) const { 2137 const ErrorOr<const BinarySection &> Section = getSectionForAddress(Address); 2138 if (!Section) 2139 return std::make_error_code(std::errc::bad_address); 2140 2141 if (Section->isVirtual()) 2142 return 0; 2143 2144 DataExtractor DE(Section->getContents(), AsmInfo->isLittleEndian(), 2145 AsmInfo->getCodePointerSize()); 2146 auto ValueOffset = static_cast<uint64_t>(Address - Section->getAddress()); 2147 return DE.getSigned(&ValueOffset, Size); 2148 } 2149 2150 void BinaryContext::addRelocation(uint64_t Address, MCSymbol *Symbol, 2151 uint64_t Type, uint64_t Addend, 2152 uint64_t Value) { 2153 ErrorOr<BinarySection &> Section = getSectionForAddress(Address); 2154 assert(Section && "cannot find section for address"); 2155 Section->addRelocation(Address - Section->getAddress(), Symbol, Type, Addend, 2156 Value); 2157 } 2158 2159 void BinaryContext::addDynamicRelocation(uint64_t Address, MCSymbol *Symbol, 2160 uint64_t Type, uint64_t Addend, 2161 uint64_t Value) { 2162 ErrorOr<BinarySection &> Section = getSectionForAddress(Address); 2163 assert(Section && "cannot find section for address"); 2164 Section->addDynamicRelocation(Address - Section->getAddress(), Symbol, Type, 2165 Addend, Value); 2166 } 2167 2168 bool BinaryContext::removeRelocationAt(uint64_t Address) { 2169 ErrorOr<BinarySection &> Section = getSectionForAddress(Address); 2170 assert(Section && "cannot find section for address"); 2171 return Section->removeRelocationAt(Address - Section->getAddress()); 2172 } 2173 2174 const Relocation *BinaryContext::getRelocationAt(uint64_t Address) const { 2175 ErrorOr<const BinarySection &> Section = getSectionForAddress(Address); 2176 if (!Section) 2177 return nullptr; 2178 2179 return Section->getRelocationAt(Address - Section->getAddress()); 2180 } 2181 2182 const Relocation * 2183 BinaryContext::getDynamicRelocationAt(uint64_t Address) const { 2184 ErrorOr<const BinarySection &> Section = getSectionForAddress(Address); 2185 if (!Section) 2186 return nullptr; 2187 2188 return Section->getDynamicRelocationAt(Address - Section->getAddress()); 2189 } 2190 2191 void BinaryContext::markAmbiguousRelocations(BinaryData &BD, 2192 const uint64_t Address) { 2193 auto setImmovable = [&](BinaryData &BD) { 2194 BinaryData *Root = BD.getAtomicRoot(); 2195 LLVM_DEBUG(if (Root->isMoveable()) { 2196 dbgs() << "BOLT-DEBUG: setting " << *Root << " as immovable " 2197 << "due to ambiguous relocation referencing 0x" 2198 << Twine::utohexstr(Address) << '\n'; 2199 }); 2200 Root->setIsMoveable(false); 2201 }; 2202 2203 if (Address == BD.getAddress()) { 2204 setImmovable(BD); 2205 2206 // Set previous symbol as immovable 2207 BinaryData *Prev = getBinaryDataContainingAddress(Address - 1); 2208 if (Prev && Prev->getEndAddress() == BD.getAddress()) 2209 setImmovable(*Prev); 2210 } 2211 2212 if (Address == BD.getEndAddress()) { 2213 setImmovable(BD); 2214 2215 // Set next symbol as immovable 2216 BinaryData *Next = getBinaryDataContainingAddress(BD.getEndAddress()); 2217 if (Next && Next->getAddress() == BD.getEndAddress()) 2218 setImmovable(*Next); 2219 } 2220 } 2221 2222 BinaryFunction *BinaryContext::getFunctionForSymbol(const MCSymbol *Symbol, 2223 uint64_t *EntryDesc) { 2224 std::shared_lock<llvm::sys::RWMutex> Lock(SymbolToFunctionMapMutex); 2225 auto BFI = SymbolToFunctionMap.find(Symbol); 2226 if (BFI == SymbolToFunctionMap.end()) 2227 return nullptr; 2228 2229 BinaryFunction *BF = BFI->second; 2230 if (EntryDesc) 2231 *EntryDesc = BF->getEntryIDForSymbol(Symbol); 2232 2233 return BF; 2234 } 2235 2236 void BinaryContext::exitWithBugReport(StringRef Message, 2237 const BinaryFunction &Function) const { 2238 errs() << "=======================================\n"; 2239 errs() << "BOLT is unable to proceed because it couldn't properly understand " 2240 "this function.\n"; 2241 errs() << "If you are running the most recent version of BOLT, you may " 2242 "want to " 2243 "report this and paste this dump.\nPlease check that there is no " 2244 "sensitive contents being shared in this dump.\n"; 2245 errs() << "\nOffending function: " << Function.getPrintName() << "\n\n"; 2246 ScopedPrinter SP(errs()); 2247 SP.printBinaryBlock("Function contents", *Function.getData()); 2248 errs() << "\n"; 2249 Function.dump(); 2250 errs() << "ERROR: " << Message; 2251 errs() << "\n=======================================\n"; 2252 exit(1); 2253 } 2254 2255 BinaryFunction * 2256 BinaryContext::createInjectedBinaryFunction(const std::string &Name, 2257 bool IsSimple) { 2258 InjectedBinaryFunctions.push_back(new BinaryFunction(Name, *this, IsSimple)); 2259 BinaryFunction *BF = InjectedBinaryFunctions.back(); 2260 setSymbolToFunctionMap(BF->getSymbol(), BF); 2261 BF->CurrentState = BinaryFunction::State::CFG; 2262 return BF; 2263 } 2264 2265 std::pair<size_t, size_t> 2266 BinaryContext::calculateEmittedSize(BinaryFunction &BF, bool FixBranches) { 2267 // Adjust branch instruction to match the current layout. 2268 if (FixBranches) 2269 BF.fixBranches(); 2270 2271 // Create local MC context to isolate the effect of ephemeral code emission. 2272 IndependentCodeEmitter MCEInstance = createIndependentMCCodeEmitter(); 2273 MCContext *LocalCtx = MCEInstance.LocalCtx.get(); 2274 MCAsmBackend *MAB = 2275 TheTarget->createMCAsmBackend(*STI, *MRI, MCTargetOptions()); 2276 2277 SmallString<256> Code; 2278 raw_svector_ostream VecOS(Code); 2279 2280 std::unique_ptr<MCObjectWriter> OW = MAB->createObjectWriter(VecOS); 2281 std::unique_ptr<MCStreamer> Streamer(TheTarget->createMCObjectStreamer( 2282 *TheTriple, *LocalCtx, std::unique_ptr<MCAsmBackend>(MAB), std::move(OW), 2283 std::unique_ptr<MCCodeEmitter>(MCEInstance.MCE.release()), *STI, 2284 /*RelaxAll=*/false, 2285 /*IncrementalLinkerCompatible=*/false, 2286 /*DWARFMustBeAtTheEnd=*/false)); 2287 2288 Streamer->initSections(false, *STI); 2289 2290 MCSection *Section = MCEInstance.LocalMOFI->getTextSection(); 2291 Section->setHasInstructions(true); 2292 2293 // Create symbols in the LocalCtx so that they get destroyed with it. 2294 MCSymbol *StartLabel = LocalCtx->createTempSymbol(); 2295 MCSymbol *EndLabel = LocalCtx->createTempSymbol(); 2296 2297 Streamer->switchSection(Section); 2298 Streamer->emitLabel(StartLabel); 2299 emitFunctionBody(*Streamer, BF, BF.getLayout().getMainFragment(), 2300 /*EmitCodeOnly=*/true); 2301 Streamer->emitLabel(EndLabel); 2302 2303 using LabelRange = std::pair<const MCSymbol *, const MCSymbol *>; 2304 SmallVector<LabelRange> SplitLabels; 2305 for (FunctionFragment &FF : BF.getLayout().getSplitFragments()) { 2306 MCSymbol *const SplitStartLabel = LocalCtx->createTempSymbol(); 2307 MCSymbol *const SplitEndLabel = LocalCtx->createTempSymbol(); 2308 SplitLabels.emplace_back(SplitStartLabel, SplitEndLabel); 2309 2310 MCSectionELF *const SplitSection = LocalCtx->getELFSection( 2311 BF.getCodeSectionName(FF.getFragmentNum()), ELF::SHT_PROGBITS, 2312 ELF::SHF_EXECINSTR | ELF::SHF_ALLOC); 2313 SplitSection->setHasInstructions(true); 2314 Streamer->switchSection(SplitSection); 2315 2316 Streamer->emitLabel(SplitStartLabel); 2317 emitFunctionBody(*Streamer, BF, FF, /*EmitCodeOnly=*/true); 2318 Streamer->emitLabel(SplitEndLabel); 2319 // To avoid calling MCObjectStreamer::flushPendingLabels() which is 2320 // private 2321 Streamer->emitBytes(StringRef("")); 2322 Streamer->switchSection(Section); 2323 } 2324 2325 // To avoid calling MCObjectStreamer::flushPendingLabels() which is private or 2326 // MCStreamer::Finish(), which does more than we want 2327 Streamer->emitBytes(StringRef("")); 2328 2329 MCAssembler &Assembler = 2330 static_cast<MCObjectStreamer *>(Streamer.get())->getAssembler(); 2331 MCAsmLayout Layout(Assembler); 2332 Assembler.layout(Layout); 2333 2334 const uint64_t HotSize = 2335 Layout.getSymbolOffset(*EndLabel) - Layout.getSymbolOffset(*StartLabel); 2336 const uint64_t ColdSize = 2337 std::accumulate(SplitLabels.begin(), SplitLabels.end(), 0ULL, 2338 [&](const uint64_t Accu, const LabelRange &Labels) { 2339 return Accu + Layout.getSymbolOffset(*Labels.second) - 2340 Layout.getSymbolOffset(*Labels.first); 2341 }); 2342 2343 // Clean-up the effect of the code emission. 2344 for (const MCSymbol &Symbol : Assembler.symbols()) { 2345 MCSymbol *MutableSymbol = const_cast<MCSymbol *>(&Symbol); 2346 MutableSymbol->setUndefined(); 2347 MutableSymbol->setIsRegistered(false); 2348 } 2349 2350 return std::make_pair(HotSize, ColdSize); 2351 } 2352 2353 bool BinaryContext::validateInstructionEncoding( 2354 ArrayRef<uint8_t> InputSequence) const { 2355 MCInst Inst; 2356 uint64_t InstSize; 2357 DisAsm->getInstruction(Inst, InstSize, InputSequence, 0, nulls()); 2358 assert(InstSize == InputSequence.size() && 2359 "Disassembled instruction size does not match the sequence."); 2360 2361 SmallString<256> Code; 2362 SmallVector<MCFixup, 4> Fixups; 2363 2364 MCE->encodeInstruction(Inst, Code, Fixups, *STI); 2365 auto OutputSequence = ArrayRef<uint8_t>((uint8_t *)Code.data(), Code.size()); 2366 if (InputSequence != OutputSequence) { 2367 if (opts::Verbosity > 1) { 2368 errs() << "BOLT-WARNING: mismatched encoding detected\n" 2369 << " input: " << InputSequence << '\n' 2370 << " output: " << OutputSequence << '\n'; 2371 } 2372 return false; 2373 } 2374 2375 return true; 2376 } 2377 2378 uint64_t BinaryContext::getHotThreshold() const { 2379 static uint64_t Threshold = 0; 2380 if (Threshold == 0) { 2381 Threshold = std::max( 2382 (uint64_t)opts::ExecutionCountThreshold, 2383 NumProfiledFuncs ? SumExecutionCount / (2 * NumProfiledFuncs) : 1); 2384 } 2385 return Threshold; 2386 } 2387 2388 BinaryFunction *BinaryContext::getBinaryFunctionContainingAddress( 2389 uint64_t Address, bool CheckPastEnd, bool UseMaxSize) { 2390 auto FI = BinaryFunctions.upper_bound(Address); 2391 if (FI == BinaryFunctions.begin()) 2392 return nullptr; 2393 --FI; 2394 2395 const uint64_t UsedSize = 2396 UseMaxSize ? FI->second.getMaxSize() : FI->second.getSize(); 2397 2398 if (Address >= FI->first + UsedSize + (CheckPastEnd ? 1 : 0)) 2399 return nullptr; 2400 2401 return &FI->second; 2402 } 2403 2404 BinaryFunction *BinaryContext::getBinaryFunctionAtAddress(uint64_t Address) { 2405 // First, try to find a function starting at the given address. If the 2406 // function was folded, this will get us the original folded function if it 2407 // wasn't removed from the list, e.g. in non-relocation mode. 2408 auto BFI = BinaryFunctions.find(Address); 2409 if (BFI != BinaryFunctions.end()) 2410 return &BFI->second; 2411 2412 // We might have folded the function matching the object at the given 2413 // address. In such case, we look for a function matching the symbol 2414 // registered at the original address. The new function (the one that the 2415 // original was folded into) will hold the symbol. 2416 if (const BinaryData *BD = getBinaryDataAtAddress(Address)) { 2417 uint64_t EntryID = 0; 2418 BinaryFunction *BF = getFunctionForSymbol(BD->getSymbol(), &EntryID); 2419 if (BF && EntryID == 0) 2420 return BF; 2421 } 2422 return nullptr; 2423 } 2424 2425 DebugAddressRangesVector BinaryContext::translateModuleAddressRanges( 2426 const DWARFAddressRangesVector &InputRanges) const { 2427 DebugAddressRangesVector OutputRanges; 2428 2429 for (const DWARFAddressRange Range : InputRanges) { 2430 auto BFI = BinaryFunctions.lower_bound(Range.LowPC); 2431 while (BFI != BinaryFunctions.end()) { 2432 const BinaryFunction &Function = BFI->second; 2433 if (Function.getAddress() >= Range.HighPC) 2434 break; 2435 const DebugAddressRangesVector FunctionRanges = 2436 Function.getOutputAddressRanges(); 2437 llvm::move(FunctionRanges, std::back_inserter(OutputRanges)); 2438 std::advance(BFI, 1); 2439 } 2440 } 2441 2442 return OutputRanges; 2443 } 2444 2445 } // namespace bolt 2446 } // namespace llvm 2447