1 //===- bolt/Core/BinaryContext.cpp - Low-level context --------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file implements the BinaryContext class. 10 // 11 //===----------------------------------------------------------------------===// 12 13 #include "bolt/Core/BinaryContext.h" 14 #include "bolt/Core/BinaryEmitter.h" 15 #include "bolt/Core/BinaryFunction.h" 16 #include "bolt/Utils/CommandLineOpts.h" 17 #include "bolt/Utils/NameResolver.h" 18 #include "bolt/Utils/Utils.h" 19 #include "llvm/ADT/Twine.h" 20 #include "llvm/DebugInfo/DWARF/DWARFFormValue.h" 21 #include "llvm/DebugInfo/DWARF/DWARFUnit.h" 22 #include "llvm/MC/MCAsmLayout.h" 23 #include "llvm/MC/MCAssembler.h" 24 #include "llvm/MC/MCContext.h" 25 #include "llvm/MC/MCDisassembler/MCDisassembler.h" 26 #include "llvm/MC/MCInstPrinter.h" 27 #include "llvm/MC/MCObjectStreamer.h" 28 #include "llvm/MC/MCObjectWriter.h" 29 #include "llvm/MC/MCSectionELF.h" 30 #include "llvm/MC/MCStreamer.h" 31 #include "llvm/MC/MCSymbol.h" 32 #include "llvm/Support/CommandLine.h" 33 #include "llvm/Support/Regex.h" 34 #include <algorithm> 35 #include <functional> 36 #include <iterator> 37 #include <unordered_set> 38 39 using namespace llvm; 40 41 #undef DEBUG_TYPE 42 #define DEBUG_TYPE "bolt" 43 44 namespace opts { 45 46 cl::opt<bool> 47 NoHugePages("no-huge-pages", 48 cl::desc("use regular size pages for code alignment"), 49 cl::ZeroOrMore, 50 cl::Hidden, 51 cl::cat(BoltCategory)); 52 53 static cl::opt<bool> 54 PrintDebugInfo("print-debug-info", 55 cl::desc("print debug info when printing functions"), 56 cl::Hidden, 57 cl::ZeroOrMore, 58 cl::cat(BoltCategory)); 59 60 cl::opt<bool> 61 PrintRelocations("print-relocations", 62 cl::desc("print relocations when printing functions/objects"), 63 cl::Hidden, 64 cl::ZeroOrMore, 65 cl::cat(BoltCategory)); 66 67 static cl::opt<bool> 68 PrintMemData("print-mem-data", 69 cl::desc("print memory data annotations when printing functions"), 70 cl::Hidden, 71 cl::ZeroOrMore, 72 cl::cat(BoltCategory)); 73 74 } // namespace opts 75 76 namespace llvm { 77 namespace bolt { 78 79 BinaryContext::BinaryContext(std::unique_ptr<MCContext> Ctx, 80 std::unique_ptr<DWARFContext> DwCtx, 81 std::unique_ptr<Triple> TheTriple, 82 const Target *TheTarget, std::string TripleName, 83 std::unique_ptr<MCCodeEmitter> MCE, 84 std::unique_ptr<MCObjectFileInfo> MOFI, 85 std::unique_ptr<const MCAsmInfo> AsmInfo, 86 std::unique_ptr<const MCInstrInfo> MII, 87 std::unique_ptr<const MCSubtargetInfo> STI, 88 std::unique_ptr<MCInstPrinter> InstPrinter, 89 std::unique_ptr<const MCInstrAnalysis> MIA, 90 std::unique_ptr<MCPlusBuilder> MIB, 91 std::unique_ptr<const MCRegisterInfo> MRI, 92 std::unique_ptr<MCDisassembler> DisAsm) 93 : Ctx(std::move(Ctx)), DwCtx(std::move(DwCtx)), 94 TheTriple(std::move(TheTriple)), TheTarget(TheTarget), 95 TripleName(TripleName), MCE(std::move(MCE)), MOFI(std::move(MOFI)), 96 AsmInfo(std::move(AsmInfo)), MII(std::move(MII)), STI(std::move(STI)), 97 InstPrinter(std::move(InstPrinter)), MIA(std::move(MIA)), 98 MIB(std::move(MIB)), MRI(std::move(MRI)), DisAsm(std::move(DisAsm)) { 99 Relocation::Arch = this->TheTriple->getArch(); 100 PageAlign = opts::NoHugePages ? RegularPageSize : HugePageSize; 101 } 102 103 BinaryContext::~BinaryContext() { 104 for (BinarySection *Section : Sections) 105 delete Section; 106 for (BinaryFunction *InjectedFunction : InjectedBinaryFunctions) 107 delete InjectedFunction; 108 for (std::pair<const uint64_t, JumpTable *> JTI : JumpTables) 109 delete JTI.second; 110 clearBinaryData(); 111 } 112 113 /// Create BinaryContext for a given architecture \p ArchName and 114 /// triple \p TripleName. 115 std::unique_ptr<BinaryContext> 116 BinaryContext::createBinaryContext(const ObjectFile *File, bool IsPIC, 117 std::unique_ptr<DWARFContext> DwCtx) { 118 StringRef ArchName = ""; 119 StringRef FeaturesStr = ""; 120 switch (File->getArch()) { 121 case llvm::Triple::x86_64: 122 ArchName = "x86-64"; 123 FeaturesStr = "+nopl"; 124 break; 125 case llvm::Triple::aarch64: 126 ArchName = "aarch64"; 127 FeaturesStr = "+fp-armv8,+neon,+crypto,+dotprod,+crc,+lse,+ras,+rdm," 128 "+fullfp16,+spe,+fuse-aes,+rcpc"; 129 break; 130 default: 131 errs() << "BOLT-ERROR: Unrecognized machine in ELF file.\n"; 132 return nullptr; 133 } 134 135 auto TheTriple = std::make_unique<Triple>(File->makeTriple()); 136 const std::string TripleName = TheTriple->str(); 137 138 std::string Error; 139 const Target *TheTarget = 140 TargetRegistry::lookupTarget(std::string(ArchName), *TheTriple, Error); 141 if (!TheTarget) { 142 errs() << "BOLT-ERROR: " << Error; 143 return nullptr; 144 } 145 146 std::unique_ptr<const MCRegisterInfo> MRI( 147 TheTarget->createMCRegInfo(TripleName)); 148 if (!MRI) { 149 errs() << "BOLT-ERROR: no register info for target " << TripleName << "\n"; 150 return nullptr; 151 } 152 153 // Set up disassembler. 154 std::unique_ptr<const MCAsmInfo> AsmInfo( 155 TheTarget->createMCAsmInfo(*MRI, TripleName, MCTargetOptions())); 156 if (!AsmInfo) { 157 errs() << "BOLT-ERROR: no assembly info for target " << TripleName << "\n"; 158 return nullptr; 159 } 160 161 std::unique_ptr<const MCSubtargetInfo> STI( 162 TheTarget->createMCSubtargetInfo(TripleName, "", FeaturesStr)); 163 if (!STI) { 164 errs() << "BOLT-ERROR: no subtarget info for target " << TripleName << "\n"; 165 return nullptr; 166 } 167 168 std::unique_ptr<const MCInstrInfo> MII(TheTarget->createMCInstrInfo()); 169 if (!MII) { 170 errs() << "BOLT-ERROR: no instruction info for target " << TripleName 171 << "\n"; 172 return nullptr; 173 } 174 175 std::unique_ptr<MCContext> Ctx( 176 new MCContext(*TheTriple, AsmInfo.get(), MRI.get(), STI.get())); 177 std::unique_ptr<MCObjectFileInfo> MOFI( 178 TheTarget->createMCObjectFileInfo(*Ctx, IsPIC)); 179 Ctx->setObjectFileInfo(MOFI.get()); 180 // We do not support X86 Large code model. Change this in the future. 181 bool Large = false; 182 if (TheTriple->getArch() == llvm::Triple::aarch64) 183 Large = true; 184 unsigned LSDAEncoding = 185 Large ? dwarf::DW_EH_PE_absptr : dwarf::DW_EH_PE_udata4; 186 unsigned TTypeEncoding = 187 Large ? dwarf::DW_EH_PE_absptr : dwarf::DW_EH_PE_udata4; 188 if (IsPIC) { 189 LSDAEncoding = dwarf::DW_EH_PE_pcrel | 190 (Large ? dwarf::DW_EH_PE_sdata8 : dwarf::DW_EH_PE_sdata4); 191 TTypeEncoding = dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel | 192 (Large ? dwarf::DW_EH_PE_sdata8 : dwarf::DW_EH_PE_sdata4); 193 } 194 195 std::unique_ptr<MCDisassembler> DisAsm( 196 TheTarget->createMCDisassembler(*STI, *Ctx)); 197 198 if (!DisAsm) { 199 errs() << "BOLT-ERROR: no disassembler for target " << TripleName << "\n"; 200 return nullptr; 201 } 202 203 std::unique_ptr<const MCInstrAnalysis> MIA( 204 TheTarget->createMCInstrAnalysis(MII.get())); 205 if (!MIA) { 206 errs() << "BOLT-ERROR: failed to create instruction analysis for target" 207 << TripleName << "\n"; 208 return nullptr; 209 } 210 211 int AsmPrinterVariant = AsmInfo->getAssemblerDialect(); 212 std::unique_ptr<MCInstPrinter> InstructionPrinter( 213 TheTarget->createMCInstPrinter(*TheTriple, AsmPrinterVariant, *AsmInfo, 214 *MII, *MRI)); 215 if (!InstructionPrinter) { 216 errs() << "BOLT-ERROR: no instruction printer for target " << TripleName 217 << '\n'; 218 return nullptr; 219 } 220 InstructionPrinter->setPrintImmHex(true); 221 222 std::unique_ptr<MCCodeEmitter> MCE( 223 TheTarget->createMCCodeEmitter(*MII, *MRI, *Ctx)); 224 225 // Make sure we don't miss any output on core dumps. 226 outs().SetUnbuffered(); 227 errs().SetUnbuffered(); 228 dbgs().SetUnbuffered(); 229 230 auto BC = std::make_unique<BinaryContext>( 231 std::move(Ctx), std::move(DwCtx), std::move(TheTriple), TheTarget, 232 std::string(TripleName), std::move(MCE), std::move(MOFI), 233 std::move(AsmInfo), std::move(MII), std::move(STI), 234 std::move(InstructionPrinter), std::move(MIA), nullptr, std::move(MRI), 235 std::move(DisAsm)); 236 237 BC->TTypeEncoding = TTypeEncoding; 238 BC->LSDAEncoding = LSDAEncoding; 239 240 BC->MAB = std::unique_ptr<MCAsmBackend>( 241 BC->TheTarget->createMCAsmBackend(*BC->STI, *BC->MRI, MCTargetOptions())); 242 243 BC->setFilename(File->getFileName()); 244 245 BC->HasFixedLoadAddress = !IsPIC; 246 247 return BC; 248 } 249 250 bool BinaryContext::forceSymbolRelocations(StringRef SymbolName) const { 251 if (opts::HotText && 252 (SymbolName == "__hot_start" || SymbolName == "__hot_end")) 253 return true; 254 255 if (opts::HotData && 256 (SymbolName == "__hot_data_start" || SymbolName == "__hot_data_end")) 257 return true; 258 259 if (SymbolName == "_end") 260 return true; 261 262 return false; 263 } 264 265 std::unique_ptr<MCObjectWriter> 266 BinaryContext::createObjectWriter(raw_pwrite_stream &OS) { 267 return MAB->createObjectWriter(OS); 268 } 269 270 bool BinaryContext::validateObjectNesting() const { 271 auto Itr = BinaryDataMap.begin(); 272 auto End = BinaryDataMap.end(); 273 bool Valid = true; 274 while (Itr != End) { 275 auto Next = std::next(Itr); 276 while (Next != End && 277 Itr->second->getSection() == Next->second->getSection() && 278 Itr->second->containsRange(Next->second->getAddress(), 279 Next->second->getSize())) { 280 if (Next->second->Parent != Itr->second) { 281 errs() << "BOLT-WARNING: object nesting incorrect for:\n" 282 << "BOLT-WARNING: " << *Itr->second << "\n" 283 << "BOLT-WARNING: " << *Next->second << "\n"; 284 Valid = false; 285 } 286 ++Next; 287 } 288 Itr = Next; 289 } 290 return Valid; 291 } 292 293 bool BinaryContext::validateHoles() const { 294 bool Valid = true; 295 for (BinarySection &Section : sections()) { 296 for (const Relocation &Rel : Section.relocations()) { 297 uint64_t RelAddr = Rel.Offset + Section.getAddress(); 298 const BinaryData *BD = getBinaryDataContainingAddress(RelAddr); 299 if (!BD) { 300 errs() << "BOLT-WARNING: no BinaryData found for relocation at address" 301 << " 0x" << Twine::utohexstr(RelAddr) << " in " 302 << Section.getName() << "\n"; 303 Valid = false; 304 } else if (!BD->getAtomicRoot()) { 305 errs() << "BOLT-WARNING: no atomic BinaryData found for relocation at " 306 << "address 0x" << Twine::utohexstr(RelAddr) << " in " 307 << Section.getName() << "\n"; 308 Valid = false; 309 } 310 } 311 } 312 return Valid; 313 } 314 315 void BinaryContext::updateObjectNesting(BinaryDataMapType::iterator GAI) { 316 const uint64_t Address = GAI->second->getAddress(); 317 const uint64_t Size = GAI->second->getSize(); 318 319 auto fixParents = [&](BinaryDataMapType::iterator Itr, 320 BinaryData *NewParent) { 321 BinaryData *OldParent = Itr->second->Parent; 322 Itr->second->Parent = NewParent; 323 ++Itr; 324 while (Itr != BinaryDataMap.end() && OldParent && 325 Itr->second->Parent == OldParent) { 326 Itr->second->Parent = NewParent; 327 ++Itr; 328 } 329 }; 330 331 // Check if the previous symbol contains the newly added symbol. 332 if (GAI != BinaryDataMap.begin()) { 333 BinaryData *Prev = std::prev(GAI)->second; 334 while (Prev) { 335 if (Prev->getSection() == GAI->second->getSection() && 336 Prev->containsRange(Address, Size)) { 337 fixParents(GAI, Prev); 338 } else { 339 fixParents(GAI, nullptr); 340 } 341 Prev = Prev->Parent; 342 } 343 } 344 345 // Check if the newly added symbol contains any subsequent symbols. 346 if (Size != 0) { 347 BinaryData *BD = GAI->second->Parent ? GAI->second->Parent : GAI->second; 348 auto Itr = std::next(GAI); 349 while ( 350 Itr != BinaryDataMap.end() && 351 BD->containsRange(Itr->second->getAddress(), Itr->second->getSize())) { 352 Itr->second->Parent = BD; 353 ++Itr; 354 } 355 } 356 } 357 358 iterator_range<BinaryContext::binary_data_iterator> 359 BinaryContext::getSubBinaryData(BinaryData *BD) { 360 auto Start = std::next(BinaryDataMap.find(BD->getAddress())); 361 auto End = Start; 362 while (End != BinaryDataMap.end() && BD->isAncestorOf(End->second)) 363 ++End; 364 return make_range(Start, End); 365 } 366 367 std::pair<const MCSymbol *, uint64_t> 368 BinaryContext::handleAddressRef(uint64_t Address, BinaryFunction &BF, 369 bool IsPCRel) { 370 uint64_t Addend = 0; 371 372 if (isAArch64()) { 373 // Check if this is an access to a constant island and create bookkeeping 374 // to keep track of it and emit it later as part of this function. 375 if (MCSymbol *IslandSym = BF.getOrCreateIslandAccess(Address)) 376 return std::make_pair(IslandSym, Addend); 377 378 // Detect custom code written in assembly that refers to arbitrary 379 // constant islands from other functions. Write this reference so we 380 // can pull this constant island and emit it as part of this function 381 // too. 382 auto IslandIter = AddressToConstantIslandMap.lower_bound(Address); 383 if (IslandIter != AddressToConstantIslandMap.end()) { 384 if (MCSymbol *IslandSym = 385 IslandIter->second->getOrCreateProxyIslandAccess(Address, BF)) { 386 BF.createIslandDependency(IslandSym, IslandIter->second); 387 return std::make_pair(IslandSym, Addend); 388 } 389 } 390 } 391 392 // Note that the address does not necessarily have to reside inside 393 // a section, it could be an absolute address too. 394 ErrorOr<BinarySection &> Section = getSectionForAddress(Address); 395 if (Section && Section->isText()) { 396 if (BF.containsAddress(Address, /*UseMaxSize=*/isAArch64())) { 397 if (Address != BF.getAddress()) { 398 // The address could potentially escape. Mark it as another entry 399 // point into the function. 400 if (opts::Verbosity >= 1) { 401 outs() << "BOLT-INFO: potentially escaped address 0x" 402 << Twine::utohexstr(Address) << " in function " << BF << '\n'; 403 } 404 BF.HasInternalLabelReference = true; 405 return std::make_pair( 406 BF.addEntryPointAtOffset(Address - BF.getAddress()), Addend); 407 } 408 } else { 409 BF.InterproceduralReferences.insert(Address); 410 } 411 } 412 413 // With relocations, catch jump table references outside of the basic block 414 // containing the indirect jump. 415 if (HasRelocations) { 416 const MemoryContentsType MemType = analyzeMemoryAt(Address, BF); 417 if (MemType == MemoryContentsType::POSSIBLE_PIC_JUMP_TABLE && IsPCRel) { 418 const MCSymbol *Symbol = 419 getOrCreateJumpTable(BF, Address, JumpTable::JTT_PIC); 420 421 return std::make_pair(Symbol, Addend); 422 } 423 } 424 425 if (BinaryData *BD = getBinaryDataContainingAddress(Address)) 426 return std::make_pair(BD->getSymbol(), Address - BD->getAddress()); 427 428 // TODO: use DWARF info to get size/alignment here? 429 MCSymbol *TargetSymbol = getOrCreateGlobalSymbol(Address, "DATAat"); 430 LLVM_DEBUG(dbgs() << "Created symbol " << TargetSymbol->getName() << '\n'); 431 return std::make_pair(TargetSymbol, Addend); 432 } 433 434 MemoryContentsType BinaryContext::analyzeMemoryAt(uint64_t Address, 435 BinaryFunction &BF) { 436 if (!isX86()) 437 return MemoryContentsType::UNKNOWN; 438 439 ErrorOr<BinarySection &> Section = getSectionForAddress(Address); 440 if (!Section) { 441 // No section - possibly an absolute address. Since we don't allow 442 // internal function addresses to escape the function scope - we 443 // consider it a tail call. 444 if (opts::Verbosity > 1) { 445 errs() << "BOLT-WARNING: no section for address 0x" 446 << Twine::utohexstr(Address) << " referenced from function " << BF 447 << '\n'; 448 } 449 return MemoryContentsType::UNKNOWN; 450 } 451 452 if (Section->isVirtual()) { 453 // The contents are filled at runtime. 454 return MemoryContentsType::UNKNOWN; 455 } 456 457 // No support for jump tables in code yet. 458 if (Section->isText()) 459 return MemoryContentsType::UNKNOWN; 460 461 // Start with checking for PIC jump table. We expect non-PIC jump tables 462 // to have high 32 bits set to 0. 463 if (analyzeJumpTable(Address, JumpTable::JTT_PIC, BF)) 464 return MemoryContentsType::POSSIBLE_PIC_JUMP_TABLE; 465 466 if (analyzeJumpTable(Address, JumpTable::JTT_NORMAL, BF)) 467 return MemoryContentsType::POSSIBLE_JUMP_TABLE; 468 469 return MemoryContentsType::UNKNOWN; 470 } 471 472 /// Check if <fragment restored name> == <parent restored name>.cold(.\d+)? 473 bool isPotentialFragmentByName(BinaryFunction &Fragment, 474 BinaryFunction &Parent) { 475 for (StringRef Name : Parent.getNames()) { 476 std::string NamePrefix = Regex::escape(NameResolver::restore(Name)); 477 std::string NameRegex = Twine(NamePrefix, "\\.cold(\\.[0-9]+)?").str(); 478 if (Fragment.hasRestoredNameRegex(NameRegex)) 479 return true; 480 } 481 return false; 482 } 483 484 bool BinaryContext::analyzeJumpTable(const uint64_t Address, 485 const JumpTable::JumpTableType Type, 486 BinaryFunction &BF, 487 const uint64_t NextJTAddress, 488 JumpTable::OffsetsType *Offsets) { 489 // Is one of the targets __builtin_unreachable? 490 bool HasUnreachable = false; 491 492 // Number of targets other than __builtin_unreachable. 493 uint64_t NumRealEntries = 0; 494 495 constexpr uint64_t INVALID_OFFSET = std::numeric_limits<uint64_t>::max(); 496 auto addOffset = [&](uint64_t Offset) { 497 if (Offsets) 498 Offsets->emplace_back(Offset); 499 }; 500 501 auto doesBelongToFunction = [&](const uint64_t Addr, 502 BinaryFunction *TargetBF) -> bool { 503 if (BF.containsAddress(Addr)) 504 return true; 505 // Nothing to do if we failed to identify the containing function. 506 if (!TargetBF) 507 return false; 508 // Case 1: check if BF is a fragment and TargetBF is its parent. 509 if (BF.isFragment()) { 510 // Parent function may or may not be already registered. 511 // Set parent link based on function name matching heuristic. 512 return registerFragment(BF, *TargetBF); 513 } 514 // Case 2: check if TargetBF is a fragment and BF is its parent. 515 return TargetBF->isFragment() && registerFragment(*TargetBF, BF); 516 }; 517 518 ErrorOr<BinarySection &> Section = getSectionForAddress(Address); 519 if (!Section) 520 return false; 521 522 // The upper bound is defined by containing object, section limits, and 523 // the next jump table in memory. 524 uint64_t UpperBound = Section->getEndAddress(); 525 const BinaryData *JumpTableBD = getBinaryDataAtAddress(Address); 526 if (JumpTableBD && JumpTableBD->getSize()) { 527 assert(JumpTableBD->getEndAddress() <= UpperBound && 528 "data object cannot cross a section boundary"); 529 UpperBound = JumpTableBD->getEndAddress(); 530 } 531 if (NextJTAddress) 532 UpperBound = std::min(NextJTAddress, UpperBound); 533 534 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: analyzeJumpTable in " << BF.getPrintName() 535 << '\n'); 536 const uint64_t EntrySize = getJumpTableEntrySize(Type); 537 for (uint64_t EntryAddress = Address; EntryAddress <= UpperBound - EntrySize; 538 EntryAddress += EntrySize) { 539 LLVM_DEBUG(dbgs() << " * Checking 0x" << Twine::utohexstr(EntryAddress) 540 << " -> "); 541 // Check if there's a proper relocation against the jump table entry. 542 if (HasRelocations) { 543 if (Type == JumpTable::JTT_PIC && 544 !DataPCRelocations.count(EntryAddress)) { 545 LLVM_DEBUG( 546 dbgs() << "FAIL: JTT_PIC table, no relocation for this address\n"); 547 break; 548 } 549 if (Type == JumpTable::JTT_NORMAL && !getRelocationAt(EntryAddress)) { 550 LLVM_DEBUG( 551 dbgs() 552 << "FAIL: JTT_NORMAL table, no relocation for this address\n"); 553 break; 554 } 555 } 556 557 const uint64_t Value = 558 (Type == JumpTable::JTT_PIC) 559 ? Address + *getSignedValueAtAddress(EntryAddress, EntrySize) 560 : *getPointerAtAddress(EntryAddress); 561 562 // __builtin_unreachable() case. 563 if (Value == BF.getAddress() + BF.getSize()) { 564 addOffset(Value - BF.getAddress()); 565 HasUnreachable = true; 566 LLVM_DEBUG(dbgs() << "OK: __builtin_unreachable\n"); 567 continue; 568 } 569 570 // Function or one of its fragments. 571 BinaryFunction *TargetBF = getBinaryFunctionContainingAddress(Value); 572 573 // We assume that a jump table cannot have function start as an entry. 574 if (!doesBelongToFunction(Value, TargetBF) || Value == BF.getAddress()) { 575 LLVM_DEBUG({ 576 if (!BF.containsAddress(Value)) { 577 dbgs() << "FAIL: function doesn't contain this address\n"; 578 if (TargetBF) { 579 dbgs() << " ! function containing this address: " 580 << TargetBF->getPrintName() << '\n'; 581 if (TargetBF->isFragment()) 582 dbgs() << " ! is a fragment\n"; 583 for (BinaryFunction *TargetParent : TargetBF->ParentFragments) 584 dbgs() << " ! its parent is " 585 << (TargetParent ? TargetParent->getPrintName() : "(none)") 586 << '\n'; 587 } 588 } 589 if (Value == BF.getAddress()) 590 dbgs() << "FAIL: jump table cannot have function start as an entry\n"; 591 }); 592 break; 593 } 594 595 // Check there's an instruction at this offset. 596 if (TargetBF->getState() == BinaryFunction::State::Disassembled && 597 !TargetBF->getInstructionAtOffset(Value - TargetBF->getAddress())) { 598 LLVM_DEBUG(dbgs() << "FAIL: no instruction at this offset\n"); 599 break; 600 } 601 602 ++NumRealEntries; 603 604 if (TargetBF == &BF) { 605 // Address inside the function. 606 addOffset(Value - TargetBF->getAddress()); 607 LLVM_DEBUG(dbgs() << "OK: real entry\n"); 608 } else { 609 // Address in split fragment. 610 BF.setHasSplitJumpTable(true); 611 // Add invalid offset for proper identification of jump table size. 612 addOffset(INVALID_OFFSET); 613 LLVM_DEBUG(dbgs() << "OK: address in split fragment " 614 << TargetBF->getPrintName() << '\n'); 615 } 616 } 617 618 // It's a jump table if the number of real entries is more than 1, or there's 619 // one real entry and "unreachable" targets. If there are only multiple 620 // "unreachable" targets, then it's not a jump table. 621 return NumRealEntries + HasUnreachable >= 2; 622 } 623 624 void BinaryContext::populateJumpTables() { 625 LLVM_DEBUG(dbgs() << "DataPCRelocations: " << DataPCRelocations.size() 626 << '\n'); 627 for (auto JTI = JumpTables.begin(), JTE = JumpTables.end(); JTI != JTE; 628 ++JTI) { 629 JumpTable *JT = JTI->second; 630 BinaryFunction &BF = *JT->Parent; 631 632 if (!BF.isSimple()) 633 continue; 634 635 uint64_t NextJTAddress = 0; 636 auto NextJTI = std::next(JTI); 637 if (NextJTI != JTE) 638 NextJTAddress = NextJTI->second->getAddress(); 639 640 const bool Success = analyzeJumpTable(JT->getAddress(), JT->Type, BF, 641 NextJTAddress, &JT->OffsetEntries); 642 if (!Success) { 643 dbgs() << "failed to analyze jump table in function " << BF << '\n'; 644 JT->print(dbgs()); 645 if (NextJTI != JTE) { 646 dbgs() << "next jump table at 0x" 647 << Twine::utohexstr(NextJTI->second->getAddress()) 648 << " belongs to function " << *NextJTI->second->Parent << '\n'; 649 NextJTI->second->print(dbgs()); 650 } 651 llvm_unreachable("jump table heuristic failure"); 652 } 653 654 for (uint64_t EntryOffset : JT->OffsetEntries) { 655 if (EntryOffset == BF.getSize()) 656 BF.IgnoredBranches.emplace_back(EntryOffset, BF.getSize()); 657 else 658 BF.registerReferencedOffset(EntryOffset); 659 } 660 661 // In strict mode, erase PC-relative relocation record. Later we check that 662 // all such records are erased and thus have been accounted for. 663 if (opts::StrictMode && JT->Type == JumpTable::JTT_PIC) { 664 for (uint64_t Address = JT->getAddress(); 665 Address < JT->getAddress() + JT->getSize(); 666 Address += JT->EntrySize) { 667 DataPCRelocations.erase(DataPCRelocations.find(Address)); 668 } 669 } 670 671 // Mark to skip the function and all its fragments. 672 if (BF.hasSplitJumpTable()) 673 FragmentsToSkip.push_back(&BF); 674 } 675 676 if (opts::StrictMode && DataPCRelocations.size()) { 677 LLVM_DEBUG({ 678 dbgs() << DataPCRelocations.size() 679 << " unclaimed PC-relative relocations left in data:\n"; 680 for (uint64_t Reloc : DataPCRelocations) 681 dbgs() << Twine::utohexstr(Reloc) << '\n'; 682 }); 683 assert(0 && "unclaimed PC-relative relocations left in data\n"); 684 } 685 clearList(DataPCRelocations); 686 } 687 688 void BinaryContext::skipMarkedFragments() { 689 // Unique functions in the vector. 690 std::unordered_set<BinaryFunction *> UniqueFunctions(FragmentsToSkip.begin(), 691 FragmentsToSkip.end()); 692 // Copy the functions back to FragmentsToSkip. 693 FragmentsToSkip.assign(UniqueFunctions.begin(), UniqueFunctions.end()); 694 auto addToWorklist = [&](BinaryFunction *Function) -> void { 695 if (UniqueFunctions.count(Function)) 696 return; 697 FragmentsToSkip.push_back(Function); 698 UniqueFunctions.insert(Function); 699 }; 700 // Functions containing split jump tables need to be skipped with all 701 // fragments (transitively). 702 for (size_t I = 0; I != FragmentsToSkip.size(); I++) { 703 BinaryFunction *BF = FragmentsToSkip[I]; 704 assert(UniqueFunctions.count(BF) && 705 "internal error in traversing function fragments"); 706 if (opts::Verbosity >= 1) 707 errs() << "BOLT-WARNING: Ignoring " << BF->getPrintName() << '\n'; 708 BF->setIgnored(); 709 std::for_each(BF->Fragments.begin(), BF->Fragments.end(), addToWorklist); 710 std::for_each(BF->ParentFragments.begin(), BF->ParentFragments.end(), 711 addToWorklist); 712 } 713 errs() << "BOLT-WARNING: Ignored " << FragmentsToSkip.size() << " functions " 714 << "due to cold fragments.\n"; 715 FragmentsToSkip.clear(); 716 } 717 718 MCSymbol *BinaryContext::getOrCreateGlobalSymbol(uint64_t Address, Twine Prefix, 719 uint64_t Size, 720 uint16_t Alignment, 721 unsigned Flags) { 722 auto Itr = BinaryDataMap.find(Address); 723 if (Itr != BinaryDataMap.end()) { 724 assert(Itr->second->getSize() == Size || !Size); 725 return Itr->second->getSymbol(); 726 } 727 728 std::string Name = (Prefix + "0x" + Twine::utohexstr(Address)).str(); 729 assert(!GlobalSymbols.count(Name) && "created name is not unique"); 730 return registerNameAtAddress(Name, Address, Size, Alignment, Flags); 731 } 732 733 MCSymbol *BinaryContext::getOrCreateUndefinedGlobalSymbol(StringRef Name) { 734 return Ctx->getOrCreateSymbol(Name); 735 } 736 737 BinaryFunction *BinaryContext::createBinaryFunction( 738 const std::string &Name, BinarySection &Section, uint64_t Address, 739 uint64_t Size, uint64_t SymbolSize, uint16_t Alignment) { 740 auto Result = BinaryFunctions.emplace( 741 Address, BinaryFunction(Name, Section, Address, Size, *this)); 742 assert(Result.second == true && "unexpected duplicate function"); 743 BinaryFunction *BF = &Result.first->second; 744 registerNameAtAddress(Name, Address, SymbolSize ? SymbolSize : Size, 745 Alignment); 746 setSymbolToFunctionMap(BF->getSymbol(), BF); 747 return BF; 748 } 749 750 const MCSymbol * 751 BinaryContext::getOrCreateJumpTable(BinaryFunction &Function, uint64_t Address, 752 JumpTable::JumpTableType Type) { 753 if (JumpTable *JT = getJumpTableContainingAddress(Address)) { 754 assert(JT->Type == Type && "jump table types have to match"); 755 assert(JT->Parent == &Function && 756 "cannot re-use jump table of a different function"); 757 assert(Address == JT->getAddress() && "unexpected non-empty jump table"); 758 759 return JT->getFirstLabel(); 760 } 761 762 // Re-use the existing symbol if possible. 763 MCSymbol *JTLabel = nullptr; 764 if (BinaryData *Object = getBinaryDataAtAddress(Address)) { 765 if (!isInternalSymbolName(Object->getSymbol()->getName())) 766 JTLabel = Object->getSymbol(); 767 } 768 769 const uint64_t EntrySize = getJumpTableEntrySize(Type); 770 if (!JTLabel) { 771 const std::string JumpTableName = generateJumpTableName(Function, Address); 772 JTLabel = registerNameAtAddress(JumpTableName, Address, 0, EntrySize); 773 } 774 775 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: creating jump table " << JTLabel->getName() 776 << " in function " << Function << '\n'); 777 778 JumpTable *JT = new JumpTable(*JTLabel, Address, EntrySize, Type, 779 JumpTable::LabelMapType{{0, JTLabel}}, Function, 780 *getSectionForAddress(Address)); 781 JumpTables.emplace(Address, JT); 782 783 // Duplicate the entry for the parent function for easy access. 784 Function.JumpTables.emplace(Address, JT); 785 786 return JTLabel; 787 } 788 789 std::pair<uint64_t, const MCSymbol *> 790 BinaryContext::duplicateJumpTable(BinaryFunction &Function, JumpTable *JT, 791 const MCSymbol *OldLabel) { 792 auto L = scopeLock(); 793 unsigned Offset = 0; 794 bool Found = false; 795 for (std::pair<const unsigned, MCSymbol *> Elmt : JT->Labels) { 796 if (Elmt.second != OldLabel) 797 continue; 798 Offset = Elmt.first; 799 Found = true; 800 break; 801 } 802 assert(Found && "Label not found"); 803 MCSymbol *NewLabel = Ctx->createNamedTempSymbol("duplicatedJT"); 804 JumpTable *NewJT = 805 new JumpTable(*NewLabel, JT->getAddress(), JT->EntrySize, JT->Type, 806 JumpTable::LabelMapType{{Offset, NewLabel}}, Function, 807 *getSectionForAddress(JT->getAddress())); 808 NewJT->Entries = JT->Entries; 809 NewJT->Counts = JT->Counts; 810 uint64_t JumpTableID = ++DuplicatedJumpTables; 811 // Invert it to differentiate from regular jump tables whose IDs are their 812 // addresses in the input binary memory space 813 JumpTableID = ~JumpTableID; 814 JumpTables.emplace(JumpTableID, NewJT); 815 Function.JumpTables.emplace(JumpTableID, NewJT); 816 return std::make_pair(JumpTableID, NewLabel); 817 } 818 819 std::string BinaryContext::generateJumpTableName(const BinaryFunction &BF, 820 uint64_t Address) { 821 size_t Id; 822 uint64_t Offset = 0; 823 if (const JumpTable *JT = BF.getJumpTableContainingAddress(Address)) { 824 Offset = Address - JT->getAddress(); 825 auto Itr = JT->Labels.find(Offset); 826 if (Itr != JT->Labels.end()) 827 return std::string(Itr->second->getName()); 828 Id = JumpTableIds.at(JT->getAddress()); 829 } else { 830 Id = JumpTableIds[Address] = BF.JumpTables.size(); 831 } 832 return ("JUMP_TABLE/" + BF.getOneName().str() + "." + std::to_string(Id) + 833 (Offset ? ("." + std::to_string(Offset)) : "")); 834 } 835 836 bool BinaryContext::hasValidCodePadding(const BinaryFunction &BF) { 837 // FIXME: aarch64 support is missing. 838 if (!isX86()) 839 return true; 840 841 if (BF.getSize() == BF.getMaxSize()) 842 return true; 843 844 ErrorOr<ArrayRef<unsigned char>> FunctionData = BF.getData(); 845 assert(FunctionData && "cannot get function as data"); 846 847 uint64_t Offset = BF.getSize(); 848 MCInst Instr; 849 uint64_t InstrSize = 0; 850 uint64_t InstrAddress = BF.getAddress() + Offset; 851 using std::placeholders::_1; 852 853 // Skip instructions that satisfy the predicate condition. 854 auto skipInstructions = [&](std::function<bool(const MCInst &)> Predicate) { 855 const uint64_t StartOffset = Offset; 856 for (; Offset < BF.getMaxSize(); 857 Offset += InstrSize, InstrAddress += InstrSize) { 858 if (!DisAsm->getInstruction(Instr, InstrSize, FunctionData->slice(Offset), 859 InstrAddress, nulls())) 860 break; 861 if (!Predicate(Instr)) 862 break; 863 } 864 865 return Offset - StartOffset; 866 }; 867 868 // Skip a sequence of zero bytes. 869 auto skipZeros = [&]() { 870 const uint64_t StartOffset = Offset; 871 for (; Offset < BF.getMaxSize(); ++Offset) 872 if ((*FunctionData)[Offset] != 0) 873 break; 874 875 return Offset - StartOffset; 876 }; 877 878 // Accept the whole padding area filled with breakpoints. 879 auto isBreakpoint = std::bind(&MCPlusBuilder::isBreakpoint, MIB.get(), _1); 880 if (skipInstructions(isBreakpoint) && Offset == BF.getMaxSize()) 881 return true; 882 883 auto isNoop = std::bind(&MCPlusBuilder::isNoop, MIB.get(), _1); 884 885 // Some functions have a jump to the next function or to the padding area 886 // inserted after the body. 887 auto isSkipJump = [&](const MCInst &Instr) { 888 uint64_t TargetAddress = 0; 889 if (MIB->isUnconditionalBranch(Instr) && 890 MIB->evaluateBranch(Instr, InstrAddress, InstrSize, TargetAddress)) { 891 if (TargetAddress >= InstrAddress + InstrSize && 892 TargetAddress <= BF.getAddress() + BF.getMaxSize()) { 893 return true; 894 } 895 } 896 return false; 897 }; 898 899 // Skip over nops, jumps, and zero padding. Allow interleaving (this happens). 900 while (skipInstructions(isNoop) || skipInstructions(isSkipJump) || 901 skipZeros()) 902 ; 903 904 if (Offset == BF.getMaxSize()) 905 return true; 906 907 if (opts::Verbosity >= 1) { 908 errs() << "BOLT-WARNING: bad padding at address 0x" 909 << Twine::utohexstr(BF.getAddress() + BF.getSize()) 910 << " starting at offset " << (Offset - BF.getSize()) 911 << " in function " << BF << '\n' 912 << FunctionData->slice(BF.getSize(), BF.getMaxSize() - BF.getSize()) 913 << '\n'; 914 } 915 916 return false; 917 } 918 919 void BinaryContext::adjustCodePadding() { 920 for (auto &BFI : BinaryFunctions) { 921 BinaryFunction &BF = BFI.second; 922 if (!shouldEmit(BF)) 923 continue; 924 925 if (!hasValidCodePadding(BF)) { 926 if (HasRelocations) { 927 if (opts::Verbosity >= 1) { 928 outs() << "BOLT-INFO: function " << BF 929 << " has invalid padding. Ignoring the function.\n"; 930 } 931 BF.setIgnored(); 932 } else { 933 BF.setMaxSize(BF.getSize()); 934 } 935 } 936 } 937 } 938 939 MCSymbol *BinaryContext::registerNameAtAddress(StringRef Name, uint64_t Address, 940 uint64_t Size, 941 uint16_t Alignment, 942 unsigned Flags) { 943 // Register the name with MCContext. 944 MCSymbol *Symbol = Ctx->getOrCreateSymbol(Name); 945 946 auto GAI = BinaryDataMap.find(Address); 947 BinaryData *BD; 948 if (GAI == BinaryDataMap.end()) { 949 ErrorOr<BinarySection &> SectionOrErr = getSectionForAddress(Address); 950 BinarySection &Section = 951 SectionOrErr ? SectionOrErr.get() : absoluteSection(); 952 BD = new BinaryData(*Symbol, Address, Size, Alignment ? Alignment : 1, 953 Section, Flags); 954 GAI = BinaryDataMap.emplace(Address, BD).first; 955 GlobalSymbols[Name] = BD; 956 updateObjectNesting(GAI); 957 } else { 958 BD = GAI->second; 959 if (!BD->hasName(Name)) { 960 GlobalSymbols[Name] = BD; 961 BD->Symbols.push_back(Symbol); 962 } 963 } 964 965 return Symbol; 966 } 967 968 const BinaryData * 969 BinaryContext::getBinaryDataContainingAddressImpl(uint64_t Address) const { 970 auto NI = BinaryDataMap.lower_bound(Address); 971 auto End = BinaryDataMap.end(); 972 if ((NI != End && Address == NI->first) || 973 ((NI != BinaryDataMap.begin()) && (NI-- != BinaryDataMap.begin()))) { 974 if (NI->second->containsAddress(Address)) 975 return NI->second; 976 977 // If this is a sub-symbol, see if a parent data contains the address. 978 const BinaryData *BD = NI->second->getParent(); 979 while (BD) { 980 if (BD->containsAddress(Address)) 981 return BD; 982 BD = BD->getParent(); 983 } 984 } 985 return nullptr; 986 } 987 988 bool BinaryContext::setBinaryDataSize(uint64_t Address, uint64_t Size) { 989 auto NI = BinaryDataMap.find(Address); 990 assert(NI != BinaryDataMap.end()); 991 if (NI == BinaryDataMap.end()) 992 return false; 993 // TODO: it's possible that a jump table starts at the same address 994 // as a larger blob of private data. When we set the size of the 995 // jump table, it might be smaller than the total blob size. In this 996 // case we just leave the original size since (currently) it won't really 997 // affect anything. 998 assert((!NI->second->Size || NI->second->Size == Size || 999 (NI->second->isJumpTable() && NI->second->Size > Size)) && 1000 "can't change the size of a symbol that has already had its " 1001 "size set"); 1002 if (!NI->second->Size) { 1003 NI->second->Size = Size; 1004 updateObjectNesting(NI); 1005 return true; 1006 } 1007 return false; 1008 } 1009 1010 void BinaryContext::generateSymbolHashes() { 1011 auto isPadding = [](const BinaryData &BD) { 1012 StringRef Contents = BD.getSection().getContents(); 1013 StringRef SymData = Contents.substr(BD.getOffset(), BD.getSize()); 1014 return (BD.getName().startswith("HOLEat") || 1015 SymData.find_first_not_of(0) == StringRef::npos); 1016 }; 1017 1018 uint64_t NumCollisions = 0; 1019 for (auto &Entry : BinaryDataMap) { 1020 BinaryData &BD = *Entry.second; 1021 StringRef Name = BD.getName(); 1022 1023 if (!isInternalSymbolName(Name)) 1024 continue; 1025 1026 // First check if a non-anonymous alias exists and move it to the front. 1027 if (BD.getSymbols().size() > 1) { 1028 auto Itr = std::find_if(BD.getSymbols().begin(), BD.getSymbols().end(), 1029 [&](const MCSymbol *Symbol) { 1030 return !isInternalSymbolName(Symbol->getName()); 1031 }); 1032 if (Itr != BD.getSymbols().end()) { 1033 size_t Idx = std::distance(BD.getSymbols().begin(), Itr); 1034 std::swap(BD.getSymbols()[0], BD.getSymbols()[Idx]); 1035 continue; 1036 } 1037 } 1038 1039 // We have to skip 0 size symbols since they will all collide. 1040 if (BD.getSize() == 0) { 1041 continue; 1042 } 1043 1044 const uint64_t Hash = BD.getSection().hash(BD); 1045 const size_t Idx = Name.find("0x"); 1046 std::string NewName = 1047 (Twine(Name.substr(0, Idx)) + "_" + Twine::utohexstr(Hash)).str(); 1048 if (getBinaryDataByName(NewName)) { 1049 // Ignore collisions for symbols that appear to be padding 1050 // (i.e. all zeros or a "hole") 1051 if (!isPadding(BD)) { 1052 if (opts::Verbosity) { 1053 errs() << "BOLT-WARNING: collision detected when hashing " << BD 1054 << " with new name (" << NewName << "), skipping.\n"; 1055 } 1056 ++NumCollisions; 1057 } 1058 continue; 1059 } 1060 BD.Symbols.insert(BD.Symbols.begin(), Ctx->getOrCreateSymbol(NewName)); 1061 GlobalSymbols[NewName] = &BD; 1062 } 1063 if (NumCollisions) { 1064 errs() << "BOLT-WARNING: " << NumCollisions 1065 << " collisions detected while hashing binary objects"; 1066 if (!opts::Verbosity) 1067 errs() << ". Use -v=1 to see the list."; 1068 errs() << '\n'; 1069 } 1070 } 1071 1072 bool BinaryContext::registerFragment(BinaryFunction &TargetFunction, 1073 BinaryFunction &Function) const { 1074 if (!isPotentialFragmentByName(TargetFunction, Function)) 1075 return false; 1076 assert(TargetFunction.isFragment() && "TargetFunction must be a fragment"); 1077 if (TargetFunction.isParentFragment(&Function)) 1078 return true; 1079 TargetFunction.addParentFragment(Function); 1080 Function.addFragment(TargetFunction); 1081 if (!HasRelocations) { 1082 TargetFunction.setSimple(false); 1083 Function.setSimple(false); 1084 } 1085 if (opts::Verbosity >= 1) { 1086 outs() << "BOLT-INFO: marking " << TargetFunction << " as a fragment of " 1087 << Function << '\n'; 1088 } 1089 return true; 1090 } 1091 1092 void BinaryContext::processInterproceduralReferences(BinaryFunction &Function) { 1093 for (uint64_t Address : Function.InterproceduralReferences) { 1094 if (!Address) 1095 continue; 1096 1097 BinaryFunction *TargetFunction = 1098 getBinaryFunctionContainingAddress(Address); 1099 if (&Function == TargetFunction) 1100 continue; 1101 1102 if (TargetFunction) { 1103 if (TargetFunction->IsFragment && 1104 !registerFragment(*TargetFunction, Function)) { 1105 errs() << "BOLT-WARNING: interprocedural reference between unrelated " 1106 "fragments: " 1107 << Function.getPrintName() << " and " 1108 << TargetFunction->getPrintName() << '\n'; 1109 } 1110 if (uint64_t Offset = Address - TargetFunction->getAddress()) 1111 TargetFunction->addEntryPointAtOffset(Offset); 1112 1113 continue; 1114 } 1115 1116 // Check if address falls in function padding space - this could be 1117 // unmarked data in code. In this case adjust the padding space size. 1118 ErrorOr<BinarySection &> Section = getSectionForAddress(Address); 1119 assert(Section && "cannot get section for referenced address"); 1120 1121 if (!Section->isText()) 1122 continue; 1123 1124 // PLT requires special handling and could be ignored in this context. 1125 StringRef SectionName = Section->getName(); 1126 if (SectionName == ".plt" || SectionName == ".plt.got") 1127 continue; 1128 1129 if (opts::processAllFunctions()) { 1130 errs() << "BOLT-ERROR: cannot process binaries with unmarked " 1131 << "object in code at address 0x" << Twine::utohexstr(Address) 1132 << " belonging to section " << SectionName << " in current mode\n"; 1133 exit(1); 1134 } 1135 1136 TargetFunction = getBinaryFunctionContainingAddress(Address, 1137 /*CheckPastEnd=*/false, 1138 /*UseMaxSize=*/true); 1139 // We are not going to overwrite non-simple functions, but for simple 1140 // ones - adjust the padding size. 1141 if (TargetFunction && TargetFunction->isSimple()) { 1142 errs() << "BOLT-WARNING: function " << *TargetFunction 1143 << " has an object detected in a padding region at address 0x" 1144 << Twine::utohexstr(Address) << '\n'; 1145 TargetFunction->setMaxSize(TargetFunction->getSize()); 1146 } 1147 } 1148 1149 clearList(Function.InterproceduralReferences); 1150 } 1151 1152 void BinaryContext::postProcessSymbolTable() { 1153 fixBinaryDataHoles(); 1154 bool Valid = true; 1155 for (auto &Entry : BinaryDataMap) { 1156 BinaryData *BD = Entry.second; 1157 if ((BD->getName().startswith("SYMBOLat") || 1158 BD->getName().startswith("DATAat")) && 1159 !BD->getParent() && !BD->getSize() && !BD->isAbsolute() && 1160 BD->getSection()) { 1161 errs() << "BOLT-WARNING: zero-sized top level symbol: " << *BD << "\n"; 1162 Valid = false; 1163 } 1164 } 1165 assert(Valid); 1166 generateSymbolHashes(); 1167 } 1168 1169 void BinaryContext::foldFunction(BinaryFunction &ChildBF, 1170 BinaryFunction &ParentBF) { 1171 assert(!ChildBF.isMultiEntry() && !ParentBF.isMultiEntry() && 1172 "cannot merge functions with multiple entry points"); 1173 1174 std::unique_lock<std::shared_timed_mutex> WriteCtxLock(CtxMutex, 1175 std::defer_lock); 1176 std::unique_lock<std::shared_timed_mutex> WriteSymbolMapLock( 1177 SymbolToFunctionMapMutex, std::defer_lock); 1178 1179 const StringRef ChildName = ChildBF.getOneName(); 1180 1181 // Move symbols over and update bookkeeping info. 1182 for (MCSymbol *Symbol : ChildBF.getSymbols()) { 1183 ParentBF.getSymbols().push_back(Symbol); 1184 WriteSymbolMapLock.lock(); 1185 SymbolToFunctionMap[Symbol] = &ParentBF; 1186 WriteSymbolMapLock.unlock(); 1187 // NB: there's no need to update BinaryDataMap and GlobalSymbols. 1188 } 1189 ChildBF.getSymbols().clear(); 1190 1191 // Move other names the child function is known under. 1192 std::move(ChildBF.Aliases.begin(), ChildBF.Aliases.end(), 1193 std::back_inserter(ParentBF.Aliases)); 1194 ChildBF.Aliases.clear(); 1195 1196 if (HasRelocations) { 1197 // Merge execution counts of ChildBF into those of ParentBF. 1198 // Without relocations, we cannot reliably merge profiles as both functions 1199 // continue to exist and either one can be executed. 1200 ChildBF.mergeProfileDataInto(ParentBF); 1201 1202 std::shared_lock<std::shared_timed_mutex> ReadBfsLock(BinaryFunctionsMutex, 1203 std::defer_lock); 1204 std::unique_lock<std::shared_timed_mutex> WriteBfsLock(BinaryFunctionsMutex, 1205 std::defer_lock); 1206 // Remove ChildBF from the global set of functions in relocs mode. 1207 ReadBfsLock.lock(); 1208 auto FI = BinaryFunctions.find(ChildBF.getAddress()); 1209 ReadBfsLock.unlock(); 1210 1211 assert(FI != BinaryFunctions.end() && "function not found"); 1212 assert(&ChildBF == &FI->second && "function mismatch"); 1213 1214 WriteBfsLock.lock(); 1215 ChildBF.clearDisasmState(); 1216 FI = BinaryFunctions.erase(FI); 1217 WriteBfsLock.unlock(); 1218 1219 } else { 1220 // In non-relocation mode we keep the function, but rename it. 1221 std::string NewName = "__ICF_" + ChildName.str(); 1222 1223 WriteCtxLock.lock(); 1224 ChildBF.getSymbols().push_back(Ctx->getOrCreateSymbol(NewName)); 1225 WriteCtxLock.unlock(); 1226 1227 ChildBF.setFolded(&ParentBF); 1228 } 1229 } 1230 1231 void BinaryContext::fixBinaryDataHoles() { 1232 assert(validateObjectNesting() && "object nesting inconsitency detected"); 1233 1234 for (BinarySection &Section : allocatableSections()) { 1235 std::vector<std::pair<uint64_t, uint64_t>> Holes; 1236 1237 auto isNotHole = [&Section](const binary_data_iterator &Itr) { 1238 BinaryData *BD = Itr->second; 1239 bool isHole = (!BD->getParent() && !BD->getSize() && BD->isObject() && 1240 (BD->getName().startswith("SYMBOLat0x") || 1241 BD->getName().startswith("DATAat0x") || 1242 BD->getName().startswith("ANONYMOUS"))); 1243 return !isHole && BD->getSection() == Section && !BD->getParent(); 1244 }; 1245 1246 auto BDStart = BinaryDataMap.begin(); 1247 auto BDEnd = BinaryDataMap.end(); 1248 auto Itr = FilteredBinaryDataIterator(isNotHole, BDStart, BDEnd); 1249 auto End = FilteredBinaryDataIterator(isNotHole, BDEnd, BDEnd); 1250 1251 uint64_t EndAddress = Section.getAddress(); 1252 1253 while (Itr != End) { 1254 if (Itr->second->getAddress() > EndAddress) { 1255 uint64_t Gap = Itr->second->getAddress() - EndAddress; 1256 Holes.emplace_back(EndAddress, Gap); 1257 } 1258 EndAddress = Itr->second->getEndAddress(); 1259 ++Itr; 1260 } 1261 1262 if (EndAddress < Section.getEndAddress()) 1263 Holes.emplace_back(EndAddress, Section.getEndAddress() - EndAddress); 1264 1265 // If there is already a symbol at the start of the hole, grow that symbol 1266 // to cover the rest. Otherwise, create a new symbol to cover the hole. 1267 for (std::pair<uint64_t, uint64_t> &Hole : Holes) { 1268 BinaryData *BD = getBinaryDataAtAddress(Hole.first); 1269 if (BD) { 1270 // BD->getSection() can be != Section if there are sections that 1271 // overlap. In this case it is probably safe to just skip the holes 1272 // since the overlapping section will not(?) have any symbols in it. 1273 if (BD->getSection() == Section) 1274 setBinaryDataSize(Hole.first, Hole.second); 1275 } else { 1276 getOrCreateGlobalSymbol(Hole.first, "HOLEat", Hole.second, 1); 1277 } 1278 } 1279 } 1280 1281 assert(validateObjectNesting() && "object nesting inconsitency detected"); 1282 assert(validateHoles() && "top level hole detected in object map"); 1283 } 1284 1285 void BinaryContext::printGlobalSymbols(raw_ostream &OS) const { 1286 const BinarySection *CurrentSection = nullptr; 1287 bool FirstSection = true; 1288 1289 for (auto &Entry : BinaryDataMap) { 1290 const BinaryData *BD = Entry.second; 1291 const BinarySection &Section = BD->getSection(); 1292 if (FirstSection || Section != *CurrentSection) { 1293 uint64_t Address, Size; 1294 StringRef Name = Section.getName(); 1295 if (Section) { 1296 Address = Section.getAddress(); 1297 Size = Section.getSize(); 1298 } else { 1299 Address = BD->getAddress(); 1300 Size = BD->getSize(); 1301 } 1302 OS << "BOLT-INFO: Section " << Name << ", " 1303 << "0x" + Twine::utohexstr(Address) << ":" 1304 << "0x" + Twine::utohexstr(Address + Size) << "/" << Size << "\n"; 1305 CurrentSection = &Section; 1306 FirstSection = false; 1307 } 1308 1309 OS << "BOLT-INFO: "; 1310 const BinaryData *P = BD->getParent(); 1311 while (P) { 1312 OS << " "; 1313 P = P->getParent(); 1314 } 1315 OS << *BD << "\n"; 1316 } 1317 } 1318 1319 Expected<unsigned> 1320 BinaryContext::getDwarfFile(StringRef Directory, StringRef FileName, 1321 unsigned FileNumber, 1322 Optional<MD5::MD5Result> Checksum, 1323 Optional<StringRef> Source, unsigned CUID) { 1324 DwarfLineTable &Table = DwarfLineTablesCUMap[CUID]; 1325 return Table.tryGetFile(Directory, FileName, Checksum, Source, 1326 Ctx->getDwarfVersion(), FileNumber); 1327 } 1328 1329 unsigned BinaryContext::addDebugFilenameToUnit(const uint32_t DestCUID, 1330 const uint32_t SrcCUID, 1331 unsigned FileIndex) { 1332 DWARFCompileUnit *SrcUnit = DwCtx->getCompileUnitForOffset(SrcCUID); 1333 const DWARFDebugLine::LineTable *LineTable = 1334 DwCtx->getLineTableForUnit(SrcUnit); 1335 const std::vector<DWARFDebugLine::FileNameEntry> &FileNames = 1336 LineTable->Prologue.FileNames; 1337 // Dir indexes start at 1, as DWARF file numbers, and a dir index 0 1338 // means empty dir. 1339 assert(FileIndex > 0 && FileIndex <= FileNames.size() && 1340 "FileIndex out of range for the compilation unit."); 1341 StringRef Dir = ""; 1342 if (FileNames[FileIndex - 1].DirIdx != 0) { 1343 if (Optional<const char *> DirName = dwarf::toString( 1344 LineTable->Prologue 1345 .IncludeDirectories[FileNames[FileIndex - 1].DirIdx - 1])) { 1346 Dir = *DirName; 1347 } 1348 } 1349 StringRef FileName = ""; 1350 if (Optional<const char *> FName = 1351 dwarf::toString(FileNames[FileIndex - 1].Name)) 1352 FileName = *FName; 1353 assert(FileName != ""); 1354 return cantFail(getDwarfFile(Dir, FileName, 0, None, None, DestCUID)); 1355 } 1356 1357 std::vector<BinaryFunction *> BinaryContext::getSortedFunctions() { 1358 std::vector<BinaryFunction *> SortedFunctions(BinaryFunctions.size()); 1359 std::transform(BinaryFunctions.begin(), BinaryFunctions.end(), 1360 SortedFunctions.begin(), 1361 [](std::pair<const uint64_t, BinaryFunction> &BFI) { 1362 return &BFI.second; 1363 }); 1364 1365 std::stable_sort(SortedFunctions.begin(), SortedFunctions.end(), 1366 [](const BinaryFunction *A, const BinaryFunction *B) { 1367 if (A->hasValidIndex() && B->hasValidIndex()) { 1368 return A->getIndex() < B->getIndex(); 1369 } 1370 return A->hasValidIndex(); 1371 }); 1372 return SortedFunctions; 1373 } 1374 1375 std::vector<BinaryFunction *> BinaryContext::getAllBinaryFunctions() { 1376 std::vector<BinaryFunction *> AllFunctions; 1377 AllFunctions.reserve(BinaryFunctions.size() + InjectedBinaryFunctions.size()); 1378 std::transform(BinaryFunctions.begin(), BinaryFunctions.end(), 1379 std::back_inserter(AllFunctions), 1380 [](std::pair<const uint64_t, BinaryFunction> &BFI) { 1381 return &BFI.second; 1382 }); 1383 std::copy(InjectedBinaryFunctions.begin(), InjectedBinaryFunctions.end(), 1384 std::back_inserter(AllFunctions)); 1385 1386 return AllFunctions; 1387 } 1388 1389 Optional<DWARFUnit *> BinaryContext::getDWOCU(uint64_t DWOId) { 1390 auto Iter = DWOCUs.find(DWOId); 1391 if (Iter == DWOCUs.end()) 1392 return None; 1393 1394 return Iter->second; 1395 } 1396 1397 DWARFContext *BinaryContext::getDWOContext() { 1398 if (DWOCUs.empty()) 1399 return nullptr; 1400 return &DWOCUs.begin()->second->getContext(); 1401 } 1402 1403 /// Handles DWO sections that can either be in .o, .dwo or .dwp files. 1404 void BinaryContext::preprocessDWODebugInfo() { 1405 for (const std::unique_ptr<DWARFUnit> &CU : DwCtx->compile_units()) { 1406 DWARFUnit *const DwarfUnit = CU.get(); 1407 if (llvm::Optional<uint64_t> DWOId = DwarfUnit->getDWOId()) { 1408 DWARFUnit *DWOCU = DwarfUnit->getNonSkeletonUnitDIE(false).getDwarfUnit(); 1409 if (!DWOCU->isDWOUnit()) { 1410 std::string DWOName = dwarf::toString( 1411 DwarfUnit->getUnitDIE().find( 1412 {dwarf::DW_AT_dwo_name, dwarf::DW_AT_GNU_dwo_name}), 1413 ""); 1414 outs() << "BOLT-WARNING: Debug Fission: DWO debug information for " 1415 << DWOName 1416 << " was not retrieved and won't be updated. Please check " 1417 "relative path.\n"; 1418 continue; 1419 } 1420 DWOCUs[*DWOId] = DWOCU; 1421 } 1422 } 1423 } 1424 1425 void BinaryContext::preprocessDebugInfo() { 1426 struct CURange { 1427 uint64_t LowPC; 1428 uint64_t HighPC; 1429 DWARFUnit *Unit; 1430 1431 bool operator<(const CURange &Other) const { return LowPC < Other.LowPC; } 1432 }; 1433 1434 // Building a map of address ranges to CUs similar to .debug_aranges and use 1435 // it to assign CU to functions. 1436 std::vector<CURange> AllRanges; 1437 AllRanges.reserve(DwCtx->getNumCompileUnits()); 1438 for (const std::unique_ptr<DWARFUnit> &CU : DwCtx->compile_units()) { 1439 Expected<DWARFAddressRangesVector> RangesOrError = 1440 CU->getUnitDIE().getAddressRanges(); 1441 if (!RangesOrError) { 1442 consumeError(RangesOrError.takeError()); 1443 continue; 1444 } 1445 for (DWARFAddressRange &Range : *RangesOrError) { 1446 // Parts of the debug info could be invalidated due to corresponding code 1447 // being removed from the binary by the linker. Hence we check if the 1448 // address is a valid one. 1449 if (containsAddress(Range.LowPC)) 1450 AllRanges.emplace_back(CURange{Range.LowPC, Range.HighPC, CU.get()}); 1451 } 1452 } 1453 1454 std::sort(AllRanges.begin(), AllRanges.end()); 1455 for (auto &KV : BinaryFunctions) { 1456 const uint64_t FunctionAddress = KV.first; 1457 BinaryFunction &Function = KV.second; 1458 1459 auto It = std::partition_point( 1460 AllRanges.begin(), AllRanges.end(), 1461 [=](CURange R) { return R.HighPC <= FunctionAddress; }); 1462 if (It != AllRanges.end() && It->LowPC <= FunctionAddress) { 1463 Function.setDWARFUnit(It->Unit); 1464 } 1465 } 1466 1467 // Discover units with debug info that needs to be updated. 1468 for (const auto &KV : BinaryFunctions) { 1469 const BinaryFunction &BF = KV.second; 1470 if (shouldEmit(BF) && BF.getDWARFUnit()) 1471 ProcessedCUs.insert(BF.getDWARFUnit()); 1472 } 1473 1474 // Clear debug info for functions from units that we are not going to process. 1475 for (auto &KV : BinaryFunctions) { 1476 BinaryFunction &BF = KV.second; 1477 if (BF.getDWARFUnit() && !ProcessedCUs.count(BF.getDWARFUnit())) 1478 BF.setDWARFUnit(nullptr); 1479 } 1480 1481 if (opts::Verbosity >= 1) { 1482 outs() << "BOLT-INFO: " << ProcessedCUs.size() << " out of " 1483 << DwCtx->getNumCompileUnits() << " CUs will be updated\n"; 1484 } 1485 1486 // Populate MCContext with DWARF files from all units. 1487 StringRef GlobalPrefix = AsmInfo->getPrivateGlobalPrefix(); 1488 for (const std::unique_ptr<DWARFUnit> &CU : DwCtx->compile_units()) { 1489 const uint64_t CUID = CU->getOffset(); 1490 getDwarfLineTable(CUID).setLabel(Ctx->getOrCreateSymbol( 1491 GlobalPrefix + "line_table_start" + Twine(CUID))); 1492 1493 if (!ProcessedCUs.count(CU.get())) 1494 continue; 1495 1496 const DWARFDebugLine::LineTable *LineTable = 1497 DwCtx->getLineTableForUnit(CU.get()); 1498 const std::vector<DWARFDebugLine::FileNameEntry> &FileNames = 1499 LineTable->Prologue.FileNames; 1500 1501 // Assign a unique label to every line table, one per CU. 1502 // Make sure empty debug line tables are registered too. 1503 if (FileNames.empty()) { 1504 cantFail(getDwarfFile("", "<unknown>", 0, None, None, CUID)); 1505 continue; 1506 } 1507 for (size_t I = 0, Size = FileNames.size(); I != Size; ++I) { 1508 // Dir indexes start at 1, as DWARF file numbers, and a dir index 0 1509 // means empty dir. 1510 StringRef Dir = ""; 1511 if (FileNames[I].DirIdx != 0) 1512 if (Optional<const char *> DirName = dwarf::toString( 1513 LineTable->Prologue 1514 .IncludeDirectories[FileNames[I].DirIdx - 1])) 1515 Dir = *DirName; 1516 StringRef FileName = ""; 1517 if (Optional<const char *> FName = dwarf::toString(FileNames[I].Name)) 1518 FileName = *FName; 1519 assert(FileName != ""); 1520 cantFail(getDwarfFile(Dir, FileName, 0, None, None, CUID)); 1521 } 1522 } 1523 1524 preprocessDWODebugInfo(); 1525 } 1526 1527 bool BinaryContext::shouldEmit(const BinaryFunction &Function) const { 1528 if (opts::processAllFunctions()) 1529 return true; 1530 1531 if (Function.isIgnored()) 1532 return false; 1533 1534 // In relocation mode we will emit non-simple functions with CFG. 1535 // If the function does not have a CFG it should be marked as ignored. 1536 return HasRelocations || Function.isSimple(); 1537 } 1538 1539 void BinaryContext::printCFI(raw_ostream &OS, const MCCFIInstruction &Inst) { 1540 uint32_t Operation = Inst.getOperation(); 1541 switch (Operation) { 1542 case MCCFIInstruction::OpSameValue: 1543 OS << "OpSameValue Reg" << Inst.getRegister(); 1544 break; 1545 case MCCFIInstruction::OpRememberState: 1546 OS << "OpRememberState"; 1547 break; 1548 case MCCFIInstruction::OpRestoreState: 1549 OS << "OpRestoreState"; 1550 break; 1551 case MCCFIInstruction::OpOffset: 1552 OS << "OpOffset Reg" << Inst.getRegister() << " " << Inst.getOffset(); 1553 break; 1554 case MCCFIInstruction::OpDefCfaRegister: 1555 OS << "OpDefCfaRegister Reg" << Inst.getRegister(); 1556 break; 1557 case MCCFIInstruction::OpDefCfaOffset: 1558 OS << "OpDefCfaOffset " << Inst.getOffset(); 1559 break; 1560 case MCCFIInstruction::OpDefCfa: 1561 OS << "OpDefCfa Reg" << Inst.getRegister() << " " << Inst.getOffset(); 1562 break; 1563 case MCCFIInstruction::OpRelOffset: 1564 OS << "OpRelOffset Reg" << Inst.getRegister() << " " << Inst.getOffset(); 1565 break; 1566 case MCCFIInstruction::OpAdjustCfaOffset: 1567 OS << "OfAdjustCfaOffset " << Inst.getOffset(); 1568 break; 1569 case MCCFIInstruction::OpEscape: 1570 OS << "OpEscape"; 1571 break; 1572 case MCCFIInstruction::OpRestore: 1573 OS << "OpRestore Reg" << Inst.getRegister(); 1574 break; 1575 case MCCFIInstruction::OpUndefined: 1576 OS << "OpUndefined Reg" << Inst.getRegister(); 1577 break; 1578 case MCCFIInstruction::OpRegister: 1579 OS << "OpRegister Reg" << Inst.getRegister() << " Reg" 1580 << Inst.getRegister2(); 1581 break; 1582 case MCCFIInstruction::OpWindowSave: 1583 OS << "OpWindowSave"; 1584 break; 1585 case MCCFIInstruction::OpGnuArgsSize: 1586 OS << "OpGnuArgsSize"; 1587 break; 1588 default: 1589 OS << "Op#" << Operation; 1590 break; 1591 } 1592 } 1593 1594 void BinaryContext::printInstruction(raw_ostream &OS, const MCInst &Instruction, 1595 uint64_t Offset, 1596 const BinaryFunction *Function, 1597 bool PrintMCInst, bool PrintMemData, 1598 bool PrintRelocations) const { 1599 if (MIB->isEHLabel(Instruction)) { 1600 OS << " EH_LABEL: " << *MIB->getTargetSymbol(Instruction) << '\n'; 1601 return; 1602 } 1603 OS << format(" %08" PRIx64 ": ", Offset); 1604 if (MIB->isCFI(Instruction)) { 1605 uint32_t Offset = Instruction.getOperand(0).getImm(); 1606 OS << "\t!CFI\t$" << Offset << "\t; "; 1607 if (Function) 1608 printCFI(OS, *Function->getCFIFor(Instruction)); 1609 OS << "\n"; 1610 return; 1611 } 1612 InstPrinter->printInst(&Instruction, 0, "", *STI, OS); 1613 if (MIB->isCall(Instruction)) { 1614 if (MIB->isTailCall(Instruction)) 1615 OS << " # TAILCALL "; 1616 if (MIB->isInvoke(Instruction)) { 1617 const Optional<MCPlus::MCLandingPad> EHInfo = MIB->getEHInfo(Instruction); 1618 OS << " # handler: "; 1619 if (EHInfo->first) 1620 OS << *EHInfo->first; 1621 else 1622 OS << '0'; 1623 OS << "; action: " << EHInfo->second; 1624 const int64_t GnuArgsSize = MIB->getGnuArgsSize(Instruction); 1625 if (GnuArgsSize >= 0) 1626 OS << "; GNU_args_size = " << GnuArgsSize; 1627 } 1628 } else if (MIB->isIndirectBranch(Instruction)) { 1629 if (uint64_t JTAddress = MIB->getJumpTable(Instruction)) { 1630 OS << " # JUMPTABLE @0x" << Twine::utohexstr(JTAddress); 1631 } else { 1632 OS << " # UNKNOWN CONTROL FLOW"; 1633 } 1634 } 1635 if (Optional<uint32_t> Offset = MIB->getOffset(Instruction)) 1636 OS << " # Offset: " << *Offset; 1637 1638 MIB->printAnnotations(Instruction, OS); 1639 1640 if (opts::PrintDebugInfo) { 1641 DebugLineTableRowRef RowRef = 1642 DebugLineTableRowRef::fromSMLoc(Instruction.getLoc()); 1643 if (RowRef != DebugLineTableRowRef::NULL_ROW) { 1644 const DWARFDebugLine::LineTable *LineTable; 1645 if (Function && Function->getDWARFUnit() && 1646 Function->getDWARFUnit()->getOffset() == RowRef.DwCompileUnitIndex) { 1647 LineTable = Function->getDWARFLineTable(); 1648 } else { 1649 LineTable = DwCtx->getLineTableForUnit( 1650 DwCtx->getCompileUnitForOffset(RowRef.DwCompileUnitIndex)); 1651 } 1652 assert(LineTable && 1653 "line table expected for instruction with debug info"); 1654 1655 const DWARFDebugLine::Row &Row = LineTable->Rows[RowRef.RowIndex - 1]; 1656 StringRef FileName = ""; 1657 if (Optional<const char *> FName = 1658 dwarf::toString(LineTable->Prologue.FileNames[Row.File - 1].Name)) 1659 FileName = *FName; 1660 OS << " # debug line " << FileName << ":" << Row.Line; 1661 if (Row.Column) 1662 OS << ":" << Row.Column; 1663 if (Row.Discriminator) 1664 OS << " discriminator:" << Row.Discriminator; 1665 } 1666 } 1667 1668 if ((opts::PrintRelocations || PrintRelocations) && Function) { 1669 const uint64_t Size = computeCodeSize(&Instruction, &Instruction + 1); 1670 Function->printRelocations(OS, Offset, Size); 1671 } 1672 1673 OS << "\n"; 1674 1675 if (PrintMCInst) { 1676 Instruction.dump_pretty(OS, InstPrinter.get()); 1677 OS << "\n"; 1678 } 1679 } 1680 1681 ErrorOr<BinarySection &> BinaryContext::getSectionForAddress(uint64_t Address) { 1682 auto SI = AddressToSection.upper_bound(Address); 1683 if (SI != AddressToSection.begin()) { 1684 --SI; 1685 uint64_t UpperBound = SI->first + SI->second->getSize(); 1686 if (!SI->second->getSize()) 1687 UpperBound += 1; 1688 if (UpperBound > Address) 1689 return *SI->second; 1690 } 1691 return std::make_error_code(std::errc::bad_address); 1692 } 1693 1694 ErrorOr<StringRef> 1695 BinaryContext::getSectionNameForAddress(uint64_t Address) const { 1696 if (ErrorOr<const BinarySection &> Section = getSectionForAddress(Address)) 1697 return Section->getName(); 1698 return std::make_error_code(std::errc::bad_address); 1699 } 1700 1701 BinarySection &BinaryContext::registerSection(BinarySection *Section) { 1702 auto Res = Sections.insert(Section); 1703 (void)Res; 1704 assert(Res.second && "can't register the same section twice."); 1705 1706 // Only register allocatable sections in the AddressToSection map. 1707 if (Section->isAllocatable() && Section->getAddress()) 1708 AddressToSection.insert(std::make_pair(Section->getAddress(), Section)); 1709 NameToSection.insert( 1710 std::make_pair(std::string(Section->getName()), Section)); 1711 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: registering " << *Section << "\n"); 1712 return *Section; 1713 } 1714 1715 BinarySection &BinaryContext::registerSection(SectionRef Section) { 1716 return registerSection(new BinarySection(*this, Section)); 1717 } 1718 1719 BinarySection & 1720 BinaryContext::registerSection(StringRef SectionName, 1721 const BinarySection &OriginalSection) { 1722 return registerSection( 1723 new BinarySection(*this, SectionName, OriginalSection)); 1724 } 1725 1726 BinarySection & 1727 BinaryContext::registerOrUpdateSection(StringRef Name, unsigned ELFType, 1728 unsigned ELFFlags, uint8_t *Data, 1729 uint64_t Size, unsigned Alignment) { 1730 auto NamedSections = getSectionByName(Name); 1731 if (NamedSections.begin() != NamedSections.end()) { 1732 assert(std::next(NamedSections.begin()) == NamedSections.end() && 1733 "can only update unique sections"); 1734 BinarySection *Section = NamedSections.begin()->second; 1735 1736 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: updating " << *Section << " -> "); 1737 const bool Flag = Section->isAllocatable(); 1738 (void)Flag; 1739 Section->update(Data, Size, Alignment, ELFType, ELFFlags); 1740 LLVM_DEBUG(dbgs() << *Section << "\n"); 1741 // FIXME: Fix section flags/attributes for MachO. 1742 if (isELF()) 1743 assert(Flag == Section->isAllocatable() && 1744 "can't change section allocation status"); 1745 return *Section; 1746 } 1747 1748 return registerSection( 1749 new BinarySection(*this, Name, Data, Size, Alignment, ELFType, ELFFlags)); 1750 } 1751 1752 bool BinaryContext::deregisterSection(BinarySection &Section) { 1753 BinarySection *SectionPtr = &Section; 1754 auto Itr = Sections.find(SectionPtr); 1755 if (Itr != Sections.end()) { 1756 auto Range = AddressToSection.equal_range(SectionPtr->getAddress()); 1757 while (Range.first != Range.second) { 1758 if (Range.first->second == SectionPtr) { 1759 AddressToSection.erase(Range.first); 1760 break; 1761 } 1762 ++Range.first; 1763 } 1764 1765 auto NameRange = 1766 NameToSection.equal_range(std::string(SectionPtr->getName())); 1767 while (NameRange.first != NameRange.second) { 1768 if (NameRange.first->second == SectionPtr) { 1769 NameToSection.erase(NameRange.first); 1770 break; 1771 } 1772 ++NameRange.first; 1773 } 1774 1775 Sections.erase(Itr); 1776 delete SectionPtr; 1777 return true; 1778 } 1779 return false; 1780 } 1781 1782 void BinaryContext::printSections(raw_ostream &OS) const { 1783 for (BinarySection *const &Section : Sections) 1784 OS << "BOLT-INFO: " << *Section << "\n"; 1785 } 1786 1787 BinarySection &BinaryContext::absoluteSection() { 1788 if (ErrorOr<BinarySection &> Section = getUniqueSectionByName("<absolute>")) 1789 return *Section; 1790 return registerOrUpdateSection("<absolute>", ELF::SHT_NULL, 0u); 1791 } 1792 1793 ErrorOr<uint64_t> BinaryContext::getUnsignedValueAtAddress(uint64_t Address, 1794 size_t Size) const { 1795 const ErrorOr<const BinarySection &> Section = getSectionForAddress(Address); 1796 if (!Section) 1797 return std::make_error_code(std::errc::bad_address); 1798 1799 if (Section->isVirtual()) 1800 return 0; 1801 1802 DataExtractor DE(Section->getContents(), AsmInfo->isLittleEndian(), 1803 AsmInfo->getCodePointerSize()); 1804 auto ValueOffset = static_cast<uint64_t>(Address - Section->getAddress()); 1805 return DE.getUnsigned(&ValueOffset, Size); 1806 } 1807 1808 ErrorOr<uint64_t> BinaryContext::getSignedValueAtAddress(uint64_t Address, 1809 size_t Size) const { 1810 const ErrorOr<const BinarySection &> Section = getSectionForAddress(Address); 1811 if (!Section) 1812 return std::make_error_code(std::errc::bad_address); 1813 1814 if (Section->isVirtual()) 1815 return 0; 1816 1817 DataExtractor DE(Section->getContents(), AsmInfo->isLittleEndian(), 1818 AsmInfo->getCodePointerSize()); 1819 auto ValueOffset = static_cast<uint64_t>(Address - Section->getAddress()); 1820 return DE.getSigned(&ValueOffset, Size); 1821 } 1822 1823 void BinaryContext::addRelocation(uint64_t Address, MCSymbol *Symbol, 1824 uint64_t Type, uint64_t Addend, 1825 uint64_t Value) { 1826 ErrorOr<BinarySection &> Section = getSectionForAddress(Address); 1827 assert(Section && "cannot find section for address"); 1828 Section->addRelocation(Address - Section->getAddress(), Symbol, Type, Addend, 1829 Value); 1830 } 1831 1832 void BinaryContext::addDynamicRelocation(uint64_t Address, MCSymbol *Symbol, 1833 uint64_t Type, uint64_t Addend, 1834 uint64_t Value) { 1835 ErrorOr<BinarySection &> Section = getSectionForAddress(Address); 1836 assert(Section && "cannot find section for address"); 1837 Section->addDynamicRelocation(Address - Section->getAddress(), Symbol, Type, 1838 Addend, Value); 1839 } 1840 1841 bool BinaryContext::removeRelocationAt(uint64_t Address) { 1842 ErrorOr<BinarySection &> Section = getSectionForAddress(Address); 1843 assert(Section && "cannot find section for address"); 1844 return Section->removeRelocationAt(Address - Section->getAddress()); 1845 } 1846 1847 const Relocation *BinaryContext::getRelocationAt(uint64_t Address) { 1848 ErrorOr<BinarySection &> Section = getSectionForAddress(Address); 1849 if (!Section) 1850 return nullptr; 1851 1852 return Section->getRelocationAt(Address - Section->getAddress()); 1853 } 1854 1855 const Relocation *BinaryContext::getDynamicRelocationAt(uint64_t Address) { 1856 ErrorOr<BinarySection &> Section = getSectionForAddress(Address); 1857 if (!Section) 1858 return nullptr; 1859 1860 return Section->getDynamicRelocationAt(Address - Section->getAddress()); 1861 } 1862 1863 void BinaryContext::markAmbiguousRelocations(BinaryData &BD, 1864 const uint64_t Address) { 1865 auto setImmovable = [&](BinaryData &BD) { 1866 BinaryData *Root = BD.getAtomicRoot(); 1867 LLVM_DEBUG(if (Root->isMoveable()) { 1868 dbgs() << "BOLT-DEBUG: setting " << *Root << " as immovable " 1869 << "due to ambiguous relocation referencing 0x" 1870 << Twine::utohexstr(Address) << '\n'; 1871 }); 1872 Root->setIsMoveable(false); 1873 }; 1874 1875 if (Address == BD.getAddress()) { 1876 setImmovable(BD); 1877 1878 // Set previous symbol as immovable 1879 BinaryData *Prev = getBinaryDataContainingAddress(Address - 1); 1880 if (Prev && Prev->getEndAddress() == BD.getAddress()) 1881 setImmovable(*Prev); 1882 } 1883 1884 if (Address == BD.getEndAddress()) { 1885 setImmovable(BD); 1886 1887 // Set next symbol as immovable 1888 BinaryData *Next = getBinaryDataContainingAddress(BD.getEndAddress()); 1889 if (Next && Next->getAddress() == BD.getEndAddress()) 1890 setImmovable(*Next); 1891 } 1892 } 1893 1894 BinaryFunction *BinaryContext::getFunctionForSymbol(const MCSymbol *Symbol, 1895 uint64_t *EntryDesc) { 1896 std::shared_lock<std::shared_timed_mutex> Lock(SymbolToFunctionMapMutex); 1897 auto BFI = SymbolToFunctionMap.find(Symbol); 1898 if (BFI == SymbolToFunctionMap.end()) 1899 return nullptr; 1900 1901 BinaryFunction *BF = BFI->second; 1902 if (EntryDesc) 1903 *EntryDesc = BF->getEntryIDForSymbol(Symbol); 1904 1905 return BF; 1906 } 1907 1908 void BinaryContext::exitWithBugReport(StringRef Message, 1909 const BinaryFunction &Function) const { 1910 errs() << "=======================================\n"; 1911 errs() << "BOLT is unable to proceed because it couldn't properly understand " 1912 "this function.\n"; 1913 errs() << "If you are running the most recent version of BOLT, you may " 1914 "want to " 1915 "report this and paste this dump.\nPlease check that there is no " 1916 "sensitive contents being shared in this dump.\n"; 1917 errs() << "\nOffending function: " << Function.getPrintName() << "\n\n"; 1918 ScopedPrinter SP(errs()); 1919 SP.printBinaryBlock("Function contents", *Function.getData()); 1920 errs() << "\n"; 1921 Function.dump(); 1922 errs() << "ERROR: " << Message; 1923 errs() << "\n=======================================\n"; 1924 exit(1); 1925 } 1926 1927 BinaryFunction * 1928 BinaryContext::createInjectedBinaryFunction(const std::string &Name, 1929 bool IsSimple) { 1930 InjectedBinaryFunctions.push_back(new BinaryFunction(Name, *this, IsSimple)); 1931 BinaryFunction *BF = InjectedBinaryFunctions.back(); 1932 setSymbolToFunctionMap(BF->getSymbol(), BF); 1933 BF->CurrentState = BinaryFunction::State::CFG; 1934 return BF; 1935 } 1936 1937 std::pair<size_t, size_t> 1938 BinaryContext::calculateEmittedSize(BinaryFunction &BF, bool FixBranches) { 1939 // Adjust branch instruction to match the current layout. 1940 if (FixBranches) 1941 BF.fixBranches(); 1942 1943 // Create local MC context to isolate the effect of ephemeral code emission. 1944 IndependentCodeEmitter MCEInstance = createIndependentMCCodeEmitter(); 1945 MCContext *LocalCtx = MCEInstance.LocalCtx.get(); 1946 MCAsmBackend *MAB = 1947 TheTarget->createMCAsmBackend(*STI, *MRI, MCTargetOptions()); 1948 1949 SmallString<256> Code; 1950 raw_svector_ostream VecOS(Code); 1951 1952 std::unique_ptr<MCObjectWriter> OW = MAB->createObjectWriter(VecOS); 1953 std::unique_ptr<MCStreamer> Streamer(TheTarget->createMCObjectStreamer( 1954 *TheTriple, *LocalCtx, std::unique_ptr<MCAsmBackend>(MAB), std::move(OW), 1955 std::unique_ptr<MCCodeEmitter>(MCEInstance.MCE.release()), *STI, 1956 /*RelaxAll=*/false, 1957 /*IncrementalLinkerCompatible=*/false, 1958 /*DWARFMustBeAtTheEnd=*/false)); 1959 1960 Streamer->initSections(false, *STI); 1961 1962 MCSection *Section = MCEInstance.LocalMOFI->getTextSection(); 1963 Section->setHasInstructions(true); 1964 1965 // Create symbols in the LocalCtx so that they get destroyed with it. 1966 MCSymbol *StartLabel = LocalCtx->createTempSymbol(); 1967 MCSymbol *EndLabel = LocalCtx->createTempSymbol(); 1968 MCSymbol *ColdStartLabel = LocalCtx->createTempSymbol(); 1969 MCSymbol *ColdEndLabel = LocalCtx->createTempSymbol(); 1970 1971 Streamer->SwitchSection(Section); 1972 Streamer->emitLabel(StartLabel); 1973 emitFunctionBody(*Streamer, BF, /*EmitColdPart=*/false, 1974 /*EmitCodeOnly=*/true); 1975 Streamer->emitLabel(EndLabel); 1976 1977 if (BF.isSplit()) { 1978 MCSectionELF *ColdSection = 1979 LocalCtx->getELFSection(BF.getColdCodeSectionName(), ELF::SHT_PROGBITS, 1980 ELF::SHF_EXECINSTR | ELF::SHF_ALLOC); 1981 ColdSection->setHasInstructions(true); 1982 1983 Streamer->SwitchSection(ColdSection); 1984 Streamer->emitLabel(ColdStartLabel); 1985 emitFunctionBody(*Streamer, BF, /*EmitColdPart=*/true, 1986 /*EmitCodeOnly=*/true); 1987 Streamer->emitLabel(ColdEndLabel); 1988 // To avoid calling MCObjectStreamer::flushPendingLabels() which is private 1989 Streamer->emitBytes(StringRef("")); 1990 Streamer->SwitchSection(Section); 1991 } 1992 1993 // To avoid calling MCObjectStreamer::flushPendingLabels() which is private or 1994 // MCStreamer::Finish(), which does more than we want 1995 Streamer->emitBytes(StringRef("")); 1996 1997 MCAssembler &Assembler = 1998 static_cast<MCObjectStreamer *>(Streamer.get())->getAssembler(); 1999 MCAsmLayout Layout(Assembler); 2000 Assembler.layout(Layout); 2001 2002 const uint64_t HotSize = 2003 Layout.getSymbolOffset(*EndLabel) - Layout.getSymbolOffset(*StartLabel); 2004 const uint64_t ColdSize = BF.isSplit() 2005 ? Layout.getSymbolOffset(*ColdEndLabel) - 2006 Layout.getSymbolOffset(*ColdStartLabel) 2007 : 0ULL; 2008 2009 // Clean-up the effect of the code emission. 2010 for (const MCSymbol &Symbol : Assembler.symbols()) { 2011 MCSymbol *MutableSymbol = const_cast<MCSymbol *>(&Symbol); 2012 MutableSymbol->setUndefined(); 2013 MutableSymbol->setIsRegistered(false); 2014 } 2015 2016 return std::make_pair(HotSize, ColdSize); 2017 } 2018 2019 bool BinaryContext::validateEncoding(const MCInst &Inst, 2020 ArrayRef<uint8_t> InputEncoding) const { 2021 SmallString<256> Code; 2022 SmallVector<MCFixup, 4> Fixups; 2023 raw_svector_ostream VecOS(Code); 2024 2025 MCE->encodeInstruction(Inst, VecOS, Fixups, *STI); 2026 auto EncodedData = ArrayRef<uint8_t>((uint8_t *)Code.data(), Code.size()); 2027 if (InputEncoding != EncodedData) { 2028 if (opts::Verbosity > 1) { 2029 errs() << "BOLT-WARNING: mismatched encoding detected\n" 2030 << " input: " << InputEncoding << '\n' 2031 << " output: " << EncodedData << '\n'; 2032 } 2033 return false; 2034 } 2035 2036 return true; 2037 } 2038 2039 uint64_t BinaryContext::getHotThreshold() const { 2040 static uint64_t Threshold = 0; 2041 if (Threshold == 0) { 2042 Threshold = std::max( 2043 (uint64_t)opts::ExecutionCountThreshold, 2044 NumProfiledFuncs ? SumExecutionCount / (2 * NumProfiledFuncs) : 1); 2045 } 2046 return Threshold; 2047 } 2048 2049 BinaryFunction *BinaryContext::getBinaryFunctionContainingAddress( 2050 uint64_t Address, bool CheckPastEnd, bool UseMaxSize) { 2051 auto FI = BinaryFunctions.upper_bound(Address); 2052 if (FI == BinaryFunctions.begin()) 2053 return nullptr; 2054 --FI; 2055 2056 const uint64_t UsedSize = 2057 UseMaxSize ? FI->second.getMaxSize() : FI->second.getSize(); 2058 2059 if (Address >= FI->first + UsedSize + (CheckPastEnd ? 1 : 0)) 2060 return nullptr; 2061 2062 return &FI->second; 2063 } 2064 2065 BinaryFunction *BinaryContext::getBinaryFunctionAtAddress(uint64_t Address) { 2066 // First, try to find a function starting at the given address. If the 2067 // function was folded, this will get us the original folded function if it 2068 // wasn't removed from the list, e.g. in non-relocation mode. 2069 auto BFI = BinaryFunctions.find(Address); 2070 if (BFI != BinaryFunctions.end()) 2071 return &BFI->second; 2072 2073 // We might have folded the function matching the object at the given 2074 // address. In such case, we look for a function matching the symbol 2075 // registered at the original address. The new function (the one that the 2076 // original was folded into) will hold the symbol. 2077 if (const BinaryData *BD = getBinaryDataAtAddress(Address)) { 2078 uint64_t EntryID = 0; 2079 BinaryFunction *BF = getFunctionForSymbol(BD->getSymbol(), &EntryID); 2080 if (BF && EntryID == 0) 2081 return BF; 2082 } 2083 return nullptr; 2084 } 2085 2086 DebugAddressRangesVector BinaryContext::translateModuleAddressRanges( 2087 const DWARFAddressRangesVector &InputRanges) const { 2088 DebugAddressRangesVector OutputRanges; 2089 2090 for (const DWARFAddressRange Range : InputRanges) { 2091 auto BFI = BinaryFunctions.lower_bound(Range.LowPC); 2092 while (BFI != BinaryFunctions.end()) { 2093 const BinaryFunction &Function = BFI->second; 2094 if (Function.getAddress() >= Range.HighPC) 2095 break; 2096 const DebugAddressRangesVector FunctionRanges = 2097 Function.getOutputAddressRanges(); 2098 std::move(std::begin(FunctionRanges), std::end(FunctionRanges), 2099 std::back_inserter(OutputRanges)); 2100 std::advance(BFI, 1); 2101 } 2102 } 2103 2104 return OutputRanges; 2105 } 2106 2107 } // namespace bolt 2108 } // namespace llvm 2109