1 //===- bolt/Core/BinaryContext.cpp - Low-level context --------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file implements the BinaryContext class. 10 // 11 //===----------------------------------------------------------------------===// 12 13 #include "bolt/Core/BinaryContext.h" 14 #include "bolt/Core/BinaryEmitter.h" 15 #include "bolt/Core/BinaryFunction.h" 16 #include "bolt/Utils/CommandLineOpts.h" 17 #include "bolt/Utils/NameResolver.h" 18 #include "bolt/Utils/Utils.h" 19 #include "llvm/ADT/Twine.h" 20 #include "llvm/DebugInfo/DWARF/DWARFFormValue.h" 21 #include "llvm/DebugInfo/DWARF/DWARFUnit.h" 22 #include "llvm/MC/MCAsmLayout.h" 23 #include "llvm/MC/MCAssembler.h" 24 #include "llvm/MC/MCContext.h" 25 #include "llvm/MC/MCDisassembler/MCDisassembler.h" 26 #include "llvm/MC/MCInstPrinter.h" 27 #include "llvm/MC/MCObjectStreamer.h" 28 #include "llvm/MC/MCObjectWriter.h" 29 #include "llvm/MC/MCRegisterInfo.h" 30 #include "llvm/MC/MCSectionELF.h" 31 #include "llvm/MC/MCStreamer.h" 32 #include "llvm/MC/MCSubtargetInfo.h" 33 #include "llvm/MC/MCSymbol.h" 34 #include "llvm/Support/CommandLine.h" 35 #include "llvm/Support/Regex.h" 36 #include <algorithm> 37 #include <functional> 38 #include <iterator> 39 #include <unordered_set> 40 41 using namespace llvm; 42 43 #undef DEBUG_TYPE 44 #define DEBUG_TYPE "bolt" 45 46 namespace opts { 47 48 cl::opt<bool> 49 NoHugePages("no-huge-pages", 50 cl::desc("use regular size pages for code alignment"), 51 cl::ZeroOrMore, 52 cl::Hidden, 53 cl::cat(BoltCategory)); 54 55 static cl::opt<bool> 56 PrintDebugInfo("print-debug-info", 57 cl::desc("print debug info when printing functions"), 58 cl::Hidden, 59 cl::ZeroOrMore, 60 cl::cat(BoltCategory)); 61 62 cl::opt<bool> 63 PrintRelocations("print-relocations", 64 cl::desc("print relocations when printing functions/objects"), 65 cl::Hidden, 66 cl::ZeroOrMore, 67 cl::cat(BoltCategory)); 68 69 static cl::opt<bool> 70 PrintMemData("print-mem-data", 71 cl::desc("print memory data annotations when printing functions"), 72 cl::Hidden, 73 cl::ZeroOrMore, 74 cl::cat(BoltCategory)); 75 76 } // namespace opts 77 78 namespace llvm { 79 namespace bolt { 80 81 BinaryContext::BinaryContext(std::unique_ptr<MCContext> Ctx, 82 std::unique_ptr<DWARFContext> DwCtx, 83 std::unique_ptr<Triple> TheTriple, 84 const Target *TheTarget, std::string TripleName, 85 std::unique_ptr<MCCodeEmitter> MCE, 86 std::unique_ptr<MCObjectFileInfo> MOFI, 87 std::unique_ptr<const MCAsmInfo> AsmInfo, 88 std::unique_ptr<const MCInstrInfo> MII, 89 std::unique_ptr<const MCSubtargetInfo> STI, 90 std::unique_ptr<MCInstPrinter> InstPrinter, 91 std::unique_ptr<const MCInstrAnalysis> MIA, 92 std::unique_ptr<MCPlusBuilder> MIB, 93 std::unique_ptr<const MCRegisterInfo> MRI, 94 std::unique_ptr<MCDisassembler> DisAsm) 95 : Ctx(std::move(Ctx)), DwCtx(std::move(DwCtx)), 96 TheTriple(std::move(TheTriple)), TheTarget(TheTarget), 97 TripleName(TripleName), MCE(std::move(MCE)), MOFI(std::move(MOFI)), 98 AsmInfo(std::move(AsmInfo)), MII(std::move(MII)), STI(std::move(STI)), 99 InstPrinter(std::move(InstPrinter)), MIA(std::move(MIA)), 100 MIB(std::move(MIB)), MRI(std::move(MRI)), DisAsm(std::move(DisAsm)) { 101 Relocation::Arch = this->TheTriple->getArch(); 102 PageAlign = opts::NoHugePages ? RegularPageSize : HugePageSize; 103 } 104 105 BinaryContext::~BinaryContext() { 106 for (BinarySection *Section : Sections) 107 delete Section; 108 for (BinaryFunction *InjectedFunction : InjectedBinaryFunctions) 109 delete InjectedFunction; 110 for (std::pair<const uint64_t, JumpTable *> JTI : JumpTables) 111 delete JTI.second; 112 clearBinaryData(); 113 } 114 115 /// Create BinaryContext for a given architecture \p ArchName and 116 /// triple \p TripleName. 117 std::unique_ptr<BinaryContext> 118 BinaryContext::createBinaryContext(const ObjectFile *File, bool IsPIC, 119 std::unique_ptr<DWARFContext> DwCtx) { 120 StringRef ArchName = ""; 121 StringRef FeaturesStr = ""; 122 switch (File->getArch()) { 123 case llvm::Triple::x86_64: 124 ArchName = "x86-64"; 125 FeaturesStr = "+nopl"; 126 break; 127 case llvm::Triple::aarch64: 128 ArchName = "aarch64"; 129 FeaturesStr = "+fp-armv8,+neon,+crypto,+dotprod,+crc,+lse,+ras,+rdm," 130 "+fullfp16,+spe,+fuse-aes,+rcpc"; 131 break; 132 default: 133 errs() << "BOLT-ERROR: Unrecognized machine in ELF file.\n"; 134 return nullptr; 135 } 136 137 auto TheTriple = std::make_unique<Triple>(File->makeTriple()); 138 const std::string TripleName = TheTriple->str(); 139 140 std::string Error; 141 const Target *TheTarget = 142 TargetRegistry::lookupTarget(std::string(ArchName), *TheTriple, Error); 143 if (!TheTarget) { 144 errs() << "BOLT-ERROR: " << Error; 145 return nullptr; 146 } 147 148 std::unique_ptr<const MCRegisterInfo> MRI( 149 TheTarget->createMCRegInfo(TripleName)); 150 if (!MRI) { 151 errs() << "BOLT-ERROR: no register info for target " << TripleName << "\n"; 152 return nullptr; 153 } 154 155 // Set up disassembler. 156 std::unique_ptr<const MCAsmInfo> AsmInfo( 157 TheTarget->createMCAsmInfo(*MRI, TripleName, MCTargetOptions())); 158 if (!AsmInfo) { 159 errs() << "BOLT-ERROR: no assembly info for target " << TripleName << "\n"; 160 return nullptr; 161 } 162 163 std::unique_ptr<const MCSubtargetInfo> STI( 164 TheTarget->createMCSubtargetInfo(TripleName, "", FeaturesStr)); 165 if (!STI) { 166 errs() << "BOLT-ERROR: no subtarget info for target " << TripleName << "\n"; 167 return nullptr; 168 } 169 170 std::unique_ptr<const MCInstrInfo> MII(TheTarget->createMCInstrInfo()); 171 if (!MII) { 172 errs() << "BOLT-ERROR: no instruction info for target " << TripleName 173 << "\n"; 174 return nullptr; 175 } 176 177 std::unique_ptr<MCContext> Ctx( 178 new MCContext(*TheTriple, AsmInfo.get(), MRI.get(), STI.get())); 179 std::unique_ptr<MCObjectFileInfo> MOFI( 180 TheTarget->createMCObjectFileInfo(*Ctx, IsPIC)); 181 Ctx->setObjectFileInfo(MOFI.get()); 182 // We do not support X86 Large code model. Change this in the future. 183 bool Large = false; 184 if (TheTriple->getArch() == llvm::Triple::aarch64) 185 Large = true; 186 unsigned LSDAEncoding = 187 Large ? dwarf::DW_EH_PE_absptr : dwarf::DW_EH_PE_udata4; 188 unsigned TTypeEncoding = 189 Large ? dwarf::DW_EH_PE_absptr : dwarf::DW_EH_PE_udata4; 190 if (IsPIC) { 191 LSDAEncoding = dwarf::DW_EH_PE_pcrel | 192 (Large ? dwarf::DW_EH_PE_sdata8 : dwarf::DW_EH_PE_sdata4); 193 TTypeEncoding = dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel | 194 (Large ? dwarf::DW_EH_PE_sdata8 : dwarf::DW_EH_PE_sdata4); 195 } 196 197 std::unique_ptr<MCDisassembler> DisAsm( 198 TheTarget->createMCDisassembler(*STI, *Ctx)); 199 200 if (!DisAsm) { 201 errs() << "BOLT-ERROR: no disassembler for target " << TripleName << "\n"; 202 return nullptr; 203 } 204 205 std::unique_ptr<const MCInstrAnalysis> MIA( 206 TheTarget->createMCInstrAnalysis(MII.get())); 207 if (!MIA) { 208 errs() << "BOLT-ERROR: failed to create instruction analysis for target" 209 << TripleName << "\n"; 210 return nullptr; 211 } 212 213 int AsmPrinterVariant = AsmInfo->getAssemblerDialect(); 214 std::unique_ptr<MCInstPrinter> InstructionPrinter( 215 TheTarget->createMCInstPrinter(*TheTriple, AsmPrinterVariant, *AsmInfo, 216 *MII, *MRI)); 217 if (!InstructionPrinter) { 218 errs() << "BOLT-ERROR: no instruction printer for target " << TripleName 219 << '\n'; 220 return nullptr; 221 } 222 InstructionPrinter->setPrintImmHex(true); 223 224 std::unique_ptr<MCCodeEmitter> MCE( 225 TheTarget->createMCCodeEmitter(*MII, *MRI, *Ctx)); 226 227 // Make sure we don't miss any output on core dumps. 228 outs().SetUnbuffered(); 229 errs().SetUnbuffered(); 230 dbgs().SetUnbuffered(); 231 232 auto BC = std::make_unique<BinaryContext>( 233 std::move(Ctx), std::move(DwCtx), std::move(TheTriple), TheTarget, 234 std::string(TripleName), std::move(MCE), std::move(MOFI), 235 std::move(AsmInfo), std::move(MII), std::move(STI), 236 std::move(InstructionPrinter), std::move(MIA), nullptr, std::move(MRI), 237 std::move(DisAsm)); 238 239 BC->TTypeEncoding = TTypeEncoding; 240 BC->LSDAEncoding = LSDAEncoding; 241 242 BC->MAB = std::unique_ptr<MCAsmBackend>( 243 BC->TheTarget->createMCAsmBackend(*BC->STI, *BC->MRI, MCTargetOptions())); 244 245 BC->setFilename(File->getFileName()); 246 247 BC->HasFixedLoadAddress = !IsPIC; 248 249 return BC; 250 } 251 252 bool BinaryContext::forceSymbolRelocations(StringRef SymbolName) const { 253 if (opts::HotText && 254 (SymbolName == "__hot_start" || SymbolName == "__hot_end")) 255 return true; 256 257 if (opts::HotData && 258 (SymbolName == "__hot_data_start" || SymbolName == "__hot_data_end")) 259 return true; 260 261 if (SymbolName == "_end") 262 return true; 263 264 return false; 265 } 266 267 std::unique_ptr<MCObjectWriter> 268 BinaryContext::createObjectWriter(raw_pwrite_stream &OS) { 269 return MAB->createObjectWriter(OS); 270 } 271 272 bool BinaryContext::validateObjectNesting() const { 273 auto Itr = BinaryDataMap.begin(); 274 auto End = BinaryDataMap.end(); 275 bool Valid = true; 276 while (Itr != End) { 277 auto Next = std::next(Itr); 278 while (Next != End && 279 Itr->second->getSection() == Next->second->getSection() && 280 Itr->second->containsRange(Next->second->getAddress(), 281 Next->second->getSize())) { 282 if (Next->second->Parent != Itr->second) { 283 errs() << "BOLT-WARNING: object nesting incorrect for:\n" 284 << "BOLT-WARNING: " << *Itr->second << "\n" 285 << "BOLT-WARNING: " << *Next->second << "\n"; 286 Valid = false; 287 } 288 ++Next; 289 } 290 Itr = Next; 291 } 292 return Valid; 293 } 294 295 bool BinaryContext::validateHoles() const { 296 bool Valid = true; 297 for (BinarySection &Section : sections()) { 298 for (const Relocation &Rel : Section.relocations()) { 299 uint64_t RelAddr = Rel.Offset + Section.getAddress(); 300 const BinaryData *BD = getBinaryDataContainingAddress(RelAddr); 301 if (!BD) { 302 errs() << "BOLT-WARNING: no BinaryData found for relocation at address" 303 << " 0x" << Twine::utohexstr(RelAddr) << " in " 304 << Section.getName() << "\n"; 305 Valid = false; 306 } else if (!BD->getAtomicRoot()) { 307 errs() << "BOLT-WARNING: no atomic BinaryData found for relocation at " 308 << "address 0x" << Twine::utohexstr(RelAddr) << " in " 309 << Section.getName() << "\n"; 310 Valid = false; 311 } 312 } 313 } 314 return Valid; 315 } 316 317 void BinaryContext::updateObjectNesting(BinaryDataMapType::iterator GAI) { 318 const uint64_t Address = GAI->second->getAddress(); 319 const uint64_t Size = GAI->second->getSize(); 320 321 auto fixParents = [&](BinaryDataMapType::iterator Itr, 322 BinaryData *NewParent) { 323 BinaryData *OldParent = Itr->second->Parent; 324 Itr->second->Parent = NewParent; 325 ++Itr; 326 while (Itr != BinaryDataMap.end() && OldParent && 327 Itr->second->Parent == OldParent) { 328 Itr->second->Parent = NewParent; 329 ++Itr; 330 } 331 }; 332 333 // Check if the previous symbol contains the newly added symbol. 334 if (GAI != BinaryDataMap.begin()) { 335 BinaryData *Prev = std::prev(GAI)->second; 336 while (Prev) { 337 if (Prev->getSection() == GAI->second->getSection() && 338 Prev->containsRange(Address, Size)) { 339 fixParents(GAI, Prev); 340 } else { 341 fixParents(GAI, nullptr); 342 } 343 Prev = Prev->Parent; 344 } 345 } 346 347 // Check if the newly added symbol contains any subsequent symbols. 348 if (Size != 0) { 349 BinaryData *BD = GAI->second->Parent ? GAI->second->Parent : GAI->second; 350 auto Itr = std::next(GAI); 351 while ( 352 Itr != BinaryDataMap.end() && 353 BD->containsRange(Itr->second->getAddress(), Itr->second->getSize())) { 354 Itr->second->Parent = BD; 355 ++Itr; 356 } 357 } 358 } 359 360 iterator_range<BinaryContext::binary_data_iterator> 361 BinaryContext::getSubBinaryData(BinaryData *BD) { 362 auto Start = std::next(BinaryDataMap.find(BD->getAddress())); 363 auto End = Start; 364 while (End != BinaryDataMap.end() && BD->isAncestorOf(End->second)) 365 ++End; 366 return make_range(Start, End); 367 } 368 369 std::pair<const MCSymbol *, uint64_t> 370 BinaryContext::handleAddressRef(uint64_t Address, BinaryFunction &BF, 371 bool IsPCRel) { 372 uint64_t Addend = 0; 373 374 if (isAArch64()) { 375 // Check if this is an access to a constant island and create bookkeeping 376 // to keep track of it and emit it later as part of this function. 377 if (MCSymbol *IslandSym = BF.getOrCreateIslandAccess(Address)) 378 return std::make_pair(IslandSym, Addend); 379 380 // Detect custom code written in assembly that refers to arbitrary 381 // constant islands from other functions. Write this reference so we 382 // can pull this constant island and emit it as part of this function 383 // too. 384 auto IslandIter = AddressToConstantIslandMap.lower_bound(Address); 385 if (IslandIter != AddressToConstantIslandMap.end()) { 386 if (MCSymbol *IslandSym = 387 IslandIter->second->getOrCreateProxyIslandAccess(Address, BF)) { 388 BF.createIslandDependency(IslandSym, IslandIter->second); 389 return std::make_pair(IslandSym, Addend); 390 } 391 } 392 } 393 394 // Note that the address does not necessarily have to reside inside 395 // a section, it could be an absolute address too. 396 ErrorOr<BinarySection &> Section = getSectionForAddress(Address); 397 if (Section && Section->isText()) { 398 if (BF.containsAddress(Address, /*UseMaxSize=*/isAArch64())) { 399 if (Address != BF.getAddress()) { 400 // The address could potentially escape. Mark it as another entry 401 // point into the function. 402 if (opts::Verbosity >= 1) { 403 outs() << "BOLT-INFO: potentially escaped address 0x" 404 << Twine::utohexstr(Address) << " in function " << BF << '\n'; 405 } 406 BF.HasInternalLabelReference = true; 407 return std::make_pair( 408 BF.addEntryPointAtOffset(Address - BF.getAddress()), Addend); 409 } 410 } else { 411 BF.InterproceduralReferences.insert(Address); 412 } 413 } 414 415 // With relocations, catch jump table references outside of the basic block 416 // containing the indirect jump. 417 if (HasRelocations) { 418 const MemoryContentsType MemType = analyzeMemoryAt(Address, BF); 419 if (MemType == MemoryContentsType::POSSIBLE_PIC_JUMP_TABLE && IsPCRel) { 420 const MCSymbol *Symbol = 421 getOrCreateJumpTable(BF, Address, JumpTable::JTT_PIC); 422 423 return std::make_pair(Symbol, Addend); 424 } 425 } 426 427 if (BinaryData *BD = getBinaryDataContainingAddress(Address)) 428 return std::make_pair(BD->getSymbol(), Address - BD->getAddress()); 429 430 // TODO: use DWARF info to get size/alignment here? 431 MCSymbol *TargetSymbol = getOrCreateGlobalSymbol(Address, "DATAat"); 432 LLVM_DEBUG(dbgs() << "Created symbol " << TargetSymbol->getName() << '\n'); 433 return std::make_pair(TargetSymbol, Addend); 434 } 435 436 MemoryContentsType BinaryContext::analyzeMemoryAt(uint64_t Address, 437 BinaryFunction &BF) { 438 if (!isX86()) 439 return MemoryContentsType::UNKNOWN; 440 441 ErrorOr<BinarySection &> Section = getSectionForAddress(Address); 442 if (!Section) { 443 // No section - possibly an absolute address. Since we don't allow 444 // internal function addresses to escape the function scope - we 445 // consider it a tail call. 446 if (opts::Verbosity > 1) { 447 errs() << "BOLT-WARNING: no section for address 0x" 448 << Twine::utohexstr(Address) << " referenced from function " << BF 449 << '\n'; 450 } 451 return MemoryContentsType::UNKNOWN; 452 } 453 454 if (Section->isVirtual()) { 455 // The contents are filled at runtime. 456 return MemoryContentsType::UNKNOWN; 457 } 458 459 // No support for jump tables in code yet. 460 if (Section->isText()) 461 return MemoryContentsType::UNKNOWN; 462 463 // Start with checking for PIC jump table. We expect non-PIC jump tables 464 // to have high 32 bits set to 0. 465 if (analyzeJumpTable(Address, JumpTable::JTT_PIC, BF)) 466 return MemoryContentsType::POSSIBLE_PIC_JUMP_TABLE; 467 468 if (analyzeJumpTable(Address, JumpTable::JTT_NORMAL, BF)) 469 return MemoryContentsType::POSSIBLE_JUMP_TABLE; 470 471 return MemoryContentsType::UNKNOWN; 472 } 473 474 /// Check if <fragment restored name> == <parent restored name>.cold(.\d+)? 475 bool isPotentialFragmentByName(BinaryFunction &Fragment, 476 BinaryFunction &Parent) { 477 for (StringRef Name : Parent.getNames()) { 478 std::string NamePrefix = Regex::escape(NameResolver::restore(Name)); 479 std::string NameRegex = Twine(NamePrefix, "\\.cold(\\.[0-9]+)?").str(); 480 if (Fragment.hasRestoredNameRegex(NameRegex)) 481 return true; 482 } 483 return false; 484 } 485 486 bool BinaryContext::analyzeJumpTable(const uint64_t Address, 487 const JumpTable::JumpTableType Type, 488 BinaryFunction &BF, 489 const uint64_t NextJTAddress, 490 JumpTable::OffsetsType *Offsets) { 491 // Is one of the targets __builtin_unreachable? 492 bool HasUnreachable = false; 493 494 // Number of targets other than __builtin_unreachable. 495 uint64_t NumRealEntries = 0; 496 497 constexpr uint64_t INVALID_OFFSET = std::numeric_limits<uint64_t>::max(); 498 auto addOffset = [&](uint64_t Offset) { 499 if (Offsets) 500 Offsets->emplace_back(Offset); 501 }; 502 503 auto doesBelongToFunction = [&](const uint64_t Addr, 504 BinaryFunction *TargetBF) -> bool { 505 if (BF.containsAddress(Addr)) 506 return true; 507 // Nothing to do if we failed to identify the containing function. 508 if (!TargetBF) 509 return false; 510 // Case 1: check if BF is a fragment and TargetBF is its parent. 511 if (BF.isFragment()) { 512 // Parent function may or may not be already registered. 513 // Set parent link based on function name matching heuristic. 514 return registerFragment(BF, *TargetBF); 515 } 516 // Case 2: check if TargetBF is a fragment and BF is its parent. 517 return TargetBF->isFragment() && registerFragment(*TargetBF, BF); 518 }; 519 520 ErrorOr<BinarySection &> Section = getSectionForAddress(Address); 521 if (!Section) 522 return false; 523 524 // The upper bound is defined by containing object, section limits, and 525 // the next jump table in memory. 526 uint64_t UpperBound = Section->getEndAddress(); 527 const BinaryData *JumpTableBD = getBinaryDataAtAddress(Address); 528 if (JumpTableBD && JumpTableBD->getSize()) { 529 assert(JumpTableBD->getEndAddress() <= UpperBound && 530 "data object cannot cross a section boundary"); 531 UpperBound = JumpTableBD->getEndAddress(); 532 } 533 if (NextJTAddress) 534 UpperBound = std::min(NextJTAddress, UpperBound); 535 536 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: analyzeJumpTable in " << BF.getPrintName() 537 << '\n'); 538 const uint64_t EntrySize = getJumpTableEntrySize(Type); 539 for (uint64_t EntryAddress = Address; EntryAddress <= UpperBound - EntrySize; 540 EntryAddress += EntrySize) { 541 LLVM_DEBUG(dbgs() << " * Checking 0x" << Twine::utohexstr(EntryAddress) 542 << " -> "); 543 // Check if there's a proper relocation against the jump table entry. 544 if (HasRelocations) { 545 if (Type == JumpTable::JTT_PIC && 546 !DataPCRelocations.count(EntryAddress)) { 547 LLVM_DEBUG( 548 dbgs() << "FAIL: JTT_PIC table, no relocation for this address\n"); 549 break; 550 } 551 if (Type == JumpTable::JTT_NORMAL && !getRelocationAt(EntryAddress)) { 552 LLVM_DEBUG( 553 dbgs() 554 << "FAIL: JTT_NORMAL table, no relocation for this address\n"); 555 break; 556 } 557 } 558 559 const uint64_t Value = 560 (Type == JumpTable::JTT_PIC) 561 ? Address + *getSignedValueAtAddress(EntryAddress, EntrySize) 562 : *getPointerAtAddress(EntryAddress); 563 564 // __builtin_unreachable() case. 565 if (Value == BF.getAddress() + BF.getSize()) { 566 addOffset(Value - BF.getAddress()); 567 HasUnreachable = true; 568 LLVM_DEBUG(dbgs() << "OK: __builtin_unreachable\n"); 569 continue; 570 } 571 572 // Function or one of its fragments. 573 BinaryFunction *TargetBF = getBinaryFunctionContainingAddress(Value); 574 575 // We assume that a jump table cannot have function start as an entry. 576 if (!doesBelongToFunction(Value, TargetBF) || Value == BF.getAddress()) { 577 LLVM_DEBUG({ 578 if (!BF.containsAddress(Value)) { 579 dbgs() << "FAIL: function doesn't contain this address\n"; 580 if (TargetBF) { 581 dbgs() << " ! function containing this address: " 582 << TargetBF->getPrintName() << '\n'; 583 if (TargetBF->isFragment()) 584 dbgs() << " ! is a fragment\n"; 585 for (BinaryFunction *TargetParent : TargetBF->ParentFragments) 586 dbgs() << " ! its parent is " 587 << (TargetParent ? TargetParent->getPrintName() : "(none)") 588 << '\n'; 589 } 590 } 591 if (Value == BF.getAddress()) 592 dbgs() << "FAIL: jump table cannot have function start as an entry\n"; 593 }); 594 break; 595 } 596 597 // Check there's an instruction at this offset. 598 if (TargetBF->getState() == BinaryFunction::State::Disassembled && 599 !TargetBF->getInstructionAtOffset(Value - TargetBF->getAddress())) { 600 LLVM_DEBUG(dbgs() << "FAIL: no instruction at this offset\n"); 601 break; 602 } 603 604 ++NumRealEntries; 605 606 if (TargetBF == &BF) { 607 // Address inside the function. 608 addOffset(Value - TargetBF->getAddress()); 609 LLVM_DEBUG(dbgs() << "OK: real entry\n"); 610 } else { 611 // Address in split fragment. 612 BF.setHasSplitJumpTable(true); 613 // Add invalid offset for proper identification of jump table size. 614 addOffset(INVALID_OFFSET); 615 LLVM_DEBUG(dbgs() << "OK: address in split fragment " 616 << TargetBF->getPrintName() << '\n'); 617 } 618 } 619 620 // It's a jump table if the number of real entries is more than 1, or there's 621 // one real entry and "unreachable" targets. If there are only multiple 622 // "unreachable" targets, then it's not a jump table. 623 return NumRealEntries + HasUnreachable >= 2; 624 } 625 626 void BinaryContext::populateJumpTables() { 627 LLVM_DEBUG(dbgs() << "DataPCRelocations: " << DataPCRelocations.size() 628 << '\n'); 629 for (auto JTI = JumpTables.begin(), JTE = JumpTables.end(); JTI != JTE; 630 ++JTI) { 631 JumpTable *JT = JTI->second; 632 BinaryFunction &BF = *JT->Parent; 633 634 if (!BF.isSimple()) 635 continue; 636 637 uint64_t NextJTAddress = 0; 638 auto NextJTI = std::next(JTI); 639 if (NextJTI != JTE) 640 NextJTAddress = NextJTI->second->getAddress(); 641 642 const bool Success = analyzeJumpTable(JT->getAddress(), JT->Type, BF, 643 NextJTAddress, &JT->OffsetEntries); 644 if (!Success) { 645 dbgs() << "failed to analyze jump table in function " << BF << '\n'; 646 JT->print(dbgs()); 647 if (NextJTI != JTE) { 648 dbgs() << "next jump table at 0x" 649 << Twine::utohexstr(NextJTI->second->getAddress()) 650 << " belongs to function " << *NextJTI->second->Parent << '\n'; 651 NextJTI->second->print(dbgs()); 652 } 653 llvm_unreachable("jump table heuristic failure"); 654 } 655 656 for (uint64_t EntryOffset : JT->OffsetEntries) { 657 if (EntryOffset == BF.getSize()) 658 BF.IgnoredBranches.emplace_back(EntryOffset, BF.getSize()); 659 else 660 BF.registerReferencedOffset(EntryOffset); 661 } 662 663 // In strict mode, erase PC-relative relocation record. Later we check that 664 // all such records are erased and thus have been accounted for. 665 if (opts::StrictMode && JT->Type == JumpTable::JTT_PIC) { 666 for (uint64_t Address = JT->getAddress(); 667 Address < JT->getAddress() + JT->getSize(); 668 Address += JT->EntrySize) { 669 DataPCRelocations.erase(DataPCRelocations.find(Address)); 670 } 671 } 672 673 // Mark to skip the function and all its fragments. 674 if (BF.hasSplitJumpTable()) 675 FragmentsToSkip.push_back(&BF); 676 } 677 678 if (opts::StrictMode && DataPCRelocations.size()) { 679 LLVM_DEBUG({ 680 dbgs() << DataPCRelocations.size() 681 << " unclaimed PC-relative relocations left in data:\n"; 682 for (uint64_t Reloc : DataPCRelocations) 683 dbgs() << Twine::utohexstr(Reloc) << '\n'; 684 }); 685 assert(0 && "unclaimed PC-relative relocations left in data\n"); 686 } 687 clearList(DataPCRelocations); 688 } 689 690 void BinaryContext::skipMarkedFragments() { 691 // Unique functions in the vector. 692 std::unordered_set<BinaryFunction *> UniqueFunctions(FragmentsToSkip.begin(), 693 FragmentsToSkip.end()); 694 // Copy the functions back to FragmentsToSkip. 695 FragmentsToSkip.assign(UniqueFunctions.begin(), UniqueFunctions.end()); 696 auto addToWorklist = [&](BinaryFunction *Function) -> void { 697 if (UniqueFunctions.count(Function)) 698 return; 699 FragmentsToSkip.push_back(Function); 700 UniqueFunctions.insert(Function); 701 }; 702 // Functions containing split jump tables need to be skipped with all 703 // fragments (transitively). 704 for (size_t I = 0; I != FragmentsToSkip.size(); I++) { 705 BinaryFunction *BF = FragmentsToSkip[I]; 706 assert(UniqueFunctions.count(BF) && 707 "internal error in traversing function fragments"); 708 if (opts::Verbosity >= 1) 709 errs() << "BOLT-WARNING: Ignoring " << BF->getPrintName() << '\n'; 710 BF->setIgnored(); 711 std::for_each(BF->Fragments.begin(), BF->Fragments.end(), addToWorklist); 712 std::for_each(BF->ParentFragments.begin(), BF->ParentFragments.end(), 713 addToWorklist); 714 } 715 if (!FragmentsToSkip.empty()) 716 errs() << "BOLT-WARNING: ignored " << FragmentsToSkip.size() << " function" 717 << (FragmentsToSkip.size() == 1 ? "" : "s") 718 << " due to cold fragments\n"; 719 FragmentsToSkip.clear(); 720 } 721 722 MCSymbol *BinaryContext::getOrCreateGlobalSymbol(uint64_t Address, Twine Prefix, 723 uint64_t Size, 724 uint16_t Alignment, 725 unsigned Flags) { 726 auto Itr = BinaryDataMap.find(Address); 727 if (Itr != BinaryDataMap.end()) { 728 assert(Itr->second->getSize() == Size || !Size); 729 return Itr->second->getSymbol(); 730 } 731 732 std::string Name = (Prefix + "0x" + Twine::utohexstr(Address)).str(); 733 assert(!GlobalSymbols.count(Name) && "created name is not unique"); 734 return registerNameAtAddress(Name, Address, Size, Alignment, Flags); 735 } 736 737 MCSymbol *BinaryContext::getOrCreateUndefinedGlobalSymbol(StringRef Name) { 738 return Ctx->getOrCreateSymbol(Name); 739 } 740 741 BinaryFunction *BinaryContext::createBinaryFunction( 742 const std::string &Name, BinarySection &Section, uint64_t Address, 743 uint64_t Size, uint64_t SymbolSize, uint16_t Alignment) { 744 auto Result = BinaryFunctions.emplace( 745 Address, BinaryFunction(Name, Section, Address, Size, *this)); 746 assert(Result.second == true && "unexpected duplicate function"); 747 BinaryFunction *BF = &Result.first->second; 748 registerNameAtAddress(Name, Address, SymbolSize ? SymbolSize : Size, 749 Alignment); 750 setSymbolToFunctionMap(BF->getSymbol(), BF); 751 return BF; 752 } 753 754 const MCSymbol * 755 BinaryContext::getOrCreateJumpTable(BinaryFunction &Function, uint64_t Address, 756 JumpTable::JumpTableType Type) { 757 if (JumpTable *JT = getJumpTableContainingAddress(Address)) { 758 assert(JT->Type == Type && "jump table types have to match"); 759 assert(JT->Parent == &Function && 760 "cannot re-use jump table of a different function"); 761 assert(Address == JT->getAddress() && "unexpected non-empty jump table"); 762 763 return JT->getFirstLabel(); 764 } 765 766 // Re-use the existing symbol if possible. 767 MCSymbol *JTLabel = nullptr; 768 if (BinaryData *Object = getBinaryDataAtAddress(Address)) { 769 if (!isInternalSymbolName(Object->getSymbol()->getName())) 770 JTLabel = Object->getSymbol(); 771 } 772 773 const uint64_t EntrySize = getJumpTableEntrySize(Type); 774 if (!JTLabel) { 775 const std::string JumpTableName = generateJumpTableName(Function, Address); 776 JTLabel = registerNameAtAddress(JumpTableName, Address, 0, EntrySize); 777 } 778 779 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: creating jump table " << JTLabel->getName() 780 << " in function " << Function << '\n'); 781 782 JumpTable *JT = new JumpTable(*JTLabel, Address, EntrySize, Type, 783 JumpTable::LabelMapType{{0, JTLabel}}, Function, 784 *getSectionForAddress(Address)); 785 JumpTables.emplace(Address, JT); 786 787 // Duplicate the entry for the parent function for easy access. 788 Function.JumpTables.emplace(Address, JT); 789 790 return JTLabel; 791 } 792 793 std::pair<uint64_t, const MCSymbol *> 794 BinaryContext::duplicateJumpTable(BinaryFunction &Function, JumpTable *JT, 795 const MCSymbol *OldLabel) { 796 auto L = scopeLock(); 797 unsigned Offset = 0; 798 bool Found = false; 799 for (std::pair<const unsigned, MCSymbol *> Elmt : JT->Labels) { 800 if (Elmt.second != OldLabel) 801 continue; 802 Offset = Elmt.first; 803 Found = true; 804 break; 805 } 806 assert(Found && "Label not found"); 807 MCSymbol *NewLabel = Ctx->createNamedTempSymbol("duplicatedJT"); 808 JumpTable *NewJT = 809 new JumpTable(*NewLabel, JT->getAddress(), JT->EntrySize, JT->Type, 810 JumpTable::LabelMapType{{Offset, NewLabel}}, Function, 811 *getSectionForAddress(JT->getAddress())); 812 NewJT->Entries = JT->Entries; 813 NewJT->Counts = JT->Counts; 814 uint64_t JumpTableID = ++DuplicatedJumpTables; 815 // Invert it to differentiate from regular jump tables whose IDs are their 816 // addresses in the input binary memory space 817 JumpTableID = ~JumpTableID; 818 JumpTables.emplace(JumpTableID, NewJT); 819 Function.JumpTables.emplace(JumpTableID, NewJT); 820 return std::make_pair(JumpTableID, NewLabel); 821 } 822 823 std::string BinaryContext::generateJumpTableName(const BinaryFunction &BF, 824 uint64_t Address) { 825 size_t Id; 826 uint64_t Offset = 0; 827 if (const JumpTable *JT = BF.getJumpTableContainingAddress(Address)) { 828 Offset = Address - JT->getAddress(); 829 auto Itr = JT->Labels.find(Offset); 830 if (Itr != JT->Labels.end()) 831 return std::string(Itr->second->getName()); 832 Id = JumpTableIds.at(JT->getAddress()); 833 } else { 834 Id = JumpTableIds[Address] = BF.JumpTables.size(); 835 } 836 return ("JUMP_TABLE/" + BF.getOneName().str() + "." + std::to_string(Id) + 837 (Offset ? ("." + std::to_string(Offset)) : "")); 838 } 839 840 bool BinaryContext::hasValidCodePadding(const BinaryFunction &BF) { 841 // FIXME: aarch64 support is missing. 842 if (!isX86()) 843 return true; 844 845 if (BF.getSize() == BF.getMaxSize()) 846 return true; 847 848 ErrorOr<ArrayRef<unsigned char>> FunctionData = BF.getData(); 849 assert(FunctionData && "cannot get function as data"); 850 851 uint64_t Offset = BF.getSize(); 852 MCInst Instr; 853 uint64_t InstrSize = 0; 854 uint64_t InstrAddress = BF.getAddress() + Offset; 855 using std::placeholders::_1; 856 857 // Skip instructions that satisfy the predicate condition. 858 auto skipInstructions = [&](std::function<bool(const MCInst &)> Predicate) { 859 const uint64_t StartOffset = Offset; 860 for (; Offset < BF.getMaxSize(); 861 Offset += InstrSize, InstrAddress += InstrSize) { 862 if (!DisAsm->getInstruction(Instr, InstrSize, FunctionData->slice(Offset), 863 InstrAddress, nulls())) 864 break; 865 if (!Predicate(Instr)) 866 break; 867 } 868 869 return Offset - StartOffset; 870 }; 871 872 // Skip a sequence of zero bytes. 873 auto skipZeros = [&]() { 874 const uint64_t StartOffset = Offset; 875 for (; Offset < BF.getMaxSize(); ++Offset) 876 if ((*FunctionData)[Offset] != 0) 877 break; 878 879 return Offset - StartOffset; 880 }; 881 882 // Accept the whole padding area filled with breakpoints. 883 auto isBreakpoint = std::bind(&MCPlusBuilder::isBreakpoint, MIB.get(), _1); 884 if (skipInstructions(isBreakpoint) && Offset == BF.getMaxSize()) 885 return true; 886 887 auto isNoop = std::bind(&MCPlusBuilder::isNoop, MIB.get(), _1); 888 889 // Some functions have a jump to the next function or to the padding area 890 // inserted after the body. 891 auto isSkipJump = [&](const MCInst &Instr) { 892 uint64_t TargetAddress = 0; 893 if (MIB->isUnconditionalBranch(Instr) && 894 MIB->evaluateBranch(Instr, InstrAddress, InstrSize, TargetAddress)) { 895 if (TargetAddress >= InstrAddress + InstrSize && 896 TargetAddress <= BF.getAddress() + BF.getMaxSize()) { 897 return true; 898 } 899 } 900 return false; 901 }; 902 903 // Skip over nops, jumps, and zero padding. Allow interleaving (this happens). 904 while (skipInstructions(isNoop) || skipInstructions(isSkipJump) || 905 skipZeros()) 906 ; 907 908 if (Offset == BF.getMaxSize()) 909 return true; 910 911 if (opts::Verbosity >= 1) { 912 errs() << "BOLT-WARNING: bad padding at address 0x" 913 << Twine::utohexstr(BF.getAddress() + BF.getSize()) 914 << " starting at offset " << (Offset - BF.getSize()) 915 << " in function " << BF << '\n' 916 << FunctionData->slice(BF.getSize(), BF.getMaxSize() - BF.getSize()) 917 << '\n'; 918 } 919 920 return false; 921 } 922 923 void BinaryContext::adjustCodePadding() { 924 for (auto &BFI : BinaryFunctions) { 925 BinaryFunction &BF = BFI.second; 926 if (!shouldEmit(BF)) 927 continue; 928 929 if (!hasValidCodePadding(BF)) { 930 if (HasRelocations) { 931 if (opts::Verbosity >= 1) { 932 outs() << "BOLT-INFO: function " << BF 933 << " has invalid padding. Ignoring the function.\n"; 934 } 935 BF.setIgnored(); 936 } else { 937 BF.setMaxSize(BF.getSize()); 938 } 939 } 940 } 941 } 942 943 MCSymbol *BinaryContext::registerNameAtAddress(StringRef Name, uint64_t Address, 944 uint64_t Size, 945 uint16_t Alignment, 946 unsigned Flags) { 947 // Register the name with MCContext. 948 MCSymbol *Symbol = Ctx->getOrCreateSymbol(Name); 949 950 auto GAI = BinaryDataMap.find(Address); 951 BinaryData *BD; 952 if (GAI == BinaryDataMap.end()) { 953 ErrorOr<BinarySection &> SectionOrErr = getSectionForAddress(Address); 954 BinarySection &Section = 955 SectionOrErr ? SectionOrErr.get() : absoluteSection(); 956 BD = new BinaryData(*Symbol, Address, Size, Alignment ? Alignment : 1, 957 Section, Flags); 958 GAI = BinaryDataMap.emplace(Address, BD).first; 959 GlobalSymbols[Name] = BD; 960 updateObjectNesting(GAI); 961 } else { 962 BD = GAI->second; 963 if (!BD->hasName(Name)) { 964 GlobalSymbols[Name] = BD; 965 BD->Symbols.push_back(Symbol); 966 } 967 } 968 969 return Symbol; 970 } 971 972 const BinaryData * 973 BinaryContext::getBinaryDataContainingAddressImpl(uint64_t Address) const { 974 auto NI = BinaryDataMap.lower_bound(Address); 975 auto End = BinaryDataMap.end(); 976 if ((NI != End && Address == NI->first) || 977 ((NI != BinaryDataMap.begin()) && (NI-- != BinaryDataMap.begin()))) { 978 if (NI->second->containsAddress(Address)) 979 return NI->second; 980 981 // If this is a sub-symbol, see if a parent data contains the address. 982 const BinaryData *BD = NI->second->getParent(); 983 while (BD) { 984 if (BD->containsAddress(Address)) 985 return BD; 986 BD = BD->getParent(); 987 } 988 } 989 return nullptr; 990 } 991 992 bool BinaryContext::setBinaryDataSize(uint64_t Address, uint64_t Size) { 993 auto NI = BinaryDataMap.find(Address); 994 assert(NI != BinaryDataMap.end()); 995 if (NI == BinaryDataMap.end()) 996 return false; 997 // TODO: it's possible that a jump table starts at the same address 998 // as a larger blob of private data. When we set the size of the 999 // jump table, it might be smaller than the total blob size. In this 1000 // case we just leave the original size since (currently) it won't really 1001 // affect anything. 1002 assert((!NI->second->Size || NI->second->Size == Size || 1003 (NI->second->isJumpTable() && NI->second->Size > Size)) && 1004 "can't change the size of a symbol that has already had its " 1005 "size set"); 1006 if (!NI->second->Size) { 1007 NI->second->Size = Size; 1008 updateObjectNesting(NI); 1009 return true; 1010 } 1011 return false; 1012 } 1013 1014 void BinaryContext::generateSymbolHashes() { 1015 auto isPadding = [](const BinaryData &BD) { 1016 StringRef Contents = BD.getSection().getContents(); 1017 StringRef SymData = Contents.substr(BD.getOffset(), BD.getSize()); 1018 return (BD.getName().startswith("HOLEat") || 1019 SymData.find_first_not_of(0) == StringRef::npos); 1020 }; 1021 1022 uint64_t NumCollisions = 0; 1023 for (auto &Entry : BinaryDataMap) { 1024 BinaryData &BD = *Entry.second; 1025 StringRef Name = BD.getName(); 1026 1027 if (!isInternalSymbolName(Name)) 1028 continue; 1029 1030 // First check if a non-anonymous alias exists and move it to the front. 1031 if (BD.getSymbols().size() > 1) { 1032 auto Itr = std::find_if(BD.getSymbols().begin(), BD.getSymbols().end(), 1033 [&](const MCSymbol *Symbol) { 1034 return !isInternalSymbolName(Symbol->getName()); 1035 }); 1036 if (Itr != BD.getSymbols().end()) { 1037 size_t Idx = std::distance(BD.getSymbols().begin(), Itr); 1038 std::swap(BD.getSymbols()[0], BD.getSymbols()[Idx]); 1039 continue; 1040 } 1041 } 1042 1043 // We have to skip 0 size symbols since they will all collide. 1044 if (BD.getSize() == 0) { 1045 continue; 1046 } 1047 1048 const uint64_t Hash = BD.getSection().hash(BD); 1049 const size_t Idx = Name.find("0x"); 1050 std::string NewName = 1051 (Twine(Name.substr(0, Idx)) + "_" + Twine::utohexstr(Hash)).str(); 1052 if (getBinaryDataByName(NewName)) { 1053 // Ignore collisions for symbols that appear to be padding 1054 // (i.e. all zeros or a "hole") 1055 if (!isPadding(BD)) { 1056 if (opts::Verbosity) { 1057 errs() << "BOLT-WARNING: collision detected when hashing " << BD 1058 << " with new name (" << NewName << "), skipping.\n"; 1059 } 1060 ++NumCollisions; 1061 } 1062 continue; 1063 } 1064 BD.Symbols.insert(BD.Symbols.begin(), Ctx->getOrCreateSymbol(NewName)); 1065 GlobalSymbols[NewName] = &BD; 1066 } 1067 if (NumCollisions) { 1068 errs() << "BOLT-WARNING: " << NumCollisions 1069 << " collisions detected while hashing binary objects"; 1070 if (!opts::Verbosity) 1071 errs() << ". Use -v=1 to see the list."; 1072 errs() << '\n'; 1073 } 1074 } 1075 1076 bool BinaryContext::registerFragment(BinaryFunction &TargetFunction, 1077 BinaryFunction &Function) const { 1078 if (!isPotentialFragmentByName(TargetFunction, Function)) 1079 return false; 1080 assert(TargetFunction.isFragment() && "TargetFunction must be a fragment"); 1081 if (TargetFunction.isParentFragment(&Function)) 1082 return true; 1083 TargetFunction.addParentFragment(Function); 1084 Function.addFragment(TargetFunction); 1085 if (!HasRelocations) { 1086 TargetFunction.setSimple(false); 1087 Function.setSimple(false); 1088 } 1089 if (opts::Verbosity >= 1) { 1090 outs() << "BOLT-INFO: marking " << TargetFunction << " as a fragment of " 1091 << Function << '\n'; 1092 } 1093 return true; 1094 } 1095 1096 void BinaryContext::processInterproceduralReferences(BinaryFunction &Function) { 1097 for (uint64_t Address : Function.InterproceduralReferences) { 1098 if (!Address) 1099 continue; 1100 1101 BinaryFunction *TargetFunction = 1102 getBinaryFunctionContainingAddress(Address); 1103 if (&Function == TargetFunction) 1104 continue; 1105 1106 if (TargetFunction) { 1107 if (TargetFunction->IsFragment && 1108 !registerFragment(*TargetFunction, Function)) { 1109 errs() << "BOLT-WARNING: interprocedural reference between unrelated " 1110 "fragments: " 1111 << Function.getPrintName() << " and " 1112 << TargetFunction->getPrintName() << '\n'; 1113 } 1114 if (uint64_t Offset = Address - TargetFunction->getAddress()) 1115 TargetFunction->addEntryPointAtOffset(Offset); 1116 1117 continue; 1118 } 1119 1120 // Check if address falls in function padding space - this could be 1121 // unmarked data in code. In this case adjust the padding space size. 1122 ErrorOr<BinarySection &> Section = getSectionForAddress(Address); 1123 assert(Section && "cannot get section for referenced address"); 1124 1125 if (!Section->isText()) 1126 continue; 1127 1128 // PLT requires special handling and could be ignored in this context. 1129 StringRef SectionName = Section->getName(); 1130 if (SectionName == ".plt" || SectionName == ".plt.got") 1131 continue; 1132 1133 if (opts::processAllFunctions()) { 1134 errs() << "BOLT-ERROR: cannot process binaries with unmarked " 1135 << "object in code at address 0x" << Twine::utohexstr(Address) 1136 << " belonging to section " << SectionName << " in current mode\n"; 1137 exit(1); 1138 } 1139 1140 TargetFunction = getBinaryFunctionContainingAddress(Address, 1141 /*CheckPastEnd=*/false, 1142 /*UseMaxSize=*/true); 1143 // We are not going to overwrite non-simple functions, but for simple 1144 // ones - adjust the padding size. 1145 if (TargetFunction && TargetFunction->isSimple()) { 1146 errs() << "BOLT-WARNING: function " << *TargetFunction 1147 << " has an object detected in a padding region at address 0x" 1148 << Twine::utohexstr(Address) << '\n'; 1149 TargetFunction->setMaxSize(TargetFunction->getSize()); 1150 } 1151 } 1152 1153 clearList(Function.InterproceduralReferences); 1154 } 1155 1156 void BinaryContext::postProcessSymbolTable() { 1157 fixBinaryDataHoles(); 1158 bool Valid = true; 1159 for (auto &Entry : BinaryDataMap) { 1160 BinaryData *BD = Entry.second; 1161 if ((BD->getName().startswith("SYMBOLat") || 1162 BD->getName().startswith("DATAat")) && 1163 !BD->getParent() && !BD->getSize() && !BD->isAbsolute() && 1164 BD->getSection()) { 1165 errs() << "BOLT-WARNING: zero-sized top level symbol: " << *BD << "\n"; 1166 Valid = false; 1167 } 1168 } 1169 assert(Valid); 1170 generateSymbolHashes(); 1171 } 1172 1173 void BinaryContext::foldFunction(BinaryFunction &ChildBF, 1174 BinaryFunction &ParentBF) { 1175 assert(!ChildBF.isMultiEntry() && !ParentBF.isMultiEntry() && 1176 "cannot merge functions with multiple entry points"); 1177 1178 std::unique_lock<std::shared_timed_mutex> WriteCtxLock(CtxMutex, 1179 std::defer_lock); 1180 std::unique_lock<std::shared_timed_mutex> WriteSymbolMapLock( 1181 SymbolToFunctionMapMutex, std::defer_lock); 1182 1183 const StringRef ChildName = ChildBF.getOneName(); 1184 1185 // Move symbols over and update bookkeeping info. 1186 for (MCSymbol *Symbol : ChildBF.getSymbols()) { 1187 ParentBF.getSymbols().push_back(Symbol); 1188 WriteSymbolMapLock.lock(); 1189 SymbolToFunctionMap[Symbol] = &ParentBF; 1190 WriteSymbolMapLock.unlock(); 1191 // NB: there's no need to update BinaryDataMap and GlobalSymbols. 1192 } 1193 ChildBF.getSymbols().clear(); 1194 1195 // Move other names the child function is known under. 1196 std::move(ChildBF.Aliases.begin(), ChildBF.Aliases.end(), 1197 std::back_inserter(ParentBF.Aliases)); 1198 ChildBF.Aliases.clear(); 1199 1200 if (HasRelocations) { 1201 // Merge execution counts of ChildBF into those of ParentBF. 1202 // Without relocations, we cannot reliably merge profiles as both functions 1203 // continue to exist and either one can be executed. 1204 ChildBF.mergeProfileDataInto(ParentBF); 1205 1206 std::shared_lock<std::shared_timed_mutex> ReadBfsLock(BinaryFunctionsMutex, 1207 std::defer_lock); 1208 std::unique_lock<std::shared_timed_mutex> WriteBfsLock(BinaryFunctionsMutex, 1209 std::defer_lock); 1210 // Remove ChildBF from the global set of functions in relocs mode. 1211 ReadBfsLock.lock(); 1212 auto FI = BinaryFunctions.find(ChildBF.getAddress()); 1213 ReadBfsLock.unlock(); 1214 1215 assert(FI != BinaryFunctions.end() && "function not found"); 1216 assert(&ChildBF == &FI->second && "function mismatch"); 1217 1218 WriteBfsLock.lock(); 1219 ChildBF.clearDisasmState(); 1220 FI = BinaryFunctions.erase(FI); 1221 WriteBfsLock.unlock(); 1222 1223 } else { 1224 // In non-relocation mode we keep the function, but rename it. 1225 std::string NewName = "__ICF_" + ChildName.str(); 1226 1227 WriteCtxLock.lock(); 1228 ChildBF.getSymbols().push_back(Ctx->getOrCreateSymbol(NewName)); 1229 WriteCtxLock.unlock(); 1230 1231 ChildBF.setFolded(&ParentBF); 1232 } 1233 } 1234 1235 void BinaryContext::fixBinaryDataHoles() { 1236 assert(validateObjectNesting() && "object nesting inconsitency detected"); 1237 1238 for (BinarySection &Section : allocatableSections()) { 1239 std::vector<std::pair<uint64_t, uint64_t>> Holes; 1240 1241 auto isNotHole = [&Section](const binary_data_iterator &Itr) { 1242 BinaryData *BD = Itr->second; 1243 bool isHole = (!BD->getParent() && !BD->getSize() && BD->isObject() && 1244 (BD->getName().startswith("SYMBOLat0x") || 1245 BD->getName().startswith("DATAat0x") || 1246 BD->getName().startswith("ANONYMOUS"))); 1247 return !isHole && BD->getSection() == Section && !BD->getParent(); 1248 }; 1249 1250 auto BDStart = BinaryDataMap.begin(); 1251 auto BDEnd = BinaryDataMap.end(); 1252 auto Itr = FilteredBinaryDataIterator(isNotHole, BDStart, BDEnd); 1253 auto End = FilteredBinaryDataIterator(isNotHole, BDEnd, BDEnd); 1254 1255 uint64_t EndAddress = Section.getAddress(); 1256 1257 while (Itr != End) { 1258 if (Itr->second->getAddress() > EndAddress) { 1259 uint64_t Gap = Itr->second->getAddress() - EndAddress; 1260 Holes.emplace_back(EndAddress, Gap); 1261 } 1262 EndAddress = Itr->second->getEndAddress(); 1263 ++Itr; 1264 } 1265 1266 if (EndAddress < Section.getEndAddress()) 1267 Holes.emplace_back(EndAddress, Section.getEndAddress() - EndAddress); 1268 1269 // If there is already a symbol at the start of the hole, grow that symbol 1270 // to cover the rest. Otherwise, create a new symbol to cover the hole. 1271 for (std::pair<uint64_t, uint64_t> &Hole : Holes) { 1272 BinaryData *BD = getBinaryDataAtAddress(Hole.first); 1273 if (BD) { 1274 // BD->getSection() can be != Section if there are sections that 1275 // overlap. In this case it is probably safe to just skip the holes 1276 // since the overlapping section will not(?) have any symbols in it. 1277 if (BD->getSection() == Section) 1278 setBinaryDataSize(Hole.first, Hole.second); 1279 } else { 1280 getOrCreateGlobalSymbol(Hole.first, "HOLEat", Hole.second, 1); 1281 } 1282 } 1283 } 1284 1285 assert(validateObjectNesting() && "object nesting inconsitency detected"); 1286 assert(validateHoles() && "top level hole detected in object map"); 1287 } 1288 1289 void BinaryContext::printGlobalSymbols(raw_ostream &OS) const { 1290 const BinarySection *CurrentSection = nullptr; 1291 bool FirstSection = true; 1292 1293 for (auto &Entry : BinaryDataMap) { 1294 const BinaryData *BD = Entry.second; 1295 const BinarySection &Section = BD->getSection(); 1296 if (FirstSection || Section != *CurrentSection) { 1297 uint64_t Address, Size; 1298 StringRef Name = Section.getName(); 1299 if (Section) { 1300 Address = Section.getAddress(); 1301 Size = Section.getSize(); 1302 } else { 1303 Address = BD->getAddress(); 1304 Size = BD->getSize(); 1305 } 1306 OS << "BOLT-INFO: Section " << Name << ", " 1307 << "0x" + Twine::utohexstr(Address) << ":" 1308 << "0x" + Twine::utohexstr(Address + Size) << "/" << Size << "\n"; 1309 CurrentSection = &Section; 1310 FirstSection = false; 1311 } 1312 1313 OS << "BOLT-INFO: "; 1314 const BinaryData *P = BD->getParent(); 1315 while (P) { 1316 OS << " "; 1317 P = P->getParent(); 1318 } 1319 OS << *BD << "\n"; 1320 } 1321 } 1322 1323 Expected<unsigned> 1324 BinaryContext::getDwarfFile(StringRef Directory, StringRef FileName, 1325 unsigned FileNumber, 1326 Optional<MD5::MD5Result> Checksum, 1327 Optional<StringRef> Source, unsigned CUID) { 1328 DwarfLineTable &Table = DwarfLineTablesCUMap[CUID]; 1329 return Table.tryGetFile(Directory, FileName, Checksum, Source, 1330 Ctx->getDwarfVersion(), FileNumber); 1331 } 1332 1333 unsigned BinaryContext::addDebugFilenameToUnit(const uint32_t DestCUID, 1334 const uint32_t SrcCUID, 1335 unsigned FileIndex) { 1336 DWARFCompileUnit *SrcUnit = DwCtx->getCompileUnitForOffset(SrcCUID); 1337 const DWARFDebugLine::LineTable *LineTable = 1338 DwCtx->getLineTableForUnit(SrcUnit); 1339 const std::vector<DWARFDebugLine::FileNameEntry> &FileNames = 1340 LineTable->Prologue.FileNames; 1341 // Dir indexes start at 1, as DWARF file numbers, and a dir index 0 1342 // means empty dir. 1343 assert(FileIndex > 0 && FileIndex <= FileNames.size() && 1344 "FileIndex out of range for the compilation unit."); 1345 StringRef Dir = ""; 1346 if (FileNames[FileIndex - 1].DirIdx != 0) { 1347 if (Optional<const char *> DirName = dwarf::toString( 1348 LineTable->Prologue 1349 .IncludeDirectories[FileNames[FileIndex - 1].DirIdx - 1])) { 1350 Dir = *DirName; 1351 } 1352 } 1353 StringRef FileName = ""; 1354 if (Optional<const char *> FName = 1355 dwarf::toString(FileNames[FileIndex - 1].Name)) 1356 FileName = *FName; 1357 assert(FileName != ""); 1358 return cantFail(getDwarfFile(Dir, FileName, 0, None, None, DestCUID)); 1359 } 1360 1361 std::vector<BinaryFunction *> BinaryContext::getSortedFunctions() { 1362 std::vector<BinaryFunction *> SortedFunctions(BinaryFunctions.size()); 1363 std::transform(BinaryFunctions.begin(), BinaryFunctions.end(), 1364 SortedFunctions.begin(), 1365 [](std::pair<const uint64_t, BinaryFunction> &BFI) { 1366 return &BFI.second; 1367 }); 1368 1369 std::stable_sort(SortedFunctions.begin(), SortedFunctions.end(), 1370 [](const BinaryFunction *A, const BinaryFunction *B) { 1371 if (A->hasValidIndex() && B->hasValidIndex()) { 1372 return A->getIndex() < B->getIndex(); 1373 } 1374 return A->hasValidIndex(); 1375 }); 1376 return SortedFunctions; 1377 } 1378 1379 std::vector<BinaryFunction *> BinaryContext::getAllBinaryFunctions() { 1380 std::vector<BinaryFunction *> AllFunctions; 1381 AllFunctions.reserve(BinaryFunctions.size() + InjectedBinaryFunctions.size()); 1382 std::transform(BinaryFunctions.begin(), BinaryFunctions.end(), 1383 std::back_inserter(AllFunctions), 1384 [](std::pair<const uint64_t, BinaryFunction> &BFI) { 1385 return &BFI.second; 1386 }); 1387 std::copy(InjectedBinaryFunctions.begin(), InjectedBinaryFunctions.end(), 1388 std::back_inserter(AllFunctions)); 1389 1390 return AllFunctions; 1391 } 1392 1393 Optional<DWARFUnit *> BinaryContext::getDWOCU(uint64_t DWOId) { 1394 auto Iter = DWOCUs.find(DWOId); 1395 if (Iter == DWOCUs.end()) 1396 return None; 1397 1398 return Iter->second; 1399 } 1400 1401 DWARFContext *BinaryContext::getDWOContext() { 1402 if (DWOCUs.empty()) 1403 return nullptr; 1404 return &DWOCUs.begin()->second->getContext(); 1405 } 1406 1407 /// Handles DWO sections that can either be in .o, .dwo or .dwp files. 1408 void BinaryContext::preprocessDWODebugInfo() { 1409 for (const std::unique_ptr<DWARFUnit> &CU : DwCtx->compile_units()) { 1410 DWARFUnit *const DwarfUnit = CU.get(); 1411 if (llvm::Optional<uint64_t> DWOId = DwarfUnit->getDWOId()) { 1412 DWARFUnit *DWOCU = DwarfUnit->getNonSkeletonUnitDIE(false).getDwarfUnit(); 1413 if (!DWOCU->isDWOUnit()) { 1414 std::string DWOName = dwarf::toString( 1415 DwarfUnit->getUnitDIE().find( 1416 {dwarf::DW_AT_dwo_name, dwarf::DW_AT_GNU_dwo_name}), 1417 ""); 1418 outs() << "BOLT-WARNING: Debug Fission: DWO debug information for " 1419 << DWOName 1420 << " was not retrieved and won't be updated. Please check " 1421 "relative path.\n"; 1422 continue; 1423 } 1424 DWOCUs[*DWOId] = DWOCU; 1425 } 1426 } 1427 } 1428 1429 void BinaryContext::preprocessDebugInfo() { 1430 struct CURange { 1431 uint64_t LowPC; 1432 uint64_t HighPC; 1433 DWARFUnit *Unit; 1434 1435 bool operator<(const CURange &Other) const { return LowPC < Other.LowPC; } 1436 }; 1437 1438 // Building a map of address ranges to CUs similar to .debug_aranges and use 1439 // it to assign CU to functions. 1440 std::vector<CURange> AllRanges; 1441 AllRanges.reserve(DwCtx->getNumCompileUnits()); 1442 for (const std::unique_ptr<DWARFUnit> &CU : DwCtx->compile_units()) { 1443 Expected<DWARFAddressRangesVector> RangesOrError = 1444 CU->getUnitDIE().getAddressRanges(); 1445 if (!RangesOrError) { 1446 consumeError(RangesOrError.takeError()); 1447 continue; 1448 } 1449 for (DWARFAddressRange &Range : *RangesOrError) { 1450 // Parts of the debug info could be invalidated due to corresponding code 1451 // being removed from the binary by the linker. Hence we check if the 1452 // address is a valid one. 1453 if (containsAddress(Range.LowPC)) 1454 AllRanges.emplace_back(CURange{Range.LowPC, Range.HighPC, CU.get()}); 1455 } 1456 } 1457 1458 std::sort(AllRanges.begin(), AllRanges.end()); 1459 for (auto &KV : BinaryFunctions) { 1460 const uint64_t FunctionAddress = KV.first; 1461 BinaryFunction &Function = KV.second; 1462 1463 auto It = std::partition_point( 1464 AllRanges.begin(), AllRanges.end(), 1465 [=](CURange R) { return R.HighPC <= FunctionAddress; }); 1466 if (It != AllRanges.end() && It->LowPC <= FunctionAddress) { 1467 Function.setDWARFUnit(It->Unit); 1468 } 1469 } 1470 1471 // Discover units with debug info that needs to be updated. 1472 for (const auto &KV : BinaryFunctions) { 1473 const BinaryFunction &BF = KV.second; 1474 if (shouldEmit(BF) && BF.getDWARFUnit()) 1475 ProcessedCUs.insert(BF.getDWARFUnit()); 1476 } 1477 1478 // Clear debug info for functions from units that we are not going to process. 1479 for (auto &KV : BinaryFunctions) { 1480 BinaryFunction &BF = KV.second; 1481 if (BF.getDWARFUnit() && !ProcessedCUs.count(BF.getDWARFUnit())) 1482 BF.setDWARFUnit(nullptr); 1483 } 1484 1485 if (opts::Verbosity >= 1) { 1486 outs() << "BOLT-INFO: " << ProcessedCUs.size() << " out of " 1487 << DwCtx->getNumCompileUnits() << " CUs will be updated\n"; 1488 } 1489 1490 // Populate MCContext with DWARF files from all units. 1491 StringRef GlobalPrefix = AsmInfo->getPrivateGlobalPrefix(); 1492 for (const std::unique_ptr<DWARFUnit> &CU : DwCtx->compile_units()) { 1493 const uint64_t CUID = CU->getOffset(); 1494 getDwarfLineTable(CUID).setLabel(Ctx->getOrCreateSymbol( 1495 GlobalPrefix + "line_table_start" + Twine(CUID))); 1496 1497 if (!ProcessedCUs.count(CU.get())) 1498 continue; 1499 1500 const DWARFDebugLine::LineTable *LineTable = 1501 DwCtx->getLineTableForUnit(CU.get()); 1502 const std::vector<DWARFDebugLine::FileNameEntry> &FileNames = 1503 LineTable->Prologue.FileNames; 1504 1505 // Assign a unique label to every line table, one per CU. 1506 // Make sure empty debug line tables are registered too. 1507 if (FileNames.empty()) { 1508 cantFail(getDwarfFile("", "<unknown>", 0, None, None, CUID)); 1509 continue; 1510 } 1511 for (size_t I = 0, Size = FileNames.size(); I != Size; ++I) { 1512 // Dir indexes start at 1, as DWARF file numbers, and a dir index 0 1513 // means empty dir. 1514 StringRef Dir = ""; 1515 if (FileNames[I].DirIdx != 0) 1516 if (Optional<const char *> DirName = dwarf::toString( 1517 LineTable->Prologue 1518 .IncludeDirectories[FileNames[I].DirIdx - 1])) 1519 Dir = *DirName; 1520 StringRef FileName = ""; 1521 if (Optional<const char *> FName = dwarf::toString(FileNames[I].Name)) 1522 FileName = *FName; 1523 assert(FileName != ""); 1524 cantFail(getDwarfFile(Dir, FileName, 0, None, None, CUID)); 1525 } 1526 } 1527 1528 preprocessDWODebugInfo(); 1529 } 1530 1531 bool BinaryContext::shouldEmit(const BinaryFunction &Function) const { 1532 if (opts::processAllFunctions()) 1533 return true; 1534 1535 if (Function.isIgnored()) 1536 return false; 1537 1538 // In relocation mode we will emit non-simple functions with CFG. 1539 // If the function does not have a CFG it should be marked as ignored. 1540 return HasRelocations || Function.isSimple(); 1541 } 1542 1543 void BinaryContext::printCFI(raw_ostream &OS, const MCCFIInstruction &Inst) { 1544 uint32_t Operation = Inst.getOperation(); 1545 switch (Operation) { 1546 case MCCFIInstruction::OpSameValue: 1547 OS << "OpSameValue Reg" << Inst.getRegister(); 1548 break; 1549 case MCCFIInstruction::OpRememberState: 1550 OS << "OpRememberState"; 1551 break; 1552 case MCCFIInstruction::OpRestoreState: 1553 OS << "OpRestoreState"; 1554 break; 1555 case MCCFIInstruction::OpOffset: 1556 OS << "OpOffset Reg" << Inst.getRegister() << " " << Inst.getOffset(); 1557 break; 1558 case MCCFIInstruction::OpDefCfaRegister: 1559 OS << "OpDefCfaRegister Reg" << Inst.getRegister(); 1560 break; 1561 case MCCFIInstruction::OpDefCfaOffset: 1562 OS << "OpDefCfaOffset " << Inst.getOffset(); 1563 break; 1564 case MCCFIInstruction::OpDefCfa: 1565 OS << "OpDefCfa Reg" << Inst.getRegister() << " " << Inst.getOffset(); 1566 break; 1567 case MCCFIInstruction::OpRelOffset: 1568 OS << "OpRelOffset Reg" << Inst.getRegister() << " " << Inst.getOffset(); 1569 break; 1570 case MCCFIInstruction::OpAdjustCfaOffset: 1571 OS << "OfAdjustCfaOffset " << Inst.getOffset(); 1572 break; 1573 case MCCFIInstruction::OpEscape: 1574 OS << "OpEscape"; 1575 break; 1576 case MCCFIInstruction::OpRestore: 1577 OS << "OpRestore Reg" << Inst.getRegister(); 1578 break; 1579 case MCCFIInstruction::OpUndefined: 1580 OS << "OpUndefined Reg" << Inst.getRegister(); 1581 break; 1582 case MCCFIInstruction::OpRegister: 1583 OS << "OpRegister Reg" << Inst.getRegister() << " Reg" 1584 << Inst.getRegister2(); 1585 break; 1586 case MCCFIInstruction::OpWindowSave: 1587 OS << "OpWindowSave"; 1588 break; 1589 case MCCFIInstruction::OpGnuArgsSize: 1590 OS << "OpGnuArgsSize"; 1591 break; 1592 default: 1593 OS << "Op#" << Operation; 1594 break; 1595 } 1596 } 1597 1598 void BinaryContext::printInstruction(raw_ostream &OS, const MCInst &Instruction, 1599 uint64_t Offset, 1600 const BinaryFunction *Function, 1601 bool PrintMCInst, bool PrintMemData, 1602 bool PrintRelocations) const { 1603 if (MIB->isEHLabel(Instruction)) { 1604 OS << " EH_LABEL: " << *MIB->getTargetSymbol(Instruction) << '\n'; 1605 return; 1606 } 1607 OS << format(" %08" PRIx64 ": ", Offset); 1608 if (MIB->isCFI(Instruction)) { 1609 uint32_t Offset = Instruction.getOperand(0).getImm(); 1610 OS << "\t!CFI\t$" << Offset << "\t; "; 1611 if (Function) 1612 printCFI(OS, *Function->getCFIFor(Instruction)); 1613 OS << "\n"; 1614 return; 1615 } 1616 InstPrinter->printInst(&Instruction, 0, "", *STI, OS); 1617 if (MIB->isCall(Instruction)) { 1618 if (MIB->isTailCall(Instruction)) 1619 OS << " # TAILCALL "; 1620 if (MIB->isInvoke(Instruction)) { 1621 const Optional<MCPlus::MCLandingPad> EHInfo = MIB->getEHInfo(Instruction); 1622 OS << " # handler: "; 1623 if (EHInfo->first) 1624 OS << *EHInfo->first; 1625 else 1626 OS << '0'; 1627 OS << "; action: " << EHInfo->second; 1628 const int64_t GnuArgsSize = MIB->getGnuArgsSize(Instruction); 1629 if (GnuArgsSize >= 0) 1630 OS << "; GNU_args_size = " << GnuArgsSize; 1631 } 1632 } else if (MIB->isIndirectBranch(Instruction)) { 1633 if (uint64_t JTAddress = MIB->getJumpTable(Instruction)) { 1634 OS << " # JUMPTABLE @0x" << Twine::utohexstr(JTAddress); 1635 } else { 1636 OS << " # UNKNOWN CONTROL FLOW"; 1637 } 1638 } 1639 if (Optional<uint32_t> Offset = MIB->getOffset(Instruction)) 1640 OS << " # Offset: " << *Offset; 1641 1642 MIB->printAnnotations(Instruction, OS); 1643 1644 if (opts::PrintDebugInfo) { 1645 DebugLineTableRowRef RowRef = 1646 DebugLineTableRowRef::fromSMLoc(Instruction.getLoc()); 1647 if (RowRef != DebugLineTableRowRef::NULL_ROW) { 1648 const DWARFDebugLine::LineTable *LineTable; 1649 if (Function && Function->getDWARFUnit() && 1650 Function->getDWARFUnit()->getOffset() == RowRef.DwCompileUnitIndex) { 1651 LineTable = Function->getDWARFLineTable(); 1652 } else { 1653 LineTable = DwCtx->getLineTableForUnit( 1654 DwCtx->getCompileUnitForOffset(RowRef.DwCompileUnitIndex)); 1655 } 1656 assert(LineTable && 1657 "line table expected for instruction with debug info"); 1658 1659 const DWARFDebugLine::Row &Row = LineTable->Rows[RowRef.RowIndex - 1]; 1660 StringRef FileName = ""; 1661 if (Optional<const char *> FName = 1662 dwarf::toString(LineTable->Prologue.FileNames[Row.File - 1].Name)) 1663 FileName = *FName; 1664 OS << " # debug line " << FileName << ":" << Row.Line; 1665 if (Row.Column) 1666 OS << ":" << Row.Column; 1667 if (Row.Discriminator) 1668 OS << " discriminator:" << Row.Discriminator; 1669 } 1670 } 1671 1672 if ((opts::PrintRelocations || PrintRelocations) && Function) { 1673 const uint64_t Size = computeCodeSize(&Instruction, &Instruction + 1); 1674 Function->printRelocations(OS, Offset, Size); 1675 } 1676 1677 OS << "\n"; 1678 1679 if (PrintMCInst) { 1680 Instruction.dump_pretty(OS, InstPrinter.get()); 1681 OS << "\n"; 1682 } 1683 } 1684 1685 ErrorOr<BinarySection &> BinaryContext::getSectionForAddress(uint64_t Address) { 1686 auto SI = AddressToSection.upper_bound(Address); 1687 if (SI != AddressToSection.begin()) { 1688 --SI; 1689 uint64_t UpperBound = SI->first + SI->second->getSize(); 1690 if (!SI->second->getSize()) 1691 UpperBound += 1; 1692 if (UpperBound > Address) 1693 return *SI->second; 1694 } 1695 return std::make_error_code(std::errc::bad_address); 1696 } 1697 1698 ErrorOr<StringRef> 1699 BinaryContext::getSectionNameForAddress(uint64_t Address) const { 1700 if (ErrorOr<const BinarySection &> Section = getSectionForAddress(Address)) 1701 return Section->getName(); 1702 return std::make_error_code(std::errc::bad_address); 1703 } 1704 1705 BinarySection &BinaryContext::registerSection(BinarySection *Section) { 1706 auto Res = Sections.insert(Section); 1707 (void)Res; 1708 assert(Res.second && "can't register the same section twice."); 1709 1710 // Only register allocatable sections in the AddressToSection map. 1711 if (Section->isAllocatable() && Section->getAddress()) 1712 AddressToSection.insert(std::make_pair(Section->getAddress(), Section)); 1713 NameToSection.insert( 1714 std::make_pair(std::string(Section->getName()), Section)); 1715 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: registering " << *Section << "\n"); 1716 return *Section; 1717 } 1718 1719 BinarySection &BinaryContext::registerSection(SectionRef Section) { 1720 return registerSection(new BinarySection(*this, Section)); 1721 } 1722 1723 BinarySection & 1724 BinaryContext::registerSection(StringRef SectionName, 1725 const BinarySection &OriginalSection) { 1726 return registerSection( 1727 new BinarySection(*this, SectionName, OriginalSection)); 1728 } 1729 1730 BinarySection & 1731 BinaryContext::registerOrUpdateSection(StringRef Name, unsigned ELFType, 1732 unsigned ELFFlags, uint8_t *Data, 1733 uint64_t Size, unsigned Alignment) { 1734 auto NamedSections = getSectionByName(Name); 1735 if (NamedSections.begin() != NamedSections.end()) { 1736 assert(std::next(NamedSections.begin()) == NamedSections.end() && 1737 "can only update unique sections"); 1738 BinarySection *Section = NamedSections.begin()->second; 1739 1740 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: updating " << *Section << " -> "); 1741 const bool Flag = Section->isAllocatable(); 1742 (void)Flag; 1743 Section->update(Data, Size, Alignment, ELFType, ELFFlags); 1744 LLVM_DEBUG(dbgs() << *Section << "\n"); 1745 // FIXME: Fix section flags/attributes for MachO. 1746 if (isELF()) 1747 assert(Flag == Section->isAllocatable() && 1748 "can't change section allocation status"); 1749 return *Section; 1750 } 1751 1752 return registerSection( 1753 new BinarySection(*this, Name, Data, Size, Alignment, ELFType, ELFFlags)); 1754 } 1755 1756 bool BinaryContext::deregisterSection(BinarySection &Section) { 1757 BinarySection *SectionPtr = &Section; 1758 auto Itr = Sections.find(SectionPtr); 1759 if (Itr != Sections.end()) { 1760 auto Range = AddressToSection.equal_range(SectionPtr->getAddress()); 1761 while (Range.first != Range.second) { 1762 if (Range.first->second == SectionPtr) { 1763 AddressToSection.erase(Range.first); 1764 break; 1765 } 1766 ++Range.first; 1767 } 1768 1769 auto NameRange = 1770 NameToSection.equal_range(std::string(SectionPtr->getName())); 1771 while (NameRange.first != NameRange.second) { 1772 if (NameRange.first->second == SectionPtr) { 1773 NameToSection.erase(NameRange.first); 1774 break; 1775 } 1776 ++NameRange.first; 1777 } 1778 1779 Sections.erase(Itr); 1780 delete SectionPtr; 1781 return true; 1782 } 1783 return false; 1784 } 1785 1786 void BinaryContext::printSections(raw_ostream &OS) const { 1787 for (BinarySection *const &Section : Sections) 1788 OS << "BOLT-INFO: " << *Section << "\n"; 1789 } 1790 1791 BinarySection &BinaryContext::absoluteSection() { 1792 if (ErrorOr<BinarySection &> Section = getUniqueSectionByName("<absolute>")) 1793 return *Section; 1794 return registerOrUpdateSection("<absolute>", ELF::SHT_NULL, 0u); 1795 } 1796 1797 ErrorOr<uint64_t> BinaryContext::getUnsignedValueAtAddress(uint64_t Address, 1798 size_t Size) const { 1799 const ErrorOr<const BinarySection &> Section = getSectionForAddress(Address); 1800 if (!Section) 1801 return std::make_error_code(std::errc::bad_address); 1802 1803 if (Section->isVirtual()) 1804 return 0; 1805 1806 DataExtractor DE(Section->getContents(), AsmInfo->isLittleEndian(), 1807 AsmInfo->getCodePointerSize()); 1808 auto ValueOffset = static_cast<uint64_t>(Address - Section->getAddress()); 1809 return DE.getUnsigned(&ValueOffset, Size); 1810 } 1811 1812 ErrorOr<uint64_t> BinaryContext::getSignedValueAtAddress(uint64_t Address, 1813 size_t Size) const { 1814 const ErrorOr<const BinarySection &> Section = getSectionForAddress(Address); 1815 if (!Section) 1816 return std::make_error_code(std::errc::bad_address); 1817 1818 if (Section->isVirtual()) 1819 return 0; 1820 1821 DataExtractor DE(Section->getContents(), AsmInfo->isLittleEndian(), 1822 AsmInfo->getCodePointerSize()); 1823 auto ValueOffset = static_cast<uint64_t>(Address - Section->getAddress()); 1824 return DE.getSigned(&ValueOffset, Size); 1825 } 1826 1827 void BinaryContext::addRelocation(uint64_t Address, MCSymbol *Symbol, 1828 uint64_t Type, uint64_t Addend, 1829 uint64_t Value) { 1830 ErrorOr<BinarySection &> Section = getSectionForAddress(Address); 1831 assert(Section && "cannot find section for address"); 1832 Section->addRelocation(Address - Section->getAddress(), Symbol, Type, Addend, 1833 Value); 1834 } 1835 1836 void BinaryContext::addDynamicRelocation(uint64_t Address, MCSymbol *Symbol, 1837 uint64_t Type, uint64_t Addend, 1838 uint64_t Value) { 1839 ErrorOr<BinarySection &> Section = getSectionForAddress(Address); 1840 assert(Section && "cannot find section for address"); 1841 Section->addDynamicRelocation(Address - Section->getAddress(), Symbol, Type, 1842 Addend, Value); 1843 } 1844 1845 bool BinaryContext::removeRelocationAt(uint64_t Address) { 1846 ErrorOr<BinarySection &> Section = getSectionForAddress(Address); 1847 assert(Section && "cannot find section for address"); 1848 return Section->removeRelocationAt(Address - Section->getAddress()); 1849 } 1850 1851 const Relocation *BinaryContext::getRelocationAt(uint64_t Address) { 1852 ErrorOr<BinarySection &> Section = getSectionForAddress(Address); 1853 if (!Section) 1854 return nullptr; 1855 1856 return Section->getRelocationAt(Address - Section->getAddress()); 1857 } 1858 1859 const Relocation *BinaryContext::getDynamicRelocationAt(uint64_t Address) { 1860 ErrorOr<BinarySection &> Section = getSectionForAddress(Address); 1861 if (!Section) 1862 return nullptr; 1863 1864 return Section->getDynamicRelocationAt(Address - Section->getAddress()); 1865 } 1866 1867 void BinaryContext::markAmbiguousRelocations(BinaryData &BD, 1868 const uint64_t Address) { 1869 auto setImmovable = [&](BinaryData &BD) { 1870 BinaryData *Root = BD.getAtomicRoot(); 1871 LLVM_DEBUG(if (Root->isMoveable()) { 1872 dbgs() << "BOLT-DEBUG: setting " << *Root << " as immovable " 1873 << "due to ambiguous relocation referencing 0x" 1874 << Twine::utohexstr(Address) << '\n'; 1875 }); 1876 Root->setIsMoveable(false); 1877 }; 1878 1879 if (Address == BD.getAddress()) { 1880 setImmovable(BD); 1881 1882 // Set previous symbol as immovable 1883 BinaryData *Prev = getBinaryDataContainingAddress(Address - 1); 1884 if (Prev && Prev->getEndAddress() == BD.getAddress()) 1885 setImmovable(*Prev); 1886 } 1887 1888 if (Address == BD.getEndAddress()) { 1889 setImmovable(BD); 1890 1891 // Set next symbol as immovable 1892 BinaryData *Next = getBinaryDataContainingAddress(BD.getEndAddress()); 1893 if (Next && Next->getAddress() == BD.getEndAddress()) 1894 setImmovable(*Next); 1895 } 1896 } 1897 1898 BinaryFunction *BinaryContext::getFunctionForSymbol(const MCSymbol *Symbol, 1899 uint64_t *EntryDesc) { 1900 std::shared_lock<std::shared_timed_mutex> Lock(SymbolToFunctionMapMutex); 1901 auto BFI = SymbolToFunctionMap.find(Symbol); 1902 if (BFI == SymbolToFunctionMap.end()) 1903 return nullptr; 1904 1905 BinaryFunction *BF = BFI->second; 1906 if (EntryDesc) 1907 *EntryDesc = BF->getEntryIDForSymbol(Symbol); 1908 1909 return BF; 1910 } 1911 1912 void BinaryContext::exitWithBugReport(StringRef Message, 1913 const BinaryFunction &Function) const { 1914 errs() << "=======================================\n"; 1915 errs() << "BOLT is unable to proceed because it couldn't properly understand " 1916 "this function.\n"; 1917 errs() << "If you are running the most recent version of BOLT, you may " 1918 "want to " 1919 "report this and paste this dump.\nPlease check that there is no " 1920 "sensitive contents being shared in this dump.\n"; 1921 errs() << "\nOffending function: " << Function.getPrintName() << "\n\n"; 1922 ScopedPrinter SP(errs()); 1923 SP.printBinaryBlock("Function contents", *Function.getData()); 1924 errs() << "\n"; 1925 Function.dump(); 1926 errs() << "ERROR: " << Message; 1927 errs() << "\n=======================================\n"; 1928 exit(1); 1929 } 1930 1931 BinaryFunction * 1932 BinaryContext::createInjectedBinaryFunction(const std::string &Name, 1933 bool IsSimple) { 1934 InjectedBinaryFunctions.push_back(new BinaryFunction(Name, *this, IsSimple)); 1935 BinaryFunction *BF = InjectedBinaryFunctions.back(); 1936 setSymbolToFunctionMap(BF->getSymbol(), BF); 1937 BF->CurrentState = BinaryFunction::State::CFG; 1938 return BF; 1939 } 1940 1941 std::pair<size_t, size_t> 1942 BinaryContext::calculateEmittedSize(BinaryFunction &BF, bool FixBranches) { 1943 // Adjust branch instruction to match the current layout. 1944 if (FixBranches) 1945 BF.fixBranches(); 1946 1947 // Create local MC context to isolate the effect of ephemeral code emission. 1948 IndependentCodeEmitter MCEInstance = createIndependentMCCodeEmitter(); 1949 MCContext *LocalCtx = MCEInstance.LocalCtx.get(); 1950 MCAsmBackend *MAB = 1951 TheTarget->createMCAsmBackend(*STI, *MRI, MCTargetOptions()); 1952 1953 SmallString<256> Code; 1954 raw_svector_ostream VecOS(Code); 1955 1956 std::unique_ptr<MCObjectWriter> OW = MAB->createObjectWriter(VecOS); 1957 std::unique_ptr<MCStreamer> Streamer(TheTarget->createMCObjectStreamer( 1958 *TheTriple, *LocalCtx, std::unique_ptr<MCAsmBackend>(MAB), std::move(OW), 1959 std::unique_ptr<MCCodeEmitter>(MCEInstance.MCE.release()), *STI, 1960 /*RelaxAll=*/false, 1961 /*IncrementalLinkerCompatible=*/false, 1962 /*DWARFMustBeAtTheEnd=*/false)); 1963 1964 Streamer->initSections(false, *STI); 1965 1966 MCSection *Section = MCEInstance.LocalMOFI->getTextSection(); 1967 Section->setHasInstructions(true); 1968 1969 // Create symbols in the LocalCtx so that they get destroyed with it. 1970 MCSymbol *StartLabel = LocalCtx->createTempSymbol(); 1971 MCSymbol *EndLabel = LocalCtx->createTempSymbol(); 1972 MCSymbol *ColdStartLabel = LocalCtx->createTempSymbol(); 1973 MCSymbol *ColdEndLabel = LocalCtx->createTempSymbol(); 1974 1975 Streamer->SwitchSection(Section); 1976 Streamer->emitLabel(StartLabel); 1977 emitFunctionBody(*Streamer, BF, /*EmitColdPart=*/false, 1978 /*EmitCodeOnly=*/true); 1979 Streamer->emitLabel(EndLabel); 1980 1981 if (BF.isSplit()) { 1982 MCSectionELF *ColdSection = 1983 LocalCtx->getELFSection(BF.getColdCodeSectionName(), ELF::SHT_PROGBITS, 1984 ELF::SHF_EXECINSTR | ELF::SHF_ALLOC); 1985 ColdSection->setHasInstructions(true); 1986 1987 Streamer->SwitchSection(ColdSection); 1988 Streamer->emitLabel(ColdStartLabel); 1989 emitFunctionBody(*Streamer, BF, /*EmitColdPart=*/true, 1990 /*EmitCodeOnly=*/true); 1991 Streamer->emitLabel(ColdEndLabel); 1992 // To avoid calling MCObjectStreamer::flushPendingLabels() which is private 1993 Streamer->emitBytes(StringRef("")); 1994 Streamer->SwitchSection(Section); 1995 } 1996 1997 // To avoid calling MCObjectStreamer::flushPendingLabels() which is private or 1998 // MCStreamer::Finish(), which does more than we want 1999 Streamer->emitBytes(StringRef("")); 2000 2001 MCAssembler &Assembler = 2002 static_cast<MCObjectStreamer *>(Streamer.get())->getAssembler(); 2003 MCAsmLayout Layout(Assembler); 2004 Assembler.layout(Layout); 2005 2006 const uint64_t HotSize = 2007 Layout.getSymbolOffset(*EndLabel) - Layout.getSymbolOffset(*StartLabel); 2008 const uint64_t ColdSize = BF.isSplit() 2009 ? Layout.getSymbolOffset(*ColdEndLabel) - 2010 Layout.getSymbolOffset(*ColdStartLabel) 2011 : 0ULL; 2012 2013 // Clean-up the effect of the code emission. 2014 for (const MCSymbol &Symbol : Assembler.symbols()) { 2015 MCSymbol *MutableSymbol = const_cast<MCSymbol *>(&Symbol); 2016 MutableSymbol->setUndefined(); 2017 MutableSymbol->setIsRegistered(false); 2018 } 2019 2020 return std::make_pair(HotSize, ColdSize); 2021 } 2022 2023 bool BinaryContext::validateEncoding(const MCInst &Inst, 2024 ArrayRef<uint8_t> InputEncoding) const { 2025 SmallString<256> Code; 2026 SmallVector<MCFixup, 4> Fixups; 2027 raw_svector_ostream VecOS(Code); 2028 2029 MCE->encodeInstruction(Inst, VecOS, Fixups, *STI); 2030 auto EncodedData = ArrayRef<uint8_t>((uint8_t *)Code.data(), Code.size()); 2031 if (InputEncoding != EncodedData) { 2032 if (opts::Verbosity > 1) { 2033 errs() << "BOLT-WARNING: mismatched encoding detected\n" 2034 << " input: " << InputEncoding << '\n' 2035 << " output: " << EncodedData << '\n'; 2036 } 2037 return false; 2038 } 2039 2040 return true; 2041 } 2042 2043 uint64_t BinaryContext::getHotThreshold() const { 2044 static uint64_t Threshold = 0; 2045 if (Threshold == 0) { 2046 Threshold = std::max( 2047 (uint64_t)opts::ExecutionCountThreshold, 2048 NumProfiledFuncs ? SumExecutionCount / (2 * NumProfiledFuncs) : 1); 2049 } 2050 return Threshold; 2051 } 2052 2053 BinaryFunction *BinaryContext::getBinaryFunctionContainingAddress( 2054 uint64_t Address, bool CheckPastEnd, bool UseMaxSize) { 2055 auto FI = BinaryFunctions.upper_bound(Address); 2056 if (FI == BinaryFunctions.begin()) 2057 return nullptr; 2058 --FI; 2059 2060 const uint64_t UsedSize = 2061 UseMaxSize ? FI->second.getMaxSize() : FI->second.getSize(); 2062 2063 if (Address >= FI->first + UsedSize + (CheckPastEnd ? 1 : 0)) 2064 return nullptr; 2065 2066 return &FI->second; 2067 } 2068 2069 BinaryFunction *BinaryContext::getBinaryFunctionAtAddress(uint64_t Address) { 2070 // First, try to find a function starting at the given address. If the 2071 // function was folded, this will get us the original folded function if it 2072 // wasn't removed from the list, e.g. in non-relocation mode. 2073 auto BFI = BinaryFunctions.find(Address); 2074 if (BFI != BinaryFunctions.end()) 2075 return &BFI->second; 2076 2077 // We might have folded the function matching the object at the given 2078 // address. In such case, we look for a function matching the symbol 2079 // registered at the original address. The new function (the one that the 2080 // original was folded into) will hold the symbol. 2081 if (const BinaryData *BD = getBinaryDataAtAddress(Address)) { 2082 uint64_t EntryID = 0; 2083 BinaryFunction *BF = getFunctionForSymbol(BD->getSymbol(), &EntryID); 2084 if (BF && EntryID == 0) 2085 return BF; 2086 } 2087 return nullptr; 2088 } 2089 2090 DebugAddressRangesVector BinaryContext::translateModuleAddressRanges( 2091 const DWARFAddressRangesVector &InputRanges) const { 2092 DebugAddressRangesVector OutputRanges; 2093 2094 for (const DWARFAddressRange Range : InputRanges) { 2095 auto BFI = BinaryFunctions.lower_bound(Range.LowPC); 2096 while (BFI != BinaryFunctions.end()) { 2097 const BinaryFunction &Function = BFI->second; 2098 if (Function.getAddress() >= Range.HighPC) 2099 break; 2100 const DebugAddressRangesVector FunctionRanges = 2101 Function.getOutputAddressRanges(); 2102 std::move(std::begin(FunctionRanges), std::end(FunctionRanges), 2103 std::back_inserter(OutputRanges)); 2104 std::advance(BFI, 1); 2105 } 2106 } 2107 2108 return OutputRanges; 2109 } 2110 2111 } // namespace bolt 2112 } // namespace llvm 2113