1 //===--- BinaryContext.cpp - Interface for machine-level context ---------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 //===----------------------------------------------------------------------===// 10 11 #include "bolt/Core/BinaryContext.h" 12 #include "bolt/Core/BinaryEmitter.h" 13 #include "bolt/Core/BinaryFunction.h" 14 #include "bolt/Utils/CommandLineOpts.h" 15 #include "bolt/Utils/NameResolver.h" 16 #include "bolt/Utils/Utils.h" 17 #include "llvm/ADT/Twine.h" 18 #include "llvm/DebugInfo/DWARF/DWARFFormValue.h" 19 #include "llvm/DebugInfo/DWARF/DWARFUnit.h" 20 #include "llvm/MC/MCAsmLayout.h" 21 #include "llvm/MC/MCAssembler.h" 22 #include "llvm/MC/MCContext.h" 23 #include "llvm/MC/MCDisassembler/MCDisassembler.h" 24 #include "llvm/MC/MCInstPrinter.h" 25 #include "llvm/MC/MCObjectStreamer.h" 26 #include "llvm/MC/MCObjectWriter.h" 27 #include "llvm/MC/MCSectionELF.h" 28 #include "llvm/MC/MCStreamer.h" 29 #include "llvm/MC/MCSymbol.h" 30 #include "llvm/Support/CommandLine.h" 31 #include "llvm/Support/Regex.h" 32 #include <functional> 33 #include <iterator> 34 35 using namespace llvm; 36 37 #undef DEBUG_TYPE 38 #define DEBUG_TYPE "bolt" 39 40 namespace opts { 41 42 cl::opt<bool> 43 NoHugePages("no-huge-pages", 44 cl::desc("use regular size pages for code alignment"), 45 cl::ZeroOrMore, 46 cl::Hidden, 47 cl::cat(BoltCategory)); 48 49 static cl::opt<bool> 50 PrintDebugInfo("print-debug-info", 51 cl::desc("print debug info when printing functions"), 52 cl::Hidden, 53 cl::ZeroOrMore, 54 cl::cat(BoltCategory)); 55 56 cl::opt<bool> 57 PrintRelocations("print-relocations", 58 cl::desc("print relocations when printing functions/objects"), 59 cl::Hidden, 60 cl::ZeroOrMore, 61 cl::cat(BoltCategory)); 62 63 static cl::opt<bool> 64 PrintMemData("print-mem-data", 65 cl::desc("print memory data annotations when printing functions"), 66 cl::Hidden, 67 cl::ZeroOrMore, 68 cl::cat(BoltCategory)); 69 70 } // namespace opts 71 72 namespace llvm { 73 namespace bolt { 74 75 BinaryContext::BinaryContext(std::unique_ptr<MCContext> Ctx, 76 std::unique_ptr<DWARFContext> DwCtx, 77 std::unique_ptr<Triple> TheTriple, 78 const Target *TheTarget, 79 std::string TripleName, 80 std::unique_ptr<MCCodeEmitter> MCE, 81 std::unique_ptr<MCObjectFileInfo> MOFI, 82 std::unique_ptr<const MCAsmInfo> AsmInfo, 83 std::unique_ptr<const MCInstrInfo> MII, 84 std::unique_ptr<const MCSubtargetInfo> STI, 85 std::unique_ptr<MCInstPrinter> InstPrinter, 86 std::unique_ptr<const MCInstrAnalysis> MIA, 87 std::unique_ptr<MCPlusBuilder> MIB, 88 std::unique_ptr<const MCRegisterInfo> MRI, 89 std::unique_ptr<MCDisassembler> DisAsm) 90 : Ctx(std::move(Ctx)), 91 DwCtx(std::move(DwCtx)), 92 TheTriple(std::move(TheTriple)), 93 TheTarget(TheTarget), 94 TripleName(TripleName), 95 MCE(std::move(MCE)), 96 MOFI(std::move(MOFI)), 97 AsmInfo(std::move(AsmInfo)), 98 MII(std::move(MII)), 99 STI(std::move(STI)), 100 InstPrinter(std::move(InstPrinter)), 101 MIA(std::move(MIA)), 102 MIB(std::move(MIB)), 103 MRI(std::move(MRI)), 104 DisAsm(std::move(DisAsm)) { 105 Relocation::Arch = this->TheTriple->getArch(); 106 PageAlign = opts::NoHugePages ? RegularPageSize : HugePageSize; 107 } 108 109 BinaryContext::~BinaryContext() { 110 for (BinarySection *Section : Sections) { 111 delete Section; 112 } 113 for (BinaryFunction *InjectedFunction : InjectedBinaryFunctions) { 114 delete InjectedFunction; 115 } 116 for (std::pair<const uint64_t, JumpTable *> JTI : JumpTables) { 117 delete JTI.second; 118 } 119 clearBinaryData(); 120 } 121 122 /// Create BinaryContext for a given architecture \p ArchName and 123 /// triple \p TripleName. 124 std::unique_ptr<BinaryContext> 125 BinaryContext::createBinaryContext(const ObjectFile *File, bool IsPIC, 126 std::unique_ptr<DWARFContext> DwCtx) { 127 StringRef ArchName = ""; 128 StringRef FeaturesStr = ""; 129 switch (File->getArch()) { 130 case llvm::Triple::x86_64: 131 ArchName = "x86-64"; 132 FeaturesStr = "+nopl"; 133 break; 134 case llvm::Triple::aarch64: 135 ArchName = "aarch64"; 136 FeaturesStr = "+fp-armv8,+neon,+crypto,+dotprod,+crc,+lse,+ras,+rdm," 137 "+fullfp16,+spe,+fuse-aes,+rcpc"; 138 break; 139 default: 140 errs() << "BOLT-ERROR: Unrecognized machine in ELF file.\n"; 141 return nullptr; 142 } 143 144 auto TheTriple = std::make_unique<Triple>(File->makeTriple()); 145 const std::string TripleName = TheTriple->str(); 146 147 std::string Error; 148 const Target *TheTarget = 149 TargetRegistry::lookupTarget(std::string(ArchName), *TheTriple, Error); 150 if (!TheTarget) { 151 errs() << "BOLT-ERROR: " << Error; 152 return nullptr; 153 } 154 155 std::unique_ptr<const MCRegisterInfo> MRI( 156 TheTarget->createMCRegInfo(TripleName)); 157 if (!MRI) { 158 errs() << "BOLT-ERROR: no register info for target " << TripleName << "\n"; 159 return nullptr; 160 } 161 162 // Set up disassembler. 163 std::unique_ptr<const MCAsmInfo> AsmInfo( 164 TheTarget->createMCAsmInfo(*MRI, TripleName, MCTargetOptions())); 165 if (!AsmInfo) { 166 errs() << "BOLT-ERROR: no assembly info for target " << TripleName << "\n"; 167 return nullptr; 168 } 169 170 std::unique_ptr<const MCSubtargetInfo> STI( 171 TheTarget->createMCSubtargetInfo(TripleName, "", FeaturesStr)); 172 if (!STI) { 173 errs() << "BOLT-ERROR: no subtarget info for target " << TripleName << "\n"; 174 return nullptr; 175 } 176 177 std::unique_ptr<const MCInstrInfo> MII(TheTarget->createMCInstrInfo()); 178 if (!MII) { 179 errs() << "BOLT-ERROR: no instruction info for target " << TripleName 180 << "\n"; 181 return nullptr; 182 } 183 184 std::unique_ptr<MCContext> Ctx( 185 new MCContext(*TheTriple, AsmInfo.get(), MRI.get(), STI.get())); 186 std::unique_ptr<MCObjectFileInfo> MOFI( 187 TheTarget->createMCObjectFileInfo(*Ctx, IsPIC)); 188 Ctx->setObjectFileInfo(MOFI.get()); 189 // We do not support X86 Large code model. Change this in the future. 190 bool Large = false; 191 if (TheTriple->getArch() == llvm::Triple::aarch64) 192 Large = true; 193 unsigned LSDAEncoding = 194 Large ? dwarf::DW_EH_PE_absptr : dwarf::DW_EH_PE_udata4; 195 unsigned TTypeEncoding = 196 Large ? dwarf::DW_EH_PE_absptr : dwarf::DW_EH_PE_udata4; 197 if (IsPIC) { 198 LSDAEncoding = dwarf::DW_EH_PE_pcrel | 199 (Large ? dwarf::DW_EH_PE_sdata8 : dwarf::DW_EH_PE_sdata4); 200 TTypeEncoding = dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel | 201 (Large ? dwarf::DW_EH_PE_sdata8 : dwarf::DW_EH_PE_sdata4); 202 } 203 204 std::unique_ptr<MCDisassembler> DisAsm( 205 TheTarget->createMCDisassembler(*STI, *Ctx)); 206 207 if (!DisAsm) { 208 errs() << "BOLT-ERROR: no disassembler for target " << TripleName << "\n"; 209 return nullptr; 210 } 211 212 std::unique_ptr<const MCInstrAnalysis> MIA( 213 TheTarget->createMCInstrAnalysis(MII.get())); 214 if (!MIA) { 215 errs() << "BOLT-ERROR: failed to create instruction analysis for target" 216 << TripleName << "\n"; 217 return nullptr; 218 } 219 220 int AsmPrinterVariant = AsmInfo->getAssemblerDialect(); 221 std::unique_ptr<MCInstPrinter> InstructionPrinter( 222 TheTarget->createMCInstPrinter(*TheTriple, AsmPrinterVariant, *AsmInfo, 223 *MII, *MRI)); 224 if (!InstructionPrinter) { 225 errs() << "BOLT-ERROR: no instruction printer for target " << TripleName 226 << '\n'; 227 return nullptr; 228 } 229 InstructionPrinter->setPrintImmHex(true); 230 231 std::unique_ptr<MCCodeEmitter> MCE( 232 TheTarget->createMCCodeEmitter(*MII, *MRI, *Ctx)); 233 234 // Make sure we don't miss any output on core dumps. 235 outs().SetUnbuffered(); 236 errs().SetUnbuffered(); 237 dbgs().SetUnbuffered(); 238 239 auto BC = std::make_unique<BinaryContext>( 240 std::move(Ctx), std::move(DwCtx), std::move(TheTriple), TheTarget, 241 std::string(TripleName), std::move(MCE), std::move(MOFI), 242 std::move(AsmInfo), std::move(MII), std::move(STI), 243 std::move(InstructionPrinter), std::move(MIA), nullptr, 244 std::move(MRI), std::move(DisAsm)); 245 246 BC->TTypeEncoding = TTypeEncoding; 247 BC->LSDAEncoding = LSDAEncoding; 248 249 BC->MAB = std::unique_ptr<MCAsmBackend>( 250 BC->TheTarget->createMCAsmBackend(*BC->STI, *BC->MRI, MCTargetOptions())); 251 252 BC->setFilename(File->getFileName()); 253 254 BC->HasFixedLoadAddress = !IsPIC; 255 256 return BC; 257 } 258 259 bool BinaryContext::forceSymbolRelocations(StringRef SymbolName) const { 260 if (opts::HotText && (SymbolName == "__hot_start" || 261 SymbolName == "__hot_end")) 262 return true; 263 264 if (opts::HotData && (SymbolName == "__hot_data_start" || 265 SymbolName == "__hot_data_end")) 266 return true; 267 268 if (SymbolName == "_end") 269 return true; 270 271 return false; 272 } 273 274 std::unique_ptr<MCObjectWriter> 275 BinaryContext::createObjectWriter(raw_pwrite_stream &OS) { 276 return MAB->createObjectWriter(OS); 277 } 278 279 bool BinaryContext::validateObjectNesting() const { 280 auto Itr = BinaryDataMap.begin(); 281 auto End = BinaryDataMap.end(); 282 bool Valid = true; 283 while (Itr != End) { 284 auto Next = std::next(Itr); 285 while (Next != End && 286 Itr->second->getSection() == Next->second->getSection() && 287 Itr->second->containsRange(Next->second->getAddress(), 288 Next->second->getSize())) { 289 if (Next->second->Parent != Itr->second) { 290 errs() << "BOLT-WARNING: object nesting incorrect for:\n" 291 << "BOLT-WARNING: " << *Itr->second << "\n" 292 << "BOLT-WARNING: " << *Next->second << "\n"; 293 Valid = false; 294 } 295 ++Next; 296 } 297 Itr = Next; 298 } 299 return Valid; 300 } 301 302 bool BinaryContext::validateHoles() const { 303 bool Valid = true; 304 for (BinarySection &Section : sections()) { 305 for (const Relocation &Rel : Section.relocations()) { 306 uint64_t RelAddr = Rel.Offset + Section.getAddress(); 307 const BinaryData *BD = getBinaryDataContainingAddress(RelAddr); 308 if (!BD) { 309 errs() << "BOLT-WARNING: no BinaryData found for relocation at address" 310 << " 0x" << Twine::utohexstr(RelAddr) << " in " 311 << Section.getName() << "\n"; 312 Valid = false; 313 } else if (!BD->getAtomicRoot()) { 314 errs() << "BOLT-WARNING: no atomic BinaryData found for relocation at " 315 << "address 0x" << Twine::utohexstr(RelAddr) << " in " 316 << Section.getName() << "\n"; 317 Valid = false; 318 } 319 } 320 } 321 return Valid; 322 } 323 324 void BinaryContext::updateObjectNesting(BinaryDataMapType::iterator GAI) { 325 const uint64_t Address = GAI->second->getAddress(); 326 const uint64_t Size = GAI->second->getSize(); 327 328 auto fixParents = 329 [&](BinaryDataMapType::iterator Itr, BinaryData *NewParent) { 330 BinaryData *OldParent = Itr->second->Parent; 331 Itr->second->Parent = NewParent; 332 ++Itr; 333 while (Itr != BinaryDataMap.end() && OldParent && 334 Itr->second->Parent == OldParent) { 335 Itr->second->Parent = NewParent; 336 ++Itr; 337 } 338 }; 339 340 // Check if the previous symbol contains the newly added symbol. 341 if (GAI != BinaryDataMap.begin()) { 342 BinaryData *Prev = std::prev(GAI)->second; 343 while (Prev) { 344 if (Prev->getSection() == GAI->second->getSection() && 345 Prev->containsRange(Address, Size)) { 346 fixParents(GAI, Prev); 347 } else { 348 fixParents(GAI, nullptr); 349 } 350 Prev = Prev->Parent; 351 } 352 } 353 354 // Check if the newly added symbol contains any subsequent symbols. 355 if (Size != 0) { 356 BinaryData *BD = GAI->second->Parent ? GAI->second->Parent : GAI->second; 357 auto Itr = std::next(GAI); 358 while (Itr != BinaryDataMap.end() && 359 BD->containsRange(Itr->second->getAddress(), 360 Itr->second->getSize())) { 361 Itr->second->Parent = BD; 362 ++Itr; 363 } 364 } 365 } 366 367 iterator_range<BinaryContext::binary_data_iterator> 368 BinaryContext::getSubBinaryData(BinaryData *BD) { 369 auto Start = std::next(BinaryDataMap.find(BD->getAddress())); 370 auto End = Start; 371 while (End != BinaryDataMap.end() && 372 BD->isAncestorOf(End->second)) { 373 ++End; 374 } 375 return make_range(Start, End); 376 } 377 378 std::pair<const MCSymbol *, uint64_t> 379 BinaryContext::handleAddressRef(uint64_t Address, BinaryFunction &BF, 380 bool IsPCRel) { 381 uint64_t Addend = 0; 382 383 if (isAArch64()) { 384 // Check if this is an access to a constant island and create bookkeeping 385 // to keep track of it and emit it later as part of this function. 386 if (MCSymbol *IslandSym = BF.getOrCreateIslandAccess(Address)) 387 return std::make_pair(IslandSym, Addend); 388 389 // Detect custom code written in assembly that refers to arbitrary 390 // constant islands from other functions. Write this reference so we 391 // can pull this constant island and emit it as part of this function 392 // too. 393 auto IslandIter = AddressToConstantIslandMap.lower_bound(Address); 394 if (IslandIter != AddressToConstantIslandMap.end()) { 395 if (MCSymbol *IslandSym = 396 IslandIter->second->getOrCreateProxyIslandAccess(Address, BF)) { 397 BF.createIslandDependency(IslandSym, IslandIter->second); 398 return std::make_pair(IslandSym, Addend); 399 } 400 } 401 } 402 403 // Note that the address does not necessarily have to reside inside 404 // a section, it could be an absolute address too. 405 ErrorOr<BinarySection &> Section = getSectionForAddress(Address); 406 if (Section && Section->isText()) { 407 if (BF.containsAddress(Address, /*UseMaxSize=*/ isAArch64())) { 408 if (Address != BF.getAddress()) { 409 // The address could potentially escape. Mark it as another entry 410 // point into the function. 411 if (opts::Verbosity >= 1) { 412 outs() << "BOLT-INFO: potentially escaped address 0x" 413 << Twine::utohexstr(Address) << " in function " 414 << BF << '\n'; 415 } 416 BF.HasInternalLabelReference = true; 417 return std::make_pair( 418 BF.addEntryPointAtOffset(Address - BF.getAddress()), 419 Addend); 420 } 421 } else { 422 BF.InterproceduralReferences.insert(Address); 423 } 424 } 425 426 // With relocations, catch jump table references outside of the basic block 427 // containing the indirect jump. 428 if (HasRelocations) { 429 const MemoryContentsType MemType = analyzeMemoryAt(Address, BF); 430 if (MemType == MemoryContentsType::POSSIBLE_PIC_JUMP_TABLE && IsPCRel) { 431 const MCSymbol *Symbol = 432 getOrCreateJumpTable(BF, Address, JumpTable::JTT_PIC); 433 434 return std::make_pair(Symbol, Addend); 435 } 436 } 437 438 if (BinaryData *BD = getBinaryDataContainingAddress(Address)) { 439 return std::make_pair(BD->getSymbol(), Address - BD->getAddress()); 440 } 441 442 // TODO: use DWARF info to get size/alignment here? 443 MCSymbol *TargetSymbol = getOrCreateGlobalSymbol(Address, "DATAat"); 444 LLVM_DEBUG(dbgs() << "Created symbol " << TargetSymbol->getName() << '\n'); 445 return std::make_pair(TargetSymbol, Addend); 446 } 447 448 MemoryContentsType 449 BinaryContext::analyzeMemoryAt(uint64_t Address, BinaryFunction &BF) { 450 if (!isX86()) 451 return MemoryContentsType::UNKNOWN; 452 453 ErrorOr<BinarySection &> Section = getSectionForAddress(Address); 454 if (!Section) { 455 // No section - possibly an absolute address. Since we don't allow 456 // internal function addresses to escape the function scope - we 457 // consider it a tail call. 458 if (opts::Verbosity > 1) { 459 errs() << "BOLT-WARNING: no section for address 0x" 460 << Twine::utohexstr(Address) << " referenced from function " 461 << BF << '\n'; 462 } 463 return MemoryContentsType::UNKNOWN; 464 } 465 466 if (Section->isVirtual()) { 467 // The contents are filled at runtime. 468 return MemoryContentsType::UNKNOWN; 469 } 470 471 // No support for jump tables in code yet. 472 if (Section->isText()) 473 return MemoryContentsType::UNKNOWN; 474 475 // Start with checking for PIC jump table. We expect non-PIC jump tables 476 // to have high 32 bits set to 0. 477 if (analyzeJumpTable(Address, JumpTable::JTT_PIC, BF)) 478 return MemoryContentsType::POSSIBLE_PIC_JUMP_TABLE; 479 480 if (analyzeJumpTable(Address, JumpTable::JTT_NORMAL, BF)) 481 return MemoryContentsType::POSSIBLE_JUMP_TABLE; 482 483 return MemoryContentsType::UNKNOWN; 484 } 485 486 bool BinaryContext::analyzeJumpTable(const uint64_t Address, 487 const JumpTable::JumpTableType Type, 488 BinaryFunction &BF, 489 const uint64_t NextJTAddress, 490 JumpTable::OffsetsType *Offsets) { 491 // Is one of the targets __builtin_unreachable? 492 bool HasUnreachable = false; 493 494 // Number of targets other than __builtin_unreachable. 495 uint64_t NumRealEntries = 0; 496 497 constexpr uint64_t INVALID_OFFSET = std::numeric_limits<uint64_t>::max(); 498 auto addOffset = [&](uint64_t Offset) { 499 if (Offsets) 500 Offsets->emplace_back(Offset); 501 }; 502 503 auto isFragment = [](BinaryFunction &Fragment, 504 BinaryFunction &Parent) -> bool { 505 // Check if <fragment restored name> == <parent restored name>.cold(.\d+)? 506 for (StringRef BFName : Parent.getNames()) { 507 std::string BFNamePrefix = Regex::escape(NameResolver::restore(BFName)); 508 std::string BFNameRegex = 509 Twine(BFNamePrefix, "\\.cold(\\.[0-9]+)?").str(); 510 if (Fragment.hasRestoredNameRegex(BFNameRegex)) 511 return true; 512 } 513 return false; 514 }; 515 516 auto doesBelongToFunction = [&](const uint64_t Addr, 517 BinaryFunction *TargetBF) -> bool { 518 if (BF.containsAddress(Addr)) 519 return true; 520 // Nothing to do if we failed to identify the containing function. 521 if (!TargetBF) 522 return false; 523 // Case 1: check if BF is a fragment and TargetBF is its parent. 524 if (BF.isFragment()) { 525 // BF is a fragment, but parent function is not registered. 526 // This means there's no direct jump between parent and fragment. 527 // Set parent link here in jump table analysis, based on function name 528 // matching heuristic. 529 if (!BF.getParentFragment() && isFragment(BF, *TargetBF)) 530 registerFragment(BF, *TargetBF); 531 return BF.getParentFragment() == TargetBF; 532 } 533 // Case 2: check if TargetBF is a fragment and BF is its parent. 534 if (TargetBF->isFragment()) { 535 // TargetBF is a fragment, but parent function is not registered. 536 // This means there's no direct jump between parent and fragment. 537 // Set parent link here in jump table analysis, based on function name 538 // matching heuristic. 539 if (!TargetBF->getParentFragment() && isFragment(*TargetBF, BF)) 540 registerFragment(*TargetBF, BF); 541 return TargetBF->getParentFragment() == &BF; 542 } 543 return false; 544 }; 545 546 ErrorOr<BinarySection &> Section = getSectionForAddress(Address); 547 if (!Section) 548 return false; 549 550 // The upper bound is defined by containing object, section limits, and 551 // the next jump table in memory. 552 uint64_t UpperBound = Section->getEndAddress(); 553 const BinaryData *JumpTableBD = getBinaryDataAtAddress(Address); 554 if (JumpTableBD && JumpTableBD->getSize()) { 555 assert(JumpTableBD->getEndAddress() <= UpperBound && 556 "data object cannot cross a section boundary"); 557 UpperBound = JumpTableBD->getEndAddress(); 558 } 559 if (NextJTAddress) { 560 UpperBound = std::min(NextJTAddress, UpperBound); 561 } 562 563 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: analyzeJumpTable in " << BF.getPrintName() 564 << '\n'); 565 const uint64_t EntrySize = getJumpTableEntrySize(Type); 566 for (uint64_t EntryAddress = Address; EntryAddress <= UpperBound - EntrySize; 567 EntryAddress += EntrySize) { 568 LLVM_DEBUG(dbgs() << " * Checking 0x" << Twine::utohexstr(EntryAddress) 569 << " -> "); 570 // Check if there's a proper relocation against the jump table entry. 571 if (HasRelocations) { 572 if (Type == JumpTable::JTT_PIC && 573 !DataPCRelocations.count(EntryAddress)) { 574 LLVM_DEBUG( 575 dbgs() << "FAIL: JTT_PIC table, no relocation for this address\n"); 576 break; 577 } 578 if (Type == JumpTable::JTT_NORMAL && !getRelocationAt(EntryAddress)) { 579 LLVM_DEBUG( 580 dbgs() 581 << "FAIL: JTT_NORMAL table, no relocation for this address\n"); 582 break; 583 } 584 } 585 586 const uint64_t Value = (Type == JumpTable::JTT_PIC) 587 ? Address + *getSignedValueAtAddress(EntryAddress, EntrySize) 588 : *getPointerAtAddress(EntryAddress); 589 590 // __builtin_unreachable() case. 591 if (Value == BF.getAddress() + BF.getSize()) { 592 addOffset(Value - BF.getAddress()); 593 HasUnreachable = true; 594 LLVM_DEBUG(dbgs() << "OK: __builtin_unreachable\n"); 595 continue; 596 } 597 598 // Function or one of its fragments. 599 BinaryFunction *TargetBF = getBinaryFunctionContainingAddress(Value); 600 601 // We assume that a jump table cannot have function start as an entry. 602 if (!doesBelongToFunction(Value, TargetBF) || Value == BF.getAddress()) { 603 LLVM_DEBUG({ 604 if (!BF.containsAddress(Value)) { 605 dbgs() << "FAIL: function doesn't contain this address\n"; 606 if (TargetBF) { 607 dbgs() << " ! function containing this address: " 608 << TargetBF->getPrintName() << '\n'; 609 if (TargetBF->isFragment()) 610 dbgs() << " ! is a fragment\n"; 611 BinaryFunction *TargetParent = TargetBF->getParentFragment(); 612 dbgs() << " ! its parent is " 613 << (TargetParent ? TargetParent->getPrintName() : "(none)") 614 << '\n'; 615 } 616 } 617 if (Value == BF.getAddress()) 618 dbgs() << "FAIL: jump table cannot have function start as an entry\n"; 619 }); 620 break; 621 } 622 623 // Check there's an instruction at this offset. 624 if (TargetBF->getState() == BinaryFunction::State::Disassembled && 625 !TargetBF->getInstructionAtOffset(Value - TargetBF->getAddress())) { 626 LLVM_DEBUG(dbgs() << "FAIL: no instruction at this offset\n"); 627 break; 628 } 629 630 ++NumRealEntries; 631 632 if (TargetBF == &BF) { 633 // Address inside the function. 634 addOffset(Value - TargetBF->getAddress()); 635 LLVM_DEBUG(dbgs() << "OK: real entry\n"); 636 } else { 637 // Address in split fragment. 638 BF.setHasSplitJumpTable(true); 639 // Add invalid offset for proper identification of jump table size. 640 addOffset(INVALID_OFFSET); 641 LLVM_DEBUG(dbgs() << "OK: address in split fragment\n"); 642 } 643 } 644 645 // It's a jump table if the number of real entries is more than 1, or there's 646 // one real entry and "unreachable" targets. If there are only multiple 647 // "unreachable" targets, then it's not a jump table. 648 return NumRealEntries + HasUnreachable >= 2; 649 } 650 651 void BinaryContext::populateJumpTables() { 652 std::vector<BinaryFunction *> FuncsToSkip; 653 LLVM_DEBUG(dbgs() << "DataPCRelocations: " << DataPCRelocations.size() 654 << '\n'); 655 for (auto JTI = JumpTables.begin(), JTE = JumpTables.end(); JTI != JTE; 656 ++JTI) { 657 JumpTable *JT = JTI->second; 658 BinaryFunction &BF = *JT->Parent; 659 660 if (!BF.isSimple()) 661 continue; 662 663 uint64_t NextJTAddress = 0; 664 auto NextJTI = std::next(JTI); 665 if (NextJTI != JTE) { 666 NextJTAddress = NextJTI->second->getAddress(); 667 } 668 669 const bool Success = analyzeJumpTable(JT->getAddress(), JT->Type, BF, 670 NextJTAddress, &JT->OffsetEntries); 671 if (!Success) { 672 dbgs() << "failed to analyze jump table in function " << BF << '\n'; 673 JT->print(dbgs()); 674 if (NextJTI != JTE) { 675 dbgs() << "next jump table at 0x" 676 << Twine::utohexstr(NextJTI->second->getAddress()) 677 << " belongs to function " << *NextJTI->second->Parent << '\n'; 678 NextJTI->second->print(dbgs()); 679 } 680 llvm_unreachable("jump table heuristic failure"); 681 } 682 683 for (uint64_t EntryOffset : JT->OffsetEntries) { 684 if (EntryOffset == BF.getSize()) 685 BF.IgnoredBranches.emplace_back(EntryOffset, BF.getSize()); 686 else 687 BF.registerReferencedOffset(EntryOffset); 688 } 689 690 // In strict mode, erase PC-relative relocation record. Later we check that 691 // all such records are erased and thus have been accounted for. 692 if (opts::StrictMode && JT->Type == JumpTable::JTT_PIC) { 693 for (uint64_t Address = JT->getAddress(); 694 Address < JT->getAddress() + JT->getSize(); 695 Address += JT->EntrySize) { 696 DataPCRelocations.erase(DataPCRelocations.find(Address)); 697 } 698 } 699 700 // Mark to skip the function and all its fragments. 701 if (BF.hasSplitJumpTable()) 702 FuncsToSkip.push_back(&BF); 703 } 704 705 if (opts::StrictMode && DataPCRelocations.size()) { 706 LLVM_DEBUG({ 707 dbgs() << DataPCRelocations.size() 708 << " unclaimed PC-relative relocations left in data:\n"; 709 for (uint64_t Reloc : DataPCRelocations) 710 dbgs() << Twine::utohexstr(Reloc) << '\n'; 711 }); 712 assert(0 && "unclaimed PC-relative relocations left in data\n"); 713 } 714 clearList(DataPCRelocations); 715 // Functions containing split jump tables need to be skipped with all 716 // fragments. 717 for (BinaryFunction *BF : FuncsToSkip) { 718 BinaryFunction *ParentBF = 719 const_cast<BinaryFunction *>(BF->getTopmostFragment()); 720 LLVM_DEBUG(dbgs() << "Skipping " << ParentBF->getPrintName() 721 << " family\n"); 722 ParentBF->setIgnored(); 723 ParentBF->ignoreFragments(); 724 } 725 } 726 727 MCSymbol *BinaryContext::getOrCreateGlobalSymbol(uint64_t Address, 728 Twine Prefix, 729 uint64_t Size, 730 uint16_t Alignment, 731 unsigned Flags) { 732 auto Itr = BinaryDataMap.find(Address); 733 if (Itr != BinaryDataMap.end()) { 734 assert(Itr->second->getSize() == Size || !Size); 735 return Itr->second->getSymbol(); 736 } 737 738 std::string Name = (Prefix + "0x" + Twine::utohexstr(Address)).str(); 739 assert(!GlobalSymbols.count(Name) && "created name is not unique"); 740 return registerNameAtAddress(Name, Address, Size, Alignment, Flags); 741 } 742 743 MCSymbol *BinaryContext::getOrCreateUndefinedGlobalSymbol(StringRef Name) { 744 return Ctx->getOrCreateSymbol(Name); 745 } 746 747 BinaryFunction *BinaryContext::createBinaryFunction( 748 const std::string &Name, BinarySection &Section, uint64_t Address, 749 uint64_t Size, uint64_t SymbolSize, uint16_t Alignment) { 750 auto Result = BinaryFunctions.emplace( 751 Address, BinaryFunction(Name, Section, Address, Size, *this)); 752 assert(Result.second == true && "unexpected duplicate function"); 753 BinaryFunction *BF = &Result.first->second; 754 registerNameAtAddress(Name, Address, SymbolSize ? SymbolSize : Size, 755 Alignment); 756 setSymbolToFunctionMap(BF->getSymbol(), BF); 757 return BF; 758 } 759 760 const MCSymbol * 761 BinaryContext::getOrCreateJumpTable(BinaryFunction &Function, uint64_t Address, 762 JumpTable::JumpTableType Type) { 763 if (JumpTable *JT = getJumpTableContainingAddress(Address)) { 764 assert(JT->Type == Type && "jump table types have to match"); 765 assert(JT->Parent == &Function && 766 "cannot re-use jump table of a different function"); 767 assert(Address == JT->getAddress() && "unexpected non-empty jump table"); 768 769 return JT->getFirstLabel(); 770 } 771 772 // Re-use the existing symbol if possible. 773 MCSymbol *JTLabel = nullptr; 774 if (BinaryData *Object = getBinaryDataAtAddress(Address)) { 775 if (!isInternalSymbolName(Object->getSymbol()->getName())) 776 JTLabel = Object->getSymbol(); 777 } 778 779 const uint64_t EntrySize = getJumpTableEntrySize(Type); 780 if (!JTLabel) { 781 const std::string JumpTableName = generateJumpTableName(Function, Address); 782 JTLabel = registerNameAtAddress(JumpTableName, Address, 0, EntrySize); 783 } 784 785 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: creating jump table " << JTLabel->getName() 786 << " in function " << Function << '\n'); 787 788 JumpTable *JT = new JumpTable(*JTLabel, Address, EntrySize, Type, 789 JumpTable::LabelMapType{{0, JTLabel}}, Function, 790 *getSectionForAddress(Address)); 791 JumpTables.emplace(Address, JT); 792 793 // Duplicate the entry for the parent function for easy access. 794 Function.JumpTables.emplace(Address, JT); 795 796 return JTLabel; 797 } 798 799 std::pair<uint64_t, const MCSymbol *> 800 BinaryContext::duplicateJumpTable(BinaryFunction &Function, JumpTable *JT, 801 const MCSymbol *OldLabel) { 802 auto L = scopeLock(); 803 unsigned Offset = 0; 804 bool Found = false; 805 for (std::pair<const unsigned, MCSymbol *> Elmt : JT->Labels) { 806 if (Elmt.second != OldLabel) 807 continue; 808 Offset = Elmt.first; 809 Found = true; 810 break; 811 } 812 assert(Found && "Label not found"); 813 MCSymbol *NewLabel = Ctx->createNamedTempSymbol("duplicatedJT"); 814 JumpTable *NewJT = 815 new JumpTable(*NewLabel, JT->getAddress(), JT->EntrySize, JT->Type, 816 JumpTable::LabelMapType{{Offset, NewLabel}}, Function, 817 *getSectionForAddress(JT->getAddress())); 818 NewJT->Entries = JT->Entries; 819 NewJT->Counts = JT->Counts; 820 uint64_t JumpTableID = ++DuplicatedJumpTables; 821 // Invert it to differentiate from regular jump tables whose IDs are their 822 // addresses in the input binary memory space 823 JumpTableID = ~JumpTableID; 824 JumpTables.emplace(JumpTableID, NewJT); 825 Function.JumpTables.emplace(JumpTableID, NewJT); 826 return std::make_pair(JumpTableID, NewLabel); 827 } 828 829 std::string BinaryContext::generateJumpTableName(const BinaryFunction &BF, 830 uint64_t Address) { 831 size_t Id; 832 uint64_t Offset = 0; 833 if (const JumpTable *JT = BF.getJumpTableContainingAddress(Address)) { 834 Offset = Address - JT->getAddress(); 835 auto Itr = JT->Labels.find(Offset); 836 if (Itr != JT->Labels.end()) { 837 return std::string(Itr->second->getName()); 838 } 839 Id = JumpTableIds.at(JT->getAddress()); 840 } else { 841 Id = JumpTableIds[Address] = BF.JumpTables.size(); 842 } 843 return ("JUMP_TABLE/" + BF.getOneName().str() + "." + std::to_string(Id) + 844 (Offset ? ("." + std::to_string(Offset)) : "")); 845 } 846 847 bool BinaryContext::hasValidCodePadding(const BinaryFunction &BF) { 848 // FIXME: aarch64 support is missing. 849 if (!isX86()) 850 return true; 851 852 if (BF.getSize() == BF.getMaxSize()) 853 return true; 854 855 ErrorOr<ArrayRef<unsigned char>> FunctionData = BF.getData(); 856 assert(FunctionData && "cannot get function as data"); 857 858 uint64_t Offset = BF.getSize(); 859 MCInst Instr; 860 uint64_t InstrSize = 0; 861 uint64_t InstrAddress = BF.getAddress() + Offset; 862 using std::placeholders::_1; 863 864 // Skip instructions that satisfy the predicate condition. 865 auto skipInstructions = [&](std::function<bool(const MCInst &)> Predicate) { 866 const uint64_t StartOffset = Offset; 867 for (; Offset < BF.getMaxSize(); 868 Offset += InstrSize, InstrAddress += InstrSize) { 869 if (!DisAsm->getInstruction(Instr, 870 InstrSize, 871 FunctionData->slice(Offset), 872 InstrAddress, 873 nulls())) 874 break; 875 if (!Predicate(Instr)) 876 break; 877 } 878 879 return Offset - StartOffset; 880 }; 881 882 // Skip a sequence of zero bytes. 883 auto skipZeros = [&]() { 884 const uint64_t StartOffset = Offset; 885 for (; Offset < BF.getMaxSize(); ++Offset) 886 if ((*FunctionData)[Offset] != 0) 887 break; 888 889 return Offset - StartOffset; 890 }; 891 892 // Accept the whole padding area filled with breakpoints. 893 auto isBreakpoint = std::bind(&MCPlusBuilder::isBreakpoint, MIB.get(), _1); 894 if (skipInstructions(isBreakpoint) && Offset == BF.getMaxSize()) 895 return true; 896 897 auto isNoop = std::bind(&MCPlusBuilder::isNoop, MIB.get(), _1); 898 899 // Some functions have a jump to the next function or to the padding area 900 // inserted after the body. 901 auto isSkipJump = [&](const MCInst &Instr) { 902 uint64_t TargetAddress = 0; 903 if (MIB->isUnconditionalBranch(Instr) && 904 MIB->evaluateBranch(Instr, InstrAddress, InstrSize, TargetAddress)) { 905 if (TargetAddress >= InstrAddress + InstrSize && 906 TargetAddress <= BF.getAddress() + BF.getMaxSize()) { 907 return true; 908 } 909 } 910 return false; 911 }; 912 913 // Skip over nops, jumps, and zero padding. Allow interleaving (this happens). 914 while (skipInstructions(isNoop) || 915 skipInstructions(isSkipJump) || 916 skipZeros()) 917 ; 918 919 if (Offset == BF.getMaxSize()) 920 return true; 921 922 if (opts::Verbosity >= 1) { 923 errs() << "BOLT-WARNING: bad padding at address 0x" 924 << Twine::utohexstr(BF.getAddress() + BF.getSize()) 925 << " starting at offset " 926 << (Offset - BF.getSize()) << " in function " 927 << BF << '\n' 928 << FunctionData->slice(BF.getSize(), BF.getMaxSize() - BF.getSize()) 929 << '\n'; 930 } 931 932 return false; 933 } 934 935 void BinaryContext::adjustCodePadding() { 936 for (auto &BFI : BinaryFunctions) { 937 BinaryFunction &BF = BFI.second; 938 if (!shouldEmit(BF)) 939 continue; 940 941 if (!hasValidCodePadding(BF)) { 942 if (HasRelocations) { 943 if (opts::Verbosity >= 1) { 944 outs() << "BOLT-INFO: function " << BF 945 << " has invalid padding. Ignoring the function.\n"; 946 } 947 BF.setIgnored(); 948 } else { 949 BF.setMaxSize(BF.getSize()); 950 } 951 } 952 } 953 } 954 955 MCSymbol *BinaryContext::registerNameAtAddress(StringRef Name, 956 uint64_t Address, 957 uint64_t Size, 958 uint16_t Alignment, 959 unsigned Flags) { 960 // Register the name with MCContext. 961 MCSymbol *Symbol = Ctx->getOrCreateSymbol(Name); 962 963 auto GAI = BinaryDataMap.find(Address); 964 BinaryData *BD; 965 if (GAI == BinaryDataMap.end()) { 966 ErrorOr<BinarySection &> SectionOrErr = getSectionForAddress(Address); 967 BinarySection &Section = 968 SectionOrErr ? SectionOrErr.get() : absoluteSection(); 969 BD = new BinaryData(*Symbol, 970 Address, 971 Size, 972 Alignment ? Alignment : 1, 973 Section, 974 Flags); 975 GAI = BinaryDataMap.emplace(Address, BD).first; 976 GlobalSymbols[Name] = BD; 977 updateObjectNesting(GAI); 978 } else { 979 BD = GAI->second; 980 if (!BD->hasName(Name)) { 981 GlobalSymbols[Name] = BD; 982 BD->Symbols.push_back(Symbol); 983 } 984 } 985 986 return Symbol; 987 } 988 989 const BinaryData * 990 BinaryContext::getBinaryDataContainingAddressImpl(uint64_t Address) const { 991 auto NI = BinaryDataMap.lower_bound(Address); 992 auto End = BinaryDataMap.end(); 993 if ((NI != End && Address == NI->first) || 994 ((NI != BinaryDataMap.begin()) && (NI-- != BinaryDataMap.begin()))) { 995 if (NI->second->containsAddress(Address)) { 996 return NI->second; 997 } 998 999 // If this is a sub-symbol, see if a parent data contains the address. 1000 const BinaryData *BD = NI->second->getParent(); 1001 while (BD) { 1002 if (BD->containsAddress(Address)) 1003 return BD; 1004 BD = BD->getParent(); 1005 } 1006 } 1007 return nullptr; 1008 } 1009 1010 bool BinaryContext::setBinaryDataSize(uint64_t Address, uint64_t Size) { 1011 auto NI = BinaryDataMap.find(Address); 1012 assert(NI != BinaryDataMap.end()); 1013 if (NI == BinaryDataMap.end()) 1014 return false; 1015 // TODO: it's possible that a jump table starts at the same address 1016 // as a larger blob of private data. When we set the size of the 1017 // jump table, it might be smaller than the total blob size. In this 1018 // case we just leave the original size since (currently) it won't really 1019 // affect anything. See T26915981. 1020 assert((!NI->second->Size || NI->second->Size == Size || 1021 (NI->second->isJumpTable() && NI->second->Size > Size)) && 1022 "can't change the size of a symbol that has already had its " 1023 "size set"); 1024 if (!NI->second->Size) { 1025 NI->second->Size = Size; 1026 updateObjectNesting(NI); 1027 return true; 1028 } 1029 return false; 1030 } 1031 1032 void BinaryContext::generateSymbolHashes() { 1033 auto isPadding = [](const BinaryData &BD) { 1034 StringRef Contents = BD.getSection().getContents(); 1035 StringRef SymData = Contents.substr(BD.getOffset(), BD.getSize()); 1036 return (BD.getName().startswith("HOLEat") || 1037 SymData.find_first_not_of(0) == StringRef::npos); 1038 }; 1039 1040 uint64_t NumCollisions = 0; 1041 for (auto &Entry : BinaryDataMap) { 1042 BinaryData &BD = *Entry.second; 1043 StringRef Name = BD.getName(); 1044 1045 if (!isInternalSymbolName(Name)) 1046 continue; 1047 1048 // First check if a non-anonymous alias exists and move it to the front. 1049 if (BD.getSymbols().size() > 1) { 1050 auto Itr = std::find_if(BD.getSymbols().begin(), 1051 BD.getSymbols().end(), 1052 [&](const MCSymbol *Symbol) { 1053 return !isInternalSymbolName(Symbol->getName()); 1054 }); 1055 if (Itr != BD.getSymbols().end()) { 1056 size_t Idx = std::distance(BD.getSymbols().begin(), Itr); 1057 std::swap(BD.getSymbols()[0], BD.getSymbols()[Idx]); 1058 continue; 1059 } 1060 } 1061 1062 // We have to skip 0 size symbols since they will all collide. 1063 if (BD.getSize() == 0) { 1064 continue; 1065 } 1066 1067 const uint64_t Hash = BD.getSection().hash(BD); 1068 const size_t Idx = Name.find("0x"); 1069 std::string NewName = (Twine(Name.substr(0, Idx)) + 1070 "_" + Twine::utohexstr(Hash)).str(); 1071 if (getBinaryDataByName(NewName)) { 1072 // Ignore collisions for symbols that appear to be padding 1073 // (i.e. all zeros or a "hole") 1074 if (!isPadding(BD)) { 1075 if (opts::Verbosity) { 1076 errs() << "BOLT-WARNING: collision detected when hashing " << BD 1077 << " with new name (" << NewName << "), skipping.\n"; 1078 } 1079 ++NumCollisions; 1080 } 1081 continue; 1082 } 1083 BD.Symbols.insert(BD.Symbols.begin(), 1084 Ctx->getOrCreateSymbol(NewName)); 1085 GlobalSymbols[NewName] = &BD; 1086 } 1087 if (NumCollisions) { 1088 errs() << "BOLT-WARNING: " << NumCollisions 1089 << " collisions detected while hashing binary objects"; 1090 if (!opts::Verbosity) 1091 errs() << ". Use -v=1 to see the list."; 1092 errs() << '\n'; 1093 } 1094 } 1095 1096 void BinaryContext::registerFragment(BinaryFunction &TargetFunction, 1097 BinaryFunction &Function) const { 1098 // Only a parent function (or a sibling) can reach its fragment. 1099 assert(!Function.IsFragment && 1100 "only one cold fragment is supported at this time"); 1101 if (BinaryFunction *TargetParent = TargetFunction.getParentFragment()) { 1102 assert(TargetParent == &Function && "mismatching parent function"); 1103 return; 1104 } 1105 TargetFunction.setParentFragment(Function); 1106 Function.addFragment(TargetFunction); 1107 if (!HasRelocations) { 1108 TargetFunction.setSimple(false); 1109 Function.setSimple(false); 1110 } 1111 if (opts::Verbosity >= 1) { 1112 outs() << "BOLT-INFO: marking " << TargetFunction 1113 << " as a fragment of " << Function << '\n'; 1114 } 1115 } 1116 1117 void BinaryContext::processInterproceduralReferences(BinaryFunction &Function) { 1118 for (uint64_t Address : Function.InterproceduralReferences) { 1119 if (!Address) 1120 continue; 1121 1122 BinaryFunction *TargetFunction = 1123 getBinaryFunctionContainingAddress(Address); 1124 if (&Function == TargetFunction) 1125 continue; 1126 1127 if (TargetFunction) { 1128 if (TargetFunction->IsFragment) 1129 registerFragment(*TargetFunction, Function); 1130 if (uint64_t Offset = Address - TargetFunction->getAddress()) 1131 TargetFunction->addEntryPointAtOffset(Offset); 1132 1133 continue; 1134 } 1135 1136 // Check if address falls in function padding space - this could be 1137 // unmarked data in code. In this case adjust the padding space size. 1138 ErrorOr<BinarySection &> Section = getSectionForAddress(Address); 1139 assert(Section && "cannot get section for referenced address"); 1140 1141 if (!Section->isText()) 1142 continue; 1143 1144 // PLT requires special handling and could be ignored in this context. 1145 StringRef SectionName = Section->getName(); 1146 if (SectionName == ".plt" || SectionName == ".plt.got") 1147 continue; 1148 1149 if (opts::processAllFunctions()) { 1150 errs() << "BOLT-ERROR: cannot process binaries with unmarked " 1151 << "object in code at address 0x" 1152 << Twine::utohexstr(Address) << " belonging to section " 1153 << SectionName << " in current mode\n"; 1154 exit(1); 1155 } 1156 1157 TargetFunction = 1158 getBinaryFunctionContainingAddress(Address, 1159 /*CheckPastEnd=*/false, 1160 /*UseMaxSize=*/true); 1161 // We are not going to overwrite non-simple functions, but for simple 1162 // ones - adjust the padding size. 1163 if (TargetFunction && TargetFunction->isSimple()) { 1164 errs() << "BOLT-WARNING: function " << *TargetFunction 1165 << " has an object detected in a padding region at address 0x" 1166 << Twine::utohexstr(Address) << '\n'; 1167 TargetFunction->setMaxSize(TargetFunction->getSize()); 1168 } 1169 } 1170 1171 clearList(Function.InterproceduralReferences); 1172 } 1173 1174 void BinaryContext::postProcessSymbolTable() { 1175 fixBinaryDataHoles(); 1176 bool Valid = true; 1177 for (auto &Entry : BinaryDataMap) { 1178 BinaryData *BD = Entry.second; 1179 if ((BD->getName().startswith("SYMBOLat") || 1180 BD->getName().startswith("DATAat")) && 1181 !BD->getParent() && 1182 !BD->getSize() && 1183 !BD->isAbsolute() && 1184 BD->getSection()) { 1185 errs() << "BOLT-WARNING: zero-sized top level symbol: " << *BD << "\n"; 1186 Valid = false; 1187 } 1188 } 1189 assert(Valid); 1190 generateSymbolHashes(); 1191 } 1192 1193 void BinaryContext::foldFunction(BinaryFunction &ChildBF, 1194 BinaryFunction &ParentBF) { 1195 assert(!ChildBF.isMultiEntry() && !ParentBF.isMultiEntry() && 1196 "cannot merge functions with multiple entry points"); 1197 1198 std::unique_lock<std::shared_timed_mutex> WriteCtxLock(CtxMutex, 1199 std::defer_lock); 1200 std::unique_lock<std::shared_timed_mutex> WriteSymbolMapLock( 1201 SymbolToFunctionMapMutex, std::defer_lock); 1202 1203 const StringRef ChildName = ChildBF.getOneName(); 1204 1205 // Move symbols over and update bookkeeping info. 1206 for (MCSymbol *Symbol : ChildBF.getSymbols()) { 1207 ParentBF.getSymbols().push_back(Symbol); 1208 WriteSymbolMapLock.lock(); 1209 SymbolToFunctionMap[Symbol] = &ParentBF; 1210 WriteSymbolMapLock.unlock(); 1211 // NB: there's no need to update BinaryDataMap and GlobalSymbols. 1212 } 1213 ChildBF.getSymbols().clear(); 1214 1215 // Move other names the child function is known under. 1216 std::move(ChildBF.Aliases.begin(), ChildBF.Aliases.end(), 1217 std::back_inserter(ParentBF.Aliases)); 1218 ChildBF.Aliases.clear(); 1219 1220 if (HasRelocations) { 1221 // Merge execution counts of ChildBF into those of ParentBF. 1222 // Without relocations, we cannot reliably merge profiles as both functions 1223 // continue to exist and either one can be executed. 1224 ChildBF.mergeProfileDataInto(ParentBF); 1225 1226 std::shared_lock<std::shared_timed_mutex> ReadBfsLock(BinaryFunctionsMutex, 1227 std::defer_lock); 1228 std::unique_lock<std::shared_timed_mutex> WriteBfsLock(BinaryFunctionsMutex, 1229 std::defer_lock); 1230 // Remove ChildBF from the global set of functions in relocs mode. 1231 ReadBfsLock.lock(); 1232 auto FI = BinaryFunctions.find(ChildBF.getAddress()); 1233 ReadBfsLock.unlock(); 1234 1235 assert(FI != BinaryFunctions.end() && "function not found"); 1236 assert(&ChildBF == &FI->second && "function mismatch"); 1237 1238 WriteBfsLock.lock(); 1239 ChildBF.clearDisasmState(); 1240 FI = BinaryFunctions.erase(FI); 1241 WriteBfsLock.unlock(); 1242 1243 } else { 1244 // In non-relocation mode we keep the function, but rename it. 1245 std::string NewName = "__ICF_" + ChildName.str(); 1246 1247 WriteCtxLock.lock(); 1248 ChildBF.getSymbols().push_back(Ctx->getOrCreateSymbol(NewName)); 1249 WriteCtxLock.unlock(); 1250 1251 ChildBF.setFolded(&ParentBF); 1252 } 1253 } 1254 1255 void BinaryContext::fixBinaryDataHoles() { 1256 assert(validateObjectNesting() && "object nesting inconsitency detected"); 1257 1258 for (BinarySection &Section : allocatableSections()) { 1259 std::vector<std::pair<uint64_t, uint64_t>> Holes; 1260 1261 auto isNotHole = [&Section](const binary_data_iterator &Itr) { 1262 BinaryData *BD = Itr->second; 1263 bool isHole = (!BD->getParent() && 1264 !BD->getSize() && 1265 BD->isObject() && 1266 (BD->getName().startswith("SYMBOLat0x") || 1267 BD->getName().startswith("DATAat0x") || 1268 BD->getName().startswith("ANONYMOUS"))); 1269 return !isHole && BD->getSection() == Section && !BD->getParent(); 1270 }; 1271 1272 auto BDStart = BinaryDataMap.begin(); 1273 auto BDEnd = BinaryDataMap.end(); 1274 auto Itr = FilteredBinaryDataIterator(isNotHole, BDStart, BDEnd); 1275 auto End = FilteredBinaryDataIterator(isNotHole, BDEnd, BDEnd); 1276 1277 uint64_t EndAddress = Section.getAddress(); 1278 1279 while (Itr != End) { 1280 if (Itr->second->getAddress() > EndAddress) { 1281 uint64_t Gap = Itr->second->getAddress() - EndAddress; 1282 Holes.emplace_back(EndAddress, Gap); 1283 } 1284 EndAddress = Itr->second->getEndAddress(); 1285 ++Itr; 1286 } 1287 1288 if (EndAddress < Section.getEndAddress()) { 1289 Holes.emplace_back(EndAddress, Section.getEndAddress() - EndAddress); 1290 } 1291 1292 // If there is already a symbol at the start of the hole, grow that symbol 1293 // to cover the rest. Otherwise, create a new symbol to cover the hole. 1294 for (std::pair<uint64_t, uint64_t> &Hole : Holes) { 1295 BinaryData *BD = getBinaryDataAtAddress(Hole.first); 1296 if (BD) { 1297 // BD->getSection() can be != Section if there are sections that 1298 // overlap. In this case it is probably safe to just skip the holes 1299 // since the overlapping section will not(?) have any symbols in it. 1300 if (BD->getSection() == Section) 1301 setBinaryDataSize(Hole.first, Hole.second); 1302 } else { 1303 getOrCreateGlobalSymbol(Hole.first, "HOLEat", Hole.second, 1); 1304 } 1305 } 1306 } 1307 1308 assert(validateObjectNesting() && "object nesting inconsitency detected"); 1309 assert(validateHoles() && "top level hole detected in object map"); 1310 } 1311 1312 void BinaryContext::printGlobalSymbols(raw_ostream& OS) const { 1313 const BinarySection* CurrentSection = nullptr; 1314 bool FirstSection = true; 1315 1316 for (auto &Entry : BinaryDataMap) { 1317 const BinaryData *BD = Entry.second; 1318 const BinarySection &Section = BD->getSection(); 1319 if (FirstSection || Section != *CurrentSection) { 1320 uint64_t Address, Size; 1321 StringRef Name = Section.getName(); 1322 if (Section) { 1323 Address = Section.getAddress(); 1324 Size = Section.getSize(); 1325 } else { 1326 Address = BD->getAddress(); 1327 Size = BD->getSize(); 1328 } 1329 OS << "BOLT-INFO: Section " << Name << ", " 1330 << "0x" + Twine::utohexstr(Address) << ":" 1331 << "0x" + Twine::utohexstr(Address + Size) << "/" 1332 << Size << "\n"; 1333 CurrentSection = &Section; 1334 FirstSection = false; 1335 } 1336 1337 OS << "BOLT-INFO: "; 1338 const BinaryData *P = BD->getParent(); 1339 while (P) { 1340 OS << " "; 1341 P = P->getParent(); 1342 } 1343 OS << *BD << "\n"; 1344 } 1345 } 1346 1347 Expected<unsigned> 1348 BinaryContext::getDwarfFile(StringRef Directory, StringRef FileName, 1349 unsigned FileNumber, 1350 Optional<MD5::MD5Result> Checksum, 1351 Optional<StringRef> Source, unsigned CUID) { 1352 DwarfLineTable &Table = DwarfLineTablesCUMap[CUID]; 1353 return Table.tryGetFile(Directory, FileName, Checksum, Source, 1354 Ctx->getDwarfVersion(), FileNumber); 1355 } 1356 1357 unsigned BinaryContext::addDebugFilenameToUnit(const uint32_t DestCUID, 1358 const uint32_t SrcCUID, 1359 unsigned FileIndex) { 1360 DWARFCompileUnit *SrcUnit = DwCtx->getCompileUnitForOffset(SrcCUID); 1361 const DWARFDebugLine::LineTable *LineTable = 1362 DwCtx->getLineTableForUnit(SrcUnit); 1363 const std::vector<DWARFDebugLine::FileNameEntry> &FileNames = 1364 LineTable->Prologue.FileNames; 1365 // Dir indexes start at 1, as DWARF file numbers, and a dir index 0 1366 // means empty dir. 1367 assert(FileIndex > 0 && FileIndex <= FileNames.size() && 1368 "FileIndex out of range for the compilation unit."); 1369 StringRef Dir = ""; 1370 if (FileNames[FileIndex - 1].DirIdx != 0) { 1371 if (Optional<const char *> DirName = dwarf::toString( 1372 LineTable->Prologue 1373 .IncludeDirectories[FileNames[FileIndex - 1].DirIdx - 1])) { 1374 Dir = *DirName; 1375 } 1376 } 1377 StringRef FileName = ""; 1378 if (Optional<const char *> FName = 1379 dwarf::toString(FileNames[FileIndex - 1].Name)) 1380 FileName = *FName; 1381 assert(FileName != ""); 1382 return cantFail(getDwarfFile(Dir, FileName, 0, None, None, DestCUID)); 1383 } 1384 1385 std::vector<BinaryFunction *> BinaryContext::getSortedFunctions() { 1386 std::vector<BinaryFunction *> SortedFunctions(BinaryFunctions.size()); 1387 std::transform(BinaryFunctions.begin(), BinaryFunctions.end(), 1388 SortedFunctions.begin(), 1389 [](std::pair<const uint64_t, BinaryFunction> &BFI) { 1390 return &BFI.second; 1391 }); 1392 1393 std::stable_sort(SortedFunctions.begin(), SortedFunctions.end(), 1394 [] (const BinaryFunction *A, const BinaryFunction *B) { 1395 if (A->hasValidIndex() && B->hasValidIndex()) { 1396 return A->getIndex() < B->getIndex(); 1397 } 1398 return A->hasValidIndex(); 1399 }); 1400 return SortedFunctions; 1401 } 1402 1403 std::vector<BinaryFunction *> BinaryContext::getAllBinaryFunctions() { 1404 std::vector<BinaryFunction *> AllFunctions; 1405 AllFunctions.reserve(BinaryFunctions.size() + InjectedBinaryFunctions.size()); 1406 std::transform(BinaryFunctions.begin(), BinaryFunctions.end(), 1407 std::back_inserter(AllFunctions), 1408 [](std::pair<const uint64_t, BinaryFunction> &BFI) { 1409 return &BFI.second; 1410 }); 1411 std::copy(InjectedBinaryFunctions.begin(), InjectedBinaryFunctions.end(), 1412 std::back_inserter(AllFunctions)); 1413 1414 return AllFunctions; 1415 } 1416 1417 Optional<DWARFUnit *> BinaryContext::getDWOCU(uint64_t DWOId) { 1418 auto Iter = DWOCUs.find(DWOId); 1419 if (Iter == DWOCUs.end()) 1420 return None; 1421 1422 return Iter->second; 1423 } 1424 1425 DWARFContext *BinaryContext::getDWOContext() { 1426 if (DWOCUs.empty()) 1427 return nullptr; 1428 return &DWOCUs.begin()->second->getContext(); 1429 } 1430 1431 /// Handles DWO sections that can either be in .o, .dwo or .dwp files. 1432 void BinaryContext::preprocessDWODebugInfo() { 1433 for (const std::unique_ptr<DWARFUnit> &CU : DwCtx->compile_units()) { 1434 DWARFUnit *const DwarfUnit = CU.get(); 1435 if (llvm::Optional<uint64_t> DWOId = DwarfUnit->getDWOId()) { 1436 DWARFUnit *DWOCU = DwarfUnit->getNonSkeletonUnitDIE(false).getDwarfUnit(); 1437 if (!DWOCU->isDWOUnit()) { 1438 std::string DWOName = dwarf::toString( 1439 DwarfUnit->getUnitDIE().find( 1440 {dwarf::DW_AT_dwo_name, dwarf::DW_AT_GNU_dwo_name}), 1441 ""); 1442 outs() << "BOLT-WARNING: Debug Fission: DWO debug information for " 1443 << DWOName 1444 << " was not retrieved and won't be updated. Please check " 1445 "relative path.\n"; 1446 continue; 1447 } 1448 DWOCUs[*DWOId] = DWOCU; 1449 } 1450 } 1451 } 1452 1453 void BinaryContext::preprocessDebugInfo() { 1454 struct CURange { 1455 uint64_t LowPC; 1456 uint64_t HighPC; 1457 DWARFUnit *Unit; 1458 1459 bool operator<(const CURange &Other) const { 1460 return LowPC < Other.LowPC; 1461 } 1462 }; 1463 1464 // Building a map of address ranges to CUs similar to .debug_aranges and use 1465 // it to assign CU to functions. 1466 std::vector<CURange> AllRanges; 1467 AllRanges.reserve(DwCtx->getNumCompileUnits()); 1468 for (const std::unique_ptr<DWARFUnit> &CU : DwCtx->compile_units()) { 1469 Expected<DWARFAddressRangesVector> RangesOrError = 1470 CU->getUnitDIE().getAddressRanges(); 1471 if (!RangesOrError) { 1472 consumeError(RangesOrError.takeError()); 1473 continue; 1474 } 1475 for (DWARFAddressRange &Range : *RangesOrError) { 1476 // Parts of the debug info could be invalidated due to corresponding code 1477 // being removed from the binary by the linker. Hence we check if the 1478 // address is a valid one. 1479 if (containsAddress(Range.LowPC)) 1480 AllRanges.emplace_back(CURange{Range.LowPC, Range.HighPC, CU.get()}); 1481 } 1482 } 1483 1484 std::sort(AllRanges.begin(), AllRanges.end()); 1485 for (auto &KV : BinaryFunctions) { 1486 const uint64_t FunctionAddress = KV.first; 1487 BinaryFunction &Function = KV.second; 1488 1489 auto It = std::partition_point(AllRanges.begin(), AllRanges.end(), 1490 [=](CURange R) { return R.HighPC <= FunctionAddress; }); 1491 if (It != AllRanges.end() && It->LowPC <= FunctionAddress) { 1492 Function.setDWARFUnit(It->Unit); 1493 } 1494 } 1495 1496 // Discover units with debug info that needs to be updated. 1497 for (const auto &KV : BinaryFunctions) { 1498 const BinaryFunction &BF = KV.second; 1499 if (shouldEmit(BF) && BF.getDWARFUnit()) 1500 ProcessedCUs.insert(BF.getDWARFUnit()); 1501 } 1502 1503 // Clear debug info for functions from units that we are not going to process. 1504 for (auto &KV : BinaryFunctions) { 1505 BinaryFunction &BF = KV.second; 1506 if (BF.getDWARFUnit() && !ProcessedCUs.count(BF.getDWARFUnit())) 1507 BF.setDWARFUnit(nullptr); 1508 } 1509 1510 if (opts::Verbosity >= 1) { 1511 outs() << "BOLT-INFO: " << ProcessedCUs.size() << " out of " 1512 << DwCtx->getNumCompileUnits() << " CUs will be updated\n"; 1513 } 1514 1515 // Populate MCContext with DWARF files from all units. 1516 StringRef GlobalPrefix = AsmInfo->getPrivateGlobalPrefix(); 1517 for (const std::unique_ptr<DWARFUnit> &CU : DwCtx->compile_units()) { 1518 const uint64_t CUID = CU->getOffset(); 1519 getDwarfLineTable(CUID).setLabel(Ctx->getOrCreateSymbol( 1520 GlobalPrefix + "line_table_start" + Twine(CUID))); 1521 1522 if (!ProcessedCUs.count(CU.get())) 1523 continue; 1524 1525 const DWARFDebugLine::LineTable *LineTable = 1526 DwCtx->getLineTableForUnit(CU.get()); 1527 const std::vector<DWARFDebugLine::FileNameEntry> &FileNames = 1528 LineTable->Prologue.FileNames; 1529 1530 // Assign a unique label to every line table, one per CU. 1531 // Make sure empty debug line tables are registered too. 1532 if (FileNames.empty()) { 1533 cantFail(getDwarfFile("", "<unknown>", 0, None, None, CUID)); 1534 continue; 1535 } 1536 for (size_t I = 0, Size = FileNames.size(); I != Size; ++I) { 1537 // Dir indexes start at 1, as DWARF file numbers, and a dir index 0 1538 // means empty dir. 1539 StringRef Dir = ""; 1540 if (FileNames[I].DirIdx != 0) 1541 if (Optional<const char *> DirName = dwarf::toString( 1542 LineTable->Prologue 1543 .IncludeDirectories[FileNames[I].DirIdx - 1])) 1544 Dir = *DirName; 1545 StringRef FileName = ""; 1546 if (Optional<const char *> FName = dwarf::toString(FileNames[I].Name)) 1547 FileName = *FName; 1548 assert(FileName != ""); 1549 cantFail(getDwarfFile(Dir, FileName, 0, None, None, CUID)); 1550 } 1551 } 1552 1553 preprocessDWODebugInfo(); 1554 } 1555 1556 bool BinaryContext::shouldEmit(const BinaryFunction &Function) const { 1557 if (opts::processAllFunctions()) 1558 return true; 1559 1560 if (Function.isIgnored()) 1561 return false; 1562 1563 // In relocation mode we will emit non-simple functions with CFG. 1564 // If the function does not have a CFG it should be marked as ignored. 1565 return HasRelocations || Function.isSimple(); 1566 } 1567 1568 void BinaryContext::printCFI(raw_ostream &OS, const MCCFIInstruction &Inst) { 1569 uint32_t Operation = Inst.getOperation(); 1570 switch (Operation) { 1571 case MCCFIInstruction::OpSameValue: 1572 OS << "OpSameValue Reg" << Inst.getRegister(); 1573 break; 1574 case MCCFIInstruction::OpRememberState: 1575 OS << "OpRememberState"; 1576 break; 1577 case MCCFIInstruction::OpRestoreState: 1578 OS << "OpRestoreState"; 1579 break; 1580 case MCCFIInstruction::OpOffset: 1581 OS << "OpOffset Reg" << Inst.getRegister() << " " << Inst.getOffset(); 1582 break; 1583 case MCCFIInstruction::OpDefCfaRegister: 1584 OS << "OpDefCfaRegister Reg" << Inst.getRegister(); 1585 break; 1586 case MCCFIInstruction::OpDefCfaOffset: 1587 OS << "OpDefCfaOffset " << Inst.getOffset(); 1588 break; 1589 case MCCFIInstruction::OpDefCfa: 1590 OS << "OpDefCfa Reg" << Inst.getRegister() << " " << Inst.getOffset(); 1591 break; 1592 case MCCFIInstruction::OpRelOffset: 1593 OS << "OpRelOffset Reg" << Inst.getRegister() << " " << Inst.getOffset(); 1594 break; 1595 case MCCFIInstruction::OpAdjustCfaOffset: 1596 OS << "OfAdjustCfaOffset " << Inst.getOffset(); 1597 break; 1598 case MCCFIInstruction::OpEscape: 1599 OS << "OpEscape"; 1600 break; 1601 case MCCFIInstruction::OpRestore: 1602 OS << "OpRestore Reg" << Inst.getRegister(); 1603 break; 1604 case MCCFIInstruction::OpUndefined: 1605 OS << "OpUndefined Reg" << Inst.getRegister(); 1606 break; 1607 case MCCFIInstruction::OpRegister: 1608 OS << "OpRegister Reg" << Inst.getRegister() << " Reg" 1609 << Inst.getRegister2(); 1610 break; 1611 case MCCFIInstruction::OpWindowSave: 1612 OS << "OpWindowSave"; 1613 break; 1614 case MCCFIInstruction::OpGnuArgsSize: 1615 OS << "OpGnuArgsSize"; 1616 break; 1617 default: 1618 OS << "Op#" << Operation; 1619 break; 1620 } 1621 } 1622 1623 void BinaryContext::printInstruction(raw_ostream &OS, 1624 const MCInst &Instruction, 1625 uint64_t Offset, 1626 const BinaryFunction* Function, 1627 bool PrintMCInst, 1628 bool PrintMemData, 1629 bool PrintRelocations) const { 1630 if (MIB->isEHLabel(Instruction)) { 1631 OS << " EH_LABEL: " << *MIB->getTargetSymbol(Instruction) << '\n'; 1632 return; 1633 } 1634 OS << format(" %08" PRIx64 ": ", Offset); 1635 if (MIB->isCFI(Instruction)) { 1636 uint32_t Offset = Instruction.getOperand(0).getImm(); 1637 OS << "\t!CFI\t$" << Offset << "\t; "; 1638 if (Function) 1639 printCFI(OS, *Function->getCFIFor(Instruction)); 1640 OS << "\n"; 1641 return; 1642 } 1643 InstPrinter->printInst(&Instruction, 0, "", *STI, OS); 1644 if (MIB->isCall(Instruction)) { 1645 if (MIB->isTailCall(Instruction)) 1646 OS << " # TAILCALL "; 1647 if (MIB->isInvoke(Instruction)) { 1648 const Optional<MCPlus::MCLandingPad> EHInfo = MIB->getEHInfo(Instruction); 1649 OS << " # handler: "; 1650 if (EHInfo->first) 1651 OS << *EHInfo->first; 1652 else 1653 OS << '0'; 1654 OS << "; action: " << EHInfo->second; 1655 const int64_t GnuArgsSize = MIB->getGnuArgsSize(Instruction); 1656 if (GnuArgsSize >= 0) 1657 OS << "; GNU_args_size = " << GnuArgsSize; 1658 } 1659 } else if (MIB->isIndirectBranch(Instruction)) { 1660 if (uint64_t JTAddress = MIB->getJumpTable(Instruction)) { 1661 OS << " # JUMPTABLE @0x" << Twine::utohexstr(JTAddress); 1662 } else { 1663 OS << " # UNKNOWN CONTROL FLOW"; 1664 } 1665 } 1666 1667 MIB->printAnnotations(Instruction, OS); 1668 1669 if (opts::PrintDebugInfo) { 1670 DebugLineTableRowRef RowRef = 1671 DebugLineTableRowRef::fromSMLoc(Instruction.getLoc()); 1672 if (RowRef != DebugLineTableRowRef::NULL_ROW) { 1673 const DWARFDebugLine::LineTable *LineTable; 1674 if (Function && Function->getDWARFUnit() && 1675 Function->getDWARFUnit()->getOffset() == RowRef.DwCompileUnitIndex) { 1676 LineTable = Function->getDWARFLineTable(); 1677 } else { 1678 LineTable = DwCtx->getLineTableForUnit( 1679 DwCtx->getCompileUnitForOffset(RowRef.DwCompileUnitIndex)); 1680 } 1681 assert(LineTable && 1682 "line table expected for instruction with debug info"); 1683 1684 const DWARFDebugLine::Row &Row = LineTable->Rows[RowRef.RowIndex - 1]; 1685 StringRef FileName = ""; 1686 if (Optional<const char *> FName = 1687 dwarf::toString(LineTable->Prologue.FileNames[Row.File - 1].Name)) 1688 FileName = *FName; 1689 OS << " # debug line " << FileName << ":" << Row.Line; 1690 if (Row.Column) 1691 OS << ":" << Row.Column; 1692 if (Row.Discriminator) 1693 OS << " discriminator:" << Row.Discriminator; 1694 } 1695 } 1696 1697 if ((opts::PrintRelocations || PrintRelocations) && Function) { 1698 const uint64_t Size = computeCodeSize(&Instruction, &Instruction + 1); 1699 Function->printRelocations(OS, Offset, Size); 1700 } 1701 1702 OS << "\n"; 1703 1704 if (PrintMCInst) { 1705 Instruction.dump_pretty(OS, InstPrinter.get()); 1706 OS << "\n"; 1707 } 1708 } 1709 1710 ErrorOr<BinarySection &> BinaryContext::getSectionForAddress(uint64_t Address) { 1711 auto SI = AddressToSection.upper_bound(Address); 1712 if (SI != AddressToSection.begin()) { 1713 --SI; 1714 uint64_t UpperBound = SI->first + SI->second->getSize(); 1715 if (!SI->second->getSize()) 1716 UpperBound += 1; 1717 if (UpperBound > Address) 1718 return *SI->second; 1719 } 1720 return std::make_error_code(std::errc::bad_address); 1721 } 1722 1723 ErrorOr<StringRef> 1724 BinaryContext::getSectionNameForAddress(uint64_t Address) const { 1725 if (ErrorOr<const BinarySection &> Section = getSectionForAddress(Address)) { 1726 return Section->getName(); 1727 } 1728 return std::make_error_code(std::errc::bad_address); 1729 } 1730 1731 BinarySection &BinaryContext::registerSection(BinarySection *Section) { 1732 auto Res = Sections.insert(Section); 1733 (void)Res; 1734 assert(Res.second && "can't register the same section twice."); 1735 1736 // Only register allocatable sections in the AddressToSection map. 1737 if (Section->isAllocatable() && Section->getAddress()) 1738 AddressToSection.insert(std::make_pair(Section->getAddress(), Section)); 1739 NameToSection.insert( 1740 std::make_pair(std::string(Section->getName()), Section)); 1741 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: registering " << *Section << "\n"); 1742 return *Section; 1743 } 1744 1745 BinarySection &BinaryContext::registerSection(SectionRef Section) { 1746 return registerSection(new BinarySection(*this, Section)); 1747 } 1748 1749 BinarySection & 1750 BinaryContext::registerSection(StringRef SectionName, 1751 const BinarySection &OriginalSection) { 1752 return registerSection(new BinarySection(*this, 1753 SectionName, 1754 OriginalSection)); 1755 } 1756 1757 BinarySection &BinaryContext::registerOrUpdateSection(StringRef Name, 1758 unsigned ELFType, 1759 unsigned ELFFlags, 1760 uint8_t *Data, 1761 uint64_t Size, 1762 unsigned Alignment) { 1763 auto NamedSections = getSectionByName(Name); 1764 if (NamedSections.begin() != NamedSections.end()) { 1765 assert(std::next(NamedSections.begin()) == NamedSections.end() && 1766 "can only update unique sections"); 1767 BinarySection *Section = NamedSections.begin()->second; 1768 1769 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: updating " << *Section << " -> "); 1770 const bool Flag = Section->isAllocatable(); 1771 (void)Flag; 1772 Section->update(Data, Size, Alignment, ELFType, ELFFlags); 1773 LLVM_DEBUG(dbgs() << *Section << "\n"); 1774 // FIXME: Fix section flags/attributes for MachO. 1775 if (isELF()) 1776 assert(Flag == Section->isAllocatable() && 1777 "can't change section allocation status"); 1778 return *Section; 1779 } 1780 1781 return registerSection(new BinarySection(*this, Name, Data, Size, Alignment, 1782 ELFType, ELFFlags)); 1783 } 1784 1785 bool BinaryContext::deregisterSection(BinarySection &Section) { 1786 BinarySection *SectionPtr = &Section; 1787 auto Itr = Sections.find(SectionPtr); 1788 if (Itr != Sections.end()) { 1789 auto Range = AddressToSection.equal_range(SectionPtr->getAddress()); 1790 while (Range.first != Range.second) { 1791 if (Range.first->second == SectionPtr) { 1792 AddressToSection.erase(Range.first); 1793 break; 1794 } 1795 ++Range.first; 1796 } 1797 1798 auto NameRange = 1799 NameToSection.equal_range(std::string(SectionPtr->getName())); 1800 while (NameRange.first != NameRange.second) { 1801 if (NameRange.first->second == SectionPtr) { 1802 NameToSection.erase(NameRange.first); 1803 break; 1804 } 1805 ++NameRange.first; 1806 } 1807 1808 Sections.erase(Itr); 1809 delete SectionPtr; 1810 return true; 1811 } 1812 return false; 1813 } 1814 1815 void BinaryContext::printSections(raw_ostream &OS) const { 1816 for (BinarySection *const &Section : Sections) { 1817 OS << "BOLT-INFO: " << *Section << "\n"; 1818 } 1819 } 1820 1821 BinarySection &BinaryContext::absoluteSection() { 1822 if (ErrorOr<BinarySection &> Section = getUniqueSectionByName("<absolute>")) 1823 return *Section; 1824 return registerOrUpdateSection("<absolute>", ELF::SHT_NULL, 0u); 1825 } 1826 1827 ErrorOr<uint64_t> 1828 BinaryContext::getUnsignedValueAtAddress(uint64_t Address, 1829 size_t Size) const { 1830 const ErrorOr<const BinarySection &> Section = getSectionForAddress(Address); 1831 if (!Section) 1832 return std::make_error_code(std::errc::bad_address); 1833 1834 if (Section->isVirtual()) 1835 return 0; 1836 1837 DataExtractor DE(Section->getContents(), AsmInfo->isLittleEndian(), 1838 AsmInfo->getCodePointerSize()); 1839 auto ValueOffset = static_cast<uint64_t>(Address - Section->getAddress()); 1840 return DE.getUnsigned(&ValueOffset, Size); 1841 } 1842 1843 ErrorOr<uint64_t> 1844 BinaryContext::getSignedValueAtAddress(uint64_t Address, 1845 size_t Size) const { 1846 const ErrorOr<const BinarySection &> Section = getSectionForAddress(Address); 1847 if (!Section) 1848 return std::make_error_code(std::errc::bad_address); 1849 1850 if (Section->isVirtual()) 1851 return 0; 1852 1853 DataExtractor DE(Section->getContents(), AsmInfo->isLittleEndian(), 1854 AsmInfo->getCodePointerSize()); 1855 auto ValueOffset = static_cast<uint64_t>(Address - Section->getAddress()); 1856 return DE.getSigned(&ValueOffset, Size); 1857 } 1858 1859 void BinaryContext::addRelocation(uint64_t Address, 1860 MCSymbol *Symbol, 1861 uint64_t Type, 1862 uint64_t Addend, 1863 uint64_t Value) { 1864 ErrorOr<BinarySection &> Section = getSectionForAddress(Address); 1865 assert(Section && "cannot find section for address"); 1866 Section->addRelocation(Address - Section->getAddress(), 1867 Symbol, 1868 Type, 1869 Addend, 1870 Value); 1871 } 1872 1873 void BinaryContext::addDynamicRelocation(uint64_t Address, 1874 MCSymbol *Symbol, 1875 uint64_t Type, 1876 uint64_t Addend, 1877 uint64_t Value) { 1878 ErrorOr<BinarySection &> Section = getSectionForAddress(Address); 1879 assert(Section && "cannot find section for address"); 1880 Section->addDynamicRelocation(Address - Section->getAddress(), 1881 Symbol, 1882 Type, 1883 Addend, 1884 Value); 1885 } 1886 1887 bool BinaryContext::removeRelocationAt(uint64_t Address) { 1888 ErrorOr<BinarySection &> Section = getSectionForAddress(Address); 1889 assert(Section && "cannot find section for address"); 1890 return Section->removeRelocationAt(Address - Section->getAddress()); 1891 } 1892 1893 const Relocation *BinaryContext::getRelocationAt(uint64_t Address) { 1894 ErrorOr<BinarySection &> Section = getSectionForAddress(Address); 1895 if (!Section) 1896 return nullptr; 1897 1898 return Section->getRelocationAt(Address - Section->getAddress()); 1899 } 1900 1901 const Relocation *BinaryContext::getDynamicRelocationAt(uint64_t Address) { 1902 ErrorOr<BinarySection &> Section = getSectionForAddress(Address); 1903 if (!Section) 1904 return nullptr; 1905 1906 return Section->getDynamicRelocationAt(Address - Section->getAddress()); 1907 } 1908 1909 void BinaryContext::markAmbiguousRelocations(BinaryData &BD, 1910 const uint64_t Address) { 1911 auto setImmovable = [&](BinaryData &BD) { 1912 BinaryData *Root = BD.getAtomicRoot(); 1913 LLVM_DEBUG(if (Root->isMoveable()) { 1914 dbgs() << "BOLT-DEBUG: setting " << *Root << " as immovable " 1915 << "due to ambiguous relocation referencing 0x" 1916 << Twine::utohexstr(Address) << '\n'; 1917 }); 1918 Root->setIsMoveable(false); 1919 }; 1920 1921 if (Address == BD.getAddress()) { 1922 setImmovable(BD); 1923 1924 // Set previous symbol as immovable 1925 BinaryData *Prev = getBinaryDataContainingAddress(Address - 1); 1926 if (Prev && Prev->getEndAddress() == BD.getAddress()) 1927 setImmovable(*Prev); 1928 } 1929 1930 if (Address == BD.getEndAddress()) { 1931 setImmovable(BD); 1932 1933 // Set next symbol as immovable 1934 BinaryData *Next = getBinaryDataContainingAddress(BD.getEndAddress()); 1935 if (Next && Next->getAddress() == BD.getEndAddress()) 1936 setImmovable(*Next); 1937 } 1938 } 1939 1940 BinaryFunction *BinaryContext::getFunctionForSymbol(const MCSymbol *Symbol, 1941 uint64_t *EntryDesc) { 1942 std::shared_lock<std::shared_timed_mutex> Lock(SymbolToFunctionMapMutex); 1943 auto BFI = SymbolToFunctionMap.find(Symbol); 1944 if (BFI == SymbolToFunctionMap.end()) 1945 return nullptr; 1946 1947 BinaryFunction *BF = BFI->second; 1948 if (EntryDesc) 1949 *EntryDesc = BF->getEntryIDForSymbol(Symbol); 1950 1951 return BF; 1952 } 1953 1954 void BinaryContext::exitWithBugReport(StringRef Message, 1955 const BinaryFunction &Function) const { 1956 errs() << "=======================================\n"; 1957 errs() << "BOLT is unable to proceed because it couldn't properly understand " 1958 "this function.\n"; 1959 errs() << "If you are running the most recent version of BOLT, you may " 1960 "want to " 1961 "report this and paste this dump.\nPlease check that there is no " 1962 "sensitive contents being shared in this dump.\n"; 1963 errs() << "\nOffending function: " << Function.getPrintName() << "\n\n"; 1964 ScopedPrinter SP(errs()); 1965 SP.printBinaryBlock("Function contents", *Function.getData()); 1966 errs() << "\n"; 1967 Function.dump(); 1968 errs() << "ERROR: " << Message; 1969 errs() << "\n=======================================\n"; 1970 exit(1); 1971 } 1972 1973 BinaryFunction * 1974 BinaryContext::createInjectedBinaryFunction(const std::string &Name, 1975 bool IsSimple) { 1976 InjectedBinaryFunctions.push_back(new BinaryFunction(Name, *this, IsSimple)); 1977 BinaryFunction *BF = InjectedBinaryFunctions.back(); 1978 setSymbolToFunctionMap(BF->getSymbol(), BF); 1979 BF->CurrentState = BinaryFunction::State::CFG; 1980 return BF; 1981 } 1982 1983 std::pair<size_t, size_t> 1984 BinaryContext::calculateEmittedSize(BinaryFunction &BF, bool FixBranches) { 1985 // Adjust branch instruction to match the current layout. 1986 if (FixBranches) 1987 BF.fixBranches(); 1988 1989 // Create local MC context to isolate the effect of ephemeral code emission. 1990 IndependentCodeEmitter MCEInstance = createIndependentMCCodeEmitter(); 1991 MCContext *LocalCtx = MCEInstance.LocalCtx.get(); 1992 MCAsmBackend *MAB = 1993 TheTarget->createMCAsmBackend(*STI, *MRI, MCTargetOptions()); 1994 1995 SmallString<256> Code; 1996 raw_svector_ostream VecOS(Code); 1997 1998 std::unique_ptr<MCObjectWriter> OW = MAB->createObjectWriter(VecOS); 1999 std::unique_ptr<MCStreamer> Streamer(TheTarget->createMCObjectStreamer( 2000 *TheTriple, *LocalCtx, std::unique_ptr<MCAsmBackend>(MAB), std::move(OW), 2001 std::unique_ptr<MCCodeEmitter>(MCEInstance.MCE.release()), *STI, 2002 /*RelaxAll=*/false, 2003 /*IncrementalLinkerCompatible=*/false, 2004 /*DWARFMustBeAtTheEnd=*/false)); 2005 2006 Streamer->initSections(false, *STI); 2007 2008 MCSection *Section = MCEInstance.LocalMOFI->getTextSection(); 2009 Section->setHasInstructions(true); 2010 2011 // Create symbols in the LocalCtx so that they get destroyed with it. 2012 MCSymbol *StartLabel = LocalCtx->createTempSymbol(); 2013 MCSymbol *EndLabel = LocalCtx->createTempSymbol(); 2014 MCSymbol *ColdStartLabel = LocalCtx->createTempSymbol(); 2015 MCSymbol *ColdEndLabel = LocalCtx->createTempSymbol(); 2016 2017 Streamer->SwitchSection(Section); 2018 Streamer->emitLabel(StartLabel); 2019 emitFunctionBody(*Streamer, BF, /*EmitColdPart=*/false, 2020 /*EmitCodeOnly=*/true); 2021 Streamer->emitLabel(EndLabel); 2022 2023 if (BF.isSplit()) { 2024 MCSectionELF *ColdSection = 2025 LocalCtx->getELFSection(BF.getColdCodeSectionName(), ELF::SHT_PROGBITS, 2026 ELF::SHF_EXECINSTR | ELF::SHF_ALLOC); 2027 ColdSection->setHasInstructions(true); 2028 2029 Streamer->SwitchSection(ColdSection); 2030 Streamer->emitLabel(ColdStartLabel); 2031 emitFunctionBody(*Streamer, BF, /*EmitColdPart=*/true, 2032 /*EmitCodeOnly=*/true); 2033 Streamer->emitLabel(ColdEndLabel); 2034 // To avoid calling MCObjectStreamer::flushPendingLabels() which is private 2035 Streamer->emitBytes(StringRef("")); 2036 Streamer->SwitchSection(Section); 2037 } 2038 2039 // To avoid calling MCObjectStreamer::flushPendingLabels() which is private or 2040 // MCStreamer::Finish(), which does more than we want 2041 Streamer->emitBytes(StringRef("")); 2042 2043 MCAssembler &Assembler = 2044 static_cast<MCObjectStreamer *>(Streamer.get())->getAssembler(); 2045 MCAsmLayout Layout(Assembler); 2046 Assembler.layout(Layout); 2047 2048 const uint64_t HotSize = 2049 Layout.getSymbolOffset(*EndLabel) - Layout.getSymbolOffset(*StartLabel); 2050 const uint64_t ColdSize = BF.isSplit() 2051 ? Layout.getSymbolOffset(*ColdEndLabel) - 2052 Layout.getSymbolOffset(*ColdStartLabel) 2053 : 0ULL; 2054 2055 // Clean-up the effect of the code emission. 2056 for (const MCSymbol &Symbol : Assembler.symbols()) { 2057 MCSymbol *MutableSymbol = const_cast<MCSymbol *>(&Symbol); 2058 MutableSymbol->setUndefined(); 2059 MutableSymbol->setIsRegistered(false); 2060 } 2061 2062 return std::make_pair(HotSize, ColdSize); 2063 } 2064 2065 bool BinaryContext::validateEncoding(const MCInst &Inst, 2066 ArrayRef<uint8_t> InputEncoding) const { 2067 SmallString<256> Code; 2068 SmallVector<MCFixup, 4> Fixups; 2069 raw_svector_ostream VecOS(Code); 2070 2071 MCE->encodeInstruction(Inst, VecOS, Fixups, *STI); 2072 auto EncodedData = ArrayRef<uint8_t>((uint8_t *)Code.data(), Code.size()); 2073 if (InputEncoding != EncodedData) { 2074 if (opts::Verbosity > 1) { 2075 errs() << "BOLT-WARNING: mismatched encoding detected\n" 2076 << " input: " << InputEncoding << '\n' 2077 << " output: " << EncodedData << '\n'; 2078 } 2079 return false; 2080 } 2081 2082 return true; 2083 } 2084 2085 uint64_t BinaryContext::getHotThreshold() const { 2086 static uint64_t Threshold = 0; 2087 if (Threshold == 0) { 2088 Threshold = std::max((uint64_t)opts::ExecutionCountThreshold, 2089 NumProfiledFuncs ? SumExecutionCount / (2 * NumProfiledFuncs) : 1); 2090 } 2091 return Threshold; 2092 } 2093 2094 BinaryFunction * 2095 BinaryContext::getBinaryFunctionContainingAddress(uint64_t Address, 2096 bool CheckPastEnd, 2097 bool UseMaxSize) { 2098 auto FI = BinaryFunctions.upper_bound(Address); 2099 if (FI == BinaryFunctions.begin()) 2100 return nullptr; 2101 --FI; 2102 2103 const uint64_t UsedSize = 2104 UseMaxSize ? FI->second.getMaxSize() : FI->second.getSize(); 2105 2106 if (Address >= FI->first + UsedSize + (CheckPastEnd ? 1 : 0)) 2107 return nullptr; 2108 2109 return &FI->second; 2110 } 2111 2112 BinaryFunction * 2113 BinaryContext::getBinaryFunctionAtAddress(uint64_t Address) { 2114 // First, try to find a function starting at the given address. If the 2115 // function was folded, this will get us the original folded function if it 2116 // wasn't removed from the list, e.g. in non-relocation mode. 2117 auto BFI = BinaryFunctions.find(Address); 2118 if (BFI != BinaryFunctions.end()) { 2119 return &BFI->second; 2120 } 2121 2122 // We might have folded the function matching the object at the given 2123 // address. In such case, we look for a function matching the symbol 2124 // registered at the original address. The new function (the one that the 2125 // original was folded into) will hold the symbol. 2126 if (const BinaryData *BD = getBinaryDataAtAddress(Address)) { 2127 uint64_t EntryID = 0; 2128 BinaryFunction *BF = getFunctionForSymbol(BD->getSymbol(), &EntryID); 2129 if (BF && EntryID == 0) 2130 return BF; 2131 } 2132 return nullptr; 2133 } 2134 2135 DebugAddressRangesVector BinaryContext::translateModuleAddressRanges( 2136 const DWARFAddressRangesVector &InputRanges) const { 2137 DebugAddressRangesVector OutputRanges; 2138 2139 for (const DWARFAddressRange Range : InputRanges) { 2140 auto BFI = BinaryFunctions.lower_bound(Range.LowPC); 2141 while (BFI != BinaryFunctions.end()) { 2142 const BinaryFunction &Function = BFI->second; 2143 if (Function.getAddress() >= Range.HighPC) 2144 break; 2145 const DebugAddressRangesVector FunctionRanges = 2146 Function.getOutputAddressRanges(); 2147 std::move(std::begin(FunctionRanges), 2148 std::end(FunctionRanges), 2149 std::back_inserter(OutputRanges)); 2150 std::advance(BFI, 1); 2151 } 2152 } 2153 2154 return OutputRanges; 2155 } 2156 2157 } // namespace bolt 2158 } // namespace llvm 2159