1 //===- bolt/Core/BinaryContext.cpp - Low-level context --------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file implements the BinaryContext class. 10 // 11 //===----------------------------------------------------------------------===// 12 13 #include "bolt/Core/BinaryContext.h" 14 #include "bolt/Core/BinaryEmitter.h" 15 #include "bolt/Core/BinaryFunction.h" 16 #include "bolt/Utils/CommandLineOpts.h" 17 #include "bolt/Utils/NameResolver.h" 18 #include "bolt/Utils/Utils.h" 19 #include "llvm/ADT/STLExtras.h" 20 #include "llvm/ADT/Twine.h" 21 #include "llvm/DebugInfo/DWARF/DWARFCompileUnit.h" 22 #include "llvm/DebugInfo/DWARF/DWARFFormValue.h" 23 #include "llvm/DebugInfo/DWARF/DWARFUnit.h" 24 #include "llvm/MC/MCAsmLayout.h" 25 #include "llvm/MC/MCAssembler.h" 26 #include "llvm/MC/MCContext.h" 27 #include "llvm/MC/MCDisassembler/MCDisassembler.h" 28 #include "llvm/MC/MCInstPrinter.h" 29 #include "llvm/MC/MCObjectStreamer.h" 30 #include "llvm/MC/MCObjectWriter.h" 31 #include "llvm/MC/MCRegisterInfo.h" 32 #include "llvm/MC/MCSectionELF.h" 33 #include "llvm/MC/MCStreamer.h" 34 #include "llvm/MC/MCSubtargetInfo.h" 35 #include "llvm/MC/MCSymbol.h" 36 #include "llvm/Support/CommandLine.h" 37 #include "llvm/Support/Error.h" 38 #include "llvm/Support/Regex.h" 39 #include <algorithm> 40 #include <functional> 41 #include <iterator> 42 #include <numeric> 43 #include <unordered_set> 44 45 using namespace llvm; 46 47 #undef DEBUG_TYPE 48 #define DEBUG_TYPE "bolt" 49 50 namespace opts { 51 52 cl::opt<bool> NoHugePages("no-huge-pages", 53 cl::desc("use regular size pages for code alignment"), 54 cl::Hidden, cl::cat(BoltCategory)); 55 56 static cl::opt<bool> 57 PrintDebugInfo("print-debug-info", 
58 cl::desc("print debug info when printing functions"), 59 cl::Hidden, 60 cl::ZeroOrMore, 61 cl::cat(BoltCategory)); 62 63 cl::opt<bool> PrintRelocations( 64 "print-relocations", 65 cl::desc("print relocations when printing functions/objects"), cl::Hidden, 66 cl::cat(BoltCategory)); 67 68 static cl::opt<bool> 69 PrintMemData("print-mem-data", 70 cl::desc("print memory data annotations when printing functions"), 71 cl::Hidden, 72 cl::ZeroOrMore, 73 cl::cat(BoltCategory)); 74 75 cl::opt<std::string> CompDirOverride( 76 "comp-dir-override", 77 cl::desc("overrides DW_AT_comp_dir, and provides an alterantive base " 78 "location, which is used with DW_AT_dwo_name to construct a path " 79 "to *.dwo files."), 80 cl::Hidden, cl::init(""), cl::cat(BoltCategory)); 81 } // namespace opts 82 83 namespace llvm { 84 namespace bolt { 85 86 char BOLTError::ID = 0; 87 88 BOLTError::BOLTError(bool IsFatal, const Twine &S) 89 : IsFatal(IsFatal), Msg(S.str()) {} 90 91 void BOLTError::log(raw_ostream &OS) const { 92 if (IsFatal) 93 OS << "FATAL "; 94 StringRef ErrMsg = StringRef(Msg); 95 // Prepend our error prefix if it is missing 96 if (ErrMsg.empty()) { 97 OS << "BOLT-ERROR\n"; 98 } else { 99 if (!ErrMsg.starts_with("BOLT-ERROR")) 100 OS << "BOLT-ERROR: "; 101 OS << ErrMsg << "\n"; 102 } 103 } 104 105 std::error_code BOLTError::convertToErrorCode() const { 106 return inconvertibleErrorCode(); 107 } 108 109 Error createNonFatalBOLTError(const Twine &S) { 110 return make_error<BOLTError>(/*IsFatal*/ false, S); 111 } 112 113 Error createFatalBOLTError(const Twine &S) { 114 return make_error<BOLTError>(/*IsFatal*/ true, S); 115 } 116 117 BinaryContext::BinaryContext(std::unique_ptr<MCContext> Ctx, 118 std::unique_ptr<DWARFContext> DwCtx, 119 std::unique_ptr<Triple> TheTriple, 120 const Target *TheTarget, std::string TripleName, 121 std::unique_ptr<MCCodeEmitter> MCE, 122 std::unique_ptr<MCObjectFileInfo> MOFI, 123 std::unique_ptr<const MCAsmInfo> AsmInfo, 124 std::unique_ptr<const 
MCInstrInfo> MII, 125 std::unique_ptr<const MCSubtargetInfo> STI, 126 std::unique_ptr<MCInstPrinter> InstPrinter, 127 std::unique_ptr<const MCInstrAnalysis> MIA, 128 std::unique_ptr<MCPlusBuilder> MIB, 129 std::unique_ptr<const MCRegisterInfo> MRI, 130 std::unique_ptr<MCDisassembler> DisAsm) 131 : Ctx(std::move(Ctx)), DwCtx(std::move(DwCtx)), 132 TheTriple(std::move(TheTriple)), TheTarget(TheTarget), 133 TripleName(TripleName), MCE(std::move(MCE)), MOFI(std::move(MOFI)), 134 AsmInfo(std::move(AsmInfo)), MII(std::move(MII)), STI(std::move(STI)), 135 InstPrinter(std::move(InstPrinter)), MIA(std::move(MIA)), 136 MIB(std::move(MIB)), MRI(std::move(MRI)), DisAsm(std::move(DisAsm)) { 137 Relocation::Arch = this->TheTriple->getArch(); 138 RegularPageSize = isAArch64() ? RegularPageSizeAArch64 : RegularPageSizeX86; 139 PageAlign = opts::NoHugePages ? RegularPageSize : HugePageSize; 140 } 141 142 BinaryContext::~BinaryContext() { 143 for (BinarySection *Section : Sections) 144 delete Section; 145 for (BinaryFunction *InjectedFunction : InjectedBinaryFunctions) 146 delete InjectedFunction; 147 for (std::pair<const uint64_t, JumpTable *> JTI : JumpTables) 148 delete JTI.second; 149 clearBinaryData(); 150 } 151 152 /// Create BinaryContext for a given architecture \p ArchName and 153 /// triple \p TripleName. 
154 Expected<std::unique_ptr<BinaryContext>> 155 BinaryContext::createBinaryContext(const ObjectFile *File, bool IsPIC, 156 std::unique_ptr<DWARFContext> DwCtx) { 157 StringRef ArchName = ""; 158 std::string FeaturesStr = ""; 159 switch (File->getArch()) { 160 case llvm::Triple::x86_64: 161 ArchName = "x86-64"; 162 FeaturesStr = "+nopl"; 163 break; 164 case llvm::Triple::aarch64: 165 ArchName = "aarch64"; 166 FeaturesStr = "+all"; 167 break; 168 case llvm::Triple::riscv64: { 169 ArchName = "riscv64"; 170 Expected<SubtargetFeatures> Features = File->getFeatures(); 171 172 if (auto E = Features.takeError()) 173 return std::move(E); 174 175 // We rely on relaxation for some transformations (e.g., promoting all calls 176 // to PseudoCALL and then making JITLink relax them). Since the relax 177 // feature is not stored in the object file, we manually enable it. 178 Features->AddFeature("relax"); 179 FeaturesStr = Features->getString(); 180 break; 181 } 182 default: 183 return createStringError(std::errc::not_supported, 184 "BOLT-ERROR: Unrecognized machine in ELF file"); 185 } 186 187 auto TheTriple = std::make_unique<Triple>(File->makeTriple()); 188 const std::string TripleName = TheTriple->str(); 189 190 std::string Error; 191 const Target *TheTarget = 192 TargetRegistry::lookupTarget(std::string(ArchName), *TheTriple, Error); 193 if (!TheTarget) 194 return createStringError(make_error_code(std::errc::not_supported), 195 Twine("BOLT-ERROR: ", Error)); 196 197 std::unique_ptr<const MCRegisterInfo> MRI( 198 TheTarget->createMCRegInfo(TripleName)); 199 if (!MRI) 200 return createStringError( 201 make_error_code(std::errc::not_supported), 202 Twine("BOLT-ERROR: no register info for target ", TripleName)); 203 204 // Set up disassembler. 
205 std::unique_ptr<MCAsmInfo> AsmInfo( 206 TheTarget->createMCAsmInfo(*MRI, TripleName, MCTargetOptions())); 207 if (!AsmInfo) 208 return createStringError( 209 make_error_code(std::errc::not_supported), 210 Twine("BOLT-ERROR: no assembly info for target ", TripleName)); 211 // BOLT creates "func@PLT" symbols for PLT entries. In function assembly dump 212 // we want to emit such names as using @PLT without double quotes to convey 213 // variant kind to the assembler. BOLT doesn't rely on the linker so we can 214 // override the default AsmInfo behavior to emit names the way we want. 215 AsmInfo->setAllowAtInName(true); 216 217 std::unique_ptr<const MCSubtargetInfo> STI( 218 TheTarget->createMCSubtargetInfo(TripleName, "", FeaturesStr)); 219 if (!STI) 220 return createStringError( 221 make_error_code(std::errc::not_supported), 222 Twine("BOLT-ERROR: no subtarget info for target ", TripleName)); 223 224 std::unique_ptr<const MCInstrInfo> MII(TheTarget->createMCInstrInfo()); 225 if (!MII) 226 return createStringError( 227 make_error_code(std::errc::not_supported), 228 Twine("BOLT-ERROR: no instruction info for target ", TripleName)); 229 230 std::unique_ptr<MCContext> Ctx( 231 new MCContext(*TheTriple, AsmInfo.get(), MRI.get(), STI.get())); 232 std::unique_ptr<MCObjectFileInfo> MOFI( 233 TheTarget->createMCObjectFileInfo(*Ctx, IsPIC)); 234 Ctx->setObjectFileInfo(MOFI.get()); 235 // We do not support X86 Large code model. Change this in the future. 236 bool Large = false; 237 if (TheTriple->getArch() == llvm::Triple::aarch64) 238 Large = true; 239 unsigned LSDAEncoding = 240 Large ? dwarf::DW_EH_PE_absptr : dwarf::DW_EH_PE_udata4; 241 if (IsPIC) { 242 LSDAEncoding = dwarf::DW_EH_PE_pcrel | 243 (Large ? 
dwarf::DW_EH_PE_sdata8 : dwarf::DW_EH_PE_sdata4); 244 } 245 246 std::unique_ptr<MCDisassembler> DisAsm( 247 TheTarget->createMCDisassembler(*STI, *Ctx)); 248 249 if (!DisAsm) 250 return createStringError( 251 make_error_code(std::errc::not_supported), 252 Twine("BOLT-ERROR: no disassembler info for target ", TripleName)); 253 254 std::unique_ptr<const MCInstrAnalysis> MIA( 255 TheTarget->createMCInstrAnalysis(MII.get())); 256 if (!MIA) 257 return createStringError( 258 make_error_code(std::errc::not_supported), 259 Twine("BOLT-ERROR: failed to create instruction analysis for target ", 260 TripleName)); 261 262 int AsmPrinterVariant = AsmInfo->getAssemblerDialect(); 263 std::unique_ptr<MCInstPrinter> InstructionPrinter( 264 TheTarget->createMCInstPrinter(*TheTriple, AsmPrinterVariant, *AsmInfo, 265 *MII, *MRI)); 266 if (!InstructionPrinter) 267 return createStringError( 268 make_error_code(std::errc::not_supported), 269 Twine("BOLT-ERROR: no instruction printer for target ", TripleName)); 270 InstructionPrinter->setPrintImmHex(true); 271 272 std::unique_ptr<MCCodeEmitter> MCE( 273 TheTarget->createMCCodeEmitter(*MII, *Ctx)); 274 275 // Make sure we don't miss any output on core dumps. 
276 outs().SetUnbuffered(); 277 errs().SetUnbuffered(); 278 dbgs().SetUnbuffered(); 279 280 auto BC = std::make_unique<BinaryContext>( 281 std::move(Ctx), std::move(DwCtx), std::move(TheTriple), TheTarget, 282 std::string(TripleName), std::move(MCE), std::move(MOFI), 283 std::move(AsmInfo), std::move(MII), std::move(STI), 284 std::move(InstructionPrinter), std::move(MIA), nullptr, std::move(MRI), 285 std::move(DisAsm)); 286 287 BC->LSDAEncoding = LSDAEncoding; 288 289 BC->MAB = std::unique_ptr<MCAsmBackend>( 290 BC->TheTarget->createMCAsmBackend(*BC->STI, *BC->MRI, MCTargetOptions())); 291 292 BC->setFilename(File->getFileName()); 293 294 BC->HasFixedLoadAddress = !IsPIC; 295 296 BC->SymbolicDisAsm = std::unique_ptr<MCDisassembler>( 297 BC->TheTarget->createMCDisassembler(*BC->STI, *BC->Ctx)); 298 299 if (!BC->SymbolicDisAsm) 300 return createStringError( 301 make_error_code(std::errc::not_supported), 302 Twine("BOLT-ERROR: no disassembler info for target ", TripleName)); 303 304 return std::move(BC); 305 } 306 307 bool BinaryContext::forceSymbolRelocations(StringRef SymbolName) const { 308 if (opts::HotText && 309 (SymbolName == "__hot_start" || SymbolName == "__hot_end")) 310 return true; 311 312 if (opts::HotData && 313 (SymbolName == "__hot_data_start" || SymbolName == "__hot_data_end")) 314 return true; 315 316 if (SymbolName == "_end") 317 return true; 318 319 return false; 320 } 321 322 std::unique_ptr<MCObjectWriter> 323 BinaryContext::createObjectWriter(raw_pwrite_stream &OS) { 324 return MAB->createObjectWriter(OS); 325 } 326 327 bool BinaryContext::validateObjectNesting() const { 328 auto Itr = BinaryDataMap.begin(); 329 auto End = BinaryDataMap.end(); 330 bool Valid = true; 331 while (Itr != End) { 332 auto Next = std::next(Itr); 333 while (Next != End && 334 Itr->second->getSection() == Next->second->getSection() && 335 Itr->second->containsRange(Next->second->getAddress(), 336 Next->second->getSize())) { 337 if (Next->second->Parent != Itr->second) { 
338 errs() << "BOLT-WARNING: object nesting incorrect for:\n" 339 << "BOLT-WARNING: " << *Itr->second << "\n" 340 << "BOLT-WARNING: " << *Next->second << "\n"; 341 Valid = false; 342 } 343 ++Next; 344 } 345 Itr = Next; 346 } 347 return Valid; 348 } 349 350 bool BinaryContext::validateHoles() const { 351 bool Valid = true; 352 for (BinarySection &Section : sections()) { 353 for (const Relocation &Rel : Section.relocations()) { 354 uint64_t RelAddr = Rel.Offset + Section.getAddress(); 355 const BinaryData *BD = getBinaryDataContainingAddress(RelAddr); 356 if (!BD) { 357 errs() << "BOLT-WARNING: no BinaryData found for relocation at address" 358 << " 0x" << Twine::utohexstr(RelAddr) << " in " 359 << Section.getName() << "\n"; 360 Valid = false; 361 } else if (!BD->getAtomicRoot()) { 362 errs() << "BOLT-WARNING: no atomic BinaryData found for relocation at " 363 << "address 0x" << Twine::utohexstr(RelAddr) << " in " 364 << Section.getName() << "\n"; 365 Valid = false; 366 } 367 } 368 } 369 return Valid; 370 } 371 372 void BinaryContext::updateObjectNesting(BinaryDataMapType::iterator GAI) { 373 const uint64_t Address = GAI->second->getAddress(); 374 const uint64_t Size = GAI->second->getSize(); 375 376 auto fixParents = [&](BinaryDataMapType::iterator Itr, 377 BinaryData *NewParent) { 378 BinaryData *OldParent = Itr->second->Parent; 379 Itr->second->Parent = NewParent; 380 ++Itr; 381 while (Itr != BinaryDataMap.end() && OldParent && 382 Itr->second->Parent == OldParent) { 383 Itr->second->Parent = NewParent; 384 ++Itr; 385 } 386 }; 387 388 // Check if the previous symbol contains the newly added symbol. 
389 if (GAI != BinaryDataMap.begin()) { 390 BinaryData *Prev = std::prev(GAI)->second; 391 while (Prev) { 392 if (Prev->getSection() == GAI->second->getSection() && 393 Prev->containsRange(Address, Size)) { 394 fixParents(GAI, Prev); 395 } else { 396 fixParents(GAI, nullptr); 397 } 398 Prev = Prev->Parent; 399 } 400 } 401 402 // Check if the newly added symbol contains any subsequent symbols. 403 if (Size != 0) { 404 BinaryData *BD = GAI->second->Parent ? GAI->second->Parent : GAI->second; 405 auto Itr = std::next(GAI); 406 while ( 407 Itr != BinaryDataMap.end() && 408 BD->containsRange(Itr->second->getAddress(), Itr->second->getSize())) { 409 Itr->second->Parent = BD; 410 ++Itr; 411 } 412 } 413 } 414 415 iterator_range<BinaryContext::binary_data_iterator> 416 BinaryContext::getSubBinaryData(BinaryData *BD) { 417 auto Start = std::next(BinaryDataMap.find(BD->getAddress())); 418 auto End = Start; 419 while (End != BinaryDataMap.end() && BD->isAncestorOf(End->second)) 420 ++End; 421 return make_range(Start, End); 422 } 423 424 std::pair<const MCSymbol *, uint64_t> 425 BinaryContext::handleAddressRef(uint64_t Address, BinaryFunction &BF, 426 bool IsPCRel) { 427 if (isAArch64()) { 428 // Check if this is an access to a constant island and create bookkeeping 429 // to keep track of it and emit it later as part of this function. 430 if (MCSymbol *IslandSym = BF.getOrCreateIslandAccess(Address)) 431 return std::make_pair(IslandSym, 0); 432 433 // Detect custom code written in assembly that refers to arbitrary 434 // constant islands from other functions. Write this reference so we 435 // can pull this constant island and emit it as part of this function 436 // too. 
437 auto IslandIter = AddressToConstantIslandMap.lower_bound(Address); 438 439 if (IslandIter != AddressToConstantIslandMap.begin() && 440 (IslandIter == AddressToConstantIslandMap.end() || 441 IslandIter->first > Address)) 442 --IslandIter; 443 444 if (IslandIter != AddressToConstantIslandMap.end()) { 445 // Fall-back to referencing the original constant island in the presence 446 // of dynamic relocs, as we currently do not support cloning them. 447 // Notice: we might fail to link because of this, if the original constant 448 // island we are referring would be emitted too far away. 449 if (IslandIter->second->hasDynamicRelocationAtIsland()) { 450 MCSymbol *IslandSym = 451 IslandIter->second->getOrCreateIslandAccess(Address); 452 if (IslandSym) 453 return std::make_pair(IslandSym, 0); 454 } else if (MCSymbol *IslandSym = 455 IslandIter->second->getOrCreateProxyIslandAccess(Address, 456 BF)) { 457 BF.createIslandDependency(IslandSym, IslandIter->second); 458 return std::make_pair(IslandSym, 0); 459 } 460 } 461 } 462 463 // Note that the address does not necessarily have to reside inside 464 // a section, it could be an absolute address too. 465 ErrorOr<BinarySection &> Section = getSectionForAddress(Address); 466 if (Section && Section->isText()) { 467 if (BF.containsAddress(Address, /*UseMaxSize=*/isAArch64())) { 468 if (Address != BF.getAddress()) { 469 // The address could potentially escape. Mark it as another entry 470 // point into the function. 471 if (opts::Verbosity >= 1) { 472 outs() << "BOLT-INFO: potentially escaped address 0x" 473 << Twine::utohexstr(Address) << " in function " << BF << '\n'; 474 } 475 BF.HasInternalLabelReference = true; 476 return std::make_pair( 477 BF.addEntryPointAtOffset(Address - BF.getAddress()), 0); 478 } 479 } else { 480 addInterproceduralReference(&BF, Address); 481 } 482 } 483 484 // With relocations, catch jump table references outside of the basic block 485 // containing the indirect jump. 
486 if (HasRelocations) { 487 const MemoryContentsType MemType = analyzeMemoryAt(Address, BF); 488 if (MemType == MemoryContentsType::POSSIBLE_PIC_JUMP_TABLE && IsPCRel) { 489 const MCSymbol *Symbol = 490 getOrCreateJumpTable(BF, Address, JumpTable::JTT_PIC); 491 492 return std::make_pair(Symbol, 0); 493 } 494 } 495 496 if (BinaryData *BD = getBinaryDataContainingAddress(Address)) 497 return std::make_pair(BD->getSymbol(), Address - BD->getAddress()); 498 499 // TODO: use DWARF info to get size/alignment here? 500 MCSymbol *TargetSymbol = getOrCreateGlobalSymbol(Address, "DATAat"); 501 LLVM_DEBUG(dbgs() << "Created symbol " << TargetSymbol->getName() << '\n'); 502 return std::make_pair(TargetSymbol, 0); 503 } 504 505 MemoryContentsType BinaryContext::analyzeMemoryAt(uint64_t Address, 506 BinaryFunction &BF) { 507 if (!isX86()) 508 return MemoryContentsType::UNKNOWN; 509 510 ErrorOr<BinarySection &> Section = getSectionForAddress(Address); 511 if (!Section) { 512 // No section - possibly an absolute address. Since we don't allow 513 // internal function addresses to escape the function scope - we 514 // consider it a tail call. 515 if (opts::Verbosity > 1) { 516 errs() << "BOLT-WARNING: no section for address 0x" 517 << Twine::utohexstr(Address) << " referenced from function " << BF 518 << '\n'; 519 } 520 return MemoryContentsType::UNKNOWN; 521 } 522 523 if (Section->isVirtual()) { 524 // The contents are filled at runtime. 525 return MemoryContentsType::UNKNOWN; 526 } 527 528 // No support for jump tables in code yet. 529 if (Section->isText()) 530 return MemoryContentsType::UNKNOWN; 531 532 // Start with checking for PIC jump table. We expect non-PIC jump tables 533 // to have high 32 bits set to 0. 
534 if (analyzeJumpTable(Address, JumpTable::JTT_PIC, BF)) 535 return MemoryContentsType::POSSIBLE_PIC_JUMP_TABLE; 536 537 if (analyzeJumpTable(Address, JumpTable::JTT_NORMAL, BF)) 538 return MemoryContentsType::POSSIBLE_JUMP_TABLE; 539 540 return MemoryContentsType::UNKNOWN; 541 } 542 543 bool BinaryContext::analyzeJumpTable(const uint64_t Address, 544 const JumpTable::JumpTableType Type, 545 const BinaryFunction &BF, 546 const uint64_t NextJTAddress, 547 JumpTable::AddressesType *EntriesAsAddress, 548 bool *HasEntryInFragment) const { 549 // Is one of the targets __builtin_unreachable? 550 bool HasUnreachable = false; 551 552 // Does one of the entries match function start address? 553 bool HasStartAsEntry = false; 554 555 // Number of targets other than __builtin_unreachable. 556 uint64_t NumRealEntries = 0; 557 558 auto addEntryAddress = [&](uint64_t EntryAddress) { 559 if (EntriesAsAddress) 560 EntriesAsAddress->emplace_back(EntryAddress); 561 }; 562 563 ErrorOr<const BinarySection &> Section = getSectionForAddress(Address); 564 if (!Section) 565 return false; 566 567 // The upper bound is defined by containing object, section limits, and 568 // the next jump table in memory. 569 uint64_t UpperBound = Section->getEndAddress(); 570 const BinaryData *JumpTableBD = getBinaryDataAtAddress(Address); 571 if (JumpTableBD && JumpTableBD->getSize()) { 572 assert(JumpTableBD->getEndAddress() <= UpperBound && 573 "data object cannot cross a section boundary"); 574 UpperBound = JumpTableBD->getEndAddress(); 575 } 576 if (NextJTAddress) 577 UpperBound = std::min(NextJTAddress, UpperBound); 578 579 LLVM_DEBUG({ 580 using JTT = JumpTable::JumpTableType; 581 dbgs() << formatv("BOLT-DEBUG: analyzeJumpTable @{0:x} in {1}, JTT={2}\n", 582 Address, BF.getPrintName(), 583 Type == JTT::JTT_PIC ? 
"PIC" : "Normal"); 584 }); 585 const uint64_t EntrySize = getJumpTableEntrySize(Type); 586 for (uint64_t EntryAddress = Address; EntryAddress <= UpperBound - EntrySize; 587 EntryAddress += EntrySize) { 588 LLVM_DEBUG(dbgs() << " * Checking 0x" << Twine::utohexstr(EntryAddress) 589 << " -> "); 590 // Check if there's a proper relocation against the jump table entry. 591 if (HasRelocations) { 592 if (Type == JumpTable::JTT_PIC && 593 !DataPCRelocations.count(EntryAddress)) { 594 LLVM_DEBUG( 595 dbgs() << "FAIL: JTT_PIC table, no relocation for this address\n"); 596 break; 597 } 598 if (Type == JumpTable::JTT_NORMAL && !getRelocationAt(EntryAddress)) { 599 LLVM_DEBUG( 600 dbgs() 601 << "FAIL: JTT_NORMAL table, no relocation for this address\n"); 602 break; 603 } 604 } 605 606 const uint64_t Value = 607 (Type == JumpTable::JTT_PIC) 608 ? Address + *getSignedValueAtAddress(EntryAddress, EntrySize) 609 : *getPointerAtAddress(EntryAddress); 610 611 // __builtin_unreachable() case. 612 if (Value == BF.getAddress() + BF.getSize()) { 613 addEntryAddress(Value); 614 HasUnreachable = true; 615 LLVM_DEBUG(dbgs() << formatv("OK: {0:x} __builtin_unreachable\n", Value)); 616 continue; 617 } 618 619 // Function start is another special case. It is allowed in the jump table, 620 // but we need at least one another regular entry to distinguish the table 621 // from, e.g. a function pointer array. 622 if (Value == BF.getAddress()) { 623 HasStartAsEntry = true; 624 addEntryAddress(Value); 625 continue; 626 } 627 628 // Function or one of its fragments. 629 const BinaryFunction *TargetBF = getBinaryFunctionContainingAddress(Value); 630 const bool DoesBelongToFunction = 631 BF.containsAddress(Value) || 632 (TargetBF && TargetBF->isParentOrChildOf(BF)); 633 if (!DoesBelongToFunction) { 634 LLVM_DEBUG({ 635 if (!BF.containsAddress(Value)) { 636 dbgs() << "FAIL: function doesn't contain this address\n"; 637 if (TargetBF) { 638 dbgs() << " ! 
function containing this address: " 639 << TargetBF->getPrintName() << '\n'; 640 if (TargetBF->isFragment()) { 641 dbgs() << " ! is a fragment"; 642 for (BinaryFunction *Parent : TargetBF->ParentFragments) 643 dbgs() << ", parent: " << Parent->getPrintName(); 644 dbgs() << '\n'; 645 } 646 } 647 } 648 }); 649 break; 650 } 651 652 // Check there's an instruction at this offset. 653 if (TargetBF->getState() == BinaryFunction::State::Disassembled && 654 !TargetBF->getInstructionAtOffset(Value - TargetBF->getAddress())) { 655 LLVM_DEBUG(dbgs() << formatv("FAIL: no instruction at {0:x}\n", Value)); 656 break; 657 } 658 659 ++NumRealEntries; 660 LLVM_DEBUG(dbgs() << formatv("OK: {0:x} real entry\n", Value)); 661 662 if (TargetBF != &BF && HasEntryInFragment) 663 *HasEntryInFragment = true; 664 addEntryAddress(Value); 665 } 666 667 // It's a jump table if the number of real entries is more than 1, or there's 668 // one real entry and one or more special targets. If there are only multiple 669 // special targets, then it's not a jump table. 
670 return NumRealEntries + (HasUnreachable || HasStartAsEntry) >= 2; 671 } 672 673 void BinaryContext::populateJumpTables() { 674 LLVM_DEBUG(dbgs() << "DataPCRelocations: " << DataPCRelocations.size() 675 << '\n'); 676 for (auto JTI = JumpTables.begin(), JTE = JumpTables.end(); JTI != JTE; 677 ++JTI) { 678 JumpTable *JT = JTI->second; 679 680 bool NonSimpleParent = false; 681 for (BinaryFunction *BF : JT->Parents) 682 NonSimpleParent |= !BF->isSimple(); 683 if (NonSimpleParent) 684 continue; 685 686 uint64_t NextJTAddress = 0; 687 auto NextJTI = std::next(JTI); 688 if (NextJTI != JTE) 689 NextJTAddress = NextJTI->second->getAddress(); 690 691 const bool Success = 692 analyzeJumpTable(JT->getAddress(), JT->Type, *(JT->Parents[0]), 693 NextJTAddress, &JT->EntriesAsAddress, &JT->IsSplit); 694 if (!Success) { 695 LLVM_DEBUG({ 696 dbgs() << "failed to analyze "; 697 JT->print(dbgs()); 698 if (NextJTI != JTE) { 699 dbgs() << "next "; 700 NextJTI->second->print(dbgs()); 701 } 702 }); 703 llvm_unreachable("jump table heuristic failure"); 704 } 705 for (BinaryFunction *Frag : JT->Parents) { 706 if (JT->IsSplit) 707 Frag->setHasIndirectTargetToSplitFragment(true); 708 for (uint64_t EntryAddress : JT->EntriesAsAddress) 709 // if target is builtin_unreachable 710 if (EntryAddress == Frag->getAddress() + Frag->getSize()) { 711 Frag->IgnoredBranches.emplace_back(EntryAddress - Frag->getAddress(), 712 Frag->getSize()); 713 } else if (EntryAddress >= Frag->getAddress() && 714 EntryAddress < Frag->getAddress() + Frag->getSize()) { 715 Frag->registerReferencedOffset(EntryAddress - Frag->getAddress()); 716 } 717 } 718 719 // In strict mode, erase PC-relative relocation record. Later we check that 720 // all such records are erased and thus have been accounted for. 
721 if (opts::StrictMode && JT->Type == JumpTable::JTT_PIC) { 722 for (uint64_t Address = JT->getAddress(); 723 Address < JT->getAddress() + JT->getSize(); 724 Address += JT->EntrySize) { 725 DataPCRelocations.erase(DataPCRelocations.find(Address)); 726 } 727 } 728 729 // Mark to skip the function and all its fragments. 730 for (BinaryFunction *Frag : JT->Parents) 731 if (Frag->hasIndirectTargetToSplitFragment()) 732 addFragmentsToSkip(Frag); 733 } 734 735 if (opts::StrictMode && DataPCRelocations.size()) { 736 LLVM_DEBUG({ 737 dbgs() << DataPCRelocations.size() 738 << " unclaimed PC-relative relocations left in data:\n"; 739 for (uint64_t Reloc : DataPCRelocations) 740 dbgs() << Twine::utohexstr(Reloc) << '\n'; 741 }); 742 assert(0 && "unclaimed PC-relative relocations left in data\n"); 743 } 744 clearList(DataPCRelocations); 745 } 746 747 void BinaryContext::skipMarkedFragments() { 748 std::vector<BinaryFunction *> FragmentQueue; 749 // Copy the functions to FragmentQueue. 750 FragmentQueue.assign(FragmentsToSkip.begin(), FragmentsToSkip.end()); 751 auto addToWorklist = [&](BinaryFunction *Function) -> void { 752 if (FragmentsToSkip.count(Function)) 753 return; 754 FragmentQueue.push_back(Function); 755 addFragmentsToSkip(Function); 756 }; 757 // Functions containing split jump tables need to be skipped with all 758 // fragments (transitively). 
759 for (size_t I = 0; I != FragmentQueue.size(); I++) { 760 BinaryFunction *BF = FragmentQueue[I]; 761 assert(FragmentsToSkip.count(BF) && 762 "internal error in traversing function fragments"); 763 if (opts::Verbosity >= 1) 764 errs() << "BOLT-WARNING: Ignoring " << BF->getPrintName() << '\n'; 765 BF->setSimple(false); 766 BF->setHasIndirectTargetToSplitFragment(true); 767 768 llvm::for_each(BF->Fragments, addToWorklist); 769 llvm::for_each(BF->ParentFragments, addToWorklist); 770 } 771 if (!FragmentsToSkip.empty()) 772 errs() << "BOLT-WARNING: skipped " << FragmentsToSkip.size() << " function" 773 << (FragmentsToSkip.size() == 1 ? "" : "s") 774 << " due to cold fragments\n"; 775 } 776 777 MCSymbol *BinaryContext::getOrCreateGlobalSymbol(uint64_t Address, Twine Prefix, 778 uint64_t Size, 779 uint16_t Alignment, 780 unsigned Flags) { 781 auto Itr = BinaryDataMap.find(Address); 782 if (Itr != BinaryDataMap.end()) { 783 assert(Itr->second->getSize() == Size || !Size); 784 return Itr->second->getSymbol(); 785 } 786 787 std::string Name = (Prefix + "0x" + Twine::utohexstr(Address)).str(); 788 assert(!GlobalSymbols.count(Name) && "created name is not unique"); 789 return registerNameAtAddress(Name, Address, Size, Alignment, Flags); 790 } 791 792 MCSymbol *BinaryContext::getOrCreateUndefinedGlobalSymbol(StringRef Name) { 793 return Ctx->getOrCreateSymbol(Name); 794 } 795 796 BinaryFunction *BinaryContext::createBinaryFunction( 797 const std::string &Name, BinarySection &Section, uint64_t Address, 798 uint64_t Size, uint64_t SymbolSize, uint16_t Alignment) { 799 auto Result = BinaryFunctions.emplace( 800 Address, BinaryFunction(Name, Section, Address, Size, *this)); 801 assert(Result.second == true && "unexpected duplicate function"); 802 BinaryFunction *BF = &Result.first->second; 803 registerNameAtAddress(Name, Address, SymbolSize ? 
SymbolSize : Size, 804 Alignment); 805 setSymbolToFunctionMap(BF->getSymbol(), BF); 806 return BF; 807 } 808 809 const MCSymbol * 810 BinaryContext::getOrCreateJumpTable(BinaryFunction &Function, uint64_t Address, 811 JumpTable::JumpTableType Type) { 812 // Two fragments of same function access same jump table 813 if (JumpTable *JT = getJumpTableContainingAddress(Address)) { 814 assert(JT->Type == Type && "jump table types have to match"); 815 assert(Address == JT->getAddress() && "unexpected non-empty jump table"); 816 817 // Prevent associating a jump table to a specific fragment twice. 818 // This simple check arises from the assumption: no more than 2 fragments. 819 if (JT->Parents.size() == 1 && JT->Parents[0] != &Function) { 820 assert(JT->Parents[0]->isParentOrChildOf(Function) && 821 "cannot re-use jump table of a different function"); 822 // Duplicate the entry for the parent function for easy access 823 JT->Parents.push_back(&Function); 824 if (opts::Verbosity > 2) { 825 outs() << "BOLT-INFO: Multiple fragments access same jump table: " 826 << JT->Parents[0]->getPrintName() << "; " 827 << Function.getPrintName() << "\n"; 828 JT->print(outs()); 829 } 830 Function.JumpTables.emplace(Address, JT); 831 JT->Parents[0]->setHasIndirectTargetToSplitFragment(true); 832 JT->Parents[1]->setHasIndirectTargetToSplitFragment(true); 833 } 834 835 bool IsJumpTableParent = false; 836 (void)IsJumpTableParent; 837 for (BinaryFunction *Frag : JT->Parents) 838 if (Frag == &Function) 839 IsJumpTableParent = true; 840 assert(IsJumpTableParent && 841 "cannot re-use jump table of a different function"); 842 return JT->getFirstLabel(); 843 } 844 845 // Re-use the existing symbol if possible. 
846 MCSymbol *JTLabel = nullptr; 847 if (BinaryData *Object = getBinaryDataAtAddress(Address)) { 848 if (!isInternalSymbolName(Object->getSymbol()->getName())) 849 JTLabel = Object->getSymbol(); 850 } 851 852 const uint64_t EntrySize = getJumpTableEntrySize(Type); 853 if (!JTLabel) { 854 const std::string JumpTableName = generateJumpTableName(Function, Address); 855 JTLabel = registerNameAtAddress(JumpTableName, Address, 0, EntrySize); 856 } 857 858 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: creating jump table " << JTLabel->getName() 859 << " in function " << Function << '\n'); 860 861 JumpTable *JT = new JumpTable(*JTLabel, Address, EntrySize, Type, 862 JumpTable::LabelMapType{{0, JTLabel}}, 863 *getSectionForAddress(Address)); 864 JT->Parents.push_back(&Function); 865 if (opts::Verbosity > 2) 866 JT->print(outs()); 867 JumpTables.emplace(Address, JT); 868 869 // Duplicate the entry for the parent function for easy access. 870 Function.JumpTables.emplace(Address, JT); 871 return JTLabel; 872 } 873 874 std::pair<uint64_t, const MCSymbol *> 875 BinaryContext::duplicateJumpTable(BinaryFunction &Function, JumpTable *JT, 876 const MCSymbol *OldLabel) { 877 auto L = scopeLock(); 878 unsigned Offset = 0; 879 bool Found = false; 880 for (std::pair<const unsigned, MCSymbol *> Elmt : JT->Labels) { 881 if (Elmt.second != OldLabel) 882 continue; 883 Offset = Elmt.first; 884 Found = true; 885 break; 886 } 887 assert(Found && "Label not found"); 888 (void)Found; 889 MCSymbol *NewLabel = Ctx->createNamedTempSymbol("duplicatedJT"); 890 JumpTable *NewJT = 891 new JumpTable(*NewLabel, JT->getAddress(), JT->EntrySize, JT->Type, 892 JumpTable::LabelMapType{{Offset, NewLabel}}, 893 *getSectionForAddress(JT->getAddress())); 894 NewJT->Parents = JT->Parents; 895 NewJT->Entries = JT->Entries; 896 NewJT->Counts = JT->Counts; 897 uint64_t JumpTableID = ++DuplicatedJumpTables; 898 // Invert it to differentiate from regular jump tables whose IDs are their 899 // addresses in the input binary memory 
space 900 JumpTableID = ~JumpTableID; 901 JumpTables.emplace(JumpTableID, NewJT); 902 Function.JumpTables.emplace(JumpTableID, NewJT); 903 return std::make_pair(JumpTableID, NewLabel); 904 } 905 906 std::string BinaryContext::generateJumpTableName(const BinaryFunction &BF, 907 uint64_t Address) { 908 size_t Id; 909 uint64_t Offset = 0; 910 if (const JumpTable *JT = BF.getJumpTableContainingAddress(Address)) { 911 Offset = Address - JT->getAddress(); 912 auto Itr = JT->Labels.find(Offset); 913 if (Itr != JT->Labels.end()) 914 return std::string(Itr->second->getName()); 915 Id = JumpTableIds.at(JT->getAddress()); 916 } else { 917 Id = JumpTableIds[Address] = BF.JumpTables.size(); 918 } 919 return ("JUMP_TABLE/" + BF.getOneName().str() + "." + std::to_string(Id) + 920 (Offset ? ("." + std::to_string(Offset)) : "")); 921 } 922 923 bool BinaryContext::hasValidCodePadding(const BinaryFunction &BF) { 924 // FIXME: aarch64 support is missing. 925 if (!isX86()) 926 return true; 927 928 if (BF.getSize() == BF.getMaxSize()) 929 return true; 930 931 ErrorOr<ArrayRef<unsigned char>> FunctionData = BF.getData(); 932 assert(FunctionData && "cannot get function as data"); 933 934 uint64_t Offset = BF.getSize(); 935 MCInst Instr; 936 uint64_t InstrSize = 0; 937 uint64_t InstrAddress = BF.getAddress() + Offset; 938 using std::placeholders::_1; 939 940 // Skip instructions that satisfy the predicate condition. 941 auto skipInstructions = [&](std::function<bool(const MCInst &)> Predicate) { 942 const uint64_t StartOffset = Offset; 943 for (; Offset < BF.getMaxSize(); 944 Offset += InstrSize, InstrAddress += InstrSize) { 945 if (!DisAsm->getInstruction(Instr, InstrSize, FunctionData->slice(Offset), 946 InstrAddress, nulls())) 947 break; 948 if (!Predicate(Instr)) 949 break; 950 } 951 952 return Offset - StartOffset; 953 }; 954 955 // Skip a sequence of zero bytes. 
956 auto skipZeros = [&]() { 957 const uint64_t StartOffset = Offset; 958 for (; Offset < BF.getMaxSize(); ++Offset) 959 if ((*FunctionData)[Offset] != 0) 960 break; 961 962 return Offset - StartOffset; 963 }; 964 965 // Accept the whole padding area filled with breakpoints. 966 auto isBreakpoint = std::bind(&MCPlusBuilder::isBreakpoint, MIB.get(), _1); 967 if (skipInstructions(isBreakpoint) && Offset == BF.getMaxSize()) 968 return true; 969 970 auto isNoop = std::bind(&MCPlusBuilder::isNoop, MIB.get(), _1); 971 972 // Some functions have a jump to the next function or to the padding area 973 // inserted after the body. 974 auto isSkipJump = [&](const MCInst &Instr) { 975 uint64_t TargetAddress = 0; 976 if (MIB->isUnconditionalBranch(Instr) && 977 MIB->evaluateBranch(Instr, InstrAddress, InstrSize, TargetAddress)) { 978 if (TargetAddress >= InstrAddress + InstrSize && 979 TargetAddress <= BF.getAddress() + BF.getMaxSize()) { 980 return true; 981 } 982 } 983 return false; 984 }; 985 986 // Skip over nops, jumps, and zero padding. Allow interleaving (this happens). 987 while (skipInstructions(isNoop) || skipInstructions(isSkipJump) || 988 skipZeros()) 989 ; 990 991 if (Offset == BF.getMaxSize()) 992 return true; 993 994 if (opts::Verbosity >= 1) { 995 errs() << "BOLT-WARNING: bad padding at address 0x" 996 << Twine::utohexstr(BF.getAddress() + BF.getSize()) 997 << " starting at offset " << (Offset - BF.getSize()) 998 << " in function " << BF << '\n' 999 << FunctionData->slice(BF.getSize(), BF.getMaxSize() - BF.getSize()) 1000 << '\n'; 1001 } 1002 1003 return false; 1004 } 1005 1006 void BinaryContext::adjustCodePadding() { 1007 for (auto &BFI : BinaryFunctions) { 1008 BinaryFunction &BF = BFI.second; 1009 if (!shouldEmit(BF)) 1010 continue; 1011 1012 if (!hasValidCodePadding(BF)) { 1013 if (HasRelocations) { 1014 if (opts::Verbosity >= 1) { 1015 outs() << "BOLT-INFO: function " << BF 1016 << " has invalid padding. 
Ignoring the function.\n"; 1017 } 1018 BF.setIgnored(); 1019 } else { 1020 BF.setMaxSize(BF.getSize()); 1021 } 1022 } 1023 } 1024 } 1025 1026 MCSymbol *BinaryContext::registerNameAtAddress(StringRef Name, uint64_t Address, 1027 uint64_t Size, 1028 uint16_t Alignment, 1029 unsigned Flags) { 1030 // Register the name with MCContext. 1031 MCSymbol *Symbol = Ctx->getOrCreateSymbol(Name); 1032 1033 auto GAI = BinaryDataMap.find(Address); 1034 BinaryData *BD; 1035 if (GAI == BinaryDataMap.end()) { 1036 ErrorOr<BinarySection &> SectionOrErr = getSectionForAddress(Address); 1037 BinarySection &Section = 1038 SectionOrErr ? SectionOrErr.get() : absoluteSection(); 1039 BD = new BinaryData(*Symbol, Address, Size, Alignment ? Alignment : 1, 1040 Section, Flags); 1041 GAI = BinaryDataMap.emplace(Address, BD).first; 1042 GlobalSymbols[Name] = BD; 1043 updateObjectNesting(GAI); 1044 } else { 1045 BD = GAI->second; 1046 if (!BD->hasName(Name)) { 1047 GlobalSymbols[Name] = BD; 1048 BD->Symbols.push_back(Symbol); 1049 } 1050 } 1051 1052 return Symbol; 1053 } 1054 1055 const BinaryData * 1056 BinaryContext::getBinaryDataContainingAddressImpl(uint64_t Address) const { 1057 auto NI = BinaryDataMap.lower_bound(Address); 1058 auto End = BinaryDataMap.end(); 1059 if ((NI != End && Address == NI->first) || 1060 ((NI != BinaryDataMap.begin()) && (NI-- != BinaryDataMap.begin()))) { 1061 if (NI->second->containsAddress(Address)) 1062 return NI->second; 1063 1064 // If this is a sub-symbol, see if a parent data contains the address. 
1065 const BinaryData *BD = NI->second->getParent(); 1066 while (BD) { 1067 if (BD->containsAddress(Address)) 1068 return BD; 1069 BD = BD->getParent(); 1070 } 1071 } 1072 return nullptr; 1073 } 1074 1075 BinaryData *BinaryContext::getGOTSymbol() { 1076 // First tries to find a global symbol with that name 1077 BinaryData *GOTSymBD = getBinaryDataByName("_GLOBAL_OFFSET_TABLE_"); 1078 if (GOTSymBD) 1079 return GOTSymBD; 1080 1081 // This symbol might be hidden from run-time link, so fetch the local 1082 // definition if available. 1083 GOTSymBD = getBinaryDataByName("_GLOBAL_OFFSET_TABLE_/1"); 1084 if (!GOTSymBD) 1085 return nullptr; 1086 1087 // If the local symbol is not unique, fail 1088 unsigned Index = 2; 1089 SmallString<30> Storage; 1090 while (const BinaryData *BD = 1091 getBinaryDataByName(Twine("_GLOBAL_OFFSET_TABLE_/") 1092 .concat(Twine(Index++)) 1093 .toStringRef(Storage))) 1094 if (BD->getAddress() != GOTSymBD->getAddress()) 1095 return nullptr; 1096 1097 return GOTSymBD; 1098 } 1099 1100 bool BinaryContext::setBinaryDataSize(uint64_t Address, uint64_t Size) { 1101 auto NI = BinaryDataMap.find(Address); 1102 assert(NI != BinaryDataMap.end()); 1103 if (NI == BinaryDataMap.end()) 1104 return false; 1105 // TODO: it's possible that a jump table starts at the same address 1106 // as a larger blob of private data. When we set the size of the 1107 // jump table, it might be smaller than the total blob size. In this 1108 // case we just leave the original size since (currently) it won't really 1109 // affect anything. 
1110 assert((!NI->second->Size || NI->second->Size == Size || 1111 (NI->second->isJumpTable() && NI->second->Size > Size)) && 1112 "can't change the size of a symbol that has already had its " 1113 "size set"); 1114 if (!NI->second->Size) { 1115 NI->second->Size = Size; 1116 updateObjectNesting(NI); 1117 return true; 1118 } 1119 return false; 1120 } 1121 1122 void BinaryContext::generateSymbolHashes() { 1123 auto isPadding = [](const BinaryData &BD) { 1124 StringRef Contents = BD.getSection().getContents(); 1125 StringRef SymData = Contents.substr(BD.getOffset(), BD.getSize()); 1126 return (BD.getName().starts_with("HOLEat") || 1127 SymData.find_first_not_of(0) == StringRef::npos); 1128 }; 1129 1130 uint64_t NumCollisions = 0; 1131 for (auto &Entry : BinaryDataMap) { 1132 BinaryData &BD = *Entry.second; 1133 StringRef Name = BD.getName(); 1134 1135 if (!isInternalSymbolName(Name)) 1136 continue; 1137 1138 // First check if a non-anonymous alias exists and move it to the front. 1139 if (BD.getSymbols().size() > 1) { 1140 auto Itr = llvm::find_if(BD.getSymbols(), [&](const MCSymbol *Symbol) { 1141 return !isInternalSymbolName(Symbol->getName()); 1142 }); 1143 if (Itr != BD.getSymbols().end()) { 1144 size_t Idx = std::distance(BD.getSymbols().begin(), Itr); 1145 std::swap(BD.getSymbols()[0], BD.getSymbols()[Idx]); 1146 continue; 1147 } 1148 } 1149 1150 // We have to skip 0 size symbols since they will all collide. 1151 if (BD.getSize() == 0) { 1152 continue; 1153 } 1154 1155 const uint64_t Hash = BD.getSection().hash(BD); 1156 const size_t Idx = Name.find("0x"); 1157 std::string NewName = 1158 (Twine(Name.substr(0, Idx)) + "_" + Twine::utohexstr(Hash)).str(); 1159 if (getBinaryDataByName(NewName)) { 1160 // Ignore collisions for symbols that appear to be padding 1161 // (i.e. 
all zeros or a "hole") 1162 if (!isPadding(BD)) { 1163 if (opts::Verbosity) { 1164 errs() << "BOLT-WARNING: collision detected when hashing " << BD 1165 << " with new name (" << NewName << "), skipping.\n"; 1166 } 1167 ++NumCollisions; 1168 } 1169 continue; 1170 } 1171 BD.Symbols.insert(BD.Symbols.begin(), Ctx->getOrCreateSymbol(NewName)); 1172 GlobalSymbols[NewName] = &BD; 1173 } 1174 if (NumCollisions) { 1175 errs() << "BOLT-WARNING: " << NumCollisions 1176 << " collisions detected while hashing binary objects"; 1177 if (!opts::Verbosity) 1178 errs() << ". Use -v=1 to see the list."; 1179 errs() << '\n'; 1180 } 1181 } 1182 1183 bool BinaryContext::registerFragment(BinaryFunction &TargetFunction, 1184 BinaryFunction &Function) const { 1185 assert(TargetFunction.isFragment() && "TargetFunction must be a fragment"); 1186 if (TargetFunction.isChildOf(Function)) 1187 return true; 1188 TargetFunction.addParentFragment(Function); 1189 Function.addFragment(TargetFunction); 1190 if (!HasRelocations) { 1191 TargetFunction.setSimple(false); 1192 Function.setSimple(false); 1193 } 1194 if (opts::Verbosity >= 1) { 1195 outs() << "BOLT-INFO: marking " << TargetFunction << " as a fragment of " 1196 << Function << '\n'; 1197 } 1198 return true; 1199 } 1200 1201 void BinaryContext::addAdrpAddRelocAArch64(BinaryFunction &BF, 1202 MCInst &LoadLowBits, 1203 MCInst &LoadHiBits, 1204 uint64_t Target) { 1205 const MCSymbol *TargetSymbol; 1206 uint64_t Addend = 0; 1207 std::tie(TargetSymbol, Addend) = handleAddressRef(Target, BF, 1208 /*IsPCRel*/ true); 1209 int64_t Val; 1210 MIB->replaceImmWithSymbolRef(LoadHiBits, TargetSymbol, Addend, Ctx.get(), Val, 1211 ELF::R_AARCH64_ADR_PREL_PG_HI21); 1212 MIB->replaceImmWithSymbolRef(LoadLowBits, TargetSymbol, Addend, Ctx.get(), 1213 Val, ELF::R_AARCH64_ADD_ABS_LO12_NC); 1214 } 1215 1216 bool BinaryContext::handleAArch64Veneer(uint64_t Address, bool MatchOnly) { 1217 BinaryFunction *TargetFunction = getBinaryFunctionContainingAddress(Address); 
1218 if (TargetFunction) 1219 return false; 1220 1221 ErrorOr<BinarySection &> Section = getSectionForAddress(Address); 1222 assert(Section && "cannot get section for referenced address"); 1223 if (!Section->isText()) 1224 return false; 1225 1226 bool Ret = false; 1227 StringRef SectionContents = Section->getContents(); 1228 uint64_t Offset = Address - Section->getAddress(); 1229 const uint64_t MaxSize = SectionContents.size() - Offset; 1230 const uint8_t *Bytes = 1231 reinterpret_cast<const uint8_t *>(SectionContents.data()); 1232 ArrayRef<uint8_t> Data(Bytes + Offset, MaxSize); 1233 1234 auto matchVeneer = [&](BinaryFunction::InstrMapType &Instructions, 1235 MCInst &Instruction, uint64_t Offset, 1236 uint64_t AbsoluteInstrAddr, 1237 uint64_t TotalSize) -> bool { 1238 MCInst *TargetHiBits, *TargetLowBits; 1239 uint64_t TargetAddress, Count; 1240 Count = MIB->matchLinkerVeneer(Instructions.begin(), Instructions.end(), 1241 AbsoluteInstrAddr, Instruction, TargetHiBits, 1242 TargetLowBits, TargetAddress); 1243 if (!Count) 1244 return false; 1245 1246 if (MatchOnly) 1247 return true; 1248 1249 // NOTE The target symbol was created during disassemble's 1250 // handleExternalReference 1251 const MCSymbol *VeneerSymbol = getOrCreateGlobalSymbol(Address, "FUNCat"); 1252 BinaryFunction *Veneer = createBinaryFunction(VeneerSymbol->getName().str(), 1253 *Section, Address, TotalSize); 1254 addAdrpAddRelocAArch64(*Veneer, *TargetLowBits, *TargetHiBits, 1255 TargetAddress); 1256 MIB->addAnnotation(Instruction, "AArch64Veneer", true); 1257 Veneer->addInstruction(Offset, std::move(Instruction)); 1258 --Count; 1259 for (auto It = Instructions.rbegin(); Count != 0; ++It, --Count) { 1260 MIB->addAnnotation(It->second, "AArch64Veneer", true); 1261 Veneer->addInstruction(It->first, std::move(It->second)); 1262 } 1263 1264 Veneer->getOrCreateLocalLabel(Address); 1265 Veneer->setMaxSize(TotalSize); 1266 Veneer->updateState(BinaryFunction::State::Disassembled); 1267 LLVM_DEBUG(dbgs() << 
"BOLT-DEBUG: handling veneer function at 0x" << Address 1268 << "\n"); 1269 return true; 1270 }; 1271 1272 uint64_t Size = 0, TotalSize = 0; 1273 BinaryFunction::InstrMapType VeneerInstructions; 1274 for (Offset = 0; Offset < MaxSize; Offset += Size) { 1275 MCInst Instruction; 1276 const uint64_t AbsoluteInstrAddr = Address + Offset; 1277 if (!SymbolicDisAsm->getInstruction(Instruction, Size, Data.slice(Offset), 1278 AbsoluteInstrAddr, nulls())) 1279 break; 1280 1281 TotalSize += Size; 1282 if (MIB->isBranch(Instruction)) { 1283 Ret = matchVeneer(VeneerInstructions, Instruction, Offset, 1284 AbsoluteInstrAddr, TotalSize); 1285 break; 1286 } 1287 1288 VeneerInstructions.emplace(Offset, std::move(Instruction)); 1289 } 1290 1291 return Ret; 1292 } 1293 1294 void BinaryContext::processInterproceduralReferences() { 1295 for (const std::pair<BinaryFunction *, uint64_t> &It : 1296 InterproceduralReferences) { 1297 BinaryFunction &Function = *It.first; 1298 uint64_t Address = It.second; 1299 if (!Address || Function.isIgnored()) 1300 continue; 1301 1302 BinaryFunction *TargetFunction = 1303 getBinaryFunctionContainingAddress(Address); 1304 if (&Function == TargetFunction) 1305 continue; 1306 1307 if (TargetFunction) { 1308 if (TargetFunction->isFragment() && 1309 !TargetFunction->isChildOf(Function)) { 1310 errs() << "BOLT-WARNING: interprocedural reference between unrelated " 1311 "fragments: " 1312 << Function.getPrintName() << " and " 1313 << TargetFunction->getPrintName() << '\n'; 1314 } 1315 if (uint64_t Offset = Address - TargetFunction->getAddress()) 1316 TargetFunction->addEntryPointAtOffset(Offset); 1317 1318 continue; 1319 } 1320 1321 // Check if address falls in function padding space - this could be 1322 // unmarked data in code. In this case adjust the padding space size. 
    // From here on the address is not inside the body of any known function.
    ErrorOr<BinarySection &> Section = getSectionForAddress(Address);
    assert(Section && "cannot get section for referenced address");

    // Only references into text sections are of interest.
    if (!Section->isText())
      continue;

    // PLT requires special handling and could be ignored in this context.
    StringRef SectionName = Section->getName();
    if (SectionName == ".plt" || SectionName == ".plt.got")
      continue;

    // Check if it is aarch64 veneer written at Address
    if (isAArch64() && handleAArch64Veneer(Address))
      continue;

    // When all functions must be processed, an unmarked object in code is a
    // fatal error: report it and terminate.
    if (opts::processAllFunctions()) {
      errs() << "BOLT-ERROR: cannot process binaries with unmarked "
             << "object in code at address 0x" << Twine::utohexstr(Address)
             << " belonging to section " << SectionName << " in current mode\n";
      exit(1);
    }

    // Repeat the lookup, this time using the maximum size of functions so that
    // padding after a function body counts as part of it.
    TargetFunction = getBinaryFunctionContainingAddress(Address,
                                                        /*CheckPastEnd=*/false,
                                                        /*UseMaxSize=*/true);
    // We are not going to overwrite non-simple functions, but for simple
    // ones - adjust the padding size.
1350 if (TargetFunction && TargetFunction->isSimple()) { 1351 errs() << "BOLT-WARNING: function " << *TargetFunction 1352 << " has an object detected in a padding region at address 0x" 1353 << Twine::utohexstr(Address) << '\n'; 1354 TargetFunction->setMaxSize(TargetFunction->getSize()); 1355 } 1356 } 1357 1358 InterproceduralReferences.clear(); 1359 } 1360 1361 void BinaryContext::postProcessSymbolTable() { 1362 fixBinaryDataHoles(); 1363 bool Valid = true; 1364 for (auto &Entry : BinaryDataMap) { 1365 BinaryData *BD = Entry.second; 1366 if ((BD->getName().starts_with("SYMBOLat") || 1367 BD->getName().starts_with("DATAat")) && 1368 !BD->getParent() && !BD->getSize() && !BD->isAbsolute() && 1369 BD->getSection()) { 1370 errs() << "BOLT-WARNING: zero-sized top level symbol: " << *BD << "\n"; 1371 Valid = false; 1372 } 1373 } 1374 assert(Valid); 1375 (void)Valid; 1376 generateSymbolHashes(); 1377 } 1378 1379 void BinaryContext::foldFunction(BinaryFunction &ChildBF, 1380 BinaryFunction &ParentBF) { 1381 assert(!ChildBF.isMultiEntry() && !ParentBF.isMultiEntry() && 1382 "cannot merge functions with multiple entry points"); 1383 1384 std::unique_lock<llvm::sys::RWMutex> WriteCtxLock(CtxMutex, std::defer_lock); 1385 std::unique_lock<llvm::sys::RWMutex> WriteSymbolMapLock( 1386 SymbolToFunctionMapMutex, std::defer_lock); 1387 1388 const StringRef ChildName = ChildBF.getOneName(); 1389 1390 // Move symbols over and update bookkeeping info. 1391 for (MCSymbol *Symbol : ChildBF.getSymbols()) { 1392 ParentBF.getSymbols().push_back(Symbol); 1393 WriteSymbolMapLock.lock(); 1394 SymbolToFunctionMap[Symbol] = &ParentBF; 1395 WriteSymbolMapLock.unlock(); 1396 // NB: there's no need to update BinaryDataMap and GlobalSymbols. 1397 } 1398 ChildBF.getSymbols().clear(); 1399 1400 // Move other names the child function is known under. 
  llvm::move(ChildBF.Aliases, std::back_inserter(ParentBF.Aliases));
  ChildBF.Aliases.clear();

  if (HasRelocations) {
    // Merge execution counts of ChildBF into those of ParentBF.
    // Without relocations, we cannot reliably merge profiles as both functions
    // continue to exist and either one can be executed.
    ChildBF.mergeProfileDataInto(ParentBF);

    // Separate reader and writer locks on the same mutex, both created
    // unlocked: the lookup runs under the shared lock, the erase under the
    // exclusive one.
    std::shared_lock<llvm::sys::RWMutex> ReadBfsLock(BinaryFunctionsMutex,
                                                     std::defer_lock);
    std::unique_lock<llvm::sys::RWMutex> WriteBfsLock(BinaryFunctionsMutex,
                                                      std::defer_lock);
    // Remove ChildBF from the global set of functions in relocs mode.
    ReadBfsLock.lock();
    auto FI = BinaryFunctions.find(ChildBF.getAddress());
    ReadBfsLock.unlock();

    assert(FI != BinaryFunctions.end() && "function not found");
    assert(&ChildBF == &FI->second && "function mismatch");

    // NOTE(review): ChildBF refers to the map element erased below, so it
    // presumably must not be used after this point - confirm against callers.
    WriteBfsLock.lock();
    ChildBF.clearDisasmState();
    FI = BinaryFunctions.erase(FI);
    WriteBfsLock.unlock();

  } else {
    // In non-relocation mode we keep the function, but rename it.
1429 std::string NewName = "__ICF_" + ChildName.str(); 1430 1431 WriteCtxLock.lock(); 1432 ChildBF.getSymbols().push_back(Ctx->getOrCreateSymbol(NewName)); 1433 WriteCtxLock.unlock(); 1434 1435 ChildBF.setFolded(&ParentBF); 1436 } 1437 1438 ParentBF.setHasFunctionsFoldedInto(); 1439 } 1440 1441 void BinaryContext::fixBinaryDataHoles() { 1442 assert(validateObjectNesting() && "object nesting inconsistency detected"); 1443 1444 for (BinarySection &Section : allocatableSections()) { 1445 std::vector<std::pair<uint64_t, uint64_t>> Holes; 1446 1447 auto isNotHole = [&Section](const binary_data_iterator &Itr) { 1448 BinaryData *BD = Itr->second; 1449 bool isHole = (!BD->getParent() && !BD->getSize() && BD->isObject() && 1450 (BD->getName().starts_with("SYMBOLat0x") || 1451 BD->getName().starts_with("DATAat0x") || 1452 BD->getName().starts_with("ANONYMOUS"))); 1453 return !isHole && BD->getSection() == Section && !BD->getParent(); 1454 }; 1455 1456 auto BDStart = BinaryDataMap.begin(); 1457 auto BDEnd = BinaryDataMap.end(); 1458 auto Itr = FilteredBinaryDataIterator(isNotHole, BDStart, BDEnd); 1459 auto End = FilteredBinaryDataIterator(isNotHole, BDEnd, BDEnd); 1460 1461 uint64_t EndAddress = Section.getAddress(); 1462 1463 while (Itr != End) { 1464 if (Itr->second->getAddress() > EndAddress) { 1465 uint64_t Gap = Itr->second->getAddress() - EndAddress; 1466 Holes.emplace_back(EndAddress, Gap); 1467 } 1468 EndAddress = Itr->second->getEndAddress(); 1469 ++Itr; 1470 } 1471 1472 if (EndAddress < Section.getEndAddress()) 1473 Holes.emplace_back(EndAddress, Section.getEndAddress() - EndAddress); 1474 1475 // If there is already a symbol at the start of the hole, grow that symbol 1476 // to cover the rest. Otherwise, create a new symbol to cover the hole. 1477 for (std::pair<uint64_t, uint64_t> &Hole : Holes) { 1478 BinaryData *BD = getBinaryDataAtAddress(Hole.first); 1479 if (BD) { 1480 // BD->getSection() can be != Section if there are sections that 1481 // overlap. 
In this case it is probably safe to just skip the holes 1482 // since the overlapping section will not(?) have any symbols in it. 1483 if (BD->getSection() == Section) 1484 setBinaryDataSize(Hole.first, Hole.second); 1485 } else { 1486 getOrCreateGlobalSymbol(Hole.first, "HOLEat", Hole.second, 1); 1487 } 1488 } 1489 } 1490 1491 assert(validateObjectNesting() && "object nesting inconsistency detected"); 1492 assert(validateHoles() && "top level hole detected in object map"); 1493 } 1494 1495 void BinaryContext::printGlobalSymbols(raw_ostream &OS) const { 1496 const BinarySection *CurrentSection = nullptr; 1497 bool FirstSection = true; 1498 1499 for (auto &Entry : BinaryDataMap) { 1500 const BinaryData *BD = Entry.second; 1501 const BinarySection &Section = BD->getSection(); 1502 if (FirstSection || Section != *CurrentSection) { 1503 uint64_t Address, Size; 1504 StringRef Name = Section.getName(); 1505 if (Section) { 1506 Address = Section.getAddress(); 1507 Size = Section.getSize(); 1508 } else { 1509 Address = BD->getAddress(); 1510 Size = BD->getSize(); 1511 } 1512 OS << "BOLT-INFO: Section " << Name << ", " 1513 << "0x" + Twine::utohexstr(Address) << ":" 1514 << "0x" + Twine::utohexstr(Address + Size) << "/" << Size << "\n"; 1515 CurrentSection = &Section; 1516 FirstSection = false; 1517 } 1518 1519 OS << "BOLT-INFO: "; 1520 const BinaryData *P = BD->getParent(); 1521 while (P) { 1522 OS << " "; 1523 P = P->getParent(); 1524 } 1525 OS << *BD << "\n"; 1526 } 1527 } 1528 1529 Expected<unsigned> BinaryContext::getDwarfFile( 1530 StringRef Directory, StringRef FileName, unsigned FileNumber, 1531 std::optional<MD5::MD5Result> Checksum, std::optional<StringRef> Source, 1532 unsigned CUID, unsigned DWARFVersion) { 1533 DwarfLineTable &Table = DwarfLineTablesCUMap[CUID]; 1534 return Table.tryGetFile(Directory, FileName, Checksum, Source, DWARFVersion, 1535 FileNumber); 1536 } 1537 1538 unsigned BinaryContext::addDebugFilenameToUnit(const uint32_t DestCUID, 1539 const 
uint32_t SrcCUID, 1540 unsigned FileIndex) { 1541 DWARFCompileUnit *SrcUnit = DwCtx->getCompileUnitForOffset(SrcCUID); 1542 const DWARFDebugLine::LineTable *LineTable = 1543 DwCtx->getLineTableForUnit(SrcUnit); 1544 const std::vector<DWARFDebugLine::FileNameEntry> &FileNames = 1545 LineTable->Prologue.FileNames; 1546 // Dir indexes start at 1, as DWARF file numbers, and a dir index 0 1547 // means empty dir. 1548 assert(FileIndex > 0 && FileIndex <= FileNames.size() && 1549 "FileIndex out of range for the compilation unit."); 1550 StringRef Dir = ""; 1551 if (FileNames[FileIndex - 1].DirIdx != 0) { 1552 if (std::optional<const char *> DirName = dwarf::toString( 1553 LineTable->Prologue 1554 .IncludeDirectories[FileNames[FileIndex - 1].DirIdx - 1])) { 1555 Dir = *DirName; 1556 } 1557 } 1558 StringRef FileName = ""; 1559 if (std::optional<const char *> FName = 1560 dwarf::toString(FileNames[FileIndex - 1].Name)) 1561 FileName = *FName; 1562 assert(FileName != ""); 1563 DWARFCompileUnit *DstUnit = DwCtx->getCompileUnitForOffset(DestCUID); 1564 return cantFail(getDwarfFile(Dir, FileName, 0, std::nullopt, std::nullopt, 1565 DestCUID, DstUnit->getVersion())); 1566 } 1567 1568 std::vector<BinaryFunction *> BinaryContext::getSortedFunctions() { 1569 std::vector<BinaryFunction *> SortedFunctions(BinaryFunctions.size()); 1570 llvm::transform(llvm::make_second_range(BinaryFunctions), 1571 SortedFunctions.begin(), 1572 [](BinaryFunction &BF) { return &BF; }); 1573 1574 llvm::stable_sort(SortedFunctions, 1575 [](const BinaryFunction *A, const BinaryFunction *B) { 1576 if (A->hasValidIndex() && B->hasValidIndex()) { 1577 return A->getIndex() < B->getIndex(); 1578 } 1579 return A->hasValidIndex(); 1580 }); 1581 return SortedFunctions; 1582 } 1583 1584 std::vector<BinaryFunction *> BinaryContext::getAllBinaryFunctions() { 1585 std::vector<BinaryFunction *> AllFunctions; 1586 AllFunctions.reserve(BinaryFunctions.size() + InjectedBinaryFunctions.size()); 1587 
llvm::transform(llvm::make_second_range(BinaryFunctions), 1588 std::back_inserter(AllFunctions), 1589 [](BinaryFunction &BF) { return &BF; }); 1590 llvm::copy(InjectedBinaryFunctions, std::back_inserter(AllFunctions)); 1591 1592 return AllFunctions; 1593 } 1594 1595 std::optional<DWARFUnit *> BinaryContext::getDWOCU(uint64_t DWOId) { 1596 auto Iter = DWOCUs.find(DWOId); 1597 if (Iter == DWOCUs.end()) 1598 return std::nullopt; 1599 1600 return Iter->second; 1601 } 1602 1603 DWARFContext *BinaryContext::getDWOContext() const { 1604 if (DWOCUs.empty()) 1605 return nullptr; 1606 return &DWOCUs.begin()->second->getContext(); 1607 } 1608 1609 /// Handles DWO sections that can either be in .o, .dwo or .dwp files. 1610 void BinaryContext::preprocessDWODebugInfo() { 1611 for (const std::unique_ptr<DWARFUnit> &CU : DwCtx->compile_units()) { 1612 DWARFUnit *const DwarfUnit = CU.get(); 1613 if (std::optional<uint64_t> DWOId = DwarfUnit->getDWOId()) { 1614 std::string DWOName = dwarf::toString( 1615 DwarfUnit->getUnitDIE().find( 1616 {dwarf::DW_AT_dwo_name, dwarf::DW_AT_GNU_dwo_name}), 1617 ""); 1618 SmallString<16> AbsolutePath; 1619 if (!opts::CompDirOverride.empty()) { 1620 sys::path::append(AbsolutePath, opts::CompDirOverride); 1621 sys::path::append(AbsolutePath, DWOName); 1622 } 1623 DWARFUnit *DWOCU = 1624 DwarfUnit->getNonSkeletonUnitDIE(false, AbsolutePath).getDwarfUnit(); 1625 if (!DWOCU->isDWOUnit()) { 1626 outs() << "BOLT-WARNING: Debug Fission: DWO debug information for " 1627 << DWOName 1628 << " was not retrieved and won't be updated. 
Please check " 1629 "relative path.\n"; 1630 continue; 1631 } 1632 DWOCUs[*DWOId] = DWOCU; 1633 } 1634 } 1635 if (!DWOCUs.empty()) 1636 outs() << "BOLT-INFO: processing split DWARF\n"; 1637 } 1638 1639 void BinaryContext::preprocessDebugInfo() { 1640 struct CURange { 1641 uint64_t LowPC; 1642 uint64_t HighPC; 1643 DWARFUnit *Unit; 1644 1645 bool operator<(const CURange &Other) const { return LowPC < Other.LowPC; } 1646 }; 1647 1648 // Building a map of address ranges to CUs similar to .debug_aranges and use 1649 // it to assign CU to functions. 1650 std::vector<CURange> AllRanges; 1651 AllRanges.reserve(DwCtx->getNumCompileUnits()); 1652 for (const std::unique_ptr<DWARFUnit> &CU : DwCtx->compile_units()) { 1653 Expected<DWARFAddressRangesVector> RangesOrError = 1654 CU->getUnitDIE().getAddressRanges(); 1655 if (!RangesOrError) { 1656 consumeError(RangesOrError.takeError()); 1657 continue; 1658 } 1659 for (DWARFAddressRange &Range : *RangesOrError) { 1660 // Parts of the debug info could be invalidated due to corresponding code 1661 // being removed from the binary by the linker. Hence we check if the 1662 // address is a valid one. 1663 if (containsAddress(Range.LowPC)) 1664 AllRanges.emplace_back(CURange{Range.LowPC, Range.HighPC, CU.get()}); 1665 } 1666 1667 ContainsDwarf5 |= CU->getVersion() >= 5; 1668 ContainsDwarfLegacy |= CU->getVersion() < 5; 1669 } 1670 1671 llvm::sort(AllRanges); 1672 for (auto &KV : BinaryFunctions) { 1673 const uint64_t FunctionAddress = KV.first; 1674 BinaryFunction &Function = KV.second; 1675 1676 auto It = llvm::partition_point( 1677 AllRanges, [=](CURange R) { return R.HighPC <= FunctionAddress; }); 1678 if (It != AllRanges.end() && It->LowPC <= FunctionAddress) 1679 Function.setDWARFUnit(It->Unit); 1680 } 1681 1682 // Discover units with debug info that needs to be updated. 
  // A unit is processed if at least one emitted function belongs to it.
  for (const auto &KV : BinaryFunctions) {
    const BinaryFunction &BF = KV.second;
    if (shouldEmit(BF) && BF.getDWARFUnit())
      ProcessedCUs.insert(BF.getDWARFUnit());
  }

  // Clear debug info for functions from units that we are not going to process.
  for (auto &KV : BinaryFunctions) {
    BinaryFunction &BF = KV.second;
    if (BF.getDWARFUnit() && !ProcessedCUs.count(BF.getDWARFUnit()))
      BF.setDWARFUnit(nullptr);
  }

  if (opts::Verbosity >= 1) {
    outs() << "BOLT-INFO: " << ProcessedCUs.size() << " out of "
           << DwCtx->getNumCompileUnits() << " CUs will be updated\n";
  }

  preprocessDWODebugInfo();

  // Populate MCContext with DWARF files from all units.
  StringRef GlobalPrefix = AsmInfo->getPrivateGlobalPrefix();
  for (const std::unique_ptr<DWARFUnit> &CU : DwCtx->compile_units()) {
    // The unit offset doubles as the unit ID. Every unit gets a line table
    // label, even if its file names are not loaded below.
    const uint64_t CUID = CU->getOffset();
    DwarfLineTable &BinaryLineTable = getDwarfLineTable(CUID);
    BinaryLineTable.setLabel(Ctx->getOrCreateSymbol(
        GlobalPrefix + "line_table_start" + Twine(CUID)));

    if (!ProcessedCUs.count(CU.get()))
      continue;

    const DWARFDebugLine::LineTable *LineTable =
        DwCtx->getLineTableForUnit(CU.get());
    const std::vector<DWARFDebugLine::FileNameEntry> &FileNames =
        LineTable->Prologue.FileNames;

    uint16_t DwarfVersion = LineTable->Prologue.getVersion();
    if (DwarfVersion >= 5) {
      // For DWARF 5 the root file of the table is set up explicitly.
      // NOTE(review): FileNames[0] is read before the FileNames.empty() check
      // further down - presumably a v5 table with MD5 always has an entry 0;
      // confirm.
      std::optional<MD5::MD5Result> Checksum;
      if (LineTable->Prologue.ContentTypes.HasMD5)
        Checksum = LineTable->Prologue.FileNames[0].Checksum;
      std::optional<const char *> Name =
          dwarf::toString(CU->getUnitDIE().find(dwarf::DW_AT_name), nullptr);
      // For split units the name is taken from the DWO unit instead.
      // NOTE(review): preprocessDWODebugInfo() skips units whose DWO data was
      // not retrieved, so this lookup looks like it can fail for them; only
      // the assert guards the dereference - confirm.
      if (std::optional<uint64_t> DWOID = CU->getDWOId()) {
        auto Iter = DWOCUs.find(*DWOID);
        assert(Iter != DWOCUs.end() && "DWO CU was not found.");
        Name = dwarf::toString(
            Iter->second->getUnitDIE().find(dwarf::DW_AT_name), nullptr);
      }
      BinaryLineTable.setRootFile(CU->getCompilationDir(), *Name, Checksum,
                                  std::nullopt);
    }

    BinaryLineTable.setDwarfVersion(DwarfVersion);

    // Assign a unique label to every line table, one per CU.
    // Make sure empty debug line tables are registered too.
    if (FileNames.empty()) {
      cantFail(getDwarfFile("", "<unknown>", 0, std::nullopt, std::nullopt,
                            CUID, DwarfVersion));
      continue;
    }
    // Before DWARF 5 directory indexes are 1-based, hence the adjustment when
    // indexing IncludeDirectories.
    const uint32_t Offset = DwarfVersion < 5 ? 1 : 0;
    for (size_t I = 0, Size = FileNames.size(); I != Size; ++I) {
      // Dir indexes start at 1, as DWARF file numbers, and a dir index 0
      // means empty dir.
      StringRef Dir = "";
      if (FileNames[I].DirIdx != 0 || DwarfVersion >= 5)
        if (std::optional<const char *> DirName = dwarf::toString(
                LineTable->Prologue
                    .IncludeDirectories[FileNames[I].DirIdx - Offset]))
          Dir = *DirName;
      StringRef FileName = "";
      if (std::optional<const char *> FName =
              dwarf::toString(FileNames[I].Name))
        FileName = *FName;
      assert(FileName != "");
      // Checksums are only carried over for DWARF 5 tables that have MD5.
      std::optional<MD5::MD5Result> Checksum;
      if (DwarfVersion >= 5 && LineTable->Prologue.ContentTypes.HasMD5)
        Checksum = LineTable->Prologue.FileNames[I].Checksum;
      cantFail(getDwarfFile(Dir, FileName, 0, Checksum, std::nullopt, CUID,
                            DwarfVersion));
    }
  }
}

/// Return true if \p Function should be emitted. Pseudo functions never are.
/// Otherwise everything is emitted when all functions are processed; failing
/// that, ignored functions are not emitted, and the rest are emitted in
/// relocation mode or when they are simple.
bool BinaryContext::shouldEmit(const BinaryFunction &Function) const {
  if (Function.isPseudo())
    return false;

  if (opts::processAllFunctions())
    return true;

  if (Function.isIgnored())
    return false;

  // In relocation mode we will emit non-simple functions with CFG.
  // If the function does not have a CFG it should be marked as ignored.
1781 return HasRelocations || Function.isSimple(); 1782 } 1783 1784 void BinaryContext::dump(const MCInst &Inst) const { 1785 if (LLVM_UNLIKELY(!InstPrinter)) { 1786 dbgs() << "Cannot dump for InstPrinter is not initialized.\n"; 1787 return; 1788 } 1789 InstPrinter->printInst(&Inst, 0, "", *STI, dbgs()); 1790 dbgs() << "\n"; 1791 } 1792 1793 void BinaryContext::printCFI(raw_ostream &OS, const MCCFIInstruction &Inst) { 1794 uint32_t Operation = Inst.getOperation(); 1795 switch (Operation) { 1796 case MCCFIInstruction::OpSameValue: 1797 OS << "OpSameValue Reg" << Inst.getRegister(); 1798 break; 1799 case MCCFIInstruction::OpRememberState: 1800 OS << "OpRememberState"; 1801 break; 1802 case MCCFIInstruction::OpRestoreState: 1803 OS << "OpRestoreState"; 1804 break; 1805 case MCCFIInstruction::OpOffset: 1806 OS << "OpOffset Reg" << Inst.getRegister() << " " << Inst.getOffset(); 1807 break; 1808 case MCCFIInstruction::OpDefCfaRegister: 1809 OS << "OpDefCfaRegister Reg" << Inst.getRegister(); 1810 break; 1811 case MCCFIInstruction::OpDefCfaOffset: 1812 OS << "OpDefCfaOffset " << Inst.getOffset(); 1813 break; 1814 case MCCFIInstruction::OpDefCfa: 1815 OS << "OpDefCfa Reg" << Inst.getRegister() << " " << Inst.getOffset(); 1816 break; 1817 case MCCFIInstruction::OpRelOffset: 1818 OS << "OpRelOffset Reg" << Inst.getRegister() << " " << Inst.getOffset(); 1819 break; 1820 case MCCFIInstruction::OpAdjustCfaOffset: 1821 OS << "OfAdjustCfaOffset " << Inst.getOffset(); 1822 break; 1823 case MCCFIInstruction::OpEscape: 1824 OS << "OpEscape"; 1825 break; 1826 case MCCFIInstruction::OpRestore: 1827 OS << "OpRestore Reg" << Inst.getRegister(); 1828 break; 1829 case MCCFIInstruction::OpUndefined: 1830 OS << "OpUndefined Reg" << Inst.getRegister(); 1831 break; 1832 case MCCFIInstruction::OpRegister: 1833 OS << "OpRegister Reg" << Inst.getRegister() << " Reg" 1834 << Inst.getRegister2(); 1835 break; 1836 case MCCFIInstruction::OpWindowSave: 1837 OS << "OpWindowSave"; 1838 break; 1839 case 
MCCFIInstruction::OpGnuArgsSize: 1840 OS << "OpGnuArgsSize"; 1841 break; 1842 default: 1843 OS << "Op#" << Operation; 1844 break; 1845 } 1846 } 1847 1848 MarkerSymType BinaryContext::getMarkerType(const SymbolRef &Symbol) const { 1849 // For aarch64 and riscv, the ABI defines mapping symbols so we identify data 1850 // in the code section (see IHI0056B). $x identifies a symbol starting code or 1851 // the end of a data chunk inside code, $d identifies start of data. 1852 if ((!isAArch64() && !isRISCV()) || ELFSymbolRef(Symbol).getSize()) 1853 return MarkerSymType::NONE; 1854 1855 Expected<StringRef> NameOrError = Symbol.getName(); 1856 Expected<object::SymbolRef::Type> TypeOrError = Symbol.getType(); 1857 1858 if (!TypeOrError || !NameOrError) 1859 return MarkerSymType::NONE; 1860 1861 if (*TypeOrError != SymbolRef::ST_Unknown) 1862 return MarkerSymType::NONE; 1863 1864 if (*NameOrError == "$x" || NameOrError->starts_with("$x.")) 1865 return MarkerSymType::CODE; 1866 1867 // $x<ISA> 1868 if (isRISCV() && NameOrError->starts_with("$x")) 1869 return MarkerSymType::CODE; 1870 1871 if (*NameOrError == "$d" || NameOrError->starts_with("$d.")) 1872 return MarkerSymType::DATA; 1873 1874 return MarkerSymType::NONE; 1875 } 1876 1877 bool BinaryContext::isMarker(const SymbolRef &Symbol) const { 1878 return getMarkerType(Symbol) != MarkerSymType::NONE; 1879 } 1880 1881 static void printDebugInfo(raw_ostream &OS, const MCInst &Instruction, 1882 const BinaryFunction *Function, 1883 DWARFContext *DwCtx) { 1884 DebugLineTableRowRef RowRef = 1885 DebugLineTableRowRef::fromSMLoc(Instruction.getLoc()); 1886 if (RowRef == DebugLineTableRowRef::NULL_ROW) 1887 return; 1888 1889 const DWARFDebugLine::LineTable *LineTable; 1890 if (Function && Function->getDWARFUnit() && 1891 Function->getDWARFUnit()->getOffset() == RowRef.DwCompileUnitIndex) { 1892 LineTable = Function->getDWARFLineTable(); 1893 } else { 1894 LineTable = DwCtx->getLineTableForUnit( 1895 
DwCtx->getCompileUnitForOffset(RowRef.DwCompileUnitIndex)); 1896 } 1897 assert(LineTable && "line table expected for instruction with debug info"); 1898 1899 const DWARFDebugLine::Row &Row = LineTable->Rows[RowRef.RowIndex - 1]; 1900 StringRef FileName = ""; 1901 if (std::optional<const char *> FName = 1902 dwarf::toString(LineTable->Prologue.FileNames[Row.File - 1].Name)) 1903 FileName = *FName; 1904 OS << " # debug line " << FileName << ":" << Row.Line; 1905 if (Row.Column) 1906 OS << ":" << Row.Column; 1907 if (Row.Discriminator) 1908 OS << " discriminator:" << Row.Discriminator; 1909 } 1910 1911 void BinaryContext::printInstruction(raw_ostream &OS, const MCInst &Instruction, 1912 uint64_t Offset, 1913 const BinaryFunction *Function, 1914 bool PrintMCInst, bool PrintMemData, 1915 bool PrintRelocations, 1916 StringRef Endl) const { 1917 OS << format(" %08" PRIx64 ": ", Offset); 1918 if (MIB->isCFI(Instruction)) { 1919 uint32_t Offset = Instruction.getOperand(0).getImm(); 1920 OS << "\t!CFI\t$" << Offset << "\t; "; 1921 if (Function) 1922 printCFI(OS, *Function->getCFIFor(Instruction)); 1923 OS << Endl; 1924 return; 1925 } 1926 InstPrinter->printInst(&Instruction, 0, "", *STI, OS); 1927 if (MIB->isCall(Instruction)) { 1928 if (MIB->isTailCall(Instruction)) 1929 OS << " # TAILCALL "; 1930 if (MIB->isInvoke(Instruction)) { 1931 const std::optional<MCPlus::MCLandingPad> EHInfo = 1932 MIB->getEHInfo(Instruction); 1933 OS << " # handler: "; 1934 if (EHInfo->first) 1935 OS << *EHInfo->first; 1936 else 1937 OS << '0'; 1938 OS << "; action: " << EHInfo->second; 1939 const int64_t GnuArgsSize = MIB->getGnuArgsSize(Instruction); 1940 if (GnuArgsSize >= 0) 1941 OS << "; GNU_args_size = " << GnuArgsSize; 1942 } 1943 } else if (MIB->isIndirectBranch(Instruction)) { 1944 if (uint64_t JTAddress = MIB->getJumpTable(Instruction)) { 1945 OS << " # JUMPTABLE @0x" << Twine::utohexstr(JTAddress); 1946 } else { 1947 OS << " # UNKNOWN CONTROL FLOW"; 1948 } 1949 } 1950 if 
(std::optional<uint32_t> Offset = MIB->getOffset(Instruction)) 1951 OS << " # Offset: " << *Offset; 1952 if (std::optional<uint32_t> Size = MIB->getSize(Instruction)) 1953 OS << " # Size: " << *Size; 1954 if (MCSymbol *Label = MIB->getLabel(Instruction)) 1955 OS << " # Label: " << *Label; 1956 1957 MIB->printAnnotations(Instruction, OS); 1958 1959 if (opts::PrintDebugInfo) 1960 printDebugInfo(OS, Instruction, Function, DwCtx.get()); 1961 1962 if ((opts::PrintRelocations || PrintRelocations) && Function) { 1963 const uint64_t Size = computeCodeSize(&Instruction, &Instruction + 1); 1964 Function->printRelocations(OS, Offset, Size); 1965 } 1966 1967 OS << Endl; 1968 1969 if (PrintMCInst) { 1970 Instruction.dump_pretty(OS, InstPrinter.get()); 1971 OS << Endl; 1972 } 1973 } 1974 1975 std::optional<uint64_t> 1976 BinaryContext::getBaseAddressForMapping(uint64_t MMapAddress, 1977 uint64_t FileOffset) const { 1978 // Find a segment with a matching file offset. 1979 for (auto &KV : SegmentMapInfo) { 1980 const SegmentInfo &SegInfo = KV.second; 1981 // FileOffset is got from perf event, 1982 // and it is equal to alignDown(SegInfo.FileOffset, pagesize). 1983 // If the pagesize is not equal to SegInfo.Alignment. 1984 // FileOffset and SegInfo.FileOffset should be aligned first, 1985 // and then judge whether they are equal. 1986 if (alignDown(SegInfo.FileOffset, SegInfo.Alignment) == 1987 alignDown(FileOffset, SegInfo.Alignment)) { 1988 // The function's offset from base address in VAS is aligned by pagesize 1989 // instead of SegInfo.Alignment. Pagesize can't be got from perf events. 1990 // However, The ELF document says that SegInfo.FileOffset should equal 1991 // to SegInfo.Address, modulo the pagesize. 
1992 // Reference: https://refspecs.linuxfoundation.org/elf/elf.pdf 1993 1994 // So alignDown(SegInfo.Address, pagesize) can be calculated by: 1995 // alignDown(SegInfo.Address, pagesize) 1996 // = SegInfo.Address - (SegInfo.Address % pagesize) 1997 // = SegInfo.Address - (SegInfo.FileOffset % pagesize) 1998 // = SegInfo.Address - SegInfo.FileOffset + 1999 // alignDown(SegInfo.FileOffset, pagesize) 2000 // = SegInfo.Address - SegInfo.FileOffset + FileOffset 2001 return MMapAddress - (SegInfo.Address - SegInfo.FileOffset + FileOffset); 2002 } 2003 } 2004 2005 return std::nullopt; 2006 } 2007 2008 ErrorOr<BinarySection &> BinaryContext::getSectionForAddress(uint64_t Address) { 2009 auto SI = AddressToSection.upper_bound(Address); 2010 if (SI != AddressToSection.begin()) { 2011 --SI; 2012 uint64_t UpperBound = SI->first + SI->second->getSize(); 2013 if (!SI->second->getSize()) 2014 UpperBound += 1; 2015 if (UpperBound > Address) 2016 return *SI->second; 2017 } 2018 return std::make_error_code(std::errc::bad_address); 2019 } 2020 2021 ErrorOr<StringRef> 2022 BinaryContext::getSectionNameForAddress(uint64_t Address) const { 2023 if (ErrorOr<const BinarySection &> Section = getSectionForAddress(Address)) 2024 return Section->getName(); 2025 return std::make_error_code(std::errc::bad_address); 2026 } 2027 2028 BinarySection &BinaryContext::registerSection(BinarySection *Section) { 2029 auto Res = Sections.insert(Section); 2030 (void)Res; 2031 assert(Res.second && "can't register the same section twice."); 2032 2033 // Only register allocatable sections in the AddressToSection map. 
2034 if (Section->isAllocatable() && Section->getAddress()) 2035 AddressToSection.insert(std::make_pair(Section->getAddress(), Section)); 2036 NameToSection.insert( 2037 std::make_pair(std::string(Section->getName()), Section)); 2038 if (Section->hasSectionRef()) 2039 SectionRefToBinarySection.insert( 2040 std::make_pair(Section->getSectionRef(), Section)); 2041 2042 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: registering " << *Section << "\n"); 2043 return *Section; 2044 } 2045 2046 BinarySection &BinaryContext::registerSection(SectionRef Section) { 2047 return registerSection(new BinarySection(*this, Section)); 2048 } 2049 2050 BinarySection & 2051 BinaryContext::registerSection(const Twine &SectionName, 2052 const BinarySection &OriginalSection) { 2053 return registerSection( 2054 new BinarySection(*this, SectionName, OriginalSection)); 2055 } 2056 2057 BinarySection & 2058 BinaryContext::registerOrUpdateSection(const Twine &Name, unsigned ELFType, 2059 unsigned ELFFlags, uint8_t *Data, 2060 uint64_t Size, unsigned Alignment) { 2061 auto NamedSections = getSectionByName(Name); 2062 if (NamedSections.begin() != NamedSections.end()) { 2063 assert(std::next(NamedSections.begin()) == NamedSections.end() && 2064 "can only update unique sections"); 2065 BinarySection *Section = NamedSections.begin()->second; 2066 2067 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: updating " << *Section << " -> "); 2068 const bool Flag = Section->isAllocatable(); 2069 (void)Flag; 2070 Section->update(Data, Size, Alignment, ELFType, ELFFlags); 2071 LLVM_DEBUG(dbgs() << *Section << "\n"); 2072 // FIXME: Fix section flags/attributes for MachO. 
2073 if (isELF()) 2074 assert(Flag == Section->isAllocatable() && 2075 "can't change section allocation status"); 2076 return *Section; 2077 } 2078 2079 return registerSection( 2080 new BinarySection(*this, Name, Data, Size, Alignment, ELFType, ELFFlags)); 2081 } 2082 2083 void BinaryContext::deregisterSectionName(const BinarySection &Section) { 2084 auto NameRange = NameToSection.equal_range(Section.getName().str()); 2085 while (NameRange.first != NameRange.second) { 2086 if (NameRange.first->second == &Section) { 2087 NameToSection.erase(NameRange.first); 2088 break; 2089 } 2090 ++NameRange.first; 2091 } 2092 } 2093 2094 void BinaryContext::deregisterUnusedSections() { 2095 ErrorOr<BinarySection &> AbsSection = getUniqueSectionByName("<absolute>"); 2096 for (auto SI = Sections.begin(); SI != Sections.end();) { 2097 BinarySection *Section = *SI; 2098 // We check getOutputData() instead of getOutputSize() because sometimes 2099 // zero-sized .text.cold sections are allocated. 2100 if (Section->hasSectionRef() || Section->getOutputData() || 2101 (AbsSection && Section == &AbsSection.get())) { 2102 ++SI; 2103 continue; 2104 } 2105 2106 LLVM_DEBUG(dbgs() << "LLVM-DEBUG: deregistering " << Section->getName() 2107 << '\n';); 2108 deregisterSectionName(*Section); 2109 SI = Sections.erase(SI); 2110 delete Section; 2111 } 2112 } 2113 2114 bool BinaryContext::deregisterSection(BinarySection &Section) { 2115 BinarySection *SectionPtr = &Section; 2116 auto Itr = Sections.find(SectionPtr); 2117 if (Itr != Sections.end()) { 2118 auto Range = AddressToSection.equal_range(SectionPtr->getAddress()); 2119 while (Range.first != Range.second) { 2120 if (Range.first->second == SectionPtr) { 2121 AddressToSection.erase(Range.first); 2122 break; 2123 } 2124 ++Range.first; 2125 } 2126 2127 deregisterSectionName(*SectionPtr); 2128 Sections.erase(Itr); 2129 delete SectionPtr; 2130 return true; 2131 } 2132 return false; 2133 } 2134 2135 void BinaryContext::renameSection(BinarySection 
&Section, 2136 const Twine &NewName) { 2137 auto Itr = Sections.find(&Section); 2138 assert(Itr != Sections.end() && "Section must exist to be renamed."); 2139 Sections.erase(Itr); 2140 2141 deregisterSectionName(Section); 2142 2143 Section.Name = NewName.str(); 2144 Section.setOutputName(Section.Name); 2145 2146 NameToSection.insert(std::make_pair(Section.Name, &Section)); 2147 2148 // Reinsert with the new name. 2149 Sections.insert(&Section); 2150 } 2151 2152 void BinaryContext::printSections(raw_ostream &OS) const { 2153 for (BinarySection *const &Section : Sections) 2154 OS << "BOLT-INFO: " << *Section << "\n"; 2155 } 2156 2157 BinarySection &BinaryContext::absoluteSection() { 2158 if (ErrorOr<BinarySection &> Section = getUniqueSectionByName("<absolute>")) 2159 return *Section; 2160 return registerOrUpdateSection("<absolute>", ELF::SHT_NULL, 0u); 2161 } 2162 2163 ErrorOr<uint64_t> BinaryContext::getUnsignedValueAtAddress(uint64_t Address, 2164 size_t Size) const { 2165 const ErrorOr<const BinarySection &> Section = getSectionForAddress(Address); 2166 if (!Section) 2167 return std::make_error_code(std::errc::bad_address); 2168 2169 if (Section->isVirtual()) 2170 return 0; 2171 2172 DataExtractor DE(Section->getContents(), AsmInfo->isLittleEndian(), 2173 AsmInfo->getCodePointerSize()); 2174 auto ValueOffset = static_cast<uint64_t>(Address - Section->getAddress()); 2175 return DE.getUnsigned(&ValueOffset, Size); 2176 } 2177 2178 ErrorOr<uint64_t> BinaryContext::getSignedValueAtAddress(uint64_t Address, 2179 size_t Size) const { 2180 const ErrorOr<const BinarySection &> Section = getSectionForAddress(Address); 2181 if (!Section) 2182 return std::make_error_code(std::errc::bad_address); 2183 2184 if (Section->isVirtual()) 2185 return 0; 2186 2187 DataExtractor DE(Section->getContents(), AsmInfo->isLittleEndian(), 2188 AsmInfo->getCodePointerSize()); 2189 auto ValueOffset = static_cast<uint64_t>(Address - Section->getAddress()); 2190 return 
DE.getSigned(&ValueOffset, Size); 2191 } 2192 2193 void BinaryContext::addRelocation(uint64_t Address, MCSymbol *Symbol, 2194 uint64_t Type, uint64_t Addend, 2195 uint64_t Value) { 2196 ErrorOr<BinarySection &> Section = getSectionForAddress(Address); 2197 assert(Section && "cannot find section for address"); 2198 Section->addRelocation(Address - Section->getAddress(), Symbol, Type, Addend, 2199 Value); 2200 } 2201 2202 void BinaryContext::addDynamicRelocation(uint64_t Address, MCSymbol *Symbol, 2203 uint64_t Type, uint64_t Addend, 2204 uint64_t Value) { 2205 ErrorOr<BinarySection &> Section = getSectionForAddress(Address); 2206 assert(Section && "cannot find section for address"); 2207 Section->addDynamicRelocation(Address - Section->getAddress(), Symbol, Type, 2208 Addend, Value); 2209 } 2210 2211 bool BinaryContext::removeRelocationAt(uint64_t Address) { 2212 ErrorOr<BinarySection &> Section = getSectionForAddress(Address); 2213 assert(Section && "cannot find section for address"); 2214 return Section->removeRelocationAt(Address - Section->getAddress()); 2215 } 2216 2217 const Relocation *BinaryContext::getRelocationAt(uint64_t Address) const { 2218 ErrorOr<const BinarySection &> Section = getSectionForAddress(Address); 2219 if (!Section) 2220 return nullptr; 2221 2222 return Section->getRelocationAt(Address - Section->getAddress()); 2223 } 2224 2225 const Relocation * 2226 BinaryContext::getDynamicRelocationAt(uint64_t Address) const { 2227 ErrorOr<const BinarySection &> Section = getSectionForAddress(Address); 2228 if (!Section) 2229 return nullptr; 2230 2231 return Section->getDynamicRelocationAt(Address - Section->getAddress()); 2232 } 2233 2234 void BinaryContext::markAmbiguousRelocations(BinaryData &BD, 2235 const uint64_t Address) { 2236 auto setImmovable = [&](BinaryData &BD) { 2237 BinaryData *Root = BD.getAtomicRoot(); 2238 LLVM_DEBUG(if (Root->isMoveable()) { 2239 dbgs() << "BOLT-DEBUG: setting " << *Root << " as immovable " 2240 << "due to ambiguous 
relocation referencing 0x" 2241 << Twine::utohexstr(Address) << '\n'; 2242 }); 2243 Root->setIsMoveable(false); 2244 }; 2245 2246 if (Address == BD.getAddress()) { 2247 setImmovable(BD); 2248 2249 // Set previous symbol as immovable 2250 BinaryData *Prev = getBinaryDataContainingAddress(Address - 1); 2251 if (Prev && Prev->getEndAddress() == BD.getAddress()) 2252 setImmovable(*Prev); 2253 } 2254 2255 if (Address == BD.getEndAddress()) { 2256 setImmovable(BD); 2257 2258 // Set next symbol as immovable 2259 BinaryData *Next = getBinaryDataContainingAddress(BD.getEndAddress()); 2260 if (Next && Next->getAddress() == BD.getEndAddress()) 2261 setImmovable(*Next); 2262 } 2263 } 2264 2265 BinaryFunction *BinaryContext::getFunctionForSymbol(const MCSymbol *Symbol, 2266 uint64_t *EntryDesc) { 2267 std::shared_lock<llvm::sys::RWMutex> Lock(SymbolToFunctionMapMutex); 2268 auto BFI = SymbolToFunctionMap.find(Symbol); 2269 if (BFI == SymbolToFunctionMap.end()) 2270 return nullptr; 2271 2272 BinaryFunction *BF = BFI->second; 2273 if (EntryDesc) 2274 *EntryDesc = BF->getEntryIDForSymbol(Symbol); 2275 2276 return BF; 2277 } 2278 2279 void BinaryContext::exitWithBugReport(StringRef Message, 2280 const BinaryFunction &Function) const { 2281 errs() << "=======================================\n"; 2282 errs() << "BOLT is unable to proceed because it couldn't properly understand " 2283 "this function.\n"; 2284 errs() << "If you are running the most recent version of BOLT, you may " 2285 "want to " 2286 "report this and paste this dump.\nPlease check that there is no " 2287 "sensitive contents being shared in this dump.\n"; 2288 errs() << "\nOffending function: " << Function.getPrintName() << "\n\n"; 2289 ScopedPrinter SP(errs()); 2290 SP.printBinaryBlock("Function contents", *Function.getData()); 2291 errs() << "\n"; 2292 Function.dump(); 2293 errs() << "ERROR: " << Message; 2294 errs() << "\n=======================================\n"; 2295 exit(1); 2296 } 2297 2298 BinaryFunction * 
2299 BinaryContext::createInjectedBinaryFunction(const std::string &Name, 2300 bool IsSimple) { 2301 InjectedBinaryFunctions.push_back(new BinaryFunction(Name, *this, IsSimple)); 2302 BinaryFunction *BF = InjectedBinaryFunctions.back(); 2303 setSymbolToFunctionMap(BF->getSymbol(), BF); 2304 BF->CurrentState = BinaryFunction::State::CFG; 2305 return BF; 2306 } 2307 2308 std::pair<size_t, size_t> 2309 BinaryContext::calculateEmittedSize(BinaryFunction &BF, bool FixBranches) { 2310 // Adjust branch instruction to match the current layout. 2311 if (FixBranches) 2312 BF.fixBranches(); 2313 2314 // Create local MC context to isolate the effect of ephemeral code emission. 2315 IndependentCodeEmitter MCEInstance = createIndependentMCCodeEmitter(); 2316 MCContext *LocalCtx = MCEInstance.LocalCtx.get(); 2317 MCAsmBackend *MAB = 2318 TheTarget->createMCAsmBackend(*STI, *MRI, MCTargetOptions()); 2319 2320 SmallString<256> Code; 2321 raw_svector_ostream VecOS(Code); 2322 2323 std::unique_ptr<MCObjectWriter> OW = MAB->createObjectWriter(VecOS); 2324 std::unique_ptr<MCStreamer> Streamer(TheTarget->createMCObjectStreamer( 2325 *TheTriple, *LocalCtx, std::unique_ptr<MCAsmBackend>(MAB), std::move(OW), 2326 std::unique_ptr<MCCodeEmitter>(MCEInstance.MCE.release()), *STI, 2327 /*RelaxAll=*/false, 2328 /*IncrementalLinkerCompatible=*/false, 2329 /*DWARFMustBeAtTheEnd=*/false)); 2330 2331 Streamer->initSections(false, *STI); 2332 2333 MCSection *Section = MCEInstance.LocalMOFI->getTextSection(); 2334 Section->setHasInstructions(true); 2335 2336 // Create symbols in the LocalCtx so that they get destroyed with it. 
2337 MCSymbol *StartLabel = LocalCtx->createTempSymbol(); 2338 MCSymbol *EndLabel = LocalCtx->createTempSymbol(); 2339 2340 Streamer->switchSection(Section); 2341 Streamer->emitLabel(StartLabel); 2342 emitFunctionBody(*Streamer, BF, BF.getLayout().getMainFragment(), 2343 /*EmitCodeOnly=*/true); 2344 Streamer->emitLabel(EndLabel); 2345 2346 using LabelRange = std::pair<const MCSymbol *, const MCSymbol *>; 2347 SmallVector<LabelRange> SplitLabels; 2348 for (FunctionFragment &FF : BF.getLayout().getSplitFragments()) { 2349 MCSymbol *const SplitStartLabel = LocalCtx->createTempSymbol(); 2350 MCSymbol *const SplitEndLabel = LocalCtx->createTempSymbol(); 2351 SplitLabels.emplace_back(SplitStartLabel, SplitEndLabel); 2352 2353 MCSectionELF *const SplitSection = LocalCtx->getELFSection( 2354 BF.getCodeSectionName(FF.getFragmentNum()), ELF::SHT_PROGBITS, 2355 ELF::SHF_EXECINSTR | ELF::SHF_ALLOC); 2356 SplitSection->setHasInstructions(true); 2357 Streamer->switchSection(SplitSection); 2358 2359 Streamer->emitLabel(SplitStartLabel); 2360 emitFunctionBody(*Streamer, BF, FF, /*EmitCodeOnly=*/true); 2361 Streamer->emitLabel(SplitEndLabel); 2362 // To avoid calling MCObjectStreamer::flushPendingLabels() which is 2363 // private 2364 Streamer->emitBytes(StringRef("")); 2365 Streamer->switchSection(Section); 2366 } 2367 2368 // To avoid calling MCObjectStreamer::flushPendingLabels() which is private or 2369 // MCStreamer::Finish(), which does more than we want 2370 Streamer->emitBytes(StringRef("")); 2371 2372 MCAssembler &Assembler = 2373 static_cast<MCObjectStreamer *>(Streamer.get())->getAssembler(); 2374 MCAsmLayout Layout(Assembler); 2375 Assembler.layout(Layout); 2376 2377 // Obtain fragment sizes. 2378 std::vector<uint64_t> FragmentSizes; 2379 // Main fragment size. 2380 const uint64_t HotSize = 2381 Layout.getSymbolOffset(*EndLabel) - Layout.getSymbolOffset(*StartLabel); 2382 FragmentSizes.push_back(HotSize); 2383 // Split fragment sizes. 
2384 uint64_t ColdSize = 0; 2385 for (const auto &Labels : SplitLabels) { 2386 uint64_t Size = Layout.getSymbolOffset(*Labels.second) - 2387 Layout.getSymbolOffset(*Labels.first); 2388 FragmentSizes.push_back(Size); 2389 ColdSize += Size; 2390 } 2391 2392 // Populate new start and end offsets of each basic block. 2393 uint64_t FragmentIndex = 0; 2394 for (FunctionFragment &FF : BF.getLayout().fragments()) { 2395 BinaryBasicBlock *PrevBB = nullptr; 2396 for (BinaryBasicBlock *BB : FF) { 2397 const uint64_t BBStartOffset = Layout.getSymbolOffset(*(BB->getLabel())); 2398 BB->setOutputStartAddress(BBStartOffset); 2399 if (PrevBB) 2400 PrevBB->setOutputEndAddress(BBStartOffset); 2401 PrevBB = BB; 2402 } 2403 if (PrevBB) 2404 PrevBB->setOutputEndAddress(FragmentSizes[FragmentIndex]); 2405 FragmentIndex++; 2406 } 2407 2408 // Clean-up the effect of the code emission. 2409 for (const MCSymbol &Symbol : Assembler.symbols()) { 2410 MCSymbol *MutableSymbol = const_cast<MCSymbol *>(&Symbol); 2411 MutableSymbol->setUndefined(); 2412 MutableSymbol->setIsRegistered(false); 2413 } 2414 2415 return std::make_pair(HotSize, ColdSize); 2416 } 2417 2418 bool BinaryContext::validateInstructionEncoding( 2419 ArrayRef<uint8_t> InputSequence) const { 2420 MCInst Inst; 2421 uint64_t InstSize; 2422 DisAsm->getInstruction(Inst, InstSize, InputSequence, 0, nulls()); 2423 assert(InstSize == InputSequence.size() && 2424 "Disassembled instruction size does not match the sequence."); 2425 2426 SmallString<256> Code; 2427 SmallVector<MCFixup, 4> Fixups; 2428 2429 MCE->encodeInstruction(Inst, Code, Fixups, *STI); 2430 auto OutputSequence = ArrayRef<uint8_t>((uint8_t *)Code.data(), Code.size()); 2431 if (InputSequence != OutputSequence) { 2432 if (opts::Verbosity > 1) { 2433 errs() << "BOLT-WARNING: mismatched encoding detected\n" 2434 << " input: " << InputSequence << '\n' 2435 << " output: " << OutputSequence << '\n'; 2436 } 2437 return false; 2438 } 2439 2440 return true; 2441 } 2442 2443 uint64_t 
BinaryContext::getHotThreshold() const { 2444 static uint64_t Threshold = 0; 2445 if (Threshold == 0) { 2446 Threshold = std::max( 2447 (uint64_t)opts::ExecutionCountThreshold, 2448 NumProfiledFuncs ? SumExecutionCount / (2 * NumProfiledFuncs) : 1); 2449 } 2450 return Threshold; 2451 } 2452 2453 BinaryFunction *BinaryContext::getBinaryFunctionContainingAddress( 2454 uint64_t Address, bool CheckPastEnd, bool UseMaxSize) { 2455 auto FI = BinaryFunctions.upper_bound(Address); 2456 if (FI == BinaryFunctions.begin()) 2457 return nullptr; 2458 --FI; 2459 2460 const uint64_t UsedSize = 2461 UseMaxSize ? FI->second.getMaxSize() : FI->second.getSize(); 2462 2463 if (Address >= FI->first + UsedSize + (CheckPastEnd ? 1 : 0)) 2464 return nullptr; 2465 2466 return &FI->second; 2467 } 2468 2469 BinaryFunction *BinaryContext::getBinaryFunctionAtAddress(uint64_t Address) { 2470 // First, try to find a function starting at the given address. If the 2471 // function was folded, this will get us the original folded function if it 2472 // wasn't removed from the list, e.g. in non-relocation mode. 2473 auto BFI = BinaryFunctions.find(Address); 2474 if (BFI != BinaryFunctions.end()) 2475 return &BFI->second; 2476 2477 // We might have folded the function matching the object at the given 2478 // address. In such case, we look for a function matching the symbol 2479 // registered at the original address. The new function (the one that the 2480 // original was folded into) will hold the symbol. 
2481 if (const BinaryData *BD = getBinaryDataAtAddress(Address)) { 2482 uint64_t EntryID = 0; 2483 BinaryFunction *BF = getFunctionForSymbol(BD->getSymbol(), &EntryID); 2484 if (BF && EntryID == 0) 2485 return BF; 2486 } 2487 return nullptr; 2488 } 2489 2490 DebugAddressRangesVector BinaryContext::translateModuleAddressRanges( 2491 const DWARFAddressRangesVector &InputRanges) const { 2492 DebugAddressRangesVector OutputRanges; 2493 2494 for (const DWARFAddressRange Range : InputRanges) { 2495 auto BFI = BinaryFunctions.lower_bound(Range.LowPC); 2496 while (BFI != BinaryFunctions.end()) { 2497 const BinaryFunction &Function = BFI->second; 2498 if (Function.getAddress() >= Range.HighPC) 2499 break; 2500 const DebugAddressRangesVector FunctionRanges = 2501 Function.getOutputAddressRanges(); 2502 llvm::move(FunctionRanges, std::back_inserter(OutputRanges)); 2503 std::advance(BFI, 1); 2504 } 2505 } 2506 2507 return OutputRanges; 2508 } 2509 2510 } // namespace bolt 2511 } // namespace llvm 2512