1 //===- bolt/Core/BinaryContext.cpp - Low-level context --------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file implements the BinaryContext class. 10 // 11 //===----------------------------------------------------------------------===// 12 13 #include "bolt/Core/BinaryContext.h" 14 #include "bolt/Core/BinaryEmitter.h" 15 #include "bolt/Core/BinaryFunction.h" 16 #include "bolt/Utils/CommandLineOpts.h" 17 #include "bolt/Utils/Utils.h" 18 #include "llvm/ADT/STLExtras.h" 19 #include "llvm/ADT/Twine.h" 20 #include "llvm/DebugInfo/DWARF/DWARFCompileUnit.h" 21 #include "llvm/DebugInfo/DWARF/DWARFFormValue.h" 22 #include "llvm/DebugInfo/DWARF/DWARFUnit.h" 23 #include "llvm/MC/MCAssembler.h" 24 #include "llvm/MC/MCContext.h" 25 #include "llvm/MC/MCDisassembler/MCDisassembler.h" 26 #include "llvm/MC/MCInstPrinter.h" 27 #include "llvm/MC/MCObjectStreamer.h" 28 #include "llvm/MC/MCObjectWriter.h" 29 #include "llvm/MC/MCRegisterInfo.h" 30 #include "llvm/MC/MCSectionELF.h" 31 #include "llvm/MC/MCStreamer.h" 32 #include "llvm/MC/MCSubtargetInfo.h" 33 #include "llvm/MC/MCSymbol.h" 34 #include "llvm/Support/CommandLine.h" 35 #include "llvm/Support/Error.h" 36 #include "llvm/Support/Regex.h" 37 #include <algorithm> 38 #include <functional> 39 #include <iterator> 40 #include <unordered_set> 41 42 using namespace llvm; 43 44 #undef DEBUG_TYPE 45 #define DEBUG_TYPE "bolt" 46 47 namespace opts { 48 49 cl::opt<bool> NoHugePages("no-huge-pages", 50 cl::desc("use regular size pages for code alignment"), 51 cl::Hidden, cl::cat(BoltCategory)); 52 53 static cl::opt<bool> 54 PrintDebugInfo("print-debug-info", 55 cl::desc("print debug info when printing functions"), 56 cl::Hidden, 57 cl::ZeroOrMore, 58 cl::cat(BoltCategory)); 59 60 cl::opt<bool> PrintRelocations( 61 "print-relocations", 62 cl::desc("print relocations when printing functions/objects"), cl::Hidden, 63 cl::cat(BoltCategory)); 64 65 static cl::opt<bool> 66 PrintMemData("print-mem-data", 67 cl::desc("print memory data annotations when printing functions"), 68 cl::Hidden, 69 cl::ZeroOrMore, 70 cl::cat(BoltCategory)); 71 72 cl::opt<std::string> CompDirOverride( 73 "comp-dir-override", 74 cl::desc("overrides DW_AT_comp_dir, and provides an alternative base " 75 "location, which is used with DW_AT_dwo_name to construct a path " 76 "to *.dwo files."), 77 cl::Hidden, cl::init(""), cl::cat(BoltCategory)); 78 } // namespace opts 79 80 namespace llvm { 81 namespace bolt { 82 83 char BOLTError::ID = 0; 84 85 BOLTError::BOLTError(bool IsFatal, const Twine &S) 86 : IsFatal(IsFatal), Msg(S.str()) {} 87 88 void BOLTError::log(raw_ostream &OS) const { 89 if (IsFatal) 90 OS << "FATAL "; 91 StringRef ErrMsg = StringRef(Msg); 92 // Prepend our error prefix if it is missing 93 if (ErrMsg.empty()) { 94 OS << "BOLT-ERROR\n"; 95 } else { 96 if (!ErrMsg.starts_with("BOLT-ERROR")) 97 OS << "BOLT-ERROR: "; 98 OS << ErrMsg << "\n"; 99 } 100 } 101 102 std::error_code BOLTError::convertToErrorCode() const { 103 return inconvertibleErrorCode(); 104 } 105 106 Error createNonFatalBOLTError(const Twine &S) { 107 return make_error<BOLTError>(/*IsFatal*/ false, S); 108 } 109 110 Error createFatalBOLTError(const Twine &S) { 111 return make_error<BOLTError>(/*IsFatal*/ true, S); 112 } 113 114 void BinaryContext::logBOLTErrorsAndQuitOnFatal(Error E) { 115 handleAllErrors(Error(std::move(E)), [&](const BOLTError &E) { 116 if (!E.getMessage().empty()) 117 E.log(this->errs()); 118 if (E.isFatal()) 119 exit(1); 120 }); 121 } 122 123 BinaryContext::BinaryContext(std::unique_ptr<MCContext> Ctx, 124 std::unique_ptr<DWARFContext> DwCtx, 125 std::unique_ptr<Triple> TheTriple, 126 std::shared_ptr<orc::SymbolStringPool> SSP, 127 const Target *TheTarget, std::string TripleName, 128 std::unique_ptr<MCCodeEmitter> MCE, 129 std::unique_ptr<MCObjectFileInfo> MOFI, 130 std::unique_ptr<const MCAsmInfo> AsmInfo, 131 std::unique_ptr<const MCInstrInfo> MII, 132 std::unique_ptr<const MCSubtargetInfo> STI, 133 std::unique_ptr<MCInstPrinter> InstPrinter, 134 std::unique_ptr<const MCInstrAnalysis> MIA, 135 std::unique_ptr<MCPlusBuilder> MIB, 136 std::unique_ptr<const MCRegisterInfo> MRI, 137 std::unique_ptr<MCDisassembler> DisAsm, 138 JournalingStreams Logger) 139 : Ctx(std::move(Ctx)), DwCtx(std::move(DwCtx)), 140 TheTriple(std::move(TheTriple)), SSP(std::move(SSP)), 141 TheTarget(TheTarget), TripleName(TripleName), MCE(std::move(MCE)), 142 MOFI(std::move(MOFI)), AsmInfo(std::move(AsmInfo)), MII(std::move(MII)), 143 STI(std::move(STI)), InstPrinter(std::move(InstPrinter)), 144 MIA(std::move(MIA)), MIB(std::move(MIB)), MRI(std::move(MRI)), 145 DisAsm(std::move(DisAsm)), Logger(Logger), InitialDynoStats(isAArch64()) { 146 RegularPageSize = isAArch64() ? RegularPageSizeAArch64 : RegularPageSizeX86; 147 PageAlign = opts::NoHugePages ? RegularPageSize : HugePageSize; 148 } 149 150 BinaryContext::~BinaryContext() { 151 for (BinarySection *Section : Sections) 152 delete Section; 153 for (BinaryFunction *InjectedFunction : InjectedBinaryFunctions) 154 delete InjectedFunction; 155 for (std::pair<const uint64_t, JumpTable *> JTI : JumpTables) 156 delete JTI.second; 157 clearBinaryData(); 158 } 159 160 /// Create BinaryContext for a given architecture \p ArchName and 161 /// triple \p TripleName. 162 Expected<std::unique_ptr<BinaryContext>> BinaryContext::createBinaryContext( 163 Triple TheTriple, std::shared_ptr<orc::SymbolStringPool> SSP, 164 StringRef InputFileName, SubtargetFeatures *Features, bool IsPIC, 165 std::unique_ptr<DWARFContext> DwCtx, JournalingStreams Logger) { 166 StringRef ArchName = ""; 167 std::string FeaturesStr = ""; 168 switch (TheTriple.getArch()) { 169 case llvm::Triple::x86_64: 170 if (Features) 171 return createFatalBOLTError( 172 "x86_64 target does not use SubtargetFeatures"); 173 ArchName = "x86-64"; 174 FeaturesStr = "+nopl"; 175 break; 176 case llvm::Triple::aarch64: 177 if (Features) 178 return createFatalBOLTError( 179 "AArch64 target does not use SubtargetFeatures"); 180 ArchName = "aarch64"; 181 FeaturesStr = "+all"; 182 break; 183 case llvm::Triple::riscv64: { 184 ArchName = "riscv64"; 185 if (!Features) 186 return createFatalBOLTError("RISCV target needs SubtargetFeatures"); 187 // We rely on relaxation for some transformations (e.g., promoting all calls 188 // to PseudoCALL and then making JITLink relax them). Since the relax 189 // feature is not stored in the object file, we manually enable it. 190 Features->AddFeature("relax"); 191 FeaturesStr = Features->getString(); 192 break; 193 } 194 default: 195 return createStringError(std::errc::not_supported, 196 "BOLT-ERROR: Unrecognized machine in ELF file"); 197 } 198 199 const std::string TripleName = TheTriple.str(); 200 201 std::string Error; 202 const Target *TheTarget = 203 TargetRegistry::lookupTarget(std::string(ArchName), TheTriple, Error); 204 if (!TheTarget) 205 return createStringError(make_error_code(std::errc::not_supported), 206 Twine("BOLT-ERROR: ", Error)); 207 208 std::unique_ptr<const MCRegisterInfo> MRI( 209 TheTarget->createMCRegInfo(TripleName)); 210 if (!MRI) 211 return createStringError( 212 make_error_code(std::errc::not_supported), 213 Twine("BOLT-ERROR: no register info for target ", TripleName)); 214 215 // Set up disassembler. 216 std::unique_ptr<MCAsmInfo> AsmInfo( 217 TheTarget->createMCAsmInfo(*MRI, TripleName, MCTargetOptions())); 218 if (!AsmInfo) 219 return createStringError( 220 make_error_code(std::errc::not_supported), 221 Twine("BOLT-ERROR: no assembly info for target ", TripleName)); 222 // BOLT creates "func@PLT" symbols for PLT entries. In function assembly dump 223 // we want to emit such names as using @PLT without double quotes to convey 224 // variant kind to the assembler. BOLT doesn't rely on the linker so we can 225 // override the default AsmInfo behavior to emit names the way we want. 226 AsmInfo->setAllowAtInName(true); 227 228 std::unique_ptr<const MCSubtargetInfo> STI( 229 TheTarget->createMCSubtargetInfo(TripleName, "", FeaturesStr)); 230 if (!STI) 231 return createStringError( 232 make_error_code(std::errc::not_supported), 233 Twine("BOLT-ERROR: no subtarget info for target ", TripleName)); 234 235 std::unique_ptr<const MCInstrInfo> MII(TheTarget->createMCInstrInfo()); 236 if (!MII) 237 return createStringError( 238 make_error_code(std::errc::not_supported), 239 Twine("BOLT-ERROR: no instruction info for target ", TripleName)); 240 241 std::unique_ptr<MCContext> Ctx( 242 new MCContext(TheTriple, AsmInfo.get(), MRI.get(), STI.get())); 243 std::unique_ptr<MCObjectFileInfo> MOFI( 244 TheTarget->createMCObjectFileInfo(*Ctx, IsPIC)); 245 Ctx->setObjectFileInfo(MOFI.get()); 246 // We do not support X86 Large code model. Change this in the future. 247 bool Large = false; 248 if (TheTriple.getArch() == llvm::Triple::aarch64) 249 Large = true; 250 unsigned LSDAEncoding = 251 Large ? dwarf::DW_EH_PE_absptr : dwarf::DW_EH_PE_udata4; 252 if (IsPIC) { 253 LSDAEncoding = dwarf::DW_EH_PE_pcrel | 254 (Large ? dwarf::DW_EH_PE_sdata8 : dwarf::DW_EH_PE_sdata4); 255 } 256 257 std::unique_ptr<MCDisassembler> DisAsm( 258 TheTarget->createMCDisassembler(*STI, *Ctx)); 259 260 if (!DisAsm) 261 return createStringError( 262 make_error_code(std::errc::not_supported), 263 Twine("BOLT-ERROR: no disassembler info for target ", TripleName)); 264 265 std::unique_ptr<const MCInstrAnalysis> MIA( 266 TheTarget->createMCInstrAnalysis(MII.get())); 267 if (!MIA) 268 return createStringError( 269 make_error_code(std::errc::not_supported), 270 Twine("BOLT-ERROR: failed to create instruction analysis for target ", 271 TripleName)); 272 273 int AsmPrinterVariant = AsmInfo->getAssemblerDialect(); 274 std::unique_ptr<MCInstPrinter> InstructionPrinter( 275 TheTarget->createMCInstPrinter(TheTriple, AsmPrinterVariant, *AsmInfo, 276 *MII, *MRI)); 277 if (!InstructionPrinter) 278 return createStringError( 279 make_error_code(std::errc::not_supported), 280 Twine("BOLT-ERROR: no instruction printer for target ", TripleName)); 281 InstructionPrinter->setPrintImmHex(true); 282 283 std::unique_ptr<MCCodeEmitter> MCE( 284 TheTarget->createMCCodeEmitter(*MII, *Ctx)); 285 286 auto BC = std::make_unique<BinaryContext>( 287 std::move(Ctx), std::move(DwCtx), std::make_unique<Triple>(TheTriple), 288 std::move(SSP), TheTarget, std::string(TripleName), std::move(MCE), 289 std::move(MOFI), std::move(AsmInfo), std::move(MII), std::move(STI), 290 std::move(InstructionPrinter), std::move(MIA), nullptr, std::move(MRI), 291 std::move(DisAsm), Logger); 292 293 BC->LSDAEncoding = LSDAEncoding; 294 295 BC->MAB = std::unique_ptr<MCAsmBackend>( 296 BC->TheTarget->createMCAsmBackend(*BC->STI, *BC->MRI, MCTargetOptions())); 297 298 BC->setFilename(InputFileName); 299 300 BC->HasFixedLoadAddress = !IsPIC; 301 302 BC->SymbolicDisAsm = std::unique_ptr<MCDisassembler>( 303 BC->TheTarget->createMCDisassembler(*BC->STI, *BC->Ctx)); 304 305 if (!BC->SymbolicDisAsm) 306 return createStringError( 307 make_error_code(std::errc::not_supported), 308 Twine("BOLT-ERROR: no disassembler info for target ", TripleName)); 309 310 return std::move(BC); 311 } 312 313 bool BinaryContext::forceSymbolRelocations(StringRef SymbolName) const { 314 if (opts::HotText && 315 (SymbolName == "__hot_start" || SymbolName == "__hot_end")) 316 return true; 317 318 if (opts::HotData && 319 (SymbolName == "__hot_data_start" || SymbolName == "__hot_data_end")) 320 return true; 321 322 if (SymbolName == "_end") 323 return true; 324 325 return false; 326 } 327 328 std::unique_ptr<MCObjectWriter> 329 BinaryContext::createObjectWriter(raw_pwrite_stream &OS) { 330 return MAB->createObjectWriter(OS); 331 } 332 333 bool BinaryContext::validateObjectNesting() const { 334 auto Itr = BinaryDataMap.begin(); 335 auto End = BinaryDataMap.end(); 336 bool Valid = true; 337 while (Itr != End) { 338 auto Next = std::next(Itr); 339 while (Next != End && 340 Itr->second->getSection() == Next->second->getSection() && 341 Itr->second->containsRange(Next->second->getAddress(), 342 Next->second->getSize())) { 343 if (Next->second->Parent != Itr->second) { 344 this->errs() << "BOLT-WARNING: object nesting incorrect for:\n" 345 << "BOLT-WARNING: " << *Itr->second << "\n" 346 << "BOLT-WARNING: " << *Next->second << "\n"; 347 Valid = false; 348 } 349 ++Next; 350 } 351 Itr = Next; 352 } 353 return Valid; 354 } 355 356 bool BinaryContext::validateHoles() const { 357 bool Valid = true; 358 for (BinarySection &Section : sections()) { 359 for (const Relocation &Rel : Section.relocations()) { 360 uint64_t RelAddr = Rel.Offset + Section.getAddress(); 361 const BinaryData *BD = getBinaryDataContainingAddress(RelAddr); 362 if (!BD) { 363 this->errs() 364 << "BOLT-WARNING: no BinaryData found for relocation at address" 365 << " 0x" << Twine::utohexstr(RelAddr) << " in " << Section.getName() 366 << "\n"; 367 Valid = false; 368 } else if (!BD->getAtomicRoot()) { 369 this->errs() 370 << "BOLT-WARNING: no atomic BinaryData found for relocation at " 371 << "address 0x" << Twine::utohexstr(RelAddr) << " in " 372 << Section.getName() << "\n"; 373 Valid = false; 374 } 375 } 376 } 377 return Valid; 378 } 379 380 void BinaryContext::updateObjectNesting(BinaryDataMapType::iterator GAI) { 381 const uint64_t Address = GAI->second->getAddress(); 382 const uint64_t Size = GAI->second->getSize(); 383 384 auto fixParents = [&](BinaryDataMapType::iterator Itr, 385 BinaryData *NewParent) { 386 BinaryData *OldParent = Itr->second->Parent; 387 Itr->second->Parent = NewParent; 388 ++Itr; 389 while (Itr != BinaryDataMap.end() && OldParent && 390 Itr->second->Parent == OldParent) { 391 Itr->second->Parent = NewParent; 392 ++Itr; 393 } 394 }; 395 396 // Check if the previous symbol contains the newly added symbol. 397 if (GAI != BinaryDataMap.begin()) { 398 BinaryData *Prev = std::prev(GAI)->second; 399 while (Prev) { 400 if (Prev->getSection() == GAI->second->getSection() && 401 Prev->containsRange(Address, Size)) { 402 fixParents(GAI, Prev); 403 } else { 404 fixParents(GAI, nullptr); 405 } 406 Prev = Prev->Parent; 407 } 408 } 409 410 // Check if the newly added symbol contains any subsequent symbols. 411 if (Size != 0) { 412 BinaryData *BD = GAI->second->Parent ? GAI->second->Parent : GAI->second; 413 auto Itr = std::next(GAI); 414 while ( 415 Itr != BinaryDataMap.end() && 416 BD->containsRange(Itr->second->getAddress(), Itr->second->getSize())) { 417 Itr->second->Parent = BD; 418 ++Itr; 419 } 420 } 421 } 422 423 iterator_range<BinaryContext::binary_data_iterator> 424 BinaryContext::getSubBinaryData(BinaryData *BD) { 425 auto Start = std::next(BinaryDataMap.find(BD->getAddress())); 426 auto End = Start; 427 while (End != BinaryDataMap.end() && BD->isAncestorOf(End->second)) 428 ++End; 429 return make_range(Start, End); 430 } 431 432 std::pair<const MCSymbol *, uint64_t> 433 BinaryContext::handleAddressRef(uint64_t Address, BinaryFunction &BF, 434 bool IsPCRel) { 435 if (isAArch64()) { 436 // Check if this is an access to a constant island and create bookkeeping 437 // to keep track of it and emit it later as part of this function. 438 if (MCSymbol *IslandSym = BF.getOrCreateIslandAccess(Address)) 439 return std::make_pair(IslandSym, 0); 440 441 // Detect custom code written in assembly that refers to arbitrary 442 // constant islands from other functions. Write this reference so we 443 // can pull this constant island and emit it as part of this function 444 // too. 445 auto IslandIter = AddressToConstantIslandMap.lower_bound(Address); 446 447 if (IslandIter != AddressToConstantIslandMap.begin() && 448 (IslandIter == AddressToConstantIslandMap.end() || 449 IslandIter->first > Address)) 450 --IslandIter; 451 452 if (IslandIter != AddressToConstantIslandMap.end()) { 453 // Fall-back to referencing the original constant island in the presence 454 // of dynamic relocs, as we currently do not support cloning them. 455 // Notice: we might fail to link because of this, if the original constant 456 // island we are referring would be emitted too far away. 457 if (IslandIter->second->hasDynamicRelocationAtIsland()) { 458 MCSymbol *IslandSym = 459 IslandIter->second->getOrCreateIslandAccess(Address); 460 if (IslandSym) 461 return std::make_pair(IslandSym, 0); 462 } else if (MCSymbol *IslandSym = 463 IslandIter->second->getOrCreateProxyIslandAccess(Address, 464 BF)) { 465 BF.createIslandDependency(IslandSym, IslandIter->second); 466 return std::make_pair(IslandSym, 0); 467 } 468 } 469 } 470 471 // Note that the address does not necessarily have to reside inside 472 // a section, it could be an absolute address too. 473 ErrorOr<BinarySection &> Section = getSectionForAddress(Address); 474 if (Section && Section->isText()) { 475 if (BF.containsAddress(Address, /*UseMaxSize=*/isAArch64())) { 476 if (Address != BF.getAddress()) { 477 // The address could potentially escape. Mark it as another entry 478 // point into the function. 479 if (opts::Verbosity >= 1) { 480 this->outs() << "BOLT-INFO: potentially escaped address 0x" 481 << Twine::utohexstr(Address) << " in function " << BF 482 << '\n'; 483 } 484 BF.HasInternalLabelReference = true; 485 return std::make_pair( 486 BF.addEntryPointAtOffset(Address - BF.getAddress()), 0); 487 } 488 } else { 489 addInterproceduralReference(&BF, Address); 490 } 491 } 492 493 // With relocations, catch jump table references outside of the basic block 494 // containing the indirect jump. 495 if (HasRelocations) { 496 const MemoryContentsType MemType = analyzeMemoryAt(Address, BF); 497 if (MemType == MemoryContentsType::POSSIBLE_PIC_JUMP_TABLE && IsPCRel) { 498 const MCSymbol *Symbol = 499 getOrCreateJumpTable(BF, Address, JumpTable::JTT_PIC); 500 501 return std::make_pair(Symbol, 0); 502 } 503 } 504 505 if (BinaryData *BD = getBinaryDataContainingAddress(Address)) 506 return std::make_pair(BD->getSymbol(), Address - BD->getAddress()); 507 508 // TODO: use DWARF info to get size/alignment here? 509 MCSymbol *TargetSymbol = getOrCreateGlobalSymbol(Address, "DATAat"); 510 LLVM_DEBUG(dbgs() << "Created symbol " << TargetSymbol->getName() << '\n'); 511 return std::make_pair(TargetSymbol, 0); 512 } 513 514 MemoryContentsType BinaryContext::analyzeMemoryAt(uint64_t Address, 515 BinaryFunction &BF) { 516 if (!isX86()) 517 return MemoryContentsType::UNKNOWN; 518 519 ErrorOr<BinarySection &> Section = getSectionForAddress(Address); 520 if (!Section) { 521 // No section - possibly an absolute address. Since we don't allow 522 // internal function addresses to escape the function scope - we 523 // consider it a tail call. 524 if (opts::Verbosity > 1) { 525 this->errs() << "BOLT-WARNING: no section for address 0x" 526 << Twine::utohexstr(Address) << " referenced from function " 527 << BF << '\n'; 528 } 529 return MemoryContentsType::UNKNOWN; 530 } 531 532 if (Section->isVirtual()) { 533 // The contents are filled at runtime. 534 return MemoryContentsType::UNKNOWN; 535 } 536 537 // No support for jump tables in code yet. 538 if (Section->isText()) 539 return MemoryContentsType::UNKNOWN; 540 541 // Start with checking for PIC jump table. We expect non-PIC jump tables 542 // to have high 32 bits set to 0. 543 if (analyzeJumpTable(Address, JumpTable::JTT_PIC, BF)) 544 return MemoryContentsType::POSSIBLE_PIC_JUMP_TABLE; 545 546 if (analyzeJumpTable(Address, JumpTable::JTT_NORMAL, BF)) 547 return MemoryContentsType::POSSIBLE_JUMP_TABLE; 548 549 return MemoryContentsType::UNKNOWN; 550 } 551 552 bool BinaryContext::analyzeJumpTable(const uint64_t Address, 553 const JumpTable::JumpTableType Type, 554 const BinaryFunction &BF, 555 const uint64_t NextJTAddress, 556 JumpTable::AddressesType *EntriesAsAddress, 557 bool *HasEntryInFragment) const { 558 // Target address of __builtin_unreachable. 559 const uint64_t UnreachableAddress = BF.getAddress() + BF.getSize(); 560 561 // Is one of the targets __builtin_unreachable? 562 bool HasUnreachable = false; 563 564 // Does one of the entries match function start address? 565 bool HasStartAsEntry = false; 566 567 // Number of targets other than __builtin_unreachable. 568 uint64_t NumRealEntries = 0; 569 570 // Size of the jump table without trailing __builtin_unreachable entries. 571 size_t TrimmedSize = 0; 572 573 auto addEntryAddress = [&](uint64_t EntryAddress, bool Unreachable = false) { 574 if (!EntriesAsAddress) 575 return; 576 EntriesAsAddress->emplace_back(EntryAddress); 577 if (!Unreachable) 578 TrimmedSize = EntriesAsAddress->size(); 579 }; 580 581 ErrorOr<const BinarySection &> Section = getSectionForAddress(Address); 582 if (!Section) 583 return false; 584 585 // The upper bound is defined by containing object, section limits, and 586 // the next jump table in memory. 587 uint64_t UpperBound = Section->getEndAddress(); 588 const BinaryData *JumpTableBD = getBinaryDataAtAddress(Address); 589 if (JumpTableBD && JumpTableBD->getSize()) { 590 assert(JumpTableBD->getEndAddress() <= UpperBound && 591 "data object cannot cross a section boundary"); 592 UpperBound = JumpTableBD->getEndAddress(); 593 } 594 if (NextJTAddress) 595 UpperBound = std::min(NextJTAddress, UpperBound); 596 597 LLVM_DEBUG({ 598 using JTT = JumpTable::JumpTableType; 599 dbgs() << formatv("BOLT-DEBUG: analyzeJumpTable @{0:x} in {1}, JTT={2}\n", 600 Address, BF.getPrintName(), 601 Type == JTT::JTT_PIC ? "PIC" : "Normal"); 602 }); 603 const uint64_t EntrySize = getJumpTableEntrySize(Type); 604 for (uint64_t EntryAddress = Address; EntryAddress <= UpperBound - EntrySize; 605 EntryAddress += EntrySize) { 606 LLVM_DEBUG(dbgs() << " * Checking 0x" << Twine::utohexstr(EntryAddress) 607 << " -> "); 608 // Check if there's a proper relocation against the jump table entry. 609 if (HasRelocations) { 610 if (Type == JumpTable::JTT_PIC && 611 !DataPCRelocations.count(EntryAddress)) { 612 LLVM_DEBUG( 613 dbgs() << "FAIL: JTT_PIC table, no relocation for this address\n"); 614 break; 615 } 616 if (Type == JumpTable::JTT_NORMAL && !getRelocationAt(EntryAddress)) { 617 LLVM_DEBUG( 618 dbgs() 619 << "FAIL: JTT_NORMAL table, no relocation for this address\n"); 620 break; 621 } 622 } 623 624 const uint64_t Value = 625 (Type == JumpTable::JTT_PIC) 626 ? Address + *getSignedValueAtAddress(EntryAddress, EntrySize) 627 : *getPointerAtAddress(EntryAddress); 628 629 // __builtin_unreachable() case. 630 if (Value == UnreachableAddress) { 631 addEntryAddress(Value, /*Unreachable*/ true); 632 HasUnreachable = true; 633 LLVM_DEBUG(dbgs() << formatv("OK: {0:x} __builtin_unreachable\n", Value)); 634 continue; 635 } 636 637 // Function start is another special case. It is allowed in the jump table, 638 // but we need at least one another regular entry to distinguish the table 639 // from, e.g. a function pointer array. 640 if (Value == BF.getAddress()) { 641 HasStartAsEntry = true; 642 addEntryAddress(Value); 643 continue; 644 } 645 646 // Function or one of its fragments. 647 const BinaryFunction *TargetBF = getBinaryFunctionContainingAddress(Value); 648 const bool DoesBelongToFunction = 649 BF.containsAddress(Value) || 650 (TargetBF && areRelatedFragments(TargetBF, &BF)); 651 if (!DoesBelongToFunction) { 652 LLVM_DEBUG({ 653 if (!BF.containsAddress(Value)) { 654 dbgs() << "FAIL: function doesn't contain this address\n"; 655 if (TargetBF) { 656 dbgs() << " ! function containing this address: " 657 << TargetBF->getPrintName() << '\n'; 658 if (TargetBF->isFragment()) { 659 dbgs() << " ! is a fragment"; 660 for (BinaryFunction *Parent : TargetBF->ParentFragments) 661 dbgs() << ", parent: " << Parent->getPrintName(); 662 dbgs() << '\n'; 663 } 664 } 665 } 666 }); 667 break; 668 } 669 670 // Check there's an instruction at this offset. 671 if (TargetBF->getState() == BinaryFunction::State::Disassembled && 672 !TargetBF->getInstructionAtOffset(Value - TargetBF->getAddress())) { 673 LLVM_DEBUG(dbgs() << formatv("FAIL: no instruction at {0:x}\n", Value)); 674 break; 675 } 676 677 ++NumRealEntries; 678 LLVM_DEBUG(dbgs() << formatv("OK: {0:x} real entry\n", Value)); 679 680 if (TargetBF != &BF && HasEntryInFragment) 681 *HasEntryInFragment = true; 682 addEntryAddress(Value); 683 } 684 685 // Trim direct/normal jump table to exclude trailing unreachable entries that 686 // can collide with a function address. 687 if (Type == JumpTable::JTT_NORMAL && EntriesAsAddress && 688 TrimmedSize != EntriesAsAddress->size() && 689 getBinaryFunctionAtAddress(UnreachableAddress)) 690 EntriesAsAddress->resize(TrimmedSize); 691 692 // It's a jump table if the number of real entries is more than 1, or there's 693 // one real entry and one or more special targets. If there are only multiple 694 // special targets, then it's not a jump table. 695 return NumRealEntries + (HasUnreachable || HasStartAsEntry) >= 2; 696 } 697 698 void BinaryContext::populateJumpTables() { 699 LLVM_DEBUG(dbgs() << "DataPCRelocations: " << DataPCRelocations.size() 700 << '\n'); 701 for (auto JTI = JumpTables.begin(), JTE = JumpTables.end(); JTI != JTE; 702 ++JTI) { 703 JumpTable *JT = JTI->second; 704 705 bool NonSimpleParent = false; 706 for (BinaryFunction *BF : JT->Parents) 707 NonSimpleParent |= !BF->isSimple(); 708 if (NonSimpleParent) 709 continue; 710 711 uint64_t NextJTAddress = 0; 712 auto NextJTI = std::next(JTI); 713 if (NextJTI != JTE) 714 NextJTAddress = NextJTI->second->getAddress(); 715 716 const bool Success = 717 analyzeJumpTable(JT->getAddress(), JT->Type, *(JT->Parents[0]), 718 NextJTAddress, &JT->EntriesAsAddress, &JT->IsSplit); 719 if (!Success) { 720 LLVM_DEBUG({ 721 dbgs() << "failed to analyze "; 722 JT->print(dbgs()); 723 if (NextJTI != JTE) { 724 dbgs() << "next "; 725 NextJTI->second->print(dbgs()); 726 } 727 }); 728 llvm_unreachable("jump table heuristic failure"); 729 } 730 for (BinaryFunction *Frag : JT->Parents) { 731 if (JT->IsSplit) 732 Frag->setHasIndirectTargetToSplitFragment(true); 733 for (uint64_t EntryAddress : JT->EntriesAsAddress) 734 // if target is builtin_unreachable 735 if (EntryAddress == Frag->getAddress() + Frag->getSize()) { 736 Frag->IgnoredBranches.emplace_back(EntryAddress - Frag->getAddress(), 737 Frag->getSize()); 738 } else if (EntryAddress >= Frag->getAddress() && 739 EntryAddress < Frag->getAddress() + Frag->getSize()) { 740 Frag->registerReferencedOffset(EntryAddress - Frag->getAddress()); 741 } 742 } 743 744 // In strict mode, erase PC-relative relocation record. Later we check that 745 // all such records are erased and thus have been accounted for. 746 if (opts::StrictMode && JT->Type == JumpTable::JTT_PIC) { 747 for (uint64_t Address = JT->getAddress(); 748 Address < JT->getAddress() + JT->getSize(); 749 Address += JT->EntrySize) { 750 DataPCRelocations.erase(DataPCRelocations.find(Address)); 751 } 752 } 753 754 // Mark to skip the function and all its fragments. 755 for (BinaryFunction *Frag : JT->Parents) 756 if (Frag->hasIndirectTargetToSplitFragment()) 757 addFragmentsToSkip(Frag); 758 } 759 760 if (opts::StrictMode && DataPCRelocations.size()) { 761 LLVM_DEBUG({ 762 dbgs() << DataPCRelocations.size() 763 << " unclaimed PC-relative relocations left in data:\n"; 764 for (uint64_t Reloc : DataPCRelocations) 765 dbgs() << Twine::utohexstr(Reloc) << '\n'; 766 }); 767 assert(0 && "unclaimed PC-relative relocations left in data\n"); 768 } 769 clearList(DataPCRelocations); 770 } 771 772 void BinaryContext::skipMarkedFragments() { 773 std::vector<BinaryFunction *> FragmentQueue; 774 // Copy the functions to FragmentQueue. 775 FragmentQueue.assign(FragmentsToSkip.begin(), FragmentsToSkip.end()); 776 auto addToWorklist = [&](BinaryFunction *Function) -> void { 777 if (FragmentsToSkip.count(Function)) 778 return; 779 FragmentQueue.push_back(Function); 780 addFragmentsToSkip(Function); 781 }; 782 // Functions containing split jump tables need to be skipped with all 783 // fragments (transitively). 784 for (size_t I = 0; I != FragmentQueue.size(); I++) { 785 BinaryFunction *BF = FragmentQueue[I]; 786 assert(FragmentsToSkip.count(BF) && 787 "internal error in traversing function fragments"); 788 if (opts::Verbosity >= 1) 789 this->errs() << "BOLT-WARNING: Ignoring " << BF->getPrintName() << '\n'; 790 BF->setSimple(false); 791 BF->setHasIndirectTargetToSplitFragment(true); 792 793 llvm::for_each(BF->Fragments, addToWorklist); 794 llvm::for_each(BF->ParentFragments, addToWorklist); 795 } 796 if (!FragmentsToSkip.empty()) 797 this->errs() << "BOLT-WARNING: skipped " << FragmentsToSkip.size() 798 << " function" << (FragmentsToSkip.size() == 1 ? "" : "s") 799 << " due to cold fragments\n"; 800 } 801 802 MCSymbol *BinaryContext::getOrCreateGlobalSymbol(uint64_t Address, Twine Prefix, 803 uint64_t Size, 804 uint16_t Alignment, 805 unsigned Flags) { 806 auto Itr = BinaryDataMap.find(Address); 807 if (Itr != BinaryDataMap.end()) { 808 assert(Itr->second->getSize() == Size || !Size); 809 return Itr->second->getSymbol(); 810 } 811 812 std::string Name = (Prefix + "0x" + Twine::utohexstr(Address)).str(); 813 assert(!GlobalSymbols.count(Name) && "created name is not unique"); 814 return registerNameAtAddress(Name, Address, Size, Alignment, Flags); 815 } 816 817 MCSymbol *BinaryContext::getOrCreateUndefinedGlobalSymbol(StringRef Name) { 818 return Ctx->getOrCreateSymbol(Name); 819 } 820 821 BinaryFunction *BinaryContext::createBinaryFunction( 822 const std::string &Name, BinarySection &Section, uint64_t Address, 823 uint64_t Size, uint64_t SymbolSize, uint16_t Alignment) { 824 auto Result = BinaryFunctions.emplace( 825 Address, BinaryFunction(Name, Section, Address, Size, *this)); 826 assert(Result.second == true && "unexpected duplicate function"); 827 BinaryFunction *BF = &Result.first->second; 828 registerNameAtAddress(Name, Address, SymbolSize ? SymbolSize : Size, 829 Alignment); 830 setSymbolToFunctionMap(BF->getSymbol(), BF); 831 return BF; 832 } 833 834 const MCSymbol * 835 BinaryContext::getOrCreateJumpTable(BinaryFunction &Function, uint64_t Address, 836 JumpTable::JumpTableType Type) { 837 // Two fragments of same function access same jump table 838 if (JumpTable *JT = getJumpTableContainingAddress(Address)) { 839 assert(JT->Type == Type && "jump table types have to match"); 840 assert(Address == JT->getAddress() && "unexpected non-empty jump table"); 841 842 // Prevent associating a jump table to a specific fragment twice. 843 if (!llvm::is_contained(JT->Parents, &Function)) { 844 assert(llvm::all_of(JT->Parents, 845 [&](const BinaryFunction *BF) { 846 return areRelatedFragments(&Function, BF); 847 }) && 848 "cannot re-use jump table of a different function"); 849 // Duplicate the entry for the parent function for easy access 850 JT->Parents.push_back(&Function); 851 if (opts::Verbosity > 2) { 852 this->outs() << "BOLT-INFO: Multiple fragments access same jump table: " 853 << JT->Parents[0]->getPrintName() << "; " 854 << Function.getPrintName() << "\n"; 855 JT->print(this->outs()); 856 } 857 Function.JumpTables.emplace(Address, JT); 858 for (BinaryFunction *Parent : JT->Parents) 859 Parent->setHasIndirectTargetToSplitFragment(true); 860 } 861 862 bool IsJumpTableParent = false; 863 (void)IsJumpTableParent; 864 for (BinaryFunction *Frag : JT->Parents) 865 if (Frag == &Function) 866 IsJumpTableParent = true; 867 assert(IsJumpTableParent && 868 "cannot re-use jump table of a different function"); 869 return JT->getFirstLabel(); 870 } 871 872 // Re-use the existing symbol if possible. 873 MCSymbol *JTLabel = nullptr; 874 if (BinaryData *Object = getBinaryDataAtAddress(Address)) { 875 if (!isInternalSymbolName(Object->getSymbol()->getName())) 876 JTLabel = Object->getSymbol(); 877 } 878 879 const uint64_t EntrySize = getJumpTableEntrySize(Type); 880 if (!JTLabel) { 881 const std::string JumpTableName = generateJumpTableName(Function, Address); 882 JTLabel = registerNameAtAddress(JumpTableName, Address, 0, EntrySize); 883 } 884 885 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: creating jump table " << JTLabel->getName() 886 << " in function " << Function << '\n'); 887 888 JumpTable *JT = new JumpTable(*JTLabel, Address, EntrySize, Type, 889 JumpTable::LabelMapType{{0, JTLabel}}, 890 *getSectionForAddress(Address)); 891 JT->Parents.push_back(&Function); 892 if (opts::Verbosity > 2) 893 JT->print(this->outs()); 894 JumpTables.emplace(Address, JT); 895 896 // Duplicate the entry for the parent function for easy access. 897 Function.JumpTables.emplace(Address, JT); 898 return JTLabel; 899 } 900 901 std::pair<uint64_t, const MCSymbol *> 902 BinaryContext::duplicateJumpTable(BinaryFunction &Function, JumpTable *JT, 903 const MCSymbol *OldLabel) { 904 auto L = scopeLock(); 905 unsigned Offset = 0; 906 bool Found = false; 907 for (std::pair<const unsigned, MCSymbol *> Elmt : JT->Labels) { 908 if (Elmt.second != OldLabel) 909 continue; 910 Offset = Elmt.first; 911 Found = true; 912 break; 913 } 914 assert(Found && "Label not found"); 915 (void)Found; 916 MCSymbol *NewLabel = Ctx->createNamedTempSymbol("duplicatedJT"); 917 JumpTable *NewJT = 918 new JumpTable(*NewLabel, JT->getAddress(), JT->EntrySize, JT->Type, 919 JumpTable::LabelMapType{{Offset, NewLabel}}, 920 *getSectionForAddress(JT->getAddress())); 921 NewJT->Parents = JT->Parents; 922 NewJT->Entries = JT->Entries; 923 NewJT->Counts = JT->Counts; 924 uint64_t JumpTableID = ++DuplicatedJumpTables; 925 // Invert it to differentiate from regular jump tables whose IDs are their 926 // addresses in the input binary memory space 927 JumpTableID = ~JumpTableID; 928 JumpTables.emplace(JumpTableID, NewJT); 929 Function.JumpTables.emplace(JumpTableID, NewJT); 930 return std::make_pair(JumpTableID, NewLabel); 931 } 932 933 std::string BinaryContext::generateJumpTableName(const BinaryFunction &BF, 934 uint64_t Address) { 935 size_t Id; 936 uint64_t Offset = 0; 937 if (const JumpTable *JT = BF.getJumpTableContainingAddress(Address)) { 938 Offset = Address - JT->getAddress(); 939 auto JTLabelsIt = JT->Labels.find(Offset); 940 if (JTLabelsIt != JT->Labels.end()) 941 return std::string(JTLabelsIt->second->getName()); 942 943 auto JTIdsIt = JumpTableIds.find(JT->getAddress()); 944 assert(JTIdsIt != JumpTableIds.end()); 945 Id = JTIdsIt->second; 946 } else { 947 Id = JumpTableIds[Address] = BF.JumpTables.size(); 948 } 949 return ("JUMP_TABLE/" + BF.getOneName().str() + "." + std::to_string(Id) + 950 (Offset ? ("." + std::to_string(Offset)) : "")); 951 } 952 953 bool BinaryContext::hasValidCodePadding(const BinaryFunction &BF) { 954 // FIXME: aarch64 support is missing. 955 if (!isX86()) 956 return true; 957 958 if (BF.getSize() == BF.getMaxSize()) 959 return true; 960 961 ErrorOr<ArrayRef<unsigned char>> FunctionData = BF.getData(); 962 assert(FunctionData && "cannot get function as data"); 963 964 uint64_t Offset = BF.getSize(); 965 MCInst Instr; 966 uint64_t InstrSize = 0; 967 uint64_t InstrAddress = BF.getAddress() + Offset; 968 using std::placeholders::_1; 969 970 // Skip instructions that satisfy the predicate condition. 971 auto skipInstructions = [&](std::function<bool(const MCInst &)> Predicate) { 972 const uint64_t StartOffset = Offset; 973 for (; Offset < BF.getMaxSize(); 974 Offset += InstrSize, InstrAddress += InstrSize) { 975 if (!DisAsm->getInstruction(Instr, InstrSize, FunctionData->slice(Offset), 976 InstrAddress, nulls())) 977 break; 978 if (!Predicate(Instr)) 979 break; 980 } 981 982 return Offset - StartOffset; 983 }; 984 985 // Skip a sequence of zero bytes. 986 auto skipZeros = [&]() { 987 const uint64_t StartOffset = Offset; 988 for (; Offset < BF.getMaxSize(); ++Offset) 989 if ((*FunctionData)[Offset] != 0) 990 break; 991 992 return Offset - StartOffset; 993 }; 994 995 // Accept the whole padding area filled with breakpoints. 996 auto isBreakpoint = std::bind(&MCPlusBuilder::isBreakpoint, MIB.get(), _1); 997 if (skipInstructions(isBreakpoint) && Offset == BF.getMaxSize()) 998 return true; 999 1000 auto isNoop = std::bind(&MCPlusBuilder::isNoop, MIB.get(), _1); 1001 1002 // Some functions have a jump to the next function or to the padding area 1003 // inserted after the body. 1004 auto isSkipJump = [&](const MCInst &Instr) { 1005 uint64_t TargetAddress = 0; 1006 if (MIB->isUnconditionalBranch(Instr) && 1007 MIB->evaluateBranch(Instr, InstrAddress, InstrSize, TargetAddress)) { 1008 if (TargetAddress >= InstrAddress + InstrSize && 1009 TargetAddress <= BF.getAddress() + BF.getMaxSize()) { 1010 return true; 1011 } 1012 } 1013 return false; 1014 }; 1015 1016 // Skip over nops, jumps, and zero padding. Allow interleaving (this happens). 1017 while (skipInstructions(isNoop) || skipInstructions(isSkipJump) || 1018 skipZeros()) 1019 ; 1020 1021 if (Offset == BF.getMaxSize()) 1022 return true; 1023 1024 if (opts::Verbosity >= 1) { 1025 this->errs() << "BOLT-WARNING: bad padding at address 0x" 1026 << Twine::utohexstr(BF.getAddress() + BF.getSize()) 1027 << " starting at offset " << (Offset - BF.getSize()) 1028 << " in function " << BF << '\n' 1029 << FunctionData->slice(BF.getSize(), 1030 BF.getMaxSize() - BF.getSize()) 1031 << '\n'; 1032 } 1033 1034 return false; 1035 } 1036 1037 void BinaryContext::adjustCodePadding() { 1038 for (auto &BFI : BinaryFunctions) { 1039 BinaryFunction &BF = BFI.second; 1040 if (!shouldEmit(BF)) 1041 continue; 1042 1043 if (!hasValidCodePadding(BF)) { 1044 if (HasRelocations) { 1045 if (opts::Verbosity >= 1) { 1046 this->outs() << "BOLT-INFO: function " << BF 1047 << " has invalid padding. Ignoring the function.\n"; 1048 } 1049 BF.setIgnored(); 1050 } else { 1051 BF.setMaxSize(BF.getSize()); 1052 } 1053 } 1054 } 1055 } 1056 1057 MCSymbol *BinaryContext::registerNameAtAddress(StringRef Name, uint64_t Address, 1058 uint64_t Size, 1059 uint16_t Alignment, 1060 unsigned Flags) { 1061 // Register the name with MCContext. 1062 MCSymbol *Symbol = Ctx->getOrCreateSymbol(Name); 1063 1064 auto GAI = BinaryDataMap.find(Address); 1065 BinaryData *BD; 1066 if (GAI == BinaryDataMap.end()) { 1067 ErrorOr<BinarySection &> SectionOrErr = getSectionForAddress(Address); 1068 BinarySection &Section = 1069 SectionOrErr ? SectionOrErr.get() : absoluteSection(); 1070 BD = new BinaryData(*Symbol, Address, Size, Alignment ? Alignment : 1, 1071 Section, Flags); 1072 GAI = BinaryDataMap.emplace(Address, BD).first; 1073 GlobalSymbols[Name] = BD; 1074 updateObjectNesting(GAI); 1075 } else { 1076 BD = GAI->second; 1077 if (!BD->hasName(Name)) { 1078 GlobalSymbols[Name] = BD; 1079 BD->updateSize(Size); 1080 BD->Symbols.push_back(Symbol); 1081 } 1082 } 1083 1084 return Symbol; 1085 } 1086 1087 const BinaryData * 1088 BinaryContext::getBinaryDataContainingAddressImpl(uint64_t Address) const { 1089 auto NI = BinaryDataMap.lower_bound(Address); 1090 auto End = BinaryDataMap.end(); 1091 if ((NI != End && Address == NI->first) || 1092 ((NI != BinaryDataMap.begin()) && (NI-- != BinaryDataMap.begin()))) { 1093 if (NI->second->containsAddress(Address)) 1094 return NI->second; 1095 1096 // If this is a sub-symbol, see if a parent data contains the address. 1097 const BinaryData *BD = NI->second->getParent(); 1098 while (BD) { 1099 if (BD->containsAddress(Address)) 1100 return BD; 1101 BD = BD->getParent(); 1102 } 1103 } 1104 return nullptr; 1105 } 1106 1107 BinaryData *BinaryContext::getGOTSymbol() { 1108 // First tries to find a global symbol with that name 1109 BinaryData *GOTSymBD = getBinaryDataByName("_GLOBAL_OFFSET_TABLE_"); 1110 if (GOTSymBD) 1111 return GOTSymBD; 1112 1113 // This symbol might be hidden from run-time link, so fetch the local 1114 // definition if available. 1115 GOTSymBD = getBinaryDataByName("_GLOBAL_OFFSET_TABLE_/1"); 1116 if (!GOTSymBD) 1117 return nullptr; 1118 1119 // If the local symbol is not unique, fail 1120 unsigned Index = 2; 1121 SmallString<30> Storage; 1122 while (const BinaryData *BD = 1123 getBinaryDataByName(Twine("_GLOBAL_OFFSET_TABLE_/") 1124 .concat(Twine(Index++)) 1125 .toStringRef(Storage))) 1126 if (BD->getAddress() != GOTSymBD->getAddress()) 1127 return nullptr; 1128 1129 return GOTSymBD; 1130 } 1131 1132 bool BinaryContext::setBinaryDataSize(uint64_t Address, uint64_t Size) { 1133 auto NI = BinaryDataMap.find(Address); 1134 assert(NI != BinaryDataMap.end()); 1135 if (NI == BinaryDataMap.end()) 1136 return false; 1137 // TODO: it's possible that a jump table starts at the same address 1138 // as a larger blob of private data. When we set the size of the 1139 // jump table, it might be smaller than the total blob size. In this 1140 // case we just leave the original size since (currently) it won't really 1141 // affect anything. 1142 assert((!NI->second->Size || NI->second->Size == Size || 1143 (NI->second->isJumpTable() && NI->second->Size > Size)) && 1144 "can't change the size of a symbol that has already had its " 1145 "size set"); 1146 if (!NI->second->Size) { 1147 NI->second->Size = Size; 1148 updateObjectNesting(NI); 1149 return true; 1150 } 1151 return false; 1152 } 1153 1154 void BinaryContext::generateSymbolHashes() { 1155 auto isPadding = [](const BinaryData &BD) { 1156 StringRef Contents = BD.getSection().getContents(); 1157 StringRef SymData = Contents.substr(BD.getOffset(), BD.getSize()); 1158 return (BD.getName().starts_with("HOLEat") || 1159 SymData.find_first_not_of(0) == StringRef::npos); 1160 }; 1161 1162 uint64_t NumCollisions = 0; 1163 for (auto &Entry : BinaryDataMap) { 1164 BinaryData &BD = *Entry.second; 1165 StringRef Name = BD.getName(); 1166 1167 if (!isInternalSymbolName(Name)) 1168 continue; 1169 1170 // First check if a non-anonymous alias exists and move it to the front. 1171 if (BD.getSymbols().size() > 1) { 1172 auto Itr = llvm::find_if(BD.getSymbols(), [&](const MCSymbol *Symbol) { 1173 return !isInternalSymbolName(Symbol->getName()); 1174 }); 1175 if (Itr != BD.getSymbols().end()) { 1176 size_t Idx = std::distance(BD.getSymbols().begin(), Itr); 1177 std::swap(BD.getSymbols()[0], BD.getSymbols()[Idx]); 1178 continue; 1179 } 1180 } 1181 1182 // We have to skip 0 size symbols since they will all collide. 1183 if (BD.getSize() == 0) { 1184 continue; 1185 } 1186 1187 const uint64_t Hash = BD.getSection().hash(BD); 1188 const size_t Idx = Name.find("0x"); 1189 std::string NewName = 1190 (Twine(Name.substr(0, Idx)) + "_" + Twine::utohexstr(Hash)).str(); 1191 if (getBinaryDataByName(NewName)) { 1192 // Ignore collisions for symbols that appear to be padding 1193 // (i.e. all zeros or a "hole") 1194 if (!isPadding(BD)) { 1195 if (opts::Verbosity) { 1196 this->errs() << "BOLT-WARNING: collision detected when hashing " << BD 1197 << " with new name (" << NewName << "), skipping.\n"; 1198 } 1199 ++NumCollisions; 1200 } 1201 continue; 1202 } 1203 BD.Symbols.insert(BD.Symbols.begin(), Ctx->getOrCreateSymbol(NewName)); 1204 GlobalSymbols[NewName] = &BD; 1205 } 1206 if (NumCollisions) { 1207 this->errs() << "BOLT-WARNING: " << NumCollisions 1208 << " collisions detected while hashing binary objects"; 1209 if (!opts::Verbosity) 1210 this->errs() << ". Use -v=1 to see the list."; 1211 this->errs() << '\n'; 1212 } 1213 } 1214 1215 bool BinaryContext::registerFragment(BinaryFunction &TargetFunction, 1216 BinaryFunction &Function) { 1217 assert(TargetFunction.isFragment() && "TargetFunction must be a fragment"); 1218 if (TargetFunction.isChildOf(Function)) 1219 return true; 1220 TargetFunction.addParentFragment(Function); 1221 Function.addFragment(TargetFunction); 1222 FragmentClasses.unionSets(&TargetFunction, &Function); 1223 if (!HasRelocations) { 1224 TargetFunction.setSimple(false); 1225 Function.setSimple(false); 1226 } 1227 if (opts::Verbosity >= 1) { 1228 this->outs() << "BOLT-INFO: marking " << TargetFunction 1229 << " as a fragment of " << Function << '\n'; 1230 } 1231 return true; 1232 } 1233 1234 void BinaryContext::addAdrpAddRelocAArch64(BinaryFunction &BF, 1235 MCInst &LoadLowBits, 1236 MCInst &LoadHiBits, 1237 uint64_t Target) { 1238 const MCSymbol *TargetSymbol; 1239 uint64_t Addend = 0; 1240 std::tie(TargetSymbol, Addend) = handleAddressRef(Target, BF, 1241 /*IsPCRel*/ true); 1242 int64_t Val; 1243 MIB->replaceImmWithSymbolRef(LoadHiBits, TargetSymbol, Addend, Ctx.get(), Val, 1244 ELF::R_AARCH64_ADR_PREL_PG_HI21); 1245 MIB->replaceImmWithSymbolRef(LoadLowBits, TargetSymbol, Addend, Ctx.get(), 1246 Val, ELF::R_AARCH64_ADD_ABS_LO12_NC); 1247 } 1248 1249 bool BinaryContext::handleAArch64Veneer(uint64_t Address, bool MatchOnly) { 1250 BinaryFunction *TargetFunction = getBinaryFunctionContainingAddress(Address); 1251 if (TargetFunction) 1252 return false; 1253 1254 ErrorOr<BinarySection &> Section = getSectionForAddress(Address); 1255 assert(Section && "cannot get section for referenced address"); 1256 if (!Section->isText()) 1257 return false; 1258 1259 bool Ret = false; 1260 StringRef SectionContents = Section->getContents(); 1261 uint64_t Offset = Address - Section->getAddress(); 1262 const uint64_t MaxSize = SectionContents.size() - Offset; 1263 const uint8_t *Bytes = 1264 reinterpret_cast<const uint8_t *>(SectionContents.data()); 1265 ArrayRef<uint8_t> Data(Bytes + Offset, MaxSize); 1266 1267 auto matchVeneer = [&](BinaryFunction::InstrMapType &Instructions, 1268 MCInst &Instruction, uint64_t Offset, 1269 uint64_t AbsoluteInstrAddr, 1270 uint64_t TotalSize) -> bool { 1271 MCInst *TargetHiBits, *TargetLowBits; 1272 uint64_t TargetAddress, Count; 1273 Count = MIB->matchLinkerVeneer(Instructions.begin(), Instructions.end(), 1274 AbsoluteInstrAddr, Instruction, TargetHiBits, 1275 TargetLowBits, TargetAddress); 1276 if (!Count) 1277 return false; 1278 1279 if (MatchOnly) 1280 return true; 1281 1282 // NOTE The target symbol was created during disassemble's 1283 // handleExternalReference 1284 const MCSymbol *VeneerSymbol = getOrCreateGlobalSymbol(Address, "FUNCat"); 1285 BinaryFunction *Veneer = createBinaryFunction(VeneerSymbol->getName().str(), 1286 *Section, Address, TotalSize); 1287 addAdrpAddRelocAArch64(*Veneer, *TargetLowBits, *TargetHiBits, 1288 TargetAddress); 1289 MIB->addAnnotation(Instruction, "AArch64Veneer", true); 1290 Veneer->addInstruction(Offset, std::move(Instruction)); 1291 --Count; 1292 for (auto It = Instructions.rbegin(); Count != 0; ++It, --Count) { 1293 MIB->addAnnotation(It->second, "AArch64Veneer", true); 1294 Veneer->addInstruction(It->first, std::move(It->second)); 1295 } 1296 1297 Veneer->getOrCreateLocalLabel(Address); 1298 Veneer->setMaxSize(TotalSize); 1299 Veneer->updateState(BinaryFunction::State::Disassembled); 1300 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: handling veneer function at 0x" 1301 << Twine::utohexstr(Address) << "\n"); 1302 return true; 1303 }; 1304 1305 uint64_t Size = 0, TotalSize = 0; 1306 BinaryFunction::InstrMapType VeneerInstructions; 1307 for (Offset = 0; Offset < MaxSize; Offset += Size) { 1308 MCInst Instruction; 1309 const uint64_t AbsoluteInstrAddr = Address + Offset; 1310 if (!SymbolicDisAsm->getInstruction(Instruction, Size, Data.slice(Offset), 1311 AbsoluteInstrAddr, nulls())) 1312 break; 1313 1314 TotalSize += Size; 1315 if (MIB->isBranch(Instruction)) { 1316 Ret = matchVeneer(VeneerInstructions, Instruction, Offset, 1317 AbsoluteInstrAddr, TotalSize); 1318 break; 1319 } 1320 1321 VeneerInstructions.emplace(Offset, std::move(Instruction)); 1322 } 1323 1324 return Ret; 1325 } 1326 1327 void BinaryContext::processInterproceduralReferences() { 1328 for (const std::pair<BinaryFunction *, uint64_t> &It : 1329 InterproceduralReferences) { 1330 BinaryFunction &Function = *It.first; 1331 uint64_t Address = It.second; 1332 // Process interprocedural references from ignored functions in BAT mode 1333 // (non-simple in non-relocation mode) to properly register entry points 1334 if (!Address || (Function.isIgnored() && !HasBATSection)) 1335 continue; 1336 1337 BinaryFunction *TargetFunction = 1338 getBinaryFunctionContainingAddress(Address); 1339 if (&Function == TargetFunction) 1340 continue; 1341 1342 if (TargetFunction) { 1343 if (TargetFunction->isFragment() && 1344 !areRelatedFragments(TargetFunction, &Function)) { 1345 this->errs() 1346 << "BOLT-WARNING: interprocedural reference between unrelated " 1347 "fragments: " 1348 << Function.getPrintName() << " and " 1349 << TargetFunction->getPrintName() << '\n'; 1350 } 1351 if (uint64_t Offset = Address - TargetFunction->getAddress()) 1352 TargetFunction->addEntryPointAtOffset(Offset); 1353 1354 continue; 1355 } 1356 1357 // Check if address falls in function padding space - this could be 1358 // unmarked data in code. In this case adjust the padding space size. 1359 ErrorOr<BinarySection &> Section = getSectionForAddress(Address); 1360 assert(Section && "cannot get section for referenced address"); 1361 1362 if (!Section->isText()) 1363 continue; 1364 1365 // PLT requires special handling and could be ignored in this context. 1366 StringRef SectionName = Section->getName(); 1367 if (SectionName == ".plt" || SectionName == ".plt.got") 1368 continue; 1369 1370 // Check if it is aarch64 veneer written at Address 1371 if (isAArch64() && handleAArch64Veneer(Address)) 1372 continue; 1373 1374 if (opts::processAllFunctions()) { 1375 this->errs() << "BOLT-ERROR: cannot process binaries with unmarked " 1376 << "object in code at address 0x" 1377 << Twine::utohexstr(Address) << " belonging to section " 1378 << SectionName << " in current mode\n"; 1379 exit(1); 1380 } 1381 1382 TargetFunction = getBinaryFunctionContainingAddress(Address, 1383 /*CheckPastEnd=*/false, 1384 /*UseMaxSize=*/true); 1385 // We are not going to overwrite non-simple functions, but for simple 1386 // ones - adjust the padding size. 1387 if (TargetFunction && TargetFunction->isSimple()) { 1388 this->errs() 1389 << "BOLT-WARNING: function " << *TargetFunction 1390 << " has an object detected in a padding region at address 0x" 1391 << Twine::utohexstr(Address) << '\n'; 1392 TargetFunction->setMaxSize(TargetFunction->getSize()); 1393 } 1394 } 1395 1396 InterproceduralReferences.clear(); 1397 } 1398 1399 void BinaryContext::postProcessSymbolTable() { 1400 fixBinaryDataHoles(); 1401 bool Valid = true; 1402 for (auto &Entry : BinaryDataMap) { 1403 BinaryData *BD = Entry.second; 1404 if ((BD->getName().starts_with("SYMBOLat") || 1405 BD->getName().starts_with("DATAat")) && 1406 !BD->getParent() && !BD->getSize() && !BD->isAbsolute() && 1407 BD->getSection()) { 1408 this->errs() << "BOLT-WARNING: zero-sized top level symbol: " << *BD 1409 << "\n"; 1410 Valid = false; 1411 } 1412 } 1413 assert(Valid); 1414 (void)Valid; 1415 generateSymbolHashes(); 1416 } 1417 1418 void BinaryContext::foldFunction(BinaryFunction &ChildBF, 1419 BinaryFunction &ParentBF) { 1420 assert(!ChildBF.isMultiEntry() && !ParentBF.isMultiEntry() && 1421 "cannot merge functions with multiple entry points"); 1422 1423 std::unique_lock<llvm::sys::RWMutex> WriteCtxLock(CtxMutex, std::defer_lock); 1424 std::unique_lock<llvm::sys::RWMutex> WriteSymbolMapLock( 1425 SymbolToFunctionMapMutex, std::defer_lock); 1426 1427 const StringRef ChildName = ChildBF.getOneName(); 1428 1429 // Move symbols over and update bookkeeping info. 1430 for (MCSymbol *Symbol : ChildBF.getSymbols()) { 1431 ParentBF.getSymbols().push_back(Symbol); 1432 WriteSymbolMapLock.lock(); 1433 SymbolToFunctionMap[Symbol] = &ParentBF; 1434 WriteSymbolMapLock.unlock(); 1435 // NB: there's no need to update BinaryDataMap and GlobalSymbols. 1436 } 1437 ChildBF.getSymbols().clear(); 1438 1439 // Move other names the child function is known under. 1440 llvm::move(ChildBF.Aliases, std::back_inserter(ParentBF.Aliases)); 1441 ChildBF.Aliases.clear(); 1442 1443 if (HasRelocations) { 1444 // Merge execution counts of ChildBF into those of ParentBF. 1445 // Without relocations, we cannot reliably merge profiles as both functions 1446 // continue to exist and either one can be executed. 1447 ChildBF.mergeProfileDataInto(ParentBF); 1448 1449 std::shared_lock<llvm::sys::RWMutex> ReadBfsLock(BinaryFunctionsMutex, 1450 std::defer_lock); 1451 std::unique_lock<llvm::sys::RWMutex> WriteBfsLock(BinaryFunctionsMutex, 1452 std::defer_lock); 1453 // Remove ChildBF from the global set of functions in relocs mode. 1454 ReadBfsLock.lock(); 1455 auto FI = BinaryFunctions.find(ChildBF.getAddress()); 1456 ReadBfsLock.unlock(); 1457 1458 assert(FI != BinaryFunctions.end() && "function not found"); 1459 assert(&ChildBF == &FI->second && "function mismatch"); 1460 1461 WriteBfsLock.lock(); 1462 ChildBF.clearDisasmState(); 1463 FI = BinaryFunctions.erase(FI); 1464 WriteBfsLock.unlock(); 1465 1466 } else { 1467 // In non-relocation mode we keep the function, but rename it. 1468 std::string NewName = "__ICF_" + ChildName.str(); 1469 1470 WriteCtxLock.lock(); 1471 ChildBF.getSymbols().push_back(Ctx->getOrCreateSymbol(NewName)); 1472 WriteCtxLock.unlock(); 1473 1474 ChildBF.setFolded(&ParentBF); 1475 } 1476 1477 ParentBF.setHasFunctionsFoldedInto(); 1478 } 1479 1480 void BinaryContext::fixBinaryDataHoles() { 1481 assert(validateObjectNesting() && "object nesting inconsistency detected"); 1482 1483 for (BinarySection &Section : allocatableSections()) { 1484 std::vector<std::pair<uint64_t, uint64_t>> Holes; 1485 1486 auto isNotHole = [&Section](const binary_data_iterator &Itr) { 1487 BinaryData *BD = Itr->second; 1488 bool isHole = (!BD->getParent() && !BD->getSize() && BD->isObject() && 1489 (BD->getName().starts_with("SYMBOLat0x") || 1490 BD->getName().starts_with("DATAat0x") || 1491 BD->getName().starts_with("ANONYMOUS"))); 1492 return !isHole && BD->getSection() == Section && !BD->getParent(); 1493 }; 1494 1495 auto BDStart = BinaryDataMap.begin(); 1496 auto BDEnd = BinaryDataMap.end(); 1497 auto Itr = FilteredBinaryDataIterator(isNotHole, BDStart, BDEnd); 1498 auto End = FilteredBinaryDataIterator(isNotHole, BDEnd, BDEnd); 1499 1500 uint64_t EndAddress = Section.getAddress(); 1501 1502 while (Itr != End) { 1503 if (Itr->second->getAddress() > EndAddress) { 1504 uint64_t Gap = Itr->second->getAddress() - EndAddress; 1505 Holes.emplace_back(EndAddress, Gap); 1506 } 1507 EndAddress = Itr->second->getEndAddress(); 1508 ++Itr; 1509 } 1510 1511 if (EndAddress < Section.getEndAddress()) 1512 Holes.emplace_back(EndAddress, Section.getEndAddress() - EndAddress); 1513 1514 // If there is already a symbol at the start of the hole, grow that symbol 1515 // to cover the rest. Otherwise, create a new symbol to cover the hole. 1516 for (std::pair<uint64_t, uint64_t> &Hole : Holes) { 1517 BinaryData *BD = getBinaryDataAtAddress(Hole.first); 1518 if (BD) { 1519 // BD->getSection() can be != Section if there are sections that 1520 // overlap. In this case it is probably safe to just skip the holes 1521 // since the overlapping section will not(?) have any symbols in it. 1522 if (BD->getSection() == Section) 1523 setBinaryDataSize(Hole.first, Hole.second); 1524 } else { 1525 getOrCreateGlobalSymbol(Hole.first, "HOLEat", Hole.second, 1); 1526 } 1527 } 1528 } 1529 1530 assert(validateObjectNesting() && "object nesting inconsistency detected"); 1531 assert(validateHoles() && "top level hole detected in object map"); 1532 } 1533 1534 void BinaryContext::printGlobalSymbols(raw_ostream &OS) const { 1535 const BinarySection *CurrentSection = nullptr; 1536 bool FirstSection = true; 1537 1538 for (auto &Entry : BinaryDataMap) { 1539 const BinaryData *BD = Entry.second; 1540 const BinarySection &Section = BD->getSection(); 1541 if (FirstSection || Section != *CurrentSection) { 1542 uint64_t Address, Size; 1543 StringRef Name = Section.getName(); 1544 if (Section) { 1545 Address = Section.getAddress(); 1546 Size = Section.getSize(); 1547 } else { 1548 Address = BD->getAddress(); 1549 Size = BD->getSize(); 1550 } 1551 OS << "BOLT-INFO: Section " << Name << ", " 1552 << "0x" + Twine::utohexstr(Address) << ":" 1553 << "0x" + Twine::utohexstr(Address + Size) << "/" << Size << "\n"; 1554 CurrentSection = &Section; 1555 FirstSection = false; 1556 } 1557 1558 OS << "BOLT-INFO: "; 1559 const BinaryData *P = BD->getParent(); 1560 while (P) { 1561 OS << " "; 1562 P = P->getParent(); 1563 } 1564 OS << *BD << "\n"; 1565 } 1566 } 1567 1568 Expected<unsigned> BinaryContext::getDwarfFile( 1569 StringRef Directory, StringRef FileName, unsigned FileNumber, 1570 std::optional<MD5::MD5Result> Checksum, std::optional<StringRef> Source, 1571 unsigned CUID, unsigned DWARFVersion) { 1572 DwarfLineTable &Table = DwarfLineTablesCUMap[CUID]; 1573 return Table.tryGetFile(Directory, FileName, Checksum, Source, DWARFVersion, 1574 FileNumber); 1575 } 1576 1577 unsigned BinaryContext::addDebugFilenameToUnit(const uint32_t DestCUID, 1578 const uint32_t SrcCUID, 1579 unsigned FileIndex) { 1580 DWARFCompileUnit *SrcUnit = DwCtx->getCompileUnitForOffset(SrcCUID); 1581 const DWARFDebugLine::LineTable *LineTable = 1582 DwCtx->getLineTableForUnit(SrcUnit); 1583 const std::vector<DWARFDebugLine::FileNameEntry> &FileNames = 1584 LineTable->Prologue.FileNames; 1585 // Dir indexes start at 1, as DWARF file numbers, and a dir index 0 1586 // means empty dir. 1587 assert(FileIndex > 0 && FileIndex <= FileNames.size() && 1588 "FileIndex out of range for the compilation unit."); 1589 StringRef Dir = ""; 1590 if (FileNames[FileIndex - 1].DirIdx != 0) { 1591 if (std::optional<const char *> DirName = dwarf::toString( 1592 LineTable->Prologue 1593 .IncludeDirectories[FileNames[FileIndex - 1].DirIdx - 1])) { 1594 Dir = *DirName; 1595 } 1596 } 1597 StringRef FileName = ""; 1598 if (std::optional<const char *> FName = 1599 dwarf::toString(FileNames[FileIndex - 1].Name)) 1600 FileName = *FName; 1601 assert(FileName != ""); 1602 DWARFCompileUnit *DstUnit = DwCtx->getCompileUnitForOffset(DestCUID); 1603 return cantFail(getDwarfFile(Dir, FileName, 0, std::nullopt, std::nullopt, 1604 DestCUID, DstUnit->getVersion())); 1605 } 1606 1607 std::vector<BinaryFunction *> BinaryContext::getSortedFunctions() { 1608 std::vector<BinaryFunction *> SortedFunctions(BinaryFunctions.size()); 1609 llvm::transform(llvm::make_second_range(BinaryFunctions), 1610 SortedFunctions.begin(), 1611 [](BinaryFunction &BF) { return &BF; }); 1612 1613 llvm::stable_sort(SortedFunctions, compareBinaryFunctionByIndex); 1614 return SortedFunctions; 1615 } 1616 1617 std::vector<BinaryFunction *> BinaryContext::getAllBinaryFunctions() { 1618 std::vector<BinaryFunction *> AllFunctions; 1619 AllFunctions.reserve(BinaryFunctions.size() + InjectedBinaryFunctions.size()); 1620 llvm::transform(llvm::make_second_range(BinaryFunctions), 1621 std::back_inserter(AllFunctions), 1622 [](BinaryFunction &BF) { return &BF; }); 1623 llvm::copy(InjectedBinaryFunctions, std::back_inserter(AllFunctions)); 1624 1625 return AllFunctions; 1626 } 1627 1628 std::optional<DWARFUnit *> BinaryContext::getDWOCU(uint64_t DWOId) { 1629 auto Iter = DWOCUs.find(DWOId); 1630 if (Iter == DWOCUs.end()) 1631 return std::nullopt; 1632 1633 return Iter->second; 1634 } 1635 1636 DWARFContext *BinaryContext::getDWOContext() const { 1637 if (DWOCUs.empty()) 1638 return nullptr; 1639 return &DWOCUs.begin()->second->getContext(); 1640 } 1641 1642 /// Handles DWO sections that can either be in .o, .dwo or .dwp files. 1643 void BinaryContext::preprocessDWODebugInfo() { 1644 for (const std::unique_ptr<DWARFUnit> &CU : DwCtx->compile_units()) { 1645 DWARFUnit *const DwarfUnit = CU.get(); 1646 if (std::optional<uint64_t> DWOId = DwarfUnit->getDWOId()) { 1647 std::string DWOName = dwarf::toString( 1648 DwarfUnit->getUnitDIE().find( 1649 {dwarf::DW_AT_dwo_name, dwarf::DW_AT_GNU_dwo_name}), 1650 ""); 1651 SmallString<16> AbsolutePath; 1652 if (!opts::CompDirOverride.empty()) { 1653 sys::path::append(AbsolutePath, opts::CompDirOverride); 1654 sys::path::append(AbsolutePath, DWOName); 1655 } 1656 DWARFUnit *DWOCU = 1657 DwarfUnit->getNonSkeletonUnitDIE(false, AbsolutePath).getDwarfUnit(); 1658 if (!DWOCU->isDWOUnit()) { 1659 this->outs() 1660 << "BOLT-WARNING: Debug Fission: DWO debug information for " 1661 << DWOName 1662 << " was not retrieved and won't be updated. Please check " 1663 "relative path.\n"; 1664 continue; 1665 } 1666 DWOCUs[*DWOId] = DWOCU; 1667 } 1668 } 1669 if (!DWOCUs.empty()) 1670 this->outs() << "BOLT-INFO: processing split DWARF\n"; 1671 } 1672 1673 void BinaryContext::preprocessDebugInfo() { 1674 struct CURange { 1675 uint64_t LowPC; 1676 uint64_t HighPC; 1677 DWARFUnit *Unit; 1678 1679 bool operator<(const CURange &Other) const { return LowPC < Other.LowPC; } 1680 }; 1681 1682 // Building a map of address ranges to CUs similar to .debug_aranges and use 1683 // it to assign CU to functions. 1684 std::vector<CURange> AllRanges; 1685 AllRanges.reserve(DwCtx->getNumCompileUnits()); 1686 for (const std::unique_ptr<DWARFUnit> &CU : DwCtx->compile_units()) { 1687 Expected<DWARFAddressRangesVector> RangesOrError = 1688 CU->getUnitDIE().getAddressRanges(); 1689 if (!RangesOrError) { 1690 consumeError(RangesOrError.takeError()); 1691 continue; 1692 } 1693 for (DWARFAddressRange &Range : *RangesOrError) { 1694 // Parts of the debug info could be invalidated due to corresponding code 1695 // being removed from the binary by the linker. Hence we check if the 1696 // address is a valid one. 1697 if (containsAddress(Range.LowPC)) 1698 AllRanges.emplace_back(CURange{Range.LowPC, Range.HighPC, CU.get()}); 1699 } 1700 1701 ContainsDwarf5 |= CU->getVersion() >= 5; 1702 ContainsDwarfLegacy |= CU->getVersion() < 5; 1703 } 1704 1705 llvm::sort(AllRanges); 1706 for (auto &KV : BinaryFunctions) { 1707 const uint64_t FunctionAddress = KV.first; 1708 BinaryFunction &Function = KV.second; 1709 1710 auto It = llvm::partition_point( 1711 AllRanges, [=](CURange R) { return R.HighPC <= FunctionAddress; }); 1712 if (It != AllRanges.end() && It->LowPC <= FunctionAddress) 1713 Function.setDWARFUnit(It->Unit); 1714 } 1715 1716 // Discover units with debug info that needs to be updated. 1717 for (const auto &KV : BinaryFunctions) { 1718 const BinaryFunction &BF = KV.second; 1719 if (shouldEmit(BF) && BF.getDWARFUnit()) 1720 ProcessedCUs.insert(BF.getDWARFUnit()); 1721 } 1722 1723 // Clear debug info for functions from units that we are not going to process. 1724 for (auto &KV : BinaryFunctions) { 1725 BinaryFunction &BF = KV.second; 1726 if (BF.getDWARFUnit() && !ProcessedCUs.count(BF.getDWARFUnit())) 1727 BF.setDWARFUnit(nullptr); 1728 } 1729 1730 if (opts::Verbosity >= 1) { 1731 this->outs() << "BOLT-INFO: " << ProcessedCUs.size() << " out of " 1732 << DwCtx->getNumCompileUnits() << " CUs will be updated\n"; 1733 } 1734 1735 preprocessDWODebugInfo(); 1736 1737 // Populate MCContext with DWARF files from all units. 1738 StringRef GlobalPrefix = AsmInfo->getPrivateGlobalPrefix(); 1739 for (const std::unique_ptr<DWARFUnit> &CU : DwCtx->compile_units()) { 1740 const uint64_t CUID = CU->getOffset(); 1741 DwarfLineTable &BinaryLineTable = getDwarfLineTable(CUID); 1742 BinaryLineTable.setLabel(Ctx->getOrCreateSymbol( 1743 GlobalPrefix + "line_table_start" + Twine(CUID))); 1744 1745 if (!ProcessedCUs.count(CU.get())) 1746 continue; 1747 1748 const DWARFDebugLine::LineTable *LineTable = 1749 DwCtx->getLineTableForUnit(CU.get()); 1750 const std::vector<DWARFDebugLine::FileNameEntry> &FileNames = 1751 LineTable->Prologue.FileNames; 1752 1753 uint16_t DwarfVersion = LineTable->Prologue.getVersion(); 1754 if (DwarfVersion >= 5) { 1755 std::optional<MD5::MD5Result> Checksum; 1756 if (LineTable->Prologue.ContentTypes.HasMD5) 1757 Checksum = LineTable->Prologue.FileNames[0].Checksum; 1758 std::optional<const char *> Name = 1759 dwarf::toString(CU->getUnitDIE().find(dwarf::DW_AT_name), nullptr); 1760 if (std::optional<uint64_t> DWOID = CU->getDWOId()) { 1761 auto Iter = DWOCUs.find(*DWOID); 1762 assert(Iter != DWOCUs.end() && "DWO CU was not found."); 1763 Name = dwarf::toString( 1764 Iter->second->getUnitDIE().find(dwarf::DW_AT_name), nullptr); 1765 } 1766 BinaryLineTable.setRootFile(CU->getCompilationDir(), *Name, Checksum, 1767 std::nullopt); 1768 } 1769 1770 BinaryLineTable.setDwarfVersion(DwarfVersion); 1771 1772 // Assign a unique label to every line table, one per CU. 1773 // Make sure empty debug line tables are registered too. 1774 if (FileNames.empty()) { 1775 cantFail(getDwarfFile("", "<unknown>", 0, std::nullopt, std::nullopt, 1776 CUID, DwarfVersion)); 1777 continue; 1778 } 1779 const uint32_t Offset = DwarfVersion < 5 ? 1 : 0; 1780 for (size_t I = 0, Size = FileNames.size(); I != Size; ++I) { 1781 // Dir indexes start at 1, as DWARF file numbers, and a dir index 0 1782 // means empty dir. 1783 StringRef Dir = ""; 1784 if (FileNames[I].DirIdx != 0 || DwarfVersion >= 5) 1785 if (std::optional<const char *> DirName = dwarf::toString( 1786 LineTable->Prologue 1787 .IncludeDirectories[FileNames[I].DirIdx - Offset])) 1788 Dir = *DirName; 1789 StringRef FileName = ""; 1790 if (std::optional<const char *> FName = 1791 dwarf::toString(FileNames[I].Name)) 1792 FileName = *FName; 1793 assert(FileName != ""); 1794 std::optional<MD5::MD5Result> Checksum; 1795 if (DwarfVersion >= 5 && LineTable->Prologue.ContentTypes.HasMD5) 1796 Checksum = LineTable->Prologue.FileNames[I].Checksum; 1797 cantFail(getDwarfFile(Dir, FileName, 0, Checksum, std::nullopt, CUID, 1798 DwarfVersion)); 1799 } 1800 } 1801 } 1802 1803 bool BinaryContext::shouldEmit(const BinaryFunction &Function) const { 1804 if (Function.isPseudo()) 1805 return false; 1806 1807 if (opts::processAllFunctions()) 1808 return true; 1809 1810 if (Function.isIgnored()) 1811 return false; 1812 1813 // In relocation mode we will emit non-simple functions with CFG. 1814 // If the function does not have a CFG it should be marked as ignored. 1815 return HasRelocations || Function.isSimple(); 1816 } 1817 1818 void BinaryContext::dump(const MCInst &Inst) const { 1819 if (LLVM_UNLIKELY(!InstPrinter)) { 1820 dbgs() << "Cannot dump for InstPrinter is not initialized.\n"; 1821 return; 1822 } 1823 InstPrinter->printInst(&Inst, 0, "", *STI, dbgs()); 1824 dbgs() << "\n"; 1825 } 1826 1827 void BinaryContext::printCFI(raw_ostream &OS, const MCCFIInstruction &Inst) { 1828 uint32_t Operation = Inst.getOperation(); 1829 switch (Operation) { 1830 case MCCFIInstruction::OpSameValue: 1831 OS << "OpSameValue Reg" << Inst.getRegister(); 1832 break; 1833 case MCCFIInstruction::OpRememberState: 1834 OS << "OpRememberState"; 1835 break; 1836 case MCCFIInstruction::OpRestoreState: 1837 OS << "OpRestoreState"; 1838 break; 1839 case MCCFIInstruction::OpOffset: 1840 OS << "OpOffset Reg" << Inst.getRegister() << " " << Inst.getOffset(); 1841 break; 1842 case MCCFIInstruction::OpDefCfaRegister: 1843 OS << "OpDefCfaRegister Reg" << Inst.getRegister(); 1844 break; 1845 case MCCFIInstruction::OpDefCfaOffset: 1846 OS << "OpDefCfaOffset " << Inst.getOffset(); 1847 break; 1848 case MCCFIInstruction::OpDefCfa: 1849 OS << "OpDefCfa Reg" << Inst.getRegister() << " " << Inst.getOffset(); 1850 break; 1851 case MCCFIInstruction::OpRelOffset: 1852 OS << "OpRelOffset Reg" << Inst.getRegister() << " " << Inst.getOffset(); 1853 break; 1854 case MCCFIInstruction::OpAdjustCfaOffset: 1855 OS << "OfAdjustCfaOffset " << Inst.getOffset(); 1856 break; 1857 case MCCFIInstruction::OpEscape: 1858 OS << "OpEscape"; 1859 break; 1860 case MCCFIInstruction::OpRestore: 1861 OS << "OpRestore Reg" << Inst.getRegister(); 1862 break; 1863 case MCCFIInstruction::OpUndefined: 1864 OS << "OpUndefined Reg" << Inst.getRegister(); 1865 break; 1866 case MCCFIInstruction::OpRegister: 1867 OS << "OpRegister Reg" << Inst.getRegister() << " Reg" 1868 << Inst.getRegister2(); 1869 break; 1870 case MCCFIInstruction::OpWindowSave: 1871 OS << "OpWindowSave"; 1872 break; 1873 case MCCFIInstruction::OpGnuArgsSize: 1874 OS << "OpGnuArgsSize"; 1875 break; 1876 default: 1877 OS << "Op#" << Operation; 1878 break; 1879 } 1880 } 1881 1882 MarkerSymType BinaryContext::getMarkerType(const SymbolRef &Symbol) const { 1883 // For aarch64 and riscv, the ABI defines mapping symbols so we identify data 1884 // in the code section (see IHI0056B). $x identifies a symbol starting code or 1885 // the end of a data chunk inside code, $d identifies start of data. 1886 if (isX86() || ELFSymbolRef(Symbol).getSize()) 1887 return MarkerSymType::NONE; 1888 1889 Expected<StringRef> NameOrError = Symbol.getName(); 1890 Expected<object::SymbolRef::Type> TypeOrError = Symbol.getType(); 1891 1892 if (!TypeOrError || !NameOrError) 1893 return MarkerSymType::NONE; 1894 1895 if (*TypeOrError != SymbolRef::ST_Unknown) 1896 return MarkerSymType::NONE; 1897 1898 if (*NameOrError == "$x" || NameOrError->starts_with("$x.")) 1899 return MarkerSymType::CODE; 1900 1901 // $x<ISA> 1902 if (isRISCV() && NameOrError->starts_with("$x")) 1903 return MarkerSymType::CODE; 1904 1905 if (*NameOrError == "$d" || NameOrError->starts_with("$d.")) 1906 return MarkerSymType::DATA; 1907 1908 return MarkerSymType::NONE; 1909 } 1910 1911 bool BinaryContext::isMarker(const SymbolRef &Symbol) const { 1912 return getMarkerType(Symbol) != MarkerSymType::NONE; 1913 } 1914 1915 static void printDebugInfo(raw_ostream &OS, const MCInst &Instruction, 1916 const BinaryFunction *Function, 1917 DWARFContext *DwCtx) { 1918 DebugLineTableRowRef RowRef = 1919 DebugLineTableRowRef::fromSMLoc(Instruction.getLoc()); 1920 if (RowRef == DebugLineTableRowRef::NULL_ROW) 1921 return; 1922 1923 const DWARFDebugLine::LineTable *LineTable; 1924 if (Function && Function->getDWARFUnit() && 1925 Function->getDWARFUnit()->getOffset() == RowRef.DwCompileUnitIndex) { 1926 LineTable = Function->getDWARFLineTable(); 1927 } else { 1928 LineTable = DwCtx->getLineTableForUnit( 1929 DwCtx->getCompileUnitForOffset(RowRef.DwCompileUnitIndex)); 1930 } 1931 assert(LineTable && "line table expected for instruction with debug info"); 1932 1933 const DWARFDebugLine::Row &Row = LineTable->Rows[RowRef.RowIndex - 1]; 1934 StringRef FileName = ""; 1935 if (std::optional<const char *> FName = 1936 dwarf::toString(LineTable->Prologue.FileNames[Row.File - 1].Name)) 1937 FileName = *FName; 1938 OS << " # debug line " << FileName << ":" << Row.Line; 1939 if (Row.Column) 1940 OS << ":" << Row.Column; 1941 if (Row.Discriminator) 1942 OS << " discriminator:" << Row.Discriminator; 1943 } 1944 1945 void BinaryContext::printInstruction(raw_ostream &OS, const MCInst &Instruction, 1946 uint64_t Offset, 1947 const BinaryFunction *Function, 1948 bool PrintMCInst, bool PrintMemData, 1949 bool PrintRelocations, 1950 StringRef Endl) const { 1951 OS << format(" %08" PRIx64 ": ", Offset); 1952 if (MIB->isCFI(Instruction)) { 1953 uint32_t Offset = Instruction.getOperand(0).getImm(); 1954 OS << "\t!CFI\t$" << Offset << "\t; "; 1955 if (Function) 1956 printCFI(OS, *Function->getCFIFor(Instruction)); 1957 OS << Endl; 1958 return; 1959 } 1960 if (std::optional<uint32_t> DynamicID = 1961 MIB->getDynamicBranchID(Instruction)) { 1962 OS << "\tjit\t" << MIB->getTargetSymbol(Instruction)->getName() 1963 << " # ID: " << DynamicID; 1964 } else { 1965 // If there are annotations on the instruction, the MCInstPrinter will fail 1966 // to print the preferred alias as it only does so when the number of 1967 // operands is as expected. See 1968 // https://github.com/llvm/llvm-project/blob/782f1a0d895646c364a53f9dcdd6d4ec1f3e5ea0/llvm/lib/MC/MCInstPrinter.cpp#L142 1969 // Therefore, create a temporary copy of the Inst from which the annotations 1970 // are removed, and print that Inst. 1971 MCInst InstNoAnnot = Instruction; 1972 MIB->stripAnnotations(InstNoAnnot); 1973 InstPrinter->printInst(&InstNoAnnot, 0, "", *STI, OS); 1974 } 1975 if (MIB->isCall(Instruction)) { 1976 if (MIB->isTailCall(Instruction)) 1977 OS << " # TAILCALL "; 1978 if (MIB->isInvoke(Instruction)) { 1979 const std::optional<MCPlus::MCLandingPad> EHInfo = 1980 MIB->getEHInfo(Instruction); 1981 OS << " # handler: "; 1982 if (EHInfo->first) 1983 OS << *EHInfo->first; 1984 else 1985 OS << '0'; 1986 OS << "; action: " << EHInfo->second; 1987 const int64_t GnuArgsSize = MIB->getGnuArgsSize(Instruction); 1988 if (GnuArgsSize >= 0) 1989 OS << "; GNU_args_size = " << GnuArgsSize; 1990 } 1991 } else if (MIB->isIndirectBranch(Instruction)) { 1992 if (uint64_t JTAddress = MIB->getJumpTable(Instruction)) { 1993 OS << " # JUMPTABLE @0x" << Twine::utohexstr(JTAddress); 1994 } else { 1995 OS << " # UNKNOWN CONTROL FLOW"; 1996 } 1997 } 1998 if (std::optional<uint32_t> Offset = MIB->getOffset(Instruction)) 1999 OS << " # Offset: " << *Offset; 2000 if (std::optional<uint32_t> Size = MIB->getSize(Instruction)) 2001 OS << " # Size: " << *Size; 2002 if (MCSymbol *Label = MIB->getInstLabel(Instruction)) 2003 OS << " # Label: " << *Label; 2004 2005 MIB->printAnnotations(Instruction, OS); 2006 2007 if (opts::PrintDebugInfo) 2008 printDebugInfo(OS, Instruction, Function, DwCtx.get()); 2009 2010 if ((opts::PrintRelocations || PrintRelocations) && Function) { 2011 const uint64_t Size = computeCodeSize(&Instruction, &Instruction + 1); 2012 Function->printRelocations(OS, Offset, Size); 2013 } 2014 2015 OS << Endl; 2016 2017 if (PrintMCInst) { 2018 Instruction.dump_pretty(OS, InstPrinter.get()); 2019 OS << Endl; 2020 } 2021 } 2022 2023 std::optional<uint64_t> 2024 BinaryContext::getBaseAddressForMapping(uint64_t MMapAddress, 2025 uint64_t FileOffset) const { 2026 // Find a segment with a matching file offset. 2027 for (auto &KV : SegmentMapInfo) { 2028 const SegmentInfo &SegInfo = KV.second; 2029 // Only consider executable segments. 2030 if (!SegInfo.IsExecutable) 2031 continue; 2032 // FileOffset is got from perf event, 2033 // and it is equal to alignDown(SegInfo.FileOffset, pagesize). 2034 // If the pagesize is not equal to SegInfo.Alignment. 2035 // FileOffset and SegInfo.FileOffset should be aligned first, 2036 // and then judge whether they are equal. 2037 if (alignDown(SegInfo.FileOffset, SegInfo.Alignment) == 2038 alignDown(FileOffset, SegInfo.Alignment)) { 2039 // The function's offset from base address in VAS is aligned by pagesize 2040 // instead of SegInfo.Alignment. Pagesize can't be got from perf events. 2041 // However, The ELF document says that SegInfo.FileOffset should equal 2042 // to SegInfo.Address, modulo the pagesize. 2043 // Reference: https://refspecs.linuxfoundation.org/elf/elf.pdf 2044 2045 // So alignDown(SegInfo.Address, pagesize) can be calculated by: 2046 // alignDown(SegInfo.Address, pagesize) 2047 // = SegInfo.Address - (SegInfo.Address % pagesize) 2048 // = SegInfo.Address - (SegInfo.FileOffset % pagesize) 2049 // = SegInfo.Address - SegInfo.FileOffset + 2050 // alignDown(SegInfo.FileOffset, pagesize) 2051 // = SegInfo.Address - SegInfo.FileOffset + FileOffset 2052 return MMapAddress - (SegInfo.Address - SegInfo.FileOffset + FileOffset); 2053 } 2054 } 2055 2056 return std::nullopt; 2057 } 2058 2059 ErrorOr<BinarySection &> BinaryContext::getSectionForAddress(uint64_t Address) { 2060 auto SI = AddressToSection.upper_bound(Address); 2061 if (SI != AddressToSection.begin()) { 2062 --SI; 2063 uint64_t UpperBound = SI->first + SI->second->getSize(); 2064 if (!SI->second->getSize()) 2065 UpperBound += 1; 2066 if (UpperBound > Address) 2067 return *SI->second; 2068 } 2069 return std::make_error_code(std::errc::bad_address); 2070 } 2071 2072 ErrorOr<StringRef> 2073 BinaryContext::getSectionNameForAddress(uint64_t Address) const { 2074 if (ErrorOr<const BinarySection &> Section = getSectionForAddress(Address)) 2075 return Section->getName(); 2076 return std::make_error_code(std::errc::bad_address); 2077 } 2078 2079 BinarySection &BinaryContext::registerSection(BinarySection *Section) { 2080 auto Res = Sections.insert(Section); 2081 (void)Res; 2082 assert(Res.second && "can't register the same section twice."); 2083 2084 // Only register allocatable sections in the AddressToSection map. 2085 if (Section->isAllocatable() && Section->getAddress()) 2086 AddressToSection.insert(std::make_pair(Section->getAddress(), Section)); 2087 NameToSection.insert( 2088 std::make_pair(std::string(Section->getName()), Section)); 2089 if (Section->hasSectionRef()) 2090 SectionRefToBinarySection.insert( 2091 std::make_pair(Section->getSectionRef(), Section)); 2092 2093 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: registering " << *Section << "\n"); 2094 return *Section; 2095 } 2096 2097 BinarySection &BinaryContext::registerSection(SectionRef Section) { 2098 return registerSection(new BinarySection(*this, Section)); 2099 } 2100 2101 BinarySection & 2102 BinaryContext::registerSection(const Twine &SectionName, 2103 const BinarySection &OriginalSection) { 2104 return registerSection( 2105 new BinarySection(*this, SectionName, OriginalSection)); 2106 } 2107 2108 BinarySection & 2109 BinaryContext::registerOrUpdateSection(const Twine &Name, unsigned ELFType, 2110 unsigned ELFFlags, uint8_t *Data, 2111 uint64_t Size, unsigned Alignment) { 2112 auto NamedSections = getSectionByName(Name); 2113 if (NamedSections.begin() != NamedSections.end()) { 2114 assert(std::next(NamedSections.begin()) == NamedSections.end() && 2115 "can only update unique sections"); 2116 BinarySection *Section = NamedSections.begin()->second; 2117 2118 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: updating " << *Section << " -> "); 2119 const bool Flag = Section->isAllocatable(); 2120 (void)Flag; 2121 Section->update(Data, Size, Alignment, ELFType, ELFFlags); 2122 LLVM_DEBUG(dbgs() << *Section << "\n"); 2123 // FIXME: Fix section flags/attributes for MachO. 2124 if (isELF()) 2125 assert(Flag == Section->isAllocatable() && 2126 "can't change section allocation status"); 2127 return *Section; 2128 } 2129 2130 return registerSection( 2131 new BinarySection(*this, Name, Data, Size, Alignment, ELFType, ELFFlags)); 2132 } 2133 2134 void BinaryContext::deregisterSectionName(const BinarySection &Section) { 2135 auto NameRange = NameToSection.equal_range(Section.getName().str()); 2136 while (NameRange.first != NameRange.second) { 2137 if (NameRange.first->second == &Section) { 2138 NameToSection.erase(NameRange.first); 2139 break; 2140 } 2141 ++NameRange.first; 2142 } 2143 } 2144 2145 void BinaryContext::deregisterUnusedSections() { 2146 ErrorOr<BinarySection &> AbsSection = getUniqueSectionByName("<absolute>"); 2147 for (auto SI = Sections.begin(); SI != Sections.end();) { 2148 BinarySection *Section = *SI; 2149 // We check getOutputData() instead of getOutputSize() because sometimes 2150 // zero-sized .text.cold sections are allocated. 2151 if (Section->hasSectionRef() || Section->getOutputData() || 2152 (AbsSection && Section == &AbsSection.get())) { 2153 ++SI; 2154 continue; 2155 } 2156 2157 LLVM_DEBUG(dbgs() << "LLVM-DEBUG: deregistering " << Section->getName() 2158 << '\n';); 2159 deregisterSectionName(*Section); 2160 SI = Sections.erase(SI); 2161 delete Section; 2162 } 2163 } 2164 2165 bool BinaryContext::deregisterSection(BinarySection &Section) { 2166 BinarySection *SectionPtr = &Section; 2167 auto Itr = Sections.find(SectionPtr); 2168 if (Itr != Sections.end()) { 2169 auto Range = AddressToSection.equal_range(SectionPtr->getAddress()); 2170 while (Range.first != Range.second) { 2171 if (Range.first->second == SectionPtr) { 2172 AddressToSection.erase(Range.first); 2173 break; 2174 } 2175 ++Range.first; 2176 } 2177 2178 deregisterSectionName(*SectionPtr); 2179 Sections.erase(Itr); 2180 delete SectionPtr; 2181 return true; 2182 } 2183 return false; 2184 } 2185 2186 void BinaryContext::renameSection(BinarySection &Section, 2187 const Twine &NewName) { 2188 auto Itr = Sections.find(&Section); 2189 assert(Itr != Sections.end() && "Section must exist to be renamed."); 2190 Sections.erase(Itr); 2191 2192 deregisterSectionName(Section); 2193 2194 Section.Name = NewName.str(); 2195 Section.setOutputName(Section.Name); 2196 2197 NameToSection.insert(std::make_pair(Section.Name, &Section)); 2198 2199 // Reinsert with the new name. 2200 Sections.insert(&Section); 2201 } 2202 2203 void BinaryContext::printSections(raw_ostream &OS) const { 2204 for (BinarySection *const &Section : Sections) 2205 OS << "BOLT-INFO: " << *Section << "\n"; 2206 } 2207 2208 BinarySection &BinaryContext::absoluteSection() { 2209 if (ErrorOr<BinarySection &> Section = getUniqueSectionByName("<absolute>")) 2210 return *Section; 2211 return registerOrUpdateSection("<absolute>", ELF::SHT_NULL, 0u); 2212 } 2213 2214 ErrorOr<uint64_t> BinaryContext::getUnsignedValueAtAddress(uint64_t Address, 2215 size_t Size) const { 2216 const ErrorOr<const BinarySection &> Section = getSectionForAddress(Address); 2217 if (!Section) 2218 return std::make_error_code(std::errc::bad_address); 2219 2220 if (Section->isVirtual()) 2221 return 0; 2222 2223 DataExtractor DE(Section->getContents(), AsmInfo->isLittleEndian(), 2224 AsmInfo->getCodePointerSize()); 2225 auto ValueOffset = static_cast<uint64_t>(Address - Section->getAddress()); 2226 return DE.getUnsigned(&ValueOffset, Size); 2227 } 2228 2229 ErrorOr<int64_t> BinaryContext::getSignedValueAtAddress(uint64_t Address, 2230 size_t Size) const { 2231 const ErrorOr<const BinarySection &> Section = getSectionForAddress(Address); 2232 if (!Section) 2233 return std::make_error_code(std::errc::bad_address); 2234 2235 if (Section->isVirtual()) 2236 return 0; 2237 2238 DataExtractor DE(Section->getContents(), AsmInfo->isLittleEndian(), 2239 AsmInfo->getCodePointerSize()); 2240 auto ValueOffset = static_cast<uint64_t>(Address - Section->getAddress()); 2241 return DE.getSigned(&ValueOffset, Size); 2242 } 2243 2244 void BinaryContext::addRelocation(uint64_t Address, MCSymbol *Symbol, 2245 uint64_t Type, uint64_t Addend, 2246 uint64_t Value) { 2247 ErrorOr<BinarySection &> Section = getSectionForAddress(Address); 2248 assert(Section && "cannot find section for address"); 2249 Section->addRelocation(Address - Section->getAddress(), Symbol, Type, Addend, 2250 Value); 2251 } 2252 2253 void BinaryContext::addDynamicRelocation(uint64_t Address, MCSymbol *Symbol, 2254 uint64_t Type, uint64_t Addend, 2255 uint64_t Value) { 2256 ErrorOr<BinarySection &> Section = getSectionForAddress(Address); 2257 assert(Section && "cannot find section for address"); 2258 Section->addDynamicRelocation(Address - Section->getAddress(), Symbol, Type, 2259 Addend, Value); 2260 } 2261 2262 bool BinaryContext::removeRelocationAt(uint64_t Address) { 2263 ErrorOr<BinarySection &> Section = getSectionForAddress(Address); 2264 assert(Section && "cannot find section for address"); 2265 return Section->removeRelocationAt(Address - Section->getAddress()); 2266 } 2267 2268 const Relocation *BinaryContext::getRelocationAt(uint64_t Address) const { 2269 ErrorOr<const BinarySection &> Section = getSectionForAddress(Address); 2270 if (!Section) 2271 return nullptr; 2272 2273 return Section->getRelocationAt(Address - Section->getAddress()); 2274 } 2275 2276 const Relocation * 2277 BinaryContext::getDynamicRelocationAt(uint64_t Address) const { 2278 ErrorOr<const BinarySection &> Section = getSectionForAddress(Address); 2279 if (!Section) 2280 return nullptr; 2281 2282 return Section->getDynamicRelocationAt(Address - Section->getAddress()); 2283 } 2284 2285 void BinaryContext::markAmbiguousRelocations(BinaryData &BD, 2286 const uint64_t Address) { 2287 auto setImmovable = [&](BinaryData &BD) { 2288 BinaryData *Root = BD.getAtomicRoot(); 2289 LLVM_DEBUG(if (Root->isMoveable()) { 2290 dbgs() << "BOLT-DEBUG: setting " << *Root << " as immovable " 2291 << "due to ambiguous relocation referencing 0x" 2292 << Twine::utohexstr(Address) << '\n'; 2293 }); 2294 Root->setIsMoveable(false); 2295 }; 2296 2297 if (Address == BD.getAddress()) { 2298 setImmovable(BD); 2299 2300 // Set previous symbol as immovable 2301 BinaryData *Prev = getBinaryDataContainingAddress(Address - 1); 2302 if (Prev && Prev->getEndAddress() == BD.getAddress()) 2303 setImmovable(*Prev); 2304 } 2305 2306 if (Address == BD.getEndAddress()) { 2307 setImmovable(BD); 2308 2309 // Set next symbol as immovable 2310 BinaryData *Next = getBinaryDataContainingAddress(BD.getEndAddress()); 2311 if (Next && Next->getAddress() == BD.getEndAddress()) 2312 setImmovable(*Next); 2313 } 2314 } 2315 2316 BinaryFunction *BinaryContext::getFunctionForSymbol(const MCSymbol *Symbol, 2317 uint64_t *EntryDesc) { 2318 std::shared_lock<llvm::sys::RWMutex> Lock(SymbolToFunctionMapMutex); 2319 auto BFI = SymbolToFunctionMap.find(Symbol); 2320 if (BFI == SymbolToFunctionMap.end()) 2321 return nullptr; 2322 2323 BinaryFunction *BF = BFI->second; 2324 if (EntryDesc) 2325 *EntryDesc = BF->getEntryIDForSymbol(Symbol); 2326 2327 return BF; 2328 } 2329 2330 std::string 2331 BinaryContext::generateBugReportMessage(StringRef Message, 2332 const BinaryFunction &Function) const { 2333 std::string Msg; 2334 raw_string_ostream SS(Msg); 2335 SS << "=======================================\n"; 2336 SS << "BOLT is unable to proceed because it couldn't properly understand " 2337 "this function.\n"; 2338 SS << "If you are running the most recent version of BOLT, you may " 2339 "want to " 2340 "report this and paste this dump.\nPlease check that there is no " 2341 "sensitive contents being shared in this dump.\n"; 2342 SS << "\nOffending function: " << Function.getPrintName() << "\n\n"; 2343 ScopedPrinter SP(SS); 2344 SP.printBinaryBlock("Function contents", *Function.getData()); 2345 SS << "\n"; 2346 const_cast<BinaryFunction &>(Function).print(SS, ""); 2347 SS << "ERROR: " << Message; 2348 SS << "\n=======================================\n"; 2349 return Msg; 2350 } 2351 2352 BinaryFunction * 2353 BinaryContext::createInjectedBinaryFunction(const std::string &Name, 2354 bool IsSimple) { 2355 InjectedBinaryFunctions.push_back(new BinaryFunction(Name, *this, IsSimple)); 2356 BinaryFunction *BF = InjectedBinaryFunctions.back(); 2357 setSymbolToFunctionMap(BF->getSymbol(), BF); 2358 BF->CurrentState = BinaryFunction::State::CFG; 2359 return BF; 2360 } 2361 2362 std::pair<size_t, size_t> 2363 BinaryContext::calculateEmittedSize(BinaryFunction &BF, bool FixBranches) { 2364 // Adjust branch instruction to match the current layout. 2365 if (FixBranches) 2366 BF.fixBranches(); 2367 2368 // Create local MC context to isolate the effect of ephemeral code emission. 2369 IndependentCodeEmitter MCEInstance = createIndependentMCCodeEmitter(); 2370 MCContext *LocalCtx = MCEInstance.LocalCtx.get(); 2371 MCAsmBackend *MAB = 2372 TheTarget->createMCAsmBackend(*STI, *MRI, MCTargetOptions()); 2373 2374 SmallString<256> Code; 2375 raw_svector_ostream VecOS(Code); 2376 2377 std::unique_ptr<MCObjectWriter> OW = MAB->createObjectWriter(VecOS); 2378 std::unique_ptr<MCStreamer> Streamer(TheTarget->createMCObjectStreamer( 2379 *TheTriple, *LocalCtx, std::unique_ptr<MCAsmBackend>(MAB), std::move(OW), 2380 std::unique_ptr<MCCodeEmitter>(MCEInstance.MCE.release()), *STI)); 2381 2382 Streamer->initSections(false, *STI); 2383 2384 MCSection *Section = MCEInstance.LocalMOFI->getTextSection(); 2385 Section->setHasInstructions(true); 2386 2387 // Create symbols in the LocalCtx so that they get destroyed with it. 2388 MCSymbol *StartLabel = LocalCtx->createTempSymbol(); 2389 MCSymbol *EndLabel = LocalCtx->createTempSymbol(); 2390 2391 Streamer->switchSection(Section); 2392 Streamer->emitLabel(StartLabel); 2393 emitFunctionBody(*Streamer, BF, BF.getLayout().getMainFragment(), 2394 /*EmitCodeOnly=*/true); 2395 Streamer->emitLabel(EndLabel); 2396 2397 using LabelRange = std::pair<const MCSymbol *, const MCSymbol *>; 2398 SmallVector<LabelRange> SplitLabels; 2399 for (FunctionFragment &FF : BF.getLayout().getSplitFragments()) { 2400 MCSymbol *const SplitStartLabel = LocalCtx->createTempSymbol(); 2401 MCSymbol *const SplitEndLabel = LocalCtx->createTempSymbol(); 2402 SplitLabels.emplace_back(SplitStartLabel, SplitEndLabel); 2403 2404 MCSectionELF *const SplitSection = LocalCtx->getELFSection( 2405 BF.getCodeSectionName(FF.getFragmentNum()), ELF::SHT_PROGBITS, 2406 ELF::SHF_EXECINSTR | ELF::SHF_ALLOC); 2407 SplitSection->setHasInstructions(true); 2408 Streamer->switchSection(SplitSection); 2409 2410 Streamer->emitLabel(SplitStartLabel); 2411 emitFunctionBody(*Streamer, BF, FF, /*EmitCodeOnly=*/true); 2412 Streamer->emitLabel(SplitEndLabel); 2413 } 2414 2415 MCAssembler &Assembler = 2416 static_cast<MCObjectStreamer *>(Streamer.get())->getAssembler(); 2417 Assembler.layout(); 2418 2419 // Obtain fragment sizes. 2420 std::vector<uint64_t> FragmentSizes; 2421 // Main fragment size. 2422 const uint64_t HotSize = Assembler.getSymbolOffset(*EndLabel) - 2423 Assembler.getSymbolOffset(*StartLabel); 2424 FragmentSizes.push_back(HotSize); 2425 // Split fragment sizes. 2426 uint64_t ColdSize = 0; 2427 for (const auto &Labels : SplitLabels) { 2428 uint64_t Size = Assembler.getSymbolOffset(*Labels.second) - 2429 Assembler.getSymbolOffset(*Labels.first); 2430 FragmentSizes.push_back(Size); 2431 ColdSize += Size; 2432 } 2433 2434 // Populate new start and end offsets of each basic block. 2435 uint64_t FragmentIndex = 0; 2436 for (FunctionFragment &FF : BF.getLayout().fragments()) { 2437 BinaryBasicBlock *PrevBB = nullptr; 2438 for (BinaryBasicBlock *BB : FF) { 2439 const uint64_t BBStartOffset = 2440 Assembler.getSymbolOffset(*(BB->getLabel())); 2441 BB->setOutputStartAddress(BBStartOffset); 2442 if (PrevBB) 2443 PrevBB->setOutputEndAddress(BBStartOffset); 2444 PrevBB = BB; 2445 } 2446 if (PrevBB) 2447 PrevBB->setOutputEndAddress(FragmentSizes[FragmentIndex]); 2448 FragmentIndex++; 2449 } 2450 2451 // Clean-up the effect of the code emission. 2452 for (const MCSymbol &Symbol : Assembler.symbols()) { 2453 MCSymbol *MutableSymbol = const_cast<MCSymbol *>(&Symbol); 2454 MutableSymbol->setUndefined(); 2455 MutableSymbol->setIsRegistered(false); 2456 } 2457 2458 return std::make_pair(HotSize, ColdSize); 2459 } 2460 2461 bool BinaryContext::validateInstructionEncoding( 2462 ArrayRef<uint8_t> InputSequence) const { 2463 MCInst Inst; 2464 uint64_t InstSize; 2465 DisAsm->getInstruction(Inst, InstSize, InputSequence, 0, nulls()); 2466 assert(InstSize == InputSequence.size() && 2467 "Disassembled instruction size does not match the sequence."); 2468 2469 SmallString<256> Code; 2470 SmallVector<MCFixup, 4> Fixups; 2471 2472 MCE->encodeInstruction(Inst, Code, Fixups, *STI); 2473 auto OutputSequence = ArrayRef<uint8_t>((uint8_t *)Code.data(), Code.size()); 2474 if (InputSequence != OutputSequence) { 2475 if (opts::Verbosity > 1) { 2476 this->errs() << "BOLT-WARNING: mismatched encoding detected\n" 2477 << " input: " << InputSequence << '\n' 2478 << " output: " << OutputSequence << '\n'; 2479 } 2480 return false; 2481 } 2482 2483 return true; 2484 } 2485 2486 uint64_t BinaryContext::getHotThreshold() const { 2487 static uint64_t Threshold = 0; 2488 if (Threshold == 0) { 2489 Threshold = std::max( 2490 (uint64_t)opts::ExecutionCountThreshold, 2491 NumProfiledFuncs ? SumExecutionCount / (2 * NumProfiledFuncs) : 1); 2492 } 2493 return Threshold; 2494 } 2495 2496 BinaryFunction *BinaryContext::getBinaryFunctionContainingAddress( 2497 uint64_t Address, bool CheckPastEnd, bool UseMaxSize) { 2498 auto FI = BinaryFunctions.upper_bound(Address); 2499 if (FI == BinaryFunctions.begin()) 2500 return nullptr; 2501 --FI; 2502 2503 const uint64_t UsedSize = 2504 UseMaxSize ? FI->second.getMaxSize() : FI->second.getSize(); 2505 2506 if (Address >= FI->first + UsedSize + (CheckPastEnd ? 1 : 0)) 2507 return nullptr; 2508 2509 return &FI->second; 2510 } 2511 2512 BinaryFunction *BinaryContext::getBinaryFunctionAtAddress(uint64_t Address) { 2513 // First, try to find a function starting at the given address. If the 2514 // function was folded, this will get us the original folded function if it 2515 // wasn't removed from the list, e.g. in non-relocation mode. 2516 auto BFI = BinaryFunctions.find(Address); 2517 if (BFI != BinaryFunctions.end()) 2518 return &BFI->second; 2519 2520 // We might have folded the function matching the object at the given 2521 // address. In such case, we look for a function matching the symbol 2522 // registered at the original address. The new function (the one that the 2523 // original was folded into) will hold the symbol. 2524 if (const BinaryData *BD = getBinaryDataAtAddress(Address)) { 2525 uint64_t EntryID = 0; 2526 BinaryFunction *BF = getFunctionForSymbol(BD->getSymbol(), &EntryID); 2527 if (BF && EntryID == 0) 2528 return BF; 2529 } 2530 return nullptr; 2531 } 2532 2533 /// Deregister JumpTable registered at a given \p Address and delete it. 2534 void BinaryContext::deleteJumpTable(uint64_t Address) { 2535 assert(JumpTables.count(Address) && "Must have a jump table at address"); 2536 JumpTable *JT = JumpTables.at(Address); 2537 for (BinaryFunction *Parent : JT->Parents) 2538 Parent->JumpTables.erase(Address); 2539 JumpTables.erase(Address); 2540 delete JT; 2541 } 2542 2543 DebugAddressRangesVector BinaryContext::translateModuleAddressRanges( 2544 const DWARFAddressRangesVector &InputRanges) const { 2545 DebugAddressRangesVector OutputRanges; 2546 2547 for (const DWARFAddressRange Range : InputRanges) { 2548 auto BFI = BinaryFunctions.lower_bound(Range.LowPC); 2549 while (BFI != BinaryFunctions.end()) { 2550 const BinaryFunction &Function = BFI->second; 2551 if (Function.getAddress() >= Range.HighPC) 2552 break; 2553 const DebugAddressRangesVector FunctionRanges = 2554 Function.getOutputAddressRanges(); 2555 llvm::move(FunctionRanges, std::back_inserter(OutputRanges)); 2556 std::advance(BFI, 1); 2557 } 2558 } 2559 2560 return OutputRanges; 2561 } 2562 2563 } // namespace bolt 2564 } // namespace llvm 2565