1 //===- bolt/Core/BinaryContext.cpp - Low-level context --------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file implements the BinaryContext class. 10 // 11 //===----------------------------------------------------------------------===// 12 13 #include "bolt/Core/BinaryContext.h" 14 #include "bolt/Core/BinaryEmitter.h" 15 #include "bolt/Core/BinaryFunction.h" 16 #include "bolt/Utils/CommandLineOpts.h" 17 #include "bolt/Utils/Utils.h" 18 #include "llvm/ADT/STLExtras.h" 19 #include "llvm/ADT/Twine.h" 20 #include "llvm/DebugInfo/DWARF/DWARFCompileUnit.h" 21 #include "llvm/DebugInfo/DWARF/DWARFFormValue.h" 22 #include "llvm/DebugInfo/DWARF/DWARFUnit.h" 23 #include "llvm/MC/MCAssembler.h" 24 #include "llvm/MC/MCContext.h" 25 #include "llvm/MC/MCDisassembler/MCDisassembler.h" 26 #include "llvm/MC/MCInstPrinter.h" 27 #include "llvm/MC/MCObjectStreamer.h" 28 #include "llvm/MC/MCObjectWriter.h" 29 #include "llvm/MC/MCRegisterInfo.h" 30 #include "llvm/MC/MCSectionELF.h" 31 #include "llvm/MC/MCStreamer.h" 32 #include "llvm/MC/MCSubtargetInfo.h" 33 #include "llvm/MC/MCSymbol.h" 34 #include "llvm/Support/CommandLine.h" 35 #include "llvm/Support/Error.h" 36 #include "llvm/Support/Regex.h" 37 #include <algorithm> 38 #include <functional> 39 #include <iterator> 40 #include <unordered_set> 41 42 using namespace llvm; 43 44 #undef DEBUG_TYPE 45 #define DEBUG_TYPE "bolt" 46 47 namespace opts { 48 49 cl::opt<bool> NoHugePages("no-huge-pages", 50 cl::desc("use regular size pages for code alignment"), 51 cl::Hidden, cl::cat(BoltCategory)); 52 53 static cl::opt<bool> 54 PrintDebugInfo("print-debug-info", 55 cl::desc("print debug info when printing functions"), 56 cl::Hidden, 57 cl::ZeroOrMore, 58 cl::cat(BoltCategory)); 59 60 cl::opt<bool> PrintRelocations( 61 "print-relocations", 62 cl::desc("print relocations when printing functions/objects"), cl::Hidden, 63 cl::cat(BoltCategory)); 64 65 static cl::opt<bool> 66 PrintMemData("print-mem-data", 67 cl::desc("print memory data annotations when printing functions"), 68 cl::Hidden, 69 cl::ZeroOrMore, 70 cl::cat(BoltCategory)); 71 72 cl::opt<std::string> CompDirOverride( 73 "comp-dir-override", 74 cl::desc("overrides DW_AT_comp_dir, and provides an alterantive base " 75 "location, which is used with DW_AT_dwo_name to construct a path " 76 "to *.dwo files."), 77 cl::Hidden, cl::init(""), cl::cat(BoltCategory)); 78 } // namespace opts 79 80 namespace llvm { 81 namespace bolt { 82 83 char BOLTError::ID = 0; 84 85 BOLTError::BOLTError(bool IsFatal, const Twine &S) 86 : IsFatal(IsFatal), Msg(S.str()) {} 87 88 void BOLTError::log(raw_ostream &OS) const { 89 if (IsFatal) 90 OS << "FATAL "; 91 StringRef ErrMsg = StringRef(Msg); 92 // Prepend our error prefix if it is missing 93 if (ErrMsg.empty()) { 94 OS << "BOLT-ERROR\n"; 95 } else { 96 if (!ErrMsg.starts_with("BOLT-ERROR")) 97 OS << "BOLT-ERROR: "; 98 OS << ErrMsg << "\n"; 99 } 100 } 101 102 std::error_code BOLTError::convertToErrorCode() const { 103 return inconvertibleErrorCode(); 104 } 105 106 Error createNonFatalBOLTError(const Twine &S) { 107 return make_error<BOLTError>(/*IsFatal*/ false, S); 108 } 109 110 Error createFatalBOLTError(const Twine &S) { 111 return make_error<BOLTError>(/*IsFatal*/ true, S); 112 } 113 114 void BinaryContext::logBOLTErrorsAndQuitOnFatal(Error E) { 115 handleAllErrors(Error(std::move(E)), [&](const BOLTError &E) { 116 if (!E.getMessage().empty()) 117 E.log(this->errs()); 118 if (E.isFatal()) 119 exit(1); 120 }); 121 } 122 123 BinaryContext::BinaryContext(std::unique_ptr<MCContext> Ctx, 124 std::unique_ptr<DWARFContext> DwCtx, 125 std::unique_ptr<Triple> TheTriple, 126 const Target *TheTarget, std::string TripleName, 127 std::unique_ptr<MCCodeEmitter> MCE, 128 std::unique_ptr<MCObjectFileInfo> MOFI, 129 std::unique_ptr<const MCAsmInfo> AsmInfo, 130 std::unique_ptr<const MCInstrInfo> MII, 131 std::unique_ptr<const MCSubtargetInfo> STI, 132 std::unique_ptr<MCInstPrinter> InstPrinter, 133 std::unique_ptr<const MCInstrAnalysis> MIA, 134 std::unique_ptr<MCPlusBuilder> MIB, 135 std::unique_ptr<const MCRegisterInfo> MRI, 136 std::unique_ptr<MCDisassembler> DisAsm, 137 JournalingStreams Logger) 138 : Ctx(std::move(Ctx)), DwCtx(std::move(DwCtx)), 139 TheTriple(std::move(TheTriple)), TheTarget(TheTarget), 140 TripleName(TripleName), MCE(std::move(MCE)), MOFI(std::move(MOFI)), 141 AsmInfo(std::move(AsmInfo)), MII(std::move(MII)), STI(std::move(STI)), 142 InstPrinter(std::move(InstPrinter)), MIA(std::move(MIA)), 143 MIB(std::move(MIB)), MRI(std::move(MRI)), DisAsm(std::move(DisAsm)), 144 Logger(Logger), InitialDynoStats(isAArch64()) { 145 Relocation::Arch = this->TheTriple->getArch(); 146 RegularPageSize = isAArch64() ? RegularPageSizeAArch64 : RegularPageSizeX86; 147 PageAlign = opts::NoHugePages ? RegularPageSize : HugePageSize; 148 } 149 150 BinaryContext::~BinaryContext() { 151 for (BinarySection *Section : Sections) 152 delete Section; 153 for (BinaryFunction *InjectedFunction : InjectedBinaryFunctions) 154 delete InjectedFunction; 155 for (std::pair<const uint64_t, JumpTable *> JTI : JumpTables) 156 delete JTI.second; 157 clearBinaryData(); 158 } 159 160 /// Create BinaryContext for a given architecture \p ArchName and 161 /// triple \p TripleName. 162 Expected<std::unique_ptr<BinaryContext>> BinaryContext::createBinaryContext( 163 Triple TheTriple, StringRef InputFileName, SubtargetFeatures *Features, 164 bool IsPIC, std::unique_ptr<DWARFContext> DwCtx, JournalingStreams Logger) { 165 StringRef ArchName = ""; 166 std::string FeaturesStr = ""; 167 switch (TheTriple.getArch()) { 168 case llvm::Triple::x86_64: 169 if (Features) 170 return createFatalBOLTError( 171 "x86_64 target does not use SubtargetFeatures"); 172 ArchName = "x86-64"; 173 FeaturesStr = "+nopl"; 174 break; 175 case llvm::Triple::aarch64: 176 if (Features) 177 return createFatalBOLTError( 178 "AArch64 target does not use SubtargetFeatures"); 179 ArchName = "aarch64"; 180 FeaturesStr = "+all"; 181 break; 182 case llvm::Triple::riscv64: { 183 ArchName = "riscv64"; 184 if (!Features) 185 return createFatalBOLTError("RISCV target needs SubtargetFeatures"); 186 // We rely on relaxation for some transformations (e.g., promoting all calls 187 // to PseudoCALL and then making JITLink relax them). Since the relax 188 // feature is not stored in the object file, we manually enable it. 189 Features->AddFeature("relax"); 190 FeaturesStr = Features->getString(); 191 break; 192 } 193 default: 194 return createStringError(std::errc::not_supported, 195 "BOLT-ERROR: Unrecognized machine in ELF file"); 196 } 197 198 const std::string TripleName = TheTriple.str(); 199 200 std::string Error; 201 const Target *TheTarget = 202 TargetRegistry::lookupTarget(std::string(ArchName), TheTriple, Error); 203 if (!TheTarget) 204 return createStringError(make_error_code(std::errc::not_supported), 205 Twine("BOLT-ERROR: ", Error)); 206 207 std::unique_ptr<const MCRegisterInfo> MRI( 208 TheTarget->createMCRegInfo(TripleName)); 209 if (!MRI) 210 return createStringError( 211 make_error_code(std::errc::not_supported), 212 Twine("BOLT-ERROR: no register info for target ", TripleName)); 213 214 // Set up disassembler. 215 std::unique_ptr<MCAsmInfo> AsmInfo( 216 TheTarget->createMCAsmInfo(*MRI, TripleName, MCTargetOptions())); 217 if (!AsmInfo) 218 return createStringError( 219 make_error_code(std::errc::not_supported), 220 Twine("BOLT-ERROR: no assembly info for target ", TripleName)); 221 // BOLT creates "func@PLT" symbols for PLT entries. In function assembly dump 222 // we want to emit such names as using @PLT without double quotes to convey 223 // variant kind to the assembler. BOLT doesn't rely on the linker so we can 224 // override the default AsmInfo behavior to emit names the way we want. 225 AsmInfo->setAllowAtInName(true); 226 227 std::unique_ptr<const MCSubtargetInfo> STI( 228 TheTarget->createMCSubtargetInfo(TripleName, "", FeaturesStr)); 229 if (!STI) 230 return createStringError( 231 make_error_code(std::errc::not_supported), 232 Twine("BOLT-ERROR: no subtarget info for target ", TripleName)); 233 234 std::unique_ptr<const MCInstrInfo> MII(TheTarget->createMCInstrInfo()); 235 if (!MII) 236 return createStringError( 237 make_error_code(std::errc::not_supported), 238 Twine("BOLT-ERROR: no instruction info for target ", TripleName)); 239 240 std::unique_ptr<MCContext> Ctx( 241 new MCContext(TheTriple, AsmInfo.get(), MRI.get(), STI.get())); 242 std::unique_ptr<MCObjectFileInfo> MOFI( 243 TheTarget->createMCObjectFileInfo(*Ctx, IsPIC)); 244 Ctx->setObjectFileInfo(MOFI.get()); 245 // We do not support X86 Large code model. Change this in the future. 246 bool Large = false; 247 if (TheTriple.getArch() == llvm::Triple::aarch64) 248 Large = true; 249 unsigned LSDAEncoding = 250 Large ? dwarf::DW_EH_PE_absptr : dwarf::DW_EH_PE_udata4; 251 if (IsPIC) { 252 LSDAEncoding = dwarf::DW_EH_PE_pcrel | 253 (Large ? dwarf::DW_EH_PE_sdata8 : dwarf::DW_EH_PE_sdata4); 254 } 255 256 std::unique_ptr<MCDisassembler> DisAsm( 257 TheTarget->createMCDisassembler(*STI, *Ctx)); 258 259 if (!DisAsm) 260 return createStringError( 261 make_error_code(std::errc::not_supported), 262 Twine("BOLT-ERROR: no disassembler info for target ", TripleName)); 263 264 std::unique_ptr<const MCInstrAnalysis> MIA( 265 TheTarget->createMCInstrAnalysis(MII.get())); 266 if (!MIA) 267 return createStringError( 268 make_error_code(std::errc::not_supported), 269 Twine("BOLT-ERROR: failed to create instruction analysis for target ", 270 TripleName)); 271 272 int AsmPrinterVariant = AsmInfo->getAssemblerDialect(); 273 std::unique_ptr<MCInstPrinter> InstructionPrinter( 274 TheTarget->createMCInstPrinter(TheTriple, AsmPrinterVariant, *AsmInfo, 275 *MII, *MRI)); 276 if (!InstructionPrinter) 277 return createStringError( 278 make_error_code(std::errc::not_supported), 279 Twine("BOLT-ERROR: no instruction printer for target ", TripleName)); 280 InstructionPrinter->setPrintImmHex(true); 281 282 std::unique_ptr<MCCodeEmitter> MCE( 283 TheTarget->createMCCodeEmitter(*MII, *Ctx)); 284 285 auto BC = std::make_unique<BinaryContext>( 286 std::move(Ctx), std::move(DwCtx), std::make_unique<Triple>(TheTriple), 287 TheTarget, std::string(TripleName), std::move(MCE), std::move(MOFI), 288 std::move(AsmInfo), std::move(MII), std::move(STI), 289 std::move(InstructionPrinter), std::move(MIA), nullptr, std::move(MRI), 290 std::move(DisAsm), Logger); 291 292 BC->LSDAEncoding = LSDAEncoding; 293 294 BC->MAB = std::unique_ptr<MCAsmBackend>( 295 BC->TheTarget->createMCAsmBackend(*BC->STI, *BC->MRI, MCTargetOptions())); 296 297 BC->setFilename(InputFileName); 298 299 BC->HasFixedLoadAddress = !IsPIC; 300 301 BC->SymbolicDisAsm = std::unique_ptr<MCDisassembler>( 302 BC->TheTarget->createMCDisassembler(*BC->STI, *BC->Ctx)); 303 304 if (!BC->SymbolicDisAsm) 305 return createStringError( 306 make_error_code(std::errc::not_supported), 307 Twine("BOLT-ERROR: no disassembler info for target ", TripleName)); 308 309 return std::move(BC); 310 } 311 312 bool BinaryContext::forceSymbolRelocations(StringRef SymbolName) const { 313 if (opts::HotText && 314 (SymbolName == "__hot_start" || SymbolName == "__hot_end")) 315 return true; 316 317 if (opts::HotData && 318 (SymbolName == "__hot_data_start" || SymbolName == "__hot_data_end")) 319 return true; 320 321 if (SymbolName == "_end") 322 return true; 323 324 return false; 325 } 326 327 std::unique_ptr<MCObjectWriter> 328 BinaryContext::createObjectWriter(raw_pwrite_stream &OS) { 329 return MAB->createObjectWriter(OS); 330 } 331 332 bool BinaryContext::validateObjectNesting() const { 333 auto Itr = BinaryDataMap.begin(); 334 auto End = BinaryDataMap.end(); 335 bool Valid = true; 336 while (Itr != End) { 337 auto Next = std::next(Itr); 338 while (Next != End && 339 Itr->second->getSection() == Next->second->getSection() && 340 Itr->second->containsRange(Next->second->getAddress(), 341 Next->second->getSize())) { 342 if (Next->second->Parent != Itr->second) { 343 this->errs() << "BOLT-WARNING: object nesting incorrect for:\n" 344 << "BOLT-WARNING: " << *Itr->second << "\n" 345 << "BOLT-WARNING: " << *Next->second << "\n"; 346 Valid = false; 347 } 348 ++Next; 349 } 350 Itr = Next; 351 } 352 return Valid; 353 } 354 355 bool BinaryContext::validateHoles() const { 356 bool Valid = true; 357 for (BinarySection &Section : sections()) { 358 for (const Relocation &Rel : Section.relocations()) { 359 uint64_t RelAddr = Rel.Offset + Section.getAddress(); 360 const BinaryData *BD = getBinaryDataContainingAddress(RelAddr); 361 if (!BD) { 362 this->errs() 363 << "BOLT-WARNING: no BinaryData found for relocation at address" 364 << " 0x" << Twine::utohexstr(RelAddr) << " in " << Section.getName() 365 << "\n"; 366 Valid = false; 367 } else if (!BD->getAtomicRoot()) { 368 this->errs() 369 << "BOLT-WARNING: no atomic BinaryData found for relocation at " 370 << "address 0x" << Twine::utohexstr(RelAddr) << " in " 371 << Section.getName() << "\n"; 372 Valid = false; 373 } 374 } 375 } 376 return Valid; 377 } 378 379 void BinaryContext::updateObjectNesting(BinaryDataMapType::iterator GAI) { 380 const uint64_t Address = GAI->second->getAddress(); 381 const uint64_t Size = GAI->second->getSize(); 382 383 auto fixParents = [&](BinaryDataMapType::iterator Itr, 384 BinaryData *NewParent) { 385 BinaryData *OldParent = Itr->second->Parent; 386 Itr->second->Parent = NewParent; 387 ++Itr; 388 while (Itr != BinaryDataMap.end() && OldParent && 389 Itr->second->Parent == OldParent) { 390 Itr->second->Parent = NewParent; 391 ++Itr; 392 } 393 }; 394 395 // Check if the previous symbol contains the newly added symbol. 396 if (GAI != BinaryDataMap.begin()) { 397 BinaryData *Prev = std::prev(GAI)->second; 398 while (Prev) { 399 if (Prev->getSection() == GAI->second->getSection() && 400 Prev->containsRange(Address, Size)) { 401 fixParents(GAI, Prev); 402 } else { 403 fixParents(GAI, nullptr); 404 } 405 Prev = Prev->Parent; 406 } 407 } 408 409 // Check if the newly added symbol contains any subsequent symbols. 410 if (Size != 0) { 411 BinaryData *BD = GAI->second->Parent ? GAI->second->Parent : GAI->second; 412 auto Itr = std::next(GAI); 413 while ( 414 Itr != BinaryDataMap.end() && 415 BD->containsRange(Itr->second->getAddress(), Itr->second->getSize())) { 416 Itr->second->Parent = BD; 417 ++Itr; 418 } 419 } 420 } 421 422 iterator_range<BinaryContext::binary_data_iterator> 423 BinaryContext::getSubBinaryData(BinaryData *BD) { 424 auto Start = std::next(BinaryDataMap.find(BD->getAddress())); 425 auto End = Start; 426 while (End != BinaryDataMap.end() && BD->isAncestorOf(End->second)) 427 ++End; 428 return make_range(Start, End); 429 } 430 431 std::pair<const MCSymbol *, uint64_t> 432 BinaryContext::handleAddressRef(uint64_t Address, BinaryFunction &BF, 433 bool IsPCRel) { 434 if (isAArch64()) { 435 // Check if this is an access to a constant island and create bookkeeping 436 // to keep track of it and emit it later as part of this function. 437 if (MCSymbol *IslandSym = BF.getOrCreateIslandAccess(Address)) 438 return std::make_pair(IslandSym, 0); 439 440 // Detect custom code written in assembly that refers to arbitrary 441 // constant islands from other functions. Write this reference so we 442 // can pull this constant island and emit it as part of this function 443 // too. 444 auto IslandIter = AddressToConstantIslandMap.lower_bound(Address); 445 446 if (IslandIter != AddressToConstantIslandMap.begin() && 447 (IslandIter == AddressToConstantIslandMap.end() || 448 IslandIter->first > Address)) 449 --IslandIter; 450 451 if (IslandIter != AddressToConstantIslandMap.end()) { 452 // Fall-back to referencing the original constant island in the presence 453 // of dynamic relocs, as we currently do not support cloning them. 454 // Notice: we might fail to link because of this, if the original constant 455 // island we are referring would be emitted too far away. 456 if (IslandIter->second->hasDynamicRelocationAtIsland()) { 457 MCSymbol *IslandSym = 458 IslandIter->second->getOrCreateIslandAccess(Address); 459 if (IslandSym) 460 return std::make_pair(IslandSym, 0); 461 } else if (MCSymbol *IslandSym = 462 IslandIter->second->getOrCreateProxyIslandAccess(Address, 463 BF)) { 464 BF.createIslandDependency(IslandSym, IslandIter->second); 465 return std::make_pair(IslandSym, 0); 466 } 467 } 468 } 469 470 // Note that the address does not necessarily have to reside inside 471 // a section, it could be an absolute address too. 472 ErrorOr<BinarySection &> Section = getSectionForAddress(Address); 473 if (Section && Section->isText()) { 474 if (BF.containsAddress(Address, /*UseMaxSize=*/isAArch64())) { 475 if (Address != BF.getAddress()) { 476 // The address could potentially escape. Mark it as another entry 477 // point into the function. 478 if (opts::Verbosity >= 1) { 479 this->outs() << "BOLT-INFO: potentially escaped address 0x" 480 << Twine::utohexstr(Address) << " in function " << BF 481 << '\n'; 482 } 483 BF.HasInternalLabelReference = true; 484 return std::make_pair( 485 BF.addEntryPointAtOffset(Address - BF.getAddress()), 0); 486 } 487 } else { 488 addInterproceduralReference(&BF, Address); 489 } 490 } 491 492 // With relocations, catch jump table references outside of the basic block 493 // containing the indirect jump. 494 if (HasRelocations) { 495 const MemoryContentsType MemType = analyzeMemoryAt(Address, BF); 496 if (MemType == MemoryContentsType::POSSIBLE_PIC_JUMP_TABLE && IsPCRel) { 497 const MCSymbol *Symbol = 498 getOrCreateJumpTable(BF, Address, JumpTable::JTT_PIC); 499 500 return std::make_pair(Symbol, 0); 501 } 502 } 503 504 if (BinaryData *BD = getBinaryDataContainingAddress(Address)) 505 return std::make_pair(BD->getSymbol(), Address - BD->getAddress()); 506 507 // TODO: use DWARF info to get size/alignment here? 508 MCSymbol *TargetSymbol = getOrCreateGlobalSymbol(Address, "DATAat"); 509 LLVM_DEBUG(dbgs() << "Created symbol " << TargetSymbol->getName() << '\n'); 510 return std::make_pair(TargetSymbol, 0); 511 } 512 513 MemoryContentsType BinaryContext::analyzeMemoryAt(uint64_t Address, 514 BinaryFunction &BF) { 515 if (!isX86()) 516 return MemoryContentsType::UNKNOWN; 517 518 ErrorOr<BinarySection &> Section = getSectionForAddress(Address); 519 if (!Section) { 520 // No section - possibly an absolute address. Since we don't allow 521 // internal function addresses to escape the function scope - we 522 // consider it a tail call. 523 if (opts::Verbosity > 1) { 524 this->errs() << "BOLT-WARNING: no section for address 0x" 525 << Twine::utohexstr(Address) << " referenced from function " 526 << BF << '\n'; 527 } 528 return MemoryContentsType::UNKNOWN; 529 } 530 531 if (Section->isVirtual()) { 532 // The contents are filled at runtime. 533 return MemoryContentsType::UNKNOWN; 534 } 535 536 // No support for jump tables in code yet. 537 if (Section->isText()) 538 return MemoryContentsType::UNKNOWN; 539 540 // Start with checking for PIC jump table. We expect non-PIC jump tables 541 // to have high 32 bits set to 0. 542 if (analyzeJumpTable(Address, JumpTable::JTT_PIC, BF)) 543 return MemoryContentsType::POSSIBLE_PIC_JUMP_TABLE; 544 545 if (analyzeJumpTable(Address, JumpTable::JTT_NORMAL, BF)) 546 return MemoryContentsType::POSSIBLE_JUMP_TABLE; 547 548 return MemoryContentsType::UNKNOWN; 549 } 550 551 bool BinaryContext::analyzeJumpTable(const uint64_t Address, 552 const JumpTable::JumpTableType Type, 553 const BinaryFunction &BF, 554 const uint64_t NextJTAddress, 555 JumpTable::AddressesType *EntriesAsAddress, 556 bool *HasEntryInFragment) const { 557 // Target address of __builtin_unreachable. 558 const uint64_t UnreachableAddress = BF.getAddress() + BF.getSize(); 559 560 // Is one of the targets __builtin_unreachable? 561 bool HasUnreachable = false; 562 563 // Does one of the entries match function start address? 564 bool HasStartAsEntry = false; 565 566 // Number of targets other than __builtin_unreachable. 567 uint64_t NumRealEntries = 0; 568 569 // Size of the jump table without trailing __builtin_unreachable entries. 570 size_t TrimmedSize = 0; 571 572 auto addEntryAddress = [&](uint64_t EntryAddress, bool Unreachable = false) { 573 if (!EntriesAsAddress) 574 return; 575 EntriesAsAddress->emplace_back(EntryAddress); 576 if (!Unreachable) 577 TrimmedSize = EntriesAsAddress->size(); 578 }; 579 580 ErrorOr<const BinarySection &> Section = getSectionForAddress(Address); 581 if (!Section) 582 return false; 583 584 // The upper bound is defined by containing object, section limits, and 585 // the next jump table in memory. 586 uint64_t UpperBound = Section->getEndAddress(); 587 const BinaryData *JumpTableBD = getBinaryDataAtAddress(Address); 588 if (JumpTableBD && JumpTableBD->getSize()) { 589 assert(JumpTableBD->getEndAddress() <= UpperBound && 590 "data object cannot cross a section boundary"); 591 UpperBound = JumpTableBD->getEndAddress(); 592 } 593 if (NextJTAddress) 594 UpperBound = std::min(NextJTAddress, UpperBound); 595 596 LLVM_DEBUG({ 597 using JTT = JumpTable::JumpTableType; 598 dbgs() << formatv("BOLT-DEBUG: analyzeJumpTable @{0:x} in {1}, JTT={2}\n", 599 Address, BF.getPrintName(), 600 Type == JTT::JTT_PIC ? "PIC" : "Normal"); 601 }); 602 const uint64_t EntrySize = getJumpTableEntrySize(Type); 603 for (uint64_t EntryAddress = Address; EntryAddress <= UpperBound - EntrySize; 604 EntryAddress += EntrySize) { 605 LLVM_DEBUG(dbgs() << " * Checking 0x" << Twine::utohexstr(EntryAddress) 606 << " -> "); 607 // Check if there's a proper relocation against the jump table entry. 608 if (HasRelocations) { 609 if (Type == JumpTable::JTT_PIC && 610 !DataPCRelocations.count(EntryAddress)) { 611 LLVM_DEBUG( 612 dbgs() << "FAIL: JTT_PIC table, no relocation for this address\n"); 613 break; 614 } 615 if (Type == JumpTable::JTT_NORMAL && !getRelocationAt(EntryAddress)) { 616 LLVM_DEBUG( 617 dbgs() 618 << "FAIL: JTT_NORMAL table, no relocation for this address\n"); 619 break; 620 } 621 } 622 623 const uint64_t Value = 624 (Type == JumpTable::JTT_PIC) 625 ? Address + *getSignedValueAtAddress(EntryAddress, EntrySize) 626 : *getPointerAtAddress(EntryAddress); 627 628 // __builtin_unreachable() case. 629 if (Value == UnreachableAddress) { 630 addEntryAddress(Value, /*Unreachable*/ true); 631 HasUnreachable = true; 632 LLVM_DEBUG(dbgs() << formatv("OK: {0:x} __builtin_unreachable\n", Value)); 633 continue; 634 } 635 636 // Function start is another special case. It is allowed in the jump table, 637 // but we need at least one another regular entry to distinguish the table 638 // from, e.g. a function pointer array. 639 if (Value == BF.getAddress()) { 640 HasStartAsEntry = true; 641 addEntryAddress(Value); 642 continue; 643 } 644 645 // Function or one of its fragments. 646 const BinaryFunction *TargetBF = getBinaryFunctionContainingAddress(Value); 647 const bool DoesBelongToFunction = 648 BF.containsAddress(Value) || 649 (TargetBF && TargetBF->isParentOrChildOf(BF)); 650 if (!DoesBelongToFunction) { 651 LLVM_DEBUG({ 652 if (!BF.containsAddress(Value)) { 653 dbgs() << "FAIL: function doesn't contain this address\n"; 654 if (TargetBF) { 655 dbgs() << " ! function containing this address: " 656 << TargetBF->getPrintName() << '\n'; 657 if (TargetBF->isFragment()) { 658 dbgs() << " ! is a fragment"; 659 for (BinaryFunction *Parent : TargetBF->ParentFragments) 660 dbgs() << ", parent: " << Parent->getPrintName(); 661 dbgs() << '\n'; 662 } 663 } 664 } 665 }); 666 break; 667 } 668 669 // Check there's an instruction at this offset. 670 if (TargetBF->getState() == BinaryFunction::State::Disassembled && 671 !TargetBF->getInstructionAtOffset(Value - TargetBF->getAddress())) { 672 LLVM_DEBUG(dbgs() << formatv("FAIL: no instruction at {0:x}\n", Value)); 673 break; 674 } 675 676 ++NumRealEntries; 677 LLVM_DEBUG(dbgs() << formatv("OK: {0:x} real entry\n", Value)); 678 679 if (TargetBF != &BF && HasEntryInFragment) 680 *HasEntryInFragment = true; 681 addEntryAddress(Value); 682 } 683 684 // Trim direct/normal jump table to exclude trailing unreachable entries that 685 // can collide with a function address. 686 if (Type == JumpTable::JTT_NORMAL && EntriesAsAddress && 687 TrimmedSize != EntriesAsAddress->size() && 688 getBinaryFunctionAtAddress(UnreachableAddress)) 689 EntriesAsAddress->resize(TrimmedSize); 690 691 // It's a jump table if the number of real entries is more than 1, or there's 692 // one real entry and one or more special targets. If there are only multiple 693 // special targets, then it's not a jump table. 694 return NumRealEntries + (HasUnreachable || HasStartAsEntry) >= 2; 695 } 696 697 void BinaryContext::populateJumpTables() { 698 LLVM_DEBUG(dbgs() << "DataPCRelocations: " << DataPCRelocations.size() 699 << '\n'); 700 for (auto JTI = JumpTables.begin(), JTE = JumpTables.end(); JTI != JTE; 701 ++JTI) { 702 JumpTable *JT = JTI->second; 703 704 bool NonSimpleParent = false; 705 for (BinaryFunction *BF : JT->Parents) 706 NonSimpleParent |= !BF->isSimple(); 707 if (NonSimpleParent) 708 continue; 709 710 uint64_t NextJTAddress = 0; 711 auto NextJTI = std::next(JTI); 712 if (NextJTI != JTE) 713 NextJTAddress = NextJTI->second->getAddress(); 714 715 const bool Success = 716 analyzeJumpTable(JT->getAddress(), JT->Type, *(JT->Parents[0]), 717 NextJTAddress, &JT->EntriesAsAddress, &JT->IsSplit); 718 if (!Success) { 719 LLVM_DEBUG({ 720 dbgs() << "failed to analyze "; 721 JT->print(dbgs()); 722 if (NextJTI != JTE) { 723 dbgs() << "next "; 724 NextJTI->second->print(dbgs()); 725 } 726 }); 727 llvm_unreachable("jump table heuristic failure"); 728 } 729 for (BinaryFunction *Frag : JT->Parents) { 730 if (JT->IsSplit) 731 Frag->setHasIndirectTargetToSplitFragment(true); 732 for (uint64_t EntryAddress : JT->EntriesAsAddress) 733 // if target is builtin_unreachable 734 if (EntryAddress == Frag->getAddress() + Frag->getSize()) { 735 Frag->IgnoredBranches.emplace_back(EntryAddress - Frag->getAddress(), 736 Frag->getSize()); 737 } else if (EntryAddress >= Frag->getAddress() && 738 EntryAddress < Frag->getAddress() + Frag->getSize()) { 739 Frag->registerReferencedOffset(EntryAddress - Frag->getAddress()); 740 } 741 } 742 743 // In strict mode, erase PC-relative relocation record. Later we check that 744 // all such records are erased and thus have been accounted for. 745 if (opts::StrictMode && JT->Type == JumpTable::JTT_PIC) { 746 for (uint64_t Address = JT->getAddress(); 747 Address < JT->getAddress() + JT->getSize(); 748 Address += JT->EntrySize) { 749 DataPCRelocations.erase(DataPCRelocations.find(Address)); 750 } 751 } 752 753 // Mark to skip the function and all its fragments. 754 for (BinaryFunction *Frag : JT->Parents) 755 if (Frag->hasIndirectTargetToSplitFragment()) 756 addFragmentsToSkip(Frag); 757 } 758 759 if (opts::StrictMode && DataPCRelocations.size()) { 760 LLVM_DEBUG({ 761 dbgs() << DataPCRelocations.size() 762 << " unclaimed PC-relative relocations left in data:\n"; 763 for (uint64_t Reloc : DataPCRelocations) 764 dbgs() << Twine::utohexstr(Reloc) << '\n'; 765 }); 766 assert(0 && "unclaimed PC-relative relocations left in data\n"); 767 } 768 clearList(DataPCRelocations); 769 } 770 771 void BinaryContext::skipMarkedFragments() { 772 std::vector<BinaryFunction *> FragmentQueue; 773 // Copy the functions to FragmentQueue. 774 FragmentQueue.assign(FragmentsToSkip.begin(), FragmentsToSkip.end()); 775 auto addToWorklist = [&](BinaryFunction *Function) -> void { 776 if (FragmentsToSkip.count(Function)) 777 return; 778 FragmentQueue.push_back(Function); 779 addFragmentsToSkip(Function); 780 }; 781 // Functions containing split jump tables need to be skipped with all 782 // fragments (transitively). 783 for (size_t I = 0; I != FragmentQueue.size(); I++) { 784 BinaryFunction *BF = FragmentQueue[I]; 785 assert(FragmentsToSkip.count(BF) && 786 "internal error in traversing function fragments"); 787 if (opts::Verbosity >= 1) 788 this->errs() << "BOLT-WARNING: Ignoring " << BF->getPrintName() << '\n'; 789 BF->setSimple(false); 790 BF->setHasIndirectTargetToSplitFragment(true); 791 792 llvm::for_each(BF->Fragments, addToWorklist); 793 llvm::for_each(BF->ParentFragments, addToWorklist); 794 } 795 if (!FragmentsToSkip.empty()) 796 this->errs() << "BOLT-WARNING: skipped " << FragmentsToSkip.size() 797 << " function" << (FragmentsToSkip.size() == 1 ? "" : "s") 798 << " due to cold fragments\n"; 799 } 800 801 MCSymbol *BinaryContext::getOrCreateGlobalSymbol(uint64_t Address, Twine Prefix, 802 uint64_t Size, 803 uint16_t Alignment, 804 unsigned Flags) { 805 auto Itr = BinaryDataMap.find(Address); 806 if (Itr != BinaryDataMap.end()) { 807 assert(Itr->second->getSize() == Size || !Size); 808 return Itr->second->getSymbol(); 809 } 810 811 std::string Name = (Prefix + "0x" + Twine::utohexstr(Address)).str(); 812 assert(!GlobalSymbols.count(Name) && "created name is not unique"); 813 return registerNameAtAddress(Name, Address, Size, Alignment, Flags); 814 } 815 816 MCSymbol *BinaryContext::getOrCreateUndefinedGlobalSymbol(StringRef Name) { 817 return Ctx->getOrCreateSymbol(Name); 818 } 819 820 BinaryFunction *BinaryContext::createBinaryFunction( 821 const std::string &Name, BinarySection &Section, uint64_t Address, 822 uint64_t Size, uint64_t SymbolSize, uint16_t Alignment) { 823 auto Result = BinaryFunctions.emplace( 824 Address, BinaryFunction(Name, Section, Address, Size, *this)); 825 assert(Result.second == true && "unexpected duplicate function"); 826 BinaryFunction *BF = &Result.first->second; 827 registerNameAtAddress(Name, Address, SymbolSize ? SymbolSize : Size, 828 Alignment); 829 setSymbolToFunctionMap(BF->getSymbol(), BF); 830 return BF; 831 } 832 833 const MCSymbol * 834 BinaryContext::getOrCreateJumpTable(BinaryFunction &Function, uint64_t Address, 835 JumpTable::JumpTableType Type) { 836 // Two fragments of same function access same jump table 837 if (JumpTable *JT = getJumpTableContainingAddress(Address)) { 838 assert(JT->Type == Type && "jump table types have to match"); 839 assert(Address == JT->getAddress() && "unexpected non-empty jump table"); 840 841 // Prevent associating a jump table to a specific fragment twice. 842 // This simple check arises from the assumption: no more than 2 fragments. 843 if (JT->Parents.size() == 1 && JT->Parents[0] != &Function) { 844 assert(JT->Parents[0]->isParentOrChildOf(Function) && 845 "cannot re-use jump table of a different function"); 846 // Duplicate the entry for the parent function for easy access 847 JT->Parents.push_back(&Function); 848 if (opts::Verbosity > 2) { 849 this->outs() << "BOLT-INFO: Multiple fragments access same jump table: " 850 << JT->Parents[0]->getPrintName() << "; " 851 << Function.getPrintName() << "\n"; 852 JT->print(this->outs()); 853 } 854 Function.JumpTables.emplace(Address, JT); 855 JT->Parents[0]->setHasIndirectTargetToSplitFragment(true); 856 JT->Parents[1]->setHasIndirectTargetToSplitFragment(true); 857 } 858 859 bool IsJumpTableParent = false; 860 (void)IsJumpTableParent; 861 for (BinaryFunction *Frag : JT->Parents) 862 if (Frag == &Function) 863 IsJumpTableParent = true; 864 assert(IsJumpTableParent && 865 "cannot re-use jump table of a different function"); 866 return JT->getFirstLabel(); 867 } 868 869 // Re-use the existing symbol if possible. 870 MCSymbol *JTLabel = nullptr; 871 if (BinaryData *Object = getBinaryDataAtAddress(Address)) { 872 if (!isInternalSymbolName(Object->getSymbol()->getName())) 873 JTLabel = Object->getSymbol(); 874 } 875 876 const uint64_t EntrySize = getJumpTableEntrySize(Type); 877 if (!JTLabel) { 878 const std::string JumpTableName = generateJumpTableName(Function, Address); 879 JTLabel = registerNameAtAddress(JumpTableName, Address, 0, EntrySize); 880 } 881 882 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: creating jump table " << JTLabel->getName() 883 << " in function " << Function << '\n'); 884 885 JumpTable *JT = new JumpTable(*JTLabel, Address, EntrySize, Type, 886 JumpTable::LabelMapType{{0, JTLabel}}, 887 *getSectionForAddress(Address)); 888 JT->Parents.push_back(&Function); 889 if (opts::Verbosity > 2) 890 JT->print(this->outs()); 891 JumpTables.emplace(Address, JT); 892 893 // Duplicate the entry for the parent function for easy access. 894 Function.JumpTables.emplace(Address, JT); 895 return JTLabel; 896 } 897 898 std::pair<uint64_t, const MCSymbol *> 899 BinaryContext::duplicateJumpTable(BinaryFunction &Function, JumpTable *JT, 900 const MCSymbol *OldLabel) { 901 auto L = scopeLock(); 902 unsigned Offset = 0; 903 bool Found = false; 904 for (std::pair<const unsigned, MCSymbol *> Elmt : JT->Labels) { 905 if (Elmt.second != OldLabel) 906 continue; 907 Offset = Elmt.first; 908 Found = true; 909 break; 910 } 911 assert(Found && "Label not found"); 912 (void)Found; 913 MCSymbol *NewLabel = Ctx->createNamedTempSymbol("duplicatedJT"); 914 JumpTable *NewJT = 915 new JumpTable(*NewLabel, JT->getAddress(), JT->EntrySize, JT->Type, 916 JumpTable::LabelMapType{{Offset, NewLabel}}, 917 *getSectionForAddress(JT->getAddress())); 918 NewJT->Parents = JT->Parents; 919 NewJT->Entries = JT->Entries; 920 NewJT->Counts = JT->Counts; 921 uint64_t JumpTableID = ++DuplicatedJumpTables; 922 // Invert it to differentiate from regular jump tables whose IDs are their 923 // addresses in the input binary memory space 924 JumpTableID = ~JumpTableID; 925 JumpTables.emplace(JumpTableID, NewJT); 926 Function.JumpTables.emplace(JumpTableID, NewJT); 927 return std::make_pair(JumpTableID, NewLabel); 928 } 929 930 std::string BinaryContext::generateJumpTableName(const BinaryFunction &BF, 931 uint64_t Address) { 932 size_t Id; 933 uint64_t Offset = 0; 934 if (const JumpTable *JT = BF.getJumpTableContainingAddress(Address)) { 935 Offset = Address - JT->getAddress(); 936 auto JTLabelsIt = JT->Labels.find(Offset); 937 if (JTLabelsIt != JT->Labels.end()) 938 return std::string(JTLabelsIt->second->getName()); 939 940 auto JTIdsIt = JumpTableIds.find(JT->getAddress()); 941 assert(JTIdsIt != JumpTableIds.end()); 942 Id = JTIdsIt->second; 943 } else { 944 Id = JumpTableIds[Address] = BF.JumpTables.size(); 945 } 946 return ("JUMP_TABLE/" + BF.getOneName().str() + "." + std::to_string(Id) + 947 (Offset ? ("." + std::to_string(Offset)) : "")); 948 } 949 950 bool BinaryContext::hasValidCodePadding(const BinaryFunction &BF) { 951 // FIXME: aarch64 support is missing. 952 if (!isX86()) 953 return true; 954 955 if (BF.getSize() == BF.getMaxSize()) 956 return true; 957 958 ErrorOr<ArrayRef<unsigned char>> FunctionData = BF.getData(); 959 assert(FunctionData && "cannot get function as data"); 960 961 uint64_t Offset = BF.getSize(); 962 MCInst Instr; 963 uint64_t InstrSize = 0; 964 uint64_t InstrAddress = BF.getAddress() + Offset; 965 using std::placeholders::_1; 966 967 // Skip instructions that satisfy the predicate condition. 968 auto skipInstructions = [&](std::function<bool(const MCInst &)> Predicate) { 969 const uint64_t StartOffset = Offset; 970 for (; Offset < BF.getMaxSize(); 971 Offset += InstrSize, InstrAddress += InstrSize) { 972 if (!DisAsm->getInstruction(Instr, InstrSize, FunctionData->slice(Offset), 973 InstrAddress, nulls())) 974 break; 975 if (!Predicate(Instr)) 976 break; 977 } 978 979 return Offset - StartOffset; 980 }; 981 982 // Skip a sequence of zero bytes. 983 auto skipZeros = [&]() { 984 const uint64_t StartOffset = Offset; 985 for (; Offset < BF.getMaxSize(); ++Offset) 986 if ((*FunctionData)[Offset] != 0) 987 break; 988 989 return Offset - StartOffset; 990 }; 991 992 // Accept the whole padding area filled with breakpoints. 993 auto isBreakpoint = std::bind(&MCPlusBuilder::isBreakpoint, MIB.get(), _1); 994 if (skipInstructions(isBreakpoint) && Offset == BF.getMaxSize()) 995 return true; 996 997 auto isNoop = std::bind(&MCPlusBuilder::isNoop, MIB.get(), _1); 998 999 // Some functions have a jump to the next function or to the padding area 1000 // inserted after the body. 1001 auto isSkipJump = [&](const MCInst &Instr) { 1002 uint64_t TargetAddress = 0; 1003 if (MIB->isUnconditionalBranch(Instr) && 1004 MIB->evaluateBranch(Instr, InstrAddress, InstrSize, TargetAddress)) { 1005 if (TargetAddress >= InstrAddress + InstrSize && 1006 TargetAddress <= BF.getAddress() + BF.getMaxSize()) { 1007 return true; 1008 } 1009 } 1010 return false; 1011 }; 1012 1013 // Skip over nops, jumps, and zero padding. Allow interleaving (this happens). 1014 while (skipInstructions(isNoop) || skipInstructions(isSkipJump) || 1015 skipZeros()) 1016 ; 1017 1018 if (Offset == BF.getMaxSize()) 1019 return true; 1020 1021 if (opts::Verbosity >= 1) { 1022 this->errs() << "BOLT-WARNING: bad padding at address 0x" 1023 << Twine::utohexstr(BF.getAddress() + BF.getSize()) 1024 << " starting at offset " << (Offset - BF.getSize()) 1025 << " in function " << BF << '\n' 1026 << FunctionData->slice(BF.getSize(), 1027 BF.getMaxSize() - BF.getSize()) 1028 << '\n'; 1029 } 1030 1031 return false; 1032 } 1033 1034 void BinaryContext::adjustCodePadding() { 1035 for (auto &BFI : BinaryFunctions) { 1036 BinaryFunction &BF = BFI.second; 1037 if (!shouldEmit(BF)) 1038 continue; 1039 1040 if (!hasValidCodePadding(BF)) { 1041 if (HasRelocations) { 1042 if (opts::Verbosity >= 1) { 1043 this->outs() << "BOLT-INFO: function " << BF 1044 << " has invalid padding. Ignoring the function.\n"; 1045 } 1046 BF.setIgnored(); 1047 } else { 1048 BF.setMaxSize(BF.getSize()); 1049 } 1050 } 1051 } 1052 } 1053 1054 MCSymbol *BinaryContext::registerNameAtAddress(StringRef Name, uint64_t Address, 1055 uint64_t Size, 1056 uint16_t Alignment, 1057 unsigned Flags) { 1058 // Register the name with MCContext. 1059 MCSymbol *Symbol = Ctx->getOrCreateSymbol(Name); 1060 1061 auto GAI = BinaryDataMap.find(Address); 1062 BinaryData *BD; 1063 if (GAI == BinaryDataMap.end()) { 1064 ErrorOr<BinarySection &> SectionOrErr = getSectionForAddress(Address); 1065 BinarySection &Section = 1066 SectionOrErr ? SectionOrErr.get() : absoluteSection(); 1067 BD = new BinaryData(*Symbol, Address, Size, Alignment ? Alignment : 1, 1068 Section, Flags); 1069 GAI = BinaryDataMap.emplace(Address, BD).first; 1070 GlobalSymbols[Name] = BD; 1071 updateObjectNesting(GAI); 1072 } else { 1073 BD = GAI->second; 1074 if (!BD->hasName(Name)) { 1075 GlobalSymbols[Name] = BD; 1076 BD->Symbols.push_back(Symbol); 1077 } 1078 } 1079 1080 return Symbol; 1081 } 1082 1083 const BinaryData * 1084 BinaryContext::getBinaryDataContainingAddressImpl(uint64_t Address) const { 1085 auto NI = BinaryDataMap.lower_bound(Address); 1086 auto End = BinaryDataMap.end(); 1087 if ((NI != End && Address == NI->first) || 1088 ((NI != BinaryDataMap.begin()) && (NI-- != BinaryDataMap.begin()))) { 1089 if (NI->second->containsAddress(Address)) 1090 return NI->second; 1091 1092 // If this is a sub-symbol, see if a parent data contains the address. 1093 const BinaryData *BD = NI->second->getParent(); 1094 while (BD) { 1095 if (BD->containsAddress(Address)) 1096 return BD; 1097 BD = BD->getParent(); 1098 } 1099 } 1100 return nullptr; 1101 } 1102 1103 BinaryData *BinaryContext::getGOTSymbol() { 1104 // First tries to find a global symbol with that name 1105 BinaryData *GOTSymBD = getBinaryDataByName("_GLOBAL_OFFSET_TABLE_"); 1106 if (GOTSymBD) 1107 return GOTSymBD; 1108 1109 // This symbol might be hidden from run-time link, so fetch the local 1110 // definition if available. 1111 GOTSymBD = getBinaryDataByName("_GLOBAL_OFFSET_TABLE_/1"); 1112 if (!GOTSymBD) 1113 return nullptr; 1114 1115 // If the local symbol is not unique, fail 1116 unsigned Index = 2; 1117 SmallString<30> Storage; 1118 while (const BinaryData *BD = 1119 getBinaryDataByName(Twine("_GLOBAL_OFFSET_TABLE_/") 1120 .concat(Twine(Index++)) 1121 .toStringRef(Storage))) 1122 if (BD->getAddress() != GOTSymBD->getAddress()) 1123 return nullptr; 1124 1125 return GOTSymBD; 1126 } 1127 1128 bool BinaryContext::setBinaryDataSize(uint64_t Address, uint64_t Size) { 1129 auto NI = BinaryDataMap.find(Address); 1130 assert(NI != BinaryDataMap.end()); 1131 if (NI == BinaryDataMap.end()) 1132 return false; 1133 // TODO: it's possible that a jump table starts at the same address 1134 // as a larger blob of private data. When we set the size of the 1135 // jump table, it might be smaller than the total blob size. In this 1136 // case we just leave the original size since (currently) it won't really 1137 // affect anything. 1138 assert((!NI->second->Size || NI->second->Size == Size || 1139 (NI->second->isJumpTable() && NI->second->Size > Size)) && 1140 "can't change the size of a symbol that has already had its " 1141 "size set"); 1142 if (!NI->second->Size) { 1143 NI->second->Size = Size; 1144 updateObjectNesting(NI); 1145 return true; 1146 } 1147 return false; 1148 } 1149 1150 void BinaryContext::generateSymbolHashes() { 1151 auto isPadding = [](const BinaryData &BD) { 1152 StringRef Contents = BD.getSection().getContents(); 1153 StringRef SymData = Contents.substr(BD.getOffset(), BD.getSize()); 1154 return (BD.getName().starts_with("HOLEat") || 1155 SymData.find_first_not_of(0) == StringRef::npos); 1156 }; 1157 1158 uint64_t NumCollisions = 0; 1159 for (auto &Entry : BinaryDataMap) { 1160 BinaryData &BD = *Entry.second; 1161 StringRef Name = BD.getName(); 1162 1163 if (!isInternalSymbolName(Name)) 1164 continue; 1165 1166 // First check if a non-anonymous alias exists and move it to the front. 1167 if (BD.getSymbols().size() > 1) { 1168 auto Itr = llvm::find_if(BD.getSymbols(), [&](const MCSymbol *Symbol) { 1169 return !isInternalSymbolName(Symbol->getName()); 1170 }); 1171 if (Itr != BD.getSymbols().end()) { 1172 size_t Idx = std::distance(BD.getSymbols().begin(), Itr); 1173 std::swap(BD.getSymbols()[0], BD.getSymbols()[Idx]); 1174 continue; 1175 } 1176 } 1177 1178 // We have to skip 0 size symbols since they will all collide. 1179 if (BD.getSize() == 0) { 1180 continue; 1181 } 1182 1183 const uint64_t Hash = BD.getSection().hash(BD); 1184 const size_t Idx = Name.find("0x"); 1185 std::string NewName = 1186 (Twine(Name.substr(0, Idx)) + "_" + Twine::utohexstr(Hash)).str(); 1187 if (getBinaryDataByName(NewName)) { 1188 // Ignore collisions for symbols that appear to be padding 1189 // (i.e. all zeros or a "hole") 1190 if (!isPadding(BD)) { 1191 if (opts::Verbosity) { 1192 this->errs() << "BOLT-WARNING: collision detected when hashing " << BD 1193 << " with new name (" << NewName << "), skipping.\n"; 1194 } 1195 ++NumCollisions; 1196 } 1197 continue; 1198 } 1199 BD.Symbols.insert(BD.Symbols.begin(), Ctx->getOrCreateSymbol(NewName)); 1200 GlobalSymbols[NewName] = &BD; 1201 } 1202 if (NumCollisions) { 1203 this->errs() << "BOLT-WARNING: " << NumCollisions 1204 << " collisions detected while hashing binary objects"; 1205 if (!opts::Verbosity) 1206 this->errs() << ". Use -v=1 to see the list."; 1207 this->errs() << '\n'; 1208 } 1209 } 1210 1211 bool BinaryContext::registerFragment(BinaryFunction &TargetFunction, 1212 BinaryFunction &Function) const { 1213 assert(TargetFunction.isFragment() && "TargetFunction must be a fragment"); 1214 if (TargetFunction.isChildOf(Function)) 1215 return true; 1216 TargetFunction.addParentFragment(Function); 1217 Function.addFragment(TargetFunction); 1218 if (!HasRelocations) { 1219 TargetFunction.setSimple(false); 1220 Function.setSimple(false); 1221 } 1222 if (opts::Verbosity >= 1) { 1223 this->outs() << "BOLT-INFO: marking " << TargetFunction 1224 << " as a fragment of " << Function << '\n'; 1225 } 1226 return true; 1227 } 1228 1229 void BinaryContext::addAdrpAddRelocAArch64(BinaryFunction &BF, 1230 MCInst &LoadLowBits, 1231 MCInst &LoadHiBits, 1232 uint64_t Target) { 1233 const MCSymbol *TargetSymbol; 1234 uint64_t Addend = 0; 1235 std::tie(TargetSymbol, Addend) = handleAddressRef(Target, BF, 1236 /*IsPCRel*/ true); 1237 int64_t Val; 1238 MIB->replaceImmWithSymbolRef(LoadHiBits, TargetSymbol, Addend, Ctx.get(), Val, 1239 ELF::R_AARCH64_ADR_PREL_PG_HI21); 1240 MIB->replaceImmWithSymbolRef(LoadLowBits, TargetSymbol, Addend, Ctx.get(), 1241 Val, ELF::R_AARCH64_ADD_ABS_LO12_NC); 1242 } 1243 1244 bool BinaryContext::handleAArch64Veneer(uint64_t Address, bool MatchOnly) { 1245 BinaryFunction *TargetFunction = getBinaryFunctionContainingAddress(Address); 1246 if (TargetFunction) 1247 return false; 1248 1249 ErrorOr<BinarySection &> Section = getSectionForAddress(Address); 1250 assert(Section && "cannot get section for referenced address"); 1251 if (!Section->isText()) 1252 return false; 1253 1254 bool Ret = false; 1255 StringRef SectionContents = Section->getContents(); 1256 uint64_t Offset = Address - Section->getAddress(); 1257 const uint64_t MaxSize = SectionContents.size() - Offset; 1258 const uint8_t *Bytes = 1259 reinterpret_cast<const uint8_t *>(SectionContents.data()); 1260 ArrayRef<uint8_t> Data(Bytes + Offset, MaxSize); 1261 1262 auto matchVeneer = [&](BinaryFunction::InstrMapType &Instructions, 1263 MCInst &Instruction, uint64_t Offset, 1264 uint64_t AbsoluteInstrAddr, 1265 uint64_t TotalSize) -> bool { 1266 MCInst *TargetHiBits, *TargetLowBits; 1267 uint64_t TargetAddress, Count; 1268 Count = MIB->matchLinkerVeneer(Instructions.begin(), Instructions.end(), 1269 AbsoluteInstrAddr, Instruction, TargetHiBits, 1270 TargetLowBits, TargetAddress); 1271 if (!Count) 1272 return false; 1273 1274 if (MatchOnly) 1275 return true; 1276 1277 // NOTE The target symbol was created during disassemble's 1278 // handleExternalReference 1279 const MCSymbol *VeneerSymbol = getOrCreateGlobalSymbol(Address, "FUNCat"); 1280 BinaryFunction *Veneer = createBinaryFunction(VeneerSymbol->getName().str(), 1281 *Section, Address, TotalSize); 1282 addAdrpAddRelocAArch64(*Veneer, *TargetLowBits, *TargetHiBits, 1283 TargetAddress); 1284 MIB->addAnnotation(Instruction, "AArch64Veneer", true); 1285 Veneer->addInstruction(Offset, std::move(Instruction)); 1286 --Count; 1287 for (auto It = Instructions.rbegin(); Count != 0; ++It, --Count) { 1288 MIB->addAnnotation(It->second, "AArch64Veneer", true); 1289 Veneer->addInstruction(It->first, std::move(It->second)); 1290 } 1291 1292 Veneer->getOrCreateLocalLabel(Address); 1293 Veneer->setMaxSize(TotalSize); 1294 Veneer->updateState(BinaryFunction::State::Disassembled); 1295 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: handling veneer function at 0x" << Address 1296 << "\n"); 1297 return true; 1298 }; 1299 1300 uint64_t Size = 0, TotalSize = 0; 1301 BinaryFunction::InstrMapType VeneerInstructions; 1302 for (Offset = 0; Offset < MaxSize; Offset += Size) { 1303 MCInst Instruction; 1304 const uint64_t AbsoluteInstrAddr = Address + Offset; 1305 if (!SymbolicDisAsm->getInstruction(Instruction, Size, Data.slice(Offset), 1306 AbsoluteInstrAddr, nulls())) 1307 break; 1308 1309 TotalSize += Size; 1310 if (MIB->isBranch(Instruction)) { 1311 Ret = matchVeneer(VeneerInstructions, Instruction, Offset, 1312 AbsoluteInstrAddr, TotalSize); 1313 break; 1314 } 1315 1316 VeneerInstructions.emplace(Offset, std::move(Instruction)); 1317 } 1318 1319 return Ret; 1320 } 1321 1322 void BinaryContext::processInterproceduralReferences() { 1323 for (const std::pair<BinaryFunction *, uint64_t> &It : 1324 InterproceduralReferences) { 1325 BinaryFunction &Function = *It.first; 1326 uint64_t Address = It.second; 1327 // Process interprocedural references from ignored functions in BAT mode 1328 // (non-simple in non-relocation mode) to properly register entry points 1329 if (!Address || (Function.isIgnored() && !HasBATSection)) 1330 continue; 1331 1332 BinaryFunction *TargetFunction = 1333 getBinaryFunctionContainingAddress(Address); 1334 if (&Function == TargetFunction) 1335 continue; 1336 1337 if (TargetFunction) { 1338 if (TargetFunction->isFragment() && 1339 !TargetFunction->isChildOf(Function)) { 1340 this->errs() 1341 << "BOLT-WARNING: interprocedural reference between unrelated " 1342 "fragments: " 1343 << Function.getPrintName() << " and " 1344 << TargetFunction->getPrintName() << '\n'; 1345 } 1346 if (uint64_t Offset = Address - TargetFunction->getAddress()) 1347 TargetFunction->addEntryPointAtOffset(Offset); 1348 1349 continue; 1350 } 1351 1352 // Check if address falls in function padding space - this could be 1353 // unmarked data in code. In this case adjust the padding space size. 1354 ErrorOr<BinarySection &> Section = getSectionForAddress(Address); 1355 assert(Section && "cannot get section for referenced address"); 1356 1357 if (!Section->isText()) 1358 continue; 1359 1360 // PLT requires special handling and could be ignored in this context. 1361 StringRef SectionName = Section->getName(); 1362 if (SectionName == ".plt" || SectionName == ".plt.got") 1363 continue; 1364 1365 // Check if it is aarch64 veneer written at Address 1366 if (isAArch64() && handleAArch64Veneer(Address)) 1367 continue; 1368 1369 if (opts::processAllFunctions()) { 1370 this->errs() << "BOLT-ERROR: cannot process binaries with unmarked " 1371 << "object in code at address 0x" 1372 << Twine::utohexstr(Address) << " belonging to section " 1373 << SectionName << " in current mode\n"; 1374 exit(1); 1375 } 1376 1377 TargetFunction = getBinaryFunctionContainingAddress(Address, 1378 /*CheckPastEnd=*/false, 1379 /*UseMaxSize=*/true); 1380 // We are not going to overwrite non-simple functions, but for simple 1381 // ones - adjust the padding size. 1382 if (TargetFunction && TargetFunction->isSimple()) { 1383 this->errs() 1384 << "BOLT-WARNING: function " << *TargetFunction 1385 << " has an object detected in a padding region at address 0x" 1386 << Twine::utohexstr(Address) << '\n'; 1387 TargetFunction->setMaxSize(TargetFunction->getSize()); 1388 } 1389 } 1390 1391 InterproceduralReferences.clear(); 1392 } 1393 1394 void BinaryContext::postProcessSymbolTable() { 1395 fixBinaryDataHoles(); 1396 bool Valid = true; 1397 for (auto &Entry : BinaryDataMap) { 1398 BinaryData *BD = Entry.second; 1399 if ((BD->getName().starts_with("SYMBOLat") || 1400 BD->getName().starts_with("DATAat")) && 1401 !BD->getParent() && !BD->getSize() && !BD->isAbsolute() && 1402 BD->getSection()) { 1403 this->errs() << "BOLT-WARNING: zero-sized top level symbol: " << *BD 1404 << "\n"; 1405 Valid = false; 1406 } 1407 } 1408 assert(Valid); 1409 (void)Valid; 1410 generateSymbolHashes(); 1411 } 1412 1413 void BinaryContext::foldFunction(BinaryFunction &ChildBF, 1414 BinaryFunction &ParentBF) { 1415 assert(!ChildBF.isMultiEntry() && !ParentBF.isMultiEntry() && 1416 "cannot merge functions with multiple entry points"); 1417 1418 std::unique_lock<llvm::sys::RWMutex> WriteCtxLock(CtxMutex, std::defer_lock); 1419 std::unique_lock<llvm::sys::RWMutex> WriteSymbolMapLock( 1420 SymbolToFunctionMapMutex, std::defer_lock); 1421 1422 const StringRef ChildName = ChildBF.getOneName(); 1423 1424 // Move symbols over and update bookkeeping info. 1425 for (MCSymbol *Symbol : ChildBF.getSymbols()) { 1426 ParentBF.getSymbols().push_back(Symbol); 1427 WriteSymbolMapLock.lock(); 1428 SymbolToFunctionMap[Symbol] = &ParentBF; 1429 WriteSymbolMapLock.unlock(); 1430 // NB: there's no need to update BinaryDataMap and GlobalSymbols. 1431 } 1432 ChildBF.getSymbols().clear(); 1433 1434 // Move other names the child function is known under. 1435 llvm::move(ChildBF.Aliases, std::back_inserter(ParentBF.Aliases)); 1436 ChildBF.Aliases.clear(); 1437 1438 if (HasRelocations) { 1439 // Merge execution counts of ChildBF into those of ParentBF. 1440 // Without relocations, we cannot reliably merge profiles as both functions 1441 // continue to exist and either one can be executed. 1442 ChildBF.mergeProfileDataInto(ParentBF); 1443 1444 std::shared_lock<llvm::sys::RWMutex> ReadBfsLock(BinaryFunctionsMutex, 1445 std::defer_lock); 1446 std::unique_lock<llvm::sys::RWMutex> WriteBfsLock(BinaryFunctionsMutex, 1447 std::defer_lock); 1448 // Remove ChildBF from the global set of functions in relocs mode. 1449 ReadBfsLock.lock(); 1450 auto FI = BinaryFunctions.find(ChildBF.getAddress()); 1451 ReadBfsLock.unlock(); 1452 1453 assert(FI != BinaryFunctions.end() && "function not found"); 1454 assert(&ChildBF == &FI->second && "function mismatch"); 1455 1456 WriteBfsLock.lock(); 1457 ChildBF.clearDisasmState(); 1458 FI = BinaryFunctions.erase(FI); 1459 WriteBfsLock.unlock(); 1460 1461 } else { 1462 // In non-relocation mode we keep the function, but rename it. 1463 std::string NewName = "__ICF_" + ChildName.str(); 1464 1465 WriteCtxLock.lock(); 1466 ChildBF.getSymbols().push_back(Ctx->getOrCreateSymbol(NewName)); 1467 WriteCtxLock.unlock(); 1468 1469 ChildBF.setFolded(&ParentBF); 1470 } 1471 1472 ParentBF.setHasFunctionsFoldedInto(); 1473 } 1474 1475 void BinaryContext::fixBinaryDataHoles() { 1476 assert(validateObjectNesting() && "object nesting inconsistency detected"); 1477 1478 for (BinarySection &Section : allocatableSections()) { 1479 std::vector<std::pair<uint64_t, uint64_t>> Holes; 1480 1481 auto isNotHole = [&Section](const binary_data_iterator &Itr) { 1482 BinaryData *BD = Itr->second; 1483 bool isHole = (!BD->getParent() && !BD->getSize() && BD->isObject() && 1484 (BD->getName().starts_with("SYMBOLat0x") || 1485 BD->getName().starts_with("DATAat0x") || 1486 BD->getName().starts_with("ANONYMOUS"))); 1487 return !isHole && BD->getSection() == Section && !BD->getParent(); 1488 }; 1489 1490 auto BDStart = BinaryDataMap.begin(); 1491 auto BDEnd = BinaryDataMap.end(); 1492 auto Itr = FilteredBinaryDataIterator(isNotHole, BDStart, BDEnd); 1493 auto End = FilteredBinaryDataIterator(isNotHole, BDEnd, BDEnd); 1494 1495 uint64_t EndAddress = Section.getAddress(); 1496 1497 while (Itr != End) { 1498 if (Itr->second->getAddress() > EndAddress) { 1499 uint64_t Gap = Itr->second->getAddress() - EndAddress; 1500 Holes.emplace_back(EndAddress, Gap); 1501 } 1502 EndAddress = Itr->second->getEndAddress(); 1503 ++Itr; 1504 } 1505 1506 if (EndAddress < Section.getEndAddress()) 1507 Holes.emplace_back(EndAddress, Section.getEndAddress() - EndAddress); 1508 1509 // If there is already a symbol at the start of the hole, grow that symbol 1510 // to cover the rest. Otherwise, create a new symbol to cover the hole. 1511 for (std::pair<uint64_t, uint64_t> &Hole : Holes) { 1512 BinaryData *BD = getBinaryDataAtAddress(Hole.first); 1513 if (BD) { 1514 // BD->getSection() can be != Section if there are sections that 1515 // overlap. In this case it is probably safe to just skip the holes 1516 // since the overlapping section will not(?) have any symbols in it. 1517 if (BD->getSection() == Section) 1518 setBinaryDataSize(Hole.first, Hole.second); 1519 } else { 1520 getOrCreateGlobalSymbol(Hole.first, "HOLEat", Hole.second, 1); 1521 } 1522 } 1523 } 1524 1525 assert(validateObjectNesting() && "object nesting inconsistency detected"); 1526 assert(validateHoles() && "top level hole detected in object map"); 1527 } 1528 1529 void BinaryContext::printGlobalSymbols(raw_ostream &OS) const { 1530 const BinarySection *CurrentSection = nullptr; 1531 bool FirstSection = true; 1532 1533 for (auto &Entry : BinaryDataMap) { 1534 const BinaryData *BD = Entry.second; 1535 const BinarySection &Section = BD->getSection(); 1536 if (FirstSection || Section != *CurrentSection) { 1537 uint64_t Address, Size; 1538 StringRef Name = Section.getName(); 1539 if (Section) { 1540 Address = Section.getAddress(); 1541 Size = Section.getSize(); 1542 } else { 1543 Address = BD->getAddress(); 1544 Size = BD->getSize(); 1545 } 1546 OS << "BOLT-INFO: Section " << Name << ", " 1547 << "0x" + Twine::utohexstr(Address) << ":" 1548 << "0x" + Twine::utohexstr(Address + Size) << "/" << Size << "\n"; 1549 CurrentSection = &Section; 1550 FirstSection = false; 1551 } 1552 1553 OS << "BOLT-INFO: "; 1554 const BinaryData *P = BD->getParent(); 1555 while (P) { 1556 OS << " "; 1557 P = P->getParent(); 1558 } 1559 OS << *BD << "\n"; 1560 } 1561 } 1562 1563 Expected<unsigned> BinaryContext::getDwarfFile( 1564 StringRef Directory, StringRef FileName, unsigned FileNumber, 1565 std::optional<MD5::MD5Result> Checksum, std::optional<StringRef> Source, 1566 unsigned CUID, unsigned DWARFVersion) { 1567 DwarfLineTable &Table = DwarfLineTablesCUMap[CUID]; 1568 return Table.tryGetFile(Directory, FileName, Checksum, Source, DWARFVersion, 1569 FileNumber); 1570 } 1571 1572 unsigned BinaryContext::addDebugFilenameToUnit(const uint32_t DestCUID, 1573 const uint32_t SrcCUID, 1574 unsigned FileIndex) { 1575 DWARFCompileUnit *SrcUnit = DwCtx->getCompileUnitForOffset(SrcCUID); 1576 const DWARFDebugLine::LineTable *LineTable = 1577 DwCtx->getLineTableForUnit(SrcUnit); 1578 const std::vector<DWARFDebugLine::FileNameEntry> &FileNames = 1579 LineTable->Prologue.FileNames; 1580 // Dir indexes start at 1, as DWARF file numbers, and a dir index 0 1581 // means empty dir. 1582 assert(FileIndex > 0 && FileIndex <= FileNames.size() && 1583 "FileIndex out of range for the compilation unit."); 1584 StringRef Dir = ""; 1585 if (FileNames[FileIndex - 1].DirIdx != 0) { 1586 if (std::optional<const char *> DirName = dwarf::toString( 1587 LineTable->Prologue 1588 .IncludeDirectories[FileNames[FileIndex - 1].DirIdx - 1])) { 1589 Dir = *DirName; 1590 } 1591 } 1592 StringRef FileName = ""; 1593 if (std::optional<const char *> FName = 1594 dwarf::toString(FileNames[FileIndex - 1].Name)) 1595 FileName = *FName; 1596 assert(FileName != ""); 1597 DWARFCompileUnit *DstUnit = DwCtx->getCompileUnitForOffset(DestCUID); 1598 return cantFail(getDwarfFile(Dir, FileName, 0, std::nullopt, std::nullopt, 1599 DestCUID, DstUnit->getVersion())); 1600 } 1601 1602 std::vector<BinaryFunction *> BinaryContext::getSortedFunctions() { 1603 std::vector<BinaryFunction *> SortedFunctions(BinaryFunctions.size()); 1604 llvm::transform(llvm::make_second_range(BinaryFunctions), 1605 SortedFunctions.begin(), 1606 [](BinaryFunction &BF) { return &BF; }); 1607 1608 llvm::stable_sort(SortedFunctions, 1609 [](const BinaryFunction *A, const BinaryFunction *B) { 1610 if (A->hasValidIndex() && B->hasValidIndex()) { 1611 return A->getIndex() < B->getIndex(); 1612 } 1613 return A->hasValidIndex(); 1614 }); 1615 return SortedFunctions; 1616 } 1617 1618 std::vector<BinaryFunction *> BinaryContext::getAllBinaryFunctions() { 1619 std::vector<BinaryFunction *> AllFunctions; 1620 AllFunctions.reserve(BinaryFunctions.size() + InjectedBinaryFunctions.size()); 1621 llvm::transform(llvm::make_second_range(BinaryFunctions), 1622 std::back_inserter(AllFunctions), 1623 [](BinaryFunction &BF) { return &BF; }); 1624 llvm::copy(InjectedBinaryFunctions, std::back_inserter(AllFunctions)); 1625 1626 return AllFunctions; 1627 } 1628 1629 std::optional<DWARFUnit *> BinaryContext::getDWOCU(uint64_t DWOId) { 1630 auto Iter = DWOCUs.find(DWOId); 1631 if (Iter == DWOCUs.end()) 1632 return std::nullopt; 1633 1634 return Iter->second; 1635 } 1636 1637 DWARFContext *BinaryContext::getDWOContext() const { 1638 if (DWOCUs.empty()) 1639 return nullptr; 1640 return &DWOCUs.begin()->second->getContext(); 1641 } 1642 1643 /// Handles DWO sections that can either be in .o, .dwo or .dwp files. 1644 void BinaryContext::preprocessDWODebugInfo() { 1645 for (const std::unique_ptr<DWARFUnit> &CU : DwCtx->compile_units()) { 1646 DWARFUnit *const DwarfUnit = CU.get(); 1647 if (std::optional<uint64_t> DWOId = DwarfUnit->getDWOId()) { 1648 std::string DWOName = dwarf::toString( 1649 DwarfUnit->getUnitDIE().find( 1650 {dwarf::DW_AT_dwo_name, dwarf::DW_AT_GNU_dwo_name}), 1651 ""); 1652 SmallString<16> AbsolutePath; 1653 if (!opts::CompDirOverride.empty()) { 1654 sys::path::append(AbsolutePath, opts::CompDirOverride); 1655 sys::path::append(AbsolutePath, DWOName); 1656 } 1657 DWARFUnit *DWOCU = 1658 DwarfUnit->getNonSkeletonUnitDIE(false, AbsolutePath).getDwarfUnit(); 1659 if (!DWOCU->isDWOUnit()) { 1660 this->outs() 1661 << "BOLT-WARNING: Debug Fission: DWO debug information for " 1662 << DWOName 1663 << " was not retrieved and won't be updated. Please check " 1664 "relative path.\n"; 1665 continue; 1666 } 1667 DWOCUs[*DWOId] = DWOCU; 1668 } 1669 } 1670 if (!DWOCUs.empty()) 1671 this->outs() << "BOLT-INFO: processing split DWARF\n"; 1672 } 1673 1674 void BinaryContext::preprocessDebugInfo() { 1675 struct CURange { 1676 uint64_t LowPC; 1677 uint64_t HighPC; 1678 DWARFUnit *Unit; 1679 1680 bool operator<(const CURange &Other) const { return LowPC < Other.LowPC; } 1681 }; 1682 1683 // Building a map of address ranges to CUs similar to .debug_aranges and use 1684 // it to assign CU to functions. 1685 std::vector<CURange> AllRanges; 1686 AllRanges.reserve(DwCtx->getNumCompileUnits()); 1687 for (const std::unique_ptr<DWARFUnit> &CU : DwCtx->compile_units()) { 1688 Expected<DWARFAddressRangesVector> RangesOrError = 1689 CU->getUnitDIE().getAddressRanges(); 1690 if (!RangesOrError) { 1691 consumeError(RangesOrError.takeError()); 1692 continue; 1693 } 1694 for (DWARFAddressRange &Range : *RangesOrError) { 1695 // Parts of the debug info could be invalidated due to corresponding code 1696 // being removed from the binary by the linker. Hence we check if the 1697 // address is a valid one. 1698 if (containsAddress(Range.LowPC)) 1699 AllRanges.emplace_back(CURange{Range.LowPC, Range.HighPC, CU.get()}); 1700 } 1701 1702 ContainsDwarf5 |= CU->getVersion() >= 5; 1703 ContainsDwarfLegacy |= CU->getVersion() < 5; 1704 } 1705 1706 llvm::sort(AllRanges); 1707 for (auto &KV : BinaryFunctions) { 1708 const uint64_t FunctionAddress = KV.first; 1709 BinaryFunction &Function = KV.second; 1710 1711 auto It = llvm::partition_point( 1712 AllRanges, [=](CURange R) { return R.HighPC <= FunctionAddress; }); 1713 if (It != AllRanges.end() && It->LowPC <= FunctionAddress) 1714 Function.setDWARFUnit(It->Unit); 1715 } 1716 1717 // Discover units with debug info that needs to be updated. 1718 for (const auto &KV : BinaryFunctions) { 1719 const BinaryFunction &BF = KV.second; 1720 if (shouldEmit(BF) && BF.getDWARFUnit()) 1721 ProcessedCUs.insert(BF.getDWARFUnit()); 1722 } 1723 1724 // Clear debug info for functions from units that we are not going to process. 1725 for (auto &KV : BinaryFunctions) { 1726 BinaryFunction &BF = KV.second; 1727 if (BF.getDWARFUnit() && !ProcessedCUs.count(BF.getDWARFUnit())) 1728 BF.setDWARFUnit(nullptr); 1729 } 1730 1731 if (opts::Verbosity >= 1) { 1732 this->outs() << "BOLT-INFO: " << ProcessedCUs.size() << " out of " 1733 << DwCtx->getNumCompileUnits() << " CUs will be updated\n"; 1734 } 1735 1736 preprocessDWODebugInfo(); 1737 1738 // Populate MCContext with DWARF files from all units. 1739 StringRef GlobalPrefix = AsmInfo->getPrivateGlobalPrefix(); 1740 for (const std::unique_ptr<DWARFUnit> &CU : DwCtx->compile_units()) { 1741 const uint64_t CUID = CU->getOffset(); 1742 DwarfLineTable &BinaryLineTable = getDwarfLineTable(CUID); 1743 BinaryLineTable.setLabel(Ctx->getOrCreateSymbol( 1744 GlobalPrefix + "line_table_start" + Twine(CUID))); 1745 1746 if (!ProcessedCUs.count(CU.get())) 1747 continue; 1748 1749 const DWARFDebugLine::LineTable *LineTable = 1750 DwCtx->getLineTableForUnit(CU.get()); 1751 const std::vector<DWARFDebugLine::FileNameEntry> &FileNames = 1752 LineTable->Prologue.FileNames; 1753 1754 uint16_t DwarfVersion = LineTable->Prologue.getVersion(); 1755 if (DwarfVersion >= 5) { 1756 std::optional<MD5::MD5Result> Checksum; 1757 if (LineTable->Prologue.ContentTypes.HasMD5) 1758 Checksum = LineTable->Prologue.FileNames[0].Checksum; 1759 std::optional<const char *> Name = 1760 dwarf::toString(CU->getUnitDIE().find(dwarf::DW_AT_name), nullptr); 1761 if (std::optional<uint64_t> DWOID = CU->getDWOId()) { 1762 auto Iter = DWOCUs.find(*DWOID); 1763 assert(Iter != DWOCUs.end() && "DWO CU was not found."); 1764 Name = dwarf::toString( 1765 Iter->second->getUnitDIE().find(dwarf::DW_AT_name), nullptr); 1766 } 1767 BinaryLineTable.setRootFile(CU->getCompilationDir(), *Name, Checksum, 1768 std::nullopt); 1769 } 1770 1771 BinaryLineTable.setDwarfVersion(DwarfVersion); 1772 1773 // Assign a unique label to every line table, one per CU. 1774 // Make sure empty debug line tables are registered too. 1775 if (FileNames.empty()) { 1776 cantFail(getDwarfFile("", "<unknown>", 0, std::nullopt, std::nullopt, 1777 CUID, DwarfVersion)); 1778 continue; 1779 } 1780 const uint32_t Offset = DwarfVersion < 5 ? 1 : 0; 1781 for (size_t I = 0, Size = FileNames.size(); I != Size; ++I) { 1782 // Dir indexes start at 1, as DWARF file numbers, and a dir index 0 1783 // means empty dir. 1784 StringRef Dir = ""; 1785 if (FileNames[I].DirIdx != 0 || DwarfVersion >= 5) 1786 if (std::optional<const char *> DirName = dwarf::toString( 1787 LineTable->Prologue 1788 .IncludeDirectories[FileNames[I].DirIdx - Offset])) 1789 Dir = *DirName; 1790 StringRef FileName = ""; 1791 if (std::optional<const char *> FName = 1792 dwarf::toString(FileNames[I].Name)) 1793 FileName = *FName; 1794 assert(FileName != ""); 1795 std::optional<MD5::MD5Result> Checksum; 1796 if (DwarfVersion >= 5 && LineTable->Prologue.ContentTypes.HasMD5) 1797 Checksum = LineTable->Prologue.FileNames[I].Checksum; 1798 cantFail(getDwarfFile(Dir, FileName, 0, Checksum, std::nullopt, CUID, 1799 DwarfVersion)); 1800 } 1801 } 1802 } 1803 1804 bool BinaryContext::shouldEmit(const BinaryFunction &Function) const { 1805 if (Function.isPseudo()) 1806 return false; 1807 1808 if (opts::processAllFunctions()) 1809 return true; 1810 1811 if (Function.isIgnored()) 1812 return false; 1813 1814 // In relocation mode we will emit non-simple functions with CFG. 1815 // If the function does not have a CFG it should be marked as ignored. 1816 return HasRelocations || Function.isSimple(); 1817 } 1818 1819 void BinaryContext::dump(const MCInst &Inst) const { 1820 if (LLVM_UNLIKELY(!InstPrinter)) { 1821 dbgs() << "Cannot dump for InstPrinter is not initialized.\n"; 1822 return; 1823 } 1824 InstPrinter->printInst(&Inst, 0, "", *STI, dbgs()); 1825 dbgs() << "\n"; 1826 } 1827 1828 void BinaryContext::printCFI(raw_ostream &OS, const MCCFIInstruction &Inst) { 1829 uint32_t Operation = Inst.getOperation(); 1830 switch (Operation) { 1831 case MCCFIInstruction::OpSameValue: 1832 OS << "OpSameValue Reg" << Inst.getRegister(); 1833 break; 1834 case MCCFIInstruction::OpRememberState: 1835 OS << "OpRememberState"; 1836 break; 1837 case MCCFIInstruction::OpRestoreState: 1838 OS << "OpRestoreState"; 1839 break; 1840 case MCCFIInstruction::OpOffset: 1841 OS << "OpOffset Reg" << Inst.getRegister() << " " << Inst.getOffset(); 1842 break; 1843 case MCCFIInstruction::OpDefCfaRegister: 1844 OS << "OpDefCfaRegister Reg" << Inst.getRegister(); 1845 break; 1846 case MCCFIInstruction::OpDefCfaOffset: 1847 OS << "OpDefCfaOffset " << Inst.getOffset(); 1848 break; 1849 case MCCFIInstruction::OpDefCfa: 1850 OS << "OpDefCfa Reg" << Inst.getRegister() << " " << Inst.getOffset(); 1851 break; 1852 case MCCFIInstruction::OpRelOffset: 1853 OS << "OpRelOffset Reg" << Inst.getRegister() << " " << Inst.getOffset(); 1854 break; 1855 case MCCFIInstruction::OpAdjustCfaOffset: 1856 OS << "OfAdjustCfaOffset " << Inst.getOffset(); 1857 break; 1858 case MCCFIInstruction::OpEscape: 1859 OS << "OpEscape"; 1860 break; 1861 case MCCFIInstruction::OpRestore: 1862 OS << "OpRestore Reg" << Inst.getRegister(); 1863 break; 1864 case MCCFIInstruction::OpUndefined: 1865 OS << "OpUndefined Reg" << Inst.getRegister(); 1866 break; 1867 case MCCFIInstruction::OpRegister: 1868 OS << "OpRegister Reg" << Inst.getRegister() << " Reg" 1869 << Inst.getRegister2(); 1870 break; 1871 case MCCFIInstruction::OpWindowSave: 1872 OS << "OpWindowSave"; 1873 break; 1874 case MCCFIInstruction::OpGnuArgsSize: 1875 OS << "OpGnuArgsSize"; 1876 break; 1877 default: 1878 OS << "Op#" << Operation; 1879 break; 1880 } 1881 } 1882 1883 MarkerSymType BinaryContext::getMarkerType(const SymbolRef &Symbol) const { 1884 // For aarch64 and riscv, the ABI defines mapping symbols so we identify data 1885 // in the code section (see IHI0056B). $x identifies a symbol starting code or 1886 // the end of a data chunk inside code, $d identifies start of data. 1887 if (isX86() || ELFSymbolRef(Symbol).getSize()) 1888 return MarkerSymType::NONE; 1889 1890 Expected<StringRef> NameOrError = Symbol.getName(); 1891 Expected<object::SymbolRef::Type> TypeOrError = Symbol.getType(); 1892 1893 if (!TypeOrError || !NameOrError) 1894 return MarkerSymType::NONE; 1895 1896 if (*TypeOrError != SymbolRef::ST_Unknown) 1897 return MarkerSymType::NONE; 1898 1899 if (*NameOrError == "$x" || NameOrError->starts_with("$x.")) 1900 return MarkerSymType::CODE; 1901 1902 // $x<ISA> 1903 if (isRISCV() && NameOrError->starts_with("$x")) 1904 return MarkerSymType::CODE; 1905 1906 if (*NameOrError == "$d" || NameOrError->starts_with("$d.")) 1907 return MarkerSymType::DATA; 1908 1909 return MarkerSymType::NONE; 1910 } 1911 1912 bool BinaryContext::isMarker(const SymbolRef &Symbol) const { 1913 return getMarkerType(Symbol) != MarkerSymType::NONE; 1914 } 1915 1916 static void printDebugInfo(raw_ostream &OS, const MCInst &Instruction, 1917 const BinaryFunction *Function, 1918 DWARFContext *DwCtx) { 1919 DebugLineTableRowRef RowRef = 1920 DebugLineTableRowRef::fromSMLoc(Instruction.getLoc()); 1921 if (RowRef == DebugLineTableRowRef::NULL_ROW) 1922 return; 1923 1924 const DWARFDebugLine::LineTable *LineTable; 1925 if (Function && Function->getDWARFUnit() && 1926 Function->getDWARFUnit()->getOffset() == RowRef.DwCompileUnitIndex) { 1927 LineTable = Function->getDWARFLineTable(); 1928 } else { 1929 LineTable = DwCtx->getLineTableForUnit( 1930 DwCtx->getCompileUnitForOffset(RowRef.DwCompileUnitIndex)); 1931 } 1932 assert(LineTable && "line table expected for instruction with debug info"); 1933 1934 const DWARFDebugLine::Row &Row = LineTable->Rows[RowRef.RowIndex - 1]; 1935 StringRef FileName = ""; 1936 if (std::optional<const char *> FName = 1937 dwarf::toString(LineTable->Prologue.FileNames[Row.File - 1].Name)) 1938 FileName = *FName; 1939 OS << " # debug line " << FileName << ":" << Row.Line; 1940 if (Row.Column) 1941 OS << ":" << Row.Column; 1942 if (Row.Discriminator) 1943 OS << " discriminator:" << Row.Discriminator; 1944 } 1945 1946 void BinaryContext::printInstruction(raw_ostream &OS, const MCInst &Instruction, 1947 uint64_t Offset, 1948 const BinaryFunction *Function, 1949 bool PrintMCInst, bool PrintMemData, 1950 bool PrintRelocations, 1951 StringRef Endl) const { 1952 OS << format(" %08" PRIx64 ": ", Offset); 1953 if (MIB->isCFI(Instruction)) { 1954 uint32_t Offset = Instruction.getOperand(0).getImm(); 1955 OS << "\t!CFI\t$" << Offset << "\t; "; 1956 if (Function) 1957 printCFI(OS, *Function->getCFIFor(Instruction)); 1958 OS << Endl; 1959 return; 1960 } 1961 if (std::optional<uint32_t> DynamicID = 1962 MIB->getDynamicBranchID(Instruction)) { 1963 OS << "\tjit\t" << MIB->getTargetSymbol(Instruction)->getName() 1964 << " # ID: " << DynamicID; 1965 } else { 1966 InstPrinter->printInst(&Instruction, 0, "", *STI, OS); 1967 } 1968 if (MIB->isCall(Instruction)) { 1969 if (MIB->isTailCall(Instruction)) 1970 OS << " # TAILCALL "; 1971 if (MIB->isInvoke(Instruction)) { 1972 const std::optional<MCPlus::MCLandingPad> EHInfo = 1973 MIB->getEHInfo(Instruction); 1974 OS << " # handler: "; 1975 if (EHInfo->first) 1976 OS << *EHInfo->first; 1977 else 1978 OS << '0'; 1979 OS << "; action: " << EHInfo->second; 1980 const int64_t GnuArgsSize = MIB->getGnuArgsSize(Instruction); 1981 if (GnuArgsSize >= 0) 1982 OS << "; GNU_args_size = " << GnuArgsSize; 1983 } 1984 } else if (MIB->isIndirectBranch(Instruction)) { 1985 if (uint64_t JTAddress = MIB->getJumpTable(Instruction)) { 1986 OS << " # JUMPTABLE @0x" << Twine::utohexstr(JTAddress); 1987 } else { 1988 OS << " # UNKNOWN CONTROL FLOW"; 1989 } 1990 } 1991 if (std::optional<uint32_t> Offset = MIB->getOffset(Instruction)) 1992 OS << " # Offset: " << *Offset; 1993 if (std::optional<uint32_t> Size = MIB->getSize(Instruction)) 1994 OS << " # Size: " << *Size; 1995 if (MCSymbol *Label = MIB->getInstLabel(Instruction)) 1996 OS << " # Label: " << *Label; 1997 1998 MIB->printAnnotations(Instruction, OS); 1999 2000 if (opts::PrintDebugInfo) 2001 printDebugInfo(OS, Instruction, Function, DwCtx.get()); 2002 2003 if ((opts::PrintRelocations || PrintRelocations) && Function) { 2004 const uint64_t Size = computeCodeSize(&Instruction, &Instruction + 1); 2005 Function->printRelocations(OS, Offset, Size); 2006 } 2007 2008 OS << Endl; 2009 2010 if (PrintMCInst) { 2011 Instruction.dump_pretty(OS, InstPrinter.get()); 2012 OS << Endl; 2013 } 2014 } 2015 2016 std::optional<uint64_t> 2017 BinaryContext::getBaseAddressForMapping(uint64_t MMapAddress, 2018 uint64_t FileOffset) const { 2019 // Find a segment with a matching file offset. 2020 for (auto &KV : SegmentMapInfo) { 2021 const SegmentInfo &SegInfo = KV.second; 2022 // FileOffset is got from perf event, 2023 // and it is equal to alignDown(SegInfo.FileOffset, pagesize). 2024 // If the pagesize is not equal to SegInfo.Alignment. 2025 // FileOffset and SegInfo.FileOffset should be aligned first, 2026 // and then judge whether they are equal. 2027 if (alignDown(SegInfo.FileOffset, SegInfo.Alignment) == 2028 alignDown(FileOffset, SegInfo.Alignment)) { 2029 // The function's offset from base address in VAS is aligned by pagesize 2030 // instead of SegInfo.Alignment. Pagesize can't be got from perf events. 2031 // However, The ELF document says that SegInfo.FileOffset should equal 2032 // to SegInfo.Address, modulo the pagesize. 2033 // Reference: https://refspecs.linuxfoundation.org/elf/elf.pdf 2034 2035 // So alignDown(SegInfo.Address, pagesize) can be calculated by: 2036 // alignDown(SegInfo.Address, pagesize) 2037 // = SegInfo.Address - (SegInfo.Address % pagesize) 2038 // = SegInfo.Address - (SegInfo.FileOffset % pagesize) 2039 // = SegInfo.Address - SegInfo.FileOffset + 2040 // alignDown(SegInfo.FileOffset, pagesize) 2041 // = SegInfo.Address - SegInfo.FileOffset + FileOffset 2042 return MMapAddress - (SegInfo.Address - SegInfo.FileOffset + FileOffset); 2043 } 2044 } 2045 2046 return std::nullopt; 2047 } 2048 2049 ErrorOr<BinarySection &> BinaryContext::getSectionForAddress(uint64_t Address) { 2050 auto SI = AddressToSection.upper_bound(Address); 2051 if (SI != AddressToSection.begin()) { 2052 --SI; 2053 uint64_t UpperBound = SI->first + SI->second->getSize(); 2054 if (!SI->second->getSize()) 2055 UpperBound += 1; 2056 if (UpperBound > Address) 2057 return *SI->second; 2058 } 2059 return std::make_error_code(std::errc::bad_address); 2060 } 2061 2062 ErrorOr<StringRef> 2063 BinaryContext::getSectionNameForAddress(uint64_t Address) const { 2064 if (ErrorOr<const BinarySection &> Section = getSectionForAddress(Address)) 2065 return Section->getName(); 2066 return std::make_error_code(std::errc::bad_address); 2067 } 2068 2069 BinarySection &BinaryContext::registerSection(BinarySection *Section) { 2070 auto Res = Sections.insert(Section); 2071 (void)Res; 2072 assert(Res.second && "can't register the same section twice."); 2073 2074 // Only register allocatable sections in the AddressToSection map. 2075 if (Section->isAllocatable() && Section->getAddress()) 2076 AddressToSection.insert(std::make_pair(Section->getAddress(), Section)); 2077 NameToSection.insert( 2078 std::make_pair(std::string(Section->getName()), Section)); 2079 if (Section->hasSectionRef()) 2080 SectionRefToBinarySection.insert( 2081 std::make_pair(Section->getSectionRef(), Section)); 2082 2083 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: registering " << *Section << "\n"); 2084 return *Section; 2085 } 2086 2087 BinarySection &BinaryContext::registerSection(SectionRef Section) { 2088 return registerSection(new BinarySection(*this, Section)); 2089 } 2090 2091 BinarySection & 2092 BinaryContext::registerSection(const Twine &SectionName, 2093 const BinarySection &OriginalSection) { 2094 return registerSection( 2095 new BinarySection(*this, SectionName, OriginalSection)); 2096 } 2097 2098 BinarySection & 2099 BinaryContext::registerOrUpdateSection(const Twine &Name, unsigned ELFType, 2100 unsigned ELFFlags, uint8_t *Data, 2101 uint64_t Size, unsigned Alignment) { 2102 auto NamedSections = getSectionByName(Name); 2103 if (NamedSections.begin() != NamedSections.end()) { 2104 assert(std::next(NamedSections.begin()) == NamedSections.end() && 2105 "can only update unique sections"); 2106 BinarySection *Section = NamedSections.begin()->second; 2107 2108 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: updating " << *Section << " -> "); 2109 const bool Flag = Section->isAllocatable(); 2110 (void)Flag; 2111 Section->update(Data, Size, Alignment, ELFType, ELFFlags); 2112 LLVM_DEBUG(dbgs() << *Section << "\n"); 2113 // FIXME: Fix section flags/attributes for MachO. 2114 if (isELF()) 2115 assert(Flag == Section->isAllocatable() && 2116 "can't change section allocation status"); 2117 return *Section; 2118 } 2119 2120 return registerSection( 2121 new BinarySection(*this, Name, Data, Size, Alignment, ELFType, ELFFlags)); 2122 } 2123 2124 void BinaryContext::deregisterSectionName(const BinarySection &Section) { 2125 auto NameRange = NameToSection.equal_range(Section.getName().str()); 2126 while (NameRange.first != NameRange.second) { 2127 if (NameRange.first->second == &Section) { 2128 NameToSection.erase(NameRange.first); 2129 break; 2130 } 2131 ++NameRange.first; 2132 } 2133 } 2134 2135 void BinaryContext::deregisterUnusedSections() { 2136 ErrorOr<BinarySection &> AbsSection = getUniqueSectionByName("<absolute>"); 2137 for (auto SI = Sections.begin(); SI != Sections.end();) { 2138 BinarySection *Section = *SI; 2139 // We check getOutputData() instead of getOutputSize() because sometimes 2140 // zero-sized .text.cold sections are allocated. 2141 if (Section->hasSectionRef() || Section->getOutputData() || 2142 (AbsSection && Section == &AbsSection.get())) { 2143 ++SI; 2144 continue; 2145 } 2146 2147 LLVM_DEBUG(dbgs() << "LLVM-DEBUG: deregistering " << Section->getName() 2148 << '\n';); 2149 deregisterSectionName(*Section); 2150 SI = Sections.erase(SI); 2151 delete Section; 2152 } 2153 } 2154 2155 bool BinaryContext::deregisterSection(BinarySection &Section) { 2156 BinarySection *SectionPtr = &Section; 2157 auto Itr = Sections.find(SectionPtr); 2158 if (Itr != Sections.end()) { 2159 auto Range = AddressToSection.equal_range(SectionPtr->getAddress()); 2160 while (Range.first != Range.second) { 2161 if (Range.first->second == SectionPtr) { 2162 AddressToSection.erase(Range.first); 2163 break; 2164 } 2165 ++Range.first; 2166 } 2167 2168 deregisterSectionName(*SectionPtr); 2169 Sections.erase(Itr); 2170 delete SectionPtr; 2171 return true; 2172 } 2173 return false; 2174 } 2175 2176 void BinaryContext::renameSection(BinarySection &Section, 2177 const Twine &NewName) { 2178 auto Itr = Sections.find(&Section); 2179 assert(Itr != Sections.end() && "Section must exist to be renamed."); 2180 Sections.erase(Itr); 2181 2182 deregisterSectionName(Section); 2183 2184 Section.Name = NewName.str(); 2185 Section.setOutputName(Section.Name); 2186 2187 NameToSection.insert(std::make_pair(Section.Name, &Section)); 2188 2189 // Reinsert with the new name. 2190 Sections.insert(&Section); 2191 } 2192 2193 void BinaryContext::printSections(raw_ostream &OS) const { 2194 for (BinarySection *const &Section : Sections) 2195 OS << "BOLT-INFO: " << *Section << "\n"; 2196 } 2197 2198 BinarySection &BinaryContext::absoluteSection() { 2199 if (ErrorOr<BinarySection &> Section = getUniqueSectionByName("<absolute>")) 2200 return *Section; 2201 return registerOrUpdateSection("<absolute>", ELF::SHT_NULL, 0u); 2202 } 2203 2204 ErrorOr<uint64_t> BinaryContext::getUnsignedValueAtAddress(uint64_t Address, 2205 size_t Size) const { 2206 const ErrorOr<const BinarySection &> Section = getSectionForAddress(Address); 2207 if (!Section) 2208 return std::make_error_code(std::errc::bad_address); 2209 2210 if (Section->isVirtual()) 2211 return 0; 2212 2213 DataExtractor DE(Section->getContents(), AsmInfo->isLittleEndian(), 2214 AsmInfo->getCodePointerSize()); 2215 auto ValueOffset = static_cast<uint64_t>(Address - Section->getAddress()); 2216 return DE.getUnsigned(&ValueOffset, Size); 2217 } 2218 2219 ErrorOr<int64_t> BinaryContext::getSignedValueAtAddress(uint64_t Address, 2220 size_t Size) const { 2221 const ErrorOr<const BinarySection &> Section = getSectionForAddress(Address); 2222 if (!Section) 2223 return std::make_error_code(std::errc::bad_address); 2224 2225 if (Section->isVirtual()) 2226 return 0; 2227 2228 DataExtractor DE(Section->getContents(), AsmInfo->isLittleEndian(), 2229 AsmInfo->getCodePointerSize()); 2230 auto ValueOffset = static_cast<uint64_t>(Address - Section->getAddress()); 2231 return DE.getSigned(&ValueOffset, Size); 2232 } 2233 2234 void BinaryContext::addRelocation(uint64_t Address, MCSymbol *Symbol, 2235 uint64_t Type, uint64_t Addend, 2236 uint64_t Value) { 2237 ErrorOr<BinarySection &> Section = getSectionForAddress(Address); 2238 assert(Section && "cannot find section for address"); 2239 Section->addRelocation(Address - Section->getAddress(), Symbol, Type, Addend, 2240 Value); 2241 } 2242 2243 void BinaryContext::addDynamicRelocation(uint64_t Address, MCSymbol *Symbol, 2244 uint64_t Type, uint64_t Addend, 2245 uint64_t Value) { 2246 ErrorOr<BinarySection &> Section = getSectionForAddress(Address); 2247 assert(Section && "cannot find section for address"); 2248 Section->addDynamicRelocation(Address - Section->getAddress(), Symbol, Type, 2249 Addend, Value); 2250 } 2251 2252 bool BinaryContext::removeRelocationAt(uint64_t Address) { 2253 ErrorOr<BinarySection &> Section = getSectionForAddress(Address); 2254 assert(Section && "cannot find section for address"); 2255 return Section->removeRelocationAt(Address - Section->getAddress()); 2256 } 2257 2258 const Relocation *BinaryContext::getRelocationAt(uint64_t Address) const { 2259 ErrorOr<const BinarySection &> Section = getSectionForAddress(Address); 2260 if (!Section) 2261 return nullptr; 2262 2263 return Section->getRelocationAt(Address - Section->getAddress()); 2264 } 2265 2266 const Relocation * 2267 BinaryContext::getDynamicRelocationAt(uint64_t Address) const { 2268 ErrorOr<const BinarySection &> Section = getSectionForAddress(Address); 2269 if (!Section) 2270 return nullptr; 2271 2272 return Section->getDynamicRelocationAt(Address - Section->getAddress()); 2273 } 2274 2275 void BinaryContext::markAmbiguousRelocations(BinaryData &BD, 2276 const uint64_t Address) { 2277 auto setImmovable = [&](BinaryData &BD) { 2278 BinaryData *Root = BD.getAtomicRoot(); 2279 LLVM_DEBUG(if (Root->isMoveable()) { 2280 dbgs() << "BOLT-DEBUG: setting " << *Root << " as immovable " 2281 << "due to ambiguous relocation referencing 0x" 2282 << Twine::utohexstr(Address) << '\n'; 2283 }); 2284 Root->setIsMoveable(false); 2285 }; 2286 2287 if (Address == BD.getAddress()) { 2288 setImmovable(BD); 2289 2290 // Set previous symbol as immovable 2291 BinaryData *Prev = getBinaryDataContainingAddress(Address - 1); 2292 if (Prev && Prev->getEndAddress() == BD.getAddress()) 2293 setImmovable(*Prev); 2294 } 2295 2296 if (Address == BD.getEndAddress()) { 2297 setImmovable(BD); 2298 2299 // Set next symbol as immovable 2300 BinaryData *Next = getBinaryDataContainingAddress(BD.getEndAddress()); 2301 if (Next && Next->getAddress() == BD.getEndAddress()) 2302 setImmovable(*Next); 2303 } 2304 } 2305 2306 BinaryFunction *BinaryContext::getFunctionForSymbol(const MCSymbol *Symbol, 2307 uint64_t *EntryDesc) { 2308 std::shared_lock<llvm::sys::RWMutex> Lock(SymbolToFunctionMapMutex); 2309 auto BFI = SymbolToFunctionMap.find(Symbol); 2310 if (BFI == SymbolToFunctionMap.end()) 2311 return nullptr; 2312 2313 BinaryFunction *BF = BFI->second; 2314 if (EntryDesc) 2315 *EntryDesc = BF->getEntryIDForSymbol(Symbol); 2316 2317 return BF; 2318 } 2319 2320 std::string 2321 BinaryContext::generateBugReportMessage(StringRef Message, 2322 const BinaryFunction &Function) const { 2323 std::string Msg; 2324 raw_string_ostream SS(Msg); 2325 SS << "=======================================\n"; 2326 SS << "BOLT is unable to proceed because it couldn't properly understand " 2327 "this function.\n"; 2328 SS << "If you are running the most recent version of BOLT, you may " 2329 "want to " 2330 "report this and paste this dump.\nPlease check that there is no " 2331 "sensitive contents being shared in this dump.\n"; 2332 SS << "\nOffending function: " << Function.getPrintName() << "\n\n"; 2333 ScopedPrinter SP(SS); 2334 SP.printBinaryBlock("Function contents", *Function.getData()); 2335 SS << "\n"; 2336 const_cast<BinaryFunction &>(Function).print(SS, ""); 2337 SS << "ERROR: " << Message; 2338 SS << "\n=======================================\n"; 2339 return Msg; 2340 } 2341 2342 BinaryFunction * 2343 BinaryContext::createInjectedBinaryFunction(const std::string &Name, 2344 bool IsSimple) { 2345 InjectedBinaryFunctions.push_back(new BinaryFunction(Name, *this, IsSimple)); 2346 BinaryFunction *BF = InjectedBinaryFunctions.back(); 2347 setSymbolToFunctionMap(BF->getSymbol(), BF); 2348 BF->CurrentState = BinaryFunction::State::CFG; 2349 return BF; 2350 } 2351 2352 std::pair<size_t, size_t> 2353 BinaryContext::calculateEmittedSize(BinaryFunction &BF, bool FixBranches) { 2354 // Adjust branch instruction to match the current layout. 2355 if (FixBranches) 2356 BF.fixBranches(); 2357 2358 // Create local MC context to isolate the effect of ephemeral code emission. 2359 IndependentCodeEmitter MCEInstance = createIndependentMCCodeEmitter(); 2360 MCContext *LocalCtx = MCEInstance.LocalCtx.get(); 2361 MCAsmBackend *MAB = 2362 TheTarget->createMCAsmBackend(*STI, *MRI, MCTargetOptions()); 2363 2364 SmallString<256> Code; 2365 raw_svector_ostream VecOS(Code); 2366 2367 std::unique_ptr<MCObjectWriter> OW = MAB->createObjectWriter(VecOS); 2368 std::unique_ptr<MCStreamer> Streamer(TheTarget->createMCObjectStreamer( 2369 *TheTriple, *LocalCtx, std::unique_ptr<MCAsmBackend>(MAB), std::move(OW), 2370 std::unique_ptr<MCCodeEmitter>(MCEInstance.MCE.release()), *STI, 2371 /*RelaxAll=*/false, 2372 /*IncrementalLinkerCompatible=*/false, 2373 /*DWARFMustBeAtTheEnd=*/false)); 2374 2375 Streamer->initSections(false, *STI); 2376 2377 MCSection *Section = MCEInstance.LocalMOFI->getTextSection(); 2378 Section->setHasInstructions(true); 2379 2380 // Create symbols in the LocalCtx so that they get destroyed with it. 2381 MCSymbol *StartLabel = LocalCtx->createTempSymbol(); 2382 MCSymbol *EndLabel = LocalCtx->createTempSymbol(); 2383 2384 Streamer->switchSection(Section); 2385 Streamer->emitLabel(StartLabel); 2386 emitFunctionBody(*Streamer, BF, BF.getLayout().getMainFragment(), 2387 /*EmitCodeOnly=*/true); 2388 Streamer->emitLabel(EndLabel); 2389 2390 using LabelRange = std::pair<const MCSymbol *, const MCSymbol *>; 2391 SmallVector<LabelRange> SplitLabels; 2392 for (FunctionFragment &FF : BF.getLayout().getSplitFragments()) { 2393 MCSymbol *const SplitStartLabel = LocalCtx->createTempSymbol(); 2394 MCSymbol *const SplitEndLabel = LocalCtx->createTempSymbol(); 2395 SplitLabels.emplace_back(SplitStartLabel, SplitEndLabel); 2396 2397 MCSectionELF *const SplitSection = LocalCtx->getELFSection( 2398 BF.getCodeSectionName(FF.getFragmentNum()), ELF::SHT_PROGBITS, 2399 ELF::SHF_EXECINSTR | ELF::SHF_ALLOC); 2400 SplitSection->setHasInstructions(true); 2401 Streamer->switchSection(SplitSection); 2402 2403 Streamer->emitLabel(SplitStartLabel); 2404 emitFunctionBody(*Streamer, BF, FF, /*EmitCodeOnly=*/true); 2405 Streamer->emitLabel(SplitEndLabel); 2406 } 2407 2408 MCAssembler &Assembler = 2409 static_cast<MCObjectStreamer *>(Streamer.get())->getAssembler(); 2410 Assembler.layout(); 2411 2412 // Obtain fragment sizes. 2413 std::vector<uint64_t> FragmentSizes; 2414 // Main fragment size. 2415 const uint64_t HotSize = Assembler.getSymbolOffset(*EndLabel) - 2416 Assembler.getSymbolOffset(*StartLabel); 2417 FragmentSizes.push_back(HotSize); 2418 // Split fragment sizes. 2419 uint64_t ColdSize = 0; 2420 for (const auto &Labels : SplitLabels) { 2421 uint64_t Size = Assembler.getSymbolOffset(*Labels.second) - 2422 Assembler.getSymbolOffset(*Labels.first); 2423 FragmentSizes.push_back(Size); 2424 ColdSize += Size; 2425 } 2426 2427 // Populate new start and end offsets of each basic block. 2428 uint64_t FragmentIndex = 0; 2429 for (FunctionFragment &FF : BF.getLayout().fragments()) { 2430 BinaryBasicBlock *PrevBB = nullptr; 2431 for (BinaryBasicBlock *BB : FF) { 2432 const uint64_t BBStartOffset = 2433 Assembler.getSymbolOffset(*(BB->getLabel())); 2434 BB->setOutputStartAddress(BBStartOffset); 2435 if (PrevBB) 2436 PrevBB->setOutputEndAddress(BBStartOffset); 2437 PrevBB = BB; 2438 } 2439 if (PrevBB) 2440 PrevBB->setOutputEndAddress(FragmentSizes[FragmentIndex]); 2441 FragmentIndex++; 2442 } 2443 2444 // Clean-up the effect of the code emission. 2445 for (const MCSymbol &Symbol : Assembler.symbols()) { 2446 MCSymbol *MutableSymbol = const_cast<MCSymbol *>(&Symbol); 2447 MutableSymbol->setUndefined(); 2448 MutableSymbol->setIsRegistered(false); 2449 } 2450 2451 return std::make_pair(HotSize, ColdSize); 2452 } 2453 2454 bool BinaryContext::validateInstructionEncoding( 2455 ArrayRef<uint8_t> InputSequence) const { 2456 MCInst Inst; 2457 uint64_t InstSize; 2458 DisAsm->getInstruction(Inst, InstSize, InputSequence, 0, nulls()); 2459 assert(InstSize == InputSequence.size() && 2460 "Disassembled instruction size does not match the sequence."); 2461 2462 SmallString<256> Code; 2463 SmallVector<MCFixup, 4> Fixups; 2464 2465 MCE->encodeInstruction(Inst, Code, Fixups, *STI); 2466 auto OutputSequence = ArrayRef<uint8_t>((uint8_t *)Code.data(), Code.size()); 2467 if (InputSequence != OutputSequence) { 2468 if (opts::Verbosity > 1) { 2469 this->errs() << "BOLT-WARNING: mismatched encoding detected\n" 2470 << " input: " << InputSequence << '\n' 2471 << " output: " << OutputSequence << '\n'; 2472 } 2473 return false; 2474 } 2475 2476 return true; 2477 } 2478 2479 uint64_t BinaryContext::getHotThreshold() const { 2480 static uint64_t Threshold = 0; 2481 if (Threshold == 0) { 2482 Threshold = std::max( 2483 (uint64_t)opts::ExecutionCountThreshold, 2484 NumProfiledFuncs ? SumExecutionCount / (2 * NumProfiledFuncs) : 1); 2485 } 2486 return Threshold; 2487 } 2488 2489 BinaryFunction *BinaryContext::getBinaryFunctionContainingAddress( 2490 uint64_t Address, bool CheckPastEnd, bool UseMaxSize) { 2491 auto FI = BinaryFunctions.upper_bound(Address); 2492 if (FI == BinaryFunctions.begin()) 2493 return nullptr; 2494 --FI; 2495 2496 const uint64_t UsedSize = 2497 UseMaxSize ? FI->second.getMaxSize() : FI->second.getSize(); 2498 2499 if (Address >= FI->first + UsedSize + (CheckPastEnd ? 1 : 0)) 2500 return nullptr; 2501 2502 return &FI->second; 2503 } 2504 2505 BinaryFunction *BinaryContext::getBinaryFunctionAtAddress(uint64_t Address) { 2506 // First, try to find a function starting at the given address. If the 2507 // function was folded, this will get us the original folded function if it 2508 // wasn't removed from the list, e.g. in non-relocation mode. 2509 auto BFI = BinaryFunctions.find(Address); 2510 if (BFI != BinaryFunctions.end()) 2511 return &BFI->second; 2512 2513 // We might have folded the function matching the object at the given 2514 // address. In such case, we look for a function matching the symbol 2515 // registered at the original address. The new function (the one that the 2516 // original was folded into) will hold the symbol. 2517 if (const BinaryData *BD = getBinaryDataAtAddress(Address)) { 2518 uint64_t EntryID = 0; 2519 BinaryFunction *BF = getFunctionForSymbol(BD->getSymbol(), &EntryID); 2520 if (BF && EntryID == 0) 2521 return BF; 2522 } 2523 return nullptr; 2524 } 2525 2526 /// Deregister JumpTable registered at a given \p Address and delete it. 2527 void BinaryContext::deleteJumpTable(uint64_t Address) { 2528 assert(JumpTables.count(Address) && "Must have a jump table at address"); 2529 JumpTable *JT = JumpTables.at(Address); 2530 for (BinaryFunction *Parent : JT->Parents) 2531 Parent->JumpTables.erase(Address); 2532 JumpTables.erase(Address); 2533 delete JT; 2534 } 2535 2536 DebugAddressRangesVector BinaryContext::translateModuleAddressRanges( 2537 const DWARFAddressRangesVector &InputRanges) const { 2538 DebugAddressRangesVector OutputRanges; 2539 2540 for (const DWARFAddressRange Range : InputRanges) { 2541 auto BFI = BinaryFunctions.lower_bound(Range.LowPC); 2542 while (BFI != BinaryFunctions.end()) { 2543 const BinaryFunction &Function = BFI->second; 2544 if (Function.getAddress() >= Range.HighPC) 2545 break; 2546 const DebugAddressRangesVector FunctionRanges = 2547 Function.getOutputAddressRanges(); 2548 llvm::move(FunctionRanges, std::back_inserter(OutputRanges)); 2549 std::advance(BFI, 1); 2550 } 2551 } 2552 2553 return OutputRanges; 2554 } 2555 2556 } // namespace bolt 2557 } // namespace llvm 2558