1 //===- bolt/Core/BinaryContext.cpp - Low-level context --------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file implements the BinaryContext class. 10 // 11 //===----------------------------------------------------------------------===// 12 13 #include "bolt/Core/BinaryContext.h" 14 #include "bolt/Core/BinaryEmitter.h" 15 #include "bolt/Core/BinaryFunction.h" 16 #include "bolt/Utils/CommandLineOpts.h" 17 #include "bolt/Utils/Utils.h" 18 #include "llvm/ADT/STLExtras.h" 19 #include "llvm/ADT/Twine.h" 20 #include "llvm/DebugInfo/DWARF/DWARFCompileUnit.h" 21 #include "llvm/DebugInfo/DWARF/DWARFFormValue.h" 22 #include "llvm/DebugInfo/DWARF/DWARFUnit.h" 23 #include "llvm/MC/MCAssembler.h" 24 #include "llvm/MC/MCContext.h" 25 #include "llvm/MC/MCDisassembler/MCDisassembler.h" 26 #include "llvm/MC/MCInstPrinter.h" 27 #include "llvm/MC/MCObjectStreamer.h" 28 #include "llvm/MC/MCObjectWriter.h" 29 #include "llvm/MC/MCRegisterInfo.h" 30 #include "llvm/MC/MCSectionELF.h" 31 #include "llvm/MC/MCStreamer.h" 32 #include "llvm/MC/MCSubtargetInfo.h" 33 #include "llvm/MC/MCSymbol.h" 34 #include "llvm/Support/CommandLine.h" 35 #include "llvm/Support/Error.h" 36 #include "llvm/Support/Regex.h" 37 #include <algorithm> 38 #include <functional> 39 #include <iterator> 40 #include <unordered_set> 41 42 using namespace llvm; 43 44 #undef DEBUG_TYPE 45 #define DEBUG_TYPE "bolt" 46 47 namespace opts { 48 49 cl::opt<bool> NoHugePages("no-huge-pages", 50 cl::desc("use regular size pages for code alignment"), 51 cl::Hidden, cl::cat(BoltCategory)); 52 53 static cl::opt<bool> 54 PrintDebugInfo("print-debug-info", 55 cl::desc("print debug info when printing functions"), 56 cl::Hidden, 57 cl::ZeroOrMore, 58 cl::cat(BoltCategory)); 59 60 cl::opt<bool> PrintRelocations( 61 "print-relocations", 62 cl::desc("print relocations when printing functions/objects"), cl::Hidden, 63 cl::cat(BoltCategory)); 64 65 static cl::opt<bool> 66 PrintMemData("print-mem-data", 67 cl::desc("print memory data annotations when printing functions"), 68 cl::Hidden, 69 cl::ZeroOrMore, 70 cl::cat(BoltCategory)); 71 72 cl::opt<std::string> CompDirOverride( 73 "comp-dir-override", 74 cl::desc("overrides DW_AT_comp_dir, and provides an alternative base " 75 "location, which is used with DW_AT_dwo_name to construct a path " 76 "to *.dwo files."), 77 cl::Hidden, cl::init(""), cl::cat(BoltCategory)); 78 } // namespace opts 79 80 namespace llvm { 81 namespace bolt { 82 83 char BOLTError::ID = 0; 84 85 BOLTError::BOLTError(bool IsFatal, const Twine &S) 86 : IsFatal(IsFatal), Msg(S.str()) {} 87 88 void BOLTError::log(raw_ostream &OS) const { 89 if (IsFatal) 90 OS << "FATAL "; 91 StringRef ErrMsg = StringRef(Msg); 92 // Prepend our error prefix if it is missing 93 if (ErrMsg.empty()) { 94 OS << "BOLT-ERROR\n"; 95 } else { 96 if (!ErrMsg.starts_with("BOLT-ERROR")) 97 OS << "BOLT-ERROR: "; 98 OS << ErrMsg << "\n"; 99 } 100 } 101 102 std::error_code BOLTError::convertToErrorCode() const { 103 return inconvertibleErrorCode(); 104 } 105 106 Error createNonFatalBOLTError(const Twine &S) { 107 return make_error<BOLTError>(/*IsFatal*/ false, S); 108 } 109 110 Error createFatalBOLTError(const Twine &S) { 111 return make_error<BOLTError>(/*IsFatal*/ true, S); 112 } 113 114 void BinaryContext::logBOLTErrorsAndQuitOnFatal(Error E) { 115 handleAllErrors(Error(std::move(E)), [&](const BOLTError &E) { 116 if (!E.getMessage().empty()) 117 E.log(this->errs()); 118 if (E.isFatal()) 119 exit(1); 120 }); 121 } 122 123 BinaryContext::BinaryContext(std::unique_ptr<MCContext> Ctx, 124 std::unique_ptr<DWARFContext> DwCtx, 125 std::unique_ptr<Triple> TheTriple, 126 const Target *TheTarget, std::string TripleName, 127 std::unique_ptr<MCCodeEmitter> MCE, 128 std::unique_ptr<MCObjectFileInfo> MOFI, 129 std::unique_ptr<const MCAsmInfo> AsmInfo, 130 std::unique_ptr<const MCInstrInfo> MII, 131 std::unique_ptr<const MCSubtargetInfo> STI, 132 std::unique_ptr<MCInstPrinter> InstPrinter, 133 std::unique_ptr<const MCInstrAnalysis> MIA, 134 std::unique_ptr<MCPlusBuilder> MIB, 135 std::unique_ptr<const MCRegisterInfo> MRI, 136 std::unique_ptr<MCDisassembler> DisAsm, 137 JournalingStreams Logger) 138 : Ctx(std::move(Ctx)), DwCtx(std::move(DwCtx)), 139 TheTriple(std::move(TheTriple)), TheTarget(TheTarget), 140 TripleName(TripleName), MCE(std::move(MCE)), MOFI(std::move(MOFI)), 141 AsmInfo(std::move(AsmInfo)), MII(std::move(MII)), STI(std::move(STI)), 142 InstPrinter(std::move(InstPrinter)), MIA(std::move(MIA)), 143 MIB(std::move(MIB)), MRI(std::move(MRI)), DisAsm(std::move(DisAsm)), 144 Logger(Logger), InitialDynoStats(isAArch64()) { 145 RegularPageSize = isAArch64() ? RegularPageSizeAArch64 : RegularPageSizeX86; 146 PageAlign = opts::NoHugePages ? RegularPageSize : HugePageSize; 147 } 148 149 BinaryContext::~BinaryContext() { 150 for (BinarySection *Section : Sections) 151 delete Section; 152 for (BinaryFunction *InjectedFunction : InjectedBinaryFunctions) 153 delete InjectedFunction; 154 for (std::pair<const uint64_t, JumpTable *> JTI : JumpTables) 155 delete JTI.second; 156 clearBinaryData(); 157 } 158 159 /// Create BinaryContext for a given architecture \p ArchName and 160 /// triple \p TripleName. 161 Expected<std::unique_ptr<BinaryContext>> BinaryContext::createBinaryContext( 162 Triple TheTriple, StringRef InputFileName, SubtargetFeatures *Features, 163 bool IsPIC, std::unique_ptr<DWARFContext> DwCtx, JournalingStreams Logger) { 164 StringRef ArchName = ""; 165 std::string FeaturesStr = ""; 166 switch (TheTriple.getArch()) { 167 case llvm::Triple::x86_64: 168 if (Features) 169 return createFatalBOLTError( 170 "x86_64 target does not use SubtargetFeatures"); 171 ArchName = "x86-64"; 172 FeaturesStr = "+nopl"; 173 break; 174 case llvm::Triple::aarch64: 175 if (Features) 176 return createFatalBOLTError( 177 "AArch64 target does not use SubtargetFeatures"); 178 ArchName = "aarch64"; 179 FeaturesStr = "+all"; 180 break; 181 case llvm::Triple::riscv64: { 182 ArchName = "riscv64"; 183 if (!Features) 184 return createFatalBOLTError("RISCV target needs SubtargetFeatures"); 185 // We rely on relaxation for some transformations (e.g., promoting all calls 186 // to PseudoCALL and then making JITLink relax them). Since the relax 187 // feature is not stored in the object file, we manually enable it. 188 Features->AddFeature("relax"); 189 FeaturesStr = Features->getString(); 190 break; 191 } 192 default: 193 return createStringError(std::errc::not_supported, 194 "BOLT-ERROR: Unrecognized machine in ELF file"); 195 } 196 197 const std::string TripleName = TheTriple.str(); 198 199 std::string Error; 200 const Target *TheTarget = 201 TargetRegistry::lookupTarget(std::string(ArchName), TheTriple, Error); 202 if (!TheTarget) 203 return createStringError(make_error_code(std::errc::not_supported), 204 Twine("BOLT-ERROR: ", Error)); 205 206 std::unique_ptr<const MCRegisterInfo> MRI( 207 TheTarget->createMCRegInfo(TripleName)); 208 if (!MRI) 209 return createStringError( 210 make_error_code(std::errc::not_supported), 211 Twine("BOLT-ERROR: no register info for target ", TripleName)); 212 213 // Set up disassembler. 214 std::unique_ptr<MCAsmInfo> AsmInfo( 215 TheTarget->createMCAsmInfo(*MRI, TripleName, MCTargetOptions())); 216 if (!AsmInfo) 217 return createStringError( 218 make_error_code(std::errc::not_supported), 219 Twine("BOLT-ERROR: no assembly info for target ", TripleName)); 220 // BOLT creates "func@PLT" symbols for PLT entries. In function assembly dump 221 // we want to emit such names as using @PLT without double quotes to convey 222 // variant kind to the assembler. BOLT doesn't rely on the linker so we can 223 // override the default AsmInfo behavior to emit names the way we want. 224 AsmInfo->setAllowAtInName(true); 225 226 std::unique_ptr<const MCSubtargetInfo> STI( 227 TheTarget->createMCSubtargetInfo(TripleName, "", FeaturesStr)); 228 if (!STI) 229 return createStringError( 230 make_error_code(std::errc::not_supported), 231 Twine("BOLT-ERROR: no subtarget info for target ", TripleName)); 232 233 std::unique_ptr<const MCInstrInfo> MII(TheTarget->createMCInstrInfo()); 234 if (!MII) 235 return createStringError( 236 make_error_code(std::errc::not_supported), 237 Twine("BOLT-ERROR: no instruction info for target ", TripleName)); 238 239 std::unique_ptr<MCContext> Ctx( 240 new MCContext(TheTriple, AsmInfo.get(), MRI.get(), STI.get())); 241 std::unique_ptr<MCObjectFileInfo> MOFI( 242 TheTarget->createMCObjectFileInfo(*Ctx, IsPIC)); 243 Ctx->setObjectFileInfo(MOFI.get()); 244 // We do not support X86 Large code model. Change this in the future. 245 bool Large = false; 246 if (TheTriple.getArch() == llvm::Triple::aarch64) 247 Large = true; 248 unsigned LSDAEncoding = 249 Large ? dwarf::DW_EH_PE_absptr : dwarf::DW_EH_PE_udata4; 250 if (IsPIC) { 251 LSDAEncoding = dwarf::DW_EH_PE_pcrel | 252 (Large ? dwarf::DW_EH_PE_sdata8 : dwarf::DW_EH_PE_sdata4); 253 } 254 255 std::unique_ptr<MCDisassembler> DisAsm( 256 TheTarget->createMCDisassembler(*STI, *Ctx)); 257 258 if (!DisAsm) 259 return createStringError( 260 make_error_code(std::errc::not_supported), 261 Twine("BOLT-ERROR: no disassembler info for target ", TripleName)); 262 263 std::unique_ptr<const MCInstrAnalysis> MIA( 264 TheTarget->createMCInstrAnalysis(MII.get())); 265 if (!MIA) 266 return createStringError( 267 make_error_code(std::errc::not_supported), 268 Twine("BOLT-ERROR: failed to create instruction analysis for target ", 269 TripleName)); 270 271 int AsmPrinterVariant = AsmInfo->getAssemblerDialect(); 272 std::unique_ptr<MCInstPrinter> InstructionPrinter( 273 TheTarget->createMCInstPrinter(TheTriple, AsmPrinterVariant, *AsmInfo, 274 *MII, *MRI)); 275 if (!InstructionPrinter) 276 return createStringError( 277 make_error_code(std::errc::not_supported), 278 Twine("BOLT-ERROR: no instruction printer for target ", TripleName)); 279 InstructionPrinter->setPrintImmHex(true); 280 281 std::unique_ptr<MCCodeEmitter> MCE( 282 TheTarget->createMCCodeEmitter(*MII, *Ctx)); 283 284 auto BC = std::make_unique<BinaryContext>( 285 std::move(Ctx), std::move(DwCtx), std::make_unique<Triple>(TheTriple), 286 TheTarget, std::string(TripleName), std::move(MCE), std::move(MOFI), 287 std::move(AsmInfo), std::move(MII), std::move(STI), 288 std::move(InstructionPrinter), std::move(MIA), nullptr, std::move(MRI), 289 std::move(DisAsm), Logger); 290 291 BC->LSDAEncoding = LSDAEncoding; 292 293 BC->MAB = std::unique_ptr<MCAsmBackend>( 294 BC->TheTarget->createMCAsmBackend(*BC->STI, *BC->MRI, MCTargetOptions())); 295 296 BC->setFilename(InputFileName); 297 298 BC->HasFixedLoadAddress = !IsPIC; 299 300 BC->SymbolicDisAsm = std::unique_ptr<MCDisassembler>( 301 BC->TheTarget->createMCDisassembler(*BC->STI, *BC->Ctx)); 302 303 if (!BC->SymbolicDisAsm) 304 return createStringError( 305 make_error_code(std::errc::not_supported), 306 Twine("BOLT-ERROR: no disassembler info for target ", TripleName)); 307 308 return std::move(BC); 309 } 310 311 bool BinaryContext::forceSymbolRelocations(StringRef SymbolName) const { 312 if (opts::HotText && 313 (SymbolName == "__hot_start" || SymbolName == "__hot_end")) 314 return true; 315 316 if (opts::HotData && 317 (SymbolName == "__hot_data_start" || SymbolName == "__hot_data_end")) 318 return true; 319 320 if (SymbolName == "_end") 321 return true; 322 323 return false; 324 } 325 326 std::unique_ptr<MCObjectWriter> 327 BinaryContext::createObjectWriter(raw_pwrite_stream &OS) { 328 return MAB->createObjectWriter(OS); 329 } 330 331 bool BinaryContext::validateObjectNesting() const { 332 auto Itr = BinaryDataMap.begin(); 333 auto End = BinaryDataMap.end(); 334 bool Valid = true; 335 while (Itr != End) { 336 auto Next = std::next(Itr); 337 while (Next != End && 338 Itr->second->getSection() == Next->second->getSection() && 339 Itr->second->containsRange(Next->second->getAddress(), 340 Next->second->getSize())) { 341 if (Next->second->Parent != Itr->second) { 342 this->errs() << "BOLT-WARNING: object nesting incorrect for:\n" 343 << "BOLT-WARNING: " << *Itr->second << "\n" 344 << "BOLT-WARNING: " << *Next->second << "\n"; 345 Valid = false; 346 } 347 ++Next; 348 } 349 Itr = Next; 350 } 351 return Valid; 352 } 353 354 bool BinaryContext::validateHoles() const { 355 bool Valid = true; 356 for (BinarySection &Section : sections()) { 357 for (const Relocation &Rel : Section.relocations()) { 358 uint64_t RelAddr = Rel.Offset + Section.getAddress(); 359 const BinaryData *BD = getBinaryDataContainingAddress(RelAddr); 360 if (!BD) { 361 this->errs() 362 << "BOLT-WARNING: no BinaryData found for relocation at address" 363 << " 0x" << Twine::utohexstr(RelAddr) << " in " << Section.getName() 364 << "\n"; 365 Valid = false; 366 } else if (!BD->getAtomicRoot()) { 367 this->errs() 368 << "BOLT-WARNING: no atomic BinaryData found for relocation at " 369 << "address 0x" << Twine::utohexstr(RelAddr) << " in " 370 << Section.getName() << "\n"; 371 Valid = false; 372 } 373 } 374 } 375 return Valid; 376 } 377 378 void BinaryContext::updateObjectNesting(BinaryDataMapType::iterator GAI) { 379 const uint64_t Address = GAI->second->getAddress(); 380 const uint64_t Size = GAI->second->getSize(); 381 382 auto fixParents = [&](BinaryDataMapType::iterator Itr, 383 BinaryData *NewParent) { 384 BinaryData *OldParent = Itr->second->Parent; 385 Itr->second->Parent = NewParent; 386 ++Itr; 387 while (Itr != BinaryDataMap.end() && OldParent && 388 Itr->second->Parent == OldParent) { 389 Itr->second->Parent = NewParent; 390 ++Itr; 391 } 392 }; 393 394 // Check if the previous symbol contains the newly added symbol. 395 if (GAI != BinaryDataMap.begin()) { 396 BinaryData *Prev = std::prev(GAI)->second; 397 while (Prev) { 398 if (Prev->getSection() == GAI->second->getSection() && 399 Prev->containsRange(Address, Size)) { 400 fixParents(GAI, Prev); 401 } else { 402 fixParents(GAI, nullptr); 403 } 404 Prev = Prev->Parent; 405 } 406 } 407 408 // Check if the newly added symbol contains any subsequent symbols. 409 if (Size != 0) { 410 BinaryData *BD = GAI->second->Parent ? GAI->second->Parent : GAI->second; 411 auto Itr = std::next(GAI); 412 while ( 413 Itr != BinaryDataMap.end() && 414 BD->containsRange(Itr->second->getAddress(), Itr->second->getSize())) { 415 Itr->second->Parent = BD; 416 ++Itr; 417 } 418 } 419 } 420 421 iterator_range<BinaryContext::binary_data_iterator> 422 BinaryContext::getSubBinaryData(BinaryData *BD) { 423 auto Start = std::next(BinaryDataMap.find(BD->getAddress())); 424 auto End = Start; 425 while (End != BinaryDataMap.end() && BD->isAncestorOf(End->second)) 426 ++End; 427 return make_range(Start, End); 428 } 429 430 std::pair<const MCSymbol *, uint64_t> 431 BinaryContext::handleAddressRef(uint64_t Address, BinaryFunction &BF, 432 bool IsPCRel) { 433 if (isAArch64()) { 434 // Check if this is an access to a constant island and create bookkeeping 435 // to keep track of it and emit it later as part of this function. 436 if (MCSymbol *IslandSym = BF.getOrCreateIslandAccess(Address)) 437 return std::make_pair(IslandSym, 0); 438 439 // Detect custom code written in assembly that refers to arbitrary 440 // constant islands from other functions. Write this reference so we 441 // can pull this constant island and emit it as part of this function 442 // too. 443 auto IslandIter = AddressToConstantIslandMap.lower_bound(Address); 444 445 if (IslandIter != AddressToConstantIslandMap.begin() && 446 (IslandIter == AddressToConstantIslandMap.end() || 447 IslandIter->first > Address)) 448 --IslandIter; 449 450 if (IslandIter != AddressToConstantIslandMap.end()) { 451 // Fall-back to referencing the original constant island in the presence 452 // of dynamic relocs, as we currently do not support cloning them. 453 // Notice: we might fail to link because of this, if the original constant 454 // island we are referring would be emitted too far away. 455 if (IslandIter->second->hasDynamicRelocationAtIsland()) { 456 MCSymbol *IslandSym = 457 IslandIter->second->getOrCreateIslandAccess(Address); 458 if (IslandSym) 459 return std::make_pair(IslandSym, 0); 460 } else if (MCSymbol *IslandSym = 461 IslandIter->second->getOrCreateProxyIslandAccess(Address, 462 BF)) { 463 BF.createIslandDependency(IslandSym, IslandIter->second); 464 return std::make_pair(IslandSym, 0); 465 } 466 } 467 } 468 469 // Note that the address does not necessarily have to reside inside 470 // a section, it could be an absolute address too. 471 ErrorOr<BinarySection &> Section = getSectionForAddress(Address); 472 if (Section && Section->isText()) { 473 if (BF.containsAddress(Address, /*UseMaxSize=*/isAArch64())) { 474 if (Address != BF.getAddress()) { 475 // The address could potentially escape. Mark it as another entry 476 // point into the function. 477 if (opts::Verbosity >= 1) { 478 this->outs() << "BOLT-INFO: potentially escaped address 0x" 479 << Twine::utohexstr(Address) << " in function " << BF 480 << '\n'; 481 } 482 BF.HasInternalLabelReference = true; 483 return std::make_pair( 484 BF.addEntryPointAtOffset(Address - BF.getAddress()), 0); 485 } 486 } else { 487 addInterproceduralReference(&BF, Address); 488 } 489 } 490 491 // With relocations, catch jump table references outside of the basic block 492 // containing the indirect jump. 493 if (HasRelocations) { 494 const MemoryContentsType MemType = analyzeMemoryAt(Address, BF); 495 if (MemType == MemoryContentsType::POSSIBLE_PIC_JUMP_TABLE && IsPCRel) { 496 const MCSymbol *Symbol = 497 getOrCreateJumpTable(BF, Address, JumpTable::JTT_PIC); 498 499 return std::make_pair(Symbol, 0); 500 } 501 } 502 503 if (BinaryData *BD = getBinaryDataContainingAddress(Address)) 504 return std::make_pair(BD->getSymbol(), Address - BD->getAddress()); 505 506 // TODO: use DWARF info to get size/alignment here? 507 MCSymbol *TargetSymbol = getOrCreateGlobalSymbol(Address, "DATAat"); 508 LLVM_DEBUG(dbgs() << "Created symbol " << TargetSymbol->getName() << '\n'); 509 return std::make_pair(TargetSymbol, 0); 510 } 511 512 MemoryContentsType BinaryContext::analyzeMemoryAt(uint64_t Address, 513 BinaryFunction &BF) { 514 if (!isX86()) 515 return MemoryContentsType::UNKNOWN; 516 517 ErrorOr<BinarySection &> Section = getSectionForAddress(Address); 518 if (!Section) { 519 // No section - possibly an absolute address. Since we don't allow 520 // internal function addresses to escape the function scope - we 521 // consider it a tail call. 522 if (opts::Verbosity > 1) { 523 this->errs() << "BOLT-WARNING: no section for address 0x" 524 << Twine::utohexstr(Address) << " referenced from function " 525 << BF << '\n'; 526 } 527 return MemoryContentsType::UNKNOWN; 528 } 529 530 if (Section->isVirtual()) { 531 // The contents are filled at runtime. 532 return MemoryContentsType::UNKNOWN; 533 } 534 535 // No support for jump tables in code yet. 536 if (Section->isText()) 537 return MemoryContentsType::UNKNOWN; 538 539 // Start with checking for PIC jump table. We expect non-PIC jump tables 540 // to have high 32 bits set to 0. 541 if (analyzeJumpTable(Address, JumpTable::JTT_PIC, BF)) 542 return MemoryContentsType::POSSIBLE_PIC_JUMP_TABLE; 543 544 if (analyzeJumpTable(Address, JumpTable::JTT_NORMAL, BF)) 545 return MemoryContentsType::POSSIBLE_JUMP_TABLE; 546 547 return MemoryContentsType::UNKNOWN; 548 } 549 550 bool BinaryContext::analyzeJumpTable(const uint64_t Address, 551 const JumpTable::JumpTableType Type, 552 const BinaryFunction &BF, 553 const uint64_t NextJTAddress, 554 JumpTable::AddressesType *EntriesAsAddress, 555 bool *HasEntryInFragment) const { 556 // Target address of __builtin_unreachable. 557 const uint64_t UnreachableAddress = BF.getAddress() + BF.getSize(); 558 559 // Is one of the targets __builtin_unreachable? 560 bool HasUnreachable = false; 561 562 // Does one of the entries match function start address? 563 bool HasStartAsEntry = false; 564 565 // Number of targets other than __builtin_unreachable. 566 uint64_t NumRealEntries = 0; 567 568 // Size of the jump table without trailing __builtin_unreachable entries. 569 size_t TrimmedSize = 0; 570 571 auto addEntryAddress = [&](uint64_t EntryAddress, bool Unreachable = false) { 572 if (!EntriesAsAddress) 573 return; 574 EntriesAsAddress->emplace_back(EntryAddress); 575 if (!Unreachable) 576 TrimmedSize = EntriesAsAddress->size(); 577 }; 578 579 ErrorOr<const BinarySection &> Section = getSectionForAddress(Address); 580 if (!Section) 581 return false; 582 583 // The upper bound is defined by containing object, section limits, and 584 // the next jump table in memory. 585 uint64_t UpperBound = Section->getEndAddress(); 586 const BinaryData *JumpTableBD = getBinaryDataAtAddress(Address); 587 if (JumpTableBD && JumpTableBD->getSize()) { 588 assert(JumpTableBD->getEndAddress() <= UpperBound && 589 "data object cannot cross a section boundary"); 590 UpperBound = JumpTableBD->getEndAddress(); 591 } 592 if (NextJTAddress) 593 UpperBound = std::min(NextJTAddress, UpperBound); 594 595 LLVM_DEBUG({ 596 using JTT = JumpTable::JumpTableType; 597 dbgs() << formatv("BOLT-DEBUG: analyzeJumpTable @{0:x} in {1}, JTT={2}\n", 598 Address, BF.getPrintName(), 599 Type == JTT::JTT_PIC ? "PIC" : "Normal"); 600 }); 601 const uint64_t EntrySize = getJumpTableEntrySize(Type); 602 for (uint64_t EntryAddress = Address; EntryAddress <= UpperBound - EntrySize; 603 EntryAddress += EntrySize) { 604 LLVM_DEBUG(dbgs() << " * Checking 0x" << Twine::utohexstr(EntryAddress) 605 << " -> "); 606 // Check if there's a proper relocation against the jump table entry. 607 if (HasRelocations) { 608 if (Type == JumpTable::JTT_PIC && 609 !DataPCRelocations.count(EntryAddress)) { 610 LLVM_DEBUG( 611 dbgs() << "FAIL: JTT_PIC table, no relocation for this address\n"); 612 break; 613 } 614 if (Type == JumpTable::JTT_NORMAL && !getRelocationAt(EntryAddress)) { 615 LLVM_DEBUG( 616 dbgs() 617 << "FAIL: JTT_NORMAL table, no relocation for this address\n"); 618 break; 619 } 620 } 621 622 const uint64_t Value = 623 (Type == JumpTable::JTT_PIC) 624 ? Address + *getSignedValueAtAddress(EntryAddress, EntrySize) 625 : *getPointerAtAddress(EntryAddress); 626 627 // __builtin_unreachable() case. 628 if (Value == UnreachableAddress) { 629 addEntryAddress(Value, /*Unreachable*/ true); 630 HasUnreachable = true; 631 LLVM_DEBUG(dbgs() << formatv("OK: {0:x} __builtin_unreachable\n", Value)); 632 continue; 633 } 634 635 // Function start is another special case. It is allowed in the jump table, 636 // but we need at least one another regular entry to distinguish the table 637 // from, e.g. a function pointer array. 638 if (Value == BF.getAddress()) { 639 HasStartAsEntry = true; 640 addEntryAddress(Value); 641 continue; 642 } 643 644 // Function or one of its fragments. 645 const BinaryFunction *TargetBF = getBinaryFunctionContainingAddress(Value); 646 const bool DoesBelongToFunction = 647 BF.containsAddress(Value) || 648 (TargetBF && areRelatedFragments(TargetBF, &BF)); 649 if (!DoesBelongToFunction) { 650 LLVM_DEBUG({ 651 if (!BF.containsAddress(Value)) { 652 dbgs() << "FAIL: function doesn't contain this address\n"; 653 if (TargetBF) { 654 dbgs() << " ! function containing this address: " 655 << TargetBF->getPrintName() << '\n'; 656 if (TargetBF->isFragment()) { 657 dbgs() << " ! is a fragment"; 658 for (BinaryFunction *Parent : TargetBF->ParentFragments) 659 dbgs() << ", parent: " << Parent->getPrintName(); 660 dbgs() << '\n'; 661 } 662 } 663 } 664 }); 665 break; 666 } 667 668 // Check there's an instruction at this offset. 669 if (TargetBF->getState() == BinaryFunction::State::Disassembled && 670 !TargetBF->getInstructionAtOffset(Value - TargetBF->getAddress())) { 671 LLVM_DEBUG(dbgs() << formatv("FAIL: no instruction at {0:x}\n", Value)); 672 break; 673 } 674 675 ++NumRealEntries; 676 LLVM_DEBUG(dbgs() << formatv("OK: {0:x} real entry\n", Value)); 677 678 if (TargetBF != &BF && HasEntryInFragment) 679 *HasEntryInFragment = true; 680 addEntryAddress(Value); 681 } 682 683 // Trim direct/normal jump table to exclude trailing unreachable entries that 684 // can collide with a function address. 685 if (Type == JumpTable::JTT_NORMAL && EntriesAsAddress && 686 TrimmedSize != EntriesAsAddress->size() && 687 getBinaryFunctionAtAddress(UnreachableAddress)) 688 EntriesAsAddress->resize(TrimmedSize); 689 690 // It's a jump table if the number of real entries is more than 1, or there's 691 // one real entry and one or more special targets. If there are only multiple 692 // special targets, then it's not a jump table. 693 return NumRealEntries + (HasUnreachable || HasStartAsEntry) >= 2; 694 } 695 696 void BinaryContext::populateJumpTables() { 697 LLVM_DEBUG(dbgs() << "DataPCRelocations: " << DataPCRelocations.size() 698 << '\n'); 699 for (auto JTI = JumpTables.begin(), JTE = JumpTables.end(); JTI != JTE; 700 ++JTI) { 701 JumpTable *JT = JTI->second; 702 703 bool NonSimpleParent = false; 704 for (BinaryFunction *BF : JT->Parents) 705 NonSimpleParent |= !BF->isSimple(); 706 if (NonSimpleParent) 707 continue; 708 709 uint64_t NextJTAddress = 0; 710 auto NextJTI = std::next(JTI); 711 if (NextJTI != JTE) 712 NextJTAddress = NextJTI->second->getAddress(); 713 714 const bool Success = 715 analyzeJumpTable(JT->getAddress(), JT->Type, *(JT->Parents[0]), 716 NextJTAddress, &JT->EntriesAsAddress, &JT->IsSplit); 717 if (!Success) { 718 LLVM_DEBUG({ 719 dbgs() << "failed to analyze "; 720 JT->print(dbgs()); 721 if (NextJTI != JTE) { 722 dbgs() << "next "; 723 NextJTI->second->print(dbgs()); 724 } 725 }); 726 llvm_unreachable("jump table heuristic failure"); 727 } 728 for (BinaryFunction *Frag : JT->Parents) { 729 if (JT->IsSplit) 730 Frag->setHasIndirectTargetToSplitFragment(true); 731 for (uint64_t EntryAddress : JT->EntriesAsAddress) 732 // if target is builtin_unreachable 733 if (EntryAddress == Frag->getAddress() + Frag->getSize()) { 734 Frag->IgnoredBranches.emplace_back(EntryAddress - Frag->getAddress(), 735 Frag->getSize()); 736 } else if (EntryAddress >= Frag->getAddress() && 737 EntryAddress < Frag->getAddress() + Frag->getSize()) { 738 Frag->registerReferencedOffset(EntryAddress - Frag->getAddress()); 739 } 740 } 741 742 // In strict mode, erase PC-relative relocation record. Later we check that 743 // all such records are erased and thus have been accounted for. 744 if (opts::StrictMode && JT->Type == JumpTable::JTT_PIC) { 745 for (uint64_t Address = JT->getAddress(); 746 Address < JT->getAddress() + JT->getSize(); 747 Address += JT->EntrySize) { 748 DataPCRelocations.erase(DataPCRelocations.find(Address)); 749 } 750 } 751 752 // Mark to skip the function and all its fragments. 753 for (BinaryFunction *Frag : JT->Parents) 754 if (Frag->hasIndirectTargetToSplitFragment()) 755 addFragmentsToSkip(Frag); 756 } 757 758 if (opts::StrictMode && DataPCRelocations.size()) { 759 LLVM_DEBUG({ 760 dbgs() << DataPCRelocations.size() 761 << " unclaimed PC-relative relocations left in data:\n"; 762 for (uint64_t Reloc : DataPCRelocations) 763 dbgs() << Twine::utohexstr(Reloc) << '\n'; 764 }); 765 assert(0 && "unclaimed PC-relative relocations left in data\n"); 766 } 767 clearList(DataPCRelocations); 768 } 769 770 void BinaryContext::skipMarkedFragments() { 771 std::vector<BinaryFunction *> FragmentQueue; 772 // Copy the functions to FragmentQueue. 773 FragmentQueue.assign(FragmentsToSkip.begin(), FragmentsToSkip.end()); 774 auto addToWorklist = [&](BinaryFunction *Function) -> void { 775 if (FragmentsToSkip.count(Function)) 776 return; 777 FragmentQueue.push_back(Function); 778 addFragmentsToSkip(Function); 779 }; 780 // Functions containing split jump tables need to be skipped with all 781 // fragments (transitively). 782 for (size_t I = 0; I != FragmentQueue.size(); I++) { 783 BinaryFunction *BF = FragmentQueue[I]; 784 assert(FragmentsToSkip.count(BF) && 785 "internal error in traversing function fragments"); 786 if (opts::Verbosity >= 1) 787 this->errs() << "BOLT-WARNING: Ignoring " << BF->getPrintName() << '\n'; 788 BF->setSimple(false); 789 BF->setHasIndirectTargetToSplitFragment(true); 790 791 llvm::for_each(BF->Fragments, addToWorklist); 792 llvm::for_each(BF->ParentFragments, addToWorklist); 793 } 794 if (!FragmentsToSkip.empty()) 795 this->errs() << "BOLT-WARNING: skipped " << FragmentsToSkip.size() 796 << " function" << (FragmentsToSkip.size() == 1 ? "" : "s") 797 << " due to cold fragments\n"; 798 } 799 800 MCSymbol *BinaryContext::getOrCreateGlobalSymbol(uint64_t Address, Twine Prefix, 801 uint64_t Size, 802 uint16_t Alignment, 803 unsigned Flags) { 804 auto Itr = BinaryDataMap.find(Address); 805 if (Itr != BinaryDataMap.end()) { 806 assert(Itr->second->getSize() == Size || !Size); 807 return Itr->second->getSymbol(); 808 } 809 810 std::string Name = (Prefix + "0x" + Twine::utohexstr(Address)).str(); 811 assert(!GlobalSymbols.count(Name) && "created name is not unique"); 812 return registerNameAtAddress(Name, Address, Size, Alignment, Flags); 813 } 814 815 MCSymbol *BinaryContext::getOrCreateUndefinedGlobalSymbol(StringRef Name) { 816 return Ctx->getOrCreateSymbol(Name); 817 } 818 819 BinaryFunction *BinaryContext::createBinaryFunction( 820 const std::string &Name, BinarySection &Section, uint64_t Address, 821 uint64_t Size, uint64_t SymbolSize, uint16_t Alignment) { 822 auto Result = BinaryFunctions.emplace( 823 Address, BinaryFunction(Name, Section, Address, Size, *this)); 824 assert(Result.second == true && "unexpected duplicate function"); 825 BinaryFunction *BF = &Result.first->second; 826 registerNameAtAddress(Name, Address, SymbolSize ? SymbolSize : Size, 827 Alignment); 828 setSymbolToFunctionMap(BF->getSymbol(), BF); 829 return BF; 830 } 831 832 const MCSymbol * 833 BinaryContext::getOrCreateJumpTable(BinaryFunction &Function, uint64_t Address, 834 JumpTable::JumpTableType Type) { 835 // Two fragments of same function access same jump table 836 if (JumpTable *JT = getJumpTableContainingAddress(Address)) { 837 assert(JT->Type == Type && "jump table types have to match"); 838 assert(Address == JT->getAddress() && "unexpected non-empty jump table"); 839 840 // Prevent associating a jump table to a specific fragment twice. 841 if (!llvm::is_contained(JT->Parents, &Function)) { 842 assert(llvm::all_of(JT->Parents, 843 [&](const BinaryFunction *BF) { 844 return areRelatedFragments(&Function, BF); 845 }) && 846 "cannot re-use jump table of a different function"); 847 // Duplicate the entry for the parent function for easy access 848 JT->Parents.push_back(&Function); 849 if (opts::Verbosity > 2) { 850 this->outs() << "BOLT-INFO: Multiple fragments access same jump table: " 851 << JT->Parents[0]->getPrintName() << "; " 852 << Function.getPrintName() << "\n"; 853 JT->print(this->outs()); 854 } 855 Function.JumpTables.emplace(Address, JT); 856 for (BinaryFunction *Parent : JT->Parents) 857 Parent->setHasIndirectTargetToSplitFragment(true); 858 } 859 860 bool IsJumpTableParent = false; 861 (void)IsJumpTableParent; 862 for (BinaryFunction *Frag : JT->Parents) 863 if (Frag == &Function) 864 IsJumpTableParent = true; 865 assert(IsJumpTableParent && 866 "cannot re-use jump table of a different function"); 867 return JT->getFirstLabel(); 868 } 869 870 // Re-use the existing symbol if possible. 871 MCSymbol *JTLabel = nullptr; 872 if (BinaryData *Object = getBinaryDataAtAddress(Address)) { 873 if (!isInternalSymbolName(Object->getSymbol()->getName())) 874 JTLabel = Object->getSymbol(); 875 } 876 877 const uint64_t EntrySize = getJumpTableEntrySize(Type); 878 if (!JTLabel) { 879 const std::string JumpTableName = generateJumpTableName(Function, Address); 880 JTLabel = registerNameAtAddress(JumpTableName, Address, 0, EntrySize); 881 } 882 883 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: creating jump table " << JTLabel->getName() 884 << " in function " << Function << '\n'); 885 886 JumpTable *JT = new JumpTable(*JTLabel, Address, EntrySize, Type, 887 JumpTable::LabelMapType{{0, JTLabel}}, 888 *getSectionForAddress(Address)); 889 JT->Parents.push_back(&Function); 890 if (opts::Verbosity > 2) 891 JT->print(this->outs()); 892 JumpTables.emplace(Address, JT); 893 894 // Duplicate the entry for the parent function for easy access. 895 Function.JumpTables.emplace(Address, JT); 896 return JTLabel; 897 } 898 899 std::pair<uint64_t, const MCSymbol *> 900 BinaryContext::duplicateJumpTable(BinaryFunction &Function, JumpTable *JT, 901 const MCSymbol *OldLabel) { 902 auto L = scopeLock(); 903 unsigned Offset = 0; 904 bool Found = false; 905 for (std::pair<const unsigned, MCSymbol *> Elmt : JT->Labels) { 906 if (Elmt.second != OldLabel) 907 continue; 908 Offset = Elmt.first; 909 Found = true; 910 break; 911 } 912 assert(Found && "Label not found"); 913 (void)Found; 914 MCSymbol *NewLabel = Ctx->createNamedTempSymbol("duplicatedJT"); 915 JumpTable *NewJT = 916 new JumpTable(*NewLabel, JT->getAddress(), JT->EntrySize, JT->Type, 917 JumpTable::LabelMapType{{Offset, NewLabel}}, 918 *getSectionForAddress(JT->getAddress())); 919 NewJT->Parents = JT->Parents; 920 NewJT->Entries = JT->Entries; 921 NewJT->Counts = JT->Counts; 922 uint64_t JumpTableID = ++DuplicatedJumpTables; 923 // Invert it to differentiate from regular jump tables whose IDs are their 924 // addresses in the input binary memory space 925 JumpTableID = ~JumpTableID; 926 JumpTables.emplace(JumpTableID, NewJT); 927 Function.JumpTables.emplace(JumpTableID, NewJT); 928 return std::make_pair(JumpTableID, NewLabel); 929 } 930 931 std::string BinaryContext::generateJumpTableName(const BinaryFunction &BF, 932 uint64_t Address) { 933 size_t Id; 934 uint64_t Offset = 0; 935 if (const JumpTable *JT = BF.getJumpTableContainingAddress(Address)) { 936 Offset = Address - JT->getAddress(); 937 auto JTLabelsIt = JT->Labels.find(Offset); 938 if (JTLabelsIt != JT->Labels.end()) 939 return std::string(JTLabelsIt->second->getName()); 940 941 auto JTIdsIt = JumpTableIds.find(JT->getAddress()); 942 assert(JTIdsIt != JumpTableIds.end()); 943 Id = JTIdsIt->second; 944 } else { 945 Id = JumpTableIds[Address] = BF.JumpTables.size(); 946 } 947 return ("JUMP_TABLE/" + BF.getOneName().str() + "." + std::to_string(Id) + 948 (Offset ? ("." + std::to_string(Offset)) : "")); 949 } 950 951 bool BinaryContext::hasValidCodePadding(const BinaryFunction &BF) { 952 // FIXME: aarch64 support is missing. 953 if (!isX86()) 954 return true; 955 956 if (BF.getSize() == BF.getMaxSize()) 957 return true; 958 959 ErrorOr<ArrayRef<unsigned char>> FunctionData = BF.getData(); 960 assert(FunctionData && "cannot get function as data"); 961 962 uint64_t Offset = BF.getSize(); 963 MCInst Instr; 964 uint64_t InstrSize = 0; 965 uint64_t InstrAddress = BF.getAddress() + Offset; 966 using std::placeholders::_1; 967 968 // Skip instructions that satisfy the predicate condition. 969 auto skipInstructions = [&](std::function<bool(const MCInst &)> Predicate) { 970 const uint64_t StartOffset = Offset; 971 for (; Offset < BF.getMaxSize(); 972 Offset += InstrSize, InstrAddress += InstrSize) { 973 if (!DisAsm->getInstruction(Instr, InstrSize, FunctionData->slice(Offset), 974 InstrAddress, nulls())) 975 break; 976 if (!Predicate(Instr)) 977 break; 978 } 979 980 return Offset - StartOffset; 981 }; 982 983 // Skip a sequence of zero bytes. 984 auto skipZeros = [&]() { 985 const uint64_t StartOffset = Offset; 986 for (; Offset < BF.getMaxSize(); ++Offset) 987 if ((*FunctionData)[Offset] != 0) 988 break; 989 990 return Offset - StartOffset; 991 }; 992 993 // Accept the whole padding area filled with breakpoints. 994 auto isBreakpoint = std::bind(&MCPlusBuilder::isBreakpoint, MIB.get(), _1); 995 if (skipInstructions(isBreakpoint) && Offset == BF.getMaxSize()) 996 return true; 997 998 auto isNoop = std::bind(&MCPlusBuilder::isNoop, MIB.get(), _1); 999 1000 // Some functions have a jump to the next function or to the padding area 1001 // inserted after the body. 1002 auto isSkipJump = [&](const MCInst &Instr) { 1003 uint64_t TargetAddress = 0; 1004 if (MIB->isUnconditionalBranch(Instr) && 1005 MIB->evaluateBranch(Instr, InstrAddress, InstrSize, TargetAddress)) { 1006 if (TargetAddress >= InstrAddress + InstrSize && 1007 TargetAddress <= BF.getAddress() + BF.getMaxSize()) { 1008 return true; 1009 } 1010 } 1011 return false; 1012 }; 1013 1014 // Skip over nops, jumps, and zero padding. Allow interleaving (this happens). 1015 while (skipInstructions(isNoop) || skipInstructions(isSkipJump) || 1016 skipZeros()) 1017 ; 1018 1019 if (Offset == BF.getMaxSize()) 1020 return true; 1021 1022 if (opts::Verbosity >= 1) { 1023 this->errs() << "BOLT-WARNING: bad padding at address 0x" 1024 << Twine::utohexstr(BF.getAddress() + BF.getSize()) 1025 << " starting at offset " << (Offset - BF.getSize()) 1026 << " in function " << BF << '\n' 1027 << FunctionData->slice(BF.getSize(), 1028 BF.getMaxSize() - BF.getSize()) 1029 << '\n'; 1030 } 1031 1032 return false; 1033 } 1034 1035 void BinaryContext::adjustCodePadding() { 1036 for (auto &BFI : BinaryFunctions) { 1037 BinaryFunction &BF = BFI.second; 1038 if (!shouldEmit(BF)) 1039 continue; 1040 1041 if (!hasValidCodePadding(BF)) { 1042 if (HasRelocations) { 1043 if (opts::Verbosity >= 1) { 1044 this->outs() << "BOLT-INFO: function " << BF 1045 << " has invalid padding. Ignoring the function.\n"; 1046 } 1047 BF.setIgnored(); 1048 } else { 1049 BF.setMaxSize(BF.getSize()); 1050 } 1051 } 1052 } 1053 } 1054 1055 MCSymbol *BinaryContext::registerNameAtAddress(StringRef Name, uint64_t Address, 1056 uint64_t Size, 1057 uint16_t Alignment, 1058 unsigned Flags) { 1059 // Register the name with MCContext. 1060 MCSymbol *Symbol = Ctx->getOrCreateSymbol(Name); 1061 1062 auto GAI = BinaryDataMap.find(Address); 1063 BinaryData *BD; 1064 if (GAI == BinaryDataMap.end()) { 1065 ErrorOr<BinarySection &> SectionOrErr = getSectionForAddress(Address); 1066 BinarySection &Section = 1067 SectionOrErr ? SectionOrErr.get() : absoluteSection(); 1068 BD = new BinaryData(*Symbol, Address, Size, Alignment ? Alignment : 1, 1069 Section, Flags); 1070 GAI = BinaryDataMap.emplace(Address, BD).first; 1071 GlobalSymbols[Name] = BD; 1072 updateObjectNesting(GAI); 1073 } else { 1074 BD = GAI->second; 1075 if (!BD->hasName(Name)) { 1076 GlobalSymbols[Name] = BD; 1077 BD->Symbols.push_back(Symbol); 1078 } 1079 } 1080 1081 return Symbol; 1082 } 1083 1084 const BinaryData * 1085 BinaryContext::getBinaryDataContainingAddressImpl(uint64_t Address) const { 1086 auto NI = BinaryDataMap.lower_bound(Address); 1087 auto End = BinaryDataMap.end(); 1088 if ((NI != End && Address == NI->first) || 1089 ((NI != BinaryDataMap.begin()) && (NI-- != BinaryDataMap.begin()))) { 1090 if (NI->second->containsAddress(Address)) 1091 return NI->second; 1092 1093 // If this is a sub-symbol, see if a parent data contains the address. 1094 const BinaryData *BD = NI->second->getParent(); 1095 while (BD) { 1096 if (BD->containsAddress(Address)) 1097 return BD; 1098 BD = BD->getParent(); 1099 } 1100 } 1101 return nullptr; 1102 } 1103 1104 BinaryData *BinaryContext::getGOTSymbol() { 1105 // First tries to find a global symbol with that name 1106 BinaryData *GOTSymBD = getBinaryDataByName("_GLOBAL_OFFSET_TABLE_"); 1107 if (GOTSymBD) 1108 return GOTSymBD; 1109 1110 // This symbol might be hidden from run-time link, so fetch the local 1111 // definition if available. 1112 GOTSymBD = getBinaryDataByName("_GLOBAL_OFFSET_TABLE_/1"); 1113 if (!GOTSymBD) 1114 return nullptr; 1115 1116 // If the local symbol is not unique, fail 1117 unsigned Index = 2; 1118 SmallString<30> Storage; 1119 while (const BinaryData *BD = 1120 getBinaryDataByName(Twine("_GLOBAL_OFFSET_TABLE_/") 1121 .concat(Twine(Index++)) 1122 .toStringRef(Storage))) 1123 if (BD->getAddress() != GOTSymBD->getAddress()) 1124 return nullptr; 1125 1126 return GOTSymBD; 1127 } 1128 1129 bool BinaryContext::setBinaryDataSize(uint64_t Address, uint64_t Size) { 1130 auto NI = BinaryDataMap.find(Address); 1131 assert(NI != BinaryDataMap.end()); 1132 if (NI == BinaryDataMap.end()) 1133 return false; 1134 // TODO: it's possible that a jump table starts at the same address 1135 // as a larger blob of private data. When we set the size of the 1136 // jump table, it might be smaller than the total blob size. In this 1137 // case we just leave the original size since (currently) it won't really 1138 // affect anything. 1139 assert((!NI->second->Size || NI->second->Size == Size || 1140 (NI->second->isJumpTable() && NI->second->Size > Size)) && 1141 "can't change the size of a symbol that has already had its " 1142 "size set"); 1143 if (!NI->second->Size) { 1144 NI->second->Size = Size; 1145 updateObjectNesting(NI); 1146 return true; 1147 } 1148 return false; 1149 } 1150 1151 void BinaryContext::generateSymbolHashes() { 1152 auto isPadding = [](const BinaryData &BD) { 1153 StringRef Contents = BD.getSection().getContents(); 1154 StringRef SymData = Contents.substr(BD.getOffset(), BD.getSize()); 1155 return (BD.getName().starts_with("HOLEat") || 1156 SymData.find_first_not_of(0) == StringRef::npos); 1157 }; 1158 1159 uint64_t NumCollisions = 0; 1160 for (auto &Entry : BinaryDataMap) { 1161 BinaryData &BD = *Entry.second; 1162 StringRef Name = BD.getName(); 1163 1164 if (!isInternalSymbolName(Name)) 1165 continue; 1166 1167 // First check if a non-anonymous alias exists and move it to the front. 1168 if (BD.getSymbols().size() > 1) { 1169 auto Itr = llvm::find_if(BD.getSymbols(), [&](const MCSymbol *Symbol) { 1170 return !isInternalSymbolName(Symbol->getName()); 1171 }); 1172 if (Itr != BD.getSymbols().end()) { 1173 size_t Idx = std::distance(BD.getSymbols().begin(), Itr); 1174 std::swap(BD.getSymbols()[0], BD.getSymbols()[Idx]); 1175 continue; 1176 } 1177 } 1178 1179 // We have to skip 0 size symbols since they will all collide. 1180 if (BD.getSize() == 0) { 1181 continue; 1182 } 1183 1184 const uint64_t Hash = BD.getSection().hash(BD); 1185 const size_t Idx = Name.find("0x"); 1186 std::string NewName = 1187 (Twine(Name.substr(0, Idx)) + "_" + Twine::utohexstr(Hash)).str(); 1188 if (getBinaryDataByName(NewName)) { 1189 // Ignore collisions for symbols that appear to be padding 1190 // (i.e. all zeros or a "hole") 1191 if (!isPadding(BD)) { 1192 if (opts::Verbosity) { 1193 this->errs() << "BOLT-WARNING: collision detected when hashing " << BD 1194 << " with new name (" << NewName << "), skipping.\n"; 1195 } 1196 ++NumCollisions; 1197 } 1198 continue; 1199 } 1200 BD.Symbols.insert(BD.Symbols.begin(), Ctx->getOrCreateSymbol(NewName)); 1201 GlobalSymbols[NewName] = &BD; 1202 } 1203 if (NumCollisions) { 1204 this->errs() << "BOLT-WARNING: " << NumCollisions 1205 << " collisions detected while hashing binary objects"; 1206 if (!opts::Verbosity) 1207 this->errs() << ". Use -v=1 to see the list."; 1208 this->errs() << '\n'; 1209 } 1210 } 1211 1212 bool BinaryContext::registerFragment(BinaryFunction &TargetFunction, 1213 BinaryFunction &Function) { 1214 assert(TargetFunction.isFragment() && "TargetFunction must be a fragment"); 1215 if (TargetFunction.isChildOf(Function)) 1216 return true; 1217 TargetFunction.addParentFragment(Function); 1218 Function.addFragment(TargetFunction); 1219 FragmentClasses.unionSets(&TargetFunction, &Function); 1220 if (!HasRelocations) { 1221 TargetFunction.setSimple(false); 1222 Function.setSimple(false); 1223 } 1224 if (opts::Verbosity >= 1) { 1225 this->outs() << "BOLT-INFO: marking " << TargetFunction 1226 << " as a fragment of " << Function << '\n'; 1227 } 1228 return true; 1229 } 1230 1231 void BinaryContext::addAdrpAddRelocAArch64(BinaryFunction &BF, 1232 MCInst &LoadLowBits, 1233 MCInst &LoadHiBits, 1234 uint64_t Target) { 1235 const MCSymbol *TargetSymbol; 1236 uint64_t Addend = 0; 1237 std::tie(TargetSymbol, Addend) = handleAddressRef(Target, BF, 1238 /*IsPCRel*/ true); 1239 int64_t Val; 1240 MIB->replaceImmWithSymbolRef(LoadHiBits, TargetSymbol, Addend, Ctx.get(), Val, 1241 ELF::R_AARCH64_ADR_PREL_PG_HI21); 1242 MIB->replaceImmWithSymbolRef(LoadLowBits, TargetSymbol, Addend, Ctx.get(), 1243 Val, ELF::R_AARCH64_ADD_ABS_LO12_NC); 1244 } 1245 1246 bool BinaryContext::handleAArch64Veneer(uint64_t Address, bool MatchOnly) { 1247 BinaryFunction *TargetFunction = getBinaryFunctionContainingAddress(Address); 1248 if (TargetFunction) 1249 return false; 1250 1251 ErrorOr<BinarySection &> Section = getSectionForAddress(Address); 1252 assert(Section && "cannot get section for referenced address"); 1253 if (!Section->isText()) 1254 return false; 1255 1256 bool Ret = false; 1257 StringRef SectionContents = Section->getContents(); 1258 uint64_t Offset = Address - Section->getAddress(); 1259 const uint64_t MaxSize = SectionContents.size() - Offset; 1260 const uint8_t *Bytes = 1261 reinterpret_cast<const uint8_t *>(SectionContents.data()); 1262 ArrayRef<uint8_t> Data(Bytes + Offset, MaxSize); 1263 1264 auto matchVeneer = [&](BinaryFunction::InstrMapType &Instructions, 1265 MCInst &Instruction, uint64_t Offset, 1266 uint64_t AbsoluteInstrAddr, 1267 uint64_t TotalSize) -> bool { 1268 MCInst *TargetHiBits, *TargetLowBits; 1269 uint64_t TargetAddress, Count; 1270 Count = MIB->matchLinkerVeneer(Instructions.begin(), Instructions.end(), 1271 AbsoluteInstrAddr, Instruction, TargetHiBits, 1272 TargetLowBits, TargetAddress); 1273 if (!Count) 1274 return false; 1275 1276 if (MatchOnly) 1277 return true; 1278 1279 // NOTE The target symbol was created during disassemble's 1280 // handleExternalReference 1281 const MCSymbol *VeneerSymbol = getOrCreateGlobalSymbol(Address, "FUNCat"); 1282 BinaryFunction *Veneer = createBinaryFunction(VeneerSymbol->getName().str(), 1283 *Section, Address, TotalSize); 1284 addAdrpAddRelocAArch64(*Veneer, *TargetLowBits, *TargetHiBits, 1285 TargetAddress); 1286 MIB->addAnnotation(Instruction, "AArch64Veneer", true); 1287 Veneer->addInstruction(Offset, std::move(Instruction)); 1288 --Count; 1289 for (auto It = Instructions.rbegin(); Count != 0; ++It, --Count) { 1290 MIB->addAnnotation(It->second, "AArch64Veneer", true); 1291 Veneer->addInstruction(It->first, std::move(It->second)); 1292 } 1293 1294 Veneer->getOrCreateLocalLabel(Address); 1295 Veneer->setMaxSize(TotalSize); 1296 Veneer->updateState(BinaryFunction::State::Disassembled); 1297 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: handling veneer function at 0x" 1298 << Twine::utohexstr(Address) << "\n"); 1299 return true; 1300 }; 1301 1302 uint64_t Size = 0, TotalSize = 0; 1303 BinaryFunction::InstrMapType VeneerInstructions; 1304 for (Offset = 0; Offset < MaxSize; Offset += Size) { 1305 MCInst Instruction; 1306 const uint64_t AbsoluteInstrAddr = Address + Offset; 1307 if (!SymbolicDisAsm->getInstruction(Instruction, Size, Data.slice(Offset), 1308 AbsoluteInstrAddr, nulls())) 1309 break; 1310 1311 TotalSize += Size; 1312 if (MIB->isBranch(Instruction)) { 1313 Ret = matchVeneer(VeneerInstructions, Instruction, Offset, 1314 AbsoluteInstrAddr, TotalSize); 1315 break; 1316 } 1317 1318 VeneerInstructions.emplace(Offset, std::move(Instruction)); 1319 } 1320 1321 return Ret; 1322 } 1323 1324 void BinaryContext::processInterproceduralReferences() { 1325 for (const std::pair<BinaryFunction *, uint64_t> &It : 1326 InterproceduralReferences) { 1327 BinaryFunction &Function = *It.first; 1328 uint64_t Address = It.second; 1329 // Process interprocedural references from ignored functions in BAT mode 1330 // (non-simple in non-relocation mode) to properly register entry points 1331 if (!Address || (Function.isIgnored() && !HasBATSection)) 1332 continue; 1333 1334 BinaryFunction *TargetFunction = 1335 getBinaryFunctionContainingAddress(Address); 1336 if (&Function == TargetFunction) 1337 continue; 1338 1339 if (TargetFunction) { 1340 if (TargetFunction->isFragment() && 1341 !areRelatedFragments(TargetFunction, &Function)) { 1342 this->errs() 1343 << "BOLT-WARNING: interprocedural reference between unrelated " 1344 "fragments: " 1345 << Function.getPrintName() << " and " 1346 << TargetFunction->getPrintName() << '\n'; 1347 } 1348 if (uint64_t Offset = Address - TargetFunction->getAddress()) 1349 TargetFunction->addEntryPointAtOffset(Offset); 1350 1351 continue; 1352 } 1353 1354 // Check if address falls in function padding space - this could be 1355 // unmarked data in code. In this case adjust the padding space size. 1356 ErrorOr<BinarySection &> Section = getSectionForAddress(Address); 1357 assert(Section && "cannot get section for referenced address"); 1358 1359 if (!Section->isText()) 1360 continue; 1361 1362 // PLT requires special handling and could be ignored in this context. 1363 StringRef SectionName = Section->getName(); 1364 if (SectionName == ".plt" || SectionName == ".plt.got") 1365 continue; 1366 1367 // Check if it is aarch64 veneer written at Address 1368 if (isAArch64() && handleAArch64Veneer(Address)) 1369 continue; 1370 1371 if (opts::processAllFunctions()) { 1372 this->errs() << "BOLT-ERROR: cannot process binaries with unmarked " 1373 << "object in code at address 0x" 1374 << Twine::utohexstr(Address) << " belonging to section " 1375 << SectionName << " in current mode\n"; 1376 exit(1); 1377 } 1378 1379 TargetFunction = getBinaryFunctionContainingAddress(Address, 1380 /*CheckPastEnd=*/false, 1381 /*UseMaxSize=*/true); 1382 // We are not going to overwrite non-simple functions, but for simple 1383 // ones - adjust the padding size. 1384 if (TargetFunction && TargetFunction->isSimple()) { 1385 this->errs() 1386 << "BOLT-WARNING: function " << *TargetFunction 1387 << " has an object detected in a padding region at address 0x" 1388 << Twine::utohexstr(Address) << '\n'; 1389 TargetFunction->setMaxSize(TargetFunction->getSize()); 1390 } 1391 } 1392 1393 InterproceduralReferences.clear(); 1394 } 1395 1396 void BinaryContext::postProcessSymbolTable() { 1397 fixBinaryDataHoles(); 1398 bool Valid = true; 1399 for (auto &Entry : BinaryDataMap) { 1400 BinaryData *BD = Entry.second; 1401 if ((BD->getName().starts_with("SYMBOLat") || 1402 BD->getName().starts_with("DATAat")) && 1403 !BD->getParent() && !BD->getSize() && !BD->isAbsolute() && 1404 BD->getSection()) { 1405 this->errs() << "BOLT-WARNING: zero-sized top level symbol: " << *BD 1406 << "\n"; 1407 Valid = false; 1408 } 1409 } 1410 assert(Valid); 1411 (void)Valid; 1412 generateSymbolHashes(); 1413 } 1414 1415 void BinaryContext::foldFunction(BinaryFunction &ChildBF, 1416 BinaryFunction &ParentBF) { 1417 assert(!ChildBF.isMultiEntry() && !ParentBF.isMultiEntry() && 1418 "cannot merge functions with multiple entry points"); 1419 1420 std::unique_lock<llvm::sys::RWMutex> WriteCtxLock(CtxMutex, std::defer_lock); 1421 std::unique_lock<llvm::sys::RWMutex> WriteSymbolMapLock( 1422 SymbolToFunctionMapMutex, std::defer_lock); 1423 1424 const StringRef ChildName = ChildBF.getOneName(); 1425 1426 // Move symbols over and update bookkeeping info. 1427 for (MCSymbol *Symbol : ChildBF.getSymbols()) { 1428 ParentBF.getSymbols().push_back(Symbol); 1429 WriteSymbolMapLock.lock(); 1430 SymbolToFunctionMap[Symbol] = &ParentBF; 1431 WriteSymbolMapLock.unlock(); 1432 // NB: there's no need to update BinaryDataMap and GlobalSymbols. 1433 } 1434 ChildBF.getSymbols().clear(); 1435 1436 // Move other names the child function is known under. 1437 llvm::move(ChildBF.Aliases, std::back_inserter(ParentBF.Aliases)); 1438 ChildBF.Aliases.clear(); 1439 1440 if (HasRelocations) { 1441 // Merge execution counts of ChildBF into those of ParentBF. 1442 // Without relocations, we cannot reliably merge profiles as both functions 1443 // continue to exist and either one can be executed. 1444 ChildBF.mergeProfileDataInto(ParentBF); 1445 1446 std::shared_lock<llvm::sys::RWMutex> ReadBfsLock(BinaryFunctionsMutex, 1447 std::defer_lock); 1448 std::unique_lock<llvm::sys::RWMutex> WriteBfsLock(BinaryFunctionsMutex, 1449 std::defer_lock); 1450 // Remove ChildBF from the global set of functions in relocs mode. 1451 ReadBfsLock.lock(); 1452 auto FI = BinaryFunctions.find(ChildBF.getAddress()); 1453 ReadBfsLock.unlock(); 1454 1455 assert(FI != BinaryFunctions.end() && "function not found"); 1456 assert(&ChildBF == &FI->second && "function mismatch"); 1457 1458 WriteBfsLock.lock(); 1459 ChildBF.clearDisasmState(); 1460 FI = BinaryFunctions.erase(FI); 1461 WriteBfsLock.unlock(); 1462 1463 } else { 1464 // In non-relocation mode we keep the function, but rename it. 1465 std::string NewName = "__ICF_" + ChildName.str(); 1466 1467 WriteCtxLock.lock(); 1468 ChildBF.getSymbols().push_back(Ctx->getOrCreateSymbol(NewName)); 1469 WriteCtxLock.unlock(); 1470 1471 ChildBF.setFolded(&ParentBF); 1472 } 1473 1474 ParentBF.setHasFunctionsFoldedInto(); 1475 } 1476 1477 void BinaryContext::fixBinaryDataHoles() { 1478 assert(validateObjectNesting() && "object nesting inconsistency detected"); 1479 1480 for (BinarySection &Section : allocatableSections()) { 1481 std::vector<std::pair<uint64_t, uint64_t>> Holes; 1482 1483 auto isNotHole = [&Section](const binary_data_iterator &Itr) { 1484 BinaryData *BD = Itr->second; 1485 bool isHole = (!BD->getParent() && !BD->getSize() && BD->isObject() && 1486 (BD->getName().starts_with("SYMBOLat0x") || 1487 BD->getName().starts_with("DATAat0x") || 1488 BD->getName().starts_with("ANONYMOUS"))); 1489 return !isHole && BD->getSection() == Section && !BD->getParent(); 1490 }; 1491 1492 auto BDStart = BinaryDataMap.begin(); 1493 auto BDEnd = BinaryDataMap.end(); 1494 auto Itr = FilteredBinaryDataIterator(isNotHole, BDStart, BDEnd); 1495 auto End = FilteredBinaryDataIterator(isNotHole, BDEnd, BDEnd); 1496 1497 uint64_t EndAddress = Section.getAddress(); 1498 1499 while (Itr != End) { 1500 if (Itr->second->getAddress() > EndAddress) { 1501 uint64_t Gap = Itr->second->getAddress() - EndAddress; 1502 Holes.emplace_back(EndAddress, Gap); 1503 } 1504 EndAddress = Itr->second->getEndAddress(); 1505 ++Itr; 1506 } 1507 1508 if (EndAddress < Section.getEndAddress()) 1509 Holes.emplace_back(EndAddress, Section.getEndAddress() - EndAddress); 1510 1511 // If there is already a symbol at the start of the hole, grow that symbol 1512 // to cover the rest. Otherwise, create a new symbol to cover the hole. 1513 for (std::pair<uint64_t, uint64_t> &Hole : Holes) { 1514 BinaryData *BD = getBinaryDataAtAddress(Hole.first); 1515 if (BD) { 1516 // BD->getSection() can be != Section if there are sections that 1517 // overlap. In this case it is probably safe to just skip the holes 1518 // since the overlapping section will not(?) have any symbols in it. 1519 if (BD->getSection() == Section) 1520 setBinaryDataSize(Hole.first, Hole.second); 1521 } else { 1522 getOrCreateGlobalSymbol(Hole.first, "HOLEat", Hole.second, 1); 1523 } 1524 } 1525 } 1526 1527 assert(validateObjectNesting() && "object nesting inconsistency detected"); 1528 assert(validateHoles() && "top level hole detected in object map"); 1529 } 1530 1531 void BinaryContext::printGlobalSymbols(raw_ostream &OS) const { 1532 const BinarySection *CurrentSection = nullptr; 1533 bool FirstSection = true; 1534 1535 for (auto &Entry : BinaryDataMap) { 1536 const BinaryData *BD = Entry.second; 1537 const BinarySection &Section = BD->getSection(); 1538 if (FirstSection || Section != *CurrentSection) { 1539 uint64_t Address, Size; 1540 StringRef Name = Section.getName(); 1541 if (Section) { 1542 Address = Section.getAddress(); 1543 Size = Section.getSize(); 1544 } else { 1545 Address = BD->getAddress(); 1546 Size = BD->getSize(); 1547 } 1548 OS << "BOLT-INFO: Section " << Name << ", " 1549 << "0x" + Twine::utohexstr(Address) << ":" 1550 << "0x" + Twine::utohexstr(Address + Size) << "/" << Size << "\n"; 1551 CurrentSection = &Section; 1552 FirstSection = false; 1553 } 1554 1555 OS << "BOLT-INFO: "; 1556 const BinaryData *P = BD->getParent(); 1557 while (P) { 1558 OS << " "; 1559 P = P->getParent(); 1560 } 1561 OS << *BD << "\n"; 1562 } 1563 } 1564 1565 Expected<unsigned> BinaryContext::getDwarfFile( 1566 StringRef Directory, StringRef FileName, unsigned FileNumber, 1567 std::optional<MD5::MD5Result> Checksum, std::optional<StringRef> Source, 1568 unsigned CUID, unsigned DWARFVersion) { 1569 DwarfLineTable &Table = DwarfLineTablesCUMap[CUID]; 1570 return Table.tryGetFile(Directory, FileName, Checksum, Source, DWARFVersion, 1571 FileNumber); 1572 } 1573 1574 unsigned BinaryContext::addDebugFilenameToUnit(const uint32_t DestCUID, 1575 const uint32_t SrcCUID, 1576 unsigned FileIndex) { 1577 DWARFCompileUnit *SrcUnit = DwCtx->getCompileUnitForOffset(SrcCUID); 1578 const DWARFDebugLine::LineTable *LineTable = 1579 DwCtx->getLineTableForUnit(SrcUnit); 1580 const std::vector<DWARFDebugLine::FileNameEntry> &FileNames = 1581 LineTable->Prologue.FileNames; 1582 // Dir indexes start at 1, as DWARF file numbers, and a dir index 0 1583 // means empty dir. 1584 assert(FileIndex > 0 && FileIndex <= FileNames.size() && 1585 "FileIndex out of range for the compilation unit."); 1586 StringRef Dir = ""; 1587 if (FileNames[FileIndex - 1].DirIdx != 0) { 1588 if (std::optional<const char *> DirName = dwarf::toString( 1589 LineTable->Prologue 1590 .IncludeDirectories[FileNames[FileIndex - 1].DirIdx - 1])) { 1591 Dir = *DirName; 1592 } 1593 } 1594 StringRef FileName = ""; 1595 if (std::optional<const char *> FName = 1596 dwarf::toString(FileNames[FileIndex - 1].Name)) 1597 FileName = *FName; 1598 assert(FileName != ""); 1599 DWARFCompileUnit *DstUnit = DwCtx->getCompileUnitForOffset(DestCUID); 1600 return cantFail(getDwarfFile(Dir, FileName, 0, std::nullopt, std::nullopt, 1601 DestCUID, DstUnit->getVersion())); 1602 } 1603 1604 std::vector<BinaryFunction *> BinaryContext::getSortedFunctions() { 1605 std::vector<BinaryFunction *> SortedFunctions(BinaryFunctions.size()); 1606 llvm::transform(llvm::make_second_range(BinaryFunctions), 1607 SortedFunctions.begin(), 1608 [](BinaryFunction &BF) { return &BF; }); 1609 1610 llvm::stable_sort(SortedFunctions, 1611 [](const BinaryFunction *A, const BinaryFunction *B) { 1612 if (A->hasValidIndex() && B->hasValidIndex()) { 1613 return A->getIndex() < B->getIndex(); 1614 } 1615 return A->hasValidIndex(); 1616 }); 1617 return SortedFunctions; 1618 } 1619 1620 std::vector<BinaryFunction *> BinaryContext::getAllBinaryFunctions() { 1621 std::vector<BinaryFunction *> AllFunctions; 1622 AllFunctions.reserve(BinaryFunctions.size() + InjectedBinaryFunctions.size()); 1623 llvm::transform(llvm::make_second_range(BinaryFunctions), 1624 std::back_inserter(AllFunctions), 1625 [](BinaryFunction &BF) { return &BF; }); 1626 llvm::copy(InjectedBinaryFunctions, std::back_inserter(AllFunctions)); 1627 1628 return AllFunctions; 1629 } 1630 1631 std::optional<DWARFUnit *> BinaryContext::getDWOCU(uint64_t DWOId) { 1632 auto Iter = DWOCUs.find(DWOId); 1633 if (Iter == DWOCUs.end()) 1634 return std::nullopt; 1635 1636 return Iter->second; 1637 } 1638 1639 DWARFContext *BinaryContext::getDWOContext() const { 1640 if (DWOCUs.empty()) 1641 return nullptr; 1642 return &DWOCUs.begin()->second->getContext(); 1643 } 1644 1645 /// Handles DWO sections that can either be in .o, .dwo or .dwp files. 1646 void BinaryContext::preprocessDWODebugInfo() { 1647 for (const std::unique_ptr<DWARFUnit> &CU : DwCtx->compile_units()) { 1648 DWARFUnit *const DwarfUnit = CU.get(); 1649 if (std::optional<uint64_t> DWOId = DwarfUnit->getDWOId()) { 1650 std::string DWOName = dwarf::toString( 1651 DwarfUnit->getUnitDIE().find( 1652 {dwarf::DW_AT_dwo_name, dwarf::DW_AT_GNU_dwo_name}), 1653 ""); 1654 SmallString<16> AbsolutePath; 1655 if (!opts::CompDirOverride.empty()) { 1656 sys::path::append(AbsolutePath, opts::CompDirOverride); 1657 sys::path::append(AbsolutePath, DWOName); 1658 } 1659 DWARFUnit *DWOCU = 1660 DwarfUnit->getNonSkeletonUnitDIE(false, AbsolutePath).getDwarfUnit(); 1661 if (!DWOCU->isDWOUnit()) { 1662 this->outs() 1663 << "BOLT-WARNING: Debug Fission: DWO debug information for " 1664 << DWOName 1665 << " was not retrieved and won't be updated. Please check " 1666 "relative path.\n"; 1667 continue; 1668 } 1669 DWOCUs[*DWOId] = DWOCU; 1670 } 1671 } 1672 if (!DWOCUs.empty()) 1673 this->outs() << "BOLT-INFO: processing split DWARF\n"; 1674 } 1675 1676 void BinaryContext::preprocessDebugInfo() { 1677 struct CURange { 1678 uint64_t LowPC; 1679 uint64_t HighPC; 1680 DWARFUnit *Unit; 1681 1682 bool operator<(const CURange &Other) const { return LowPC < Other.LowPC; } 1683 }; 1684 1685 // Building a map of address ranges to CUs similar to .debug_aranges and use 1686 // it to assign CU to functions. 1687 std::vector<CURange> AllRanges; 1688 AllRanges.reserve(DwCtx->getNumCompileUnits()); 1689 for (const std::unique_ptr<DWARFUnit> &CU : DwCtx->compile_units()) { 1690 Expected<DWARFAddressRangesVector> RangesOrError = 1691 CU->getUnitDIE().getAddressRanges(); 1692 if (!RangesOrError) { 1693 consumeError(RangesOrError.takeError()); 1694 continue; 1695 } 1696 for (DWARFAddressRange &Range : *RangesOrError) { 1697 // Parts of the debug info could be invalidated due to corresponding code 1698 // being removed from the binary by the linker. Hence we check if the 1699 // address is a valid one. 1700 if (containsAddress(Range.LowPC)) 1701 AllRanges.emplace_back(CURange{Range.LowPC, Range.HighPC, CU.get()}); 1702 } 1703 1704 ContainsDwarf5 |= CU->getVersion() >= 5; 1705 ContainsDwarfLegacy |= CU->getVersion() < 5; 1706 } 1707 1708 llvm::sort(AllRanges); 1709 for (auto &KV : BinaryFunctions) { 1710 const uint64_t FunctionAddress = KV.first; 1711 BinaryFunction &Function = KV.second; 1712 1713 auto It = llvm::partition_point( 1714 AllRanges, [=](CURange R) { return R.HighPC <= FunctionAddress; }); 1715 if (It != AllRanges.end() && It->LowPC <= FunctionAddress) 1716 Function.setDWARFUnit(It->Unit); 1717 } 1718 1719 // Discover units with debug info that needs to be updated. 1720 for (const auto &KV : BinaryFunctions) { 1721 const BinaryFunction &BF = KV.second; 1722 if (shouldEmit(BF) && BF.getDWARFUnit()) 1723 ProcessedCUs.insert(BF.getDWARFUnit()); 1724 } 1725 1726 // Clear debug info for functions from units that we are not going to process. 1727 for (auto &KV : BinaryFunctions) { 1728 BinaryFunction &BF = KV.second; 1729 if (BF.getDWARFUnit() && !ProcessedCUs.count(BF.getDWARFUnit())) 1730 BF.setDWARFUnit(nullptr); 1731 } 1732 1733 if (opts::Verbosity >= 1) { 1734 this->outs() << "BOLT-INFO: " << ProcessedCUs.size() << " out of " 1735 << DwCtx->getNumCompileUnits() << " CUs will be updated\n"; 1736 } 1737 1738 preprocessDWODebugInfo(); 1739 1740 // Populate MCContext with DWARF files from all units. 1741 StringRef GlobalPrefix = AsmInfo->getPrivateGlobalPrefix(); 1742 for (const std::unique_ptr<DWARFUnit> &CU : DwCtx->compile_units()) { 1743 const uint64_t CUID = CU->getOffset(); 1744 DwarfLineTable &BinaryLineTable = getDwarfLineTable(CUID); 1745 BinaryLineTable.setLabel(Ctx->getOrCreateSymbol( 1746 GlobalPrefix + "line_table_start" + Twine(CUID))); 1747 1748 if (!ProcessedCUs.count(CU.get())) 1749 continue; 1750 1751 const DWARFDebugLine::LineTable *LineTable = 1752 DwCtx->getLineTableForUnit(CU.get()); 1753 const std::vector<DWARFDebugLine::FileNameEntry> &FileNames = 1754 LineTable->Prologue.FileNames; 1755 1756 uint16_t DwarfVersion = LineTable->Prologue.getVersion(); 1757 if (DwarfVersion >= 5) { 1758 std::optional<MD5::MD5Result> Checksum; 1759 if (LineTable->Prologue.ContentTypes.HasMD5) 1760 Checksum = LineTable->Prologue.FileNames[0].Checksum; 1761 std::optional<const char *> Name = 1762 dwarf::toString(CU->getUnitDIE().find(dwarf::DW_AT_name), nullptr); 1763 if (std::optional<uint64_t> DWOID = CU->getDWOId()) { 1764 auto Iter = DWOCUs.find(*DWOID); 1765 assert(Iter != DWOCUs.end() && "DWO CU was not found."); 1766 Name = dwarf::toString( 1767 Iter->second->getUnitDIE().find(dwarf::DW_AT_name), nullptr); 1768 } 1769 BinaryLineTable.setRootFile(CU->getCompilationDir(), *Name, Checksum, 1770 std::nullopt); 1771 } 1772 1773 BinaryLineTable.setDwarfVersion(DwarfVersion); 1774 1775 // Assign a unique label to every line table, one per CU. 1776 // Make sure empty debug line tables are registered too. 1777 if (FileNames.empty()) { 1778 cantFail(getDwarfFile("", "<unknown>", 0, std::nullopt, std::nullopt, 1779 CUID, DwarfVersion)); 1780 continue; 1781 } 1782 const uint32_t Offset = DwarfVersion < 5 ? 1 : 0; 1783 for (size_t I = 0, Size = FileNames.size(); I != Size; ++I) { 1784 // Dir indexes start at 1, as DWARF file numbers, and a dir index 0 1785 // means empty dir. 1786 StringRef Dir = ""; 1787 if (FileNames[I].DirIdx != 0 || DwarfVersion >= 5) 1788 if (std::optional<const char *> DirName = dwarf::toString( 1789 LineTable->Prologue 1790 .IncludeDirectories[FileNames[I].DirIdx - Offset])) 1791 Dir = *DirName; 1792 StringRef FileName = ""; 1793 if (std::optional<const char *> FName = 1794 dwarf::toString(FileNames[I].Name)) 1795 FileName = *FName; 1796 assert(FileName != ""); 1797 std::optional<MD5::MD5Result> Checksum; 1798 if (DwarfVersion >= 5 && LineTable->Prologue.ContentTypes.HasMD5) 1799 Checksum = LineTable->Prologue.FileNames[I].Checksum; 1800 cantFail(getDwarfFile(Dir, FileName, 0, Checksum, std::nullopt, CUID, 1801 DwarfVersion)); 1802 } 1803 } 1804 } 1805 1806 bool BinaryContext::shouldEmit(const BinaryFunction &Function) const { 1807 if (Function.isPseudo()) 1808 return false; 1809 1810 if (opts::processAllFunctions()) 1811 return true; 1812 1813 if (Function.isIgnored()) 1814 return false; 1815 1816 // In relocation mode we will emit non-simple functions with CFG. 1817 // If the function does not have a CFG it should be marked as ignored. 1818 return HasRelocations || Function.isSimple(); 1819 } 1820 1821 void BinaryContext::dump(const MCInst &Inst) const { 1822 if (LLVM_UNLIKELY(!InstPrinter)) { 1823 dbgs() << "Cannot dump for InstPrinter is not initialized.\n"; 1824 return; 1825 } 1826 InstPrinter->printInst(&Inst, 0, "", *STI, dbgs()); 1827 dbgs() << "\n"; 1828 } 1829 1830 void BinaryContext::printCFI(raw_ostream &OS, const MCCFIInstruction &Inst) { 1831 uint32_t Operation = Inst.getOperation(); 1832 switch (Operation) { 1833 case MCCFIInstruction::OpSameValue: 1834 OS << "OpSameValue Reg" << Inst.getRegister(); 1835 break; 1836 case MCCFIInstruction::OpRememberState: 1837 OS << "OpRememberState"; 1838 break; 1839 case MCCFIInstruction::OpRestoreState: 1840 OS << "OpRestoreState"; 1841 break; 1842 case MCCFIInstruction::OpOffset: 1843 OS << "OpOffset Reg" << Inst.getRegister() << " " << Inst.getOffset(); 1844 break; 1845 case MCCFIInstruction::OpDefCfaRegister: 1846 OS << "OpDefCfaRegister Reg" << Inst.getRegister(); 1847 break; 1848 case MCCFIInstruction::OpDefCfaOffset: 1849 OS << "OpDefCfaOffset " << Inst.getOffset(); 1850 break; 1851 case MCCFIInstruction::OpDefCfa: 1852 OS << "OpDefCfa Reg" << Inst.getRegister() << " " << Inst.getOffset(); 1853 break; 1854 case MCCFIInstruction::OpRelOffset: 1855 OS << "OpRelOffset Reg" << Inst.getRegister() << " " << Inst.getOffset(); 1856 break; 1857 case MCCFIInstruction::OpAdjustCfaOffset: 1858 OS << "OfAdjustCfaOffset " << Inst.getOffset(); 1859 break; 1860 case MCCFIInstruction::OpEscape: 1861 OS << "OpEscape"; 1862 break; 1863 case MCCFIInstruction::OpRestore: 1864 OS << "OpRestore Reg" << Inst.getRegister(); 1865 break; 1866 case MCCFIInstruction::OpUndefined: 1867 OS << "OpUndefined Reg" << Inst.getRegister(); 1868 break; 1869 case MCCFIInstruction::OpRegister: 1870 OS << "OpRegister Reg" << Inst.getRegister() << " Reg" 1871 << Inst.getRegister2(); 1872 break; 1873 case MCCFIInstruction::OpWindowSave: 1874 OS << "OpWindowSave"; 1875 break; 1876 case MCCFIInstruction::OpGnuArgsSize: 1877 OS << "OpGnuArgsSize"; 1878 break; 1879 default: 1880 OS << "Op#" << Operation; 1881 break; 1882 } 1883 } 1884 1885 MarkerSymType BinaryContext::getMarkerType(const SymbolRef &Symbol) const { 1886 // For aarch64 and riscv, the ABI defines mapping symbols so we identify data 1887 // in the code section (see IHI0056B). $x identifies a symbol starting code or 1888 // the end of a data chunk inside code, $d identifies start of data. 1889 if (isX86() || ELFSymbolRef(Symbol).getSize()) 1890 return MarkerSymType::NONE; 1891 1892 Expected<StringRef> NameOrError = Symbol.getName(); 1893 Expected<object::SymbolRef::Type> TypeOrError = Symbol.getType(); 1894 1895 if (!TypeOrError || !NameOrError) 1896 return MarkerSymType::NONE; 1897 1898 if (*TypeOrError != SymbolRef::ST_Unknown) 1899 return MarkerSymType::NONE; 1900 1901 if (*NameOrError == "$x" || NameOrError->starts_with("$x.")) 1902 return MarkerSymType::CODE; 1903 1904 // $x<ISA> 1905 if (isRISCV() && NameOrError->starts_with("$x")) 1906 return MarkerSymType::CODE; 1907 1908 if (*NameOrError == "$d" || NameOrError->starts_with("$d.")) 1909 return MarkerSymType::DATA; 1910 1911 return MarkerSymType::NONE; 1912 } 1913 1914 bool BinaryContext::isMarker(const SymbolRef &Symbol) const { 1915 return getMarkerType(Symbol) != MarkerSymType::NONE; 1916 } 1917 1918 static void printDebugInfo(raw_ostream &OS, const MCInst &Instruction, 1919 const BinaryFunction *Function, 1920 DWARFContext *DwCtx) { 1921 DebugLineTableRowRef RowRef = 1922 DebugLineTableRowRef::fromSMLoc(Instruction.getLoc()); 1923 if (RowRef == DebugLineTableRowRef::NULL_ROW) 1924 return; 1925 1926 const DWARFDebugLine::LineTable *LineTable; 1927 if (Function && Function->getDWARFUnit() && 1928 Function->getDWARFUnit()->getOffset() == RowRef.DwCompileUnitIndex) { 1929 LineTable = Function->getDWARFLineTable(); 1930 } else { 1931 LineTable = DwCtx->getLineTableForUnit( 1932 DwCtx->getCompileUnitForOffset(RowRef.DwCompileUnitIndex)); 1933 } 1934 assert(LineTable && "line table expected for instruction with debug info"); 1935 1936 const DWARFDebugLine::Row &Row = LineTable->Rows[RowRef.RowIndex - 1]; 1937 StringRef FileName = ""; 1938 if (std::optional<const char *> FName = 1939 dwarf::toString(LineTable->Prologue.FileNames[Row.File - 1].Name)) 1940 FileName = *FName; 1941 OS << " # debug line " << FileName << ":" << Row.Line; 1942 if (Row.Column) 1943 OS << ":" << Row.Column; 1944 if (Row.Discriminator) 1945 OS << " discriminator:" << Row.Discriminator; 1946 } 1947 1948 void BinaryContext::printInstruction(raw_ostream &OS, const MCInst &Instruction, 1949 uint64_t Offset, 1950 const BinaryFunction *Function, 1951 bool PrintMCInst, bool PrintMemData, 1952 bool PrintRelocations, 1953 StringRef Endl) const { 1954 OS << format(" %08" PRIx64 ": ", Offset); 1955 if (MIB->isCFI(Instruction)) { 1956 uint32_t Offset = Instruction.getOperand(0).getImm(); 1957 OS << "\t!CFI\t$" << Offset << "\t; "; 1958 if (Function) 1959 printCFI(OS, *Function->getCFIFor(Instruction)); 1960 OS << Endl; 1961 return; 1962 } 1963 if (std::optional<uint32_t> DynamicID = 1964 MIB->getDynamicBranchID(Instruction)) { 1965 OS << "\tjit\t" << MIB->getTargetSymbol(Instruction)->getName() 1966 << " # ID: " << DynamicID; 1967 } else { 1968 InstPrinter->printInst(&Instruction, 0, "", *STI, OS); 1969 } 1970 if (MIB->isCall(Instruction)) { 1971 if (MIB->isTailCall(Instruction)) 1972 OS << " # TAILCALL "; 1973 if (MIB->isInvoke(Instruction)) { 1974 const std::optional<MCPlus::MCLandingPad> EHInfo = 1975 MIB->getEHInfo(Instruction); 1976 OS << " # handler: "; 1977 if (EHInfo->first) 1978 OS << *EHInfo->first; 1979 else 1980 OS << '0'; 1981 OS << "; action: " << EHInfo->second; 1982 const int64_t GnuArgsSize = MIB->getGnuArgsSize(Instruction); 1983 if (GnuArgsSize >= 0) 1984 OS << "; GNU_args_size = " << GnuArgsSize; 1985 } 1986 } else if (MIB->isIndirectBranch(Instruction)) { 1987 if (uint64_t JTAddress = MIB->getJumpTable(Instruction)) { 1988 OS << " # JUMPTABLE @0x" << Twine::utohexstr(JTAddress); 1989 } else { 1990 OS << " # UNKNOWN CONTROL FLOW"; 1991 } 1992 } 1993 if (std::optional<uint32_t> Offset = MIB->getOffset(Instruction)) 1994 OS << " # Offset: " << *Offset; 1995 if (std::optional<uint32_t> Size = MIB->getSize(Instruction)) 1996 OS << " # Size: " << *Size; 1997 if (MCSymbol *Label = MIB->getInstLabel(Instruction)) 1998 OS << " # Label: " << *Label; 1999 2000 MIB->printAnnotations(Instruction, OS); 2001 2002 if (opts::PrintDebugInfo) 2003 printDebugInfo(OS, Instruction, Function, DwCtx.get()); 2004 2005 if ((opts::PrintRelocations || PrintRelocations) && Function) { 2006 const uint64_t Size = computeCodeSize(&Instruction, &Instruction + 1); 2007 Function->printRelocations(OS, Offset, Size); 2008 } 2009 2010 OS << Endl; 2011 2012 if (PrintMCInst) { 2013 Instruction.dump_pretty(OS, InstPrinter.get()); 2014 OS << Endl; 2015 } 2016 } 2017 2018 std::optional<uint64_t> 2019 BinaryContext::getBaseAddressForMapping(uint64_t MMapAddress, 2020 uint64_t FileOffset) const { 2021 // Find a segment with a matching file offset. 2022 for (auto &KV : SegmentMapInfo) { 2023 const SegmentInfo &SegInfo = KV.second; 2024 // Only consider executable segments. 2025 if (!SegInfo.IsExecutable) 2026 continue; 2027 // FileOffset is got from perf event, 2028 // and it is equal to alignDown(SegInfo.FileOffset, pagesize). 2029 // If the pagesize is not equal to SegInfo.Alignment. 2030 // FileOffset and SegInfo.FileOffset should be aligned first, 2031 // and then judge whether they are equal. 2032 if (alignDown(SegInfo.FileOffset, SegInfo.Alignment) == 2033 alignDown(FileOffset, SegInfo.Alignment)) { 2034 // The function's offset from base address in VAS is aligned by pagesize 2035 // instead of SegInfo.Alignment. Pagesize can't be got from perf events. 2036 // However, The ELF document says that SegInfo.FileOffset should equal 2037 // to SegInfo.Address, modulo the pagesize. 2038 // Reference: https://refspecs.linuxfoundation.org/elf/elf.pdf 2039 2040 // So alignDown(SegInfo.Address, pagesize) can be calculated by: 2041 // alignDown(SegInfo.Address, pagesize) 2042 // = SegInfo.Address - (SegInfo.Address % pagesize) 2043 // = SegInfo.Address - (SegInfo.FileOffset % pagesize) 2044 // = SegInfo.Address - SegInfo.FileOffset + 2045 // alignDown(SegInfo.FileOffset, pagesize) 2046 // = SegInfo.Address - SegInfo.FileOffset + FileOffset 2047 return MMapAddress - (SegInfo.Address - SegInfo.FileOffset + FileOffset); 2048 } 2049 } 2050 2051 return std::nullopt; 2052 } 2053 2054 ErrorOr<BinarySection &> BinaryContext::getSectionForAddress(uint64_t Address) { 2055 auto SI = AddressToSection.upper_bound(Address); 2056 if (SI != AddressToSection.begin()) { 2057 --SI; 2058 uint64_t UpperBound = SI->first + SI->second->getSize(); 2059 if (!SI->second->getSize()) 2060 UpperBound += 1; 2061 if (UpperBound > Address) 2062 return *SI->second; 2063 } 2064 return std::make_error_code(std::errc::bad_address); 2065 } 2066 2067 ErrorOr<StringRef> 2068 BinaryContext::getSectionNameForAddress(uint64_t Address) const { 2069 if (ErrorOr<const BinarySection &> Section = getSectionForAddress(Address)) 2070 return Section->getName(); 2071 return std::make_error_code(std::errc::bad_address); 2072 } 2073 2074 BinarySection &BinaryContext::registerSection(BinarySection *Section) { 2075 auto Res = Sections.insert(Section); 2076 (void)Res; 2077 assert(Res.second && "can't register the same section twice."); 2078 2079 // Only register allocatable sections in the AddressToSection map. 2080 if (Section->isAllocatable() && Section->getAddress()) 2081 AddressToSection.insert(std::make_pair(Section->getAddress(), Section)); 2082 NameToSection.insert( 2083 std::make_pair(std::string(Section->getName()), Section)); 2084 if (Section->hasSectionRef()) 2085 SectionRefToBinarySection.insert( 2086 std::make_pair(Section->getSectionRef(), Section)); 2087 2088 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: registering " << *Section << "\n"); 2089 return *Section; 2090 } 2091 2092 BinarySection &BinaryContext::registerSection(SectionRef Section) { 2093 return registerSection(new BinarySection(*this, Section)); 2094 } 2095 2096 BinarySection & 2097 BinaryContext::registerSection(const Twine &SectionName, 2098 const BinarySection &OriginalSection) { 2099 return registerSection( 2100 new BinarySection(*this, SectionName, OriginalSection)); 2101 } 2102 2103 BinarySection & 2104 BinaryContext::registerOrUpdateSection(const Twine &Name, unsigned ELFType, 2105 unsigned ELFFlags, uint8_t *Data, 2106 uint64_t Size, unsigned Alignment) { 2107 auto NamedSections = getSectionByName(Name); 2108 if (NamedSections.begin() != NamedSections.end()) { 2109 assert(std::next(NamedSections.begin()) == NamedSections.end() && 2110 "can only update unique sections"); 2111 BinarySection *Section = NamedSections.begin()->second; 2112 2113 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: updating " << *Section << " -> "); 2114 const bool Flag = Section->isAllocatable(); 2115 (void)Flag; 2116 Section->update(Data, Size, Alignment, ELFType, ELFFlags); 2117 LLVM_DEBUG(dbgs() << *Section << "\n"); 2118 // FIXME: Fix section flags/attributes for MachO. 2119 if (isELF()) 2120 assert(Flag == Section->isAllocatable() && 2121 "can't change section allocation status"); 2122 return *Section; 2123 } 2124 2125 return registerSection( 2126 new BinarySection(*this, Name, Data, Size, Alignment, ELFType, ELFFlags)); 2127 } 2128 2129 void BinaryContext::deregisterSectionName(const BinarySection &Section) { 2130 auto NameRange = NameToSection.equal_range(Section.getName().str()); 2131 while (NameRange.first != NameRange.second) { 2132 if (NameRange.first->second == &Section) { 2133 NameToSection.erase(NameRange.first); 2134 break; 2135 } 2136 ++NameRange.first; 2137 } 2138 } 2139 2140 void BinaryContext::deregisterUnusedSections() { 2141 ErrorOr<BinarySection &> AbsSection = getUniqueSectionByName("<absolute>"); 2142 for (auto SI = Sections.begin(); SI != Sections.end();) { 2143 BinarySection *Section = *SI; 2144 // We check getOutputData() instead of getOutputSize() because sometimes 2145 // zero-sized .text.cold sections are allocated. 2146 if (Section->hasSectionRef() || Section->getOutputData() || 2147 (AbsSection && Section == &AbsSection.get())) { 2148 ++SI; 2149 continue; 2150 } 2151 2152 LLVM_DEBUG(dbgs() << "LLVM-DEBUG: deregistering " << Section->getName() 2153 << '\n';); 2154 deregisterSectionName(*Section); 2155 SI = Sections.erase(SI); 2156 delete Section; 2157 } 2158 } 2159 2160 bool BinaryContext::deregisterSection(BinarySection &Section) { 2161 BinarySection *SectionPtr = &Section; 2162 auto Itr = Sections.find(SectionPtr); 2163 if (Itr != Sections.end()) { 2164 auto Range = AddressToSection.equal_range(SectionPtr->getAddress()); 2165 while (Range.first != Range.second) { 2166 if (Range.first->second == SectionPtr) { 2167 AddressToSection.erase(Range.first); 2168 break; 2169 } 2170 ++Range.first; 2171 } 2172 2173 deregisterSectionName(*SectionPtr); 2174 Sections.erase(Itr); 2175 delete SectionPtr; 2176 return true; 2177 } 2178 return false; 2179 } 2180 2181 void BinaryContext::renameSection(BinarySection &Section, 2182 const Twine &NewName) { 2183 auto Itr = Sections.find(&Section); 2184 assert(Itr != Sections.end() && "Section must exist to be renamed."); 2185 Sections.erase(Itr); 2186 2187 deregisterSectionName(Section); 2188 2189 Section.Name = NewName.str(); 2190 Section.setOutputName(Section.Name); 2191 2192 NameToSection.insert(std::make_pair(Section.Name, &Section)); 2193 2194 // Reinsert with the new name. 2195 Sections.insert(&Section); 2196 } 2197 2198 void BinaryContext::printSections(raw_ostream &OS) const { 2199 for (BinarySection *const &Section : Sections) 2200 OS << "BOLT-INFO: " << *Section << "\n"; 2201 } 2202 2203 BinarySection &BinaryContext::absoluteSection() { 2204 if (ErrorOr<BinarySection &> Section = getUniqueSectionByName("<absolute>")) 2205 return *Section; 2206 return registerOrUpdateSection("<absolute>", ELF::SHT_NULL, 0u); 2207 } 2208 2209 ErrorOr<uint64_t> BinaryContext::getUnsignedValueAtAddress(uint64_t Address, 2210 size_t Size) const { 2211 const ErrorOr<const BinarySection &> Section = getSectionForAddress(Address); 2212 if (!Section) 2213 return std::make_error_code(std::errc::bad_address); 2214 2215 if (Section->isVirtual()) 2216 return 0; 2217 2218 DataExtractor DE(Section->getContents(), AsmInfo->isLittleEndian(), 2219 AsmInfo->getCodePointerSize()); 2220 auto ValueOffset = static_cast<uint64_t>(Address - Section->getAddress()); 2221 return DE.getUnsigned(&ValueOffset, Size); 2222 } 2223 2224 ErrorOr<int64_t> BinaryContext::getSignedValueAtAddress(uint64_t Address, 2225 size_t Size) const { 2226 const ErrorOr<const BinarySection &> Section = getSectionForAddress(Address); 2227 if (!Section) 2228 return std::make_error_code(std::errc::bad_address); 2229 2230 if (Section->isVirtual()) 2231 return 0; 2232 2233 DataExtractor DE(Section->getContents(), AsmInfo->isLittleEndian(), 2234 AsmInfo->getCodePointerSize()); 2235 auto ValueOffset = static_cast<uint64_t>(Address - Section->getAddress()); 2236 return DE.getSigned(&ValueOffset, Size); 2237 } 2238 2239 void BinaryContext::addRelocation(uint64_t Address, MCSymbol *Symbol, 2240 uint64_t Type, uint64_t Addend, 2241 uint64_t Value) { 2242 ErrorOr<BinarySection &> Section = getSectionForAddress(Address); 2243 assert(Section && "cannot find section for address"); 2244 Section->addRelocation(Address - Section->getAddress(), Symbol, Type, Addend, 2245 Value); 2246 } 2247 2248 void BinaryContext::addDynamicRelocation(uint64_t Address, MCSymbol *Symbol, 2249 uint64_t Type, uint64_t Addend, 2250 uint64_t Value) { 2251 ErrorOr<BinarySection &> Section = getSectionForAddress(Address); 2252 assert(Section && "cannot find section for address"); 2253 Section->addDynamicRelocation(Address - Section->getAddress(), Symbol, Type, 2254 Addend, Value); 2255 } 2256 2257 bool BinaryContext::removeRelocationAt(uint64_t Address) { 2258 ErrorOr<BinarySection &> Section = getSectionForAddress(Address); 2259 assert(Section && "cannot find section for address"); 2260 return Section->removeRelocationAt(Address - Section->getAddress()); 2261 } 2262 2263 const Relocation *BinaryContext::getRelocationAt(uint64_t Address) const { 2264 ErrorOr<const BinarySection &> Section = getSectionForAddress(Address); 2265 if (!Section) 2266 return nullptr; 2267 2268 return Section->getRelocationAt(Address - Section->getAddress()); 2269 } 2270 2271 const Relocation * 2272 BinaryContext::getDynamicRelocationAt(uint64_t Address) const { 2273 ErrorOr<const BinarySection &> Section = getSectionForAddress(Address); 2274 if (!Section) 2275 return nullptr; 2276 2277 return Section->getDynamicRelocationAt(Address - Section->getAddress()); 2278 } 2279 2280 void BinaryContext::markAmbiguousRelocations(BinaryData &BD, 2281 const uint64_t Address) { 2282 auto setImmovable = [&](BinaryData &BD) { 2283 BinaryData *Root = BD.getAtomicRoot(); 2284 LLVM_DEBUG(if (Root->isMoveable()) { 2285 dbgs() << "BOLT-DEBUG: setting " << *Root << " as immovable " 2286 << "due to ambiguous relocation referencing 0x" 2287 << Twine::utohexstr(Address) << '\n'; 2288 }); 2289 Root->setIsMoveable(false); 2290 }; 2291 2292 if (Address == BD.getAddress()) { 2293 setImmovable(BD); 2294 2295 // Set previous symbol as immovable 2296 BinaryData *Prev = getBinaryDataContainingAddress(Address - 1); 2297 if (Prev && Prev->getEndAddress() == BD.getAddress()) 2298 setImmovable(*Prev); 2299 } 2300 2301 if (Address == BD.getEndAddress()) { 2302 setImmovable(BD); 2303 2304 // Set next symbol as immovable 2305 BinaryData *Next = getBinaryDataContainingAddress(BD.getEndAddress()); 2306 if (Next && Next->getAddress() == BD.getEndAddress()) 2307 setImmovable(*Next); 2308 } 2309 } 2310 2311 BinaryFunction *BinaryContext::getFunctionForSymbol(const MCSymbol *Symbol, 2312 uint64_t *EntryDesc) { 2313 std::shared_lock<llvm::sys::RWMutex> Lock(SymbolToFunctionMapMutex); 2314 auto BFI = SymbolToFunctionMap.find(Symbol); 2315 if (BFI == SymbolToFunctionMap.end()) 2316 return nullptr; 2317 2318 BinaryFunction *BF = BFI->second; 2319 if (EntryDesc) 2320 *EntryDesc = BF->getEntryIDForSymbol(Symbol); 2321 2322 return BF; 2323 } 2324 2325 std::string 2326 BinaryContext::generateBugReportMessage(StringRef Message, 2327 const BinaryFunction &Function) const { 2328 std::string Msg; 2329 raw_string_ostream SS(Msg); 2330 SS << "=======================================\n"; 2331 SS << "BOLT is unable to proceed because it couldn't properly understand " 2332 "this function.\n"; 2333 SS << "If you are running the most recent version of BOLT, you may " 2334 "want to " 2335 "report this and paste this dump.\nPlease check that there is no " 2336 "sensitive contents being shared in this dump.\n"; 2337 SS << "\nOffending function: " << Function.getPrintName() << "\n\n"; 2338 ScopedPrinter SP(SS); 2339 SP.printBinaryBlock("Function contents", *Function.getData()); 2340 SS << "\n"; 2341 const_cast<BinaryFunction &>(Function).print(SS, ""); 2342 SS << "ERROR: " << Message; 2343 SS << "\n=======================================\n"; 2344 return Msg; 2345 } 2346 2347 BinaryFunction * 2348 BinaryContext::createInjectedBinaryFunction(const std::string &Name, 2349 bool IsSimple) { 2350 InjectedBinaryFunctions.push_back(new BinaryFunction(Name, *this, IsSimple)); 2351 BinaryFunction *BF = InjectedBinaryFunctions.back(); 2352 setSymbolToFunctionMap(BF->getSymbol(), BF); 2353 BF->CurrentState = BinaryFunction::State::CFG; 2354 return BF; 2355 } 2356 2357 std::pair<size_t, size_t> 2358 BinaryContext::calculateEmittedSize(BinaryFunction &BF, bool FixBranches) { 2359 // Adjust branch instruction to match the current layout. 2360 if (FixBranches) 2361 BF.fixBranches(); 2362 2363 // Create local MC context to isolate the effect of ephemeral code emission. 2364 IndependentCodeEmitter MCEInstance = createIndependentMCCodeEmitter(); 2365 MCContext *LocalCtx = MCEInstance.LocalCtx.get(); 2366 MCAsmBackend *MAB = 2367 TheTarget->createMCAsmBackend(*STI, *MRI, MCTargetOptions()); 2368 2369 SmallString<256> Code; 2370 raw_svector_ostream VecOS(Code); 2371 2372 std::unique_ptr<MCObjectWriter> OW = MAB->createObjectWriter(VecOS); 2373 std::unique_ptr<MCStreamer> Streamer(TheTarget->createMCObjectStreamer( 2374 *TheTriple, *LocalCtx, std::unique_ptr<MCAsmBackend>(MAB), std::move(OW), 2375 std::unique_ptr<MCCodeEmitter>(MCEInstance.MCE.release()), *STI)); 2376 2377 Streamer->initSections(false, *STI); 2378 2379 MCSection *Section = MCEInstance.LocalMOFI->getTextSection(); 2380 Section->setHasInstructions(true); 2381 2382 // Create symbols in the LocalCtx so that they get destroyed with it. 2383 MCSymbol *StartLabel = LocalCtx->createTempSymbol(); 2384 MCSymbol *EndLabel = LocalCtx->createTempSymbol(); 2385 2386 Streamer->switchSection(Section); 2387 Streamer->emitLabel(StartLabel); 2388 emitFunctionBody(*Streamer, BF, BF.getLayout().getMainFragment(), 2389 /*EmitCodeOnly=*/true); 2390 Streamer->emitLabel(EndLabel); 2391 2392 using LabelRange = std::pair<const MCSymbol *, const MCSymbol *>; 2393 SmallVector<LabelRange> SplitLabels; 2394 for (FunctionFragment &FF : BF.getLayout().getSplitFragments()) { 2395 MCSymbol *const SplitStartLabel = LocalCtx->createTempSymbol(); 2396 MCSymbol *const SplitEndLabel = LocalCtx->createTempSymbol(); 2397 SplitLabels.emplace_back(SplitStartLabel, SplitEndLabel); 2398 2399 MCSectionELF *const SplitSection = LocalCtx->getELFSection( 2400 BF.getCodeSectionName(FF.getFragmentNum()), ELF::SHT_PROGBITS, 2401 ELF::SHF_EXECINSTR | ELF::SHF_ALLOC); 2402 SplitSection->setHasInstructions(true); 2403 Streamer->switchSection(SplitSection); 2404 2405 Streamer->emitLabel(SplitStartLabel); 2406 emitFunctionBody(*Streamer, BF, FF, /*EmitCodeOnly=*/true); 2407 Streamer->emitLabel(SplitEndLabel); 2408 } 2409 2410 MCAssembler &Assembler = 2411 static_cast<MCObjectStreamer *>(Streamer.get())->getAssembler(); 2412 Assembler.layout(); 2413 2414 // Obtain fragment sizes. 2415 std::vector<uint64_t> FragmentSizes; 2416 // Main fragment size. 2417 const uint64_t HotSize = Assembler.getSymbolOffset(*EndLabel) - 2418 Assembler.getSymbolOffset(*StartLabel); 2419 FragmentSizes.push_back(HotSize); 2420 // Split fragment sizes. 2421 uint64_t ColdSize = 0; 2422 for (const auto &Labels : SplitLabels) { 2423 uint64_t Size = Assembler.getSymbolOffset(*Labels.second) - 2424 Assembler.getSymbolOffset(*Labels.first); 2425 FragmentSizes.push_back(Size); 2426 ColdSize += Size; 2427 } 2428 2429 // Populate new start and end offsets of each basic block. 2430 uint64_t FragmentIndex = 0; 2431 for (FunctionFragment &FF : BF.getLayout().fragments()) { 2432 BinaryBasicBlock *PrevBB = nullptr; 2433 for (BinaryBasicBlock *BB : FF) { 2434 const uint64_t BBStartOffset = 2435 Assembler.getSymbolOffset(*(BB->getLabel())); 2436 BB->setOutputStartAddress(BBStartOffset); 2437 if (PrevBB) 2438 PrevBB->setOutputEndAddress(BBStartOffset); 2439 PrevBB = BB; 2440 } 2441 if (PrevBB) 2442 PrevBB->setOutputEndAddress(FragmentSizes[FragmentIndex]); 2443 FragmentIndex++; 2444 } 2445 2446 // Clean-up the effect of the code emission. 2447 for (const MCSymbol &Symbol : Assembler.symbols()) { 2448 MCSymbol *MutableSymbol = const_cast<MCSymbol *>(&Symbol); 2449 MutableSymbol->setUndefined(); 2450 MutableSymbol->setIsRegistered(false); 2451 } 2452 2453 return std::make_pair(HotSize, ColdSize); 2454 } 2455 2456 bool BinaryContext::validateInstructionEncoding( 2457 ArrayRef<uint8_t> InputSequence) const { 2458 MCInst Inst; 2459 uint64_t InstSize; 2460 DisAsm->getInstruction(Inst, InstSize, InputSequence, 0, nulls()); 2461 assert(InstSize == InputSequence.size() && 2462 "Disassembled instruction size does not match the sequence."); 2463 2464 SmallString<256> Code; 2465 SmallVector<MCFixup, 4> Fixups; 2466 2467 MCE->encodeInstruction(Inst, Code, Fixups, *STI); 2468 auto OutputSequence = ArrayRef<uint8_t>((uint8_t *)Code.data(), Code.size()); 2469 if (InputSequence != OutputSequence) { 2470 if (opts::Verbosity > 1) { 2471 this->errs() << "BOLT-WARNING: mismatched encoding detected\n" 2472 << " input: " << InputSequence << '\n' 2473 << " output: " << OutputSequence << '\n'; 2474 } 2475 return false; 2476 } 2477 2478 return true; 2479 } 2480 2481 uint64_t BinaryContext::getHotThreshold() const { 2482 static uint64_t Threshold = 0; 2483 if (Threshold == 0) { 2484 Threshold = std::max( 2485 (uint64_t)opts::ExecutionCountThreshold, 2486 NumProfiledFuncs ? SumExecutionCount / (2 * NumProfiledFuncs) : 1); 2487 } 2488 return Threshold; 2489 } 2490 2491 BinaryFunction *BinaryContext::getBinaryFunctionContainingAddress( 2492 uint64_t Address, bool CheckPastEnd, bool UseMaxSize) { 2493 auto FI = BinaryFunctions.upper_bound(Address); 2494 if (FI == BinaryFunctions.begin()) 2495 return nullptr; 2496 --FI; 2497 2498 const uint64_t UsedSize = 2499 UseMaxSize ? FI->second.getMaxSize() : FI->second.getSize(); 2500 2501 if (Address >= FI->first + UsedSize + (CheckPastEnd ? 1 : 0)) 2502 return nullptr; 2503 2504 return &FI->second; 2505 } 2506 2507 BinaryFunction *BinaryContext::getBinaryFunctionAtAddress(uint64_t Address) { 2508 // First, try to find a function starting at the given address. If the 2509 // function was folded, this will get us the original folded function if it 2510 // wasn't removed from the list, e.g. in non-relocation mode. 2511 auto BFI = BinaryFunctions.find(Address); 2512 if (BFI != BinaryFunctions.end()) 2513 return &BFI->second; 2514 2515 // We might have folded the function matching the object at the given 2516 // address. In such case, we look for a function matching the symbol 2517 // registered at the original address. The new function (the one that the 2518 // original was folded into) will hold the symbol. 2519 if (const BinaryData *BD = getBinaryDataAtAddress(Address)) { 2520 uint64_t EntryID = 0; 2521 BinaryFunction *BF = getFunctionForSymbol(BD->getSymbol(), &EntryID); 2522 if (BF && EntryID == 0) 2523 return BF; 2524 } 2525 return nullptr; 2526 } 2527 2528 /// Deregister JumpTable registered at a given \p Address and delete it. 2529 void BinaryContext::deleteJumpTable(uint64_t Address) { 2530 assert(JumpTables.count(Address) && "Must have a jump table at address"); 2531 JumpTable *JT = JumpTables.at(Address); 2532 for (BinaryFunction *Parent : JT->Parents) 2533 Parent->JumpTables.erase(Address); 2534 JumpTables.erase(Address); 2535 delete JT; 2536 } 2537 2538 DebugAddressRangesVector BinaryContext::translateModuleAddressRanges( 2539 const DWARFAddressRangesVector &InputRanges) const { 2540 DebugAddressRangesVector OutputRanges; 2541 2542 for (const DWARFAddressRange Range : InputRanges) { 2543 auto BFI = BinaryFunctions.lower_bound(Range.LowPC); 2544 while (BFI != BinaryFunctions.end()) { 2545 const BinaryFunction &Function = BFI->second; 2546 if (Function.getAddress() >= Range.HighPC) 2547 break; 2548 const DebugAddressRangesVector FunctionRanges = 2549 Function.getOutputAddressRanges(); 2550 llvm::move(FunctionRanges, std::back_inserter(OutputRanges)); 2551 std::advance(BFI, 1); 2552 } 2553 } 2554 2555 return OutputRanges; 2556 } 2557 2558 } // namespace bolt 2559 } // namespace llvm 2560