1 //===- bolt/Core/BinaryContext.cpp - Low-level context --------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file implements the BinaryContext class. 10 // 11 //===----------------------------------------------------------------------===// 12 13 #include "bolt/Core/BinaryContext.h" 14 #include "bolt/Core/BinaryEmitter.h" 15 #include "bolt/Core/BinaryFunction.h" 16 #include "bolt/Utils/CommandLineOpts.h" 17 #include "bolt/Utils/NameResolver.h" 18 #include "bolt/Utils/Utils.h" 19 #include "llvm/ADT/STLExtras.h" 20 #include "llvm/ADT/Twine.h" 21 #include "llvm/DebugInfo/DWARF/DWARFCompileUnit.h" 22 #include "llvm/DebugInfo/DWARF/DWARFFormValue.h" 23 #include "llvm/DebugInfo/DWARF/DWARFUnit.h" 24 #include "llvm/MC/MCAsmLayout.h" 25 #include "llvm/MC/MCAssembler.h" 26 #include "llvm/MC/MCContext.h" 27 #include "llvm/MC/MCDisassembler/MCDisassembler.h" 28 #include "llvm/MC/MCInstPrinter.h" 29 #include "llvm/MC/MCObjectStreamer.h" 30 #include "llvm/MC/MCObjectWriter.h" 31 #include "llvm/MC/MCRegisterInfo.h" 32 #include "llvm/MC/MCSectionELF.h" 33 #include "llvm/MC/MCStreamer.h" 34 #include "llvm/MC/MCSubtargetInfo.h" 35 #include "llvm/MC/MCSymbol.h" 36 #include "llvm/Support/CommandLine.h" 37 #include "llvm/Support/Error.h" 38 #include "llvm/Support/Regex.h" 39 #include <algorithm> 40 #include <functional> 41 #include <iterator> 42 #include <numeric> 43 #include <unordered_set> 44 45 using namespace llvm; 46 47 #undef DEBUG_TYPE 48 #define DEBUG_TYPE "bolt" 49 50 namespace opts { 51 52 cl::opt<bool> NoHugePages("no-huge-pages", 53 cl::desc("use regular size pages for code alignment"), 54 cl::Hidden, cl::cat(BoltCategory)); 55 56 static cl::opt<bool> 57 PrintDebugInfo("print-debug-info", 58 cl::desc("print debug info when printing functions"), 59 cl::Hidden, 60 cl::ZeroOrMore, 61 cl::cat(BoltCategory)); 62 63 cl::opt<bool> PrintRelocations( 64 "print-relocations", 65 cl::desc("print relocations when printing functions/objects"), cl::Hidden, 66 cl::cat(BoltCategory)); 67 68 static cl::opt<bool> 69 PrintMemData("print-mem-data", 70 cl::desc("print memory data annotations when printing functions"), 71 cl::Hidden, 72 cl::ZeroOrMore, 73 cl::cat(BoltCategory)); 74 75 cl::opt<std::string> CompDirOverride( 76 "comp-dir-override", 77 cl::desc("overrides DW_AT_comp_dir, and provides an alterantive base " 78 "location, which is used with DW_AT_dwo_name to construct a path " 79 "to *.dwo files."), 80 cl::Hidden, cl::init(""), cl::cat(BoltCategory)); 81 } // namespace opts 82 83 namespace llvm { 84 namespace bolt { 85 86 char BOLTError::ID = 0; 87 88 BOLTError::BOLTError(bool IsFatal, const Twine &S) 89 : IsFatal(IsFatal), Msg(S.str()) {} 90 91 void BOLTError::log(raw_ostream &OS) const { 92 if (IsFatal) 93 OS << "FATAL "; 94 StringRef ErrMsg = StringRef(Msg); 95 // Prepend our error prefix if it is missing 96 if (ErrMsg.empty()) { 97 OS << "BOLT-ERROR\n"; 98 } else { 99 if (!ErrMsg.starts_with("BOLT-ERROR")) 100 OS << "BOLT-ERROR: "; 101 OS << ErrMsg << "\n"; 102 } 103 } 104 105 std::error_code BOLTError::convertToErrorCode() const { 106 return inconvertibleErrorCode(); 107 } 108 109 Error createNonFatalBOLTError(const Twine &S) { 110 return make_error<BOLTError>(/*IsFatal*/ false, S); 111 } 112 113 Error createFatalBOLTError(const Twine &S) { 114 return make_error<BOLTError>(/*IsFatal*/ true, S); 115 } 116 117 void BinaryContext::logBOLTErrorsAndQuitOnFatal(Error E) { 118 handleAllErrors(Error(std::move(E)), [&](const BOLTError &E) { 119 if (!E.getMessage().empty()) 120 E.log(this->errs()); 121 if (E.isFatal()) 122 exit(1); 123 }); 124 } 125 126 BinaryContext::BinaryContext(std::unique_ptr<MCContext> Ctx, 127 std::unique_ptr<DWARFContext> DwCtx, 128 std::unique_ptr<Triple> TheTriple, 129 const Target *TheTarget, std::string TripleName, 130 std::unique_ptr<MCCodeEmitter> MCE, 131 std::unique_ptr<MCObjectFileInfo> MOFI, 132 std::unique_ptr<const MCAsmInfo> AsmInfo, 133 std::unique_ptr<const MCInstrInfo> MII, 134 std::unique_ptr<const MCSubtargetInfo> STI, 135 std::unique_ptr<MCInstPrinter> InstPrinter, 136 std::unique_ptr<const MCInstrAnalysis> MIA, 137 std::unique_ptr<MCPlusBuilder> MIB, 138 std::unique_ptr<const MCRegisterInfo> MRI, 139 std::unique_ptr<MCDisassembler> DisAsm, 140 JournalingStreams Logger) 141 : Ctx(std::move(Ctx)), DwCtx(std::move(DwCtx)), 142 TheTriple(std::move(TheTriple)), TheTarget(TheTarget), 143 TripleName(TripleName), MCE(std::move(MCE)), MOFI(std::move(MOFI)), 144 AsmInfo(std::move(AsmInfo)), MII(std::move(MII)), STI(std::move(STI)), 145 InstPrinter(std::move(InstPrinter)), MIA(std::move(MIA)), 146 MIB(std::move(MIB)), MRI(std::move(MRI)), DisAsm(std::move(DisAsm)), 147 Logger(Logger) { 148 Relocation::Arch = this->TheTriple->getArch(); 149 RegularPageSize = isAArch64() ? RegularPageSizeAArch64 : RegularPageSizeX86; 150 PageAlign = opts::NoHugePages ? RegularPageSize : HugePageSize; 151 } 152 153 BinaryContext::~BinaryContext() { 154 for (BinarySection *Section : Sections) 155 delete Section; 156 for (BinaryFunction *InjectedFunction : InjectedBinaryFunctions) 157 delete InjectedFunction; 158 for (std::pair<const uint64_t, JumpTable *> JTI : JumpTables) 159 delete JTI.second; 160 clearBinaryData(); 161 } 162 163 /// Create BinaryContext for a given architecture \p ArchName and 164 /// triple \p TripleName. 165 Expected<std::unique_ptr<BinaryContext>> 166 BinaryContext::createBinaryContext(const ObjectFile *File, bool IsPIC, 167 std::unique_ptr<DWARFContext> DwCtx, 168 JournalingStreams Logger) { 169 StringRef ArchName = ""; 170 std::string FeaturesStr = ""; 171 switch (File->getArch()) { 172 case llvm::Triple::x86_64: 173 ArchName = "x86-64"; 174 FeaturesStr = "+nopl"; 175 break; 176 case llvm::Triple::aarch64: 177 ArchName = "aarch64"; 178 FeaturesStr = "+all"; 179 break; 180 case llvm::Triple::riscv64: { 181 ArchName = "riscv64"; 182 Expected<SubtargetFeatures> Features = File->getFeatures(); 183 184 if (auto E = Features.takeError()) 185 return std::move(E); 186 187 // We rely on relaxation for some transformations (e.g., promoting all calls 188 // to PseudoCALL and then making JITLink relax them). Since the relax 189 // feature is not stored in the object file, we manually enable it. 190 Features->AddFeature("relax"); 191 FeaturesStr = Features->getString(); 192 break; 193 } 194 default: 195 return createStringError(std::errc::not_supported, 196 "BOLT-ERROR: Unrecognized machine in ELF file"); 197 } 198 199 auto TheTriple = std::make_unique<Triple>(File->makeTriple()); 200 const std::string TripleName = TheTriple->str(); 201 202 std::string Error; 203 const Target *TheTarget = 204 TargetRegistry::lookupTarget(std::string(ArchName), *TheTriple, Error); 205 if (!TheTarget) 206 return createStringError(make_error_code(std::errc::not_supported), 207 Twine("BOLT-ERROR: ", Error)); 208 209 std::unique_ptr<const MCRegisterInfo> MRI( 210 TheTarget->createMCRegInfo(TripleName)); 211 if (!MRI) 212 return createStringError( 213 make_error_code(std::errc::not_supported), 214 Twine("BOLT-ERROR: no register info for target ", TripleName)); 215 216 // Set up disassembler. 217 std::unique_ptr<MCAsmInfo> AsmInfo( 218 TheTarget->createMCAsmInfo(*MRI, TripleName, MCTargetOptions())); 219 if (!AsmInfo) 220 return createStringError( 221 make_error_code(std::errc::not_supported), 222 Twine("BOLT-ERROR: no assembly info for target ", TripleName)); 223 // BOLT creates "func@PLT" symbols for PLT entries. In function assembly dump 224 // we want to emit such names as using @PLT without double quotes to convey 225 // variant kind to the assembler. BOLT doesn't rely on the linker so we can 226 // override the default AsmInfo behavior to emit names the way we want. 227 AsmInfo->setAllowAtInName(true); 228 229 std::unique_ptr<const MCSubtargetInfo> STI( 230 TheTarget->createMCSubtargetInfo(TripleName, "", FeaturesStr)); 231 if (!STI) 232 return createStringError( 233 make_error_code(std::errc::not_supported), 234 Twine("BOLT-ERROR: no subtarget info for target ", TripleName)); 235 236 std::unique_ptr<const MCInstrInfo> MII(TheTarget->createMCInstrInfo()); 237 if (!MII) 238 return createStringError( 239 make_error_code(std::errc::not_supported), 240 Twine("BOLT-ERROR: no instruction info for target ", TripleName)); 241 242 std::unique_ptr<MCContext> Ctx( 243 new MCContext(*TheTriple, AsmInfo.get(), MRI.get(), STI.get())); 244 std::unique_ptr<MCObjectFileInfo> MOFI( 245 TheTarget->createMCObjectFileInfo(*Ctx, IsPIC)); 246 Ctx->setObjectFileInfo(MOFI.get()); 247 // We do not support X86 Large code model. Change this in the future. 248 bool Large = false; 249 if (TheTriple->getArch() == llvm::Triple::aarch64) 250 Large = true; 251 unsigned LSDAEncoding = 252 Large ? dwarf::DW_EH_PE_absptr : dwarf::DW_EH_PE_udata4; 253 if (IsPIC) { 254 LSDAEncoding = dwarf::DW_EH_PE_pcrel | 255 (Large ? dwarf::DW_EH_PE_sdata8 : dwarf::DW_EH_PE_sdata4); 256 } 257 258 std::unique_ptr<MCDisassembler> DisAsm( 259 TheTarget->createMCDisassembler(*STI, *Ctx)); 260 261 if (!DisAsm) 262 return createStringError( 263 make_error_code(std::errc::not_supported), 264 Twine("BOLT-ERROR: no disassembler info for target ", TripleName)); 265 266 std::unique_ptr<const MCInstrAnalysis> MIA( 267 TheTarget->createMCInstrAnalysis(MII.get())); 268 if (!MIA) 269 return createStringError( 270 make_error_code(std::errc::not_supported), 271 Twine("BOLT-ERROR: failed to create instruction analysis for target ", 272 TripleName)); 273 274 int AsmPrinterVariant = AsmInfo->getAssemblerDialect(); 275 std::unique_ptr<MCInstPrinter> InstructionPrinter( 276 TheTarget->createMCInstPrinter(*TheTriple, AsmPrinterVariant, *AsmInfo, 277 *MII, *MRI)); 278 if (!InstructionPrinter) 279 return createStringError( 280 make_error_code(std::errc::not_supported), 281 Twine("BOLT-ERROR: no instruction printer for target ", TripleName)); 282 InstructionPrinter->setPrintImmHex(true); 283 284 std::unique_ptr<MCCodeEmitter> MCE( 285 TheTarget->createMCCodeEmitter(*MII, *Ctx)); 286 287 auto BC = std::make_unique<BinaryContext>( 288 std::move(Ctx), std::move(DwCtx), std::move(TheTriple), TheTarget, 289 std::string(TripleName), std::move(MCE), std::move(MOFI), 290 std::move(AsmInfo), std::move(MII), std::move(STI), 291 std::move(InstructionPrinter), std::move(MIA), nullptr, std::move(MRI), 292 std::move(DisAsm), Logger); 293 294 BC->LSDAEncoding = LSDAEncoding; 295 296 BC->MAB = std::unique_ptr<MCAsmBackend>( 297 BC->TheTarget->createMCAsmBackend(*BC->STI, *BC->MRI, MCTargetOptions())); 298 299 BC->setFilename(File->getFileName()); 300 301 BC->HasFixedLoadAddress = !IsPIC; 302 303 BC->SymbolicDisAsm = std::unique_ptr<MCDisassembler>( 304 BC->TheTarget->createMCDisassembler(*BC->STI, *BC->Ctx)); 305 306 if (!BC->SymbolicDisAsm) 307 return createStringError( 308 make_error_code(std::errc::not_supported), 309 Twine("BOLT-ERROR: no disassembler info for target ", TripleName)); 310 311 return std::move(BC); 312 } 313 314 bool BinaryContext::forceSymbolRelocations(StringRef SymbolName) const { 315 if (opts::HotText && 316 (SymbolName == "__hot_start" || SymbolName == "__hot_end")) 317 return true; 318 319 if (opts::HotData && 320 (SymbolName == "__hot_data_start" || SymbolName == "__hot_data_end")) 321 return true; 322 323 if (SymbolName == "_end") 324 return true; 325 326 return false; 327 } 328 329 std::unique_ptr<MCObjectWriter> 330 BinaryContext::createObjectWriter(raw_pwrite_stream &OS) { 331 return MAB->createObjectWriter(OS); 332 } 333 334 bool BinaryContext::validateObjectNesting() const { 335 auto Itr = BinaryDataMap.begin(); 336 auto End = BinaryDataMap.end(); 337 bool Valid = true; 338 while (Itr != End) { 339 auto Next = std::next(Itr); 340 while (Next != End && 341 Itr->second->getSection() == Next->second->getSection() && 342 Itr->second->containsRange(Next->second->getAddress(), 343 Next->second->getSize())) { 344 if (Next->second->Parent != Itr->second) { 345 this->errs() << "BOLT-WARNING: object nesting incorrect for:\n" 346 << "BOLT-WARNING: " << *Itr->second << "\n" 347 << "BOLT-WARNING: " << *Next->second << "\n"; 348 Valid = false; 349 } 350 ++Next; 351 } 352 Itr = Next; 353 } 354 return Valid; 355 } 356 357 bool BinaryContext::validateHoles() const { 358 bool Valid = true; 359 for (BinarySection &Section : sections()) { 360 for (const Relocation &Rel : Section.relocations()) { 361 uint64_t RelAddr = Rel.Offset + Section.getAddress(); 362 const BinaryData *BD = getBinaryDataContainingAddress(RelAddr); 363 if (!BD) { 364 this->errs() 365 << "BOLT-WARNING: no BinaryData found for relocation at address" 366 << " 0x" << Twine::utohexstr(RelAddr) << " in " << Section.getName() 367 << "\n"; 368 Valid = false; 369 } else if (!BD->getAtomicRoot()) { 370 this->errs() 371 << "BOLT-WARNING: no atomic BinaryData found for relocation at " 372 << "address 0x" << Twine::utohexstr(RelAddr) << " in " 373 << Section.getName() << "\n"; 374 Valid = false; 375 } 376 } 377 } 378 return Valid; 379 } 380 381 void BinaryContext::updateObjectNesting(BinaryDataMapType::iterator GAI) { 382 const uint64_t Address = GAI->second->getAddress(); 383 const uint64_t Size = GAI->second->getSize(); 384 385 auto fixParents = [&](BinaryDataMapType::iterator Itr, 386 BinaryData *NewParent) { 387 BinaryData *OldParent = Itr->second->Parent; 388 Itr->second->Parent = NewParent; 389 ++Itr; 390 while (Itr != BinaryDataMap.end() && OldParent && 391 Itr->second->Parent == OldParent) { 392 Itr->second->Parent = NewParent; 393 ++Itr; 394 } 395 }; 396 397 // Check if the previous symbol contains the newly added symbol. 398 if (GAI != BinaryDataMap.begin()) { 399 BinaryData *Prev = std::prev(GAI)->second; 400 while (Prev) { 401 if (Prev->getSection() == GAI->second->getSection() && 402 Prev->containsRange(Address, Size)) { 403 fixParents(GAI, Prev); 404 } else { 405 fixParents(GAI, nullptr); 406 } 407 Prev = Prev->Parent; 408 } 409 } 410 411 // Check if the newly added symbol contains any subsequent symbols. 412 if (Size != 0) { 413 BinaryData *BD = GAI->second->Parent ? GAI->second->Parent : GAI->second; 414 auto Itr = std::next(GAI); 415 while ( 416 Itr != BinaryDataMap.end() && 417 BD->containsRange(Itr->second->getAddress(), Itr->second->getSize())) { 418 Itr->second->Parent = BD; 419 ++Itr; 420 } 421 } 422 } 423 424 iterator_range<BinaryContext::binary_data_iterator> 425 BinaryContext::getSubBinaryData(BinaryData *BD) { 426 auto Start = std::next(BinaryDataMap.find(BD->getAddress())); 427 auto End = Start; 428 while (End != BinaryDataMap.end() && BD->isAncestorOf(End->second)) 429 ++End; 430 return make_range(Start, End); 431 } 432 433 std::pair<const MCSymbol *, uint64_t> 434 BinaryContext::handleAddressRef(uint64_t Address, BinaryFunction &BF, 435 bool IsPCRel) { 436 if (isAArch64()) { 437 // Check if this is an access to a constant island and create bookkeeping 438 // to keep track of it and emit it later as part of this function. 439 if (MCSymbol *IslandSym = BF.getOrCreateIslandAccess(Address)) 440 return std::make_pair(IslandSym, 0); 441 442 // Detect custom code written in assembly that refers to arbitrary 443 // constant islands from other functions. Write this reference so we 444 // can pull this constant island and emit it as part of this function 445 // too. 446 auto IslandIter = AddressToConstantIslandMap.lower_bound(Address); 447 448 if (IslandIter != AddressToConstantIslandMap.begin() && 449 (IslandIter == AddressToConstantIslandMap.end() || 450 IslandIter->first > Address)) 451 --IslandIter; 452 453 if (IslandIter != AddressToConstantIslandMap.end()) { 454 // Fall-back to referencing the original constant island in the presence 455 // of dynamic relocs, as we currently do not support cloning them. 456 // Notice: we might fail to link because of this, if the original constant 457 // island we are referring would be emitted too far away. 458 if (IslandIter->second->hasDynamicRelocationAtIsland()) { 459 MCSymbol *IslandSym = 460 IslandIter->second->getOrCreateIslandAccess(Address); 461 if (IslandSym) 462 return std::make_pair(IslandSym, 0); 463 } else if (MCSymbol *IslandSym = 464 IslandIter->second->getOrCreateProxyIslandAccess(Address, 465 BF)) { 466 BF.createIslandDependency(IslandSym, IslandIter->second); 467 return std::make_pair(IslandSym, 0); 468 } 469 } 470 } 471 472 // Note that the address does not necessarily have to reside inside 473 // a section, it could be an absolute address too. 474 ErrorOr<BinarySection &> Section = getSectionForAddress(Address); 475 if (Section && Section->isText()) { 476 if (BF.containsAddress(Address, /*UseMaxSize=*/isAArch64())) { 477 if (Address != BF.getAddress()) { 478 // The address could potentially escape. Mark it as another entry 479 // point into the function. 480 if (opts::Verbosity >= 1) { 481 this->outs() << "BOLT-INFO: potentially escaped address 0x" 482 << Twine::utohexstr(Address) << " in function " << BF 483 << '\n'; 484 } 485 BF.HasInternalLabelReference = true; 486 return std::make_pair( 487 BF.addEntryPointAtOffset(Address - BF.getAddress()), 0); 488 } 489 } else { 490 addInterproceduralReference(&BF, Address); 491 } 492 } 493 494 // With relocations, catch jump table references outside of the basic block 495 // containing the indirect jump. 496 if (HasRelocations) { 497 const MemoryContentsType MemType = analyzeMemoryAt(Address, BF); 498 if (MemType == MemoryContentsType::POSSIBLE_PIC_JUMP_TABLE && IsPCRel) { 499 const MCSymbol *Symbol = 500 getOrCreateJumpTable(BF, Address, JumpTable::JTT_PIC); 501 502 return std::make_pair(Symbol, 0); 503 } 504 } 505 506 if (BinaryData *BD = getBinaryDataContainingAddress(Address)) 507 return std::make_pair(BD->getSymbol(), Address - BD->getAddress()); 508 509 // TODO: use DWARF info to get size/alignment here? 510 MCSymbol *TargetSymbol = getOrCreateGlobalSymbol(Address, "DATAat"); 511 LLVM_DEBUG(dbgs() << "Created symbol " << TargetSymbol->getName() << '\n'); 512 return std::make_pair(TargetSymbol, 0); 513 } 514 515 MemoryContentsType BinaryContext::analyzeMemoryAt(uint64_t Address, 516 BinaryFunction &BF) { 517 if (!isX86()) 518 return MemoryContentsType::UNKNOWN; 519 520 ErrorOr<BinarySection &> Section = getSectionForAddress(Address); 521 if (!Section) { 522 // No section - possibly an absolute address. Since we don't allow 523 // internal function addresses to escape the function scope - we 524 // consider it a tail call. 525 if (opts::Verbosity > 1) { 526 this->errs() << "BOLT-WARNING: no section for address 0x" 527 << Twine::utohexstr(Address) << " referenced from function " 528 << BF << '\n'; 529 } 530 return MemoryContentsType::UNKNOWN; 531 } 532 533 if (Section->isVirtual()) { 534 // The contents are filled at runtime. 535 return MemoryContentsType::UNKNOWN; 536 } 537 538 // No support for jump tables in code yet. 539 if (Section->isText()) 540 return MemoryContentsType::UNKNOWN; 541 542 // Start with checking for PIC jump table. We expect non-PIC jump tables 543 // to have high 32 bits set to 0. 544 if (analyzeJumpTable(Address, JumpTable::JTT_PIC, BF)) 545 return MemoryContentsType::POSSIBLE_PIC_JUMP_TABLE; 546 547 if (analyzeJumpTable(Address, JumpTable::JTT_NORMAL, BF)) 548 return MemoryContentsType::POSSIBLE_JUMP_TABLE; 549 550 return MemoryContentsType::UNKNOWN; 551 } 552 553 bool BinaryContext::analyzeJumpTable(const uint64_t Address, 554 const JumpTable::JumpTableType Type, 555 const BinaryFunction &BF, 556 const uint64_t NextJTAddress, 557 JumpTable::AddressesType *EntriesAsAddress, 558 bool *HasEntryInFragment) const { 559 // Is one of the targets __builtin_unreachable? 560 bool HasUnreachable = false; 561 562 // Does one of the entries match function start address? 563 bool HasStartAsEntry = false; 564 565 // Number of targets other than __builtin_unreachable. 566 uint64_t NumRealEntries = 0; 567 568 auto addEntryAddress = [&](uint64_t EntryAddress) { 569 if (EntriesAsAddress) 570 EntriesAsAddress->emplace_back(EntryAddress); 571 }; 572 573 ErrorOr<const BinarySection &> Section = getSectionForAddress(Address); 574 if (!Section) 575 return false; 576 577 // The upper bound is defined by containing object, section limits, and 578 // the next jump table in memory. 579 uint64_t UpperBound = Section->getEndAddress(); 580 const BinaryData *JumpTableBD = getBinaryDataAtAddress(Address); 581 if (JumpTableBD && JumpTableBD->getSize()) { 582 assert(JumpTableBD->getEndAddress() <= UpperBound && 583 "data object cannot cross a section boundary"); 584 UpperBound = JumpTableBD->getEndAddress(); 585 } 586 if (NextJTAddress) 587 UpperBound = std::min(NextJTAddress, UpperBound); 588 589 LLVM_DEBUG({ 590 using JTT = JumpTable::JumpTableType; 591 dbgs() << formatv("BOLT-DEBUG: analyzeJumpTable @{0:x} in {1}, JTT={2}\n", 592 Address, BF.getPrintName(), 593 Type == JTT::JTT_PIC ? "PIC" : "Normal"); 594 }); 595 const uint64_t EntrySize = getJumpTableEntrySize(Type); 596 for (uint64_t EntryAddress = Address; EntryAddress <= UpperBound - EntrySize; 597 EntryAddress += EntrySize) { 598 LLVM_DEBUG(dbgs() << " * Checking 0x" << Twine::utohexstr(EntryAddress) 599 << " -> "); 600 // Check if there's a proper relocation against the jump table entry. 601 if (HasRelocations) { 602 if (Type == JumpTable::JTT_PIC && 603 !DataPCRelocations.count(EntryAddress)) { 604 LLVM_DEBUG( 605 dbgs() << "FAIL: JTT_PIC table, no relocation for this address\n"); 606 break; 607 } 608 if (Type == JumpTable::JTT_NORMAL && !getRelocationAt(EntryAddress)) { 609 LLVM_DEBUG( 610 dbgs() 611 << "FAIL: JTT_NORMAL table, no relocation for this address\n"); 612 break; 613 } 614 } 615 616 const uint64_t Value = 617 (Type == JumpTable::JTT_PIC) 618 ? Address + *getSignedValueAtAddress(EntryAddress, EntrySize) 619 : *getPointerAtAddress(EntryAddress); 620 621 // __builtin_unreachable() case. 622 if (Value == BF.getAddress() + BF.getSize()) { 623 addEntryAddress(Value); 624 HasUnreachable = true; 625 LLVM_DEBUG(dbgs() << formatv("OK: {0:x} __builtin_unreachable\n", Value)); 626 continue; 627 } 628 629 // Function start is another special case. It is allowed in the jump table, 630 // but we need at least one another regular entry to distinguish the table 631 // from, e.g. a function pointer array. 632 if (Value == BF.getAddress()) { 633 HasStartAsEntry = true; 634 addEntryAddress(Value); 635 continue; 636 } 637 638 // Function or one of its fragments. 639 const BinaryFunction *TargetBF = getBinaryFunctionContainingAddress(Value); 640 const bool DoesBelongToFunction = 641 BF.containsAddress(Value) || 642 (TargetBF && TargetBF->isParentOrChildOf(BF)); 643 if (!DoesBelongToFunction) { 644 LLVM_DEBUG({ 645 if (!BF.containsAddress(Value)) { 646 dbgs() << "FAIL: function doesn't contain this address\n"; 647 if (TargetBF) { 648 dbgs() << " ! function containing this address: " 649 << TargetBF->getPrintName() << '\n'; 650 if (TargetBF->isFragment()) { 651 dbgs() << " ! is a fragment"; 652 for (BinaryFunction *Parent : TargetBF->ParentFragments) 653 dbgs() << ", parent: " << Parent->getPrintName(); 654 dbgs() << '\n'; 655 } 656 } 657 } 658 }); 659 break; 660 } 661 662 // Check there's an instruction at this offset. 663 if (TargetBF->getState() == BinaryFunction::State::Disassembled && 664 !TargetBF->getInstructionAtOffset(Value - TargetBF->getAddress())) { 665 LLVM_DEBUG(dbgs() << formatv("FAIL: no instruction at {0:x}\n", Value)); 666 break; 667 } 668 669 ++NumRealEntries; 670 LLVM_DEBUG(dbgs() << formatv("OK: {0:x} real entry\n", Value)); 671 672 if (TargetBF != &BF && HasEntryInFragment) 673 *HasEntryInFragment = true; 674 addEntryAddress(Value); 675 } 676 677 // It's a jump table if the number of real entries is more than 1, or there's 678 // one real entry and one or more special targets. If there are only multiple 679 // special targets, then it's not a jump table. 680 return NumRealEntries + (HasUnreachable || HasStartAsEntry) >= 2; 681 } 682 683 void BinaryContext::populateJumpTables() { 684 LLVM_DEBUG(dbgs() << "DataPCRelocations: " << DataPCRelocations.size() 685 << '\n'); 686 for (auto JTI = JumpTables.begin(), JTE = JumpTables.end(); JTI != JTE; 687 ++JTI) { 688 JumpTable *JT = JTI->second; 689 690 bool NonSimpleParent = false; 691 for (BinaryFunction *BF : JT->Parents) 692 NonSimpleParent |= !BF->isSimple(); 693 if (NonSimpleParent) 694 continue; 695 696 uint64_t NextJTAddress = 0; 697 auto NextJTI = std::next(JTI); 698 if (NextJTI != JTE) 699 NextJTAddress = NextJTI->second->getAddress(); 700 701 const bool Success = 702 analyzeJumpTable(JT->getAddress(), JT->Type, *(JT->Parents[0]), 703 NextJTAddress, &JT->EntriesAsAddress, &JT->IsSplit); 704 if (!Success) { 705 LLVM_DEBUG({ 706 dbgs() << "failed to analyze "; 707 JT->print(dbgs()); 708 if (NextJTI != JTE) { 709 dbgs() << "next "; 710 NextJTI->second->print(dbgs()); 711 } 712 }); 713 llvm_unreachable("jump table heuristic failure"); 714 } 715 for (BinaryFunction *Frag : JT->Parents) { 716 if (JT->IsSplit) 717 Frag->setHasIndirectTargetToSplitFragment(true); 718 for (uint64_t EntryAddress : JT->EntriesAsAddress) 719 // if target is builtin_unreachable 720 if (EntryAddress == Frag->getAddress() + Frag->getSize()) { 721 Frag->IgnoredBranches.emplace_back(EntryAddress - Frag->getAddress(), 722 Frag->getSize()); 723 } else if (EntryAddress >= Frag->getAddress() && 724 EntryAddress < Frag->getAddress() + Frag->getSize()) { 725 Frag->registerReferencedOffset(EntryAddress - Frag->getAddress()); 726 } 727 } 728 729 // In strict mode, erase PC-relative relocation record. Later we check that 730 // all such records are erased and thus have been accounted for. 731 if (opts::StrictMode && JT->Type == JumpTable::JTT_PIC) { 732 for (uint64_t Address = JT->getAddress(); 733 Address < JT->getAddress() + JT->getSize(); 734 Address += JT->EntrySize) { 735 DataPCRelocations.erase(DataPCRelocations.find(Address)); 736 } 737 } 738 739 // Mark to skip the function and all its fragments. 740 for (BinaryFunction *Frag : JT->Parents) 741 if (Frag->hasIndirectTargetToSplitFragment()) 742 addFragmentsToSkip(Frag); 743 } 744 745 if (opts::StrictMode && DataPCRelocations.size()) { 746 LLVM_DEBUG({ 747 dbgs() << DataPCRelocations.size() 748 << " unclaimed PC-relative relocations left in data:\n"; 749 for (uint64_t Reloc : DataPCRelocations) 750 dbgs() << Twine::utohexstr(Reloc) << '\n'; 751 }); 752 assert(0 && "unclaimed PC-relative relocations left in data\n"); 753 } 754 clearList(DataPCRelocations); 755 } 756 757 void BinaryContext::skipMarkedFragments() { 758 std::vector<BinaryFunction *> FragmentQueue; 759 // Copy the functions to FragmentQueue. 760 FragmentQueue.assign(FragmentsToSkip.begin(), FragmentsToSkip.end()); 761 auto addToWorklist = [&](BinaryFunction *Function) -> void { 762 if (FragmentsToSkip.count(Function)) 763 return; 764 FragmentQueue.push_back(Function); 765 addFragmentsToSkip(Function); 766 }; 767 // Functions containing split jump tables need to be skipped with all 768 // fragments (transitively). 769 for (size_t I = 0; I != FragmentQueue.size(); I++) { 770 BinaryFunction *BF = FragmentQueue[I]; 771 assert(FragmentsToSkip.count(BF) && 772 "internal error in traversing function fragments"); 773 if (opts::Verbosity >= 1) 774 this->errs() << "BOLT-WARNING: Ignoring " << BF->getPrintName() << '\n'; 775 BF->setSimple(false); 776 BF->setHasIndirectTargetToSplitFragment(true); 777 778 llvm::for_each(BF->Fragments, addToWorklist); 779 llvm::for_each(BF->ParentFragments, addToWorklist); 780 } 781 if (!FragmentsToSkip.empty()) 782 this->errs() << "BOLT-WARNING: skipped " << FragmentsToSkip.size() 783 << " function" << (FragmentsToSkip.size() == 1 ? "" : "s") 784 << " due to cold fragments\n"; 785 } 786 787 MCSymbol *BinaryContext::getOrCreateGlobalSymbol(uint64_t Address, Twine Prefix, 788 uint64_t Size, 789 uint16_t Alignment, 790 unsigned Flags) { 791 auto Itr = BinaryDataMap.find(Address); 792 if (Itr != BinaryDataMap.end()) { 793 assert(Itr->second->getSize() == Size || !Size); 794 return Itr->second->getSymbol(); 795 } 796 797 std::string Name = (Prefix + "0x" + Twine::utohexstr(Address)).str(); 798 assert(!GlobalSymbols.count(Name) && "created name is not unique"); 799 return registerNameAtAddress(Name, Address, Size, Alignment, Flags); 800 } 801 802 MCSymbol *BinaryContext::getOrCreateUndefinedGlobalSymbol(StringRef Name) { 803 return Ctx->getOrCreateSymbol(Name); 804 } 805 806 BinaryFunction *BinaryContext::createBinaryFunction( 807 const std::string &Name, BinarySection &Section, uint64_t Address, 808 uint64_t Size, uint64_t SymbolSize, uint16_t Alignment) { 809 auto Result = BinaryFunctions.emplace( 810 Address, BinaryFunction(Name, Section, Address, Size, *this)); 811 assert(Result.second == true && "unexpected duplicate function"); 812 BinaryFunction *BF = &Result.first->second; 813 registerNameAtAddress(Name, Address, SymbolSize ? SymbolSize : Size, 814 Alignment); 815 setSymbolToFunctionMap(BF->getSymbol(), BF); 816 return BF; 817 } 818 819 const MCSymbol * 820 BinaryContext::getOrCreateJumpTable(BinaryFunction &Function, uint64_t Address, 821 JumpTable::JumpTableType Type) { 822 // Two fragments of same function access same jump table 823 if (JumpTable *JT = getJumpTableContainingAddress(Address)) { 824 assert(JT->Type == Type && "jump table types have to match"); 825 assert(Address == JT->getAddress() && "unexpected non-empty jump table"); 826 827 // Prevent associating a jump table to a specific fragment twice. 828 // This simple check arises from the assumption: no more than 2 fragments. 829 if (JT->Parents.size() == 1 && JT->Parents[0] != &Function) { 830 assert(JT->Parents[0]->isParentOrChildOf(Function) && 831 "cannot re-use jump table of a different function"); 832 // Duplicate the entry for the parent function for easy access 833 JT->Parents.push_back(&Function); 834 if (opts::Verbosity > 2) { 835 this->outs() << "BOLT-INFO: Multiple fragments access same jump table: " 836 << JT->Parents[0]->getPrintName() << "; " 837 << Function.getPrintName() << "\n"; 838 JT->print(this->outs()); 839 } 840 Function.JumpTables.emplace(Address, JT); 841 JT->Parents[0]->setHasIndirectTargetToSplitFragment(true); 842 JT->Parents[1]->setHasIndirectTargetToSplitFragment(true); 843 } 844 845 bool IsJumpTableParent = false; 846 (void)IsJumpTableParent; 847 for (BinaryFunction *Frag : JT->Parents) 848 if (Frag == &Function) 849 IsJumpTableParent = true; 850 assert(IsJumpTableParent && 851 "cannot re-use jump table of a different function"); 852 return JT->getFirstLabel(); 853 } 854 855 // Re-use the existing symbol if possible. 856 MCSymbol *JTLabel = nullptr; 857 if (BinaryData *Object = getBinaryDataAtAddress(Address)) { 858 if (!isInternalSymbolName(Object->getSymbol()->getName())) 859 JTLabel = Object->getSymbol(); 860 } 861 862 const uint64_t EntrySize = getJumpTableEntrySize(Type); 863 if (!JTLabel) { 864 const std::string JumpTableName = generateJumpTableName(Function, Address); 865 JTLabel = registerNameAtAddress(JumpTableName, Address, 0, EntrySize); 866 } 867 868 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: creating jump table " << JTLabel->getName() 869 << " in function " << Function << '\n'); 870 871 JumpTable *JT = new JumpTable(*JTLabel, Address, EntrySize, Type, 872 JumpTable::LabelMapType{{0, JTLabel}}, 873 *getSectionForAddress(Address)); 874 JT->Parents.push_back(&Function); 875 if (opts::Verbosity > 2) 876 JT->print(this->outs()); 877 JumpTables.emplace(Address, JT); 878 879 // Duplicate the entry for the parent function for easy access. 880 Function.JumpTables.emplace(Address, JT); 881 return JTLabel; 882 } 883 884 std::pair<uint64_t, const MCSymbol *> 885 BinaryContext::duplicateJumpTable(BinaryFunction &Function, JumpTable *JT, 886 const MCSymbol *OldLabel) { 887 auto L = scopeLock(); 888 unsigned Offset = 0; 889 bool Found = false; 890 for (std::pair<const unsigned, MCSymbol *> Elmt : JT->Labels) { 891 if (Elmt.second != OldLabel) 892 continue; 893 Offset = Elmt.first; 894 Found = true; 895 break; 896 } 897 assert(Found && "Label not found"); 898 (void)Found; 899 MCSymbol *NewLabel = Ctx->createNamedTempSymbol("duplicatedJT"); 900 JumpTable *NewJT = 901 new JumpTable(*NewLabel, JT->getAddress(), JT->EntrySize, JT->Type, 902 JumpTable::LabelMapType{{Offset, NewLabel}}, 903 *getSectionForAddress(JT->getAddress())); 904 NewJT->Parents = JT->Parents; 905 NewJT->Entries = JT->Entries; 906 NewJT->Counts = JT->Counts; 907 uint64_t JumpTableID = ++DuplicatedJumpTables; 908 // Invert it to differentiate from regular jump tables whose IDs are their 909 // addresses in the input binary memory space 910 JumpTableID = ~JumpTableID; 911 JumpTables.emplace(JumpTableID, NewJT); 912 Function.JumpTables.emplace(JumpTableID, NewJT); 913 return std::make_pair(JumpTableID, NewLabel); 914 } 915 916 std::string BinaryContext::generateJumpTableName(const BinaryFunction &BF, 917 uint64_t Address) { 918 size_t Id; 919 uint64_t Offset = 0; 920 if (const JumpTable *JT = BF.getJumpTableContainingAddress(Address)) { 921 Offset = Address - JT->getAddress(); 922 auto Itr = JT->Labels.find(Offset); 923 if (Itr != JT->Labels.end()) 924 return std::string(Itr->second->getName()); 925 Id = JumpTableIds.at(JT->getAddress()); 926 } else { 927 Id = JumpTableIds[Address] = BF.JumpTables.size(); 928 } 929 return ("JUMP_TABLE/" + BF.getOneName().str() + "." + std::to_string(Id) + 930 (Offset ? ("." + std::to_string(Offset)) : "")); 931 } 932 933 bool BinaryContext::hasValidCodePadding(const BinaryFunction &BF) { 934 // FIXME: aarch64 support is missing. 935 if (!isX86()) 936 return true; 937 938 if (BF.getSize() == BF.getMaxSize()) 939 return true; 940 941 ErrorOr<ArrayRef<unsigned char>> FunctionData = BF.getData(); 942 assert(FunctionData && "cannot get function as data"); 943 944 uint64_t Offset = BF.getSize(); 945 MCInst Instr; 946 uint64_t InstrSize = 0; 947 uint64_t InstrAddress = BF.getAddress() + Offset; 948 using std::placeholders::_1; 949 950 // Skip instructions that satisfy the predicate condition. 951 auto skipInstructions = [&](std::function<bool(const MCInst &)> Predicate) { 952 const uint64_t StartOffset = Offset; 953 for (; Offset < BF.getMaxSize(); 954 Offset += InstrSize, InstrAddress += InstrSize) { 955 if (!DisAsm->getInstruction(Instr, InstrSize, FunctionData->slice(Offset), 956 InstrAddress, nulls())) 957 break; 958 if (!Predicate(Instr)) 959 break; 960 } 961 962 return Offset - StartOffset; 963 }; 964 965 // Skip a sequence of zero bytes. 966 auto skipZeros = [&]() { 967 const uint64_t StartOffset = Offset; 968 for (; Offset < BF.getMaxSize(); ++Offset) 969 if ((*FunctionData)[Offset] != 0) 970 break; 971 972 return Offset - StartOffset; 973 }; 974 975 // Accept the whole padding area filled with breakpoints. 976 auto isBreakpoint = std::bind(&MCPlusBuilder::isBreakpoint, MIB.get(), _1); 977 if (skipInstructions(isBreakpoint) && Offset == BF.getMaxSize()) 978 return true; 979 980 auto isNoop = std::bind(&MCPlusBuilder::isNoop, MIB.get(), _1); 981 982 // Some functions have a jump to the next function or to the padding area 983 // inserted after the body. 984 auto isSkipJump = [&](const MCInst &Instr) { 985 uint64_t TargetAddress = 0; 986 if (MIB->isUnconditionalBranch(Instr) && 987 MIB->evaluateBranch(Instr, InstrAddress, InstrSize, TargetAddress)) { 988 if (TargetAddress >= InstrAddress + InstrSize && 989 TargetAddress <= BF.getAddress() + BF.getMaxSize()) { 990 return true; 991 } 992 } 993 return false; 994 }; 995 996 // Skip over nops, jumps, and zero padding. Allow interleaving (this happens). 997 while (skipInstructions(isNoop) || skipInstructions(isSkipJump) || 998 skipZeros()) 999 ; 1000 1001 if (Offset == BF.getMaxSize()) 1002 return true; 1003 1004 if (opts::Verbosity >= 1) { 1005 this->errs() << "BOLT-WARNING: bad padding at address 0x" 1006 << Twine::utohexstr(BF.getAddress() + BF.getSize()) 1007 << " starting at offset " << (Offset - BF.getSize()) 1008 << " in function " << BF << '\n' 1009 << FunctionData->slice(BF.getSize(), 1010 BF.getMaxSize() - BF.getSize()) 1011 << '\n'; 1012 } 1013 1014 return false; 1015 } 1016 1017 void BinaryContext::adjustCodePadding() { 1018 for (auto &BFI : BinaryFunctions) { 1019 BinaryFunction &BF = BFI.second; 1020 if (!shouldEmit(BF)) 1021 continue; 1022 1023 if (!hasValidCodePadding(BF)) { 1024 if (HasRelocations) { 1025 if (opts::Verbosity >= 1) { 1026 this->outs() << "BOLT-INFO: function " << BF 1027 << " has invalid padding. Ignoring the function.\n"; 1028 } 1029 BF.setIgnored(); 1030 } else { 1031 BF.setMaxSize(BF.getSize()); 1032 } 1033 } 1034 } 1035 } 1036 1037 MCSymbol *BinaryContext::registerNameAtAddress(StringRef Name, uint64_t Address, 1038 uint64_t Size, 1039 uint16_t Alignment, 1040 unsigned Flags) { 1041 // Register the name with MCContext. 1042 MCSymbol *Symbol = Ctx->getOrCreateSymbol(Name); 1043 1044 auto GAI = BinaryDataMap.find(Address); 1045 BinaryData *BD; 1046 if (GAI == BinaryDataMap.end()) { 1047 ErrorOr<BinarySection &> SectionOrErr = getSectionForAddress(Address); 1048 BinarySection &Section = 1049 SectionOrErr ? SectionOrErr.get() : absoluteSection(); 1050 BD = new BinaryData(*Symbol, Address, Size, Alignment ? Alignment : 1, 1051 Section, Flags); 1052 GAI = BinaryDataMap.emplace(Address, BD).first; 1053 GlobalSymbols[Name] = BD; 1054 updateObjectNesting(GAI); 1055 } else { 1056 BD = GAI->second; 1057 if (!BD->hasName(Name)) { 1058 GlobalSymbols[Name] = BD; 1059 BD->Symbols.push_back(Symbol); 1060 } 1061 } 1062 1063 return Symbol; 1064 } 1065 1066 const BinaryData * 1067 BinaryContext::getBinaryDataContainingAddressImpl(uint64_t Address) const { 1068 auto NI = BinaryDataMap.lower_bound(Address); 1069 auto End = BinaryDataMap.end(); 1070 if ((NI != End && Address == NI->first) || 1071 ((NI != BinaryDataMap.begin()) && (NI-- != BinaryDataMap.begin()))) { 1072 if (NI->second->containsAddress(Address)) 1073 return NI->second; 1074 1075 // If this is a sub-symbol, see if a parent data contains the address. 1076 const BinaryData *BD = NI->second->getParent(); 1077 while (BD) { 1078 if (BD->containsAddress(Address)) 1079 return BD; 1080 BD = BD->getParent(); 1081 } 1082 } 1083 return nullptr; 1084 } 1085 1086 BinaryData *BinaryContext::getGOTSymbol() { 1087 // First tries to find a global symbol with that name 1088 BinaryData *GOTSymBD = getBinaryDataByName("_GLOBAL_OFFSET_TABLE_"); 1089 if (GOTSymBD) 1090 return GOTSymBD; 1091 1092 // This symbol might be hidden from run-time link, so fetch the local 1093 // definition if available. 1094 GOTSymBD = getBinaryDataByName("_GLOBAL_OFFSET_TABLE_/1"); 1095 if (!GOTSymBD) 1096 return nullptr; 1097 1098 // If the local symbol is not unique, fail 1099 unsigned Index = 2; 1100 SmallString<30> Storage; 1101 while (const BinaryData *BD = 1102 getBinaryDataByName(Twine("_GLOBAL_OFFSET_TABLE_/") 1103 .concat(Twine(Index++)) 1104 .toStringRef(Storage))) 1105 if (BD->getAddress() != GOTSymBD->getAddress()) 1106 return nullptr; 1107 1108 return GOTSymBD; 1109 } 1110 1111 bool BinaryContext::setBinaryDataSize(uint64_t Address, uint64_t Size) { 1112 auto NI = BinaryDataMap.find(Address); 1113 assert(NI != BinaryDataMap.end()); 1114 if (NI == BinaryDataMap.end()) 1115 return false; 1116 // TODO: it's possible that a jump table starts at the same address 1117 // as a larger blob of private data. When we set the size of the 1118 // jump table, it might be smaller than the total blob size. In this 1119 // case we just leave the original size since (currently) it won't really 1120 // affect anything. 1121 assert((!NI->second->Size || NI->second->Size == Size || 1122 (NI->second->isJumpTable() && NI->second->Size > Size)) && 1123 "can't change the size of a symbol that has already had its " 1124 "size set"); 1125 if (!NI->second->Size) { 1126 NI->second->Size = Size; 1127 updateObjectNesting(NI); 1128 return true; 1129 } 1130 return false; 1131 } 1132 1133 void BinaryContext::generateSymbolHashes() { 1134 auto isPadding = [](const BinaryData &BD) { 1135 StringRef Contents = BD.getSection().getContents(); 1136 StringRef SymData = Contents.substr(BD.getOffset(), BD.getSize()); 1137 return (BD.getName().starts_with("HOLEat") || 1138 SymData.find_first_not_of(0) == StringRef::npos); 1139 }; 1140 1141 uint64_t NumCollisions = 0; 1142 for (auto &Entry : BinaryDataMap) { 1143 BinaryData &BD = *Entry.second; 1144 StringRef Name = BD.getName(); 1145 1146 if (!isInternalSymbolName(Name)) 1147 continue; 1148 1149 // First check if a non-anonymous alias exists and move it to the front. 1150 if (BD.getSymbols().size() > 1) { 1151 auto Itr = llvm::find_if(BD.getSymbols(), [&](const MCSymbol *Symbol) { 1152 return !isInternalSymbolName(Symbol->getName()); 1153 }); 1154 if (Itr != BD.getSymbols().end()) { 1155 size_t Idx = std::distance(BD.getSymbols().begin(), Itr); 1156 std::swap(BD.getSymbols()[0], BD.getSymbols()[Idx]); 1157 continue; 1158 } 1159 } 1160 1161 // We have to skip 0 size symbols since they will all collide. 1162 if (BD.getSize() == 0) { 1163 continue; 1164 } 1165 1166 const uint64_t Hash = BD.getSection().hash(BD); 1167 const size_t Idx = Name.find("0x"); 1168 std::string NewName = 1169 (Twine(Name.substr(0, Idx)) + "_" + Twine::utohexstr(Hash)).str(); 1170 if (getBinaryDataByName(NewName)) { 1171 // Ignore collisions for symbols that appear to be padding 1172 // (i.e. all zeros or a "hole") 1173 if (!isPadding(BD)) { 1174 if (opts::Verbosity) { 1175 this->errs() << "BOLT-WARNING: collision detected when hashing " << BD 1176 << " with new name (" << NewName << "), skipping.\n"; 1177 } 1178 ++NumCollisions; 1179 } 1180 continue; 1181 } 1182 BD.Symbols.insert(BD.Symbols.begin(), Ctx->getOrCreateSymbol(NewName)); 1183 GlobalSymbols[NewName] = &BD; 1184 } 1185 if (NumCollisions) { 1186 this->errs() << "BOLT-WARNING: " << NumCollisions 1187 << " collisions detected while hashing binary objects"; 1188 if (!opts::Verbosity) 1189 this->errs() << ". Use -v=1 to see the list."; 1190 this->errs() << '\n'; 1191 } 1192 } 1193 1194 bool BinaryContext::registerFragment(BinaryFunction &TargetFunction, 1195 BinaryFunction &Function) const { 1196 assert(TargetFunction.isFragment() && "TargetFunction must be a fragment"); 1197 if (TargetFunction.isChildOf(Function)) 1198 return true; 1199 TargetFunction.addParentFragment(Function); 1200 Function.addFragment(TargetFunction); 1201 if (!HasRelocations) { 1202 TargetFunction.setSimple(false); 1203 Function.setSimple(false); 1204 } 1205 if (opts::Verbosity >= 1) { 1206 this->outs() << "BOLT-INFO: marking " << TargetFunction 1207 << " as a fragment of " << Function << '\n'; 1208 } 1209 return true; 1210 } 1211 1212 void BinaryContext::addAdrpAddRelocAArch64(BinaryFunction &BF, 1213 MCInst &LoadLowBits, 1214 MCInst &LoadHiBits, 1215 uint64_t Target) { 1216 const MCSymbol *TargetSymbol; 1217 uint64_t Addend = 0; 1218 std::tie(TargetSymbol, Addend) = handleAddressRef(Target, BF, 1219 /*IsPCRel*/ true); 1220 int64_t Val; 1221 MIB->replaceImmWithSymbolRef(LoadHiBits, TargetSymbol, Addend, Ctx.get(), Val, 1222 ELF::R_AARCH64_ADR_PREL_PG_HI21); 1223 MIB->replaceImmWithSymbolRef(LoadLowBits, TargetSymbol, Addend, Ctx.get(), 1224 Val, ELF::R_AARCH64_ADD_ABS_LO12_NC); 1225 } 1226 1227 bool BinaryContext::handleAArch64Veneer(uint64_t Address, bool MatchOnly) { 1228 BinaryFunction *TargetFunction = getBinaryFunctionContainingAddress(Address); 1229 if (TargetFunction) 1230 return false; 1231 1232 ErrorOr<BinarySection &> Section = getSectionForAddress(Address); 1233 assert(Section && "cannot get section for referenced address"); 1234 if (!Section->isText()) 1235 return false; 1236 1237 bool Ret = false; 1238 StringRef SectionContents = Section->getContents(); 1239 uint64_t Offset = Address - Section->getAddress(); 1240 const uint64_t MaxSize = SectionContents.size() - Offset; 1241 const uint8_t *Bytes = 1242 reinterpret_cast<const uint8_t *>(SectionContents.data()); 1243 ArrayRef<uint8_t> Data(Bytes + Offset, MaxSize); 1244 1245 auto matchVeneer = [&](BinaryFunction::InstrMapType &Instructions, 1246 MCInst &Instruction, uint64_t Offset, 1247 uint64_t AbsoluteInstrAddr, 1248 uint64_t TotalSize) -> bool { 1249 MCInst *TargetHiBits, *TargetLowBits; 1250 uint64_t TargetAddress, Count; 1251 Count = MIB->matchLinkerVeneer(Instructions.begin(), Instructions.end(), 1252 AbsoluteInstrAddr, Instruction, TargetHiBits, 1253 TargetLowBits, TargetAddress); 1254 if (!Count) 1255 return false; 1256 1257 if (MatchOnly) 1258 return true; 1259 1260 // NOTE The target symbol was created during disassemble's 1261 // handleExternalReference 1262 const MCSymbol *VeneerSymbol = getOrCreateGlobalSymbol(Address, "FUNCat"); 1263 BinaryFunction *Veneer = createBinaryFunction(VeneerSymbol->getName().str(), 1264 *Section, Address, TotalSize); 1265 addAdrpAddRelocAArch64(*Veneer, *TargetLowBits, *TargetHiBits, 1266 TargetAddress); 1267 MIB->addAnnotation(Instruction, "AArch64Veneer", true); 1268 Veneer->addInstruction(Offset, std::move(Instruction)); 1269 --Count; 1270 for (auto It = Instructions.rbegin(); Count != 0; ++It, --Count) { 1271 MIB->addAnnotation(It->second, "AArch64Veneer", true); 1272 Veneer->addInstruction(It->first, std::move(It->second)); 1273 } 1274 1275 Veneer->getOrCreateLocalLabel(Address); 1276 Veneer->setMaxSize(TotalSize); 1277 Veneer->updateState(BinaryFunction::State::Disassembled); 1278 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: handling veneer function at 0x" << Address 1279 << "\n"); 1280 return true; 1281 }; 1282 1283 uint64_t Size = 0, TotalSize = 0; 1284 BinaryFunction::InstrMapType VeneerInstructions; 1285 for (Offset = 0; Offset < MaxSize; Offset += Size) { 1286 MCInst Instruction; 1287 const uint64_t AbsoluteInstrAddr = Address + Offset; 1288 if (!SymbolicDisAsm->getInstruction(Instruction, Size, Data.slice(Offset), 1289 AbsoluteInstrAddr, nulls())) 1290 break; 1291 1292 TotalSize += Size; 1293 if (MIB->isBranch(Instruction)) { 1294 Ret = matchVeneer(VeneerInstructions, Instruction, Offset, 1295 AbsoluteInstrAddr, TotalSize); 1296 break; 1297 } 1298 1299 VeneerInstructions.emplace(Offset, std::move(Instruction)); 1300 } 1301 1302 return Ret; 1303 } 1304 1305 void BinaryContext::processInterproceduralReferences() { 1306 for (const std::pair<BinaryFunction *, uint64_t> &It : 1307 InterproceduralReferences) { 1308 BinaryFunction &Function = *It.first; 1309 uint64_t Address = It.second; 1310 if (!Address || Function.isIgnored()) 1311 continue; 1312 1313 BinaryFunction *TargetFunction = 1314 getBinaryFunctionContainingAddress(Address); 1315 if (&Function == TargetFunction) 1316 continue; 1317 1318 if (TargetFunction) { 1319 if (TargetFunction->isFragment() && 1320 !TargetFunction->isChildOf(Function)) { 1321 this->errs() 1322 << "BOLT-WARNING: interprocedural reference between unrelated " 1323 "fragments: " 1324 << Function.getPrintName() << " and " 1325 << TargetFunction->getPrintName() << '\n'; 1326 } 1327 if (uint64_t Offset = Address - TargetFunction->getAddress()) 1328 TargetFunction->addEntryPointAtOffset(Offset); 1329 1330 continue; 1331 } 1332 1333 // Check if address falls in function padding space - this could be 1334 // unmarked data in code. In this case adjust the padding space size. 1335 ErrorOr<BinarySection &> Section = getSectionForAddress(Address); 1336 assert(Section && "cannot get section for referenced address"); 1337 1338 if (!Section->isText()) 1339 continue; 1340 1341 // PLT requires special handling and could be ignored in this context. 1342 StringRef SectionName = Section->getName(); 1343 if (SectionName == ".plt" || SectionName == ".plt.got") 1344 continue; 1345 1346 // Check if it is aarch64 veneer written at Address 1347 if (isAArch64() && handleAArch64Veneer(Address)) 1348 continue; 1349 1350 if (opts::processAllFunctions()) { 1351 this->errs() << "BOLT-ERROR: cannot process binaries with unmarked " 1352 << "object in code at address 0x" 1353 << Twine::utohexstr(Address) << " belonging to section " 1354 << SectionName << " in current mode\n"; 1355 exit(1); 1356 } 1357 1358 TargetFunction = getBinaryFunctionContainingAddress(Address, 1359 /*CheckPastEnd=*/false, 1360 /*UseMaxSize=*/true); 1361 // We are not going to overwrite non-simple functions, but for simple 1362 // ones - adjust the padding size. 1363 if (TargetFunction && TargetFunction->isSimple()) { 1364 this->errs() 1365 << "BOLT-WARNING: function " << *TargetFunction 1366 << " has an object detected in a padding region at address 0x" 1367 << Twine::utohexstr(Address) << '\n'; 1368 TargetFunction->setMaxSize(TargetFunction->getSize()); 1369 } 1370 } 1371 1372 InterproceduralReferences.clear(); 1373 } 1374 1375 void BinaryContext::postProcessSymbolTable() { 1376 fixBinaryDataHoles(); 1377 bool Valid = true; 1378 for (auto &Entry : BinaryDataMap) { 1379 BinaryData *BD = Entry.second; 1380 if ((BD->getName().starts_with("SYMBOLat") || 1381 BD->getName().starts_with("DATAat")) && 1382 !BD->getParent() && !BD->getSize() && !BD->isAbsolute() && 1383 BD->getSection()) { 1384 this->errs() << "BOLT-WARNING: zero-sized top level symbol: " << *BD 1385 << "\n"; 1386 Valid = false; 1387 } 1388 } 1389 assert(Valid); 1390 (void)Valid; 1391 generateSymbolHashes(); 1392 } 1393 1394 void BinaryContext::foldFunction(BinaryFunction &ChildBF, 1395 BinaryFunction &ParentBF) { 1396 assert(!ChildBF.isMultiEntry() && !ParentBF.isMultiEntry() && 1397 "cannot merge functions with multiple entry points"); 1398 1399 std::unique_lock<llvm::sys::RWMutex> WriteCtxLock(CtxMutex, std::defer_lock); 1400 std::unique_lock<llvm::sys::RWMutex> WriteSymbolMapLock( 1401 SymbolToFunctionMapMutex, std::defer_lock); 1402 1403 const StringRef ChildName = ChildBF.getOneName(); 1404 1405 // Move symbols over and update bookkeeping info. 1406 for (MCSymbol *Symbol : ChildBF.getSymbols()) { 1407 ParentBF.getSymbols().push_back(Symbol); 1408 WriteSymbolMapLock.lock(); 1409 SymbolToFunctionMap[Symbol] = &ParentBF; 1410 WriteSymbolMapLock.unlock(); 1411 // NB: there's no need to update BinaryDataMap and GlobalSymbols. 1412 } 1413 ChildBF.getSymbols().clear(); 1414 1415 // Move other names the child function is known under. 1416 llvm::move(ChildBF.Aliases, std::back_inserter(ParentBF.Aliases)); 1417 ChildBF.Aliases.clear(); 1418 1419 if (HasRelocations) { 1420 // Merge execution counts of ChildBF into those of ParentBF. 1421 // Without relocations, we cannot reliably merge profiles as both functions 1422 // continue to exist and either one can be executed. 1423 ChildBF.mergeProfileDataInto(ParentBF); 1424 1425 std::shared_lock<llvm::sys::RWMutex> ReadBfsLock(BinaryFunctionsMutex, 1426 std::defer_lock); 1427 std::unique_lock<llvm::sys::RWMutex> WriteBfsLock(BinaryFunctionsMutex, 1428 std::defer_lock); 1429 // Remove ChildBF from the global set of functions in relocs mode. 1430 ReadBfsLock.lock(); 1431 auto FI = BinaryFunctions.find(ChildBF.getAddress()); 1432 ReadBfsLock.unlock(); 1433 1434 assert(FI != BinaryFunctions.end() && "function not found"); 1435 assert(&ChildBF == &FI->second && "function mismatch"); 1436 1437 WriteBfsLock.lock(); 1438 ChildBF.clearDisasmState(); 1439 FI = BinaryFunctions.erase(FI); 1440 WriteBfsLock.unlock(); 1441 1442 } else { 1443 // In non-relocation mode we keep the function, but rename it. 1444 std::string NewName = "__ICF_" + ChildName.str(); 1445 1446 WriteCtxLock.lock(); 1447 ChildBF.getSymbols().push_back(Ctx->getOrCreateSymbol(NewName)); 1448 WriteCtxLock.unlock(); 1449 1450 ChildBF.setFolded(&ParentBF); 1451 } 1452 1453 ParentBF.setHasFunctionsFoldedInto(); 1454 } 1455 1456 void BinaryContext::fixBinaryDataHoles() { 1457 assert(validateObjectNesting() && "object nesting inconsistency detected"); 1458 1459 for (BinarySection &Section : allocatableSections()) { 1460 std::vector<std::pair<uint64_t, uint64_t>> Holes; 1461 1462 auto isNotHole = [&Section](const binary_data_iterator &Itr) { 1463 BinaryData *BD = Itr->second; 1464 bool isHole = (!BD->getParent() && !BD->getSize() && BD->isObject() && 1465 (BD->getName().starts_with("SYMBOLat0x") || 1466 BD->getName().starts_with("DATAat0x") || 1467 BD->getName().starts_with("ANONYMOUS"))); 1468 return !isHole && BD->getSection() == Section && !BD->getParent(); 1469 }; 1470 1471 auto BDStart = BinaryDataMap.begin(); 1472 auto BDEnd = BinaryDataMap.end(); 1473 auto Itr = FilteredBinaryDataIterator(isNotHole, BDStart, BDEnd); 1474 auto End = FilteredBinaryDataIterator(isNotHole, BDEnd, BDEnd); 1475 1476 uint64_t EndAddress = Section.getAddress(); 1477 1478 while (Itr != End) { 1479 if (Itr->second->getAddress() > EndAddress) { 1480 uint64_t Gap = Itr->second->getAddress() - EndAddress; 1481 Holes.emplace_back(EndAddress, Gap); 1482 } 1483 EndAddress = Itr->second->getEndAddress(); 1484 ++Itr; 1485 } 1486 1487 if (EndAddress < Section.getEndAddress()) 1488 Holes.emplace_back(EndAddress, Section.getEndAddress() - EndAddress); 1489 1490 // If there is already a symbol at the start of the hole, grow that symbol 1491 // to cover the rest. Otherwise, create a new symbol to cover the hole. 1492 for (std::pair<uint64_t, uint64_t> &Hole : Holes) { 1493 BinaryData *BD = getBinaryDataAtAddress(Hole.first); 1494 if (BD) { 1495 // BD->getSection() can be != Section if there are sections that 1496 // overlap. In this case it is probably safe to just skip the holes 1497 // since the overlapping section will not(?) have any symbols in it. 1498 if (BD->getSection() == Section) 1499 setBinaryDataSize(Hole.first, Hole.second); 1500 } else { 1501 getOrCreateGlobalSymbol(Hole.first, "HOLEat", Hole.second, 1); 1502 } 1503 } 1504 } 1505 1506 assert(validateObjectNesting() && "object nesting inconsistency detected"); 1507 assert(validateHoles() && "top level hole detected in object map"); 1508 } 1509 1510 void BinaryContext::printGlobalSymbols(raw_ostream &OS) const { 1511 const BinarySection *CurrentSection = nullptr; 1512 bool FirstSection = true; 1513 1514 for (auto &Entry : BinaryDataMap) { 1515 const BinaryData *BD = Entry.second; 1516 const BinarySection &Section = BD->getSection(); 1517 if (FirstSection || Section != *CurrentSection) { 1518 uint64_t Address, Size; 1519 StringRef Name = Section.getName(); 1520 if (Section) { 1521 Address = Section.getAddress(); 1522 Size = Section.getSize(); 1523 } else { 1524 Address = BD->getAddress(); 1525 Size = BD->getSize(); 1526 } 1527 OS << "BOLT-INFO: Section " << Name << ", " 1528 << "0x" + Twine::utohexstr(Address) << ":" 1529 << "0x" + Twine::utohexstr(Address + Size) << "/" << Size << "\n"; 1530 CurrentSection = &Section; 1531 FirstSection = false; 1532 } 1533 1534 OS << "BOLT-INFO: "; 1535 const BinaryData *P = BD->getParent(); 1536 while (P) { 1537 OS << " "; 1538 P = P->getParent(); 1539 } 1540 OS << *BD << "\n"; 1541 } 1542 } 1543 1544 Expected<unsigned> BinaryContext::getDwarfFile( 1545 StringRef Directory, StringRef FileName, unsigned FileNumber, 1546 std::optional<MD5::MD5Result> Checksum, std::optional<StringRef> Source, 1547 unsigned CUID, unsigned DWARFVersion) { 1548 DwarfLineTable &Table = DwarfLineTablesCUMap[CUID]; 1549 return Table.tryGetFile(Directory, FileName, Checksum, Source, DWARFVersion, 1550 FileNumber); 1551 } 1552 1553 unsigned BinaryContext::addDebugFilenameToUnit(const uint32_t DestCUID, 1554 const uint32_t SrcCUID, 1555 unsigned FileIndex) { 1556 DWARFCompileUnit *SrcUnit = DwCtx->getCompileUnitForOffset(SrcCUID); 1557 const DWARFDebugLine::LineTable *LineTable = 1558 DwCtx->getLineTableForUnit(SrcUnit); 1559 const std::vector<DWARFDebugLine::FileNameEntry> &FileNames = 1560 LineTable->Prologue.FileNames; 1561 // Dir indexes start at 1, as DWARF file numbers, and a dir index 0 1562 // means empty dir. 1563 assert(FileIndex > 0 && FileIndex <= FileNames.size() && 1564 "FileIndex out of range for the compilation unit."); 1565 StringRef Dir = ""; 1566 if (FileNames[FileIndex - 1].DirIdx != 0) { 1567 if (std::optional<const char *> DirName = dwarf::toString( 1568 LineTable->Prologue 1569 .IncludeDirectories[FileNames[FileIndex - 1].DirIdx - 1])) { 1570 Dir = *DirName; 1571 } 1572 } 1573 StringRef FileName = ""; 1574 if (std::optional<const char *> FName = 1575 dwarf::toString(FileNames[FileIndex - 1].Name)) 1576 FileName = *FName; 1577 assert(FileName != ""); 1578 DWARFCompileUnit *DstUnit = DwCtx->getCompileUnitForOffset(DestCUID); 1579 return cantFail(getDwarfFile(Dir, FileName, 0, std::nullopt, std::nullopt, 1580 DestCUID, DstUnit->getVersion())); 1581 } 1582 1583 std::vector<BinaryFunction *> BinaryContext::getSortedFunctions() { 1584 std::vector<BinaryFunction *> SortedFunctions(BinaryFunctions.size()); 1585 llvm::transform(llvm::make_second_range(BinaryFunctions), 1586 SortedFunctions.begin(), 1587 [](BinaryFunction &BF) { return &BF; }); 1588 1589 llvm::stable_sort(SortedFunctions, 1590 [](const BinaryFunction *A, const BinaryFunction *B) { 1591 if (A->hasValidIndex() && B->hasValidIndex()) { 1592 return A->getIndex() < B->getIndex(); 1593 } 1594 return A->hasValidIndex(); 1595 }); 1596 return SortedFunctions; 1597 } 1598 1599 std::vector<BinaryFunction *> BinaryContext::getAllBinaryFunctions() { 1600 std::vector<BinaryFunction *> AllFunctions; 1601 AllFunctions.reserve(BinaryFunctions.size() + InjectedBinaryFunctions.size()); 1602 llvm::transform(llvm::make_second_range(BinaryFunctions), 1603 std::back_inserter(AllFunctions), 1604 [](BinaryFunction &BF) { return &BF; }); 1605 llvm::copy(InjectedBinaryFunctions, std::back_inserter(AllFunctions)); 1606 1607 return AllFunctions; 1608 } 1609 1610 std::optional<DWARFUnit *> BinaryContext::getDWOCU(uint64_t DWOId) { 1611 auto Iter = DWOCUs.find(DWOId); 1612 if (Iter == DWOCUs.end()) 1613 return std::nullopt; 1614 1615 return Iter->second; 1616 } 1617 1618 DWARFContext *BinaryContext::getDWOContext() const { 1619 if (DWOCUs.empty()) 1620 return nullptr; 1621 return &DWOCUs.begin()->second->getContext(); 1622 } 1623 1624 /// Handles DWO sections that can either be in .o, .dwo or .dwp files. 1625 void BinaryContext::preprocessDWODebugInfo() { 1626 for (const std::unique_ptr<DWARFUnit> &CU : DwCtx->compile_units()) { 1627 DWARFUnit *const DwarfUnit = CU.get(); 1628 if (std::optional<uint64_t> DWOId = DwarfUnit->getDWOId()) { 1629 std::string DWOName = dwarf::toString( 1630 DwarfUnit->getUnitDIE().find( 1631 {dwarf::DW_AT_dwo_name, dwarf::DW_AT_GNU_dwo_name}), 1632 ""); 1633 SmallString<16> AbsolutePath; 1634 if (!opts::CompDirOverride.empty()) { 1635 sys::path::append(AbsolutePath, opts::CompDirOverride); 1636 sys::path::append(AbsolutePath, DWOName); 1637 } 1638 DWARFUnit *DWOCU = 1639 DwarfUnit->getNonSkeletonUnitDIE(false, AbsolutePath).getDwarfUnit(); 1640 if (!DWOCU->isDWOUnit()) { 1641 this->outs() 1642 << "BOLT-WARNING: Debug Fission: DWO debug information for " 1643 << DWOName 1644 << " was not retrieved and won't be updated. Please check " 1645 "relative path.\n"; 1646 continue; 1647 } 1648 DWOCUs[*DWOId] = DWOCU; 1649 } 1650 } 1651 if (!DWOCUs.empty()) 1652 this->outs() << "BOLT-INFO: processing split DWARF\n"; 1653 } 1654 1655 void BinaryContext::preprocessDebugInfo() { 1656 struct CURange { 1657 uint64_t LowPC; 1658 uint64_t HighPC; 1659 DWARFUnit *Unit; 1660 1661 bool operator<(const CURange &Other) const { return LowPC < Other.LowPC; } 1662 }; 1663 1664 // Building a map of address ranges to CUs similar to .debug_aranges and use 1665 // it to assign CU to functions. 1666 std::vector<CURange> AllRanges; 1667 AllRanges.reserve(DwCtx->getNumCompileUnits()); 1668 for (const std::unique_ptr<DWARFUnit> &CU : DwCtx->compile_units()) { 1669 Expected<DWARFAddressRangesVector> RangesOrError = 1670 CU->getUnitDIE().getAddressRanges(); 1671 if (!RangesOrError) { 1672 consumeError(RangesOrError.takeError()); 1673 continue; 1674 } 1675 for (DWARFAddressRange &Range : *RangesOrError) { 1676 // Parts of the debug info could be invalidated due to corresponding code 1677 // being removed from the binary by the linker. Hence we check if the 1678 // address is a valid one. 1679 if (containsAddress(Range.LowPC)) 1680 AllRanges.emplace_back(CURange{Range.LowPC, Range.HighPC, CU.get()}); 1681 } 1682 1683 ContainsDwarf5 |= CU->getVersion() >= 5; 1684 ContainsDwarfLegacy |= CU->getVersion() < 5; 1685 } 1686 1687 llvm::sort(AllRanges); 1688 for (auto &KV : BinaryFunctions) { 1689 const uint64_t FunctionAddress = KV.first; 1690 BinaryFunction &Function = KV.second; 1691 1692 auto It = llvm::partition_point( 1693 AllRanges, [=](CURange R) { return R.HighPC <= FunctionAddress; }); 1694 if (It != AllRanges.end() && It->LowPC <= FunctionAddress) 1695 Function.setDWARFUnit(It->Unit); 1696 } 1697 1698 // Discover units with debug info that needs to be updated. 1699 for (const auto &KV : BinaryFunctions) { 1700 const BinaryFunction &BF = KV.second; 1701 if (shouldEmit(BF) && BF.getDWARFUnit()) 1702 ProcessedCUs.insert(BF.getDWARFUnit()); 1703 } 1704 1705 // Clear debug info for functions from units that we are not going to process. 1706 for (auto &KV : BinaryFunctions) { 1707 BinaryFunction &BF = KV.second; 1708 if (BF.getDWARFUnit() && !ProcessedCUs.count(BF.getDWARFUnit())) 1709 BF.setDWARFUnit(nullptr); 1710 } 1711 1712 if (opts::Verbosity >= 1) { 1713 this->outs() << "BOLT-INFO: " << ProcessedCUs.size() << " out of " 1714 << DwCtx->getNumCompileUnits() << " CUs will be updated\n"; 1715 } 1716 1717 preprocessDWODebugInfo(); 1718 1719 // Populate MCContext with DWARF files from all units. 1720 StringRef GlobalPrefix = AsmInfo->getPrivateGlobalPrefix(); 1721 for (const std::unique_ptr<DWARFUnit> &CU : DwCtx->compile_units()) { 1722 const uint64_t CUID = CU->getOffset(); 1723 DwarfLineTable &BinaryLineTable = getDwarfLineTable(CUID); 1724 BinaryLineTable.setLabel(Ctx->getOrCreateSymbol( 1725 GlobalPrefix + "line_table_start" + Twine(CUID))); 1726 1727 if (!ProcessedCUs.count(CU.get())) 1728 continue; 1729 1730 const DWARFDebugLine::LineTable *LineTable = 1731 DwCtx->getLineTableForUnit(CU.get()); 1732 const std::vector<DWARFDebugLine::FileNameEntry> &FileNames = 1733 LineTable->Prologue.FileNames; 1734 1735 uint16_t DwarfVersion = LineTable->Prologue.getVersion(); 1736 if (DwarfVersion >= 5) { 1737 std::optional<MD5::MD5Result> Checksum; 1738 if (LineTable->Prologue.ContentTypes.HasMD5) 1739 Checksum = LineTable->Prologue.FileNames[0].Checksum; 1740 std::optional<const char *> Name = 1741 dwarf::toString(CU->getUnitDIE().find(dwarf::DW_AT_name), nullptr); 1742 if (std::optional<uint64_t> DWOID = CU->getDWOId()) { 1743 auto Iter = DWOCUs.find(*DWOID); 1744 assert(Iter != DWOCUs.end() && "DWO CU was not found."); 1745 Name = dwarf::toString( 1746 Iter->second->getUnitDIE().find(dwarf::DW_AT_name), nullptr); 1747 } 1748 BinaryLineTable.setRootFile(CU->getCompilationDir(), *Name, Checksum, 1749 std::nullopt); 1750 } 1751 1752 BinaryLineTable.setDwarfVersion(DwarfVersion); 1753 1754 // Assign a unique label to every line table, one per CU. 1755 // Make sure empty debug line tables are registered too. 1756 if (FileNames.empty()) { 1757 cantFail(getDwarfFile("", "<unknown>", 0, std::nullopt, std::nullopt, 1758 CUID, DwarfVersion)); 1759 continue; 1760 } 1761 const uint32_t Offset = DwarfVersion < 5 ? 1 : 0; 1762 for (size_t I = 0, Size = FileNames.size(); I != Size; ++I) { 1763 // Dir indexes start at 1, as DWARF file numbers, and a dir index 0 1764 // means empty dir. 1765 StringRef Dir = ""; 1766 if (FileNames[I].DirIdx != 0 || DwarfVersion >= 5) 1767 if (std::optional<const char *> DirName = dwarf::toString( 1768 LineTable->Prologue 1769 .IncludeDirectories[FileNames[I].DirIdx - Offset])) 1770 Dir = *DirName; 1771 StringRef FileName = ""; 1772 if (std::optional<const char *> FName = 1773 dwarf::toString(FileNames[I].Name)) 1774 FileName = *FName; 1775 assert(FileName != ""); 1776 std::optional<MD5::MD5Result> Checksum; 1777 if (DwarfVersion >= 5 && LineTable->Prologue.ContentTypes.HasMD5) 1778 Checksum = LineTable->Prologue.FileNames[I].Checksum; 1779 cantFail(getDwarfFile(Dir, FileName, 0, Checksum, std::nullopt, CUID, 1780 DwarfVersion)); 1781 } 1782 } 1783 } 1784 1785 bool BinaryContext::shouldEmit(const BinaryFunction &Function) const { 1786 if (Function.isPseudo()) 1787 return false; 1788 1789 if (opts::processAllFunctions()) 1790 return true; 1791 1792 if (Function.isIgnored()) 1793 return false; 1794 1795 // In relocation mode we will emit non-simple functions with CFG. 1796 // If the function does not have a CFG it should be marked as ignored. 1797 return HasRelocations || Function.isSimple(); 1798 } 1799 1800 void BinaryContext::dump(const MCInst &Inst) const { 1801 if (LLVM_UNLIKELY(!InstPrinter)) { 1802 dbgs() << "Cannot dump for InstPrinter is not initialized.\n"; 1803 return; 1804 } 1805 InstPrinter->printInst(&Inst, 0, "", *STI, dbgs()); 1806 dbgs() << "\n"; 1807 } 1808 1809 void BinaryContext::printCFI(raw_ostream &OS, const MCCFIInstruction &Inst) { 1810 uint32_t Operation = Inst.getOperation(); 1811 switch (Operation) { 1812 case MCCFIInstruction::OpSameValue: 1813 OS << "OpSameValue Reg" << Inst.getRegister(); 1814 break; 1815 case MCCFIInstruction::OpRememberState: 1816 OS << "OpRememberState"; 1817 break; 1818 case MCCFIInstruction::OpRestoreState: 1819 OS << "OpRestoreState"; 1820 break; 1821 case MCCFIInstruction::OpOffset: 1822 OS << "OpOffset Reg" << Inst.getRegister() << " " << Inst.getOffset(); 1823 break; 1824 case MCCFIInstruction::OpDefCfaRegister: 1825 OS << "OpDefCfaRegister Reg" << Inst.getRegister(); 1826 break; 1827 case MCCFIInstruction::OpDefCfaOffset: 1828 OS << "OpDefCfaOffset " << Inst.getOffset(); 1829 break; 1830 case MCCFIInstruction::OpDefCfa: 1831 OS << "OpDefCfa Reg" << Inst.getRegister() << " " << Inst.getOffset(); 1832 break; 1833 case MCCFIInstruction::OpRelOffset: 1834 OS << "OpRelOffset Reg" << Inst.getRegister() << " " << Inst.getOffset(); 1835 break; 1836 case MCCFIInstruction::OpAdjustCfaOffset: 1837 OS << "OfAdjustCfaOffset " << Inst.getOffset(); 1838 break; 1839 case MCCFIInstruction::OpEscape: 1840 OS << "OpEscape"; 1841 break; 1842 case MCCFIInstruction::OpRestore: 1843 OS << "OpRestore Reg" << Inst.getRegister(); 1844 break; 1845 case MCCFIInstruction::OpUndefined: 1846 OS << "OpUndefined Reg" << Inst.getRegister(); 1847 break; 1848 case MCCFIInstruction::OpRegister: 1849 OS << "OpRegister Reg" << Inst.getRegister() << " Reg" 1850 << Inst.getRegister2(); 1851 break; 1852 case MCCFIInstruction::OpWindowSave: 1853 OS << "OpWindowSave"; 1854 break; 1855 case MCCFIInstruction::OpGnuArgsSize: 1856 OS << "OpGnuArgsSize"; 1857 break; 1858 default: 1859 OS << "Op#" << Operation; 1860 break; 1861 } 1862 } 1863 1864 MarkerSymType BinaryContext::getMarkerType(const SymbolRef &Symbol) const { 1865 // For aarch64 and riscv, the ABI defines mapping symbols so we identify data 1866 // in the code section (see IHI0056B). $x identifies a symbol starting code or 1867 // the end of a data chunk inside code, $d identifies start of data. 1868 if ((!isAArch64() && !isRISCV()) || ELFSymbolRef(Symbol).getSize()) 1869 return MarkerSymType::NONE; 1870 1871 Expected<StringRef> NameOrError = Symbol.getName(); 1872 Expected<object::SymbolRef::Type> TypeOrError = Symbol.getType(); 1873 1874 if (!TypeOrError || !NameOrError) 1875 return MarkerSymType::NONE; 1876 1877 if (*TypeOrError != SymbolRef::ST_Unknown) 1878 return MarkerSymType::NONE; 1879 1880 if (*NameOrError == "$x" || NameOrError->starts_with("$x.")) 1881 return MarkerSymType::CODE; 1882 1883 // $x<ISA> 1884 if (isRISCV() && NameOrError->starts_with("$x")) 1885 return MarkerSymType::CODE; 1886 1887 if (*NameOrError == "$d" || NameOrError->starts_with("$d.")) 1888 return MarkerSymType::DATA; 1889 1890 return MarkerSymType::NONE; 1891 } 1892 1893 bool BinaryContext::isMarker(const SymbolRef &Symbol) const { 1894 return getMarkerType(Symbol) != MarkerSymType::NONE; 1895 } 1896 1897 static void printDebugInfo(raw_ostream &OS, const MCInst &Instruction, 1898 const BinaryFunction *Function, 1899 DWARFContext *DwCtx) { 1900 DebugLineTableRowRef RowRef = 1901 DebugLineTableRowRef::fromSMLoc(Instruction.getLoc()); 1902 if (RowRef == DebugLineTableRowRef::NULL_ROW) 1903 return; 1904 1905 const DWARFDebugLine::LineTable *LineTable; 1906 if (Function && Function->getDWARFUnit() && 1907 Function->getDWARFUnit()->getOffset() == RowRef.DwCompileUnitIndex) { 1908 LineTable = Function->getDWARFLineTable(); 1909 } else { 1910 LineTable = DwCtx->getLineTableForUnit( 1911 DwCtx->getCompileUnitForOffset(RowRef.DwCompileUnitIndex)); 1912 } 1913 assert(LineTable && "line table expected for instruction with debug info"); 1914 1915 const DWARFDebugLine::Row &Row = LineTable->Rows[RowRef.RowIndex - 1]; 1916 StringRef FileName = ""; 1917 if (std::optional<const char *> FName = 1918 dwarf::toString(LineTable->Prologue.FileNames[Row.File - 1].Name)) 1919 FileName = *FName; 1920 OS << " # debug line " << FileName << ":" << Row.Line; 1921 if (Row.Column) 1922 OS << ":" << Row.Column; 1923 if (Row.Discriminator) 1924 OS << " discriminator:" << Row.Discriminator; 1925 } 1926 1927 void BinaryContext::printInstruction(raw_ostream &OS, const MCInst &Instruction, 1928 uint64_t Offset, 1929 const BinaryFunction *Function, 1930 bool PrintMCInst, bool PrintMemData, 1931 bool PrintRelocations, 1932 StringRef Endl) const { 1933 OS << format(" %08" PRIx64 ": ", Offset); 1934 if (MIB->isCFI(Instruction)) { 1935 uint32_t Offset = Instruction.getOperand(0).getImm(); 1936 OS << "\t!CFI\t$" << Offset << "\t; "; 1937 if (Function) 1938 printCFI(OS, *Function->getCFIFor(Instruction)); 1939 OS << Endl; 1940 return; 1941 } 1942 InstPrinter->printInst(&Instruction, 0, "", *STI, OS); 1943 if (MIB->isCall(Instruction)) { 1944 if (MIB->isTailCall(Instruction)) 1945 OS << " # TAILCALL "; 1946 if (MIB->isInvoke(Instruction)) { 1947 const std::optional<MCPlus::MCLandingPad> EHInfo = 1948 MIB->getEHInfo(Instruction); 1949 OS << " # handler: "; 1950 if (EHInfo->first) 1951 OS << *EHInfo->first; 1952 else 1953 OS << '0'; 1954 OS << "; action: " << EHInfo->second; 1955 const int64_t GnuArgsSize = MIB->getGnuArgsSize(Instruction); 1956 if (GnuArgsSize >= 0) 1957 OS << "; GNU_args_size = " << GnuArgsSize; 1958 } 1959 } else if (MIB->isIndirectBranch(Instruction)) { 1960 if (uint64_t JTAddress = MIB->getJumpTable(Instruction)) { 1961 OS << " # JUMPTABLE @0x" << Twine::utohexstr(JTAddress); 1962 } else { 1963 OS << " # UNKNOWN CONTROL FLOW"; 1964 } 1965 } 1966 if (std::optional<uint32_t> Offset = MIB->getOffset(Instruction)) 1967 OS << " # Offset: " << *Offset; 1968 if (std::optional<uint32_t> Size = MIB->getSize(Instruction)) 1969 OS << " # Size: " << *Size; 1970 if (MCSymbol *Label = MIB->getLabel(Instruction)) 1971 OS << " # Label: " << *Label; 1972 1973 MIB->printAnnotations(Instruction, OS); 1974 1975 if (opts::PrintDebugInfo) 1976 printDebugInfo(OS, Instruction, Function, DwCtx.get()); 1977 1978 if ((opts::PrintRelocations || PrintRelocations) && Function) { 1979 const uint64_t Size = computeCodeSize(&Instruction, &Instruction + 1); 1980 Function->printRelocations(OS, Offset, Size); 1981 } 1982 1983 OS << Endl; 1984 1985 if (PrintMCInst) { 1986 Instruction.dump_pretty(OS, InstPrinter.get()); 1987 OS << Endl; 1988 } 1989 } 1990 1991 std::optional<uint64_t> 1992 BinaryContext::getBaseAddressForMapping(uint64_t MMapAddress, 1993 uint64_t FileOffset) const { 1994 // Find a segment with a matching file offset. 1995 for (auto &KV : SegmentMapInfo) { 1996 const SegmentInfo &SegInfo = KV.second; 1997 // FileOffset is got from perf event, 1998 // and it is equal to alignDown(SegInfo.FileOffset, pagesize). 1999 // If the pagesize is not equal to SegInfo.Alignment. 2000 // FileOffset and SegInfo.FileOffset should be aligned first, 2001 // and then judge whether they are equal. 2002 if (alignDown(SegInfo.FileOffset, SegInfo.Alignment) == 2003 alignDown(FileOffset, SegInfo.Alignment)) { 2004 // The function's offset from base address in VAS is aligned by pagesize 2005 // instead of SegInfo.Alignment. Pagesize can't be got from perf events. 2006 // However, The ELF document says that SegInfo.FileOffset should equal 2007 // to SegInfo.Address, modulo the pagesize. 2008 // Reference: https://refspecs.linuxfoundation.org/elf/elf.pdf 2009 2010 // So alignDown(SegInfo.Address, pagesize) can be calculated by: 2011 // alignDown(SegInfo.Address, pagesize) 2012 // = SegInfo.Address - (SegInfo.Address % pagesize) 2013 // = SegInfo.Address - (SegInfo.FileOffset % pagesize) 2014 // = SegInfo.Address - SegInfo.FileOffset + 2015 // alignDown(SegInfo.FileOffset, pagesize) 2016 // = SegInfo.Address - SegInfo.FileOffset + FileOffset 2017 return MMapAddress - (SegInfo.Address - SegInfo.FileOffset + FileOffset); 2018 } 2019 } 2020 2021 return std::nullopt; 2022 } 2023 2024 ErrorOr<BinarySection &> BinaryContext::getSectionForAddress(uint64_t Address) { 2025 auto SI = AddressToSection.upper_bound(Address); 2026 if (SI != AddressToSection.begin()) { 2027 --SI; 2028 uint64_t UpperBound = SI->first + SI->second->getSize(); 2029 if (!SI->second->getSize()) 2030 UpperBound += 1; 2031 if (UpperBound > Address) 2032 return *SI->second; 2033 } 2034 return std::make_error_code(std::errc::bad_address); 2035 } 2036 2037 ErrorOr<StringRef> 2038 BinaryContext::getSectionNameForAddress(uint64_t Address) const { 2039 if (ErrorOr<const BinarySection &> Section = getSectionForAddress(Address)) 2040 return Section->getName(); 2041 return std::make_error_code(std::errc::bad_address); 2042 } 2043 2044 BinarySection &BinaryContext::registerSection(BinarySection *Section) { 2045 auto Res = Sections.insert(Section); 2046 (void)Res; 2047 assert(Res.second && "can't register the same section twice."); 2048 2049 // Only register allocatable sections in the AddressToSection map. 2050 if (Section->isAllocatable() && Section->getAddress()) 2051 AddressToSection.insert(std::make_pair(Section->getAddress(), Section)); 2052 NameToSection.insert( 2053 std::make_pair(std::string(Section->getName()), Section)); 2054 if (Section->hasSectionRef()) 2055 SectionRefToBinarySection.insert( 2056 std::make_pair(Section->getSectionRef(), Section)); 2057 2058 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: registering " << *Section << "\n"); 2059 return *Section; 2060 } 2061 2062 BinarySection &BinaryContext::registerSection(SectionRef Section) { 2063 return registerSection(new BinarySection(*this, Section)); 2064 } 2065 2066 BinarySection & 2067 BinaryContext::registerSection(const Twine &SectionName, 2068 const BinarySection &OriginalSection) { 2069 return registerSection( 2070 new BinarySection(*this, SectionName, OriginalSection)); 2071 } 2072 2073 BinarySection & 2074 BinaryContext::registerOrUpdateSection(const Twine &Name, unsigned ELFType, 2075 unsigned ELFFlags, uint8_t *Data, 2076 uint64_t Size, unsigned Alignment) { 2077 auto NamedSections = getSectionByName(Name); 2078 if (NamedSections.begin() != NamedSections.end()) { 2079 assert(std::next(NamedSections.begin()) == NamedSections.end() && 2080 "can only update unique sections"); 2081 BinarySection *Section = NamedSections.begin()->second; 2082 2083 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: updating " << *Section << " -> "); 2084 const bool Flag = Section->isAllocatable(); 2085 (void)Flag; 2086 Section->update(Data, Size, Alignment, ELFType, ELFFlags); 2087 LLVM_DEBUG(dbgs() << *Section << "\n"); 2088 // FIXME: Fix section flags/attributes for MachO. 2089 if (isELF()) 2090 assert(Flag == Section->isAllocatable() && 2091 "can't change section allocation status"); 2092 return *Section; 2093 } 2094 2095 return registerSection( 2096 new BinarySection(*this, Name, Data, Size, Alignment, ELFType, ELFFlags)); 2097 } 2098 2099 void BinaryContext::deregisterSectionName(const BinarySection &Section) { 2100 auto NameRange = NameToSection.equal_range(Section.getName().str()); 2101 while (NameRange.first != NameRange.second) { 2102 if (NameRange.first->second == &Section) { 2103 NameToSection.erase(NameRange.first); 2104 break; 2105 } 2106 ++NameRange.first; 2107 } 2108 } 2109 2110 void BinaryContext::deregisterUnusedSections() { 2111 ErrorOr<BinarySection &> AbsSection = getUniqueSectionByName("<absolute>"); 2112 for (auto SI = Sections.begin(); SI != Sections.end();) { 2113 BinarySection *Section = *SI; 2114 // We check getOutputData() instead of getOutputSize() because sometimes 2115 // zero-sized .text.cold sections are allocated. 2116 if (Section->hasSectionRef() || Section->getOutputData() || 2117 (AbsSection && Section == &AbsSection.get())) { 2118 ++SI; 2119 continue; 2120 } 2121 2122 LLVM_DEBUG(dbgs() << "LLVM-DEBUG: deregistering " << Section->getName() 2123 << '\n';); 2124 deregisterSectionName(*Section); 2125 SI = Sections.erase(SI); 2126 delete Section; 2127 } 2128 } 2129 2130 bool BinaryContext::deregisterSection(BinarySection &Section) { 2131 BinarySection *SectionPtr = &Section; 2132 auto Itr = Sections.find(SectionPtr); 2133 if (Itr != Sections.end()) { 2134 auto Range = AddressToSection.equal_range(SectionPtr->getAddress()); 2135 while (Range.first != Range.second) { 2136 if (Range.first->second == SectionPtr) { 2137 AddressToSection.erase(Range.first); 2138 break; 2139 } 2140 ++Range.first; 2141 } 2142 2143 deregisterSectionName(*SectionPtr); 2144 Sections.erase(Itr); 2145 delete SectionPtr; 2146 return true; 2147 } 2148 return false; 2149 } 2150 2151 void BinaryContext::renameSection(BinarySection &Section, 2152 const Twine &NewName) { 2153 auto Itr = Sections.find(&Section); 2154 assert(Itr != Sections.end() && "Section must exist to be renamed."); 2155 Sections.erase(Itr); 2156 2157 deregisterSectionName(Section); 2158 2159 Section.Name = NewName.str(); 2160 Section.setOutputName(Section.Name); 2161 2162 NameToSection.insert(std::make_pair(Section.Name, &Section)); 2163 2164 // Reinsert with the new name. 2165 Sections.insert(&Section); 2166 } 2167 2168 void BinaryContext::printSections(raw_ostream &OS) const { 2169 for (BinarySection *const &Section : Sections) 2170 OS << "BOLT-INFO: " << *Section << "\n"; 2171 } 2172 2173 BinarySection &BinaryContext::absoluteSection() { 2174 if (ErrorOr<BinarySection &> Section = getUniqueSectionByName("<absolute>")) 2175 return *Section; 2176 return registerOrUpdateSection("<absolute>", ELF::SHT_NULL, 0u); 2177 } 2178 2179 ErrorOr<uint64_t> BinaryContext::getUnsignedValueAtAddress(uint64_t Address, 2180 size_t Size) const { 2181 const ErrorOr<const BinarySection &> Section = getSectionForAddress(Address); 2182 if (!Section) 2183 return std::make_error_code(std::errc::bad_address); 2184 2185 if (Section->isVirtual()) 2186 return 0; 2187 2188 DataExtractor DE(Section->getContents(), AsmInfo->isLittleEndian(), 2189 AsmInfo->getCodePointerSize()); 2190 auto ValueOffset = static_cast<uint64_t>(Address - Section->getAddress()); 2191 return DE.getUnsigned(&ValueOffset, Size); 2192 } 2193 2194 ErrorOr<uint64_t> BinaryContext::getSignedValueAtAddress(uint64_t Address, 2195 size_t Size) const { 2196 const ErrorOr<const BinarySection &> Section = getSectionForAddress(Address); 2197 if (!Section) 2198 return std::make_error_code(std::errc::bad_address); 2199 2200 if (Section->isVirtual()) 2201 return 0; 2202 2203 DataExtractor DE(Section->getContents(), AsmInfo->isLittleEndian(), 2204 AsmInfo->getCodePointerSize()); 2205 auto ValueOffset = static_cast<uint64_t>(Address - Section->getAddress()); 2206 return DE.getSigned(&ValueOffset, Size); 2207 } 2208 2209 void BinaryContext::addRelocation(uint64_t Address, MCSymbol *Symbol, 2210 uint64_t Type, uint64_t Addend, 2211 uint64_t Value) { 2212 ErrorOr<BinarySection &> Section = getSectionForAddress(Address); 2213 assert(Section && "cannot find section for address"); 2214 Section->addRelocation(Address - Section->getAddress(), Symbol, Type, Addend, 2215 Value); 2216 } 2217 2218 void BinaryContext::addDynamicRelocation(uint64_t Address, MCSymbol *Symbol, 2219 uint64_t Type, uint64_t Addend, 2220 uint64_t Value) { 2221 ErrorOr<BinarySection &> Section = getSectionForAddress(Address); 2222 assert(Section && "cannot find section for address"); 2223 Section->addDynamicRelocation(Address - Section->getAddress(), Symbol, Type, 2224 Addend, Value); 2225 } 2226 2227 bool BinaryContext::removeRelocationAt(uint64_t Address) { 2228 ErrorOr<BinarySection &> Section = getSectionForAddress(Address); 2229 assert(Section && "cannot find section for address"); 2230 return Section->removeRelocationAt(Address - Section->getAddress()); 2231 } 2232 2233 const Relocation *BinaryContext::getRelocationAt(uint64_t Address) const { 2234 ErrorOr<const BinarySection &> Section = getSectionForAddress(Address); 2235 if (!Section) 2236 return nullptr; 2237 2238 return Section->getRelocationAt(Address - Section->getAddress()); 2239 } 2240 2241 const Relocation * 2242 BinaryContext::getDynamicRelocationAt(uint64_t Address) const { 2243 ErrorOr<const BinarySection &> Section = getSectionForAddress(Address); 2244 if (!Section) 2245 return nullptr; 2246 2247 return Section->getDynamicRelocationAt(Address - Section->getAddress()); 2248 } 2249 2250 void BinaryContext::markAmbiguousRelocations(BinaryData &BD, 2251 const uint64_t Address) { 2252 auto setImmovable = [&](BinaryData &BD) { 2253 BinaryData *Root = BD.getAtomicRoot(); 2254 LLVM_DEBUG(if (Root->isMoveable()) { 2255 dbgs() << "BOLT-DEBUG: setting " << *Root << " as immovable " 2256 << "due to ambiguous relocation referencing 0x" 2257 << Twine::utohexstr(Address) << '\n'; 2258 }); 2259 Root->setIsMoveable(false); 2260 }; 2261 2262 if (Address == BD.getAddress()) { 2263 setImmovable(BD); 2264 2265 // Set previous symbol as immovable 2266 BinaryData *Prev = getBinaryDataContainingAddress(Address - 1); 2267 if (Prev && Prev->getEndAddress() == BD.getAddress()) 2268 setImmovable(*Prev); 2269 } 2270 2271 if (Address == BD.getEndAddress()) { 2272 setImmovable(BD); 2273 2274 // Set next symbol as immovable 2275 BinaryData *Next = getBinaryDataContainingAddress(BD.getEndAddress()); 2276 if (Next && Next->getAddress() == BD.getEndAddress()) 2277 setImmovable(*Next); 2278 } 2279 } 2280 2281 BinaryFunction *BinaryContext::getFunctionForSymbol(const MCSymbol *Symbol, 2282 uint64_t *EntryDesc) { 2283 std::shared_lock<llvm::sys::RWMutex> Lock(SymbolToFunctionMapMutex); 2284 auto BFI = SymbolToFunctionMap.find(Symbol); 2285 if (BFI == SymbolToFunctionMap.end()) 2286 return nullptr; 2287 2288 BinaryFunction *BF = BFI->second; 2289 if (EntryDesc) 2290 *EntryDesc = BF->getEntryIDForSymbol(Symbol); 2291 2292 return BF; 2293 } 2294 2295 std::string 2296 BinaryContext::generateBugReportMessage(StringRef Message, 2297 const BinaryFunction &Function) const { 2298 std::string Msg; 2299 raw_string_ostream SS(Msg); 2300 SS << "=======================================\n"; 2301 SS << "BOLT is unable to proceed because it couldn't properly understand " 2302 "this function.\n"; 2303 SS << "If you are running the most recent version of BOLT, you may " 2304 "want to " 2305 "report this and paste this dump.\nPlease check that there is no " 2306 "sensitive contents being shared in this dump.\n"; 2307 SS << "\nOffending function: " << Function.getPrintName() << "\n\n"; 2308 ScopedPrinter SP(SS); 2309 SP.printBinaryBlock("Function contents", *Function.getData()); 2310 SS << "\n"; 2311 const_cast<BinaryFunction &>(Function).print(SS, ""); 2312 SS << "ERROR: " << Message; 2313 SS << "\n=======================================\n"; 2314 return Msg; 2315 } 2316 2317 BinaryFunction * 2318 BinaryContext::createInjectedBinaryFunction(const std::string &Name, 2319 bool IsSimple) { 2320 InjectedBinaryFunctions.push_back(new BinaryFunction(Name, *this, IsSimple)); 2321 BinaryFunction *BF = InjectedBinaryFunctions.back(); 2322 setSymbolToFunctionMap(BF->getSymbol(), BF); 2323 BF->CurrentState = BinaryFunction::State::CFG; 2324 return BF; 2325 } 2326 2327 std::pair<size_t, size_t> 2328 BinaryContext::calculateEmittedSize(BinaryFunction &BF, bool FixBranches) { 2329 // Adjust branch instruction to match the current layout. 2330 if (FixBranches) 2331 BF.fixBranches(); 2332 2333 // Create local MC context to isolate the effect of ephemeral code emission. 2334 IndependentCodeEmitter MCEInstance = createIndependentMCCodeEmitter(); 2335 MCContext *LocalCtx = MCEInstance.LocalCtx.get(); 2336 MCAsmBackend *MAB = 2337 TheTarget->createMCAsmBackend(*STI, *MRI, MCTargetOptions()); 2338 2339 SmallString<256> Code; 2340 raw_svector_ostream VecOS(Code); 2341 2342 std::unique_ptr<MCObjectWriter> OW = MAB->createObjectWriter(VecOS); 2343 std::unique_ptr<MCStreamer> Streamer(TheTarget->createMCObjectStreamer( 2344 *TheTriple, *LocalCtx, std::unique_ptr<MCAsmBackend>(MAB), std::move(OW), 2345 std::unique_ptr<MCCodeEmitter>(MCEInstance.MCE.release()), *STI, 2346 /*RelaxAll=*/false, 2347 /*IncrementalLinkerCompatible=*/false, 2348 /*DWARFMustBeAtTheEnd=*/false)); 2349 2350 Streamer->initSections(false, *STI); 2351 2352 MCSection *Section = MCEInstance.LocalMOFI->getTextSection(); 2353 Section->setHasInstructions(true); 2354 2355 // Create symbols in the LocalCtx so that they get destroyed with it. 2356 MCSymbol *StartLabel = LocalCtx->createTempSymbol(); 2357 MCSymbol *EndLabel = LocalCtx->createTempSymbol(); 2358 2359 Streamer->switchSection(Section); 2360 Streamer->emitLabel(StartLabel); 2361 emitFunctionBody(*Streamer, BF, BF.getLayout().getMainFragment(), 2362 /*EmitCodeOnly=*/true); 2363 Streamer->emitLabel(EndLabel); 2364 2365 using LabelRange = std::pair<const MCSymbol *, const MCSymbol *>; 2366 SmallVector<LabelRange> SplitLabels; 2367 for (FunctionFragment &FF : BF.getLayout().getSplitFragments()) { 2368 MCSymbol *const SplitStartLabel = LocalCtx->createTempSymbol(); 2369 MCSymbol *const SplitEndLabel = LocalCtx->createTempSymbol(); 2370 SplitLabels.emplace_back(SplitStartLabel, SplitEndLabel); 2371 2372 MCSectionELF *const SplitSection = LocalCtx->getELFSection( 2373 BF.getCodeSectionName(FF.getFragmentNum()), ELF::SHT_PROGBITS, 2374 ELF::SHF_EXECINSTR | ELF::SHF_ALLOC); 2375 SplitSection->setHasInstructions(true); 2376 Streamer->switchSection(SplitSection); 2377 2378 Streamer->emitLabel(SplitStartLabel); 2379 emitFunctionBody(*Streamer, BF, FF, /*EmitCodeOnly=*/true); 2380 Streamer->emitLabel(SplitEndLabel); 2381 // To avoid calling MCObjectStreamer::flushPendingLabels() which is 2382 // private 2383 Streamer->emitBytes(StringRef("")); 2384 Streamer->switchSection(Section); 2385 } 2386 2387 // To avoid calling MCObjectStreamer::flushPendingLabels() which is private or 2388 // MCStreamer::Finish(), which does more than we want 2389 Streamer->emitBytes(StringRef("")); 2390 2391 MCAssembler &Assembler = 2392 static_cast<MCObjectStreamer *>(Streamer.get())->getAssembler(); 2393 MCAsmLayout Layout(Assembler); 2394 Assembler.layout(Layout); 2395 2396 // Obtain fragment sizes. 2397 std::vector<uint64_t> FragmentSizes; 2398 // Main fragment size. 2399 const uint64_t HotSize = 2400 Layout.getSymbolOffset(*EndLabel) - Layout.getSymbolOffset(*StartLabel); 2401 FragmentSizes.push_back(HotSize); 2402 // Split fragment sizes. 2403 uint64_t ColdSize = 0; 2404 for (const auto &Labels : SplitLabels) { 2405 uint64_t Size = Layout.getSymbolOffset(*Labels.second) - 2406 Layout.getSymbolOffset(*Labels.first); 2407 FragmentSizes.push_back(Size); 2408 ColdSize += Size; 2409 } 2410 2411 // Populate new start and end offsets of each basic block. 2412 uint64_t FragmentIndex = 0; 2413 for (FunctionFragment &FF : BF.getLayout().fragments()) { 2414 BinaryBasicBlock *PrevBB = nullptr; 2415 for (BinaryBasicBlock *BB : FF) { 2416 const uint64_t BBStartOffset = Layout.getSymbolOffset(*(BB->getLabel())); 2417 BB->setOutputStartAddress(BBStartOffset); 2418 if (PrevBB) 2419 PrevBB->setOutputEndAddress(BBStartOffset); 2420 PrevBB = BB; 2421 } 2422 if (PrevBB) 2423 PrevBB->setOutputEndAddress(FragmentSizes[FragmentIndex]); 2424 FragmentIndex++; 2425 } 2426 2427 // Clean-up the effect of the code emission. 2428 for (const MCSymbol &Symbol : Assembler.symbols()) { 2429 MCSymbol *MutableSymbol = const_cast<MCSymbol *>(&Symbol); 2430 MutableSymbol->setUndefined(); 2431 MutableSymbol->setIsRegistered(false); 2432 } 2433 2434 return std::make_pair(HotSize, ColdSize); 2435 } 2436 2437 bool BinaryContext::validateInstructionEncoding( 2438 ArrayRef<uint8_t> InputSequence) const { 2439 MCInst Inst; 2440 uint64_t InstSize; 2441 DisAsm->getInstruction(Inst, InstSize, InputSequence, 0, nulls()); 2442 assert(InstSize == InputSequence.size() && 2443 "Disassembled instruction size does not match the sequence."); 2444 2445 SmallString<256> Code; 2446 SmallVector<MCFixup, 4> Fixups; 2447 2448 MCE->encodeInstruction(Inst, Code, Fixups, *STI); 2449 auto OutputSequence = ArrayRef<uint8_t>((uint8_t *)Code.data(), Code.size()); 2450 if (InputSequence != OutputSequence) { 2451 if (opts::Verbosity > 1) { 2452 this->errs() << "BOLT-WARNING: mismatched encoding detected\n" 2453 << " input: " << InputSequence << '\n' 2454 << " output: " << OutputSequence << '\n'; 2455 } 2456 return false; 2457 } 2458 2459 return true; 2460 } 2461 2462 uint64_t BinaryContext::getHotThreshold() const { 2463 static uint64_t Threshold = 0; 2464 if (Threshold == 0) { 2465 Threshold = std::max( 2466 (uint64_t)opts::ExecutionCountThreshold, 2467 NumProfiledFuncs ? SumExecutionCount / (2 * NumProfiledFuncs) : 1); 2468 } 2469 return Threshold; 2470 } 2471 2472 BinaryFunction *BinaryContext::getBinaryFunctionContainingAddress( 2473 uint64_t Address, bool CheckPastEnd, bool UseMaxSize) { 2474 auto FI = BinaryFunctions.upper_bound(Address); 2475 if (FI == BinaryFunctions.begin()) 2476 return nullptr; 2477 --FI; 2478 2479 const uint64_t UsedSize = 2480 UseMaxSize ? FI->second.getMaxSize() : FI->second.getSize(); 2481 2482 if (Address >= FI->first + UsedSize + (CheckPastEnd ? 1 : 0)) 2483 return nullptr; 2484 2485 return &FI->second; 2486 } 2487 2488 BinaryFunction *BinaryContext::getBinaryFunctionAtAddress(uint64_t Address) { 2489 // First, try to find a function starting at the given address. If the 2490 // function was folded, this will get us the original folded function if it 2491 // wasn't removed from the list, e.g. in non-relocation mode. 2492 auto BFI = BinaryFunctions.find(Address); 2493 if (BFI != BinaryFunctions.end()) 2494 return &BFI->second; 2495 2496 // We might have folded the function matching the object at the given 2497 // address. In such case, we look for a function matching the symbol 2498 // registered at the original address. The new function (the one that the 2499 // original was folded into) will hold the symbol. 2500 if (const BinaryData *BD = getBinaryDataAtAddress(Address)) { 2501 uint64_t EntryID = 0; 2502 BinaryFunction *BF = getFunctionForSymbol(BD->getSymbol(), &EntryID); 2503 if (BF && EntryID == 0) 2504 return BF; 2505 } 2506 return nullptr; 2507 } 2508 2509 DebugAddressRangesVector BinaryContext::translateModuleAddressRanges( 2510 const DWARFAddressRangesVector &InputRanges) const { 2511 DebugAddressRangesVector OutputRanges; 2512 2513 for (const DWARFAddressRange Range : InputRanges) { 2514 auto BFI = BinaryFunctions.lower_bound(Range.LowPC); 2515 while (BFI != BinaryFunctions.end()) { 2516 const BinaryFunction &Function = BFI->second; 2517 if (Function.getAddress() >= Range.HighPC) 2518 break; 2519 const DebugAddressRangesVector FunctionRanges = 2520 Function.getOutputAddressRanges(); 2521 llvm::move(FunctionRanges, std::back_inserter(OutputRanges)); 2522 std::advance(BFI, 1); 2523 } 2524 } 2525 2526 return OutputRanges; 2527 } 2528 2529 } // namespace bolt 2530 } // namespace llvm 2531