1 //===- bolt/Core/BinaryContext.cpp - Low-level context --------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file implements the BinaryContext class. 10 // 11 //===----------------------------------------------------------------------===// 12 13 #include "bolt/Core/BinaryContext.h" 14 #include "bolt/Core/BinaryEmitter.h" 15 #include "bolt/Core/BinaryFunction.h" 16 #include "bolt/Utils/CommandLineOpts.h" 17 #include "bolt/Utils/NameResolver.h" 18 #include "bolt/Utils/Utils.h" 19 #include "llvm/ADT/STLExtras.h" 20 #include "llvm/ADT/Twine.h" 21 #include "llvm/DebugInfo/DWARF/DWARFCompileUnit.h" 22 #include "llvm/DebugInfo/DWARF/DWARFFormValue.h" 23 #include "llvm/DebugInfo/DWARF/DWARFUnit.h" 24 #include "llvm/MC/MCAsmLayout.h" 25 #include "llvm/MC/MCAssembler.h" 26 #include "llvm/MC/MCContext.h" 27 #include "llvm/MC/MCDisassembler/MCDisassembler.h" 28 #include "llvm/MC/MCInstPrinter.h" 29 #include "llvm/MC/MCObjectStreamer.h" 30 #include "llvm/MC/MCObjectWriter.h" 31 #include "llvm/MC/MCRegisterInfo.h" 32 #include "llvm/MC/MCSectionELF.h" 33 #include "llvm/MC/MCStreamer.h" 34 #include "llvm/MC/MCSubtargetInfo.h" 35 #include "llvm/MC/MCSymbol.h" 36 #include "llvm/Support/CommandLine.h" 37 #include "llvm/Support/Error.h" 38 #include "llvm/Support/Regex.h" 39 #include <algorithm> 40 #include <functional> 41 #include <iterator> 42 #include <numeric> 43 #include <unordered_set> 44 45 using namespace llvm; 46 47 #undef DEBUG_TYPE 48 #define DEBUG_TYPE "bolt" 49 50 namespace opts { 51 52 cl::opt<bool> NoHugePages("no-huge-pages", 53 cl::desc("use regular size pages for code alignment"), 54 cl::Hidden, cl::cat(BoltCategory)); 55 56 static cl::opt<bool> 57 PrintDebugInfo("print-debug-info", 58 cl::desc("print debug info when printing functions"), 59 cl::Hidden, 60 cl::ZeroOrMore, 61 cl::cat(BoltCategory)); 62 63 cl::opt<bool> PrintRelocations( 64 "print-relocations", 65 cl::desc("print relocations when printing functions/objects"), cl::Hidden, 66 cl::cat(BoltCategory)); 67 68 static cl::opt<bool> 69 PrintMemData("print-mem-data", 70 cl::desc("print memory data annotations when printing functions"), 71 cl::Hidden, 72 cl::ZeroOrMore, 73 cl::cat(BoltCategory)); 74 75 } // namespace opts 76 77 namespace llvm { 78 namespace bolt { 79 80 BinaryContext::BinaryContext(std::unique_ptr<MCContext> Ctx, 81 std::unique_ptr<DWARFContext> DwCtx, 82 std::unique_ptr<Triple> TheTriple, 83 const Target *TheTarget, std::string TripleName, 84 std::unique_ptr<MCCodeEmitter> MCE, 85 std::unique_ptr<MCObjectFileInfo> MOFI, 86 std::unique_ptr<const MCAsmInfo> AsmInfo, 87 std::unique_ptr<const MCInstrInfo> MII, 88 std::unique_ptr<const MCSubtargetInfo> STI, 89 std::unique_ptr<MCInstPrinter> InstPrinter, 90 std::unique_ptr<const MCInstrAnalysis> MIA, 91 std::unique_ptr<MCPlusBuilder> MIB, 92 std::unique_ptr<const MCRegisterInfo> MRI, 93 std::unique_ptr<MCDisassembler> DisAsm) 94 : Ctx(std::move(Ctx)), DwCtx(std::move(DwCtx)), 95 TheTriple(std::move(TheTriple)), TheTarget(TheTarget), 96 TripleName(TripleName), MCE(std::move(MCE)), MOFI(std::move(MOFI)), 97 AsmInfo(std::move(AsmInfo)), MII(std::move(MII)), STI(std::move(STI)), 98 InstPrinter(std::move(InstPrinter)), MIA(std::move(MIA)), 99 MIB(std::move(MIB)), MRI(std::move(MRI)), DisAsm(std::move(DisAsm)) { 100 Relocation::Arch = this->TheTriple->getArch(); 101 RegularPageSize = isAArch64() ? RegularPageSizeAArch64 : RegularPageSizeX86; 102 PageAlign = opts::NoHugePages ? RegularPageSize : HugePageSize; 103 } 104 105 BinaryContext::~BinaryContext() { 106 for (BinarySection *Section : Sections) 107 delete Section; 108 for (BinaryFunction *InjectedFunction : InjectedBinaryFunctions) 109 delete InjectedFunction; 110 for (std::pair<const uint64_t, JumpTable *> JTI : JumpTables) 111 delete JTI.second; 112 clearBinaryData(); 113 } 114 115 /// Create BinaryContext for a given architecture \p ArchName and 116 /// triple \p TripleName. 117 Expected<std::unique_ptr<BinaryContext>> 118 BinaryContext::createBinaryContext(const ObjectFile *File, bool IsPIC, 119 std::unique_ptr<DWARFContext> DwCtx) { 120 StringRef ArchName = ""; 121 StringRef FeaturesStr = ""; 122 switch (File->getArch()) { 123 case llvm::Triple::x86_64: 124 ArchName = "x86-64"; 125 FeaturesStr = "+nopl"; 126 break; 127 case llvm::Triple::aarch64: 128 ArchName = "aarch64"; 129 FeaturesStr = "+all"; 130 break; 131 case llvm::Triple::riscv64: 132 ArchName = "riscv64"; 133 // RV64GC 134 FeaturesStr = "+m,+a,+f,+d,+zicsr,+zifencei,+c,+relax"; 135 break; 136 default: 137 return createStringError(std::errc::not_supported, 138 "BOLT-ERROR: Unrecognized machine in ELF file"); 139 } 140 141 auto TheTriple = std::make_unique<Triple>(File->makeTriple()); 142 const std::string TripleName = TheTriple->str(); 143 144 std::string Error; 145 const Target *TheTarget = 146 TargetRegistry::lookupTarget(std::string(ArchName), *TheTriple, Error); 147 if (!TheTarget) 148 return createStringError(make_error_code(std::errc::not_supported), 149 Twine("BOLT-ERROR: ", Error)); 150 151 std::unique_ptr<const MCRegisterInfo> MRI( 152 TheTarget->createMCRegInfo(TripleName)); 153 if (!MRI) 154 return createStringError( 155 make_error_code(std::errc::not_supported), 156 Twine("BOLT-ERROR: no register info for target ", TripleName)); 157 158 // Set up disassembler. 159 std::unique_ptr<MCAsmInfo> AsmInfo( 160 TheTarget->createMCAsmInfo(*MRI, TripleName, MCTargetOptions())); 161 if (!AsmInfo) 162 return createStringError( 163 make_error_code(std::errc::not_supported), 164 Twine("BOLT-ERROR: no assembly info for target ", TripleName)); 165 // BOLT creates "func@PLT" symbols for PLT entries. In function assembly dump 166 // we want to emit such names as using @PLT without double quotes to convey 167 // variant kind to the assembler. BOLT doesn't rely on the linker so we can 168 // override the default AsmInfo behavior to emit names the way we want. 169 AsmInfo->setAllowAtInName(true); 170 171 std::unique_ptr<const MCSubtargetInfo> STI( 172 TheTarget->createMCSubtargetInfo(TripleName, "", FeaturesStr)); 173 if (!STI) 174 return createStringError( 175 make_error_code(std::errc::not_supported), 176 Twine("BOLT-ERROR: no subtarget info for target ", TripleName)); 177 178 std::unique_ptr<const MCInstrInfo> MII(TheTarget->createMCInstrInfo()); 179 if (!MII) 180 return createStringError( 181 make_error_code(std::errc::not_supported), 182 Twine("BOLT-ERROR: no instruction info for target ", TripleName)); 183 184 std::unique_ptr<MCContext> Ctx( 185 new MCContext(*TheTriple, AsmInfo.get(), MRI.get(), STI.get())); 186 std::unique_ptr<MCObjectFileInfo> MOFI( 187 TheTarget->createMCObjectFileInfo(*Ctx, IsPIC)); 188 Ctx->setObjectFileInfo(MOFI.get()); 189 // We do not support X86 Large code model. Change this in the future. 190 bool Large = false; 191 if (TheTriple->getArch() == llvm::Triple::aarch64) 192 Large = true; 193 unsigned LSDAEncoding = 194 Large ? dwarf::DW_EH_PE_absptr : dwarf::DW_EH_PE_udata4; 195 if (IsPIC) { 196 LSDAEncoding = dwarf::DW_EH_PE_pcrel | 197 (Large ? dwarf::DW_EH_PE_sdata8 : dwarf::DW_EH_PE_sdata4); 198 } 199 200 std::unique_ptr<MCDisassembler> DisAsm( 201 TheTarget->createMCDisassembler(*STI, *Ctx)); 202 203 if (!DisAsm) 204 return createStringError( 205 make_error_code(std::errc::not_supported), 206 Twine("BOLT-ERROR: no disassembler info for target ", TripleName)); 207 208 std::unique_ptr<const MCInstrAnalysis> MIA( 209 TheTarget->createMCInstrAnalysis(MII.get())); 210 if (!MIA) 211 return createStringError( 212 make_error_code(std::errc::not_supported), 213 Twine("BOLT-ERROR: failed to create instruction analysis for target ", 214 TripleName)); 215 216 int AsmPrinterVariant = AsmInfo->getAssemblerDialect(); 217 std::unique_ptr<MCInstPrinter> InstructionPrinter( 218 TheTarget->createMCInstPrinter(*TheTriple, AsmPrinterVariant, *AsmInfo, 219 *MII, *MRI)); 220 if (!InstructionPrinter) 221 return createStringError( 222 make_error_code(std::errc::not_supported), 223 Twine("BOLT-ERROR: no instruction printer for target ", TripleName)); 224 InstructionPrinter->setPrintImmHex(true); 225 226 std::unique_ptr<MCCodeEmitter> MCE( 227 TheTarget->createMCCodeEmitter(*MII, *Ctx)); 228 229 // Make sure we don't miss any output on core dumps. 230 outs().SetUnbuffered(); 231 errs().SetUnbuffered(); 232 dbgs().SetUnbuffered(); 233 234 auto BC = std::make_unique<BinaryContext>( 235 std::move(Ctx), std::move(DwCtx), std::move(TheTriple), TheTarget, 236 std::string(TripleName), std::move(MCE), std::move(MOFI), 237 std::move(AsmInfo), std::move(MII), std::move(STI), 238 std::move(InstructionPrinter), std::move(MIA), nullptr, std::move(MRI), 239 std::move(DisAsm)); 240 241 BC->LSDAEncoding = LSDAEncoding; 242 243 BC->MAB = std::unique_ptr<MCAsmBackend>( 244 BC->TheTarget->createMCAsmBackend(*BC->STI, *BC->MRI, MCTargetOptions())); 245 246 BC->setFilename(File->getFileName()); 247 248 BC->HasFixedLoadAddress = !IsPIC; 249 250 BC->SymbolicDisAsm = std::unique_ptr<MCDisassembler>( 251 BC->TheTarget->createMCDisassembler(*BC->STI, *BC->Ctx)); 252 253 if (!BC->SymbolicDisAsm) 254 return createStringError( 255 make_error_code(std::errc::not_supported), 256 Twine("BOLT-ERROR: no disassembler info for target ", TripleName)); 257 258 return std::move(BC); 259 } 260 261 bool BinaryContext::forceSymbolRelocations(StringRef SymbolName) const { 262 if (opts::HotText && 263 (SymbolName == "__hot_start" || SymbolName == "__hot_end")) 264 return true; 265 266 if (opts::HotData && 267 (SymbolName == "__hot_data_start" || SymbolName == "__hot_data_end")) 268 return true; 269 270 if (SymbolName == "_end") 271 return true; 272 273 return false; 274 } 275 276 std::unique_ptr<MCObjectWriter> 277 BinaryContext::createObjectWriter(raw_pwrite_stream &OS) { 278 return MAB->createObjectWriter(OS); 279 } 280 281 bool BinaryContext::validateObjectNesting() const { 282 auto Itr = BinaryDataMap.begin(); 283 auto End = BinaryDataMap.end(); 284 bool Valid = true; 285 while (Itr != End) { 286 auto Next = std::next(Itr); 287 while (Next != End && 288 Itr->second->getSection() == Next->second->getSection() && 289 Itr->second->containsRange(Next->second->getAddress(), 290 Next->second->getSize())) { 291 if (Next->second->Parent != Itr->second) { 292 errs() << "BOLT-WARNING: object nesting incorrect for:\n" 293 << "BOLT-WARNING: " << *Itr->second << "\n" 294 << "BOLT-WARNING: " << *Next->second << "\n"; 295 Valid = false; 296 } 297 ++Next; 298 } 299 Itr = Next; 300 } 301 return Valid; 302 } 303 304 bool BinaryContext::validateHoles() const { 305 bool Valid = true; 306 for (BinarySection &Section : sections()) { 307 for (const Relocation &Rel : Section.relocations()) { 308 uint64_t RelAddr = Rel.Offset + Section.getAddress(); 309 const BinaryData *BD = getBinaryDataContainingAddress(RelAddr); 310 if (!BD) { 311 errs() << "BOLT-WARNING: no BinaryData found for relocation at address" 312 << " 0x" << Twine::utohexstr(RelAddr) << " in " 313 << Section.getName() << "\n"; 314 Valid = false; 315 } else if (!BD->getAtomicRoot()) { 316 errs() << "BOLT-WARNING: no atomic BinaryData found for relocation at " 317 << "address 0x" << Twine::utohexstr(RelAddr) << " in " 318 << Section.getName() << "\n"; 319 Valid = false; 320 } 321 } 322 } 323 return Valid; 324 } 325 326 void BinaryContext::updateObjectNesting(BinaryDataMapType::iterator GAI) { 327 const uint64_t Address = GAI->second->getAddress(); 328 const uint64_t Size = GAI->second->getSize(); 329 330 auto fixParents = [&](BinaryDataMapType::iterator Itr, 331 BinaryData *NewParent) { 332 BinaryData *OldParent = Itr->second->Parent; 333 Itr->second->Parent = NewParent; 334 ++Itr; 335 while (Itr != BinaryDataMap.end() && OldParent && 336 Itr->second->Parent == OldParent) { 337 Itr->second->Parent = NewParent; 338 ++Itr; 339 } 340 }; 341 342 // Check if the previous symbol contains the newly added symbol. 343 if (GAI != BinaryDataMap.begin()) { 344 BinaryData *Prev = std::prev(GAI)->second; 345 while (Prev) { 346 if (Prev->getSection() == GAI->second->getSection() && 347 Prev->containsRange(Address, Size)) { 348 fixParents(GAI, Prev); 349 } else { 350 fixParents(GAI, nullptr); 351 } 352 Prev = Prev->Parent; 353 } 354 } 355 356 // Check if the newly added symbol contains any subsequent symbols. 357 if (Size != 0) { 358 BinaryData *BD = GAI->second->Parent ? GAI->second->Parent : GAI->second; 359 auto Itr = std::next(GAI); 360 while ( 361 Itr != BinaryDataMap.end() && 362 BD->containsRange(Itr->second->getAddress(), Itr->second->getSize())) { 363 Itr->second->Parent = BD; 364 ++Itr; 365 } 366 } 367 } 368 369 iterator_range<BinaryContext::binary_data_iterator> 370 BinaryContext::getSubBinaryData(BinaryData *BD) { 371 auto Start = std::next(BinaryDataMap.find(BD->getAddress())); 372 auto End = Start; 373 while (End != BinaryDataMap.end() && BD->isAncestorOf(End->second)) 374 ++End; 375 return make_range(Start, End); 376 } 377 378 std::pair<const MCSymbol *, uint64_t> 379 BinaryContext::handleAddressRef(uint64_t Address, BinaryFunction &BF, 380 bool IsPCRel) { 381 if (isAArch64()) { 382 // Check if this is an access to a constant island and create bookkeeping 383 // to keep track of it and emit it later as part of this function. 384 if (MCSymbol *IslandSym = BF.getOrCreateIslandAccess(Address)) 385 return std::make_pair(IslandSym, 0); 386 387 // Detect custom code written in assembly that refers to arbitrary 388 // constant islands from other functions. Write this reference so we 389 // can pull this constant island and emit it as part of this function 390 // too. 391 auto IslandIter = AddressToConstantIslandMap.lower_bound(Address); 392 393 if (IslandIter != AddressToConstantIslandMap.begin() && 394 (IslandIter == AddressToConstantIslandMap.end() || 395 IslandIter->first > Address)) 396 --IslandIter; 397 398 if (IslandIter != AddressToConstantIslandMap.end()) { 399 // Fall-back to referencing the original constant island in the presence 400 // of dynamic relocs, as we currently do not support cloning them. 401 // Notice: we might fail to link because of this, if the original constant 402 // island we are referring would be emitted too far away. 403 if (IslandIter->second->hasDynamicRelocationAtIsland()) { 404 MCSymbol *IslandSym = 405 IslandIter->second->getOrCreateIslandAccess(Address); 406 if (IslandSym) 407 return std::make_pair(IslandSym, 0); 408 } else if (MCSymbol *IslandSym = 409 IslandIter->second->getOrCreateProxyIslandAccess(Address, 410 BF)) { 411 BF.createIslandDependency(IslandSym, IslandIter->second); 412 return std::make_pair(IslandSym, 0); 413 } 414 } 415 } 416 417 // Note that the address does not necessarily have to reside inside 418 // a section, it could be an absolute address too. 419 ErrorOr<BinarySection &> Section = getSectionForAddress(Address); 420 if (Section && Section->isText()) { 421 if (BF.containsAddress(Address, /*UseMaxSize=*/isAArch64())) { 422 if (Address != BF.getAddress()) { 423 // The address could potentially escape. Mark it as another entry 424 // point into the function. 425 if (opts::Verbosity >= 1) { 426 outs() << "BOLT-INFO: potentially escaped address 0x" 427 << Twine::utohexstr(Address) << " in function " << BF << '\n'; 428 } 429 BF.HasInternalLabelReference = true; 430 return std::make_pair( 431 BF.addEntryPointAtOffset(Address - BF.getAddress()), 0); 432 } 433 } else { 434 addInterproceduralReference(&BF, Address); 435 } 436 } 437 438 // With relocations, catch jump table references outside of the basic block 439 // containing the indirect jump. 440 if (HasRelocations) { 441 const MemoryContentsType MemType = analyzeMemoryAt(Address, BF); 442 if (MemType == MemoryContentsType::POSSIBLE_PIC_JUMP_TABLE && IsPCRel) { 443 const MCSymbol *Symbol = 444 getOrCreateJumpTable(BF, Address, JumpTable::JTT_PIC); 445 446 return std::make_pair(Symbol, 0); 447 } 448 } 449 450 if (BinaryData *BD = getBinaryDataContainingAddress(Address)) 451 return std::make_pair(BD->getSymbol(), Address - BD->getAddress()); 452 453 // TODO: use DWARF info to get size/alignment here? 454 MCSymbol *TargetSymbol = getOrCreateGlobalSymbol(Address, "DATAat"); 455 LLVM_DEBUG(dbgs() << "Created symbol " << TargetSymbol->getName() << '\n'); 456 return std::make_pair(TargetSymbol, 0); 457 } 458 459 MemoryContentsType BinaryContext::analyzeMemoryAt(uint64_t Address, 460 BinaryFunction &BF) { 461 if (!isX86()) 462 return MemoryContentsType::UNKNOWN; 463 464 ErrorOr<BinarySection &> Section = getSectionForAddress(Address); 465 if (!Section) { 466 // No section - possibly an absolute address. Since we don't allow 467 // internal function addresses to escape the function scope - we 468 // consider it a tail call. 469 if (opts::Verbosity > 1) { 470 errs() << "BOLT-WARNING: no section for address 0x" 471 << Twine::utohexstr(Address) << " referenced from function " << BF 472 << '\n'; 473 } 474 return MemoryContentsType::UNKNOWN; 475 } 476 477 if (Section->isVirtual()) { 478 // The contents are filled at runtime. 479 return MemoryContentsType::UNKNOWN; 480 } 481 482 // No support for jump tables in code yet. 483 if (Section->isText()) 484 return MemoryContentsType::UNKNOWN; 485 486 // Start with checking for PIC jump table. We expect non-PIC jump tables 487 // to have high 32 bits set to 0. 488 if (analyzeJumpTable(Address, JumpTable::JTT_PIC, BF)) 489 return MemoryContentsType::POSSIBLE_PIC_JUMP_TABLE; 490 491 if (analyzeJumpTable(Address, JumpTable::JTT_NORMAL, BF)) 492 return MemoryContentsType::POSSIBLE_JUMP_TABLE; 493 494 return MemoryContentsType::UNKNOWN; 495 } 496 497 bool BinaryContext::analyzeJumpTable(const uint64_t Address, 498 const JumpTable::JumpTableType Type, 499 const BinaryFunction &BF, 500 const uint64_t NextJTAddress, 501 JumpTable::AddressesType *EntriesAsAddress, 502 bool *HasEntryInFragment) const { 503 // Is one of the targets __builtin_unreachable? 504 bool HasUnreachable = false; 505 506 // Does one of the entries match function start address? 507 bool HasStartAsEntry = false; 508 509 // Number of targets other than __builtin_unreachable. 510 uint64_t NumRealEntries = 0; 511 512 auto addEntryAddress = [&](uint64_t EntryAddress) { 513 if (EntriesAsAddress) 514 EntriesAsAddress->emplace_back(EntryAddress); 515 }; 516 517 ErrorOr<const BinarySection &> Section = getSectionForAddress(Address); 518 if (!Section) 519 return false; 520 521 // The upper bound is defined by containing object, section limits, and 522 // the next jump table in memory. 523 uint64_t UpperBound = Section->getEndAddress(); 524 const BinaryData *JumpTableBD = getBinaryDataAtAddress(Address); 525 if (JumpTableBD && JumpTableBD->getSize()) { 526 assert(JumpTableBD->getEndAddress() <= UpperBound && 527 "data object cannot cross a section boundary"); 528 UpperBound = JumpTableBD->getEndAddress(); 529 } 530 if (NextJTAddress) 531 UpperBound = std::min(NextJTAddress, UpperBound); 532 533 LLVM_DEBUG({ 534 using JTT = JumpTable::JumpTableType; 535 dbgs() << formatv("BOLT-DEBUG: analyzeJumpTable @{0:x} in {1}, JTT={2}\n", 536 Address, BF.getPrintName(), 537 Type == JTT::JTT_PIC ? "PIC" : "Normal"); 538 }); 539 const uint64_t EntrySize = getJumpTableEntrySize(Type); 540 for (uint64_t EntryAddress = Address; EntryAddress <= UpperBound - EntrySize; 541 EntryAddress += EntrySize) { 542 LLVM_DEBUG(dbgs() << " * Checking 0x" << Twine::utohexstr(EntryAddress) 543 << " -> "); 544 // Check if there's a proper relocation against the jump table entry. 545 if (HasRelocations) { 546 if (Type == JumpTable::JTT_PIC && 547 !DataPCRelocations.count(EntryAddress)) { 548 LLVM_DEBUG( 549 dbgs() << "FAIL: JTT_PIC table, no relocation for this address\n"); 550 break; 551 } 552 if (Type == JumpTable::JTT_NORMAL && !getRelocationAt(EntryAddress)) { 553 LLVM_DEBUG( 554 dbgs() 555 << "FAIL: JTT_NORMAL table, no relocation for this address\n"); 556 break; 557 } 558 } 559 560 const uint64_t Value = 561 (Type == JumpTable::JTT_PIC) 562 ? Address + *getSignedValueAtAddress(EntryAddress, EntrySize) 563 : *getPointerAtAddress(EntryAddress); 564 565 // __builtin_unreachable() case. 566 if (Value == BF.getAddress() + BF.getSize()) { 567 addEntryAddress(Value); 568 HasUnreachable = true; 569 LLVM_DEBUG(dbgs() << formatv("OK: {0:x} __builtin_unreachable\n", Value)); 570 continue; 571 } 572 573 // Function start is another special case. It is allowed in the jump table, 574 // but we need at least one another regular entry to distinguish the table 575 // from, e.g. a function pointer array. 576 if (Value == BF.getAddress()) { 577 HasStartAsEntry = true; 578 addEntryAddress(Value); 579 continue; 580 } 581 582 // Function or one of its fragments. 583 const BinaryFunction *TargetBF = getBinaryFunctionContainingAddress(Value); 584 const bool DoesBelongToFunction = 585 BF.containsAddress(Value) || 586 (TargetBF && TargetBF->isParentOrChildOf(BF)); 587 if (!DoesBelongToFunction) { 588 LLVM_DEBUG({ 589 if (!BF.containsAddress(Value)) { 590 dbgs() << "FAIL: function doesn't contain this address\n"; 591 if (TargetBF) { 592 dbgs() << " ! function containing this address: " 593 << TargetBF->getPrintName() << '\n'; 594 if (TargetBF->isFragment()) { 595 dbgs() << " ! is a fragment"; 596 for (BinaryFunction *Parent : TargetBF->ParentFragments) 597 dbgs() << ", parent: " << Parent->getPrintName(); 598 dbgs() << '\n'; 599 } 600 } 601 } 602 }); 603 break; 604 } 605 606 // Check there's an instruction at this offset. 607 if (TargetBF->getState() == BinaryFunction::State::Disassembled && 608 !TargetBF->getInstructionAtOffset(Value - TargetBF->getAddress())) { 609 LLVM_DEBUG(dbgs() << formatv("FAIL: no instruction at {0:x}\n", Value)); 610 break; 611 } 612 613 ++NumRealEntries; 614 LLVM_DEBUG(dbgs() << formatv("OK: {0:x} real entry\n", Value)); 615 616 if (TargetBF != &BF && HasEntryInFragment) 617 *HasEntryInFragment = true; 618 addEntryAddress(Value); 619 } 620 621 // It's a jump table if the number of real entries is more than 1, or there's 622 // one real entry and one or more special targets. If there are only multiple 623 // special targets, then it's not a jump table. 624 return NumRealEntries + (HasUnreachable || HasStartAsEntry) >= 2; 625 } 626 627 void BinaryContext::populateJumpTables() { 628 LLVM_DEBUG(dbgs() << "DataPCRelocations: " << DataPCRelocations.size() 629 << '\n'); 630 for (auto JTI = JumpTables.begin(), JTE = JumpTables.end(); JTI != JTE; 631 ++JTI) { 632 JumpTable *JT = JTI->second; 633 634 bool NonSimpleParent = false; 635 for (BinaryFunction *BF : JT->Parents) 636 NonSimpleParent |= !BF->isSimple(); 637 if (NonSimpleParent) 638 continue; 639 640 uint64_t NextJTAddress = 0; 641 auto NextJTI = std::next(JTI); 642 if (NextJTI != JTE) 643 NextJTAddress = NextJTI->second->getAddress(); 644 645 const bool Success = 646 analyzeJumpTable(JT->getAddress(), JT->Type, *(JT->Parents[0]), 647 NextJTAddress, &JT->EntriesAsAddress, &JT->IsSplit); 648 if (!Success) { 649 LLVM_DEBUG({ 650 dbgs() << "failed to analyze "; 651 JT->print(dbgs()); 652 if (NextJTI != JTE) { 653 dbgs() << "next "; 654 NextJTI->second->print(dbgs()); 655 } 656 }); 657 llvm_unreachable("jump table heuristic failure"); 658 } 659 for (BinaryFunction *Frag : JT->Parents) { 660 if (JT->IsSplit) 661 Frag->setHasIndirectTargetToSplitFragment(true); 662 for (uint64_t EntryAddress : JT->EntriesAsAddress) 663 // if target is builtin_unreachable 664 if (EntryAddress == Frag->getAddress() + Frag->getSize()) { 665 Frag->IgnoredBranches.emplace_back(EntryAddress - Frag->getAddress(), 666 Frag->getSize()); 667 } else if (EntryAddress >= Frag->getAddress() && 668 EntryAddress < Frag->getAddress() + Frag->getSize()) { 669 Frag->registerReferencedOffset(EntryAddress - Frag->getAddress()); 670 } 671 } 672 673 // In strict mode, erase PC-relative relocation record. Later we check that 674 // all such records are erased and thus have been accounted for. 675 if (opts::StrictMode && JT->Type == JumpTable::JTT_PIC) { 676 for (uint64_t Address = JT->getAddress(); 677 Address < JT->getAddress() + JT->getSize(); 678 Address += JT->EntrySize) { 679 DataPCRelocations.erase(DataPCRelocations.find(Address)); 680 } 681 } 682 683 // Mark to skip the function and all its fragments. 684 for (BinaryFunction *Frag : JT->Parents) 685 if (Frag->hasIndirectTargetToSplitFragment()) 686 addFragmentsToSkip(Frag); 687 } 688 689 if (opts::StrictMode && DataPCRelocations.size()) { 690 LLVM_DEBUG({ 691 dbgs() << DataPCRelocations.size() 692 << " unclaimed PC-relative relocations left in data:\n"; 693 for (uint64_t Reloc : DataPCRelocations) 694 dbgs() << Twine::utohexstr(Reloc) << '\n'; 695 }); 696 assert(0 && "unclaimed PC-relative relocations left in data\n"); 697 } 698 clearList(DataPCRelocations); 699 } 700 701 void BinaryContext::skipMarkedFragments() { 702 std::vector<BinaryFunction *> FragmentQueue; 703 // Copy the functions to FragmentQueue. 704 FragmentQueue.assign(FragmentsToSkip.begin(), FragmentsToSkip.end()); 705 auto addToWorklist = [&](BinaryFunction *Function) -> void { 706 if (FragmentsToSkip.count(Function)) 707 return; 708 FragmentQueue.push_back(Function); 709 addFragmentsToSkip(Function); 710 }; 711 // Functions containing split jump tables need to be skipped with all 712 // fragments (transitively). 713 for (size_t I = 0; I != FragmentQueue.size(); I++) { 714 BinaryFunction *BF = FragmentQueue[I]; 715 assert(FragmentsToSkip.count(BF) && 716 "internal error in traversing function fragments"); 717 if (opts::Verbosity >= 1) 718 errs() << "BOLT-WARNING: Ignoring " << BF->getPrintName() << '\n'; 719 BF->setSimple(false); 720 BF->setHasIndirectTargetToSplitFragment(true); 721 722 llvm::for_each(BF->Fragments, addToWorklist); 723 llvm::for_each(BF->ParentFragments, addToWorklist); 724 } 725 if (!FragmentsToSkip.empty()) 726 errs() << "BOLT-WARNING: skipped " << FragmentsToSkip.size() << " function" 727 << (FragmentsToSkip.size() == 1 ? "" : "s") 728 << " due to cold fragments\n"; 729 } 730 731 MCSymbol *BinaryContext::getOrCreateGlobalSymbol(uint64_t Address, Twine Prefix, 732 uint64_t Size, 733 uint16_t Alignment, 734 unsigned Flags) { 735 auto Itr = BinaryDataMap.find(Address); 736 if (Itr != BinaryDataMap.end()) { 737 assert(Itr->second->getSize() == Size || !Size); 738 return Itr->second->getSymbol(); 739 } 740 741 std::string Name = (Prefix + "0x" + Twine::utohexstr(Address)).str(); 742 assert(!GlobalSymbols.count(Name) && "created name is not unique"); 743 return registerNameAtAddress(Name, Address, Size, Alignment, Flags); 744 } 745 746 MCSymbol *BinaryContext::getOrCreateUndefinedGlobalSymbol(StringRef Name) { 747 return Ctx->getOrCreateSymbol(Name); 748 } 749 750 BinaryFunction *BinaryContext::createBinaryFunction( 751 const std::string &Name, BinarySection &Section, uint64_t Address, 752 uint64_t Size, uint64_t SymbolSize, uint16_t Alignment) { 753 auto Result = BinaryFunctions.emplace( 754 Address, BinaryFunction(Name, Section, Address, Size, *this)); 755 assert(Result.second == true && "unexpected duplicate function"); 756 BinaryFunction *BF = &Result.first->second; 757 registerNameAtAddress(Name, Address, SymbolSize ? SymbolSize : Size, 758 Alignment); 759 setSymbolToFunctionMap(BF->getSymbol(), BF); 760 return BF; 761 } 762 763 const MCSymbol * 764 BinaryContext::getOrCreateJumpTable(BinaryFunction &Function, uint64_t Address, 765 JumpTable::JumpTableType Type) { 766 // Two fragments of same function access same jump table 767 if (JumpTable *JT = getJumpTableContainingAddress(Address)) { 768 assert(JT->Type == Type && "jump table types have to match"); 769 assert(Address == JT->getAddress() && "unexpected non-empty jump table"); 770 771 // Prevent associating a jump table to a specific fragment twice. 772 // This simple check arises from the assumption: no more than 2 fragments. 773 if (JT->Parents.size() == 1 && JT->Parents[0] != &Function) { 774 assert(JT->Parents[0]->isParentOrChildOf(Function) && 775 "cannot re-use jump table of a different function"); 776 // Duplicate the entry for the parent function for easy access 777 JT->Parents.push_back(&Function); 778 if (opts::Verbosity > 2) { 779 outs() << "BOLT-INFO: Multiple fragments access same jump table: " 780 << JT->Parents[0]->getPrintName() << "; " 781 << Function.getPrintName() << "\n"; 782 JT->print(outs()); 783 } 784 Function.JumpTables.emplace(Address, JT); 785 JT->Parents[0]->setHasIndirectTargetToSplitFragment(true); 786 JT->Parents[1]->setHasIndirectTargetToSplitFragment(true); 787 } 788 789 bool IsJumpTableParent = false; 790 (void)IsJumpTableParent; 791 for (BinaryFunction *Frag : JT->Parents) 792 if (Frag == &Function) 793 IsJumpTableParent = true; 794 assert(IsJumpTableParent && 795 "cannot re-use jump table of a different function"); 796 return JT->getFirstLabel(); 797 } 798 799 // Re-use the existing symbol if possible. 800 MCSymbol *JTLabel = nullptr; 801 if (BinaryData *Object = getBinaryDataAtAddress(Address)) { 802 if (!isInternalSymbolName(Object->getSymbol()->getName())) 803 JTLabel = Object->getSymbol(); 804 } 805 806 const uint64_t EntrySize = getJumpTableEntrySize(Type); 807 if (!JTLabel) { 808 const std::string JumpTableName = generateJumpTableName(Function, Address); 809 JTLabel = registerNameAtAddress(JumpTableName, Address, 0, EntrySize); 810 } 811 812 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: creating jump table " << JTLabel->getName() 813 << " in function " << Function << '\n'); 814 815 JumpTable *JT = new JumpTable(*JTLabel, Address, EntrySize, Type, 816 JumpTable::LabelMapType{{0, JTLabel}}, 817 *getSectionForAddress(Address)); 818 JT->Parents.push_back(&Function); 819 if (opts::Verbosity > 2) 820 JT->print(outs()); 821 JumpTables.emplace(Address, JT); 822 823 // Duplicate the entry for the parent function for easy access. 824 Function.JumpTables.emplace(Address, JT); 825 return JTLabel; 826 } 827 828 std::pair<uint64_t, const MCSymbol *> 829 BinaryContext::duplicateJumpTable(BinaryFunction &Function, JumpTable *JT, 830 const MCSymbol *OldLabel) { 831 auto L = scopeLock(); 832 unsigned Offset = 0; 833 bool Found = false; 834 for (std::pair<const unsigned, MCSymbol *> Elmt : JT->Labels) { 835 if (Elmt.second != OldLabel) 836 continue; 837 Offset = Elmt.first; 838 Found = true; 839 break; 840 } 841 assert(Found && "Label not found"); 842 (void)Found; 843 MCSymbol *NewLabel = Ctx->createNamedTempSymbol("duplicatedJT"); 844 JumpTable *NewJT = 845 new JumpTable(*NewLabel, JT->getAddress(), JT->EntrySize, JT->Type, 846 JumpTable::LabelMapType{{Offset, NewLabel}}, 847 *getSectionForAddress(JT->getAddress())); 848 NewJT->Parents = JT->Parents; 849 NewJT->Entries = JT->Entries; 850 NewJT->Counts = JT->Counts; 851 uint64_t JumpTableID = ++DuplicatedJumpTables; 852 // Invert it to differentiate from regular jump tables whose IDs are their 853 // addresses in the input binary memory space 854 JumpTableID = ~JumpTableID; 855 JumpTables.emplace(JumpTableID, NewJT); 856 Function.JumpTables.emplace(JumpTableID, NewJT); 857 return std::make_pair(JumpTableID, NewLabel); 858 } 859 860 std::string BinaryContext::generateJumpTableName(const BinaryFunction &BF, 861 uint64_t Address) { 862 size_t Id; 863 uint64_t Offset = 0; 864 if (const JumpTable *JT = BF.getJumpTableContainingAddress(Address)) { 865 Offset = Address - JT->getAddress(); 866 auto Itr = JT->Labels.find(Offset); 867 if (Itr != JT->Labels.end()) 868 return std::string(Itr->second->getName()); 869 Id = JumpTableIds.at(JT->getAddress()); 870 } else { 871 Id = JumpTableIds[Address] = BF.JumpTables.size(); 872 } 873 return ("JUMP_TABLE/" + BF.getOneName().str() + "." + std::to_string(Id) + 874 (Offset ? ("." + std::to_string(Offset)) : "")); 875 } 876 877 bool BinaryContext::hasValidCodePadding(const BinaryFunction &BF) { 878 // FIXME: aarch64 support is missing. 879 if (!isX86()) 880 return true; 881 882 if (BF.getSize() == BF.getMaxSize()) 883 return true; 884 885 ErrorOr<ArrayRef<unsigned char>> FunctionData = BF.getData(); 886 assert(FunctionData && "cannot get function as data"); 887 888 uint64_t Offset = BF.getSize(); 889 MCInst Instr; 890 uint64_t InstrSize = 0; 891 uint64_t InstrAddress = BF.getAddress() + Offset; 892 using std::placeholders::_1; 893 894 // Skip instructions that satisfy the predicate condition. 895 auto skipInstructions = [&](std::function<bool(const MCInst &)> Predicate) { 896 const uint64_t StartOffset = Offset; 897 for (; Offset < BF.getMaxSize(); 898 Offset += InstrSize, InstrAddress += InstrSize) { 899 if (!DisAsm->getInstruction(Instr, InstrSize, FunctionData->slice(Offset), 900 InstrAddress, nulls())) 901 break; 902 if (!Predicate(Instr)) 903 break; 904 } 905 906 return Offset - StartOffset; 907 }; 908 909 // Skip a sequence of zero bytes. 910 auto skipZeros = [&]() { 911 const uint64_t StartOffset = Offset; 912 for (; Offset < BF.getMaxSize(); ++Offset) 913 if ((*FunctionData)[Offset] != 0) 914 break; 915 916 return Offset - StartOffset; 917 }; 918 919 // Accept the whole padding area filled with breakpoints. 920 auto isBreakpoint = std::bind(&MCPlusBuilder::isBreakpoint, MIB.get(), _1); 921 if (skipInstructions(isBreakpoint) && Offset == BF.getMaxSize()) 922 return true; 923 924 auto isNoop = std::bind(&MCPlusBuilder::isNoop, MIB.get(), _1); 925 926 // Some functions have a jump to the next function or to the padding area 927 // inserted after the body. 928 auto isSkipJump = [&](const MCInst &Instr) { 929 uint64_t TargetAddress = 0; 930 if (MIB->isUnconditionalBranch(Instr) && 931 MIB->evaluateBranch(Instr, InstrAddress, InstrSize, TargetAddress)) { 932 if (TargetAddress >= InstrAddress + InstrSize && 933 TargetAddress <= BF.getAddress() + BF.getMaxSize()) { 934 return true; 935 } 936 } 937 return false; 938 }; 939 940 // Skip over nops, jumps, and zero padding. Allow interleaving (this happens). 941 while (skipInstructions(isNoop) || skipInstructions(isSkipJump) || 942 skipZeros()) 943 ; 944 945 if (Offset == BF.getMaxSize()) 946 return true; 947 948 if (opts::Verbosity >= 1) { 949 errs() << "BOLT-WARNING: bad padding at address 0x" 950 << Twine::utohexstr(BF.getAddress() + BF.getSize()) 951 << " starting at offset " << (Offset - BF.getSize()) 952 << " in function " << BF << '\n' 953 << FunctionData->slice(BF.getSize(), BF.getMaxSize() - BF.getSize()) 954 << '\n'; 955 } 956 957 return false; 958 } 959 960 void BinaryContext::adjustCodePadding() { 961 for (auto &BFI : BinaryFunctions) { 962 BinaryFunction &BF = BFI.second; 963 if (!shouldEmit(BF)) 964 continue; 965 966 if (!hasValidCodePadding(BF)) { 967 if (HasRelocations) { 968 if (opts::Verbosity >= 1) { 969 outs() << "BOLT-INFO: function " << BF 970 << " has invalid padding. Ignoring the function.\n"; 971 } 972 BF.setIgnored(); 973 } else { 974 BF.setMaxSize(BF.getSize()); 975 } 976 } 977 } 978 } 979 980 MCSymbol *BinaryContext::registerNameAtAddress(StringRef Name, uint64_t Address, 981 uint64_t Size, 982 uint16_t Alignment, 983 unsigned Flags) { 984 // Register the name with MCContext. 985 MCSymbol *Symbol = Ctx->getOrCreateSymbol(Name); 986 987 auto GAI = BinaryDataMap.find(Address); 988 BinaryData *BD; 989 if (GAI == BinaryDataMap.end()) { 990 ErrorOr<BinarySection &> SectionOrErr = getSectionForAddress(Address); 991 BinarySection &Section = 992 SectionOrErr ? SectionOrErr.get() : absoluteSection(); 993 BD = new BinaryData(*Symbol, Address, Size, Alignment ? Alignment : 1, 994 Section, Flags); 995 GAI = BinaryDataMap.emplace(Address, BD).first; 996 GlobalSymbols[Name] = BD; 997 updateObjectNesting(GAI); 998 } else { 999 BD = GAI->second; 1000 if (!BD->hasName(Name)) { 1001 GlobalSymbols[Name] = BD; 1002 BD->Symbols.push_back(Symbol); 1003 } 1004 } 1005 1006 return Symbol; 1007 } 1008 1009 const BinaryData * 1010 BinaryContext::getBinaryDataContainingAddressImpl(uint64_t Address) const { 1011 auto NI = BinaryDataMap.lower_bound(Address); 1012 auto End = BinaryDataMap.end(); 1013 if ((NI != End && Address == NI->first) || 1014 ((NI != BinaryDataMap.begin()) && (NI-- != BinaryDataMap.begin()))) { 1015 if (NI->second->containsAddress(Address)) 1016 return NI->second; 1017 1018 // If this is a sub-symbol, see if a parent data contains the address. 1019 const BinaryData *BD = NI->second->getParent(); 1020 while (BD) { 1021 if (BD->containsAddress(Address)) 1022 return BD; 1023 BD = BD->getParent(); 1024 } 1025 } 1026 return nullptr; 1027 } 1028 1029 BinaryData *BinaryContext::getGOTSymbol() { 1030 // First tries to find a global symbol with that name 1031 BinaryData *GOTSymBD = getBinaryDataByName("_GLOBAL_OFFSET_TABLE_"); 1032 if (GOTSymBD) 1033 return GOTSymBD; 1034 1035 // This symbol might be hidden from run-time link, so fetch the local 1036 // definition if available. 1037 GOTSymBD = getBinaryDataByName("_GLOBAL_OFFSET_TABLE_/1"); 1038 if (!GOTSymBD) 1039 return nullptr; 1040 1041 // If the local symbol is not unique, fail 1042 unsigned Index = 2; 1043 SmallString<30> Storage; 1044 while (const BinaryData *BD = 1045 getBinaryDataByName(Twine("_GLOBAL_OFFSET_TABLE_/") 1046 .concat(Twine(Index++)) 1047 .toStringRef(Storage))) 1048 if (BD->getAddress() != GOTSymBD->getAddress()) 1049 return nullptr; 1050 1051 return GOTSymBD; 1052 } 1053 1054 bool BinaryContext::setBinaryDataSize(uint64_t Address, uint64_t Size) { 1055 auto NI = BinaryDataMap.find(Address); 1056 assert(NI != BinaryDataMap.end()); 1057 if (NI == BinaryDataMap.end()) 1058 return false; 1059 // TODO: it's possible that a jump table starts at the same address 1060 // as a larger blob of private data. When we set the size of the 1061 // jump table, it might be smaller than the total blob size. In this 1062 // case we just leave the original size since (currently) it won't really 1063 // affect anything. 1064 assert((!NI->second->Size || NI->second->Size == Size || 1065 (NI->second->isJumpTable() && NI->second->Size > Size)) && 1066 "can't change the size of a symbol that has already had its " 1067 "size set"); 1068 if (!NI->second->Size) { 1069 NI->second->Size = Size; 1070 updateObjectNesting(NI); 1071 return true; 1072 } 1073 return false; 1074 } 1075 1076 void BinaryContext::generateSymbolHashes() { 1077 auto isPadding = [](const BinaryData &BD) { 1078 StringRef Contents = BD.getSection().getContents(); 1079 StringRef SymData = Contents.substr(BD.getOffset(), BD.getSize()); 1080 return (BD.getName().startswith("HOLEat") || 1081 SymData.find_first_not_of(0) == StringRef::npos); 1082 }; 1083 1084 uint64_t NumCollisions = 0; 1085 for (auto &Entry : BinaryDataMap) { 1086 BinaryData &BD = *Entry.second; 1087 StringRef Name = BD.getName(); 1088 1089 if (!isInternalSymbolName(Name)) 1090 continue; 1091 1092 // First check if a non-anonymous alias exists and move it to the front. 1093 if (BD.getSymbols().size() > 1) { 1094 auto Itr = llvm::find_if(BD.getSymbols(), [&](const MCSymbol *Symbol) { 1095 return !isInternalSymbolName(Symbol->getName()); 1096 }); 1097 if (Itr != BD.getSymbols().end()) { 1098 size_t Idx = std::distance(BD.getSymbols().begin(), Itr); 1099 std::swap(BD.getSymbols()[0], BD.getSymbols()[Idx]); 1100 continue; 1101 } 1102 } 1103 1104 // We have to skip 0 size symbols since they will all collide. 1105 if (BD.getSize() == 0) { 1106 continue; 1107 } 1108 1109 const uint64_t Hash = BD.getSection().hash(BD); 1110 const size_t Idx = Name.find("0x"); 1111 std::string NewName = 1112 (Twine(Name.substr(0, Idx)) + "_" + Twine::utohexstr(Hash)).str(); 1113 if (getBinaryDataByName(NewName)) { 1114 // Ignore collisions for symbols that appear to be padding 1115 // (i.e. all zeros or a "hole") 1116 if (!isPadding(BD)) { 1117 if (opts::Verbosity) { 1118 errs() << "BOLT-WARNING: collision detected when hashing " << BD 1119 << " with new name (" << NewName << "), skipping.\n"; 1120 } 1121 ++NumCollisions; 1122 } 1123 continue; 1124 } 1125 BD.Symbols.insert(BD.Symbols.begin(), Ctx->getOrCreateSymbol(NewName)); 1126 GlobalSymbols[NewName] = &BD; 1127 } 1128 if (NumCollisions) { 1129 errs() << "BOLT-WARNING: " << NumCollisions 1130 << " collisions detected while hashing binary objects"; 1131 if (!opts::Verbosity) 1132 errs() << ". Use -v=1 to see the list."; 1133 errs() << '\n'; 1134 } 1135 } 1136 1137 bool BinaryContext::registerFragment(BinaryFunction &TargetFunction, 1138 BinaryFunction &Function) const { 1139 assert(TargetFunction.isFragment() && "TargetFunction must be a fragment"); 1140 if (TargetFunction.isChildOf(Function)) 1141 return true; 1142 TargetFunction.addParentFragment(Function); 1143 Function.addFragment(TargetFunction); 1144 if (!HasRelocations) { 1145 TargetFunction.setSimple(false); 1146 Function.setSimple(false); 1147 } 1148 if (opts::Verbosity >= 1) { 1149 outs() << "BOLT-INFO: marking " << TargetFunction << " as a fragment of " 1150 << Function << '\n'; 1151 } 1152 return true; 1153 } 1154 1155 void BinaryContext::addAdrpAddRelocAArch64(BinaryFunction &BF, 1156 MCInst &LoadLowBits, 1157 MCInst &LoadHiBits, 1158 uint64_t Target) { 1159 const MCSymbol *TargetSymbol; 1160 uint64_t Addend = 0; 1161 std::tie(TargetSymbol, Addend) = handleAddressRef(Target, BF, 1162 /*IsPCRel*/ true); 1163 int64_t Val; 1164 MIB->replaceImmWithSymbolRef(LoadHiBits, TargetSymbol, Addend, Ctx.get(), Val, 1165 ELF::R_AARCH64_ADR_PREL_PG_HI21); 1166 MIB->replaceImmWithSymbolRef(LoadLowBits, TargetSymbol, Addend, Ctx.get(), 1167 Val, ELF::R_AARCH64_ADD_ABS_LO12_NC); 1168 } 1169 1170 bool BinaryContext::handleAArch64Veneer(uint64_t Address, bool MatchOnly) { 1171 BinaryFunction *TargetFunction = getBinaryFunctionContainingAddress(Address); 1172 if (TargetFunction) 1173 return false; 1174 1175 ErrorOr<BinarySection &> Section = getSectionForAddress(Address); 1176 assert(Section && "cannot get section for referenced address"); 1177 if (!Section->isText()) 1178 return false; 1179 1180 bool Ret = false; 1181 StringRef SectionContents = Section->getContents(); 1182 uint64_t Offset = Address - Section->getAddress(); 1183 const uint64_t MaxSize = SectionContents.size() - Offset; 1184 const uint8_t *Bytes = 1185 reinterpret_cast<const uint8_t *>(SectionContents.data()); 1186 ArrayRef<uint8_t> Data(Bytes + Offset, MaxSize); 1187 1188 auto matchVeneer = [&](BinaryFunction::InstrMapType &Instructions, 1189 MCInst &Instruction, uint64_t Offset, 1190 uint64_t AbsoluteInstrAddr, 1191 uint64_t TotalSize) -> bool { 1192 MCInst *TargetHiBits, *TargetLowBits; 1193 uint64_t TargetAddress, Count; 1194 Count = MIB->matchLinkerVeneer(Instructions.begin(), Instructions.end(), 1195 AbsoluteInstrAddr, Instruction, TargetHiBits, 1196 TargetLowBits, TargetAddress); 1197 if (!Count) 1198 return false; 1199 1200 if (MatchOnly) 1201 return true; 1202 1203 // NOTE The target symbol was created during disassemble's 1204 // handleExternalReference 1205 const MCSymbol *VeneerSymbol = getOrCreateGlobalSymbol(Address, "FUNCat"); 1206 BinaryFunction *Veneer = createBinaryFunction(VeneerSymbol->getName().str(), 1207 *Section, Address, TotalSize); 1208 addAdrpAddRelocAArch64(*Veneer, *TargetLowBits, *TargetHiBits, 1209 TargetAddress); 1210 MIB->addAnnotation(Instruction, "AArch64Veneer", true); 1211 Veneer->addInstruction(Offset, std::move(Instruction)); 1212 --Count; 1213 for (auto It = Instructions.rbegin(); Count != 0; ++It, --Count) { 1214 MIB->addAnnotation(It->second, "AArch64Veneer", true); 1215 Veneer->addInstruction(It->first, std::move(It->second)); 1216 } 1217 1218 Veneer->getOrCreateLocalLabel(Address); 1219 Veneer->setMaxSize(TotalSize); 1220 Veneer->updateState(BinaryFunction::State::Disassembled); 1221 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: handling veneer function at 0x" << Address 1222 << "\n"); 1223 return true; 1224 }; 1225 1226 uint64_t Size = 0, TotalSize = 0; 1227 BinaryFunction::InstrMapType VeneerInstructions; 1228 for (Offset = 0; Offset < MaxSize; Offset += Size) { 1229 MCInst Instruction; 1230 const uint64_t AbsoluteInstrAddr = Address + Offset; 1231 if (!SymbolicDisAsm->getInstruction(Instruction, Size, Data.slice(Offset), 1232 AbsoluteInstrAddr, nulls())) 1233 break; 1234 1235 TotalSize += Size; 1236 if (MIB->isBranch(Instruction)) { 1237 Ret = matchVeneer(VeneerInstructions, Instruction, Offset, 1238 AbsoluteInstrAddr, TotalSize); 1239 break; 1240 } 1241 1242 VeneerInstructions.emplace(Offset, std::move(Instruction)); 1243 } 1244 1245 return Ret; 1246 } 1247 1248 void BinaryContext::processInterproceduralReferences() { 1249 for (const std::pair<BinaryFunction *, uint64_t> &It : 1250 InterproceduralReferences) { 1251 BinaryFunction &Function = *It.first; 1252 uint64_t Address = It.second; 1253 if (!Address || Function.isIgnored()) 1254 continue; 1255 1256 BinaryFunction *TargetFunction = 1257 getBinaryFunctionContainingAddress(Address); 1258 if (&Function == TargetFunction) 1259 continue; 1260 1261 if (TargetFunction) { 1262 if (TargetFunction->isFragment() && 1263 !TargetFunction->isChildOf(Function)) { 1264 errs() << "BOLT-WARNING: interprocedural reference between unrelated " 1265 "fragments: " 1266 << Function.getPrintName() << " and " 1267 << TargetFunction->getPrintName() << '\n'; 1268 } 1269 if (uint64_t Offset = Address - TargetFunction->getAddress()) 1270 TargetFunction->addEntryPointAtOffset(Offset); 1271 1272 continue; 1273 } 1274 1275 // Check if address falls in function padding space - this could be 1276 // unmarked data in code. In this case adjust the padding space size. 1277 ErrorOr<BinarySection &> Section = getSectionForAddress(Address); 1278 assert(Section && "cannot get section for referenced address"); 1279 1280 if (!Section->isText()) 1281 continue; 1282 1283 // PLT requires special handling and could be ignored in this context. 1284 StringRef SectionName = Section->getName(); 1285 if (SectionName == ".plt" || SectionName == ".plt.got") 1286 continue; 1287 1288 // Check if it is aarch64 veneer written at Address 1289 if (isAArch64() && handleAArch64Veneer(Address)) 1290 continue; 1291 1292 if (opts::processAllFunctions()) { 1293 errs() << "BOLT-ERROR: cannot process binaries with unmarked " 1294 << "object in code at address 0x" << Twine::utohexstr(Address) 1295 << " belonging to section " << SectionName << " in current mode\n"; 1296 exit(1); 1297 } 1298 1299 TargetFunction = getBinaryFunctionContainingAddress(Address, 1300 /*CheckPastEnd=*/false, 1301 /*UseMaxSize=*/true); 1302 // We are not going to overwrite non-simple functions, but for simple 1303 // ones - adjust the padding size. 1304 if (TargetFunction && TargetFunction->isSimple()) { 1305 errs() << "BOLT-WARNING: function " << *TargetFunction 1306 << " has an object detected in a padding region at address 0x" 1307 << Twine::utohexstr(Address) << '\n'; 1308 TargetFunction->setMaxSize(TargetFunction->getSize()); 1309 } 1310 } 1311 1312 InterproceduralReferences.clear(); 1313 } 1314 1315 void BinaryContext::postProcessSymbolTable() { 1316 fixBinaryDataHoles(); 1317 bool Valid = true; 1318 for (auto &Entry : BinaryDataMap) { 1319 BinaryData *BD = Entry.second; 1320 if ((BD->getName().startswith("SYMBOLat") || 1321 BD->getName().startswith("DATAat")) && 1322 !BD->getParent() && !BD->getSize() && !BD->isAbsolute() && 1323 BD->getSection()) { 1324 errs() << "BOLT-WARNING: zero-sized top level symbol: " << *BD << "\n"; 1325 Valid = false; 1326 } 1327 } 1328 assert(Valid); 1329 (void)Valid; 1330 generateSymbolHashes(); 1331 } 1332 1333 void BinaryContext::foldFunction(BinaryFunction &ChildBF, 1334 BinaryFunction &ParentBF) { 1335 assert(!ChildBF.isMultiEntry() && !ParentBF.isMultiEntry() && 1336 "cannot merge functions with multiple entry points"); 1337 1338 std::unique_lock<llvm::sys::RWMutex> WriteCtxLock(CtxMutex, std::defer_lock); 1339 std::unique_lock<llvm::sys::RWMutex> WriteSymbolMapLock( 1340 SymbolToFunctionMapMutex, std::defer_lock); 1341 1342 const StringRef ChildName = ChildBF.getOneName(); 1343 1344 // Move symbols over and update bookkeeping info. 1345 for (MCSymbol *Symbol : ChildBF.getSymbols()) { 1346 ParentBF.getSymbols().push_back(Symbol); 1347 WriteSymbolMapLock.lock(); 1348 SymbolToFunctionMap[Symbol] = &ParentBF; 1349 WriteSymbolMapLock.unlock(); 1350 // NB: there's no need to update BinaryDataMap and GlobalSymbols. 1351 } 1352 ChildBF.getSymbols().clear(); 1353 1354 // Move other names the child function is known under. 1355 llvm::move(ChildBF.Aliases, std::back_inserter(ParentBF.Aliases)); 1356 ChildBF.Aliases.clear(); 1357 1358 if (HasRelocations) { 1359 // Merge execution counts of ChildBF into those of ParentBF. 1360 // Without relocations, we cannot reliably merge profiles as both functions 1361 // continue to exist and either one can be executed. 1362 ChildBF.mergeProfileDataInto(ParentBF); 1363 1364 std::shared_lock<llvm::sys::RWMutex> ReadBfsLock(BinaryFunctionsMutex, 1365 std::defer_lock); 1366 std::unique_lock<llvm::sys::RWMutex> WriteBfsLock(BinaryFunctionsMutex, 1367 std::defer_lock); 1368 // Remove ChildBF from the global set of functions in relocs mode. 1369 ReadBfsLock.lock(); 1370 auto FI = BinaryFunctions.find(ChildBF.getAddress()); 1371 ReadBfsLock.unlock(); 1372 1373 assert(FI != BinaryFunctions.end() && "function not found"); 1374 assert(&ChildBF == &FI->second && "function mismatch"); 1375 1376 WriteBfsLock.lock(); 1377 ChildBF.clearDisasmState(); 1378 FI = BinaryFunctions.erase(FI); 1379 WriteBfsLock.unlock(); 1380 1381 } else { 1382 // In non-relocation mode we keep the function, but rename it. 1383 std::string NewName = "__ICF_" + ChildName.str(); 1384 1385 WriteCtxLock.lock(); 1386 ChildBF.getSymbols().push_back(Ctx->getOrCreateSymbol(NewName)); 1387 WriteCtxLock.unlock(); 1388 1389 ChildBF.setFolded(&ParentBF); 1390 } 1391 1392 ParentBF.setHasFunctionsFoldedInto(); 1393 } 1394 1395 void BinaryContext::fixBinaryDataHoles() { 1396 assert(validateObjectNesting() && "object nesting inconsitency detected"); 1397 1398 for (BinarySection &Section : allocatableSections()) { 1399 std::vector<std::pair<uint64_t, uint64_t>> Holes; 1400 1401 auto isNotHole = [&Section](const binary_data_iterator &Itr) { 1402 BinaryData *BD = Itr->second; 1403 bool isHole = (!BD->getParent() && !BD->getSize() && BD->isObject() && 1404 (BD->getName().startswith("SYMBOLat0x") || 1405 BD->getName().startswith("DATAat0x") || 1406 BD->getName().startswith("ANONYMOUS"))); 1407 return !isHole && BD->getSection() == Section && !BD->getParent(); 1408 }; 1409 1410 auto BDStart = BinaryDataMap.begin(); 1411 auto BDEnd = BinaryDataMap.end(); 1412 auto Itr = FilteredBinaryDataIterator(isNotHole, BDStart, BDEnd); 1413 auto End = FilteredBinaryDataIterator(isNotHole, BDEnd, BDEnd); 1414 1415 uint64_t EndAddress = Section.getAddress(); 1416 1417 while (Itr != End) { 1418 if (Itr->second->getAddress() > EndAddress) { 1419 uint64_t Gap = Itr->second->getAddress() - EndAddress; 1420 Holes.emplace_back(EndAddress, Gap); 1421 } 1422 EndAddress = Itr->second->getEndAddress(); 1423 ++Itr; 1424 } 1425 1426 if (EndAddress < Section.getEndAddress()) 1427 Holes.emplace_back(EndAddress, Section.getEndAddress() - EndAddress); 1428 1429 // If there is already a symbol at the start of the hole, grow that symbol 1430 // to cover the rest. Otherwise, create a new symbol to cover the hole. 1431 for (std::pair<uint64_t, uint64_t> &Hole : Holes) { 1432 BinaryData *BD = getBinaryDataAtAddress(Hole.first); 1433 if (BD) { 1434 // BD->getSection() can be != Section if there are sections that 1435 // overlap. In this case it is probably safe to just skip the holes 1436 // since the overlapping section will not(?) have any symbols in it. 1437 if (BD->getSection() == Section) 1438 setBinaryDataSize(Hole.first, Hole.second); 1439 } else { 1440 getOrCreateGlobalSymbol(Hole.first, "HOLEat", Hole.second, 1); 1441 } 1442 } 1443 } 1444 1445 assert(validateObjectNesting() && "object nesting inconsitency detected"); 1446 assert(validateHoles() && "top level hole detected in object map"); 1447 } 1448 1449 void BinaryContext::printGlobalSymbols(raw_ostream &OS) const { 1450 const BinarySection *CurrentSection = nullptr; 1451 bool FirstSection = true; 1452 1453 for (auto &Entry : BinaryDataMap) { 1454 const BinaryData *BD = Entry.second; 1455 const BinarySection &Section = BD->getSection(); 1456 if (FirstSection || Section != *CurrentSection) { 1457 uint64_t Address, Size; 1458 StringRef Name = Section.getName(); 1459 if (Section) { 1460 Address = Section.getAddress(); 1461 Size = Section.getSize(); 1462 } else { 1463 Address = BD->getAddress(); 1464 Size = BD->getSize(); 1465 } 1466 OS << "BOLT-INFO: Section " << Name << ", " 1467 << "0x" + Twine::utohexstr(Address) << ":" 1468 << "0x" + Twine::utohexstr(Address + Size) << "/" << Size << "\n"; 1469 CurrentSection = &Section; 1470 FirstSection = false; 1471 } 1472 1473 OS << "BOLT-INFO: "; 1474 const BinaryData *P = BD->getParent(); 1475 while (P) { 1476 OS << " "; 1477 P = P->getParent(); 1478 } 1479 OS << *BD << "\n"; 1480 } 1481 } 1482 1483 Expected<unsigned> BinaryContext::getDwarfFile( 1484 StringRef Directory, StringRef FileName, unsigned FileNumber, 1485 std::optional<MD5::MD5Result> Checksum, std::optional<StringRef> Source, 1486 unsigned CUID, unsigned DWARFVersion) { 1487 DwarfLineTable &Table = DwarfLineTablesCUMap[CUID]; 1488 return Table.tryGetFile(Directory, FileName, Checksum, Source, DWARFVersion, 1489 FileNumber); 1490 } 1491 1492 unsigned BinaryContext::addDebugFilenameToUnit(const uint32_t DestCUID, 1493 const uint32_t SrcCUID, 1494 unsigned FileIndex) { 1495 DWARFCompileUnit *SrcUnit = DwCtx->getCompileUnitForOffset(SrcCUID); 1496 const DWARFDebugLine::LineTable *LineTable = 1497 DwCtx->getLineTableForUnit(SrcUnit); 1498 const std::vector<DWARFDebugLine::FileNameEntry> &FileNames = 1499 LineTable->Prologue.FileNames; 1500 // Dir indexes start at 1, as DWARF file numbers, and a dir index 0 1501 // means empty dir. 1502 assert(FileIndex > 0 && FileIndex <= FileNames.size() && 1503 "FileIndex out of range for the compilation unit."); 1504 StringRef Dir = ""; 1505 if (FileNames[FileIndex - 1].DirIdx != 0) { 1506 if (std::optional<const char *> DirName = dwarf::toString( 1507 LineTable->Prologue 1508 .IncludeDirectories[FileNames[FileIndex - 1].DirIdx - 1])) { 1509 Dir = *DirName; 1510 } 1511 } 1512 StringRef FileName = ""; 1513 if (std::optional<const char *> FName = 1514 dwarf::toString(FileNames[FileIndex - 1].Name)) 1515 FileName = *FName; 1516 assert(FileName != ""); 1517 DWARFCompileUnit *DstUnit = DwCtx->getCompileUnitForOffset(DestCUID); 1518 return cantFail(getDwarfFile(Dir, FileName, 0, std::nullopt, std::nullopt, 1519 DestCUID, DstUnit->getVersion())); 1520 } 1521 1522 std::vector<BinaryFunction *> BinaryContext::getSortedFunctions() { 1523 std::vector<BinaryFunction *> SortedFunctions(BinaryFunctions.size()); 1524 llvm::transform(llvm::make_second_range(BinaryFunctions), 1525 SortedFunctions.begin(), 1526 [](BinaryFunction &BF) { return &BF; }); 1527 1528 llvm::stable_sort(SortedFunctions, 1529 [](const BinaryFunction *A, const BinaryFunction *B) { 1530 if (A->hasValidIndex() && B->hasValidIndex()) { 1531 return A->getIndex() < B->getIndex(); 1532 } 1533 return A->hasValidIndex(); 1534 }); 1535 return SortedFunctions; 1536 } 1537 1538 std::vector<BinaryFunction *> BinaryContext::getAllBinaryFunctions() { 1539 std::vector<BinaryFunction *> AllFunctions; 1540 AllFunctions.reserve(BinaryFunctions.size() + InjectedBinaryFunctions.size()); 1541 llvm::transform(llvm::make_second_range(BinaryFunctions), 1542 std::back_inserter(AllFunctions), 1543 [](BinaryFunction &BF) { return &BF; }); 1544 llvm::copy(InjectedBinaryFunctions, std::back_inserter(AllFunctions)); 1545 1546 return AllFunctions; 1547 } 1548 1549 std::optional<DWARFUnit *> BinaryContext::getDWOCU(uint64_t DWOId) { 1550 auto Iter = DWOCUs.find(DWOId); 1551 if (Iter == DWOCUs.end()) 1552 return std::nullopt; 1553 1554 return Iter->second; 1555 } 1556 1557 DWARFContext *BinaryContext::getDWOContext() const { 1558 if (DWOCUs.empty()) 1559 return nullptr; 1560 return &DWOCUs.begin()->second->getContext(); 1561 } 1562 1563 /// Handles DWO sections that can either be in .o, .dwo or .dwp files. 1564 void BinaryContext::preprocessDWODebugInfo() { 1565 for (const std::unique_ptr<DWARFUnit> &CU : DwCtx->compile_units()) { 1566 DWARFUnit *const DwarfUnit = CU.get(); 1567 if (std::optional<uint64_t> DWOId = DwarfUnit->getDWOId()) { 1568 DWARFUnit *DWOCU = DwarfUnit->getNonSkeletonUnitDIE(false).getDwarfUnit(); 1569 if (!DWOCU->isDWOUnit()) { 1570 std::string DWOName = dwarf::toString( 1571 DwarfUnit->getUnitDIE().find( 1572 {dwarf::DW_AT_dwo_name, dwarf::DW_AT_GNU_dwo_name}), 1573 ""); 1574 outs() << "BOLT-WARNING: Debug Fission: DWO debug information for " 1575 << DWOName 1576 << " was not retrieved and won't be updated. Please check " 1577 "relative path.\n"; 1578 continue; 1579 } 1580 DWOCUs[*DWOId] = DWOCU; 1581 } 1582 } 1583 if (!DWOCUs.empty()) 1584 outs() << "BOLT-INFO: processing split DWARF\n"; 1585 } 1586 1587 void BinaryContext::preprocessDebugInfo() { 1588 struct CURange { 1589 uint64_t LowPC; 1590 uint64_t HighPC; 1591 DWARFUnit *Unit; 1592 1593 bool operator<(const CURange &Other) const { return LowPC < Other.LowPC; } 1594 }; 1595 1596 // Building a map of address ranges to CUs similar to .debug_aranges and use 1597 // it to assign CU to functions. 1598 std::vector<CURange> AllRanges; 1599 AllRanges.reserve(DwCtx->getNumCompileUnits()); 1600 for (const std::unique_ptr<DWARFUnit> &CU : DwCtx->compile_units()) { 1601 Expected<DWARFAddressRangesVector> RangesOrError = 1602 CU->getUnitDIE().getAddressRanges(); 1603 if (!RangesOrError) { 1604 consumeError(RangesOrError.takeError()); 1605 continue; 1606 } 1607 for (DWARFAddressRange &Range : *RangesOrError) { 1608 // Parts of the debug info could be invalidated due to corresponding code 1609 // being removed from the binary by the linker. Hence we check if the 1610 // address is a valid one. 1611 if (containsAddress(Range.LowPC)) 1612 AllRanges.emplace_back(CURange{Range.LowPC, Range.HighPC, CU.get()}); 1613 } 1614 1615 ContainsDwarf5 |= CU->getVersion() >= 5; 1616 ContainsDwarfLegacy |= CU->getVersion() < 5; 1617 } 1618 1619 llvm::sort(AllRanges); 1620 for (auto &KV : BinaryFunctions) { 1621 const uint64_t FunctionAddress = KV.first; 1622 BinaryFunction &Function = KV.second; 1623 1624 auto It = llvm::partition_point( 1625 AllRanges, [=](CURange R) { return R.HighPC <= FunctionAddress; }); 1626 if (It != AllRanges.end() && It->LowPC <= FunctionAddress) 1627 Function.setDWARFUnit(It->Unit); 1628 } 1629 1630 // Discover units with debug info that needs to be updated. 1631 for (const auto &KV : BinaryFunctions) { 1632 const BinaryFunction &BF = KV.second; 1633 if (shouldEmit(BF) && BF.getDWARFUnit()) 1634 ProcessedCUs.insert(BF.getDWARFUnit()); 1635 } 1636 1637 // Clear debug info for functions from units that we are not going to process. 1638 for (auto &KV : BinaryFunctions) { 1639 BinaryFunction &BF = KV.second; 1640 if (BF.getDWARFUnit() && !ProcessedCUs.count(BF.getDWARFUnit())) 1641 BF.setDWARFUnit(nullptr); 1642 } 1643 1644 if (opts::Verbosity >= 1) { 1645 outs() << "BOLT-INFO: " << ProcessedCUs.size() << " out of " 1646 << DwCtx->getNumCompileUnits() << " CUs will be updated\n"; 1647 } 1648 1649 preprocessDWODebugInfo(); 1650 1651 // Populate MCContext with DWARF files from all units. 1652 StringRef GlobalPrefix = AsmInfo->getPrivateGlobalPrefix(); 1653 for (const std::unique_ptr<DWARFUnit> &CU : DwCtx->compile_units()) { 1654 const uint64_t CUID = CU->getOffset(); 1655 DwarfLineTable &BinaryLineTable = getDwarfLineTable(CUID); 1656 BinaryLineTable.setLabel(Ctx->getOrCreateSymbol( 1657 GlobalPrefix + "line_table_start" + Twine(CUID))); 1658 1659 if (!ProcessedCUs.count(CU.get())) 1660 continue; 1661 1662 const DWARFDebugLine::LineTable *LineTable = 1663 DwCtx->getLineTableForUnit(CU.get()); 1664 const std::vector<DWARFDebugLine::FileNameEntry> &FileNames = 1665 LineTable->Prologue.FileNames; 1666 1667 uint16_t DwarfVersion = LineTable->Prologue.getVersion(); 1668 if (DwarfVersion >= 5) { 1669 std::optional<MD5::MD5Result> Checksum; 1670 if (LineTable->Prologue.ContentTypes.HasMD5) 1671 Checksum = LineTable->Prologue.FileNames[0].Checksum; 1672 std::optional<const char *> Name = 1673 dwarf::toString(CU->getUnitDIE().find(dwarf::DW_AT_name), nullptr); 1674 if (std::optional<uint64_t> DWOID = CU->getDWOId()) { 1675 auto Iter = DWOCUs.find(*DWOID); 1676 assert(Iter != DWOCUs.end() && "DWO CU was not found."); 1677 Name = dwarf::toString( 1678 Iter->second->getUnitDIE().find(dwarf::DW_AT_name), nullptr); 1679 } 1680 BinaryLineTable.setRootFile(CU->getCompilationDir(), *Name, Checksum, 1681 std::nullopt); 1682 } 1683 1684 BinaryLineTable.setDwarfVersion(DwarfVersion); 1685 1686 // Assign a unique label to every line table, one per CU. 1687 // Make sure empty debug line tables are registered too. 1688 if (FileNames.empty()) { 1689 cantFail(getDwarfFile("", "<unknown>", 0, std::nullopt, std::nullopt, 1690 CUID, DwarfVersion)); 1691 continue; 1692 } 1693 const uint32_t Offset = DwarfVersion < 5 ? 1 : 0; 1694 for (size_t I = 0, Size = FileNames.size(); I != Size; ++I) { 1695 // Dir indexes start at 1, as DWARF file numbers, and a dir index 0 1696 // means empty dir. 1697 StringRef Dir = ""; 1698 if (FileNames[I].DirIdx != 0 || DwarfVersion >= 5) 1699 if (std::optional<const char *> DirName = dwarf::toString( 1700 LineTable->Prologue 1701 .IncludeDirectories[FileNames[I].DirIdx - Offset])) 1702 Dir = *DirName; 1703 StringRef FileName = ""; 1704 if (std::optional<const char *> FName = 1705 dwarf::toString(FileNames[I].Name)) 1706 FileName = *FName; 1707 assert(FileName != ""); 1708 std::optional<MD5::MD5Result> Checksum; 1709 if (DwarfVersion >= 5 && LineTable->Prologue.ContentTypes.HasMD5) 1710 Checksum = LineTable->Prologue.FileNames[I].Checksum; 1711 cantFail(getDwarfFile(Dir, FileName, 0, Checksum, std::nullopt, CUID, 1712 DwarfVersion)); 1713 } 1714 } 1715 } 1716 1717 bool BinaryContext::shouldEmit(const BinaryFunction &Function) const { 1718 if (Function.isPseudo()) 1719 return false; 1720 1721 if (opts::processAllFunctions()) 1722 return true; 1723 1724 if (Function.isIgnored()) 1725 return false; 1726 1727 // In relocation mode we will emit non-simple functions with CFG. 1728 // If the function does not have a CFG it should be marked as ignored. 1729 return HasRelocations || Function.isSimple(); 1730 } 1731 1732 void BinaryContext::printCFI(raw_ostream &OS, const MCCFIInstruction &Inst) { 1733 uint32_t Operation = Inst.getOperation(); 1734 switch (Operation) { 1735 case MCCFIInstruction::OpSameValue: 1736 OS << "OpSameValue Reg" << Inst.getRegister(); 1737 break; 1738 case MCCFIInstruction::OpRememberState: 1739 OS << "OpRememberState"; 1740 break; 1741 case MCCFIInstruction::OpRestoreState: 1742 OS << "OpRestoreState"; 1743 break; 1744 case MCCFIInstruction::OpOffset: 1745 OS << "OpOffset Reg" << Inst.getRegister() << " " << Inst.getOffset(); 1746 break; 1747 case MCCFIInstruction::OpDefCfaRegister: 1748 OS << "OpDefCfaRegister Reg" << Inst.getRegister(); 1749 break; 1750 case MCCFIInstruction::OpDefCfaOffset: 1751 OS << "OpDefCfaOffset " << Inst.getOffset(); 1752 break; 1753 case MCCFIInstruction::OpDefCfa: 1754 OS << "OpDefCfa Reg" << Inst.getRegister() << " " << Inst.getOffset(); 1755 break; 1756 case MCCFIInstruction::OpRelOffset: 1757 OS << "OpRelOffset Reg" << Inst.getRegister() << " " << Inst.getOffset(); 1758 break; 1759 case MCCFIInstruction::OpAdjustCfaOffset: 1760 OS << "OfAdjustCfaOffset " << Inst.getOffset(); 1761 break; 1762 case MCCFIInstruction::OpEscape: 1763 OS << "OpEscape"; 1764 break; 1765 case MCCFIInstruction::OpRestore: 1766 OS << "OpRestore Reg" << Inst.getRegister(); 1767 break; 1768 case MCCFIInstruction::OpUndefined: 1769 OS << "OpUndefined Reg" << Inst.getRegister(); 1770 break; 1771 case MCCFIInstruction::OpRegister: 1772 OS << "OpRegister Reg" << Inst.getRegister() << " Reg" 1773 << Inst.getRegister2(); 1774 break; 1775 case MCCFIInstruction::OpWindowSave: 1776 OS << "OpWindowSave"; 1777 break; 1778 case MCCFIInstruction::OpGnuArgsSize: 1779 OS << "OpGnuArgsSize"; 1780 break; 1781 default: 1782 OS << "Op#" << Operation; 1783 break; 1784 } 1785 } 1786 1787 MarkerSymType BinaryContext::getMarkerType(const SymbolRef &Symbol) const { 1788 // For aarch64 and riscv, the ABI defines mapping symbols so we identify data 1789 // in the code section (see IHI0056B). $x identifies a symbol starting code or 1790 // the end of a data chunk inside code, $d indentifies start of data. 1791 if ((!isAArch64() && !isRISCV()) || ELFSymbolRef(Symbol).getSize()) 1792 return MarkerSymType::NONE; 1793 1794 Expected<StringRef> NameOrError = Symbol.getName(); 1795 Expected<object::SymbolRef::Type> TypeOrError = Symbol.getType(); 1796 1797 if (!TypeOrError || !NameOrError) 1798 return MarkerSymType::NONE; 1799 1800 if (*TypeOrError != SymbolRef::ST_Unknown) 1801 return MarkerSymType::NONE; 1802 1803 if (*NameOrError == "$x" || NameOrError->startswith("$x.")) 1804 return MarkerSymType::CODE; 1805 1806 if (*NameOrError == "$d" || NameOrError->startswith("$d.")) 1807 return MarkerSymType::DATA; 1808 1809 return MarkerSymType::NONE; 1810 } 1811 1812 bool BinaryContext::isMarker(const SymbolRef &Symbol) const { 1813 return getMarkerType(Symbol) != MarkerSymType::NONE; 1814 } 1815 1816 static void printDebugInfo(raw_ostream &OS, const MCInst &Instruction, 1817 const BinaryFunction *Function, 1818 DWARFContext *DwCtx) { 1819 DebugLineTableRowRef RowRef = 1820 DebugLineTableRowRef::fromSMLoc(Instruction.getLoc()); 1821 if (RowRef == DebugLineTableRowRef::NULL_ROW) 1822 return; 1823 1824 const DWARFDebugLine::LineTable *LineTable; 1825 if (Function && Function->getDWARFUnit() && 1826 Function->getDWARFUnit()->getOffset() == RowRef.DwCompileUnitIndex) { 1827 LineTable = Function->getDWARFLineTable(); 1828 } else { 1829 LineTable = DwCtx->getLineTableForUnit( 1830 DwCtx->getCompileUnitForOffset(RowRef.DwCompileUnitIndex)); 1831 } 1832 assert(LineTable && "line table expected for instruction with debug info"); 1833 1834 const DWARFDebugLine::Row &Row = LineTable->Rows[RowRef.RowIndex - 1]; 1835 StringRef FileName = ""; 1836 if (std::optional<const char *> FName = 1837 dwarf::toString(LineTable->Prologue.FileNames[Row.File - 1].Name)) 1838 FileName = *FName; 1839 OS << " # debug line " << FileName << ":" << Row.Line; 1840 if (Row.Column) 1841 OS << ":" << Row.Column; 1842 if (Row.Discriminator) 1843 OS << " discriminator:" << Row.Discriminator; 1844 } 1845 1846 void BinaryContext::printInstruction(raw_ostream &OS, const MCInst &Instruction, 1847 uint64_t Offset, 1848 const BinaryFunction *Function, 1849 bool PrintMCInst, bool PrintMemData, 1850 bool PrintRelocations, 1851 StringRef Endl) const { 1852 if (MIB->isEHLabel(Instruction)) { 1853 OS << " EH_LABEL: " << *MIB->getTargetSymbol(Instruction) << Endl; 1854 return; 1855 } 1856 OS << format(" %08" PRIx64 ": ", Offset); 1857 if (MIB->isCFI(Instruction)) { 1858 uint32_t Offset = Instruction.getOperand(0).getImm(); 1859 OS << "\t!CFI\t$" << Offset << "\t; "; 1860 if (Function) 1861 printCFI(OS, *Function->getCFIFor(Instruction)); 1862 OS << Endl; 1863 return; 1864 } 1865 InstPrinter->printInst(&Instruction, 0, "", *STI, OS); 1866 if (MIB->isCall(Instruction)) { 1867 if (MIB->isTailCall(Instruction)) 1868 OS << " # TAILCALL "; 1869 if (MIB->isInvoke(Instruction)) { 1870 const std::optional<MCPlus::MCLandingPad> EHInfo = 1871 MIB->getEHInfo(Instruction); 1872 OS << " # handler: "; 1873 if (EHInfo->first) 1874 OS << *EHInfo->first; 1875 else 1876 OS << '0'; 1877 OS << "; action: " << EHInfo->second; 1878 const int64_t GnuArgsSize = MIB->getGnuArgsSize(Instruction); 1879 if (GnuArgsSize >= 0) 1880 OS << "; GNU_args_size = " << GnuArgsSize; 1881 } 1882 } else if (MIB->isIndirectBranch(Instruction)) { 1883 if (uint64_t JTAddress = MIB->getJumpTable(Instruction)) { 1884 OS << " # JUMPTABLE @0x" << Twine::utohexstr(JTAddress); 1885 } else { 1886 OS << " # UNKNOWN CONTROL FLOW"; 1887 } 1888 } 1889 if (std::optional<uint32_t> Offset = MIB->getOffset(Instruction)) 1890 OS << " # Offset: " << *Offset; 1891 if (auto Label = MIB->getLabel(Instruction)) 1892 OS << " # Label: " << **Label; 1893 1894 MIB->printAnnotations(Instruction, OS); 1895 1896 if (opts::PrintDebugInfo) 1897 printDebugInfo(OS, Instruction, Function, DwCtx.get()); 1898 1899 if ((opts::PrintRelocations || PrintRelocations) && Function) { 1900 const uint64_t Size = computeCodeSize(&Instruction, &Instruction + 1); 1901 Function->printRelocations(OS, Offset, Size); 1902 } 1903 1904 OS << Endl; 1905 1906 if (PrintMCInst) { 1907 Instruction.dump_pretty(OS, InstPrinter.get()); 1908 OS << Endl; 1909 } 1910 } 1911 1912 std::optional<uint64_t> 1913 BinaryContext::getBaseAddressForMapping(uint64_t MMapAddress, 1914 uint64_t FileOffset) const { 1915 // Find a segment with a matching file offset. 1916 for (auto &KV : SegmentMapInfo) { 1917 const SegmentInfo &SegInfo = KV.second; 1918 if (alignDown(SegInfo.FileOffset, SegInfo.Alignment) == FileOffset) { 1919 // Use segment's aligned memory offset to calculate the base address. 1920 const uint64_t MemOffset = alignDown(SegInfo.Address, SegInfo.Alignment); 1921 return MMapAddress - MemOffset; 1922 } 1923 } 1924 1925 return std::nullopt; 1926 } 1927 1928 ErrorOr<BinarySection &> BinaryContext::getSectionForAddress(uint64_t Address) { 1929 auto SI = AddressToSection.upper_bound(Address); 1930 if (SI != AddressToSection.begin()) { 1931 --SI; 1932 uint64_t UpperBound = SI->first + SI->second->getSize(); 1933 if (!SI->second->getSize()) 1934 UpperBound += 1; 1935 if (UpperBound > Address) 1936 return *SI->second; 1937 } 1938 return std::make_error_code(std::errc::bad_address); 1939 } 1940 1941 ErrorOr<StringRef> 1942 BinaryContext::getSectionNameForAddress(uint64_t Address) const { 1943 if (ErrorOr<const BinarySection &> Section = getSectionForAddress(Address)) 1944 return Section->getName(); 1945 return std::make_error_code(std::errc::bad_address); 1946 } 1947 1948 BinarySection &BinaryContext::registerSection(BinarySection *Section) { 1949 auto Res = Sections.insert(Section); 1950 (void)Res; 1951 assert(Res.second && "can't register the same section twice."); 1952 1953 // Only register allocatable sections in the AddressToSection map. 1954 if (Section->isAllocatable() && Section->getAddress()) 1955 AddressToSection.insert(std::make_pair(Section->getAddress(), Section)); 1956 NameToSection.insert( 1957 std::make_pair(std::string(Section->getName()), Section)); 1958 if (Section->hasSectionRef()) 1959 SectionRefToBinarySection.insert( 1960 std::make_pair(Section->getSectionRef(), Section)); 1961 1962 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: registering " << *Section << "\n"); 1963 return *Section; 1964 } 1965 1966 BinarySection &BinaryContext::registerSection(SectionRef Section) { 1967 return registerSection(new BinarySection(*this, Section)); 1968 } 1969 1970 BinarySection & 1971 BinaryContext::registerSection(const Twine &SectionName, 1972 const BinarySection &OriginalSection) { 1973 return registerSection( 1974 new BinarySection(*this, SectionName, OriginalSection)); 1975 } 1976 1977 BinarySection & 1978 BinaryContext::registerOrUpdateSection(const Twine &Name, unsigned ELFType, 1979 unsigned ELFFlags, uint8_t *Data, 1980 uint64_t Size, unsigned Alignment) { 1981 auto NamedSections = getSectionByName(Name); 1982 if (NamedSections.begin() != NamedSections.end()) { 1983 assert(std::next(NamedSections.begin()) == NamedSections.end() && 1984 "can only update unique sections"); 1985 BinarySection *Section = NamedSections.begin()->second; 1986 1987 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: updating " << *Section << " -> "); 1988 const bool Flag = Section->isAllocatable(); 1989 (void)Flag; 1990 Section->update(Data, Size, Alignment, ELFType, ELFFlags); 1991 LLVM_DEBUG(dbgs() << *Section << "\n"); 1992 // FIXME: Fix section flags/attributes for MachO. 1993 if (isELF()) 1994 assert(Flag == Section->isAllocatable() && 1995 "can't change section allocation status"); 1996 return *Section; 1997 } 1998 1999 return registerSection( 2000 new BinarySection(*this, Name, Data, Size, Alignment, ELFType, ELFFlags)); 2001 } 2002 2003 void BinaryContext::deregisterSectionName(const BinarySection &Section) { 2004 auto NameRange = NameToSection.equal_range(Section.getName().str()); 2005 while (NameRange.first != NameRange.second) { 2006 if (NameRange.first->second == &Section) { 2007 NameToSection.erase(NameRange.first); 2008 break; 2009 } 2010 ++NameRange.first; 2011 } 2012 } 2013 2014 void BinaryContext::deregisterUnusedSections() { 2015 ErrorOr<BinarySection &> AbsSection = getUniqueSectionByName("<absolute>"); 2016 for (auto SI = Sections.begin(); SI != Sections.end();) { 2017 BinarySection *Section = *SI; 2018 // We check getOutputData() instead of getOutputSize() because sometimes 2019 // zero-sized .text.cold sections are allocated. 2020 if (Section->hasSectionRef() || Section->getOutputData() || 2021 (AbsSection && Section == &AbsSection.get())) { 2022 ++SI; 2023 continue; 2024 } 2025 2026 LLVM_DEBUG(dbgs() << "LLVM-DEBUG: deregistering " << Section->getName() 2027 << '\n';); 2028 deregisterSectionName(*Section); 2029 SI = Sections.erase(SI); 2030 delete Section; 2031 } 2032 } 2033 2034 bool BinaryContext::deregisterSection(BinarySection &Section) { 2035 BinarySection *SectionPtr = &Section; 2036 auto Itr = Sections.find(SectionPtr); 2037 if (Itr != Sections.end()) { 2038 auto Range = AddressToSection.equal_range(SectionPtr->getAddress()); 2039 while (Range.first != Range.second) { 2040 if (Range.first->second == SectionPtr) { 2041 AddressToSection.erase(Range.first); 2042 break; 2043 } 2044 ++Range.first; 2045 } 2046 2047 deregisterSectionName(*SectionPtr); 2048 Sections.erase(Itr); 2049 delete SectionPtr; 2050 return true; 2051 } 2052 return false; 2053 } 2054 2055 void BinaryContext::renameSection(BinarySection &Section, 2056 const Twine &NewName) { 2057 auto Itr = Sections.find(&Section); 2058 assert(Itr != Sections.end() && "Section must exist to be renamed."); 2059 Sections.erase(Itr); 2060 2061 deregisterSectionName(Section); 2062 2063 Section.Name = NewName.str(); 2064 Section.setOutputName(Section.Name); 2065 2066 NameToSection.insert(std::make_pair(Section.Name, &Section)); 2067 2068 // Reinsert with the new name. 2069 Sections.insert(&Section); 2070 } 2071 2072 void BinaryContext::printSections(raw_ostream &OS) const { 2073 for (BinarySection *const &Section : Sections) 2074 OS << "BOLT-INFO: " << *Section << "\n"; 2075 } 2076 2077 BinarySection &BinaryContext::absoluteSection() { 2078 if (ErrorOr<BinarySection &> Section = getUniqueSectionByName("<absolute>")) 2079 return *Section; 2080 return registerOrUpdateSection("<absolute>", ELF::SHT_NULL, 0u); 2081 } 2082 2083 ErrorOr<uint64_t> BinaryContext::getUnsignedValueAtAddress(uint64_t Address, 2084 size_t Size) const { 2085 const ErrorOr<const BinarySection &> Section = getSectionForAddress(Address); 2086 if (!Section) 2087 return std::make_error_code(std::errc::bad_address); 2088 2089 if (Section->isVirtual()) 2090 return 0; 2091 2092 DataExtractor DE(Section->getContents(), AsmInfo->isLittleEndian(), 2093 AsmInfo->getCodePointerSize()); 2094 auto ValueOffset = static_cast<uint64_t>(Address - Section->getAddress()); 2095 return DE.getUnsigned(&ValueOffset, Size); 2096 } 2097 2098 ErrorOr<uint64_t> BinaryContext::getSignedValueAtAddress(uint64_t Address, 2099 size_t Size) const { 2100 const ErrorOr<const BinarySection &> Section = getSectionForAddress(Address); 2101 if (!Section) 2102 return std::make_error_code(std::errc::bad_address); 2103 2104 if (Section->isVirtual()) 2105 return 0; 2106 2107 DataExtractor DE(Section->getContents(), AsmInfo->isLittleEndian(), 2108 AsmInfo->getCodePointerSize()); 2109 auto ValueOffset = static_cast<uint64_t>(Address - Section->getAddress()); 2110 return DE.getSigned(&ValueOffset, Size); 2111 } 2112 2113 void BinaryContext::addRelocation(uint64_t Address, MCSymbol *Symbol, 2114 uint64_t Type, uint64_t Addend, 2115 uint64_t Value) { 2116 ErrorOr<BinarySection &> Section = getSectionForAddress(Address); 2117 assert(Section && "cannot find section for address"); 2118 Section->addRelocation(Address - Section->getAddress(), Symbol, Type, Addend, 2119 Value); 2120 } 2121 2122 void BinaryContext::addDynamicRelocation(uint64_t Address, MCSymbol *Symbol, 2123 uint64_t Type, uint64_t Addend, 2124 uint64_t Value) { 2125 ErrorOr<BinarySection &> Section = getSectionForAddress(Address); 2126 assert(Section && "cannot find section for address"); 2127 Section->addDynamicRelocation(Address - Section->getAddress(), Symbol, Type, 2128 Addend, Value); 2129 } 2130 2131 bool BinaryContext::removeRelocationAt(uint64_t Address) { 2132 ErrorOr<BinarySection &> Section = getSectionForAddress(Address); 2133 assert(Section && "cannot find section for address"); 2134 return Section->removeRelocationAt(Address - Section->getAddress()); 2135 } 2136 2137 const Relocation *BinaryContext::getRelocationAt(uint64_t Address) const { 2138 ErrorOr<const BinarySection &> Section = getSectionForAddress(Address); 2139 if (!Section) 2140 return nullptr; 2141 2142 return Section->getRelocationAt(Address - Section->getAddress()); 2143 } 2144 2145 const Relocation * 2146 BinaryContext::getDynamicRelocationAt(uint64_t Address) const { 2147 ErrorOr<const BinarySection &> Section = getSectionForAddress(Address); 2148 if (!Section) 2149 return nullptr; 2150 2151 return Section->getDynamicRelocationAt(Address - Section->getAddress()); 2152 } 2153 2154 void BinaryContext::markAmbiguousRelocations(BinaryData &BD, 2155 const uint64_t Address) { 2156 auto setImmovable = [&](BinaryData &BD) { 2157 BinaryData *Root = BD.getAtomicRoot(); 2158 LLVM_DEBUG(if (Root->isMoveable()) { 2159 dbgs() << "BOLT-DEBUG: setting " << *Root << " as immovable " 2160 << "due to ambiguous relocation referencing 0x" 2161 << Twine::utohexstr(Address) << '\n'; 2162 }); 2163 Root->setIsMoveable(false); 2164 }; 2165 2166 if (Address == BD.getAddress()) { 2167 setImmovable(BD); 2168 2169 // Set previous symbol as immovable 2170 BinaryData *Prev = getBinaryDataContainingAddress(Address - 1); 2171 if (Prev && Prev->getEndAddress() == BD.getAddress()) 2172 setImmovable(*Prev); 2173 } 2174 2175 if (Address == BD.getEndAddress()) { 2176 setImmovable(BD); 2177 2178 // Set next symbol as immovable 2179 BinaryData *Next = getBinaryDataContainingAddress(BD.getEndAddress()); 2180 if (Next && Next->getAddress() == BD.getEndAddress()) 2181 setImmovable(*Next); 2182 } 2183 } 2184 2185 BinaryFunction *BinaryContext::getFunctionForSymbol(const MCSymbol *Symbol, 2186 uint64_t *EntryDesc) { 2187 std::shared_lock<llvm::sys::RWMutex> Lock(SymbolToFunctionMapMutex); 2188 auto BFI = SymbolToFunctionMap.find(Symbol); 2189 if (BFI == SymbolToFunctionMap.end()) 2190 return nullptr; 2191 2192 BinaryFunction *BF = BFI->second; 2193 if (EntryDesc) 2194 *EntryDesc = BF->getEntryIDForSymbol(Symbol); 2195 2196 return BF; 2197 } 2198 2199 void BinaryContext::exitWithBugReport(StringRef Message, 2200 const BinaryFunction &Function) const { 2201 errs() << "=======================================\n"; 2202 errs() << "BOLT is unable to proceed because it couldn't properly understand " 2203 "this function.\n"; 2204 errs() << "If you are running the most recent version of BOLT, you may " 2205 "want to " 2206 "report this and paste this dump.\nPlease check that there is no " 2207 "sensitive contents being shared in this dump.\n"; 2208 errs() << "\nOffending function: " << Function.getPrintName() << "\n\n"; 2209 ScopedPrinter SP(errs()); 2210 SP.printBinaryBlock("Function contents", *Function.getData()); 2211 errs() << "\n"; 2212 Function.dump(); 2213 errs() << "ERROR: " << Message; 2214 errs() << "\n=======================================\n"; 2215 exit(1); 2216 } 2217 2218 BinaryFunction * 2219 BinaryContext::createInjectedBinaryFunction(const std::string &Name, 2220 bool IsSimple) { 2221 InjectedBinaryFunctions.push_back(new BinaryFunction(Name, *this, IsSimple)); 2222 BinaryFunction *BF = InjectedBinaryFunctions.back(); 2223 setSymbolToFunctionMap(BF->getSymbol(), BF); 2224 BF->CurrentState = BinaryFunction::State::CFG; 2225 return BF; 2226 } 2227 2228 std::pair<size_t, size_t> 2229 BinaryContext::calculateEmittedSize(BinaryFunction &BF, bool FixBranches) { 2230 // Adjust branch instruction to match the current layout. 2231 if (FixBranches) 2232 BF.fixBranches(); 2233 2234 // Create local MC context to isolate the effect of ephemeral code emission. 2235 IndependentCodeEmitter MCEInstance = createIndependentMCCodeEmitter(); 2236 MCContext *LocalCtx = MCEInstance.LocalCtx.get(); 2237 MCAsmBackend *MAB = 2238 TheTarget->createMCAsmBackend(*STI, *MRI, MCTargetOptions()); 2239 2240 SmallString<256> Code; 2241 raw_svector_ostream VecOS(Code); 2242 2243 std::unique_ptr<MCObjectWriter> OW = MAB->createObjectWriter(VecOS); 2244 std::unique_ptr<MCStreamer> Streamer(TheTarget->createMCObjectStreamer( 2245 *TheTriple, *LocalCtx, std::unique_ptr<MCAsmBackend>(MAB), std::move(OW), 2246 std::unique_ptr<MCCodeEmitter>(MCEInstance.MCE.release()), *STI, 2247 /*RelaxAll=*/false, 2248 /*IncrementalLinkerCompatible=*/false, 2249 /*DWARFMustBeAtTheEnd=*/false)); 2250 2251 Streamer->initSections(false, *STI); 2252 2253 MCSection *Section = MCEInstance.LocalMOFI->getTextSection(); 2254 Section->setHasInstructions(true); 2255 2256 // Create symbols in the LocalCtx so that they get destroyed with it. 2257 MCSymbol *StartLabel = LocalCtx->createTempSymbol(); 2258 MCSymbol *EndLabel = LocalCtx->createTempSymbol(); 2259 2260 Streamer->switchSection(Section); 2261 Streamer->emitLabel(StartLabel); 2262 emitFunctionBody(*Streamer, BF, BF.getLayout().getMainFragment(), 2263 /*EmitCodeOnly=*/true); 2264 Streamer->emitLabel(EndLabel); 2265 2266 using LabelRange = std::pair<const MCSymbol *, const MCSymbol *>; 2267 SmallVector<LabelRange> SplitLabels; 2268 for (FunctionFragment &FF : BF.getLayout().getSplitFragments()) { 2269 MCSymbol *const SplitStartLabel = LocalCtx->createTempSymbol(); 2270 MCSymbol *const SplitEndLabel = LocalCtx->createTempSymbol(); 2271 SplitLabels.emplace_back(SplitStartLabel, SplitEndLabel); 2272 2273 MCSectionELF *const SplitSection = LocalCtx->getELFSection( 2274 BF.getCodeSectionName(FF.getFragmentNum()), ELF::SHT_PROGBITS, 2275 ELF::SHF_EXECINSTR | ELF::SHF_ALLOC); 2276 SplitSection->setHasInstructions(true); 2277 Streamer->switchSection(SplitSection); 2278 2279 Streamer->emitLabel(SplitStartLabel); 2280 emitFunctionBody(*Streamer, BF, FF, /*EmitCodeOnly=*/true); 2281 Streamer->emitLabel(SplitEndLabel); 2282 // To avoid calling MCObjectStreamer::flushPendingLabels() which is 2283 // private 2284 Streamer->emitBytes(StringRef("")); 2285 Streamer->switchSection(Section); 2286 } 2287 2288 // To avoid calling MCObjectStreamer::flushPendingLabels() which is private or 2289 // MCStreamer::Finish(), which does more than we want 2290 Streamer->emitBytes(StringRef("")); 2291 2292 MCAssembler &Assembler = 2293 static_cast<MCObjectStreamer *>(Streamer.get())->getAssembler(); 2294 MCAsmLayout Layout(Assembler); 2295 Assembler.layout(Layout); 2296 2297 const uint64_t HotSize = 2298 Layout.getSymbolOffset(*EndLabel) - Layout.getSymbolOffset(*StartLabel); 2299 const uint64_t ColdSize = 2300 std::accumulate(SplitLabels.begin(), SplitLabels.end(), 0ULL, 2301 [&](const uint64_t Accu, const LabelRange &Labels) { 2302 return Accu + Layout.getSymbolOffset(*Labels.second) - 2303 Layout.getSymbolOffset(*Labels.first); 2304 }); 2305 2306 // Clean-up the effect of the code emission. 2307 for (const MCSymbol &Symbol : Assembler.symbols()) { 2308 MCSymbol *MutableSymbol = const_cast<MCSymbol *>(&Symbol); 2309 MutableSymbol->setUndefined(); 2310 MutableSymbol->setIsRegistered(false); 2311 } 2312 2313 return std::make_pair(HotSize, ColdSize); 2314 } 2315 2316 bool BinaryContext::validateInstructionEncoding( 2317 ArrayRef<uint8_t> InputSequence) const { 2318 MCInst Inst; 2319 uint64_t InstSize; 2320 DisAsm->getInstruction(Inst, InstSize, InputSequence, 0, nulls()); 2321 assert(InstSize == InputSequence.size() && 2322 "Disassembled instruction size does not match the sequence."); 2323 2324 SmallString<256> Code; 2325 SmallVector<MCFixup, 4> Fixups; 2326 2327 MCE->encodeInstruction(Inst, Code, Fixups, *STI); 2328 auto OutputSequence = ArrayRef<uint8_t>((uint8_t *)Code.data(), Code.size()); 2329 if (InputSequence != OutputSequence) { 2330 if (opts::Verbosity > 1) { 2331 errs() << "BOLT-WARNING: mismatched encoding detected\n" 2332 << " input: " << InputSequence << '\n' 2333 << " output: " << OutputSequence << '\n'; 2334 } 2335 return false; 2336 } 2337 2338 return true; 2339 } 2340 2341 uint64_t BinaryContext::getHotThreshold() const { 2342 static uint64_t Threshold = 0; 2343 if (Threshold == 0) { 2344 Threshold = std::max( 2345 (uint64_t)opts::ExecutionCountThreshold, 2346 NumProfiledFuncs ? SumExecutionCount / (2 * NumProfiledFuncs) : 1); 2347 } 2348 return Threshold; 2349 } 2350 2351 BinaryFunction *BinaryContext::getBinaryFunctionContainingAddress( 2352 uint64_t Address, bool CheckPastEnd, bool UseMaxSize) { 2353 auto FI = BinaryFunctions.upper_bound(Address); 2354 if (FI == BinaryFunctions.begin()) 2355 return nullptr; 2356 --FI; 2357 2358 const uint64_t UsedSize = 2359 UseMaxSize ? FI->second.getMaxSize() : FI->second.getSize(); 2360 2361 if (Address >= FI->first + UsedSize + (CheckPastEnd ? 1 : 0)) 2362 return nullptr; 2363 2364 return &FI->second; 2365 } 2366 2367 BinaryFunction *BinaryContext::getBinaryFunctionAtAddress(uint64_t Address) { 2368 // First, try to find a function starting at the given address. If the 2369 // function was folded, this will get us the original folded function if it 2370 // wasn't removed from the list, e.g. in non-relocation mode. 2371 auto BFI = BinaryFunctions.find(Address); 2372 if (BFI != BinaryFunctions.end()) 2373 return &BFI->second; 2374 2375 // We might have folded the function matching the object at the given 2376 // address. In such case, we look for a function matching the symbol 2377 // registered at the original address. The new function (the one that the 2378 // original was folded into) will hold the symbol. 2379 if (const BinaryData *BD = getBinaryDataAtAddress(Address)) { 2380 uint64_t EntryID = 0; 2381 BinaryFunction *BF = getFunctionForSymbol(BD->getSymbol(), &EntryID); 2382 if (BF && EntryID == 0) 2383 return BF; 2384 } 2385 return nullptr; 2386 } 2387 2388 DebugAddressRangesVector BinaryContext::translateModuleAddressRanges( 2389 const DWARFAddressRangesVector &InputRanges) const { 2390 DebugAddressRangesVector OutputRanges; 2391 2392 for (const DWARFAddressRange Range : InputRanges) { 2393 auto BFI = BinaryFunctions.lower_bound(Range.LowPC); 2394 while (BFI != BinaryFunctions.end()) { 2395 const BinaryFunction &Function = BFI->second; 2396 if (Function.getAddress() >= Range.HighPC) 2397 break; 2398 const DebugAddressRangesVector FunctionRanges = 2399 Function.getOutputAddressRanges(); 2400 llvm::move(FunctionRanges, std::back_inserter(OutputRanges)); 2401 std::advance(BFI, 1); 2402 } 2403 } 2404 2405 return OutputRanges; 2406 } 2407 2408 } // namespace bolt 2409 } // namespace llvm 2410