1 //===- bolt/Core/BinaryContext.cpp - Low-level context --------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file implements the BinaryContext class. 10 // 11 //===----------------------------------------------------------------------===// 12 13 #include "bolt/Core/BinaryContext.h" 14 #include "bolt/Core/BinaryEmitter.h" 15 #include "bolt/Core/BinaryFunction.h" 16 #include "bolt/Utils/CommandLineOpts.h" 17 #include "bolt/Utils/NameResolver.h" 18 #include "bolt/Utils/Utils.h" 19 #include "llvm/ADT/STLExtras.h" 20 #include "llvm/ADT/Twine.h" 21 #include "llvm/DebugInfo/DWARF/DWARFCompileUnit.h" 22 #include "llvm/DebugInfo/DWARF/DWARFFormValue.h" 23 #include "llvm/DebugInfo/DWARF/DWARFUnit.h" 24 #include "llvm/MC/MCAsmLayout.h" 25 #include "llvm/MC/MCAssembler.h" 26 #include "llvm/MC/MCContext.h" 27 #include "llvm/MC/MCDisassembler/MCDisassembler.h" 28 #include "llvm/MC/MCInstPrinter.h" 29 #include "llvm/MC/MCObjectStreamer.h" 30 #include "llvm/MC/MCObjectWriter.h" 31 #include "llvm/MC/MCRegisterInfo.h" 32 #include "llvm/MC/MCSectionELF.h" 33 #include "llvm/MC/MCStreamer.h" 34 #include "llvm/MC/MCSubtargetInfo.h" 35 #include "llvm/MC/MCSymbol.h" 36 #include "llvm/Support/CommandLine.h" 37 #include "llvm/Support/Error.h" 38 #include "llvm/Support/Regex.h" 39 #include <algorithm> 40 #include <functional> 41 #include <iterator> 42 #include <numeric> 43 #include <unordered_set> 44 45 using namespace llvm; 46 47 #undef DEBUG_TYPE 48 #define DEBUG_TYPE "bolt" 49 50 namespace opts { 51 52 cl::opt<bool> NoHugePages("no-huge-pages", 53 cl::desc("use regular size pages for code alignment"), 54 cl::Hidden, cl::cat(BoltCategory)); 55 56 static cl::opt<bool> 57 PrintDebugInfo("print-debug-info", 
58 cl::desc("print debug info when printing functions"), 59 cl::Hidden, 60 cl::ZeroOrMore, 61 cl::cat(BoltCategory)); 62 63 cl::opt<bool> PrintRelocations( 64 "print-relocations", 65 cl::desc("print relocations when printing functions/objects"), cl::Hidden, 66 cl::cat(BoltCategory)); 67 68 static cl::opt<bool> 69 PrintMemData("print-mem-data", 70 cl::desc("print memory data annotations when printing functions"), 71 cl::Hidden, 72 cl::ZeroOrMore, 73 cl::cat(BoltCategory)); 74 75 } // namespace opts 76 77 namespace llvm { 78 namespace bolt { 79 80 BinaryContext::BinaryContext(std::unique_ptr<MCContext> Ctx, 81 std::unique_ptr<DWARFContext> DwCtx, 82 std::unique_ptr<Triple> TheTriple, 83 const Target *TheTarget, std::string TripleName, 84 std::unique_ptr<MCCodeEmitter> MCE, 85 std::unique_ptr<MCObjectFileInfo> MOFI, 86 std::unique_ptr<const MCAsmInfo> AsmInfo, 87 std::unique_ptr<const MCInstrInfo> MII, 88 std::unique_ptr<const MCSubtargetInfo> STI, 89 std::unique_ptr<MCInstPrinter> InstPrinter, 90 std::unique_ptr<const MCInstrAnalysis> MIA, 91 std::unique_ptr<MCPlusBuilder> MIB, 92 std::unique_ptr<const MCRegisterInfo> MRI, 93 std::unique_ptr<MCDisassembler> DisAsm) 94 : Ctx(std::move(Ctx)), DwCtx(std::move(DwCtx)), 95 TheTriple(std::move(TheTriple)), TheTarget(TheTarget), 96 TripleName(TripleName), MCE(std::move(MCE)), MOFI(std::move(MOFI)), 97 AsmInfo(std::move(AsmInfo)), MII(std::move(MII)), STI(std::move(STI)), 98 InstPrinter(std::move(InstPrinter)), MIA(std::move(MIA)), 99 MIB(std::move(MIB)), MRI(std::move(MRI)), DisAsm(std::move(DisAsm)) { 100 Relocation::Arch = this->TheTriple->getArch(); 101 RegularPageSize = isAArch64() ? RegularPageSizeAArch64 : RegularPageSizeX86; 102 PageAlign = opts::NoHugePages ? 
RegularPageSize : HugePageSize; 103 } 104 105 BinaryContext::~BinaryContext() { 106 for (BinarySection *Section : Sections) 107 delete Section; 108 for (BinaryFunction *InjectedFunction : InjectedBinaryFunctions) 109 delete InjectedFunction; 110 for (std::pair<const uint64_t, JumpTable *> JTI : JumpTables) 111 delete JTI.second; 112 clearBinaryData(); 113 } 114 115 /// Create BinaryContext for a given architecture \p ArchName and 116 /// triple \p TripleName. 117 Expected<std::unique_ptr<BinaryContext>> 118 BinaryContext::createBinaryContext(const ObjectFile *File, bool IsPIC, 119 std::unique_ptr<DWARFContext> DwCtx) { 120 StringRef ArchName = ""; 121 StringRef FeaturesStr = ""; 122 switch (File->getArch()) { 123 case llvm::Triple::x86_64: 124 ArchName = "x86-64"; 125 FeaturesStr = "+nopl"; 126 break; 127 case llvm::Triple::aarch64: 128 ArchName = "aarch64"; 129 FeaturesStr = "+all"; 130 break; 131 default: 132 return createStringError(std::errc::not_supported, 133 "BOLT-ERROR: Unrecognized machine in ELF file"); 134 } 135 136 auto TheTriple = std::make_unique<Triple>(File->makeTriple()); 137 const std::string TripleName = TheTriple->str(); 138 139 std::string Error; 140 const Target *TheTarget = 141 TargetRegistry::lookupTarget(std::string(ArchName), *TheTriple, Error); 142 if (!TheTarget) 143 return createStringError(make_error_code(std::errc::not_supported), 144 Twine("BOLT-ERROR: ", Error)); 145 146 std::unique_ptr<const MCRegisterInfo> MRI( 147 TheTarget->createMCRegInfo(TripleName)); 148 if (!MRI) 149 return createStringError( 150 make_error_code(std::errc::not_supported), 151 Twine("BOLT-ERROR: no register info for target ", TripleName)); 152 153 // Set up disassembler. 
154 std::unique_ptr<MCAsmInfo> AsmInfo( 155 TheTarget->createMCAsmInfo(*MRI, TripleName, MCTargetOptions())); 156 if (!AsmInfo) 157 return createStringError( 158 make_error_code(std::errc::not_supported), 159 Twine("BOLT-ERROR: no assembly info for target ", TripleName)); 160 // BOLT creates "func@PLT" symbols for PLT entries. In function assembly dump 161 // we want to emit such names as using @PLT without double quotes to convey 162 // variant kind to the assembler. BOLT doesn't rely on the linker so we can 163 // override the default AsmInfo behavior to emit names the way we want. 164 AsmInfo->setAllowAtInName(true); 165 166 std::unique_ptr<const MCSubtargetInfo> STI( 167 TheTarget->createMCSubtargetInfo(TripleName, "", FeaturesStr)); 168 if (!STI) 169 return createStringError( 170 make_error_code(std::errc::not_supported), 171 Twine("BOLT-ERROR: no subtarget info for target ", TripleName)); 172 173 std::unique_ptr<const MCInstrInfo> MII(TheTarget->createMCInstrInfo()); 174 if (!MII) 175 return createStringError( 176 make_error_code(std::errc::not_supported), 177 Twine("BOLT-ERROR: no instruction info for target ", TripleName)); 178 179 std::unique_ptr<MCContext> Ctx( 180 new MCContext(*TheTriple, AsmInfo.get(), MRI.get(), STI.get())); 181 std::unique_ptr<MCObjectFileInfo> MOFI( 182 TheTarget->createMCObjectFileInfo(*Ctx, IsPIC)); 183 Ctx->setObjectFileInfo(MOFI.get()); 184 // We do not support X86 Large code model. Change this in the future. 185 bool Large = false; 186 if (TheTriple->getArch() == llvm::Triple::aarch64) 187 Large = true; 188 unsigned LSDAEncoding = 189 Large ? dwarf::DW_EH_PE_absptr : dwarf::DW_EH_PE_udata4; 190 if (IsPIC) { 191 LSDAEncoding = dwarf::DW_EH_PE_pcrel | 192 (Large ? 
dwarf::DW_EH_PE_sdata8 : dwarf::DW_EH_PE_sdata4); 193 } 194 195 std::unique_ptr<MCDisassembler> DisAsm( 196 TheTarget->createMCDisassembler(*STI, *Ctx)); 197 198 if (!DisAsm) 199 return createStringError( 200 make_error_code(std::errc::not_supported), 201 Twine("BOLT-ERROR: no disassembler info for target ", TripleName)); 202 203 std::unique_ptr<const MCInstrAnalysis> MIA( 204 TheTarget->createMCInstrAnalysis(MII.get())); 205 if (!MIA) 206 return createStringError( 207 make_error_code(std::errc::not_supported), 208 Twine("BOLT-ERROR: failed to create instruction analysis for target ", 209 TripleName)); 210 211 int AsmPrinterVariant = AsmInfo->getAssemblerDialect(); 212 std::unique_ptr<MCInstPrinter> InstructionPrinter( 213 TheTarget->createMCInstPrinter(*TheTriple, AsmPrinterVariant, *AsmInfo, 214 *MII, *MRI)); 215 if (!InstructionPrinter) 216 return createStringError( 217 make_error_code(std::errc::not_supported), 218 Twine("BOLT-ERROR: no instruction printer for target ", TripleName)); 219 InstructionPrinter->setPrintImmHex(true); 220 221 std::unique_ptr<MCCodeEmitter> MCE( 222 TheTarget->createMCCodeEmitter(*MII, *Ctx)); 223 224 // Make sure we don't miss any output on core dumps. 
225 outs().SetUnbuffered(); 226 errs().SetUnbuffered(); 227 dbgs().SetUnbuffered(); 228 229 auto BC = std::make_unique<BinaryContext>( 230 std::move(Ctx), std::move(DwCtx), std::move(TheTriple), TheTarget, 231 std::string(TripleName), std::move(MCE), std::move(MOFI), 232 std::move(AsmInfo), std::move(MII), std::move(STI), 233 std::move(InstructionPrinter), std::move(MIA), nullptr, std::move(MRI), 234 std::move(DisAsm)); 235 236 BC->LSDAEncoding = LSDAEncoding; 237 238 BC->MAB = std::unique_ptr<MCAsmBackend>( 239 BC->TheTarget->createMCAsmBackend(*BC->STI, *BC->MRI, MCTargetOptions())); 240 241 BC->setFilename(File->getFileName()); 242 243 BC->HasFixedLoadAddress = !IsPIC; 244 245 BC->SymbolicDisAsm = std::unique_ptr<MCDisassembler>( 246 BC->TheTarget->createMCDisassembler(*BC->STI, *BC->Ctx)); 247 248 if (!BC->SymbolicDisAsm) 249 return createStringError( 250 make_error_code(std::errc::not_supported), 251 Twine("BOLT-ERROR: no disassembler info for target ", TripleName)); 252 253 return std::move(BC); 254 } 255 256 bool BinaryContext::forceSymbolRelocations(StringRef SymbolName) const { 257 if (opts::HotText && 258 (SymbolName == "__hot_start" || SymbolName == "__hot_end")) 259 return true; 260 261 if (opts::HotData && 262 (SymbolName == "__hot_data_start" || SymbolName == "__hot_data_end")) 263 return true; 264 265 if (SymbolName == "_end") 266 return true; 267 268 return false; 269 } 270 271 std::unique_ptr<MCObjectWriter> 272 BinaryContext::createObjectWriter(raw_pwrite_stream &OS) { 273 return MAB->createObjectWriter(OS); 274 } 275 276 bool BinaryContext::validateObjectNesting() const { 277 auto Itr = BinaryDataMap.begin(); 278 auto End = BinaryDataMap.end(); 279 bool Valid = true; 280 while (Itr != End) { 281 auto Next = std::next(Itr); 282 while (Next != End && 283 Itr->second->getSection() == Next->second->getSection() && 284 Itr->second->containsRange(Next->second->getAddress(), 285 Next->second->getSize())) { 286 if (Next->second->Parent != Itr->second) { 
287 errs() << "BOLT-WARNING: object nesting incorrect for:\n" 288 << "BOLT-WARNING: " << *Itr->second << "\n" 289 << "BOLT-WARNING: " << *Next->second << "\n"; 290 Valid = false; 291 } 292 ++Next; 293 } 294 Itr = Next; 295 } 296 return Valid; 297 } 298 299 bool BinaryContext::validateHoles() const { 300 bool Valid = true; 301 for (BinarySection &Section : sections()) { 302 for (const Relocation &Rel : Section.relocations()) { 303 uint64_t RelAddr = Rel.Offset + Section.getAddress(); 304 const BinaryData *BD = getBinaryDataContainingAddress(RelAddr); 305 if (!BD) { 306 errs() << "BOLT-WARNING: no BinaryData found for relocation at address" 307 << " 0x" << Twine::utohexstr(RelAddr) << " in " 308 << Section.getName() << "\n"; 309 Valid = false; 310 } else if (!BD->getAtomicRoot()) { 311 errs() << "BOLT-WARNING: no atomic BinaryData found for relocation at " 312 << "address 0x" << Twine::utohexstr(RelAddr) << " in " 313 << Section.getName() << "\n"; 314 Valid = false; 315 } 316 } 317 } 318 return Valid; 319 } 320 321 void BinaryContext::updateObjectNesting(BinaryDataMapType::iterator GAI) { 322 const uint64_t Address = GAI->second->getAddress(); 323 const uint64_t Size = GAI->second->getSize(); 324 325 auto fixParents = [&](BinaryDataMapType::iterator Itr, 326 BinaryData *NewParent) { 327 BinaryData *OldParent = Itr->second->Parent; 328 Itr->second->Parent = NewParent; 329 ++Itr; 330 while (Itr != BinaryDataMap.end() && OldParent && 331 Itr->second->Parent == OldParent) { 332 Itr->second->Parent = NewParent; 333 ++Itr; 334 } 335 }; 336 337 // Check if the previous symbol contains the newly added symbol. 
338 if (GAI != BinaryDataMap.begin()) { 339 BinaryData *Prev = std::prev(GAI)->second; 340 while (Prev) { 341 if (Prev->getSection() == GAI->second->getSection() && 342 Prev->containsRange(Address, Size)) { 343 fixParents(GAI, Prev); 344 } else { 345 fixParents(GAI, nullptr); 346 } 347 Prev = Prev->Parent; 348 } 349 } 350 351 // Check if the newly added symbol contains any subsequent symbols. 352 if (Size != 0) { 353 BinaryData *BD = GAI->second->Parent ? GAI->second->Parent : GAI->second; 354 auto Itr = std::next(GAI); 355 while ( 356 Itr != BinaryDataMap.end() && 357 BD->containsRange(Itr->second->getAddress(), Itr->second->getSize())) { 358 Itr->second->Parent = BD; 359 ++Itr; 360 } 361 } 362 } 363 364 iterator_range<BinaryContext::binary_data_iterator> 365 BinaryContext::getSubBinaryData(BinaryData *BD) { 366 auto Start = std::next(BinaryDataMap.find(BD->getAddress())); 367 auto End = Start; 368 while (End != BinaryDataMap.end() && BD->isAncestorOf(End->second)) 369 ++End; 370 return make_range(Start, End); 371 } 372 373 std::pair<const MCSymbol *, uint64_t> 374 BinaryContext::handleAddressRef(uint64_t Address, BinaryFunction &BF, 375 bool IsPCRel) { 376 if (isAArch64()) { 377 // Check if this is an access to a constant island and create bookkeeping 378 // to keep track of it and emit it later as part of this function. 379 if (MCSymbol *IslandSym = BF.getOrCreateIslandAccess(Address)) 380 return std::make_pair(IslandSym, 0); 381 382 // Detect custom code written in assembly that refers to arbitrary 383 // constant islands from other functions. Write this reference so we 384 // can pull this constant island and emit it as part of this function 385 // too. 
386 auto IslandIter = AddressToConstantIslandMap.lower_bound(Address); 387 388 if (IslandIter != AddressToConstantIslandMap.begin() && 389 (IslandIter == AddressToConstantIslandMap.end() || 390 IslandIter->first > Address)) 391 --IslandIter; 392 393 if (IslandIter != AddressToConstantIslandMap.end()) { 394 // Fall-back to referencing the original constant island in the presence 395 // of dynamic relocs, as we currently do not support cloning them. 396 // Notice: we might fail to link because of this, if the original constant 397 // island we are referring would be emitted too far away. 398 if (IslandIter->second->hasDynamicRelocationAtIsland()) { 399 MCSymbol *IslandSym = 400 IslandIter->second->getOrCreateIslandAccess(Address); 401 if (IslandSym) 402 return std::make_pair(IslandSym, 0); 403 } else if (MCSymbol *IslandSym = 404 IslandIter->second->getOrCreateProxyIslandAccess(Address, 405 BF)) { 406 BF.createIslandDependency(IslandSym, IslandIter->second); 407 return std::make_pair(IslandSym, 0); 408 } 409 } 410 } 411 412 // Note that the address does not necessarily have to reside inside 413 // a section, it could be an absolute address too. 414 ErrorOr<BinarySection &> Section = getSectionForAddress(Address); 415 if (Section && Section->isText()) { 416 if (BF.containsAddress(Address, /*UseMaxSize=*/isAArch64())) { 417 if (Address != BF.getAddress()) { 418 // The address could potentially escape. Mark it as another entry 419 // point into the function. 420 if (opts::Verbosity >= 1) { 421 outs() << "BOLT-INFO: potentially escaped address 0x" 422 << Twine::utohexstr(Address) << " in function " << BF << '\n'; 423 } 424 BF.HasInternalLabelReference = true; 425 return std::make_pair( 426 BF.addEntryPointAtOffset(Address - BF.getAddress()), 0); 427 } 428 } else { 429 addInterproceduralReference(&BF, Address); 430 } 431 } 432 433 // With relocations, catch jump table references outside of the basic block 434 // containing the indirect jump. 
435 if (HasRelocations) { 436 const MemoryContentsType MemType = analyzeMemoryAt(Address, BF); 437 if (MemType == MemoryContentsType::POSSIBLE_PIC_JUMP_TABLE && IsPCRel) { 438 const MCSymbol *Symbol = 439 getOrCreateJumpTable(BF, Address, JumpTable::JTT_PIC); 440 441 return std::make_pair(Symbol, 0); 442 } 443 } 444 445 if (BinaryData *BD = getBinaryDataContainingAddress(Address)) 446 return std::make_pair(BD->getSymbol(), Address - BD->getAddress()); 447 448 // TODO: use DWARF info to get size/alignment here? 449 MCSymbol *TargetSymbol = getOrCreateGlobalSymbol(Address, "DATAat"); 450 LLVM_DEBUG(dbgs() << "Created symbol " << TargetSymbol->getName() << '\n'); 451 return std::make_pair(TargetSymbol, 0); 452 } 453 454 MemoryContentsType BinaryContext::analyzeMemoryAt(uint64_t Address, 455 BinaryFunction &BF) { 456 if (!isX86()) 457 return MemoryContentsType::UNKNOWN; 458 459 ErrorOr<BinarySection &> Section = getSectionForAddress(Address); 460 if (!Section) { 461 // No section - possibly an absolute address. Since we don't allow 462 // internal function addresses to escape the function scope - we 463 // consider it a tail call. 464 if (opts::Verbosity > 1) { 465 errs() << "BOLT-WARNING: no section for address 0x" 466 << Twine::utohexstr(Address) << " referenced from function " << BF 467 << '\n'; 468 } 469 return MemoryContentsType::UNKNOWN; 470 } 471 472 if (Section->isVirtual()) { 473 // The contents are filled at runtime. 474 return MemoryContentsType::UNKNOWN; 475 } 476 477 // No support for jump tables in code yet. 478 if (Section->isText()) 479 return MemoryContentsType::UNKNOWN; 480 481 // Start with checking for PIC jump table. We expect non-PIC jump tables 482 // to have high 32 bits set to 0. 
483 if (analyzeJumpTable(Address, JumpTable::JTT_PIC, BF)) 484 return MemoryContentsType::POSSIBLE_PIC_JUMP_TABLE; 485 486 if (analyzeJumpTable(Address, JumpTable::JTT_NORMAL, BF)) 487 return MemoryContentsType::POSSIBLE_JUMP_TABLE; 488 489 return MemoryContentsType::UNKNOWN; 490 } 491 492 bool BinaryContext::analyzeJumpTable(const uint64_t Address, 493 const JumpTable::JumpTableType Type, 494 const BinaryFunction &BF, 495 const uint64_t NextJTAddress, 496 JumpTable::AddressesType *EntriesAsAddress, 497 bool *HasEntryInFragment) const { 498 // Is one of the targets __builtin_unreachable? 499 bool HasUnreachable = false; 500 501 // Number of targets other than __builtin_unreachable. 502 uint64_t NumRealEntries = 0; 503 504 auto addEntryAddress = [&](uint64_t EntryAddress) { 505 if (EntriesAsAddress) 506 EntriesAsAddress->emplace_back(EntryAddress); 507 }; 508 509 auto doesBelongToFunction = [&](const uint64_t Addr, 510 const BinaryFunction *TargetBF) -> bool { 511 if (BF.containsAddress(Addr)) 512 return true; 513 // Nothing to do if we failed to identify the containing function. 514 if (!TargetBF) 515 return false; 516 // Check if BF is a fragment of TargetBF or vice versa. 517 return BF.isChildOf(*TargetBF) || TargetBF->isChildOf(BF); 518 }; 519 520 ErrorOr<const BinarySection &> Section = getSectionForAddress(Address); 521 if (!Section) 522 return false; 523 524 // The upper bound is defined by containing object, section limits, and 525 // the next jump table in memory. 
526 uint64_t UpperBound = Section->getEndAddress(); 527 const BinaryData *JumpTableBD = getBinaryDataAtAddress(Address); 528 if (JumpTableBD && JumpTableBD->getSize()) { 529 assert(JumpTableBD->getEndAddress() <= UpperBound && 530 "data object cannot cross a section boundary"); 531 UpperBound = JumpTableBD->getEndAddress(); 532 } 533 if (NextJTAddress) 534 UpperBound = std::min(NextJTAddress, UpperBound); 535 536 LLVM_DEBUG({ 537 using JTT = JumpTable::JumpTableType; 538 dbgs() << formatv("BOLT-DEBUG: analyzeJumpTable @{0:x} in {1}, JTT={2}\n", 539 Address, BF.getPrintName(), 540 Type == JTT::JTT_PIC ? "PIC" : "Normal"); 541 }); 542 const uint64_t EntrySize = getJumpTableEntrySize(Type); 543 for (uint64_t EntryAddress = Address; EntryAddress <= UpperBound - EntrySize; 544 EntryAddress += EntrySize) { 545 LLVM_DEBUG(dbgs() << " * Checking 0x" << Twine::utohexstr(EntryAddress) 546 << " -> "); 547 // Check if there's a proper relocation against the jump table entry. 548 if (HasRelocations) { 549 if (Type == JumpTable::JTT_PIC && 550 !DataPCRelocations.count(EntryAddress)) { 551 LLVM_DEBUG( 552 dbgs() << "FAIL: JTT_PIC table, no relocation for this address\n"); 553 break; 554 } 555 if (Type == JumpTable::JTT_NORMAL && !getRelocationAt(EntryAddress)) { 556 LLVM_DEBUG( 557 dbgs() 558 << "FAIL: JTT_NORMAL table, no relocation for this address\n"); 559 break; 560 } 561 } 562 563 const uint64_t Value = 564 (Type == JumpTable::JTT_PIC) 565 ? Address + *getSignedValueAtAddress(EntryAddress, EntrySize) 566 : *getPointerAtAddress(EntryAddress); 567 568 // __builtin_unreachable() case. 569 if (Value == BF.getAddress() + BF.getSize()) { 570 addEntryAddress(Value); 571 HasUnreachable = true; 572 LLVM_DEBUG(dbgs() << formatv("OK: {0:x} __builtin_unreachable\n", Value)); 573 continue; 574 } 575 576 // Function or one of its fragments. 
577 const BinaryFunction *TargetBF = getBinaryFunctionContainingAddress(Value); 578 579 // We assume that a jump table cannot have function start as an entry. 580 if (!doesBelongToFunction(Value, TargetBF) || Value == BF.getAddress()) { 581 LLVM_DEBUG({ 582 if (!BF.containsAddress(Value)) { 583 dbgs() << "FAIL: function doesn't contain this address\n"; 584 if (TargetBF) { 585 dbgs() << " ! function containing this address: " 586 << TargetBF->getPrintName() << '\n'; 587 if (TargetBF->isFragment()) { 588 dbgs() << " ! is a fragment"; 589 for (BinaryFunction *Parent : TargetBF->ParentFragments) 590 dbgs() << ", parent: " << Parent->getPrintName(); 591 dbgs() << '\n'; 592 } 593 } 594 } 595 if (Value == BF.getAddress()) 596 dbgs() << "FAIL: jump table cannot have function start as an entry\n"; 597 }); 598 break; 599 } 600 601 // Check there's an instruction at this offset. 602 if (TargetBF->getState() == BinaryFunction::State::Disassembled && 603 !TargetBF->getInstructionAtOffset(Value - TargetBF->getAddress())) { 604 LLVM_DEBUG(dbgs() << formatv("FAIL: no instruction at {0:x}\n", Value)); 605 break; 606 } 607 608 ++NumRealEntries; 609 LLVM_DEBUG(dbgs() << formatv("OK: {0:x} real entry\n", Value)); 610 611 if (TargetBF != &BF && HasEntryInFragment) 612 *HasEntryInFragment = true; 613 addEntryAddress(Value); 614 } 615 616 // It's a jump table if the number of real entries is more than 1, or there's 617 // one real entry and "unreachable" targets. If there are only multiple 618 // "unreachable" targets, then it's not a jump table. 
619 return NumRealEntries + HasUnreachable >= 2; 620 } 621 622 void BinaryContext::populateJumpTables() { 623 LLVM_DEBUG(dbgs() << "DataPCRelocations: " << DataPCRelocations.size() 624 << '\n'); 625 for (auto JTI = JumpTables.begin(), JTE = JumpTables.end(); JTI != JTE; 626 ++JTI) { 627 JumpTable *JT = JTI->second; 628 629 bool NonSimpleParent = false; 630 for (BinaryFunction *BF : JT->Parents) 631 NonSimpleParent |= !BF->isSimple(); 632 if (NonSimpleParent) 633 continue; 634 635 uint64_t NextJTAddress = 0; 636 auto NextJTI = std::next(JTI); 637 if (NextJTI != JTE) 638 NextJTAddress = NextJTI->second->getAddress(); 639 640 const bool Success = 641 analyzeJumpTable(JT->getAddress(), JT->Type, *(JT->Parents[0]), 642 NextJTAddress, &JT->EntriesAsAddress, &JT->IsSplit); 643 if (!Success) { 644 LLVM_DEBUG({ 645 dbgs() << "failed to analyze "; 646 JT->print(dbgs()); 647 if (NextJTI != JTE) { 648 dbgs() << "next "; 649 NextJTI->second->print(dbgs()); 650 } 651 }); 652 llvm_unreachable("jump table heuristic failure"); 653 } 654 for (BinaryFunction *Frag : JT->Parents) { 655 if (JT->IsSplit) 656 Frag->setHasIndirectTargetToSplitFragment(true); 657 for (uint64_t EntryAddress : JT->EntriesAsAddress) 658 // if target is builtin_unreachable 659 if (EntryAddress == Frag->getAddress() + Frag->getSize()) { 660 Frag->IgnoredBranches.emplace_back(EntryAddress - Frag->getAddress(), 661 Frag->getSize()); 662 } else if (EntryAddress >= Frag->getAddress() && 663 EntryAddress < Frag->getAddress() + Frag->getSize()) { 664 Frag->registerReferencedOffset(EntryAddress - Frag->getAddress()); 665 } 666 } 667 668 // In strict mode, erase PC-relative relocation record. Later we check that 669 // all such records are erased and thus have been accounted for. 
670 if (opts::StrictMode && JT->Type == JumpTable::JTT_PIC) { 671 for (uint64_t Address = JT->getAddress(); 672 Address < JT->getAddress() + JT->getSize(); 673 Address += JT->EntrySize) { 674 DataPCRelocations.erase(DataPCRelocations.find(Address)); 675 } 676 } 677 678 // Mark to skip the function and all its fragments. 679 for (BinaryFunction *Frag : JT->Parents) 680 if (Frag->hasIndirectTargetToSplitFragment()) 681 addFragmentsToSkip(Frag); 682 } 683 684 if (opts::StrictMode && DataPCRelocations.size()) { 685 LLVM_DEBUG({ 686 dbgs() << DataPCRelocations.size() 687 << " unclaimed PC-relative relocations left in data:\n"; 688 for (uint64_t Reloc : DataPCRelocations) 689 dbgs() << Twine::utohexstr(Reloc) << '\n'; 690 }); 691 assert(0 && "unclaimed PC-relative relocations left in data\n"); 692 } 693 clearList(DataPCRelocations); 694 } 695 696 void BinaryContext::skipMarkedFragments() { 697 std::vector<BinaryFunction *> FragmentQueue; 698 // Copy the functions to FragmentQueue. 699 FragmentQueue.assign(FragmentsToSkip.begin(), FragmentsToSkip.end()); 700 auto addToWorklist = [&](BinaryFunction *Function) -> void { 701 if (FragmentsToSkip.count(Function)) 702 return; 703 FragmentQueue.push_back(Function); 704 addFragmentsToSkip(Function); 705 }; 706 // Functions containing split jump tables need to be skipped with all 707 // fragments (transitively). 
708 for (size_t I = 0; I != FragmentQueue.size(); I++) { 709 BinaryFunction *BF = FragmentQueue[I]; 710 assert(FragmentsToSkip.count(BF) && 711 "internal error in traversing function fragments"); 712 if (opts::Verbosity >= 1) 713 errs() << "BOLT-WARNING: Ignoring " << BF->getPrintName() << '\n'; 714 BF->setSimple(false); 715 BF->setHasIndirectTargetToSplitFragment(true); 716 717 llvm::for_each(BF->Fragments, addToWorklist); 718 llvm::for_each(BF->ParentFragments, addToWorklist); 719 } 720 if (!FragmentsToSkip.empty()) 721 errs() << "BOLT-WARNING: skipped " << FragmentsToSkip.size() << " function" 722 << (FragmentsToSkip.size() == 1 ? "" : "s") 723 << " due to cold fragments\n"; 724 } 725 726 MCSymbol *BinaryContext::getOrCreateGlobalSymbol(uint64_t Address, Twine Prefix, 727 uint64_t Size, 728 uint16_t Alignment, 729 unsigned Flags) { 730 auto Itr = BinaryDataMap.find(Address); 731 if (Itr != BinaryDataMap.end()) { 732 assert(Itr->second->getSize() == Size || !Size); 733 return Itr->second->getSymbol(); 734 } 735 736 std::string Name = (Prefix + "0x" + Twine::utohexstr(Address)).str(); 737 assert(!GlobalSymbols.count(Name) && "created name is not unique"); 738 return registerNameAtAddress(Name, Address, Size, Alignment, Flags); 739 } 740 741 MCSymbol *BinaryContext::getOrCreateUndefinedGlobalSymbol(StringRef Name) { 742 return Ctx->getOrCreateSymbol(Name); 743 } 744 745 BinaryFunction *BinaryContext::createBinaryFunction( 746 const std::string &Name, BinarySection &Section, uint64_t Address, 747 uint64_t Size, uint64_t SymbolSize, uint16_t Alignment) { 748 auto Result = BinaryFunctions.emplace( 749 Address, BinaryFunction(Name, Section, Address, Size, *this)); 750 assert(Result.second == true && "unexpected duplicate function"); 751 BinaryFunction *BF = &Result.first->second; 752 registerNameAtAddress(Name, Address, SymbolSize ? 
SymbolSize : Size, 753 Alignment); 754 setSymbolToFunctionMap(BF->getSymbol(), BF); 755 return BF; 756 } 757 758 const MCSymbol * 759 BinaryContext::getOrCreateJumpTable(BinaryFunction &Function, uint64_t Address, 760 JumpTable::JumpTableType Type) { 761 // Two fragments of same function access same jump table 762 if (JumpTable *JT = getJumpTableContainingAddress(Address)) { 763 assert(JT->Type == Type && "jump table types have to match"); 764 assert(Address == JT->getAddress() && "unexpected non-empty jump table"); 765 766 // Prevent associating a jump table to a specific fragment twice. 767 // This simple check arises from the assumption: no more than 2 fragments. 768 if (JT->Parents.size() == 1 && JT->Parents[0] != &Function) { 769 assert((JT->Parents[0]->isChildOf(Function) || 770 Function.isChildOf(*JT->Parents[0])) && 771 "cannot re-use jump table of a different function"); 772 // Duplicate the entry for the parent function for easy access 773 JT->Parents.push_back(&Function); 774 if (opts::Verbosity > 2) { 775 outs() << "BOLT-INFO: Multiple fragments access same jump table: " 776 << JT->Parents[0]->getPrintName() << "; " 777 << Function.getPrintName() << "\n"; 778 JT->print(outs()); 779 } 780 Function.JumpTables.emplace(Address, JT); 781 JT->Parents[0]->setHasIndirectTargetToSplitFragment(true); 782 JT->Parents[1]->setHasIndirectTargetToSplitFragment(true); 783 } 784 785 bool IsJumpTableParent = false; 786 (void)IsJumpTableParent; 787 for (BinaryFunction *Frag : JT->Parents) 788 if (Frag == &Function) 789 IsJumpTableParent = true; 790 assert(IsJumpTableParent && 791 "cannot re-use jump table of a different function"); 792 return JT->getFirstLabel(); 793 } 794 795 // Re-use the existing symbol if possible. 
796 MCSymbol *JTLabel = nullptr; 797 if (BinaryData *Object = getBinaryDataAtAddress(Address)) { 798 if (!isInternalSymbolName(Object->getSymbol()->getName())) 799 JTLabel = Object->getSymbol(); 800 } 801 802 const uint64_t EntrySize = getJumpTableEntrySize(Type); 803 if (!JTLabel) { 804 const std::string JumpTableName = generateJumpTableName(Function, Address); 805 JTLabel = registerNameAtAddress(JumpTableName, Address, 0, EntrySize); 806 } 807 808 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: creating jump table " << JTLabel->getName() 809 << " in function " << Function << '\n'); 810 811 JumpTable *JT = new JumpTable(*JTLabel, Address, EntrySize, Type, 812 JumpTable::LabelMapType{{0, JTLabel}}, 813 *getSectionForAddress(Address)); 814 JT->Parents.push_back(&Function); 815 if (opts::Verbosity > 2) 816 JT->print(outs()); 817 JumpTables.emplace(Address, JT); 818 819 // Duplicate the entry for the parent function for easy access. 820 Function.JumpTables.emplace(Address, JT); 821 return JTLabel; 822 } 823 824 std::pair<uint64_t, const MCSymbol *> 825 BinaryContext::duplicateJumpTable(BinaryFunction &Function, JumpTable *JT, 826 const MCSymbol *OldLabel) { 827 auto L = scopeLock(); 828 unsigned Offset = 0; 829 bool Found = false; 830 for (std::pair<const unsigned, MCSymbol *> Elmt : JT->Labels) { 831 if (Elmt.second != OldLabel) 832 continue; 833 Offset = Elmt.first; 834 Found = true; 835 break; 836 } 837 assert(Found && "Label not found"); 838 (void)Found; 839 MCSymbol *NewLabel = Ctx->createNamedTempSymbol("duplicatedJT"); 840 JumpTable *NewJT = 841 new JumpTable(*NewLabel, JT->getAddress(), JT->EntrySize, JT->Type, 842 JumpTable::LabelMapType{{Offset, NewLabel}}, 843 *getSectionForAddress(JT->getAddress())); 844 NewJT->Parents = JT->Parents; 845 NewJT->Entries = JT->Entries; 846 NewJT->Counts = JT->Counts; 847 uint64_t JumpTableID = ++DuplicatedJumpTables; 848 // Invert it to differentiate from regular jump tables whose IDs are their 849 // addresses in the input binary memory 
space 850 JumpTableID = ~JumpTableID; 851 JumpTables.emplace(JumpTableID, NewJT); 852 Function.JumpTables.emplace(JumpTableID, NewJT); 853 return std::make_pair(JumpTableID, NewLabel); 854 } 855 856 std::string BinaryContext::generateJumpTableName(const BinaryFunction &BF, 857 uint64_t Address) { 858 size_t Id; 859 uint64_t Offset = 0; 860 if (const JumpTable *JT = BF.getJumpTableContainingAddress(Address)) { 861 Offset = Address - JT->getAddress(); 862 auto Itr = JT->Labels.find(Offset); 863 if (Itr != JT->Labels.end()) 864 return std::string(Itr->second->getName()); 865 Id = JumpTableIds.at(JT->getAddress()); 866 } else { 867 Id = JumpTableIds[Address] = BF.JumpTables.size(); 868 } 869 return ("JUMP_TABLE/" + BF.getOneName().str() + "." + std::to_string(Id) + 870 (Offset ? ("." + std::to_string(Offset)) : "")); 871 } 872 873 bool BinaryContext::hasValidCodePadding(const BinaryFunction &BF) { 874 // FIXME: aarch64 support is missing. 875 if (!isX86()) 876 return true; 877 878 if (BF.getSize() == BF.getMaxSize()) 879 return true; 880 881 ErrorOr<ArrayRef<unsigned char>> FunctionData = BF.getData(); 882 assert(FunctionData && "cannot get function as data"); 883 884 uint64_t Offset = BF.getSize(); 885 MCInst Instr; 886 uint64_t InstrSize = 0; 887 uint64_t InstrAddress = BF.getAddress() + Offset; 888 using std::placeholders::_1; 889 890 // Skip instructions that satisfy the predicate condition. 891 auto skipInstructions = [&](std::function<bool(const MCInst &)> Predicate) { 892 const uint64_t StartOffset = Offset; 893 for (; Offset < BF.getMaxSize(); 894 Offset += InstrSize, InstrAddress += InstrSize) { 895 if (!DisAsm->getInstruction(Instr, InstrSize, FunctionData->slice(Offset), 896 InstrAddress, nulls())) 897 break; 898 if (!Predicate(Instr)) 899 break; 900 } 901 902 return Offset - StartOffset; 903 }; 904 905 // Skip a sequence of zero bytes. 
906 auto skipZeros = [&]() { 907 const uint64_t StartOffset = Offset; 908 for (; Offset < BF.getMaxSize(); ++Offset) 909 if ((*FunctionData)[Offset] != 0) 910 break; 911 912 return Offset - StartOffset; 913 }; 914 915 // Accept the whole padding area filled with breakpoints. 916 auto isBreakpoint = std::bind(&MCPlusBuilder::isBreakpoint, MIB.get(), _1); 917 if (skipInstructions(isBreakpoint) && Offset == BF.getMaxSize()) 918 return true; 919 920 auto isNoop = std::bind(&MCPlusBuilder::isNoop, MIB.get(), _1); 921 922 // Some functions have a jump to the next function or to the padding area 923 // inserted after the body. 924 auto isSkipJump = [&](const MCInst &Instr) { 925 uint64_t TargetAddress = 0; 926 if (MIB->isUnconditionalBranch(Instr) && 927 MIB->evaluateBranch(Instr, InstrAddress, InstrSize, TargetAddress)) { 928 if (TargetAddress >= InstrAddress + InstrSize && 929 TargetAddress <= BF.getAddress() + BF.getMaxSize()) { 930 return true; 931 } 932 } 933 return false; 934 }; 935 936 // Skip over nops, jumps, and zero padding. Allow interleaving (this happens). 937 while (skipInstructions(isNoop) || skipInstructions(isSkipJump) || 938 skipZeros()) 939 ; 940 941 if (Offset == BF.getMaxSize()) 942 return true; 943 944 if (opts::Verbosity >= 1) { 945 errs() << "BOLT-WARNING: bad padding at address 0x" 946 << Twine::utohexstr(BF.getAddress() + BF.getSize()) 947 << " starting at offset " << (Offset - BF.getSize()) 948 << " in function " << BF << '\n' 949 << FunctionData->slice(BF.getSize(), BF.getMaxSize() - BF.getSize()) 950 << '\n'; 951 } 952 953 return false; 954 } 955 956 void BinaryContext::adjustCodePadding() { 957 for (auto &BFI : BinaryFunctions) { 958 BinaryFunction &BF = BFI.second; 959 if (!shouldEmit(BF)) 960 continue; 961 962 if (!hasValidCodePadding(BF)) { 963 if (HasRelocations) { 964 if (opts::Verbosity >= 1) { 965 outs() << "BOLT-INFO: function " << BF 966 << " has invalid padding. 
Ignoring the function.\n"; 967 } 968 BF.setIgnored(); 969 } else { 970 BF.setMaxSize(BF.getSize()); 971 } 972 } 973 } 974 } 975 976 MCSymbol *BinaryContext::registerNameAtAddress(StringRef Name, uint64_t Address, 977 uint64_t Size, 978 uint16_t Alignment, 979 unsigned Flags) { 980 // Register the name with MCContext. 981 MCSymbol *Symbol = Ctx->getOrCreateSymbol(Name); 982 983 auto GAI = BinaryDataMap.find(Address); 984 BinaryData *BD; 985 if (GAI == BinaryDataMap.end()) { 986 ErrorOr<BinarySection &> SectionOrErr = getSectionForAddress(Address); 987 BinarySection &Section = 988 SectionOrErr ? SectionOrErr.get() : absoluteSection(); 989 BD = new BinaryData(*Symbol, Address, Size, Alignment ? Alignment : 1, 990 Section, Flags); 991 GAI = BinaryDataMap.emplace(Address, BD).first; 992 GlobalSymbols[Name] = BD; 993 updateObjectNesting(GAI); 994 } else { 995 BD = GAI->second; 996 if (!BD->hasName(Name)) { 997 GlobalSymbols[Name] = BD; 998 BD->Symbols.push_back(Symbol); 999 } 1000 } 1001 1002 return Symbol; 1003 } 1004 1005 const BinaryData * 1006 BinaryContext::getBinaryDataContainingAddressImpl(uint64_t Address) const { 1007 auto NI = BinaryDataMap.lower_bound(Address); 1008 auto End = BinaryDataMap.end(); 1009 if ((NI != End && Address == NI->first) || 1010 ((NI != BinaryDataMap.begin()) && (NI-- != BinaryDataMap.begin()))) { 1011 if (NI->second->containsAddress(Address)) 1012 return NI->second; 1013 1014 // If this is a sub-symbol, see if a parent data contains the address. 
1015 const BinaryData *BD = NI->second->getParent(); 1016 while (BD) { 1017 if (BD->containsAddress(Address)) 1018 return BD; 1019 BD = BD->getParent(); 1020 } 1021 } 1022 return nullptr; 1023 } 1024 1025 bool BinaryContext::setBinaryDataSize(uint64_t Address, uint64_t Size) { 1026 auto NI = BinaryDataMap.find(Address); 1027 assert(NI != BinaryDataMap.end()); 1028 if (NI == BinaryDataMap.end()) 1029 return false; 1030 // TODO: it's possible that a jump table starts at the same address 1031 // as a larger blob of private data. When we set the size of the 1032 // jump table, it might be smaller than the total blob size. In this 1033 // case we just leave the original size since (currently) it won't really 1034 // affect anything. 1035 assert((!NI->second->Size || NI->second->Size == Size || 1036 (NI->second->isJumpTable() && NI->second->Size > Size)) && 1037 "can't change the size of a symbol that has already had its " 1038 "size set"); 1039 if (!NI->second->Size) { 1040 NI->second->Size = Size; 1041 updateObjectNesting(NI); 1042 return true; 1043 } 1044 return false; 1045 } 1046 1047 void BinaryContext::generateSymbolHashes() { 1048 auto isPadding = [](const BinaryData &BD) { 1049 StringRef Contents = BD.getSection().getContents(); 1050 StringRef SymData = Contents.substr(BD.getOffset(), BD.getSize()); 1051 return (BD.getName().startswith("HOLEat") || 1052 SymData.find_first_not_of(0) == StringRef::npos); 1053 }; 1054 1055 uint64_t NumCollisions = 0; 1056 for (auto &Entry : BinaryDataMap) { 1057 BinaryData &BD = *Entry.second; 1058 StringRef Name = BD.getName(); 1059 1060 if (!isInternalSymbolName(Name)) 1061 continue; 1062 1063 // First check if a non-anonymous alias exists and move it to the front. 
1064 if (BD.getSymbols().size() > 1) { 1065 auto Itr = llvm::find_if(BD.getSymbols(), [&](const MCSymbol *Symbol) { 1066 return !isInternalSymbolName(Symbol->getName()); 1067 }); 1068 if (Itr != BD.getSymbols().end()) { 1069 size_t Idx = std::distance(BD.getSymbols().begin(), Itr); 1070 std::swap(BD.getSymbols()[0], BD.getSymbols()[Idx]); 1071 continue; 1072 } 1073 } 1074 1075 // We have to skip 0 size symbols since they will all collide. 1076 if (BD.getSize() == 0) { 1077 continue; 1078 } 1079 1080 const uint64_t Hash = BD.getSection().hash(BD); 1081 const size_t Idx = Name.find("0x"); 1082 std::string NewName = 1083 (Twine(Name.substr(0, Idx)) + "_" + Twine::utohexstr(Hash)).str(); 1084 if (getBinaryDataByName(NewName)) { 1085 // Ignore collisions for symbols that appear to be padding 1086 // (i.e. all zeros or a "hole") 1087 if (!isPadding(BD)) { 1088 if (opts::Verbosity) { 1089 errs() << "BOLT-WARNING: collision detected when hashing " << BD 1090 << " with new name (" << NewName << "), skipping.\n"; 1091 } 1092 ++NumCollisions; 1093 } 1094 continue; 1095 } 1096 BD.Symbols.insert(BD.Symbols.begin(), Ctx->getOrCreateSymbol(NewName)); 1097 GlobalSymbols[NewName] = &BD; 1098 } 1099 if (NumCollisions) { 1100 errs() << "BOLT-WARNING: " << NumCollisions 1101 << " collisions detected while hashing binary objects"; 1102 if (!opts::Verbosity) 1103 errs() << ". 
Use -v=1 to see the list."; 1104 errs() << '\n'; 1105 } 1106 } 1107 1108 bool BinaryContext::registerFragment(BinaryFunction &TargetFunction, 1109 BinaryFunction &Function) const { 1110 assert(TargetFunction.isFragment() && "TargetFunction must be a fragment"); 1111 if (TargetFunction.isChildOf(Function)) 1112 return true; 1113 TargetFunction.addParentFragment(Function); 1114 Function.addFragment(TargetFunction); 1115 if (!HasRelocations) { 1116 TargetFunction.setSimple(false); 1117 Function.setSimple(false); 1118 } 1119 if (opts::Verbosity >= 1) { 1120 outs() << "BOLT-INFO: marking " << TargetFunction << " as a fragment of " 1121 << Function << '\n'; 1122 } 1123 return true; 1124 } 1125 1126 void BinaryContext::addAdrpAddRelocAArch64(BinaryFunction &BF, 1127 MCInst &LoadLowBits, 1128 MCInst &LoadHiBits, 1129 uint64_t Target) { 1130 const MCSymbol *TargetSymbol; 1131 uint64_t Addend = 0; 1132 std::tie(TargetSymbol, Addend) = handleAddressRef(Target, BF, 1133 /*IsPCRel*/ true); 1134 int64_t Val; 1135 MIB->replaceImmWithSymbolRef(LoadHiBits, TargetSymbol, Addend, Ctx.get(), Val, 1136 ELF::R_AARCH64_ADR_PREL_PG_HI21); 1137 MIB->replaceImmWithSymbolRef(LoadLowBits, TargetSymbol, Addend, Ctx.get(), 1138 Val, ELF::R_AARCH64_ADD_ABS_LO12_NC); 1139 } 1140 1141 bool BinaryContext::handleAArch64Veneer(uint64_t Address, bool MatchOnly) { 1142 BinaryFunction *TargetFunction = getBinaryFunctionContainingAddress(Address); 1143 if (TargetFunction) 1144 return false; 1145 1146 ErrorOr<BinarySection &> Section = getSectionForAddress(Address); 1147 assert(Section && "cannot get section for referenced address"); 1148 if (!Section->isText()) 1149 return false; 1150 1151 bool Ret = false; 1152 StringRef SectionContents = Section->getContents(); 1153 uint64_t Offset = Address - Section->getAddress(); 1154 const uint64_t MaxSize = SectionContents.size() - Offset; 1155 const uint8_t *Bytes = 1156 reinterpret_cast<const uint8_t *>(SectionContents.data()); 1157 ArrayRef<uint8_t> Data(Bytes 
+ Offset, MaxSize); 1158 1159 auto matchVeneer = [&](BinaryFunction::InstrMapType &Instructions, 1160 MCInst &Instruction, uint64_t Offset, 1161 uint64_t AbsoluteInstrAddr, 1162 uint64_t TotalSize) -> bool { 1163 MCInst *TargetHiBits, *TargetLowBits; 1164 uint64_t TargetAddress, Count; 1165 Count = MIB->matchLinkerVeneer(Instructions.begin(), Instructions.end(), 1166 AbsoluteInstrAddr, Instruction, TargetHiBits, 1167 TargetLowBits, TargetAddress); 1168 if (!Count) 1169 return false; 1170 1171 if (MatchOnly) 1172 return true; 1173 1174 // NOTE The target symbol was created during disassemble's 1175 // handleExternalReference 1176 const MCSymbol *VeneerSymbol = getOrCreateGlobalSymbol(Address, "FUNCat"); 1177 BinaryFunction *Veneer = createBinaryFunction(VeneerSymbol->getName().str(), 1178 *Section, Address, TotalSize); 1179 addAdrpAddRelocAArch64(*Veneer, *TargetLowBits, *TargetHiBits, 1180 TargetAddress); 1181 MIB->addAnnotation(Instruction, "AArch64Veneer", true); 1182 Veneer->addInstruction(Offset, std::move(Instruction)); 1183 --Count; 1184 for (auto It = Instructions.rbegin(); Count != 0; ++It, --Count) { 1185 MIB->addAnnotation(It->second, "AArch64Veneer", true); 1186 Veneer->addInstruction(It->first, std::move(It->second)); 1187 } 1188 1189 Veneer->getOrCreateLocalLabel(Address); 1190 Veneer->setMaxSize(TotalSize); 1191 Veneer->updateState(BinaryFunction::State::Disassembled); 1192 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: handling veneer function at 0x" << Address 1193 << "\n"); 1194 return true; 1195 }; 1196 1197 uint64_t Size = 0, TotalSize = 0; 1198 BinaryFunction::InstrMapType VeneerInstructions; 1199 for (Offset = 0; Offset < MaxSize; Offset += Size) { 1200 MCInst Instruction; 1201 const uint64_t AbsoluteInstrAddr = Address + Offset; 1202 if (!SymbolicDisAsm->getInstruction(Instruction, Size, Data.slice(Offset), 1203 AbsoluteInstrAddr, nulls())) 1204 break; 1205 1206 TotalSize += Size; 1207 if (MIB->isBranch(Instruction)) { 1208 Ret = 
matchVeneer(VeneerInstructions, Instruction, Offset, 1209 AbsoluteInstrAddr, TotalSize); 1210 break; 1211 } 1212 1213 VeneerInstructions.emplace(Offset, std::move(Instruction)); 1214 } 1215 1216 return Ret; 1217 } 1218 1219 void BinaryContext::processInterproceduralReferences() { 1220 for (const std::pair<BinaryFunction *, uint64_t> &It : 1221 InterproceduralReferences) { 1222 BinaryFunction &Function = *It.first; 1223 uint64_t Address = It.second; 1224 if (!Address || Function.isIgnored()) 1225 continue; 1226 1227 BinaryFunction *TargetFunction = 1228 getBinaryFunctionContainingAddress(Address); 1229 if (&Function == TargetFunction) 1230 continue; 1231 1232 if (TargetFunction) { 1233 if (TargetFunction->isFragment() && 1234 !TargetFunction->isChildOf(Function)) { 1235 errs() << "BOLT-WARNING: interprocedural reference between unrelated " 1236 "fragments: " 1237 << Function.getPrintName() << " and " 1238 << TargetFunction->getPrintName() << '\n'; 1239 } 1240 if (uint64_t Offset = Address - TargetFunction->getAddress()) 1241 TargetFunction->addEntryPointAtOffset(Offset); 1242 1243 continue; 1244 } 1245 1246 // Check if address falls in function padding space - this could be 1247 // unmarked data in code. In this case adjust the padding space size. 1248 ErrorOr<BinarySection &> Section = getSectionForAddress(Address); 1249 assert(Section && "cannot get section for referenced address"); 1250 1251 if (!Section->isText()) 1252 continue; 1253 1254 // PLT requires special handling and could be ignored in this context. 
1255 StringRef SectionName = Section->getName(); 1256 if (SectionName == ".plt" || SectionName == ".plt.got") 1257 continue; 1258 1259 // Check if it is aarch64 veneer written at Address 1260 if (isAArch64() && handleAArch64Veneer(Address)) 1261 continue; 1262 1263 if (opts::processAllFunctions()) { 1264 errs() << "BOLT-ERROR: cannot process binaries with unmarked " 1265 << "object in code at address 0x" << Twine::utohexstr(Address) 1266 << " belonging to section " << SectionName << " in current mode\n"; 1267 exit(1); 1268 } 1269 1270 TargetFunction = getBinaryFunctionContainingAddress(Address, 1271 /*CheckPastEnd=*/false, 1272 /*UseMaxSize=*/true); 1273 // We are not going to overwrite non-simple functions, but for simple 1274 // ones - adjust the padding size. 1275 if (TargetFunction && TargetFunction->isSimple()) { 1276 errs() << "BOLT-WARNING: function " << *TargetFunction 1277 << " has an object detected in a padding region at address 0x" 1278 << Twine::utohexstr(Address) << '\n'; 1279 TargetFunction->setMaxSize(TargetFunction->getSize()); 1280 } 1281 } 1282 1283 InterproceduralReferences.clear(); 1284 } 1285 1286 void BinaryContext::postProcessSymbolTable() { 1287 fixBinaryDataHoles(); 1288 bool Valid = true; 1289 for (auto &Entry : BinaryDataMap) { 1290 BinaryData *BD = Entry.second; 1291 if ((BD->getName().startswith("SYMBOLat") || 1292 BD->getName().startswith("DATAat")) && 1293 !BD->getParent() && !BD->getSize() && !BD->isAbsolute() && 1294 BD->getSection()) { 1295 errs() << "BOLT-WARNING: zero-sized top level symbol: " << *BD << "\n"; 1296 Valid = false; 1297 } 1298 } 1299 assert(Valid); 1300 (void)Valid; 1301 generateSymbolHashes(); 1302 } 1303 1304 void BinaryContext::foldFunction(BinaryFunction &ChildBF, 1305 BinaryFunction &ParentBF) { 1306 assert(!ChildBF.isMultiEntry() && !ParentBF.isMultiEntry() && 1307 "cannot merge functions with multiple entry points"); 1308 1309 std::unique_lock<llvm::sys::RWMutex> WriteCtxLock(CtxMutex, std::defer_lock); 1310 
std::unique_lock<llvm::sys::RWMutex> WriteSymbolMapLock( 1311 SymbolToFunctionMapMutex, std::defer_lock); 1312 1313 const StringRef ChildName = ChildBF.getOneName(); 1314 1315 // Move symbols over and update bookkeeping info. 1316 for (MCSymbol *Symbol : ChildBF.getSymbols()) { 1317 ParentBF.getSymbols().push_back(Symbol); 1318 WriteSymbolMapLock.lock(); 1319 SymbolToFunctionMap[Symbol] = &ParentBF; 1320 WriteSymbolMapLock.unlock(); 1321 // NB: there's no need to update BinaryDataMap and GlobalSymbols. 1322 } 1323 ChildBF.getSymbols().clear(); 1324 1325 // Move other names the child function is known under. 1326 llvm::move(ChildBF.Aliases, std::back_inserter(ParentBF.Aliases)); 1327 ChildBF.Aliases.clear(); 1328 1329 if (HasRelocations) { 1330 // Merge execution counts of ChildBF into those of ParentBF. 1331 // Without relocations, we cannot reliably merge profiles as both functions 1332 // continue to exist and either one can be executed. 1333 ChildBF.mergeProfileDataInto(ParentBF); 1334 1335 std::shared_lock<llvm::sys::RWMutex> ReadBfsLock(BinaryFunctionsMutex, 1336 std::defer_lock); 1337 std::unique_lock<llvm::sys::RWMutex> WriteBfsLock(BinaryFunctionsMutex, 1338 std::defer_lock); 1339 // Remove ChildBF from the global set of functions in relocs mode. 1340 ReadBfsLock.lock(); 1341 auto FI = BinaryFunctions.find(ChildBF.getAddress()); 1342 ReadBfsLock.unlock(); 1343 1344 assert(FI != BinaryFunctions.end() && "function not found"); 1345 assert(&ChildBF == &FI->second && "function mismatch"); 1346 1347 WriteBfsLock.lock(); 1348 ChildBF.clearDisasmState(); 1349 FI = BinaryFunctions.erase(FI); 1350 WriteBfsLock.unlock(); 1351 1352 } else { 1353 // In non-relocation mode we keep the function, but rename it. 
1354 std::string NewName = "__ICF_" + ChildName.str(); 1355 1356 WriteCtxLock.lock(); 1357 ChildBF.getSymbols().push_back(Ctx->getOrCreateSymbol(NewName)); 1358 WriteCtxLock.unlock(); 1359 1360 ChildBF.setFolded(&ParentBF); 1361 } 1362 1363 ParentBF.setHasFunctionsFoldedInto(); 1364 } 1365 1366 void BinaryContext::fixBinaryDataHoles() { 1367 assert(validateObjectNesting() && "object nesting inconsitency detected"); 1368 1369 for (BinarySection &Section : allocatableSections()) { 1370 std::vector<std::pair<uint64_t, uint64_t>> Holes; 1371 1372 auto isNotHole = [&Section](const binary_data_iterator &Itr) { 1373 BinaryData *BD = Itr->second; 1374 bool isHole = (!BD->getParent() && !BD->getSize() && BD->isObject() && 1375 (BD->getName().startswith("SYMBOLat0x") || 1376 BD->getName().startswith("DATAat0x") || 1377 BD->getName().startswith("ANONYMOUS"))); 1378 return !isHole && BD->getSection() == Section && !BD->getParent(); 1379 }; 1380 1381 auto BDStart = BinaryDataMap.begin(); 1382 auto BDEnd = BinaryDataMap.end(); 1383 auto Itr = FilteredBinaryDataIterator(isNotHole, BDStart, BDEnd); 1384 auto End = FilteredBinaryDataIterator(isNotHole, BDEnd, BDEnd); 1385 1386 uint64_t EndAddress = Section.getAddress(); 1387 1388 while (Itr != End) { 1389 if (Itr->second->getAddress() > EndAddress) { 1390 uint64_t Gap = Itr->second->getAddress() - EndAddress; 1391 Holes.emplace_back(EndAddress, Gap); 1392 } 1393 EndAddress = Itr->second->getEndAddress(); 1394 ++Itr; 1395 } 1396 1397 if (EndAddress < Section.getEndAddress()) 1398 Holes.emplace_back(EndAddress, Section.getEndAddress() - EndAddress); 1399 1400 // If there is already a symbol at the start of the hole, grow that symbol 1401 // to cover the rest. Otherwise, create a new symbol to cover the hole. 1402 for (std::pair<uint64_t, uint64_t> &Hole : Holes) { 1403 BinaryData *BD = getBinaryDataAtAddress(Hole.first); 1404 if (BD) { 1405 // BD->getSection() can be != Section if there are sections that 1406 // overlap. 
In this case it is probably safe to just skip the holes 1407 // since the overlapping section will not(?) have any symbols in it. 1408 if (BD->getSection() == Section) 1409 setBinaryDataSize(Hole.first, Hole.second); 1410 } else { 1411 getOrCreateGlobalSymbol(Hole.first, "HOLEat", Hole.second, 1); 1412 } 1413 } 1414 } 1415 1416 assert(validateObjectNesting() && "object nesting inconsitency detected"); 1417 assert(validateHoles() && "top level hole detected in object map"); 1418 } 1419 1420 void BinaryContext::printGlobalSymbols(raw_ostream &OS) const { 1421 const BinarySection *CurrentSection = nullptr; 1422 bool FirstSection = true; 1423 1424 for (auto &Entry : BinaryDataMap) { 1425 const BinaryData *BD = Entry.second; 1426 const BinarySection &Section = BD->getSection(); 1427 if (FirstSection || Section != *CurrentSection) { 1428 uint64_t Address, Size; 1429 StringRef Name = Section.getName(); 1430 if (Section) { 1431 Address = Section.getAddress(); 1432 Size = Section.getSize(); 1433 } else { 1434 Address = BD->getAddress(); 1435 Size = BD->getSize(); 1436 } 1437 OS << "BOLT-INFO: Section " << Name << ", " 1438 << "0x" + Twine::utohexstr(Address) << ":" 1439 << "0x" + Twine::utohexstr(Address + Size) << "/" << Size << "\n"; 1440 CurrentSection = &Section; 1441 FirstSection = false; 1442 } 1443 1444 OS << "BOLT-INFO: "; 1445 const BinaryData *P = BD->getParent(); 1446 while (P) { 1447 OS << " "; 1448 P = P->getParent(); 1449 } 1450 OS << *BD << "\n"; 1451 } 1452 } 1453 1454 Expected<unsigned> BinaryContext::getDwarfFile( 1455 StringRef Directory, StringRef FileName, unsigned FileNumber, 1456 std::optional<MD5::MD5Result> Checksum, std::optional<StringRef> Source, 1457 unsigned CUID, unsigned DWARFVersion) { 1458 DwarfLineTable &Table = DwarfLineTablesCUMap[CUID]; 1459 return Table.tryGetFile(Directory, FileName, Checksum, Source, DWARFVersion, 1460 FileNumber); 1461 } 1462 1463 unsigned BinaryContext::addDebugFilenameToUnit(const uint32_t DestCUID, 1464 const 
uint32_t SrcCUID, 1465 unsigned FileIndex) { 1466 DWARFCompileUnit *SrcUnit = DwCtx->getCompileUnitForOffset(SrcCUID); 1467 const DWARFDebugLine::LineTable *LineTable = 1468 DwCtx->getLineTableForUnit(SrcUnit); 1469 const std::vector<DWARFDebugLine::FileNameEntry> &FileNames = 1470 LineTable->Prologue.FileNames; 1471 // Dir indexes start at 1, as DWARF file numbers, and a dir index 0 1472 // means empty dir. 1473 assert(FileIndex > 0 && FileIndex <= FileNames.size() && 1474 "FileIndex out of range for the compilation unit."); 1475 StringRef Dir = ""; 1476 if (FileNames[FileIndex - 1].DirIdx != 0) { 1477 if (std::optional<const char *> DirName = dwarf::toString( 1478 LineTable->Prologue 1479 .IncludeDirectories[FileNames[FileIndex - 1].DirIdx - 1])) { 1480 Dir = *DirName; 1481 } 1482 } 1483 StringRef FileName = ""; 1484 if (std::optional<const char *> FName = 1485 dwarf::toString(FileNames[FileIndex - 1].Name)) 1486 FileName = *FName; 1487 assert(FileName != ""); 1488 DWARFCompileUnit *DstUnit = DwCtx->getCompileUnitForOffset(DestCUID); 1489 return cantFail(getDwarfFile(Dir, FileName, 0, std::nullopt, std::nullopt, 1490 DestCUID, DstUnit->getVersion())); 1491 } 1492 1493 std::vector<BinaryFunction *> BinaryContext::getSortedFunctions() { 1494 std::vector<BinaryFunction *> SortedFunctions(BinaryFunctions.size()); 1495 llvm::transform(llvm::make_second_range(BinaryFunctions), 1496 SortedFunctions.begin(), 1497 [](BinaryFunction &BF) { return &BF; }); 1498 1499 llvm::stable_sort(SortedFunctions, 1500 [](const BinaryFunction *A, const BinaryFunction *B) { 1501 if (A->hasValidIndex() && B->hasValidIndex()) { 1502 return A->getIndex() < B->getIndex(); 1503 } 1504 return A->hasValidIndex(); 1505 }); 1506 return SortedFunctions; 1507 } 1508 1509 std::vector<BinaryFunction *> BinaryContext::getAllBinaryFunctions() { 1510 std::vector<BinaryFunction *> AllFunctions; 1511 AllFunctions.reserve(BinaryFunctions.size() + InjectedBinaryFunctions.size()); 1512 
llvm::transform(llvm::make_second_range(BinaryFunctions), 1513 std::back_inserter(AllFunctions), 1514 [](BinaryFunction &BF) { return &BF; }); 1515 llvm::copy(InjectedBinaryFunctions, std::back_inserter(AllFunctions)); 1516 1517 return AllFunctions; 1518 } 1519 1520 std::optional<DWARFUnit *> BinaryContext::getDWOCU(uint64_t DWOId) { 1521 auto Iter = DWOCUs.find(DWOId); 1522 if (Iter == DWOCUs.end()) 1523 return std::nullopt; 1524 1525 return Iter->second; 1526 } 1527 1528 DWARFContext *BinaryContext::getDWOContext() const { 1529 if (DWOCUs.empty()) 1530 return nullptr; 1531 return &DWOCUs.begin()->second->getContext(); 1532 } 1533 1534 /// Handles DWO sections that can either be in .o, .dwo or .dwp files. 1535 void BinaryContext::preprocessDWODebugInfo() { 1536 for (const std::unique_ptr<DWARFUnit> &CU : DwCtx->compile_units()) { 1537 DWARFUnit *const DwarfUnit = CU.get(); 1538 if (std::optional<uint64_t> DWOId = DwarfUnit->getDWOId()) { 1539 DWARFUnit *DWOCU = DwarfUnit->getNonSkeletonUnitDIE(false).getDwarfUnit(); 1540 if (!DWOCU->isDWOUnit()) { 1541 std::string DWOName = dwarf::toString( 1542 DwarfUnit->getUnitDIE().find( 1543 {dwarf::DW_AT_dwo_name, dwarf::DW_AT_GNU_dwo_name}), 1544 ""); 1545 outs() << "BOLT-WARNING: Debug Fission: DWO debug information for " 1546 << DWOName 1547 << " was not retrieved and won't be updated. Please check " 1548 "relative path.\n"; 1549 continue; 1550 } 1551 DWOCUs[*DWOId] = DWOCU; 1552 } 1553 } 1554 if (!DWOCUs.empty()) 1555 outs() << "BOLT-INFO: processing split DWARF\n"; 1556 } 1557 1558 void BinaryContext::preprocessDebugInfo() { 1559 struct CURange { 1560 uint64_t LowPC; 1561 uint64_t HighPC; 1562 DWARFUnit *Unit; 1563 1564 bool operator<(const CURange &Other) const { return LowPC < Other.LowPC; } 1565 }; 1566 1567 // Building a map of address ranges to CUs similar to .debug_aranges and use 1568 // it to assign CU to functions. 
1569 std::vector<CURange> AllRanges; 1570 AllRanges.reserve(DwCtx->getNumCompileUnits()); 1571 for (const std::unique_ptr<DWARFUnit> &CU : DwCtx->compile_units()) { 1572 Expected<DWARFAddressRangesVector> RangesOrError = 1573 CU->getUnitDIE().getAddressRanges(); 1574 if (!RangesOrError) { 1575 consumeError(RangesOrError.takeError()); 1576 continue; 1577 } 1578 for (DWARFAddressRange &Range : *RangesOrError) { 1579 // Parts of the debug info could be invalidated due to corresponding code 1580 // being removed from the binary by the linker. Hence we check if the 1581 // address is a valid one. 1582 if (containsAddress(Range.LowPC)) 1583 AllRanges.emplace_back(CURange{Range.LowPC, Range.HighPC, CU.get()}); 1584 } 1585 1586 ContainsDwarf5 |= CU->getVersion() >= 5; 1587 ContainsDwarfLegacy |= CU->getVersion() < 5; 1588 } 1589 1590 llvm::sort(AllRanges); 1591 for (auto &KV : BinaryFunctions) { 1592 const uint64_t FunctionAddress = KV.first; 1593 BinaryFunction &Function = KV.second; 1594 1595 auto It = llvm::partition_point( 1596 AllRanges, [=](CURange R) { return R.HighPC <= FunctionAddress; }); 1597 if (It != AllRanges.end() && It->LowPC <= FunctionAddress) 1598 Function.setDWARFUnit(It->Unit); 1599 } 1600 1601 // Discover units with debug info that needs to be updated. 1602 for (const auto &KV : BinaryFunctions) { 1603 const BinaryFunction &BF = KV.second; 1604 if (shouldEmit(BF) && BF.getDWARFUnit()) 1605 ProcessedCUs.insert(BF.getDWARFUnit()); 1606 } 1607 1608 // Clear debug info for functions from units that we are not going to process. 
1609 for (auto &KV : BinaryFunctions) { 1610 BinaryFunction &BF = KV.second; 1611 if (BF.getDWARFUnit() && !ProcessedCUs.count(BF.getDWARFUnit())) 1612 BF.setDWARFUnit(nullptr); 1613 } 1614 1615 if (opts::Verbosity >= 1) { 1616 outs() << "BOLT-INFO: " << ProcessedCUs.size() << " out of " 1617 << DwCtx->getNumCompileUnits() << " CUs will be updated\n"; 1618 } 1619 1620 preprocessDWODebugInfo(); 1621 1622 // Populate MCContext with DWARF files from all units. 1623 StringRef GlobalPrefix = AsmInfo->getPrivateGlobalPrefix(); 1624 for (const std::unique_ptr<DWARFUnit> &CU : DwCtx->compile_units()) { 1625 const uint64_t CUID = CU->getOffset(); 1626 DwarfLineTable &BinaryLineTable = getDwarfLineTable(CUID); 1627 BinaryLineTable.setLabel(Ctx->getOrCreateSymbol( 1628 GlobalPrefix + "line_table_start" + Twine(CUID))); 1629 1630 if (!ProcessedCUs.count(CU.get())) 1631 continue; 1632 1633 const DWARFDebugLine::LineTable *LineTable = 1634 DwCtx->getLineTableForUnit(CU.get()); 1635 const std::vector<DWARFDebugLine::FileNameEntry> &FileNames = 1636 LineTable->Prologue.FileNames; 1637 1638 uint16_t DwarfVersion = LineTable->Prologue.getVersion(); 1639 if (DwarfVersion >= 5) { 1640 std::optional<MD5::MD5Result> Checksum; 1641 if (LineTable->Prologue.ContentTypes.HasMD5) 1642 Checksum = LineTable->Prologue.FileNames[0].Checksum; 1643 std::optional<const char *> Name = 1644 dwarf::toString(CU->getUnitDIE().find(dwarf::DW_AT_name), nullptr); 1645 if (std::optional<uint64_t> DWOID = CU->getDWOId()) { 1646 auto Iter = DWOCUs.find(*DWOID); 1647 assert(Iter != DWOCUs.end() && "DWO CU was not found."); 1648 Name = dwarf::toString( 1649 Iter->second->getUnitDIE().find(dwarf::DW_AT_name), nullptr); 1650 } 1651 BinaryLineTable.setRootFile(CU->getCompilationDir(), *Name, Checksum, 1652 std::nullopt); 1653 } 1654 1655 BinaryLineTable.setDwarfVersion(DwarfVersion); 1656 1657 // Assign a unique label to every line table, one per CU. 1658 // Make sure empty debug line tables are registered too. 
1659 if (FileNames.empty()) { 1660 cantFail(getDwarfFile("", "<unknown>", 0, std::nullopt, std::nullopt, 1661 CUID, DwarfVersion)); 1662 continue; 1663 } 1664 const uint32_t Offset = DwarfVersion < 5 ? 1 : 0; 1665 for (size_t I = 0, Size = FileNames.size(); I != Size; ++I) { 1666 // Dir indexes start at 1, as DWARF file numbers, and a dir index 0 1667 // means empty dir. 1668 StringRef Dir = ""; 1669 if (FileNames[I].DirIdx != 0 || DwarfVersion >= 5) 1670 if (std::optional<const char *> DirName = dwarf::toString( 1671 LineTable->Prologue 1672 .IncludeDirectories[FileNames[I].DirIdx - Offset])) 1673 Dir = *DirName; 1674 StringRef FileName = ""; 1675 if (std::optional<const char *> FName = 1676 dwarf::toString(FileNames[I].Name)) 1677 FileName = *FName; 1678 assert(FileName != ""); 1679 std::optional<MD5::MD5Result> Checksum; 1680 if (DwarfVersion >= 5 && LineTable->Prologue.ContentTypes.HasMD5) 1681 Checksum = LineTable->Prologue.FileNames[I].Checksum; 1682 cantFail(getDwarfFile(Dir, FileName, 0, Checksum, std::nullopt, CUID, 1683 DwarfVersion)); 1684 } 1685 } 1686 } 1687 1688 bool BinaryContext::shouldEmit(const BinaryFunction &Function) const { 1689 if (Function.isPseudo()) 1690 return false; 1691 1692 if (opts::processAllFunctions()) 1693 return true; 1694 1695 if (Function.isIgnored()) 1696 return false; 1697 1698 // In relocation mode we will emit non-simple functions with CFG. 1699 // If the function does not have a CFG it should be marked as ignored. 
1700 return HasRelocations || Function.isSimple(); 1701 } 1702 1703 void BinaryContext::printCFI(raw_ostream &OS, const MCCFIInstruction &Inst) { 1704 uint32_t Operation = Inst.getOperation(); 1705 switch (Operation) { 1706 case MCCFIInstruction::OpSameValue: 1707 OS << "OpSameValue Reg" << Inst.getRegister(); 1708 break; 1709 case MCCFIInstruction::OpRememberState: 1710 OS << "OpRememberState"; 1711 break; 1712 case MCCFIInstruction::OpRestoreState: 1713 OS << "OpRestoreState"; 1714 break; 1715 case MCCFIInstruction::OpOffset: 1716 OS << "OpOffset Reg" << Inst.getRegister() << " " << Inst.getOffset(); 1717 break; 1718 case MCCFIInstruction::OpDefCfaRegister: 1719 OS << "OpDefCfaRegister Reg" << Inst.getRegister(); 1720 break; 1721 case MCCFIInstruction::OpDefCfaOffset: 1722 OS << "OpDefCfaOffset " << Inst.getOffset(); 1723 break; 1724 case MCCFIInstruction::OpDefCfa: 1725 OS << "OpDefCfa Reg" << Inst.getRegister() << " " << Inst.getOffset(); 1726 break; 1727 case MCCFIInstruction::OpRelOffset: 1728 OS << "OpRelOffset Reg" << Inst.getRegister() << " " << Inst.getOffset(); 1729 break; 1730 case MCCFIInstruction::OpAdjustCfaOffset: 1731 OS << "OfAdjustCfaOffset " << Inst.getOffset(); 1732 break; 1733 case MCCFIInstruction::OpEscape: 1734 OS << "OpEscape"; 1735 break; 1736 case MCCFIInstruction::OpRestore: 1737 OS << "OpRestore Reg" << Inst.getRegister(); 1738 break; 1739 case MCCFIInstruction::OpUndefined: 1740 OS << "OpUndefined Reg" << Inst.getRegister(); 1741 break; 1742 case MCCFIInstruction::OpRegister: 1743 OS << "OpRegister Reg" << Inst.getRegister() << " Reg" 1744 << Inst.getRegister2(); 1745 break; 1746 case MCCFIInstruction::OpWindowSave: 1747 OS << "OpWindowSave"; 1748 break; 1749 case MCCFIInstruction::OpGnuArgsSize: 1750 OS << "OpGnuArgsSize"; 1751 break; 1752 default: 1753 OS << "Op#" << Operation; 1754 break; 1755 } 1756 } 1757 1758 MarkerSymType BinaryContext::getMarkerType(const SymbolRef &Symbol) const { 1759 // For aarch64, the ABI defines mapping 
symbols so we identify data in the 1760 // code section (see IHI0056B). $x identifies a symbol starting code or the 1761 // end of a data chunk inside code, $d indentifies start of data. 1762 if (!isAArch64() || ELFSymbolRef(Symbol).getSize()) 1763 return MarkerSymType::NONE; 1764 1765 Expected<StringRef> NameOrError = Symbol.getName(); 1766 Expected<object::SymbolRef::Type> TypeOrError = Symbol.getType(); 1767 1768 if (!TypeOrError || !NameOrError) 1769 return MarkerSymType::NONE; 1770 1771 if (*TypeOrError != SymbolRef::ST_Unknown) 1772 return MarkerSymType::NONE; 1773 1774 if (*NameOrError == "$x" || NameOrError->startswith("$x.")) 1775 return MarkerSymType::CODE; 1776 1777 if (*NameOrError == "$d" || NameOrError->startswith("$d.")) 1778 return MarkerSymType::DATA; 1779 1780 return MarkerSymType::NONE; 1781 } 1782 1783 bool BinaryContext::isMarker(const SymbolRef &Symbol) const { 1784 return getMarkerType(Symbol) != MarkerSymType::NONE; 1785 } 1786 1787 static void printDebugInfo(raw_ostream &OS, const MCInst &Instruction, 1788 const BinaryFunction *Function, 1789 DWARFContext *DwCtx) { 1790 DebugLineTableRowRef RowRef = 1791 DebugLineTableRowRef::fromSMLoc(Instruction.getLoc()); 1792 if (RowRef == DebugLineTableRowRef::NULL_ROW) 1793 return; 1794 1795 const DWARFDebugLine::LineTable *LineTable; 1796 if (Function && Function->getDWARFUnit() && 1797 Function->getDWARFUnit()->getOffset() == RowRef.DwCompileUnitIndex) { 1798 LineTable = Function->getDWARFLineTable(); 1799 } else { 1800 LineTable = DwCtx->getLineTableForUnit( 1801 DwCtx->getCompileUnitForOffset(RowRef.DwCompileUnitIndex)); 1802 } 1803 assert(LineTable && "line table expected for instruction with debug info"); 1804 1805 const DWARFDebugLine::Row &Row = LineTable->Rows[RowRef.RowIndex - 1]; 1806 StringRef FileName = ""; 1807 if (std::optional<const char *> FName = 1808 dwarf::toString(LineTable->Prologue.FileNames[Row.File - 1].Name)) 1809 FileName = *FName; 1810 OS << " # debug line " << FileName << 
":" << Row.Line;
  if (Row.Column)
    OS << ":" << Row.Column;
  if (Row.Discriminator)
    OS << " discriminator:" << Row.Discriminator;
}

/// Print a textual rendering of \p Instruction to \p OS.
///
/// EH labels and CFI pseudo-instructions are printed in a short dedicated form
/// and return early. Any other instruction is printed through InstPrinter and
/// then decorated with optional "# ..." notes (tail call, invoke handler and
/// action, jump table, offset), the annotations printed by MIB, debug info and
/// relocations.
///
/// \param Offset value printed in hex in the line prefix; it is also the
///        offset handed to Function->printRelocations().
/// \param Function optional function context; CFI details and relocations are
///        printed only when it is non-null.
/// \param PrintMCInst when true, the MCInst is additionally dumped with
///        dump_pretty() after the main line.
/// \param PrintMemData not referenced anywhere in this body.
/// \param PrintRelocations OR-ed with opts::PrintRelocations.
/// \param Endl terminator written after every emitted line.
void BinaryContext::printInstruction(raw_ostream &OS, const MCInst &Instruction,
                                     uint64_t Offset,
                                     const BinaryFunction *Function,
                                     bool PrintMCInst, bool PrintMemData,
                                     bool PrintRelocations,
                                     StringRef Endl) const {
  // EH labels carry no address prefix; print the target symbol and stop.
  if (MIB->isEHLabel(Instruction)) {
    OS << " EH_LABEL: " << *MIB->getTargetSymbol(Instruction) << Endl;
    return;
  }
  OS << format(" %08" PRIx64 ": ", Offset);
  if (MIB->isCFI(Instruction)) {
    // This local intentionally shadows the Offset parameter: it holds the
    // immediate of operand 0, which is what gets printed after "$".
    uint32_t Offset = Instruction.getOperand(0).getImm();
    OS << "\t!CFI\t$" << Offset << "\t; ";
    if (Function)
      printCFI(OS, *Function->getCFIFor(Instruction));
    OS << Endl;
    return;
  }
  InstPrinter->printInst(&Instruction, 0, "", *STI, OS);
  if (MIB->isCall(Instruction)) {
    if (MIB->isTailCall(Instruction))
      OS << " # TAILCALL ";
    if (MIB->isInvoke(Instruction)) {
      // NOTE(review): EHInfo is dereferenced without checking has_value();
      // presumably isInvoke() guarantees EH info is present - confirm.
      const std::optional<MCPlus::MCLandingPad> EHInfo =
          MIB->getEHInfo(Instruction);
      OS << " # handler: ";
      if (EHInfo->first)
        OS << *EHInfo->first;
      else
        OS << '0';
      OS << "; action: " << EHInfo->second;
      // A negative value is treated as "no GNU_args_size" and is not printed.
      const int64_t GnuArgsSize = MIB->getGnuArgsSize(Instruction);
      if (GnuArgsSize >= 0)
        OS << "; GNU_args_size = " << GnuArgsSize;
    }
  } else if (MIB->isIndirectBranch(Instruction)) {
    // A zero jump table address is reported as unknown control flow.
    if (uint64_t JTAddress = MIB->getJumpTable(Instruction)) {
      OS << " # JUMPTABLE @0x" << Twine::utohexstr(JTAddress);
    } else {
      OS << " # UNKNOWN CONTROL FLOW";
    }
  }
  // Scoped to this if-statement only; the Offset parameter is visible again
  // below.
  if (std::optional<uint32_t> Offset = MIB->getOffset(Instruction))
    OS << " # Offset: " << *Offset;

  MIB->printAnnotations(Instruction, OS);

  if (opts::PrintDebugInfo)
    printDebugInfo(OS, Instruction, Function, DwCtx.get());

  if ((opts::PrintRelocations || PrintRelocations) && Function) {
    // Size of this single instruction: the range [&Instruction, +1).
    const uint64_t Size = computeCodeSize(&Instruction, &Instruction + 1);
    Function->printRelocations(OS, Offset, Size);
  }

  OS << Endl;

  if (PrintMCInst) {
    Instruction.dump_pretty(OS, InstPrinter.get());
    OS << Endl;
  }
}

/// Compute the base address implied by a mapping of file offset \p FileOffset
/// at memory address \p MMapAddress.
///
/// Scans SegmentMapInfo for the first segment whose file offset, aligned down
/// to the segment alignment, equals \p FileOffset.
///
/// \returns \p MMapAddress minus the segment address aligned down to the
///          segment alignment, or std::nullopt when no segment matches.
std::optional<uint64_t>
BinaryContext::getBaseAddressForMapping(uint64_t MMapAddress,
                                        uint64_t FileOffset) const {
  // Find a segment with a matching file offset.
  for (auto &KV : SegmentMapInfo) {
    const SegmentInfo &SegInfo = KV.second;
    if (alignDown(SegInfo.FileOffset, SegInfo.Alignment) == FileOffset) {
      // Use segment's aligned memory offset to calculate the base address.
      const uint64_t MemOffset = alignDown(SegInfo.Address, SegInfo.Alignment);
      return MMapAddress - MemOffset;
    }
  }

  return std::nullopt;
}

/// Look up the section in AddressToSection that contains \p Address.
///
/// The candidate is the entry with the greatest key that is <= \p Address; it
/// is accepted when \p Address is below key + size.
///
/// \returns the section, or std::errc::bad_address when none contains
///          \p Address.
ErrorOr<BinarySection &> BinaryContext::getSectionForAddress(uint64_t Address) {
  auto SI = AddressToSection.upper_bound(Address);
  if (SI != AddressToSection.begin()) {
    --SI;
    uint64_t UpperBound = SI->first + SI->second->getSize();
    // A zero-sized section still matches its own start address.
    if (!SI->second->getSize())
      UpperBound += 1;
    if (UpperBound > Address)
      return *SI->second;
  }
  return std::make_error_code(std::errc::bad_address);
}

/// Return the name of the section containing \p Address, or
/// std::errc::bad_address when getSectionForAddress() finds none.
ErrorOr<StringRef>
BinaryContext::getSectionNameForAddress(uint64_t Address) const {
  if (ErrorOr<const BinarySection &> Section = getSectionForAddress(Address))
    return Section->getName();
  return std::make_error_code(std::errc::bad_address);
}

/// Record \p Section in the lookup tables of this context and return it.
///
/// The section is always added to Sections and NameToSection. It is added to
/// AddressToSection only when it is allocatable with a non-zero address, and
/// to SectionRefToBinarySection only when it has a section reference.
/// Registering the same pointer twice trips the assertion.
///
/// The sibling overloads pass pointers obtained from `new`, and the deregister
/// functions release registered sections with `delete`.
BinarySection &BinaryContext::registerSection(BinarySection *Section) {
  auto Res = Sections.insert(Section);
  (void)Res; // Only used by the assert below.
  assert(Res.second && "can't register the same section twice.");

  // Only register allocatable sections in the AddressToSection map.
  if (Section->isAllocatable() && Section->getAddress())
    AddressToSection.insert(std::make_pair(Section->getAddress(), Section));
  NameToSection.insert(
      std::make_pair(std::string(Section->getName()), Section));
  if (Section->hasSectionRef())
    SectionRefToBinarySection.insert(
        std::make_pair(Section->getSectionRef(), Section));

  LLVM_DEBUG(dbgs() << "BOLT-DEBUG: registering " << *Section << "\n");
  return *Section;
}

/// Allocate a BinarySection for the object-file section \p Section and
/// register it.
BinarySection &BinaryContext::registerSection(SectionRef Section) {
  return registerSection(new BinarySection(*this, Section));
}

/// Allocate a BinarySection named \p SectionName, constructed from
/// \p OriginalSection, and register it.
BinarySection &
BinaryContext::registerSection(const Twine &SectionName,
                               const BinarySection &OriginalSection) {
  return registerSection(
      new BinarySection(*this, SectionName, OriginalSection));
}

/// Update the section named \p Name in place if one exists, otherwise create
/// and register a new one from the given attributes.
///
/// Updating asserts that exactly one section carries that name, and (for ELF)
/// that the update did not change whether the section is allocatable.
///
/// \returns the updated or newly registered section.
BinarySection &
BinaryContext::registerOrUpdateSection(const Twine &Name, unsigned ELFType,
                                       unsigned ELFFlags, uint8_t *Data,
                                       uint64_t Size, unsigned Alignment) {
  auto NamedSections = getSectionByName(Name);
  if (NamedSections.begin() != NamedSections.end()) {
    assert(std::next(NamedSections.begin()) == NamedSections.end() &&
           "can only update unique sections");
    BinarySection *Section = NamedSections.begin()->second;

    LLVM_DEBUG(dbgs() << "BOLT-DEBUG: updating " << *Section << " -> ");
    // Allocation status before the update, compared after it in the assert.
    const bool Flag = Section->isAllocatable();
    (void)Flag;
    Section->update(Data, Size, Alignment, ELFType, ELFFlags);
    LLVM_DEBUG(dbgs() << *Section << "\n");
    // FIXME: Fix section flags/attributes for MachO.
    if (isELF())
      assert(Flag == Section->isAllocatable() &&
             "can't change section allocation status");
    return *Section;
  }

  return registerSection(
      new BinarySection(*this, Name, Data, Size, Alignment, ELFType, ELFFlags));
}

/// Remove from NameToSection the entry that maps the name of \p Section to
/// this exact section object. Other sections sharing the name are left alone;
/// at most one entry is erased.
void BinaryContext::deregisterSectionName(const BinarySection &Section) {
  auto NameRange = NameToSection.equal_range(Section.getName().str());
  while (NameRange.first != NameRange.second) {
    if (NameRange.first->second == &Section) {
      NameToSection.erase(NameRange.first);
      break;
    }
    ++NameRange.first;
  }
}

/// Remove and delete every section that has no section reference, has zero
/// output size, and is not the "<absolute>" section.
///
/// Only Sections and NameToSection are updated here; AddressToSection is not
/// touched by this function.
void BinaryContext::deregisterUnusedSections() {
  ErrorOr<BinarySection &> AbsSection = getUniqueSectionByName("<absolute>");
  // The iterator is advanced manually because erase() returns the successor.
  for (auto SI = Sections.begin(); SI != Sections.end();) {
    BinarySection *Section = *SI;
    if (Section->hasSectionRef() || Section->getOutputSize() ||
        (AbsSection && Section == &AbsSection.get())) {
      ++SI;
      continue;
    }

    // NOTE(review): this message uses the "LLVM-DEBUG:" prefix while the other
    // debug messages in this part of the file use "BOLT-DEBUG:" - confirm
    // whether that is intentional.
    LLVM_DEBUG(dbgs() << "LLVM-DEBUG: deregistering " << Section->getName()
                      << '\n';);
    deregisterSectionName(*Section);
    SI = Sections.erase(SI);
    delete Section;
  }
}

/// Remove \p Section from AddressToSection, NameToSection and Sections, then
/// delete it.
///
/// NOTE(review): SectionRefToBinarySection is not updated here, although
/// registerSection() may have added an entry for this section - confirm that
/// no stale entry can be observed afterwards.
///
/// \returns true if the section was registered and has been destroyed, false
///          if it was not found in Sections (nothing is changed then).
bool BinaryContext::deregisterSection(BinarySection &Section) {
  BinarySection *SectionPtr = &Section;
  auto Itr = Sections.find(SectionPtr);
  if (Itr != Sections.end()) {
    // Several sections may share an address; erase only this one's entry.
    auto Range = AddressToSection.equal_range(SectionPtr->getAddress());
    while (Range.first != Range.second) {
      if (Range.first->second == SectionPtr) {
        AddressToSection.erase(Range.first);
        break;
      }
      ++Range.first;
    }

    deregisterSectionName(*SectionPtr);
    Sections.erase(Itr);
    delete SectionPtr;
    return true;
  }
  return false;
}

/// Rename the registered section \p Section to \p NewName, also setting its
/// output name, and refresh its NameToSection entry.
///
/// The section is taken out of Sections before its name changes and put back
/// afterwards (presumably because the ordering of Sections depends on the
/// name - confirm against the container's comparator).
void BinaryContext::renameSection(BinarySection &Section,
                                  const Twine &NewName) {
  auto Itr = Sections.find(&Section);
  assert(Itr != Sections.end() && "Section must exist to be renamed.");
  Sections.erase(Itr);

  // Must run while Section still carries the old name.
  deregisterSectionName(Section);

  Section.Name = NewName.str();
  Section.setOutputName(Section.Name);

  NameToSection.insert(std::make_pair(Section.Name, &Section));

  // Reinsert with the new name.
  Sections.insert(&Section);
}

/// Print every registered section to \p OS, one per line, each prefixed with
/// "BOLT-INFO: ".
void BinaryContext::printSections(raw_ostream &OS) const {
  for (BinarySection *const &Section : Sections)
    OS << "BOLT-INFO: " << *Section << "\n";
}

/// Return the unique section named "<absolute>", creating it through
/// registerOrUpdateSection() with type ELF::SHT_NULL and flags 0 when it does
/// not exist yet.
BinarySection &BinaryContext::absoluteSection() {
  if (ErrorOr<BinarySection &> Section = getUniqueSectionByName("<absolute>"))
    return *Section;
  return registerOrUpdateSection("<absolute>", ELF::SHT_NULL, 0u);
}

/// Read an unsigned value of \p Size bytes stored at \p Address.
///
/// The value is extracted from the contents of the containing section at
/// offset \p Address minus the section address, using the endianness and
/// pointer size reported by AsmInfo.
///
/// \returns std::errc::bad_address if no section contains \p Address, 0 if the
///          section is virtual, otherwise the extracted value.
ErrorOr<uint64_t> BinaryContext::getUnsignedValueAtAddress(uint64_t Address,
                                                           size_t Size) const {
  const ErrorOr<const BinarySection &> Section = getSectionForAddress(Address);
  if (!Section)
    return std::make_error_code(std::errc::bad_address);

  if (Section->isVirtual())
    return 0;

  DataExtractor DE(Section->getContents(), AsmInfo->isLittleEndian(),
                   AsmInfo->getCodePointerSize());
  auto ValueOffset = static_cast<uint64_t>(Address - Section->getAddress());
  return DE.getUnsigned(&ValueOffset, Size);
}

/// Signed counterpart of getUnsignedValueAtAddress(): same lookup and the same
/// error and virtual-section results, but the value is read with
/// DataExtractor::getSigned().
ErrorOr<uint64_t> BinaryContext::getSignedValueAtAddress(uint64_t Address,
                                                         size_t Size) const {
  const ErrorOr<const BinarySection &> Section = getSectionForAddress(Address);
  if (!Section)
    return std::make_error_code(std::errc::bad_address);

  if (Section->isVirtual())
    return 0;

  DataExtractor DE(Section->getContents(), AsmInfo->isLittleEndian(),
                   AsmInfo->getCodePointerSize());
  auto ValueOffset = static_cast<uint64_t>(Address - Section->getAddress());
  return DE.getSigned(&ValueOffset, Size);
}

/// Add a relocation to the section containing \p Address, at the
/// section-relative offset of \p Address.
///
/// A missing section is checked only by the assertion; the lookup result is
/// dereferenced unconditionally afterwards.
void BinaryContext::addRelocation(uint64_t Address, MCSymbol *Symbol,
                                  uint64_t Type, uint64_t Addend,
                                  uint64_t Value) {
  ErrorOr<BinarySection &> Section = getSectionForAddress(Address);
  assert(Section && "cannot find section for address");
  Section->addRelocation(Address - Section->getAddress(), Symbol, Type, Addend,
                         Value);
}

/// Add a dynamic relocation to the section containing \p Address, at the
/// section-relative offset of \p Address.
///
/// A missing section is checked only by the assertion; the lookup result is
/// dereferenced unconditionally afterwards.
void BinaryContext::addDynamicRelocation(uint64_t Address, MCSymbol *Symbol,
                                         uint64_t Type, uint64_t Addend,
                                         uint64_t Value) {
  ErrorOr<BinarySection &> Section = getSectionForAddress(Address);
  assert(Section && "cannot find section for address");
  Section->addDynamicRelocation(Address - Section->getAddress(), Symbol, Type,
                                Addend, Value);
}

/// Remove the relocation at \p Address from its containing section and return
/// the result of BinarySection::removeRelocationAt().
///
/// A missing section is checked only by the assertion; the lookup result is
/// dereferenced unconditionally afterwards.
bool BinaryContext::removeRelocationAt(uint64_t Address) {
  ErrorOr<BinarySection &> Section = getSectionForAddress(Address);
  assert(Section && "cannot find section for address");
  return Section->removeRelocationAt(Address - Section->getAddress());
}

/// Return the result of BinarySection::getRelocationAt() for \p Address in its
/// containing section, or nullptr when no section contains \p Address.
const Relocation *BinaryContext::getRelocationAt(uint64_t Address) const {
  ErrorOr<const BinarySection &> Section = getSectionForAddress(Address);
  if (!Section)
    return nullptr;

  return Section->getRelocationAt(Address - Section->getAddress());
}

/// Return the result of BinarySection::getDynamicRelocationAt() for
/// \p Address in its containing section, or nullptr when no section contains
/// \p Address.
const Relocation *BinaryContext::getDynamicRelocationAt(uint64_t Address) {
  ErrorOr<BinarySection &> Section = getSectionForAddress(Address);
  if (!Section)
    return nullptr;

  return Section->getDynamicRelocationAt(Address - Section->getAddress());
}

/// Mark data objects as immovable when \p Address falls exactly on a boundary
/// of \p BD.
///
/// If \p Address is the start of \p BD, \p BD and the object that ends exactly
/// there (if any) are pinned. If \p Address is the end of \p BD, \p BD and the
/// object that starts exactly there (if any) are pinned. Pinning is applied to
/// the atomic root of each object.
void BinaryContext::markAmbiguousRelocations(BinaryData &BD,
                                             const uint64_t Address) {
  // The parameter shadows the outer BD on purpose: the lambda is applied to
  // neighbouring objects as well.
  auto setImmovable = [&](BinaryData &BD) {
    BinaryData *Root = BD.getAtomicRoot();
    LLVM_DEBUG(if (Root->isMoveable()) {
      dbgs() << "BOLT-DEBUG: setting " << *Root << " as immovable "
             << "due to ambiguous relocation referencing 0x"
             << Twine::utohexstr(Address) << '\n';
    });
    Root->setIsMoveable(false);
  };

  if (Address == BD.getAddress()) {
    setImmovable(BD);

    // Set previous symbol as immovable
    BinaryData *Prev = getBinaryDataContainingAddress(Address - 1);
    if (Prev && Prev->getEndAddress() == BD.getAddress())
      setImmovable(*Prev);
  }

  if (Address == BD.getEndAddress()) {
    setImmovable(BD);

    // Set next symbol as immovable
    BinaryData *Next = getBinaryDataContainingAddress(BD.getEndAddress());
    if (Next && Next->getAddress() == BD.getEndAddress())
      setImmovable(*Next);
  }
}

/// Return the function registered for \p Symbol in SymbolToFunctionMap, or
/// nullptr if there is none.
///
/// The lookup is performed under a shared lock on SymbolToFunctionMapMutex.
///
/// \param EntryDesc optional out-parameter; when non-null and a function is
///        found, it receives BF->getEntryIDForSymbol(Symbol).
BinaryFunction *BinaryContext::getFunctionForSymbol(const MCSymbol *Symbol,
                                                    uint64_t *EntryDesc) {
  std::shared_lock<llvm::sys::RWMutex> Lock(SymbolToFunctionMapMutex);
  auto BFI = SymbolToFunctionMap.find(Symbol);
  if (BFI == SymbolToFunctionMap.end())
    return nullptr;

  BinaryFunction *BF = BFI->second;
  if (EntryDesc)
    *EntryDesc = BF->getEntryIDForSymbol(Symbol);

  return BF;
}

/// Print a bug-report banner for \p Function to errs() and terminate the
/// process with exit code 1.
///
/// The report contains the function's print name, its contents as a binary
/// block, the output of Function.dump(), and \p Message. This function does
/// not return.
void BinaryContext::exitWithBugReport(StringRef Message,
                                      const BinaryFunction &Function) const {
  errs() << "=======================================\n";
  errs() << "BOLT is unable to proceed because it couldn't properly understand "
            "this function.\n";
  errs() << "If you are running the most recent version of BOLT, you may "
            "want to "
            "report this and paste this dump.\nPlease check that there is no "
            "sensitive contents being shared in this dump.\n";
  errs() << "\nOffending function: " << Function.getPrintName() << "\n\n";
  ScopedPrinter SP(errs());
  SP.printBinaryBlock("Function contents", *Function.getData());
  errs() << "\n";
  Function.dump();
  errs() << "ERROR: " << Message;
  errs() << "\n=======================================\n";
  exit(1);
}

/// Create a new BinaryFunction named \p Name, append it to
/// InjectedBinaryFunctions, map its symbol to it, and put it into the CFG
/// state.
///
/// \returns the newly created function; the pointer is the one stored in
///          InjectedBinaryFunctions.
BinaryFunction *
BinaryContext::createInjectedBinaryFunction(const std::string &Name,
                                            bool IsSimple) {
  InjectedBinaryFunctions.push_back(new BinaryFunction(Name, *this, IsSimple));
  BinaryFunction *BF = InjectedBinaryFunctions.back();
  setSymbolToFunctionMap(BF->getSymbol(), BF);
  BF->CurrentState = BinaryFunction::State::CFG;
  return BF;
}

/// Emit \p BF into a scratch object streamer and measure the emitted code.
///
/// The main fragment is emitted into the text section of a local MC context
/// and every split fragment into its own ELF section; each is bracketed by
/// temporary labels. After laying out the assembler, sizes are computed as
/// differences of label offsets.
///
/// \param FixBranches when true, BF.fixBranches() is called first, which
///        modifies \p BF.
/// \returns {size of the main fragment, sum of the sizes of all split
///          fragments}.
std::pair<size_t, size_t>
BinaryContext::calculateEmittedSize(BinaryFunction &BF, bool FixBranches) {
  // Adjust branch instruction to match the current layout.
  if (FixBranches)
    BF.fixBranches();

  // Create local MC context to isolate the effect of ephemeral code emission.
  IndependentCodeEmitter MCEInstance = createIndependentMCCodeEmitter();
  MCContext *LocalCtx = MCEInstance.LocalCtx.get();
  // Raw pointer for now: it is needed to create the object writer below and is
  // then wrapped into a unique_ptr handed to the streamer.
  MCAsmBackend *MAB =
      TheTarget->createMCAsmBackend(*STI, *MRI, MCTargetOptions());

  // Destination buffer for the emitted bytes; only sizes are read back.
  SmallString<256> Code;
  raw_svector_ostream VecOS(Code);

  std::unique_ptr<MCObjectWriter> OW = MAB->createObjectWriter(VecOS);
  std::unique_ptr<MCStreamer> Streamer(TheTarget->createMCObjectStreamer(
      *TheTriple, *LocalCtx, std::unique_ptr<MCAsmBackend>(MAB), std::move(OW),
      std::unique_ptr<MCCodeEmitter>(MCEInstance.MCE.release()), *STI,
      /*RelaxAll=*/false,
      /*IncrementalLinkerCompatible=*/false,
      /*DWARFMustBeAtTheEnd=*/false));

  Streamer->initSections(false, *STI);

  MCSection *Section = MCEInstance.LocalMOFI->getTextSection();
  Section->setHasInstructions(true);

  // Create symbols in the LocalCtx so that they get destroyed with it.
  MCSymbol *StartLabel = LocalCtx->createTempSymbol();
  MCSymbol *EndLabel = LocalCtx->createTempSymbol();

  // Main fragment: StartLabel, body, EndLabel in the text section.
  Streamer->switchSection(Section);
  Streamer->emitLabel(StartLabel);
  emitFunctionBody(*Streamer, BF, BF.getLayout().getMainFragment(),
                   /*EmitCodeOnly=*/true);
  Streamer->emitLabel(EndLabel);

  // One (start, end) label pair per split fragment, in emission order.
  using LabelRange = std::pair<const MCSymbol *, const MCSymbol *>;
  SmallVector<LabelRange> SplitLabels;
  for (FunctionFragment &FF : BF.getLayout().getSplitFragments()) {
    MCSymbol *const SplitStartLabel = LocalCtx->createTempSymbol();
    MCSymbol *const SplitEndLabel = LocalCtx->createTempSymbol();
    SplitLabels.emplace_back(SplitStartLabel, SplitEndLabel);

    MCSectionELF *const SplitSection = LocalCtx->getELFSection(
        BF.getCodeSectionName(FF.getFragmentNum()), ELF::SHT_PROGBITS,
        ELF::SHF_EXECINSTR | ELF::SHF_ALLOC);
    SplitSection->setHasInstructions(true);
    Streamer->switchSection(SplitSection);

    Streamer->emitLabel(SplitStartLabel);
    emitFunctionBody(*Streamer, BF, FF, /*EmitCodeOnly=*/true);
    Streamer->emitLabel(SplitEndLabel);
    // To avoid calling MCObjectStreamer::flushPendingLabels() which is
    // private
    Streamer->emitBytes(StringRef(""));
    Streamer->switchSection(Section);
  }

  // To avoid calling MCObjectStreamer::flushPendingLabels() which is private or
  // MCStreamer::Finish(), which does more than we want
  Streamer->emitBytes(StringRef(""));

  // Lay out the assembler so that symbol offsets can be queried.
  MCAssembler &Assembler =
      static_cast<MCObjectStreamer *>(Streamer.get())->getAssembler();
  MCAsmLayout Layout(Assembler);
  Assembler.layout(Layout);

  const uint64_t HotSize =
      Layout.getSymbolOffset(*EndLabel) - Layout.getSymbolOffset(*StartLabel);
  // Sum of (end - start) over all split fragments.
  const uint64_t ColdSize =
      std::accumulate(SplitLabels.begin(), SplitLabels.end(), 0ULL,
                      [&](const uint64_t Accu, const LabelRange &Labels) {
                        return Accu + Layout.getSymbolOffset(*Labels.second) -
                               Layout.getSymbolOffset(*Labels.first);
                      });

  // Clean-up the effect of the code emission.
  for (const MCSymbol &Symbol : Assembler.symbols()) {
    MCSymbol *MutableSymbol = const_cast<MCSymbol *>(&Symbol);
    MutableSymbol->setUndefined();
    MutableSymbol->setIsRegistered(false);
  }

  return std::make_pair(HotSize, ColdSize);
}

/// Check that the byte sequence \p InputSequence survives a
/// disassemble/re-encode round trip unchanged.
///
/// The bytes are disassembled with DisAsm, the resulting instruction is
/// encoded with MCE, and both byte sequences are compared. With
/// opts::Verbosity > 1 a mismatch is reported on errs().
///
/// NOTE(review): the return value of getInstruction() is not inspected and
/// InstSize is uninitialized before the call; the size check is an assertion
/// only.
///
/// \returns true when the re-encoded bytes equal \p InputSequence.
bool BinaryContext::validateInstructionEncoding(
    ArrayRef<uint8_t> InputSequence) const {
  MCInst Inst;
  uint64_t InstSize;
  DisAsm->getInstruction(Inst, InstSize, InputSequence, 0, nulls());
  assert(InstSize == InputSequence.size() &&
         "Disassembled instruction size does not match the sequence.");

  SmallString<256> Code;
  SmallVector<MCFixup, 4> Fixups;

  MCE->encodeInstruction(Inst, Code, Fixups, *STI);
  auto OutputSequence = ArrayRef<uint8_t>((uint8_t *)Code.data(), Code.size());
  if (InputSequence != OutputSequence) {
    if (opts::Verbosity > 1) {
      errs() << "BOLT-WARNING: mismatched encoding detected\n"
             << " input: " << InputSequence << '\n'
             << " output: " << OutputSequence << '\n';
    }
    return false;
  }

  return true;
}

/// Return the execution-count threshold, computed lazily and cached.
///
/// The value is the larger of opts::ExecutionCountThreshold and
/// SumExecutionCount / (2 * NumProfiledFuncs), with 1 used in place of the
/// quotient when NumProfiledFuncs is zero.
///
/// The cache is a function-local static, so it is shared by every caller of
/// this function regardless of the object it is invoked on. A computed value
/// of 0 is not retained: the computation is repeated on the next call.
uint64_t BinaryContext::getHotThreshold() const {
  static uint64_t Threshold = 0;
  if (Threshold == 0) {
    Threshold = std::max(
        (uint64_t)opts::ExecutionCountThreshold,
        NumProfiledFuncs ? SumExecutionCount / (2 * NumProfiledFuncs) : 1);
  }
  return Threshold;
}

/// Return the function whose address range contains \p Address, or nullptr.
///
/// The candidate is the BinaryFunctions entry with the greatest key that is
/// <= \p Address.
///
/// \param CheckPastEnd when true, an address equal to start + size (one past
///        the last byte) is accepted as well.
/// \param UseMaxSize when true, the range length is getMaxSize() instead of
///        getSize().
BinaryFunction *BinaryContext::getBinaryFunctionContainingAddress(
    uint64_t Address, bool CheckPastEnd, bool UseMaxSize) {
  auto FI = BinaryFunctions.upper_bound(Address);
  if (FI == BinaryFunctions.begin())
    return nullptr;
  --FI;

  const uint64_t UsedSize =
      UseMaxSize ? FI->second.getMaxSize() : FI->second.getSize();

  if (Address >= FI->first + UsedSize + (CheckPastEnd ? 1 : 0))
    return nullptr;

  return &FI->second;
}

/// Return the function that starts at \p Address, or nullptr.
///
/// Falls back to the function that owns the symbol of the data object
/// registered at \p Address, accepting it only when the symbol maps to entry
/// ID 0 of that function.
BinaryFunction *BinaryContext::getBinaryFunctionAtAddress(uint64_t Address) {
  // First, try to find a function starting at the given address. If the
  // function was folded, this will get us the original folded function if it
  // wasn't removed from the list, e.g. in non-relocation mode.
  auto BFI = BinaryFunctions.find(Address);
  if (BFI != BinaryFunctions.end())
    return &BFI->second;

  // We might have folded the function matching the object at the given
  // address. In such case, we look for a function matching the symbol
  // registered at the original address. The new function (the one that the
  // original was folded into) will hold the symbol.
  if (const BinaryData *BD = getBinaryDataAtAddress(Address)) {
    uint64_t EntryID = 0;
    BinaryFunction *BF = getFunctionForSymbol(BD->getSymbol(), &EntryID);
    if (BF && EntryID == 0)
      return BF;
  }
  return nullptr;
}

/// Translate \p InputRanges into output address ranges.
///
/// For every input range, walks BinaryFunctions starting at the first entry
/// whose key is >= Range.LowPC and stops at the first function whose address
/// is >= Range.HighPC; the output address ranges of each visited function are
/// appended to the result in visiting order.
DebugAddressRangesVector BinaryContext::translateModuleAddressRanges(
    const DWARFAddressRangesVector &InputRanges) const {
  DebugAddressRangesVector OutputRanges;

  for (const DWARFAddressRange Range : InputRanges) {
    auto BFI = BinaryFunctions.lower_bound(Range.LowPC);
    while (BFI != BinaryFunctions.end()) {
      const BinaryFunction &Function = BFI->second;
      if (Function.getAddress() >= Range.HighPC)
        break;
      const DebugAddressRangesVector FunctionRanges =
          Function.getOutputAddressRanges();
      llvm::move(FunctionRanges, std::back_inserter(OutputRanges));
      std::advance(BFI, 1);
    }
  }

  return OutputRanges;
}

} // namespace bolt
} // namespace llvm