1 //===- bolt/Core/BinaryContext.cpp - Low-level context --------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file implements the BinaryContext class. 10 // 11 //===----------------------------------------------------------------------===// 12 13 #include "bolt/Core/BinaryContext.h" 14 #include "bolt/Core/BinaryEmitter.h" 15 #include "bolt/Core/BinaryFunction.h" 16 #include "bolt/Utils/CommandLineOpts.h" 17 #include "bolt/Utils/NameResolver.h" 18 #include "bolt/Utils/Utils.h" 19 #include "llvm/ADT/STLExtras.h" 20 #include "llvm/ADT/Twine.h" 21 #include "llvm/DebugInfo/DWARF/DWARFCompileUnit.h" 22 #include "llvm/DebugInfo/DWARF/DWARFFormValue.h" 23 #include "llvm/DebugInfo/DWARF/DWARFUnit.h" 24 #include "llvm/MC/MCAsmLayout.h" 25 #include "llvm/MC/MCAssembler.h" 26 #include "llvm/MC/MCContext.h" 27 #include "llvm/MC/MCDisassembler/MCDisassembler.h" 28 #include "llvm/MC/MCInstPrinter.h" 29 #include "llvm/MC/MCObjectStreamer.h" 30 #include "llvm/MC/MCObjectWriter.h" 31 #include "llvm/MC/MCRegisterInfo.h" 32 #include "llvm/MC/MCSectionELF.h" 33 #include "llvm/MC/MCStreamer.h" 34 #include "llvm/MC/MCSubtargetInfo.h" 35 #include "llvm/MC/MCSymbol.h" 36 #include "llvm/Support/CommandLine.h" 37 #include "llvm/Support/Error.h" 38 #include "llvm/Support/Regex.h" 39 #include <algorithm> 40 #include <functional> 41 #include <iterator> 42 #include <numeric> 43 #include <unordered_set> 44 45 using namespace llvm; 46 47 #undef DEBUG_TYPE 48 #define DEBUG_TYPE "bolt" 49 50 namespace opts { 51 52 cl::opt<bool> NoHugePages("no-huge-pages", 53 cl::desc("use regular size pages for code alignment"), 54 cl::Hidden, cl::cat(BoltCategory)); 55 56 static cl::opt<bool> 57 PrintDebugInfo("print-debug-info", 58 cl::desc("print debug info when printing functions"), 59 cl::Hidden, 60 cl::ZeroOrMore, 61 cl::cat(BoltCategory)); 62 63 cl::opt<bool> PrintRelocations( 64 "print-relocations", 65 cl::desc("print relocations when printing functions/objects"), cl::Hidden, 66 cl::cat(BoltCategory)); 67 68 static cl::opt<bool> 69 PrintMemData("print-mem-data", 70 cl::desc("print memory data annotations when printing functions"), 71 cl::Hidden, 72 cl::ZeroOrMore, 73 cl::cat(BoltCategory)); 74 75 } // namespace opts 76 77 namespace llvm { 78 namespace bolt { 79 80 BinaryContext::BinaryContext(std::unique_ptr<MCContext> Ctx, 81 std::unique_ptr<DWARFContext> DwCtx, 82 std::unique_ptr<Triple> TheTriple, 83 const Target *TheTarget, std::string TripleName, 84 std::unique_ptr<MCCodeEmitter> MCE, 85 std::unique_ptr<MCObjectFileInfo> MOFI, 86 std::unique_ptr<const MCAsmInfo> AsmInfo, 87 std::unique_ptr<const MCInstrInfo> MII, 88 std::unique_ptr<const MCSubtargetInfo> STI, 89 std::unique_ptr<MCInstPrinter> InstPrinter, 90 std::unique_ptr<const MCInstrAnalysis> MIA, 91 std::unique_ptr<MCPlusBuilder> MIB, 92 std::unique_ptr<const MCRegisterInfo> MRI, 93 std::unique_ptr<MCDisassembler> DisAsm) 94 : Ctx(std::move(Ctx)), DwCtx(std::move(DwCtx)), 95 TheTriple(std::move(TheTriple)), TheTarget(TheTarget), 96 TripleName(TripleName), MCE(std::move(MCE)), MOFI(std::move(MOFI)), 97 AsmInfo(std::move(AsmInfo)), MII(std::move(MII)), STI(std::move(STI)), 98 InstPrinter(std::move(InstPrinter)), MIA(std::move(MIA)), 99 MIB(std::move(MIB)), MRI(std::move(MRI)), DisAsm(std::move(DisAsm)) { 100 Relocation::Arch = this->TheTriple->getArch(); 101 RegularPageSize = isAArch64() ? RegularPageSizeAArch64 : RegularPageSizeX86; 102 PageAlign = opts::NoHugePages ? RegularPageSize : HugePageSize; 103 } 104 105 BinaryContext::~BinaryContext() { 106 for (BinarySection *Section : Sections) 107 delete Section; 108 for (BinaryFunction *InjectedFunction : InjectedBinaryFunctions) 109 delete InjectedFunction; 110 for (std::pair<const uint64_t, JumpTable *> JTI : JumpTables) 111 delete JTI.second; 112 clearBinaryData(); 113 } 114 115 /// Create BinaryContext for a given architecture \p ArchName and 116 /// triple \p TripleName. 117 Expected<std::unique_ptr<BinaryContext>> 118 BinaryContext::createBinaryContext(const ObjectFile *File, bool IsPIC, 119 std::unique_ptr<DWARFContext> DwCtx) { 120 StringRef ArchName = ""; 121 StringRef FeaturesStr = ""; 122 switch (File->getArch()) { 123 case llvm::Triple::x86_64: 124 ArchName = "x86-64"; 125 FeaturesStr = "+nopl"; 126 break; 127 case llvm::Triple::aarch64: 128 ArchName = "aarch64"; 129 FeaturesStr = "+all"; 130 break; 131 default: 132 return createStringError(std::errc::not_supported, 133 "BOLT-ERROR: Unrecognized machine in ELF file"); 134 } 135 136 auto TheTriple = std::make_unique<Triple>(File->makeTriple()); 137 const std::string TripleName = TheTriple->str(); 138 139 std::string Error; 140 const Target *TheTarget = 141 TargetRegistry::lookupTarget(std::string(ArchName), *TheTriple, Error); 142 if (!TheTarget) 143 return createStringError(make_error_code(std::errc::not_supported), 144 Twine("BOLT-ERROR: ", Error)); 145 146 std::unique_ptr<const MCRegisterInfo> MRI( 147 TheTarget->createMCRegInfo(TripleName)); 148 if (!MRI) 149 return createStringError( 150 make_error_code(std::errc::not_supported), 151 Twine("BOLT-ERROR: no register info for target ", TripleName)); 152 153 // Set up disassembler. 154 std::unique_ptr<MCAsmInfo> AsmInfo( 155 TheTarget->createMCAsmInfo(*MRI, TripleName, MCTargetOptions())); 156 if (!AsmInfo) 157 return createStringError( 158 make_error_code(std::errc::not_supported), 159 Twine("BOLT-ERROR: no assembly info for target ", TripleName)); 160 // BOLT creates "func@PLT" symbols for PLT entries. In function assembly dump 161 // we want to emit such names as using @PLT without double quotes to convey 162 // variant kind to the assembler. BOLT doesn't rely on the linker so we can 163 // override the default AsmInfo behavior to emit names the way we want. 164 AsmInfo->setAllowAtInName(true); 165 166 std::unique_ptr<const MCSubtargetInfo> STI( 167 TheTarget->createMCSubtargetInfo(TripleName, "", FeaturesStr)); 168 if (!STI) 169 return createStringError( 170 make_error_code(std::errc::not_supported), 171 Twine("BOLT-ERROR: no subtarget info for target ", TripleName)); 172 173 std::unique_ptr<const MCInstrInfo> MII(TheTarget->createMCInstrInfo()); 174 if (!MII) 175 return createStringError( 176 make_error_code(std::errc::not_supported), 177 Twine("BOLT-ERROR: no instruction info for target ", TripleName)); 178 179 std::unique_ptr<MCContext> Ctx( 180 new MCContext(*TheTriple, AsmInfo.get(), MRI.get(), STI.get())); 181 std::unique_ptr<MCObjectFileInfo> MOFI( 182 TheTarget->createMCObjectFileInfo(*Ctx, IsPIC)); 183 Ctx->setObjectFileInfo(MOFI.get()); 184 // We do not support X86 Large code model. Change this in the future. 185 bool Large = false; 186 if (TheTriple->getArch() == llvm::Triple::aarch64) 187 Large = true; 188 unsigned LSDAEncoding = 189 Large ? dwarf::DW_EH_PE_absptr : dwarf::DW_EH_PE_udata4; 190 if (IsPIC) { 191 LSDAEncoding = dwarf::DW_EH_PE_pcrel | 192 (Large ? dwarf::DW_EH_PE_sdata8 : dwarf::DW_EH_PE_sdata4); 193 } 194 195 std::unique_ptr<MCDisassembler> DisAsm( 196 TheTarget->createMCDisassembler(*STI, *Ctx)); 197 198 if (!DisAsm) 199 return createStringError( 200 make_error_code(std::errc::not_supported), 201 Twine("BOLT-ERROR: no disassembler info for target ", TripleName)); 202 203 std::unique_ptr<const MCInstrAnalysis> MIA( 204 TheTarget->createMCInstrAnalysis(MII.get())); 205 if (!MIA) 206 return createStringError( 207 make_error_code(std::errc::not_supported), 208 Twine("BOLT-ERROR: failed to create instruction analysis for target ", 209 TripleName)); 210 211 int AsmPrinterVariant = AsmInfo->getAssemblerDialect(); 212 std::unique_ptr<MCInstPrinter> InstructionPrinter( 213 TheTarget->createMCInstPrinter(*TheTriple, AsmPrinterVariant, *AsmInfo, 214 *MII, *MRI)); 215 if (!InstructionPrinter) 216 return createStringError( 217 make_error_code(std::errc::not_supported), 218 Twine("BOLT-ERROR: no instruction printer for target ", TripleName)); 219 InstructionPrinter->setPrintImmHex(true); 220 221 std::unique_ptr<MCCodeEmitter> MCE( 222 TheTarget->createMCCodeEmitter(*MII, *Ctx)); 223 224 // Make sure we don't miss any output on core dumps. 225 outs().SetUnbuffered(); 226 errs().SetUnbuffered(); 227 dbgs().SetUnbuffered(); 228 229 auto BC = std::make_unique<BinaryContext>( 230 std::move(Ctx), std::move(DwCtx), std::move(TheTriple), TheTarget, 231 std::string(TripleName), std::move(MCE), std::move(MOFI), 232 std::move(AsmInfo), std::move(MII), std::move(STI), 233 std::move(InstructionPrinter), std::move(MIA), nullptr, std::move(MRI), 234 std::move(DisAsm)); 235 236 BC->LSDAEncoding = LSDAEncoding; 237 238 BC->MAB = std::unique_ptr<MCAsmBackend>( 239 BC->TheTarget->createMCAsmBackend(*BC->STI, *BC->MRI, MCTargetOptions())); 240 241 BC->setFilename(File->getFileName()); 242 243 BC->HasFixedLoadAddress = !IsPIC; 244 245 BC->SymbolicDisAsm = std::unique_ptr<MCDisassembler>( 246 BC->TheTarget->createMCDisassembler(*BC->STI, *BC->Ctx)); 247 248 if (!BC->SymbolicDisAsm) 249 return createStringError( 250 make_error_code(std::errc::not_supported), 251 Twine("BOLT-ERROR: no disassembler info for target ", TripleName)); 252 253 return std::move(BC); 254 } 255 256 bool BinaryContext::forceSymbolRelocations(StringRef SymbolName) const { 257 if (opts::HotText && 258 (SymbolName == "__hot_start" || SymbolName == "__hot_end")) 259 return true; 260 261 if (opts::HotData && 262 (SymbolName == "__hot_data_start" || SymbolName == "__hot_data_end")) 263 return true; 264 265 if (SymbolName == "_end") 266 return true; 267 268 return false; 269 } 270 271 std::unique_ptr<MCObjectWriter> 272 BinaryContext::createObjectWriter(raw_pwrite_stream &OS) { 273 return MAB->createObjectWriter(OS); 274 } 275 276 bool BinaryContext::validateObjectNesting() const { 277 auto Itr = BinaryDataMap.begin(); 278 auto End = BinaryDataMap.end(); 279 bool Valid = true; 280 while (Itr != End) { 281 auto Next = std::next(Itr); 282 while (Next != End && 283 Itr->second->getSection() == Next->second->getSection() && 284 Itr->second->containsRange(Next->second->getAddress(), 285 Next->second->getSize())) { 286 if (Next->second->Parent != Itr->second) { 287 errs() << "BOLT-WARNING: object nesting incorrect for:\n" 288 << "BOLT-WARNING: " << *Itr->second << "\n" 289 << "BOLT-WARNING: " << *Next->second << "\n"; 290 Valid = false; 291 } 292 ++Next; 293 } 294 Itr = Next; 295 } 296 return Valid; 297 } 298 299 bool BinaryContext::validateHoles() const { 300 bool Valid = true; 301 for (BinarySection &Section : sections()) { 302 for (const Relocation &Rel : Section.relocations()) { 303 uint64_t RelAddr = Rel.Offset + Section.getAddress(); 304 const BinaryData *BD = getBinaryDataContainingAddress(RelAddr); 305 if (!BD) { 306 errs() << "BOLT-WARNING: no BinaryData found for relocation at address" 307 << " 0x" << Twine::utohexstr(RelAddr) << " in " 308 << Section.getName() << "\n"; 309 Valid = false; 310 } else if (!BD->getAtomicRoot()) { 311 errs() << "BOLT-WARNING: no atomic BinaryData found for relocation at " 312 << "address 0x" << Twine::utohexstr(RelAddr) << " in " 313 << Section.getName() << "\n"; 314 Valid = false; 315 } 316 } 317 } 318 return Valid; 319 } 320 321 void BinaryContext::updateObjectNesting(BinaryDataMapType::iterator GAI) { 322 const uint64_t Address = GAI->second->getAddress(); 323 const uint64_t Size = GAI->second->getSize(); 324 325 auto fixParents = [&](BinaryDataMapType::iterator Itr, 326 BinaryData *NewParent) { 327 BinaryData *OldParent = Itr->second->Parent; 328 Itr->second->Parent = NewParent; 329 ++Itr; 330 while (Itr != BinaryDataMap.end() && OldParent && 331 Itr->second->Parent == OldParent) { 332 Itr->second->Parent = NewParent; 333 ++Itr; 334 } 335 }; 336 337 // Check if the previous symbol contains the newly added symbol. 338 if (GAI != BinaryDataMap.begin()) { 339 BinaryData *Prev = std::prev(GAI)->second; 340 while (Prev) { 341 if (Prev->getSection() == GAI->second->getSection() && 342 Prev->containsRange(Address, Size)) { 343 fixParents(GAI, Prev); 344 } else { 345 fixParents(GAI, nullptr); 346 } 347 Prev = Prev->Parent; 348 } 349 } 350 351 // Check if the newly added symbol contains any subsequent symbols. 352 if (Size != 0) { 353 BinaryData *BD = GAI->second->Parent ? GAI->second->Parent : GAI->second; 354 auto Itr = std::next(GAI); 355 while ( 356 Itr != BinaryDataMap.end() && 357 BD->containsRange(Itr->second->getAddress(), Itr->second->getSize())) { 358 Itr->second->Parent = BD; 359 ++Itr; 360 } 361 } 362 } 363 364 iterator_range<BinaryContext::binary_data_iterator> 365 BinaryContext::getSubBinaryData(BinaryData *BD) { 366 auto Start = std::next(BinaryDataMap.find(BD->getAddress())); 367 auto End = Start; 368 while (End != BinaryDataMap.end() && BD->isAncestorOf(End->second)) 369 ++End; 370 return make_range(Start, End); 371 } 372 373 std::pair<const MCSymbol *, uint64_t> 374 BinaryContext::handleAddressRef(uint64_t Address, BinaryFunction &BF, 375 bool IsPCRel) { 376 if (isAArch64()) { 377 // Check if this is an access to a constant island and create bookkeeping 378 // to keep track of it and emit it later as part of this function. 379 if (MCSymbol *IslandSym = BF.getOrCreateIslandAccess(Address)) 380 return std::make_pair(IslandSym, 0); 381 382 // Detect custom code written in assembly that refers to arbitrary 383 // constant islands from other functions. Write this reference so we 384 // can pull this constant island and emit it as part of this function 385 // too. 386 auto IslandIter = AddressToConstantIslandMap.lower_bound(Address); 387 388 if (IslandIter != AddressToConstantIslandMap.begin() && 389 (IslandIter == AddressToConstantIslandMap.end() || 390 IslandIter->first > Address)) 391 --IslandIter; 392 393 if (IslandIter != AddressToConstantIslandMap.end()) { 394 // Fall-back to referencing the original constant island in the presence 395 // of dynamic relocs, as we currently do not support cloning them. 396 // Notice: we might fail to link because of this, if the original constant 397 // island we are referring would be emitted too far away. 398 if (IslandIter->second->hasDynamicRelocationAtIsland()) { 399 MCSymbol *IslandSym = 400 IslandIter->second->getOrCreateIslandAccess(Address); 401 if (IslandSym) 402 return std::make_pair(IslandSym, 0); 403 } else if (MCSymbol *IslandSym = 404 IslandIter->second->getOrCreateProxyIslandAccess(Address, 405 BF)) { 406 BF.createIslandDependency(IslandSym, IslandIter->second); 407 return std::make_pair(IslandSym, 0); 408 } 409 } 410 } 411 412 // Note that the address does not necessarily have to reside inside 413 // a section, it could be an absolute address too. 414 ErrorOr<BinarySection &> Section = getSectionForAddress(Address); 415 if (Section && Section->isText()) { 416 if (BF.containsAddress(Address, /*UseMaxSize=*/isAArch64())) { 417 if (Address != BF.getAddress()) { 418 // The address could potentially escape. Mark it as another entry 419 // point into the function. 420 if (opts::Verbosity >= 1) { 421 outs() << "BOLT-INFO: potentially escaped address 0x" 422 << Twine::utohexstr(Address) << " in function " << BF << '\n'; 423 } 424 BF.HasInternalLabelReference = true; 425 return std::make_pair( 426 BF.addEntryPointAtOffset(Address - BF.getAddress()), 0); 427 } 428 } else { 429 addInterproceduralReference(&BF, Address); 430 } 431 } 432 433 // With relocations, catch jump table references outside of the basic block 434 // containing the indirect jump. 435 if (HasRelocations) { 436 const MemoryContentsType MemType = analyzeMemoryAt(Address, BF); 437 if (MemType == MemoryContentsType::POSSIBLE_PIC_JUMP_TABLE && IsPCRel) { 438 const MCSymbol *Symbol = 439 getOrCreateJumpTable(BF, Address, JumpTable::JTT_PIC); 440 441 return std::make_pair(Symbol, 0); 442 } 443 } 444 445 if (BinaryData *BD = getBinaryDataContainingAddress(Address)) 446 return std::make_pair(BD->getSymbol(), Address - BD->getAddress()); 447 448 // TODO: use DWARF info to get size/alignment here? 449 MCSymbol *TargetSymbol = getOrCreateGlobalSymbol(Address, "DATAat"); 450 LLVM_DEBUG(dbgs() << "Created symbol " << TargetSymbol->getName() << '\n'); 451 return std::make_pair(TargetSymbol, 0); 452 } 453 454 MemoryContentsType BinaryContext::analyzeMemoryAt(uint64_t Address, 455 BinaryFunction &BF) { 456 if (!isX86()) 457 return MemoryContentsType::UNKNOWN; 458 459 ErrorOr<BinarySection &> Section = getSectionForAddress(Address); 460 if (!Section) { 461 // No section - possibly an absolute address. Since we don't allow 462 // internal function addresses to escape the function scope - we 463 // consider it a tail call. 464 if (opts::Verbosity > 1) { 465 errs() << "BOLT-WARNING: no section for address 0x" 466 << Twine::utohexstr(Address) << " referenced from function " << BF 467 << '\n'; 468 } 469 return MemoryContentsType::UNKNOWN; 470 } 471 472 if (Section->isVirtual()) { 473 // The contents are filled at runtime. 474 return MemoryContentsType::UNKNOWN; 475 } 476 477 // No support for jump tables in code yet. 478 if (Section->isText()) 479 return MemoryContentsType::UNKNOWN; 480 481 // Start with checking for PIC jump table. We expect non-PIC jump tables 482 // to have high 32 bits set to 0. 483 if (analyzeJumpTable(Address, JumpTable::JTT_PIC, BF)) 484 return MemoryContentsType::POSSIBLE_PIC_JUMP_TABLE; 485 486 if (analyzeJumpTable(Address, JumpTable::JTT_NORMAL, BF)) 487 return MemoryContentsType::POSSIBLE_JUMP_TABLE; 488 489 return MemoryContentsType::UNKNOWN; 490 } 491 492 bool BinaryContext::analyzeJumpTable(const uint64_t Address, 493 const JumpTable::JumpTableType Type, 494 const BinaryFunction &BF, 495 const uint64_t NextJTAddress, 496 JumpTable::AddressesType *EntriesAsAddress, 497 bool *HasEntryInFragment) const { 498 // Is one of the targets __builtin_unreachable? 499 bool HasUnreachable = false; 500 501 // Number of targets other than __builtin_unreachable. 502 uint64_t NumRealEntries = 0; 503 504 auto addEntryAddress = [&](uint64_t EntryAddress) { 505 if (EntriesAsAddress) 506 EntriesAsAddress->emplace_back(EntryAddress); 507 }; 508 509 ErrorOr<const BinarySection &> Section = getSectionForAddress(Address); 510 if (!Section) 511 return false; 512 513 // The upper bound is defined by containing object, section limits, and 514 // the next jump table in memory. 515 uint64_t UpperBound = Section->getEndAddress(); 516 const BinaryData *JumpTableBD = getBinaryDataAtAddress(Address); 517 if (JumpTableBD && JumpTableBD->getSize()) { 518 assert(JumpTableBD->getEndAddress() <= UpperBound && 519 "data object cannot cross a section boundary"); 520 UpperBound = JumpTableBD->getEndAddress(); 521 } 522 if (NextJTAddress) 523 UpperBound = std::min(NextJTAddress, UpperBound); 524 525 LLVM_DEBUG({ 526 using JTT = JumpTable::JumpTableType; 527 dbgs() << formatv("BOLT-DEBUG: analyzeJumpTable @{0:x} in {1}, JTT={2}\n", 528 Address, BF.getPrintName(), 529 Type == JTT::JTT_PIC ? "PIC" : "Normal"); 530 }); 531 const uint64_t EntrySize = getJumpTableEntrySize(Type); 532 for (uint64_t EntryAddress = Address; EntryAddress <= UpperBound - EntrySize; 533 EntryAddress += EntrySize) { 534 LLVM_DEBUG(dbgs() << " * Checking 0x" << Twine::utohexstr(EntryAddress) 535 << " -> "); 536 // Check if there's a proper relocation against the jump table entry. 537 if (HasRelocations) { 538 if (Type == JumpTable::JTT_PIC && 539 !DataPCRelocations.count(EntryAddress)) { 540 LLVM_DEBUG( 541 dbgs() << "FAIL: JTT_PIC table, no relocation for this address\n"); 542 break; 543 } 544 if (Type == JumpTable::JTT_NORMAL && !getRelocationAt(EntryAddress)) { 545 LLVM_DEBUG( 546 dbgs() 547 << "FAIL: JTT_NORMAL table, no relocation for this address\n"); 548 break; 549 } 550 } 551 552 const uint64_t Value = 553 (Type == JumpTable::JTT_PIC) 554 ? Address + *getSignedValueAtAddress(EntryAddress, EntrySize) 555 : *getPointerAtAddress(EntryAddress); 556 557 // __builtin_unreachable() case. 558 if (Value == BF.getAddress() + BF.getSize()) { 559 addEntryAddress(Value); 560 HasUnreachable = true; 561 LLVM_DEBUG(dbgs() << formatv("OK: {0:x} __builtin_unreachable\n", Value)); 562 continue; 563 } 564 565 // Function or one of its fragments. 566 const BinaryFunction *TargetBF = getBinaryFunctionContainingAddress(Value); 567 568 bool DoesBelongToFunction = BF.containsAddress(Value) || 569 (TargetBF && TargetBF->isParentOrChildOf(BF)); 570 571 // We assume that a jump table cannot have function start as an entry. 572 if (!DoesBelongToFunction || Value == BF.getAddress()) { 573 LLVM_DEBUG({ 574 if (!BF.containsAddress(Value)) { 575 dbgs() << "FAIL: function doesn't contain this address\n"; 576 if (TargetBF) { 577 dbgs() << " ! function containing this address: " 578 << TargetBF->getPrintName() << '\n'; 579 if (TargetBF->isFragment()) { 580 dbgs() << " ! is a fragment"; 581 for (BinaryFunction *Parent : TargetBF->ParentFragments) 582 dbgs() << ", parent: " << Parent->getPrintName(); 583 dbgs() << '\n'; 584 } 585 } 586 } 587 if (Value == BF.getAddress()) 588 dbgs() << "FAIL: jump table cannot have function start as an entry\n"; 589 }); 590 break; 591 } 592 593 // Check there's an instruction at this offset. 594 if (TargetBF->getState() == BinaryFunction::State::Disassembled && 595 !TargetBF->getInstructionAtOffset(Value - TargetBF->getAddress())) { 596 LLVM_DEBUG(dbgs() << formatv("FAIL: no instruction at {0:x}\n", Value)); 597 break; 598 } 599 600 ++NumRealEntries; 601 LLVM_DEBUG(dbgs() << formatv("OK: {0:x} real entry\n", Value)); 602 603 if (TargetBF != &BF && HasEntryInFragment) 604 *HasEntryInFragment = true; 605 addEntryAddress(Value); 606 } 607 608 // It's a jump table if the number of real entries is more than 1, or there's 609 // one real entry and "unreachable" targets. If there are only multiple 610 // "unreachable" targets, then it's not a jump table. 611 return NumRealEntries + HasUnreachable >= 2; 612 } 613 614 void BinaryContext::populateJumpTables() { 615 LLVM_DEBUG(dbgs() << "DataPCRelocations: " << DataPCRelocations.size() 616 << '\n'); 617 for (auto JTI = JumpTables.begin(), JTE = JumpTables.end(); JTI != JTE; 618 ++JTI) { 619 JumpTable *JT = JTI->second; 620 621 bool NonSimpleParent = false; 622 for (BinaryFunction *BF : JT->Parents) 623 NonSimpleParent |= !BF->isSimple(); 624 if (NonSimpleParent) 625 continue; 626 627 uint64_t NextJTAddress = 0; 628 auto NextJTI = std::next(JTI); 629 if (NextJTI != JTE) 630 NextJTAddress = NextJTI->second->getAddress(); 631 632 const bool Success = 633 analyzeJumpTable(JT->getAddress(), JT->Type, *(JT->Parents[0]), 634 NextJTAddress, &JT->EntriesAsAddress, &JT->IsSplit); 635 if (!Success) { 636 LLVM_DEBUG({ 637 dbgs() << "failed to analyze "; 638 JT->print(dbgs()); 639 if (NextJTI != JTE) { 640 dbgs() << "next "; 641 NextJTI->second->print(dbgs()); 642 } 643 }); 644 llvm_unreachable("jump table heuristic failure"); 645 } 646 for (BinaryFunction *Frag : JT->Parents) { 647 if (JT->IsSplit) 648 Frag->setHasIndirectTargetToSplitFragment(true); 649 for (uint64_t EntryAddress : JT->EntriesAsAddress) 650 // if target is builtin_unreachable 651 if (EntryAddress == Frag->getAddress() + Frag->getSize()) { 652 Frag->IgnoredBranches.emplace_back(EntryAddress - Frag->getAddress(), 653 Frag->getSize()); 654 } else if (EntryAddress >= Frag->getAddress() && 655 EntryAddress < Frag->getAddress() + Frag->getSize()) { 656 Frag->registerReferencedOffset(EntryAddress - Frag->getAddress()); 657 } 658 } 659 660 // In strict mode, erase PC-relative relocation record. Later we check that 661 // all such records are erased and thus have been accounted for. 662 if (opts::StrictMode && JT->Type == JumpTable::JTT_PIC) { 663 for (uint64_t Address = JT->getAddress(); 664 Address < JT->getAddress() + JT->getSize(); 665 Address += JT->EntrySize) { 666 DataPCRelocations.erase(DataPCRelocations.find(Address)); 667 } 668 } 669 670 // Mark to skip the function and all its fragments. 671 for (BinaryFunction *Frag : JT->Parents) 672 if (Frag->hasIndirectTargetToSplitFragment()) 673 addFragmentsToSkip(Frag); 674 } 675 676 if (opts::StrictMode && DataPCRelocations.size()) { 677 LLVM_DEBUG({ 678 dbgs() << DataPCRelocations.size() 679 << " unclaimed PC-relative relocations left in data:\n"; 680 for (uint64_t Reloc : DataPCRelocations) 681 dbgs() << Twine::utohexstr(Reloc) << '\n'; 682 }); 683 assert(0 && "unclaimed PC-relative relocations left in data\n"); 684 } 685 clearList(DataPCRelocations); 686 } 687 688 void BinaryContext::skipMarkedFragments() { 689 std::vector<BinaryFunction *> FragmentQueue; 690 // Copy the functions to FragmentQueue. 691 FragmentQueue.assign(FragmentsToSkip.begin(), FragmentsToSkip.end()); 692 auto addToWorklist = [&](BinaryFunction *Function) -> void { 693 if (FragmentsToSkip.count(Function)) 694 return; 695 FragmentQueue.push_back(Function); 696 addFragmentsToSkip(Function); 697 }; 698 // Functions containing split jump tables need to be skipped with all 699 // fragments (transitively). 700 for (size_t I = 0; I != FragmentQueue.size(); I++) { 701 BinaryFunction *BF = FragmentQueue[I]; 702 assert(FragmentsToSkip.count(BF) && 703 "internal error in traversing function fragments"); 704 if (opts::Verbosity >= 1) 705 errs() << "BOLT-WARNING: Ignoring " << BF->getPrintName() << '\n'; 706 BF->setSimple(false); 707 BF->setHasIndirectTargetToSplitFragment(true); 708 709 llvm::for_each(BF->Fragments, addToWorklist); 710 llvm::for_each(BF->ParentFragments, addToWorklist); 711 } 712 if (!FragmentsToSkip.empty()) 713 errs() << "BOLT-WARNING: skipped " << FragmentsToSkip.size() << " function" 714 << (FragmentsToSkip.size() == 1 ? "" : "s") 715 << " due to cold fragments\n"; 716 } 717 718 MCSymbol *BinaryContext::getOrCreateGlobalSymbol(uint64_t Address, Twine Prefix, 719 uint64_t Size, 720 uint16_t Alignment, 721 unsigned Flags) { 722 auto Itr = BinaryDataMap.find(Address); 723 if (Itr != BinaryDataMap.end()) { 724 assert(Itr->second->getSize() == Size || !Size); 725 return Itr->second->getSymbol(); 726 } 727 728 std::string Name = (Prefix + "0x" + Twine::utohexstr(Address)).str(); 729 assert(!GlobalSymbols.count(Name) && "created name is not unique"); 730 return registerNameAtAddress(Name, Address, Size, Alignment, Flags); 731 } 732 733 MCSymbol *BinaryContext::getOrCreateUndefinedGlobalSymbol(StringRef Name) { 734 return Ctx->getOrCreateSymbol(Name); 735 } 736 737 BinaryFunction *BinaryContext::createBinaryFunction( 738 const std::string &Name, BinarySection &Section, uint64_t Address, 739 uint64_t Size, uint64_t SymbolSize, uint16_t Alignment) { 740 auto Result = BinaryFunctions.emplace( 741 Address, BinaryFunction(Name, Section, Address, Size, *this)); 742 assert(Result.second == true && "unexpected duplicate function"); 743 BinaryFunction *BF = &Result.first->second; 744 registerNameAtAddress(Name, Address, SymbolSize ? SymbolSize : Size, 745 Alignment); 746 setSymbolToFunctionMap(BF->getSymbol(), BF); 747 return BF; 748 } 749 750 const MCSymbol * 751 BinaryContext::getOrCreateJumpTable(BinaryFunction &Function, uint64_t Address, 752 JumpTable::JumpTableType Type) { 753 // Two fragments of same function access same jump table 754 if (JumpTable *JT = getJumpTableContainingAddress(Address)) { 755 assert(JT->Type == Type && "jump table types have to match"); 756 assert(Address == JT->getAddress() && "unexpected non-empty jump table"); 757 758 // Prevent associating a jump table to a specific fragment twice. 759 // This simple check arises from the assumption: no more than 2 fragments. 760 if (JT->Parents.size() == 1 && JT->Parents[0] != &Function) { 761 assert(JT->Parents[0]->isParentOrChildOf(Function) && 762 "cannot re-use jump table of a different function"); 763 // Duplicate the entry for the parent function for easy access 764 JT->Parents.push_back(&Function); 765 if (opts::Verbosity > 2) { 766 outs() << "BOLT-INFO: Multiple fragments access same jump table: " 767 << JT->Parents[0]->getPrintName() << "; " 768 << Function.getPrintName() << "\n"; 769 JT->print(outs()); 770 } 771 Function.JumpTables.emplace(Address, JT); 772 JT->Parents[0]->setHasIndirectTargetToSplitFragment(true); 773 JT->Parents[1]->setHasIndirectTargetToSplitFragment(true); 774 } 775 776 bool IsJumpTableParent = false; 777 (void)IsJumpTableParent; 778 for (BinaryFunction *Frag : JT->Parents) 779 if (Frag == &Function) 780 IsJumpTableParent = true; 781 assert(IsJumpTableParent && 782 "cannot re-use jump table of a different function"); 783 return JT->getFirstLabel(); 784 } 785 786 // Re-use the existing symbol if possible. 787 MCSymbol *JTLabel = nullptr; 788 if (BinaryData *Object = getBinaryDataAtAddress(Address)) { 789 if (!isInternalSymbolName(Object->getSymbol()->getName())) 790 JTLabel = Object->getSymbol(); 791 } 792 793 const uint64_t EntrySize = getJumpTableEntrySize(Type); 794 if (!JTLabel) { 795 const std::string JumpTableName = generateJumpTableName(Function, Address); 796 JTLabel = registerNameAtAddress(JumpTableName, Address, 0, EntrySize); 797 } 798 799 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: creating jump table " << JTLabel->getName() 800 << " in function " << Function << '\n'); 801 802 JumpTable *JT = new JumpTable(*JTLabel, Address, EntrySize, Type, 803 JumpTable::LabelMapType{{0, JTLabel}}, 804 *getSectionForAddress(Address)); 805 JT->Parents.push_back(&Function); 806 if (opts::Verbosity > 2) 807 JT->print(outs()); 808 JumpTables.emplace(Address, JT); 809 810 // Duplicate the entry for the parent function for easy access. 811 Function.JumpTables.emplace(Address, JT); 812 return JTLabel; 813 } 814 815 std::pair<uint64_t, const MCSymbol *> 816 BinaryContext::duplicateJumpTable(BinaryFunction &Function, JumpTable *JT, 817 const MCSymbol *OldLabel) { 818 auto L = scopeLock(); 819 unsigned Offset = 0; 820 bool Found = false; 821 for (std::pair<const unsigned, MCSymbol *> Elmt : JT->Labels) { 822 if (Elmt.second != OldLabel) 823 continue; 824 Offset = Elmt.first; 825 Found = true; 826 break; 827 } 828 assert(Found && "Label not found"); 829 (void)Found; 830 MCSymbol *NewLabel = Ctx->createNamedTempSymbol("duplicatedJT"); 831 JumpTable *NewJT = 832 new JumpTable(*NewLabel, JT->getAddress(), JT->EntrySize, JT->Type, 833 JumpTable::LabelMapType{{Offset, NewLabel}}, 834 *getSectionForAddress(JT->getAddress())); 835 NewJT->Parents = JT->Parents; 836 NewJT->Entries = JT->Entries; 837 NewJT->Counts = JT->Counts; 838 uint64_t JumpTableID = ++DuplicatedJumpTables; 839 // Invert it to differentiate from regular jump tables whose IDs are their 840 // addresses in the input binary memory space 841 JumpTableID = ~JumpTableID; 842 JumpTables.emplace(JumpTableID, NewJT); 843 Function.JumpTables.emplace(JumpTableID, NewJT); 844 return std::make_pair(JumpTableID, NewLabel); 845 } 846 847 std::string BinaryContext::generateJumpTableName(const BinaryFunction &BF, 848 uint64_t Address) { 849 size_t Id; 850 uint64_t Offset = 0; 851 if (const JumpTable *JT = BF.getJumpTableContainingAddress(Address)) { 852 Offset = Address - JT->getAddress(); 853 auto Itr = JT->Labels.find(Offset); 854 if (Itr != JT->Labels.end()) 855 return std::string(Itr->second->getName()); 856 Id = JumpTableIds.at(JT->getAddress()); 857 } else { 858 Id = JumpTableIds[Address] = BF.JumpTables.size(); 859 } 860 return ("JUMP_TABLE/" + BF.getOneName().str() + "." + std::to_string(Id) + 861 (Offset ? ("." + std::to_string(Offset)) : "")); 862 } 863 864 bool BinaryContext::hasValidCodePadding(const BinaryFunction &BF) { 865 // FIXME: aarch64 support is missing. 866 if (!isX86()) 867 return true; 868 869 if (BF.getSize() == BF.getMaxSize()) 870 return true; 871 872 ErrorOr<ArrayRef<unsigned char>> FunctionData = BF.getData(); 873 assert(FunctionData && "cannot get function as data"); 874 875 uint64_t Offset = BF.getSize(); 876 MCInst Instr; 877 uint64_t InstrSize = 0; 878 uint64_t InstrAddress = BF.getAddress() + Offset; 879 using std::placeholders::_1; 880 881 // Skip instructions that satisfy the predicate condition. 882 auto skipInstructions = [&](std::function<bool(const MCInst &)> Predicate) { 883 const uint64_t StartOffset = Offset; 884 for (; Offset < BF.getMaxSize(); 885 Offset += InstrSize, InstrAddress += InstrSize) { 886 if (!DisAsm->getInstruction(Instr, InstrSize, FunctionData->slice(Offset), 887 InstrAddress, nulls())) 888 break; 889 if (!Predicate(Instr)) 890 break; 891 } 892 893 return Offset - StartOffset; 894 }; 895 896 // Skip a sequence of zero bytes. 897 auto skipZeros = [&]() { 898 const uint64_t StartOffset = Offset; 899 for (; Offset < BF.getMaxSize(); ++Offset) 900 if ((*FunctionData)[Offset] != 0) 901 break; 902 903 return Offset - StartOffset; 904 }; 905 906 // Accept the whole padding area filled with breakpoints. 907 auto isBreakpoint = std::bind(&MCPlusBuilder::isBreakpoint, MIB.get(), _1); 908 if (skipInstructions(isBreakpoint) && Offset == BF.getMaxSize()) 909 return true; 910 911 auto isNoop = std::bind(&MCPlusBuilder::isNoop, MIB.get(), _1); 912 913 // Some functions have a jump to the next function or to the padding area 914 // inserted after the body. 915 auto isSkipJump = [&](const MCInst &Instr) { 916 uint64_t TargetAddress = 0; 917 if (MIB->isUnconditionalBranch(Instr) && 918 MIB->evaluateBranch(Instr, InstrAddress, InstrSize, TargetAddress)) { 919 if (TargetAddress >= InstrAddress + InstrSize && 920 TargetAddress <= BF.getAddress() + BF.getMaxSize()) { 921 return true; 922 } 923 } 924 return false; 925 }; 926 927 // Skip over nops, jumps, and zero padding. Allow interleaving (this happens). 928 while (skipInstructions(isNoop) || skipInstructions(isSkipJump) || 929 skipZeros()) 930 ; 931 932 if (Offset == BF.getMaxSize()) 933 return true; 934 935 if (opts::Verbosity >= 1) { 936 errs() << "BOLT-WARNING: bad padding at address 0x" 937 << Twine::utohexstr(BF.getAddress() + BF.getSize()) 938 << " starting at offset " << (Offset - BF.getSize()) 939 << " in function " << BF << '\n' 940 << FunctionData->slice(BF.getSize(), BF.getMaxSize() - BF.getSize()) 941 << '\n'; 942 } 943 944 return false; 945 } 946 947 void BinaryContext::adjustCodePadding() { 948 for (auto &BFI : BinaryFunctions) { 949 BinaryFunction &BF = BFI.second; 950 if (!shouldEmit(BF)) 951 continue; 952 953 if (!hasValidCodePadding(BF)) { 954 if (HasRelocations) { 955 if (opts::Verbosity >= 1) { 956 outs() << "BOLT-INFO: function " << BF 957 << " has invalid padding. Ignoring the function.\n"; 958 } 959 BF.setIgnored(); 960 } else { 961 BF.setMaxSize(BF.getSize()); 962 } 963 } 964 } 965 } 966 967 MCSymbol *BinaryContext::registerNameAtAddress(StringRef Name, uint64_t Address, 968 uint64_t Size, 969 uint16_t Alignment, 970 unsigned Flags) { 971 // Register the name with MCContext. 972 MCSymbol *Symbol = Ctx->getOrCreateSymbol(Name); 973 974 auto GAI = BinaryDataMap.find(Address); 975 BinaryData *BD; 976 if (GAI == BinaryDataMap.end()) { 977 ErrorOr<BinarySection &> SectionOrErr = getSectionForAddress(Address); 978 BinarySection &Section = 979 SectionOrErr ? SectionOrErr.get() : absoluteSection(); 980 BD = new BinaryData(*Symbol, Address, Size, Alignment ? Alignment : 1, 981 Section, Flags); 982 GAI = BinaryDataMap.emplace(Address, BD).first; 983 GlobalSymbols[Name] = BD; 984 updateObjectNesting(GAI); 985 } else { 986 BD = GAI->second; 987 if (!BD->hasName(Name)) { 988 GlobalSymbols[Name] = BD; 989 BD->Symbols.push_back(Symbol); 990 } 991 } 992 993 return Symbol; 994 } 995 996 const BinaryData * 997 BinaryContext::getBinaryDataContainingAddressImpl(uint64_t Address) const { 998 auto NI = BinaryDataMap.lower_bound(Address); 999 auto End = BinaryDataMap.end(); 1000 if ((NI != End && Address == NI->first) || 1001 ((NI != BinaryDataMap.begin()) && (NI-- != BinaryDataMap.begin()))) { 1002 if (NI->second->containsAddress(Address)) 1003 return NI->second; 1004 1005 // If this is a sub-symbol, see if a parent data contains the address. 1006 const BinaryData *BD = NI->second->getParent(); 1007 while (BD) { 1008 if (BD->containsAddress(Address)) 1009 return BD; 1010 BD = BD->getParent(); 1011 } 1012 } 1013 return nullptr; 1014 } 1015 1016 bool BinaryContext::setBinaryDataSize(uint64_t Address, uint64_t Size) { 1017 auto NI = BinaryDataMap.find(Address); 1018 assert(NI != BinaryDataMap.end()); 1019 if (NI == BinaryDataMap.end()) 1020 return false; 1021 // TODO: it's possible that a jump table starts at the same address 1022 // as a larger blob of private data. When we set the size of the 1023 // jump table, it might be smaller than the total blob size. In this 1024 // case we just leave the original size since (currently) it won't really 1025 // affect anything. 1026 assert((!NI->second->Size || NI->second->Size == Size || 1027 (NI->second->isJumpTable() && NI->second->Size > Size)) && 1028 "can't change the size of a symbol that has already had its " 1029 "size set"); 1030 if (!NI->second->Size) { 1031 NI->second->Size = Size; 1032 updateObjectNesting(NI); 1033 return true; 1034 } 1035 return false; 1036 } 1037 1038 void BinaryContext::generateSymbolHashes() { 1039 auto isPadding = [](const BinaryData &BD) { 1040 StringRef Contents = BD.getSection().getContents(); 1041 StringRef SymData = Contents.substr(BD.getOffset(), BD.getSize()); 1042 return (BD.getName().startswith("HOLEat") || 1043 SymData.find_first_not_of(0) == StringRef::npos); 1044 }; 1045 1046 uint64_t NumCollisions = 0; 1047 for (auto &Entry : BinaryDataMap) { 1048 BinaryData &BD = *Entry.second; 1049 StringRef Name = BD.getName(); 1050 1051 if (!isInternalSymbolName(Name)) 1052 continue; 1053 1054 // First check if a non-anonymous alias exists and move it to the front. 1055 if (BD.getSymbols().size() > 1) { 1056 auto Itr = llvm::find_if(BD.getSymbols(), [&](const MCSymbol *Symbol) { 1057 return !isInternalSymbolName(Symbol->getName()); 1058 }); 1059 if (Itr != BD.getSymbols().end()) { 1060 size_t Idx = std::distance(BD.getSymbols().begin(), Itr); 1061 std::swap(BD.getSymbols()[0], BD.getSymbols()[Idx]); 1062 continue; 1063 } 1064 } 1065 1066 // We have to skip 0 size symbols since they will all collide. 1067 if (BD.getSize() == 0) { 1068 continue; 1069 } 1070 1071 const uint64_t Hash = BD.getSection().hash(BD); 1072 const size_t Idx = Name.find("0x"); 1073 std::string NewName = 1074 (Twine(Name.substr(0, Idx)) + "_" + Twine::utohexstr(Hash)).str(); 1075 if (getBinaryDataByName(NewName)) { 1076 // Ignore collisions for symbols that appear to be padding 1077 // (i.e. all zeros or a "hole") 1078 if (!isPadding(BD)) { 1079 if (opts::Verbosity) { 1080 errs() << "BOLT-WARNING: collision detected when hashing " << BD 1081 << " with new name (" << NewName << "), skipping.\n"; 1082 } 1083 ++NumCollisions; 1084 } 1085 continue; 1086 } 1087 BD.Symbols.insert(BD.Symbols.begin(), Ctx->getOrCreateSymbol(NewName)); 1088 GlobalSymbols[NewName] = &BD; 1089 } 1090 if (NumCollisions) { 1091 errs() << "BOLT-WARNING: " << NumCollisions 1092 << " collisions detected while hashing binary objects"; 1093 if (!opts::Verbosity) 1094 errs() << ". Use -v=1 to see the list."; 1095 errs() << '\n'; 1096 } 1097 } 1098 1099 bool BinaryContext::registerFragment(BinaryFunction &TargetFunction, 1100 BinaryFunction &Function) const { 1101 assert(TargetFunction.isFragment() && "TargetFunction must be a fragment"); 1102 if (TargetFunction.isChildOf(Function)) 1103 return true; 1104 TargetFunction.addParentFragment(Function); 1105 Function.addFragment(TargetFunction); 1106 if (!HasRelocations) { 1107 TargetFunction.setSimple(false); 1108 Function.setSimple(false); 1109 } 1110 if (opts::Verbosity >= 1) { 1111 outs() << "BOLT-INFO: marking " << TargetFunction << " as a fragment of " 1112 << Function << '\n'; 1113 } 1114 return true; 1115 } 1116 1117 void BinaryContext::addAdrpAddRelocAArch64(BinaryFunction &BF, 1118 MCInst &LoadLowBits, 1119 MCInst &LoadHiBits, 1120 uint64_t Target) { 1121 const MCSymbol *TargetSymbol; 1122 uint64_t Addend = 0; 1123 std::tie(TargetSymbol, Addend) = handleAddressRef(Target, BF, 1124 /*IsPCRel*/ true); 1125 int64_t Val; 1126 MIB->replaceImmWithSymbolRef(LoadHiBits, TargetSymbol, Addend, Ctx.get(), Val, 1127 ELF::R_AARCH64_ADR_PREL_PG_HI21); 1128 MIB->replaceImmWithSymbolRef(LoadLowBits, TargetSymbol, Addend, Ctx.get(), 1129 Val, ELF::R_AARCH64_ADD_ABS_LO12_NC); 1130 } 1131 1132 bool BinaryContext::handleAArch64Veneer(uint64_t Address, bool MatchOnly) { 1133 BinaryFunction *TargetFunction = getBinaryFunctionContainingAddress(Address); 1134 if (TargetFunction) 1135 return false; 1136 1137 ErrorOr<BinarySection &> Section = getSectionForAddress(Address); 1138 assert(Section && "cannot get section for referenced address"); 1139 if (!Section->isText()) 1140 return false; 1141 1142 bool Ret = false; 1143 StringRef SectionContents = Section->getContents(); 1144 uint64_t Offset = Address - Section->getAddress(); 1145 const uint64_t MaxSize = SectionContents.size() - Offset; 1146 const uint8_t *Bytes = 1147 reinterpret_cast<const uint8_t *>(SectionContents.data()); 1148 ArrayRef<uint8_t> Data(Bytes + Offset, MaxSize); 1149 1150 auto matchVeneer = [&](BinaryFunction::InstrMapType &Instructions, 1151 MCInst &Instruction, uint64_t Offset, 1152 uint64_t AbsoluteInstrAddr, 1153 uint64_t TotalSize) -> bool { 1154 MCInst *TargetHiBits, *TargetLowBits; 1155 uint64_t TargetAddress, Count; 1156 Count = MIB->matchLinkerVeneer(Instructions.begin(), Instructions.end(), 1157 AbsoluteInstrAddr, Instruction, TargetHiBits, 1158 TargetLowBits, TargetAddress); 1159 if (!Count) 1160 return false; 1161 1162 if (MatchOnly) 1163 return true; 1164 1165 // NOTE The target symbol was created during disassemble's 1166 // handleExternalReference 1167 const MCSymbol *VeneerSymbol = getOrCreateGlobalSymbol(Address, "FUNCat"); 1168 BinaryFunction *Veneer = createBinaryFunction(VeneerSymbol->getName().str(), 1169 *Section, Address, TotalSize); 1170 addAdrpAddRelocAArch64(*Veneer, *TargetLowBits, *TargetHiBits, 1171 TargetAddress); 1172 MIB->addAnnotation(Instruction, "AArch64Veneer", true); 1173 Veneer->addInstruction(Offset, std::move(Instruction)); 1174 --Count; 1175 for (auto It = Instructions.rbegin(); Count != 0; ++It, --Count) { 1176 MIB->addAnnotation(It->second, "AArch64Veneer", true); 1177 Veneer->addInstruction(It->first, std::move(It->second)); 1178 } 1179 1180 Veneer->getOrCreateLocalLabel(Address); 1181 Veneer->setMaxSize(TotalSize); 1182 Veneer->updateState(BinaryFunction::State::Disassembled); 1183 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: handling veneer function at 0x" << Address 1184 << "\n"); 1185 return true; 1186 }; 1187 1188 uint64_t Size = 0, TotalSize = 0; 1189 BinaryFunction::InstrMapType VeneerInstructions; 1190 for (Offset = 0; Offset < MaxSize; Offset += Size) { 1191 MCInst Instruction; 1192 const uint64_t AbsoluteInstrAddr = Address + Offset; 1193 if (!SymbolicDisAsm->getInstruction(Instruction, Size, Data.slice(Offset), 1194 AbsoluteInstrAddr, nulls())) 1195 break; 1196 1197 TotalSize += Size; 1198 if (MIB->isBranch(Instruction)) { 1199 Ret = matchVeneer(VeneerInstructions, Instruction, Offset, 1200 AbsoluteInstrAddr, TotalSize); 1201 break; 1202 } 1203 1204 VeneerInstructions.emplace(Offset, std::move(Instruction)); 1205 } 1206 1207 return Ret; 1208 } 1209 1210 void BinaryContext::processInterproceduralReferences() { 1211 for (const std::pair<BinaryFunction *, uint64_t> &It : 1212 InterproceduralReferences) { 1213 BinaryFunction &Function = *It.first; 1214 uint64_t Address = It.second; 1215 if (!Address || Function.isIgnored()) 1216 continue; 1217 1218 BinaryFunction *TargetFunction = 1219 getBinaryFunctionContainingAddress(Address); 1220 if (&Function == TargetFunction) 1221 continue; 1222 1223 if (TargetFunction) { 1224 if (TargetFunction->isFragment() && 1225 !TargetFunction->isChildOf(Function)) { 1226 errs() << "BOLT-WARNING: interprocedural reference between unrelated " 1227 "fragments: " 1228 << Function.getPrintName() << " and " 1229 << TargetFunction->getPrintName() << '\n'; 1230 } 1231 if (uint64_t Offset = Address - TargetFunction->getAddress()) 1232 TargetFunction->addEntryPointAtOffset(Offset); 1233 1234 continue; 1235 } 1236 1237 // Check if address falls in function padding space - this could be 1238 // unmarked data in code. In this case adjust the padding space size. 1239 ErrorOr<BinarySection &> Section = getSectionForAddress(Address); 1240 assert(Section && "cannot get section for referenced address"); 1241 1242 if (!Section->isText()) 1243 continue; 1244 1245 // PLT requires special handling and could be ignored in this context. 1246 StringRef SectionName = Section->getName(); 1247 if (SectionName == ".plt" || SectionName == ".plt.got") 1248 continue; 1249 1250 // Check if it is aarch64 veneer written at Address 1251 if (isAArch64() && handleAArch64Veneer(Address)) 1252 continue; 1253 1254 if (opts::processAllFunctions()) { 1255 errs() << "BOLT-ERROR: cannot process binaries with unmarked " 1256 << "object in code at address 0x" << Twine::utohexstr(Address) 1257 << " belonging to section " << SectionName << " in current mode\n"; 1258 exit(1); 1259 } 1260 1261 TargetFunction = getBinaryFunctionContainingAddress(Address, 1262 /*CheckPastEnd=*/false, 1263 /*UseMaxSize=*/true); 1264 // We are not going to overwrite non-simple functions, but for simple 1265 // ones - adjust the padding size. 1266 if (TargetFunction && TargetFunction->isSimple()) { 1267 errs() << "BOLT-WARNING: function " << *TargetFunction 1268 << " has an object detected in a padding region at address 0x" 1269 << Twine::utohexstr(Address) << '\n'; 1270 TargetFunction->setMaxSize(TargetFunction->getSize()); 1271 } 1272 } 1273 1274 InterproceduralReferences.clear(); 1275 } 1276 1277 void BinaryContext::postProcessSymbolTable() { 1278 fixBinaryDataHoles(); 1279 bool Valid = true; 1280 for (auto &Entry : BinaryDataMap) { 1281 BinaryData *BD = Entry.second; 1282 if ((BD->getName().startswith("SYMBOLat") || 1283 BD->getName().startswith("DATAat")) && 1284 !BD->getParent() && !BD->getSize() && !BD->isAbsolute() && 1285 BD->getSection()) { 1286 errs() << "BOLT-WARNING: zero-sized top level symbol: " << *BD << "\n"; 1287 Valid = false; 1288 } 1289 } 1290 assert(Valid); 1291 (void)Valid; 1292 generateSymbolHashes(); 1293 } 1294 1295 void BinaryContext::foldFunction(BinaryFunction &ChildBF, 1296 BinaryFunction &ParentBF) { 1297 assert(!ChildBF.isMultiEntry() && !ParentBF.isMultiEntry() && 1298 "cannot merge functions with multiple entry points"); 1299 1300 std::unique_lock<llvm::sys::RWMutex> WriteCtxLock(CtxMutex, std::defer_lock); 1301 std::unique_lock<llvm::sys::RWMutex> WriteSymbolMapLock( 1302 SymbolToFunctionMapMutex, std::defer_lock); 1303 1304 const StringRef ChildName = ChildBF.getOneName(); 1305 1306 // Move symbols over and update bookkeeping info. 1307 for (MCSymbol *Symbol : ChildBF.getSymbols()) { 1308 ParentBF.getSymbols().push_back(Symbol); 1309 WriteSymbolMapLock.lock(); 1310 SymbolToFunctionMap[Symbol] = &ParentBF; 1311 WriteSymbolMapLock.unlock(); 1312 // NB: there's no need to update BinaryDataMap and GlobalSymbols. 1313 } 1314 ChildBF.getSymbols().clear(); 1315 1316 // Move other names the child function is known under. 1317 llvm::move(ChildBF.Aliases, std::back_inserter(ParentBF.Aliases)); 1318 ChildBF.Aliases.clear(); 1319 1320 if (HasRelocations) { 1321 // Merge execution counts of ChildBF into those of ParentBF. 1322 // Without relocations, we cannot reliably merge profiles as both functions 1323 // continue to exist and either one can be executed. 1324 ChildBF.mergeProfileDataInto(ParentBF); 1325 1326 std::shared_lock<llvm::sys::RWMutex> ReadBfsLock(BinaryFunctionsMutex, 1327 std::defer_lock); 1328 std::unique_lock<llvm::sys::RWMutex> WriteBfsLock(BinaryFunctionsMutex, 1329 std::defer_lock); 1330 // Remove ChildBF from the global set of functions in relocs mode. 1331 ReadBfsLock.lock(); 1332 auto FI = BinaryFunctions.find(ChildBF.getAddress()); 1333 ReadBfsLock.unlock(); 1334 1335 assert(FI != BinaryFunctions.end() && "function not found"); 1336 assert(&ChildBF == &FI->second && "function mismatch"); 1337 1338 WriteBfsLock.lock(); 1339 ChildBF.clearDisasmState(); 1340 FI = BinaryFunctions.erase(FI); 1341 WriteBfsLock.unlock(); 1342 1343 } else { 1344 // In non-relocation mode we keep the function, but rename it. 1345 std::string NewName = "__ICF_" + ChildName.str(); 1346 1347 WriteCtxLock.lock(); 1348 ChildBF.getSymbols().push_back(Ctx->getOrCreateSymbol(NewName)); 1349 WriteCtxLock.unlock(); 1350 1351 ChildBF.setFolded(&ParentBF); 1352 } 1353 1354 ParentBF.setHasFunctionsFoldedInto(); 1355 } 1356 1357 void BinaryContext::fixBinaryDataHoles() { 1358 assert(validateObjectNesting() && "object nesting inconsitency detected"); 1359 1360 for (BinarySection &Section : allocatableSections()) { 1361 std::vector<std::pair<uint64_t, uint64_t>> Holes; 1362 1363 auto isNotHole = [&Section](const binary_data_iterator &Itr) { 1364 BinaryData *BD = Itr->second; 1365 bool isHole = (!BD->getParent() && !BD->getSize() && BD->isObject() && 1366 (BD->getName().startswith("SYMBOLat0x") || 1367 BD->getName().startswith("DATAat0x") || 1368 BD->getName().startswith("ANONYMOUS"))); 1369 return !isHole && BD->getSection() == Section && !BD->getParent(); 1370 }; 1371 1372 auto BDStart = BinaryDataMap.begin(); 1373 auto BDEnd = BinaryDataMap.end(); 1374 auto Itr = FilteredBinaryDataIterator(isNotHole, BDStart, BDEnd); 1375 auto End = FilteredBinaryDataIterator(isNotHole, BDEnd, BDEnd); 1376 1377 uint64_t EndAddress = Section.getAddress(); 1378 1379 while (Itr != End) { 1380 if (Itr->second->getAddress() > EndAddress) { 1381 uint64_t Gap = Itr->second->getAddress() - EndAddress; 1382 Holes.emplace_back(EndAddress, Gap); 1383 } 1384 EndAddress = Itr->second->getEndAddress(); 1385 ++Itr; 1386 } 1387 1388 if (EndAddress < Section.getEndAddress()) 1389 Holes.emplace_back(EndAddress, Section.getEndAddress() - EndAddress); 1390 1391 // If there is already a symbol at the start of the hole, grow that symbol 1392 // to cover the rest. Otherwise, create a new symbol to cover the hole. 1393 for (std::pair<uint64_t, uint64_t> &Hole : Holes) { 1394 BinaryData *BD = getBinaryDataAtAddress(Hole.first); 1395 if (BD) { 1396 // BD->getSection() can be != Section if there are sections that 1397 // overlap. In this case it is probably safe to just skip the holes 1398 // since the overlapping section will not(?) have any symbols in it. 1399 if (BD->getSection() == Section) 1400 setBinaryDataSize(Hole.first, Hole.second); 1401 } else { 1402 getOrCreateGlobalSymbol(Hole.first, "HOLEat", Hole.second, 1); 1403 } 1404 } 1405 } 1406 1407 assert(validateObjectNesting() && "object nesting inconsitency detected"); 1408 assert(validateHoles() && "top level hole detected in object map"); 1409 } 1410 1411 void BinaryContext::printGlobalSymbols(raw_ostream &OS) const { 1412 const BinarySection *CurrentSection = nullptr; 1413 bool FirstSection = true; 1414 1415 for (auto &Entry : BinaryDataMap) { 1416 const BinaryData *BD = Entry.second; 1417 const BinarySection &Section = BD->getSection(); 1418 if (FirstSection || Section != *CurrentSection) { 1419 uint64_t Address, Size; 1420 StringRef Name = Section.getName(); 1421 if (Section) { 1422 Address = Section.getAddress(); 1423 Size = Section.getSize(); 1424 } else { 1425 Address = BD->getAddress(); 1426 Size = BD->getSize(); 1427 } 1428 OS << "BOLT-INFO: Section " << Name << ", " 1429 << "0x" + Twine::utohexstr(Address) << ":" 1430 << "0x" + Twine::utohexstr(Address + Size) << "/" << Size << "\n"; 1431 CurrentSection = &Section; 1432 FirstSection = false; 1433 } 1434 1435 OS << "BOLT-INFO: "; 1436 const BinaryData *P = BD->getParent(); 1437 while (P) { 1438 OS << " "; 1439 P = P->getParent(); 1440 } 1441 OS << *BD << "\n"; 1442 } 1443 } 1444 1445 Expected<unsigned> BinaryContext::getDwarfFile( 1446 StringRef Directory, StringRef FileName, unsigned FileNumber, 1447 std::optional<MD5::MD5Result> Checksum, std::optional<StringRef> Source, 1448 unsigned CUID, unsigned DWARFVersion) { 1449 DwarfLineTable &Table = DwarfLineTablesCUMap[CUID]; 1450 return Table.tryGetFile(Directory, FileName, Checksum, Source, DWARFVersion, 1451 FileNumber); 1452 } 1453 1454 unsigned BinaryContext::addDebugFilenameToUnit(const uint32_t DestCUID, 1455 const uint32_t SrcCUID, 1456 unsigned FileIndex) { 1457 DWARFCompileUnit *SrcUnit = DwCtx->getCompileUnitForOffset(SrcCUID); 1458 const DWARFDebugLine::LineTable *LineTable = 1459 DwCtx->getLineTableForUnit(SrcUnit); 1460 const std::vector<DWARFDebugLine::FileNameEntry> &FileNames = 1461 LineTable->Prologue.FileNames; 1462 // Dir indexes start at 1, as DWARF file numbers, and a dir index 0 1463 // means empty dir. 1464 assert(FileIndex > 0 && FileIndex <= FileNames.size() && 1465 "FileIndex out of range for the compilation unit."); 1466 StringRef Dir = ""; 1467 if (FileNames[FileIndex - 1].DirIdx != 0) { 1468 if (std::optional<const char *> DirName = dwarf::toString( 1469 LineTable->Prologue 1470 .IncludeDirectories[FileNames[FileIndex - 1].DirIdx - 1])) { 1471 Dir = *DirName; 1472 } 1473 } 1474 StringRef FileName = ""; 1475 if (std::optional<const char *> FName = 1476 dwarf::toString(FileNames[FileIndex - 1].Name)) 1477 FileName = *FName; 1478 assert(FileName != ""); 1479 DWARFCompileUnit *DstUnit = DwCtx->getCompileUnitForOffset(DestCUID); 1480 return cantFail(getDwarfFile(Dir, FileName, 0, std::nullopt, std::nullopt, 1481 DestCUID, DstUnit->getVersion())); 1482 } 1483 1484 std::vector<BinaryFunction *> BinaryContext::getSortedFunctions() { 1485 std::vector<BinaryFunction *> SortedFunctions(BinaryFunctions.size()); 1486 llvm::transform(llvm::make_second_range(BinaryFunctions), 1487 SortedFunctions.begin(), 1488 [](BinaryFunction &BF) { return &BF; }); 1489 1490 llvm::stable_sort(SortedFunctions, 1491 [](const BinaryFunction *A, const BinaryFunction *B) { 1492 if (A->hasValidIndex() && B->hasValidIndex()) { 1493 return A->getIndex() < B->getIndex(); 1494 } 1495 return A->hasValidIndex(); 1496 }); 1497 return SortedFunctions; 1498 } 1499 1500 std::vector<BinaryFunction *> BinaryContext::getAllBinaryFunctions() { 1501 std::vector<BinaryFunction *> AllFunctions; 1502 AllFunctions.reserve(BinaryFunctions.size() + InjectedBinaryFunctions.size()); 1503 llvm::transform(llvm::make_second_range(BinaryFunctions), 1504 std::back_inserter(AllFunctions), 1505 [](BinaryFunction &BF) { return &BF; }); 1506 llvm::copy(InjectedBinaryFunctions, std::back_inserter(AllFunctions)); 1507 1508 return AllFunctions; 1509 } 1510 1511 std::optional<DWARFUnit *> BinaryContext::getDWOCU(uint64_t DWOId) { 1512 auto Iter = DWOCUs.find(DWOId); 1513 if (Iter == DWOCUs.end()) 1514 return std::nullopt; 1515 1516 return Iter->second; 1517 } 1518 1519 DWARFContext *BinaryContext::getDWOContext() const { 1520 if (DWOCUs.empty()) 1521 return nullptr; 1522 return &DWOCUs.begin()->second->getContext(); 1523 } 1524 1525 /// Handles DWO sections that can either be in .o, .dwo or .dwp files. 1526 void BinaryContext::preprocessDWODebugInfo() { 1527 for (const std::unique_ptr<DWARFUnit> &CU : DwCtx->compile_units()) { 1528 DWARFUnit *const DwarfUnit = CU.get(); 1529 if (std::optional<uint64_t> DWOId = DwarfUnit->getDWOId()) { 1530 DWARFUnit *DWOCU = DwarfUnit->getNonSkeletonUnitDIE(false).getDwarfUnit(); 1531 if (!DWOCU->isDWOUnit()) { 1532 std::string DWOName = dwarf::toString( 1533 DwarfUnit->getUnitDIE().find( 1534 {dwarf::DW_AT_dwo_name, dwarf::DW_AT_GNU_dwo_name}), 1535 ""); 1536 outs() << "BOLT-WARNING: Debug Fission: DWO debug information for " 1537 << DWOName 1538 << " was not retrieved and won't be updated. Please check " 1539 "relative path.\n"; 1540 continue; 1541 } 1542 DWOCUs[*DWOId] = DWOCU; 1543 } 1544 } 1545 if (!DWOCUs.empty()) 1546 outs() << "BOLT-INFO: processing split DWARF\n"; 1547 } 1548 1549 void BinaryContext::preprocessDebugInfo() { 1550 struct CURange { 1551 uint64_t LowPC; 1552 uint64_t HighPC; 1553 DWARFUnit *Unit; 1554 1555 bool operator<(const CURange &Other) const { return LowPC < Other.LowPC; } 1556 }; 1557 1558 // Building a map of address ranges to CUs similar to .debug_aranges and use 1559 // it to assign CU to functions. 1560 std::vector<CURange> AllRanges; 1561 AllRanges.reserve(DwCtx->getNumCompileUnits()); 1562 for (const std::unique_ptr<DWARFUnit> &CU : DwCtx->compile_units()) { 1563 Expected<DWARFAddressRangesVector> RangesOrError = 1564 CU->getUnitDIE().getAddressRanges(); 1565 if (!RangesOrError) { 1566 consumeError(RangesOrError.takeError()); 1567 continue; 1568 } 1569 for (DWARFAddressRange &Range : *RangesOrError) { 1570 // Parts of the debug info could be invalidated due to corresponding code 1571 // being removed from the binary by the linker. Hence we check if the 1572 // address is a valid one. 1573 if (containsAddress(Range.LowPC)) 1574 AllRanges.emplace_back(CURange{Range.LowPC, Range.HighPC, CU.get()}); 1575 } 1576 1577 ContainsDwarf5 |= CU->getVersion() >= 5; 1578 ContainsDwarfLegacy |= CU->getVersion() < 5; 1579 } 1580 1581 llvm::sort(AllRanges); 1582 for (auto &KV : BinaryFunctions) { 1583 const uint64_t FunctionAddress = KV.first; 1584 BinaryFunction &Function = KV.second; 1585 1586 auto It = llvm::partition_point( 1587 AllRanges, [=](CURange R) { return R.HighPC <= FunctionAddress; }); 1588 if (It != AllRanges.end() && It->LowPC <= FunctionAddress) 1589 Function.setDWARFUnit(It->Unit); 1590 } 1591 1592 // Discover units with debug info that needs to be updated. 1593 for (const auto &KV : BinaryFunctions) { 1594 const BinaryFunction &BF = KV.second; 1595 if (shouldEmit(BF) && BF.getDWARFUnit()) 1596 ProcessedCUs.insert(BF.getDWARFUnit()); 1597 } 1598 1599 // Clear debug info for functions from units that we are not going to process. 1600 for (auto &KV : BinaryFunctions) { 1601 BinaryFunction &BF = KV.second; 1602 if (BF.getDWARFUnit() && !ProcessedCUs.count(BF.getDWARFUnit())) 1603 BF.setDWARFUnit(nullptr); 1604 } 1605 1606 if (opts::Verbosity >= 1) { 1607 outs() << "BOLT-INFO: " << ProcessedCUs.size() << " out of " 1608 << DwCtx->getNumCompileUnits() << " CUs will be updated\n"; 1609 } 1610 1611 preprocessDWODebugInfo(); 1612 1613 // Populate MCContext with DWARF files from all units. 1614 StringRef GlobalPrefix = AsmInfo->getPrivateGlobalPrefix(); 1615 for (const std::unique_ptr<DWARFUnit> &CU : DwCtx->compile_units()) { 1616 const uint64_t CUID = CU->getOffset(); 1617 DwarfLineTable &BinaryLineTable = getDwarfLineTable(CUID); 1618 BinaryLineTable.setLabel(Ctx->getOrCreateSymbol( 1619 GlobalPrefix + "line_table_start" + Twine(CUID))); 1620 1621 if (!ProcessedCUs.count(CU.get())) 1622 continue; 1623 1624 const DWARFDebugLine::LineTable *LineTable = 1625 DwCtx->getLineTableForUnit(CU.get()); 1626 const std::vector<DWARFDebugLine::FileNameEntry> &FileNames = 1627 LineTable->Prologue.FileNames; 1628 1629 uint16_t DwarfVersion = LineTable->Prologue.getVersion(); 1630 if (DwarfVersion >= 5) { 1631 std::optional<MD5::MD5Result> Checksum; 1632 if (LineTable->Prologue.ContentTypes.HasMD5) 1633 Checksum = LineTable->Prologue.FileNames[0].Checksum; 1634 std::optional<const char *> Name = 1635 dwarf::toString(CU->getUnitDIE().find(dwarf::DW_AT_name), nullptr); 1636 if (std::optional<uint64_t> DWOID = CU->getDWOId()) { 1637 auto Iter = DWOCUs.find(*DWOID); 1638 assert(Iter != DWOCUs.end() && "DWO CU was not found."); 1639 Name = dwarf::toString( 1640 Iter->second->getUnitDIE().find(dwarf::DW_AT_name), nullptr); 1641 } 1642 BinaryLineTable.setRootFile(CU->getCompilationDir(), *Name, Checksum, 1643 std::nullopt); 1644 } 1645 1646 BinaryLineTable.setDwarfVersion(DwarfVersion); 1647 1648 // Assign a unique label to every line table, one per CU. 1649 // Make sure empty debug line tables are registered too. 1650 if (FileNames.empty()) { 1651 cantFail(getDwarfFile("", "<unknown>", 0, std::nullopt, std::nullopt, 1652 CUID, DwarfVersion)); 1653 continue; 1654 } 1655 const uint32_t Offset = DwarfVersion < 5 ? 1 : 0; 1656 for (size_t I = 0, Size = FileNames.size(); I != Size; ++I) { 1657 // Dir indexes start at 1, as DWARF file numbers, and a dir index 0 1658 // means empty dir. 1659 StringRef Dir = ""; 1660 if (FileNames[I].DirIdx != 0 || DwarfVersion >= 5) 1661 if (std::optional<const char *> DirName = dwarf::toString( 1662 LineTable->Prologue 1663 .IncludeDirectories[FileNames[I].DirIdx - Offset])) 1664 Dir = *DirName; 1665 StringRef FileName = ""; 1666 if (std::optional<const char *> FName = 1667 dwarf::toString(FileNames[I].Name)) 1668 FileName = *FName; 1669 assert(FileName != ""); 1670 std::optional<MD5::MD5Result> Checksum; 1671 if (DwarfVersion >= 5 && LineTable->Prologue.ContentTypes.HasMD5) 1672 Checksum = LineTable->Prologue.FileNames[I].Checksum; 1673 cantFail(getDwarfFile(Dir, FileName, 0, Checksum, std::nullopt, CUID, 1674 DwarfVersion)); 1675 } 1676 } 1677 } 1678 1679 bool BinaryContext::shouldEmit(const BinaryFunction &Function) const { 1680 if (Function.isPseudo()) 1681 return false; 1682 1683 if (opts::processAllFunctions()) 1684 return true; 1685 1686 if (Function.isIgnored()) 1687 return false; 1688 1689 // In relocation mode we will emit non-simple functions with CFG. 1690 // If the function does not have a CFG it should be marked as ignored. 1691 return HasRelocations || Function.isSimple(); 1692 } 1693 1694 void BinaryContext::printCFI(raw_ostream &OS, const MCCFIInstruction &Inst) { 1695 uint32_t Operation = Inst.getOperation(); 1696 switch (Operation) { 1697 case MCCFIInstruction::OpSameValue: 1698 OS << "OpSameValue Reg" << Inst.getRegister(); 1699 break; 1700 case MCCFIInstruction::OpRememberState: 1701 OS << "OpRememberState"; 1702 break; 1703 case MCCFIInstruction::OpRestoreState: 1704 OS << "OpRestoreState"; 1705 break; 1706 case MCCFIInstruction::OpOffset: 1707 OS << "OpOffset Reg" << Inst.getRegister() << " " << Inst.getOffset(); 1708 break; 1709 case MCCFIInstruction::OpDefCfaRegister: 1710 OS << "OpDefCfaRegister Reg" << Inst.getRegister(); 1711 break; 1712 case MCCFIInstruction::OpDefCfaOffset: 1713 OS << "OpDefCfaOffset " << Inst.getOffset(); 1714 break; 1715 case MCCFIInstruction::OpDefCfa: 1716 OS << "OpDefCfa Reg" << Inst.getRegister() << " " << Inst.getOffset(); 1717 break; 1718 case MCCFIInstruction::OpRelOffset: 1719 OS << "OpRelOffset Reg" << Inst.getRegister() << " " << Inst.getOffset(); 1720 break; 1721 case MCCFIInstruction::OpAdjustCfaOffset: 1722 OS << "OfAdjustCfaOffset " << Inst.getOffset(); 1723 break; 1724 case MCCFIInstruction::OpEscape: 1725 OS << "OpEscape"; 1726 break; 1727 case MCCFIInstruction::OpRestore: 1728 OS << "OpRestore Reg" << Inst.getRegister(); 1729 break; 1730 case MCCFIInstruction::OpUndefined: 1731 OS << "OpUndefined Reg" << Inst.getRegister(); 1732 break; 1733 case MCCFIInstruction::OpRegister: 1734 OS << "OpRegister Reg" << Inst.getRegister() << " Reg" 1735 << Inst.getRegister2(); 1736 break; 1737 case MCCFIInstruction::OpWindowSave: 1738 OS << "OpWindowSave"; 1739 break; 1740 case MCCFIInstruction::OpGnuArgsSize: 1741 OS << "OpGnuArgsSize"; 1742 break; 1743 default: 1744 OS << "Op#" << Operation; 1745 break; 1746 } 1747 } 1748 1749 MarkerSymType BinaryContext::getMarkerType(const SymbolRef &Symbol) const { 1750 // For aarch64, the ABI defines mapping symbols so we identify data in the 1751 // code section (see IHI0056B). $x identifies a symbol starting code or the 1752 // end of a data chunk inside code, $d indentifies start of data. 1753 if (!isAArch64() || ELFSymbolRef(Symbol).getSize()) 1754 return MarkerSymType::NONE; 1755 1756 Expected<StringRef> NameOrError = Symbol.getName(); 1757 Expected<object::SymbolRef::Type> TypeOrError = Symbol.getType(); 1758 1759 if (!TypeOrError || !NameOrError) 1760 return MarkerSymType::NONE; 1761 1762 if (*TypeOrError != SymbolRef::ST_Unknown) 1763 return MarkerSymType::NONE; 1764 1765 if (*NameOrError == "$x" || NameOrError->startswith("$x.")) 1766 return MarkerSymType::CODE; 1767 1768 if (*NameOrError == "$d" || NameOrError->startswith("$d.")) 1769 return MarkerSymType::DATA; 1770 1771 return MarkerSymType::NONE; 1772 } 1773 1774 bool BinaryContext::isMarker(const SymbolRef &Symbol) const { 1775 return getMarkerType(Symbol) != MarkerSymType::NONE; 1776 } 1777 1778 static void printDebugInfo(raw_ostream &OS, const MCInst &Instruction, 1779 const BinaryFunction *Function, 1780 DWARFContext *DwCtx) { 1781 DebugLineTableRowRef RowRef = 1782 DebugLineTableRowRef::fromSMLoc(Instruction.getLoc()); 1783 if (RowRef == DebugLineTableRowRef::NULL_ROW) 1784 return; 1785 1786 const DWARFDebugLine::LineTable *LineTable; 1787 if (Function && Function->getDWARFUnit() && 1788 Function->getDWARFUnit()->getOffset() == RowRef.DwCompileUnitIndex) { 1789 LineTable = Function->getDWARFLineTable(); 1790 } else { 1791 LineTable = DwCtx->getLineTableForUnit( 1792 DwCtx->getCompileUnitForOffset(RowRef.DwCompileUnitIndex)); 1793 } 1794 assert(LineTable && "line table expected for instruction with debug info"); 1795 1796 const DWARFDebugLine::Row &Row = LineTable->Rows[RowRef.RowIndex - 1]; 1797 StringRef FileName = ""; 1798 if (std::optional<const char *> FName = 1799 dwarf::toString(LineTable->Prologue.FileNames[Row.File - 1].Name)) 1800 FileName = *FName; 1801 OS << " # debug line " << FileName << ":" << Row.Line; 1802 if (Row.Column) 1803 OS << ":" << Row.Column; 1804 if (Row.Discriminator) 1805 OS << " discriminator:" << Row.Discriminator; 1806 } 1807 1808 void BinaryContext::printInstruction(raw_ostream &OS, const MCInst &Instruction, 1809 uint64_t Offset, 1810 const BinaryFunction *Function, 1811 bool PrintMCInst, bool PrintMemData, 1812 bool PrintRelocations, 1813 StringRef Endl) const { 1814 if (MIB->isEHLabel(Instruction)) { 1815 OS << " EH_LABEL: " << *MIB->getTargetSymbol(Instruction) << Endl; 1816 return; 1817 } 1818 OS << format(" %08" PRIx64 ": ", Offset); 1819 if (MIB->isCFI(Instruction)) { 1820 uint32_t Offset = Instruction.getOperand(0).getImm(); 1821 OS << "\t!CFI\t$" << Offset << "\t; "; 1822 if (Function) 1823 printCFI(OS, *Function->getCFIFor(Instruction)); 1824 OS << Endl; 1825 return; 1826 } 1827 InstPrinter->printInst(&Instruction, 0, "", *STI, OS); 1828 if (MIB->isCall(Instruction)) { 1829 if (MIB->isTailCall(Instruction)) 1830 OS << " # TAILCALL "; 1831 if (MIB->isInvoke(Instruction)) { 1832 const std::optional<MCPlus::MCLandingPad> EHInfo = 1833 MIB->getEHInfo(Instruction); 1834 OS << " # handler: "; 1835 if (EHInfo->first) 1836 OS << *EHInfo->first; 1837 else 1838 OS << '0'; 1839 OS << "; action: " << EHInfo->second; 1840 const int64_t GnuArgsSize = MIB->getGnuArgsSize(Instruction); 1841 if (GnuArgsSize >= 0) 1842 OS << "; GNU_args_size = " << GnuArgsSize; 1843 } 1844 } else if (MIB->isIndirectBranch(Instruction)) { 1845 if (uint64_t JTAddress = MIB->getJumpTable(Instruction)) { 1846 OS << " # JUMPTABLE @0x" << Twine::utohexstr(JTAddress); 1847 } else { 1848 OS << " # UNKNOWN CONTROL FLOW"; 1849 } 1850 } 1851 if (std::optional<uint32_t> Offset = MIB->getOffset(Instruction)) 1852 OS << " # Offset: " << *Offset; 1853 1854 MIB->printAnnotations(Instruction, OS); 1855 1856 if (opts::PrintDebugInfo) 1857 printDebugInfo(OS, Instruction, Function, DwCtx.get()); 1858 1859 if ((opts::PrintRelocations || PrintRelocations) && Function) { 1860 const uint64_t Size = computeCodeSize(&Instruction, &Instruction + 1); 1861 Function->printRelocations(OS, Offset, Size); 1862 } 1863 1864 OS << Endl; 1865 1866 if (PrintMCInst) { 1867 Instruction.dump_pretty(OS, InstPrinter.get()); 1868 OS << Endl; 1869 } 1870 } 1871 1872 std::optional<uint64_t> 1873 BinaryContext::getBaseAddressForMapping(uint64_t MMapAddress, 1874 uint64_t FileOffset) const { 1875 // Find a segment with a matching file offset. 1876 for (auto &KV : SegmentMapInfo) { 1877 const SegmentInfo &SegInfo = KV.second; 1878 if (alignDown(SegInfo.FileOffset, SegInfo.Alignment) == FileOffset) { 1879 // Use segment's aligned memory offset to calculate the base address. 1880 const uint64_t MemOffset = alignDown(SegInfo.Address, SegInfo.Alignment); 1881 return MMapAddress - MemOffset; 1882 } 1883 } 1884 1885 return std::nullopt; 1886 } 1887 1888 ErrorOr<BinarySection &> BinaryContext::getSectionForAddress(uint64_t Address) { 1889 auto SI = AddressToSection.upper_bound(Address); 1890 if (SI != AddressToSection.begin()) { 1891 --SI; 1892 uint64_t UpperBound = SI->first + SI->second->getSize(); 1893 if (!SI->second->getSize()) 1894 UpperBound += 1; 1895 if (UpperBound > Address) 1896 return *SI->second; 1897 } 1898 return std::make_error_code(std::errc::bad_address); 1899 } 1900 1901 ErrorOr<StringRef> 1902 BinaryContext::getSectionNameForAddress(uint64_t Address) const { 1903 if (ErrorOr<const BinarySection &> Section = getSectionForAddress(Address)) 1904 return Section->getName(); 1905 return std::make_error_code(std::errc::bad_address); 1906 } 1907 1908 BinarySection &BinaryContext::registerSection(BinarySection *Section) { 1909 auto Res = Sections.insert(Section); 1910 (void)Res; 1911 assert(Res.second && "can't register the same section twice."); 1912 1913 // Only register allocatable sections in the AddressToSection map. 1914 if (Section->isAllocatable() && Section->getAddress()) 1915 AddressToSection.insert(std::make_pair(Section->getAddress(), Section)); 1916 NameToSection.insert( 1917 std::make_pair(std::string(Section->getName()), Section)); 1918 if (Section->hasSectionRef()) 1919 SectionRefToBinarySection.insert( 1920 std::make_pair(Section->getSectionRef(), Section)); 1921 1922 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: registering " << *Section << "\n"); 1923 return *Section; 1924 } 1925 1926 BinarySection &BinaryContext::registerSection(SectionRef Section) { 1927 return registerSection(new BinarySection(*this, Section)); 1928 } 1929 1930 BinarySection & 1931 BinaryContext::registerSection(const Twine &SectionName, 1932 const BinarySection &OriginalSection) { 1933 return registerSection( 1934 new BinarySection(*this, SectionName, OriginalSection)); 1935 } 1936 1937 BinarySection & 1938 BinaryContext::registerOrUpdateSection(const Twine &Name, unsigned ELFType, 1939 unsigned ELFFlags, uint8_t *Data, 1940 uint64_t Size, unsigned Alignment) { 1941 auto NamedSections = getSectionByName(Name); 1942 if (NamedSections.begin() != NamedSections.end()) { 1943 assert(std::next(NamedSections.begin()) == NamedSections.end() && 1944 "can only update unique sections"); 1945 BinarySection *Section = NamedSections.begin()->second; 1946 1947 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: updating " << *Section << " -> "); 1948 const bool Flag = Section->isAllocatable(); 1949 (void)Flag; 1950 Section->update(Data, Size, Alignment, ELFType, ELFFlags); 1951 LLVM_DEBUG(dbgs() << *Section << "\n"); 1952 // FIXME: Fix section flags/attributes for MachO. 1953 if (isELF()) 1954 assert(Flag == Section->isAllocatable() && 1955 "can't change section allocation status"); 1956 return *Section; 1957 } 1958 1959 return registerSection( 1960 new BinarySection(*this, Name, Data, Size, Alignment, ELFType, ELFFlags)); 1961 } 1962 1963 void BinaryContext::deregisterSectionName(const BinarySection &Section) { 1964 auto NameRange = NameToSection.equal_range(Section.getName().str()); 1965 while (NameRange.first != NameRange.second) { 1966 if (NameRange.first->second == &Section) { 1967 NameToSection.erase(NameRange.first); 1968 break; 1969 } 1970 ++NameRange.first; 1971 } 1972 } 1973 1974 void BinaryContext::deregisterUnusedSections() { 1975 ErrorOr<BinarySection &> AbsSection = getUniqueSectionByName("<absolute>"); 1976 for (auto SI = Sections.begin(); SI != Sections.end();) { 1977 BinarySection *Section = *SI; 1978 // We check getOutputData() instead of getOutputSize() because sometimes 1979 // zero-sized .text.cold sections are allocated. 1980 if (Section->hasSectionRef() || Section->getOutputData() || 1981 (AbsSection && Section == &AbsSection.get())) { 1982 ++SI; 1983 continue; 1984 } 1985 1986 LLVM_DEBUG(dbgs() << "LLVM-DEBUG: deregistering " << Section->getName() 1987 << '\n';); 1988 deregisterSectionName(*Section); 1989 SI = Sections.erase(SI); 1990 delete Section; 1991 } 1992 } 1993 1994 bool BinaryContext::deregisterSection(BinarySection &Section) { 1995 BinarySection *SectionPtr = &Section; 1996 auto Itr = Sections.find(SectionPtr); 1997 if (Itr != Sections.end()) { 1998 auto Range = AddressToSection.equal_range(SectionPtr->getAddress()); 1999 while (Range.first != Range.second) { 2000 if (Range.first->second == SectionPtr) { 2001 AddressToSection.erase(Range.first); 2002 break; 2003 } 2004 ++Range.first; 2005 } 2006 2007 deregisterSectionName(*SectionPtr); 2008 Sections.erase(Itr); 2009 delete SectionPtr; 2010 return true; 2011 } 2012 return false; 2013 } 2014 2015 void BinaryContext::renameSection(BinarySection &Section, 2016 const Twine &NewName) { 2017 auto Itr = Sections.find(&Section); 2018 assert(Itr != Sections.end() && "Section must exist to be renamed."); 2019 Sections.erase(Itr); 2020 2021 deregisterSectionName(Section); 2022 2023 Section.Name = NewName.str(); 2024 Section.setOutputName(Section.Name); 2025 2026 NameToSection.insert(std::make_pair(Section.Name, &Section)); 2027 2028 // Reinsert with the new name. 2029 Sections.insert(&Section); 2030 } 2031 2032 void BinaryContext::printSections(raw_ostream &OS) const { 2033 for (BinarySection *const &Section : Sections) 2034 OS << "BOLT-INFO: " << *Section << "\n"; 2035 } 2036 2037 BinarySection &BinaryContext::absoluteSection() { 2038 if (ErrorOr<BinarySection &> Section = getUniqueSectionByName("<absolute>")) 2039 return *Section; 2040 return registerOrUpdateSection("<absolute>", ELF::SHT_NULL, 0u); 2041 } 2042 2043 ErrorOr<uint64_t> BinaryContext::getUnsignedValueAtAddress(uint64_t Address, 2044 size_t Size) const { 2045 const ErrorOr<const BinarySection &> Section = getSectionForAddress(Address); 2046 if (!Section) 2047 return std::make_error_code(std::errc::bad_address); 2048 2049 if (Section->isVirtual()) 2050 return 0; 2051 2052 DataExtractor DE(Section->getContents(), AsmInfo->isLittleEndian(), 2053 AsmInfo->getCodePointerSize()); 2054 auto ValueOffset = static_cast<uint64_t>(Address - Section->getAddress()); 2055 return DE.getUnsigned(&ValueOffset, Size); 2056 } 2057 2058 ErrorOr<uint64_t> BinaryContext::getSignedValueAtAddress(uint64_t Address, 2059 size_t Size) const { 2060 const ErrorOr<const BinarySection &> Section = getSectionForAddress(Address); 2061 if (!Section) 2062 return std::make_error_code(std::errc::bad_address); 2063 2064 if (Section->isVirtual()) 2065 return 0; 2066 2067 DataExtractor DE(Section->getContents(), AsmInfo->isLittleEndian(), 2068 AsmInfo->getCodePointerSize()); 2069 auto ValueOffset = static_cast<uint64_t>(Address - Section->getAddress()); 2070 return DE.getSigned(&ValueOffset, Size); 2071 } 2072 2073 void BinaryContext::addRelocation(uint64_t Address, MCSymbol *Symbol, 2074 uint64_t Type, uint64_t Addend, 2075 uint64_t Value) { 2076 ErrorOr<BinarySection &> Section = getSectionForAddress(Address); 2077 assert(Section && "cannot find section for address"); 2078 Section->addRelocation(Address - Section->getAddress(), Symbol, Type, Addend, 2079 Value); 2080 } 2081 2082 void BinaryContext::addDynamicRelocation(uint64_t Address, MCSymbol *Symbol, 2083 uint64_t Type, uint64_t Addend, 2084 uint64_t Value) { 2085 ErrorOr<BinarySection &> Section = getSectionForAddress(Address); 2086 assert(Section && "cannot find section for address"); 2087 Section->addDynamicRelocation(Address - Section->getAddress(), Symbol, Type, 2088 Addend, Value); 2089 } 2090 2091 bool BinaryContext::removeRelocationAt(uint64_t Address) { 2092 ErrorOr<BinarySection &> Section = getSectionForAddress(Address); 2093 assert(Section && "cannot find section for address"); 2094 return Section->removeRelocationAt(Address - Section->getAddress()); 2095 } 2096 2097 const Relocation *BinaryContext::getRelocationAt(uint64_t Address) const { 2098 ErrorOr<const BinarySection &> Section = getSectionForAddress(Address); 2099 if (!Section) 2100 return nullptr; 2101 2102 return Section->getRelocationAt(Address - Section->getAddress()); 2103 } 2104 2105 const Relocation * 2106 BinaryContext::getDynamicRelocationAt(uint64_t Address) const { 2107 ErrorOr<const BinarySection &> Section = getSectionForAddress(Address); 2108 if (!Section) 2109 return nullptr; 2110 2111 return Section->getDynamicRelocationAt(Address - Section->getAddress()); 2112 } 2113 2114 void BinaryContext::markAmbiguousRelocations(BinaryData &BD, 2115 const uint64_t Address) { 2116 auto setImmovable = [&](BinaryData &BD) { 2117 BinaryData *Root = BD.getAtomicRoot(); 2118 LLVM_DEBUG(if (Root->isMoveable()) { 2119 dbgs() << "BOLT-DEBUG: setting " << *Root << " as immovable " 2120 << "due to ambiguous relocation referencing 0x" 2121 << Twine::utohexstr(Address) << '\n'; 2122 }); 2123 Root->setIsMoveable(false); 2124 }; 2125 2126 if (Address == BD.getAddress()) { 2127 setImmovable(BD); 2128 2129 // Set previous symbol as immovable 2130 BinaryData *Prev = getBinaryDataContainingAddress(Address - 1); 2131 if (Prev && Prev->getEndAddress() == BD.getAddress()) 2132 setImmovable(*Prev); 2133 } 2134 2135 if (Address == BD.getEndAddress()) { 2136 setImmovable(BD); 2137 2138 // Set next symbol as immovable 2139 BinaryData *Next = getBinaryDataContainingAddress(BD.getEndAddress()); 2140 if (Next && Next->getAddress() == BD.getEndAddress()) 2141 setImmovable(*Next); 2142 } 2143 } 2144 2145 BinaryFunction *BinaryContext::getFunctionForSymbol(const MCSymbol *Symbol, 2146 uint64_t *EntryDesc) { 2147 std::shared_lock<llvm::sys::RWMutex> Lock(SymbolToFunctionMapMutex); 2148 auto BFI = SymbolToFunctionMap.find(Symbol); 2149 if (BFI == SymbolToFunctionMap.end()) 2150 return nullptr; 2151 2152 BinaryFunction *BF = BFI->second; 2153 if (EntryDesc) 2154 *EntryDesc = BF->getEntryIDForSymbol(Symbol); 2155 2156 return BF; 2157 } 2158 2159 void BinaryContext::exitWithBugReport(StringRef Message, 2160 const BinaryFunction &Function) const { 2161 errs() << "=======================================\n"; 2162 errs() << "BOLT is unable to proceed because it couldn't properly understand " 2163 "this function.\n"; 2164 errs() << "If you are running the most recent version of BOLT, you may " 2165 "want to " 2166 "report this and paste this dump.\nPlease check that there is no " 2167 "sensitive contents being shared in this dump.\n"; 2168 errs() << "\nOffending function: " << Function.getPrintName() << "\n\n"; 2169 ScopedPrinter SP(errs()); 2170 SP.printBinaryBlock("Function contents", *Function.getData()); 2171 errs() << "\n"; 2172 Function.dump(); 2173 errs() << "ERROR: " << Message; 2174 errs() << "\n=======================================\n"; 2175 exit(1); 2176 } 2177 2178 BinaryFunction * 2179 BinaryContext::createInjectedBinaryFunction(const std::string &Name, 2180 bool IsSimple) { 2181 InjectedBinaryFunctions.push_back(new BinaryFunction(Name, *this, IsSimple)); 2182 BinaryFunction *BF = InjectedBinaryFunctions.back(); 2183 setSymbolToFunctionMap(BF->getSymbol(), BF); 2184 BF->CurrentState = BinaryFunction::State::CFG; 2185 return BF; 2186 } 2187 2188 std::pair<size_t, size_t> 2189 BinaryContext::calculateEmittedSize(BinaryFunction &BF, bool FixBranches) { 2190 // Adjust branch instruction to match the current layout. 2191 if (FixBranches) 2192 BF.fixBranches(); 2193 2194 // Create local MC context to isolate the effect of ephemeral code emission. 2195 IndependentCodeEmitter MCEInstance = createIndependentMCCodeEmitter(); 2196 MCContext *LocalCtx = MCEInstance.LocalCtx.get(); 2197 MCAsmBackend *MAB = 2198 TheTarget->createMCAsmBackend(*STI, *MRI, MCTargetOptions()); 2199 2200 SmallString<256> Code; 2201 raw_svector_ostream VecOS(Code); 2202 2203 std::unique_ptr<MCObjectWriter> OW = MAB->createObjectWriter(VecOS); 2204 std::unique_ptr<MCStreamer> Streamer(TheTarget->createMCObjectStreamer( 2205 *TheTriple, *LocalCtx, std::unique_ptr<MCAsmBackend>(MAB), std::move(OW), 2206 std::unique_ptr<MCCodeEmitter>(MCEInstance.MCE.release()), *STI, 2207 /*RelaxAll=*/false, 2208 /*IncrementalLinkerCompatible=*/false, 2209 /*DWARFMustBeAtTheEnd=*/false)); 2210 2211 Streamer->initSections(false, *STI); 2212 2213 MCSection *Section = MCEInstance.LocalMOFI->getTextSection(); 2214 Section->setHasInstructions(true); 2215 2216 // Create symbols in the LocalCtx so that they get destroyed with it. 2217 MCSymbol *StartLabel = LocalCtx->createTempSymbol(); 2218 MCSymbol *EndLabel = LocalCtx->createTempSymbol(); 2219 2220 Streamer->switchSection(Section); 2221 Streamer->emitLabel(StartLabel); 2222 emitFunctionBody(*Streamer, BF, BF.getLayout().getMainFragment(), 2223 /*EmitCodeOnly=*/true); 2224 Streamer->emitLabel(EndLabel); 2225 2226 using LabelRange = std::pair<const MCSymbol *, const MCSymbol *>; 2227 SmallVector<LabelRange> SplitLabels; 2228 for (FunctionFragment &FF : BF.getLayout().getSplitFragments()) { 2229 MCSymbol *const SplitStartLabel = LocalCtx->createTempSymbol(); 2230 MCSymbol *const SplitEndLabel = LocalCtx->createTempSymbol(); 2231 SplitLabels.emplace_back(SplitStartLabel, SplitEndLabel); 2232 2233 MCSectionELF *const SplitSection = LocalCtx->getELFSection( 2234 BF.getCodeSectionName(FF.getFragmentNum()), ELF::SHT_PROGBITS, 2235 ELF::SHF_EXECINSTR | ELF::SHF_ALLOC); 2236 SplitSection->setHasInstructions(true); 2237 Streamer->switchSection(SplitSection); 2238 2239 Streamer->emitLabel(SplitStartLabel); 2240 emitFunctionBody(*Streamer, BF, FF, /*EmitCodeOnly=*/true); 2241 Streamer->emitLabel(SplitEndLabel); 2242 // To avoid calling MCObjectStreamer::flushPendingLabels() which is 2243 // private 2244 Streamer->emitBytes(StringRef("")); 2245 Streamer->switchSection(Section); 2246 } 2247 2248 // To avoid calling MCObjectStreamer::flushPendingLabels() which is private or 2249 // MCStreamer::Finish(), which does more than we want 2250 Streamer->emitBytes(StringRef("")); 2251 2252 MCAssembler &Assembler = 2253 static_cast<MCObjectStreamer *>(Streamer.get())->getAssembler(); 2254 MCAsmLayout Layout(Assembler); 2255 Assembler.layout(Layout); 2256 2257 const uint64_t HotSize = 2258 Layout.getSymbolOffset(*EndLabel) - Layout.getSymbolOffset(*StartLabel); 2259 const uint64_t ColdSize = 2260 std::accumulate(SplitLabels.begin(), SplitLabels.end(), 0ULL, 2261 [&](const uint64_t Accu, const LabelRange &Labels) { 2262 return Accu + Layout.getSymbolOffset(*Labels.second) - 2263 Layout.getSymbolOffset(*Labels.first); 2264 }); 2265 2266 // Clean-up the effect of the code emission. 2267 for (const MCSymbol &Symbol : Assembler.symbols()) { 2268 MCSymbol *MutableSymbol = const_cast<MCSymbol *>(&Symbol); 2269 MutableSymbol->setUndefined(); 2270 MutableSymbol->setIsRegistered(false); 2271 } 2272 2273 return std::make_pair(HotSize, ColdSize); 2274 } 2275 2276 bool BinaryContext::validateInstructionEncoding( 2277 ArrayRef<uint8_t> InputSequence) const { 2278 MCInst Inst; 2279 uint64_t InstSize; 2280 DisAsm->getInstruction(Inst, InstSize, InputSequence, 0, nulls()); 2281 assert(InstSize == InputSequence.size() && 2282 "Disassembled instruction size does not match the sequence."); 2283 2284 SmallString<256> Code; 2285 SmallVector<MCFixup, 4> Fixups; 2286 2287 MCE->encodeInstruction(Inst, Code, Fixups, *STI); 2288 auto OutputSequence = ArrayRef<uint8_t>((uint8_t *)Code.data(), Code.size()); 2289 if (InputSequence != OutputSequence) { 2290 if (opts::Verbosity > 1) { 2291 errs() << "BOLT-WARNING: mismatched encoding detected\n" 2292 << " input: " << InputSequence << '\n' 2293 << " output: " << OutputSequence << '\n'; 2294 } 2295 return false; 2296 } 2297 2298 return true; 2299 } 2300 2301 uint64_t BinaryContext::getHotThreshold() const { 2302 static uint64_t Threshold = 0; 2303 if (Threshold == 0) { 2304 Threshold = std::max( 2305 (uint64_t)opts::ExecutionCountThreshold, 2306 NumProfiledFuncs ? SumExecutionCount / (2 * NumProfiledFuncs) : 1); 2307 } 2308 return Threshold; 2309 } 2310 2311 BinaryFunction *BinaryContext::getBinaryFunctionContainingAddress( 2312 uint64_t Address, bool CheckPastEnd, bool UseMaxSize) { 2313 auto FI = BinaryFunctions.upper_bound(Address); 2314 if (FI == BinaryFunctions.begin()) 2315 return nullptr; 2316 --FI; 2317 2318 const uint64_t UsedSize = 2319 UseMaxSize ? FI->second.getMaxSize() : FI->second.getSize(); 2320 2321 if (Address >= FI->first + UsedSize + (CheckPastEnd ? 1 : 0)) 2322 return nullptr; 2323 2324 return &FI->second; 2325 } 2326 2327 BinaryFunction *BinaryContext::getBinaryFunctionAtAddress(uint64_t Address) { 2328 // First, try to find a function starting at the given address. If the 2329 // function was folded, this will get us the original folded function if it 2330 // wasn't removed from the list, e.g. in non-relocation mode. 2331 auto BFI = BinaryFunctions.find(Address); 2332 if (BFI != BinaryFunctions.end()) 2333 return &BFI->second; 2334 2335 // We might have folded the function matching the object at the given 2336 // address. In such case, we look for a function matching the symbol 2337 // registered at the original address. The new function (the one that the 2338 // original was folded into) will hold the symbol. 2339 if (const BinaryData *BD = getBinaryDataAtAddress(Address)) { 2340 uint64_t EntryID = 0; 2341 BinaryFunction *BF = getFunctionForSymbol(BD->getSymbol(), &EntryID); 2342 if (BF && EntryID == 0) 2343 return BF; 2344 } 2345 return nullptr; 2346 } 2347 2348 DebugAddressRangesVector BinaryContext::translateModuleAddressRanges( 2349 const DWARFAddressRangesVector &InputRanges) const { 2350 DebugAddressRangesVector OutputRanges; 2351 2352 for (const DWARFAddressRange Range : InputRanges) { 2353 auto BFI = BinaryFunctions.lower_bound(Range.LowPC); 2354 while (BFI != BinaryFunctions.end()) { 2355 const BinaryFunction &Function = BFI->second; 2356 if (Function.getAddress() >= Range.HighPC) 2357 break; 2358 const DebugAddressRangesVector FunctionRanges = 2359 Function.getOutputAddressRanges(); 2360 llvm::move(FunctionRanges, std::back_inserter(OutputRanges)); 2361 std::advance(BFI, 1); 2362 } 2363 } 2364 2365 return OutputRanges; 2366 } 2367 2368 } // namespace bolt 2369 } // namespace llvm 2370