1 //===- bolt/Core/BinaryContext.cpp - Low-level context --------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file implements the BinaryContext class. 10 // 11 //===----------------------------------------------------------------------===// 12 13 #include "bolt/Core/BinaryContext.h" 14 #include "bolt/Core/BinaryEmitter.h" 15 #include "bolt/Core/BinaryFunction.h" 16 #include "bolt/Utils/CommandLineOpts.h" 17 #include "bolt/Utils/NameResolver.h" 18 #include "bolt/Utils/Utils.h" 19 #include "llvm/ADT/Twine.h" 20 #include "llvm/DebugInfo/DWARF/DWARFCompileUnit.h" 21 #include "llvm/DebugInfo/DWARF/DWARFFormValue.h" 22 #include "llvm/DebugInfo/DWARF/DWARFUnit.h" 23 #include "llvm/MC/MCAsmLayout.h" 24 #include "llvm/MC/MCAssembler.h" 25 #include "llvm/MC/MCContext.h" 26 #include "llvm/MC/MCDisassembler/MCDisassembler.h" 27 #include "llvm/MC/MCInstPrinter.h" 28 #include "llvm/MC/MCObjectStreamer.h" 29 #include "llvm/MC/MCObjectWriter.h" 30 #include "llvm/MC/MCRegisterInfo.h" 31 #include "llvm/MC/MCSectionELF.h" 32 #include "llvm/MC/MCStreamer.h" 33 #include "llvm/MC/MCSubtargetInfo.h" 34 #include "llvm/MC/MCSymbol.h" 35 #include "llvm/Support/CommandLine.h" 36 #include "llvm/Support/Error.h" 37 #include "llvm/Support/Regex.h" 38 #include <algorithm> 39 #include <functional> 40 #include <iterator> 41 #include <unordered_set> 42 43 using namespace llvm; 44 45 #undef DEBUG_TYPE 46 #define DEBUG_TYPE "bolt" 47 48 namespace opts { 49 50 cl::opt<bool> NoHugePages("no-huge-pages", 51 cl::desc("use regular size pages for code alignment"), 52 cl::Hidden, cl::cat(BoltCategory)); 53 54 static cl::opt<bool> 55 PrintDebugInfo("print-debug-info", 56 cl::desc("print debug info when printing functions"), 57 cl::Hidden, 58 cl::ZeroOrMore, 59 cl::cat(BoltCategory)); 60 61 cl::opt<bool> PrintRelocations( 62 "print-relocations", 63 cl::desc("print relocations when printing functions/objects"), cl::Hidden, 64 cl::cat(BoltCategory)); 65 66 static cl::opt<bool> 67 PrintMemData("print-mem-data", 68 cl::desc("print memory data annotations when printing functions"), 69 cl::Hidden, 70 cl::ZeroOrMore, 71 cl::cat(BoltCategory)); 72 73 } // namespace opts 74 75 namespace llvm { 76 namespace bolt { 77 78 BinaryContext::BinaryContext(std::unique_ptr<MCContext> Ctx, 79 std::unique_ptr<DWARFContext> DwCtx, 80 std::unique_ptr<Triple> TheTriple, 81 const Target *TheTarget, std::string TripleName, 82 std::unique_ptr<MCCodeEmitter> MCE, 83 std::unique_ptr<MCObjectFileInfo> MOFI, 84 std::unique_ptr<const MCAsmInfo> AsmInfo, 85 std::unique_ptr<const MCInstrInfo> MII, 86 std::unique_ptr<const MCSubtargetInfo> STI, 87 std::unique_ptr<MCInstPrinter> InstPrinter, 88 std::unique_ptr<const MCInstrAnalysis> MIA, 89 std::unique_ptr<MCPlusBuilder> MIB, 90 std::unique_ptr<const MCRegisterInfo> MRI, 91 std::unique_ptr<MCDisassembler> DisAsm) 92 : Ctx(std::move(Ctx)), DwCtx(std::move(DwCtx)), 93 TheTriple(std::move(TheTriple)), TheTarget(TheTarget), 94 TripleName(TripleName), MCE(std::move(MCE)), MOFI(std::move(MOFI)), 95 AsmInfo(std::move(AsmInfo)), MII(std::move(MII)), STI(std::move(STI)), 96 InstPrinter(std::move(InstPrinter)), MIA(std::move(MIA)), 97 MIB(std::move(MIB)), MRI(std::move(MRI)), DisAsm(std::move(DisAsm)) { 98 Relocation::Arch = this->TheTriple->getArch(); 99 RegularPageSize = isAArch64() ? RegularPageSizeAArch64 : RegularPageSizeX86; 100 PageAlign = opts::NoHugePages ? RegularPageSize : HugePageSize; 101 } 102 103 BinaryContext::~BinaryContext() { 104 for (BinarySection *Section : Sections) 105 delete Section; 106 for (BinaryFunction *InjectedFunction : InjectedBinaryFunctions) 107 delete InjectedFunction; 108 for (std::pair<const uint64_t, JumpTable *> JTI : JumpTables) 109 delete JTI.second; 110 clearBinaryData(); 111 } 112 113 /// Create BinaryContext for a given architecture \p ArchName and 114 /// triple \p TripleName. 115 Expected<std::unique_ptr<BinaryContext>> 116 BinaryContext::createBinaryContext(const ObjectFile *File, bool IsPIC, 117 std::unique_ptr<DWARFContext> DwCtx) { 118 StringRef ArchName = ""; 119 StringRef FeaturesStr = ""; 120 switch (File->getArch()) { 121 case llvm::Triple::x86_64: 122 ArchName = "x86-64"; 123 FeaturesStr = "+nopl"; 124 break; 125 case llvm::Triple::aarch64: 126 ArchName = "aarch64"; 127 FeaturesStr = "+all"; 128 break; 129 default: 130 return createStringError(std::errc::not_supported, 131 "BOLT-ERROR: Unrecognized machine in ELF file"); 132 } 133 134 auto TheTriple = std::make_unique<Triple>(File->makeTriple()); 135 const std::string TripleName = TheTriple->str(); 136 137 std::string Error; 138 const Target *TheTarget = 139 TargetRegistry::lookupTarget(std::string(ArchName), *TheTriple, Error); 140 if (!TheTarget) 141 return createStringError(make_error_code(std::errc::not_supported), 142 Twine("BOLT-ERROR: ", Error)); 143 144 std::unique_ptr<const MCRegisterInfo> MRI( 145 TheTarget->createMCRegInfo(TripleName)); 146 if (!MRI) 147 return createStringError( 148 make_error_code(std::errc::not_supported), 149 Twine("BOLT-ERROR: no register info for target ", TripleName)); 150 151 // Set up disassembler. 152 std::unique_ptr<MCAsmInfo> AsmInfo( 153 TheTarget->createMCAsmInfo(*MRI, TripleName, MCTargetOptions())); 154 if (!AsmInfo) 155 return createStringError( 156 make_error_code(std::errc::not_supported), 157 Twine("BOLT-ERROR: no assembly info for target ", TripleName)); 158 // BOLT creates "func@PLT" symbols for PLT entries. In function assembly dump 159 // we want to emit such names as using @PLT without double quotes to convey 160 // variant kind to the assembler. BOLT doesn't rely on the linker so we can 161 // override the default AsmInfo behavior to emit names the way we want. 162 AsmInfo->setAllowAtInName(true); 163 164 std::unique_ptr<const MCSubtargetInfo> STI( 165 TheTarget->createMCSubtargetInfo(TripleName, "", FeaturesStr)); 166 if (!STI) 167 return createStringError( 168 make_error_code(std::errc::not_supported), 169 Twine("BOLT-ERROR: no subtarget info for target ", TripleName)); 170 171 std::unique_ptr<const MCInstrInfo> MII(TheTarget->createMCInstrInfo()); 172 if (!MII) 173 return createStringError( 174 make_error_code(std::errc::not_supported), 175 Twine("BOLT-ERROR: no instruction info for target ", TripleName)); 176 177 std::unique_ptr<MCContext> Ctx( 178 new MCContext(*TheTriple, AsmInfo.get(), MRI.get(), STI.get())); 179 std::unique_ptr<MCObjectFileInfo> MOFI( 180 TheTarget->createMCObjectFileInfo(*Ctx, IsPIC)); 181 Ctx->setObjectFileInfo(MOFI.get()); 182 // We do not support X86 Large code model. Change this in the future. 183 bool Large = false; 184 if (TheTriple->getArch() == llvm::Triple::aarch64) 185 Large = true; 186 unsigned LSDAEncoding = 187 Large ? dwarf::DW_EH_PE_absptr : dwarf::DW_EH_PE_udata4; 188 unsigned TTypeEncoding = 189 Large ? dwarf::DW_EH_PE_absptr : dwarf::DW_EH_PE_udata4; 190 if (IsPIC) { 191 LSDAEncoding = dwarf::DW_EH_PE_pcrel | 192 (Large ? dwarf::DW_EH_PE_sdata8 : dwarf::DW_EH_PE_sdata4); 193 TTypeEncoding = dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel | 194 (Large ? dwarf::DW_EH_PE_sdata8 : dwarf::DW_EH_PE_sdata4); 195 } 196 197 std::unique_ptr<MCDisassembler> DisAsm( 198 TheTarget->createMCDisassembler(*STI, *Ctx)); 199 200 if (!DisAsm) 201 return createStringError( 202 make_error_code(std::errc::not_supported), 203 Twine("BOLT-ERROR: no disassembler info for target ", TripleName)); 204 205 std::unique_ptr<const MCInstrAnalysis> MIA( 206 TheTarget->createMCInstrAnalysis(MII.get())); 207 if (!MIA) 208 return createStringError( 209 make_error_code(std::errc::not_supported), 210 Twine("BOLT-ERROR: failed to create instruction analysis for target ", 211 TripleName)); 212 213 int AsmPrinterVariant = AsmInfo->getAssemblerDialect(); 214 std::unique_ptr<MCInstPrinter> InstructionPrinter( 215 TheTarget->createMCInstPrinter(*TheTriple, AsmPrinterVariant, *AsmInfo, 216 *MII, *MRI)); 217 if (!InstructionPrinter) 218 return createStringError( 219 make_error_code(std::errc::not_supported), 220 Twine("BOLT-ERROR: no instruction printer for target ", TripleName)); 221 InstructionPrinter->setPrintImmHex(true); 222 223 std::unique_ptr<MCCodeEmitter> MCE( 224 TheTarget->createMCCodeEmitter(*MII, *Ctx)); 225 226 // Make sure we don't miss any output on core dumps. 227 outs().SetUnbuffered(); 228 errs().SetUnbuffered(); 229 dbgs().SetUnbuffered(); 230 231 auto BC = std::make_unique<BinaryContext>( 232 std::move(Ctx), std::move(DwCtx), std::move(TheTriple), TheTarget, 233 std::string(TripleName), std::move(MCE), std::move(MOFI), 234 std::move(AsmInfo), std::move(MII), std::move(STI), 235 std::move(InstructionPrinter), std::move(MIA), nullptr, std::move(MRI), 236 std::move(DisAsm)); 237 238 BC->TTypeEncoding = TTypeEncoding; 239 BC->LSDAEncoding = LSDAEncoding; 240 241 BC->MAB = std::unique_ptr<MCAsmBackend>( 242 BC->TheTarget->createMCAsmBackend(*BC->STI, *BC->MRI, MCTargetOptions())); 243 244 BC->setFilename(File->getFileName()); 245 246 BC->HasFixedLoadAddress = !IsPIC; 247 248 BC->SymbolicDisAsm = std::unique_ptr<MCDisassembler>( 249 BC->TheTarget->createMCDisassembler(*BC->STI, *BC->Ctx)); 250 251 if (!BC->SymbolicDisAsm) 252 return createStringError( 253 make_error_code(std::errc::not_supported), 254 Twine("BOLT-ERROR: no disassembler info for target ", TripleName)); 255 256 return std::move(BC); 257 } 258 259 bool BinaryContext::forceSymbolRelocations(StringRef SymbolName) const { 260 if (opts::HotText && 261 (SymbolName == "__hot_start" || SymbolName == "__hot_end")) 262 return true; 263 264 if (opts::HotData && 265 (SymbolName == "__hot_data_start" || SymbolName == "__hot_data_end")) 266 return true; 267 268 if (SymbolName == "_end") 269 return true; 270 271 return false; 272 } 273 274 std::unique_ptr<MCObjectWriter> 275 BinaryContext::createObjectWriter(raw_pwrite_stream &OS) { 276 return MAB->createObjectWriter(OS); 277 } 278 279 bool BinaryContext::validateObjectNesting() const { 280 auto Itr = BinaryDataMap.begin(); 281 auto End = BinaryDataMap.end(); 282 bool Valid = true; 283 while (Itr != End) { 284 auto Next = std::next(Itr); 285 while (Next != End && 286 Itr->second->getSection() == Next->second->getSection() && 287 Itr->second->containsRange(Next->second->getAddress(), 288 Next->second->getSize())) { 289 if (Next->second->Parent != Itr->second) { 290 errs() << "BOLT-WARNING: object nesting incorrect for:\n" 291 << "BOLT-WARNING: " << *Itr->second << "\n" 292 << "BOLT-WARNING: " << *Next->second << "\n"; 293 Valid = false; 294 } 295 ++Next; 296 } 297 Itr = Next; 298 } 299 return Valid; 300 } 301 302 bool BinaryContext::validateHoles() const { 303 bool Valid = true; 304 for (BinarySection &Section : sections()) { 305 for (const Relocation &Rel : Section.relocations()) { 306 uint64_t RelAddr = Rel.Offset + Section.getAddress(); 307 const BinaryData *BD = getBinaryDataContainingAddress(RelAddr); 308 if (!BD) { 309 errs() << "BOLT-WARNING: no BinaryData found for relocation at address" 310 << " 0x" << Twine::utohexstr(RelAddr) << " in " 311 << Section.getName() << "\n"; 312 Valid = false; 313 } else if (!BD->getAtomicRoot()) { 314 errs() << "BOLT-WARNING: no atomic BinaryData found for relocation at " 315 << "address 0x" << Twine::utohexstr(RelAddr) << " in " 316 << Section.getName() << "\n"; 317 Valid = false; 318 } 319 } 320 } 321 return Valid; 322 } 323 324 void BinaryContext::updateObjectNesting(BinaryDataMapType::iterator GAI) { 325 const uint64_t Address = GAI->second->getAddress(); 326 const uint64_t Size = GAI->second->getSize(); 327 328 auto fixParents = [&](BinaryDataMapType::iterator Itr, 329 BinaryData *NewParent) { 330 BinaryData *OldParent = Itr->second->Parent; 331 Itr->second->Parent = NewParent; 332 ++Itr; 333 while (Itr != BinaryDataMap.end() && OldParent && 334 Itr->second->Parent == OldParent) { 335 Itr->second->Parent = NewParent; 336 ++Itr; 337 } 338 }; 339 340 // Check if the previous symbol contains the newly added symbol. 341 if (GAI != BinaryDataMap.begin()) { 342 BinaryData *Prev = std::prev(GAI)->second; 343 while (Prev) { 344 if (Prev->getSection() == GAI->second->getSection() && 345 Prev->containsRange(Address, Size)) { 346 fixParents(GAI, Prev); 347 } else { 348 fixParents(GAI, nullptr); 349 } 350 Prev = Prev->Parent; 351 } 352 } 353 354 // Check if the newly added symbol contains any subsequent symbols. 355 if (Size != 0) { 356 BinaryData *BD = GAI->second->Parent ? GAI->second->Parent : GAI->second; 357 auto Itr = std::next(GAI); 358 while ( 359 Itr != BinaryDataMap.end() && 360 BD->containsRange(Itr->second->getAddress(), Itr->second->getSize())) { 361 Itr->second->Parent = BD; 362 ++Itr; 363 } 364 } 365 } 366 367 iterator_range<BinaryContext::binary_data_iterator> 368 BinaryContext::getSubBinaryData(BinaryData *BD) { 369 auto Start = std::next(BinaryDataMap.find(BD->getAddress())); 370 auto End = Start; 371 while (End != BinaryDataMap.end() && BD->isAncestorOf(End->second)) 372 ++End; 373 return make_range(Start, End); 374 } 375 376 std::pair<const MCSymbol *, uint64_t> 377 BinaryContext::handleAddressRef(uint64_t Address, BinaryFunction &BF, 378 bool IsPCRel) { 379 uint64_t Addend = 0; 380 381 if (isAArch64()) { 382 // Check if this is an access to a constant island and create bookkeeping 383 // to keep track of it and emit it later as part of this function. 384 if (MCSymbol *IslandSym = BF.getOrCreateIslandAccess(Address)) 385 return std::make_pair(IslandSym, Addend); 386 387 // Detect custom code written in assembly that refers to arbitrary 388 // constant islands from other functions. Write this reference so we 389 // can pull this constant island and emit it as part of this function 390 // too. 391 auto IslandIter = AddressToConstantIslandMap.lower_bound(Address); 392 if (IslandIter != AddressToConstantIslandMap.end()) { 393 if (MCSymbol *IslandSym = 394 IslandIter->second->getOrCreateProxyIslandAccess(Address, BF)) { 395 BF.createIslandDependency(IslandSym, IslandIter->second); 396 return std::make_pair(IslandSym, Addend); 397 } 398 } 399 } 400 401 // Note that the address does not necessarily have to reside inside 402 // a section, it could be an absolute address too. 403 ErrorOr<BinarySection &> Section = getSectionForAddress(Address); 404 if (Section && Section->isText()) { 405 if (BF.containsAddress(Address, /*UseMaxSize=*/isAArch64())) { 406 if (Address != BF.getAddress()) { 407 // The address could potentially escape. Mark it as another entry 408 // point into the function. 409 if (opts::Verbosity >= 1) { 410 outs() << "BOLT-INFO: potentially escaped address 0x" 411 << Twine::utohexstr(Address) << " in function " << BF << '\n'; 412 } 413 BF.HasInternalLabelReference = true; 414 return std::make_pair( 415 BF.addEntryPointAtOffset(Address - BF.getAddress()), Addend); 416 } 417 } else { 418 addInterproceduralReference(&BF, Address); 419 } 420 } 421 422 // With relocations, catch jump table references outside of the basic block 423 // containing the indirect jump. 424 if (HasRelocations) { 425 const MemoryContentsType MemType = analyzeMemoryAt(Address, BF); 426 if (MemType == MemoryContentsType::POSSIBLE_PIC_JUMP_TABLE && IsPCRel) { 427 const MCSymbol *Symbol = 428 getOrCreateJumpTable(BF, Address, JumpTable::JTT_PIC); 429 430 return std::make_pair(Symbol, Addend); 431 } 432 } 433 434 if (BinaryData *BD = getBinaryDataContainingAddress(Address)) 435 return std::make_pair(BD->getSymbol(), Address - BD->getAddress()); 436 437 // TODO: use DWARF info to get size/alignment here? 438 MCSymbol *TargetSymbol = getOrCreateGlobalSymbol(Address, "DATAat"); 439 LLVM_DEBUG(dbgs() << "Created symbol " << TargetSymbol->getName() << '\n'); 440 return std::make_pair(TargetSymbol, Addend); 441 } 442 443 MemoryContentsType BinaryContext::analyzeMemoryAt(uint64_t Address, 444 BinaryFunction &BF) { 445 if (!isX86()) 446 return MemoryContentsType::UNKNOWN; 447 448 ErrorOr<BinarySection &> Section = getSectionForAddress(Address); 449 if (!Section) { 450 // No section - possibly an absolute address. Since we don't allow 451 // internal function addresses to escape the function scope - we 452 // consider it a tail call. 453 if (opts::Verbosity > 1) { 454 errs() << "BOLT-WARNING: no section for address 0x" 455 << Twine::utohexstr(Address) << " referenced from function " << BF 456 << '\n'; 457 } 458 return MemoryContentsType::UNKNOWN; 459 } 460 461 if (Section->isVirtual()) { 462 // The contents are filled at runtime. 463 return MemoryContentsType::UNKNOWN; 464 } 465 466 // No support for jump tables in code yet. 467 if (Section->isText()) 468 return MemoryContentsType::UNKNOWN; 469 470 // Start with checking for PIC jump table. We expect non-PIC jump tables 471 // to have high 32 bits set to 0. 472 if (analyzeJumpTable(Address, JumpTable::JTT_PIC, BF)) 473 return MemoryContentsType::POSSIBLE_PIC_JUMP_TABLE; 474 475 if (analyzeJumpTable(Address, JumpTable::JTT_NORMAL, BF)) 476 return MemoryContentsType::POSSIBLE_JUMP_TABLE; 477 478 return MemoryContentsType::UNKNOWN; 479 } 480 481 /// Check if <fragment restored name> == <parent restored name>.cold(.\d+)? 482 bool isPotentialFragmentByName(BinaryFunction &Fragment, 483 BinaryFunction &Parent) { 484 for (StringRef Name : Parent.getNames()) { 485 std::string NamePrefix = Regex::escape(NameResolver::restore(Name)); 486 std::string NameRegex = Twine(NamePrefix, "\\.cold(\\.[0-9]+)?").str(); 487 if (Fragment.hasRestoredNameRegex(NameRegex)) 488 return true; 489 } 490 return false; 491 } 492 493 bool BinaryContext::analyzeJumpTable( 494 const uint64_t Address, const JumpTable::JumpTableType Type, 495 BinaryFunction &BF, const uint64_t NextJTAddress, 496 JumpTable::AddressesType *EntriesAsAddress) { 497 // Is one of the targets __builtin_unreachable? 498 bool HasUnreachable = false; 499 500 // Number of targets other than __builtin_unreachable. 501 uint64_t NumRealEntries = 0; 502 503 auto addEntryAddress = [&](uint64_t EntryAddress) { 504 if (EntriesAsAddress) 505 EntriesAsAddress->emplace_back(EntryAddress); 506 }; 507 508 auto doesBelongToFunction = [&](const uint64_t Addr, 509 BinaryFunction *TargetBF) -> bool { 510 if (BF.containsAddress(Addr)) 511 return true; 512 // Nothing to do if we failed to identify the containing function. 513 if (!TargetBF) 514 return false; 515 // Case 1: check if BF is a fragment and TargetBF is its parent. 516 if (BF.isFragment()) { 517 // Parent function may or may not be already registered. 518 // Set parent link based on function name matching heuristic. 519 return registerFragment(BF, *TargetBF); 520 } 521 // Case 2: check if TargetBF is a fragment and BF is its parent. 522 return TargetBF->isFragment() && registerFragment(*TargetBF, BF); 523 }; 524 525 ErrorOr<BinarySection &> Section = getSectionForAddress(Address); 526 if (!Section) 527 return false; 528 529 // The upper bound is defined by containing object, section limits, and 530 // the next jump table in memory. 531 uint64_t UpperBound = Section->getEndAddress(); 532 const BinaryData *JumpTableBD = getBinaryDataAtAddress(Address); 533 if (JumpTableBD && JumpTableBD->getSize()) { 534 assert(JumpTableBD->getEndAddress() <= UpperBound && 535 "data object cannot cross a section boundary"); 536 UpperBound = JumpTableBD->getEndAddress(); 537 } 538 if (NextJTAddress) 539 UpperBound = std::min(NextJTAddress, UpperBound); 540 541 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: analyzeJumpTable in " << BF.getPrintName() 542 << '\n'); 543 const uint64_t EntrySize = getJumpTableEntrySize(Type); 544 for (uint64_t EntryAddress = Address; EntryAddress <= UpperBound - EntrySize; 545 EntryAddress += EntrySize) { 546 LLVM_DEBUG(dbgs() << " * Checking 0x" << Twine::utohexstr(EntryAddress) 547 << " -> "); 548 // Check if there's a proper relocation against the jump table entry. 549 if (HasRelocations) { 550 if (Type == JumpTable::JTT_PIC && 551 !DataPCRelocations.count(EntryAddress)) { 552 LLVM_DEBUG( 553 dbgs() << "FAIL: JTT_PIC table, no relocation for this address\n"); 554 break; 555 } 556 if (Type == JumpTable::JTT_NORMAL && !getRelocationAt(EntryAddress)) { 557 LLVM_DEBUG( 558 dbgs() 559 << "FAIL: JTT_NORMAL table, no relocation for this address\n"); 560 break; 561 } 562 } 563 564 const uint64_t Value = 565 (Type == JumpTable::JTT_PIC) 566 ? Address + *getSignedValueAtAddress(EntryAddress, EntrySize) 567 : *getPointerAtAddress(EntryAddress); 568 569 // __builtin_unreachable() case. 570 if (Value == BF.getAddress() + BF.getSize()) { 571 addEntryAddress(Value); 572 HasUnreachable = true; 573 LLVM_DEBUG(dbgs() << "OK: __builtin_unreachable\n"); 574 continue; 575 } 576 577 // Function or one of its fragments. 578 BinaryFunction *TargetBF = getBinaryFunctionContainingAddress(Value); 579 580 // We assume that a jump table cannot have function start as an entry. 581 if (!doesBelongToFunction(Value, TargetBF) || Value == BF.getAddress()) { 582 LLVM_DEBUG({ 583 if (!BF.containsAddress(Value)) { 584 dbgs() << "FAIL: function doesn't contain this address\n"; 585 if (TargetBF) { 586 dbgs() << " ! function containing this address: " 587 << TargetBF->getPrintName() << '\n'; 588 if (TargetBF->isFragment()) 589 dbgs() << " ! is a fragment\n"; 590 for (BinaryFunction *TargetParent : TargetBF->ParentFragments) 591 dbgs() << " ! its parent is " 592 << (TargetParent ? TargetParent->getPrintName() : "(none)") 593 << '\n'; 594 } 595 } 596 if (Value == BF.getAddress()) 597 dbgs() << "FAIL: jump table cannot have function start as an entry\n"; 598 }); 599 break; 600 } 601 602 // Check there's an instruction at this offset. 603 if (TargetBF->getState() == BinaryFunction::State::Disassembled && 604 !TargetBF->getInstructionAtOffset(Value - TargetBF->getAddress())) { 605 LLVM_DEBUG(dbgs() << "FAIL: no instruction at this offset\n"); 606 break; 607 } 608 609 ++NumRealEntries; 610 611 if (TargetBF != &BF) 612 BF.setHasIndirectTargetToSplitFragment(true); 613 addEntryAddress(Value); 614 } 615 616 // It's a jump table if the number of real entries is more than 1, or there's 617 // one real entry and "unreachable" targets. If there are only multiple 618 // "unreachable" targets, then it's not a jump table. 619 return NumRealEntries + HasUnreachable >= 2; 620 } 621 622 void BinaryContext::populateJumpTables() { 623 LLVM_DEBUG(dbgs() << "DataPCRelocations: " << DataPCRelocations.size() 624 << '\n'); 625 for (auto JTI = JumpTables.begin(), JTE = JumpTables.end(); JTI != JTE; 626 ++JTI) { 627 JumpTable *JT = JTI->second; 628 629 bool NonSimpleParent = false; 630 for (BinaryFunction *BF : JT->Parents) 631 NonSimpleParent |= !BF->isSimple(); 632 if (NonSimpleParent) 633 continue; 634 635 uint64_t NextJTAddress = 0; 636 auto NextJTI = std::next(JTI); 637 if (NextJTI != JTE) 638 NextJTAddress = NextJTI->second->getAddress(); 639 640 const bool Success = 641 analyzeJumpTable(JT->getAddress(), JT->Type, *(JT->Parents[0]), 642 NextJTAddress, &JT->EntriesAsAddress); 643 if (!Success) { 644 LLVM_DEBUG(ListSeparator LS; 645 dbgs() << "failed to analyze jump table in function "; 646 for (BinaryFunction *Frag 647 : JT->Parents) dbgs() 648 << LS << *Frag; 649 dbgs() << '\n';); 650 JT->print(dbgs()); 651 if (NextJTI != JTE) { 652 LLVM_DEBUG(ListSeparator LS; 653 dbgs() << "next jump table at 0x" 654 << Twine::utohexstr(NextJTI->second->getAddress()) 655 << " belongs to function "; 656 for (BinaryFunction *Frag 657 : NextJTI->second->Parents) dbgs() 658 << LS << *Frag; 659 dbgs() << "\n";); 660 NextJTI->second->print(dbgs()); 661 } 662 llvm_unreachable("jump table heuristic failure"); 663 } 664 for (BinaryFunction *Frag : JT->Parents) { 665 for (uint64_t EntryAddress : JT->EntriesAsAddress) 666 // if target is builtin_unreachable 667 if (EntryAddress == Frag->getAddress() + Frag->getSize()) { 668 Frag->IgnoredBranches.emplace_back(EntryAddress - Frag->getAddress(), 669 Frag->getSize()); 670 } else if (EntryAddress >= Frag->getAddress() && 671 EntryAddress < Frag->getAddress() + Frag->getSize()) { 672 Frag->registerReferencedOffset(EntryAddress - Frag->getAddress()); 673 } 674 } 675 676 // In strict mode, erase PC-relative relocation record. Later we check that 677 // all such records are erased and thus have been accounted for. 678 if (opts::StrictMode && JT->Type == JumpTable::JTT_PIC) { 679 for (uint64_t Address = JT->getAddress(); 680 Address < JT->getAddress() + JT->getSize(); 681 Address += JT->EntrySize) { 682 DataPCRelocations.erase(DataPCRelocations.find(Address)); 683 } 684 } 685 686 // Mark to skip the function and all its fragments. 687 for (BinaryFunction *Frag : JT->Parents) 688 if (Frag->hasIndirectTargetToSplitFragment()) 689 addFragmentsToSkip(Frag); 690 } 691 692 if (opts::StrictMode && DataPCRelocations.size()) { 693 LLVM_DEBUG({ 694 dbgs() << DataPCRelocations.size() 695 << " unclaimed PC-relative relocations left in data:\n"; 696 for (uint64_t Reloc : DataPCRelocations) 697 dbgs() << Twine::utohexstr(Reloc) << '\n'; 698 }); 699 assert(0 && "unclaimed PC-relative relocations left in data\n"); 700 } 701 clearList(DataPCRelocations); 702 } 703 704 void BinaryContext::skipMarkedFragments() { 705 std::vector<BinaryFunction *> FragmentQueue; 706 // Copy the functions to FragmentQueue. 707 FragmentQueue.assign(FragmentsToSkip.begin(), FragmentsToSkip.end()); 708 auto addToWorklist = [&](BinaryFunction *Function) -> void { 709 if (FragmentsToSkip.count(Function)) 710 return; 711 FragmentQueue.push_back(Function); 712 addFragmentsToSkip(Function); 713 }; 714 // Functions containing split jump tables need to be skipped with all 715 // fragments (transitively). 716 for (size_t I = 0; I != FragmentQueue.size(); I++) { 717 BinaryFunction *BF = FragmentQueue[I]; 718 assert(FragmentsToSkip.count(BF) && 719 "internal error in traversing function fragments"); 720 if (opts::Verbosity >= 1) 721 errs() << "BOLT-WARNING: Ignoring " << BF->getPrintName() << '\n'; 722 BF->setSimple(false); 723 BF->setHasIndirectTargetToSplitFragment(true); 724 725 llvm::for_each(BF->Fragments, addToWorklist); 726 llvm::for_each(BF->ParentFragments, addToWorklist); 727 } 728 if (!FragmentsToSkip.empty()) 729 errs() << "BOLT-WARNING: skipped " << FragmentsToSkip.size() << " function" 730 << (FragmentsToSkip.size() == 1 ? "" : "s") 731 << " due to cold fragments\n"; 732 } 733 734 MCSymbol *BinaryContext::getOrCreateGlobalSymbol(uint64_t Address, Twine Prefix, 735 uint64_t Size, 736 uint16_t Alignment, 737 unsigned Flags) { 738 auto Itr = BinaryDataMap.find(Address); 739 if (Itr != BinaryDataMap.end()) { 740 assert(Itr->second->getSize() == Size || !Size); 741 return Itr->second->getSymbol(); 742 } 743 744 std::string Name = (Prefix + "0x" + Twine::utohexstr(Address)).str(); 745 assert(!GlobalSymbols.count(Name) && "created name is not unique"); 746 return registerNameAtAddress(Name, Address, Size, Alignment, Flags); 747 } 748 749 MCSymbol *BinaryContext::getOrCreateUndefinedGlobalSymbol(StringRef Name) { 750 return Ctx->getOrCreateSymbol(Name); 751 } 752 753 BinaryFunction *BinaryContext::createBinaryFunction( 754 const std::string &Name, BinarySection &Section, uint64_t Address, 755 uint64_t Size, uint64_t SymbolSize, uint16_t Alignment) { 756 auto Result = BinaryFunctions.emplace( 757 Address, BinaryFunction(Name, Section, Address, Size, *this)); 758 assert(Result.second == true && "unexpected duplicate function"); 759 BinaryFunction *BF = &Result.first->second; 760 registerNameAtAddress(Name, Address, SymbolSize ? SymbolSize : Size, 761 Alignment); 762 setSymbolToFunctionMap(BF->getSymbol(), BF); 763 return BF; 764 } 765 766 const MCSymbol * 767 BinaryContext::getOrCreateJumpTable(BinaryFunction &Function, uint64_t Address, 768 JumpTable::JumpTableType Type) { 769 auto isFragmentOf = [](BinaryFunction *Fragment, BinaryFunction *Parent) { 770 return (Fragment->isFragment() && Fragment->isParentFragment(Parent)); 771 }; 772 773 // Two fragments of same function access same jump table 774 if (JumpTable *JT = getJumpTableContainingAddress(Address)) { 775 assert(JT->Type == Type && "jump table types have to match"); 776 assert(Address == JT->getAddress() && "unexpected non-empty jump table"); 777 778 // Prevent associating a jump table to a specific fragment twice. 779 // This simple check arises from the assumption: no more than 2 fragments. 780 if (JT->Parents.size() == 1 && JT->Parents[0] != &Function) { 781 bool SameFunction = isFragmentOf(JT->Parents[0], &Function) || 782 isFragmentOf(&Function, JT->Parents[0]); 783 assert(SameFunction && 784 "cannot re-use jump table of a different function"); 785 // Duplicate the entry for the parent function for easy access 786 JT->Parents.push_back(&Function); 787 if (opts::Verbosity > 2) { 788 outs() << "BOLT-INFO: Multiple fragments access same jump table: " 789 << JT->Parents[0]->getPrintName() << "; " 790 << Function.getPrintName() << "\n"; 791 JT->print(outs()); 792 } 793 Function.JumpTables.emplace(Address, JT); 794 JT->Parents[0]->setHasIndirectTargetToSplitFragment(true); 795 JT->Parents[1]->setHasIndirectTargetToSplitFragment(true); 796 } 797 798 bool IsJumpTableParent = false; 799 for (BinaryFunction *Frag : JT->Parents) 800 if (Frag == &Function) 801 IsJumpTableParent = true; 802 assert(IsJumpTableParent && 803 "cannot re-use jump table of a different function"); 804 return JT->getFirstLabel(); 805 } 806 807 // Re-use the existing symbol if possible. 808 MCSymbol *JTLabel = nullptr; 809 if (BinaryData *Object = getBinaryDataAtAddress(Address)) { 810 if (!isInternalSymbolName(Object->getSymbol()->getName())) 811 JTLabel = Object->getSymbol(); 812 } 813 814 const uint64_t EntrySize = getJumpTableEntrySize(Type); 815 if (!JTLabel) { 816 const std::string JumpTableName = generateJumpTableName(Function, Address); 817 JTLabel = registerNameAtAddress(JumpTableName, Address, 0, EntrySize); 818 } 819 820 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: creating jump table " << JTLabel->getName() 821 << " in function " << Function << '\n'); 822 823 JumpTable *JT = new JumpTable(*JTLabel, Address, EntrySize, Type, 824 JumpTable::LabelMapType{{0, JTLabel}}, 825 *getSectionForAddress(Address)); 826 JT->Parents.push_back(&Function); 827 if (opts::Verbosity > 2) 828 JT->print(outs()); 829 JumpTables.emplace(Address, JT); 830 831 // Duplicate the entry for the parent function for easy access. 832 Function.JumpTables.emplace(Address, JT); 833 return JTLabel; 834 } 835 836 std::pair<uint64_t, const MCSymbol *> 837 BinaryContext::duplicateJumpTable(BinaryFunction &Function, JumpTable *JT, 838 const MCSymbol *OldLabel) { 839 auto L = scopeLock(); 840 unsigned Offset = 0; 841 bool Found = false; 842 for (std::pair<const unsigned, MCSymbol *> Elmt : JT->Labels) { 843 if (Elmt.second != OldLabel) 844 continue; 845 Offset = Elmt.first; 846 Found = true; 847 break; 848 } 849 assert(Found && "Label not found"); 850 (void)Found; 851 MCSymbol *NewLabel = Ctx->createNamedTempSymbol("duplicatedJT"); 852 JumpTable *NewJT = 853 new JumpTable(*NewLabel, JT->getAddress(), JT->EntrySize, JT->Type, 854 JumpTable::LabelMapType{{Offset, NewLabel}}, 855 *getSectionForAddress(JT->getAddress())); 856 NewJT->Parents = JT->Parents; 857 NewJT->Entries = JT->Entries; 858 NewJT->Counts = JT->Counts; 859 uint64_t JumpTableID = ++DuplicatedJumpTables; 860 // Invert it to differentiate from regular jump tables whose IDs are their 861 // addresses in the input binary memory space 862 JumpTableID = ~JumpTableID; 863 JumpTables.emplace(JumpTableID, NewJT); 864 Function.JumpTables.emplace(JumpTableID, NewJT); 865 return std::make_pair(JumpTableID, NewLabel); 866 } 867 868 std::string BinaryContext::generateJumpTableName(const BinaryFunction &BF, 869 uint64_t Address) { 870 size_t Id; 871 uint64_t Offset = 0; 872 if (const JumpTable *JT = BF.getJumpTableContainingAddress(Address)) { 873 Offset = Address - JT->getAddress(); 874 auto Itr = JT->Labels.find(Offset); 875 if (Itr != JT->Labels.end()) 876 return std::string(Itr->second->getName()); 877 Id = JumpTableIds.at(JT->getAddress()); 878 } else { 879 Id = JumpTableIds[Address] = BF.JumpTables.size(); 880 } 881 return ("JUMP_TABLE/" + BF.getOneName().str() + "." + std::to_string(Id) + 882 (Offset ? ("." + std::to_string(Offset)) : "")); 883 } 884 885 bool BinaryContext::hasValidCodePadding(const BinaryFunction &BF) { 886 // FIXME: aarch64 support is missing. 887 if (!isX86()) 888 return true; 889 890 if (BF.getSize() == BF.getMaxSize()) 891 return true; 892 893 ErrorOr<ArrayRef<unsigned char>> FunctionData = BF.getData(); 894 assert(FunctionData && "cannot get function as data"); 895 896 uint64_t Offset = BF.getSize(); 897 MCInst Instr; 898 uint64_t InstrSize = 0; 899 uint64_t InstrAddress = BF.getAddress() + Offset; 900 using std::placeholders::_1; 901 902 // Skip instructions that satisfy the predicate condition. 903 auto skipInstructions = [&](std::function<bool(const MCInst &)> Predicate) { 904 const uint64_t StartOffset = Offset; 905 for (; Offset < BF.getMaxSize(); 906 Offset += InstrSize, InstrAddress += InstrSize) { 907 if (!DisAsm->getInstruction(Instr, InstrSize, FunctionData->slice(Offset), 908 InstrAddress, nulls())) 909 break; 910 if (!Predicate(Instr)) 911 break; 912 } 913 914 return Offset - StartOffset; 915 }; 916 917 // Skip a sequence of zero bytes. 918 auto skipZeros = [&]() { 919 const uint64_t StartOffset = Offset; 920 for (; Offset < BF.getMaxSize(); ++Offset) 921 if ((*FunctionData)[Offset] != 0) 922 break; 923 924 return Offset - StartOffset; 925 }; 926 927 // Accept the whole padding area filled with breakpoints. 928 auto isBreakpoint = std::bind(&MCPlusBuilder::isBreakpoint, MIB.get(), _1); 929 if (skipInstructions(isBreakpoint) && Offset == BF.getMaxSize()) 930 return true; 931 932 auto isNoop = std::bind(&MCPlusBuilder::isNoop, MIB.get(), _1); 933 934 // Some functions have a jump to the next function or to the padding area 935 // inserted after the body. 936 auto isSkipJump = [&](const MCInst &Instr) { 937 uint64_t TargetAddress = 0; 938 if (MIB->isUnconditionalBranch(Instr) && 939 MIB->evaluateBranch(Instr, InstrAddress, InstrSize, TargetAddress)) { 940 if (TargetAddress >= InstrAddress + InstrSize && 941 TargetAddress <= BF.getAddress() + BF.getMaxSize()) { 942 return true; 943 } 944 } 945 return false; 946 }; 947 948 // Skip over nops, jumps, and zero padding. Allow interleaving (this happens). 949 while (skipInstructions(isNoop) || skipInstructions(isSkipJump) || 950 skipZeros()) 951 ; 952 953 if (Offset == BF.getMaxSize()) 954 return true; 955 956 if (opts::Verbosity >= 1) { 957 errs() << "BOLT-WARNING: bad padding at address 0x" 958 << Twine::utohexstr(BF.getAddress() + BF.getSize()) 959 << " starting at offset " << (Offset - BF.getSize()) 960 << " in function " << BF << '\n' 961 << FunctionData->slice(BF.getSize(), BF.getMaxSize() - BF.getSize()) 962 << '\n'; 963 } 964 965 return false; 966 } 967 968 void BinaryContext::adjustCodePadding() { 969 for (auto &BFI : BinaryFunctions) { 970 BinaryFunction &BF = BFI.second; 971 if (!shouldEmit(BF)) 972 continue; 973 974 if (!hasValidCodePadding(BF)) { 975 if (HasRelocations) { 976 if (opts::Verbosity >= 1) { 977 outs() << "BOLT-INFO: function " << BF 978 << " has invalid padding. Ignoring the function.\n"; 979 } 980 BF.setIgnored(); 981 } else { 982 BF.setMaxSize(BF.getSize()); 983 } 984 } 985 } 986 } 987 988 MCSymbol *BinaryContext::registerNameAtAddress(StringRef Name, uint64_t Address, 989 uint64_t Size, 990 uint16_t Alignment, 991 unsigned Flags) { 992 // Register the name with MCContext. 993 MCSymbol *Symbol = Ctx->getOrCreateSymbol(Name); 994 995 auto GAI = BinaryDataMap.find(Address); 996 BinaryData *BD; 997 if (GAI == BinaryDataMap.end()) { 998 ErrorOr<BinarySection &> SectionOrErr = getSectionForAddress(Address); 999 BinarySection &Section = 1000 SectionOrErr ? SectionOrErr.get() : absoluteSection(); 1001 BD = new BinaryData(*Symbol, Address, Size, Alignment ? Alignment : 1, 1002 Section, Flags); 1003 GAI = BinaryDataMap.emplace(Address, BD).first; 1004 GlobalSymbols[Name] = BD; 1005 updateObjectNesting(GAI); 1006 } else { 1007 BD = GAI->second; 1008 if (!BD->hasName(Name)) { 1009 GlobalSymbols[Name] = BD; 1010 BD->Symbols.push_back(Symbol); 1011 } 1012 } 1013 1014 return Symbol; 1015 } 1016 1017 const BinaryData * 1018 BinaryContext::getBinaryDataContainingAddressImpl(uint64_t Address) const { 1019 auto NI = BinaryDataMap.lower_bound(Address); 1020 auto End = BinaryDataMap.end(); 1021 if ((NI != End && Address == NI->first) || 1022 ((NI != BinaryDataMap.begin()) && (NI-- != BinaryDataMap.begin()))) { 1023 if (NI->second->containsAddress(Address)) 1024 return NI->second; 1025 1026 // If this is a sub-symbol, see if a parent data contains the address. 1027 const BinaryData *BD = NI->second->getParent(); 1028 while (BD) { 1029 if (BD->containsAddress(Address)) 1030 return BD; 1031 BD = BD->getParent(); 1032 } 1033 } 1034 return nullptr; 1035 } 1036 1037 bool BinaryContext::setBinaryDataSize(uint64_t Address, uint64_t Size) { 1038 auto NI = BinaryDataMap.find(Address); 1039 assert(NI != BinaryDataMap.end()); 1040 if (NI == BinaryDataMap.end()) 1041 return false; 1042 // TODO: it's possible that a jump table starts at the same address 1043 // as a larger blob of private data. When we set the size of the 1044 // jump table, it might be smaller than the total blob size. In this 1045 // case we just leave the original size since (currently) it won't really 1046 // affect anything. 1047 assert((!NI->second->Size || NI->second->Size == Size || 1048 (NI->second->isJumpTable() && NI->second->Size > Size)) && 1049 "can't change the size of a symbol that has already had its " 1050 "size set"); 1051 if (!NI->second->Size) { 1052 NI->second->Size = Size; 1053 updateObjectNesting(NI); 1054 return true; 1055 } 1056 return false; 1057 } 1058 1059 void BinaryContext::generateSymbolHashes() { 1060 auto isPadding = [](const BinaryData &BD) { 1061 StringRef Contents = BD.getSection().getContents(); 1062 StringRef SymData = Contents.substr(BD.getOffset(), BD.getSize()); 1063 return (BD.getName().startswith("HOLEat") || 1064 SymData.find_first_not_of(0) == StringRef::npos); 1065 }; 1066 1067 uint64_t NumCollisions = 0; 1068 for (auto &Entry : BinaryDataMap) { 1069 BinaryData &BD = *Entry.second; 1070 StringRef Name = BD.getName(); 1071 1072 if (!isInternalSymbolName(Name)) 1073 continue; 1074 1075 // First check if a non-anonymous alias exists and move it to the front. 1076 if (BD.getSymbols().size() > 1) { 1077 auto Itr = llvm::find_if(BD.getSymbols(), [&](const MCSymbol *Symbol) { 1078 return !isInternalSymbolName(Symbol->getName()); 1079 }); 1080 if (Itr != BD.getSymbols().end()) { 1081 size_t Idx = std::distance(BD.getSymbols().begin(), Itr); 1082 std::swap(BD.getSymbols()[0], BD.getSymbols()[Idx]); 1083 continue; 1084 } 1085 } 1086 1087 // We have to skip 0 size symbols since they will all collide. 1088 if (BD.getSize() == 0) { 1089 continue; 1090 } 1091 1092 const uint64_t Hash = BD.getSection().hash(BD); 1093 const size_t Idx = Name.find("0x"); 1094 std::string NewName = 1095 (Twine(Name.substr(0, Idx)) + "_" + Twine::utohexstr(Hash)).str(); 1096 if (getBinaryDataByName(NewName)) { 1097 // Ignore collisions for symbols that appear to be padding 1098 // (i.e. all zeros or a "hole") 1099 if (!isPadding(BD)) { 1100 if (opts::Verbosity) { 1101 errs() << "BOLT-WARNING: collision detected when hashing " << BD 1102 << " with new name (" << NewName << "), skipping.\n"; 1103 } 1104 ++NumCollisions; 1105 } 1106 continue; 1107 } 1108 BD.Symbols.insert(BD.Symbols.begin(), Ctx->getOrCreateSymbol(NewName)); 1109 GlobalSymbols[NewName] = &BD; 1110 } 1111 if (NumCollisions) { 1112 errs() << "BOLT-WARNING: " << NumCollisions 1113 << " collisions detected while hashing binary objects"; 1114 if (!opts::Verbosity) 1115 errs() << ". Use -v=1 to see the list."; 1116 errs() << '\n'; 1117 } 1118 } 1119 1120 bool BinaryContext::registerFragment(BinaryFunction &TargetFunction, 1121 BinaryFunction &Function) const { 1122 if (!isPotentialFragmentByName(TargetFunction, Function)) 1123 return false; 1124 assert(TargetFunction.isFragment() && "TargetFunction must be a fragment"); 1125 if (TargetFunction.isParentFragment(&Function)) 1126 return true; 1127 TargetFunction.addParentFragment(Function); 1128 Function.addFragment(TargetFunction); 1129 if (!HasRelocations) { 1130 TargetFunction.setSimple(false); 1131 Function.setSimple(false); 1132 } 1133 if (opts::Verbosity >= 1) { 1134 outs() << "BOLT-INFO: marking " << TargetFunction << " as a fragment of " 1135 << Function << '\n'; 1136 } 1137 return true; 1138 } 1139 1140 void BinaryContext::addAdrpAddRelocAArch64(BinaryFunction &BF, 1141 MCInst &LoadLowBits, 1142 MCInst &LoadHiBits, 1143 uint64_t Target) { 1144 const MCSymbol *TargetSymbol; 1145 uint64_t Addend = 0; 1146 std::tie(TargetSymbol, Addend) = handleAddressRef(Target, BF, 1147 /*IsPCRel*/ true); 1148 int64_t Val; 1149 MIB->replaceImmWithSymbolRef(LoadHiBits, TargetSymbol, Addend, Ctx.get(), Val, 1150 ELF::R_AARCH64_ADR_PREL_PG_HI21); 1151 MIB->replaceImmWithSymbolRef(LoadLowBits, TargetSymbol, Addend, Ctx.get(), 1152 Val, ELF::R_AARCH64_ADD_ABS_LO12_NC); 1153 } 1154 1155 bool BinaryContext::handleAArch64Veneer(uint64_t Address, bool MatchOnly) { 1156 BinaryFunction *TargetFunction = getBinaryFunctionContainingAddress(Address); 1157 if (TargetFunction) 1158 return false; 1159 1160 ErrorOr<BinarySection &> Section = getSectionForAddress(Address); 1161 assert(Section && "cannot get section for referenced address"); 1162 if (!Section->isText()) 1163 return false; 1164 1165 bool Ret = false; 1166 StringRef SectionContents = Section->getContents(); 1167 uint64_t Offset = Address - Section->getAddress(); 1168 const uint64_t MaxSize = SectionContents.size() - Offset; 1169 const uint8_t *Bytes = 1170 reinterpret_cast<const uint8_t *>(SectionContents.data()); 1171 ArrayRef<uint8_t> Data(Bytes + Offset, MaxSize); 1172 1173 auto matchVeneer = [&](BinaryFunction::InstrMapType &Instructions, 1174 MCInst &Instruction, uint64_t Offset, 1175 uint64_t AbsoluteInstrAddr, 1176 uint64_t TotalSize) -> bool { 1177 MCInst *TargetHiBits, *TargetLowBits; 1178 uint64_t TargetAddress, Count; 1179 Count = MIB->matchLinkerVeneer(Instructions.begin(), Instructions.end(), 1180 AbsoluteInstrAddr, Instruction, TargetHiBits, 1181 TargetLowBits, TargetAddress); 1182 if (!Count) 1183 return false; 1184 1185 if (MatchOnly) 1186 return true; 1187 1188 // NOTE The target symbol was created during disassemble's 1189 // handleExternalReference 1190 const MCSymbol *VeneerSymbol = getOrCreateGlobalSymbol(Address, "FUNCat"); 1191 BinaryFunction *Veneer = createBinaryFunction(VeneerSymbol->getName().str(), 1192 *Section, Address, TotalSize); 1193 addAdrpAddRelocAArch64(*Veneer, *TargetLowBits, *TargetHiBits, 1194 TargetAddress); 1195 MIB->addAnnotation(Instruction, "AArch64Veneer", true); 1196 Veneer->addInstruction(Offset, std::move(Instruction)); 1197 --Count; 1198 for (auto It = std::prev(Instructions.end()); Count != 0; 1199 It = std::prev(It), --Count) { 1200 MIB->addAnnotation(It->second, "AArch64Veneer", true); 1201 Veneer->addInstruction(It->first, std::move(It->second)); 1202 } 1203 1204 Veneer->getOrCreateLocalLabel(Address); 1205 Veneer->setMaxSize(TotalSize); 1206 Veneer->updateState(BinaryFunction::State::Disassembled); 1207 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: handling veneer function at 0x" << Address 1208 << "\n"); 1209 return true; 1210 }; 1211 1212 uint64_t Size = 0, TotalSize = 0; 1213 BinaryFunction::InstrMapType VeneerInstructions; 1214 for (Offset = 0; Offset < MaxSize; Offset += Size) { 1215 MCInst Instruction; 1216 const uint64_t AbsoluteInstrAddr = Address + Offset; 1217 if (!SymbolicDisAsm->getInstruction(Instruction, Size, Data.slice(Offset), 1218 AbsoluteInstrAddr, nulls())) 1219 break; 1220 1221 TotalSize += Size; 1222 if (MIB->isBranch(Instruction)) { 1223 Ret = matchVeneer(VeneerInstructions, Instruction, Offset, 1224 AbsoluteInstrAddr, TotalSize); 1225 break; 1226 } 1227 1228 VeneerInstructions.emplace(Offset, std::move(Instruction)); 1229 } 1230 1231 return Ret; 1232 } 1233 1234 void BinaryContext::processInterproceduralReferences() { 1235 for (const std::pair<BinaryFunction *, uint64_t> &It : 1236 InterproceduralReferences) { 1237 BinaryFunction &Function = *It.first; 1238 uint64_t Address = It.second; 1239 if (!Address || Function.isIgnored()) 1240 continue; 1241 1242 BinaryFunction *TargetFunction = 1243 getBinaryFunctionContainingAddress(Address); 1244 if (&Function == TargetFunction) 1245 continue; 1246 1247 if (TargetFunction) { 1248 if (TargetFunction->isFragment() && 1249 !registerFragment(*TargetFunction, Function)) { 1250 errs() << "BOLT-WARNING: interprocedural reference between unrelated " 1251 "fragments: " 1252 << Function.getPrintName() << " and " 1253 << TargetFunction->getPrintName() << '\n'; 1254 } 1255 if (uint64_t Offset = Address - TargetFunction->getAddress()) 1256 TargetFunction->addEntryPointAtOffset(Offset); 1257 1258 continue; 1259 } 1260 1261 // Check if address falls in function padding space - this could be 1262 // unmarked data in code. In this case adjust the padding space size. 1263 ErrorOr<BinarySection &> Section = getSectionForAddress(Address); 1264 assert(Section && "cannot get section for referenced address"); 1265 1266 if (!Section->isText()) 1267 continue; 1268 1269 // PLT requires special handling and could be ignored in this context. 1270 StringRef SectionName = Section->getName(); 1271 if (SectionName == ".plt" || SectionName == ".plt.got") 1272 continue; 1273 1274 // Check if it is aarch64 veneer written at Address 1275 if (isAArch64() && handleAArch64Veneer(Address)) 1276 continue; 1277 1278 if (opts::processAllFunctions()) { 1279 errs() << "BOLT-ERROR: cannot process binaries with unmarked " 1280 << "object in code at address 0x" << Twine::utohexstr(Address) 1281 << " belonging to section " << SectionName << " in current mode\n"; 1282 exit(1); 1283 } 1284 1285 TargetFunction = getBinaryFunctionContainingAddress(Address, 1286 /*CheckPastEnd=*/false, 1287 /*UseMaxSize=*/true); 1288 // We are not going to overwrite non-simple functions, but for simple 1289 // ones - adjust the padding size. 1290 if (TargetFunction && TargetFunction->isSimple()) { 1291 errs() << "BOLT-WARNING: function " << *TargetFunction 1292 << " has an object detected in a padding region at address 0x" 1293 << Twine::utohexstr(Address) << '\n'; 1294 TargetFunction->setMaxSize(TargetFunction->getSize()); 1295 } 1296 } 1297 1298 InterproceduralReferences.clear(); 1299 } 1300 1301 void BinaryContext::postProcessSymbolTable() { 1302 fixBinaryDataHoles(); 1303 bool Valid = true; 1304 for (auto &Entry : BinaryDataMap) { 1305 BinaryData *BD = Entry.second; 1306 if ((BD->getName().startswith("SYMBOLat") || 1307 BD->getName().startswith("DATAat")) && 1308 !BD->getParent() && !BD->getSize() && !BD->isAbsolute() && 1309 BD->getSection()) { 1310 errs() << "BOLT-WARNING: zero-sized top level symbol: " << *BD << "\n"; 1311 Valid = false; 1312 } 1313 } 1314 assert(Valid); 1315 (void)Valid; 1316 generateSymbolHashes(); 1317 } 1318 1319 void BinaryContext::foldFunction(BinaryFunction &ChildBF, 1320 BinaryFunction &ParentBF) { 1321 assert(!ChildBF.isMultiEntry() && !ParentBF.isMultiEntry() && 1322 "cannot merge functions with multiple entry points"); 1323 1324 std::unique_lock<std::shared_timed_mutex> WriteCtxLock(CtxMutex, 1325 std::defer_lock); 1326 std::unique_lock<std::shared_timed_mutex> WriteSymbolMapLock( 1327 SymbolToFunctionMapMutex, std::defer_lock); 1328 1329 const StringRef ChildName = ChildBF.getOneName(); 1330 1331 // Move symbols over and update bookkeeping info. 1332 for (MCSymbol *Symbol : ChildBF.getSymbols()) { 1333 ParentBF.getSymbols().push_back(Symbol); 1334 WriteSymbolMapLock.lock(); 1335 SymbolToFunctionMap[Symbol] = &ParentBF; 1336 WriteSymbolMapLock.unlock(); 1337 // NB: there's no need to update BinaryDataMap and GlobalSymbols. 1338 } 1339 ChildBF.getSymbols().clear(); 1340 1341 // Move other names the child function is known under. 1342 llvm::move(ChildBF.Aliases, std::back_inserter(ParentBF.Aliases)); 1343 ChildBF.Aliases.clear(); 1344 1345 if (HasRelocations) { 1346 // Merge execution counts of ChildBF into those of ParentBF. 1347 // Without relocations, we cannot reliably merge profiles as both functions 1348 // continue to exist and either one can be executed. 1349 ChildBF.mergeProfileDataInto(ParentBF); 1350 1351 std::shared_lock<std::shared_timed_mutex> ReadBfsLock(BinaryFunctionsMutex, 1352 std::defer_lock); 1353 std::unique_lock<std::shared_timed_mutex> WriteBfsLock(BinaryFunctionsMutex, 1354 std::defer_lock); 1355 // Remove ChildBF from the global set of functions in relocs mode. 1356 ReadBfsLock.lock(); 1357 auto FI = BinaryFunctions.find(ChildBF.getAddress()); 1358 ReadBfsLock.unlock(); 1359 1360 assert(FI != BinaryFunctions.end() && "function not found"); 1361 assert(&ChildBF == &FI->second && "function mismatch"); 1362 1363 WriteBfsLock.lock(); 1364 ChildBF.clearDisasmState(); 1365 FI = BinaryFunctions.erase(FI); 1366 WriteBfsLock.unlock(); 1367 1368 } else { 1369 // In non-relocation mode we keep the function, but rename it. 1370 std::string NewName = "__ICF_" + ChildName.str(); 1371 1372 WriteCtxLock.lock(); 1373 ChildBF.getSymbols().push_back(Ctx->getOrCreateSymbol(NewName)); 1374 WriteCtxLock.unlock(); 1375 1376 ChildBF.setFolded(&ParentBF); 1377 } 1378 } 1379 1380 void BinaryContext::fixBinaryDataHoles() { 1381 assert(validateObjectNesting() && "object nesting inconsitency detected"); 1382 1383 for (BinarySection &Section : allocatableSections()) { 1384 std::vector<std::pair<uint64_t, uint64_t>> Holes; 1385 1386 auto isNotHole = [&Section](const binary_data_iterator &Itr) { 1387 BinaryData *BD = Itr->second; 1388 bool isHole = (!BD->getParent() && !BD->getSize() && BD->isObject() && 1389 (BD->getName().startswith("SYMBOLat0x") || 1390 BD->getName().startswith("DATAat0x") || 1391 BD->getName().startswith("ANONYMOUS"))); 1392 return !isHole && BD->getSection() == Section && !BD->getParent(); 1393 }; 1394 1395 auto BDStart = BinaryDataMap.begin(); 1396 auto BDEnd = BinaryDataMap.end(); 1397 auto Itr = FilteredBinaryDataIterator(isNotHole, BDStart, BDEnd); 1398 auto End = FilteredBinaryDataIterator(isNotHole, BDEnd, BDEnd); 1399 1400 uint64_t EndAddress = Section.getAddress(); 1401 1402 while (Itr != End) { 1403 if (Itr->second->getAddress() > EndAddress) { 1404 uint64_t Gap = Itr->second->getAddress() - EndAddress; 1405 Holes.emplace_back(EndAddress, Gap); 1406 } 1407 EndAddress = Itr->second->getEndAddress(); 1408 ++Itr; 1409 } 1410 1411 if (EndAddress < Section.getEndAddress()) 1412 Holes.emplace_back(EndAddress, Section.getEndAddress() - EndAddress); 1413 1414 // If there is already a symbol at the start of the hole, grow that symbol 1415 // to cover the rest. Otherwise, create a new symbol to cover the hole. 1416 for (std::pair<uint64_t, uint64_t> &Hole : Holes) { 1417 BinaryData *BD = getBinaryDataAtAddress(Hole.first); 1418 if (BD) { 1419 // BD->getSection() can be != Section if there are sections that 1420 // overlap. In this case it is probably safe to just skip the holes 1421 // since the overlapping section will not(?) have any symbols in it. 1422 if (BD->getSection() == Section) 1423 setBinaryDataSize(Hole.first, Hole.second); 1424 } else { 1425 getOrCreateGlobalSymbol(Hole.first, "HOLEat", Hole.second, 1); 1426 } 1427 } 1428 } 1429 1430 assert(validateObjectNesting() && "object nesting inconsitency detected"); 1431 assert(validateHoles() && "top level hole detected in object map"); 1432 } 1433 1434 void BinaryContext::printGlobalSymbols(raw_ostream &OS) const { 1435 const BinarySection *CurrentSection = nullptr; 1436 bool FirstSection = true; 1437 1438 for (auto &Entry : BinaryDataMap) { 1439 const BinaryData *BD = Entry.second; 1440 const BinarySection &Section = BD->getSection(); 1441 if (FirstSection || Section != *CurrentSection) { 1442 uint64_t Address, Size; 1443 StringRef Name = Section.getName(); 1444 if (Section) { 1445 Address = Section.getAddress(); 1446 Size = Section.getSize(); 1447 } else { 1448 Address = BD->getAddress(); 1449 Size = BD->getSize(); 1450 } 1451 OS << "BOLT-INFO: Section " << Name << ", " 1452 << "0x" + Twine::utohexstr(Address) << ":" 1453 << "0x" + Twine::utohexstr(Address + Size) << "/" << Size << "\n"; 1454 CurrentSection = &Section; 1455 FirstSection = false; 1456 } 1457 1458 OS << "BOLT-INFO: "; 1459 const BinaryData *P = BD->getParent(); 1460 while (P) { 1461 OS << " "; 1462 P = P->getParent(); 1463 } 1464 OS << *BD << "\n"; 1465 } 1466 } 1467 1468 Expected<unsigned> BinaryContext::getDwarfFile( 1469 StringRef Directory, StringRef FileName, unsigned FileNumber, 1470 Optional<MD5::MD5Result> Checksum, Optional<StringRef> Source, 1471 unsigned CUID, unsigned DWARFVersion) { 1472 DwarfLineTable &Table = DwarfLineTablesCUMap[CUID]; 1473 return Table.tryGetFile(Directory, FileName, Checksum, Source, DWARFVersion, 1474 FileNumber); 1475 } 1476 1477 unsigned BinaryContext::addDebugFilenameToUnit(const uint32_t DestCUID, 1478 const uint32_t SrcCUID, 1479 unsigned FileIndex) { 1480 DWARFCompileUnit *SrcUnit = DwCtx->getCompileUnitForOffset(SrcCUID); 1481 const DWARFDebugLine::LineTable *LineTable = 1482 DwCtx->getLineTableForUnit(SrcUnit); 1483 const std::vector<DWARFDebugLine::FileNameEntry> &FileNames = 1484 LineTable->Prologue.FileNames; 1485 // Dir indexes start at 1, as DWARF file numbers, and a dir index 0 1486 // means empty dir. 1487 assert(FileIndex > 0 && FileIndex <= FileNames.size() && 1488 "FileIndex out of range for the compilation unit."); 1489 StringRef Dir = ""; 1490 if (FileNames[FileIndex - 1].DirIdx != 0) { 1491 if (Optional<const char *> DirName = dwarf::toString( 1492 LineTable->Prologue 1493 .IncludeDirectories[FileNames[FileIndex - 1].DirIdx - 1])) { 1494 Dir = *DirName; 1495 } 1496 } 1497 StringRef FileName = ""; 1498 if (Optional<const char *> FName = 1499 dwarf::toString(FileNames[FileIndex - 1].Name)) 1500 FileName = *FName; 1501 assert(FileName != ""); 1502 DWARFCompileUnit *DstUnit = DwCtx->getCompileUnitForOffset(DestCUID); 1503 return cantFail(getDwarfFile(Dir, FileName, 0, None, None, DestCUID, 1504 DstUnit->getVersion())); 1505 } 1506 1507 std::vector<BinaryFunction *> BinaryContext::getSortedFunctions() { 1508 std::vector<BinaryFunction *> SortedFunctions(BinaryFunctions.size()); 1509 llvm::transform(BinaryFunctions, SortedFunctions.begin(), 1510 [](std::pair<const uint64_t, BinaryFunction> &BFI) { 1511 return &BFI.second; 1512 }); 1513 1514 llvm::stable_sort(SortedFunctions, 1515 [](const BinaryFunction *A, const BinaryFunction *B) { 1516 if (A->hasValidIndex() && B->hasValidIndex()) { 1517 return A->getIndex() < B->getIndex(); 1518 } 1519 return A->hasValidIndex(); 1520 }); 1521 return SortedFunctions; 1522 } 1523 1524 std::vector<BinaryFunction *> BinaryContext::getAllBinaryFunctions() { 1525 std::vector<BinaryFunction *> AllFunctions; 1526 AllFunctions.reserve(BinaryFunctions.size() + InjectedBinaryFunctions.size()); 1527 llvm::transform(BinaryFunctions, std::back_inserter(AllFunctions), 1528 [](std::pair<const uint64_t, BinaryFunction> &BFI) { 1529 return &BFI.second; 1530 }); 1531 llvm::copy(InjectedBinaryFunctions, std::back_inserter(AllFunctions)); 1532 1533 return AllFunctions; 1534 } 1535 1536 Optional<DWARFUnit *> BinaryContext::getDWOCU(uint64_t DWOId) { 1537 auto Iter = DWOCUs.find(DWOId); 1538 if (Iter == DWOCUs.end()) 1539 return None; 1540 1541 return Iter->second; 1542 } 1543 1544 DWARFContext *BinaryContext::getDWOContext() const { 1545 if (DWOCUs.empty()) 1546 return nullptr; 1547 return &DWOCUs.begin()->second->getContext(); 1548 } 1549 1550 /// Handles DWO sections that can either be in .o, .dwo or .dwp files. 1551 void BinaryContext::preprocessDWODebugInfo() { 1552 for (const std::unique_ptr<DWARFUnit> &CU : DwCtx->compile_units()) { 1553 DWARFUnit *const DwarfUnit = CU.get(); 1554 if (llvm::Optional<uint64_t> DWOId = DwarfUnit->getDWOId()) { 1555 DWARFUnit *DWOCU = DwarfUnit->getNonSkeletonUnitDIE(false).getDwarfUnit(); 1556 if (!DWOCU->isDWOUnit()) { 1557 std::string DWOName = dwarf::toString( 1558 DwarfUnit->getUnitDIE().find( 1559 {dwarf::DW_AT_dwo_name, dwarf::DW_AT_GNU_dwo_name}), 1560 ""); 1561 outs() << "BOLT-WARNING: Debug Fission: DWO debug information for " 1562 << DWOName 1563 << " was not retrieved and won't be updated. Please check " 1564 "relative path.\n"; 1565 continue; 1566 } 1567 DWOCUs[*DWOId] = DWOCU; 1568 } 1569 } 1570 } 1571 1572 void BinaryContext::preprocessDebugInfo() { 1573 struct CURange { 1574 uint64_t LowPC; 1575 uint64_t HighPC; 1576 DWARFUnit *Unit; 1577 1578 bool operator<(const CURange &Other) const { return LowPC < Other.LowPC; } 1579 }; 1580 1581 // Building a map of address ranges to CUs similar to .debug_aranges and use 1582 // it to assign CU to functions. 1583 std::vector<CURange> AllRanges; 1584 AllRanges.reserve(DwCtx->getNumCompileUnits()); 1585 for (const std::unique_ptr<DWARFUnit> &CU : DwCtx->compile_units()) { 1586 Expected<DWARFAddressRangesVector> RangesOrError = 1587 CU->getUnitDIE().getAddressRanges(); 1588 if (!RangesOrError) { 1589 consumeError(RangesOrError.takeError()); 1590 continue; 1591 } 1592 for (DWARFAddressRange &Range : *RangesOrError) { 1593 // Parts of the debug info could be invalidated due to corresponding code 1594 // being removed from the binary by the linker. Hence we check if the 1595 // address is a valid one. 1596 if (containsAddress(Range.LowPC)) 1597 AllRanges.emplace_back(CURange{Range.LowPC, Range.HighPC, CU.get()}); 1598 } 1599 1600 ContainsDwarf5 |= CU->getVersion() >= 5; 1601 ContainsDwarfLegacy |= CU->getVersion() < 5; 1602 } 1603 1604 llvm::sort(AllRanges); 1605 for (auto &KV : BinaryFunctions) { 1606 const uint64_t FunctionAddress = KV.first; 1607 BinaryFunction &Function = KV.second; 1608 1609 auto It = llvm::partition_point( 1610 AllRanges, [=](CURange R) { return R.HighPC <= FunctionAddress; }); 1611 if (It != AllRanges.end() && It->LowPC <= FunctionAddress) 1612 Function.setDWARFUnit(It->Unit); 1613 } 1614 1615 // Discover units with debug info that needs to be updated. 1616 for (const auto &KV : BinaryFunctions) { 1617 const BinaryFunction &BF = KV.second; 1618 if (shouldEmit(BF) && BF.getDWARFUnit()) 1619 ProcessedCUs.insert(BF.getDWARFUnit()); 1620 } 1621 1622 // Clear debug info for functions from units that we are not going to process. 1623 for (auto &KV : BinaryFunctions) { 1624 BinaryFunction &BF = KV.second; 1625 if (BF.getDWARFUnit() && !ProcessedCUs.count(BF.getDWARFUnit())) 1626 BF.setDWARFUnit(nullptr); 1627 } 1628 1629 if (opts::Verbosity >= 1) { 1630 outs() << "BOLT-INFO: " << ProcessedCUs.size() << " out of " 1631 << DwCtx->getNumCompileUnits() << " CUs will be updated\n"; 1632 } 1633 1634 preprocessDWODebugInfo(); 1635 1636 // Populate MCContext with DWARF files from all units. 1637 StringRef GlobalPrefix = AsmInfo->getPrivateGlobalPrefix(); 1638 for (const std::unique_ptr<DWARFUnit> &CU : DwCtx->compile_units()) { 1639 const uint64_t CUID = CU->getOffset(); 1640 DwarfLineTable &BinaryLineTable = getDwarfLineTable(CUID); 1641 BinaryLineTable.setLabel(Ctx->getOrCreateSymbol( 1642 GlobalPrefix + "line_table_start" + Twine(CUID))); 1643 1644 if (!ProcessedCUs.count(CU.get())) 1645 continue; 1646 1647 const DWARFDebugLine::LineTable *LineTable = 1648 DwCtx->getLineTableForUnit(CU.get()); 1649 const std::vector<DWARFDebugLine::FileNameEntry> &FileNames = 1650 LineTable->Prologue.FileNames; 1651 1652 uint16_t DwarfVersion = LineTable->Prologue.getVersion(); 1653 if (DwarfVersion >= 5) { 1654 Optional<MD5::MD5Result> Checksum = None; 1655 if (LineTable->Prologue.ContentTypes.HasMD5) 1656 Checksum = LineTable->Prologue.FileNames[0].Checksum; 1657 Optional<const char *> Name = 1658 dwarf::toString(CU->getUnitDIE().find(dwarf::DW_AT_name), nullptr); 1659 if (Optional<uint64_t> DWOID = CU->getDWOId()) { 1660 auto Iter = DWOCUs.find(*DWOID); 1661 assert(Iter != DWOCUs.end() && "DWO CU was not found."); 1662 Name = dwarf::toString( 1663 Iter->second->getUnitDIE().find(dwarf::DW_AT_name), nullptr); 1664 } 1665 BinaryLineTable.setRootFile(CU->getCompilationDir(), *Name, Checksum, 1666 None); 1667 } 1668 1669 BinaryLineTable.setDwarfVersion(DwarfVersion); 1670 1671 // Assign a unique label to every line table, one per CU. 1672 // Make sure empty debug line tables are registered too. 1673 if (FileNames.empty()) { 1674 cantFail( 1675 getDwarfFile("", "<unknown>", 0, None, None, CUID, DwarfVersion)); 1676 continue; 1677 } 1678 const uint32_t Offset = DwarfVersion < 5 ? 1 : 0; 1679 for (size_t I = 0, Size = FileNames.size(); I != Size; ++I) { 1680 // Dir indexes start at 1, as DWARF file numbers, and a dir index 0 1681 // means empty dir. 1682 StringRef Dir = ""; 1683 if (FileNames[I].DirIdx != 0 || DwarfVersion >= 5) 1684 if (Optional<const char *> DirName = dwarf::toString( 1685 LineTable->Prologue 1686 .IncludeDirectories[FileNames[I].DirIdx - Offset])) 1687 Dir = *DirName; 1688 StringRef FileName = ""; 1689 if (Optional<const char *> FName = dwarf::toString(FileNames[I].Name)) 1690 FileName = *FName; 1691 assert(FileName != ""); 1692 Optional<MD5::MD5Result> Checksum = None; 1693 if (DwarfVersion >= 5 && LineTable->Prologue.ContentTypes.HasMD5) 1694 Checksum = LineTable->Prologue.FileNames[I].Checksum; 1695 cantFail( 1696 getDwarfFile(Dir, FileName, 0, Checksum, None, CUID, DwarfVersion)); 1697 } 1698 } 1699 } 1700 1701 bool BinaryContext::shouldEmit(const BinaryFunction &Function) const { 1702 if (Function.isPseudo()) 1703 return false; 1704 1705 if (opts::processAllFunctions()) 1706 return true; 1707 1708 if (Function.isIgnored()) 1709 return false; 1710 1711 // In relocation mode we will emit non-simple functions with CFG. 1712 // If the function does not have a CFG it should be marked as ignored. 1713 return HasRelocations || Function.isSimple(); 1714 } 1715 1716 void BinaryContext::printCFI(raw_ostream &OS, const MCCFIInstruction &Inst) { 1717 uint32_t Operation = Inst.getOperation(); 1718 switch (Operation) { 1719 case MCCFIInstruction::OpSameValue: 1720 OS << "OpSameValue Reg" << Inst.getRegister(); 1721 break; 1722 case MCCFIInstruction::OpRememberState: 1723 OS << "OpRememberState"; 1724 break; 1725 case MCCFIInstruction::OpRestoreState: 1726 OS << "OpRestoreState"; 1727 break; 1728 case MCCFIInstruction::OpOffset: 1729 OS << "OpOffset Reg" << Inst.getRegister() << " " << Inst.getOffset(); 1730 break; 1731 case MCCFIInstruction::OpDefCfaRegister: 1732 OS << "OpDefCfaRegister Reg" << Inst.getRegister(); 1733 break; 1734 case MCCFIInstruction::OpDefCfaOffset: 1735 OS << "OpDefCfaOffset " << Inst.getOffset(); 1736 break; 1737 case MCCFIInstruction::OpDefCfa: 1738 OS << "OpDefCfa Reg" << Inst.getRegister() << " " << Inst.getOffset(); 1739 break; 1740 case MCCFIInstruction::OpRelOffset: 1741 OS << "OpRelOffset Reg" << Inst.getRegister() << " " << Inst.getOffset(); 1742 break; 1743 case MCCFIInstruction::OpAdjustCfaOffset: 1744 OS << "OfAdjustCfaOffset " << Inst.getOffset(); 1745 break; 1746 case MCCFIInstruction::OpEscape: 1747 OS << "OpEscape"; 1748 break; 1749 case MCCFIInstruction::OpRestore: 1750 OS << "OpRestore Reg" << Inst.getRegister(); 1751 break; 1752 case MCCFIInstruction::OpUndefined: 1753 OS << "OpUndefined Reg" << Inst.getRegister(); 1754 break; 1755 case MCCFIInstruction::OpRegister: 1756 OS << "OpRegister Reg" << Inst.getRegister() << " Reg" 1757 << Inst.getRegister2(); 1758 break; 1759 case MCCFIInstruction::OpWindowSave: 1760 OS << "OpWindowSave"; 1761 break; 1762 case MCCFIInstruction::OpGnuArgsSize: 1763 OS << "OpGnuArgsSize"; 1764 break; 1765 default: 1766 OS << "Op#" << Operation; 1767 break; 1768 } 1769 } 1770 1771 MarkerSymType BinaryContext::getMarkerType(const SymbolRef &Symbol) const { 1772 // For aarch64, the ABI defines mapping symbols so we identify data in the 1773 // code section (see IHI0056B). $x identifies a symbol starting code or the 1774 // end of a data chunk inside code, $d indentifies start of data. 1775 if (!isAArch64() || ELFSymbolRef(Symbol).getSize()) 1776 return MarkerSymType::NONE; 1777 1778 Expected<StringRef> NameOrError = Symbol.getName(); 1779 Expected<object::SymbolRef::Type> TypeOrError = Symbol.getType(); 1780 1781 if (!TypeOrError || !NameOrError) 1782 return MarkerSymType::NONE; 1783 1784 if (*TypeOrError != SymbolRef::ST_Unknown) 1785 return MarkerSymType::NONE; 1786 1787 if (*NameOrError == "$x" || NameOrError->startswith("$x.")) 1788 return MarkerSymType::CODE; 1789 1790 if (*NameOrError == "$d" || NameOrError->startswith("$d.")) 1791 return MarkerSymType::DATA; 1792 1793 return MarkerSymType::NONE; 1794 } 1795 1796 bool BinaryContext::isMarker(const SymbolRef &Symbol) const { 1797 return getMarkerType(Symbol) != MarkerSymType::NONE; 1798 } 1799 1800 static void printDebugInfo(raw_ostream &OS, const MCInst &Instruction, 1801 const BinaryFunction *Function, 1802 DWARFContext *DwCtx) { 1803 DebugLineTableRowRef RowRef = 1804 DebugLineTableRowRef::fromSMLoc(Instruction.getLoc()); 1805 if (RowRef == DebugLineTableRowRef::NULL_ROW) 1806 return; 1807 1808 const DWARFDebugLine::LineTable *LineTable; 1809 if (Function && Function->getDWARFUnit() && 1810 Function->getDWARFUnit()->getOffset() == RowRef.DwCompileUnitIndex) { 1811 LineTable = Function->getDWARFLineTable(); 1812 } else { 1813 LineTable = DwCtx->getLineTableForUnit( 1814 DwCtx->getCompileUnitForOffset(RowRef.DwCompileUnitIndex)); 1815 } 1816 assert(LineTable && "line table expected for instruction with debug info"); 1817 1818 const DWARFDebugLine::Row &Row = LineTable->Rows[RowRef.RowIndex - 1]; 1819 StringRef FileName = ""; 1820 if (Optional<const char *> FName = 1821 dwarf::toString(LineTable->Prologue.FileNames[Row.File - 1].Name)) 1822 FileName = *FName; 1823 OS << " # debug line " << FileName << ":" << Row.Line; 1824 if (Row.Column) 1825 OS << ":" << Row.Column; 1826 if (Row.Discriminator) 1827 OS << " discriminator:" << Row.Discriminator; 1828 } 1829 1830 void BinaryContext::printInstruction(raw_ostream &OS, const MCInst &Instruction, 1831 uint64_t Offset, 1832 const BinaryFunction *Function, 1833 bool PrintMCInst, bool PrintMemData, 1834 bool PrintRelocations, 1835 StringRef Endl) const { 1836 if (MIB->isEHLabel(Instruction)) { 1837 OS << " EH_LABEL: " << *MIB->getTargetSymbol(Instruction) << Endl; 1838 return; 1839 } 1840 OS << format(" %08" PRIx64 ": ", Offset); 1841 if (MIB->isCFI(Instruction)) { 1842 uint32_t Offset = Instruction.getOperand(0).getImm(); 1843 OS << "\t!CFI\t$" << Offset << "\t; "; 1844 if (Function) 1845 printCFI(OS, *Function->getCFIFor(Instruction)); 1846 OS << Endl; 1847 return; 1848 } 1849 InstPrinter->printInst(&Instruction, 0, "", *STI, OS); 1850 if (MIB->isCall(Instruction)) { 1851 if (MIB->isTailCall(Instruction)) 1852 OS << " # TAILCALL "; 1853 if (MIB->isInvoke(Instruction)) { 1854 const Optional<MCPlus::MCLandingPad> EHInfo = MIB->getEHInfo(Instruction); 1855 OS << " # handler: "; 1856 if (EHInfo->first) 1857 OS << *EHInfo->first; 1858 else 1859 OS << '0'; 1860 OS << "; action: " << EHInfo->second; 1861 const int64_t GnuArgsSize = MIB->getGnuArgsSize(Instruction); 1862 if (GnuArgsSize >= 0) 1863 OS << "; GNU_args_size = " << GnuArgsSize; 1864 } 1865 } else if (MIB->isIndirectBranch(Instruction)) { 1866 if (uint64_t JTAddress = MIB->getJumpTable(Instruction)) { 1867 OS << " # JUMPTABLE @0x" << Twine::utohexstr(JTAddress); 1868 } else { 1869 OS << " # UNKNOWN CONTROL FLOW"; 1870 } 1871 } 1872 if (Optional<uint32_t> Offset = MIB->getOffset(Instruction)) 1873 OS << " # Offset: " << *Offset; 1874 1875 MIB->printAnnotations(Instruction, OS); 1876 1877 if (opts::PrintDebugInfo) 1878 printDebugInfo(OS, Instruction, Function, DwCtx.get()); 1879 1880 if ((opts::PrintRelocations || PrintRelocations) && Function) { 1881 const uint64_t Size = computeCodeSize(&Instruction, &Instruction + 1); 1882 Function->printRelocations(OS, Offset, Size); 1883 } 1884 1885 OS << Endl; 1886 1887 if (PrintMCInst) { 1888 Instruction.dump_pretty(OS, InstPrinter.get()); 1889 OS << Endl; 1890 } 1891 } 1892 1893 Optional<uint64_t> 1894 BinaryContext::getBaseAddressForMapping(uint64_t MMapAddress, 1895 uint64_t FileOffset) const { 1896 // Find a segment with a matching file offset. 1897 for (auto &KV : SegmentMapInfo) { 1898 const SegmentInfo &SegInfo = KV.second; 1899 if (alignDown(SegInfo.FileOffset, SegInfo.Alignment) == FileOffset) { 1900 // Use segment's aligned memory offset to calculate the base address. 1901 const uint64_t MemOffset = alignDown(SegInfo.Address, SegInfo.Alignment); 1902 return MMapAddress - MemOffset; 1903 } 1904 } 1905 1906 return NoneType(); 1907 } 1908 1909 ErrorOr<BinarySection &> BinaryContext::getSectionForAddress(uint64_t Address) { 1910 auto SI = AddressToSection.upper_bound(Address); 1911 if (SI != AddressToSection.begin()) { 1912 --SI; 1913 uint64_t UpperBound = SI->first + SI->second->getSize(); 1914 if (!SI->second->getSize()) 1915 UpperBound += 1; 1916 if (UpperBound > Address) 1917 return *SI->second; 1918 } 1919 return std::make_error_code(std::errc::bad_address); 1920 } 1921 1922 ErrorOr<StringRef> 1923 BinaryContext::getSectionNameForAddress(uint64_t Address) const { 1924 if (ErrorOr<const BinarySection &> Section = getSectionForAddress(Address)) 1925 return Section->getName(); 1926 return std::make_error_code(std::errc::bad_address); 1927 } 1928 1929 BinarySection &BinaryContext::registerSection(BinarySection *Section) { 1930 auto Res = Sections.insert(Section); 1931 (void)Res; 1932 assert(Res.second && "can't register the same section twice."); 1933 1934 // Only register allocatable sections in the AddressToSection map. 1935 if (Section->isAllocatable() && Section->getAddress()) 1936 AddressToSection.insert(std::make_pair(Section->getAddress(), Section)); 1937 NameToSection.insert( 1938 std::make_pair(std::string(Section->getName()), Section)); 1939 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: registering " << *Section << "\n"); 1940 return *Section; 1941 } 1942 1943 BinarySection &BinaryContext::registerSection(SectionRef Section) { 1944 return registerSection(new BinarySection(*this, Section)); 1945 } 1946 1947 BinarySection & 1948 BinaryContext::registerSection(StringRef SectionName, 1949 const BinarySection &OriginalSection) { 1950 return registerSection( 1951 new BinarySection(*this, SectionName, OriginalSection)); 1952 } 1953 1954 BinarySection & 1955 BinaryContext::registerOrUpdateSection(StringRef Name, unsigned ELFType, 1956 unsigned ELFFlags, uint8_t *Data, 1957 uint64_t Size, unsigned Alignment) { 1958 auto NamedSections = getSectionByName(Name); 1959 if (NamedSections.begin() != NamedSections.end()) { 1960 assert(std::next(NamedSections.begin()) == NamedSections.end() && 1961 "can only update unique sections"); 1962 BinarySection *Section = NamedSections.begin()->second; 1963 1964 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: updating " << *Section << " -> "); 1965 const bool Flag = Section->isAllocatable(); 1966 (void)Flag; 1967 Section->update(Data, Size, Alignment, ELFType, ELFFlags); 1968 LLVM_DEBUG(dbgs() << *Section << "\n"); 1969 // FIXME: Fix section flags/attributes for MachO. 1970 if (isELF()) 1971 assert(Flag == Section->isAllocatable() && 1972 "can't change section allocation status"); 1973 return *Section; 1974 } 1975 1976 return registerSection( 1977 new BinarySection(*this, Name, Data, Size, Alignment, ELFType, ELFFlags)); 1978 } 1979 1980 bool BinaryContext::deregisterSection(BinarySection &Section) { 1981 BinarySection *SectionPtr = &Section; 1982 auto Itr = Sections.find(SectionPtr); 1983 if (Itr != Sections.end()) { 1984 auto Range = AddressToSection.equal_range(SectionPtr->getAddress()); 1985 while (Range.first != Range.second) { 1986 if (Range.first->second == SectionPtr) { 1987 AddressToSection.erase(Range.first); 1988 break; 1989 } 1990 ++Range.first; 1991 } 1992 1993 auto NameRange = 1994 NameToSection.equal_range(std::string(SectionPtr->getName())); 1995 while (NameRange.first != NameRange.second) { 1996 if (NameRange.first->second == SectionPtr) { 1997 NameToSection.erase(NameRange.first); 1998 break; 1999 } 2000 ++NameRange.first; 2001 } 2002 2003 Sections.erase(Itr); 2004 delete SectionPtr; 2005 return true; 2006 } 2007 return false; 2008 } 2009 2010 void BinaryContext::printSections(raw_ostream &OS) const { 2011 for (BinarySection *const &Section : Sections) 2012 OS << "BOLT-INFO: " << *Section << "\n"; 2013 } 2014 2015 BinarySection &BinaryContext::absoluteSection() { 2016 if (ErrorOr<BinarySection &> Section = getUniqueSectionByName("<absolute>")) 2017 return *Section; 2018 return registerOrUpdateSection("<absolute>", ELF::SHT_NULL, 0u); 2019 } 2020 2021 ErrorOr<uint64_t> BinaryContext::getUnsignedValueAtAddress(uint64_t Address, 2022 size_t Size) const { 2023 const ErrorOr<const BinarySection &> Section = getSectionForAddress(Address); 2024 if (!Section) 2025 return std::make_error_code(std::errc::bad_address); 2026 2027 if (Section->isVirtual()) 2028 return 0; 2029 2030 DataExtractor DE(Section->getContents(), AsmInfo->isLittleEndian(), 2031 AsmInfo->getCodePointerSize()); 2032 auto ValueOffset = static_cast<uint64_t>(Address - Section->getAddress()); 2033 return DE.getUnsigned(&ValueOffset, Size); 2034 } 2035 2036 ErrorOr<uint64_t> BinaryContext::getSignedValueAtAddress(uint64_t Address, 2037 size_t Size) const { 2038 const ErrorOr<const BinarySection &> Section = getSectionForAddress(Address); 2039 if (!Section) 2040 return std::make_error_code(std::errc::bad_address); 2041 2042 if (Section->isVirtual()) 2043 return 0; 2044 2045 DataExtractor DE(Section->getContents(), AsmInfo->isLittleEndian(), 2046 AsmInfo->getCodePointerSize()); 2047 auto ValueOffset = static_cast<uint64_t>(Address - Section->getAddress()); 2048 return DE.getSigned(&ValueOffset, Size); 2049 } 2050 2051 void BinaryContext::addRelocation(uint64_t Address, MCSymbol *Symbol, 2052 uint64_t Type, uint64_t Addend, 2053 uint64_t Value) { 2054 ErrorOr<BinarySection &> Section = getSectionForAddress(Address); 2055 assert(Section && "cannot find section for address"); 2056 Section->addRelocation(Address - Section->getAddress(), Symbol, Type, Addend, 2057 Value); 2058 } 2059 2060 void BinaryContext::addDynamicRelocation(uint64_t Address, MCSymbol *Symbol, 2061 uint64_t Type, uint64_t Addend, 2062 uint64_t Value) { 2063 ErrorOr<BinarySection &> Section = getSectionForAddress(Address); 2064 assert(Section && "cannot find section for address"); 2065 Section->addDynamicRelocation(Address - Section->getAddress(), Symbol, Type, 2066 Addend, Value); 2067 } 2068 2069 bool BinaryContext::removeRelocationAt(uint64_t Address) { 2070 ErrorOr<BinarySection &> Section = getSectionForAddress(Address); 2071 assert(Section && "cannot find section for address"); 2072 return Section->removeRelocationAt(Address - Section->getAddress()); 2073 } 2074 2075 const Relocation *BinaryContext::getRelocationAt(uint64_t Address) { 2076 ErrorOr<BinarySection &> Section = getSectionForAddress(Address); 2077 if (!Section) 2078 return nullptr; 2079 2080 return Section->getRelocationAt(Address - Section->getAddress()); 2081 } 2082 2083 const Relocation *BinaryContext::getDynamicRelocationAt(uint64_t Address) { 2084 ErrorOr<BinarySection &> Section = getSectionForAddress(Address); 2085 if (!Section) 2086 return nullptr; 2087 2088 return Section->getDynamicRelocationAt(Address - Section->getAddress()); 2089 } 2090 2091 void BinaryContext::markAmbiguousRelocations(BinaryData &BD, 2092 const uint64_t Address) { 2093 auto setImmovable = [&](BinaryData &BD) { 2094 BinaryData *Root = BD.getAtomicRoot(); 2095 LLVM_DEBUG(if (Root->isMoveable()) { 2096 dbgs() << "BOLT-DEBUG: setting " << *Root << " as immovable " 2097 << "due to ambiguous relocation referencing 0x" 2098 << Twine::utohexstr(Address) << '\n'; 2099 }); 2100 Root->setIsMoveable(false); 2101 }; 2102 2103 if (Address == BD.getAddress()) { 2104 setImmovable(BD); 2105 2106 // Set previous symbol as immovable 2107 BinaryData *Prev = getBinaryDataContainingAddress(Address - 1); 2108 if (Prev && Prev->getEndAddress() == BD.getAddress()) 2109 setImmovable(*Prev); 2110 } 2111 2112 if (Address == BD.getEndAddress()) { 2113 setImmovable(BD); 2114 2115 // Set next symbol as immovable 2116 BinaryData *Next = getBinaryDataContainingAddress(BD.getEndAddress()); 2117 if (Next && Next->getAddress() == BD.getEndAddress()) 2118 setImmovable(*Next); 2119 } 2120 } 2121 2122 BinaryFunction *BinaryContext::getFunctionForSymbol(const MCSymbol *Symbol, 2123 uint64_t *EntryDesc) { 2124 std::shared_lock<std::shared_timed_mutex> Lock(SymbolToFunctionMapMutex); 2125 auto BFI = SymbolToFunctionMap.find(Symbol); 2126 if (BFI == SymbolToFunctionMap.end()) 2127 return nullptr; 2128 2129 BinaryFunction *BF = BFI->second; 2130 if (EntryDesc) 2131 *EntryDesc = BF->getEntryIDForSymbol(Symbol); 2132 2133 return BF; 2134 } 2135 2136 void BinaryContext::exitWithBugReport(StringRef Message, 2137 const BinaryFunction &Function) const { 2138 errs() << "=======================================\n"; 2139 errs() << "BOLT is unable to proceed because it couldn't properly understand " 2140 "this function.\n"; 2141 errs() << "If you are running the most recent version of BOLT, you may " 2142 "want to " 2143 "report this and paste this dump.\nPlease check that there is no " 2144 "sensitive contents being shared in this dump.\n"; 2145 errs() << "\nOffending function: " << Function.getPrintName() << "\n\n"; 2146 ScopedPrinter SP(errs()); 2147 SP.printBinaryBlock("Function contents", *Function.getData()); 2148 errs() << "\n"; 2149 Function.dump(); 2150 errs() << "ERROR: " << Message; 2151 errs() << "\n=======================================\n"; 2152 exit(1); 2153 } 2154 2155 BinaryFunction * 2156 BinaryContext::createInjectedBinaryFunction(const std::string &Name, 2157 bool IsSimple) { 2158 InjectedBinaryFunctions.push_back(new BinaryFunction(Name, *this, IsSimple)); 2159 BinaryFunction *BF = InjectedBinaryFunctions.back(); 2160 setSymbolToFunctionMap(BF->getSymbol(), BF); 2161 BF->CurrentState = BinaryFunction::State::CFG; 2162 return BF; 2163 } 2164 2165 std::pair<size_t, size_t> 2166 BinaryContext::calculateEmittedSize(BinaryFunction &BF, bool FixBranches) { 2167 // Adjust branch instruction to match the current layout. 2168 if (FixBranches) 2169 BF.fixBranches(); 2170 2171 // Create local MC context to isolate the effect of ephemeral code emission. 2172 IndependentCodeEmitter MCEInstance = createIndependentMCCodeEmitter(); 2173 MCContext *LocalCtx = MCEInstance.LocalCtx.get(); 2174 MCAsmBackend *MAB = 2175 TheTarget->createMCAsmBackend(*STI, *MRI, MCTargetOptions()); 2176 2177 SmallString<256> Code; 2178 raw_svector_ostream VecOS(Code); 2179 2180 std::unique_ptr<MCObjectWriter> OW = MAB->createObjectWriter(VecOS); 2181 std::unique_ptr<MCStreamer> Streamer(TheTarget->createMCObjectStreamer( 2182 *TheTriple, *LocalCtx, std::unique_ptr<MCAsmBackend>(MAB), std::move(OW), 2183 std::unique_ptr<MCCodeEmitter>(MCEInstance.MCE.release()), *STI, 2184 /*RelaxAll=*/false, 2185 /*IncrementalLinkerCompatible=*/false, 2186 /*DWARFMustBeAtTheEnd=*/false)); 2187 2188 Streamer->initSections(false, *STI); 2189 2190 MCSection *Section = MCEInstance.LocalMOFI->getTextSection(); 2191 Section->setHasInstructions(true); 2192 2193 // Create symbols in the LocalCtx so that they get destroyed with it. 2194 MCSymbol *StartLabel = LocalCtx->createTempSymbol(); 2195 MCSymbol *EndLabel = LocalCtx->createTempSymbol(); 2196 MCSymbol *ColdStartLabel = LocalCtx->createTempSymbol(); 2197 MCSymbol *ColdEndLabel = LocalCtx->createTempSymbol(); 2198 2199 Streamer->switchSection(Section); 2200 Streamer->emitLabel(StartLabel); 2201 emitFunctionBody(*Streamer, BF, /*EmitColdPart=*/false, 2202 /*EmitCodeOnly=*/true); 2203 Streamer->emitLabel(EndLabel); 2204 2205 if (BF.isSplit()) { 2206 MCSectionELF *ColdSection = 2207 LocalCtx->getELFSection(BF.getColdCodeSectionName(), ELF::SHT_PROGBITS, 2208 ELF::SHF_EXECINSTR | ELF::SHF_ALLOC); 2209 ColdSection->setHasInstructions(true); 2210 2211 Streamer->switchSection(ColdSection); 2212 Streamer->emitLabel(ColdStartLabel); 2213 emitFunctionBody(*Streamer, BF, /*EmitColdPart=*/true, 2214 /*EmitCodeOnly=*/true); 2215 Streamer->emitLabel(ColdEndLabel); 2216 // To avoid calling MCObjectStreamer::flushPendingLabels() which is private 2217 Streamer->emitBytes(StringRef("")); 2218 Streamer->switchSection(Section); 2219 } 2220 2221 // To avoid calling MCObjectStreamer::flushPendingLabels() which is private or 2222 // MCStreamer::Finish(), which does more than we want 2223 Streamer->emitBytes(StringRef("")); 2224 2225 MCAssembler &Assembler = 2226 static_cast<MCObjectStreamer *>(Streamer.get())->getAssembler(); 2227 MCAsmLayout Layout(Assembler); 2228 Assembler.layout(Layout); 2229 2230 const uint64_t HotSize = 2231 Layout.getSymbolOffset(*EndLabel) - Layout.getSymbolOffset(*StartLabel); 2232 const uint64_t ColdSize = BF.isSplit() 2233 ? Layout.getSymbolOffset(*ColdEndLabel) - 2234 Layout.getSymbolOffset(*ColdStartLabel) 2235 : 0ULL; 2236 2237 // Clean-up the effect of the code emission. 2238 for (const MCSymbol &Symbol : Assembler.symbols()) { 2239 MCSymbol *MutableSymbol = const_cast<MCSymbol *>(&Symbol); 2240 MutableSymbol->setUndefined(); 2241 MutableSymbol->setIsRegistered(false); 2242 } 2243 2244 return std::make_pair(HotSize, ColdSize); 2245 } 2246 2247 bool BinaryContext::validateEncoding(const MCInst &Inst, 2248 ArrayRef<uint8_t> InputEncoding) const { 2249 SmallString<256> Code; 2250 SmallVector<MCFixup, 4> Fixups; 2251 raw_svector_ostream VecOS(Code); 2252 2253 MCE->encodeInstruction(Inst, VecOS, Fixups, *STI); 2254 auto EncodedData = ArrayRef<uint8_t>((uint8_t *)Code.data(), Code.size()); 2255 if (InputEncoding != EncodedData) { 2256 if (opts::Verbosity > 1) { 2257 errs() << "BOLT-WARNING: mismatched encoding detected\n" 2258 << " input: " << InputEncoding << '\n' 2259 << " output: " << EncodedData << '\n'; 2260 } 2261 return false; 2262 } 2263 2264 return true; 2265 } 2266 2267 uint64_t BinaryContext::getHotThreshold() const { 2268 static uint64_t Threshold = 0; 2269 if (Threshold == 0) { 2270 Threshold = std::max( 2271 (uint64_t)opts::ExecutionCountThreshold, 2272 NumProfiledFuncs ? SumExecutionCount / (2 * NumProfiledFuncs) : 1); 2273 } 2274 return Threshold; 2275 } 2276 2277 BinaryFunction *BinaryContext::getBinaryFunctionContainingAddress( 2278 uint64_t Address, bool CheckPastEnd, bool UseMaxSize) { 2279 auto FI = BinaryFunctions.upper_bound(Address); 2280 if (FI == BinaryFunctions.begin()) 2281 return nullptr; 2282 --FI; 2283 2284 const uint64_t UsedSize = 2285 UseMaxSize ? FI->second.getMaxSize() : FI->second.getSize(); 2286 2287 if (Address >= FI->first + UsedSize + (CheckPastEnd ? 1 : 0)) 2288 return nullptr; 2289 2290 return &FI->second; 2291 } 2292 2293 BinaryFunction *BinaryContext::getBinaryFunctionAtAddress(uint64_t Address) { 2294 // First, try to find a function starting at the given address. If the 2295 // function was folded, this will get us the original folded function if it 2296 // wasn't removed from the list, e.g. in non-relocation mode. 2297 auto BFI = BinaryFunctions.find(Address); 2298 if (BFI != BinaryFunctions.end()) 2299 return &BFI->second; 2300 2301 // We might have folded the function matching the object at the given 2302 // address. In such case, we look for a function matching the symbol 2303 // registered at the original address. The new function (the one that the 2304 // original was folded into) will hold the symbol. 2305 if (const BinaryData *BD = getBinaryDataAtAddress(Address)) { 2306 uint64_t EntryID = 0; 2307 BinaryFunction *BF = getFunctionForSymbol(BD->getSymbol(), &EntryID); 2308 if (BF && EntryID == 0) 2309 return BF; 2310 } 2311 return nullptr; 2312 } 2313 2314 DebugAddressRangesVector BinaryContext::translateModuleAddressRanges( 2315 const DWARFAddressRangesVector &InputRanges) const { 2316 DebugAddressRangesVector OutputRanges; 2317 2318 for (const DWARFAddressRange Range : InputRanges) { 2319 auto BFI = BinaryFunctions.lower_bound(Range.LowPC); 2320 while (BFI != BinaryFunctions.end()) { 2321 const BinaryFunction &Function = BFI->second; 2322 if (Function.getAddress() >= Range.HighPC) 2323 break; 2324 const DebugAddressRangesVector FunctionRanges = 2325 Function.getOutputAddressRanges(); 2326 llvm::move(FunctionRanges, std::back_inserter(OutputRanges)); 2327 std::advance(BFI, 1); 2328 } 2329 } 2330 2331 return OutputRanges; 2332 } 2333 2334 } // namespace bolt 2335 } // namespace llvm 2336