1 //===- bolt/Core/BinaryContext.cpp - Low-level context --------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file implements the BinaryContext class. 10 // 11 //===----------------------------------------------------------------------===// 12 13 #include "bolt/Core/BinaryContext.h" 14 #include "bolt/Core/BinaryEmitter.h" 15 #include "bolt/Core/BinaryFunction.h" 16 #include "bolt/Utils/CommandLineOpts.h" 17 #include "bolt/Utils/NameResolver.h" 18 #include "bolt/Utils/Utils.h" 19 #include "llvm/ADT/Twine.h" 20 #include "llvm/DebugInfo/DWARF/DWARFCompileUnit.h" 21 #include "llvm/DebugInfo/DWARF/DWARFFormValue.h" 22 #include "llvm/DebugInfo/DWARF/DWARFUnit.h" 23 #include "llvm/MC/MCAsmLayout.h" 24 #include "llvm/MC/MCAssembler.h" 25 #include "llvm/MC/MCContext.h" 26 #include "llvm/MC/MCDisassembler/MCDisassembler.h" 27 #include "llvm/MC/MCInstPrinter.h" 28 #include "llvm/MC/MCObjectStreamer.h" 29 #include "llvm/MC/MCObjectWriter.h" 30 #include "llvm/MC/MCRegisterInfo.h" 31 #include "llvm/MC/MCSectionELF.h" 32 #include "llvm/MC/MCStreamer.h" 33 #include "llvm/MC/MCSubtargetInfo.h" 34 #include "llvm/MC/MCSymbol.h" 35 #include "llvm/Support/CommandLine.h" 36 #include "llvm/Support/Error.h" 37 #include "llvm/Support/Regex.h" 38 #include <algorithm> 39 #include <functional> 40 #include <iterator> 41 #include <numeric> 42 #include <unordered_set> 43 44 using namespace llvm; 45 46 #undef DEBUG_TYPE 47 #define DEBUG_TYPE "bolt" 48 49 namespace opts { 50 51 cl::opt<bool> NoHugePages("no-huge-pages", 52 cl::desc("use regular size pages for code alignment"), 53 cl::Hidden, cl::cat(BoltCategory)); 54 55 static cl::opt<bool> 56 PrintDebugInfo("print-debug-info", 57 cl::desc("print debug info when printing functions"), 58 cl::Hidden, 59 cl::ZeroOrMore, 60 cl::cat(BoltCategory)); 61 62 cl::opt<bool> PrintRelocations( 63 "print-relocations", 64 cl::desc("print relocations when printing functions/objects"), cl::Hidden, 65 cl::cat(BoltCategory)); 66 67 static cl::opt<bool> 68 PrintMemData("print-mem-data", 69 cl::desc("print memory data annotations when printing functions"), 70 cl::Hidden, 71 cl::ZeroOrMore, 72 cl::cat(BoltCategory)); 73 74 } // namespace opts 75 76 namespace llvm { 77 namespace bolt { 78 79 BinaryContext::BinaryContext(std::unique_ptr<MCContext> Ctx, 80 std::unique_ptr<DWARFContext> DwCtx, 81 std::unique_ptr<Triple> TheTriple, 82 const Target *TheTarget, std::string TripleName, 83 std::unique_ptr<MCCodeEmitter> MCE, 84 std::unique_ptr<MCObjectFileInfo> MOFI, 85 std::unique_ptr<const MCAsmInfo> AsmInfo, 86 std::unique_ptr<const MCInstrInfo> MII, 87 std::unique_ptr<const MCSubtargetInfo> STI, 88 std::unique_ptr<MCInstPrinter> InstPrinter, 89 std::unique_ptr<const MCInstrAnalysis> MIA, 90 std::unique_ptr<MCPlusBuilder> MIB, 91 std::unique_ptr<const MCRegisterInfo> MRI, 92 std::unique_ptr<MCDisassembler> DisAsm) 93 : Ctx(std::move(Ctx)), DwCtx(std::move(DwCtx)), 94 TheTriple(std::move(TheTriple)), TheTarget(TheTarget), 95 TripleName(TripleName), MCE(std::move(MCE)), MOFI(std::move(MOFI)), 96 AsmInfo(std::move(AsmInfo)), MII(std::move(MII)), STI(std::move(STI)), 97 InstPrinter(std::move(InstPrinter)), MIA(std::move(MIA)), 98 MIB(std::move(MIB)), MRI(std::move(MRI)), DisAsm(std::move(DisAsm)) { 99 Relocation::Arch = this->TheTriple->getArch(); 100 RegularPageSize = isAArch64() ? RegularPageSizeAArch64 : RegularPageSizeX86; 101 PageAlign = opts::NoHugePages ? RegularPageSize : HugePageSize; 102 } 103 104 BinaryContext::~BinaryContext() { 105 for (BinarySection *Section : Sections) 106 delete Section; 107 for (BinaryFunction *InjectedFunction : InjectedBinaryFunctions) 108 delete InjectedFunction; 109 for (std::pair<const uint64_t, JumpTable *> JTI : JumpTables) 110 delete JTI.second; 111 clearBinaryData(); 112 } 113 114 /// Create BinaryContext for a given architecture \p ArchName and 115 /// triple \p TripleName. 116 Expected<std::unique_ptr<BinaryContext>> 117 BinaryContext::createBinaryContext(const ObjectFile *File, bool IsPIC, 118 std::unique_ptr<DWARFContext> DwCtx) { 119 StringRef ArchName = ""; 120 StringRef FeaturesStr = ""; 121 switch (File->getArch()) { 122 case llvm::Triple::x86_64: 123 ArchName = "x86-64"; 124 FeaturesStr = "+nopl"; 125 break; 126 case llvm::Triple::aarch64: 127 ArchName = "aarch64"; 128 FeaturesStr = "+all"; 129 break; 130 default: 131 return createStringError(std::errc::not_supported, 132 "BOLT-ERROR: Unrecognized machine in ELF file"); 133 } 134 135 auto TheTriple = std::make_unique<Triple>(File->makeTriple()); 136 const std::string TripleName = TheTriple->str(); 137 138 std::string Error; 139 const Target *TheTarget = 140 TargetRegistry::lookupTarget(std::string(ArchName), *TheTriple, Error); 141 if (!TheTarget) 142 return createStringError(make_error_code(std::errc::not_supported), 143 Twine("BOLT-ERROR: ", Error)); 144 145 std::unique_ptr<const MCRegisterInfo> MRI( 146 TheTarget->createMCRegInfo(TripleName)); 147 if (!MRI) 148 return createStringError( 149 make_error_code(std::errc::not_supported), 150 Twine("BOLT-ERROR: no register info for target ", TripleName)); 151 152 // Set up disassembler. 153 std::unique_ptr<MCAsmInfo> AsmInfo( 154 TheTarget->createMCAsmInfo(*MRI, TripleName, MCTargetOptions())); 155 if (!AsmInfo) 156 return createStringError( 157 make_error_code(std::errc::not_supported), 158 Twine("BOLT-ERROR: no assembly info for target ", TripleName)); 159 // BOLT creates "func@PLT" symbols for PLT entries. In function assembly dump 160 // we want to emit such names as using @PLT without double quotes to convey 161 // variant kind to the assembler. BOLT doesn't rely on the linker so we can 162 // override the default AsmInfo behavior to emit names the way we want. 163 AsmInfo->setAllowAtInName(true); 164 165 std::unique_ptr<const MCSubtargetInfo> STI( 166 TheTarget->createMCSubtargetInfo(TripleName, "", FeaturesStr)); 167 if (!STI) 168 return createStringError( 169 make_error_code(std::errc::not_supported), 170 Twine("BOLT-ERROR: no subtarget info for target ", TripleName)); 171 172 std::unique_ptr<const MCInstrInfo> MII(TheTarget->createMCInstrInfo()); 173 if (!MII) 174 return createStringError( 175 make_error_code(std::errc::not_supported), 176 Twine("BOLT-ERROR: no instruction info for target ", TripleName)); 177 178 std::unique_ptr<MCContext> Ctx( 179 new MCContext(*TheTriple, AsmInfo.get(), MRI.get(), STI.get())); 180 std::unique_ptr<MCObjectFileInfo> MOFI( 181 TheTarget->createMCObjectFileInfo(*Ctx, IsPIC)); 182 Ctx->setObjectFileInfo(MOFI.get()); 183 // We do not support X86 Large code model. Change this in the future. 184 bool Large = false; 185 if (TheTriple->getArch() == llvm::Triple::aarch64) 186 Large = true; 187 unsigned LSDAEncoding = 188 Large ? dwarf::DW_EH_PE_absptr : dwarf::DW_EH_PE_udata4; 189 unsigned TTypeEncoding = 190 Large ? dwarf::DW_EH_PE_absptr : dwarf::DW_EH_PE_udata4; 191 if (IsPIC) { 192 LSDAEncoding = dwarf::DW_EH_PE_pcrel | 193 (Large ? dwarf::DW_EH_PE_sdata8 : dwarf::DW_EH_PE_sdata4); 194 TTypeEncoding = dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel | 195 (Large ? dwarf::DW_EH_PE_sdata8 : dwarf::DW_EH_PE_sdata4); 196 } 197 198 std::unique_ptr<MCDisassembler> DisAsm( 199 TheTarget->createMCDisassembler(*STI, *Ctx)); 200 201 if (!DisAsm) 202 return createStringError( 203 make_error_code(std::errc::not_supported), 204 Twine("BOLT-ERROR: no disassembler info for target ", TripleName)); 205 206 std::unique_ptr<const MCInstrAnalysis> MIA( 207 TheTarget->createMCInstrAnalysis(MII.get())); 208 if (!MIA) 209 return createStringError( 210 make_error_code(std::errc::not_supported), 211 Twine("BOLT-ERROR: failed to create instruction analysis for target ", 212 TripleName)); 213 214 int AsmPrinterVariant = AsmInfo->getAssemblerDialect(); 215 std::unique_ptr<MCInstPrinter> InstructionPrinter( 216 TheTarget->createMCInstPrinter(*TheTriple, AsmPrinterVariant, *AsmInfo, 217 *MII, *MRI)); 218 if (!InstructionPrinter) 219 return createStringError( 220 make_error_code(std::errc::not_supported), 221 Twine("BOLT-ERROR: no instruction printer for target ", TripleName)); 222 InstructionPrinter->setPrintImmHex(true); 223 224 std::unique_ptr<MCCodeEmitter> MCE( 225 TheTarget->createMCCodeEmitter(*MII, *Ctx)); 226 227 // Make sure we don't miss any output on core dumps. 228 outs().SetUnbuffered(); 229 errs().SetUnbuffered(); 230 dbgs().SetUnbuffered(); 231 232 auto BC = std::make_unique<BinaryContext>( 233 std::move(Ctx), std::move(DwCtx), std::move(TheTriple), TheTarget, 234 std::string(TripleName), std::move(MCE), std::move(MOFI), 235 std::move(AsmInfo), std::move(MII), std::move(STI), 236 std::move(InstructionPrinter), std::move(MIA), nullptr, std::move(MRI), 237 std::move(DisAsm)); 238 239 BC->TTypeEncoding = TTypeEncoding; 240 BC->LSDAEncoding = LSDAEncoding; 241 242 BC->MAB = std::unique_ptr<MCAsmBackend>( 243 BC->TheTarget->createMCAsmBackend(*BC->STI, *BC->MRI, MCTargetOptions())); 244 245 BC->setFilename(File->getFileName()); 246 247 BC->HasFixedLoadAddress = !IsPIC; 248 249 BC->SymbolicDisAsm = std::unique_ptr<MCDisassembler>( 250 BC->TheTarget->createMCDisassembler(*BC->STI, *BC->Ctx)); 251 252 if (!BC->SymbolicDisAsm) 253 return createStringError( 254 make_error_code(std::errc::not_supported), 255 Twine("BOLT-ERROR: no disassembler info for target ", TripleName)); 256 257 return std::move(BC); 258 } 259 260 bool BinaryContext::forceSymbolRelocations(StringRef SymbolName) const { 261 if (opts::HotText && 262 (SymbolName == "__hot_start" || SymbolName == "__hot_end")) 263 return true; 264 265 if (opts::HotData && 266 (SymbolName == "__hot_data_start" || SymbolName == "__hot_data_end")) 267 return true; 268 269 if (SymbolName == "_end") 270 return true; 271 272 return false; 273 } 274 275 std::unique_ptr<MCObjectWriter> 276 BinaryContext::createObjectWriter(raw_pwrite_stream &OS) { 277 return MAB->createObjectWriter(OS); 278 } 279 280 bool BinaryContext::validateObjectNesting() const { 281 auto Itr = BinaryDataMap.begin(); 282 auto End = BinaryDataMap.end(); 283 bool Valid = true; 284 while (Itr != End) { 285 auto Next = std::next(Itr); 286 while (Next != End && 287 Itr->second->getSection() == Next->second->getSection() && 288 Itr->second->containsRange(Next->second->getAddress(), 289 Next->second->getSize())) { 290 if (Next->second->Parent != Itr->second) { 291 errs() << "BOLT-WARNING: object nesting incorrect for:\n" 292 << "BOLT-WARNING: " << *Itr->second << "\n" 293 << "BOLT-WARNING: " << *Next->second << "\n"; 294 Valid = false; 295 } 296 ++Next; 297 } 298 Itr = Next; 299 } 300 return Valid; 301 } 302 303 bool BinaryContext::validateHoles() const { 304 bool Valid = true; 305 for (BinarySection &Section : sections()) { 306 for (const Relocation &Rel : Section.relocations()) { 307 uint64_t RelAddr = Rel.Offset + Section.getAddress(); 308 const BinaryData *BD = getBinaryDataContainingAddress(RelAddr); 309 if (!BD) { 310 errs() << "BOLT-WARNING: no BinaryData found for relocation at address" 311 << " 0x" << Twine::utohexstr(RelAddr) << " in " 312 << Section.getName() << "\n"; 313 Valid = false; 314 } else if (!BD->getAtomicRoot()) { 315 errs() << "BOLT-WARNING: no atomic BinaryData found for relocation at " 316 << "address 0x" << Twine::utohexstr(RelAddr) << " in " 317 << Section.getName() << "\n"; 318 Valid = false; 319 } 320 } 321 } 322 return Valid; 323 } 324 325 void BinaryContext::updateObjectNesting(BinaryDataMapType::iterator GAI) { 326 const uint64_t Address = GAI->second->getAddress(); 327 const uint64_t Size = GAI->second->getSize(); 328 329 auto fixParents = [&](BinaryDataMapType::iterator Itr, 330 BinaryData *NewParent) { 331 BinaryData *OldParent = Itr->second->Parent; 332 Itr->second->Parent = NewParent; 333 ++Itr; 334 while (Itr != BinaryDataMap.end() && OldParent && 335 Itr->second->Parent == OldParent) { 336 Itr->second->Parent = NewParent; 337 ++Itr; 338 } 339 }; 340 341 // Check if the previous symbol contains the newly added symbol. 342 if (GAI != BinaryDataMap.begin()) { 343 BinaryData *Prev = std::prev(GAI)->second; 344 while (Prev) { 345 if (Prev->getSection() == GAI->second->getSection() && 346 Prev->containsRange(Address, Size)) { 347 fixParents(GAI, Prev); 348 } else { 349 fixParents(GAI, nullptr); 350 } 351 Prev = Prev->Parent; 352 } 353 } 354 355 // Check if the newly added symbol contains any subsequent symbols. 356 if (Size != 0) { 357 BinaryData *BD = GAI->second->Parent ? GAI->second->Parent : GAI->second; 358 auto Itr = std::next(GAI); 359 while ( 360 Itr != BinaryDataMap.end() && 361 BD->containsRange(Itr->second->getAddress(), Itr->second->getSize())) { 362 Itr->second->Parent = BD; 363 ++Itr; 364 } 365 } 366 } 367 368 iterator_range<BinaryContext::binary_data_iterator> 369 BinaryContext::getSubBinaryData(BinaryData *BD) { 370 auto Start = std::next(BinaryDataMap.find(BD->getAddress())); 371 auto End = Start; 372 while (End != BinaryDataMap.end() && BD->isAncestorOf(End->second)) 373 ++End; 374 return make_range(Start, End); 375 } 376 377 std::pair<const MCSymbol *, uint64_t> 378 BinaryContext::handleAddressRef(uint64_t Address, BinaryFunction &BF, 379 bool IsPCRel) { 380 uint64_t Addend = 0; 381 382 if (isAArch64()) { 383 // Check if this is an access to a constant island and create bookkeeping 384 // to keep track of it and emit it later as part of this function. 385 if (MCSymbol *IslandSym = BF.getOrCreateIslandAccess(Address)) 386 return std::make_pair(IslandSym, Addend); 387 388 // Detect custom code written in assembly that refers to arbitrary 389 // constant islands from other functions. Write this reference so we 390 // can pull this constant island and emit it as part of this function 391 // too. 392 auto IslandIter = AddressToConstantIslandMap.lower_bound(Address); 393 if (IslandIter != AddressToConstantIslandMap.end()) { 394 if (MCSymbol *IslandSym = 395 IslandIter->second->getOrCreateProxyIslandAccess(Address, BF)) { 396 BF.createIslandDependency(IslandSym, IslandIter->second); 397 return std::make_pair(IslandSym, Addend); 398 } 399 } 400 } 401 402 // Note that the address does not necessarily have to reside inside 403 // a section, it could be an absolute address too. 404 ErrorOr<BinarySection &> Section = getSectionForAddress(Address); 405 if (Section && Section->isText()) { 406 if (BF.containsAddress(Address, /*UseMaxSize=*/isAArch64())) { 407 if (Address != BF.getAddress()) { 408 // The address could potentially escape. Mark it as another entry 409 // point into the function. 410 if (opts::Verbosity >= 1) { 411 outs() << "BOLT-INFO: potentially escaped address 0x" 412 << Twine::utohexstr(Address) << " in function " << BF << '\n'; 413 } 414 BF.HasInternalLabelReference = true; 415 return std::make_pair( 416 BF.addEntryPointAtOffset(Address - BF.getAddress()), Addend); 417 } 418 } else { 419 addInterproceduralReference(&BF, Address); 420 } 421 } 422 423 // With relocations, catch jump table references outside of the basic block 424 // containing the indirect jump. 425 if (HasRelocations) { 426 const MemoryContentsType MemType = analyzeMemoryAt(Address, BF); 427 if (MemType == MemoryContentsType::POSSIBLE_PIC_JUMP_TABLE && IsPCRel) { 428 const MCSymbol *Symbol = 429 getOrCreateJumpTable(BF, Address, JumpTable::JTT_PIC); 430 431 return std::make_pair(Symbol, Addend); 432 } 433 } 434 435 if (BinaryData *BD = getBinaryDataContainingAddress(Address)) 436 return std::make_pair(BD->getSymbol(), Address - BD->getAddress()); 437 438 // TODO: use DWARF info to get size/alignment here? 439 MCSymbol *TargetSymbol = getOrCreateGlobalSymbol(Address, "DATAat"); 440 LLVM_DEBUG(dbgs() << "Created symbol " << TargetSymbol->getName() << '\n'); 441 return std::make_pair(TargetSymbol, Addend); 442 } 443 444 MemoryContentsType BinaryContext::analyzeMemoryAt(uint64_t Address, 445 BinaryFunction &BF) { 446 if (!isX86()) 447 return MemoryContentsType::UNKNOWN; 448 449 ErrorOr<BinarySection &> Section = getSectionForAddress(Address); 450 if (!Section) { 451 // No section - possibly an absolute address. Since we don't allow 452 // internal function addresses to escape the function scope - we 453 // consider it a tail call. 454 if (opts::Verbosity > 1) { 455 errs() << "BOLT-WARNING: no section for address 0x" 456 << Twine::utohexstr(Address) << " referenced from function " << BF 457 << '\n'; 458 } 459 return MemoryContentsType::UNKNOWN; 460 } 461 462 if (Section->isVirtual()) { 463 // The contents are filled at runtime. 464 return MemoryContentsType::UNKNOWN; 465 } 466 467 // No support for jump tables in code yet. 468 if (Section->isText()) 469 return MemoryContentsType::UNKNOWN; 470 471 // Start with checking for PIC jump table. We expect non-PIC jump tables 472 // to have high 32 bits set to 0. 473 if (analyzeJumpTable(Address, JumpTable::JTT_PIC, BF)) 474 return MemoryContentsType::POSSIBLE_PIC_JUMP_TABLE; 475 476 if (analyzeJumpTable(Address, JumpTable::JTT_NORMAL, BF)) 477 return MemoryContentsType::POSSIBLE_JUMP_TABLE; 478 479 return MemoryContentsType::UNKNOWN; 480 } 481 482 /// Check if <fragment restored name> == <parent restored name>.cold(.\d+)? 483 bool isPotentialFragmentByName(BinaryFunction &Fragment, 484 BinaryFunction &Parent) { 485 for (StringRef Name : Parent.getNames()) { 486 std::string NamePrefix = Regex::escape(NameResolver::restore(Name)); 487 std::string NameRegex = Twine(NamePrefix, "\\.cold(\\.[0-9]+)?").str(); 488 if (Fragment.hasRestoredNameRegex(NameRegex)) 489 return true; 490 } 491 return false; 492 } 493 494 bool BinaryContext::analyzeJumpTable( 495 const uint64_t Address, const JumpTable::JumpTableType Type, 496 BinaryFunction &BF, const uint64_t NextJTAddress, 497 JumpTable::AddressesType *EntriesAsAddress) { 498 // Is one of the targets __builtin_unreachable? 499 bool HasUnreachable = false; 500 501 // Number of targets other than __builtin_unreachable. 502 uint64_t NumRealEntries = 0; 503 504 auto addEntryAddress = [&](uint64_t EntryAddress) { 505 if (EntriesAsAddress) 506 EntriesAsAddress->emplace_back(EntryAddress); 507 }; 508 509 auto doesBelongToFunction = [&](const uint64_t Addr, 510 BinaryFunction *TargetBF) -> bool { 511 if (BF.containsAddress(Addr)) 512 return true; 513 // Nothing to do if we failed to identify the containing function. 514 if (!TargetBF) 515 return false; 516 // Case 1: check if BF is a fragment and TargetBF is its parent. 517 if (BF.isFragment()) { 518 // Parent function may or may not be already registered. 519 // Set parent link based on function name matching heuristic. 520 return registerFragment(BF, *TargetBF); 521 } 522 // Case 2: check if TargetBF is a fragment and BF is its parent. 523 return TargetBF->isFragment() && registerFragment(*TargetBF, BF); 524 }; 525 526 ErrorOr<BinarySection &> Section = getSectionForAddress(Address); 527 if (!Section) 528 return false; 529 530 // The upper bound is defined by containing object, section limits, and 531 // the next jump table in memory. 532 uint64_t UpperBound = Section->getEndAddress(); 533 const BinaryData *JumpTableBD = getBinaryDataAtAddress(Address); 534 if (JumpTableBD && JumpTableBD->getSize()) { 535 assert(JumpTableBD->getEndAddress() <= UpperBound && 536 "data object cannot cross a section boundary"); 537 UpperBound = JumpTableBD->getEndAddress(); 538 } 539 if (NextJTAddress) 540 UpperBound = std::min(NextJTAddress, UpperBound); 541 542 LLVM_DEBUG({ 543 using JTT = JumpTable::JumpTableType; 544 dbgs() << formatv("BOLT-DEBUG: analyzeJumpTable @{0:x} in {1}, JTT={2}\n", 545 Address, BF.getPrintName(), 546 Type == JTT::JTT_PIC ? "PIC" : "Normal"); 547 }); 548 const uint64_t EntrySize = getJumpTableEntrySize(Type); 549 for (uint64_t EntryAddress = Address; EntryAddress <= UpperBound - EntrySize; 550 EntryAddress += EntrySize) { 551 LLVM_DEBUG(dbgs() << " * Checking 0x" << Twine::utohexstr(EntryAddress) 552 << " -> "); 553 // Check if there's a proper relocation against the jump table entry. 554 if (HasRelocations) { 555 if (Type == JumpTable::JTT_PIC && 556 !DataPCRelocations.count(EntryAddress)) { 557 LLVM_DEBUG( 558 dbgs() << "FAIL: JTT_PIC table, no relocation for this address\n"); 559 break; 560 } 561 if (Type == JumpTable::JTT_NORMAL && !getRelocationAt(EntryAddress)) { 562 LLVM_DEBUG( 563 dbgs() 564 << "FAIL: JTT_NORMAL table, no relocation for this address\n"); 565 break; 566 } 567 } 568 569 const uint64_t Value = 570 (Type == JumpTable::JTT_PIC) 571 ? Address + *getSignedValueAtAddress(EntryAddress, EntrySize) 572 : *getPointerAtAddress(EntryAddress); 573 574 // __builtin_unreachable() case. 575 if (Value == BF.getAddress() + BF.getSize()) { 576 addEntryAddress(Value); 577 HasUnreachable = true; 578 LLVM_DEBUG(dbgs() << formatv("OK: {0:x} __builtin_unreachable\n", Value)); 579 continue; 580 } 581 582 // Function or one of its fragments. 583 BinaryFunction *TargetBF = getBinaryFunctionContainingAddress(Value); 584 585 // We assume that a jump table cannot have function start as an entry. 586 if (!doesBelongToFunction(Value, TargetBF) || Value == BF.getAddress()) { 587 LLVM_DEBUG({ 588 if (!BF.containsAddress(Value)) { 589 dbgs() << "FAIL: function doesn't contain this address\n"; 590 if (TargetBF) { 591 dbgs() << " ! function containing this address: " 592 << TargetBF->getPrintName() << '\n'; 593 if (TargetBF->isFragment()) { 594 dbgs() << " ! is a fragment"; 595 for (BinaryFunction *Parent : TargetBF->ParentFragments) 596 dbgs() << ", parent: " << Parent->getPrintName(); 597 dbgs() << '\n'; 598 } 599 } 600 } 601 if (Value == BF.getAddress()) 602 dbgs() << "FAIL: jump table cannot have function start as an entry\n"; 603 }); 604 break; 605 } 606 607 // Check there's an instruction at this offset. 608 if (TargetBF->getState() == BinaryFunction::State::Disassembled && 609 !TargetBF->getInstructionAtOffset(Value - TargetBF->getAddress())) { 610 LLVM_DEBUG(dbgs() << formatv("FAIL: no instruction at {0:x}\n", Value)); 611 break; 612 } 613 614 ++NumRealEntries; 615 LLVM_DEBUG(dbgs() << formatv("OK: {0:x} real entry\n", Value)); 616 617 if (TargetBF != &BF) 618 BF.setHasIndirectTargetToSplitFragment(true); 619 addEntryAddress(Value); 620 } 621 622 // It's a jump table if the number of real entries is more than 1, or there's 623 // one real entry and "unreachable" targets. If there are only multiple 624 // "unreachable" targets, then it's not a jump table. 625 return NumRealEntries + HasUnreachable >= 2; 626 } 627 628 void BinaryContext::populateJumpTables() { 629 LLVM_DEBUG(dbgs() << "DataPCRelocations: " << DataPCRelocations.size() 630 << '\n'); 631 for (auto JTI = JumpTables.begin(), JTE = JumpTables.end(); JTI != JTE; 632 ++JTI) { 633 JumpTable *JT = JTI->second; 634 635 bool NonSimpleParent = false; 636 for (BinaryFunction *BF : JT->Parents) 637 NonSimpleParent |= !BF->isSimple(); 638 if (NonSimpleParent) 639 continue; 640 641 uint64_t NextJTAddress = 0; 642 auto NextJTI = std::next(JTI); 643 if (NextJTI != JTE) 644 NextJTAddress = NextJTI->second->getAddress(); 645 646 const bool Success = 647 analyzeJumpTable(JT->getAddress(), JT->Type, *(JT->Parents[0]), 648 NextJTAddress, &JT->EntriesAsAddress); 649 if (!Success) { 650 LLVM_DEBUG({ 651 dbgs() << "failed to analyze "; 652 JT->print(dbgs()); 653 if (NextJTI != JTE) { 654 dbgs() << "next "; 655 NextJTI->second->print(dbgs()); 656 } 657 }); 658 llvm_unreachable("jump table heuristic failure"); 659 } 660 for (BinaryFunction *Frag : JT->Parents) { 661 for (uint64_t EntryAddress : JT->EntriesAsAddress) 662 // if target is builtin_unreachable 663 if (EntryAddress == Frag->getAddress() + Frag->getSize()) { 664 Frag->IgnoredBranches.emplace_back(EntryAddress - Frag->getAddress(), 665 Frag->getSize()); 666 } else if (EntryAddress >= Frag->getAddress() && 667 EntryAddress < Frag->getAddress() + Frag->getSize()) { 668 Frag->registerReferencedOffset(EntryAddress - Frag->getAddress()); 669 } 670 } 671 672 // In strict mode, erase PC-relative relocation record. Later we check that 673 // all such records are erased and thus have been accounted for. 674 if (opts::StrictMode && JT->Type == JumpTable::JTT_PIC) { 675 for (uint64_t Address = JT->getAddress(); 676 Address < JT->getAddress() + JT->getSize(); 677 Address += JT->EntrySize) { 678 DataPCRelocations.erase(DataPCRelocations.find(Address)); 679 } 680 } 681 682 // Mark to skip the function and all its fragments. 683 for (BinaryFunction *Frag : JT->Parents) 684 if (Frag->hasIndirectTargetToSplitFragment()) 685 addFragmentsToSkip(Frag); 686 } 687 688 if (opts::StrictMode && DataPCRelocations.size()) { 689 LLVM_DEBUG({ 690 dbgs() << DataPCRelocations.size() 691 << " unclaimed PC-relative relocations left in data:\n"; 692 for (uint64_t Reloc : DataPCRelocations) 693 dbgs() << Twine::utohexstr(Reloc) << '\n'; 694 }); 695 assert(0 && "unclaimed PC-relative relocations left in data\n"); 696 } 697 clearList(DataPCRelocations); 698 } 699 700 void BinaryContext::skipMarkedFragments() { 701 std::vector<BinaryFunction *> FragmentQueue; 702 // Copy the functions to FragmentQueue. 703 FragmentQueue.assign(FragmentsToSkip.begin(), FragmentsToSkip.end()); 704 auto addToWorklist = [&](BinaryFunction *Function) -> void { 705 if (FragmentsToSkip.count(Function)) 706 return; 707 FragmentQueue.push_back(Function); 708 addFragmentsToSkip(Function); 709 }; 710 // Functions containing split jump tables need to be skipped with all 711 // fragments (transitively). 712 for (size_t I = 0; I != FragmentQueue.size(); I++) { 713 BinaryFunction *BF = FragmentQueue[I]; 714 assert(FragmentsToSkip.count(BF) && 715 "internal error in traversing function fragments"); 716 if (opts::Verbosity >= 1) 717 errs() << "BOLT-WARNING: Ignoring " << BF->getPrintName() << '\n'; 718 BF->setSimple(false); 719 BF->setHasIndirectTargetToSplitFragment(true); 720 721 llvm::for_each(BF->Fragments, addToWorklist); 722 llvm::for_each(BF->ParentFragments, addToWorklist); 723 } 724 if (!FragmentsToSkip.empty()) 725 errs() << "BOLT-WARNING: skipped " << FragmentsToSkip.size() << " function" 726 << (FragmentsToSkip.size() == 1 ? "" : "s") 727 << " due to cold fragments\n"; 728 } 729 730 MCSymbol *BinaryContext::getOrCreateGlobalSymbol(uint64_t Address, Twine Prefix, 731 uint64_t Size, 732 uint16_t Alignment, 733 unsigned Flags) { 734 auto Itr = BinaryDataMap.find(Address); 735 if (Itr != BinaryDataMap.end()) { 736 assert(Itr->second->getSize() == Size || !Size); 737 return Itr->second->getSymbol(); 738 } 739 740 std::string Name = (Prefix + "0x" + Twine::utohexstr(Address)).str(); 741 assert(!GlobalSymbols.count(Name) && "created name is not unique"); 742 return registerNameAtAddress(Name, Address, Size, Alignment, Flags); 743 } 744 745 MCSymbol *BinaryContext::getOrCreateUndefinedGlobalSymbol(StringRef Name) { 746 return Ctx->getOrCreateSymbol(Name); 747 } 748 749 BinaryFunction *BinaryContext::createBinaryFunction( 750 const std::string &Name, BinarySection &Section, uint64_t Address, 751 uint64_t Size, uint64_t SymbolSize, uint16_t Alignment) { 752 auto Result = BinaryFunctions.emplace( 753 Address, BinaryFunction(Name, Section, Address, Size, *this)); 754 assert(Result.second == true && "unexpected duplicate function"); 755 BinaryFunction *BF = &Result.first->second; 756 registerNameAtAddress(Name, Address, SymbolSize ? SymbolSize : Size, 757 Alignment); 758 setSymbolToFunctionMap(BF->getSymbol(), BF); 759 return BF; 760 } 761 762 const MCSymbol * 763 BinaryContext::getOrCreateJumpTable(BinaryFunction &Function, uint64_t Address, 764 JumpTable::JumpTableType Type) { 765 auto isFragmentOf = [](BinaryFunction *Fragment, BinaryFunction *Parent) { 766 return (Fragment->isFragment() && Fragment->isParentFragment(Parent)); 767 }; 768 (void)isFragmentOf; 769 770 // Two fragments of same function access same jump table 771 if (JumpTable *JT = getJumpTableContainingAddress(Address)) { 772 assert(JT->Type == Type && "jump table types have to match"); 773 assert(Address == JT->getAddress() && "unexpected non-empty jump table"); 774 775 // Prevent associating a jump table to a specific fragment twice. 776 // This simple check arises from the assumption: no more than 2 fragments. 777 if (JT->Parents.size() == 1 && JT->Parents[0] != &Function) { 778 assert((isFragmentOf(JT->Parents[0], &Function) || 779 isFragmentOf(&Function, JT->Parents[0])) && 780 "cannot re-use jump table of a different function"); 781 // Duplicate the entry for the parent function for easy access 782 JT->Parents.push_back(&Function); 783 if (opts::Verbosity > 2) { 784 outs() << "BOLT-INFO: Multiple fragments access same jump table: " 785 << JT->Parents[0]->getPrintName() << "; " 786 << Function.getPrintName() << "\n"; 787 JT->print(outs()); 788 } 789 Function.JumpTables.emplace(Address, JT); 790 JT->Parents[0]->setHasIndirectTargetToSplitFragment(true); 791 JT->Parents[1]->setHasIndirectTargetToSplitFragment(true); 792 } 793 794 bool IsJumpTableParent = false; 795 (void)IsJumpTableParent; 796 for (BinaryFunction *Frag : JT->Parents) 797 if (Frag == &Function) 798 IsJumpTableParent = true; 799 assert(IsJumpTableParent && 800 "cannot re-use jump table of a different function"); 801 return JT->getFirstLabel(); 802 } 803 804 // Re-use the existing symbol if possible. 805 MCSymbol *JTLabel = nullptr; 806 if (BinaryData *Object = getBinaryDataAtAddress(Address)) { 807 if (!isInternalSymbolName(Object->getSymbol()->getName())) 808 JTLabel = Object->getSymbol(); 809 } 810 811 const uint64_t EntrySize = getJumpTableEntrySize(Type); 812 if (!JTLabel) { 813 const std::string JumpTableName = generateJumpTableName(Function, Address); 814 JTLabel = registerNameAtAddress(JumpTableName, Address, 0, EntrySize); 815 } 816 817 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: creating jump table " << JTLabel->getName() 818 << " in function " << Function << '\n'); 819 820 JumpTable *JT = new JumpTable(*JTLabel, Address, EntrySize, Type, 821 JumpTable::LabelMapType{{0, JTLabel}}, 822 *getSectionForAddress(Address)); 823 JT->Parents.push_back(&Function); 824 if (opts::Verbosity > 2) 825 JT->print(outs()); 826 JumpTables.emplace(Address, JT); 827 828 // Duplicate the entry for the parent function for easy access. 829 Function.JumpTables.emplace(Address, JT); 830 return JTLabel; 831 } 832 833 std::pair<uint64_t, const MCSymbol *> 834 BinaryContext::duplicateJumpTable(BinaryFunction &Function, JumpTable *JT, 835 const MCSymbol *OldLabel) { 836 auto L = scopeLock(); 837 unsigned Offset = 0; 838 bool Found = false; 839 for (std::pair<const unsigned, MCSymbol *> Elmt : JT->Labels) { 840 if (Elmt.second != OldLabel) 841 continue; 842 Offset = Elmt.first; 843 Found = true; 844 break; 845 } 846 assert(Found && "Label not found"); 847 (void)Found; 848 MCSymbol *NewLabel = Ctx->createNamedTempSymbol("duplicatedJT"); 849 JumpTable *NewJT = 850 new JumpTable(*NewLabel, JT->getAddress(), JT->EntrySize, JT->Type, 851 JumpTable::LabelMapType{{Offset, NewLabel}}, 852 *getSectionForAddress(JT->getAddress())); 853 NewJT->Parents = JT->Parents; 854 NewJT->Entries = JT->Entries; 855 NewJT->Counts = JT->Counts; 856 uint64_t JumpTableID = ++DuplicatedJumpTables; 857 // Invert it to differentiate from regular jump tables whose IDs are their 858 // addresses in the input binary memory space 859 JumpTableID = ~JumpTableID; 860 JumpTables.emplace(JumpTableID, NewJT); 861 Function.JumpTables.emplace(JumpTableID, NewJT); 862 return std::make_pair(JumpTableID, NewLabel); 863 } 864 865 std::string BinaryContext::generateJumpTableName(const BinaryFunction &BF, 866 uint64_t Address) { 867 size_t Id; 868 uint64_t Offset = 0; 869 if (const JumpTable *JT = BF.getJumpTableContainingAddress(Address)) { 870 Offset = Address - JT->getAddress(); 871 auto Itr = JT->Labels.find(Offset); 872 if (Itr != JT->Labels.end()) 873 return std::string(Itr->second->getName()); 874 Id = JumpTableIds.at(JT->getAddress()); 875 } else { 876 Id = JumpTableIds[Address] = BF.JumpTables.size(); 877 } 878 return ("JUMP_TABLE/" + BF.getOneName().str() + "." + std::to_string(Id) + 879 (Offset ? ("." + std::to_string(Offset)) : "")); 880 } 881 882 bool BinaryContext::hasValidCodePadding(const BinaryFunction &BF) { 883 // FIXME: aarch64 support is missing. 884 if (!isX86()) 885 return true; 886 887 if (BF.getSize() == BF.getMaxSize()) 888 return true; 889 890 ErrorOr<ArrayRef<unsigned char>> FunctionData = BF.getData(); 891 assert(FunctionData && "cannot get function as data"); 892 893 uint64_t Offset = BF.getSize(); 894 MCInst Instr; 895 uint64_t InstrSize = 0; 896 uint64_t InstrAddress = BF.getAddress() + Offset; 897 using std::placeholders::_1; 898 899 // Skip instructions that satisfy the predicate condition. 900 auto skipInstructions = [&](std::function<bool(const MCInst &)> Predicate) { 901 const uint64_t StartOffset = Offset; 902 for (; Offset < BF.getMaxSize(); 903 Offset += InstrSize, InstrAddress += InstrSize) { 904 if (!DisAsm->getInstruction(Instr, InstrSize, FunctionData->slice(Offset), 905 InstrAddress, nulls())) 906 break; 907 if (!Predicate(Instr)) 908 break; 909 } 910 911 return Offset - StartOffset; 912 }; 913 914 // Skip a sequence of zero bytes. 915 auto skipZeros = [&]() { 916 const uint64_t StartOffset = Offset; 917 for (; Offset < BF.getMaxSize(); ++Offset) 918 if ((*FunctionData)[Offset] != 0) 919 break; 920 921 return Offset - StartOffset; 922 }; 923 924 // Accept the whole padding area filled with breakpoints. 925 auto isBreakpoint = std::bind(&MCPlusBuilder::isBreakpoint, MIB.get(), _1); 926 if (skipInstructions(isBreakpoint) && Offset == BF.getMaxSize()) 927 return true; 928 929 auto isNoop = std::bind(&MCPlusBuilder::isNoop, MIB.get(), _1); 930 931 // Some functions have a jump to the next function or to the padding area 932 // inserted after the body. 933 auto isSkipJump = [&](const MCInst &Instr) { 934 uint64_t TargetAddress = 0; 935 if (MIB->isUnconditionalBranch(Instr) && 936 MIB->evaluateBranch(Instr, InstrAddress, InstrSize, TargetAddress)) { 937 if (TargetAddress >= InstrAddress + InstrSize && 938 TargetAddress <= BF.getAddress() + BF.getMaxSize()) { 939 return true; 940 } 941 } 942 return false; 943 }; 944 945 // Skip over nops, jumps, and zero padding. Allow interleaving (this happens). 946 while (skipInstructions(isNoop) || skipInstructions(isSkipJump) || 947 skipZeros()) 948 ; 949 950 if (Offset == BF.getMaxSize()) 951 return true; 952 953 if (opts::Verbosity >= 1) { 954 errs() << "BOLT-WARNING: bad padding at address 0x" 955 << Twine::utohexstr(BF.getAddress() + BF.getSize()) 956 << " starting at offset " << (Offset - BF.getSize()) 957 << " in function " << BF << '\n' 958 << FunctionData->slice(BF.getSize(), BF.getMaxSize() - BF.getSize()) 959 << '\n'; 960 } 961 962 return false; 963 } 964 965 void BinaryContext::adjustCodePadding() { 966 for (auto &BFI : BinaryFunctions) { 967 BinaryFunction &BF = BFI.second; 968 if (!shouldEmit(BF)) 969 continue; 970 971 if (!hasValidCodePadding(BF)) { 972 if (HasRelocations) { 973 if (opts::Verbosity >= 1) { 974 outs() << "BOLT-INFO: function " << BF 975 << " has invalid padding. Ignoring the function.\n"; 976 } 977 BF.setIgnored(); 978 } else { 979 BF.setMaxSize(BF.getSize()); 980 } 981 } 982 } 983 } 984 985 MCSymbol *BinaryContext::registerNameAtAddress(StringRef Name, uint64_t Address, 986 uint64_t Size, 987 uint16_t Alignment, 988 unsigned Flags) { 989 // Register the name with MCContext. 990 MCSymbol *Symbol = Ctx->getOrCreateSymbol(Name); 991 992 auto GAI = BinaryDataMap.find(Address); 993 BinaryData *BD; 994 if (GAI == BinaryDataMap.end()) { 995 ErrorOr<BinarySection &> SectionOrErr = getSectionForAddress(Address); 996 BinarySection &Section = 997 SectionOrErr ? SectionOrErr.get() : absoluteSection(); 998 BD = new BinaryData(*Symbol, Address, Size, Alignment ? Alignment : 1, 999 Section, Flags); 1000 GAI = BinaryDataMap.emplace(Address, BD).first; 1001 GlobalSymbols[Name] = BD; 1002 updateObjectNesting(GAI); 1003 } else { 1004 BD = GAI->second; 1005 if (!BD->hasName(Name)) { 1006 GlobalSymbols[Name] = BD; 1007 BD->Symbols.push_back(Symbol); 1008 } 1009 } 1010 1011 return Symbol; 1012 } 1013 1014 const BinaryData * 1015 BinaryContext::getBinaryDataContainingAddressImpl(uint64_t Address) const { 1016 auto NI = BinaryDataMap.lower_bound(Address); 1017 auto End = BinaryDataMap.end(); 1018 if ((NI != End && Address == NI->first) || 1019 ((NI != BinaryDataMap.begin()) && (NI-- != BinaryDataMap.begin()))) { 1020 if (NI->second->containsAddress(Address)) 1021 return NI->second; 1022 1023 // If this is a sub-symbol, see if a parent data contains the address. 1024 const BinaryData *BD = NI->second->getParent(); 1025 while (BD) { 1026 if (BD->containsAddress(Address)) 1027 return BD; 1028 BD = BD->getParent(); 1029 } 1030 } 1031 return nullptr; 1032 } 1033 1034 bool BinaryContext::setBinaryDataSize(uint64_t Address, uint64_t Size) { 1035 auto NI = BinaryDataMap.find(Address); 1036 assert(NI != BinaryDataMap.end()); 1037 if (NI == BinaryDataMap.end()) 1038 return false; 1039 // TODO: it's possible that a jump table starts at the same address 1040 // as a larger blob of private data. When we set the size of the 1041 // jump table, it might be smaller than the total blob size. In this 1042 // case we just leave the original size since (currently) it won't really 1043 // affect anything. 1044 assert((!NI->second->Size || NI->second->Size == Size || 1045 (NI->second->isJumpTable() && NI->second->Size > Size)) && 1046 "can't change the size of a symbol that has already had its " 1047 "size set"); 1048 if (!NI->second->Size) { 1049 NI->second->Size = Size; 1050 updateObjectNesting(NI); 1051 return true; 1052 } 1053 return false; 1054 } 1055 1056 void BinaryContext::generateSymbolHashes() { 1057 auto isPadding = [](const BinaryData &BD) { 1058 StringRef Contents = BD.getSection().getContents(); 1059 StringRef SymData = Contents.substr(BD.getOffset(), BD.getSize()); 1060 return (BD.getName().startswith("HOLEat") || 1061 SymData.find_first_not_of(0) == StringRef::npos); 1062 }; 1063 1064 uint64_t NumCollisions = 0; 1065 for (auto &Entry : BinaryDataMap) { 1066 BinaryData &BD = *Entry.second; 1067 StringRef Name = BD.getName(); 1068 1069 if (!isInternalSymbolName(Name)) 1070 continue; 1071 1072 // First check if a non-anonymous alias exists and move it to the front. 1073 if (BD.getSymbols().size() > 1) { 1074 auto Itr = llvm::find_if(BD.getSymbols(), [&](const MCSymbol *Symbol) { 1075 return !isInternalSymbolName(Symbol->getName()); 1076 }); 1077 if (Itr != BD.getSymbols().end()) { 1078 size_t Idx = std::distance(BD.getSymbols().begin(), Itr); 1079 std::swap(BD.getSymbols()[0], BD.getSymbols()[Idx]); 1080 continue; 1081 } 1082 } 1083 1084 // We have to skip 0 size symbols since they will all collide. 1085 if (BD.getSize() == 0) { 1086 continue; 1087 } 1088 1089 const uint64_t Hash = BD.getSection().hash(BD); 1090 const size_t Idx = Name.find("0x"); 1091 std::string NewName = 1092 (Twine(Name.substr(0, Idx)) + "_" + Twine::utohexstr(Hash)).str(); 1093 if (getBinaryDataByName(NewName)) { 1094 // Ignore collisions for symbols that appear to be padding 1095 // (i.e. all zeros or a "hole") 1096 if (!isPadding(BD)) { 1097 if (opts::Verbosity) { 1098 errs() << "BOLT-WARNING: collision detected when hashing " << BD 1099 << " with new name (" << NewName << "), skipping.\n"; 1100 } 1101 ++NumCollisions; 1102 } 1103 continue; 1104 } 1105 BD.Symbols.insert(BD.Symbols.begin(), Ctx->getOrCreateSymbol(NewName)); 1106 GlobalSymbols[NewName] = &BD; 1107 } 1108 if (NumCollisions) { 1109 errs() << "BOLT-WARNING: " << NumCollisions 1110 << " collisions detected while hashing binary objects"; 1111 if (!opts::Verbosity) 1112 errs() << ". Use -v=1 to see the list."; 1113 errs() << '\n'; 1114 } 1115 } 1116 1117 bool BinaryContext::registerFragment(BinaryFunction &TargetFunction, 1118 BinaryFunction &Function) const { 1119 if (!isPotentialFragmentByName(TargetFunction, Function)) 1120 return false; 1121 assert(TargetFunction.isFragment() && "TargetFunction must be a fragment"); 1122 if (TargetFunction.isParentFragment(&Function)) 1123 return true; 1124 TargetFunction.addParentFragment(Function); 1125 Function.addFragment(TargetFunction); 1126 if (!HasRelocations) { 1127 TargetFunction.setSimple(false); 1128 Function.setSimple(false); 1129 } 1130 if (opts::Verbosity >= 1) { 1131 outs() << "BOLT-INFO: marking " << TargetFunction << " as a fragment of " 1132 << Function << '\n'; 1133 } 1134 return true; 1135 } 1136 1137 void BinaryContext::addAdrpAddRelocAArch64(BinaryFunction &BF, 1138 MCInst &LoadLowBits, 1139 MCInst &LoadHiBits, 1140 uint64_t Target) { 1141 const MCSymbol *TargetSymbol; 1142 uint64_t Addend = 0; 1143 std::tie(TargetSymbol, Addend) = handleAddressRef(Target, BF, 1144 /*IsPCRel*/ true); 1145 int64_t Val; 1146 MIB->replaceImmWithSymbolRef(LoadHiBits, TargetSymbol, Addend, Ctx.get(), Val, 1147 ELF::R_AARCH64_ADR_PREL_PG_HI21); 1148 MIB->replaceImmWithSymbolRef(LoadLowBits, TargetSymbol, Addend, Ctx.get(), 1149 Val, ELF::R_AARCH64_ADD_ABS_LO12_NC); 1150 } 1151 1152 bool BinaryContext::handleAArch64Veneer(uint64_t Address, bool MatchOnly) { 1153 BinaryFunction *TargetFunction = getBinaryFunctionContainingAddress(Address); 1154 if (TargetFunction) 1155 return false; 1156 1157 ErrorOr<BinarySection &> Section = getSectionForAddress(Address); 1158 assert(Section && "cannot get section for referenced address"); 1159 if (!Section->isText()) 1160 return false; 1161 1162 bool Ret = false; 1163 StringRef SectionContents = Section->getContents(); 1164 uint64_t Offset = Address - Section->getAddress(); 1165 const uint64_t MaxSize = SectionContents.size() - Offset; 1166 const uint8_t *Bytes = 1167 reinterpret_cast<const uint8_t *>(SectionContents.data()); 1168 ArrayRef<uint8_t> Data(Bytes + Offset, MaxSize); 1169 1170 auto matchVeneer = [&](BinaryFunction::InstrMapType &Instructions, 1171 MCInst &Instruction, uint64_t Offset, 1172 uint64_t AbsoluteInstrAddr, 1173 uint64_t TotalSize) -> bool { 1174 MCInst *TargetHiBits, *TargetLowBits; 1175 uint64_t TargetAddress, Count; 1176 Count = MIB->matchLinkerVeneer(Instructions.begin(), Instructions.end(), 1177 AbsoluteInstrAddr, Instruction, TargetHiBits, 1178 TargetLowBits, TargetAddress); 1179 if (!Count) 1180 return false; 1181 1182 if (MatchOnly) 1183 return true; 1184 1185 // NOTE The target symbol was created during disassemble's 1186 // handleExternalReference 1187 const MCSymbol *VeneerSymbol = getOrCreateGlobalSymbol(Address, "FUNCat"); 1188 BinaryFunction *Veneer = createBinaryFunction(VeneerSymbol->getName().str(), 1189 *Section, Address, TotalSize); 1190 addAdrpAddRelocAArch64(*Veneer, *TargetLowBits, *TargetHiBits, 1191 TargetAddress); 1192 MIB->addAnnotation(Instruction, "AArch64Veneer", true); 1193 Veneer->addInstruction(Offset, std::move(Instruction)); 1194 --Count; 1195 for (auto It = std::prev(Instructions.end()); Count != 0; 1196 It = std::prev(It), --Count) { 1197 MIB->addAnnotation(It->second, "AArch64Veneer", true); 1198 Veneer->addInstruction(It->first, std::move(It->second)); 1199 } 1200 1201 Veneer->getOrCreateLocalLabel(Address); 1202 Veneer->setMaxSize(TotalSize); 1203 Veneer->updateState(BinaryFunction::State::Disassembled); 1204 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: handling veneer function at 0x" << Address 1205 << "\n"); 1206 return true; 1207 }; 1208 1209 uint64_t Size = 0, TotalSize = 0; 1210 BinaryFunction::InstrMapType VeneerInstructions; 1211 for (Offset = 0; Offset < MaxSize; Offset += Size) { 1212 MCInst Instruction; 1213 const uint64_t AbsoluteInstrAddr = Address + Offset; 1214 if (!SymbolicDisAsm->getInstruction(Instruction, Size, Data.slice(Offset), 1215 AbsoluteInstrAddr, nulls())) 1216 break; 1217 1218 TotalSize += Size; 1219 if (MIB->isBranch(Instruction)) { 1220 Ret = matchVeneer(VeneerInstructions, Instruction, Offset, 1221 AbsoluteInstrAddr, TotalSize); 1222 break; 1223 } 1224 1225 VeneerInstructions.emplace(Offset, std::move(Instruction)); 1226 } 1227 1228 return Ret; 1229 } 1230 1231 void BinaryContext::processInterproceduralReferences() { 1232 for (const std::pair<BinaryFunction *, uint64_t> &It : 1233 InterproceduralReferences) { 1234 BinaryFunction &Function = *It.first; 1235 uint64_t Address = It.second; 1236 if (!Address || Function.isIgnored()) 1237 continue; 1238 1239 BinaryFunction *TargetFunction = 1240 getBinaryFunctionContainingAddress(Address); 1241 if (&Function == TargetFunction) 1242 continue; 1243 1244 if (TargetFunction) { 1245 if (TargetFunction->isFragment() && 1246 !registerFragment(*TargetFunction, Function)) { 1247 errs() << "BOLT-WARNING: interprocedural reference between unrelated " 1248 "fragments: " 1249 << Function.getPrintName() << " and " 1250 << TargetFunction->getPrintName() << '\n'; 1251 } 1252 if (uint64_t Offset = Address - TargetFunction->getAddress()) 1253 TargetFunction->addEntryPointAtOffset(Offset); 1254 1255 continue; 1256 } 1257 1258 // Check if address falls in function padding space - this could be 1259 // unmarked data in code. In this case adjust the padding space size. 1260 ErrorOr<BinarySection &> Section = getSectionForAddress(Address); 1261 assert(Section && "cannot get section for referenced address"); 1262 1263 if (!Section->isText()) 1264 continue; 1265 1266 // PLT requires special handling and could be ignored in this context. 1267 StringRef SectionName = Section->getName(); 1268 if (SectionName == ".plt" || SectionName == ".plt.got") 1269 continue; 1270 1271 // Check if it is aarch64 veneer written at Address 1272 if (isAArch64() && handleAArch64Veneer(Address)) 1273 continue; 1274 1275 if (opts::processAllFunctions()) { 1276 errs() << "BOLT-ERROR: cannot process binaries with unmarked " 1277 << "object in code at address 0x" << Twine::utohexstr(Address) 1278 << " belonging to section " << SectionName << " in current mode\n"; 1279 exit(1); 1280 } 1281 1282 TargetFunction = getBinaryFunctionContainingAddress(Address, 1283 /*CheckPastEnd=*/false, 1284 /*UseMaxSize=*/true); 1285 // We are not going to overwrite non-simple functions, but for simple 1286 // ones - adjust the padding size. 1287 if (TargetFunction && TargetFunction->isSimple()) { 1288 errs() << "BOLT-WARNING: function " << *TargetFunction 1289 << " has an object detected in a padding region at address 0x" 1290 << Twine::utohexstr(Address) << '\n'; 1291 TargetFunction->setMaxSize(TargetFunction->getSize()); 1292 } 1293 } 1294 1295 InterproceduralReferences.clear(); 1296 } 1297 1298 void BinaryContext::postProcessSymbolTable() { 1299 fixBinaryDataHoles(); 1300 bool Valid = true; 1301 for (auto &Entry : BinaryDataMap) { 1302 BinaryData *BD = Entry.second; 1303 if ((BD->getName().startswith("SYMBOLat") || 1304 BD->getName().startswith("DATAat")) && 1305 !BD->getParent() && !BD->getSize() && !BD->isAbsolute() && 1306 BD->getSection()) { 1307 errs() << "BOLT-WARNING: zero-sized top level symbol: " << *BD << "\n"; 1308 Valid = false; 1309 } 1310 } 1311 assert(Valid); 1312 (void)Valid; 1313 generateSymbolHashes(); 1314 } 1315 1316 void BinaryContext::foldFunction(BinaryFunction &ChildBF, 1317 BinaryFunction &ParentBF) { 1318 assert(!ChildBF.isMultiEntry() && !ParentBF.isMultiEntry() && 1319 "cannot merge functions with multiple entry points"); 1320 1321 std::unique_lock<std::shared_timed_mutex> WriteCtxLock(CtxMutex, 1322 std::defer_lock); 1323 std::unique_lock<std::shared_timed_mutex> WriteSymbolMapLock( 1324 SymbolToFunctionMapMutex, std::defer_lock); 1325 1326 const StringRef ChildName = ChildBF.getOneName(); 1327 1328 // Move symbols over and update bookkeeping info. 1329 for (MCSymbol *Symbol : ChildBF.getSymbols()) { 1330 ParentBF.getSymbols().push_back(Symbol); 1331 WriteSymbolMapLock.lock(); 1332 SymbolToFunctionMap[Symbol] = &ParentBF; 1333 WriteSymbolMapLock.unlock(); 1334 // NB: there's no need to update BinaryDataMap and GlobalSymbols. 1335 } 1336 ChildBF.getSymbols().clear(); 1337 1338 // Move other names the child function is known under. 1339 llvm::move(ChildBF.Aliases, std::back_inserter(ParentBF.Aliases)); 1340 ChildBF.Aliases.clear(); 1341 1342 if (HasRelocations) { 1343 // Merge execution counts of ChildBF into those of ParentBF. 1344 // Without relocations, we cannot reliably merge profiles as both functions 1345 // continue to exist and either one can be executed. 1346 ChildBF.mergeProfileDataInto(ParentBF); 1347 1348 std::shared_lock<std::shared_timed_mutex> ReadBfsLock(BinaryFunctionsMutex, 1349 std::defer_lock); 1350 std::unique_lock<std::shared_timed_mutex> WriteBfsLock(BinaryFunctionsMutex, 1351 std::defer_lock); 1352 // Remove ChildBF from the global set of functions in relocs mode. 1353 ReadBfsLock.lock(); 1354 auto FI = BinaryFunctions.find(ChildBF.getAddress()); 1355 ReadBfsLock.unlock(); 1356 1357 assert(FI != BinaryFunctions.end() && "function not found"); 1358 assert(&ChildBF == &FI->second && "function mismatch"); 1359 1360 WriteBfsLock.lock(); 1361 ChildBF.clearDisasmState(); 1362 FI = BinaryFunctions.erase(FI); 1363 WriteBfsLock.unlock(); 1364 1365 } else { 1366 // In non-relocation mode we keep the function, but rename it. 1367 std::string NewName = "__ICF_" + ChildName.str(); 1368 1369 WriteCtxLock.lock(); 1370 ChildBF.getSymbols().push_back(Ctx->getOrCreateSymbol(NewName)); 1371 WriteCtxLock.unlock(); 1372 1373 ChildBF.setFolded(&ParentBF); 1374 } 1375 } 1376 1377 void BinaryContext::fixBinaryDataHoles() { 1378 assert(validateObjectNesting() && "object nesting inconsitency detected"); 1379 1380 for (BinarySection &Section : allocatableSections()) { 1381 std::vector<std::pair<uint64_t, uint64_t>> Holes; 1382 1383 auto isNotHole = [&Section](const binary_data_iterator &Itr) { 1384 BinaryData *BD = Itr->second; 1385 bool isHole = (!BD->getParent() && !BD->getSize() && BD->isObject() && 1386 (BD->getName().startswith("SYMBOLat0x") || 1387 BD->getName().startswith("DATAat0x") || 1388 BD->getName().startswith("ANONYMOUS"))); 1389 return !isHole && BD->getSection() == Section && !BD->getParent(); 1390 }; 1391 1392 auto BDStart = BinaryDataMap.begin(); 1393 auto BDEnd = BinaryDataMap.end(); 1394 auto Itr = FilteredBinaryDataIterator(isNotHole, BDStart, BDEnd); 1395 auto End = FilteredBinaryDataIterator(isNotHole, BDEnd, BDEnd); 1396 1397 uint64_t EndAddress = Section.getAddress(); 1398 1399 while (Itr != End) { 1400 if (Itr->second->getAddress() > EndAddress) { 1401 uint64_t Gap = Itr->second->getAddress() - EndAddress; 1402 Holes.emplace_back(EndAddress, Gap); 1403 } 1404 EndAddress = Itr->second->getEndAddress(); 1405 ++Itr; 1406 } 1407 1408 if (EndAddress < Section.getEndAddress()) 1409 Holes.emplace_back(EndAddress, Section.getEndAddress() - EndAddress); 1410 1411 // If there is already a symbol at the start of the hole, grow that symbol 1412 // to cover the rest. Otherwise, create a new symbol to cover the hole. 1413 for (std::pair<uint64_t, uint64_t> &Hole : Holes) { 1414 BinaryData *BD = getBinaryDataAtAddress(Hole.first); 1415 if (BD) { 1416 // BD->getSection() can be != Section if there are sections that 1417 // overlap. In this case it is probably safe to just skip the holes 1418 // since the overlapping section will not(?) have any symbols in it. 1419 if (BD->getSection() == Section) 1420 setBinaryDataSize(Hole.first, Hole.second); 1421 } else { 1422 getOrCreateGlobalSymbol(Hole.first, "HOLEat", Hole.second, 1); 1423 } 1424 } 1425 } 1426 1427 assert(validateObjectNesting() && "object nesting inconsitency detected"); 1428 assert(validateHoles() && "top level hole detected in object map"); 1429 } 1430 1431 void BinaryContext::printGlobalSymbols(raw_ostream &OS) const { 1432 const BinarySection *CurrentSection = nullptr; 1433 bool FirstSection = true; 1434 1435 for (auto &Entry : BinaryDataMap) { 1436 const BinaryData *BD = Entry.second; 1437 const BinarySection &Section = BD->getSection(); 1438 if (FirstSection || Section != *CurrentSection) { 1439 uint64_t Address, Size; 1440 StringRef Name = Section.getName(); 1441 if (Section) { 1442 Address = Section.getAddress(); 1443 Size = Section.getSize(); 1444 } else { 1445 Address = BD->getAddress(); 1446 Size = BD->getSize(); 1447 } 1448 OS << "BOLT-INFO: Section " << Name << ", " 1449 << "0x" + Twine::utohexstr(Address) << ":" 1450 << "0x" + Twine::utohexstr(Address + Size) << "/" << Size << "\n"; 1451 CurrentSection = &Section; 1452 FirstSection = false; 1453 } 1454 1455 OS << "BOLT-INFO: "; 1456 const BinaryData *P = BD->getParent(); 1457 while (P) { 1458 OS << " "; 1459 P = P->getParent(); 1460 } 1461 OS << *BD << "\n"; 1462 } 1463 } 1464 1465 Expected<unsigned> BinaryContext::getDwarfFile( 1466 StringRef Directory, StringRef FileName, unsigned FileNumber, 1467 Optional<MD5::MD5Result> Checksum, Optional<StringRef> Source, 1468 unsigned CUID, unsigned DWARFVersion) { 1469 DwarfLineTable &Table = DwarfLineTablesCUMap[CUID]; 1470 return Table.tryGetFile(Directory, FileName, Checksum, Source, DWARFVersion, 1471 FileNumber); 1472 } 1473 1474 unsigned BinaryContext::addDebugFilenameToUnit(const uint32_t DestCUID, 1475 const uint32_t SrcCUID, 1476 unsigned FileIndex) { 1477 DWARFCompileUnit *SrcUnit = DwCtx->getCompileUnitForOffset(SrcCUID); 1478 const DWARFDebugLine::LineTable *LineTable = 1479 DwCtx->getLineTableForUnit(SrcUnit); 1480 const std::vector<DWARFDebugLine::FileNameEntry> &FileNames = 1481 LineTable->Prologue.FileNames; 1482 // Dir indexes start at 1, as DWARF file numbers, and a dir index 0 1483 // means empty dir. 1484 assert(FileIndex > 0 && FileIndex <= FileNames.size() && 1485 "FileIndex out of range for the compilation unit."); 1486 StringRef Dir = ""; 1487 if (FileNames[FileIndex - 1].DirIdx != 0) { 1488 if (Optional<const char *> DirName = dwarf::toString( 1489 LineTable->Prologue 1490 .IncludeDirectories[FileNames[FileIndex - 1].DirIdx - 1])) { 1491 Dir = *DirName; 1492 } 1493 } 1494 StringRef FileName = ""; 1495 if (Optional<const char *> FName = 1496 dwarf::toString(FileNames[FileIndex - 1].Name)) 1497 FileName = *FName; 1498 assert(FileName != ""); 1499 DWARFCompileUnit *DstUnit = DwCtx->getCompileUnitForOffset(DestCUID); 1500 return cantFail(getDwarfFile(Dir, FileName, 0, None, None, DestCUID, 1501 DstUnit->getVersion())); 1502 } 1503 1504 std::vector<BinaryFunction *> BinaryContext::getSortedFunctions() { 1505 std::vector<BinaryFunction *> SortedFunctions(BinaryFunctions.size()); 1506 llvm::transform(BinaryFunctions, SortedFunctions.begin(), 1507 [](std::pair<const uint64_t, BinaryFunction> &BFI) { 1508 return &BFI.second; 1509 }); 1510 1511 llvm::stable_sort(SortedFunctions, 1512 [](const BinaryFunction *A, const BinaryFunction *B) { 1513 if (A->hasValidIndex() && B->hasValidIndex()) { 1514 return A->getIndex() < B->getIndex(); 1515 } 1516 return A->hasValidIndex(); 1517 }); 1518 return SortedFunctions; 1519 } 1520 1521 std::vector<BinaryFunction *> BinaryContext::getAllBinaryFunctions() { 1522 std::vector<BinaryFunction *> AllFunctions; 1523 AllFunctions.reserve(BinaryFunctions.size() + InjectedBinaryFunctions.size()); 1524 llvm::transform(BinaryFunctions, std::back_inserter(AllFunctions), 1525 [](std::pair<const uint64_t, BinaryFunction> &BFI) { 1526 return &BFI.second; 1527 }); 1528 llvm::copy(InjectedBinaryFunctions, std::back_inserter(AllFunctions)); 1529 1530 return AllFunctions; 1531 } 1532 1533 Optional<DWARFUnit *> BinaryContext::getDWOCU(uint64_t DWOId) { 1534 auto Iter = DWOCUs.find(DWOId); 1535 if (Iter == DWOCUs.end()) 1536 return None; 1537 1538 return Iter->second; 1539 } 1540 1541 DWARFContext *BinaryContext::getDWOContext() const { 1542 if (DWOCUs.empty()) 1543 return nullptr; 1544 return &DWOCUs.begin()->second->getContext(); 1545 } 1546 1547 /// Handles DWO sections that can either be in .o, .dwo or .dwp files. 1548 void BinaryContext::preprocessDWODebugInfo() { 1549 for (const std::unique_ptr<DWARFUnit> &CU : DwCtx->compile_units()) { 1550 DWARFUnit *const DwarfUnit = CU.get(); 1551 if (llvm::Optional<uint64_t> DWOId = DwarfUnit->getDWOId()) { 1552 DWARFUnit *DWOCU = DwarfUnit->getNonSkeletonUnitDIE(false).getDwarfUnit(); 1553 if (!DWOCU->isDWOUnit()) { 1554 std::string DWOName = dwarf::toString( 1555 DwarfUnit->getUnitDIE().find( 1556 {dwarf::DW_AT_dwo_name, dwarf::DW_AT_GNU_dwo_name}), 1557 ""); 1558 outs() << "BOLT-WARNING: Debug Fission: DWO debug information for " 1559 << DWOName 1560 << " was not retrieved and won't be updated. Please check " 1561 "relative path.\n"; 1562 continue; 1563 } 1564 DWOCUs[*DWOId] = DWOCU; 1565 } 1566 } 1567 } 1568 1569 void BinaryContext::preprocessDebugInfo() { 1570 struct CURange { 1571 uint64_t LowPC; 1572 uint64_t HighPC; 1573 DWARFUnit *Unit; 1574 1575 bool operator<(const CURange &Other) const { return LowPC < Other.LowPC; } 1576 }; 1577 1578 // Building a map of address ranges to CUs similar to .debug_aranges and use 1579 // it to assign CU to functions. 1580 std::vector<CURange> AllRanges; 1581 AllRanges.reserve(DwCtx->getNumCompileUnits()); 1582 for (const std::unique_ptr<DWARFUnit> &CU : DwCtx->compile_units()) { 1583 Expected<DWARFAddressRangesVector> RangesOrError = 1584 CU->getUnitDIE().getAddressRanges(); 1585 if (!RangesOrError) { 1586 consumeError(RangesOrError.takeError()); 1587 continue; 1588 } 1589 for (DWARFAddressRange &Range : *RangesOrError) { 1590 // Parts of the debug info could be invalidated due to corresponding code 1591 // being removed from the binary by the linker. Hence we check if the 1592 // address is a valid one. 1593 if (containsAddress(Range.LowPC)) 1594 AllRanges.emplace_back(CURange{Range.LowPC, Range.HighPC, CU.get()}); 1595 } 1596 1597 ContainsDwarf5 |= CU->getVersion() >= 5; 1598 ContainsDwarfLegacy |= CU->getVersion() < 5; 1599 } 1600 1601 llvm::sort(AllRanges); 1602 for (auto &KV : BinaryFunctions) { 1603 const uint64_t FunctionAddress = KV.first; 1604 BinaryFunction &Function = KV.second; 1605 1606 auto It = llvm::partition_point( 1607 AllRanges, [=](CURange R) { return R.HighPC <= FunctionAddress; }); 1608 if (It != AllRanges.end() && It->LowPC <= FunctionAddress) 1609 Function.setDWARFUnit(It->Unit); 1610 } 1611 1612 // Discover units with debug info that needs to be updated. 1613 for (const auto &KV : BinaryFunctions) { 1614 const BinaryFunction &BF = KV.second; 1615 if (shouldEmit(BF) && BF.getDWARFUnit()) 1616 ProcessedCUs.insert(BF.getDWARFUnit()); 1617 } 1618 1619 // Clear debug info for functions from units that we are not going to process. 1620 for (auto &KV : BinaryFunctions) { 1621 BinaryFunction &BF = KV.second; 1622 if (BF.getDWARFUnit() && !ProcessedCUs.count(BF.getDWARFUnit())) 1623 BF.setDWARFUnit(nullptr); 1624 } 1625 1626 if (opts::Verbosity >= 1) { 1627 outs() << "BOLT-INFO: " << ProcessedCUs.size() << " out of " 1628 << DwCtx->getNumCompileUnits() << " CUs will be updated\n"; 1629 } 1630 1631 preprocessDWODebugInfo(); 1632 1633 // Populate MCContext with DWARF files from all units. 1634 StringRef GlobalPrefix = AsmInfo->getPrivateGlobalPrefix(); 1635 for (const std::unique_ptr<DWARFUnit> &CU : DwCtx->compile_units()) { 1636 const uint64_t CUID = CU->getOffset(); 1637 DwarfLineTable &BinaryLineTable = getDwarfLineTable(CUID); 1638 BinaryLineTable.setLabel(Ctx->getOrCreateSymbol( 1639 GlobalPrefix + "line_table_start" + Twine(CUID))); 1640 1641 if (!ProcessedCUs.count(CU.get())) 1642 continue; 1643 1644 const DWARFDebugLine::LineTable *LineTable = 1645 DwCtx->getLineTableForUnit(CU.get()); 1646 const std::vector<DWARFDebugLine::FileNameEntry> &FileNames = 1647 LineTable->Prologue.FileNames; 1648 1649 uint16_t DwarfVersion = LineTable->Prologue.getVersion(); 1650 if (DwarfVersion >= 5) { 1651 Optional<MD5::MD5Result> Checksum = None; 1652 if (LineTable->Prologue.ContentTypes.HasMD5) 1653 Checksum = LineTable->Prologue.FileNames[0].Checksum; 1654 Optional<const char *> Name = 1655 dwarf::toString(CU->getUnitDIE().find(dwarf::DW_AT_name), nullptr); 1656 if (Optional<uint64_t> DWOID = CU->getDWOId()) { 1657 auto Iter = DWOCUs.find(*DWOID); 1658 assert(Iter != DWOCUs.end() && "DWO CU was not found."); 1659 Name = dwarf::toString( 1660 Iter->second->getUnitDIE().find(dwarf::DW_AT_name), nullptr); 1661 } 1662 BinaryLineTable.setRootFile(CU->getCompilationDir(), *Name, Checksum, 1663 None); 1664 } 1665 1666 BinaryLineTable.setDwarfVersion(DwarfVersion); 1667 1668 // Assign a unique label to every line table, one per CU. 1669 // Make sure empty debug line tables are registered too. 1670 if (FileNames.empty()) { 1671 cantFail( 1672 getDwarfFile("", "<unknown>", 0, None, None, CUID, DwarfVersion)); 1673 continue; 1674 } 1675 const uint32_t Offset = DwarfVersion < 5 ? 1 : 0; 1676 for (size_t I = 0, Size = FileNames.size(); I != Size; ++I) { 1677 // Dir indexes start at 1, as DWARF file numbers, and a dir index 0 1678 // means empty dir. 1679 StringRef Dir = ""; 1680 if (FileNames[I].DirIdx != 0 || DwarfVersion >= 5) 1681 if (Optional<const char *> DirName = dwarf::toString( 1682 LineTable->Prologue 1683 .IncludeDirectories[FileNames[I].DirIdx - Offset])) 1684 Dir = *DirName; 1685 StringRef FileName = ""; 1686 if (Optional<const char *> FName = dwarf::toString(FileNames[I].Name)) 1687 FileName = *FName; 1688 assert(FileName != ""); 1689 Optional<MD5::MD5Result> Checksum = None; 1690 if (DwarfVersion >= 5 && LineTable->Prologue.ContentTypes.HasMD5) 1691 Checksum = LineTable->Prologue.FileNames[I].Checksum; 1692 cantFail( 1693 getDwarfFile(Dir, FileName, 0, Checksum, None, CUID, DwarfVersion)); 1694 } 1695 } 1696 } 1697 1698 bool BinaryContext::shouldEmit(const BinaryFunction &Function) const { 1699 if (Function.isPseudo()) 1700 return false; 1701 1702 if (opts::processAllFunctions()) 1703 return true; 1704 1705 if (Function.isIgnored()) 1706 return false; 1707 1708 // In relocation mode we will emit non-simple functions with CFG. 1709 // If the function does not have a CFG it should be marked as ignored. 1710 return HasRelocations || Function.isSimple(); 1711 } 1712 1713 void BinaryContext::printCFI(raw_ostream &OS, const MCCFIInstruction &Inst) { 1714 uint32_t Operation = Inst.getOperation(); 1715 switch (Operation) { 1716 case MCCFIInstruction::OpSameValue: 1717 OS << "OpSameValue Reg" << Inst.getRegister(); 1718 break; 1719 case MCCFIInstruction::OpRememberState: 1720 OS << "OpRememberState"; 1721 break; 1722 case MCCFIInstruction::OpRestoreState: 1723 OS << "OpRestoreState"; 1724 break; 1725 case MCCFIInstruction::OpOffset: 1726 OS << "OpOffset Reg" << Inst.getRegister() << " " << Inst.getOffset(); 1727 break; 1728 case MCCFIInstruction::OpDefCfaRegister: 1729 OS << "OpDefCfaRegister Reg" << Inst.getRegister(); 1730 break; 1731 case MCCFIInstruction::OpDefCfaOffset: 1732 OS << "OpDefCfaOffset " << Inst.getOffset(); 1733 break; 1734 case MCCFIInstruction::OpDefCfa: 1735 OS << "OpDefCfa Reg" << Inst.getRegister() << " " << Inst.getOffset(); 1736 break; 1737 case MCCFIInstruction::OpRelOffset: 1738 OS << "OpRelOffset Reg" << Inst.getRegister() << " " << Inst.getOffset(); 1739 break; 1740 case MCCFIInstruction::OpAdjustCfaOffset: 1741 OS << "OfAdjustCfaOffset " << Inst.getOffset(); 1742 break; 1743 case MCCFIInstruction::OpEscape: 1744 OS << "OpEscape"; 1745 break; 1746 case MCCFIInstruction::OpRestore: 1747 OS << "OpRestore Reg" << Inst.getRegister(); 1748 break; 1749 case MCCFIInstruction::OpUndefined: 1750 OS << "OpUndefined Reg" << Inst.getRegister(); 1751 break; 1752 case MCCFIInstruction::OpRegister: 1753 OS << "OpRegister Reg" << Inst.getRegister() << " Reg" 1754 << Inst.getRegister2(); 1755 break; 1756 case MCCFIInstruction::OpWindowSave: 1757 OS << "OpWindowSave"; 1758 break; 1759 case MCCFIInstruction::OpGnuArgsSize: 1760 OS << "OpGnuArgsSize"; 1761 break; 1762 default: 1763 OS << "Op#" << Operation; 1764 break; 1765 } 1766 } 1767 1768 MarkerSymType BinaryContext::getMarkerType(const SymbolRef &Symbol) const { 1769 // For aarch64, the ABI defines mapping symbols so we identify data in the 1770 // code section (see IHI0056B). $x identifies a symbol starting code or the 1771 // end of a data chunk inside code, $d indentifies start of data. 1772 if (!isAArch64() || ELFSymbolRef(Symbol).getSize()) 1773 return MarkerSymType::NONE; 1774 1775 Expected<StringRef> NameOrError = Symbol.getName(); 1776 Expected<object::SymbolRef::Type> TypeOrError = Symbol.getType(); 1777 1778 if (!TypeOrError || !NameOrError) 1779 return MarkerSymType::NONE; 1780 1781 if (*TypeOrError != SymbolRef::ST_Unknown) 1782 return MarkerSymType::NONE; 1783 1784 if (*NameOrError == "$x" || NameOrError->startswith("$x.")) 1785 return MarkerSymType::CODE; 1786 1787 if (*NameOrError == "$d" || NameOrError->startswith("$d.")) 1788 return MarkerSymType::DATA; 1789 1790 return MarkerSymType::NONE; 1791 } 1792 1793 bool BinaryContext::isMarker(const SymbolRef &Symbol) const { 1794 return getMarkerType(Symbol) != MarkerSymType::NONE; 1795 } 1796 1797 static void printDebugInfo(raw_ostream &OS, const MCInst &Instruction, 1798 const BinaryFunction *Function, 1799 DWARFContext *DwCtx) { 1800 DebugLineTableRowRef RowRef = 1801 DebugLineTableRowRef::fromSMLoc(Instruction.getLoc()); 1802 if (RowRef == DebugLineTableRowRef::NULL_ROW) 1803 return; 1804 1805 const DWARFDebugLine::LineTable *LineTable; 1806 if (Function && Function->getDWARFUnit() && 1807 Function->getDWARFUnit()->getOffset() == RowRef.DwCompileUnitIndex) { 1808 LineTable = Function->getDWARFLineTable(); 1809 } else { 1810 LineTable = DwCtx->getLineTableForUnit( 1811 DwCtx->getCompileUnitForOffset(RowRef.DwCompileUnitIndex)); 1812 } 1813 assert(LineTable && "line table expected for instruction with debug info"); 1814 1815 const DWARFDebugLine::Row &Row = LineTable->Rows[RowRef.RowIndex - 1]; 1816 StringRef FileName = ""; 1817 if (Optional<const char *> FName = 1818 dwarf::toString(LineTable->Prologue.FileNames[Row.File - 1].Name)) 1819 FileName = *FName; 1820 OS << " # debug line " << FileName << ":" << Row.Line; 1821 if (Row.Column) 1822 OS << ":" << Row.Column; 1823 if (Row.Discriminator) 1824 OS << " discriminator:" << Row.Discriminator; 1825 } 1826 1827 void BinaryContext::printInstruction(raw_ostream &OS, const MCInst &Instruction, 1828 uint64_t Offset, 1829 const BinaryFunction *Function, 1830 bool PrintMCInst, bool PrintMemData, 1831 bool PrintRelocations, 1832 StringRef Endl) const { 1833 if (MIB->isEHLabel(Instruction)) { 1834 OS << " EH_LABEL: " << *MIB->getTargetSymbol(Instruction) << Endl; 1835 return; 1836 } 1837 OS << format(" %08" PRIx64 ": ", Offset); 1838 if (MIB->isCFI(Instruction)) { 1839 uint32_t Offset = Instruction.getOperand(0).getImm(); 1840 OS << "\t!CFI\t$" << Offset << "\t; "; 1841 if (Function) 1842 printCFI(OS, *Function->getCFIFor(Instruction)); 1843 OS << Endl; 1844 return; 1845 } 1846 InstPrinter->printInst(&Instruction, 0, "", *STI, OS); 1847 if (MIB->isCall(Instruction)) { 1848 if (MIB->isTailCall(Instruction)) 1849 OS << " # TAILCALL "; 1850 if (MIB->isInvoke(Instruction)) { 1851 const Optional<MCPlus::MCLandingPad> EHInfo = MIB->getEHInfo(Instruction); 1852 OS << " # handler: "; 1853 if (EHInfo->first) 1854 OS << *EHInfo->first; 1855 else 1856 OS << '0'; 1857 OS << "; action: " << EHInfo->second; 1858 const int64_t GnuArgsSize = MIB->getGnuArgsSize(Instruction); 1859 if (GnuArgsSize >= 0) 1860 OS << "; GNU_args_size = " << GnuArgsSize; 1861 } 1862 } else if (MIB->isIndirectBranch(Instruction)) { 1863 if (uint64_t JTAddress = MIB->getJumpTable(Instruction)) { 1864 OS << " # JUMPTABLE @0x" << Twine::utohexstr(JTAddress); 1865 } else { 1866 OS << " # UNKNOWN CONTROL FLOW"; 1867 } 1868 } 1869 if (Optional<uint32_t> Offset = MIB->getOffset(Instruction)) 1870 OS << " # Offset: " << *Offset; 1871 1872 MIB->printAnnotations(Instruction, OS); 1873 1874 if (opts::PrintDebugInfo) 1875 printDebugInfo(OS, Instruction, Function, DwCtx.get()); 1876 1877 if ((opts::PrintRelocations || PrintRelocations) && Function) { 1878 const uint64_t Size = computeCodeSize(&Instruction, &Instruction + 1); 1879 Function->printRelocations(OS, Offset, Size); 1880 } 1881 1882 OS << Endl; 1883 1884 if (PrintMCInst) { 1885 Instruction.dump_pretty(OS, InstPrinter.get()); 1886 OS << Endl; 1887 } 1888 } 1889 1890 Optional<uint64_t> 1891 BinaryContext::getBaseAddressForMapping(uint64_t MMapAddress, 1892 uint64_t FileOffset) const { 1893 // Find a segment with a matching file offset. 1894 for (auto &KV : SegmentMapInfo) { 1895 const SegmentInfo &SegInfo = KV.second; 1896 if (alignDown(SegInfo.FileOffset, SegInfo.Alignment) == FileOffset) { 1897 // Use segment's aligned memory offset to calculate the base address. 1898 const uint64_t MemOffset = alignDown(SegInfo.Address, SegInfo.Alignment); 1899 return MMapAddress - MemOffset; 1900 } 1901 } 1902 1903 return NoneType(); 1904 } 1905 1906 ErrorOr<BinarySection &> BinaryContext::getSectionForAddress(uint64_t Address) { 1907 auto SI = AddressToSection.upper_bound(Address); 1908 if (SI != AddressToSection.begin()) { 1909 --SI; 1910 uint64_t UpperBound = SI->first + SI->second->getSize(); 1911 if (!SI->second->getSize()) 1912 UpperBound += 1; 1913 if (UpperBound > Address) 1914 return *SI->second; 1915 } 1916 return std::make_error_code(std::errc::bad_address); 1917 } 1918 1919 ErrorOr<StringRef> 1920 BinaryContext::getSectionNameForAddress(uint64_t Address) const { 1921 if (ErrorOr<const BinarySection &> Section = getSectionForAddress(Address)) 1922 return Section->getName(); 1923 return std::make_error_code(std::errc::bad_address); 1924 } 1925 1926 BinarySection &BinaryContext::registerSection(BinarySection *Section) { 1927 auto Res = Sections.insert(Section); 1928 (void)Res; 1929 assert(Res.second && "can't register the same section twice."); 1930 1931 // Only register allocatable sections in the AddressToSection map. 1932 if (Section->isAllocatable() && Section->getAddress()) 1933 AddressToSection.insert(std::make_pair(Section->getAddress(), Section)); 1934 NameToSection.insert( 1935 std::make_pair(std::string(Section->getName()), Section)); 1936 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: registering " << *Section << "\n"); 1937 return *Section; 1938 } 1939 1940 BinarySection &BinaryContext::registerSection(SectionRef Section) { 1941 return registerSection(new BinarySection(*this, Section)); 1942 } 1943 1944 BinarySection & 1945 BinaryContext::registerSection(StringRef SectionName, 1946 const BinarySection &OriginalSection) { 1947 return registerSection( 1948 new BinarySection(*this, SectionName, OriginalSection)); 1949 } 1950 1951 BinarySection & 1952 BinaryContext::registerOrUpdateSection(StringRef Name, unsigned ELFType, 1953 unsigned ELFFlags, uint8_t *Data, 1954 uint64_t Size, unsigned Alignment) { 1955 auto NamedSections = getSectionByName(Name); 1956 if (NamedSections.begin() != NamedSections.end()) { 1957 assert(std::next(NamedSections.begin()) == NamedSections.end() && 1958 "can only update unique sections"); 1959 BinarySection *Section = NamedSections.begin()->second; 1960 1961 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: updating " << *Section << " -> "); 1962 const bool Flag = Section->isAllocatable(); 1963 (void)Flag; 1964 Section->update(Data, Size, Alignment, ELFType, ELFFlags); 1965 LLVM_DEBUG(dbgs() << *Section << "\n"); 1966 // FIXME: Fix section flags/attributes for MachO. 1967 if (isELF()) 1968 assert(Flag == Section->isAllocatable() && 1969 "can't change section allocation status"); 1970 return *Section; 1971 } 1972 1973 return registerSection( 1974 new BinarySection(*this, Name, Data, Size, Alignment, ELFType, ELFFlags)); 1975 } 1976 1977 bool BinaryContext::deregisterSection(BinarySection &Section) { 1978 BinarySection *SectionPtr = &Section; 1979 auto Itr = Sections.find(SectionPtr); 1980 if (Itr != Sections.end()) { 1981 auto Range = AddressToSection.equal_range(SectionPtr->getAddress()); 1982 while (Range.first != Range.second) { 1983 if (Range.first->second == SectionPtr) { 1984 AddressToSection.erase(Range.first); 1985 break; 1986 } 1987 ++Range.first; 1988 } 1989 1990 auto NameRange = 1991 NameToSection.equal_range(std::string(SectionPtr->getName())); 1992 while (NameRange.first != NameRange.second) { 1993 if (NameRange.first->second == SectionPtr) { 1994 NameToSection.erase(NameRange.first); 1995 break; 1996 } 1997 ++NameRange.first; 1998 } 1999 2000 Sections.erase(Itr); 2001 delete SectionPtr; 2002 return true; 2003 } 2004 return false; 2005 } 2006 2007 void BinaryContext::printSections(raw_ostream &OS) const { 2008 for (BinarySection *const &Section : Sections) 2009 OS << "BOLT-INFO: " << *Section << "\n"; 2010 } 2011 2012 BinarySection &BinaryContext::absoluteSection() { 2013 if (ErrorOr<BinarySection &> Section = getUniqueSectionByName("<absolute>")) 2014 return *Section; 2015 return registerOrUpdateSection("<absolute>", ELF::SHT_NULL, 0u); 2016 } 2017 2018 ErrorOr<uint64_t> BinaryContext::getUnsignedValueAtAddress(uint64_t Address, 2019 size_t Size) const { 2020 const ErrorOr<const BinarySection &> Section = getSectionForAddress(Address); 2021 if (!Section) 2022 return std::make_error_code(std::errc::bad_address); 2023 2024 if (Section->isVirtual()) 2025 return 0; 2026 2027 DataExtractor DE(Section->getContents(), AsmInfo->isLittleEndian(), 2028 AsmInfo->getCodePointerSize()); 2029 auto ValueOffset = static_cast<uint64_t>(Address - Section->getAddress()); 2030 return DE.getUnsigned(&ValueOffset, Size); 2031 } 2032 2033 ErrorOr<uint64_t> BinaryContext::getSignedValueAtAddress(uint64_t Address, 2034 size_t Size) const { 2035 const ErrorOr<const BinarySection &> Section = getSectionForAddress(Address); 2036 if (!Section) 2037 return std::make_error_code(std::errc::bad_address); 2038 2039 if (Section->isVirtual()) 2040 return 0; 2041 2042 DataExtractor DE(Section->getContents(), AsmInfo->isLittleEndian(), 2043 AsmInfo->getCodePointerSize()); 2044 auto ValueOffset = static_cast<uint64_t>(Address - Section->getAddress()); 2045 return DE.getSigned(&ValueOffset, Size); 2046 } 2047 2048 void BinaryContext::addRelocation(uint64_t Address, MCSymbol *Symbol, 2049 uint64_t Type, uint64_t Addend, 2050 uint64_t Value) { 2051 ErrorOr<BinarySection &> Section = getSectionForAddress(Address); 2052 assert(Section && "cannot find section for address"); 2053 Section->addRelocation(Address - Section->getAddress(), Symbol, Type, Addend, 2054 Value); 2055 } 2056 2057 void BinaryContext::addDynamicRelocation(uint64_t Address, MCSymbol *Symbol, 2058 uint64_t Type, uint64_t Addend, 2059 uint64_t Value) { 2060 ErrorOr<BinarySection &> Section = getSectionForAddress(Address); 2061 assert(Section && "cannot find section for address"); 2062 Section->addDynamicRelocation(Address - Section->getAddress(), Symbol, Type, 2063 Addend, Value); 2064 } 2065 2066 bool BinaryContext::removeRelocationAt(uint64_t Address) { 2067 ErrorOr<BinarySection &> Section = getSectionForAddress(Address); 2068 assert(Section && "cannot find section for address"); 2069 return Section->removeRelocationAt(Address - Section->getAddress()); 2070 } 2071 2072 const Relocation *BinaryContext::getRelocationAt(uint64_t Address) { 2073 ErrorOr<BinarySection &> Section = getSectionForAddress(Address); 2074 if (!Section) 2075 return nullptr; 2076 2077 return Section->getRelocationAt(Address - Section->getAddress()); 2078 } 2079 2080 const Relocation *BinaryContext::getDynamicRelocationAt(uint64_t Address) { 2081 ErrorOr<BinarySection &> Section = getSectionForAddress(Address); 2082 if (!Section) 2083 return nullptr; 2084 2085 return Section->getDynamicRelocationAt(Address - Section->getAddress()); 2086 } 2087 2088 void BinaryContext::markAmbiguousRelocations(BinaryData &BD, 2089 const uint64_t Address) { 2090 auto setImmovable = [&](BinaryData &BD) { 2091 BinaryData *Root = BD.getAtomicRoot(); 2092 LLVM_DEBUG(if (Root->isMoveable()) { 2093 dbgs() << "BOLT-DEBUG: setting " << *Root << " as immovable " 2094 << "due to ambiguous relocation referencing 0x" 2095 << Twine::utohexstr(Address) << '\n'; 2096 }); 2097 Root->setIsMoveable(false); 2098 }; 2099 2100 if (Address == BD.getAddress()) { 2101 setImmovable(BD); 2102 2103 // Set previous symbol as immovable 2104 BinaryData *Prev = getBinaryDataContainingAddress(Address - 1); 2105 if (Prev && Prev->getEndAddress() == BD.getAddress()) 2106 setImmovable(*Prev); 2107 } 2108 2109 if (Address == BD.getEndAddress()) { 2110 setImmovable(BD); 2111 2112 // Set next symbol as immovable 2113 BinaryData *Next = getBinaryDataContainingAddress(BD.getEndAddress()); 2114 if (Next && Next->getAddress() == BD.getEndAddress()) 2115 setImmovable(*Next); 2116 } 2117 } 2118 2119 BinaryFunction *BinaryContext::getFunctionForSymbol(const MCSymbol *Symbol, 2120 uint64_t *EntryDesc) { 2121 std::shared_lock<std::shared_timed_mutex> Lock(SymbolToFunctionMapMutex); 2122 auto BFI = SymbolToFunctionMap.find(Symbol); 2123 if (BFI == SymbolToFunctionMap.end()) 2124 return nullptr; 2125 2126 BinaryFunction *BF = BFI->second; 2127 if (EntryDesc) 2128 *EntryDesc = BF->getEntryIDForSymbol(Symbol); 2129 2130 return BF; 2131 } 2132 2133 void BinaryContext::exitWithBugReport(StringRef Message, 2134 const BinaryFunction &Function) const { 2135 errs() << "=======================================\n"; 2136 errs() << "BOLT is unable to proceed because it couldn't properly understand " 2137 "this function.\n"; 2138 errs() << "If you are running the most recent version of BOLT, you may " 2139 "want to " 2140 "report this and paste this dump.\nPlease check that there is no " 2141 "sensitive contents being shared in this dump.\n"; 2142 errs() << "\nOffending function: " << Function.getPrintName() << "\n\n"; 2143 ScopedPrinter SP(errs()); 2144 SP.printBinaryBlock("Function contents", *Function.getData()); 2145 errs() << "\n"; 2146 Function.dump(); 2147 errs() << "ERROR: " << Message; 2148 errs() << "\n=======================================\n"; 2149 exit(1); 2150 } 2151 2152 BinaryFunction * 2153 BinaryContext::createInjectedBinaryFunction(const std::string &Name, 2154 bool IsSimple) { 2155 InjectedBinaryFunctions.push_back(new BinaryFunction(Name, *this, IsSimple)); 2156 BinaryFunction *BF = InjectedBinaryFunctions.back(); 2157 setSymbolToFunctionMap(BF->getSymbol(), BF); 2158 BF->CurrentState = BinaryFunction::State::CFG; 2159 return BF; 2160 } 2161 2162 std::pair<size_t, size_t> 2163 BinaryContext::calculateEmittedSize(BinaryFunction &BF, bool FixBranches) { 2164 // Adjust branch instruction to match the current layout. 2165 if (FixBranches) 2166 BF.fixBranches(); 2167 2168 // Create local MC context to isolate the effect of ephemeral code emission. 2169 IndependentCodeEmitter MCEInstance = createIndependentMCCodeEmitter(); 2170 MCContext *LocalCtx = MCEInstance.LocalCtx.get(); 2171 MCAsmBackend *MAB = 2172 TheTarget->createMCAsmBackend(*STI, *MRI, MCTargetOptions()); 2173 2174 SmallString<256> Code; 2175 raw_svector_ostream VecOS(Code); 2176 2177 std::unique_ptr<MCObjectWriter> OW = MAB->createObjectWriter(VecOS); 2178 std::unique_ptr<MCStreamer> Streamer(TheTarget->createMCObjectStreamer( 2179 *TheTriple, *LocalCtx, std::unique_ptr<MCAsmBackend>(MAB), std::move(OW), 2180 std::unique_ptr<MCCodeEmitter>(MCEInstance.MCE.release()), *STI, 2181 /*RelaxAll=*/false, 2182 /*IncrementalLinkerCompatible=*/false, 2183 /*DWARFMustBeAtTheEnd=*/false)); 2184 2185 Streamer->initSections(false, *STI); 2186 2187 MCSection *Section = MCEInstance.LocalMOFI->getTextSection(); 2188 Section->setHasInstructions(true); 2189 2190 // Create symbols in the LocalCtx so that they get destroyed with it. 2191 MCSymbol *StartLabel = LocalCtx->createTempSymbol(); 2192 MCSymbol *EndLabel = LocalCtx->createTempSymbol(); 2193 2194 Streamer->switchSection(Section); 2195 Streamer->emitLabel(StartLabel); 2196 emitFunctionBody(*Streamer, BF, BF.getLayout().getMainFragment(), 2197 /*EmitCodeOnly=*/true); 2198 Streamer->emitLabel(EndLabel); 2199 2200 using LabelRange = std::pair<const MCSymbol *, const MCSymbol *>; 2201 SmallVector<LabelRange> SplitLabels; 2202 for (const FunctionFragment FF : BF.getLayout().getSplitFragments()) { 2203 MCSymbol *const SplitStartLabel = LocalCtx->createTempSymbol(); 2204 MCSymbol *const SplitEndLabel = LocalCtx->createTempSymbol(); 2205 SplitLabels.emplace_back(SplitStartLabel, SplitEndLabel); 2206 2207 MCSectionELF *const SplitSection = LocalCtx->getELFSection( 2208 BF.getCodeSectionName(FF.getFragmentNum()), ELF::SHT_PROGBITS, 2209 ELF::SHF_EXECINSTR | ELF::SHF_ALLOC); 2210 SplitSection->setHasInstructions(true); 2211 Streamer->switchSection(SplitSection); 2212 2213 Streamer->emitLabel(SplitStartLabel); 2214 emitFunctionBody(*Streamer, BF, FF, /*EmitCodeOnly=*/true); 2215 Streamer->emitLabel(SplitEndLabel); 2216 // To avoid calling MCObjectStreamer::flushPendingLabels() which is 2217 // private 2218 Streamer->emitBytes(StringRef("")); 2219 Streamer->switchSection(Section); 2220 } 2221 2222 // To avoid calling MCObjectStreamer::flushPendingLabels() which is private or 2223 // MCStreamer::Finish(), which does more than we want 2224 Streamer->emitBytes(StringRef("")); 2225 2226 MCAssembler &Assembler = 2227 static_cast<MCObjectStreamer *>(Streamer.get())->getAssembler(); 2228 MCAsmLayout Layout(Assembler); 2229 Assembler.layout(Layout); 2230 2231 const uint64_t HotSize = 2232 Layout.getSymbolOffset(*EndLabel) - Layout.getSymbolOffset(*StartLabel); 2233 const uint64_t ColdSize = 2234 std::accumulate(SplitLabels.begin(), SplitLabels.end(), 0ULL, 2235 [&](const uint64_t Accu, const LabelRange &Labels) { 2236 return Accu + Layout.getSymbolOffset(*Labels.second) - 2237 Layout.getSymbolOffset(*Labels.first); 2238 }); 2239 2240 // Clean-up the effect of the code emission. 2241 for (const MCSymbol &Symbol : Assembler.symbols()) { 2242 MCSymbol *MutableSymbol = const_cast<MCSymbol *>(&Symbol); 2243 MutableSymbol->setUndefined(); 2244 MutableSymbol->setIsRegistered(false); 2245 } 2246 2247 return std::make_pair(HotSize, ColdSize); 2248 } 2249 2250 bool BinaryContext::validateEncoding(const MCInst &Inst, 2251 ArrayRef<uint8_t> InputEncoding) const { 2252 SmallString<256> Code; 2253 SmallVector<MCFixup, 4> Fixups; 2254 raw_svector_ostream VecOS(Code); 2255 2256 MCE->encodeInstruction(Inst, VecOS, Fixups, *STI); 2257 auto EncodedData = ArrayRef<uint8_t>((uint8_t *)Code.data(), Code.size()); 2258 if (InputEncoding != EncodedData) { 2259 if (opts::Verbosity > 1) { 2260 errs() << "BOLT-WARNING: mismatched encoding detected\n" 2261 << " input: " << InputEncoding << '\n' 2262 << " output: " << EncodedData << '\n'; 2263 } 2264 return false; 2265 } 2266 2267 return true; 2268 } 2269 2270 uint64_t BinaryContext::getHotThreshold() const { 2271 static uint64_t Threshold = 0; 2272 if (Threshold == 0) { 2273 Threshold = std::max( 2274 (uint64_t)opts::ExecutionCountThreshold, 2275 NumProfiledFuncs ? SumExecutionCount / (2 * NumProfiledFuncs) : 1); 2276 } 2277 return Threshold; 2278 } 2279 2280 BinaryFunction *BinaryContext::getBinaryFunctionContainingAddress( 2281 uint64_t Address, bool CheckPastEnd, bool UseMaxSize) { 2282 auto FI = BinaryFunctions.upper_bound(Address); 2283 if (FI == BinaryFunctions.begin()) 2284 return nullptr; 2285 --FI; 2286 2287 const uint64_t UsedSize = 2288 UseMaxSize ? FI->second.getMaxSize() : FI->second.getSize(); 2289 2290 if (Address >= FI->first + UsedSize + (CheckPastEnd ? 1 : 0)) 2291 return nullptr; 2292 2293 return &FI->second; 2294 } 2295 2296 BinaryFunction *BinaryContext::getBinaryFunctionAtAddress(uint64_t Address) { 2297 // First, try to find a function starting at the given address. If the 2298 // function was folded, this will get us the original folded function if it 2299 // wasn't removed from the list, e.g. in non-relocation mode. 2300 auto BFI = BinaryFunctions.find(Address); 2301 if (BFI != BinaryFunctions.end()) 2302 return &BFI->second; 2303 2304 // We might have folded the function matching the object at the given 2305 // address. In such case, we look for a function matching the symbol 2306 // registered at the original address. The new function (the one that the 2307 // original was folded into) will hold the symbol. 2308 if (const BinaryData *BD = getBinaryDataAtAddress(Address)) { 2309 uint64_t EntryID = 0; 2310 BinaryFunction *BF = getFunctionForSymbol(BD->getSymbol(), &EntryID); 2311 if (BF && EntryID == 0) 2312 return BF; 2313 } 2314 return nullptr; 2315 } 2316 2317 DebugAddressRangesVector BinaryContext::translateModuleAddressRanges( 2318 const DWARFAddressRangesVector &InputRanges) const { 2319 DebugAddressRangesVector OutputRanges; 2320 2321 for (const DWARFAddressRange Range : InputRanges) { 2322 auto BFI = BinaryFunctions.lower_bound(Range.LowPC); 2323 while (BFI != BinaryFunctions.end()) { 2324 const BinaryFunction &Function = BFI->second; 2325 if (Function.getAddress() >= Range.HighPC) 2326 break; 2327 const DebugAddressRangesVector FunctionRanges = 2328 Function.getOutputAddressRanges(); 2329 llvm::move(FunctionRanges, std::back_inserter(OutputRanges)); 2330 std::advance(BFI, 1); 2331 } 2332 } 2333 2334 return OutputRanges; 2335 } 2336 2337 } // namespace bolt 2338 } // namespace llvm 2339