1 //===- bolt/Core/BinaryContext.cpp - Low-level context --------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file implements the BinaryContext class. 10 // 11 //===----------------------------------------------------------------------===// 12 13 #include "bolt/Core/BinaryContext.h" 14 #include "bolt/Core/BinaryEmitter.h" 15 #include "bolt/Core/BinaryFunction.h" 16 #include "bolt/Utils/CommandLineOpts.h" 17 #include "bolt/Utils/NameResolver.h" 18 #include "bolt/Utils/Utils.h" 19 #include "llvm/ADT/Twine.h" 20 #include "llvm/DebugInfo/DWARF/DWARFCompileUnit.h" 21 #include "llvm/DebugInfo/DWARF/DWARFFormValue.h" 22 #include "llvm/DebugInfo/DWARF/DWARFUnit.h" 23 #include "llvm/MC/MCAsmLayout.h" 24 #include "llvm/MC/MCAssembler.h" 25 #include "llvm/MC/MCContext.h" 26 #include "llvm/MC/MCDisassembler/MCDisassembler.h" 27 #include "llvm/MC/MCInstPrinter.h" 28 #include "llvm/MC/MCObjectStreamer.h" 29 #include "llvm/MC/MCObjectWriter.h" 30 #include "llvm/MC/MCRegisterInfo.h" 31 #include "llvm/MC/MCSectionELF.h" 32 #include "llvm/MC/MCStreamer.h" 33 #include "llvm/MC/MCSubtargetInfo.h" 34 #include "llvm/MC/MCSymbol.h" 35 #include "llvm/Support/CommandLine.h" 36 #include "llvm/Support/Error.h" 37 #include "llvm/Support/Regex.h" 38 #include <algorithm> 39 #include <functional> 40 #include <iterator> 41 #include <numeric> 42 #include <unordered_set> 43 44 using namespace llvm; 45 46 #undef DEBUG_TYPE 47 #define DEBUG_TYPE "bolt" 48 49 namespace opts { 50 51 cl::opt<bool> NoHugePages("no-huge-pages", 52 cl::desc("use regular size pages for code alignment"), 53 cl::Hidden, cl::cat(BoltCategory)); 54 55 static cl::opt<bool> 56 PrintDebugInfo("print-debug-info", 57 cl::desc("print debug info when printing functions"), 58 cl::Hidden, 59 cl::ZeroOrMore, 60 cl::cat(BoltCategory)); 61 62 cl::opt<bool> PrintRelocations( 63 "print-relocations", 64 cl::desc("print relocations when printing functions/objects"), cl::Hidden, 65 cl::cat(BoltCategory)); 66 67 static cl::opt<bool> 68 PrintMemData("print-mem-data", 69 cl::desc("print memory data annotations when printing functions"), 70 cl::Hidden, 71 cl::ZeroOrMore, 72 cl::cat(BoltCategory)); 73 74 } // namespace opts 75 76 namespace llvm { 77 namespace bolt { 78 79 BinaryContext::BinaryContext(std::unique_ptr<MCContext> Ctx, 80 std::unique_ptr<DWARFContext> DwCtx, 81 std::unique_ptr<Triple> TheTriple, 82 const Target *TheTarget, std::string TripleName, 83 std::unique_ptr<MCCodeEmitter> MCE, 84 std::unique_ptr<MCObjectFileInfo> MOFI, 85 std::unique_ptr<const MCAsmInfo> AsmInfo, 86 std::unique_ptr<const MCInstrInfo> MII, 87 std::unique_ptr<const MCSubtargetInfo> STI, 88 std::unique_ptr<MCInstPrinter> InstPrinter, 89 std::unique_ptr<const MCInstrAnalysis> MIA, 90 std::unique_ptr<MCPlusBuilder> MIB, 91 std::unique_ptr<const MCRegisterInfo> MRI, 92 std::unique_ptr<MCDisassembler> DisAsm) 93 : Ctx(std::move(Ctx)), DwCtx(std::move(DwCtx)), 94 TheTriple(std::move(TheTriple)), TheTarget(TheTarget), 95 TripleName(TripleName), MCE(std::move(MCE)), MOFI(std::move(MOFI)), 96 AsmInfo(std::move(AsmInfo)), MII(std::move(MII)), STI(std::move(STI)), 97 InstPrinter(std::move(InstPrinter)), MIA(std::move(MIA)), 98 MIB(std::move(MIB)), MRI(std::move(MRI)), DisAsm(std::move(DisAsm)) { 99 Relocation::Arch = this->TheTriple->getArch(); 100 RegularPageSize = isAArch64() ? RegularPageSizeAArch64 : RegularPageSizeX86; 101 PageAlign = opts::NoHugePages ? RegularPageSize : HugePageSize; 102 } 103 104 BinaryContext::~BinaryContext() { 105 for (BinarySection *Section : Sections) 106 delete Section; 107 for (BinaryFunction *InjectedFunction : InjectedBinaryFunctions) 108 delete InjectedFunction; 109 for (std::pair<const uint64_t, JumpTable *> JTI : JumpTables) 110 delete JTI.second; 111 clearBinaryData(); 112 } 113 114 /// Create BinaryContext for a given architecture \p ArchName and 115 /// triple \p TripleName. 116 Expected<std::unique_ptr<BinaryContext>> 117 BinaryContext::createBinaryContext(const ObjectFile *File, bool IsPIC, 118 std::unique_ptr<DWARFContext> DwCtx) { 119 StringRef ArchName = ""; 120 StringRef FeaturesStr = ""; 121 switch (File->getArch()) { 122 case llvm::Triple::x86_64: 123 ArchName = "x86-64"; 124 FeaturesStr = "+nopl"; 125 break; 126 case llvm::Triple::aarch64: 127 ArchName = "aarch64"; 128 FeaturesStr = "+all"; 129 break; 130 default: 131 return createStringError(std::errc::not_supported, 132 "BOLT-ERROR: Unrecognized machine in ELF file"); 133 } 134 135 auto TheTriple = std::make_unique<Triple>(File->makeTriple()); 136 const std::string TripleName = TheTriple->str(); 137 138 std::string Error; 139 const Target *TheTarget = 140 TargetRegistry::lookupTarget(std::string(ArchName), *TheTriple, Error); 141 if (!TheTarget) 142 return createStringError(make_error_code(std::errc::not_supported), 143 Twine("BOLT-ERROR: ", Error)); 144 145 std::unique_ptr<const MCRegisterInfo> MRI( 146 TheTarget->createMCRegInfo(TripleName)); 147 if (!MRI) 148 return createStringError( 149 make_error_code(std::errc::not_supported), 150 Twine("BOLT-ERROR: no register info for target ", TripleName)); 151 152 // Set up disassembler. 153 std::unique_ptr<MCAsmInfo> AsmInfo( 154 TheTarget->createMCAsmInfo(*MRI, TripleName, MCTargetOptions())); 155 if (!AsmInfo) 156 return createStringError( 157 make_error_code(std::errc::not_supported), 158 Twine("BOLT-ERROR: no assembly info for target ", TripleName)); 159 // BOLT creates "func@PLT" symbols for PLT entries. In function assembly dump 160 // we want to emit such names as using @PLT without double quotes to convey 161 // variant kind to the assembler. BOLT doesn't rely on the linker so we can 162 // override the default AsmInfo behavior to emit names the way we want. 163 AsmInfo->setAllowAtInName(true); 164 165 std::unique_ptr<const MCSubtargetInfo> STI( 166 TheTarget->createMCSubtargetInfo(TripleName, "", FeaturesStr)); 167 if (!STI) 168 return createStringError( 169 make_error_code(std::errc::not_supported), 170 Twine("BOLT-ERROR: no subtarget info for target ", TripleName)); 171 172 std::unique_ptr<const MCInstrInfo> MII(TheTarget->createMCInstrInfo()); 173 if (!MII) 174 return createStringError( 175 make_error_code(std::errc::not_supported), 176 Twine("BOLT-ERROR: no instruction info for target ", TripleName)); 177 178 std::unique_ptr<MCContext> Ctx( 179 new MCContext(*TheTriple, AsmInfo.get(), MRI.get(), STI.get())); 180 std::unique_ptr<MCObjectFileInfo> MOFI( 181 TheTarget->createMCObjectFileInfo(*Ctx, IsPIC)); 182 Ctx->setObjectFileInfo(MOFI.get()); 183 // We do not support X86 Large code model. Change this in the future. 184 bool Large = false; 185 if (TheTriple->getArch() == llvm::Triple::aarch64) 186 Large = true; 187 unsigned LSDAEncoding = 188 Large ? dwarf::DW_EH_PE_absptr : dwarf::DW_EH_PE_udata4; 189 if (IsPIC) { 190 LSDAEncoding = dwarf::DW_EH_PE_pcrel | 191 (Large ? dwarf::DW_EH_PE_sdata8 : dwarf::DW_EH_PE_sdata4); 192 } 193 194 std::unique_ptr<MCDisassembler> DisAsm( 195 TheTarget->createMCDisassembler(*STI, *Ctx)); 196 197 if (!DisAsm) 198 return createStringError( 199 make_error_code(std::errc::not_supported), 200 Twine("BOLT-ERROR: no disassembler info for target ", TripleName)); 201 202 std::unique_ptr<const MCInstrAnalysis> MIA( 203 TheTarget->createMCInstrAnalysis(MII.get())); 204 if (!MIA) 205 return createStringError( 206 make_error_code(std::errc::not_supported), 207 Twine("BOLT-ERROR: failed to create instruction analysis for target ", 208 TripleName)); 209 210 int AsmPrinterVariant = AsmInfo->getAssemblerDialect(); 211 std::unique_ptr<MCInstPrinter> InstructionPrinter( 212 TheTarget->createMCInstPrinter(*TheTriple, AsmPrinterVariant, *AsmInfo, 213 *MII, *MRI)); 214 if (!InstructionPrinter) 215 return createStringError( 216 make_error_code(std::errc::not_supported), 217 Twine("BOLT-ERROR: no instruction printer for target ", TripleName)); 218 InstructionPrinter->setPrintImmHex(true); 219 220 std::unique_ptr<MCCodeEmitter> MCE( 221 TheTarget->createMCCodeEmitter(*MII, *Ctx)); 222 223 // Make sure we don't miss any output on core dumps. 224 outs().SetUnbuffered(); 225 errs().SetUnbuffered(); 226 dbgs().SetUnbuffered(); 227 228 auto BC = std::make_unique<BinaryContext>( 229 std::move(Ctx), std::move(DwCtx), std::move(TheTriple), TheTarget, 230 std::string(TripleName), std::move(MCE), std::move(MOFI), 231 std::move(AsmInfo), std::move(MII), std::move(STI), 232 std::move(InstructionPrinter), std::move(MIA), nullptr, std::move(MRI), 233 std::move(DisAsm)); 234 235 BC->LSDAEncoding = LSDAEncoding; 236 237 BC->MAB = std::unique_ptr<MCAsmBackend>( 238 BC->TheTarget->createMCAsmBackend(*BC->STI, *BC->MRI, MCTargetOptions())); 239 240 BC->setFilename(File->getFileName()); 241 242 BC->HasFixedLoadAddress = !IsPIC; 243 244 BC->SymbolicDisAsm = std::unique_ptr<MCDisassembler>( 245 BC->TheTarget->createMCDisassembler(*BC->STI, *BC->Ctx)); 246 247 if (!BC->SymbolicDisAsm) 248 return createStringError( 249 make_error_code(std::errc::not_supported), 250 Twine("BOLT-ERROR: no disassembler info for target ", TripleName)); 251 252 return std::move(BC); 253 } 254 255 bool BinaryContext::forceSymbolRelocations(StringRef SymbolName) const { 256 if (opts::HotText && 257 (SymbolName == "__hot_start" || SymbolName == "__hot_end")) 258 return true; 259 260 if (opts::HotData && 261 (SymbolName == "__hot_data_start" || SymbolName == "__hot_data_end")) 262 return true; 263 264 if (SymbolName == "_end") 265 return true; 266 267 return false; 268 } 269 270 std::unique_ptr<MCObjectWriter> 271 BinaryContext::createObjectWriter(raw_pwrite_stream &OS) { 272 return MAB->createObjectWriter(OS); 273 } 274 275 bool BinaryContext::validateObjectNesting() const { 276 auto Itr = BinaryDataMap.begin(); 277 auto End = BinaryDataMap.end(); 278 bool Valid = true; 279 while (Itr != End) { 280 auto Next = std::next(Itr); 281 while (Next != End && 282 Itr->second->getSection() == Next->second->getSection() && 283 Itr->second->containsRange(Next->second->getAddress(), 284 Next->second->getSize())) { 285 if (Next->second->Parent != Itr->second) { 286 errs() << "BOLT-WARNING: object nesting incorrect for:\n" 287 << "BOLT-WARNING: " << *Itr->second << "\n" 288 << "BOLT-WARNING: " << *Next->second << "\n"; 289 Valid = false; 290 } 291 ++Next; 292 } 293 Itr = Next; 294 } 295 return Valid; 296 } 297 298 bool BinaryContext::validateHoles() const { 299 bool Valid = true; 300 for (BinarySection &Section : sections()) { 301 for (const Relocation &Rel : Section.relocations()) { 302 uint64_t RelAddr = Rel.Offset + Section.getAddress(); 303 const BinaryData *BD = getBinaryDataContainingAddress(RelAddr); 304 if (!BD) { 305 errs() << "BOLT-WARNING: no BinaryData found for relocation at address" 306 << " 0x" << Twine::utohexstr(RelAddr) << " in " 307 << Section.getName() << "\n"; 308 Valid = false; 309 } else if (!BD->getAtomicRoot()) { 310 errs() << "BOLT-WARNING: no atomic BinaryData found for relocation at " 311 << "address 0x" << Twine::utohexstr(RelAddr) << " in " 312 << Section.getName() << "\n"; 313 Valid = false; 314 } 315 } 316 } 317 return Valid; 318 } 319 320 void BinaryContext::updateObjectNesting(BinaryDataMapType::iterator GAI) { 321 const uint64_t Address = GAI->second->getAddress(); 322 const uint64_t Size = GAI->second->getSize(); 323 324 auto fixParents = [&](BinaryDataMapType::iterator Itr, 325 BinaryData *NewParent) { 326 BinaryData *OldParent = Itr->second->Parent; 327 Itr->second->Parent = NewParent; 328 ++Itr; 329 while (Itr != BinaryDataMap.end() && OldParent && 330 Itr->second->Parent == OldParent) { 331 Itr->second->Parent = NewParent; 332 ++Itr; 333 } 334 }; 335 336 // Check if the previous symbol contains the newly added symbol. 337 if (GAI != BinaryDataMap.begin()) { 338 BinaryData *Prev = std::prev(GAI)->second; 339 while (Prev) { 340 if (Prev->getSection() == GAI->second->getSection() && 341 Prev->containsRange(Address, Size)) { 342 fixParents(GAI, Prev); 343 } else { 344 fixParents(GAI, nullptr); 345 } 346 Prev = Prev->Parent; 347 } 348 } 349 350 // Check if the newly added symbol contains any subsequent symbols. 351 if (Size != 0) { 352 BinaryData *BD = GAI->second->Parent ? GAI->second->Parent : GAI->second; 353 auto Itr = std::next(GAI); 354 while ( 355 Itr != BinaryDataMap.end() && 356 BD->containsRange(Itr->second->getAddress(), Itr->second->getSize())) { 357 Itr->second->Parent = BD; 358 ++Itr; 359 } 360 } 361 } 362 363 iterator_range<BinaryContext::binary_data_iterator> 364 BinaryContext::getSubBinaryData(BinaryData *BD) { 365 auto Start = std::next(BinaryDataMap.find(BD->getAddress())); 366 auto End = Start; 367 while (End != BinaryDataMap.end() && BD->isAncestorOf(End->second)) 368 ++End; 369 return make_range(Start, End); 370 } 371 372 std::pair<const MCSymbol *, uint64_t> 373 BinaryContext::handleAddressRef(uint64_t Address, BinaryFunction &BF, 374 bool IsPCRel) { 375 if (isAArch64()) { 376 // Check if this is an access to a constant island and create bookkeeping 377 // to keep track of it and emit it later as part of this function. 378 if (MCSymbol *IslandSym = BF.getOrCreateIslandAccess(Address)) 379 return std::make_pair(IslandSym, 0); 380 381 // Detect custom code written in assembly that refers to arbitrary 382 // constant islands from other functions. Write this reference so we 383 // can pull this constant island and emit it as part of this function 384 // too. 385 auto IslandIter = AddressToConstantIslandMap.lower_bound(Address); 386 387 if (IslandIter != AddressToConstantIslandMap.begin() && 388 (IslandIter == AddressToConstantIslandMap.end() || 389 IslandIter->first > Address)) 390 --IslandIter; 391 392 if (IslandIter != AddressToConstantIslandMap.end()) { 393 if (MCSymbol *IslandSym = 394 IslandIter->second->getOrCreateProxyIslandAccess(Address, BF)) { 395 BF.createIslandDependency(IslandSym, IslandIter->second); 396 return std::make_pair(IslandSym, 0); 397 } 398 } 399 } 400 401 // Note that the address does not necessarily have to reside inside 402 // a section, it could be an absolute address too. 403 ErrorOr<BinarySection &> Section = getSectionForAddress(Address); 404 if (Section && Section->isText()) { 405 if (BF.containsAddress(Address, /*UseMaxSize=*/isAArch64())) { 406 if (Address != BF.getAddress()) { 407 // The address could potentially escape. Mark it as another entry 408 // point into the function. 409 if (opts::Verbosity >= 1) { 410 outs() << "BOLT-INFO: potentially escaped address 0x" 411 << Twine::utohexstr(Address) << " in function " << BF << '\n'; 412 } 413 BF.HasInternalLabelReference = true; 414 return std::make_pair( 415 BF.addEntryPointAtOffset(Address - BF.getAddress()), 0); 416 } 417 } else { 418 addInterproceduralReference(&BF, Address); 419 } 420 } 421 422 // With relocations, catch jump table references outside of the basic block 423 // containing the indirect jump. 424 if (HasRelocations) { 425 const MemoryContentsType MemType = analyzeMemoryAt(Address, BF); 426 if (MemType == MemoryContentsType::POSSIBLE_PIC_JUMP_TABLE && IsPCRel) { 427 const MCSymbol *Symbol = 428 getOrCreateJumpTable(BF, Address, JumpTable::JTT_PIC); 429 430 return std::make_pair(Symbol, 0); 431 } 432 } 433 434 if (BinaryData *BD = getBinaryDataContainingAddress(Address)) 435 return std::make_pair(BD->getSymbol(), Address - BD->getAddress()); 436 437 // TODO: use DWARF info to get size/alignment here? 438 MCSymbol *TargetSymbol = getOrCreateGlobalSymbol(Address, "DATAat"); 439 LLVM_DEBUG(dbgs() << "Created symbol " << TargetSymbol->getName() << '\n'); 440 return std::make_pair(TargetSymbol, 0); 441 } 442 443 MemoryContentsType BinaryContext::analyzeMemoryAt(uint64_t Address, 444 BinaryFunction &BF) { 445 if (!isX86()) 446 return MemoryContentsType::UNKNOWN; 447 448 ErrorOr<BinarySection &> Section = getSectionForAddress(Address); 449 if (!Section) { 450 // No section - possibly an absolute address. Since we don't allow 451 // internal function addresses to escape the function scope - we 452 // consider it a tail call. 453 if (opts::Verbosity > 1) { 454 errs() << "BOLT-WARNING: no section for address 0x" 455 << Twine::utohexstr(Address) << " referenced from function " << BF 456 << '\n'; 457 } 458 return MemoryContentsType::UNKNOWN; 459 } 460 461 if (Section->isVirtual()) { 462 // The contents are filled at runtime. 463 return MemoryContentsType::UNKNOWN; 464 } 465 466 // No support for jump tables in code yet. 467 if (Section->isText()) 468 return MemoryContentsType::UNKNOWN; 469 470 // Start with checking for PIC jump table. We expect non-PIC jump tables 471 // to have high 32 bits set to 0. 472 if (analyzeJumpTable(Address, JumpTable::JTT_PIC, BF)) 473 return MemoryContentsType::POSSIBLE_PIC_JUMP_TABLE; 474 475 if (analyzeJumpTable(Address, JumpTable::JTT_NORMAL, BF)) 476 return MemoryContentsType::POSSIBLE_JUMP_TABLE; 477 478 return MemoryContentsType::UNKNOWN; 479 } 480 481 /// Check if <fragment restored name> == <parent restored name>.cold(.\d+)? 482 bool isPotentialFragmentByName(BinaryFunction &Fragment, 483 BinaryFunction &Parent) { 484 for (StringRef Name : Parent.getNames()) { 485 std::string NamePrefix = Regex::escape(NameResolver::restore(Name)); 486 std::string NameRegex = Twine(NamePrefix, "\\.cold(\\.[0-9]+)?").str(); 487 if (Fragment.hasRestoredNameRegex(NameRegex)) 488 return true; 489 } 490 return false; 491 } 492 493 bool BinaryContext::analyzeJumpTable( 494 const uint64_t Address, const JumpTable::JumpTableType Type, 495 BinaryFunction &BF, const uint64_t NextJTAddress, 496 JumpTable::AddressesType *EntriesAsAddress) { 497 // Is one of the targets __builtin_unreachable? 498 bool HasUnreachable = false; 499 500 // Number of targets other than __builtin_unreachable. 501 uint64_t NumRealEntries = 0; 502 503 auto addEntryAddress = [&](uint64_t EntryAddress) { 504 if (EntriesAsAddress) 505 EntriesAsAddress->emplace_back(EntryAddress); 506 }; 507 508 auto doesBelongToFunction = [&](const uint64_t Addr, 509 BinaryFunction *TargetBF) -> bool { 510 if (BF.containsAddress(Addr)) 511 return true; 512 // Nothing to do if we failed to identify the containing function. 513 if (!TargetBF) 514 return false; 515 // Case 1: check if BF is a fragment and TargetBF is its parent. 516 if (BF.isFragment()) { 517 // Parent function may or may not be already registered. 518 // Set parent link based on function name matching heuristic. 519 return registerFragment(BF, *TargetBF); 520 } 521 // Case 2: check if TargetBF is a fragment and BF is its parent. 522 return TargetBF->isFragment() && registerFragment(*TargetBF, BF); 523 }; 524 525 ErrorOr<BinarySection &> Section = getSectionForAddress(Address); 526 if (!Section) 527 return false; 528 529 // The upper bound is defined by containing object, section limits, and 530 // the next jump table in memory. 531 uint64_t UpperBound = Section->getEndAddress(); 532 const BinaryData *JumpTableBD = getBinaryDataAtAddress(Address); 533 if (JumpTableBD && JumpTableBD->getSize()) { 534 assert(JumpTableBD->getEndAddress() <= UpperBound && 535 "data object cannot cross a section boundary"); 536 UpperBound = JumpTableBD->getEndAddress(); 537 } 538 if (NextJTAddress) 539 UpperBound = std::min(NextJTAddress, UpperBound); 540 541 LLVM_DEBUG({ 542 using JTT = JumpTable::JumpTableType; 543 dbgs() << formatv("BOLT-DEBUG: analyzeJumpTable @{0:x} in {1}, JTT={2}\n", 544 Address, BF.getPrintName(), 545 Type == JTT::JTT_PIC ? "PIC" : "Normal"); 546 }); 547 const uint64_t EntrySize = getJumpTableEntrySize(Type); 548 for (uint64_t EntryAddress = Address; EntryAddress <= UpperBound - EntrySize; 549 EntryAddress += EntrySize) { 550 LLVM_DEBUG(dbgs() << " * Checking 0x" << Twine::utohexstr(EntryAddress) 551 << " -> "); 552 // Check if there's a proper relocation against the jump table entry. 553 if (HasRelocations) { 554 if (Type == JumpTable::JTT_PIC && 555 !DataPCRelocations.count(EntryAddress)) { 556 LLVM_DEBUG( 557 dbgs() << "FAIL: JTT_PIC table, no relocation for this address\n"); 558 break; 559 } 560 if (Type == JumpTable::JTT_NORMAL && !getRelocationAt(EntryAddress)) { 561 LLVM_DEBUG( 562 dbgs() 563 << "FAIL: JTT_NORMAL table, no relocation for this address\n"); 564 break; 565 } 566 } 567 568 const uint64_t Value = 569 (Type == JumpTable::JTT_PIC) 570 ? Address + *getSignedValueAtAddress(EntryAddress, EntrySize) 571 : *getPointerAtAddress(EntryAddress); 572 573 // __builtin_unreachable() case. 574 if (Value == BF.getAddress() + BF.getSize()) { 575 addEntryAddress(Value); 576 HasUnreachable = true; 577 LLVM_DEBUG(dbgs() << formatv("OK: {0:x} __builtin_unreachable\n", Value)); 578 continue; 579 } 580 581 // Function or one of its fragments. 582 BinaryFunction *TargetBF = getBinaryFunctionContainingAddress(Value); 583 584 // We assume that a jump table cannot have function start as an entry. 585 if (!doesBelongToFunction(Value, TargetBF) || Value == BF.getAddress()) { 586 LLVM_DEBUG({ 587 if (!BF.containsAddress(Value)) { 588 dbgs() << "FAIL: function doesn't contain this address\n"; 589 if (TargetBF) { 590 dbgs() << " ! function containing this address: " 591 << TargetBF->getPrintName() << '\n'; 592 if (TargetBF->isFragment()) { 593 dbgs() << " ! is a fragment"; 594 for (BinaryFunction *Parent : TargetBF->ParentFragments) 595 dbgs() << ", parent: " << Parent->getPrintName(); 596 dbgs() << '\n'; 597 } 598 } 599 } 600 if (Value == BF.getAddress()) 601 dbgs() << "FAIL: jump table cannot have function start as an entry\n"; 602 }); 603 break; 604 } 605 606 // Check there's an instruction at this offset. 607 if (TargetBF->getState() == BinaryFunction::State::Disassembled && 608 !TargetBF->getInstructionAtOffset(Value - TargetBF->getAddress())) { 609 LLVM_DEBUG(dbgs() << formatv("FAIL: no instruction at {0:x}\n", Value)); 610 break; 611 } 612 613 ++NumRealEntries; 614 LLVM_DEBUG(dbgs() << formatv("OK: {0:x} real entry\n", Value)); 615 616 if (TargetBF != &BF) 617 BF.setHasIndirectTargetToSplitFragment(true); 618 addEntryAddress(Value); 619 } 620 621 // It's a jump table if the number of real entries is more than 1, or there's 622 // one real entry and "unreachable" targets. If there are only multiple 623 // "unreachable" targets, then it's not a jump table. 624 return NumRealEntries + HasUnreachable >= 2; 625 } 626 627 void BinaryContext::populateJumpTables() { 628 LLVM_DEBUG(dbgs() << "DataPCRelocations: " << DataPCRelocations.size() 629 << '\n'); 630 for (auto JTI = JumpTables.begin(), JTE = JumpTables.end(); JTI != JTE; 631 ++JTI) { 632 JumpTable *JT = JTI->second; 633 634 bool NonSimpleParent = false; 635 for (BinaryFunction *BF : JT->Parents) 636 NonSimpleParent |= !BF->isSimple(); 637 if (NonSimpleParent) 638 continue; 639 640 uint64_t NextJTAddress = 0; 641 auto NextJTI = std::next(JTI); 642 if (NextJTI != JTE) 643 NextJTAddress = NextJTI->second->getAddress(); 644 645 const bool Success = 646 analyzeJumpTable(JT->getAddress(), JT->Type, *(JT->Parents[0]), 647 NextJTAddress, &JT->EntriesAsAddress); 648 if (!Success) { 649 LLVM_DEBUG({ 650 dbgs() << "failed to analyze "; 651 JT->print(dbgs()); 652 if (NextJTI != JTE) { 653 dbgs() << "next "; 654 NextJTI->second->print(dbgs()); 655 } 656 }); 657 llvm_unreachable("jump table heuristic failure"); 658 } 659 for (BinaryFunction *Frag : JT->Parents) { 660 for (uint64_t EntryAddress : JT->EntriesAsAddress) 661 // if target is builtin_unreachable 662 if (EntryAddress == Frag->getAddress() + Frag->getSize()) { 663 Frag->IgnoredBranches.emplace_back(EntryAddress - Frag->getAddress(), 664 Frag->getSize()); 665 } else if (EntryAddress >= Frag->getAddress() && 666 EntryAddress < Frag->getAddress() + Frag->getSize()) { 667 Frag->registerReferencedOffset(EntryAddress - Frag->getAddress()); 668 } 669 } 670 671 // In strict mode, erase PC-relative relocation record. Later we check that 672 // all such records are erased and thus have been accounted for. 673 if (opts::StrictMode && JT->Type == JumpTable::JTT_PIC) { 674 for (uint64_t Address = JT->getAddress(); 675 Address < JT->getAddress() + JT->getSize(); 676 Address += JT->EntrySize) { 677 DataPCRelocations.erase(DataPCRelocations.find(Address)); 678 } 679 } 680 681 // Mark to skip the function and all its fragments. 682 for (BinaryFunction *Frag : JT->Parents) 683 if (Frag->hasIndirectTargetToSplitFragment()) 684 addFragmentsToSkip(Frag); 685 } 686 687 if (opts::StrictMode && DataPCRelocations.size()) { 688 LLVM_DEBUG({ 689 dbgs() << DataPCRelocations.size() 690 << " unclaimed PC-relative relocations left in data:\n"; 691 for (uint64_t Reloc : DataPCRelocations) 692 dbgs() << Twine::utohexstr(Reloc) << '\n'; 693 }); 694 assert(0 && "unclaimed PC-relative relocations left in data\n"); 695 } 696 clearList(DataPCRelocations); 697 } 698 699 void BinaryContext::skipMarkedFragments() { 700 std::vector<BinaryFunction *> FragmentQueue; 701 // Copy the functions to FragmentQueue. 702 FragmentQueue.assign(FragmentsToSkip.begin(), FragmentsToSkip.end()); 703 auto addToWorklist = [&](BinaryFunction *Function) -> void { 704 if (FragmentsToSkip.count(Function)) 705 return; 706 FragmentQueue.push_back(Function); 707 addFragmentsToSkip(Function); 708 }; 709 // Functions containing split jump tables need to be skipped with all 710 // fragments (transitively). 711 for (size_t I = 0; I != FragmentQueue.size(); I++) { 712 BinaryFunction *BF = FragmentQueue[I]; 713 assert(FragmentsToSkip.count(BF) && 714 "internal error in traversing function fragments"); 715 if (opts::Verbosity >= 1) 716 errs() << "BOLT-WARNING: Ignoring " << BF->getPrintName() << '\n'; 717 BF->setSimple(false); 718 BF->setHasIndirectTargetToSplitFragment(true); 719 720 llvm::for_each(BF->Fragments, addToWorklist); 721 llvm::for_each(BF->ParentFragments, addToWorklist); 722 } 723 if (!FragmentsToSkip.empty()) 724 errs() << "BOLT-WARNING: skipped " << FragmentsToSkip.size() << " function" 725 << (FragmentsToSkip.size() == 1 ? "" : "s") 726 << " due to cold fragments\n"; 727 } 728 729 MCSymbol *BinaryContext::getOrCreateGlobalSymbol(uint64_t Address, Twine Prefix, 730 uint64_t Size, 731 uint16_t Alignment, 732 unsigned Flags) { 733 auto Itr = BinaryDataMap.find(Address); 734 if (Itr != BinaryDataMap.end()) { 735 assert(Itr->second->getSize() == Size || !Size); 736 return Itr->second->getSymbol(); 737 } 738 739 std::string Name = (Prefix + "0x" + Twine::utohexstr(Address)).str(); 740 assert(!GlobalSymbols.count(Name) && "created name is not unique"); 741 return registerNameAtAddress(Name, Address, Size, Alignment, Flags); 742 } 743 744 MCSymbol *BinaryContext::getOrCreateUndefinedGlobalSymbol(StringRef Name) { 745 return Ctx->getOrCreateSymbol(Name); 746 } 747 748 BinaryFunction *BinaryContext::createBinaryFunction( 749 const std::string &Name, BinarySection &Section, uint64_t Address, 750 uint64_t Size, uint64_t SymbolSize, uint16_t Alignment) { 751 auto Result = BinaryFunctions.emplace( 752 Address, BinaryFunction(Name, Section, Address, Size, *this)); 753 assert(Result.second == true && "unexpected duplicate function"); 754 BinaryFunction *BF = &Result.first->second; 755 registerNameAtAddress(Name, Address, SymbolSize ? SymbolSize : Size, 756 Alignment); 757 setSymbolToFunctionMap(BF->getSymbol(), BF); 758 return BF; 759 } 760 761 const MCSymbol * 762 BinaryContext::getOrCreateJumpTable(BinaryFunction &Function, uint64_t Address, 763 JumpTable::JumpTableType Type) { 764 auto isFragmentOf = [](BinaryFunction *Fragment, BinaryFunction *Parent) { 765 return (Fragment->isFragment() && Fragment->isParentFragment(Parent)); 766 }; 767 (void)isFragmentOf; 768 769 // Two fragments of same function access same jump table 770 if (JumpTable *JT = getJumpTableContainingAddress(Address)) { 771 assert(JT->Type == Type && "jump table types have to match"); 772 assert(Address == JT->getAddress() && "unexpected non-empty jump table"); 773 774 // Prevent associating a jump table to a specific fragment twice. 775 // This simple check arises from the assumption: no more than 2 fragments. 776 if (JT->Parents.size() == 1 && JT->Parents[0] != &Function) { 777 assert((isFragmentOf(JT->Parents[0], &Function) || 778 isFragmentOf(&Function, JT->Parents[0])) && 779 "cannot re-use jump table of a different function"); 780 // Duplicate the entry for the parent function for easy access 781 JT->Parents.push_back(&Function); 782 if (opts::Verbosity > 2) { 783 outs() << "BOLT-INFO: Multiple fragments access same jump table: " 784 << JT->Parents[0]->getPrintName() << "; " 785 << Function.getPrintName() << "\n"; 786 JT->print(outs()); 787 } 788 Function.JumpTables.emplace(Address, JT); 789 JT->Parents[0]->setHasIndirectTargetToSplitFragment(true); 790 JT->Parents[1]->setHasIndirectTargetToSplitFragment(true); 791 } 792 793 bool IsJumpTableParent = false; 794 (void)IsJumpTableParent; 795 for (BinaryFunction *Frag : JT->Parents) 796 if (Frag == &Function) 797 IsJumpTableParent = true; 798 assert(IsJumpTableParent && 799 "cannot re-use jump table of a different function"); 800 return JT->getFirstLabel(); 801 } 802 803 // Re-use the existing symbol if possible. 804 MCSymbol *JTLabel = nullptr; 805 if (BinaryData *Object = getBinaryDataAtAddress(Address)) { 806 if (!isInternalSymbolName(Object->getSymbol()->getName())) 807 JTLabel = Object->getSymbol(); 808 } 809 810 const uint64_t EntrySize = getJumpTableEntrySize(Type); 811 if (!JTLabel) { 812 const std::string JumpTableName = generateJumpTableName(Function, Address); 813 JTLabel = registerNameAtAddress(JumpTableName, Address, 0, EntrySize); 814 } 815 816 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: creating jump table " << JTLabel->getName() 817 << " in function " << Function << '\n'); 818 819 JumpTable *JT = new JumpTable(*JTLabel, Address, EntrySize, Type, 820 JumpTable::LabelMapType{{0, JTLabel}}, 821 *getSectionForAddress(Address)); 822 JT->Parents.push_back(&Function); 823 if (opts::Verbosity > 2) 824 JT->print(outs()); 825 JumpTables.emplace(Address, JT); 826 827 // Duplicate the entry for the parent function for easy access. 828 Function.JumpTables.emplace(Address, JT); 829 return JTLabel; 830 } 831 832 std::pair<uint64_t, const MCSymbol *> 833 BinaryContext::duplicateJumpTable(BinaryFunction &Function, JumpTable *JT, 834 const MCSymbol *OldLabel) { 835 auto L = scopeLock(); 836 unsigned Offset = 0; 837 bool Found = false; 838 for (std::pair<const unsigned, MCSymbol *> Elmt : JT->Labels) { 839 if (Elmt.second != OldLabel) 840 continue; 841 Offset = Elmt.first; 842 Found = true; 843 break; 844 } 845 assert(Found && "Label not found"); 846 (void)Found; 847 MCSymbol *NewLabel = Ctx->createNamedTempSymbol("duplicatedJT"); 848 JumpTable *NewJT = 849 new JumpTable(*NewLabel, JT->getAddress(), JT->EntrySize, JT->Type, 850 JumpTable::LabelMapType{{Offset, NewLabel}}, 851 *getSectionForAddress(JT->getAddress())); 852 NewJT->Parents = JT->Parents; 853 NewJT->Entries = JT->Entries; 854 NewJT->Counts = JT->Counts; 855 uint64_t JumpTableID = ++DuplicatedJumpTables; 856 // Invert it to differentiate from regular jump tables whose IDs are their 857 // addresses in the input binary memory space 858 JumpTableID = ~JumpTableID; 859 JumpTables.emplace(JumpTableID, NewJT); 860 Function.JumpTables.emplace(JumpTableID, NewJT); 861 return std::make_pair(JumpTableID, NewLabel); 862 } 863 864 std::string BinaryContext::generateJumpTableName(const BinaryFunction &BF, 865 uint64_t Address) { 866 size_t Id; 867 uint64_t Offset = 0; 868 if (const JumpTable *JT = BF.getJumpTableContainingAddress(Address)) { 869 Offset = Address - JT->getAddress(); 870 auto Itr = JT->Labels.find(Offset); 871 if (Itr != JT->Labels.end()) 872 return std::string(Itr->second->getName()); 873 Id = JumpTableIds.at(JT->getAddress()); 874 } else { 875 Id = JumpTableIds[Address] = BF.JumpTables.size(); 876 } 877 return ("JUMP_TABLE/" + BF.getOneName().str() + "." + std::to_string(Id) + 878 (Offset ? ("." + std::to_string(Offset)) : "")); 879 } 880 881 bool BinaryContext::hasValidCodePadding(const BinaryFunction &BF) { 882 // FIXME: aarch64 support is missing. 883 if (!isX86()) 884 return true; 885 886 if (BF.getSize() == BF.getMaxSize()) 887 return true; 888 889 ErrorOr<ArrayRef<unsigned char>> FunctionData = BF.getData(); 890 assert(FunctionData && "cannot get function as data"); 891 892 uint64_t Offset = BF.getSize(); 893 MCInst Instr; 894 uint64_t InstrSize = 0; 895 uint64_t InstrAddress = BF.getAddress() + Offset; 896 using std::placeholders::_1; 897 898 // Skip instructions that satisfy the predicate condition. 899 auto skipInstructions = [&](std::function<bool(const MCInst &)> Predicate) { 900 const uint64_t StartOffset = Offset; 901 for (; Offset < BF.getMaxSize(); 902 Offset += InstrSize, InstrAddress += InstrSize) { 903 if (!DisAsm->getInstruction(Instr, InstrSize, FunctionData->slice(Offset), 904 InstrAddress, nulls())) 905 break; 906 if (!Predicate(Instr)) 907 break; 908 } 909 910 return Offset - StartOffset; 911 }; 912 913 // Skip a sequence of zero bytes. 914 auto skipZeros = [&]() { 915 const uint64_t StartOffset = Offset; 916 for (; Offset < BF.getMaxSize(); ++Offset) 917 if ((*FunctionData)[Offset] != 0) 918 break; 919 920 return Offset - StartOffset; 921 }; 922 923 // Accept the whole padding area filled with breakpoints. 924 auto isBreakpoint = std::bind(&MCPlusBuilder::isBreakpoint, MIB.get(), _1); 925 if (skipInstructions(isBreakpoint) && Offset == BF.getMaxSize()) 926 return true; 927 928 auto isNoop = std::bind(&MCPlusBuilder::isNoop, MIB.get(), _1); 929 930 // Some functions have a jump to the next function or to the padding area 931 // inserted after the body. 932 auto isSkipJump = [&](const MCInst &Instr) { 933 uint64_t TargetAddress = 0; 934 if (MIB->isUnconditionalBranch(Instr) && 935 MIB->evaluateBranch(Instr, InstrAddress, InstrSize, TargetAddress)) { 936 if (TargetAddress >= InstrAddress + InstrSize && 937 TargetAddress <= BF.getAddress() + BF.getMaxSize()) { 938 return true; 939 } 940 } 941 return false; 942 }; 943 944 // Skip over nops, jumps, and zero padding. Allow interleaving (this happens). 945 while (skipInstructions(isNoop) || skipInstructions(isSkipJump) || 946 skipZeros()) 947 ; 948 949 if (Offset == BF.getMaxSize()) 950 return true; 951 952 if (opts::Verbosity >= 1) { 953 errs() << "BOLT-WARNING: bad padding at address 0x" 954 << Twine::utohexstr(BF.getAddress() + BF.getSize()) 955 << " starting at offset " << (Offset - BF.getSize()) 956 << " in function " << BF << '\n' 957 << FunctionData->slice(BF.getSize(), BF.getMaxSize() - BF.getSize()) 958 << '\n'; 959 } 960 961 return false; 962 } 963 964 void BinaryContext::adjustCodePadding() { 965 for (auto &BFI : BinaryFunctions) { 966 BinaryFunction &BF = BFI.second; 967 if (!shouldEmit(BF)) 968 continue; 969 970 if (!hasValidCodePadding(BF)) { 971 if (HasRelocations) { 972 if (opts::Verbosity >= 1) { 973 outs() << "BOLT-INFO: function " << BF 974 << " has invalid padding. Ignoring the function.\n"; 975 } 976 BF.setIgnored(); 977 } else { 978 BF.setMaxSize(BF.getSize()); 979 } 980 } 981 } 982 } 983 984 MCSymbol *BinaryContext::registerNameAtAddress(StringRef Name, uint64_t Address, 985 uint64_t Size, 986 uint16_t Alignment, 987 unsigned Flags) { 988 // Register the name with MCContext. 989 MCSymbol *Symbol = Ctx->getOrCreateSymbol(Name); 990 991 auto GAI = BinaryDataMap.find(Address); 992 BinaryData *BD; 993 if (GAI == BinaryDataMap.end()) { 994 ErrorOr<BinarySection &> SectionOrErr = getSectionForAddress(Address); 995 BinarySection &Section = 996 SectionOrErr ? SectionOrErr.get() : absoluteSection(); 997 BD = new BinaryData(*Symbol, Address, Size, Alignment ? Alignment : 1, 998 Section, Flags); 999 GAI = BinaryDataMap.emplace(Address, BD).first; 1000 GlobalSymbols[Name] = BD; 1001 updateObjectNesting(GAI); 1002 } else { 1003 BD = GAI->second; 1004 if (!BD->hasName(Name)) { 1005 GlobalSymbols[Name] = BD; 1006 BD->Symbols.push_back(Symbol); 1007 } 1008 } 1009 1010 return Symbol; 1011 } 1012 1013 const BinaryData * 1014 BinaryContext::getBinaryDataContainingAddressImpl(uint64_t Address) const { 1015 auto NI = BinaryDataMap.lower_bound(Address); 1016 auto End = BinaryDataMap.end(); 1017 if ((NI != End && Address == NI->first) || 1018 ((NI != BinaryDataMap.begin()) && (NI-- != BinaryDataMap.begin()))) { 1019 if (NI->second->containsAddress(Address)) 1020 return NI->second; 1021 1022 // If this is a sub-symbol, see if a parent data contains the address. 1023 const BinaryData *BD = NI->second->getParent(); 1024 while (BD) { 1025 if (BD->containsAddress(Address)) 1026 return BD; 1027 BD = BD->getParent(); 1028 } 1029 } 1030 return nullptr; 1031 } 1032 1033 bool BinaryContext::setBinaryDataSize(uint64_t Address, uint64_t Size) { 1034 auto NI = BinaryDataMap.find(Address); 1035 assert(NI != BinaryDataMap.end()); 1036 if (NI == BinaryDataMap.end()) 1037 return false; 1038 // TODO: it's possible that a jump table starts at the same address 1039 // as a larger blob of private data. When we set the size of the 1040 // jump table, it might be smaller than the total blob size. In this 1041 // case we just leave the original size since (currently) it won't really 1042 // affect anything. 1043 assert((!NI->second->Size || NI->second->Size == Size || 1044 (NI->second->isJumpTable() && NI->second->Size > Size)) && 1045 "can't change the size of a symbol that has already had its " 1046 "size set"); 1047 if (!NI->second->Size) { 1048 NI->second->Size = Size; 1049 updateObjectNesting(NI); 1050 return true; 1051 } 1052 return false; 1053 } 1054 1055 void BinaryContext::generateSymbolHashes() { 1056 auto isPadding = [](const BinaryData &BD) { 1057 StringRef Contents = BD.getSection().getContents(); 1058 StringRef SymData = Contents.substr(BD.getOffset(), BD.getSize()); 1059 return (BD.getName().startswith("HOLEat") || 1060 SymData.find_first_not_of(0) == StringRef::npos); 1061 }; 1062 1063 uint64_t NumCollisions = 0; 1064 for (auto &Entry : BinaryDataMap) { 1065 BinaryData &BD = *Entry.second; 1066 StringRef Name = BD.getName(); 1067 1068 if (!isInternalSymbolName(Name)) 1069 continue; 1070 1071 // First check if a non-anonymous alias exists and move it to the front. 1072 if (BD.getSymbols().size() > 1) { 1073 auto Itr = llvm::find_if(BD.getSymbols(), [&](const MCSymbol *Symbol) { 1074 return !isInternalSymbolName(Symbol->getName()); 1075 }); 1076 if (Itr != BD.getSymbols().end()) { 1077 size_t Idx = std::distance(BD.getSymbols().begin(), Itr); 1078 std::swap(BD.getSymbols()[0], BD.getSymbols()[Idx]); 1079 continue; 1080 } 1081 } 1082 1083 // We have to skip 0 size symbols since they will all collide. 1084 if (BD.getSize() == 0) { 1085 continue; 1086 } 1087 1088 const uint64_t Hash = BD.getSection().hash(BD); 1089 const size_t Idx = Name.find("0x"); 1090 std::string NewName = 1091 (Twine(Name.substr(0, Idx)) + "_" + Twine::utohexstr(Hash)).str(); 1092 if (getBinaryDataByName(NewName)) { 1093 // Ignore collisions for symbols that appear to be padding 1094 // (i.e. all zeros or a "hole") 1095 if (!isPadding(BD)) { 1096 if (opts::Verbosity) { 1097 errs() << "BOLT-WARNING: collision detected when hashing " << BD 1098 << " with new name (" << NewName << "), skipping.\n"; 1099 } 1100 ++NumCollisions; 1101 } 1102 continue; 1103 } 1104 BD.Symbols.insert(BD.Symbols.begin(), Ctx->getOrCreateSymbol(NewName)); 1105 GlobalSymbols[NewName] = &BD; 1106 } 1107 if (NumCollisions) { 1108 errs() << "BOLT-WARNING: " << NumCollisions 1109 << " collisions detected while hashing binary objects"; 1110 if (!opts::Verbosity) 1111 errs() << ". Use -v=1 to see the list."; 1112 errs() << '\n'; 1113 } 1114 } 1115 1116 bool BinaryContext::registerFragment(BinaryFunction &TargetFunction, 1117 BinaryFunction &Function) const { 1118 if (!isPotentialFragmentByName(TargetFunction, Function)) 1119 return false; 1120 assert(TargetFunction.isFragment() && "TargetFunction must be a fragment"); 1121 if (TargetFunction.isParentFragment(&Function)) 1122 return true; 1123 TargetFunction.addParentFragment(Function); 1124 Function.addFragment(TargetFunction); 1125 if (!HasRelocations) { 1126 TargetFunction.setSimple(false); 1127 Function.setSimple(false); 1128 } 1129 if (opts::Verbosity >= 1) { 1130 outs() << "BOLT-INFO: marking " << TargetFunction << " as a fragment of " 1131 << Function << '\n'; 1132 } 1133 return true; 1134 } 1135 1136 void BinaryContext::addAdrpAddRelocAArch64(BinaryFunction &BF, 1137 MCInst &LoadLowBits, 1138 MCInst &LoadHiBits, 1139 uint64_t Target) { 1140 const MCSymbol *TargetSymbol; 1141 uint64_t Addend = 0; 1142 std::tie(TargetSymbol, Addend) = handleAddressRef(Target, BF, 1143 /*IsPCRel*/ true); 1144 int64_t Val; 1145 MIB->replaceImmWithSymbolRef(LoadHiBits, TargetSymbol, Addend, Ctx.get(), Val, 1146 ELF::R_AARCH64_ADR_PREL_PG_HI21); 1147 MIB->replaceImmWithSymbolRef(LoadLowBits, TargetSymbol, Addend, Ctx.get(), 1148 Val, ELF::R_AARCH64_ADD_ABS_LO12_NC); 1149 } 1150 1151 bool BinaryContext::handleAArch64Veneer(uint64_t Address, bool MatchOnly) { 1152 BinaryFunction *TargetFunction = getBinaryFunctionContainingAddress(Address); 1153 if (TargetFunction) 1154 return false; 1155 1156 ErrorOr<BinarySection &> Section = getSectionForAddress(Address); 1157 assert(Section && "cannot get section for referenced address"); 1158 if (!Section->isText()) 1159 return false; 1160 1161 bool Ret = false; 1162 StringRef SectionContents = Section->getContents(); 1163 uint64_t Offset = Address - Section->getAddress(); 1164 const uint64_t MaxSize = SectionContents.size() - Offset; 1165 const uint8_t *Bytes = 1166 reinterpret_cast<const uint8_t *>(SectionContents.data()); 1167 ArrayRef<uint8_t> Data(Bytes + Offset, MaxSize); 1168 1169 auto matchVeneer = [&](BinaryFunction::InstrMapType &Instructions, 1170 MCInst &Instruction, uint64_t Offset, 1171 uint64_t AbsoluteInstrAddr, 1172 uint64_t TotalSize) -> bool { 1173 MCInst *TargetHiBits, *TargetLowBits; 1174 uint64_t TargetAddress, Count; 1175 Count = MIB->matchLinkerVeneer(Instructions.begin(), Instructions.end(), 1176 AbsoluteInstrAddr, Instruction, TargetHiBits, 1177 TargetLowBits, TargetAddress); 1178 if (!Count) 1179 return false; 1180 1181 if (MatchOnly) 1182 return true; 1183 1184 // NOTE The target symbol was created during disassemble's 1185 // handleExternalReference 1186 const MCSymbol *VeneerSymbol = getOrCreateGlobalSymbol(Address, "FUNCat"); 1187 BinaryFunction *Veneer = createBinaryFunction(VeneerSymbol->getName().str(), 1188 *Section, Address, TotalSize); 1189 addAdrpAddRelocAArch64(*Veneer, *TargetLowBits, *TargetHiBits, 1190 TargetAddress); 1191 MIB->addAnnotation(Instruction, "AArch64Veneer", true); 1192 Veneer->addInstruction(Offset, std::move(Instruction)); 1193 --Count; 1194 for (auto It = std::prev(Instructions.end()); Count != 0; 1195 It = std::prev(It), --Count) { 1196 MIB->addAnnotation(It->second, "AArch64Veneer", true); 1197 Veneer->addInstruction(It->first, std::move(It->second)); 1198 } 1199 1200 Veneer->getOrCreateLocalLabel(Address); 1201 Veneer->setMaxSize(TotalSize); 1202 Veneer->updateState(BinaryFunction::State::Disassembled); 1203 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: handling veneer function at 0x" << Address 1204 << "\n"); 1205 return true; 1206 }; 1207 1208 uint64_t Size = 0, TotalSize = 0; 1209 BinaryFunction::InstrMapType VeneerInstructions; 1210 for (Offset = 0; Offset < MaxSize; Offset += Size) { 1211 MCInst Instruction; 1212 const uint64_t AbsoluteInstrAddr = Address + Offset; 1213 if (!SymbolicDisAsm->getInstruction(Instruction, Size, Data.slice(Offset), 1214 AbsoluteInstrAddr, nulls())) 1215 break; 1216 1217 TotalSize += Size; 1218 if (MIB->isBranch(Instruction)) { 1219 Ret = matchVeneer(VeneerInstructions, Instruction, Offset, 1220 AbsoluteInstrAddr, TotalSize); 1221 break; 1222 } 1223 1224 VeneerInstructions.emplace(Offset, std::move(Instruction)); 1225 } 1226 1227 return Ret; 1228 } 1229 1230 void BinaryContext::processInterproceduralReferences() { 1231 for (const std::pair<BinaryFunction *, uint64_t> &It : 1232 InterproceduralReferences) { 1233 BinaryFunction &Function = *It.first; 1234 uint64_t Address = It.second; 1235 if (!Address || Function.isIgnored()) 1236 continue; 1237 1238 BinaryFunction *TargetFunction = 1239 getBinaryFunctionContainingAddress(Address); 1240 if (&Function == TargetFunction) 1241 continue; 1242 1243 if (TargetFunction) { 1244 if (TargetFunction->isFragment() && 1245 !registerFragment(*TargetFunction, Function)) { 1246 errs() << "BOLT-WARNING: interprocedural reference between unrelated " 1247 "fragments: " 1248 << Function.getPrintName() << " and " 1249 << TargetFunction->getPrintName() << '\n'; 1250 } 1251 if (uint64_t Offset = Address - TargetFunction->getAddress()) 1252 TargetFunction->addEntryPointAtOffset(Offset); 1253 1254 continue; 1255 } 1256 1257 // Check if address falls in function padding space - this could be 1258 // unmarked data in code. In this case adjust the padding space size. 1259 ErrorOr<BinarySection &> Section = getSectionForAddress(Address); 1260 assert(Section && "cannot get section for referenced address"); 1261 1262 if (!Section->isText()) 1263 continue; 1264 1265 // PLT requires special handling and could be ignored in this context. 1266 StringRef SectionName = Section->getName(); 1267 if (SectionName == ".plt" || SectionName == ".plt.got") 1268 continue; 1269 1270 // Check if it is aarch64 veneer written at Address 1271 if (isAArch64() && handleAArch64Veneer(Address)) 1272 continue; 1273 1274 if (opts::processAllFunctions()) { 1275 errs() << "BOLT-ERROR: cannot process binaries with unmarked " 1276 << "object in code at address 0x" << Twine::utohexstr(Address) 1277 << " belonging to section " << SectionName << " in current mode\n"; 1278 exit(1); 1279 } 1280 1281 TargetFunction = getBinaryFunctionContainingAddress(Address, 1282 /*CheckPastEnd=*/false, 1283 /*UseMaxSize=*/true); 1284 // We are not going to overwrite non-simple functions, but for simple 1285 // ones - adjust the padding size. 1286 if (TargetFunction && TargetFunction->isSimple()) { 1287 errs() << "BOLT-WARNING: function " << *TargetFunction 1288 << " has an object detected in a padding region at address 0x" 1289 << Twine::utohexstr(Address) << '\n'; 1290 TargetFunction->setMaxSize(TargetFunction->getSize()); 1291 } 1292 } 1293 1294 InterproceduralReferences.clear(); 1295 } 1296 1297 void BinaryContext::postProcessSymbolTable() { 1298 fixBinaryDataHoles(); 1299 bool Valid = true; 1300 for (auto &Entry : BinaryDataMap) { 1301 BinaryData *BD = Entry.second; 1302 if ((BD->getName().startswith("SYMBOLat") || 1303 BD->getName().startswith("DATAat")) && 1304 !BD->getParent() && !BD->getSize() && !BD->isAbsolute() && 1305 BD->getSection()) { 1306 errs() << "BOLT-WARNING: zero-sized top level symbol: " << *BD << "\n"; 1307 Valid = false; 1308 } 1309 } 1310 assert(Valid); 1311 (void)Valid; 1312 generateSymbolHashes(); 1313 } 1314 1315 void BinaryContext::foldFunction(BinaryFunction &ChildBF, 1316 BinaryFunction &ParentBF) { 1317 assert(!ChildBF.isMultiEntry() && !ParentBF.isMultiEntry() && 1318 "cannot merge functions with multiple entry points"); 1319 1320 std::unique_lock<std::shared_timed_mutex> WriteCtxLock(CtxMutex, 1321 std::defer_lock); 1322 std::unique_lock<std::shared_timed_mutex> WriteSymbolMapLock( 1323 SymbolToFunctionMapMutex, std::defer_lock); 1324 1325 const StringRef ChildName = ChildBF.getOneName(); 1326 1327 // Move symbols over and update bookkeeping info. 1328 for (MCSymbol *Symbol : ChildBF.getSymbols()) { 1329 ParentBF.getSymbols().push_back(Symbol); 1330 WriteSymbolMapLock.lock(); 1331 SymbolToFunctionMap[Symbol] = &ParentBF; 1332 WriteSymbolMapLock.unlock(); 1333 // NB: there's no need to update BinaryDataMap and GlobalSymbols. 1334 } 1335 ChildBF.getSymbols().clear(); 1336 1337 // Move other names the child function is known under. 1338 llvm::move(ChildBF.Aliases, std::back_inserter(ParentBF.Aliases)); 1339 ChildBF.Aliases.clear(); 1340 1341 if (HasRelocations) { 1342 // Merge execution counts of ChildBF into those of ParentBF. 1343 // Without relocations, we cannot reliably merge profiles as both functions 1344 // continue to exist and either one can be executed. 1345 ChildBF.mergeProfileDataInto(ParentBF); 1346 1347 std::shared_lock<std::shared_timed_mutex> ReadBfsLock(BinaryFunctionsMutex, 1348 std::defer_lock); 1349 std::unique_lock<std::shared_timed_mutex> WriteBfsLock(BinaryFunctionsMutex, 1350 std::defer_lock); 1351 // Remove ChildBF from the global set of functions in relocs mode. 1352 ReadBfsLock.lock(); 1353 auto FI = BinaryFunctions.find(ChildBF.getAddress()); 1354 ReadBfsLock.unlock(); 1355 1356 assert(FI != BinaryFunctions.end() && "function not found"); 1357 assert(&ChildBF == &FI->second && "function mismatch"); 1358 1359 WriteBfsLock.lock(); 1360 ChildBF.clearDisasmState(); 1361 FI = BinaryFunctions.erase(FI); 1362 WriteBfsLock.unlock(); 1363 1364 } else { 1365 // In non-relocation mode we keep the function, but rename it. 1366 std::string NewName = "__ICF_" + ChildName.str(); 1367 1368 WriteCtxLock.lock(); 1369 ChildBF.getSymbols().push_back(Ctx->getOrCreateSymbol(NewName)); 1370 WriteCtxLock.unlock(); 1371 1372 ChildBF.setFolded(&ParentBF); 1373 } 1374 } 1375 1376 void BinaryContext::fixBinaryDataHoles() { 1377 assert(validateObjectNesting() && "object nesting inconsitency detected"); 1378 1379 for (BinarySection &Section : allocatableSections()) { 1380 std::vector<std::pair<uint64_t, uint64_t>> Holes; 1381 1382 auto isNotHole = [&Section](const binary_data_iterator &Itr) { 1383 BinaryData *BD = Itr->second; 1384 bool isHole = (!BD->getParent() && !BD->getSize() && BD->isObject() && 1385 (BD->getName().startswith("SYMBOLat0x") || 1386 BD->getName().startswith("DATAat0x") || 1387 BD->getName().startswith("ANONYMOUS"))); 1388 return !isHole && BD->getSection() == Section && !BD->getParent(); 1389 }; 1390 1391 auto BDStart = BinaryDataMap.begin(); 1392 auto BDEnd = BinaryDataMap.end(); 1393 auto Itr = FilteredBinaryDataIterator(isNotHole, BDStart, BDEnd); 1394 auto End = FilteredBinaryDataIterator(isNotHole, BDEnd, BDEnd); 1395 1396 uint64_t EndAddress = Section.getAddress(); 1397 1398 while (Itr != End) { 1399 if (Itr->second->getAddress() > EndAddress) { 1400 uint64_t Gap = Itr->second->getAddress() - EndAddress; 1401 Holes.emplace_back(EndAddress, Gap); 1402 } 1403 EndAddress = Itr->second->getEndAddress(); 1404 ++Itr; 1405 } 1406 1407 if (EndAddress < Section.getEndAddress()) 1408 Holes.emplace_back(EndAddress, Section.getEndAddress() - EndAddress); 1409 1410 // If there is already a symbol at the start of the hole, grow that symbol 1411 // to cover the rest. Otherwise, create a new symbol to cover the hole. 1412 for (std::pair<uint64_t, uint64_t> &Hole : Holes) { 1413 BinaryData *BD = getBinaryDataAtAddress(Hole.first); 1414 if (BD) { 1415 // BD->getSection() can be != Section if there are sections that 1416 // overlap. In this case it is probably safe to just skip the holes 1417 // since the overlapping section will not(?) have any symbols in it. 1418 if (BD->getSection() == Section) 1419 setBinaryDataSize(Hole.first, Hole.second); 1420 } else { 1421 getOrCreateGlobalSymbol(Hole.first, "HOLEat", Hole.second, 1); 1422 } 1423 } 1424 } 1425 1426 assert(validateObjectNesting() && "object nesting inconsitency detected"); 1427 assert(validateHoles() && "top level hole detected in object map"); 1428 } 1429 1430 void BinaryContext::printGlobalSymbols(raw_ostream &OS) const { 1431 const BinarySection *CurrentSection = nullptr; 1432 bool FirstSection = true; 1433 1434 for (auto &Entry : BinaryDataMap) { 1435 const BinaryData *BD = Entry.second; 1436 const BinarySection &Section = BD->getSection(); 1437 if (FirstSection || Section != *CurrentSection) { 1438 uint64_t Address, Size; 1439 StringRef Name = Section.getName(); 1440 if (Section) { 1441 Address = Section.getAddress(); 1442 Size = Section.getSize(); 1443 } else { 1444 Address = BD->getAddress(); 1445 Size = BD->getSize(); 1446 } 1447 OS << "BOLT-INFO: Section " << Name << ", " 1448 << "0x" + Twine::utohexstr(Address) << ":" 1449 << "0x" + Twine::utohexstr(Address + Size) << "/" << Size << "\n"; 1450 CurrentSection = &Section; 1451 FirstSection = false; 1452 } 1453 1454 OS << "BOLT-INFO: "; 1455 const BinaryData *P = BD->getParent(); 1456 while (P) { 1457 OS << " "; 1458 P = P->getParent(); 1459 } 1460 OS << *BD << "\n"; 1461 } 1462 } 1463 1464 Expected<unsigned> BinaryContext::getDwarfFile( 1465 StringRef Directory, StringRef FileName, unsigned FileNumber, 1466 Optional<MD5::MD5Result> Checksum, Optional<StringRef> Source, 1467 unsigned CUID, unsigned DWARFVersion) { 1468 DwarfLineTable &Table = DwarfLineTablesCUMap[CUID]; 1469 return Table.tryGetFile(Directory, FileName, Checksum, Source, DWARFVersion, 1470 FileNumber); 1471 } 1472 1473 unsigned BinaryContext::addDebugFilenameToUnit(const uint32_t DestCUID, 1474 const uint32_t SrcCUID, 1475 unsigned FileIndex) { 1476 DWARFCompileUnit *SrcUnit = DwCtx->getCompileUnitForOffset(SrcCUID); 1477 const DWARFDebugLine::LineTable *LineTable = 1478 DwCtx->getLineTableForUnit(SrcUnit); 1479 const std::vector<DWARFDebugLine::FileNameEntry> &FileNames = 1480 LineTable->Prologue.FileNames; 1481 // Dir indexes start at 1, as DWARF file numbers, and a dir index 0 1482 // means empty dir. 1483 assert(FileIndex > 0 && FileIndex <= FileNames.size() && 1484 "FileIndex out of range for the compilation unit."); 1485 StringRef Dir = ""; 1486 if (FileNames[FileIndex - 1].DirIdx != 0) { 1487 if (Optional<const char *> DirName = dwarf::toString( 1488 LineTable->Prologue 1489 .IncludeDirectories[FileNames[FileIndex - 1].DirIdx - 1])) { 1490 Dir = *DirName; 1491 } 1492 } 1493 StringRef FileName = ""; 1494 if (Optional<const char *> FName = 1495 dwarf::toString(FileNames[FileIndex - 1].Name)) 1496 FileName = *FName; 1497 assert(FileName != ""); 1498 DWARFCompileUnit *DstUnit = DwCtx->getCompileUnitForOffset(DestCUID); 1499 return cantFail(getDwarfFile(Dir, FileName, 0, None, None, DestCUID, 1500 DstUnit->getVersion())); 1501 } 1502 1503 std::vector<BinaryFunction *> BinaryContext::getSortedFunctions() { 1504 std::vector<BinaryFunction *> SortedFunctions(BinaryFunctions.size()); 1505 llvm::transform(BinaryFunctions, SortedFunctions.begin(), 1506 [](std::pair<const uint64_t, BinaryFunction> &BFI) { 1507 return &BFI.second; 1508 }); 1509 1510 llvm::stable_sort(SortedFunctions, 1511 [](const BinaryFunction *A, const BinaryFunction *B) { 1512 if (A->hasValidIndex() && B->hasValidIndex()) { 1513 return A->getIndex() < B->getIndex(); 1514 } 1515 return A->hasValidIndex(); 1516 }); 1517 return SortedFunctions; 1518 } 1519 1520 std::vector<BinaryFunction *> BinaryContext::getAllBinaryFunctions() { 1521 std::vector<BinaryFunction *> AllFunctions; 1522 AllFunctions.reserve(BinaryFunctions.size() + InjectedBinaryFunctions.size()); 1523 llvm::transform(BinaryFunctions, std::back_inserter(AllFunctions), 1524 [](std::pair<const uint64_t, BinaryFunction> &BFI) { 1525 return &BFI.second; 1526 }); 1527 llvm::copy(InjectedBinaryFunctions, std::back_inserter(AllFunctions)); 1528 1529 return AllFunctions; 1530 } 1531 1532 Optional<DWARFUnit *> BinaryContext::getDWOCU(uint64_t DWOId) { 1533 auto Iter = DWOCUs.find(DWOId); 1534 if (Iter == DWOCUs.end()) 1535 return None; 1536 1537 return Iter->second; 1538 } 1539 1540 DWARFContext *BinaryContext::getDWOContext() const { 1541 if (DWOCUs.empty()) 1542 return nullptr; 1543 return &DWOCUs.begin()->second->getContext(); 1544 } 1545 1546 /// Handles DWO sections that can either be in .o, .dwo or .dwp files. 1547 void BinaryContext::preprocessDWODebugInfo() { 1548 for (const std::unique_ptr<DWARFUnit> &CU : DwCtx->compile_units()) { 1549 DWARFUnit *const DwarfUnit = CU.get(); 1550 if (llvm::Optional<uint64_t> DWOId = DwarfUnit->getDWOId()) { 1551 DWARFUnit *DWOCU = DwarfUnit->getNonSkeletonUnitDIE(false).getDwarfUnit(); 1552 if (!DWOCU->isDWOUnit()) { 1553 std::string DWOName = dwarf::toString( 1554 DwarfUnit->getUnitDIE().find( 1555 {dwarf::DW_AT_dwo_name, dwarf::DW_AT_GNU_dwo_name}), 1556 ""); 1557 outs() << "BOLT-WARNING: Debug Fission: DWO debug information for " 1558 << DWOName 1559 << " was not retrieved and won't be updated. Please check " 1560 "relative path.\n"; 1561 continue; 1562 } 1563 DWOCUs[*DWOId] = DWOCU; 1564 } 1565 } 1566 } 1567 1568 void BinaryContext::preprocessDebugInfo() { 1569 struct CURange { 1570 uint64_t LowPC; 1571 uint64_t HighPC; 1572 DWARFUnit *Unit; 1573 1574 bool operator<(const CURange &Other) const { return LowPC < Other.LowPC; } 1575 }; 1576 1577 // Building a map of address ranges to CUs similar to .debug_aranges and use 1578 // it to assign CU to functions. 1579 std::vector<CURange> AllRanges; 1580 AllRanges.reserve(DwCtx->getNumCompileUnits()); 1581 for (const std::unique_ptr<DWARFUnit> &CU : DwCtx->compile_units()) { 1582 Expected<DWARFAddressRangesVector> RangesOrError = 1583 CU->getUnitDIE().getAddressRanges(); 1584 if (!RangesOrError) { 1585 consumeError(RangesOrError.takeError()); 1586 continue; 1587 } 1588 for (DWARFAddressRange &Range : *RangesOrError) { 1589 // Parts of the debug info could be invalidated due to corresponding code 1590 // being removed from the binary by the linker. Hence we check if the 1591 // address is a valid one. 1592 if (containsAddress(Range.LowPC)) 1593 AllRanges.emplace_back(CURange{Range.LowPC, Range.HighPC, CU.get()}); 1594 } 1595 1596 ContainsDwarf5 |= CU->getVersion() >= 5; 1597 ContainsDwarfLegacy |= CU->getVersion() < 5; 1598 } 1599 1600 llvm::sort(AllRanges); 1601 for (auto &KV : BinaryFunctions) { 1602 const uint64_t FunctionAddress = KV.first; 1603 BinaryFunction &Function = KV.second; 1604 1605 auto It = llvm::partition_point( 1606 AllRanges, [=](CURange R) { return R.HighPC <= FunctionAddress; }); 1607 if (It != AllRanges.end() && It->LowPC <= FunctionAddress) 1608 Function.setDWARFUnit(It->Unit); 1609 } 1610 1611 // Discover units with debug info that needs to be updated. 1612 for (const auto &KV : BinaryFunctions) { 1613 const BinaryFunction &BF = KV.second; 1614 if (shouldEmit(BF) && BF.getDWARFUnit()) 1615 ProcessedCUs.insert(BF.getDWARFUnit()); 1616 } 1617 1618 // Clear debug info for functions from units that we are not going to process. 1619 for (auto &KV : BinaryFunctions) { 1620 BinaryFunction &BF = KV.second; 1621 if (BF.getDWARFUnit() && !ProcessedCUs.count(BF.getDWARFUnit())) 1622 BF.setDWARFUnit(nullptr); 1623 } 1624 1625 if (opts::Verbosity >= 1) { 1626 outs() << "BOLT-INFO: " << ProcessedCUs.size() << " out of " 1627 << DwCtx->getNumCompileUnits() << " CUs will be updated\n"; 1628 } 1629 1630 preprocessDWODebugInfo(); 1631 1632 // Populate MCContext with DWARF files from all units. 1633 StringRef GlobalPrefix = AsmInfo->getPrivateGlobalPrefix(); 1634 for (const std::unique_ptr<DWARFUnit> &CU : DwCtx->compile_units()) { 1635 const uint64_t CUID = CU->getOffset(); 1636 DwarfLineTable &BinaryLineTable = getDwarfLineTable(CUID); 1637 BinaryLineTable.setLabel(Ctx->getOrCreateSymbol( 1638 GlobalPrefix + "line_table_start" + Twine(CUID))); 1639 1640 if (!ProcessedCUs.count(CU.get())) 1641 continue; 1642 1643 const DWARFDebugLine::LineTable *LineTable = 1644 DwCtx->getLineTableForUnit(CU.get()); 1645 const std::vector<DWARFDebugLine::FileNameEntry> &FileNames = 1646 LineTable->Prologue.FileNames; 1647 1648 uint16_t DwarfVersion = LineTable->Prologue.getVersion(); 1649 if (DwarfVersion >= 5) { 1650 Optional<MD5::MD5Result> Checksum; 1651 if (LineTable->Prologue.ContentTypes.HasMD5) 1652 Checksum = LineTable->Prologue.FileNames[0].Checksum; 1653 Optional<const char *> Name = 1654 dwarf::toString(CU->getUnitDIE().find(dwarf::DW_AT_name), nullptr); 1655 if (Optional<uint64_t> DWOID = CU->getDWOId()) { 1656 auto Iter = DWOCUs.find(*DWOID); 1657 assert(Iter != DWOCUs.end() && "DWO CU was not found."); 1658 Name = dwarf::toString( 1659 Iter->second->getUnitDIE().find(dwarf::DW_AT_name), nullptr); 1660 } 1661 BinaryLineTable.setRootFile(CU->getCompilationDir(), *Name, Checksum, 1662 None); 1663 } 1664 1665 BinaryLineTable.setDwarfVersion(DwarfVersion); 1666 1667 // Assign a unique label to every line table, one per CU. 1668 // Make sure empty debug line tables are registered too. 1669 if (FileNames.empty()) { 1670 cantFail( 1671 getDwarfFile("", "<unknown>", 0, None, None, CUID, DwarfVersion)); 1672 continue; 1673 } 1674 const uint32_t Offset = DwarfVersion < 5 ? 1 : 0; 1675 for (size_t I = 0, Size = FileNames.size(); I != Size; ++I) { 1676 // Dir indexes start at 1, as DWARF file numbers, and a dir index 0 1677 // means empty dir. 1678 StringRef Dir = ""; 1679 if (FileNames[I].DirIdx != 0 || DwarfVersion >= 5) 1680 if (Optional<const char *> DirName = dwarf::toString( 1681 LineTable->Prologue 1682 .IncludeDirectories[FileNames[I].DirIdx - Offset])) 1683 Dir = *DirName; 1684 StringRef FileName = ""; 1685 if (Optional<const char *> FName = dwarf::toString(FileNames[I].Name)) 1686 FileName = *FName; 1687 assert(FileName != ""); 1688 Optional<MD5::MD5Result> Checksum; 1689 if (DwarfVersion >= 5 && LineTable->Prologue.ContentTypes.HasMD5) 1690 Checksum = LineTable->Prologue.FileNames[I].Checksum; 1691 cantFail( 1692 getDwarfFile(Dir, FileName, 0, Checksum, None, CUID, DwarfVersion)); 1693 } 1694 } 1695 } 1696 1697 bool BinaryContext::shouldEmit(const BinaryFunction &Function) const { 1698 if (Function.isPseudo()) 1699 return false; 1700 1701 if (opts::processAllFunctions()) 1702 return true; 1703 1704 if (Function.isIgnored()) 1705 return false; 1706 1707 // In relocation mode we will emit non-simple functions with CFG. 1708 // If the function does not have a CFG it should be marked as ignored. 1709 return HasRelocations || Function.isSimple(); 1710 } 1711 1712 void BinaryContext::printCFI(raw_ostream &OS, const MCCFIInstruction &Inst) { 1713 uint32_t Operation = Inst.getOperation(); 1714 switch (Operation) { 1715 case MCCFIInstruction::OpSameValue: 1716 OS << "OpSameValue Reg" << Inst.getRegister(); 1717 break; 1718 case MCCFIInstruction::OpRememberState: 1719 OS << "OpRememberState"; 1720 break; 1721 case MCCFIInstruction::OpRestoreState: 1722 OS << "OpRestoreState"; 1723 break; 1724 case MCCFIInstruction::OpOffset: 1725 OS << "OpOffset Reg" << Inst.getRegister() << " " << Inst.getOffset(); 1726 break; 1727 case MCCFIInstruction::OpDefCfaRegister: 1728 OS << "OpDefCfaRegister Reg" << Inst.getRegister(); 1729 break; 1730 case MCCFIInstruction::OpDefCfaOffset: 1731 OS << "OpDefCfaOffset " << Inst.getOffset(); 1732 break; 1733 case MCCFIInstruction::OpDefCfa: 1734 OS << "OpDefCfa Reg" << Inst.getRegister() << " " << Inst.getOffset(); 1735 break; 1736 case MCCFIInstruction::OpRelOffset: 1737 OS << "OpRelOffset Reg" << Inst.getRegister() << " " << Inst.getOffset(); 1738 break; 1739 case MCCFIInstruction::OpAdjustCfaOffset: 1740 OS << "OfAdjustCfaOffset " << Inst.getOffset(); 1741 break; 1742 case MCCFIInstruction::OpEscape: 1743 OS << "OpEscape"; 1744 break; 1745 case MCCFIInstruction::OpRestore: 1746 OS << "OpRestore Reg" << Inst.getRegister(); 1747 break; 1748 case MCCFIInstruction::OpUndefined: 1749 OS << "OpUndefined Reg" << Inst.getRegister(); 1750 break; 1751 case MCCFIInstruction::OpRegister: 1752 OS << "OpRegister Reg" << Inst.getRegister() << " Reg" 1753 << Inst.getRegister2(); 1754 break; 1755 case MCCFIInstruction::OpWindowSave: 1756 OS << "OpWindowSave"; 1757 break; 1758 case MCCFIInstruction::OpGnuArgsSize: 1759 OS << "OpGnuArgsSize"; 1760 break; 1761 default: 1762 OS << "Op#" << Operation; 1763 break; 1764 } 1765 } 1766 1767 MarkerSymType BinaryContext::getMarkerType(const SymbolRef &Symbol) const { 1768 // For aarch64, the ABI defines mapping symbols so we identify data in the 1769 // code section (see IHI0056B). $x identifies a symbol starting code or the 1770 // end of a data chunk inside code, $d indentifies start of data. 1771 if (!isAArch64() || ELFSymbolRef(Symbol).getSize()) 1772 return MarkerSymType::NONE; 1773 1774 Expected<StringRef> NameOrError = Symbol.getName(); 1775 Expected<object::SymbolRef::Type> TypeOrError = Symbol.getType(); 1776 1777 if (!TypeOrError || !NameOrError) 1778 return MarkerSymType::NONE; 1779 1780 if (*TypeOrError != SymbolRef::ST_Unknown) 1781 return MarkerSymType::NONE; 1782 1783 if (*NameOrError == "$x" || NameOrError->startswith("$x.")) 1784 return MarkerSymType::CODE; 1785 1786 if (*NameOrError == "$d" || NameOrError->startswith("$d.")) 1787 return MarkerSymType::DATA; 1788 1789 return MarkerSymType::NONE; 1790 } 1791 1792 bool BinaryContext::isMarker(const SymbolRef &Symbol) const { 1793 return getMarkerType(Symbol) != MarkerSymType::NONE; 1794 } 1795 1796 static void printDebugInfo(raw_ostream &OS, const MCInst &Instruction, 1797 const BinaryFunction *Function, 1798 DWARFContext *DwCtx) { 1799 DebugLineTableRowRef RowRef = 1800 DebugLineTableRowRef::fromSMLoc(Instruction.getLoc()); 1801 if (RowRef == DebugLineTableRowRef::NULL_ROW) 1802 return; 1803 1804 const DWARFDebugLine::LineTable *LineTable; 1805 if (Function && Function->getDWARFUnit() && 1806 Function->getDWARFUnit()->getOffset() == RowRef.DwCompileUnitIndex) { 1807 LineTable = Function->getDWARFLineTable(); 1808 } else { 1809 LineTable = DwCtx->getLineTableForUnit( 1810 DwCtx->getCompileUnitForOffset(RowRef.DwCompileUnitIndex)); 1811 } 1812 assert(LineTable && "line table expected for instruction with debug info"); 1813 1814 const DWARFDebugLine::Row &Row = LineTable->Rows[RowRef.RowIndex - 1]; 1815 StringRef FileName = ""; 1816 if (Optional<const char *> FName = 1817 dwarf::toString(LineTable->Prologue.FileNames[Row.File - 1].Name)) 1818 FileName = *FName; 1819 OS << " # debug line " << FileName << ":" << Row.Line; 1820 if (Row.Column) 1821 OS << ":" << Row.Column; 1822 if (Row.Discriminator) 1823 OS << " discriminator:" << Row.Discriminator; 1824 } 1825 1826 void BinaryContext::printInstruction(raw_ostream &OS, const MCInst &Instruction, 1827 uint64_t Offset, 1828 const BinaryFunction *Function, 1829 bool PrintMCInst, bool PrintMemData, 1830 bool PrintRelocations, 1831 StringRef Endl) const { 1832 if (MIB->isEHLabel(Instruction)) { 1833 OS << " EH_LABEL: " << *MIB->getTargetSymbol(Instruction) << Endl; 1834 return; 1835 } 1836 OS << format(" %08" PRIx64 ": ", Offset); 1837 if (MIB->isCFI(Instruction)) { 1838 uint32_t Offset = Instruction.getOperand(0).getImm(); 1839 OS << "\t!CFI\t$" << Offset << "\t; "; 1840 if (Function) 1841 printCFI(OS, *Function->getCFIFor(Instruction)); 1842 OS << Endl; 1843 return; 1844 } 1845 InstPrinter->printInst(&Instruction, 0, "", *STI, OS); 1846 if (MIB->isCall(Instruction)) { 1847 if (MIB->isTailCall(Instruction)) 1848 OS << " # TAILCALL "; 1849 if (MIB->isInvoke(Instruction)) { 1850 const Optional<MCPlus::MCLandingPad> EHInfo = MIB->getEHInfo(Instruction); 1851 OS << " # handler: "; 1852 if (EHInfo->first) 1853 OS << *EHInfo->first; 1854 else 1855 OS << '0'; 1856 OS << "; action: " << EHInfo->second; 1857 const int64_t GnuArgsSize = MIB->getGnuArgsSize(Instruction); 1858 if (GnuArgsSize >= 0) 1859 OS << "; GNU_args_size = " << GnuArgsSize; 1860 } 1861 } else if (MIB->isIndirectBranch(Instruction)) { 1862 if (uint64_t JTAddress = MIB->getJumpTable(Instruction)) { 1863 OS << " # JUMPTABLE @0x" << Twine::utohexstr(JTAddress); 1864 } else { 1865 OS << " # UNKNOWN CONTROL FLOW"; 1866 } 1867 } 1868 if (Optional<uint32_t> Offset = MIB->getOffset(Instruction)) 1869 OS << " # Offset: " << *Offset; 1870 1871 MIB->printAnnotations(Instruction, OS); 1872 1873 if (opts::PrintDebugInfo) 1874 printDebugInfo(OS, Instruction, Function, DwCtx.get()); 1875 1876 if ((opts::PrintRelocations || PrintRelocations) && Function) { 1877 const uint64_t Size = computeCodeSize(&Instruction, &Instruction + 1); 1878 Function->printRelocations(OS, Offset, Size); 1879 } 1880 1881 OS << Endl; 1882 1883 if (PrintMCInst) { 1884 Instruction.dump_pretty(OS, InstPrinter.get()); 1885 OS << Endl; 1886 } 1887 } 1888 1889 Optional<uint64_t> 1890 BinaryContext::getBaseAddressForMapping(uint64_t MMapAddress, 1891 uint64_t FileOffset) const { 1892 // Find a segment with a matching file offset. 1893 for (auto &KV : SegmentMapInfo) { 1894 const SegmentInfo &SegInfo = KV.second; 1895 if (alignDown(SegInfo.FileOffset, SegInfo.Alignment) == FileOffset) { 1896 // Use segment's aligned memory offset to calculate the base address. 1897 const uint64_t MemOffset = alignDown(SegInfo.Address, SegInfo.Alignment); 1898 return MMapAddress - MemOffset; 1899 } 1900 } 1901 1902 return NoneType(); 1903 } 1904 1905 ErrorOr<BinarySection &> BinaryContext::getSectionForAddress(uint64_t Address) { 1906 auto SI = AddressToSection.upper_bound(Address); 1907 if (SI != AddressToSection.begin()) { 1908 --SI; 1909 uint64_t UpperBound = SI->first + SI->second->getSize(); 1910 if (!SI->second->getSize()) 1911 UpperBound += 1; 1912 if (UpperBound > Address) 1913 return *SI->second; 1914 } 1915 return std::make_error_code(std::errc::bad_address); 1916 } 1917 1918 ErrorOr<StringRef> 1919 BinaryContext::getSectionNameForAddress(uint64_t Address) const { 1920 if (ErrorOr<const BinarySection &> Section = getSectionForAddress(Address)) 1921 return Section->getName(); 1922 return std::make_error_code(std::errc::bad_address); 1923 } 1924 1925 BinarySection &BinaryContext::registerSection(BinarySection *Section) { 1926 auto Res = Sections.insert(Section); 1927 (void)Res; 1928 assert(Res.second && "can't register the same section twice."); 1929 1930 // Only register allocatable sections in the AddressToSection map. 1931 if (Section->isAllocatable() && Section->getAddress()) 1932 AddressToSection.insert(std::make_pair(Section->getAddress(), Section)); 1933 NameToSection.insert( 1934 std::make_pair(std::string(Section->getName()), Section)); 1935 if (Section->hasSectionRef()) 1936 SectionRefToBinarySection.insert( 1937 std::make_pair(Section->getSectionRef(), Section)); 1938 1939 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: registering " << *Section << "\n"); 1940 return *Section; 1941 } 1942 1943 BinarySection &BinaryContext::registerSection(SectionRef Section) { 1944 return registerSection(new BinarySection(*this, Section)); 1945 } 1946 1947 BinarySection & 1948 BinaryContext::registerSection(const Twine &SectionName, 1949 const BinarySection &OriginalSection) { 1950 return registerSection( 1951 new BinarySection(*this, SectionName, OriginalSection)); 1952 } 1953 1954 BinarySection & 1955 BinaryContext::registerOrUpdateSection(const Twine &Name, unsigned ELFType, 1956 unsigned ELFFlags, uint8_t *Data, 1957 uint64_t Size, unsigned Alignment) { 1958 auto NamedSections = getSectionByName(Name); 1959 if (NamedSections.begin() != NamedSections.end()) { 1960 assert(std::next(NamedSections.begin()) == NamedSections.end() && 1961 "can only update unique sections"); 1962 BinarySection *Section = NamedSections.begin()->second; 1963 1964 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: updating " << *Section << " -> "); 1965 const bool Flag = Section->isAllocatable(); 1966 (void)Flag; 1967 Section->update(Data, Size, Alignment, ELFType, ELFFlags); 1968 LLVM_DEBUG(dbgs() << *Section << "\n"); 1969 // FIXME: Fix section flags/attributes for MachO. 1970 if (isELF()) 1971 assert(Flag == Section->isAllocatable() && 1972 "can't change section allocation status"); 1973 return *Section; 1974 } 1975 1976 return registerSection( 1977 new BinarySection(*this, Name, Data, Size, Alignment, ELFType, ELFFlags)); 1978 } 1979 1980 void BinaryContext::deregisterSectionName(const BinarySection &Section) { 1981 auto NameRange = NameToSection.equal_range(Section.getName().str()); 1982 while (NameRange.first != NameRange.second) { 1983 if (NameRange.first->second == &Section) { 1984 NameToSection.erase(NameRange.first); 1985 break; 1986 } 1987 ++NameRange.first; 1988 } 1989 } 1990 1991 void BinaryContext::deregisterUnusedSections() { 1992 ErrorOr<BinarySection &> AbsSection = getUniqueSectionByName("<absolute>"); 1993 for (auto SI = Sections.begin(); SI != Sections.end();) { 1994 BinarySection *Section = *SI; 1995 if (Section->hasSectionRef() || Section->getOutputSize() || 1996 (AbsSection && Section == &AbsSection.get())) { 1997 ++SI; 1998 continue; 1999 } 2000 2001 LLVM_DEBUG(dbgs() << "LLVM-DEBUG: deregistering " << Section->getName() 2002 << '\n';); 2003 deregisterSectionName(*Section); 2004 SI = Sections.erase(SI); 2005 delete Section; 2006 } 2007 } 2008 2009 bool BinaryContext::deregisterSection(BinarySection &Section) { 2010 BinarySection *SectionPtr = &Section; 2011 auto Itr = Sections.find(SectionPtr); 2012 if (Itr != Sections.end()) { 2013 auto Range = AddressToSection.equal_range(SectionPtr->getAddress()); 2014 while (Range.first != Range.second) { 2015 if (Range.first->second == SectionPtr) { 2016 AddressToSection.erase(Range.first); 2017 break; 2018 } 2019 ++Range.first; 2020 } 2021 2022 deregisterSectionName(*SectionPtr); 2023 Sections.erase(Itr); 2024 delete SectionPtr; 2025 return true; 2026 } 2027 return false; 2028 } 2029 2030 void BinaryContext::renameSection(BinarySection &Section, 2031 const Twine &NewName) { 2032 auto Itr = Sections.find(&Section); 2033 assert(Itr != Sections.end() && "Section must exist to be renamed."); 2034 Sections.erase(Itr); 2035 2036 deregisterSectionName(Section); 2037 2038 Section.Name = NewName.str(); 2039 Section.setOutputName(NewName); 2040 2041 NameToSection.insert(std::make_pair(NewName.str(), &Section)); 2042 2043 // Reinsert with the new name. 2044 Sections.insert(&Section); 2045 } 2046 2047 void BinaryContext::printSections(raw_ostream &OS) const { 2048 for (BinarySection *const &Section : Sections) 2049 OS << "BOLT-INFO: " << *Section << "\n"; 2050 } 2051 2052 BinarySection &BinaryContext::absoluteSection() { 2053 if (ErrorOr<BinarySection &> Section = getUniqueSectionByName("<absolute>")) 2054 return *Section; 2055 return registerOrUpdateSection("<absolute>", ELF::SHT_NULL, 0u); 2056 } 2057 2058 ErrorOr<uint64_t> BinaryContext::getUnsignedValueAtAddress(uint64_t Address, 2059 size_t Size) const { 2060 const ErrorOr<const BinarySection &> Section = getSectionForAddress(Address); 2061 if (!Section) 2062 return std::make_error_code(std::errc::bad_address); 2063 2064 if (Section->isVirtual()) 2065 return 0; 2066 2067 DataExtractor DE(Section->getContents(), AsmInfo->isLittleEndian(), 2068 AsmInfo->getCodePointerSize()); 2069 auto ValueOffset = static_cast<uint64_t>(Address - Section->getAddress()); 2070 return DE.getUnsigned(&ValueOffset, Size); 2071 } 2072 2073 ErrorOr<uint64_t> BinaryContext::getSignedValueAtAddress(uint64_t Address, 2074 size_t Size) const { 2075 const ErrorOr<const BinarySection &> Section = getSectionForAddress(Address); 2076 if (!Section) 2077 return std::make_error_code(std::errc::bad_address); 2078 2079 if (Section->isVirtual()) 2080 return 0; 2081 2082 DataExtractor DE(Section->getContents(), AsmInfo->isLittleEndian(), 2083 AsmInfo->getCodePointerSize()); 2084 auto ValueOffset = static_cast<uint64_t>(Address - Section->getAddress()); 2085 return DE.getSigned(&ValueOffset, Size); 2086 } 2087 2088 void BinaryContext::addRelocation(uint64_t Address, MCSymbol *Symbol, 2089 uint64_t Type, uint64_t Addend, 2090 uint64_t Value) { 2091 ErrorOr<BinarySection &> Section = getSectionForAddress(Address); 2092 assert(Section && "cannot find section for address"); 2093 Section->addRelocation(Address - Section->getAddress(), Symbol, Type, Addend, 2094 Value); 2095 } 2096 2097 void BinaryContext::addDynamicRelocation(uint64_t Address, MCSymbol *Symbol, 2098 uint64_t Type, uint64_t Addend, 2099 uint64_t Value) { 2100 ErrorOr<BinarySection &> Section = getSectionForAddress(Address); 2101 assert(Section && "cannot find section for address"); 2102 Section->addDynamicRelocation(Address - Section->getAddress(), Symbol, Type, 2103 Addend, Value); 2104 } 2105 2106 bool BinaryContext::removeRelocationAt(uint64_t Address) { 2107 ErrorOr<BinarySection &> Section = getSectionForAddress(Address); 2108 assert(Section && "cannot find section for address"); 2109 return Section->removeRelocationAt(Address - Section->getAddress()); 2110 } 2111 2112 const Relocation *BinaryContext::getRelocationAt(uint64_t Address) { 2113 ErrorOr<BinarySection &> Section = getSectionForAddress(Address); 2114 if (!Section) 2115 return nullptr; 2116 2117 return Section->getRelocationAt(Address - Section->getAddress()); 2118 } 2119 2120 const Relocation *BinaryContext::getDynamicRelocationAt(uint64_t Address) { 2121 ErrorOr<BinarySection &> Section = getSectionForAddress(Address); 2122 if (!Section) 2123 return nullptr; 2124 2125 return Section->getDynamicRelocationAt(Address - Section->getAddress()); 2126 } 2127 2128 void BinaryContext::markAmbiguousRelocations(BinaryData &BD, 2129 const uint64_t Address) { 2130 auto setImmovable = [&](BinaryData &BD) { 2131 BinaryData *Root = BD.getAtomicRoot(); 2132 LLVM_DEBUG(if (Root->isMoveable()) { 2133 dbgs() << "BOLT-DEBUG: setting " << *Root << " as immovable " 2134 << "due to ambiguous relocation referencing 0x" 2135 << Twine::utohexstr(Address) << '\n'; 2136 }); 2137 Root->setIsMoveable(false); 2138 }; 2139 2140 if (Address == BD.getAddress()) { 2141 setImmovable(BD); 2142 2143 // Set previous symbol as immovable 2144 BinaryData *Prev = getBinaryDataContainingAddress(Address - 1); 2145 if (Prev && Prev->getEndAddress() == BD.getAddress()) 2146 setImmovable(*Prev); 2147 } 2148 2149 if (Address == BD.getEndAddress()) { 2150 setImmovable(BD); 2151 2152 // Set next symbol as immovable 2153 BinaryData *Next = getBinaryDataContainingAddress(BD.getEndAddress()); 2154 if (Next && Next->getAddress() == BD.getEndAddress()) 2155 setImmovable(*Next); 2156 } 2157 } 2158 2159 BinaryFunction *BinaryContext::getFunctionForSymbol(const MCSymbol *Symbol, 2160 uint64_t *EntryDesc) { 2161 std::shared_lock<std::shared_timed_mutex> Lock(SymbolToFunctionMapMutex); 2162 auto BFI = SymbolToFunctionMap.find(Symbol); 2163 if (BFI == SymbolToFunctionMap.end()) 2164 return nullptr; 2165 2166 BinaryFunction *BF = BFI->second; 2167 if (EntryDesc) 2168 *EntryDesc = BF->getEntryIDForSymbol(Symbol); 2169 2170 return BF; 2171 } 2172 2173 void BinaryContext::exitWithBugReport(StringRef Message, 2174 const BinaryFunction &Function) const { 2175 errs() << "=======================================\n"; 2176 errs() << "BOLT is unable to proceed because it couldn't properly understand " 2177 "this function.\n"; 2178 errs() << "If you are running the most recent version of BOLT, you may " 2179 "want to " 2180 "report this and paste this dump.\nPlease check that there is no " 2181 "sensitive contents being shared in this dump.\n"; 2182 errs() << "\nOffending function: " << Function.getPrintName() << "\n\n"; 2183 ScopedPrinter SP(errs()); 2184 SP.printBinaryBlock("Function contents", *Function.getData()); 2185 errs() << "\n"; 2186 Function.dump(); 2187 errs() << "ERROR: " << Message; 2188 errs() << "\n=======================================\n"; 2189 exit(1); 2190 } 2191 2192 BinaryFunction * 2193 BinaryContext::createInjectedBinaryFunction(const std::string &Name, 2194 bool IsSimple) { 2195 InjectedBinaryFunctions.push_back(new BinaryFunction(Name, *this, IsSimple)); 2196 BinaryFunction *BF = InjectedBinaryFunctions.back(); 2197 setSymbolToFunctionMap(BF->getSymbol(), BF); 2198 BF->CurrentState = BinaryFunction::State::CFG; 2199 return BF; 2200 } 2201 2202 std::pair<size_t, size_t> 2203 BinaryContext::calculateEmittedSize(BinaryFunction &BF, bool FixBranches) { 2204 // Adjust branch instruction to match the current layout. 2205 if (FixBranches) 2206 BF.fixBranches(); 2207 2208 // Create local MC context to isolate the effect of ephemeral code emission. 2209 IndependentCodeEmitter MCEInstance = createIndependentMCCodeEmitter(); 2210 MCContext *LocalCtx = MCEInstance.LocalCtx.get(); 2211 MCAsmBackend *MAB = 2212 TheTarget->createMCAsmBackend(*STI, *MRI, MCTargetOptions()); 2213 2214 SmallString<256> Code; 2215 raw_svector_ostream VecOS(Code); 2216 2217 std::unique_ptr<MCObjectWriter> OW = MAB->createObjectWriter(VecOS); 2218 std::unique_ptr<MCStreamer> Streamer(TheTarget->createMCObjectStreamer( 2219 *TheTriple, *LocalCtx, std::unique_ptr<MCAsmBackend>(MAB), std::move(OW), 2220 std::unique_ptr<MCCodeEmitter>(MCEInstance.MCE.release()), *STI, 2221 /*RelaxAll=*/false, 2222 /*IncrementalLinkerCompatible=*/false, 2223 /*DWARFMustBeAtTheEnd=*/false)); 2224 2225 Streamer->initSections(false, *STI); 2226 2227 MCSection *Section = MCEInstance.LocalMOFI->getTextSection(); 2228 Section->setHasInstructions(true); 2229 2230 // Create symbols in the LocalCtx so that they get destroyed with it. 2231 MCSymbol *StartLabel = LocalCtx->createTempSymbol(); 2232 MCSymbol *EndLabel = LocalCtx->createTempSymbol(); 2233 2234 Streamer->switchSection(Section); 2235 Streamer->emitLabel(StartLabel); 2236 emitFunctionBody(*Streamer, BF, BF.getLayout().getMainFragment(), 2237 /*EmitCodeOnly=*/true); 2238 Streamer->emitLabel(EndLabel); 2239 2240 using LabelRange = std::pair<const MCSymbol *, const MCSymbol *>; 2241 SmallVector<LabelRange> SplitLabels; 2242 for (FunctionFragment &FF : BF.getLayout().getSplitFragments()) { 2243 MCSymbol *const SplitStartLabel = LocalCtx->createTempSymbol(); 2244 MCSymbol *const SplitEndLabel = LocalCtx->createTempSymbol(); 2245 SplitLabels.emplace_back(SplitStartLabel, SplitEndLabel); 2246 2247 MCSectionELF *const SplitSection = LocalCtx->getELFSection( 2248 BF.getCodeSectionName(FF.getFragmentNum()), ELF::SHT_PROGBITS, 2249 ELF::SHF_EXECINSTR | ELF::SHF_ALLOC); 2250 SplitSection->setHasInstructions(true); 2251 Streamer->switchSection(SplitSection); 2252 2253 Streamer->emitLabel(SplitStartLabel); 2254 emitFunctionBody(*Streamer, BF, FF, /*EmitCodeOnly=*/true); 2255 Streamer->emitLabel(SplitEndLabel); 2256 // To avoid calling MCObjectStreamer::flushPendingLabels() which is 2257 // private 2258 Streamer->emitBytes(StringRef("")); 2259 Streamer->switchSection(Section); 2260 } 2261 2262 // To avoid calling MCObjectStreamer::flushPendingLabels() which is private or 2263 // MCStreamer::Finish(), which does more than we want 2264 Streamer->emitBytes(StringRef("")); 2265 2266 MCAssembler &Assembler = 2267 static_cast<MCObjectStreamer *>(Streamer.get())->getAssembler(); 2268 MCAsmLayout Layout(Assembler); 2269 Assembler.layout(Layout); 2270 2271 const uint64_t HotSize = 2272 Layout.getSymbolOffset(*EndLabel) - Layout.getSymbolOffset(*StartLabel); 2273 const uint64_t ColdSize = 2274 std::accumulate(SplitLabels.begin(), SplitLabels.end(), 0ULL, 2275 [&](const uint64_t Accu, const LabelRange &Labels) { 2276 return Accu + Layout.getSymbolOffset(*Labels.second) - 2277 Layout.getSymbolOffset(*Labels.first); 2278 }); 2279 2280 // Clean-up the effect of the code emission. 2281 for (const MCSymbol &Symbol : Assembler.symbols()) { 2282 MCSymbol *MutableSymbol = const_cast<MCSymbol *>(&Symbol); 2283 MutableSymbol->setUndefined(); 2284 MutableSymbol->setIsRegistered(false); 2285 } 2286 2287 return std::make_pair(HotSize, ColdSize); 2288 } 2289 2290 bool BinaryContext::validateInstructionEncoding( 2291 ArrayRef<uint8_t> InputSequence) const { 2292 MCInst Inst; 2293 uint64_t InstSize; 2294 DisAsm->getInstruction(Inst, InstSize, InputSequence, 0, nulls()); 2295 assert(InstSize == InputSequence.size() && 2296 "Disassembled instruction size does not match the sequence."); 2297 2298 SmallString<256> Code; 2299 SmallVector<MCFixup, 4> Fixups; 2300 raw_svector_ostream VecOS(Code); 2301 2302 MCE->encodeInstruction(Inst, VecOS, Fixups, *STI); 2303 auto OutputSequence = ArrayRef<uint8_t>((uint8_t *)Code.data(), Code.size()); 2304 if (InputSequence != OutputSequence) { 2305 if (opts::Verbosity > 1) { 2306 errs() << "BOLT-WARNING: mismatched encoding detected\n" 2307 << " input: " << InputSequence << '\n' 2308 << " output: " << OutputSequence << '\n'; 2309 } 2310 return false; 2311 } 2312 2313 return true; 2314 } 2315 2316 uint64_t BinaryContext::getHotThreshold() const { 2317 static uint64_t Threshold = 0; 2318 if (Threshold == 0) { 2319 Threshold = std::max( 2320 (uint64_t)opts::ExecutionCountThreshold, 2321 NumProfiledFuncs ? SumExecutionCount / (2 * NumProfiledFuncs) : 1); 2322 } 2323 return Threshold; 2324 } 2325 2326 BinaryFunction *BinaryContext::getBinaryFunctionContainingAddress( 2327 uint64_t Address, bool CheckPastEnd, bool UseMaxSize) { 2328 auto FI = BinaryFunctions.upper_bound(Address); 2329 if (FI == BinaryFunctions.begin()) 2330 return nullptr; 2331 --FI; 2332 2333 const uint64_t UsedSize = 2334 UseMaxSize ? FI->second.getMaxSize() : FI->second.getSize(); 2335 2336 if (Address >= FI->first + UsedSize + (CheckPastEnd ? 1 : 0)) 2337 return nullptr; 2338 2339 return &FI->second; 2340 } 2341 2342 BinaryFunction *BinaryContext::getBinaryFunctionAtAddress(uint64_t Address) { 2343 // First, try to find a function starting at the given address. If the 2344 // function was folded, this will get us the original folded function if it 2345 // wasn't removed from the list, e.g. in non-relocation mode. 2346 auto BFI = BinaryFunctions.find(Address); 2347 if (BFI != BinaryFunctions.end()) 2348 return &BFI->second; 2349 2350 // We might have folded the function matching the object at the given 2351 // address. In such case, we look for a function matching the symbol 2352 // registered at the original address. The new function (the one that the 2353 // original was folded into) will hold the symbol. 2354 if (const BinaryData *BD = getBinaryDataAtAddress(Address)) { 2355 uint64_t EntryID = 0; 2356 BinaryFunction *BF = getFunctionForSymbol(BD->getSymbol(), &EntryID); 2357 if (BF && EntryID == 0) 2358 return BF; 2359 } 2360 return nullptr; 2361 } 2362 2363 DebugAddressRangesVector BinaryContext::translateModuleAddressRanges( 2364 const DWARFAddressRangesVector &InputRanges) const { 2365 DebugAddressRangesVector OutputRanges; 2366 2367 for (const DWARFAddressRange Range : InputRanges) { 2368 auto BFI = BinaryFunctions.lower_bound(Range.LowPC); 2369 while (BFI != BinaryFunctions.end()) { 2370 const BinaryFunction &Function = BFI->second; 2371 if (Function.getAddress() >= Range.HighPC) 2372 break; 2373 const DebugAddressRangesVector FunctionRanges = 2374 Function.getOutputAddressRanges(); 2375 llvm::move(FunctionRanges, std::back_inserter(OutputRanges)); 2376 std::advance(BFI, 1); 2377 } 2378 } 2379 2380 return OutputRanges; 2381 } 2382 2383 } // namespace bolt 2384 } // namespace llvm 2385