1 //===- bolt/Core/BinaryContext.cpp - Low-level context --------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file implements the BinaryContext class. 10 // 11 //===----------------------------------------------------------------------===// 12 13 #include "bolt/Core/BinaryContext.h" 14 #include "bolt/Core/BinaryEmitter.h" 15 #include "bolt/Core/BinaryFunction.h" 16 #include "bolt/Utils/CommandLineOpts.h" 17 #include "bolt/Utils/NameResolver.h" 18 #include "bolt/Utils/Utils.h" 19 #include "llvm/ADT/Twine.h" 20 #include "llvm/DebugInfo/DWARF/DWARFCompileUnit.h" 21 #include "llvm/DebugInfo/DWARF/DWARFFormValue.h" 22 #include "llvm/DebugInfo/DWARF/DWARFUnit.h" 23 #include "llvm/MC/MCAsmLayout.h" 24 #include "llvm/MC/MCAssembler.h" 25 #include "llvm/MC/MCContext.h" 26 #include "llvm/MC/MCDisassembler/MCDisassembler.h" 27 #include "llvm/MC/MCInstPrinter.h" 28 #include "llvm/MC/MCObjectStreamer.h" 29 #include "llvm/MC/MCObjectWriter.h" 30 #include "llvm/MC/MCRegisterInfo.h" 31 #include "llvm/MC/MCSectionELF.h" 32 #include "llvm/MC/MCStreamer.h" 33 #include "llvm/MC/MCSubtargetInfo.h" 34 #include "llvm/MC/MCSymbol.h" 35 #include "llvm/Support/CommandLine.h" 36 #include "llvm/Support/Error.h" 37 #include "llvm/Support/Regex.h" 38 #include <algorithm> 39 #include <functional> 40 #include <iterator> 41 #include <unordered_set> 42 43 using namespace llvm; 44 45 #undef DEBUG_TYPE 46 #define DEBUG_TYPE "bolt" 47 48 namespace opts { 49 50 cl::opt<bool> 51 NoHugePages("no-huge-pages", 52 cl::desc("use regular size pages for code alignment"), 53 cl::ZeroOrMore, 54 cl::Hidden, 55 cl::cat(BoltCategory)); 56 57 static cl::opt<bool> 58 PrintDebugInfo("print-debug-info", 59 cl::desc("print debug info when printing functions"), 60 cl::Hidden, 61 cl::ZeroOrMore, 62 cl::cat(BoltCategory)); 63 64 cl::opt<bool> 65 PrintRelocations("print-relocations", 66 cl::desc("print relocations when printing functions/objects"), 67 cl::Hidden, 68 cl::ZeroOrMore, 69 cl::cat(BoltCategory)); 70 71 static cl::opt<bool> 72 PrintMemData("print-mem-data", 73 cl::desc("print memory data annotations when printing functions"), 74 cl::Hidden, 75 cl::ZeroOrMore, 76 cl::cat(BoltCategory)); 77 78 } // namespace opts 79 80 namespace llvm { 81 namespace bolt { 82 83 BinaryContext::BinaryContext(std::unique_ptr<MCContext> Ctx, 84 std::unique_ptr<DWARFContext> DwCtx, 85 std::unique_ptr<Triple> TheTriple, 86 const Target *TheTarget, std::string TripleName, 87 std::unique_ptr<MCCodeEmitter> MCE, 88 std::unique_ptr<MCObjectFileInfo> MOFI, 89 std::unique_ptr<const MCAsmInfo> AsmInfo, 90 std::unique_ptr<const MCInstrInfo> MII, 91 std::unique_ptr<const MCSubtargetInfo> STI, 92 std::unique_ptr<MCInstPrinter> InstPrinter, 93 std::unique_ptr<const MCInstrAnalysis> MIA, 94 std::unique_ptr<MCPlusBuilder> MIB, 95 std::unique_ptr<const MCRegisterInfo> MRI, 96 std::unique_ptr<MCDisassembler> DisAsm) 97 : Ctx(std::move(Ctx)), DwCtx(std::move(DwCtx)), 98 TheTriple(std::move(TheTriple)), TheTarget(TheTarget), 99 TripleName(TripleName), MCE(std::move(MCE)), MOFI(std::move(MOFI)), 100 AsmInfo(std::move(AsmInfo)), MII(std::move(MII)), STI(std::move(STI)), 101 InstPrinter(std::move(InstPrinter)), MIA(std::move(MIA)), 102 MIB(std::move(MIB)), MRI(std::move(MRI)), DisAsm(std::move(DisAsm)) { 103 Relocation::Arch = this->TheTriple->getArch(); 104 PageAlign = opts::NoHugePages ? RegularPageSize : HugePageSize; 105 } 106 107 BinaryContext::~BinaryContext() { 108 for (BinarySection *Section : Sections) 109 delete Section; 110 for (BinaryFunction *InjectedFunction : InjectedBinaryFunctions) 111 delete InjectedFunction; 112 for (std::pair<const uint64_t, JumpTable *> JTI : JumpTables) 113 delete JTI.second; 114 clearBinaryData(); 115 } 116 117 /// Create BinaryContext for a given architecture \p ArchName and 118 /// triple \p TripleName. 119 Expected<std::unique_ptr<BinaryContext>> 120 BinaryContext::createBinaryContext(const ObjectFile *File, bool IsPIC, 121 std::unique_ptr<DWARFContext> DwCtx) { 122 StringRef ArchName = ""; 123 StringRef FeaturesStr = ""; 124 switch (File->getArch()) { 125 case llvm::Triple::x86_64: 126 ArchName = "x86-64"; 127 FeaturesStr = "+nopl"; 128 break; 129 case llvm::Triple::aarch64: 130 ArchName = "aarch64"; 131 FeaturesStr = "+fp-armv8,+neon,+crypto,+dotprod,+crc,+lse,+ras,+rdm," 132 "+fullfp16,+spe,+fuse-aes,+rcpc"; 133 break; 134 default: 135 return createStringError(std::errc::not_supported, 136 "BOLT-ERROR: Unrecognized machine in ELF file"); 137 } 138 139 auto TheTriple = std::make_unique<Triple>(File->makeTriple()); 140 const std::string TripleName = TheTriple->str(); 141 142 std::string Error; 143 const Target *TheTarget = 144 TargetRegistry::lookupTarget(std::string(ArchName), *TheTriple, Error); 145 if (!TheTarget) 146 return createStringError(make_error_code(std::errc::not_supported), 147 Twine("BOLT-ERROR: ", Error)); 148 149 std::unique_ptr<const MCRegisterInfo> MRI( 150 TheTarget->createMCRegInfo(TripleName)); 151 if (!MRI) 152 return createStringError( 153 make_error_code(std::errc::not_supported), 154 Twine("BOLT-ERROR: no register info for target ", TripleName)); 155 156 // Set up disassembler. 157 std::unique_ptr<const MCAsmInfo> AsmInfo( 158 TheTarget->createMCAsmInfo(*MRI, TripleName, MCTargetOptions())); 159 if (!AsmInfo) 160 return createStringError( 161 make_error_code(std::errc::not_supported), 162 Twine("BOLT-ERROR: no assembly info for target ", TripleName)); 163 164 std::unique_ptr<const MCSubtargetInfo> STI( 165 TheTarget->createMCSubtargetInfo(TripleName, "", FeaturesStr)); 166 if (!STI) 167 return createStringError( 168 make_error_code(std::errc::not_supported), 169 Twine("BOLT-ERROR: no subtarget info for target ", TripleName)); 170 171 std::unique_ptr<const MCInstrInfo> MII(TheTarget->createMCInstrInfo()); 172 if (!MII) 173 return createStringError( 174 make_error_code(std::errc::not_supported), 175 Twine("BOLT-ERROR: no instruction info for target ", TripleName)); 176 177 std::unique_ptr<MCContext> Ctx( 178 new MCContext(*TheTriple, AsmInfo.get(), MRI.get(), STI.get())); 179 std::unique_ptr<MCObjectFileInfo> MOFI( 180 TheTarget->createMCObjectFileInfo(*Ctx, IsPIC)); 181 Ctx->setObjectFileInfo(MOFI.get()); 182 // We do not support X86 Large code model. Change this in the future. 183 bool Large = false; 184 if (TheTriple->getArch() == llvm::Triple::aarch64) 185 Large = true; 186 unsigned LSDAEncoding = 187 Large ? dwarf::DW_EH_PE_absptr : dwarf::DW_EH_PE_udata4; 188 unsigned TTypeEncoding = 189 Large ? dwarf::DW_EH_PE_absptr : dwarf::DW_EH_PE_udata4; 190 if (IsPIC) { 191 LSDAEncoding = dwarf::DW_EH_PE_pcrel | 192 (Large ? dwarf::DW_EH_PE_sdata8 : dwarf::DW_EH_PE_sdata4); 193 TTypeEncoding = dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel | 194 (Large ? dwarf::DW_EH_PE_sdata8 : dwarf::DW_EH_PE_sdata4); 195 } 196 197 std::unique_ptr<MCDisassembler> DisAsm( 198 TheTarget->createMCDisassembler(*STI, *Ctx)); 199 200 if (!DisAsm) 201 return createStringError( 202 make_error_code(std::errc::not_supported), 203 Twine("BOLT-ERROR: no disassembler info for target ", TripleName)); 204 205 std::unique_ptr<const MCInstrAnalysis> MIA( 206 TheTarget->createMCInstrAnalysis(MII.get())); 207 if (!MIA) 208 return createStringError( 209 make_error_code(std::errc::not_supported), 210 Twine("BOLT-ERROR: failed to create instruction analysis for target ", 211 TripleName)); 212 213 int AsmPrinterVariant = AsmInfo->getAssemblerDialect(); 214 std::unique_ptr<MCInstPrinter> InstructionPrinter( 215 TheTarget->createMCInstPrinter(*TheTriple, AsmPrinterVariant, *AsmInfo, 216 *MII, *MRI)); 217 if (!InstructionPrinter) 218 return createStringError( 219 make_error_code(std::errc::not_supported), 220 Twine("BOLT-ERROR: no instruction printer for target ", TripleName)); 221 InstructionPrinter->setPrintImmHex(true); 222 223 std::unique_ptr<MCCodeEmitter> MCE( 224 TheTarget->createMCCodeEmitter(*MII, *Ctx)); 225 226 // Make sure we don't miss any output on core dumps. 227 outs().SetUnbuffered(); 228 errs().SetUnbuffered(); 229 dbgs().SetUnbuffered(); 230 231 auto BC = std::make_unique<BinaryContext>( 232 std::move(Ctx), std::move(DwCtx), std::move(TheTriple), TheTarget, 233 std::string(TripleName), std::move(MCE), std::move(MOFI), 234 std::move(AsmInfo), std::move(MII), std::move(STI), 235 std::move(InstructionPrinter), std::move(MIA), nullptr, std::move(MRI), 236 std::move(DisAsm)); 237 238 BC->TTypeEncoding = TTypeEncoding; 239 BC->LSDAEncoding = LSDAEncoding; 240 241 BC->MAB = std::unique_ptr<MCAsmBackend>( 242 BC->TheTarget->createMCAsmBackend(*BC->STI, *BC->MRI, MCTargetOptions())); 243 244 BC->setFilename(File->getFileName()); 245 246 BC->HasFixedLoadAddress = !IsPIC; 247 248 return BC; 249 } 250 251 bool BinaryContext::forceSymbolRelocations(StringRef SymbolName) const { 252 if (opts::HotText && 253 (SymbolName == "__hot_start" || SymbolName == "__hot_end")) 254 return true; 255 256 if (opts::HotData && 257 (SymbolName == "__hot_data_start" || SymbolName == "__hot_data_end")) 258 return true; 259 260 if (SymbolName == "_end") 261 return true; 262 263 return false; 264 } 265 266 std::unique_ptr<MCObjectWriter> 267 BinaryContext::createObjectWriter(raw_pwrite_stream &OS) { 268 return MAB->createObjectWriter(OS); 269 } 270 271 bool BinaryContext::validateObjectNesting() const { 272 auto Itr = BinaryDataMap.begin(); 273 auto End = BinaryDataMap.end(); 274 bool Valid = true; 275 while (Itr != End) { 276 auto Next = std::next(Itr); 277 while (Next != End && 278 Itr->second->getSection() == Next->second->getSection() && 279 Itr->second->containsRange(Next->second->getAddress(), 280 Next->second->getSize())) { 281 if (Next->second->Parent != Itr->second) { 282 errs() << "BOLT-WARNING: object nesting incorrect for:\n" 283 << "BOLT-WARNING: " << *Itr->second << "\n" 284 << "BOLT-WARNING: " << *Next->second << "\n"; 285 Valid = false; 286 } 287 ++Next; 288 } 289 Itr = Next; 290 } 291 return Valid; 292 } 293 294 bool BinaryContext::validateHoles() const { 295 bool Valid = true; 296 for (BinarySection &Section : sections()) { 297 for (const Relocation &Rel : Section.relocations()) { 298 uint64_t RelAddr = Rel.Offset + Section.getAddress(); 299 const BinaryData *BD = getBinaryDataContainingAddress(RelAddr); 300 if (!BD) { 301 errs() << "BOLT-WARNING: no BinaryData found for relocation at address" 302 << " 0x" << Twine::utohexstr(RelAddr) << " in " 303 << Section.getName() << "\n"; 304 Valid = false; 305 } else if (!BD->getAtomicRoot()) { 306 errs() << "BOLT-WARNING: no atomic BinaryData found for relocation at " 307 << "address 0x" << Twine::utohexstr(RelAddr) << " in " 308 << Section.getName() << "\n"; 309 Valid = false; 310 } 311 } 312 } 313 return Valid; 314 } 315 316 void BinaryContext::updateObjectNesting(BinaryDataMapType::iterator GAI) { 317 const uint64_t Address = GAI->second->getAddress(); 318 const uint64_t Size = GAI->second->getSize(); 319 320 auto fixParents = [&](BinaryDataMapType::iterator Itr, 321 BinaryData *NewParent) { 322 BinaryData *OldParent = Itr->second->Parent; 323 Itr->second->Parent = NewParent; 324 ++Itr; 325 while (Itr != BinaryDataMap.end() && OldParent && 326 Itr->second->Parent == OldParent) { 327 Itr->second->Parent = NewParent; 328 ++Itr; 329 } 330 }; 331 332 // Check if the previous symbol contains the newly added symbol. 333 if (GAI != BinaryDataMap.begin()) { 334 BinaryData *Prev = std::prev(GAI)->second; 335 while (Prev) { 336 if (Prev->getSection() == GAI->second->getSection() && 337 Prev->containsRange(Address, Size)) { 338 fixParents(GAI, Prev); 339 } else { 340 fixParents(GAI, nullptr); 341 } 342 Prev = Prev->Parent; 343 } 344 } 345 346 // Check if the newly added symbol contains any subsequent symbols. 347 if (Size != 0) { 348 BinaryData *BD = GAI->second->Parent ? GAI->second->Parent : GAI->second; 349 auto Itr = std::next(GAI); 350 while ( 351 Itr != BinaryDataMap.end() && 352 BD->containsRange(Itr->second->getAddress(), Itr->second->getSize())) { 353 Itr->second->Parent = BD; 354 ++Itr; 355 } 356 } 357 } 358 359 iterator_range<BinaryContext::binary_data_iterator> 360 BinaryContext::getSubBinaryData(BinaryData *BD) { 361 auto Start = std::next(BinaryDataMap.find(BD->getAddress())); 362 auto End = Start; 363 while (End != BinaryDataMap.end() && BD->isAncestorOf(End->second)) 364 ++End; 365 return make_range(Start, End); 366 } 367 368 std::pair<const MCSymbol *, uint64_t> 369 BinaryContext::handleAddressRef(uint64_t Address, BinaryFunction &BF, 370 bool IsPCRel) { 371 uint64_t Addend = 0; 372 373 if (isAArch64()) { 374 // Check if this is an access to a constant island and create bookkeeping 375 // to keep track of it and emit it later as part of this function. 376 if (MCSymbol *IslandSym = BF.getOrCreateIslandAccess(Address)) 377 return std::make_pair(IslandSym, Addend); 378 379 // Detect custom code written in assembly that refers to arbitrary 380 // constant islands from other functions. Write this reference so we 381 // can pull this constant island and emit it as part of this function 382 // too. 383 auto IslandIter = AddressToConstantIslandMap.lower_bound(Address); 384 if (IslandIter != AddressToConstantIslandMap.end()) { 385 if (MCSymbol *IslandSym = 386 IslandIter->second->getOrCreateProxyIslandAccess(Address, BF)) { 387 BF.createIslandDependency(IslandSym, IslandIter->second); 388 return std::make_pair(IslandSym, Addend); 389 } 390 } 391 } 392 393 // Note that the address does not necessarily have to reside inside 394 // a section, it could be an absolute address too. 395 ErrorOr<BinarySection &> Section = getSectionForAddress(Address); 396 if (Section && Section->isText()) { 397 if (BF.containsAddress(Address, /*UseMaxSize=*/isAArch64())) { 398 if (Address != BF.getAddress()) { 399 // The address could potentially escape. Mark it as another entry 400 // point into the function. 401 if (opts::Verbosity >= 1) { 402 outs() << "BOLT-INFO: potentially escaped address 0x" 403 << Twine::utohexstr(Address) << " in function " << BF << '\n'; 404 } 405 BF.HasInternalLabelReference = true; 406 return std::make_pair( 407 BF.addEntryPointAtOffset(Address - BF.getAddress()), Addend); 408 } 409 } else { 410 BF.InterproceduralReferences.insert(Address); 411 } 412 } 413 414 // With relocations, catch jump table references outside of the basic block 415 // containing the indirect jump. 416 if (HasRelocations) { 417 const MemoryContentsType MemType = analyzeMemoryAt(Address, BF); 418 if (MemType == MemoryContentsType::POSSIBLE_PIC_JUMP_TABLE && IsPCRel) { 419 const MCSymbol *Symbol = 420 getOrCreateJumpTable(BF, Address, JumpTable::JTT_PIC); 421 422 return std::make_pair(Symbol, Addend); 423 } 424 } 425 426 if (BinaryData *BD = getBinaryDataContainingAddress(Address)) 427 return std::make_pair(BD->getSymbol(), Address - BD->getAddress()); 428 429 // TODO: use DWARF info to get size/alignment here? 430 MCSymbol *TargetSymbol = getOrCreateGlobalSymbol(Address, "DATAat"); 431 LLVM_DEBUG(dbgs() << "Created symbol " << TargetSymbol->getName() << '\n'); 432 return std::make_pair(TargetSymbol, Addend); 433 } 434 435 MemoryContentsType BinaryContext::analyzeMemoryAt(uint64_t Address, 436 BinaryFunction &BF) { 437 if (!isX86()) 438 return MemoryContentsType::UNKNOWN; 439 440 ErrorOr<BinarySection &> Section = getSectionForAddress(Address); 441 if (!Section) { 442 // No section - possibly an absolute address. Since we don't allow 443 // internal function addresses to escape the function scope - we 444 // consider it a tail call. 445 if (opts::Verbosity > 1) { 446 errs() << "BOLT-WARNING: no section for address 0x" 447 << Twine::utohexstr(Address) << " referenced from function " << BF 448 << '\n'; 449 } 450 return MemoryContentsType::UNKNOWN; 451 } 452 453 if (Section->isVirtual()) { 454 // The contents are filled at runtime. 455 return MemoryContentsType::UNKNOWN; 456 } 457 458 // No support for jump tables in code yet. 459 if (Section->isText()) 460 return MemoryContentsType::UNKNOWN; 461 462 // Start with checking for PIC jump table. We expect non-PIC jump tables 463 // to have high 32 bits set to 0. 464 if (analyzeJumpTable(Address, JumpTable::JTT_PIC, BF)) 465 return MemoryContentsType::POSSIBLE_PIC_JUMP_TABLE; 466 467 if (analyzeJumpTable(Address, JumpTable::JTT_NORMAL, BF)) 468 return MemoryContentsType::POSSIBLE_JUMP_TABLE; 469 470 return MemoryContentsType::UNKNOWN; 471 } 472 473 /// Check if <fragment restored name> == <parent restored name>.cold(.\d+)? 474 bool isPotentialFragmentByName(BinaryFunction &Fragment, 475 BinaryFunction &Parent) { 476 for (StringRef Name : Parent.getNames()) { 477 std::string NamePrefix = Regex::escape(NameResolver::restore(Name)); 478 std::string NameRegex = Twine(NamePrefix, "\\.cold(\\.[0-9]+)?").str(); 479 if (Fragment.hasRestoredNameRegex(NameRegex)) 480 return true; 481 } 482 return false; 483 } 484 485 bool BinaryContext::analyzeJumpTable(const uint64_t Address, 486 const JumpTable::JumpTableType Type, 487 BinaryFunction &BF, 488 const uint64_t NextJTAddress, 489 JumpTable::OffsetsType *Offsets) { 490 // Is one of the targets __builtin_unreachable? 491 bool HasUnreachable = false; 492 493 // Number of targets other than __builtin_unreachable. 494 uint64_t NumRealEntries = 0; 495 496 constexpr uint64_t INVALID_OFFSET = std::numeric_limits<uint64_t>::max(); 497 auto addOffset = [&](uint64_t Offset) { 498 if (Offsets) 499 Offsets->emplace_back(Offset); 500 }; 501 502 auto doesBelongToFunction = [&](const uint64_t Addr, 503 BinaryFunction *TargetBF) -> bool { 504 if (BF.containsAddress(Addr)) 505 return true; 506 // Nothing to do if we failed to identify the containing function. 507 if (!TargetBF) 508 return false; 509 // Case 1: check if BF is a fragment and TargetBF is its parent. 510 if (BF.isFragment()) { 511 // Parent function may or may not be already registered. 512 // Set parent link based on function name matching heuristic. 513 return registerFragment(BF, *TargetBF); 514 } 515 // Case 2: check if TargetBF is a fragment and BF is its parent. 516 return TargetBF->isFragment() && registerFragment(*TargetBF, BF); 517 }; 518 519 ErrorOr<BinarySection &> Section = getSectionForAddress(Address); 520 if (!Section) 521 return false; 522 523 // The upper bound is defined by containing object, section limits, and 524 // the next jump table in memory. 525 uint64_t UpperBound = Section->getEndAddress(); 526 const BinaryData *JumpTableBD = getBinaryDataAtAddress(Address); 527 if (JumpTableBD && JumpTableBD->getSize()) { 528 assert(JumpTableBD->getEndAddress() <= UpperBound && 529 "data object cannot cross a section boundary"); 530 UpperBound = JumpTableBD->getEndAddress(); 531 } 532 if (NextJTAddress) 533 UpperBound = std::min(NextJTAddress, UpperBound); 534 535 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: analyzeJumpTable in " << BF.getPrintName() 536 << '\n'); 537 const uint64_t EntrySize = getJumpTableEntrySize(Type); 538 for (uint64_t EntryAddress = Address; EntryAddress <= UpperBound - EntrySize; 539 EntryAddress += EntrySize) { 540 LLVM_DEBUG(dbgs() << " * Checking 0x" << Twine::utohexstr(EntryAddress) 541 << " -> "); 542 // Check if there's a proper relocation against the jump table entry. 543 if (HasRelocations) { 544 if (Type == JumpTable::JTT_PIC && 545 !DataPCRelocations.count(EntryAddress)) { 546 LLVM_DEBUG( 547 dbgs() << "FAIL: JTT_PIC table, no relocation for this address\n"); 548 break; 549 } 550 if (Type == JumpTable::JTT_NORMAL && !getRelocationAt(EntryAddress)) { 551 LLVM_DEBUG( 552 dbgs() 553 << "FAIL: JTT_NORMAL table, no relocation for this address\n"); 554 break; 555 } 556 } 557 558 const uint64_t Value = 559 (Type == JumpTable::JTT_PIC) 560 ? Address + *getSignedValueAtAddress(EntryAddress, EntrySize) 561 : *getPointerAtAddress(EntryAddress); 562 563 // __builtin_unreachable() case. 564 if (Value == BF.getAddress() + BF.getSize()) { 565 addOffset(Value - BF.getAddress()); 566 HasUnreachable = true; 567 LLVM_DEBUG(dbgs() << "OK: __builtin_unreachable\n"); 568 continue; 569 } 570 571 // Function or one of its fragments. 572 BinaryFunction *TargetBF = getBinaryFunctionContainingAddress(Value); 573 574 // We assume that a jump table cannot have function start as an entry. 575 if (!doesBelongToFunction(Value, TargetBF) || Value == BF.getAddress()) { 576 LLVM_DEBUG({ 577 if (!BF.containsAddress(Value)) { 578 dbgs() << "FAIL: function doesn't contain this address\n"; 579 if (TargetBF) { 580 dbgs() << " ! function containing this address: " 581 << TargetBF->getPrintName() << '\n'; 582 if (TargetBF->isFragment()) 583 dbgs() << " ! is a fragment\n"; 584 for (BinaryFunction *TargetParent : TargetBF->ParentFragments) 585 dbgs() << " ! its parent is " 586 << (TargetParent ? TargetParent->getPrintName() : "(none)") 587 << '\n'; 588 } 589 } 590 if (Value == BF.getAddress()) 591 dbgs() << "FAIL: jump table cannot have function start as an entry\n"; 592 }); 593 break; 594 } 595 596 // Check there's an instruction at this offset. 597 if (TargetBF->getState() == BinaryFunction::State::Disassembled && 598 !TargetBF->getInstructionAtOffset(Value - TargetBF->getAddress())) { 599 LLVM_DEBUG(dbgs() << "FAIL: no instruction at this offset\n"); 600 break; 601 } 602 603 ++NumRealEntries; 604 605 if (TargetBF == &BF) { 606 // Address inside the function. 607 addOffset(Value - TargetBF->getAddress()); 608 LLVM_DEBUG(dbgs() << "OK: real entry\n"); 609 } else { 610 // Address in split fragment. 611 BF.setHasSplitJumpTable(true); 612 // Add invalid offset for proper identification of jump table size. 613 addOffset(INVALID_OFFSET); 614 LLVM_DEBUG(dbgs() << "OK: address in split fragment " 615 << TargetBF->getPrintName() << '\n'); 616 } 617 } 618 619 // It's a jump table if the number of real entries is more than 1, or there's 620 // one real entry and "unreachable" targets. If there are only multiple 621 // "unreachable" targets, then it's not a jump table. 622 return NumRealEntries + HasUnreachable >= 2; 623 } 624 625 void BinaryContext::populateJumpTables() { 626 LLVM_DEBUG(dbgs() << "DataPCRelocations: " << DataPCRelocations.size() 627 << '\n'); 628 for (auto JTI = JumpTables.begin(), JTE = JumpTables.end(); JTI != JTE; 629 ++JTI) { 630 JumpTable *JT = JTI->second; 631 BinaryFunction &BF = *JT->Parent; 632 633 if (!BF.isSimple()) 634 continue; 635 636 uint64_t NextJTAddress = 0; 637 auto NextJTI = std::next(JTI); 638 if (NextJTI != JTE) 639 NextJTAddress = NextJTI->second->getAddress(); 640 641 const bool Success = analyzeJumpTable(JT->getAddress(), JT->Type, BF, 642 NextJTAddress, &JT->OffsetEntries); 643 if (!Success) { 644 dbgs() << "failed to analyze jump table in function " << BF << '\n'; 645 JT->print(dbgs()); 646 if (NextJTI != JTE) { 647 dbgs() << "next jump table at 0x" 648 << Twine::utohexstr(NextJTI->second->getAddress()) 649 << " belongs to function " << *NextJTI->second->Parent << '\n'; 650 NextJTI->second->print(dbgs()); 651 } 652 llvm_unreachable("jump table heuristic failure"); 653 } 654 655 for (uint64_t EntryOffset : JT->OffsetEntries) { 656 if (EntryOffset == BF.getSize()) 657 BF.IgnoredBranches.emplace_back(EntryOffset, BF.getSize()); 658 else 659 BF.registerReferencedOffset(EntryOffset); 660 } 661 662 // In strict mode, erase PC-relative relocation record. Later we check that 663 // all such records are erased and thus have been accounted for. 664 if (opts::StrictMode && JT->Type == JumpTable::JTT_PIC) { 665 for (uint64_t Address = JT->getAddress(); 666 Address < JT->getAddress() + JT->getSize(); 667 Address += JT->EntrySize) { 668 DataPCRelocations.erase(DataPCRelocations.find(Address)); 669 } 670 } 671 672 // Mark to skip the function and all its fragments. 673 if (BF.hasSplitJumpTable()) 674 FragmentsToSkip.push_back(&BF); 675 } 676 677 if (opts::StrictMode && DataPCRelocations.size()) { 678 LLVM_DEBUG({ 679 dbgs() << DataPCRelocations.size() 680 << " unclaimed PC-relative relocations left in data:\n"; 681 for (uint64_t Reloc : DataPCRelocations) 682 dbgs() << Twine::utohexstr(Reloc) << '\n'; 683 }); 684 assert(0 && "unclaimed PC-relative relocations left in data\n"); 685 } 686 clearList(DataPCRelocations); 687 } 688 689 void BinaryContext::skipMarkedFragments() { 690 // Unique functions in the vector. 691 std::unordered_set<BinaryFunction *> UniqueFunctions(FragmentsToSkip.begin(), 692 FragmentsToSkip.end()); 693 // Copy the functions back to FragmentsToSkip. 694 FragmentsToSkip.assign(UniqueFunctions.begin(), UniqueFunctions.end()); 695 auto addToWorklist = [&](BinaryFunction *Function) -> void { 696 if (UniqueFunctions.count(Function)) 697 return; 698 FragmentsToSkip.push_back(Function); 699 UniqueFunctions.insert(Function); 700 }; 701 // Functions containing split jump tables need to be skipped with all 702 // fragments (transitively). 703 for (size_t I = 0; I != FragmentsToSkip.size(); I++) { 704 BinaryFunction *BF = FragmentsToSkip[I]; 705 assert(UniqueFunctions.count(BF) && 706 "internal error in traversing function fragments"); 707 if (opts::Verbosity >= 1) 708 errs() << "BOLT-WARNING: Ignoring " << BF->getPrintName() << '\n'; 709 BF->setIgnored(); 710 std::for_each(BF->Fragments.begin(), BF->Fragments.end(), addToWorklist); 711 std::for_each(BF->ParentFragments.begin(), BF->ParentFragments.end(), 712 addToWorklist); 713 } 714 if (!FragmentsToSkip.empty()) 715 errs() << "BOLT-WARNING: ignored " << FragmentsToSkip.size() << " function" 716 << (FragmentsToSkip.size() == 1 ? "" : "s") 717 << " due to cold fragments\n"; 718 FragmentsToSkip.clear(); 719 } 720 721 MCSymbol *BinaryContext::getOrCreateGlobalSymbol(uint64_t Address, Twine Prefix, 722 uint64_t Size, 723 uint16_t Alignment, 724 unsigned Flags) { 725 auto Itr = BinaryDataMap.find(Address); 726 if (Itr != BinaryDataMap.end()) { 727 assert(Itr->second->getSize() == Size || !Size); 728 return Itr->second->getSymbol(); 729 } 730 731 std::string Name = (Prefix + "0x" + Twine::utohexstr(Address)).str(); 732 assert(!GlobalSymbols.count(Name) && "created name is not unique"); 733 return registerNameAtAddress(Name, Address, Size, Alignment, Flags); 734 } 735 736 MCSymbol *BinaryContext::getOrCreateUndefinedGlobalSymbol(StringRef Name) { 737 return Ctx->getOrCreateSymbol(Name); 738 } 739 740 BinaryFunction *BinaryContext::createBinaryFunction( 741 const std::string &Name, BinarySection &Section, uint64_t Address, 742 uint64_t Size, uint64_t SymbolSize, uint16_t Alignment) { 743 auto Result = BinaryFunctions.emplace( 744 Address, BinaryFunction(Name, Section, Address, Size, *this)); 745 assert(Result.second == true && "unexpected duplicate function"); 746 BinaryFunction *BF = &Result.first->second; 747 registerNameAtAddress(Name, Address, SymbolSize ? SymbolSize : Size, 748 Alignment); 749 setSymbolToFunctionMap(BF->getSymbol(), BF); 750 return BF; 751 } 752 753 const MCSymbol * 754 BinaryContext::getOrCreateJumpTable(BinaryFunction &Function, uint64_t Address, 755 JumpTable::JumpTableType Type) { 756 if (JumpTable *JT = getJumpTableContainingAddress(Address)) { 757 assert(JT->Type == Type && "jump table types have to match"); 758 assert(JT->Parent == &Function && 759 "cannot re-use jump table of a different function"); 760 assert(Address == JT->getAddress() && "unexpected non-empty jump table"); 761 762 return JT->getFirstLabel(); 763 } 764 765 // Re-use the existing symbol if possible. 766 MCSymbol *JTLabel = nullptr; 767 if (BinaryData *Object = getBinaryDataAtAddress(Address)) { 768 if (!isInternalSymbolName(Object->getSymbol()->getName())) 769 JTLabel = Object->getSymbol(); 770 } 771 772 const uint64_t EntrySize = getJumpTableEntrySize(Type); 773 if (!JTLabel) { 774 const std::string JumpTableName = generateJumpTableName(Function, Address); 775 JTLabel = registerNameAtAddress(JumpTableName, Address, 0, EntrySize); 776 } 777 778 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: creating jump table " << JTLabel->getName() 779 << " in function " << Function << '\n'); 780 781 JumpTable *JT = new JumpTable(*JTLabel, Address, EntrySize, Type, 782 JumpTable::LabelMapType{{0, JTLabel}}, Function, 783 *getSectionForAddress(Address)); 784 JumpTables.emplace(Address, JT); 785 786 // Duplicate the entry for the parent function for easy access. 787 Function.JumpTables.emplace(Address, JT); 788 789 return JTLabel; 790 } 791 792 std::pair<uint64_t, const MCSymbol *> 793 BinaryContext::duplicateJumpTable(BinaryFunction &Function, JumpTable *JT, 794 const MCSymbol *OldLabel) { 795 auto L = scopeLock(); 796 unsigned Offset = 0; 797 bool Found = false; 798 for (std::pair<const unsigned, MCSymbol *> Elmt : JT->Labels) { 799 if (Elmt.second != OldLabel) 800 continue; 801 Offset = Elmt.first; 802 Found = true; 803 break; 804 } 805 assert(Found && "Label not found"); 806 MCSymbol *NewLabel = Ctx->createNamedTempSymbol("duplicatedJT"); 807 JumpTable *NewJT = 808 new JumpTable(*NewLabel, JT->getAddress(), JT->EntrySize, JT->Type, 809 JumpTable::LabelMapType{{Offset, NewLabel}}, Function, 810 *getSectionForAddress(JT->getAddress())); 811 NewJT->Entries = JT->Entries; 812 NewJT->Counts = JT->Counts; 813 uint64_t JumpTableID = ++DuplicatedJumpTables; 814 // Invert it to differentiate from regular jump tables whose IDs are their 815 // addresses in the input binary memory space 816 JumpTableID = ~JumpTableID; 817 JumpTables.emplace(JumpTableID, NewJT); 818 Function.JumpTables.emplace(JumpTableID, NewJT); 819 return std::make_pair(JumpTableID, NewLabel); 820 } 821 822 std::string BinaryContext::generateJumpTableName(const BinaryFunction &BF, 823 uint64_t Address) { 824 size_t Id; 825 uint64_t Offset = 0; 826 if (const JumpTable *JT = BF.getJumpTableContainingAddress(Address)) { 827 Offset = Address - JT->getAddress(); 828 auto Itr = JT->Labels.find(Offset); 829 if (Itr != JT->Labels.end()) 830 return std::string(Itr->second->getName()); 831 Id = JumpTableIds.at(JT->getAddress()); 832 } else { 833 Id = JumpTableIds[Address] = BF.JumpTables.size(); 834 } 835 return ("JUMP_TABLE/" + BF.getOneName().str() + "." + std::to_string(Id) + 836 (Offset ? ("." + std::to_string(Offset)) : "")); 837 } 838 839 bool BinaryContext::hasValidCodePadding(const BinaryFunction &BF) { 840 // FIXME: aarch64 support is missing. 841 if (!isX86()) 842 return true; 843 844 if (BF.getSize() == BF.getMaxSize()) 845 return true; 846 847 ErrorOr<ArrayRef<unsigned char>> FunctionData = BF.getData(); 848 assert(FunctionData && "cannot get function as data"); 849 850 uint64_t Offset = BF.getSize(); 851 MCInst Instr; 852 uint64_t InstrSize = 0; 853 uint64_t InstrAddress = BF.getAddress() + Offset; 854 using std::placeholders::_1; 855 856 // Skip instructions that satisfy the predicate condition. 857 auto skipInstructions = [&](std::function<bool(const MCInst &)> Predicate) { 858 const uint64_t StartOffset = Offset; 859 for (; Offset < BF.getMaxSize(); 860 Offset += InstrSize, InstrAddress += InstrSize) { 861 if (!DisAsm->getInstruction(Instr, InstrSize, FunctionData->slice(Offset), 862 InstrAddress, nulls())) 863 break; 864 if (!Predicate(Instr)) 865 break; 866 } 867 868 return Offset - StartOffset; 869 }; 870 871 // Skip a sequence of zero bytes. 872 auto skipZeros = [&]() { 873 const uint64_t StartOffset = Offset; 874 for (; Offset < BF.getMaxSize(); ++Offset) 875 if ((*FunctionData)[Offset] != 0) 876 break; 877 878 return Offset - StartOffset; 879 }; 880 881 // Accept the whole padding area filled with breakpoints. 882 auto isBreakpoint = std::bind(&MCPlusBuilder::isBreakpoint, MIB.get(), _1); 883 if (skipInstructions(isBreakpoint) && Offset == BF.getMaxSize()) 884 return true; 885 886 auto isNoop = std::bind(&MCPlusBuilder::isNoop, MIB.get(), _1); 887 888 // Some functions have a jump to the next function or to the padding area 889 // inserted after the body. 890 auto isSkipJump = [&](const MCInst &Instr) { 891 uint64_t TargetAddress = 0; 892 if (MIB->isUnconditionalBranch(Instr) && 893 MIB->evaluateBranch(Instr, InstrAddress, InstrSize, TargetAddress)) { 894 if (TargetAddress >= InstrAddress + InstrSize && 895 TargetAddress <= BF.getAddress() + BF.getMaxSize()) { 896 return true; 897 } 898 } 899 return false; 900 }; 901 902 // Skip over nops, jumps, and zero padding. Allow interleaving (this happens). 903 while (skipInstructions(isNoop) || skipInstructions(isSkipJump) || 904 skipZeros()) 905 ; 906 907 if (Offset == BF.getMaxSize()) 908 return true; 909 910 if (opts::Verbosity >= 1) { 911 errs() << "BOLT-WARNING: bad padding at address 0x" 912 << Twine::utohexstr(BF.getAddress() + BF.getSize()) 913 << " starting at offset " << (Offset - BF.getSize()) 914 << " in function " << BF << '\n' 915 << FunctionData->slice(BF.getSize(), BF.getMaxSize() - BF.getSize()) 916 << '\n'; 917 } 918 919 return false; 920 } 921 922 void BinaryContext::adjustCodePadding() { 923 for (auto &BFI : BinaryFunctions) { 924 BinaryFunction &BF = BFI.second; 925 if (!shouldEmit(BF)) 926 continue; 927 928 if (!hasValidCodePadding(BF)) { 929 if (HasRelocations) { 930 if (opts::Verbosity >= 1) { 931 outs() << "BOLT-INFO: function " << BF 932 << " has invalid padding. Ignoring the function.\n"; 933 } 934 BF.setIgnored(); 935 } else { 936 BF.setMaxSize(BF.getSize()); 937 } 938 } 939 } 940 } 941 942 MCSymbol *BinaryContext::registerNameAtAddress(StringRef Name, uint64_t Address, 943 uint64_t Size, 944 uint16_t Alignment, 945 unsigned Flags) { 946 // Register the name with MCContext. 947 MCSymbol *Symbol = Ctx->getOrCreateSymbol(Name); 948 949 auto GAI = BinaryDataMap.find(Address); 950 BinaryData *BD; 951 if (GAI == BinaryDataMap.end()) { 952 ErrorOr<BinarySection &> SectionOrErr = getSectionForAddress(Address); 953 BinarySection &Section = 954 SectionOrErr ? SectionOrErr.get() : absoluteSection(); 955 BD = new BinaryData(*Symbol, Address, Size, Alignment ? Alignment : 1, 956 Section, Flags); 957 GAI = BinaryDataMap.emplace(Address, BD).first; 958 GlobalSymbols[Name] = BD; 959 updateObjectNesting(GAI); 960 } else { 961 BD = GAI->second; 962 if (!BD->hasName(Name)) { 963 GlobalSymbols[Name] = BD; 964 BD->Symbols.push_back(Symbol); 965 } 966 } 967 968 return Symbol; 969 } 970 971 const BinaryData * 972 BinaryContext::getBinaryDataContainingAddressImpl(uint64_t Address) const { 973 auto NI = BinaryDataMap.lower_bound(Address); 974 auto End = BinaryDataMap.end(); 975 if ((NI != End && Address == NI->first) || 976 ((NI != BinaryDataMap.begin()) && (NI-- != BinaryDataMap.begin()))) { 977 if (NI->second->containsAddress(Address)) 978 return NI->second; 979 980 // If this is a sub-symbol, see if a parent data contains the address. 981 const BinaryData *BD = NI->second->getParent(); 982 while (BD) { 983 if (BD->containsAddress(Address)) 984 return BD; 985 BD = BD->getParent(); 986 } 987 } 988 return nullptr; 989 } 990 991 bool BinaryContext::setBinaryDataSize(uint64_t Address, uint64_t Size) { 992 auto NI = BinaryDataMap.find(Address); 993 assert(NI != BinaryDataMap.end()); 994 if (NI == BinaryDataMap.end()) 995 return false; 996 // TODO: it's possible that a jump table starts at the same address 997 // as a larger blob of private data. When we set the size of the 998 // jump table, it might be smaller than the total blob size. In this 999 // case we just leave the original size since (currently) it won't really 1000 // affect anything. 1001 assert((!NI->second->Size || NI->second->Size == Size || 1002 (NI->second->isJumpTable() && NI->second->Size > Size)) && 1003 "can't change the size of a symbol that has already had its " 1004 "size set"); 1005 if (!NI->second->Size) { 1006 NI->second->Size = Size; 1007 updateObjectNesting(NI); 1008 return true; 1009 } 1010 return false; 1011 } 1012 1013 void BinaryContext::generateSymbolHashes() { 1014 auto isPadding = [](const BinaryData &BD) { 1015 StringRef Contents = BD.getSection().getContents(); 1016 StringRef SymData = Contents.substr(BD.getOffset(), BD.getSize()); 1017 return (BD.getName().startswith("HOLEat") || 1018 SymData.find_first_not_of(0) == StringRef::npos); 1019 }; 1020 1021 uint64_t NumCollisions = 0; 1022 for (auto &Entry : BinaryDataMap) { 1023 BinaryData &BD = *Entry.second; 1024 StringRef Name = BD.getName(); 1025 1026 if (!isInternalSymbolName(Name)) 1027 continue; 1028 1029 // First check if a non-anonymous alias exists and move it to the front. 1030 if (BD.getSymbols().size() > 1) { 1031 auto Itr = std::find_if(BD.getSymbols().begin(), BD.getSymbols().end(), 1032 [&](const MCSymbol *Symbol) { 1033 return !isInternalSymbolName(Symbol->getName()); 1034 }); 1035 if (Itr != BD.getSymbols().end()) { 1036 size_t Idx = std::distance(BD.getSymbols().begin(), Itr); 1037 std::swap(BD.getSymbols()[0], BD.getSymbols()[Idx]); 1038 continue; 1039 } 1040 } 1041 1042 // We have to skip 0 size symbols since they will all collide. 1043 if (BD.getSize() == 0) { 1044 continue; 1045 } 1046 1047 const uint64_t Hash = BD.getSection().hash(BD); 1048 const size_t Idx = Name.find("0x"); 1049 std::string NewName = 1050 (Twine(Name.substr(0, Idx)) + "_" + Twine::utohexstr(Hash)).str(); 1051 if (getBinaryDataByName(NewName)) { 1052 // Ignore collisions for symbols that appear to be padding 1053 // (i.e. all zeros or a "hole") 1054 if (!isPadding(BD)) { 1055 if (opts::Verbosity) { 1056 errs() << "BOLT-WARNING: collision detected when hashing " << BD 1057 << " with new name (" << NewName << "), skipping.\n"; 1058 } 1059 ++NumCollisions; 1060 } 1061 continue; 1062 } 1063 BD.Symbols.insert(BD.Symbols.begin(), Ctx->getOrCreateSymbol(NewName)); 1064 GlobalSymbols[NewName] = &BD; 1065 } 1066 if (NumCollisions) { 1067 errs() << "BOLT-WARNING: " << NumCollisions 1068 << " collisions detected while hashing binary objects"; 1069 if (!opts::Verbosity) 1070 errs() << ". Use -v=1 to see the list."; 1071 errs() << '\n'; 1072 } 1073 } 1074 1075 bool BinaryContext::registerFragment(BinaryFunction &TargetFunction, 1076 BinaryFunction &Function) const { 1077 if (!isPotentialFragmentByName(TargetFunction, Function)) 1078 return false; 1079 assert(TargetFunction.isFragment() && "TargetFunction must be a fragment"); 1080 if (TargetFunction.isParentFragment(&Function)) 1081 return true; 1082 TargetFunction.addParentFragment(Function); 1083 Function.addFragment(TargetFunction); 1084 if (!HasRelocations) { 1085 TargetFunction.setSimple(false); 1086 Function.setSimple(false); 1087 } 1088 if (opts::Verbosity >= 1) { 1089 outs() << "BOLT-INFO: marking " << TargetFunction << " as a fragment of " 1090 << Function << '\n'; 1091 } 1092 return true; 1093 } 1094 1095 void BinaryContext::processInterproceduralReferences(BinaryFunction &Function) { 1096 for (uint64_t Address : Function.InterproceduralReferences) { 1097 if (!Address) 1098 continue; 1099 1100 BinaryFunction *TargetFunction = 1101 getBinaryFunctionContainingAddress(Address); 1102 if (&Function == TargetFunction) 1103 continue; 1104 1105 if (TargetFunction) { 1106 if (TargetFunction->IsFragment && 1107 !registerFragment(*TargetFunction, Function)) { 1108 errs() << "BOLT-WARNING: interprocedural reference between unrelated " 1109 "fragments: " 1110 << Function.getPrintName() << " and " 1111 << TargetFunction->getPrintName() << '\n'; 1112 } 1113 if (uint64_t Offset = Address - TargetFunction->getAddress()) 1114 TargetFunction->addEntryPointAtOffset(Offset); 1115 1116 continue; 1117 } 1118 1119 // Check if address falls in function padding space - this could be 1120 // unmarked data in code. In this case adjust the padding space size. 1121 ErrorOr<BinarySection &> Section = getSectionForAddress(Address); 1122 assert(Section && "cannot get section for referenced address"); 1123 1124 if (!Section->isText()) 1125 continue; 1126 1127 // PLT requires special handling and could be ignored in this context. 1128 StringRef SectionName = Section->getName(); 1129 if (SectionName == ".plt" || SectionName == ".plt.got") 1130 continue; 1131 1132 if (opts::processAllFunctions()) { 1133 errs() << "BOLT-ERROR: cannot process binaries with unmarked " 1134 << "object in code at address 0x" << Twine::utohexstr(Address) 1135 << " belonging to section " << SectionName << " in current mode\n"; 1136 exit(1); 1137 } 1138 1139 TargetFunction = getBinaryFunctionContainingAddress(Address, 1140 /*CheckPastEnd=*/false, 1141 /*UseMaxSize=*/true); 1142 // We are not going to overwrite non-simple functions, but for simple 1143 // ones - adjust the padding size. 1144 if (TargetFunction && TargetFunction->isSimple()) { 1145 errs() << "BOLT-WARNING: function " << *TargetFunction 1146 << " has an object detected in a padding region at address 0x" 1147 << Twine::utohexstr(Address) << '\n'; 1148 TargetFunction->setMaxSize(TargetFunction->getSize()); 1149 } 1150 } 1151 1152 clearList(Function.InterproceduralReferences); 1153 } 1154 1155 void BinaryContext::postProcessSymbolTable() { 1156 fixBinaryDataHoles(); 1157 bool Valid = true; 1158 for (auto &Entry : BinaryDataMap) { 1159 BinaryData *BD = Entry.second; 1160 if ((BD->getName().startswith("SYMBOLat") || 1161 BD->getName().startswith("DATAat")) && 1162 !BD->getParent() && !BD->getSize() && !BD->isAbsolute() && 1163 BD->getSection()) { 1164 errs() << "BOLT-WARNING: zero-sized top level symbol: " << *BD << "\n"; 1165 Valid = false; 1166 } 1167 } 1168 assert(Valid); 1169 generateSymbolHashes(); 1170 } 1171 1172 void BinaryContext::foldFunction(BinaryFunction &ChildBF, 1173 BinaryFunction &ParentBF) { 1174 assert(!ChildBF.isMultiEntry() && !ParentBF.isMultiEntry() && 1175 "cannot merge functions with multiple entry points"); 1176 1177 std::unique_lock<std::shared_timed_mutex> WriteCtxLock(CtxMutex, 1178 std::defer_lock); 1179 std::unique_lock<std::shared_timed_mutex> WriteSymbolMapLock( 1180 SymbolToFunctionMapMutex, std::defer_lock); 1181 1182 const StringRef ChildName = ChildBF.getOneName(); 1183 1184 // Move symbols over and update bookkeeping info. 1185 for (MCSymbol *Symbol : ChildBF.getSymbols()) { 1186 ParentBF.getSymbols().push_back(Symbol); 1187 WriteSymbolMapLock.lock(); 1188 SymbolToFunctionMap[Symbol] = &ParentBF; 1189 WriteSymbolMapLock.unlock(); 1190 // NB: there's no need to update BinaryDataMap and GlobalSymbols. 1191 } 1192 ChildBF.getSymbols().clear(); 1193 1194 // Move other names the child function is known under. 1195 std::move(ChildBF.Aliases.begin(), ChildBF.Aliases.end(), 1196 std::back_inserter(ParentBF.Aliases)); 1197 ChildBF.Aliases.clear(); 1198 1199 if (HasRelocations) { 1200 // Merge execution counts of ChildBF into those of ParentBF. 1201 // Without relocations, we cannot reliably merge profiles as both functions 1202 // continue to exist and either one can be executed. 1203 ChildBF.mergeProfileDataInto(ParentBF); 1204 1205 std::shared_lock<std::shared_timed_mutex> ReadBfsLock(BinaryFunctionsMutex, 1206 std::defer_lock); 1207 std::unique_lock<std::shared_timed_mutex> WriteBfsLock(BinaryFunctionsMutex, 1208 std::defer_lock); 1209 // Remove ChildBF from the global set of functions in relocs mode. 1210 ReadBfsLock.lock(); 1211 auto FI = BinaryFunctions.find(ChildBF.getAddress()); 1212 ReadBfsLock.unlock(); 1213 1214 assert(FI != BinaryFunctions.end() && "function not found"); 1215 assert(&ChildBF == &FI->second && "function mismatch"); 1216 1217 WriteBfsLock.lock(); 1218 ChildBF.clearDisasmState(); 1219 FI = BinaryFunctions.erase(FI); 1220 WriteBfsLock.unlock(); 1221 1222 } else { 1223 // In non-relocation mode we keep the function, but rename it. 1224 std::string NewName = "__ICF_" + ChildName.str(); 1225 1226 WriteCtxLock.lock(); 1227 ChildBF.getSymbols().push_back(Ctx->getOrCreateSymbol(NewName)); 1228 WriteCtxLock.unlock(); 1229 1230 ChildBF.setFolded(&ParentBF); 1231 } 1232 } 1233 1234 void BinaryContext::fixBinaryDataHoles() { 1235 assert(validateObjectNesting() && "object nesting inconsitency detected"); 1236 1237 for (BinarySection &Section : allocatableSections()) { 1238 std::vector<std::pair<uint64_t, uint64_t>> Holes; 1239 1240 auto isNotHole = [&Section](const binary_data_iterator &Itr) { 1241 BinaryData *BD = Itr->second; 1242 bool isHole = (!BD->getParent() && !BD->getSize() && BD->isObject() && 1243 (BD->getName().startswith("SYMBOLat0x") || 1244 BD->getName().startswith("DATAat0x") || 1245 BD->getName().startswith("ANONYMOUS"))); 1246 return !isHole && BD->getSection() == Section && !BD->getParent(); 1247 }; 1248 1249 auto BDStart = BinaryDataMap.begin(); 1250 auto BDEnd = BinaryDataMap.end(); 1251 auto Itr = FilteredBinaryDataIterator(isNotHole, BDStart, BDEnd); 1252 auto End = FilteredBinaryDataIterator(isNotHole, BDEnd, BDEnd); 1253 1254 uint64_t EndAddress = Section.getAddress(); 1255 1256 while (Itr != End) { 1257 if (Itr->second->getAddress() > EndAddress) { 1258 uint64_t Gap = Itr->second->getAddress() - EndAddress; 1259 Holes.emplace_back(EndAddress, Gap); 1260 } 1261 EndAddress = Itr->second->getEndAddress(); 1262 ++Itr; 1263 } 1264 1265 if (EndAddress < Section.getEndAddress()) 1266 Holes.emplace_back(EndAddress, Section.getEndAddress() - EndAddress); 1267 1268 // If there is already a symbol at the start of the hole, grow that symbol 1269 // to cover the rest. Otherwise, create a new symbol to cover the hole. 1270 for (std::pair<uint64_t, uint64_t> &Hole : Holes) { 1271 BinaryData *BD = getBinaryDataAtAddress(Hole.first); 1272 if (BD) { 1273 // BD->getSection() can be != Section if there are sections that 1274 // overlap. In this case it is probably safe to just skip the holes 1275 // since the overlapping section will not(?) have any symbols in it. 1276 if (BD->getSection() == Section) 1277 setBinaryDataSize(Hole.first, Hole.second); 1278 } else { 1279 getOrCreateGlobalSymbol(Hole.first, "HOLEat", Hole.second, 1); 1280 } 1281 } 1282 } 1283 1284 assert(validateObjectNesting() && "object nesting inconsitency detected"); 1285 assert(validateHoles() && "top level hole detected in object map"); 1286 } 1287 1288 void BinaryContext::printGlobalSymbols(raw_ostream &OS) const { 1289 const BinarySection *CurrentSection = nullptr; 1290 bool FirstSection = true; 1291 1292 for (auto &Entry : BinaryDataMap) { 1293 const BinaryData *BD = Entry.second; 1294 const BinarySection &Section = BD->getSection(); 1295 if (FirstSection || Section != *CurrentSection) { 1296 uint64_t Address, Size; 1297 StringRef Name = Section.getName(); 1298 if (Section) { 1299 Address = Section.getAddress(); 1300 Size = Section.getSize(); 1301 } else { 1302 Address = BD->getAddress(); 1303 Size = BD->getSize(); 1304 } 1305 OS << "BOLT-INFO: Section " << Name << ", " 1306 << "0x" + Twine::utohexstr(Address) << ":" 1307 << "0x" + Twine::utohexstr(Address + Size) << "/" << Size << "\n"; 1308 CurrentSection = &Section; 1309 FirstSection = false; 1310 } 1311 1312 OS << "BOLT-INFO: "; 1313 const BinaryData *P = BD->getParent(); 1314 while (P) { 1315 OS << " "; 1316 P = P->getParent(); 1317 } 1318 OS << *BD << "\n"; 1319 } 1320 } 1321 1322 Expected<unsigned> 1323 BinaryContext::getDwarfFile(StringRef Directory, StringRef FileName, 1324 unsigned FileNumber, 1325 Optional<MD5::MD5Result> Checksum, 1326 Optional<StringRef> Source, unsigned CUID) { 1327 DwarfLineTable &Table = DwarfLineTablesCUMap[CUID]; 1328 return Table.tryGetFile(Directory, FileName, Checksum, Source, 1329 Ctx->getDwarfVersion(), FileNumber); 1330 } 1331 1332 unsigned BinaryContext::addDebugFilenameToUnit(const uint32_t DestCUID, 1333 const uint32_t SrcCUID, 1334 unsigned FileIndex) { 1335 DWARFCompileUnit *SrcUnit = DwCtx->getCompileUnitForOffset(SrcCUID); 1336 const DWARFDebugLine::LineTable *LineTable = 1337 DwCtx->getLineTableForUnit(SrcUnit); 1338 const std::vector<DWARFDebugLine::FileNameEntry> &FileNames = 1339 LineTable->Prologue.FileNames; 1340 // Dir indexes start at 1, as DWARF file numbers, and a dir index 0 1341 // means empty dir. 1342 assert(FileIndex > 0 && FileIndex <= FileNames.size() && 1343 "FileIndex out of range for the compilation unit."); 1344 StringRef Dir = ""; 1345 if (FileNames[FileIndex - 1].DirIdx != 0) { 1346 if (Optional<const char *> DirName = dwarf::toString( 1347 LineTable->Prologue 1348 .IncludeDirectories[FileNames[FileIndex - 1].DirIdx - 1])) { 1349 Dir = *DirName; 1350 } 1351 } 1352 StringRef FileName = ""; 1353 if (Optional<const char *> FName = 1354 dwarf::toString(FileNames[FileIndex - 1].Name)) 1355 FileName = *FName; 1356 assert(FileName != ""); 1357 return cantFail(getDwarfFile(Dir, FileName, 0, None, None, DestCUID)); 1358 } 1359 1360 std::vector<BinaryFunction *> BinaryContext::getSortedFunctions() { 1361 std::vector<BinaryFunction *> SortedFunctions(BinaryFunctions.size()); 1362 std::transform(BinaryFunctions.begin(), BinaryFunctions.end(), 1363 SortedFunctions.begin(), 1364 [](std::pair<const uint64_t, BinaryFunction> &BFI) { 1365 return &BFI.second; 1366 }); 1367 1368 std::stable_sort(SortedFunctions.begin(), SortedFunctions.end(), 1369 [](const BinaryFunction *A, const BinaryFunction *B) { 1370 if (A->hasValidIndex() && B->hasValidIndex()) { 1371 return A->getIndex() < B->getIndex(); 1372 } 1373 return A->hasValidIndex(); 1374 }); 1375 return SortedFunctions; 1376 } 1377 1378 std::vector<BinaryFunction *> BinaryContext::getAllBinaryFunctions() { 1379 std::vector<BinaryFunction *> AllFunctions; 1380 AllFunctions.reserve(BinaryFunctions.size() + InjectedBinaryFunctions.size()); 1381 std::transform(BinaryFunctions.begin(), BinaryFunctions.end(), 1382 std::back_inserter(AllFunctions), 1383 [](std::pair<const uint64_t, BinaryFunction> &BFI) { 1384 return &BFI.second; 1385 }); 1386 std::copy(InjectedBinaryFunctions.begin(), InjectedBinaryFunctions.end(), 1387 std::back_inserter(AllFunctions)); 1388 1389 return AllFunctions; 1390 } 1391 1392 Optional<DWARFUnit *> BinaryContext::getDWOCU(uint64_t DWOId) { 1393 auto Iter = DWOCUs.find(DWOId); 1394 if (Iter == DWOCUs.end()) 1395 return None; 1396 1397 return Iter->second; 1398 } 1399 1400 DWARFContext *BinaryContext::getDWOContext() { 1401 if (DWOCUs.empty()) 1402 return nullptr; 1403 return &DWOCUs.begin()->second->getContext(); 1404 } 1405 1406 /// Handles DWO sections that can either be in .o, .dwo or .dwp files. 1407 void BinaryContext::preprocessDWODebugInfo() { 1408 for (const std::unique_ptr<DWARFUnit> &CU : DwCtx->compile_units()) { 1409 DWARFUnit *const DwarfUnit = CU.get(); 1410 if (llvm::Optional<uint64_t> DWOId = DwarfUnit->getDWOId()) { 1411 DWARFUnit *DWOCU = DwarfUnit->getNonSkeletonUnitDIE(false).getDwarfUnit(); 1412 if (!DWOCU->isDWOUnit()) { 1413 std::string DWOName = dwarf::toString( 1414 DwarfUnit->getUnitDIE().find( 1415 {dwarf::DW_AT_dwo_name, dwarf::DW_AT_GNU_dwo_name}), 1416 ""); 1417 outs() << "BOLT-WARNING: Debug Fission: DWO debug information for " 1418 << DWOName 1419 << " was not retrieved and won't be updated. Please check " 1420 "relative path.\n"; 1421 continue; 1422 } 1423 DWOCUs[*DWOId] = DWOCU; 1424 } 1425 } 1426 } 1427 1428 void BinaryContext::preprocessDebugInfo() { 1429 struct CURange { 1430 uint64_t LowPC; 1431 uint64_t HighPC; 1432 DWARFUnit *Unit; 1433 1434 bool operator<(const CURange &Other) const { return LowPC < Other.LowPC; } 1435 }; 1436 1437 // Building a map of address ranges to CUs similar to .debug_aranges and use 1438 // it to assign CU to functions. 1439 std::vector<CURange> AllRanges; 1440 AllRanges.reserve(DwCtx->getNumCompileUnits()); 1441 for (const std::unique_ptr<DWARFUnit> &CU : DwCtx->compile_units()) { 1442 Expected<DWARFAddressRangesVector> RangesOrError = 1443 CU->getUnitDIE().getAddressRanges(); 1444 if (!RangesOrError) { 1445 consumeError(RangesOrError.takeError()); 1446 continue; 1447 } 1448 for (DWARFAddressRange &Range : *RangesOrError) { 1449 // Parts of the debug info could be invalidated due to corresponding code 1450 // being removed from the binary by the linker. Hence we check if the 1451 // address is a valid one. 1452 if (containsAddress(Range.LowPC)) 1453 AllRanges.emplace_back(CURange{Range.LowPC, Range.HighPC, CU.get()}); 1454 } 1455 } 1456 1457 std::sort(AllRanges.begin(), AllRanges.end()); 1458 for (auto &KV : BinaryFunctions) { 1459 const uint64_t FunctionAddress = KV.first; 1460 BinaryFunction &Function = KV.second; 1461 1462 auto It = std::partition_point( 1463 AllRanges.begin(), AllRanges.end(), 1464 [=](CURange R) { return R.HighPC <= FunctionAddress; }); 1465 if (It != AllRanges.end() && It->LowPC <= FunctionAddress) { 1466 Function.setDWARFUnit(It->Unit); 1467 } 1468 } 1469 1470 // Discover units with debug info that needs to be updated. 1471 for (const auto &KV : BinaryFunctions) { 1472 const BinaryFunction &BF = KV.second; 1473 if (shouldEmit(BF) && BF.getDWARFUnit()) 1474 ProcessedCUs.insert(BF.getDWARFUnit()); 1475 } 1476 1477 // Clear debug info for functions from units that we are not going to process. 1478 for (auto &KV : BinaryFunctions) { 1479 BinaryFunction &BF = KV.second; 1480 if (BF.getDWARFUnit() && !ProcessedCUs.count(BF.getDWARFUnit())) 1481 BF.setDWARFUnit(nullptr); 1482 } 1483 1484 if (opts::Verbosity >= 1) { 1485 outs() << "BOLT-INFO: " << ProcessedCUs.size() << " out of " 1486 << DwCtx->getNumCompileUnits() << " CUs will be updated\n"; 1487 } 1488 1489 // Populate MCContext with DWARF files from all units. 1490 StringRef GlobalPrefix = AsmInfo->getPrivateGlobalPrefix(); 1491 for (const std::unique_ptr<DWARFUnit> &CU : DwCtx->compile_units()) { 1492 const uint64_t CUID = CU->getOffset(); 1493 getDwarfLineTable(CUID).setLabel(Ctx->getOrCreateSymbol( 1494 GlobalPrefix + "line_table_start" + Twine(CUID))); 1495 1496 if (!ProcessedCUs.count(CU.get())) 1497 continue; 1498 1499 const DWARFDebugLine::LineTable *LineTable = 1500 DwCtx->getLineTableForUnit(CU.get()); 1501 const std::vector<DWARFDebugLine::FileNameEntry> &FileNames = 1502 LineTable->Prologue.FileNames; 1503 1504 // Assign a unique label to every line table, one per CU. 1505 // Make sure empty debug line tables are registered too. 1506 if (FileNames.empty()) { 1507 cantFail(getDwarfFile("", "<unknown>", 0, None, None, CUID)); 1508 continue; 1509 } 1510 for (size_t I = 0, Size = FileNames.size(); I != Size; ++I) { 1511 // Dir indexes start at 1, as DWARF file numbers, and a dir index 0 1512 // means empty dir. 1513 StringRef Dir = ""; 1514 if (FileNames[I].DirIdx != 0) 1515 if (Optional<const char *> DirName = dwarf::toString( 1516 LineTable->Prologue 1517 .IncludeDirectories[FileNames[I].DirIdx - 1])) 1518 Dir = *DirName; 1519 StringRef FileName = ""; 1520 if (Optional<const char *> FName = dwarf::toString(FileNames[I].Name)) 1521 FileName = *FName; 1522 assert(FileName != ""); 1523 cantFail(getDwarfFile(Dir, FileName, 0, None, None, CUID)); 1524 } 1525 } 1526 1527 preprocessDWODebugInfo(); 1528 } 1529 1530 bool BinaryContext::shouldEmit(const BinaryFunction &Function) const { 1531 if (opts::processAllFunctions()) 1532 return true; 1533 1534 if (Function.isIgnored()) 1535 return false; 1536 1537 // In relocation mode we will emit non-simple functions with CFG. 1538 // If the function does not have a CFG it should be marked as ignored. 1539 return HasRelocations || Function.isSimple(); 1540 } 1541 1542 void BinaryContext::printCFI(raw_ostream &OS, const MCCFIInstruction &Inst) { 1543 uint32_t Operation = Inst.getOperation(); 1544 switch (Operation) { 1545 case MCCFIInstruction::OpSameValue: 1546 OS << "OpSameValue Reg" << Inst.getRegister(); 1547 break; 1548 case MCCFIInstruction::OpRememberState: 1549 OS << "OpRememberState"; 1550 break; 1551 case MCCFIInstruction::OpRestoreState: 1552 OS << "OpRestoreState"; 1553 break; 1554 case MCCFIInstruction::OpOffset: 1555 OS << "OpOffset Reg" << Inst.getRegister() << " " << Inst.getOffset(); 1556 break; 1557 case MCCFIInstruction::OpDefCfaRegister: 1558 OS << "OpDefCfaRegister Reg" << Inst.getRegister(); 1559 break; 1560 case MCCFIInstruction::OpDefCfaOffset: 1561 OS << "OpDefCfaOffset " << Inst.getOffset(); 1562 break; 1563 case MCCFIInstruction::OpDefCfa: 1564 OS << "OpDefCfa Reg" << Inst.getRegister() << " " << Inst.getOffset(); 1565 break; 1566 case MCCFIInstruction::OpRelOffset: 1567 OS << "OpRelOffset Reg" << Inst.getRegister() << " " << Inst.getOffset(); 1568 break; 1569 case MCCFIInstruction::OpAdjustCfaOffset: 1570 OS << "OfAdjustCfaOffset " << Inst.getOffset(); 1571 break; 1572 case MCCFIInstruction::OpEscape: 1573 OS << "OpEscape"; 1574 break; 1575 case MCCFIInstruction::OpRestore: 1576 OS << "OpRestore Reg" << Inst.getRegister(); 1577 break; 1578 case MCCFIInstruction::OpUndefined: 1579 OS << "OpUndefined Reg" << Inst.getRegister(); 1580 break; 1581 case MCCFIInstruction::OpRegister: 1582 OS << "OpRegister Reg" << Inst.getRegister() << " Reg" 1583 << Inst.getRegister2(); 1584 break; 1585 case MCCFIInstruction::OpWindowSave: 1586 OS << "OpWindowSave"; 1587 break; 1588 case MCCFIInstruction::OpGnuArgsSize: 1589 OS << "OpGnuArgsSize"; 1590 break; 1591 default: 1592 OS << "Op#" << Operation; 1593 break; 1594 } 1595 } 1596 1597 void BinaryContext::printInstruction(raw_ostream &OS, const MCInst &Instruction, 1598 uint64_t Offset, 1599 const BinaryFunction *Function, 1600 bool PrintMCInst, bool PrintMemData, 1601 bool PrintRelocations) const { 1602 if (MIB->isEHLabel(Instruction)) { 1603 OS << " EH_LABEL: " << *MIB->getTargetSymbol(Instruction) << '\n'; 1604 return; 1605 } 1606 OS << format(" %08" PRIx64 ": ", Offset); 1607 if (MIB->isCFI(Instruction)) { 1608 uint32_t Offset = Instruction.getOperand(0).getImm(); 1609 OS << "\t!CFI\t$" << Offset << "\t; "; 1610 if (Function) 1611 printCFI(OS, *Function->getCFIFor(Instruction)); 1612 OS << "\n"; 1613 return; 1614 } 1615 InstPrinter->printInst(&Instruction, 0, "", *STI, OS); 1616 if (MIB->isCall(Instruction)) { 1617 if (MIB->isTailCall(Instruction)) 1618 OS << " # TAILCALL "; 1619 if (MIB->isInvoke(Instruction)) { 1620 const Optional<MCPlus::MCLandingPad> EHInfo = MIB->getEHInfo(Instruction); 1621 OS << " # handler: "; 1622 if (EHInfo->first) 1623 OS << *EHInfo->first; 1624 else 1625 OS << '0'; 1626 OS << "; action: " << EHInfo->second; 1627 const int64_t GnuArgsSize = MIB->getGnuArgsSize(Instruction); 1628 if (GnuArgsSize >= 0) 1629 OS << "; GNU_args_size = " << GnuArgsSize; 1630 } 1631 } else if (MIB->isIndirectBranch(Instruction)) { 1632 if (uint64_t JTAddress = MIB->getJumpTable(Instruction)) { 1633 OS << " # JUMPTABLE @0x" << Twine::utohexstr(JTAddress); 1634 } else { 1635 OS << " # UNKNOWN CONTROL FLOW"; 1636 } 1637 } 1638 if (Optional<uint32_t> Offset = MIB->getOffset(Instruction)) 1639 OS << " # Offset: " << *Offset; 1640 1641 MIB->printAnnotations(Instruction, OS); 1642 1643 if (opts::PrintDebugInfo) { 1644 DebugLineTableRowRef RowRef = 1645 DebugLineTableRowRef::fromSMLoc(Instruction.getLoc()); 1646 if (RowRef != DebugLineTableRowRef::NULL_ROW) { 1647 const DWARFDebugLine::LineTable *LineTable; 1648 if (Function && Function->getDWARFUnit() && 1649 Function->getDWARFUnit()->getOffset() == RowRef.DwCompileUnitIndex) { 1650 LineTable = Function->getDWARFLineTable(); 1651 } else { 1652 LineTable = DwCtx->getLineTableForUnit( 1653 DwCtx->getCompileUnitForOffset(RowRef.DwCompileUnitIndex)); 1654 } 1655 assert(LineTable && 1656 "line table expected for instruction with debug info"); 1657 1658 const DWARFDebugLine::Row &Row = LineTable->Rows[RowRef.RowIndex - 1]; 1659 StringRef FileName = ""; 1660 if (Optional<const char *> FName = 1661 dwarf::toString(LineTable->Prologue.FileNames[Row.File - 1].Name)) 1662 FileName = *FName; 1663 OS << " # debug line " << FileName << ":" << Row.Line; 1664 if (Row.Column) 1665 OS << ":" << Row.Column; 1666 if (Row.Discriminator) 1667 OS << " discriminator:" << Row.Discriminator; 1668 } 1669 } 1670 1671 if ((opts::PrintRelocations || PrintRelocations) && Function) { 1672 const uint64_t Size = computeCodeSize(&Instruction, &Instruction + 1); 1673 Function->printRelocations(OS, Offset, Size); 1674 } 1675 1676 OS << "\n"; 1677 1678 if (PrintMCInst) { 1679 Instruction.dump_pretty(OS, InstPrinter.get()); 1680 OS << "\n"; 1681 } 1682 } 1683 1684 ErrorOr<BinarySection &> BinaryContext::getSectionForAddress(uint64_t Address) { 1685 auto SI = AddressToSection.upper_bound(Address); 1686 if (SI != AddressToSection.begin()) { 1687 --SI; 1688 uint64_t UpperBound = SI->first + SI->second->getSize(); 1689 if (!SI->second->getSize()) 1690 UpperBound += 1; 1691 if (UpperBound > Address) 1692 return *SI->second; 1693 } 1694 return std::make_error_code(std::errc::bad_address); 1695 } 1696 1697 ErrorOr<StringRef> 1698 BinaryContext::getSectionNameForAddress(uint64_t Address) const { 1699 if (ErrorOr<const BinarySection &> Section = getSectionForAddress(Address)) 1700 return Section->getName(); 1701 return std::make_error_code(std::errc::bad_address); 1702 } 1703 1704 BinarySection &BinaryContext::registerSection(BinarySection *Section) { 1705 auto Res = Sections.insert(Section); 1706 (void)Res; 1707 assert(Res.second && "can't register the same section twice."); 1708 1709 // Only register allocatable sections in the AddressToSection map. 1710 if (Section->isAllocatable() && Section->getAddress()) 1711 AddressToSection.insert(std::make_pair(Section->getAddress(), Section)); 1712 NameToSection.insert( 1713 std::make_pair(std::string(Section->getName()), Section)); 1714 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: registering " << *Section << "\n"); 1715 return *Section; 1716 } 1717 1718 BinarySection &BinaryContext::registerSection(SectionRef Section) { 1719 return registerSection(new BinarySection(*this, Section)); 1720 } 1721 1722 BinarySection & 1723 BinaryContext::registerSection(StringRef SectionName, 1724 const BinarySection &OriginalSection) { 1725 return registerSection( 1726 new BinarySection(*this, SectionName, OriginalSection)); 1727 } 1728 1729 BinarySection & 1730 BinaryContext::registerOrUpdateSection(StringRef Name, unsigned ELFType, 1731 unsigned ELFFlags, uint8_t *Data, 1732 uint64_t Size, unsigned Alignment) { 1733 auto NamedSections = getSectionByName(Name); 1734 if (NamedSections.begin() != NamedSections.end()) { 1735 assert(std::next(NamedSections.begin()) == NamedSections.end() && 1736 "can only update unique sections"); 1737 BinarySection *Section = NamedSections.begin()->second; 1738 1739 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: updating " << *Section << " -> "); 1740 const bool Flag = Section->isAllocatable(); 1741 (void)Flag; 1742 Section->update(Data, Size, Alignment, ELFType, ELFFlags); 1743 LLVM_DEBUG(dbgs() << *Section << "\n"); 1744 // FIXME: Fix section flags/attributes for MachO. 1745 if (isELF()) 1746 assert(Flag == Section->isAllocatable() && 1747 "can't change section allocation status"); 1748 return *Section; 1749 } 1750 1751 return registerSection( 1752 new BinarySection(*this, Name, Data, Size, Alignment, ELFType, ELFFlags)); 1753 } 1754 1755 bool BinaryContext::deregisterSection(BinarySection &Section) { 1756 BinarySection *SectionPtr = &Section; 1757 auto Itr = Sections.find(SectionPtr); 1758 if (Itr != Sections.end()) { 1759 auto Range = AddressToSection.equal_range(SectionPtr->getAddress()); 1760 while (Range.first != Range.second) { 1761 if (Range.first->second == SectionPtr) { 1762 AddressToSection.erase(Range.first); 1763 break; 1764 } 1765 ++Range.first; 1766 } 1767 1768 auto NameRange = 1769 NameToSection.equal_range(std::string(SectionPtr->getName())); 1770 while (NameRange.first != NameRange.second) { 1771 if (NameRange.first->second == SectionPtr) { 1772 NameToSection.erase(NameRange.first); 1773 break; 1774 } 1775 ++NameRange.first; 1776 } 1777 1778 Sections.erase(Itr); 1779 delete SectionPtr; 1780 return true; 1781 } 1782 return false; 1783 } 1784 1785 void BinaryContext::printSections(raw_ostream &OS) const { 1786 for (BinarySection *const &Section : Sections) 1787 OS << "BOLT-INFO: " << *Section << "\n"; 1788 } 1789 1790 BinarySection &BinaryContext::absoluteSection() { 1791 if (ErrorOr<BinarySection &> Section = getUniqueSectionByName("<absolute>")) 1792 return *Section; 1793 return registerOrUpdateSection("<absolute>", ELF::SHT_NULL, 0u); 1794 } 1795 1796 ErrorOr<uint64_t> BinaryContext::getUnsignedValueAtAddress(uint64_t Address, 1797 size_t Size) const { 1798 const ErrorOr<const BinarySection &> Section = getSectionForAddress(Address); 1799 if (!Section) 1800 return std::make_error_code(std::errc::bad_address); 1801 1802 if (Section->isVirtual()) 1803 return 0; 1804 1805 DataExtractor DE(Section->getContents(), AsmInfo->isLittleEndian(), 1806 AsmInfo->getCodePointerSize()); 1807 auto ValueOffset = static_cast<uint64_t>(Address - Section->getAddress()); 1808 return DE.getUnsigned(&ValueOffset, Size); 1809 } 1810 1811 ErrorOr<uint64_t> BinaryContext::getSignedValueAtAddress(uint64_t Address, 1812 size_t Size) const { 1813 const ErrorOr<const BinarySection &> Section = getSectionForAddress(Address); 1814 if (!Section) 1815 return std::make_error_code(std::errc::bad_address); 1816 1817 if (Section->isVirtual()) 1818 return 0; 1819 1820 DataExtractor DE(Section->getContents(), AsmInfo->isLittleEndian(), 1821 AsmInfo->getCodePointerSize()); 1822 auto ValueOffset = static_cast<uint64_t>(Address - Section->getAddress()); 1823 return DE.getSigned(&ValueOffset, Size); 1824 } 1825 1826 void BinaryContext::addRelocation(uint64_t Address, MCSymbol *Symbol, 1827 uint64_t Type, uint64_t Addend, 1828 uint64_t Value) { 1829 ErrorOr<BinarySection &> Section = getSectionForAddress(Address); 1830 assert(Section && "cannot find section for address"); 1831 Section->addRelocation(Address - Section->getAddress(), Symbol, Type, Addend, 1832 Value); 1833 } 1834 1835 void BinaryContext::addDynamicRelocation(uint64_t Address, MCSymbol *Symbol, 1836 uint64_t Type, uint64_t Addend, 1837 uint64_t Value) { 1838 ErrorOr<BinarySection &> Section = getSectionForAddress(Address); 1839 assert(Section && "cannot find section for address"); 1840 Section->addDynamicRelocation(Address - Section->getAddress(), Symbol, Type, 1841 Addend, Value); 1842 } 1843 1844 bool BinaryContext::removeRelocationAt(uint64_t Address) { 1845 ErrorOr<BinarySection &> Section = getSectionForAddress(Address); 1846 assert(Section && "cannot find section for address"); 1847 return Section->removeRelocationAt(Address - Section->getAddress()); 1848 } 1849 1850 const Relocation *BinaryContext::getRelocationAt(uint64_t Address) { 1851 ErrorOr<BinarySection &> Section = getSectionForAddress(Address); 1852 if (!Section) 1853 return nullptr; 1854 1855 return Section->getRelocationAt(Address - Section->getAddress()); 1856 } 1857 1858 const Relocation *BinaryContext::getDynamicRelocationAt(uint64_t Address) { 1859 ErrorOr<BinarySection &> Section = getSectionForAddress(Address); 1860 if (!Section) 1861 return nullptr; 1862 1863 return Section->getDynamicRelocationAt(Address - Section->getAddress()); 1864 } 1865 1866 void BinaryContext::markAmbiguousRelocations(BinaryData &BD, 1867 const uint64_t Address) { 1868 auto setImmovable = [&](BinaryData &BD) { 1869 BinaryData *Root = BD.getAtomicRoot(); 1870 LLVM_DEBUG(if (Root->isMoveable()) { 1871 dbgs() << "BOLT-DEBUG: setting " << *Root << " as immovable " 1872 << "due to ambiguous relocation referencing 0x" 1873 << Twine::utohexstr(Address) << '\n'; 1874 }); 1875 Root->setIsMoveable(false); 1876 }; 1877 1878 if (Address == BD.getAddress()) { 1879 setImmovable(BD); 1880 1881 // Set previous symbol as immovable 1882 BinaryData *Prev = getBinaryDataContainingAddress(Address - 1); 1883 if (Prev && Prev->getEndAddress() == BD.getAddress()) 1884 setImmovable(*Prev); 1885 } 1886 1887 if (Address == BD.getEndAddress()) { 1888 setImmovable(BD); 1889 1890 // Set next symbol as immovable 1891 BinaryData *Next = getBinaryDataContainingAddress(BD.getEndAddress()); 1892 if (Next && Next->getAddress() == BD.getEndAddress()) 1893 setImmovable(*Next); 1894 } 1895 } 1896 1897 BinaryFunction *BinaryContext::getFunctionForSymbol(const MCSymbol *Symbol, 1898 uint64_t *EntryDesc) { 1899 std::shared_lock<std::shared_timed_mutex> Lock(SymbolToFunctionMapMutex); 1900 auto BFI = SymbolToFunctionMap.find(Symbol); 1901 if (BFI == SymbolToFunctionMap.end()) 1902 return nullptr; 1903 1904 BinaryFunction *BF = BFI->second; 1905 if (EntryDesc) 1906 *EntryDesc = BF->getEntryIDForSymbol(Symbol); 1907 1908 return BF; 1909 } 1910 1911 void BinaryContext::exitWithBugReport(StringRef Message, 1912 const BinaryFunction &Function) const { 1913 errs() << "=======================================\n"; 1914 errs() << "BOLT is unable to proceed because it couldn't properly understand " 1915 "this function.\n"; 1916 errs() << "If you are running the most recent version of BOLT, you may " 1917 "want to " 1918 "report this and paste this dump.\nPlease check that there is no " 1919 "sensitive contents being shared in this dump.\n"; 1920 errs() << "\nOffending function: " << Function.getPrintName() << "\n\n"; 1921 ScopedPrinter SP(errs()); 1922 SP.printBinaryBlock("Function contents", *Function.getData()); 1923 errs() << "\n"; 1924 Function.dump(); 1925 errs() << "ERROR: " << Message; 1926 errs() << "\n=======================================\n"; 1927 exit(1); 1928 } 1929 1930 BinaryFunction * 1931 BinaryContext::createInjectedBinaryFunction(const std::string &Name, 1932 bool IsSimple) { 1933 InjectedBinaryFunctions.push_back(new BinaryFunction(Name, *this, IsSimple)); 1934 BinaryFunction *BF = InjectedBinaryFunctions.back(); 1935 setSymbolToFunctionMap(BF->getSymbol(), BF); 1936 BF->CurrentState = BinaryFunction::State::CFG; 1937 return BF; 1938 } 1939 1940 std::pair<size_t, size_t> 1941 BinaryContext::calculateEmittedSize(BinaryFunction &BF, bool FixBranches) { 1942 // Adjust branch instruction to match the current layout. 1943 if (FixBranches) 1944 BF.fixBranches(); 1945 1946 // Create local MC context to isolate the effect of ephemeral code emission. 1947 IndependentCodeEmitter MCEInstance = createIndependentMCCodeEmitter(); 1948 MCContext *LocalCtx = MCEInstance.LocalCtx.get(); 1949 MCAsmBackend *MAB = 1950 TheTarget->createMCAsmBackend(*STI, *MRI, MCTargetOptions()); 1951 1952 SmallString<256> Code; 1953 raw_svector_ostream VecOS(Code); 1954 1955 std::unique_ptr<MCObjectWriter> OW = MAB->createObjectWriter(VecOS); 1956 std::unique_ptr<MCStreamer> Streamer(TheTarget->createMCObjectStreamer( 1957 *TheTriple, *LocalCtx, std::unique_ptr<MCAsmBackend>(MAB), std::move(OW), 1958 std::unique_ptr<MCCodeEmitter>(MCEInstance.MCE.release()), *STI, 1959 /*RelaxAll=*/false, 1960 /*IncrementalLinkerCompatible=*/false, 1961 /*DWARFMustBeAtTheEnd=*/false)); 1962 1963 Streamer->initSections(false, *STI); 1964 1965 MCSection *Section = MCEInstance.LocalMOFI->getTextSection(); 1966 Section->setHasInstructions(true); 1967 1968 // Create symbols in the LocalCtx so that they get destroyed with it. 1969 MCSymbol *StartLabel = LocalCtx->createTempSymbol(); 1970 MCSymbol *EndLabel = LocalCtx->createTempSymbol(); 1971 MCSymbol *ColdStartLabel = LocalCtx->createTempSymbol(); 1972 MCSymbol *ColdEndLabel = LocalCtx->createTempSymbol(); 1973 1974 Streamer->SwitchSection(Section); 1975 Streamer->emitLabel(StartLabel); 1976 emitFunctionBody(*Streamer, BF, /*EmitColdPart=*/false, 1977 /*EmitCodeOnly=*/true); 1978 Streamer->emitLabel(EndLabel); 1979 1980 if (BF.isSplit()) { 1981 MCSectionELF *ColdSection = 1982 LocalCtx->getELFSection(BF.getColdCodeSectionName(), ELF::SHT_PROGBITS, 1983 ELF::SHF_EXECINSTR | ELF::SHF_ALLOC); 1984 ColdSection->setHasInstructions(true); 1985 1986 Streamer->SwitchSection(ColdSection); 1987 Streamer->emitLabel(ColdStartLabel); 1988 emitFunctionBody(*Streamer, BF, /*EmitColdPart=*/true, 1989 /*EmitCodeOnly=*/true); 1990 Streamer->emitLabel(ColdEndLabel); 1991 // To avoid calling MCObjectStreamer::flushPendingLabels() which is private 1992 Streamer->emitBytes(StringRef("")); 1993 Streamer->SwitchSection(Section); 1994 } 1995 1996 // To avoid calling MCObjectStreamer::flushPendingLabels() which is private or 1997 // MCStreamer::Finish(), which does more than we want 1998 Streamer->emitBytes(StringRef("")); 1999 2000 MCAssembler &Assembler = 2001 static_cast<MCObjectStreamer *>(Streamer.get())->getAssembler(); 2002 MCAsmLayout Layout(Assembler); 2003 Assembler.layout(Layout); 2004 2005 const uint64_t HotSize = 2006 Layout.getSymbolOffset(*EndLabel) - Layout.getSymbolOffset(*StartLabel); 2007 const uint64_t ColdSize = BF.isSplit() 2008 ? Layout.getSymbolOffset(*ColdEndLabel) - 2009 Layout.getSymbolOffset(*ColdStartLabel) 2010 : 0ULL; 2011 2012 // Clean-up the effect of the code emission. 2013 for (const MCSymbol &Symbol : Assembler.symbols()) { 2014 MCSymbol *MutableSymbol = const_cast<MCSymbol *>(&Symbol); 2015 MutableSymbol->setUndefined(); 2016 MutableSymbol->setIsRegistered(false); 2017 } 2018 2019 return std::make_pair(HotSize, ColdSize); 2020 } 2021 2022 bool BinaryContext::validateEncoding(const MCInst &Inst, 2023 ArrayRef<uint8_t> InputEncoding) const { 2024 SmallString<256> Code; 2025 SmallVector<MCFixup, 4> Fixups; 2026 raw_svector_ostream VecOS(Code); 2027 2028 MCE->encodeInstruction(Inst, VecOS, Fixups, *STI); 2029 auto EncodedData = ArrayRef<uint8_t>((uint8_t *)Code.data(), Code.size()); 2030 if (InputEncoding != EncodedData) { 2031 if (opts::Verbosity > 1) { 2032 errs() << "BOLT-WARNING: mismatched encoding detected\n" 2033 << " input: " << InputEncoding << '\n' 2034 << " output: " << EncodedData << '\n'; 2035 } 2036 return false; 2037 } 2038 2039 return true; 2040 } 2041 2042 uint64_t BinaryContext::getHotThreshold() const { 2043 static uint64_t Threshold = 0; 2044 if (Threshold == 0) { 2045 Threshold = std::max( 2046 (uint64_t)opts::ExecutionCountThreshold, 2047 NumProfiledFuncs ? SumExecutionCount / (2 * NumProfiledFuncs) : 1); 2048 } 2049 return Threshold; 2050 } 2051 2052 BinaryFunction *BinaryContext::getBinaryFunctionContainingAddress( 2053 uint64_t Address, bool CheckPastEnd, bool UseMaxSize) { 2054 auto FI = BinaryFunctions.upper_bound(Address); 2055 if (FI == BinaryFunctions.begin()) 2056 return nullptr; 2057 --FI; 2058 2059 const uint64_t UsedSize = 2060 UseMaxSize ? FI->second.getMaxSize() : FI->second.getSize(); 2061 2062 if (Address >= FI->first + UsedSize + (CheckPastEnd ? 1 : 0)) 2063 return nullptr; 2064 2065 return &FI->second; 2066 } 2067 2068 BinaryFunction *BinaryContext::getBinaryFunctionAtAddress(uint64_t Address) { 2069 // First, try to find a function starting at the given address. If the 2070 // function was folded, this will get us the original folded function if it 2071 // wasn't removed from the list, e.g. in non-relocation mode. 2072 auto BFI = BinaryFunctions.find(Address); 2073 if (BFI != BinaryFunctions.end()) 2074 return &BFI->second; 2075 2076 // We might have folded the function matching the object at the given 2077 // address. In such case, we look for a function matching the symbol 2078 // registered at the original address. The new function (the one that the 2079 // original was folded into) will hold the symbol. 2080 if (const BinaryData *BD = getBinaryDataAtAddress(Address)) { 2081 uint64_t EntryID = 0; 2082 BinaryFunction *BF = getFunctionForSymbol(BD->getSymbol(), &EntryID); 2083 if (BF && EntryID == 0) 2084 return BF; 2085 } 2086 return nullptr; 2087 } 2088 2089 DebugAddressRangesVector BinaryContext::translateModuleAddressRanges( 2090 const DWARFAddressRangesVector &InputRanges) const { 2091 DebugAddressRangesVector OutputRanges; 2092 2093 for (const DWARFAddressRange Range : InputRanges) { 2094 auto BFI = BinaryFunctions.lower_bound(Range.LowPC); 2095 while (BFI != BinaryFunctions.end()) { 2096 const BinaryFunction &Function = BFI->second; 2097 if (Function.getAddress() >= Range.HighPC) 2098 break; 2099 const DebugAddressRangesVector FunctionRanges = 2100 Function.getOutputAddressRanges(); 2101 std::move(std::begin(FunctionRanges), std::end(FunctionRanges), 2102 std::back_inserter(OutputRanges)); 2103 std::advance(BFI, 1); 2104 } 2105 } 2106 2107 return OutputRanges; 2108 } 2109 2110 } // namespace bolt 2111 } // namespace llvm 2112