1 //===- bolt/Core/BinaryContext.cpp - Low-level context --------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file implements the BinaryContext class. 10 // 11 //===----------------------------------------------------------------------===// 12 13 #include "bolt/Core/BinaryContext.h" 14 #include "bolt/Core/BinaryEmitter.h" 15 #include "bolt/Core/BinaryFunction.h" 16 #include "bolt/Utils/CommandLineOpts.h" 17 #include "bolt/Utils/NameResolver.h" 18 #include "bolt/Utils/Utils.h" 19 #include "llvm/ADT/Twine.h" 20 #include "llvm/DebugInfo/DWARF/DWARFCompileUnit.h" 21 #include "llvm/DebugInfo/DWARF/DWARFFormValue.h" 22 #include "llvm/DebugInfo/DWARF/DWARFUnit.h" 23 #include "llvm/MC/MCAsmLayout.h" 24 #include "llvm/MC/MCAssembler.h" 25 #include "llvm/MC/MCContext.h" 26 #include "llvm/MC/MCDisassembler/MCDisassembler.h" 27 #include "llvm/MC/MCInstPrinter.h" 28 #include "llvm/MC/MCObjectStreamer.h" 29 #include "llvm/MC/MCObjectWriter.h" 30 #include "llvm/MC/MCRegisterInfo.h" 31 #include "llvm/MC/MCSectionELF.h" 32 #include "llvm/MC/MCStreamer.h" 33 #include "llvm/MC/MCSubtargetInfo.h" 34 #include "llvm/MC/MCSymbol.h" 35 #include "llvm/Support/CommandLine.h" 36 #include "llvm/Support/Error.h" 37 #include "llvm/Support/Regex.h" 38 #include <algorithm> 39 #include <functional> 40 #include <iterator> 41 #include <unordered_set> 42 43 using namespace llvm; 44 45 #undef DEBUG_TYPE 46 #define DEBUG_TYPE "bolt" 47 48 namespace opts { 49 50 cl::opt<bool> 51 NoHugePages("no-huge-pages", 52 cl::desc("use regular size pages for code alignment"), 53 cl::ZeroOrMore, 54 cl::Hidden, 55 cl::cat(BoltCategory)); 56 57 static cl::opt<bool> 58 PrintDebugInfo("print-debug-info", 59 cl::desc("print debug info when printing functions"), 60 cl::Hidden, 61 cl::ZeroOrMore, 62 cl::cat(BoltCategory)); 63 64 cl::opt<bool> 65 PrintRelocations("print-relocations", 66 cl::desc("print relocations when printing functions/objects"), 67 cl::Hidden, 68 cl::ZeroOrMore, 69 cl::cat(BoltCategory)); 70 71 static cl::opt<bool> 72 PrintMemData("print-mem-data", 73 cl::desc("print memory data annotations when printing functions"), 74 cl::Hidden, 75 cl::ZeroOrMore, 76 cl::cat(BoltCategory)); 77 78 } // namespace opts 79 80 namespace llvm { 81 namespace bolt { 82 83 BinaryContext::BinaryContext(std::unique_ptr<MCContext> Ctx, 84 std::unique_ptr<DWARFContext> DwCtx, 85 std::unique_ptr<Triple> TheTriple, 86 const Target *TheTarget, std::string TripleName, 87 std::unique_ptr<MCCodeEmitter> MCE, 88 std::unique_ptr<MCObjectFileInfo> MOFI, 89 std::unique_ptr<const MCAsmInfo> AsmInfo, 90 std::unique_ptr<const MCInstrInfo> MII, 91 std::unique_ptr<const MCSubtargetInfo> STI, 92 std::unique_ptr<MCInstPrinter> InstPrinter, 93 std::unique_ptr<const MCInstrAnalysis> MIA, 94 std::unique_ptr<MCPlusBuilder> MIB, 95 std::unique_ptr<const MCRegisterInfo> MRI, 96 std::unique_ptr<MCDisassembler> DisAsm) 97 : Ctx(std::move(Ctx)), DwCtx(std::move(DwCtx)), 98 TheTriple(std::move(TheTriple)), TheTarget(TheTarget), 99 TripleName(TripleName), MCE(std::move(MCE)), MOFI(std::move(MOFI)), 100 AsmInfo(std::move(AsmInfo)), MII(std::move(MII)), STI(std::move(STI)), 101 InstPrinter(std::move(InstPrinter)), MIA(std::move(MIA)), 102 MIB(std::move(MIB)), MRI(std::move(MRI)), DisAsm(std::move(DisAsm)) { 103 Relocation::Arch = this->TheTriple->getArch(); 104 RegularPageSize = isAArch64() ? RegularPageSizeAArch64 : RegularPageSizeX86; 105 PageAlign = opts::NoHugePages ? RegularPageSize : HugePageSize; 106 } 107 108 BinaryContext::~BinaryContext() { 109 for (BinarySection *Section : Sections) 110 delete Section; 111 for (BinaryFunction *InjectedFunction : InjectedBinaryFunctions) 112 delete InjectedFunction; 113 for (std::pair<const uint64_t, JumpTable *> JTI : JumpTables) 114 delete JTI.second; 115 clearBinaryData(); 116 } 117 118 /// Create BinaryContext for a given architecture \p ArchName and 119 /// triple \p TripleName. 120 Expected<std::unique_ptr<BinaryContext>> 121 BinaryContext::createBinaryContext(const ObjectFile *File, bool IsPIC, 122 std::unique_ptr<DWARFContext> DwCtx) { 123 StringRef ArchName = ""; 124 StringRef FeaturesStr = ""; 125 switch (File->getArch()) { 126 case llvm::Triple::x86_64: 127 ArchName = "x86-64"; 128 FeaturesStr = "+nopl"; 129 break; 130 case llvm::Triple::aarch64: 131 ArchName = "aarch64"; 132 FeaturesStr = "+fp-armv8,+neon,+crypto,+dotprod,+crc,+lse,+ras,+rdm," 133 "+fullfp16,+spe,+fuse-aes,+rcpc"; 134 break; 135 default: 136 return createStringError(std::errc::not_supported, 137 "BOLT-ERROR: Unrecognized machine in ELF file"); 138 } 139 140 auto TheTriple = std::make_unique<Triple>(File->makeTriple()); 141 const std::string TripleName = TheTriple->str(); 142 143 std::string Error; 144 const Target *TheTarget = 145 TargetRegistry::lookupTarget(std::string(ArchName), *TheTriple, Error); 146 if (!TheTarget) 147 return createStringError(make_error_code(std::errc::not_supported), 148 Twine("BOLT-ERROR: ", Error)); 149 150 std::unique_ptr<const MCRegisterInfo> MRI( 151 TheTarget->createMCRegInfo(TripleName)); 152 if (!MRI) 153 return createStringError( 154 make_error_code(std::errc::not_supported), 155 Twine("BOLT-ERROR: no register info for target ", TripleName)); 156 157 // Set up disassembler. 158 std::unique_ptr<MCAsmInfo> AsmInfo( 159 TheTarget->createMCAsmInfo(*MRI, TripleName, MCTargetOptions())); 160 if (!AsmInfo) 161 return createStringError( 162 make_error_code(std::errc::not_supported), 163 Twine("BOLT-ERROR: no assembly info for target ", TripleName)); 164 // BOLT creates "func@PLT" symbols for PLT entries. In function assembly dump 165 // we want to emit such names as using @PLT without double quotes to convey 166 // variant kind to the assembler. BOLT doesn't rely on the linker so we can 167 // override the default AsmInfo behavior to emit names the way we want. 168 AsmInfo->setAllowAtInName(true); 169 170 std::unique_ptr<const MCSubtargetInfo> STI( 171 TheTarget->createMCSubtargetInfo(TripleName, "", FeaturesStr)); 172 if (!STI) 173 return createStringError( 174 make_error_code(std::errc::not_supported), 175 Twine("BOLT-ERROR: no subtarget info for target ", TripleName)); 176 177 std::unique_ptr<const MCInstrInfo> MII(TheTarget->createMCInstrInfo()); 178 if (!MII) 179 return createStringError( 180 make_error_code(std::errc::not_supported), 181 Twine("BOLT-ERROR: no instruction info for target ", TripleName)); 182 183 std::unique_ptr<MCContext> Ctx( 184 new MCContext(*TheTriple, AsmInfo.get(), MRI.get(), STI.get())); 185 std::unique_ptr<MCObjectFileInfo> MOFI( 186 TheTarget->createMCObjectFileInfo(*Ctx, IsPIC)); 187 Ctx->setObjectFileInfo(MOFI.get()); 188 // We do not support X86 Large code model. Change this in the future. 189 bool Large = false; 190 if (TheTriple->getArch() == llvm::Triple::aarch64) 191 Large = true; 192 unsigned LSDAEncoding = 193 Large ? dwarf::DW_EH_PE_absptr : dwarf::DW_EH_PE_udata4; 194 unsigned TTypeEncoding = 195 Large ? dwarf::DW_EH_PE_absptr : dwarf::DW_EH_PE_udata4; 196 if (IsPIC) { 197 LSDAEncoding = dwarf::DW_EH_PE_pcrel | 198 (Large ? dwarf::DW_EH_PE_sdata8 : dwarf::DW_EH_PE_sdata4); 199 TTypeEncoding = dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel | 200 (Large ? dwarf::DW_EH_PE_sdata8 : dwarf::DW_EH_PE_sdata4); 201 } 202 203 std::unique_ptr<MCDisassembler> DisAsm( 204 TheTarget->createMCDisassembler(*STI, *Ctx)); 205 206 if (!DisAsm) 207 return createStringError( 208 make_error_code(std::errc::not_supported), 209 Twine("BOLT-ERROR: no disassembler info for target ", TripleName)); 210 211 std::unique_ptr<const MCInstrAnalysis> MIA( 212 TheTarget->createMCInstrAnalysis(MII.get())); 213 if (!MIA) 214 return createStringError( 215 make_error_code(std::errc::not_supported), 216 Twine("BOLT-ERROR: failed to create instruction analysis for target ", 217 TripleName)); 218 219 int AsmPrinterVariant = AsmInfo->getAssemblerDialect(); 220 std::unique_ptr<MCInstPrinter> InstructionPrinter( 221 TheTarget->createMCInstPrinter(*TheTriple, AsmPrinterVariant, *AsmInfo, 222 *MII, *MRI)); 223 if (!InstructionPrinter) 224 return createStringError( 225 make_error_code(std::errc::not_supported), 226 Twine("BOLT-ERROR: no instruction printer for target ", TripleName)); 227 InstructionPrinter->setPrintImmHex(true); 228 229 std::unique_ptr<MCCodeEmitter> MCE( 230 TheTarget->createMCCodeEmitter(*MII, *Ctx)); 231 232 // Make sure we don't miss any output on core dumps. 233 outs().SetUnbuffered(); 234 errs().SetUnbuffered(); 235 dbgs().SetUnbuffered(); 236 237 auto BC = std::make_unique<BinaryContext>( 238 std::move(Ctx), std::move(DwCtx), std::move(TheTriple), TheTarget, 239 std::string(TripleName), std::move(MCE), std::move(MOFI), 240 std::move(AsmInfo), std::move(MII), std::move(STI), 241 std::move(InstructionPrinter), std::move(MIA), nullptr, std::move(MRI), 242 std::move(DisAsm)); 243 244 BC->TTypeEncoding = TTypeEncoding; 245 BC->LSDAEncoding = LSDAEncoding; 246 247 BC->MAB = std::unique_ptr<MCAsmBackend>( 248 BC->TheTarget->createMCAsmBackend(*BC->STI, *BC->MRI, MCTargetOptions())); 249 250 BC->setFilename(File->getFileName()); 251 252 BC->HasFixedLoadAddress = !IsPIC; 253 254 return std::move(BC); 255 } 256 257 bool BinaryContext::forceSymbolRelocations(StringRef SymbolName) const { 258 if (opts::HotText && 259 (SymbolName == "__hot_start" || SymbolName == "__hot_end")) 260 return true; 261 262 if (opts::HotData && 263 (SymbolName == "__hot_data_start" || SymbolName == "__hot_data_end")) 264 return true; 265 266 if (SymbolName == "_end") 267 return true; 268 269 return false; 270 } 271 272 std::unique_ptr<MCObjectWriter> 273 BinaryContext::createObjectWriter(raw_pwrite_stream &OS) { 274 return MAB->createObjectWriter(OS); 275 } 276 277 bool BinaryContext::validateObjectNesting() const { 278 auto Itr = BinaryDataMap.begin(); 279 auto End = BinaryDataMap.end(); 280 bool Valid = true; 281 while (Itr != End) { 282 auto Next = std::next(Itr); 283 while (Next != End && 284 Itr->second->getSection() == Next->second->getSection() && 285 Itr->second->containsRange(Next->second->getAddress(), 286 Next->second->getSize())) { 287 if (Next->second->Parent != Itr->second) { 288 errs() << "BOLT-WARNING: object nesting incorrect for:\n" 289 << "BOLT-WARNING: " << *Itr->second << "\n" 290 << "BOLT-WARNING: " << *Next->second << "\n"; 291 Valid = false; 292 } 293 ++Next; 294 } 295 Itr = Next; 296 } 297 return Valid; 298 } 299 300 bool BinaryContext::validateHoles() const { 301 bool Valid = true; 302 for (BinarySection &Section : sections()) { 303 for (const Relocation &Rel : Section.relocations()) { 304 uint64_t RelAddr = Rel.Offset + Section.getAddress(); 305 const BinaryData *BD = getBinaryDataContainingAddress(RelAddr); 306 if (!BD) { 307 errs() << "BOLT-WARNING: no BinaryData found for relocation at address" 308 << " 0x" << Twine::utohexstr(RelAddr) << " in " 309 << Section.getName() << "\n"; 310 Valid = false; 311 } else if (!BD->getAtomicRoot()) { 312 errs() << "BOLT-WARNING: no atomic BinaryData found for relocation at " 313 << "address 0x" << Twine::utohexstr(RelAddr) << " in " 314 << Section.getName() << "\n"; 315 Valid = false; 316 } 317 } 318 } 319 return Valid; 320 } 321 322 void BinaryContext::updateObjectNesting(BinaryDataMapType::iterator GAI) { 323 const uint64_t Address = GAI->second->getAddress(); 324 const uint64_t Size = GAI->second->getSize(); 325 326 auto fixParents = [&](BinaryDataMapType::iterator Itr, 327 BinaryData *NewParent) { 328 BinaryData *OldParent = Itr->second->Parent; 329 Itr->second->Parent = NewParent; 330 ++Itr; 331 while (Itr != BinaryDataMap.end() && OldParent && 332 Itr->second->Parent == OldParent) { 333 Itr->second->Parent = NewParent; 334 ++Itr; 335 } 336 }; 337 338 // Check if the previous symbol contains the newly added symbol. 339 if (GAI != BinaryDataMap.begin()) { 340 BinaryData *Prev = std::prev(GAI)->second; 341 while (Prev) { 342 if (Prev->getSection() == GAI->second->getSection() && 343 Prev->containsRange(Address, Size)) { 344 fixParents(GAI, Prev); 345 } else { 346 fixParents(GAI, nullptr); 347 } 348 Prev = Prev->Parent; 349 } 350 } 351 352 // Check if the newly added symbol contains any subsequent symbols. 353 if (Size != 0) { 354 BinaryData *BD = GAI->second->Parent ? GAI->second->Parent : GAI->second; 355 auto Itr = std::next(GAI); 356 while ( 357 Itr != BinaryDataMap.end() && 358 BD->containsRange(Itr->second->getAddress(), Itr->second->getSize())) { 359 Itr->second->Parent = BD; 360 ++Itr; 361 } 362 } 363 } 364 365 iterator_range<BinaryContext::binary_data_iterator> 366 BinaryContext::getSubBinaryData(BinaryData *BD) { 367 auto Start = std::next(BinaryDataMap.find(BD->getAddress())); 368 auto End = Start; 369 while (End != BinaryDataMap.end() && BD->isAncestorOf(End->second)) 370 ++End; 371 return make_range(Start, End); 372 } 373 374 std::pair<const MCSymbol *, uint64_t> 375 BinaryContext::handleAddressRef(uint64_t Address, BinaryFunction &BF, 376 bool IsPCRel) { 377 uint64_t Addend = 0; 378 379 if (isAArch64()) { 380 // Check if this is an access to a constant island and create bookkeeping 381 // to keep track of it and emit it later as part of this function. 382 if (MCSymbol *IslandSym = BF.getOrCreateIslandAccess(Address)) 383 return std::make_pair(IslandSym, Addend); 384 385 // Detect custom code written in assembly that refers to arbitrary 386 // constant islands from other functions. Write this reference so we 387 // can pull this constant island and emit it as part of this function 388 // too. 389 auto IslandIter = AddressToConstantIslandMap.lower_bound(Address); 390 if (IslandIter != AddressToConstantIslandMap.end()) { 391 if (MCSymbol *IslandSym = 392 IslandIter->second->getOrCreateProxyIslandAccess(Address, BF)) { 393 BF.createIslandDependency(IslandSym, IslandIter->second); 394 return std::make_pair(IslandSym, Addend); 395 } 396 } 397 } 398 399 // Note that the address does not necessarily have to reside inside 400 // a section, it could be an absolute address too. 401 ErrorOr<BinarySection &> Section = getSectionForAddress(Address); 402 if (Section && Section->isText()) { 403 if (BF.containsAddress(Address, /*UseMaxSize=*/isAArch64())) { 404 if (Address != BF.getAddress()) { 405 // The address could potentially escape. Mark it as another entry 406 // point into the function. 407 if (opts::Verbosity >= 1) { 408 outs() << "BOLT-INFO: potentially escaped address 0x" 409 << Twine::utohexstr(Address) << " in function " << BF << '\n'; 410 } 411 BF.HasInternalLabelReference = true; 412 return std::make_pair( 413 BF.addEntryPointAtOffset(Address - BF.getAddress()), Addend); 414 } 415 } else { 416 BF.InterproceduralReferences.insert(Address); 417 } 418 } 419 420 // With relocations, catch jump table references outside of the basic block 421 // containing the indirect jump. 422 if (HasRelocations) { 423 const MemoryContentsType MemType = analyzeMemoryAt(Address, BF); 424 if (MemType == MemoryContentsType::POSSIBLE_PIC_JUMP_TABLE && IsPCRel) { 425 const MCSymbol *Symbol = 426 getOrCreateJumpTable(BF, Address, JumpTable::JTT_PIC); 427 428 return std::make_pair(Symbol, Addend); 429 } 430 } 431 432 if (BinaryData *BD = getBinaryDataContainingAddress(Address)) 433 return std::make_pair(BD->getSymbol(), Address - BD->getAddress()); 434 435 // TODO: use DWARF info to get size/alignment here? 436 MCSymbol *TargetSymbol = getOrCreateGlobalSymbol(Address, "DATAat"); 437 LLVM_DEBUG(dbgs() << "Created symbol " << TargetSymbol->getName() << '\n'); 438 return std::make_pair(TargetSymbol, Addend); 439 } 440 441 MemoryContentsType BinaryContext::analyzeMemoryAt(uint64_t Address, 442 BinaryFunction &BF) { 443 if (!isX86()) 444 return MemoryContentsType::UNKNOWN; 445 446 ErrorOr<BinarySection &> Section = getSectionForAddress(Address); 447 if (!Section) { 448 // No section - possibly an absolute address. Since we don't allow 449 // internal function addresses to escape the function scope - we 450 // consider it a tail call. 451 if (opts::Verbosity > 1) { 452 errs() << "BOLT-WARNING: no section for address 0x" 453 << Twine::utohexstr(Address) << " referenced from function " << BF 454 << '\n'; 455 } 456 return MemoryContentsType::UNKNOWN; 457 } 458 459 if (Section->isVirtual()) { 460 // The contents are filled at runtime. 461 return MemoryContentsType::UNKNOWN; 462 } 463 464 // No support for jump tables in code yet. 465 if (Section->isText()) 466 return MemoryContentsType::UNKNOWN; 467 468 // Start with checking for PIC jump table. We expect non-PIC jump tables 469 // to have high 32 bits set to 0. 470 if (analyzeJumpTable(Address, JumpTable::JTT_PIC, BF)) 471 return MemoryContentsType::POSSIBLE_PIC_JUMP_TABLE; 472 473 if (analyzeJumpTable(Address, JumpTable::JTT_NORMAL, BF)) 474 return MemoryContentsType::POSSIBLE_JUMP_TABLE; 475 476 return MemoryContentsType::UNKNOWN; 477 } 478 479 /// Check if <fragment restored name> == <parent restored name>.cold(.\d+)? 480 bool isPotentialFragmentByName(BinaryFunction &Fragment, 481 BinaryFunction &Parent) { 482 for (StringRef Name : Parent.getNames()) { 483 std::string NamePrefix = Regex::escape(NameResolver::restore(Name)); 484 std::string NameRegex = Twine(NamePrefix, "\\.cold(\\.[0-9]+)?").str(); 485 if (Fragment.hasRestoredNameRegex(NameRegex)) 486 return true; 487 } 488 return false; 489 } 490 491 bool BinaryContext::analyzeJumpTable(const uint64_t Address, 492 const JumpTable::JumpTableType Type, 493 BinaryFunction &BF, 494 const uint64_t NextJTAddress, 495 JumpTable::OffsetsType *Offsets) { 496 // Is one of the targets __builtin_unreachable? 497 bool HasUnreachable = false; 498 499 // Number of targets other than __builtin_unreachable. 500 uint64_t NumRealEntries = 0; 501 502 constexpr uint64_t INVALID_OFFSET = std::numeric_limits<uint64_t>::max(); 503 auto addOffset = [&](uint64_t Offset) { 504 if (Offsets) 505 Offsets->emplace_back(Offset); 506 }; 507 508 auto doesBelongToFunction = [&](const uint64_t Addr, 509 BinaryFunction *TargetBF) -> bool { 510 if (BF.containsAddress(Addr)) 511 return true; 512 // Nothing to do if we failed to identify the containing function. 513 if (!TargetBF) 514 return false; 515 // Case 1: check if BF is a fragment and TargetBF is its parent. 516 if (BF.isFragment()) { 517 // Parent function may or may not be already registered. 518 // Set parent link based on function name matching heuristic. 519 return registerFragment(BF, *TargetBF); 520 } 521 // Case 2: check if TargetBF is a fragment and BF is its parent. 522 return TargetBF->isFragment() && registerFragment(*TargetBF, BF); 523 }; 524 525 ErrorOr<BinarySection &> Section = getSectionForAddress(Address); 526 if (!Section) 527 return false; 528 529 // The upper bound is defined by containing object, section limits, and 530 // the next jump table in memory. 531 uint64_t UpperBound = Section->getEndAddress(); 532 const BinaryData *JumpTableBD = getBinaryDataAtAddress(Address); 533 if (JumpTableBD && JumpTableBD->getSize()) { 534 assert(JumpTableBD->getEndAddress() <= UpperBound && 535 "data object cannot cross a section boundary"); 536 UpperBound = JumpTableBD->getEndAddress(); 537 } 538 if (NextJTAddress) 539 UpperBound = std::min(NextJTAddress, UpperBound); 540 541 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: analyzeJumpTable in " << BF.getPrintName() 542 << '\n'); 543 const uint64_t EntrySize = getJumpTableEntrySize(Type); 544 for (uint64_t EntryAddress = Address; EntryAddress <= UpperBound - EntrySize; 545 EntryAddress += EntrySize) { 546 LLVM_DEBUG(dbgs() << " * Checking 0x" << Twine::utohexstr(EntryAddress) 547 << " -> "); 548 // Check if there's a proper relocation against the jump table entry. 549 if (HasRelocations) { 550 if (Type == JumpTable::JTT_PIC && 551 !DataPCRelocations.count(EntryAddress)) { 552 LLVM_DEBUG( 553 dbgs() << "FAIL: JTT_PIC table, no relocation for this address\n"); 554 break; 555 } 556 if (Type == JumpTable::JTT_NORMAL && !getRelocationAt(EntryAddress)) { 557 LLVM_DEBUG( 558 dbgs() 559 << "FAIL: JTT_NORMAL table, no relocation for this address\n"); 560 break; 561 } 562 } 563 564 const uint64_t Value = 565 (Type == JumpTable::JTT_PIC) 566 ? Address + *getSignedValueAtAddress(EntryAddress, EntrySize) 567 : *getPointerAtAddress(EntryAddress); 568 569 // __builtin_unreachable() case. 570 if (Value == BF.getAddress() + BF.getSize()) { 571 addOffset(Value - BF.getAddress()); 572 HasUnreachable = true; 573 LLVM_DEBUG(dbgs() << "OK: __builtin_unreachable\n"); 574 continue; 575 } 576 577 // Function or one of its fragments. 578 BinaryFunction *TargetBF = getBinaryFunctionContainingAddress(Value); 579 580 // We assume that a jump table cannot have function start as an entry. 581 if (!doesBelongToFunction(Value, TargetBF) || Value == BF.getAddress()) { 582 LLVM_DEBUG({ 583 if (!BF.containsAddress(Value)) { 584 dbgs() << "FAIL: function doesn't contain this address\n"; 585 if (TargetBF) { 586 dbgs() << " ! function containing this address: " 587 << TargetBF->getPrintName() << '\n'; 588 if (TargetBF->isFragment()) 589 dbgs() << " ! is a fragment\n"; 590 for (BinaryFunction *TargetParent : TargetBF->ParentFragments) 591 dbgs() << " ! its parent is " 592 << (TargetParent ? TargetParent->getPrintName() : "(none)") 593 << '\n'; 594 } 595 } 596 if (Value == BF.getAddress()) 597 dbgs() << "FAIL: jump table cannot have function start as an entry\n"; 598 }); 599 break; 600 } 601 602 // Check there's an instruction at this offset. 603 if (TargetBF->getState() == BinaryFunction::State::Disassembled && 604 !TargetBF->getInstructionAtOffset(Value - TargetBF->getAddress())) { 605 LLVM_DEBUG(dbgs() << "FAIL: no instruction at this offset\n"); 606 break; 607 } 608 609 ++NumRealEntries; 610 611 if (TargetBF == &BF) { 612 // Address inside the function. 613 addOffset(Value - TargetBF->getAddress()); 614 LLVM_DEBUG(dbgs() << "OK: real entry\n"); 615 } else { 616 // Address in split fragment. 617 BF.setHasSplitJumpTable(true); 618 // Add invalid offset for proper identification of jump table size. 619 addOffset(INVALID_OFFSET); 620 LLVM_DEBUG(dbgs() << "OK: address in split fragment " 621 << TargetBF->getPrintName() << '\n'); 622 } 623 } 624 625 // It's a jump table if the number of real entries is more than 1, or there's 626 // one real entry and "unreachable" targets. If there are only multiple 627 // "unreachable" targets, then it's not a jump table. 628 return NumRealEntries + HasUnreachable >= 2; 629 } 630 631 void BinaryContext::populateJumpTables() { 632 LLVM_DEBUG(dbgs() << "DataPCRelocations: " << DataPCRelocations.size() 633 << '\n'); 634 for (auto JTI = JumpTables.begin(), JTE = JumpTables.end(); JTI != JTE; 635 ++JTI) { 636 JumpTable *JT = JTI->second; 637 BinaryFunction &BF = *JT->Parent; 638 639 if (!BF.isSimple()) 640 continue; 641 642 uint64_t NextJTAddress = 0; 643 auto NextJTI = std::next(JTI); 644 if (NextJTI != JTE) 645 NextJTAddress = NextJTI->second->getAddress(); 646 647 const bool Success = analyzeJumpTable(JT->getAddress(), JT->Type, BF, 648 NextJTAddress, &JT->OffsetEntries); 649 if (!Success) { 650 dbgs() << "failed to analyze jump table in function " << BF << '\n'; 651 JT->print(dbgs()); 652 if (NextJTI != JTE) { 653 dbgs() << "next jump table at 0x" 654 << Twine::utohexstr(NextJTI->second->getAddress()) 655 << " belongs to function " << *NextJTI->second->Parent << '\n'; 656 NextJTI->second->print(dbgs()); 657 } 658 llvm_unreachable("jump table heuristic failure"); 659 } 660 661 for (uint64_t EntryOffset : JT->OffsetEntries) { 662 if (EntryOffset == BF.getSize()) 663 BF.IgnoredBranches.emplace_back(EntryOffset, BF.getSize()); 664 else 665 BF.registerReferencedOffset(EntryOffset); 666 } 667 668 // In strict mode, erase PC-relative relocation record. Later we check that 669 // all such records are erased and thus have been accounted for. 670 if (opts::StrictMode && JT->Type == JumpTable::JTT_PIC) { 671 for (uint64_t Address = JT->getAddress(); 672 Address < JT->getAddress() + JT->getSize(); 673 Address += JT->EntrySize) { 674 DataPCRelocations.erase(DataPCRelocations.find(Address)); 675 } 676 } 677 678 // Mark to skip the function and all its fragments. 679 if (BF.hasSplitJumpTable()) 680 FragmentsToSkip.push_back(&BF); 681 } 682 683 if (opts::StrictMode && DataPCRelocations.size()) { 684 LLVM_DEBUG({ 685 dbgs() << DataPCRelocations.size() 686 << " unclaimed PC-relative relocations left in data:\n"; 687 for (uint64_t Reloc : DataPCRelocations) 688 dbgs() << Twine::utohexstr(Reloc) << '\n'; 689 }); 690 assert(0 && "unclaimed PC-relative relocations left in data\n"); 691 } 692 clearList(DataPCRelocations); 693 } 694 695 void BinaryContext::skipMarkedFragments() { 696 // Unique functions in the vector. 697 std::unordered_set<BinaryFunction *> UniqueFunctions(FragmentsToSkip.begin(), 698 FragmentsToSkip.end()); 699 // Copy the functions back to FragmentsToSkip. 700 FragmentsToSkip.assign(UniqueFunctions.begin(), UniqueFunctions.end()); 701 auto addToWorklist = [&](BinaryFunction *Function) -> void { 702 if (UniqueFunctions.count(Function)) 703 return; 704 FragmentsToSkip.push_back(Function); 705 UniqueFunctions.insert(Function); 706 }; 707 // Functions containing split jump tables need to be skipped with all 708 // fragments (transitively). 709 for (size_t I = 0; I != FragmentsToSkip.size(); I++) { 710 BinaryFunction *BF = FragmentsToSkip[I]; 711 assert(UniqueFunctions.count(BF) && 712 "internal error in traversing function fragments"); 713 if (opts::Verbosity >= 1) 714 errs() << "BOLT-WARNING: Ignoring " << BF->getPrintName() << '\n'; 715 BF->setIgnored(); 716 std::for_each(BF->Fragments.begin(), BF->Fragments.end(), addToWorklist); 717 std::for_each(BF->ParentFragments.begin(), BF->ParentFragments.end(), 718 addToWorklist); 719 } 720 if (!FragmentsToSkip.empty()) 721 errs() << "BOLT-WARNING: ignored " << FragmentsToSkip.size() << " function" 722 << (FragmentsToSkip.size() == 1 ? "" : "s") 723 << " due to cold fragments\n"; 724 FragmentsToSkip.clear(); 725 } 726 727 MCSymbol *BinaryContext::getOrCreateGlobalSymbol(uint64_t Address, Twine Prefix, 728 uint64_t Size, 729 uint16_t Alignment, 730 unsigned Flags) { 731 auto Itr = BinaryDataMap.find(Address); 732 if (Itr != BinaryDataMap.end()) { 733 assert(Itr->second->getSize() == Size || !Size); 734 return Itr->second->getSymbol(); 735 } 736 737 std::string Name = (Prefix + "0x" + Twine::utohexstr(Address)).str(); 738 assert(!GlobalSymbols.count(Name) && "created name is not unique"); 739 return registerNameAtAddress(Name, Address, Size, Alignment, Flags); 740 } 741 742 MCSymbol *BinaryContext::getOrCreateUndefinedGlobalSymbol(StringRef Name) { 743 return Ctx->getOrCreateSymbol(Name); 744 } 745 746 BinaryFunction *BinaryContext::createBinaryFunction( 747 const std::string &Name, BinarySection &Section, uint64_t Address, 748 uint64_t Size, uint64_t SymbolSize, uint16_t Alignment) { 749 auto Result = BinaryFunctions.emplace( 750 Address, BinaryFunction(Name, Section, Address, Size, *this)); 751 assert(Result.second == true && "unexpected duplicate function"); 752 BinaryFunction *BF = &Result.first->second; 753 registerNameAtAddress(Name, Address, SymbolSize ? SymbolSize : Size, 754 Alignment); 755 setSymbolToFunctionMap(BF->getSymbol(), BF); 756 return BF; 757 } 758 759 const MCSymbol * 760 BinaryContext::getOrCreateJumpTable(BinaryFunction &Function, uint64_t Address, 761 JumpTable::JumpTableType Type) { 762 if (JumpTable *JT = getJumpTableContainingAddress(Address)) { 763 assert(JT->Type == Type && "jump table types have to match"); 764 assert(JT->Parent == &Function && 765 "cannot re-use jump table of a different function"); 766 assert(Address == JT->getAddress() && "unexpected non-empty jump table"); 767 768 return JT->getFirstLabel(); 769 } 770 771 // Re-use the existing symbol if possible. 772 MCSymbol *JTLabel = nullptr; 773 if (BinaryData *Object = getBinaryDataAtAddress(Address)) { 774 if (!isInternalSymbolName(Object->getSymbol()->getName())) 775 JTLabel = Object->getSymbol(); 776 } 777 778 const uint64_t EntrySize = getJumpTableEntrySize(Type); 779 if (!JTLabel) { 780 const std::string JumpTableName = generateJumpTableName(Function, Address); 781 JTLabel = registerNameAtAddress(JumpTableName, Address, 0, EntrySize); 782 } 783 784 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: creating jump table " << JTLabel->getName() 785 << " in function " << Function << '\n'); 786 787 JumpTable *JT = new JumpTable(*JTLabel, Address, EntrySize, Type, 788 JumpTable::LabelMapType{{0, JTLabel}}, Function, 789 *getSectionForAddress(Address)); 790 JumpTables.emplace(Address, JT); 791 792 // Duplicate the entry for the parent function for easy access. 793 Function.JumpTables.emplace(Address, JT); 794 795 return JTLabel; 796 } 797 798 std::pair<uint64_t, const MCSymbol *> 799 BinaryContext::duplicateJumpTable(BinaryFunction &Function, JumpTable *JT, 800 const MCSymbol *OldLabel) { 801 auto L = scopeLock(); 802 unsigned Offset = 0; 803 bool Found = false; 804 for (std::pair<const unsigned, MCSymbol *> Elmt : JT->Labels) { 805 if (Elmt.second != OldLabel) 806 continue; 807 Offset = Elmt.first; 808 Found = true; 809 break; 810 } 811 assert(Found && "Label not found"); 812 (void)Found; 813 MCSymbol *NewLabel = Ctx->createNamedTempSymbol("duplicatedJT"); 814 JumpTable *NewJT = 815 new JumpTable(*NewLabel, JT->getAddress(), JT->EntrySize, JT->Type, 816 JumpTable::LabelMapType{{Offset, NewLabel}}, Function, 817 *getSectionForAddress(JT->getAddress())); 818 NewJT->Entries = JT->Entries; 819 NewJT->Counts = JT->Counts; 820 uint64_t JumpTableID = ++DuplicatedJumpTables; 821 // Invert it to differentiate from regular jump tables whose IDs are their 822 // addresses in the input binary memory space 823 JumpTableID = ~JumpTableID; 824 JumpTables.emplace(JumpTableID, NewJT); 825 Function.JumpTables.emplace(JumpTableID, NewJT); 826 return std::make_pair(JumpTableID, NewLabel); 827 } 828 829 std::string BinaryContext::generateJumpTableName(const BinaryFunction &BF, 830 uint64_t Address) { 831 size_t Id; 832 uint64_t Offset = 0; 833 if (const JumpTable *JT = BF.getJumpTableContainingAddress(Address)) { 834 Offset = Address - JT->getAddress(); 835 auto Itr = JT->Labels.find(Offset); 836 if (Itr != JT->Labels.end()) 837 return std::string(Itr->second->getName()); 838 Id = JumpTableIds.at(JT->getAddress()); 839 } else { 840 Id = JumpTableIds[Address] = BF.JumpTables.size(); 841 } 842 return ("JUMP_TABLE/" + BF.getOneName().str() + "." + std::to_string(Id) + 843 (Offset ? ("." + std::to_string(Offset)) : "")); 844 } 845 846 bool BinaryContext::hasValidCodePadding(const BinaryFunction &BF) { 847 // FIXME: aarch64 support is missing. 848 if (!isX86()) 849 return true; 850 851 if (BF.getSize() == BF.getMaxSize()) 852 return true; 853 854 ErrorOr<ArrayRef<unsigned char>> FunctionData = BF.getData(); 855 assert(FunctionData && "cannot get function as data"); 856 857 uint64_t Offset = BF.getSize(); 858 MCInst Instr; 859 uint64_t InstrSize = 0; 860 uint64_t InstrAddress = BF.getAddress() + Offset; 861 using std::placeholders::_1; 862 863 // Skip instructions that satisfy the predicate condition. 864 auto skipInstructions = [&](std::function<bool(const MCInst &)> Predicate) { 865 const uint64_t StartOffset = Offset; 866 for (; Offset < BF.getMaxSize(); 867 Offset += InstrSize, InstrAddress += InstrSize) { 868 if (!DisAsm->getInstruction(Instr, InstrSize, FunctionData->slice(Offset), 869 InstrAddress, nulls())) 870 break; 871 if (!Predicate(Instr)) 872 break; 873 } 874 875 return Offset - StartOffset; 876 }; 877 878 // Skip a sequence of zero bytes. 879 auto skipZeros = [&]() { 880 const uint64_t StartOffset = Offset; 881 for (; Offset < BF.getMaxSize(); ++Offset) 882 if ((*FunctionData)[Offset] != 0) 883 break; 884 885 return Offset - StartOffset; 886 }; 887 888 // Accept the whole padding area filled with breakpoints. 889 auto isBreakpoint = std::bind(&MCPlusBuilder::isBreakpoint, MIB.get(), _1); 890 if (skipInstructions(isBreakpoint) && Offset == BF.getMaxSize()) 891 return true; 892 893 auto isNoop = std::bind(&MCPlusBuilder::isNoop, MIB.get(), _1); 894 895 // Some functions have a jump to the next function or to the padding area 896 // inserted after the body. 897 auto isSkipJump = [&](const MCInst &Instr) { 898 uint64_t TargetAddress = 0; 899 if (MIB->isUnconditionalBranch(Instr) && 900 MIB->evaluateBranch(Instr, InstrAddress, InstrSize, TargetAddress)) { 901 if (TargetAddress >= InstrAddress + InstrSize && 902 TargetAddress <= BF.getAddress() + BF.getMaxSize()) { 903 return true; 904 } 905 } 906 return false; 907 }; 908 909 // Skip over nops, jumps, and zero padding. Allow interleaving (this happens). 910 while (skipInstructions(isNoop) || skipInstructions(isSkipJump) || 911 skipZeros()) 912 ; 913 914 if (Offset == BF.getMaxSize()) 915 return true; 916 917 if (opts::Verbosity >= 1) { 918 errs() << "BOLT-WARNING: bad padding at address 0x" 919 << Twine::utohexstr(BF.getAddress() + BF.getSize()) 920 << " starting at offset " << (Offset - BF.getSize()) 921 << " in function " << BF << '\n' 922 << FunctionData->slice(BF.getSize(), BF.getMaxSize() - BF.getSize()) 923 << '\n'; 924 } 925 926 return false; 927 } 928 929 void BinaryContext::adjustCodePadding() { 930 for (auto &BFI : BinaryFunctions) { 931 BinaryFunction &BF = BFI.second; 932 if (!shouldEmit(BF)) 933 continue; 934 935 if (!hasValidCodePadding(BF)) { 936 if (HasRelocations) { 937 if (opts::Verbosity >= 1) { 938 outs() << "BOLT-INFO: function " << BF 939 << " has invalid padding. Ignoring the function.\n"; 940 } 941 BF.setIgnored(); 942 } else { 943 BF.setMaxSize(BF.getSize()); 944 } 945 } 946 } 947 } 948 949 MCSymbol *BinaryContext::registerNameAtAddress(StringRef Name, uint64_t Address, 950 uint64_t Size, 951 uint16_t Alignment, 952 unsigned Flags) { 953 // Register the name with MCContext. 954 MCSymbol *Symbol = Ctx->getOrCreateSymbol(Name); 955 956 auto GAI = BinaryDataMap.find(Address); 957 BinaryData *BD; 958 if (GAI == BinaryDataMap.end()) { 959 ErrorOr<BinarySection &> SectionOrErr = getSectionForAddress(Address); 960 BinarySection &Section = 961 SectionOrErr ? SectionOrErr.get() : absoluteSection(); 962 BD = new BinaryData(*Symbol, Address, Size, Alignment ? Alignment : 1, 963 Section, Flags); 964 GAI = BinaryDataMap.emplace(Address, BD).first; 965 GlobalSymbols[Name] = BD; 966 updateObjectNesting(GAI); 967 } else { 968 BD = GAI->second; 969 if (!BD->hasName(Name)) { 970 GlobalSymbols[Name] = BD; 971 BD->Symbols.push_back(Symbol); 972 } 973 } 974 975 return Symbol; 976 } 977 978 const BinaryData * 979 BinaryContext::getBinaryDataContainingAddressImpl(uint64_t Address) const { 980 auto NI = BinaryDataMap.lower_bound(Address); 981 auto End = BinaryDataMap.end(); 982 if ((NI != End && Address == NI->first) || 983 ((NI != BinaryDataMap.begin()) && (NI-- != BinaryDataMap.begin()))) { 984 if (NI->second->containsAddress(Address)) 985 return NI->second; 986 987 // If this is a sub-symbol, see if a parent data contains the address. 988 const BinaryData *BD = NI->second->getParent(); 989 while (BD) { 990 if (BD->containsAddress(Address)) 991 return BD; 992 BD = BD->getParent(); 993 } 994 } 995 return nullptr; 996 } 997 998 bool BinaryContext::setBinaryDataSize(uint64_t Address, uint64_t Size) { 999 auto NI = BinaryDataMap.find(Address); 1000 assert(NI != BinaryDataMap.end()); 1001 if (NI == BinaryDataMap.end()) 1002 return false; 1003 // TODO: it's possible that a jump table starts at the same address 1004 // as a larger blob of private data. When we set the size of the 1005 // jump table, it might be smaller than the total blob size. In this 1006 // case we just leave the original size since (currently) it won't really 1007 // affect anything. 1008 assert((!NI->second->Size || NI->second->Size == Size || 1009 (NI->second->isJumpTable() && NI->second->Size > Size)) && 1010 "can't change the size of a symbol that has already had its " 1011 "size set"); 1012 if (!NI->second->Size) { 1013 NI->second->Size = Size; 1014 updateObjectNesting(NI); 1015 return true; 1016 } 1017 return false; 1018 } 1019 1020 void BinaryContext::generateSymbolHashes() { 1021 auto isPadding = [](const BinaryData &BD) { 1022 StringRef Contents = BD.getSection().getContents(); 1023 StringRef SymData = Contents.substr(BD.getOffset(), BD.getSize()); 1024 return (BD.getName().startswith("HOLEat") || 1025 SymData.find_first_not_of(0) == StringRef::npos); 1026 }; 1027 1028 uint64_t NumCollisions = 0; 1029 for (auto &Entry : BinaryDataMap) { 1030 BinaryData &BD = *Entry.second; 1031 StringRef Name = BD.getName(); 1032 1033 if (!isInternalSymbolName(Name)) 1034 continue; 1035 1036 // First check if a non-anonymous alias exists and move it to the front. 1037 if (BD.getSymbols().size() > 1) { 1038 auto Itr = std::find_if(BD.getSymbols().begin(), BD.getSymbols().end(), 1039 [&](const MCSymbol *Symbol) { 1040 return !isInternalSymbolName(Symbol->getName()); 1041 }); 1042 if (Itr != BD.getSymbols().end()) { 1043 size_t Idx = std::distance(BD.getSymbols().begin(), Itr); 1044 std::swap(BD.getSymbols()[0], BD.getSymbols()[Idx]); 1045 continue; 1046 } 1047 } 1048 1049 // We have to skip 0 size symbols since they will all collide. 1050 if (BD.getSize() == 0) { 1051 continue; 1052 } 1053 1054 const uint64_t Hash = BD.getSection().hash(BD); 1055 const size_t Idx = Name.find("0x"); 1056 std::string NewName = 1057 (Twine(Name.substr(0, Idx)) + "_" + Twine::utohexstr(Hash)).str(); 1058 if (getBinaryDataByName(NewName)) { 1059 // Ignore collisions for symbols that appear to be padding 1060 // (i.e. all zeros or a "hole") 1061 if (!isPadding(BD)) { 1062 if (opts::Verbosity) { 1063 errs() << "BOLT-WARNING: collision detected when hashing " << BD 1064 << " with new name (" << NewName << "), skipping.\n"; 1065 } 1066 ++NumCollisions; 1067 } 1068 continue; 1069 } 1070 BD.Symbols.insert(BD.Symbols.begin(), Ctx->getOrCreateSymbol(NewName)); 1071 GlobalSymbols[NewName] = &BD; 1072 } 1073 if (NumCollisions) { 1074 errs() << "BOLT-WARNING: " << NumCollisions 1075 << " collisions detected while hashing binary objects"; 1076 if (!opts::Verbosity) 1077 errs() << ". Use -v=1 to see the list."; 1078 errs() << '\n'; 1079 } 1080 } 1081 1082 bool BinaryContext::registerFragment(BinaryFunction &TargetFunction, 1083 BinaryFunction &Function) const { 1084 if (!isPotentialFragmentByName(TargetFunction, Function)) 1085 return false; 1086 assert(TargetFunction.isFragment() && "TargetFunction must be a fragment"); 1087 if (TargetFunction.isParentFragment(&Function)) 1088 return true; 1089 TargetFunction.addParentFragment(Function); 1090 Function.addFragment(TargetFunction); 1091 if (!HasRelocations) { 1092 TargetFunction.setSimple(false); 1093 Function.setSimple(false); 1094 } 1095 if (opts::Verbosity >= 1) { 1096 outs() << "BOLT-INFO: marking " << TargetFunction << " as a fragment of " 1097 << Function << '\n'; 1098 } 1099 return true; 1100 } 1101 1102 void BinaryContext::processInterproceduralReferences(BinaryFunction &Function) { 1103 for (uint64_t Address : Function.InterproceduralReferences) { 1104 if (!Address) 1105 continue; 1106 1107 BinaryFunction *TargetFunction = 1108 getBinaryFunctionContainingAddress(Address); 1109 if (&Function == TargetFunction) 1110 continue; 1111 1112 if (TargetFunction) { 1113 if (TargetFunction->IsFragment && 1114 !registerFragment(*TargetFunction, Function)) { 1115 errs() << "BOLT-WARNING: interprocedural reference between unrelated " 1116 "fragments: " 1117 << Function.getPrintName() << " and " 1118 << TargetFunction->getPrintName() << '\n'; 1119 } 1120 if (uint64_t Offset = Address - TargetFunction->getAddress()) 1121 TargetFunction->addEntryPointAtOffset(Offset); 1122 1123 continue; 1124 } 1125 1126 // Check if address falls in function padding space - this could be 1127 // unmarked data in code. In this case adjust the padding space size. 1128 ErrorOr<BinarySection &> Section = getSectionForAddress(Address); 1129 assert(Section && "cannot get section for referenced address"); 1130 1131 if (!Section->isText()) 1132 continue; 1133 1134 // PLT requires special handling and could be ignored in this context. 1135 StringRef SectionName = Section->getName(); 1136 if (SectionName == ".plt" || SectionName == ".plt.got") 1137 continue; 1138 1139 if (opts::processAllFunctions()) { 1140 errs() << "BOLT-ERROR: cannot process binaries with unmarked " 1141 << "object in code at address 0x" << Twine::utohexstr(Address) 1142 << " belonging to section " << SectionName << " in current mode\n"; 1143 exit(1); 1144 } 1145 1146 TargetFunction = getBinaryFunctionContainingAddress(Address, 1147 /*CheckPastEnd=*/false, 1148 /*UseMaxSize=*/true); 1149 // We are not going to overwrite non-simple functions, but for simple 1150 // ones - adjust the padding size. 1151 if (TargetFunction && TargetFunction->isSimple()) { 1152 errs() << "BOLT-WARNING: function " << *TargetFunction 1153 << " has an object detected in a padding region at address 0x" 1154 << Twine::utohexstr(Address) << '\n'; 1155 TargetFunction->setMaxSize(TargetFunction->getSize()); 1156 } 1157 } 1158 1159 clearList(Function.InterproceduralReferences); 1160 } 1161 1162 void BinaryContext::postProcessSymbolTable() { 1163 fixBinaryDataHoles(); 1164 bool Valid = true; 1165 for (auto &Entry : BinaryDataMap) { 1166 BinaryData *BD = Entry.second; 1167 if ((BD->getName().startswith("SYMBOLat") || 1168 BD->getName().startswith("DATAat")) && 1169 !BD->getParent() && !BD->getSize() && !BD->isAbsolute() && 1170 BD->getSection()) { 1171 errs() << "BOLT-WARNING: zero-sized top level symbol: " << *BD << "\n"; 1172 Valid = false; 1173 } 1174 } 1175 assert(Valid); 1176 (void)Valid; 1177 generateSymbolHashes(); 1178 } 1179 1180 void BinaryContext::foldFunction(BinaryFunction &ChildBF, 1181 BinaryFunction &ParentBF) { 1182 assert(!ChildBF.isMultiEntry() && !ParentBF.isMultiEntry() && 1183 "cannot merge functions with multiple entry points"); 1184 1185 std::unique_lock<std::shared_timed_mutex> WriteCtxLock(CtxMutex, 1186 std::defer_lock); 1187 std::unique_lock<std::shared_timed_mutex> WriteSymbolMapLock( 1188 SymbolToFunctionMapMutex, std::defer_lock); 1189 1190 const StringRef ChildName = ChildBF.getOneName(); 1191 1192 // Move symbols over and update bookkeeping info. 1193 for (MCSymbol *Symbol : ChildBF.getSymbols()) { 1194 ParentBF.getSymbols().push_back(Symbol); 1195 WriteSymbolMapLock.lock(); 1196 SymbolToFunctionMap[Symbol] = &ParentBF; 1197 WriteSymbolMapLock.unlock(); 1198 // NB: there's no need to update BinaryDataMap and GlobalSymbols. 1199 } 1200 ChildBF.getSymbols().clear(); 1201 1202 // Move other names the child function is known under. 1203 std::move(ChildBF.Aliases.begin(), ChildBF.Aliases.end(), 1204 std::back_inserter(ParentBF.Aliases)); 1205 ChildBF.Aliases.clear(); 1206 1207 if (HasRelocations) { 1208 // Merge execution counts of ChildBF into those of ParentBF. 1209 // Without relocations, we cannot reliably merge profiles as both functions 1210 // continue to exist and either one can be executed. 1211 ChildBF.mergeProfileDataInto(ParentBF); 1212 1213 std::shared_lock<std::shared_timed_mutex> ReadBfsLock(BinaryFunctionsMutex, 1214 std::defer_lock); 1215 std::unique_lock<std::shared_timed_mutex> WriteBfsLock(BinaryFunctionsMutex, 1216 std::defer_lock); 1217 // Remove ChildBF from the global set of functions in relocs mode. 1218 ReadBfsLock.lock(); 1219 auto FI = BinaryFunctions.find(ChildBF.getAddress()); 1220 ReadBfsLock.unlock(); 1221 1222 assert(FI != BinaryFunctions.end() && "function not found"); 1223 assert(&ChildBF == &FI->second && "function mismatch"); 1224 1225 WriteBfsLock.lock(); 1226 ChildBF.clearDisasmState(); 1227 FI = BinaryFunctions.erase(FI); 1228 WriteBfsLock.unlock(); 1229 1230 } else { 1231 // In non-relocation mode we keep the function, but rename it. 1232 std::string NewName = "__ICF_" + ChildName.str(); 1233 1234 WriteCtxLock.lock(); 1235 ChildBF.getSymbols().push_back(Ctx->getOrCreateSymbol(NewName)); 1236 WriteCtxLock.unlock(); 1237 1238 ChildBF.setFolded(&ParentBF); 1239 } 1240 } 1241 1242 void BinaryContext::fixBinaryDataHoles() { 1243 assert(validateObjectNesting() && "object nesting inconsitency detected"); 1244 1245 for (BinarySection &Section : allocatableSections()) { 1246 std::vector<std::pair<uint64_t, uint64_t>> Holes; 1247 1248 auto isNotHole = [&Section](const binary_data_iterator &Itr) { 1249 BinaryData *BD = Itr->second; 1250 bool isHole = (!BD->getParent() && !BD->getSize() && BD->isObject() && 1251 (BD->getName().startswith("SYMBOLat0x") || 1252 BD->getName().startswith("DATAat0x") || 1253 BD->getName().startswith("ANONYMOUS"))); 1254 return !isHole && BD->getSection() == Section && !BD->getParent(); 1255 }; 1256 1257 auto BDStart = BinaryDataMap.begin(); 1258 auto BDEnd = BinaryDataMap.end(); 1259 auto Itr = FilteredBinaryDataIterator(isNotHole, BDStart, BDEnd); 1260 auto End = FilteredBinaryDataIterator(isNotHole, BDEnd, BDEnd); 1261 1262 uint64_t EndAddress = Section.getAddress(); 1263 1264 while (Itr != End) { 1265 if (Itr->second->getAddress() > EndAddress) { 1266 uint64_t Gap = Itr->second->getAddress() - EndAddress; 1267 Holes.emplace_back(EndAddress, Gap); 1268 } 1269 EndAddress = Itr->second->getEndAddress(); 1270 ++Itr; 1271 } 1272 1273 if (EndAddress < Section.getEndAddress()) 1274 Holes.emplace_back(EndAddress, Section.getEndAddress() - EndAddress); 1275 1276 // If there is already a symbol at the start of the hole, grow that symbol 1277 // to cover the rest. Otherwise, create a new symbol to cover the hole. 1278 for (std::pair<uint64_t, uint64_t> &Hole : Holes) { 1279 BinaryData *BD = getBinaryDataAtAddress(Hole.first); 1280 if (BD) { 1281 // BD->getSection() can be != Section if there are sections that 1282 // overlap. In this case it is probably safe to just skip the holes 1283 // since the overlapping section will not(?) have any symbols in it. 1284 if (BD->getSection() == Section) 1285 setBinaryDataSize(Hole.first, Hole.second); 1286 } else { 1287 getOrCreateGlobalSymbol(Hole.first, "HOLEat", Hole.second, 1); 1288 } 1289 } 1290 } 1291 1292 assert(validateObjectNesting() && "object nesting inconsitency detected"); 1293 assert(validateHoles() && "top level hole detected in object map"); 1294 } 1295 1296 void BinaryContext::printGlobalSymbols(raw_ostream &OS) const { 1297 const BinarySection *CurrentSection = nullptr; 1298 bool FirstSection = true; 1299 1300 for (auto &Entry : BinaryDataMap) { 1301 const BinaryData *BD = Entry.second; 1302 const BinarySection &Section = BD->getSection(); 1303 if (FirstSection || Section != *CurrentSection) { 1304 uint64_t Address, Size; 1305 StringRef Name = Section.getName(); 1306 if (Section) { 1307 Address = Section.getAddress(); 1308 Size = Section.getSize(); 1309 } else { 1310 Address = BD->getAddress(); 1311 Size = BD->getSize(); 1312 } 1313 OS << "BOLT-INFO: Section " << Name << ", " 1314 << "0x" + Twine::utohexstr(Address) << ":" 1315 << "0x" + Twine::utohexstr(Address + Size) << "/" << Size << "\n"; 1316 CurrentSection = &Section; 1317 FirstSection = false; 1318 } 1319 1320 OS << "BOLT-INFO: "; 1321 const BinaryData *P = BD->getParent(); 1322 while (P) { 1323 OS << " "; 1324 P = P->getParent(); 1325 } 1326 OS << *BD << "\n"; 1327 } 1328 } 1329 1330 Expected<unsigned> BinaryContext::getDwarfFile( 1331 StringRef Directory, StringRef FileName, unsigned FileNumber, 1332 Optional<MD5::MD5Result> Checksum, Optional<StringRef> Source, 1333 unsigned CUID, unsigned DWARFVersion) { 1334 DwarfLineTable &Table = DwarfLineTablesCUMap[CUID]; 1335 return Table.tryGetFile(Directory, FileName, Checksum, Source, DWARFVersion, 1336 FileNumber); 1337 } 1338 1339 unsigned BinaryContext::addDebugFilenameToUnit(const uint32_t DestCUID, 1340 const uint32_t SrcCUID, 1341 unsigned FileIndex) { 1342 DWARFCompileUnit *SrcUnit = DwCtx->getCompileUnitForOffset(SrcCUID); 1343 const DWARFDebugLine::LineTable *LineTable = 1344 DwCtx->getLineTableForUnit(SrcUnit); 1345 const std::vector<DWARFDebugLine::FileNameEntry> &FileNames = 1346 LineTable->Prologue.FileNames; 1347 // Dir indexes start at 1, as DWARF file numbers, and a dir index 0 1348 // means empty dir. 1349 assert(FileIndex > 0 && FileIndex <= FileNames.size() && 1350 "FileIndex out of range for the compilation unit."); 1351 StringRef Dir = ""; 1352 if (FileNames[FileIndex - 1].DirIdx != 0) { 1353 if (Optional<const char *> DirName = dwarf::toString( 1354 LineTable->Prologue 1355 .IncludeDirectories[FileNames[FileIndex - 1].DirIdx - 1])) { 1356 Dir = *DirName; 1357 } 1358 } 1359 StringRef FileName = ""; 1360 if (Optional<const char *> FName = 1361 dwarf::toString(FileNames[FileIndex - 1].Name)) 1362 FileName = *FName; 1363 assert(FileName != ""); 1364 DWARFCompileUnit *DstUnit = DwCtx->getCompileUnitForOffset(DestCUID); 1365 return cantFail(getDwarfFile(Dir, FileName, 0, None, None, DestCUID, 1366 DstUnit->getVersion())); 1367 } 1368 1369 std::vector<BinaryFunction *> BinaryContext::getSortedFunctions() { 1370 std::vector<BinaryFunction *> SortedFunctions(BinaryFunctions.size()); 1371 std::transform(BinaryFunctions.begin(), BinaryFunctions.end(), 1372 SortedFunctions.begin(), 1373 [](std::pair<const uint64_t, BinaryFunction> &BFI) { 1374 return &BFI.second; 1375 }); 1376 1377 std::stable_sort(SortedFunctions.begin(), SortedFunctions.end(), 1378 [](const BinaryFunction *A, const BinaryFunction *B) { 1379 if (A->hasValidIndex() && B->hasValidIndex()) { 1380 return A->getIndex() < B->getIndex(); 1381 } 1382 return A->hasValidIndex(); 1383 }); 1384 return SortedFunctions; 1385 } 1386 1387 std::vector<BinaryFunction *> BinaryContext::getAllBinaryFunctions() { 1388 std::vector<BinaryFunction *> AllFunctions; 1389 AllFunctions.reserve(BinaryFunctions.size() + InjectedBinaryFunctions.size()); 1390 std::transform(BinaryFunctions.begin(), BinaryFunctions.end(), 1391 std::back_inserter(AllFunctions), 1392 [](std::pair<const uint64_t, BinaryFunction> &BFI) { 1393 return &BFI.second; 1394 }); 1395 std::copy(InjectedBinaryFunctions.begin(), InjectedBinaryFunctions.end(), 1396 std::back_inserter(AllFunctions)); 1397 1398 return AllFunctions; 1399 } 1400 1401 Optional<DWARFUnit *> BinaryContext::getDWOCU(uint64_t DWOId) { 1402 auto Iter = DWOCUs.find(DWOId); 1403 if (Iter == DWOCUs.end()) 1404 return None; 1405 1406 return Iter->second; 1407 } 1408 1409 DWARFContext *BinaryContext::getDWOContext() { 1410 if (DWOCUs.empty()) 1411 return nullptr; 1412 return &DWOCUs.begin()->second->getContext(); 1413 } 1414 1415 /// Handles DWO sections that can either be in .o, .dwo or .dwp files. 1416 void BinaryContext::preprocessDWODebugInfo() { 1417 for (const std::unique_ptr<DWARFUnit> &CU : DwCtx->compile_units()) { 1418 DWARFUnit *const DwarfUnit = CU.get(); 1419 if (llvm::Optional<uint64_t> DWOId = DwarfUnit->getDWOId()) { 1420 DWARFUnit *DWOCU = DwarfUnit->getNonSkeletonUnitDIE(false).getDwarfUnit(); 1421 if (!DWOCU->isDWOUnit()) { 1422 std::string DWOName = dwarf::toString( 1423 DwarfUnit->getUnitDIE().find( 1424 {dwarf::DW_AT_dwo_name, dwarf::DW_AT_GNU_dwo_name}), 1425 ""); 1426 outs() << "BOLT-WARNING: Debug Fission: DWO debug information for " 1427 << DWOName 1428 << " was not retrieved and won't be updated. Please check " 1429 "relative path.\n"; 1430 continue; 1431 } 1432 DWOCUs[*DWOId] = DWOCU; 1433 } 1434 } 1435 } 1436 1437 void BinaryContext::preprocessDebugInfo() { 1438 struct CURange { 1439 uint64_t LowPC; 1440 uint64_t HighPC; 1441 DWARFUnit *Unit; 1442 1443 bool operator<(const CURange &Other) const { return LowPC < Other.LowPC; } 1444 }; 1445 1446 // Building a map of address ranges to CUs similar to .debug_aranges and use 1447 // it to assign CU to functions. 1448 std::vector<CURange> AllRanges; 1449 AllRanges.reserve(DwCtx->getNumCompileUnits()); 1450 for (const std::unique_ptr<DWARFUnit> &CU : DwCtx->compile_units()) { 1451 Expected<DWARFAddressRangesVector> RangesOrError = 1452 CU->getUnitDIE().getAddressRanges(); 1453 if (!RangesOrError) { 1454 consumeError(RangesOrError.takeError()); 1455 continue; 1456 } 1457 for (DWARFAddressRange &Range : *RangesOrError) { 1458 // Parts of the debug info could be invalidated due to corresponding code 1459 // being removed from the binary by the linker. Hence we check if the 1460 // address is a valid one. 1461 if (containsAddress(Range.LowPC)) 1462 AllRanges.emplace_back(CURange{Range.LowPC, Range.HighPC, CU.get()}); 1463 } 1464 1465 ContainsDwarf5 |= CU->getVersion() >= 5; 1466 ContainsDwarfLegacy |= CU->getVersion() < 5; 1467 } 1468 1469 if (ContainsDwarf5 && ContainsDwarfLegacy) 1470 llvm::errs() << "BOLT-WARNING: BOLT does not support mix mode binary with " 1471 "DWARF5 and DWARF{2,3,4}.\n"; 1472 1473 std::sort(AllRanges.begin(), AllRanges.end()); 1474 for (auto &KV : BinaryFunctions) { 1475 const uint64_t FunctionAddress = KV.first; 1476 BinaryFunction &Function = KV.second; 1477 1478 auto It = std::partition_point( 1479 AllRanges.begin(), AllRanges.end(), 1480 [=](CURange R) { return R.HighPC <= FunctionAddress; }); 1481 if (It != AllRanges.end() && It->LowPC <= FunctionAddress) { 1482 Function.setDWARFUnit(It->Unit); 1483 } 1484 } 1485 1486 // Discover units with debug info that needs to be updated. 1487 for (const auto &KV : BinaryFunctions) { 1488 const BinaryFunction &BF = KV.second; 1489 if (shouldEmit(BF) && BF.getDWARFUnit()) 1490 ProcessedCUs.insert(BF.getDWARFUnit()); 1491 } 1492 1493 // Clear debug info for functions from units that we are not going to process. 1494 for (auto &KV : BinaryFunctions) { 1495 BinaryFunction &BF = KV.second; 1496 if (BF.getDWARFUnit() && !ProcessedCUs.count(BF.getDWARFUnit())) 1497 BF.setDWARFUnit(nullptr); 1498 } 1499 1500 if (opts::Verbosity >= 1) { 1501 outs() << "BOLT-INFO: " << ProcessedCUs.size() << " out of " 1502 << DwCtx->getNumCompileUnits() << " CUs will be updated\n"; 1503 } 1504 1505 preprocessDWODebugInfo(); 1506 1507 // Populate MCContext with DWARF files from all units. 1508 StringRef GlobalPrefix = AsmInfo->getPrivateGlobalPrefix(); 1509 for (const std::unique_ptr<DWARFUnit> &CU : DwCtx->compile_units()) { 1510 const uint64_t CUID = CU->getOffset(); 1511 DwarfLineTable &BinaryLineTable = getDwarfLineTable(CUID); 1512 BinaryLineTable.setLabel(Ctx->getOrCreateSymbol( 1513 GlobalPrefix + "line_table_start" + Twine(CUID))); 1514 1515 if (!ProcessedCUs.count(CU.get())) 1516 continue; 1517 1518 const DWARFDebugLine::LineTable *LineTable = 1519 DwCtx->getLineTableForUnit(CU.get()); 1520 const std::vector<DWARFDebugLine::FileNameEntry> &FileNames = 1521 LineTable->Prologue.FileNames; 1522 1523 uint16_t DwarfVersion = LineTable->Prologue.getVersion(); 1524 if (DwarfVersion >= 5) { 1525 Optional<MD5::MD5Result> Checksum = None; 1526 if (LineTable->Prologue.ContentTypes.HasMD5) 1527 Checksum = LineTable->Prologue.FileNames[0].Checksum; 1528 Optional<const char *> Name = 1529 dwarf::toString(CU->getUnitDIE().find(dwarf::DW_AT_name), nullptr); 1530 if (Optional<uint64_t> DWOID = CU->getDWOId()) { 1531 auto Iter = DWOCUs.find(*DWOID); 1532 assert(Iter != DWOCUs.end() && "DWO CU was not found."); 1533 Name = dwarf::toString( 1534 Iter->second->getUnitDIE().find(dwarf::DW_AT_name), nullptr); 1535 } 1536 BinaryLineTable.setRootFile(CU->getCompilationDir(), *Name, Checksum, 1537 None); 1538 } 1539 1540 BinaryLineTable.setDwarfVersion(DwarfVersion); 1541 1542 // Assign a unique label to every line table, one per CU. 1543 // Make sure empty debug line tables are registered too. 1544 if (FileNames.empty()) { 1545 cantFail( 1546 getDwarfFile("", "<unknown>", 0, None, None, CUID, DwarfVersion)); 1547 continue; 1548 } 1549 const uint32_t Offset = DwarfVersion < 5 ? 1 : 0; 1550 for (size_t I = 0, Size = FileNames.size(); I != Size; ++I) { 1551 // Dir indexes start at 1, as DWARF file numbers, and a dir index 0 1552 // means empty dir. 1553 StringRef Dir = ""; 1554 if (FileNames[I].DirIdx != 0 || DwarfVersion >= 5) 1555 if (Optional<const char *> DirName = dwarf::toString( 1556 LineTable->Prologue 1557 .IncludeDirectories[FileNames[I].DirIdx - Offset])) 1558 Dir = *DirName; 1559 StringRef FileName = ""; 1560 if (Optional<const char *> FName = dwarf::toString(FileNames[I].Name)) 1561 FileName = *FName; 1562 assert(FileName != ""); 1563 Optional<MD5::MD5Result> Checksum = None; 1564 if (DwarfVersion >= 5 && LineTable->Prologue.ContentTypes.HasMD5) 1565 Checksum = LineTable->Prologue.FileNames[I].Checksum; 1566 cantFail( 1567 getDwarfFile(Dir, FileName, 0, Checksum, None, CUID, DwarfVersion)); 1568 } 1569 } 1570 } 1571 1572 bool BinaryContext::shouldEmit(const BinaryFunction &Function) const { 1573 if (Function.isPseudo()) 1574 return false; 1575 1576 if (opts::processAllFunctions()) 1577 return true; 1578 1579 if (Function.isIgnored()) 1580 return false; 1581 1582 // In relocation mode we will emit non-simple functions with CFG. 1583 // If the function does not have a CFG it should be marked as ignored. 1584 return HasRelocations || Function.isSimple(); 1585 } 1586 1587 void BinaryContext::printCFI(raw_ostream &OS, const MCCFIInstruction &Inst) { 1588 uint32_t Operation = Inst.getOperation(); 1589 switch (Operation) { 1590 case MCCFIInstruction::OpSameValue: 1591 OS << "OpSameValue Reg" << Inst.getRegister(); 1592 break; 1593 case MCCFIInstruction::OpRememberState: 1594 OS << "OpRememberState"; 1595 break; 1596 case MCCFIInstruction::OpRestoreState: 1597 OS << "OpRestoreState"; 1598 break; 1599 case MCCFIInstruction::OpOffset: 1600 OS << "OpOffset Reg" << Inst.getRegister() << " " << Inst.getOffset(); 1601 break; 1602 case MCCFIInstruction::OpDefCfaRegister: 1603 OS << "OpDefCfaRegister Reg" << Inst.getRegister(); 1604 break; 1605 case MCCFIInstruction::OpDefCfaOffset: 1606 OS << "OpDefCfaOffset " << Inst.getOffset(); 1607 break; 1608 case MCCFIInstruction::OpDefCfa: 1609 OS << "OpDefCfa Reg" << Inst.getRegister() << " " << Inst.getOffset(); 1610 break; 1611 case MCCFIInstruction::OpRelOffset: 1612 OS << "OpRelOffset Reg" << Inst.getRegister() << " " << Inst.getOffset(); 1613 break; 1614 case MCCFIInstruction::OpAdjustCfaOffset: 1615 OS << "OfAdjustCfaOffset " << Inst.getOffset(); 1616 break; 1617 case MCCFIInstruction::OpEscape: 1618 OS << "OpEscape"; 1619 break; 1620 case MCCFIInstruction::OpRestore: 1621 OS << "OpRestore Reg" << Inst.getRegister(); 1622 break; 1623 case MCCFIInstruction::OpUndefined: 1624 OS << "OpUndefined Reg" << Inst.getRegister(); 1625 break; 1626 case MCCFIInstruction::OpRegister: 1627 OS << "OpRegister Reg" << Inst.getRegister() << " Reg" 1628 << Inst.getRegister2(); 1629 break; 1630 case MCCFIInstruction::OpWindowSave: 1631 OS << "OpWindowSave"; 1632 break; 1633 case MCCFIInstruction::OpGnuArgsSize: 1634 OS << "OpGnuArgsSize"; 1635 break; 1636 default: 1637 OS << "Op#" << Operation; 1638 break; 1639 } 1640 } 1641 1642 void BinaryContext::printInstruction(raw_ostream &OS, const MCInst &Instruction, 1643 uint64_t Offset, 1644 const BinaryFunction *Function, 1645 bool PrintMCInst, bool PrintMemData, 1646 bool PrintRelocations) const { 1647 if (MIB->isEHLabel(Instruction)) { 1648 OS << " EH_LABEL: " << *MIB->getTargetSymbol(Instruction) << '\n'; 1649 return; 1650 } 1651 OS << format(" %08" PRIx64 ": ", Offset); 1652 if (MIB->isCFI(Instruction)) { 1653 uint32_t Offset = Instruction.getOperand(0).getImm(); 1654 OS << "\t!CFI\t$" << Offset << "\t; "; 1655 if (Function) 1656 printCFI(OS, *Function->getCFIFor(Instruction)); 1657 OS << "\n"; 1658 return; 1659 } 1660 InstPrinter->printInst(&Instruction, 0, "", *STI, OS); 1661 if (MIB->isCall(Instruction)) { 1662 if (MIB->isTailCall(Instruction)) 1663 OS << " # TAILCALL "; 1664 if (MIB->isInvoke(Instruction)) { 1665 const Optional<MCPlus::MCLandingPad> EHInfo = MIB->getEHInfo(Instruction); 1666 OS << " # handler: "; 1667 if (EHInfo->first) 1668 OS << *EHInfo->first; 1669 else 1670 OS << '0'; 1671 OS << "; action: " << EHInfo->second; 1672 const int64_t GnuArgsSize = MIB->getGnuArgsSize(Instruction); 1673 if (GnuArgsSize >= 0) 1674 OS << "; GNU_args_size = " << GnuArgsSize; 1675 } 1676 } else if (MIB->isIndirectBranch(Instruction)) { 1677 if (uint64_t JTAddress = MIB->getJumpTable(Instruction)) { 1678 OS << " # JUMPTABLE @0x" << Twine::utohexstr(JTAddress); 1679 } else { 1680 OS << " # UNKNOWN CONTROL FLOW"; 1681 } 1682 } 1683 if (Optional<uint32_t> Offset = MIB->getOffset(Instruction)) 1684 OS << " # Offset: " << *Offset; 1685 1686 MIB->printAnnotations(Instruction, OS); 1687 1688 if (opts::PrintDebugInfo) { 1689 DebugLineTableRowRef RowRef = 1690 DebugLineTableRowRef::fromSMLoc(Instruction.getLoc()); 1691 if (RowRef != DebugLineTableRowRef::NULL_ROW) { 1692 const DWARFDebugLine::LineTable *LineTable; 1693 if (Function && Function->getDWARFUnit() && 1694 Function->getDWARFUnit()->getOffset() == RowRef.DwCompileUnitIndex) { 1695 LineTable = Function->getDWARFLineTable(); 1696 } else { 1697 LineTable = DwCtx->getLineTableForUnit( 1698 DwCtx->getCompileUnitForOffset(RowRef.DwCompileUnitIndex)); 1699 } 1700 assert(LineTable && 1701 "line table expected for instruction with debug info"); 1702 1703 const DWARFDebugLine::Row &Row = LineTable->Rows[RowRef.RowIndex - 1]; 1704 StringRef FileName = ""; 1705 if (Optional<const char *> FName = 1706 dwarf::toString(LineTable->Prologue.FileNames[Row.File - 1].Name)) 1707 FileName = *FName; 1708 OS << " # debug line " << FileName << ":" << Row.Line; 1709 if (Row.Column) 1710 OS << ":" << Row.Column; 1711 if (Row.Discriminator) 1712 OS << " discriminator:" << Row.Discriminator; 1713 } 1714 } 1715 1716 if ((opts::PrintRelocations || PrintRelocations) && Function) { 1717 const uint64_t Size = computeCodeSize(&Instruction, &Instruction + 1); 1718 Function->printRelocations(OS, Offset, Size); 1719 } 1720 1721 OS << "\n"; 1722 1723 if (PrintMCInst) { 1724 Instruction.dump_pretty(OS, InstPrinter.get()); 1725 OS << "\n"; 1726 } 1727 } 1728 1729 Optional<uint64_t> 1730 BinaryContext::getBaseAddressForMapping(uint64_t MMapAddress, 1731 uint64_t FileOffset) const { 1732 // Find a segment with a matching file offset. 1733 for (auto &KV : SegmentMapInfo) { 1734 const SegmentInfo &SegInfo = KV.second; 1735 if (alignDown(SegInfo.FileOffset, SegInfo.Alignment) == FileOffset) { 1736 // Use segment's aligned memory offset to calculate the base address. 1737 const uint64_t MemOffset = alignDown(SegInfo.Address, SegInfo.Alignment); 1738 return MMapAddress - MemOffset; 1739 } 1740 } 1741 1742 return NoneType(); 1743 } 1744 1745 ErrorOr<BinarySection &> BinaryContext::getSectionForAddress(uint64_t Address) { 1746 auto SI = AddressToSection.upper_bound(Address); 1747 if (SI != AddressToSection.begin()) { 1748 --SI; 1749 uint64_t UpperBound = SI->first + SI->second->getSize(); 1750 if (!SI->second->getSize()) 1751 UpperBound += 1; 1752 if (UpperBound > Address) 1753 return *SI->second; 1754 } 1755 return std::make_error_code(std::errc::bad_address); 1756 } 1757 1758 ErrorOr<StringRef> 1759 BinaryContext::getSectionNameForAddress(uint64_t Address) const { 1760 if (ErrorOr<const BinarySection &> Section = getSectionForAddress(Address)) 1761 return Section->getName(); 1762 return std::make_error_code(std::errc::bad_address); 1763 } 1764 1765 BinarySection &BinaryContext::registerSection(BinarySection *Section) { 1766 auto Res = Sections.insert(Section); 1767 (void)Res; 1768 assert(Res.second && "can't register the same section twice."); 1769 1770 // Only register allocatable sections in the AddressToSection map. 1771 if (Section->isAllocatable() && Section->getAddress()) 1772 AddressToSection.insert(std::make_pair(Section->getAddress(), Section)); 1773 NameToSection.insert( 1774 std::make_pair(std::string(Section->getName()), Section)); 1775 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: registering " << *Section << "\n"); 1776 return *Section; 1777 } 1778 1779 BinarySection &BinaryContext::registerSection(SectionRef Section) { 1780 return registerSection(new BinarySection(*this, Section)); 1781 } 1782 1783 BinarySection & 1784 BinaryContext::registerSection(StringRef SectionName, 1785 const BinarySection &OriginalSection) { 1786 return registerSection( 1787 new BinarySection(*this, SectionName, OriginalSection)); 1788 } 1789 1790 BinarySection & 1791 BinaryContext::registerOrUpdateSection(StringRef Name, unsigned ELFType, 1792 unsigned ELFFlags, uint8_t *Data, 1793 uint64_t Size, unsigned Alignment) { 1794 auto NamedSections = getSectionByName(Name); 1795 if (NamedSections.begin() != NamedSections.end()) { 1796 assert(std::next(NamedSections.begin()) == NamedSections.end() && 1797 "can only update unique sections"); 1798 BinarySection *Section = NamedSections.begin()->second; 1799 1800 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: updating " << *Section << " -> "); 1801 const bool Flag = Section->isAllocatable(); 1802 (void)Flag; 1803 Section->update(Data, Size, Alignment, ELFType, ELFFlags); 1804 LLVM_DEBUG(dbgs() << *Section << "\n"); 1805 // FIXME: Fix section flags/attributes for MachO. 1806 if (isELF()) 1807 assert(Flag == Section->isAllocatable() && 1808 "can't change section allocation status"); 1809 return *Section; 1810 } 1811 1812 return registerSection( 1813 new BinarySection(*this, Name, Data, Size, Alignment, ELFType, ELFFlags)); 1814 } 1815 1816 bool BinaryContext::deregisterSection(BinarySection &Section) { 1817 BinarySection *SectionPtr = &Section; 1818 auto Itr = Sections.find(SectionPtr); 1819 if (Itr != Sections.end()) { 1820 auto Range = AddressToSection.equal_range(SectionPtr->getAddress()); 1821 while (Range.first != Range.second) { 1822 if (Range.first->second == SectionPtr) { 1823 AddressToSection.erase(Range.first); 1824 break; 1825 } 1826 ++Range.first; 1827 } 1828 1829 auto NameRange = 1830 NameToSection.equal_range(std::string(SectionPtr->getName())); 1831 while (NameRange.first != NameRange.second) { 1832 if (NameRange.first->second == SectionPtr) { 1833 NameToSection.erase(NameRange.first); 1834 break; 1835 } 1836 ++NameRange.first; 1837 } 1838 1839 Sections.erase(Itr); 1840 delete SectionPtr; 1841 return true; 1842 } 1843 return false; 1844 } 1845 1846 void BinaryContext::printSections(raw_ostream &OS) const { 1847 for (BinarySection *const &Section : Sections) 1848 OS << "BOLT-INFO: " << *Section << "\n"; 1849 } 1850 1851 BinarySection &BinaryContext::absoluteSection() { 1852 if (ErrorOr<BinarySection &> Section = getUniqueSectionByName("<absolute>")) 1853 return *Section; 1854 return registerOrUpdateSection("<absolute>", ELF::SHT_NULL, 0u); 1855 } 1856 1857 ErrorOr<uint64_t> BinaryContext::getUnsignedValueAtAddress(uint64_t Address, 1858 size_t Size) const { 1859 const ErrorOr<const BinarySection &> Section = getSectionForAddress(Address); 1860 if (!Section) 1861 return std::make_error_code(std::errc::bad_address); 1862 1863 if (Section->isVirtual()) 1864 return 0; 1865 1866 DataExtractor DE(Section->getContents(), AsmInfo->isLittleEndian(), 1867 AsmInfo->getCodePointerSize()); 1868 auto ValueOffset = static_cast<uint64_t>(Address - Section->getAddress()); 1869 return DE.getUnsigned(&ValueOffset, Size); 1870 } 1871 1872 ErrorOr<uint64_t> BinaryContext::getSignedValueAtAddress(uint64_t Address, 1873 size_t Size) const { 1874 const ErrorOr<const BinarySection &> Section = getSectionForAddress(Address); 1875 if (!Section) 1876 return std::make_error_code(std::errc::bad_address); 1877 1878 if (Section->isVirtual()) 1879 return 0; 1880 1881 DataExtractor DE(Section->getContents(), AsmInfo->isLittleEndian(), 1882 AsmInfo->getCodePointerSize()); 1883 auto ValueOffset = static_cast<uint64_t>(Address - Section->getAddress()); 1884 return DE.getSigned(&ValueOffset, Size); 1885 } 1886 1887 void BinaryContext::addRelocation(uint64_t Address, MCSymbol *Symbol, 1888 uint64_t Type, uint64_t Addend, 1889 uint64_t Value) { 1890 ErrorOr<BinarySection &> Section = getSectionForAddress(Address); 1891 assert(Section && "cannot find section for address"); 1892 Section->addRelocation(Address - Section->getAddress(), Symbol, Type, Addend, 1893 Value); 1894 } 1895 1896 void BinaryContext::addDynamicRelocation(uint64_t Address, MCSymbol *Symbol, 1897 uint64_t Type, uint64_t Addend, 1898 uint64_t Value) { 1899 ErrorOr<BinarySection &> Section = getSectionForAddress(Address); 1900 assert(Section && "cannot find section for address"); 1901 Section->addDynamicRelocation(Address - Section->getAddress(), Symbol, Type, 1902 Addend, Value); 1903 } 1904 1905 bool BinaryContext::removeRelocationAt(uint64_t Address) { 1906 ErrorOr<BinarySection &> Section = getSectionForAddress(Address); 1907 assert(Section && "cannot find section for address"); 1908 return Section->removeRelocationAt(Address - Section->getAddress()); 1909 } 1910 1911 const Relocation *BinaryContext::getRelocationAt(uint64_t Address) { 1912 ErrorOr<BinarySection &> Section = getSectionForAddress(Address); 1913 if (!Section) 1914 return nullptr; 1915 1916 return Section->getRelocationAt(Address - Section->getAddress()); 1917 } 1918 1919 const Relocation *BinaryContext::getDynamicRelocationAt(uint64_t Address) { 1920 ErrorOr<BinarySection &> Section = getSectionForAddress(Address); 1921 if (!Section) 1922 return nullptr; 1923 1924 return Section->getDynamicRelocationAt(Address - Section->getAddress()); 1925 } 1926 1927 void BinaryContext::markAmbiguousRelocations(BinaryData &BD, 1928 const uint64_t Address) { 1929 auto setImmovable = [&](BinaryData &BD) { 1930 BinaryData *Root = BD.getAtomicRoot(); 1931 LLVM_DEBUG(if (Root->isMoveable()) { 1932 dbgs() << "BOLT-DEBUG: setting " << *Root << " as immovable " 1933 << "due to ambiguous relocation referencing 0x" 1934 << Twine::utohexstr(Address) << '\n'; 1935 }); 1936 Root->setIsMoveable(false); 1937 }; 1938 1939 if (Address == BD.getAddress()) { 1940 setImmovable(BD); 1941 1942 // Set previous symbol as immovable 1943 BinaryData *Prev = getBinaryDataContainingAddress(Address - 1); 1944 if (Prev && Prev->getEndAddress() == BD.getAddress()) 1945 setImmovable(*Prev); 1946 } 1947 1948 if (Address == BD.getEndAddress()) { 1949 setImmovable(BD); 1950 1951 // Set next symbol as immovable 1952 BinaryData *Next = getBinaryDataContainingAddress(BD.getEndAddress()); 1953 if (Next && Next->getAddress() == BD.getEndAddress()) 1954 setImmovable(*Next); 1955 } 1956 } 1957 1958 BinaryFunction *BinaryContext::getFunctionForSymbol(const MCSymbol *Symbol, 1959 uint64_t *EntryDesc) { 1960 std::shared_lock<std::shared_timed_mutex> Lock(SymbolToFunctionMapMutex); 1961 auto BFI = SymbolToFunctionMap.find(Symbol); 1962 if (BFI == SymbolToFunctionMap.end()) 1963 return nullptr; 1964 1965 BinaryFunction *BF = BFI->second; 1966 if (EntryDesc) 1967 *EntryDesc = BF->getEntryIDForSymbol(Symbol); 1968 1969 return BF; 1970 } 1971 1972 void BinaryContext::exitWithBugReport(StringRef Message, 1973 const BinaryFunction &Function) const { 1974 errs() << "=======================================\n"; 1975 errs() << "BOLT is unable to proceed because it couldn't properly understand " 1976 "this function.\n"; 1977 errs() << "If you are running the most recent version of BOLT, you may " 1978 "want to " 1979 "report this and paste this dump.\nPlease check that there is no " 1980 "sensitive contents being shared in this dump.\n"; 1981 errs() << "\nOffending function: " << Function.getPrintName() << "\n\n"; 1982 ScopedPrinter SP(errs()); 1983 SP.printBinaryBlock("Function contents", *Function.getData()); 1984 errs() << "\n"; 1985 Function.dump(); 1986 errs() << "ERROR: " << Message; 1987 errs() << "\n=======================================\n"; 1988 exit(1); 1989 } 1990 1991 BinaryFunction * 1992 BinaryContext::createInjectedBinaryFunction(const std::string &Name, 1993 bool IsSimple) { 1994 InjectedBinaryFunctions.push_back(new BinaryFunction(Name, *this, IsSimple)); 1995 BinaryFunction *BF = InjectedBinaryFunctions.back(); 1996 setSymbolToFunctionMap(BF->getSymbol(), BF); 1997 BF->CurrentState = BinaryFunction::State::CFG; 1998 return BF; 1999 } 2000 2001 std::pair<size_t, size_t> 2002 BinaryContext::calculateEmittedSize(BinaryFunction &BF, bool FixBranches) { 2003 // Adjust branch instruction to match the current layout. 2004 if (FixBranches) 2005 BF.fixBranches(); 2006 2007 // Create local MC context to isolate the effect of ephemeral code emission. 2008 IndependentCodeEmitter MCEInstance = createIndependentMCCodeEmitter(); 2009 MCContext *LocalCtx = MCEInstance.LocalCtx.get(); 2010 MCAsmBackend *MAB = 2011 TheTarget->createMCAsmBackend(*STI, *MRI, MCTargetOptions()); 2012 2013 SmallString<256> Code; 2014 raw_svector_ostream VecOS(Code); 2015 2016 std::unique_ptr<MCObjectWriter> OW = MAB->createObjectWriter(VecOS); 2017 std::unique_ptr<MCStreamer> Streamer(TheTarget->createMCObjectStreamer( 2018 *TheTriple, *LocalCtx, std::unique_ptr<MCAsmBackend>(MAB), std::move(OW), 2019 std::unique_ptr<MCCodeEmitter>(MCEInstance.MCE.release()), *STI, 2020 /*RelaxAll=*/false, 2021 /*IncrementalLinkerCompatible=*/false, 2022 /*DWARFMustBeAtTheEnd=*/false)); 2023 2024 Streamer->initSections(false, *STI); 2025 2026 MCSection *Section = MCEInstance.LocalMOFI->getTextSection(); 2027 Section->setHasInstructions(true); 2028 2029 // Create symbols in the LocalCtx so that they get destroyed with it. 2030 MCSymbol *StartLabel = LocalCtx->createTempSymbol(); 2031 MCSymbol *EndLabel = LocalCtx->createTempSymbol(); 2032 MCSymbol *ColdStartLabel = LocalCtx->createTempSymbol(); 2033 MCSymbol *ColdEndLabel = LocalCtx->createTempSymbol(); 2034 2035 Streamer->SwitchSection(Section); 2036 Streamer->emitLabel(StartLabel); 2037 emitFunctionBody(*Streamer, BF, /*EmitColdPart=*/false, 2038 /*EmitCodeOnly=*/true); 2039 Streamer->emitLabel(EndLabel); 2040 2041 if (BF.isSplit()) { 2042 MCSectionELF *ColdSection = 2043 LocalCtx->getELFSection(BF.getColdCodeSectionName(), ELF::SHT_PROGBITS, 2044 ELF::SHF_EXECINSTR | ELF::SHF_ALLOC); 2045 ColdSection->setHasInstructions(true); 2046 2047 Streamer->SwitchSection(ColdSection); 2048 Streamer->emitLabel(ColdStartLabel); 2049 emitFunctionBody(*Streamer, BF, /*EmitColdPart=*/true, 2050 /*EmitCodeOnly=*/true); 2051 Streamer->emitLabel(ColdEndLabel); 2052 // To avoid calling MCObjectStreamer::flushPendingLabels() which is private 2053 Streamer->emitBytes(StringRef("")); 2054 Streamer->SwitchSection(Section); 2055 } 2056 2057 // To avoid calling MCObjectStreamer::flushPendingLabels() which is private or 2058 // MCStreamer::Finish(), which does more than we want 2059 Streamer->emitBytes(StringRef("")); 2060 2061 MCAssembler &Assembler = 2062 static_cast<MCObjectStreamer *>(Streamer.get())->getAssembler(); 2063 MCAsmLayout Layout(Assembler); 2064 Assembler.layout(Layout); 2065 2066 const uint64_t HotSize = 2067 Layout.getSymbolOffset(*EndLabel) - Layout.getSymbolOffset(*StartLabel); 2068 const uint64_t ColdSize = BF.isSplit() 2069 ? Layout.getSymbolOffset(*ColdEndLabel) - 2070 Layout.getSymbolOffset(*ColdStartLabel) 2071 : 0ULL; 2072 2073 // Clean-up the effect of the code emission. 2074 for (const MCSymbol &Symbol : Assembler.symbols()) { 2075 MCSymbol *MutableSymbol = const_cast<MCSymbol *>(&Symbol); 2076 MutableSymbol->setUndefined(); 2077 MutableSymbol->setIsRegistered(false); 2078 } 2079 2080 return std::make_pair(HotSize, ColdSize); 2081 } 2082 2083 bool BinaryContext::validateEncoding(const MCInst &Inst, 2084 ArrayRef<uint8_t> InputEncoding) const { 2085 SmallString<256> Code; 2086 SmallVector<MCFixup, 4> Fixups; 2087 raw_svector_ostream VecOS(Code); 2088 2089 MCE->encodeInstruction(Inst, VecOS, Fixups, *STI); 2090 auto EncodedData = ArrayRef<uint8_t>((uint8_t *)Code.data(), Code.size()); 2091 if (InputEncoding != EncodedData) { 2092 if (opts::Verbosity > 1) { 2093 errs() << "BOLT-WARNING: mismatched encoding detected\n" 2094 << " input: " << InputEncoding << '\n' 2095 << " output: " << EncodedData << '\n'; 2096 } 2097 return false; 2098 } 2099 2100 return true; 2101 } 2102 2103 uint64_t BinaryContext::getHotThreshold() const { 2104 static uint64_t Threshold = 0; 2105 if (Threshold == 0) { 2106 Threshold = std::max( 2107 (uint64_t)opts::ExecutionCountThreshold, 2108 NumProfiledFuncs ? SumExecutionCount / (2 * NumProfiledFuncs) : 1); 2109 } 2110 return Threshold; 2111 } 2112 2113 BinaryFunction *BinaryContext::getBinaryFunctionContainingAddress( 2114 uint64_t Address, bool CheckPastEnd, bool UseMaxSize) { 2115 auto FI = BinaryFunctions.upper_bound(Address); 2116 if (FI == BinaryFunctions.begin()) 2117 return nullptr; 2118 --FI; 2119 2120 const uint64_t UsedSize = 2121 UseMaxSize ? FI->second.getMaxSize() : FI->second.getSize(); 2122 2123 if (Address >= FI->first + UsedSize + (CheckPastEnd ? 1 : 0)) 2124 return nullptr; 2125 2126 return &FI->second; 2127 } 2128 2129 BinaryFunction *BinaryContext::getBinaryFunctionAtAddress(uint64_t Address) { 2130 // First, try to find a function starting at the given address. If the 2131 // function was folded, this will get us the original folded function if it 2132 // wasn't removed from the list, e.g. in non-relocation mode. 2133 auto BFI = BinaryFunctions.find(Address); 2134 if (BFI != BinaryFunctions.end()) 2135 return &BFI->second; 2136 2137 // We might have folded the function matching the object at the given 2138 // address. In such case, we look for a function matching the symbol 2139 // registered at the original address. The new function (the one that the 2140 // original was folded into) will hold the symbol. 2141 if (const BinaryData *BD = getBinaryDataAtAddress(Address)) { 2142 uint64_t EntryID = 0; 2143 BinaryFunction *BF = getFunctionForSymbol(BD->getSymbol(), &EntryID); 2144 if (BF && EntryID == 0) 2145 return BF; 2146 } 2147 return nullptr; 2148 } 2149 2150 DebugAddressRangesVector BinaryContext::translateModuleAddressRanges( 2151 const DWARFAddressRangesVector &InputRanges) const { 2152 DebugAddressRangesVector OutputRanges; 2153 2154 for (const DWARFAddressRange Range : InputRanges) { 2155 auto BFI = BinaryFunctions.lower_bound(Range.LowPC); 2156 while (BFI != BinaryFunctions.end()) { 2157 const BinaryFunction &Function = BFI->second; 2158 if (Function.getAddress() >= Range.HighPC) 2159 break; 2160 const DebugAddressRangesVector FunctionRanges = 2161 Function.getOutputAddressRanges(); 2162 std::move(std::begin(FunctionRanges), std::end(FunctionRanges), 2163 std::back_inserter(OutputRanges)); 2164 std::advance(BFI, 1); 2165 } 2166 } 2167 2168 return OutputRanges; 2169 } 2170 2171 } // namespace bolt 2172 } // namespace llvm 2173