1 //===- bolt/Core/BinaryContext.cpp - Low-level context --------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file implements the BinaryContext class. 10 // 11 //===----------------------------------------------------------------------===// 12 13 #include "bolt/Core/BinaryContext.h" 14 #include "bolt/Core/BinaryEmitter.h" 15 #include "bolt/Core/BinaryFunction.h" 16 #include "bolt/Utils/CommandLineOpts.h" 17 #include "bolt/Utils/NameResolver.h" 18 #include "bolt/Utils/Utils.h" 19 #include "llvm/ADT/Twine.h" 20 #include "llvm/DebugInfo/DWARF/DWARFCompileUnit.h" 21 #include "llvm/DebugInfo/DWARF/DWARFFormValue.h" 22 #include "llvm/DebugInfo/DWARF/DWARFUnit.h" 23 #include "llvm/MC/MCAsmLayout.h" 24 #include "llvm/MC/MCAssembler.h" 25 #include "llvm/MC/MCContext.h" 26 #include "llvm/MC/MCDisassembler/MCDisassembler.h" 27 #include "llvm/MC/MCInstPrinter.h" 28 #include "llvm/MC/MCObjectStreamer.h" 29 #include "llvm/MC/MCObjectWriter.h" 30 #include "llvm/MC/MCRegisterInfo.h" 31 #include "llvm/MC/MCSectionELF.h" 32 #include "llvm/MC/MCStreamer.h" 33 #include "llvm/MC/MCSubtargetInfo.h" 34 #include "llvm/MC/MCSymbol.h" 35 #include "llvm/Support/CommandLine.h" 36 #include "llvm/Support/Error.h" 37 #include "llvm/Support/Regex.h" 38 #include <algorithm> 39 #include <functional> 40 #include <iterator> 41 #include <unordered_set> 42 43 using namespace llvm; 44 45 #undef DEBUG_TYPE 46 #define DEBUG_TYPE "bolt" 47 48 namespace opts { 49 50 cl::opt<bool> 51 NoHugePages("no-huge-pages", 52 cl::desc("use regular size pages for code alignment"), 53 cl::ZeroOrMore, 54 cl::Hidden, 55 cl::cat(BoltCategory)); 56 57 static cl::opt<bool> 58 PrintDebugInfo("print-debug-info", 59 cl::desc("print debug info when printing functions"), 60 cl::Hidden, 61 cl::ZeroOrMore, 62 cl::cat(BoltCategory)); 63 64 cl::opt<bool> 65 PrintRelocations("print-relocations", 66 cl::desc("print relocations when printing functions/objects"), 67 cl::Hidden, 68 cl::ZeroOrMore, 69 cl::cat(BoltCategory)); 70 71 static cl::opt<bool> 72 PrintMemData("print-mem-data", 73 cl::desc("print memory data annotations when printing functions"), 74 cl::Hidden, 75 cl::ZeroOrMore, 76 cl::cat(BoltCategory)); 77 78 } // namespace opts 79 80 namespace llvm { 81 namespace bolt { 82 83 BinaryContext::BinaryContext(std::unique_ptr<MCContext> Ctx, 84 std::unique_ptr<DWARFContext> DwCtx, 85 std::unique_ptr<Triple> TheTriple, 86 const Target *TheTarget, std::string TripleName, 87 std::unique_ptr<MCCodeEmitter> MCE, 88 std::unique_ptr<MCObjectFileInfo> MOFI, 89 std::unique_ptr<const MCAsmInfo> AsmInfo, 90 std::unique_ptr<const MCInstrInfo> MII, 91 std::unique_ptr<const MCSubtargetInfo> STI, 92 std::unique_ptr<MCInstPrinter> InstPrinter, 93 std::unique_ptr<const MCInstrAnalysis> MIA, 94 std::unique_ptr<MCPlusBuilder> MIB, 95 std::unique_ptr<const MCRegisterInfo> MRI, 96 std::unique_ptr<MCDisassembler> DisAsm) 97 : Ctx(std::move(Ctx)), DwCtx(std::move(DwCtx)), 98 TheTriple(std::move(TheTriple)), TheTarget(TheTarget), 99 TripleName(TripleName), MCE(std::move(MCE)), MOFI(std::move(MOFI)), 100 AsmInfo(std::move(AsmInfo)), MII(std::move(MII)), STI(std::move(STI)), 101 InstPrinter(std::move(InstPrinter)), MIA(std::move(MIA)), 102 MIB(std::move(MIB)), MRI(std::move(MRI)), DisAsm(std::move(DisAsm)) { 103 Relocation::Arch = this->TheTriple->getArch(); 104 RegularPageSize = isAArch64() ? RegularPageSizeAArch64 : RegularPageSizeX86; 105 PageAlign = opts::NoHugePages ? RegularPageSize : HugePageSize; 106 } 107 108 BinaryContext::~BinaryContext() { 109 for (BinarySection *Section : Sections) 110 delete Section; 111 for (BinaryFunction *InjectedFunction : InjectedBinaryFunctions) 112 delete InjectedFunction; 113 for (std::pair<const uint64_t, JumpTable *> JTI : JumpTables) 114 delete JTI.second; 115 clearBinaryData(); 116 } 117 118 /// Create BinaryContext for a given architecture \p ArchName and 119 /// triple \p TripleName. 120 Expected<std::unique_ptr<BinaryContext>> 121 BinaryContext::createBinaryContext(const ObjectFile *File, bool IsPIC, 122 std::unique_ptr<DWARFContext> DwCtx) { 123 StringRef ArchName = ""; 124 StringRef FeaturesStr = ""; 125 switch (File->getArch()) { 126 case llvm::Triple::x86_64: 127 ArchName = "x86-64"; 128 FeaturesStr = "+nopl"; 129 break; 130 case llvm::Triple::aarch64: 131 ArchName = "aarch64"; 132 FeaturesStr = "+fp-armv8,+neon,+crypto,+dotprod,+crc,+lse,+ras,+rdm," 133 "+fullfp16,+spe,+fuse-aes,+rcpc"; 134 break; 135 default: 136 return createStringError(std::errc::not_supported, 137 "BOLT-ERROR: Unrecognized machine in ELF file"); 138 } 139 140 auto TheTriple = std::make_unique<Triple>(File->makeTriple()); 141 const std::string TripleName = TheTriple->str(); 142 143 std::string Error; 144 const Target *TheTarget = 145 TargetRegistry::lookupTarget(std::string(ArchName), *TheTriple, Error); 146 if (!TheTarget) 147 return createStringError(make_error_code(std::errc::not_supported), 148 Twine("BOLT-ERROR: ", Error)); 149 150 std::unique_ptr<const MCRegisterInfo> MRI( 151 TheTarget->createMCRegInfo(TripleName)); 152 if (!MRI) 153 return createStringError( 154 make_error_code(std::errc::not_supported), 155 Twine("BOLT-ERROR: no register info for target ", TripleName)); 156 157 // Set up disassembler. 158 std::unique_ptr<MCAsmInfo> AsmInfo( 159 TheTarget->createMCAsmInfo(*MRI, TripleName, MCTargetOptions())); 160 if (!AsmInfo) 161 return createStringError( 162 make_error_code(std::errc::not_supported), 163 Twine("BOLT-ERROR: no assembly info for target ", TripleName)); 164 // BOLT creates "func@PLT" symbols for PLT entries. In function assembly dump 165 // we want to emit such names as using @PLT without double quotes to convey 166 // variant kind to the assembler. BOLT doesn't rely on the linker so we can 167 // override the default AsmInfo behavior to emit names the way we want. 168 AsmInfo->setAllowAtInName(true); 169 170 std::unique_ptr<const MCSubtargetInfo> STI( 171 TheTarget->createMCSubtargetInfo(TripleName, "", FeaturesStr)); 172 if (!STI) 173 return createStringError( 174 make_error_code(std::errc::not_supported), 175 Twine("BOLT-ERROR: no subtarget info for target ", TripleName)); 176 177 std::unique_ptr<const MCInstrInfo> MII(TheTarget->createMCInstrInfo()); 178 if (!MII) 179 return createStringError( 180 make_error_code(std::errc::not_supported), 181 Twine("BOLT-ERROR: no instruction info for target ", TripleName)); 182 183 std::unique_ptr<MCContext> Ctx( 184 new MCContext(*TheTriple, AsmInfo.get(), MRI.get(), STI.get())); 185 std::unique_ptr<MCObjectFileInfo> MOFI( 186 TheTarget->createMCObjectFileInfo(*Ctx, IsPIC)); 187 Ctx->setObjectFileInfo(MOFI.get()); 188 // We do not support X86 Large code model. Change this in the future. 189 bool Large = false; 190 if (TheTriple->getArch() == llvm::Triple::aarch64) 191 Large = true; 192 unsigned LSDAEncoding = 193 Large ? dwarf::DW_EH_PE_absptr : dwarf::DW_EH_PE_udata4; 194 unsigned TTypeEncoding = 195 Large ? dwarf::DW_EH_PE_absptr : dwarf::DW_EH_PE_udata4; 196 if (IsPIC) { 197 LSDAEncoding = dwarf::DW_EH_PE_pcrel | 198 (Large ? dwarf::DW_EH_PE_sdata8 : dwarf::DW_EH_PE_sdata4); 199 TTypeEncoding = dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel | 200 (Large ? dwarf::DW_EH_PE_sdata8 : dwarf::DW_EH_PE_sdata4); 201 } 202 203 std::unique_ptr<MCDisassembler> DisAsm( 204 TheTarget->createMCDisassembler(*STI, *Ctx)); 205 206 if (!DisAsm) 207 return createStringError( 208 make_error_code(std::errc::not_supported), 209 Twine("BOLT-ERROR: no disassembler info for target ", TripleName)); 210 211 std::unique_ptr<const MCInstrAnalysis> MIA( 212 TheTarget->createMCInstrAnalysis(MII.get())); 213 if (!MIA) 214 return createStringError( 215 make_error_code(std::errc::not_supported), 216 Twine("BOLT-ERROR: failed to create instruction analysis for target ", 217 TripleName)); 218 219 int AsmPrinterVariant = AsmInfo->getAssemblerDialect(); 220 std::unique_ptr<MCInstPrinter> InstructionPrinter( 221 TheTarget->createMCInstPrinter(*TheTriple, AsmPrinterVariant, *AsmInfo, 222 *MII, *MRI)); 223 if (!InstructionPrinter) 224 return createStringError( 225 make_error_code(std::errc::not_supported), 226 Twine("BOLT-ERROR: no instruction printer for target ", TripleName)); 227 InstructionPrinter->setPrintImmHex(true); 228 229 std::unique_ptr<MCCodeEmitter> MCE( 230 TheTarget->createMCCodeEmitter(*MII, *Ctx)); 231 232 // Make sure we don't miss any output on core dumps. 233 outs().SetUnbuffered(); 234 errs().SetUnbuffered(); 235 dbgs().SetUnbuffered(); 236 237 auto BC = std::make_unique<BinaryContext>( 238 std::move(Ctx), std::move(DwCtx), std::move(TheTriple), TheTarget, 239 std::string(TripleName), std::move(MCE), std::move(MOFI), 240 std::move(AsmInfo), std::move(MII), std::move(STI), 241 std::move(InstructionPrinter), std::move(MIA), nullptr, std::move(MRI), 242 std::move(DisAsm)); 243 244 BC->TTypeEncoding = TTypeEncoding; 245 BC->LSDAEncoding = LSDAEncoding; 246 247 BC->MAB = std::unique_ptr<MCAsmBackend>( 248 BC->TheTarget->createMCAsmBackend(*BC->STI, *BC->MRI, MCTargetOptions())); 249 250 BC->setFilename(File->getFileName()); 251 252 BC->HasFixedLoadAddress = !IsPIC; 253 254 BC->SymbolicDisAsm = std::unique_ptr<MCDisassembler>( 255 BC->TheTarget->createMCDisassembler(*BC->STI, *BC->Ctx)); 256 257 if (!BC->SymbolicDisAsm) 258 return createStringError( 259 make_error_code(std::errc::not_supported), 260 Twine("BOLT-ERROR: no disassembler info for target ", TripleName)); 261 262 return std::move(BC); 263 } 264 265 bool BinaryContext::forceSymbolRelocations(StringRef SymbolName) const { 266 if (opts::HotText && 267 (SymbolName == "__hot_start" || SymbolName == "__hot_end")) 268 return true; 269 270 if (opts::HotData && 271 (SymbolName == "__hot_data_start" || SymbolName == "__hot_data_end")) 272 return true; 273 274 if (SymbolName == "_end") 275 return true; 276 277 return false; 278 } 279 280 std::unique_ptr<MCObjectWriter> 281 BinaryContext::createObjectWriter(raw_pwrite_stream &OS) { 282 return MAB->createObjectWriter(OS); 283 } 284 285 bool BinaryContext::validateObjectNesting() const { 286 auto Itr = BinaryDataMap.begin(); 287 auto End = BinaryDataMap.end(); 288 bool Valid = true; 289 while (Itr != End) { 290 auto Next = std::next(Itr); 291 while (Next != End && 292 Itr->second->getSection() == Next->second->getSection() && 293 Itr->second->containsRange(Next->second->getAddress(), 294 Next->second->getSize())) { 295 if (Next->second->Parent != Itr->second) { 296 errs() << "BOLT-WARNING: object nesting incorrect for:\n" 297 << "BOLT-WARNING: " << *Itr->second << "\n" 298 << "BOLT-WARNING: " << *Next->second << "\n"; 299 Valid = false; 300 } 301 ++Next; 302 } 303 Itr = Next; 304 } 305 return Valid; 306 } 307 308 bool BinaryContext::validateHoles() const { 309 bool Valid = true; 310 for (BinarySection &Section : sections()) { 311 for (const Relocation &Rel : Section.relocations()) { 312 uint64_t RelAddr = Rel.Offset + Section.getAddress(); 313 const BinaryData *BD = getBinaryDataContainingAddress(RelAddr); 314 if (!BD) { 315 errs() << "BOLT-WARNING: no BinaryData found for relocation at address" 316 << " 0x" << Twine::utohexstr(RelAddr) << " in " 317 << Section.getName() << "\n"; 318 Valid = false; 319 } else if (!BD->getAtomicRoot()) { 320 errs() << "BOLT-WARNING: no atomic BinaryData found for relocation at " 321 << "address 0x" << Twine::utohexstr(RelAddr) << " in " 322 << Section.getName() << "\n"; 323 Valid = false; 324 } 325 } 326 } 327 return Valid; 328 } 329 330 void BinaryContext::updateObjectNesting(BinaryDataMapType::iterator GAI) { 331 const uint64_t Address = GAI->second->getAddress(); 332 const uint64_t Size = GAI->second->getSize(); 333 334 auto fixParents = [&](BinaryDataMapType::iterator Itr, 335 BinaryData *NewParent) { 336 BinaryData *OldParent = Itr->second->Parent; 337 Itr->second->Parent = NewParent; 338 ++Itr; 339 while (Itr != BinaryDataMap.end() && OldParent && 340 Itr->second->Parent == OldParent) { 341 Itr->second->Parent = NewParent; 342 ++Itr; 343 } 344 }; 345 346 // Check if the previous symbol contains the newly added symbol. 347 if (GAI != BinaryDataMap.begin()) { 348 BinaryData *Prev = std::prev(GAI)->second; 349 while (Prev) { 350 if (Prev->getSection() == GAI->second->getSection() && 351 Prev->containsRange(Address, Size)) { 352 fixParents(GAI, Prev); 353 } else { 354 fixParents(GAI, nullptr); 355 } 356 Prev = Prev->Parent; 357 } 358 } 359 360 // Check if the newly added symbol contains any subsequent symbols. 361 if (Size != 0) { 362 BinaryData *BD = GAI->second->Parent ? GAI->second->Parent : GAI->second; 363 auto Itr = std::next(GAI); 364 while ( 365 Itr != BinaryDataMap.end() && 366 BD->containsRange(Itr->second->getAddress(), Itr->second->getSize())) { 367 Itr->second->Parent = BD; 368 ++Itr; 369 } 370 } 371 } 372 373 iterator_range<BinaryContext::binary_data_iterator> 374 BinaryContext::getSubBinaryData(BinaryData *BD) { 375 auto Start = std::next(BinaryDataMap.find(BD->getAddress())); 376 auto End = Start; 377 while (End != BinaryDataMap.end() && BD->isAncestorOf(End->second)) 378 ++End; 379 return make_range(Start, End); 380 } 381 382 std::pair<const MCSymbol *, uint64_t> 383 BinaryContext::handleAddressRef(uint64_t Address, BinaryFunction &BF, 384 bool IsPCRel) { 385 uint64_t Addend = 0; 386 387 if (isAArch64()) { 388 // Check if this is an access to a constant island and create bookkeeping 389 // to keep track of it and emit it later as part of this function. 390 if (MCSymbol *IslandSym = BF.getOrCreateIslandAccess(Address)) 391 return std::make_pair(IslandSym, Addend); 392 393 // Detect custom code written in assembly that refers to arbitrary 394 // constant islands from other functions. Write this reference so we 395 // can pull this constant island and emit it as part of this function 396 // too. 397 auto IslandIter = AddressToConstantIslandMap.lower_bound(Address); 398 if (IslandIter != AddressToConstantIslandMap.end()) { 399 if (MCSymbol *IslandSym = 400 IslandIter->second->getOrCreateProxyIslandAccess(Address, BF)) { 401 BF.createIslandDependency(IslandSym, IslandIter->second); 402 return std::make_pair(IslandSym, Addend); 403 } 404 } 405 } 406 407 // Note that the address does not necessarily have to reside inside 408 // a section, it could be an absolute address too. 409 ErrorOr<BinarySection &> Section = getSectionForAddress(Address); 410 if (Section && Section->isText()) { 411 if (BF.containsAddress(Address, /*UseMaxSize=*/isAArch64())) { 412 if (Address != BF.getAddress()) { 413 // The address could potentially escape. Mark it as another entry 414 // point into the function. 415 if (opts::Verbosity >= 1) { 416 outs() << "BOLT-INFO: potentially escaped address 0x" 417 << Twine::utohexstr(Address) << " in function " << BF << '\n'; 418 } 419 BF.HasInternalLabelReference = true; 420 return std::make_pair( 421 BF.addEntryPointAtOffset(Address - BF.getAddress()), Addend); 422 } 423 } else { 424 BF.InterproceduralReferences.insert(Address); 425 } 426 } 427 428 // With relocations, catch jump table references outside of the basic block 429 // containing the indirect jump. 430 if (HasRelocations) { 431 const MemoryContentsType MemType = analyzeMemoryAt(Address, BF); 432 if (MemType == MemoryContentsType::POSSIBLE_PIC_JUMP_TABLE && IsPCRel) { 433 const MCSymbol *Symbol = 434 getOrCreateJumpTable(BF, Address, JumpTable::JTT_PIC); 435 436 return std::make_pair(Symbol, Addend); 437 } 438 } 439 440 if (BinaryData *BD = getBinaryDataContainingAddress(Address)) 441 return std::make_pair(BD->getSymbol(), Address - BD->getAddress()); 442 443 // TODO: use DWARF info to get size/alignment here? 444 MCSymbol *TargetSymbol = getOrCreateGlobalSymbol(Address, "DATAat"); 445 LLVM_DEBUG(dbgs() << "Created symbol " << TargetSymbol->getName() << '\n'); 446 return std::make_pair(TargetSymbol, Addend); 447 } 448 449 MemoryContentsType BinaryContext::analyzeMemoryAt(uint64_t Address, 450 BinaryFunction &BF) { 451 if (!isX86()) 452 return MemoryContentsType::UNKNOWN; 453 454 ErrorOr<BinarySection &> Section = getSectionForAddress(Address); 455 if (!Section) { 456 // No section - possibly an absolute address. Since we don't allow 457 // internal function addresses to escape the function scope - we 458 // consider it a tail call. 459 if (opts::Verbosity > 1) { 460 errs() << "BOLT-WARNING: no section for address 0x" 461 << Twine::utohexstr(Address) << " referenced from function " << BF 462 << '\n'; 463 } 464 return MemoryContentsType::UNKNOWN; 465 } 466 467 if (Section->isVirtual()) { 468 // The contents are filled at runtime. 469 return MemoryContentsType::UNKNOWN; 470 } 471 472 // No support for jump tables in code yet. 473 if (Section->isText()) 474 return MemoryContentsType::UNKNOWN; 475 476 // Start with checking for PIC jump table. We expect non-PIC jump tables 477 // to have high 32 bits set to 0. 478 if (analyzeJumpTable(Address, JumpTable::JTT_PIC, BF)) 479 return MemoryContentsType::POSSIBLE_PIC_JUMP_TABLE; 480 481 if (analyzeJumpTable(Address, JumpTable::JTT_NORMAL, BF)) 482 return MemoryContentsType::POSSIBLE_JUMP_TABLE; 483 484 return MemoryContentsType::UNKNOWN; 485 } 486 487 /// Check if <fragment restored name> == <parent restored name>.cold(.\d+)? 488 bool isPotentialFragmentByName(BinaryFunction &Fragment, 489 BinaryFunction &Parent) { 490 for (StringRef Name : Parent.getNames()) { 491 std::string NamePrefix = Regex::escape(NameResolver::restore(Name)); 492 std::string NameRegex = Twine(NamePrefix, "\\.cold(\\.[0-9]+)?").str(); 493 if (Fragment.hasRestoredNameRegex(NameRegex)) 494 return true; 495 } 496 return false; 497 } 498 499 bool BinaryContext::analyzeJumpTable(const uint64_t Address, 500 const JumpTable::JumpTableType Type, 501 BinaryFunction &BF, 502 const uint64_t NextJTAddress, 503 JumpTable::OffsetsType *Offsets) { 504 // Is one of the targets __builtin_unreachable? 505 bool HasUnreachable = false; 506 507 // Number of targets other than __builtin_unreachable. 508 uint64_t NumRealEntries = 0; 509 510 constexpr uint64_t INVALID_OFFSET = std::numeric_limits<uint64_t>::max(); 511 auto addOffset = [&](uint64_t Offset) { 512 if (Offsets) 513 Offsets->emplace_back(Offset); 514 }; 515 516 auto doesBelongToFunction = [&](const uint64_t Addr, 517 BinaryFunction *TargetBF) -> bool { 518 if (BF.containsAddress(Addr)) 519 return true; 520 // Nothing to do if we failed to identify the containing function. 521 if (!TargetBF) 522 return false; 523 // Case 1: check if BF is a fragment and TargetBF is its parent. 524 if (BF.isFragment()) { 525 // Parent function may or may not be already registered. 526 // Set parent link based on function name matching heuristic. 527 return registerFragment(BF, *TargetBF); 528 } 529 // Case 2: check if TargetBF is a fragment and BF is its parent. 530 return TargetBF->isFragment() && registerFragment(*TargetBF, BF); 531 }; 532 533 ErrorOr<BinarySection &> Section = getSectionForAddress(Address); 534 if (!Section) 535 return false; 536 537 // The upper bound is defined by containing object, section limits, and 538 // the next jump table in memory. 539 uint64_t UpperBound = Section->getEndAddress(); 540 const BinaryData *JumpTableBD = getBinaryDataAtAddress(Address); 541 if (JumpTableBD && JumpTableBD->getSize()) { 542 assert(JumpTableBD->getEndAddress() <= UpperBound && 543 "data object cannot cross a section boundary"); 544 UpperBound = JumpTableBD->getEndAddress(); 545 } 546 if (NextJTAddress) 547 UpperBound = std::min(NextJTAddress, UpperBound); 548 549 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: analyzeJumpTable in " << BF.getPrintName() 550 << '\n'); 551 const uint64_t EntrySize = getJumpTableEntrySize(Type); 552 for (uint64_t EntryAddress = Address; EntryAddress <= UpperBound - EntrySize; 553 EntryAddress += EntrySize) { 554 LLVM_DEBUG(dbgs() << " * Checking 0x" << Twine::utohexstr(EntryAddress) 555 << " -> "); 556 // Check if there's a proper relocation against the jump table entry. 557 if (HasRelocations) { 558 if (Type == JumpTable::JTT_PIC && 559 !DataPCRelocations.count(EntryAddress)) { 560 LLVM_DEBUG( 561 dbgs() << "FAIL: JTT_PIC table, no relocation for this address\n"); 562 break; 563 } 564 if (Type == JumpTable::JTT_NORMAL && !getRelocationAt(EntryAddress)) { 565 LLVM_DEBUG( 566 dbgs() 567 << "FAIL: JTT_NORMAL table, no relocation for this address\n"); 568 break; 569 } 570 } 571 572 const uint64_t Value = 573 (Type == JumpTable::JTT_PIC) 574 ? Address + *getSignedValueAtAddress(EntryAddress, EntrySize) 575 : *getPointerAtAddress(EntryAddress); 576 577 // __builtin_unreachable() case. 578 if (Value == BF.getAddress() + BF.getSize()) { 579 addOffset(Value - BF.getAddress()); 580 HasUnreachable = true; 581 LLVM_DEBUG(dbgs() << "OK: __builtin_unreachable\n"); 582 continue; 583 } 584 585 // Function or one of its fragments. 586 BinaryFunction *TargetBF = getBinaryFunctionContainingAddress(Value); 587 588 // We assume that a jump table cannot have function start as an entry. 589 if (!doesBelongToFunction(Value, TargetBF) || Value == BF.getAddress()) { 590 LLVM_DEBUG({ 591 if (!BF.containsAddress(Value)) { 592 dbgs() << "FAIL: function doesn't contain this address\n"; 593 if (TargetBF) { 594 dbgs() << " ! function containing this address: " 595 << TargetBF->getPrintName() << '\n'; 596 if (TargetBF->isFragment()) 597 dbgs() << " ! is a fragment\n"; 598 for (BinaryFunction *TargetParent : TargetBF->ParentFragments) 599 dbgs() << " ! its parent is " 600 << (TargetParent ? TargetParent->getPrintName() : "(none)") 601 << '\n'; 602 } 603 } 604 if (Value == BF.getAddress()) 605 dbgs() << "FAIL: jump table cannot have function start as an entry\n"; 606 }); 607 break; 608 } 609 610 // Check there's an instruction at this offset. 611 if (TargetBF->getState() == BinaryFunction::State::Disassembled && 612 !TargetBF->getInstructionAtOffset(Value - TargetBF->getAddress())) { 613 LLVM_DEBUG(dbgs() << "FAIL: no instruction at this offset\n"); 614 break; 615 } 616 617 ++NumRealEntries; 618 619 if (TargetBF == &BF) { 620 // Address inside the function. 621 addOffset(Value - TargetBF->getAddress()); 622 LLVM_DEBUG(dbgs() << "OK: real entry\n"); 623 } else { 624 // Address in split fragment. 625 BF.setHasSplitJumpTable(true); 626 // Add invalid offset for proper identification of jump table size. 627 addOffset(INVALID_OFFSET); 628 LLVM_DEBUG(dbgs() << "OK: address in split fragment " 629 << TargetBF->getPrintName() << '\n'); 630 } 631 } 632 633 // It's a jump table if the number of real entries is more than 1, or there's 634 // one real entry and "unreachable" targets. If there are only multiple 635 // "unreachable" targets, then it's not a jump table. 636 return NumRealEntries + HasUnreachable >= 2; 637 } 638 639 void BinaryContext::populateJumpTables() { 640 LLVM_DEBUG(dbgs() << "DataPCRelocations: " << DataPCRelocations.size() 641 << '\n'); 642 for (auto JTI = JumpTables.begin(), JTE = JumpTables.end(); JTI != JTE; 643 ++JTI) { 644 JumpTable *JT = JTI->second; 645 BinaryFunction &BF = *JT->Parent; 646 647 if (!BF.isSimple()) 648 continue; 649 650 uint64_t NextJTAddress = 0; 651 auto NextJTI = std::next(JTI); 652 if (NextJTI != JTE) 653 NextJTAddress = NextJTI->second->getAddress(); 654 655 const bool Success = analyzeJumpTable(JT->getAddress(), JT->Type, BF, 656 NextJTAddress, &JT->OffsetEntries); 657 if (!Success) { 658 dbgs() << "failed to analyze jump table in function " << BF << '\n'; 659 JT->print(dbgs()); 660 if (NextJTI != JTE) { 661 dbgs() << "next jump table at 0x" 662 << Twine::utohexstr(NextJTI->second->getAddress()) 663 << " belongs to function " << *NextJTI->second->Parent << '\n'; 664 NextJTI->second->print(dbgs()); 665 } 666 llvm_unreachable("jump table heuristic failure"); 667 } 668 669 for (uint64_t EntryOffset : JT->OffsetEntries) { 670 if (EntryOffset == BF.getSize()) 671 BF.IgnoredBranches.emplace_back(EntryOffset, BF.getSize()); 672 else 673 BF.registerReferencedOffset(EntryOffset); 674 } 675 676 // In strict mode, erase PC-relative relocation record. Later we check that 677 // all such records are erased and thus have been accounted for. 678 if (opts::StrictMode && JT->Type == JumpTable::JTT_PIC) { 679 for (uint64_t Address = JT->getAddress(); 680 Address < JT->getAddress() + JT->getSize(); 681 Address += JT->EntrySize) { 682 DataPCRelocations.erase(DataPCRelocations.find(Address)); 683 } 684 } 685 686 // Mark to skip the function and all its fragments. 687 if (BF.hasSplitJumpTable()) 688 FragmentsToSkip.push_back(&BF); 689 } 690 691 if (opts::StrictMode && DataPCRelocations.size()) { 692 LLVM_DEBUG({ 693 dbgs() << DataPCRelocations.size() 694 << " unclaimed PC-relative relocations left in data:\n"; 695 for (uint64_t Reloc : DataPCRelocations) 696 dbgs() << Twine::utohexstr(Reloc) << '\n'; 697 }); 698 assert(0 && "unclaimed PC-relative relocations left in data\n"); 699 } 700 clearList(DataPCRelocations); 701 } 702 703 void BinaryContext::skipMarkedFragments() { 704 // Unique functions in the vector. 705 std::unordered_set<BinaryFunction *> UniqueFunctions(FragmentsToSkip.begin(), 706 FragmentsToSkip.end()); 707 // Copy the functions back to FragmentsToSkip. 708 FragmentsToSkip.assign(UniqueFunctions.begin(), UniqueFunctions.end()); 709 auto addToWorklist = [&](BinaryFunction *Function) -> void { 710 if (UniqueFunctions.count(Function)) 711 return; 712 FragmentsToSkip.push_back(Function); 713 UniqueFunctions.insert(Function); 714 }; 715 // Functions containing split jump tables need to be skipped with all 716 // fragments (transitively). 717 for (size_t I = 0; I != FragmentsToSkip.size(); I++) { 718 BinaryFunction *BF = FragmentsToSkip[I]; 719 assert(UniqueFunctions.count(BF) && 720 "internal error in traversing function fragments"); 721 if (opts::Verbosity >= 1) 722 errs() << "BOLT-WARNING: Ignoring " << BF->getPrintName() << '\n'; 723 BF->setIgnored(); 724 std::for_each(BF->Fragments.begin(), BF->Fragments.end(), addToWorklist); 725 std::for_each(BF->ParentFragments.begin(), BF->ParentFragments.end(), 726 addToWorklist); 727 } 728 if (!FragmentsToSkip.empty()) 729 errs() << "BOLT-WARNING: ignored " << FragmentsToSkip.size() << " function" 730 << (FragmentsToSkip.size() == 1 ? "" : "s") 731 << " due to cold fragments\n"; 732 FragmentsToSkip.clear(); 733 } 734 735 MCSymbol *BinaryContext::getOrCreateGlobalSymbol(uint64_t Address, Twine Prefix, 736 uint64_t Size, 737 uint16_t Alignment, 738 unsigned Flags) { 739 auto Itr = BinaryDataMap.find(Address); 740 if (Itr != BinaryDataMap.end()) { 741 assert(Itr->second->getSize() == Size || !Size); 742 return Itr->second->getSymbol(); 743 } 744 745 std::string Name = (Prefix + "0x" + Twine::utohexstr(Address)).str(); 746 assert(!GlobalSymbols.count(Name) && "created name is not unique"); 747 return registerNameAtAddress(Name, Address, Size, Alignment, Flags); 748 } 749 750 MCSymbol *BinaryContext::getOrCreateUndefinedGlobalSymbol(StringRef Name) { 751 return Ctx->getOrCreateSymbol(Name); 752 } 753 754 BinaryFunction *BinaryContext::createBinaryFunction( 755 const std::string &Name, BinarySection &Section, uint64_t Address, 756 uint64_t Size, uint64_t SymbolSize, uint16_t Alignment) { 757 auto Result = BinaryFunctions.emplace( 758 Address, BinaryFunction(Name, Section, Address, Size, *this)); 759 assert(Result.second == true && "unexpected duplicate function"); 760 BinaryFunction *BF = &Result.first->second; 761 registerNameAtAddress(Name, Address, SymbolSize ? SymbolSize : Size, 762 Alignment); 763 setSymbolToFunctionMap(BF->getSymbol(), BF); 764 return BF; 765 } 766 767 const MCSymbol * 768 BinaryContext::getOrCreateJumpTable(BinaryFunction &Function, uint64_t Address, 769 JumpTable::JumpTableType Type) { 770 if (JumpTable *JT = getJumpTableContainingAddress(Address)) { 771 assert(JT->Type == Type && "jump table types have to match"); 772 assert(JT->Parent == &Function && 773 "cannot re-use jump table of a different function"); 774 assert(Address == JT->getAddress() && "unexpected non-empty jump table"); 775 776 return JT->getFirstLabel(); 777 } 778 779 // Re-use the existing symbol if possible. 780 MCSymbol *JTLabel = nullptr; 781 if (BinaryData *Object = getBinaryDataAtAddress(Address)) { 782 if (!isInternalSymbolName(Object->getSymbol()->getName())) 783 JTLabel = Object->getSymbol(); 784 } 785 786 const uint64_t EntrySize = getJumpTableEntrySize(Type); 787 if (!JTLabel) { 788 const std::string JumpTableName = generateJumpTableName(Function, Address); 789 JTLabel = registerNameAtAddress(JumpTableName, Address, 0, EntrySize); 790 } 791 792 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: creating jump table " << JTLabel->getName() 793 << " in function " << Function << '\n'); 794 795 JumpTable *JT = new JumpTable(*JTLabel, Address, EntrySize, Type, 796 JumpTable::LabelMapType{{0, JTLabel}}, Function, 797 *getSectionForAddress(Address)); 798 JumpTables.emplace(Address, JT); 799 800 // Duplicate the entry for the parent function for easy access. 801 Function.JumpTables.emplace(Address, JT); 802 803 return JTLabel; 804 } 805 806 std::pair<uint64_t, const MCSymbol *> 807 BinaryContext::duplicateJumpTable(BinaryFunction &Function, JumpTable *JT, 808 const MCSymbol *OldLabel) { 809 auto L = scopeLock(); 810 unsigned Offset = 0; 811 bool Found = false; 812 for (std::pair<const unsigned, MCSymbol *> Elmt : JT->Labels) { 813 if (Elmt.second != OldLabel) 814 continue; 815 Offset = Elmt.first; 816 Found = true; 817 break; 818 } 819 assert(Found && "Label not found"); 820 (void)Found; 821 MCSymbol *NewLabel = Ctx->createNamedTempSymbol("duplicatedJT"); 822 JumpTable *NewJT = 823 new JumpTable(*NewLabel, JT->getAddress(), JT->EntrySize, JT->Type, 824 JumpTable::LabelMapType{{Offset, NewLabel}}, Function, 825 *getSectionForAddress(JT->getAddress())); 826 NewJT->Entries = JT->Entries; 827 NewJT->Counts = JT->Counts; 828 uint64_t JumpTableID = ++DuplicatedJumpTables; 829 // Invert it to differentiate from regular jump tables whose IDs are their 830 // addresses in the input binary memory space 831 JumpTableID = ~JumpTableID; 832 JumpTables.emplace(JumpTableID, NewJT); 833 Function.JumpTables.emplace(JumpTableID, NewJT); 834 return std::make_pair(JumpTableID, NewLabel); 835 } 836 837 std::string BinaryContext::generateJumpTableName(const BinaryFunction &BF, 838 uint64_t Address) { 839 size_t Id; 840 uint64_t Offset = 0; 841 if (const JumpTable *JT = BF.getJumpTableContainingAddress(Address)) { 842 Offset = Address - JT->getAddress(); 843 auto Itr = JT->Labels.find(Offset); 844 if (Itr != JT->Labels.end()) 845 return std::string(Itr->second->getName()); 846 Id = JumpTableIds.at(JT->getAddress()); 847 } else { 848 Id = JumpTableIds[Address] = BF.JumpTables.size(); 849 } 850 return ("JUMP_TABLE/" + BF.getOneName().str() + "." + std::to_string(Id) + 851 (Offset ? ("." + std::to_string(Offset)) : "")); 852 } 853 854 bool BinaryContext::hasValidCodePadding(const BinaryFunction &BF) { 855 // FIXME: aarch64 support is missing. 856 if (!isX86()) 857 return true; 858 859 if (BF.getSize() == BF.getMaxSize()) 860 return true; 861 862 ErrorOr<ArrayRef<unsigned char>> FunctionData = BF.getData(); 863 assert(FunctionData && "cannot get function as data"); 864 865 uint64_t Offset = BF.getSize(); 866 MCInst Instr; 867 uint64_t InstrSize = 0; 868 uint64_t InstrAddress = BF.getAddress() + Offset; 869 using std::placeholders::_1; 870 871 // Skip instructions that satisfy the predicate condition. 872 auto skipInstructions = [&](std::function<bool(const MCInst &)> Predicate) { 873 const uint64_t StartOffset = Offset; 874 for (; Offset < BF.getMaxSize(); 875 Offset += InstrSize, InstrAddress += InstrSize) { 876 if (!DisAsm->getInstruction(Instr, InstrSize, FunctionData->slice(Offset), 877 InstrAddress, nulls())) 878 break; 879 if (!Predicate(Instr)) 880 break; 881 } 882 883 return Offset - StartOffset; 884 }; 885 886 // Skip a sequence of zero bytes. 887 auto skipZeros = [&]() { 888 const uint64_t StartOffset = Offset; 889 for (; Offset < BF.getMaxSize(); ++Offset) 890 if ((*FunctionData)[Offset] != 0) 891 break; 892 893 return Offset - StartOffset; 894 }; 895 896 // Accept the whole padding area filled with breakpoints. 897 auto isBreakpoint = std::bind(&MCPlusBuilder::isBreakpoint, MIB.get(), _1); 898 if (skipInstructions(isBreakpoint) && Offset == BF.getMaxSize()) 899 return true; 900 901 auto isNoop = std::bind(&MCPlusBuilder::isNoop, MIB.get(), _1); 902 903 // Some functions have a jump to the next function or to the padding area 904 // inserted after the body. 905 auto isSkipJump = [&](const MCInst &Instr) { 906 uint64_t TargetAddress = 0; 907 if (MIB->isUnconditionalBranch(Instr) && 908 MIB->evaluateBranch(Instr, InstrAddress, InstrSize, TargetAddress)) { 909 if (TargetAddress >= InstrAddress + InstrSize && 910 TargetAddress <= BF.getAddress() + BF.getMaxSize()) { 911 return true; 912 } 913 } 914 return false; 915 }; 916 917 // Skip over nops, jumps, and zero padding. Allow interleaving (this happens). 918 while (skipInstructions(isNoop) || skipInstructions(isSkipJump) || 919 skipZeros()) 920 ; 921 922 if (Offset == BF.getMaxSize()) 923 return true; 924 925 if (opts::Verbosity >= 1) { 926 errs() << "BOLT-WARNING: bad padding at address 0x" 927 << Twine::utohexstr(BF.getAddress() + BF.getSize()) 928 << " starting at offset " << (Offset - BF.getSize()) 929 << " in function " << BF << '\n' 930 << FunctionData->slice(BF.getSize(), BF.getMaxSize() - BF.getSize()) 931 << '\n'; 932 } 933 934 return false; 935 } 936 937 void BinaryContext::adjustCodePadding() { 938 for (auto &BFI : BinaryFunctions) { 939 BinaryFunction &BF = BFI.second; 940 if (!shouldEmit(BF)) 941 continue; 942 943 if (!hasValidCodePadding(BF)) { 944 if (HasRelocations) { 945 if (opts::Verbosity >= 1) { 946 outs() << "BOLT-INFO: function " << BF 947 << " has invalid padding. Ignoring the function.\n"; 948 } 949 BF.setIgnored(); 950 } else { 951 BF.setMaxSize(BF.getSize()); 952 } 953 } 954 } 955 } 956 957 MCSymbol *BinaryContext::registerNameAtAddress(StringRef Name, uint64_t Address, 958 uint64_t Size, 959 uint16_t Alignment, 960 unsigned Flags) { 961 // Register the name with MCContext. 962 MCSymbol *Symbol = Ctx->getOrCreateSymbol(Name); 963 964 auto GAI = BinaryDataMap.find(Address); 965 BinaryData *BD; 966 if (GAI == BinaryDataMap.end()) { 967 ErrorOr<BinarySection &> SectionOrErr = getSectionForAddress(Address); 968 BinarySection &Section = 969 SectionOrErr ? SectionOrErr.get() : absoluteSection(); 970 BD = new BinaryData(*Symbol, Address, Size, Alignment ? Alignment : 1, 971 Section, Flags); 972 GAI = BinaryDataMap.emplace(Address, BD).first; 973 GlobalSymbols[Name] = BD; 974 updateObjectNesting(GAI); 975 } else { 976 BD = GAI->second; 977 if (!BD->hasName(Name)) { 978 GlobalSymbols[Name] = BD; 979 BD->Symbols.push_back(Symbol); 980 } 981 } 982 983 return Symbol; 984 } 985 986 const BinaryData * 987 BinaryContext::getBinaryDataContainingAddressImpl(uint64_t Address) const { 988 auto NI = BinaryDataMap.lower_bound(Address); 989 auto End = BinaryDataMap.end(); 990 if ((NI != End && Address == NI->first) || 991 ((NI != BinaryDataMap.begin()) && (NI-- != BinaryDataMap.begin()))) { 992 if (NI->second->containsAddress(Address)) 993 return NI->second; 994 995 // If this is a sub-symbol, see if a parent data contains the address. 996 const BinaryData *BD = NI->second->getParent(); 997 while (BD) { 998 if (BD->containsAddress(Address)) 999 return BD; 1000 BD = BD->getParent(); 1001 } 1002 } 1003 return nullptr; 1004 } 1005 1006 bool BinaryContext::setBinaryDataSize(uint64_t Address, uint64_t Size) { 1007 auto NI = BinaryDataMap.find(Address); 1008 assert(NI != BinaryDataMap.end()); 1009 if (NI == BinaryDataMap.end()) 1010 return false; 1011 // TODO: it's possible that a jump table starts at the same address 1012 // as a larger blob of private data. When we set the size of the 1013 // jump table, it might be smaller than the total blob size. In this 1014 // case we just leave the original size since (currently) it won't really 1015 // affect anything. 1016 assert((!NI->second->Size || NI->second->Size == Size || 1017 (NI->second->isJumpTable() && NI->second->Size > Size)) && 1018 "can't change the size of a symbol that has already had its " 1019 "size set"); 1020 if (!NI->second->Size) { 1021 NI->second->Size = Size; 1022 updateObjectNesting(NI); 1023 return true; 1024 } 1025 return false; 1026 } 1027 1028 void BinaryContext::generateSymbolHashes() { 1029 auto isPadding = [](const BinaryData &BD) { 1030 StringRef Contents = BD.getSection().getContents(); 1031 StringRef SymData = Contents.substr(BD.getOffset(), BD.getSize()); 1032 return (BD.getName().startswith("HOLEat") || 1033 SymData.find_first_not_of(0) == StringRef::npos); 1034 }; 1035 1036 uint64_t NumCollisions = 0; 1037 for (auto &Entry : BinaryDataMap) { 1038 BinaryData &BD = *Entry.second; 1039 StringRef Name = BD.getName(); 1040 1041 if (!isInternalSymbolName(Name)) 1042 continue; 1043 1044 // First check if a non-anonymous alias exists and move it to the front. 1045 if (BD.getSymbols().size() > 1) { 1046 auto Itr = std::find_if(BD.getSymbols().begin(), BD.getSymbols().end(), 1047 [&](const MCSymbol *Symbol) { 1048 return !isInternalSymbolName(Symbol->getName()); 1049 }); 1050 if (Itr != BD.getSymbols().end()) { 1051 size_t Idx = std::distance(BD.getSymbols().begin(), Itr); 1052 std::swap(BD.getSymbols()[0], BD.getSymbols()[Idx]); 1053 continue; 1054 } 1055 } 1056 1057 // We have to skip 0 size symbols since they will all collide. 1058 if (BD.getSize() == 0) { 1059 continue; 1060 } 1061 1062 const uint64_t Hash = BD.getSection().hash(BD); 1063 const size_t Idx = Name.find("0x"); 1064 std::string NewName = 1065 (Twine(Name.substr(0, Idx)) + "_" + Twine::utohexstr(Hash)).str(); 1066 if (getBinaryDataByName(NewName)) { 1067 // Ignore collisions for symbols that appear to be padding 1068 // (i.e. all zeros or a "hole") 1069 if (!isPadding(BD)) { 1070 if (opts::Verbosity) { 1071 errs() << "BOLT-WARNING: collision detected when hashing " << BD 1072 << " with new name (" << NewName << "), skipping.\n"; 1073 } 1074 ++NumCollisions; 1075 } 1076 continue; 1077 } 1078 BD.Symbols.insert(BD.Symbols.begin(), Ctx->getOrCreateSymbol(NewName)); 1079 GlobalSymbols[NewName] = &BD; 1080 } 1081 if (NumCollisions) { 1082 errs() << "BOLT-WARNING: " << NumCollisions 1083 << " collisions detected while hashing binary objects"; 1084 if (!opts::Verbosity) 1085 errs() << ". Use -v=1 to see the list."; 1086 errs() << '\n'; 1087 } 1088 } 1089 1090 bool BinaryContext::registerFragment(BinaryFunction &TargetFunction, 1091 BinaryFunction &Function) const { 1092 if (!isPotentialFragmentByName(TargetFunction, Function)) 1093 return false; 1094 assert(TargetFunction.isFragment() && "TargetFunction must be a fragment"); 1095 if (TargetFunction.isParentFragment(&Function)) 1096 return true; 1097 TargetFunction.addParentFragment(Function); 1098 Function.addFragment(TargetFunction); 1099 if (!HasRelocations) { 1100 TargetFunction.setSimple(false); 1101 Function.setSimple(false); 1102 } 1103 if (opts::Verbosity >= 1) { 1104 outs() << "BOLT-INFO: marking " << TargetFunction << " as a fragment of " 1105 << Function << '\n'; 1106 } 1107 return true; 1108 } 1109 1110 void BinaryContext::processInterproceduralReferences(BinaryFunction &Function) { 1111 for (uint64_t Address : Function.InterproceduralReferences) { 1112 if (!Address) 1113 continue; 1114 1115 BinaryFunction *TargetFunction = 1116 getBinaryFunctionContainingAddress(Address); 1117 if (&Function == TargetFunction) 1118 continue; 1119 1120 if (TargetFunction) { 1121 if (TargetFunction->IsFragment && 1122 !registerFragment(*TargetFunction, Function)) { 1123 errs() << "BOLT-WARNING: interprocedural reference between unrelated " 1124 "fragments: " 1125 << Function.getPrintName() << " and " 1126 << TargetFunction->getPrintName() << '\n'; 1127 } 1128 if (uint64_t Offset = Address - TargetFunction->getAddress()) 1129 TargetFunction->addEntryPointAtOffset(Offset); 1130 1131 continue; 1132 } 1133 1134 // Check if address falls in function padding space - this could be 1135 // unmarked data in code. In this case adjust the padding space size. 1136 ErrorOr<BinarySection &> Section = getSectionForAddress(Address); 1137 assert(Section && "cannot get section for referenced address"); 1138 1139 if (!Section->isText()) 1140 continue; 1141 1142 // PLT requires special handling and could be ignored in this context. 1143 StringRef SectionName = Section->getName(); 1144 if (SectionName == ".plt" || SectionName == ".plt.got") 1145 continue; 1146 1147 if (opts::processAllFunctions()) { 1148 errs() << "BOLT-ERROR: cannot process binaries with unmarked " 1149 << "object in code at address 0x" << Twine::utohexstr(Address) 1150 << " belonging to section " << SectionName << " in current mode\n"; 1151 exit(1); 1152 } 1153 1154 TargetFunction = getBinaryFunctionContainingAddress(Address, 1155 /*CheckPastEnd=*/false, 1156 /*UseMaxSize=*/true); 1157 // We are not going to overwrite non-simple functions, but for simple 1158 // ones - adjust the padding size. 1159 if (TargetFunction && TargetFunction->isSimple()) { 1160 errs() << "BOLT-WARNING: function " << *TargetFunction 1161 << " has an object detected in a padding region at address 0x" 1162 << Twine::utohexstr(Address) << '\n'; 1163 TargetFunction->setMaxSize(TargetFunction->getSize()); 1164 } 1165 } 1166 1167 clearList(Function.InterproceduralReferences); 1168 } 1169 1170 void BinaryContext::postProcessSymbolTable() { 1171 fixBinaryDataHoles(); 1172 bool Valid = true; 1173 for (auto &Entry : BinaryDataMap) { 1174 BinaryData *BD = Entry.second; 1175 if ((BD->getName().startswith("SYMBOLat") || 1176 BD->getName().startswith("DATAat")) && 1177 !BD->getParent() && !BD->getSize() && !BD->isAbsolute() && 1178 BD->getSection()) { 1179 errs() << "BOLT-WARNING: zero-sized top level symbol: " << *BD << "\n"; 1180 Valid = false; 1181 } 1182 } 1183 assert(Valid); 1184 (void)Valid; 1185 generateSymbolHashes(); 1186 } 1187 1188 void BinaryContext::foldFunction(BinaryFunction &ChildBF, 1189 BinaryFunction &ParentBF) { 1190 assert(!ChildBF.isMultiEntry() && !ParentBF.isMultiEntry() && 1191 "cannot merge functions with multiple entry points"); 1192 1193 std::unique_lock<std::shared_timed_mutex> WriteCtxLock(CtxMutex, 1194 std::defer_lock); 1195 std::unique_lock<std::shared_timed_mutex> WriteSymbolMapLock( 1196 SymbolToFunctionMapMutex, std::defer_lock); 1197 1198 const StringRef ChildName = ChildBF.getOneName(); 1199 1200 // Move symbols over and update bookkeeping info. 1201 for (MCSymbol *Symbol : ChildBF.getSymbols()) { 1202 ParentBF.getSymbols().push_back(Symbol); 1203 WriteSymbolMapLock.lock(); 1204 SymbolToFunctionMap[Symbol] = &ParentBF; 1205 WriteSymbolMapLock.unlock(); 1206 // NB: there's no need to update BinaryDataMap and GlobalSymbols. 1207 } 1208 ChildBF.getSymbols().clear(); 1209 1210 // Move other names the child function is known under. 1211 std::move(ChildBF.Aliases.begin(), ChildBF.Aliases.end(), 1212 std::back_inserter(ParentBF.Aliases)); 1213 ChildBF.Aliases.clear(); 1214 1215 if (HasRelocations) { 1216 // Merge execution counts of ChildBF into those of ParentBF. 1217 // Without relocations, we cannot reliably merge profiles as both functions 1218 // continue to exist and either one can be executed. 1219 ChildBF.mergeProfileDataInto(ParentBF); 1220 1221 std::shared_lock<std::shared_timed_mutex> ReadBfsLock(BinaryFunctionsMutex, 1222 std::defer_lock); 1223 std::unique_lock<std::shared_timed_mutex> WriteBfsLock(BinaryFunctionsMutex, 1224 std::defer_lock); 1225 // Remove ChildBF from the global set of functions in relocs mode. 1226 ReadBfsLock.lock(); 1227 auto FI = BinaryFunctions.find(ChildBF.getAddress()); 1228 ReadBfsLock.unlock(); 1229 1230 assert(FI != BinaryFunctions.end() && "function not found"); 1231 assert(&ChildBF == &FI->second && "function mismatch"); 1232 1233 WriteBfsLock.lock(); 1234 ChildBF.clearDisasmState(); 1235 FI = BinaryFunctions.erase(FI); 1236 WriteBfsLock.unlock(); 1237 1238 } else { 1239 // In non-relocation mode we keep the function, but rename it. 1240 std::string NewName = "__ICF_" + ChildName.str(); 1241 1242 WriteCtxLock.lock(); 1243 ChildBF.getSymbols().push_back(Ctx->getOrCreateSymbol(NewName)); 1244 WriteCtxLock.unlock(); 1245 1246 ChildBF.setFolded(&ParentBF); 1247 } 1248 } 1249 1250 void BinaryContext::fixBinaryDataHoles() { 1251 assert(validateObjectNesting() && "object nesting inconsitency detected"); 1252 1253 for (BinarySection &Section : allocatableSections()) { 1254 std::vector<std::pair<uint64_t, uint64_t>> Holes; 1255 1256 auto isNotHole = [&Section](const binary_data_iterator &Itr) { 1257 BinaryData *BD = Itr->second; 1258 bool isHole = (!BD->getParent() && !BD->getSize() && BD->isObject() && 1259 (BD->getName().startswith("SYMBOLat0x") || 1260 BD->getName().startswith("DATAat0x") || 1261 BD->getName().startswith("ANONYMOUS"))); 1262 return !isHole && BD->getSection() == Section && !BD->getParent(); 1263 }; 1264 1265 auto BDStart = BinaryDataMap.begin(); 1266 auto BDEnd = BinaryDataMap.end(); 1267 auto Itr = FilteredBinaryDataIterator(isNotHole, BDStart, BDEnd); 1268 auto End = FilteredBinaryDataIterator(isNotHole, BDEnd, BDEnd); 1269 1270 uint64_t EndAddress = Section.getAddress(); 1271 1272 while (Itr != End) { 1273 if (Itr->second->getAddress() > EndAddress) { 1274 uint64_t Gap = Itr->second->getAddress() - EndAddress; 1275 Holes.emplace_back(EndAddress, Gap); 1276 } 1277 EndAddress = Itr->second->getEndAddress(); 1278 ++Itr; 1279 } 1280 1281 if (EndAddress < Section.getEndAddress()) 1282 Holes.emplace_back(EndAddress, Section.getEndAddress() - EndAddress); 1283 1284 // If there is already a symbol at the start of the hole, grow that symbol 1285 // to cover the rest. Otherwise, create a new symbol to cover the hole. 1286 for (std::pair<uint64_t, uint64_t> &Hole : Holes) { 1287 BinaryData *BD = getBinaryDataAtAddress(Hole.first); 1288 if (BD) { 1289 // BD->getSection() can be != Section if there are sections that 1290 // overlap. In this case it is probably safe to just skip the holes 1291 // since the overlapping section will not(?) have any symbols in it. 1292 if (BD->getSection() == Section) 1293 setBinaryDataSize(Hole.first, Hole.second); 1294 } else { 1295 getOrCreateGlobalSymbol(Hole.first, "HOLEat", Hole.second, 1); 1296 } 1297 } 1298 } 1299 1300 assert(validateObjectNesting() && "object nesting inconsitency detected"); 1301 assert(validateHoles() && "top level hole detected in object map"); 1302 } 1303 1304 void BinaryContext::printGlobalSymbols(raw_ostream &OS) const { 1305 const BinarySection *CurrentSection = nullptr; 1306 bool FirstSection = true; 1307 1308 for (auto &Entry : BinaryDataMap) { 1309 const BinaryData *BD = Entry.second; 1310 const BinarySection &Section = BD->getSection(); 1311 if (FirstSection || Section != *CurrentSection) { 1312 uint64_t Address, Size; 1313 StringRef Name = Section.getName(); 1314 if (Section) { 1315 Address = Section.getAddress(); 1316 Size = Section.getSize(); 1317 } else { 1318 Address = BD->getAddress(); 1319 Size = BD->getSize(); 1320 } 1321 OS << "BOLT-INFO: Section " << Name << ", " 1322 << "0x" + Twine::utohexstr(Address) << ":" 1323 << "0x" + Twine::utohexstr(Address + Size) << "/" << Size << "\n"; 1324 CurrentSection = &Section; 1325 FirstSection = false; 1326 } 1327 1328 OS << "BOLT-INFO: "; 1329 const BinaryData *P = BD->getParent(); 1330 while (P) { 1331 OS << " "; 1332 P = P->getParent(); 1333 } 1334 OS << *BD << "\n"; 1335 } 1336 } 1337 1338 Expected<unsigned> BinaryContext::getDwarfFile( 1339 StringRef Directory, StringRef FileName, unsigned FileNumber, 1340 Optional<MD5::MD5Result> Checksum, Optional<StringRef> Source, 1341 unsigned CUID, unsigned DWARFVersion) { 1342 DwarfLineTable &Table = DwarfLineTablesCUMap[CUID]; 1343 return Table.tryGetFile(Directory, FileName, Checksum, Source, DWARFVersion, 1344 FileNumber); 1345 } 1346 1347 unsigned BinaryContext::addDebugFilenameToUnit(const uint32_t DestCUID, 1348 const uint32_t SrcCUID, 1349 unsigned FileIndex) { 1350 DWARFCompileUnit *SrcUnit = DwCtx->getCompileUnitForOffset(SrcCUID); 1351 const DWARFDebugLine::LineTable *LineTable = 1352 DwCtx->getLineTableForUnit(SrcUnit); 1353 const std::vector<DWARFDebugLine::FileNameEntry> &FileNames = 1354 LineTable->Prologue.FileNames; 1355 // Dir indexes start at 1, as DWARF file numbers, and a dir index 0 1356 // means empty dir. 1357 assert(FileIndex > 0 && FileIndex <= FileNames.size() && 1358 "FileIndex out of range for the compilation unit."); 1359 StringRef Dir = ""; 1360 if (FileNames[FileIndex - 1].DirIdx != 0) { 1361 if (Optional<const char *> DirName = dwarf::toString( 1362 LineTable->Prologue 1363 .IncludeDirectories[FileNames[FileIndex - 1].DirIdx - 1])) { 1364 Dir = *DirName; 1365 } 1366 } 1367 StringRef FileName = ""; 1368 if (Optional<const char *> FName = 1369 dwarf::toString(FileNames[FileIndex - 1].Name)) 1370 FileName = *FName; 1371 assert(FileName != ""); 1372 DWARFCompileUnit *DstUnit = DwCtx->getCompileUnitForOffset(DestCUID); 1373 return cantFail(getDwarfFile(Dir, FileName, 0, None, None, DestCUID, 1374 DstUnit->getVersion())); 1375 } 1376 1377 std::vector<BinaryFunction *> BinaryContext::getSortedFunctions() { 1378 std::vector<BinaryFunction *> SortedFunctions(BinaryFunctions.size()); 1379 std::transform(BinaryFunctions.begin(), BinaryFunctions.end(), 1380 SortedFunctions.begin(), 1381 [](std::pair<const uint64_t, BinaryFunction> &BFI) { 1382 return &BFI.second; 1383 }); 1384 1385 std::stable_sort(SortedFunctions.begin(), SortedFunctions.end(), 1386 [](const BinaryFunction *A, const BinaryFunction *B) { 1387 if (A->hasValidIndex() && B->hasValidIndex()) { 1388 return A->getIndex() < B->getIndex(); 1389 } 1390 return A->hasValidIndex(); 1391 }); 1392 return SortedFunctions; 1393 } 1394 1395 std::vector<BinaryFunction *> BinaryContext::getAllBinaryFunctions() { 1396 std::vector<BinaryFunction *> AllFunctions; 1397 AllFunctions.reserve(BinaryFunctions.size() + InjectedBinaryFunctions.size()); 1398 std::transform(BinaryFunctions.begin(), BinaryFunctions.end(), 1399 std::back_inserter(AllFunctions), 1400 [](std::pair<const uint64_t, BinaryFunction> &BFI) { 1401 return &BFI.second; 1402 }); 1403 std::copy(InjectedBinaryFunctions.begin(), InjectedBinaryFunctions.end(), 1404 std::back_inserter(AllFunctions)); 1405 1406 return AllFunctions; 1407 } 1408 1409 Optional<DWARFUnit *> BinaryContext::getDWOCU(uint64_t DWOId) { 1410 auto Iter = DWOCUs.find(DWOId); 1411 if (Iter == DWOCUs.end()) 1412 return None; 1413 1414 return Iter->second; 1415 } 1416 1417 DWARFContext *BinaryContext::getDWOContext() { 1418 if (DWOCUs.empty()) 1419 return nullptr; 1420 return &DWOCUs.begin()->second->getContext(); 1421 } 1422 1423 /// Handles DWO sections that can either be in .o, .dwo or .dwp files. 1424 void BinaryContext::preprocessDWODebugInfo() { 1425 for (const std::unique_ptr<DWARFUnit> &CU : DwCtx->compile_units()) { 1426 DWARFUnit *const DwarfUnit = CU.get(); 1427 if (llvm::Optional<uint64_t> DWOId = DwarfUnit->getDWOId()) { 1428 DWARFUnit *DWOCU = DwarfUnit->getNonSkeletonUnitDIE(false).getDwarfUnit(); 1429 if (!DWOCU->isDWOUnit()) { 1430 std::string DWOName = dwarf::toString( 1431 DwarfUnit->getUnitDIE().find( 1432 {dwarf::DW_AT_dwo_name, dwarf::DW_AT_GNU_dwo_name}), 1433 ""); 1434 outs() << "BOLT-WARNING: Debug Fission: DWO debug information for " 1435 << DWOName 1436 << " was not retrieved and won't be updated. Please check " 1437 "relative path.\n"; 1438 continue; 1439 } 1440 DWOCUs[*DWOId] = DWOCU; 1441 } 1442 } 1443 } 1444 1445 void BinaryContext::preprocessDebugInfo() { 1446 struct CURange { 1447 uint64_t LowPC; 1448 uint64_t HighPC; 1449 DWARFUnit *Unit; 1450 1451 bool operator<(const CURange &Other) const { return LowPC < Other.LowPC; } 1452 }; 1453 1454 // Building a map of address ranges to CUs similar to .debug_aranges and use 1455 // it to assign CU to functions. 1456 std::vector<CURange> AllRanges; 1457 AllRanges.reserve(DwCtx->getNumCompileUnits()); 1458 for (const std::unique_ptr<DWARFUnit> &CU : DwCtx->compile_units()) { 1459 Expected<DWARFAddressRangesVector> RangesOrError = 1460 CU->getUnitDIE().getAddressRanges(); 1461 if (!RangesOrError) { 1462 consumeError(RangesOrError.takeError()); 1463 continue; 1464 } 1465 for (DWARFAddressRange &Range : *RangesOrError) { 1466 // Parts of the debug info could be invalidated due to corresponding code 1467 // being removed from the binary by the linker. Hence we check if the 1468 // address is a valid one. 1469 if (containsAddress(Range.LowPC)) 1470 AllRanges.emplace_back(CURange{Range.LowPC, Range.HighPC, CU.get()}); 1471 } 1472 1473 ContainsDwarf5 |= CU->getVersion() >= 5; 1474 ContainsDwarfLegacy |= CU->getVersion() < 5; 1475 } 1476 1477 if (ContainsDwarf5 && ContainsDwarfLegacy) 1478 llvm::errs() << "BOLT-WARNING: BOLT does not support mix mode binary with " 1479 "DWARF5 and DWARF{2,3,4}.\n"; 1480 1481 std::sort(AllRanges.begin(), AllRanges.end()); 1482 for (auto &KV : BinaryFunctions) { 1483 const uint64_t FunctionAddress = KV.first; 1484 BinaryFunction &Function = KV.second; 1485 1486 auto It = std::partition_point( 1487 AllRanges.begin(), AllRanges.end(), 1488 [=](CURange R) { return R.HighPC <= FunctionAddress; }); 1489 if (It != AllRanges.end() && It->LowPC <= FunctionAddress) { 1490 Function.setDWARFUnit(It->Unit); 1491 } 1492 } 1493 1494 // Discover units with debug info that needs to be updated. 1495 for (const auto &KV : BinaryFunctions) { 1496 const BinaryFunction &BF = KV.second; 1497 if (shouldEmit(BF) && BF.getDWARFUnit()) 1498 ProcessedCUs.insert(BF.getDWARFUnit()); 1499 } 1500 1501 // Clear debug info for functions from units that we are not going to process. 1502 for (auto &KV : BinaryFunctions) { 1503 BinaryFunction &BF = KV.second; 1504 if (BF.getDWARFUnit() && !ProcessedCUs.count(BF.getDWARFUnit())) 1505 BF.setDWARFUnit(nullptr); 1506 } 1507 1508 if (opts::Verbosity >= 1) { 1509 outs() << "BOLT-INFO: " << ProcessedCUs.size() << " out of " 1510 << DwCtx->getNumCompileUnits() << " CUs will be updated\n"; 1511 } 1512 1513 preprocessDWODebugInfo(); 1514 1515 // Populate MCContext with DWARF files from all units. 1516 StringRef GlobalPrefix = AsmInfo->getPrivateGlobalPrefix(); 1517 for (const std::unique_ptr<DWARFUnit> &CU : DwCtx->compile_units()) { 1518 const uint64_t CUID = CU->getOffset(); 1519 DwarfLineTable &BinaryLineTable = getDwarfLineTable(CUID); 1520 BinaryLineTable.setLabel(Ctx->getOrCreateSymbol( 1521 GlobalPrefix + "line_table_start" + Twine(CUID))); 1522 1523 if (!ProcessedCUs.count(CU.get())) 1524 continue; 1525 1526 const DWARFDebugLine::LineTable *LineTable = 1527 DwCtx->getLineTableForUnit(CU.get()); 1528 const std::vector<DWARFDebugLine::FileNameEntry> &FileNames = 1529 LineTable->Prologue.FileNames; 1530 1531 uint16_t DwarfVersion = LineTable->Prologue.getVersion(); 1532 if (DwarfVersion >= 5) { 1533 Optional<MD5::MD5Result> Checksum = None; 1534 if (LineTable->Prologue.ContentTypes.HasMD5) 1535 Checksum = LineTable->Prologue.FileNames[0].Checksum; 1536 Optional<const char *> Name = 1537 dwarf::toString(CU->getUnitDIE().find(dwarf::DW_AT_name), nullptr); 1538 if (Optional<uint64_t> DWOID = CU->getDWOId()) { 1539 auto Iter = DWOCUs.find(*DWOID); 1540 assert(Iter != DWOCUs.end() && "DWO CU was not found."); 1541 Name = dwarf::toString( 1542 Iter->second->getUnitDIE().find(dwarf::DW_AT_name), nullptr); 1543 } 1544 BinaryLineTable.setRootFile(CU->getCompilationDir(), *Name, Checksum, 1545 None); 1546 } 1547 1548 BinaryLineTable.setDwarfVersion(DwarfVersion); 1549 1550 // Assign a unique label to every line table, one per CU. 1551 // Make sure empty debug line tables are registered too. 1552 if (FileNames.empty()) { 1553 cantFail( 1554 getDwarfFile("", "<unknown>", 0, None, None, CUID, DwarfVersion)); 1555 continue; 1556 } 1557 const uint32_t Offset = DwarfVersion < 5 ? 1 : 0; 1558 for (size_t I = 0, Size = FileNames.size(); I != Size; ++I) { 1559 // Dir indexes start at 1, as DWARF file numbers, and a dir index 0 1560 // means empty dir. 1561 StringRef Dir = ""; 1562 if (FileNames[I].DirIdx != 0 || DwarfVersion >= 5) 1563 if (Optional<const char *> DirName = dwarf::toString( 1564 LineTable->Prologue 1565 .IncludeDirectories[FileNames[I].DirIdx - Offset])) 1566 Dir = *DirName; 1567 StringRef FileName = ""; 1568 if (Optional<const char *> FName = dwarf::toString(FileNames[I].Name)) 1569 FileName = *FName; 1570 assert(FileName != ""); 1571 Optional<MD5::MD5Result> Checksum = None; 1572 if (DwarfVersion >= 5 && LineTable->Prologue.ContentTypes.HasMD5) 1573 Checksum = LineTable->Prologue.FileNames[I].Checksum; 1574 cantFail( 1575 getDwarfFile(Dir, FileName, 0, Checksum, None, CUID, DwarfVersion)); 1576 } 1577 } 1578 } 1579 1580 bool BinaryContext::shouldEmit(const BinaryFunction &Function) const { 1581 if (Function.isPseudo()) 1582 return false; 1583 1584 if (opts::processAllFunctions()) 1585 return true; 1586 1587 if (Function.isIgnored()) 1588 return false; 1589 1590 // In relocation mode we will emit non-simple functions with CFG. 1591 // If the function does not have a CFG it should be marked as ignored. 1592 return HasRelocations || Function.isSimple(); 1593 } 1594 1595 void BinaryContext::printCFI(raw_ostream &OS, const MCCFIInstruction &Inst) { 1596 uint32_t Operation = Inst.getOperation(); 1597 switch (Operation) { 1598 case MCCFIInstruction::OpSameValue: 1599 OS << "OpSameValue Reg" << Inst.getRegister(); 1600 break; 1601 case MCCFIInstruction::OpRememberState: 1602 OS << "OpRememberState"; 1603 break; 1604 case MCCFIInstruction::OpRestoreState: 1605 OS << "OpRestoreState"; 1606 break; 1607 case MCCFIInstruction::OpOffset: 1608 OS << "OpOffset Reg" << Inst.getRegister() << " " << Inst.getOffset(); 1609 break; 1610 case MCCFIInstruction::OpDefCfaRegister: 1611 OS << "OpDefCfaRegister Reg" << Inst.getRegister(); 1612 break; 1613 case MCCFIInstruction::OpDefCfaOffset: 1614 OS << "OpDefCfaOffset " << Inst.getOffset(); 1615 break; 1616 case MCCFIInstruction::OpDefCfa: 1617 OS << "OpDefCfa Reg" << Inst.getRegister() << " " << Inst.getOffset(); 1618 break; 1619 case MCCFIInstruction::OpRelOffset: 1620 OS << "OpRelOffset Reg" << Inst.getRegister() << " " << Inst.getOffset(); 1621 break; 1622 case MCCFIInstruction::OpAdjustCfaOffset: 1623 OS << "OfAdjustCfaOffset " << Inst.getOffset(); 1624 break; 1625 case MCCFIInstruction::OpEscape: 1626 OS << "OpEscape"; 1627 break; 1628 case MCCFIInstruction::OpRestore: 1629 OS << "OpRestore Reg" << Inst.getRegister(); 1630 break; 1631 case MCCFIInstruction::OpUndefined: 1632 OS << "OpUndefined Reg" << Inst.getRegister(); 1633 break; 1634 case MCCFIInstruction::OpRegister: 1635 OS << "OpRegister Reg" << Inst.getRegister() << " Reg" 1636 << Inst.getRegister2(); 1637 break; 1638 case MCCFIInstruction::OpWindowSave: 1639 OS << "OpWindowSave"; 1640 break; 1641 case MCCFIInstruction::OpGnuArgsSize: 1642 OS << "OpGnuArgsSize"; 1643 break; 1644 default: 1645 OS << "Op#" << Operation; 1646 break; 1647 } 1648 } 1649 1650 MarkerSymType BinaryContext::getMarkerType(const SymbolRef &Symbol) const { 1651 // For aarch64, the ABI defines mapping symbols so we identify data in the 1652 // code section (see IHI0056B). $x identifies a symbol starting code or the 1653 // end of a data chunk inside code, $d indentifies start of data. 1654 if (!isAArch64() || ELFSymbolRef(Symbol).getSize()) 1655 return MarkerSymType::NONE; 1656 1657 Expected<StringRef> NameOrError = Symbol.getName(); 1658 Expected<object::SymbolRef::Type> TypeOrError = Symbol.getType(); 1659 1660 if (!TypeOrError || !NameOrError) 1661 return MarkerSymType::NONE; 1662 1663 if (*TypeOrError != SymbolRef::ST_Unknown) 1664 return MarkerSymType::NONE; 1665 1666 if (*NameOrError == "$x" || NameOrError->startswith("$x.")) 1667 return MarkerSymType::CODE; 1668 1669 if (*NameOrError == "$d" || NameOrError->startswith("$d.")) 1670 return MarkerSymType::DATA; 1671 1672 return MarkerSymType::NONE; 1673 } 1674 1675 bool BinaryContext::isMarker(const SymbolRef &Symbol) const { 1676 return getMarkerType(Symbol) != MarkerSymType::NONE; 1677 } 1678 1679 void BinaryContext::printInstruction(raw_ostream &OS, const MCInst &Instruction, 1680 uint64_t Offset, 1681 const BinaryFunction *Function, 1682 bool PrintMCInst, bool PrintMemData, 1683 bool PrintRelocations, 1684 StringRef Endl) const { 1685 if (MIB->isEHLabel(Instruction)) { 1686 OS << " EH_LABEL: " << *MIB->getTargetSymbol(Instruction) << Endl; 1687 return; 1688 } 1689 OS << format(" %08" PRIx64 ": ", Offset); 1690 if (MIB->isCFI(Instruction)) { 1691 uint32_t Offset = Instruction.getOperand(0).getImm(); 1692 OS << "\t!CFI\t$" << Offset << "\t; "; 1693 if (Function) 1694 printCFI(OS, *Function->getCFIFor(Instruction)); 1695 OS << Endl; 1696 return; 1697 } 1698 InstPrinter->printInst(&Instruction, 0, "", *STI, OS); 1699 if (MIB->isCall(Instruction)) { 1700 if (MIB->isTailCall(Instruction)) 1701 OS << " # TAILCALL "; 1702 if (MIB->isInvoke(Instruction)) { 1703 const Optional<MCPlus::MCLandingPad> EHInfo = MIB->getEHInfo(Instruction); 1704 OS << " # handler: "; 1705 if (EHInfo->first) 1706 OS << *EHInfo->first; 1707 else 1708 OS << '0'; 1709 OS << "; action: " << EHInfo->second; 1710 const int64_t GnuArgsSize = MIB->getGnuArgsSize(Instruction); 1711 if (GnuArgsSize >= 0) 1712 OS << "; GNU_args_size = " << GnuArgsSize; 1713 } 1714 } else if (MIB->isIndirectBranch(Instruction)) { 1715 if (uint64_t JTAddress = MIB->getJumpTable(Instruction)) { 1716 OS << " # JUMPTABLE @0x" << Twine::utohexstr(JTAddress); 1717 } else { 1718 OS << " # UNKNOWN CONTROL FLOW"; 1719 } 1720 } 1721 if (Optional<uint32_t> Offset = MIB->getOffset(Instruction)) 1722 OS << " # Offset: " << *Offset; 1723 1724 MIB->printAnnotations(Instruction, OS); 1725 1726 if (opts::PrintDebugInfo) { 1727 DebugLineTableRowRef RowRef = 1728 DebugLineTableRowRef::fromSMLoc(Instruction.getLoc()); 1729 if (RowRef != DebugLineTableRowRef::NULL_ROW) { 1730 const DWARFDebugLine::LineTable *LineTable; 1731 if (Function && Function->getDWARFUnit() && 1732 Function->getDWARFUnit()->getOffset() == RowRef.DwCompileUnitIndex) { 1733 LineTable = Function->getDWARFLineTable(); 1734 } else { 1735 LineTable = DwCtx->getLineTableForUnit( 1736 DwCtx->getCompileUnitForOffset(RowRef.DwCompileUnitIndex)); 1737 } 1738 assert(LineTable && 1739 "line table expected for instruction with debug info"); 1740 1741 const DWARFDebugLine::Row &Row = LineTable->Rows[RowRef.RowIndex - 1]; 1742 StringRef FileName = ""; 1743 if (Optional<const char *> FName = 1744 dwarf::toString(LineTable->Prologue.FileNames[Row.File - 1].Name)) 1745 FileName = *FName; 1746 OS << " # debug line " << FileName << ":" << Row.Line; 1747 if (Row.Column) 1748 OS << ":" << Row.Column; 1749 if (Row.Discriminator) 1750 OS << " discriminator:" << Row.Discriminator; 1751 } 1752 } 1753 1754 if ((opts::PrintRelocations || PrintRelocations) && Function) { 1755 const uint64_t Size = computeCodeSize(&Instruction, &Instruction + 1); 1756 Function->printRelocations(OS, Offset, Size); 1757 } 1758 1759 OS << Endl; 1760 1761 if (PrintMCInst) { 1762 Instruction.dump_pretty(OS, InstPrinter.get()); 1763 OS << Endl; 1764 } 1765 } 1766 1767 Optional<uint64_t> 1768 BinaryContext::getBaseAddressForMapping(uint64_t MMapAddress, 1769 uint64_t FileOffset) const { 1770 // Find a segment with a matching file offset. 1771 for (auto &KV : SegmentMapInfo) { 1772 const SegmentInfo &SegInfo = KV.second; 1773 if (alignDown(SegInfo.FileOffset, SegInfo.Alignment) == FileOffset) { 1774 // Use segment's aligned memory offset to calculate the base address. 1775 const uint64_t MemOffset = alignDown(SegInfo.Address, SegInfo.Alignment); 1776 return MMapAddress - MemOffset; 1777 } 1778 } 1779 1780 return NoneType(); 1781 } 1782 1783 ErrorOr<BinarySection &> BinaryContext::getSectionForAddress(uint64_t Address) { 1784 auto SI = AddressToSection.upper_bound(Address); 1785 if (SI != AddressToSection.begin()) { 1786 --SI; 1787 uint64_t UpperBound = SI->first + SI->second->getSize(); 1788 if (!SI->second->getSize()) 1789 UpperBound += 1; 1790 if (UpperBound > Address) 1791 return *SI->second; 1792 } 1793 return std::make_error_code(std::errc::bad_address); 1794 } 1795 1796 ErrorOr<StringRef> 1797 BinaryContext::getSectionNameForAddress(uint64_t Address) const { 1798 if (ErrorOr<const BinarySection &> Section = getSectionForAddress(Address)) 1799 return Section->getName(); 1800 return std::make_error_code(std::errc::bad_address); 1801 } 1802 1803 BinarySection &BinaryContext::registerSection(BinarySection *Section) { 1804 auto Res = Sections.insert(Section); 1805 (void)Res; 1806 assert(Res.second && "can't register the same section twice."); 1807 1808 // Only register allocatable sections in the AddressToSection map. 1809 if (Section->isAllocatable() && Section->getAddress()) 1810 AddressToSection.insert(std::make_pair(Section->getAddress(), Section)); 1811 NameToSection.insert( 1812 std::make_pair(std::string(Section->getName()), Section)); 1813 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: registering " << *Section << "\n"); 1814 return *Section; 1815 } 1816 1817 BinarySection &BinaryContext::registerSection(SectionRef Section) { 1818 return registerSection(new BinarySection(*this, Section)); 1819 } 1820 1821 BinarySection & 1822 BinaryContext::registerSection(StringRef SectionName, 1823 const BinarySection &OriginalSection) { 1824 return registerSection( 1825 new BinarySection(*this, SectionName, OriginalSection)); 1826 } 1827 1828 BinarySection & 1829 BinaryContext::registerOrUpdateSection(StringRef Name, unsigned ELFType, 1830 unsigned ELFFlags, uint8_t *Data, 1831 uint64_t Size, unsigned Alignment) { 1832 auto NamedSections = getSectionByName(Name); 1833 if (NamedSections.begin() != NamedSections.end()) { 1834 assert(std::next(NamedSections.begin()) == NamedSections.end() && 1835 "can only update unique sections"); 1836 BinarySection *Section = NamedSections.begin()->second; 1837 1838 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: updating " << *Section << " -> "); 1839 const bool Flag = Section->isAllocatable(); 1840 (void)Flag; 1841 Section->update(Data, Size, Alignment, ELFType, ELFFlags); 1842 LLVM_DEBUG(dbgs() << *Section << "\n"); 1843 // FIXME: Fix section flags/attributes for MachO. 1844 if (isELF()) 1845 assert(Flag == Section->isAllocatable() && 1846 "can't change section allocation status"); 1847 return *Section; 1848 } 1849 1850 return registerSection( 1851 new BinarySection(*this, Name, Data, Size, Alignment, ELFType, ELFFlags)); 1852 } 1853 1854 bool BinaryContext::deregisterSection(BinarySection &Section) { 1855 BinarySection *SectionPtr = &Section; 1856 auto Itr = Sections.find(SectionPtr); 1857 if (Itr != Sections.end()) { 1858 auto Range = AddressToSection.equal_range(SectionPtr->getAddress()); 1859 while (Range.first != Range.second) { 1860 if (Range.first->second == SectionPtr) { 1861 AddressToSection.erase(Range.first); 1862 break; 1863 } 1864 ++Range.first; 1865 } 1866 1867 auto NameRange = 1868 NameToSection.equal_range(std::string(SectionPtr->getName())); 1869 while (NameRange.first != NameRange.second) { 1870 if (NameRange.first->second == SectionPtr) { 1871 NameToSection.erase(NameRange.first); 1872 break; 1873 } 1874 ++NameRange.first; 1875 } 1876 1877 Sections.erase(Itr); 1878 delete SectionPtr; 1879 return true; 1880 } 1881 return false; 1882 } 1883 1884 void BinaryContext::printSections(raw_ostream &OS) const { 1885 for (BinarySection *const &Section : Sections) 1886 OS << "BOLT-INFO: " << *Section << "\n"; 1887 } 1888 1889 BinarySection &BinaryContext::absoluteSection() { 1890 if (ErrorOr<BinarySection &> Section = getUniqueSectionByName("<absolute>")) 1891 return *Section; 1892 return registerOrUpdateSection("<absolute>", ELF::SHT_NULL, 0u); 1893 } 1894 1895 ErrorOr<uint64_t> BinaryContext::getUnsignedValueAtAddress(uint64_t Address, 1896 size_t Size) const { 1897 const ErrorOr<const BinarySection &> Section = getSectionForAddress(Address); 1898 if (!Section) 1899 return std::make_error_code(std::errc::bad_address); 1900 1901 if (Section->isVirtual()) 1902 return 0; 1903 1904 DataExtractor DE(Section->getContents(), AsmInfo->isLittleEndian(), 1905 AsmInfo->getCodePointerSize()); 1906 auto ValueOffset = static_cast<uint64_t>(Address - Section->getAddress()); 1907 return DE.getUnsigned(&ValueOffset, Size); 1908 } 1909 1910 ErrorOr<uint64_t> BinaryContext::getSignedValueAtAddress(uint64_t Address, 1911 size_t Size) const { 1912 const ErrorOr<const BinarySection &> Section = getSectionForAddress(Address); 1913 if (!Section) 1914 return std::make_error_code(std::errc::bad_address); 1915 1916 if (Section->isVirtual()) 1917 return 0; 1918 1919 DataExtractor DE(Section->getContents(), AsmInfo->isLittleEndian(), 1920 AsmInfo->getCodePointerSize()); 1921 auto ValueOffset = static_cast<uint64_t>(Address - Section->getAddress()); 1922 return DE.getSigned(&ValueOffset, Size); 1923 } 1924 1925 void BinaryContext::addRelocation(uint64_t Address, MCSymbol *Symbol, 1926 uint64_t Type, uint64_t Addend, 1927 uint64_t Value) { 1928 ErrorOr<BinarySection &> Section = getSectionForAddress(Address); 1929 assert(Section && "cannot find section for address"); 1930 Section->addRelocation(Address - Section->getAddress(), Symbol, Type, Addend, 1931 Value); 1932 } 1933 1934 void BinaryContext::addDynamicRelocation(uint64_t Address, MCSymbol *Symbol, 1935 uint64_t Type, uint64_t Addend, 1936 uint64_t Value) { 1937 ErrorOr<BinarySection &> Section = getSectionForAddress(Address); 1938 assert(Section && "cannot find section for address"); 1939 Section->addDynamicRelocation(Address - Section->getAddress(), Symbol, Type, 1940 Addend, Value); 1941 } 1942 1943 bool BinaryContext::removeRelocationAt(uint64_t Address) { 1944 ErrorOr<BinarySection &> Section = getSectionForAddress(Address); 1945 assert(Section && "cannot find section for address"); 1946 return Section->removeRelocationAt(Address - Section->getAddress()); 1947 } 1948 1949 const Relocation *BinaryContext::getRelocationAt(uint64_t Address) { 1950 ErrorOr<BinarySection &> Section = getSectionForAddress(Address); 1951 if (!Section) 1952 return nullptr; 1953 1954 return Section->getRelocationAt(Address - Section->getAddress()); 1955 } 1956 1957 const Relocation *BinaryContext::getDynamicRelocationAt(uint64_t Address) { 1958 ErrorOr<BinarySection &> Section = getSectionForAddress(Address); 1959 if (!Section) 1960 return nullptr; 1961 1962 return Section->getDynamicRelocationAt(Address - Section->getAddress()); 1963 } 1964 1965 void BinaryContext::markAmbiguousRelocations(BinaryData &BD, 1966 const uint64_t Address) { 1967 auto setImmovable = [&](BinaryData &BD) { 1968 BinaryData *Root = BD.getAtomicRoot(); 1969 LLVM_DEBUG(if (Root->isMoveable()) { 1970 dbgs() << "BOLT-DEBUG: setting " << *Root << " as immovable " 1971 << "due to ambiguous relocation referencing 0x" 1972 << Twine::utohexstr(Address) << '\n'; 1973 }); 1974 Root->setIsMoveable(false); 1975 }; 1976 1977 if (Address == BD.getAddress()) { 1978 setImmovable(BD); 1979 1980 // Set previous symbol as immovable 1981 BinaryData *Prev = getBinaryDataContainingAddress(Address - 1); 1982 if (Prev && Prev->getEndAddress() == BD.getAddress()) 1983 setImmovable(*Prev); 1984 } 1985 1986 if (Address == BD.getEndAddress()) { 1987 setImmovable(BD); 1988 1989 // Set next symbol as immovable 1990 BinaryData *Next = getBinaryDataContainingAddress(BD.getEndAddress()); 1991 if (Next && Next->getAddress() == BD.getEndAddress()) 1992 setImmovable(*Next); 1993 } 1994 } 1995 1996 BinaryFunction *BinaryContext::getFunctionForSymbol(const MCSymbol *Symbol, 1997 uint64_t *EntryDesc) { 1998 std::shared_lock<std::shared_timed_mutex> Lock(SymbolToFunctionMapMutex); 1999 auto BFI = SymbolToFunctionMap.find(Symbol); 2000 if (BFI == SymbolToFunctionMap.end()) 2001 return nullptr; 2002 2003 BinaryFunction *BF = BFI->second; 2004 if (EntryDesc) 2005 *EntryDesc = BF->getEntryIDForSymbol(Symbol); 2006 2007 return BF; 2008 } 2009 2010 void BinaryContext::exitWithBugReport(StringRef Message, 2011 const BinaryFunction &Function) const { 2012 errs() << "=======================================\n"; 2013 errs() << "BOLT is unable to proceed because it couldn't properly understand " 2014 "this function.\n"; 2015 errs() << "If you are running the most recent version of BOLT, you may " 2016 "want to " 2017 "report this and paste this dump.\nPlease check that there is no " 2018 "sensitive contents being shared in this dump.\n"; 2019 errs() << "\nOffending function: " << Function.getPrintName() << "\n\n"; 2020 ScopedPrinter SP(errs()); 2021 SP.printBinaryBlock("Function contents", *Function.getData()); 2022 errs() << "\n"; 2023 Function.dump(); 2024 errs() << "ERROR: " << Message; 2025 errs() << "\n=======================================\n"; 2026 exit(1); 2027 } 2028 2029 BinaryFunction * 2030 BinaryContext::createInjectedBinaryFunction(const std::string &Name, 2031 bool IsSimple) { 2032 InjectedBinaryFunctions.push_back(new BinaryFunction(Name, *this, IsSimple)); 2033 BinaryFunction *BF = InjectedBinaryFunctions.back(); 2034 setSymbolToFunctionMap(BF->getSymbol(), BF); 2035 BF->CurrentState = BinaryFunction::State::CFG; 2036 return BF; 2037 } 2038 2039 std::pair<size_t, size_t> 2040 BinaryContext::calculateEmittedSize(BinaryFunction &BF, bool FixBranches) { 2041 // Adjust branch instruction to match the current layout. 2042 if (FixBranches) 2043 BF.fixBranches(); 2044 2045 // Create local MC context to isolate the effect of ephemeral code emission. 2046 IndependentCodeEmitter MCEInstance = createIndependentMCCodeEmitter(); 2047 MCContext *LocalCtx = MCEInstance.LocalCtx.get(); 2048 MCAsmBackend *MAB = 2049 TheTarget->createMCAsmBackend(*STI, *MRI, MCTargetOptions()); 2050 2051 SmallString<256> Code; 2052 raw_svector_ostream VecOS(Code); 2053 2054 std::unique_ptr<MCObjectWriter> OW = MAB->createObjectWriter(VecOS); 2055 std::unique_ptr<MCStreamer> Streamer(TheTarget->createMCObjectStreamer( 2056 *TheTriple, *LocalCtx, std::unique_ptr<MCAsmBackend>(MAB), std::move(OW), 2057 std::unique_ptr<MCCodeEmitter>(MCEInstance.MCE.release()), *STI, 2058 /*RelaxAll=*/false, 2059 /*IncrementalLinkerCompatible=*/false, 2060 /*DWARFMustBeAtTheEnd=*/false)); 2061 2062 Streamer->initSections(false, *STI); 2063 2064 MCSection *Section = MCEInstance.LocalMOFI->getTextSection(); 2065 Section->setHasInstructions(true); 2066 2067 // Create symbols in the LocalCtx so that they get destroyed with it. 2068 MCSymbol *StartLabel = LocalCtx->createTempSymbol(); 2069 MCSymbol *EndLabel = LocalCtx->createTempSymbol(); 2070 MCSymbol *ColdStartLabel = LocalCtx->createTempSymbol(); 2071 MCSymbol *ColdEndLabel = LocalCtx->createTempSymbol(); 2072 2073 Streamer->SwitchSection(Section); 2074 Streamer->emitLabel(StartLabel); 2075 emitFunctionBody(*Streamer, BF, /*EmitColdPart=*/false, 2076 /*EmitCodeOnly=*/true); 2077 Streamer->emitLabel(EndLabel); 2078 2079 if (BF.isSplit()) { 2080 MCSectionELF *ColdSection = 2081 LocalCtx->getELFSection(BF.getColdCodeSectionName(), ELF::SHT_PROGBITS, 2082 ELF::SHF_EXECINSTR | ELF::SHF_ALLOC); 2083 ColdSection->setHasInstructions(true); 2084 2085 Streamer->SwitchSection(ColdSection); 2086 Streamer->emitLabel(ColdStartLabel); 2087 emitFunctionBody(*Streamer, BF, /*EmitColdPart=*/true, 2088 /*EmitCodeOnly=*/true); 2089 Streamer->emitLabel(ColdEndLabel); 2090 // To avoid calling MCObjectStreamer::flushPendingLabels() which is private 2091 Streamer->emitBytes(StringRef("")); 2092 Streamer->SwitchSection(Section); 2093 } 2094 2095 // To avoid calling MCObjectStreamer::flushPendingLabels() which is private or 2096 // MCStreamer::Finish(), which does more than we want 2097 Streamer->emitBytes(StringRef("")); 2098 2099 MCAssembler &Assembler = 2100 static_cast<MCObjectStreamer *>(Streamer.get())->getAssembler(); 2101 MCAsmLayout Layout(Assembler); 2102 Assembler.layout(Layout); 2103 2104 const uint64_t HotSize = 2105 Layout.getSymbolOffset(*EndLabel) - Layout.getSymbolOffset(*StartLabel); 2106 const uint64_t ColdSize = BF.isSplit() 2107 ? Layout.getSymbolOffset(*ColdEndLabel) - 2108 Layout.getSymbolOffset(*ColdStartLabel) 2109 : 0ULL; 2110 2111 // Clean-up the effect of the code emission. 2112 for (const MCSymbol &Symbol : Assembler.symbols()) { 2113 MCSymbol *MutableSymbol = const_cast<MCSymbol *>(&Symbol); 2114 MutableSymbol->setUndefined(); 2115 MutableSymbol->setIsRegistered(false); 2116 } 2117 2118 return std::make_pair(HotSize, ColdSize); 2119 } 2120 2121 bool BinaryContext::validateEncoding(const MCInst &Inst, 2122 ArrayRef<uint8_t> InputEncoding) const { 2123 SmallString<256> Code; 2124 SmallVector<MCFixup, 4> Fixups; 2125 raw_svector_ostream VecOS(Code); 2126 2127 MCE->encodeInstruction(Inst, VecOS, Fixups, *STI); 2128 auto EncodedData = ArrayRef<uint8_t>((uint8_t *)Code.data(), Code.size()); 2129 if (InputEncoding != EncodedData) { 2130 if (opts::Verbosity > 1) { 2131 errs() << "BOLT-WARNING: mismatched encoding detected\n" 2132 << " input: " << InputEncoding << '\n' 2133 << " output: " << EncodedData << '\n'; 2134 } 2135 return false; 2136 } 2137 2138 return true; 2139 } 2140 2141 uint64_t BinaryContext::getHotThreshold() const { 2142 static uint64_t Threshold = 0; 2143 if (Threshold == 0) { 2144 Threshold = std::max( 2145 (uint64_t)opts::ExecutionCountThreshold, 2146 NumProfiledFuncs ? SumExecutionCount / (2 * NumProfiledFuncs) : 1); 2147 } 2148 return Threshold; 2149 } 2150 2151 BinaryFunction *BinaryContext::getBinaryFunctionContainingAddress( 2152 uint64_t Address, bool CheckPastEnd, bool UseMaxSize) { 2153 auto FI = BinaryFunctions.upper_bound(Address); 2154 if (FI == BinaryFunctions.begin()) 2155 return nullptr; 2156 --FI; 2157 2158 const uint64_t UsedSize = 2159 UseMaxSize ? FI->second.getMaxSize() : FI->second.getSize(); 2160 2161 if (Address >= FI->first + UsedSize + (CheckPastEnd ? 1 : 0)) 2162 return nullptr; 2163 2164 return &FI->second; 2165 } 2166 2167 BinaryFunction *BinaryContext::getBinaryFunctionAtAddress(uint64_t Address) { 2168 // First, try to find a function starting at the given address. If the 2169 // function was folded, this will get us the original folded function if it 2170 // wasn't removed from the list, e.g. in non-relocation mode. 2171 auto BFI = BinaryFunctions.find(Address); 2172 if (BFI != BinaryFunctions.end()) 2173 return &BFI->second; 2174 2175 // We might have folded the function matching the object at the given 2176 // address. In such case, we look for a function matching the symbol 2177 // registered at the original address. The new function (the one that the 2178 // original was folded into) will hold the symbol. 2179 if (const BinaryData *BD = getBinaryDataAtAddress(Address)) { 2180 uint64_t EntryID = 0; 2181 BinaryFunction *BF = getFunctionForSymbol(BD->getSymbol(), &EntryID); 2182 if (BF && EntryID == 0) 2183 return BF; 2184 } 2185 return nullptr; 2186 } 2187 2188 DebugAddressRangesVector BinaryContext::translateModuleAddressRanges( 2189 const DWARFAddressRangesVector &InputRanges) const { 2190 DebugAddressRangesVector OutputRanges; 2191 2192 for (const DWARFAddressRange Range : InputRanges) { 2193 auto BFI = BinaryFunctions.lower_bound(Range.LowPC); 2194 while (BFI != BinaryFunctions.end()) { 2195 const BinaryFunction &Function = BFI->second; 2196 if (Function.getAddress() >= Range.HighPC) 2197 break; 2198 const DebugAddressRangesVector FunctionRanges = 2199 Function.getOutputAddressRanges(); 2200 std::move(std::begin(FunctionRanges), std::end(FunctionRanges), 2201 std::back_inserter(OutputRanges)); 2202 std::advance(BFI, 1); 2203 } 2204 } 2205 2206 return OutputRanges; 2207 } 2208 2209 } // namespace bolt 2210 } // namespace llvm 2211