1 //===- bolt/Rewrite/LinuxKernelRewriter.cpp -------------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // Support for updating Linux Kernel metadata. 10 // 11 //===----------------------------------------------------------------------===// 12 13 #include "bolt/Core/BinaryFunction.h" 14 #include "bolt/Rewrite/MetadataRewriter.h" 15 #include "bolt/Rewrite/MetadataRewriters.h" 16 #include "bolt/Utils/CommandLineOpts.h" 17 #include "llvm/ADT/ArrayRef.h" 18 #include "llvm/ADT/DenseSet.h" 19 #include "llvm/MC/MCDisassembler/MCDisassembler.h" 20 #include "llvm/Support/BinaryStreamWriter.h" 21 #include "llvm/Support/CommandLine.h" 22 #include "llvm/Support/Debug.h" 23 #include "llvm/Support/Errc.h" 24 25 #define DEBUG_TYPE "bolt-linux" 26 27 using namespace llvm; 28 using namespace bolt; 29 30 namespace opts { 31 32 static cl::opt<bool> 33 AltInstHasPadLen("alt-inst-has-padlen", 34 cl::desc("specify that .altinstructions has padlen field"), 35 cl::init(false), cl::Hidden, cl::cat(BoltCategory)); 36 37 static cl::opt<uint32_t> 38 AltInstFeatureSize("alt-inst-feature-size", 39 cl::desc("size of feature field in .altinstructions"), 40 cl::init(2), cl::Hidden, cl::cat(BoltCategory)); 41 42 static cl::opt<bool> 43 DumpAltInstructions("dump-alt-instructions", 44 cl::desc("dump Linux alternative instructions info"), 45 cl::init(false), cl::Hidden, cl::cat(BoltCategory)); 46 47 static cl::opt<bool> 48 DumpExceptions("dump-linux-exceptions", 49 cl::desc("dump Linux kernel exception table"), 50 cl::init(false), cl::Hidden, cl::cat(BoltCategory)); 51 52 static cl::opt<bool> 53 DumpORC("dump-orc", cl::desc("dump raw ORC unwind information (sorted)"), 54 cl::init(false), cl::Hidden, cl::cat(BoltCategory)); 55 56 static cl::opt<bool> DumpParavirtualPatchSites( 57 "dump-para-sites", cl::desc("dump Linux kernel paravirtual patch sites"), 58 cl::init(false), cl::Hidden, cl::cat(BoltCategory)); 59 60 static cl::opt<bool> 61 DumpPCIFixups("dump-pci-fixups", 62 cl::desc("dump Linux kernel PCI fixup table"), 63 cl::init(false), cl::Hidden, cl::cat(BoltCategory)); 64 65 static cl::opt<bool> DumpSMPLocks("dump-smp-locks", 66 cl::desc("dump Linux kernel SMP locks"), 67 cl::init(false), cl::Hidden, 68 cl::cat(BoltCategory)); 69 70 static cl::opt<bool> DumpStaticCalls("dump-static-calls", 71 cl::desc("dump Linux kernel static calls"), 72 cl::init(false), cl::Hidden, 73 cl::cat(BoltCategory)); 74 75 static cl::opt<bool> 76 DumpStaticKeys("dump-static-keys", 77 cl::desc("dump Linux kernel static keys jump table"), 78 cl::init(false), cl::Hidden, cl::cat(BoltCategory)); 79 80 static cl::opt<bool> LongJumpLabels( 81 "long-jump-labels", 82 cl::desc("always use long jumps/nops for Linux kernel static keys"), 83 cl::init(false), cl::Hidden, cl::cat(BoltCategory)); 84 85 static cl::opt<bool> 86 PrintORC("print-orc", 87 cl::desc("print ORC unwind information for instructions"), 88 cl::init(true), cl::Hidden, cl::cat(BoltCategory)); 89 90 } // namespace opts 91 92 /// Linux Kernel supports stack unwinding using ORC (oops rewind capability). 93 /// ORC state at every IP can be described by the following data structure.
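/// The three 16-bit fields below mirror the kernel's packed struct orc_entry,
/// which is 6 bytes (ORC_UNWIND_ENTRY_SIZE). Roughly, and depending on the
/// kernel version (see arch/x86/include/asm/orc_types.h), it looks like:
///
///   struct orc_entry {
///     s16      sp_offset;                            // recover SP from here
///     s16      bp_offset;                            // recover BP from here
///     unsigned sp_reg:4, bp_reg:4, type:3, signal:1; // packed flags
///   } __packed;
///
/// BOLT does not interpret the packed bit-fields and carries them opaquely in
/// the Info field.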
94 struct ORCState { 95 int16_t SPOffset; 96 int16_t BPOffset; 97 int16_t Info; 98 99 bool operator==(const ORCState &Other) const { 100 return SPOffset == Other.SPOffset && BPOffset == Other.BPOffset && 101 Info == Other.Info; 102 } 103 104 bool operator!=(const ORCState &Other) const { return !(*this == Other); } 105 }; 106 107 /// Section terminator ORC entry. 108 static ORCState NullORC = {0, 0, 0}; 109 110 /// Basic printer for ORC entry. It does not provide the same level of 111 /// information as objtool (for now). 112 inline raw_ostream &operator<<(raw_ostream &OS, const ORCState &E) { 113 if (!opts::PrintORC) 114 return OS; 115 if (E != NullORC) 116 OS << format("{sp: %d, bp: %d, info: 0x%x}", E.SPOffset, E.BPOffset, 117 E.Info); 118 else 119 OS << "{terminator}"; 120 121 return OS; 122 } 123 124 namespace { 125 126 class LinuxKernelRewriter final : public MetadataRewriter { 127 /// Information required for updating metadata referencing an instruction. 128 struct InstructionFixup { 129 BinarySection &Section; // Section referencing the instruction. 130 uint64_t Offset; // Offset in the section above. 131 BinaryFunction &BF; // Function containing the instruction. 132 MCSymbol &Label; // Label marking the instruction. 133 bool IsPCRelative; // If the reference type is relative. 134 }; 135 std::vector<InstructionFixup> Fixups; 136 137 /// Size of an entry in .smp_locks section. 138 static constexpr size_t SMP_LOCKS_ENTRY_SIZE = 4; 139 140 /// Linux ORC sections. 141 ErrorOr<BinarySection &> ORCUnwindSection = std::errc::bad_address; 142 ErrorOr<BinarySection &> ORCUnwindIPSection = std::errc::bad_address; 143 144 /// Size of entries in ORC sections. 145 static constexpr size_t ORC_UNWIND_ENTRY_SIZE = 6; 146 static constexpr size_t ORC_UNWIND_IP_ENTRY_SIZE = 4; 147 148 struct ORCListEntry { 149 uint64_t IP; /// Instruction address. 150 BinaryFunction *BF; /// Binary function corresponding to the entry. 151 ORCState ORC; /// Stack unwind info in ORC format. 152 153 /// ORC entries are sorted by their IPs. Terminator entries (NullORC) 154 /// should precede other entries with the same address. 155 bool operator<(const ORCListEntry &Other) const { 156 if (IP < Other.IP) 157 return 1; 158 if (IP > Other.IP) 159 return 0; 160 return ORC == NullORC && Other.ORC != NullORC; 161 } 162 }; 163 164 using ORCListType = std::vector<ORCListEntry>; 165 ORCListType ORCEntries; 166 167 /// Number of entries in the input file ORC sections. 168 uint64_t NumORCEntries = 0; 169 170 /// Section containing static keys jump table. 171 ErrorOr<BinarySection &> StaticKeysJumpSection = std::errc::bad_address; 172 uint64_t StaticKeysJumpTableAddress = 0; 173 static constexpr size_t STATIC_KEYS_JUMP_ENTRY_SIZE = 8; 174 175 struct JumpInfoEntry { 176 bool Likely; 177 bool InitValue; 178 }; 179 SmallVector<JumpInfoEntry, 16> JumpInfo; 180 181 /// Static key entries that need nop conversion. 182 DenseSet<uint32_t> NopIDs; 183 184 /// Section containing static call table. 185 ErrorOr<BinarySection &> StaticCallSection = std::errc::bad_address; 186 uint64_t StaticCallTableAddress = 0; 187 static constexpr size_t STATIC_CALL_ENTRY_SIZE = 8; 188 189 struct StaticCallInfo { 190 uint32_t ID; /// Identifier of the entry in the table. 191 BinaryFunction *Function; /// Function containing associated call. 192 MCSymbol *Label; /// Label attached to the call. 193 }; 194 using StaticCallListType = std::vector<StaticCallInfo>; 195 StaticCallListType StaticCallEntries; 196 197 /// Section containing the Linux exception table. 
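/// Each 12-byte entry (EXCEPTION_TABLE_ENTRY_SIZE) holds PC-relative offsets
/// to the potentially faulting instruction and to its fixup code, plus a data
/// word; the layout is documented in detail before readExceptionTable().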
198 ErrorOr<BinarySection &> ExceptionsSection = std::errc::bad_address; 199 static constexpr size_t EXCEPTION_TABLE_ENTRY_SIZE = 12; 200 201 /// Functions with exception handling code. 202 DenseSet<BinaryFunction *> FunctionsWithExceptions; 203 204 /// Section with paravirtual patch sites. 205 ErrorOr<BinarySection &> ParavirtualPatchSection = std::errc::bad_address; 206 207 /// Alignment of paravirtual patch structures. 208 static constexpr size_t PARA_PATCH_ALIGN = 8; 209 210 /// .altinstructions section. 211 ErrorOr<BinarySection &> AltInstrSection = std::errc::bad_address; 212 213 /// Section containing Linux bug table. 214 ErrorOr<BinarySection &> BugTableSection = std::errc::bad_address; 215 216 /// Size of bug_entry struct. 217 static constexpr size_t BUG_TABLE_ENTRY_SIZE = 12; 218 219 /// List of bug entries per function. 220 using FunctionBugListType = 221 DenseMap<BinaryFunction *, SmallVector<uint32_t, 2>>; 222 FunctionBugListType FunctionBugList; 223 224 /// .pci_fixup section. 225 ErrorOr<BinarySection &> PCIFixupSection = std::errc::bad_address; 226 static constexpr size_t PCI_FIXUP_ENTRY_SIZE = 16; 227 228 /// Process Linux kernel special sections and their relocations. 229 void processLKSections(); 230 231 /// Process __ksymtab and __ksymtab_gpl. 232 void processLKKSymtab(bool IsGPL = false); 233 234 // Create relocations in sections requiring fixups. 235 // 236 // Make sure functions that will not be emitted are marked as such before this 237 // function is executed. 238 void processInstructionFixups(); 239 240 /// Process .smp_locks section. 241 Error processSMPLocks(); 242 243 /// Read ORC unwind information and annotate instructions. 244 Error readORCTables(); 245 246 /// Update ORC for functions once CFG is constructed. 247 Error processORCPostCFG(); 248 249 /// Update ORC data in the binary. 250 Error rewriteORCTables(); 251 252 /// Validate written ORC tables after binary emission. 253 Error validateORCTables(); 254 255 /// Static call table handling. 256 Error readStaticCalls(); 257 Error rewriteStaticCalls(); 258 259 Error readExceptionTable(); 260 Error rewriteExceptionTable(); 261 262 /// Paravirtual instruction patch sites. 263 Error readParaInstructions(); 264 Error rewriteParaInstructions(); 265 266 /// __bug_table section handling. 267 Error readBugTable(); 268 Error rewriteBugTable(); 269 270 /// Do not process functions containing instructions annotated with 271 /// \p Annotation. 272 void skipFunctionsWithAnnotation(StringRef Annotation) const; 273 274 /// Handle alternative instruction info from .altinstructions. 275 Error readAltInstructions(); 276 Error rewriteAltInstructions(); 277 278 /// Read .pci_fixup 279 Error readPCIFixupTable(); 280 281 /// Handle static keys jump table.
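/// Rewriting is done in two passes: before emission each tracked jump site is
/// converted into a branch so that relaxation picks the final encoding, and
/// after emission the sites whose initial key value requires a NOP are
/// converted back (see updateStaticKeysJumpTablePostEmit()).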
282 Error readStaticKeysJumpTable(); 283 Error rewriteStaticKeysJumpTable(); 284 Error updateStaticKeysJumpTablePostEmit(); 285 286 public: 287 LinuxKernelRewriter(BinaryContext &BC) 288 : MetadataRewriter("linux-kernel-rewriter", BC) {} 289 290 Error preCFGInitializer() override { 291 processLKSections(); 292 293 if (Error E = processSMPLocks()) 294 return E; 295 296 if (Error E = readORCTables()) 297 return E; 298 299 if (Error E = readStaticCalls()) 300 return E; 301 302 if (Error E = readExceptionTable()) 303 return E; 304 305 if (Error E = readParaInstructions()) 306 return E; 307 308 if (Error E = readBugTable()) 309 return E; 310 311 if (Error E = readAltInstructions()) 312 return E; 313 314 if (Error E = readPCIFixupTable()) 315 return E; 316 317 if (Error E = readStaticKeysJumpTable()) 318 return E; 319 320 return Error::success(); 321 } 322 323 Error postCFGInitializer() override { 324 if (Error E = processORCPostCFG()) 325 return E; 326 327 return Error::success(); 328 } 329 330 Error preEmitFinalizer() override { 331 // Since rewriteExceptionTable() can mark functions as non-simple, run it 332 // before other rewriters that depend on simple/emit status. 333 if (Error E = rewriteExceptionTable()) 334 return E; 335 336 if (Error E = rewriteAltInstructions()) 337 return E; 338 339 if (Error E = rewriteParaInstructions()) 340 return E; 341 342 if (Error E = rewriteORCTables()) 343 return E; 344 345 if (Error E = rewriteStaticCalls()) 346 return E; 347 348 if (Error E = rewriteStaticKeysJumpTable()) 349 return E; 350 351 if (Error E = rewriteBugTable()) 352 return E; 353 354 processInstructionFixups(); 355 356 return Error::success(); 357 } 358 359 Error postEmitFinalizer() override { 360 if (Error E = updateStaticKeysJumpTablePostEmit()) 361 return E; 362 363 if (Error E = validateORCTables()) 364 return E; 365 366 return Error::success(); 367 } 368 }; 369 370 void LinuxKernelRewriter::processLKSections() { 371 processLKKSymtab(); 372 processLKKSymtab(true); 373 } 374 375 /// Process __ksymtab[_gpl] sections of Linux Kernel. 376 /// This section lists all the vmlinux symbols that kernel modules can access. 377 /// 378 /// All the entries are 4 bytes each and hence we can read them one by one 379 /// and ignore the ones that are not pointing to the .text section. All pointers 380 /// are PC-relative offsets and always point to the beginning of a function.
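/// For example, if the 4-byte entry at address A holds the signed value V, the
/// referenced address is A + V. When that address is the start of a function
/// known to BOLT, the code below registers a PC-relative relocation against
/// the function's symbol so the entry remains correct if the function moves.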
381 void LinuxKernelRewriter::processLKKSymtab(bool IsGPL) { 382 StringRef SectionName = "__ksymtab"; 383 if (IsGPL) 384 SectionName = "__ksymtab_gpl"; 385 ErrorOr<BinarySection &> SectionOrError = 386 BC.getUniqueSectionByName(SectionName); 387 assert(SectionOrError && 388 "__ksymtab[_gpl] section not found in Linux Kernel binary"); 389 const uint64_t SectionSize = SectionOrError->getSize(); 390 const uint64_t SectionAddress = SectionOrError->getAddress(); 391 assert((SectionSize % 4) == 0 && 392 "The size of the __ksymtab[_gpl] section should be a multiple of 4"); 393 394 for (uint64_t I = 0; I < SectionSize; I += 4) { 395 const uint64_t EntryAddress = SectionAddress + I; 396 ErrorOr<uint64_t> Offset = BC.getSignedValueAtAddress(EntryAddress, 4); 397 assert(Offset && "Reading valid PC-relative offset for a ksymtab entry"); 398 const int32_t SignedOffset = *Offset; 399 const uint64_t RefAddress = EntryAddress + SignedOffset; 400 BinaryFunction *BF = BC.getBinaryFunctionAtAddress(RefAddress); 401 if (!BF) 402 continue; 403 404 BC.addRelocation(EntryAddress, BF->getSymbol(), Relocation::getPC32(), 0, 405 *Offset); 406 } 407 } 408 409 /// .smp_locks section contains PC-relative references to instructions with LOCK 410 /// prefix. The prefix can be converted to NOP at boot time on non-SMP systems. 411 Error LinuxKernelRewriter::processSMPLocks() { 412 ErrorOr<BinarySection &> SMPLocksSection = 413 BC.getUniqueSectionByName(".smp_locks"); 414 if (!SMPLocksSection) 415 return Error::success(); 416 417 const uint64_t SectionSize = SMPLocksSection->getSize(); 418 const uint64_t SectionAddress = SMPLocksSection->getAddress(); 419 if (SectionSize % SMP_LOCKS_ENTRY_SIZE) 420 return createStringError(errc::executable_format_error, 421 "bad size of .smp_locks section"); 422 423 DataExtractor DE = DataExtractor(SMPLocksSection->getContents(), 424 BC.AsmInfo->isLittleEndian(), 425 BC.AsmInfo->getCodePointerSize()); 426 DataExtractor::Cursor Cursor(0); 427 while (Cursor && Cursor.tell() < SectionSize) { 428 const uint64_t Offset = Cursor.tell(); 429 const uint64_t IP = SectionAddress + Offset + (int32_t)DE.getU32(Cursor); 430 431 // Consume the status of the cursor. 432 if (!Cursor) 433 return createStringError(errc::executable_format_error, 434 "error while reading .smp_locks: %s", 435 toString(Cursor.takeError()).c_str()); 436 437 if (opts::DumpSMPLocks) 438 BC.outs() << "SMP lock at 0x: " << Twine::utohexstr(IP) << '\n'; 439 440 BinaryFunction *BF = BC.getBinaryFunctionContainingAddress(IP); 441 if (!BF || !BC.shouldEmit(*BF)) 442 continue; 443 444 MCInst *Inst = BF->getInstructionAtOffset(IP - BF->getAddress()); 445 if (!Inst) 446 return createStringError(errc::executable_format_error, 447 "no instruction matches lock at 0x%" PRIx64, IP); 448 449 // Check for duplicate entries. 
450 if (BC.MIB->hasAnnotation(*Inst, "SMPLock")) 451 return createStringError(errc::executable_format_error, 452 "duplicate SMP lock at 0x%" PRIx64, IP); 453 454 BC.MIB->addAnnotation(*Inst, "SMPLock", true); 455 MCSymbol *Label = 456 BC.MIB->getOrCreateInstLabel(*Inst, "__SMPLock_", BC.Ctx.get()); 457 458 Fixups.push_back({*SMPLocksSection, Offset, *BF, *Label, 459 /*IsPCRelative*/ true}); 460 } 461 462 const uint64_t NumEntries = SectionSize / SMP_LOCKS_ENTRY_SIZE; 463 BC.outs() << "BOLT-INFO: parsed " << NumEntries << " SMP lock entries\n"; 464 465 return Error::success(); 466 } 467 468 void LinuxKernelRewriter::processInstructionFixups() { 469 for (InstructionFixup &Fixup : Fixups) { 470 if (!BC.shouldEmit(Fixup.BF)) 471 continue; 472 473 Fixup.Section.addRelocation(Fixup.Offset, &Fixup.Label, 474 Fixup.IsPCRelative ? ELF::R_X86_64_PC32 475 : ELF::R_X86_64_64, 476 /*Addend*/ 0); 477 } 478 } 479 480 Error LinuxKernelRewriter::readORCTables() { 481 // NOTE: we should ignore relocations for orc tables as the tables are sorted 482 // post-link time and relocations are not updated. 483 ORCUnwindSection = BC.getUniqueSectionByName(".orc_unwind"); 484 ORCUnwindIPSection = BC.getUniqueSectionByName(".orc_unwind_ip"); 485 486 if (!ORCUnwindSection && !ORCUnwindIPSection) 487 return Error::success(); 488 489 if (!ORCUnwindSection || !ORCUnwindIPSection) 490 return createStringError(errc::executable_format_error, 491 "missing ORC section"); 492 493 NumORCEntries = ORCUnwindIPSection->getSize() / ORC_UNWIND_IP_ENTRY_SIZE; 494 if (ORCUnwindSection->getSize() != NumORCEntries * ORC_UNWIND_ENTRY_SIZE || 495 ORCUnwindIPSection->getSize() != NumORCEntries * ORC_UNWIND_IP_ENTRY_SIZE) 496 return createStringError(errc::executable_format_error, 497 "ORC entries number mismatch detected"); 498 499 const uint64_t IPSectionAddress = ORCUnwindIPSection->getAddress(); 500 DataExtractor OrcDE = DataExtractor(ORCUnwindSection->getContents(), 501 BC.AsmInfo->isLittleEndian(), 502 BC.AsmInfo->getCodePointerSize()); 503 DataExtractor IPDE = DataExtractor(ORCUnwindIPSection->getContents(), 504 BC.AsmInfo->isLittleEndian(), 505 BC.AsmInfo->getCodePointerSize()); 506 DataExtractor::Cursor ORCCursor(0); 507 DataExtractor::Cursor IPCursor(0); 508 uint64_t PrevIP = 0; 509 for (uint32_t Index = 0; Index < NumORCEntries; ++Index) { 510 const uint64_t IP = 511 IPSectionAddress + IPCursor.tell() + (int32_t)IPDE.getU32(IPCursor); 512 513 // Consume the status of the cursor. 514 if (!IPCursor) 515 return createStringError(errc::executable_format_error, 516 "out of bounds while reading ORC IP table: %s", 517 toString(IPCursor.takeError()).c_str()); 518 519 if (IP < PrevIP && opts::Verbosity) 520 BC.errs() << "BOLT-WARNING: out of order IP 0x" << Twine::utohexstr(IP) 521 << " detected while reading ORC\n"; 522 523 PrevIP = IP; 524 525 // Store all entries, including those we are not going to update, as the 526 // tables need to be sorted globally before being written out. 527 ORCEntries.push_back(ORCListEntry()); 528 ORCListEntry &Entry = ORCEntries.back(); 529 530 Entry.IP = IP; 531 Entry.ORC.SPOffset = (int16_t)OrcDE.getU16(ORCCursor); 532 Entry.ORC.BPOffset = (int16_t)OrcDE.getU16(ORCCursor); 533 Entry.ORC.Info = (int16_t)OrcDE.getU16(ORCCursor); 534 Entry.BF = nullptr; 535 536 // Consume the status of the cursor.
537 if (!ORCCursor) 538 return createStringError(errc::executable_format_error, 539 "out of bounds while reading ORC: %s", 540 toString(ORCCursor.takeError()).c_str()); 541 542 if (Entry.ORC == NullORC) 543 continue; 544 545 BinaryFunction *&BF = Entry.BF; 546 BF = BC.getBinaryFunctionContainingAddress(IP, /*CheckPastEnd*/ true); 547 548 // If the entry immediately pointing past the end of the function is not 549 // the terminator entry, then it does not belong to this function. 550 if (BF && BF->getAddress() + BF->getSize() == IP) 551 BF = 0; 552 553 if (!BF) { 554 if (opts::Verbosity) 555 BC.errs() << "BOLT-WARNING: no binary function found matching ORC 0x" 556 << Twine::utohexstr(IP) << ": " << Entry.ORC << '\n'; 557 continue; 558 } 559 560 BF->setHasORC(true); 561 562 if (!BF->hasInstructions()) 563 continue; 564 565 MCInst *Inst = BF->getInstructionAtOffset(IP - BF->getAddress()); 566 if (!Inst) 567 return createStringError( 568 errc::executable_format_error, 569 "no instruction at address 0x%" PRIx64 " in .orc_unwind_ip", IP); 570 571 // Some addresses will have two entries associated with them. The first 572 // one being a "weak" section terminator. Since we ignore the terminator, 573 // we should only assign one entry per instruction. 574 if (BC.MIB->hasAnnotation(*Inst, "ORC")) 575 return createStringError( 576 errc::executable_format_error, 577 "duplicate non-terminal ORC IP 0x%" PRIx64 " in .orc_unwind_ip", IP); 578 579 BC.MIB->addAnnotation(*Inst, "ORC", Entry.ORC); 580 } 581 582 BC.outs() << "BOLT-INFO: parsed " << NumORCEntries << " ORC entries\n"; 583 584 if (opts::DumpORC) { 585 BC.outs() << "BOLT-INFO: ORC unwind information:\n"; 586 for (const ORCListEntry &E : ORCEntries) { 587 BC.outs() << "0x" << Twine::utohexstr(E.IP) << ": " << E.ORC; 588 if (E.BF) 589 BC.outs() << ": " << *E.BF; 590 BC.outs() << '\n'; 591 } 592 } 593 594 // Add entries for functions that don't have explicit ORC info at the start. 595 // We'll have the correct info for them even if ORC for the preceding function 596 // changes. 597 ORCListType NewEntries; 598 for (BinaryFunction &BF : llvm::make_second_range(BC.getBinaryFunctions())) { 599 auto It = llvm::partition_point(ORCEntries, [&](const ORCListEntry &E) { 600 return E.IP <= BF.getAddress(); 601 }); 602 if (It != ORCEntries.begin()) 603 --It; 604 605 if (It->BF == &BF) 606 continue; 607 608 if (It->ORC == NullORC && It->IP == BF.getAddress()) { 609 assert(!It->BF); 610 It->BF = &BF; 611 continue; 612 } 613 614 NewEntries.push_back({BF.getAddress(), &BF, It->ORC}); 615 if (It->ORC != NullORC) 616 BF.setHasORC(true); 617 } 618 619 llvm::copy(NewEntries, std::back_inserter(ORCEntries)); 620 llvm::sort(ORCEntries); 621 622 if (opts::DumpORC) { 623 BC.outs() << "BOLT-INFO: amended ORC unwind information:\n"; 624 for (const ORCListEntry &E : ORCEntries) { 625 BC.outs() << "0x" << Twine::utohexstr(E.IP) << ": " << E.ORC; 626 if (E.BF) 627 BC.outs() << ": " << *E.BF; 628 BC.outs() << '\n'; 629 } 630 } 631 632 return Error::success(); 633 } 634 635 Error LinuxKernelRewriter::processORCPostCFG() { 636 if (!NumORCEntries) 637 return Error::success(); 638 639 // Propagate ORC to the rest of the function. We can annotate every 640 // instruction in every function, but to minimize the overhead, we annotate 641 // the first instruction in every basic block to reflect the state at the 642 // entry. This way, the ORC state can be calculated based on annotations 643 // regardless of the basic block layout. 
Note that if we insert/delete 644 // instructions, we must take care to attach ORC info to the new/deleted ones. 645 for (BinaryFunction &BF : llvm::make_second_range(BC.getBinaryFunctions())) { 646 647 std::optional<ORCState> CurrentState; 648 for (BinaryBasicBlock &BB : BF) { 649 for (MCInst &Inst : BB) { 650 ErrorOr<ORCState> State = 651 BC.MIB->tryGetAnnotationAs<ORCState>(Inst, "ORC"); 652 653 if (State) { 654 CurrentState = *State; 655 continue; 656 } 657 658 // Get state for the start of the function. 659 if (!CurrentState) { 660 // A terminator entry (NullORC) can match the function address. If 661 // there's also a non-terminator entry, it will be placed after the 662 // terminator. Hence, we are looking for the last ORC entry that 663 // matches the address. 664 auto It = 665 llvm::partition_point(ORCEntries, [&](const ORCListEntry &E) { 666 return E.IP <= BF.getAddress(); 667 }); 668 if (It != ORCEntries.begin()) 669 --It; 670 671 assert(It->IP == BF.getAddress() && (!It->BF || It->BF == &BF) && 672 "ORC info at function entry expected."); 673 674 if (It->ORC == NullORC && BF.hasORC()) { 675 BC.errs() << "BOLT-WARNING: ORC unwind info excludes prologue for " 676 << BF << '\n'; 677 } 678 679 It->BF = &BF; 680 681 CurrentState = It->ORC; 682 if (It->ORC != NullORC) 683 BF.setHasORC(true); 684 } 685 686 // While printing ORC, attach info to every instruction for convenience. 687 if (opts::PrintORC || &Inst == &BB.front()) 688 BC.MIB->addAnnotation(Inst, "ORC", *CurrentState); 689 } 690 } 691 } 692 693 return Error::success(); 694 } 695 696 Error LinuxKernelRewriter::rewriteORCTables() { 697 if (!NumORCEntries) 698 return Error::success(); 699 700 // Update ORC sections in-place. As we change the code, the number of ORC 701 // entries may increase for some functions. However, as we remove terminator 702 // redundancy (see below), more space is freed up and we should always be able 703 // to fit new ORC tables in the reserved space. 704 auto createInPlaceWriter = [&](BinarySection &Section) -> BinaryStreamWriter { 705 const size_t Size = Section.getSize(); 706 uint8_t *NewContents = new uint8_t[Size]; 707 Section.updateContents(NewContents, Size); 708 Section.setOutputFileOffset(Section.getInputFileOffset()); 709 return BinaryStreamWriter({NewContents, Size}, BC.AsmInfo->isLittleEndian() 710 ? 
endianness::little 711 : endianness::big); 712 }; 713 BinaryStreamWriter UnwindWriter = createInPlaceWriter(*ORCUnwindSection); 714 BinaryStreamWriter UnwindIPWriter = createInPlaceWriter(*ORCUnwindIPSection); 715 716 uint64_t NumEmitted = 0; 717 std::optional<ORCState> LastEmittedORC; 718 auto emitORCEntry = [&](const uint64_t IP, const ORCState &ORC, 719 MCSymbol *Label = 0, bool Force = false) -> Error { 720 if (LastEmittedORC && ORC == *LastEmittedORC && !Force) 721 return Error::success(); 722 723 LastEmittedORC = ORC; 724 725 if (++NumEmitted > NumORCEntries) 726 return createStringError(errc::executable_format_error, 727 "exceeded the number of allocated ORC entries"); 728 729 if (Label) 730 ORCUnwindIPSection->addRelocation(UnwindIPWriter.getOffset(), Label, 731 Relocation::getPC32(), /*Addend*/ 0); 732 733 const int32_t IPValue = 734 IP - ORCUnwindIPSection->getAddress() - UnwindIPWriter.getOffset(); 735 if (Error E = UnwindIPWriter.writeInteger(IPValue)) 736 return E; 737 738 if (Error E = UnwindWriter.writeInteger(ORC.SPOffset)) 739 return E; 740 if (Error E = UnwindWriter.writeInteger(ORC.BPOffset)) 741 return E; 742 if (Error E = UnwindWriter.writeInteger(ORC.Info)) 743 return E; 744 745 return Error::success(); 746 }; 747 748 // Emit new ORC entries for the emitted function. 749 auto emitORC = [&](const FunctionFragment &FF) -> Error { 750 ORCState CurrentState = NullORC; 751 for (BinaryBasicBlock *BB : FF) { 752 for (MCInst &Inst : *BB) { 753 ErrorOr<ORCState> ErrorOrState = 754 BC.MIB->tryGetAnnotationAs<ORCState>(Inst, "ORC"); 755 if (!ErrorOrState || *ErrorOrState == CurrentState) 756 continue; 757 758 // Issue label for the instruction. 759 MCSymbol *Label = 760 BC.MIB->getOrCreateInstLabel(Inst, "__ORC_", BC.Ctx.get()); 761 762 if (Error E = emitORCEntry(0, *ErrorOrState, Label)) 763 return E; 764 765 CurrentState = *ErrorOrState; 766 } 767 } 768 769 return Error::success(); 770 }; 771 772 // Emit ORC entries for cold fragments. We assume that these fragments are 773 // emitted contiguously in memory using reserved space in the kernel. This 774 // assumption is validated in post-emit pass validateORCTables() where we 775 // check that ORC entries are sorted by their addresses. 776 auto emitColdORC = [&]() -> Error { 777 for (BinaryFunction &BF : 778 llvm::make_second_range(BC.getBinaryFunctions())) { 779 if (!BC.shouldEmit(BF)) 780 continue; 781 for (FunctionFragment &FF : BF.getLayout().getSplitFragments()) 782 if (Error E = emitORC(FF)) 783 return E; 784 } 785 786 return Error::success(); 787 }; 788 789 bool ShouldEmitCold = !BC.BOLTReserved.empty(); 790 for (ORCListEntry &Entry : ORCEntries) { 791 if (ShouldEmitCold && Entry.IP > BC.BOLTReserved.start()) { 792 if (Error E = emitColdORC()) 793 return E; 794 795 // Emit terminator entry at the end of the reserved region. 796 if (Error E = emitORCEntry(BC.BOLTReserved.end(), NullORC)) 797 return E; 798 799 ShouldEmitCold = false; 800 } 801 802 // Emit original entries for functions that we haven't modified. 803 if (!Entry.BF || !BC.shouldEmit(*Entry.BF)) { 804 // Emit terminator only if it marks the start of a function. 805 if (Entry.ORC == NullORC && !Entry.BF) 806 continue; 807 if (Error E = emitORCEntry(Entry.IP, Entry.ORC)) 808 return E; 809 continue; 810 } 811 812 // Emit all ORC entries for a function referenced by an entry and skip over 813 // the rest of entries for this function by resetting its ORC attribute. 
814 if (Entry.BF->hasORC()) { 815 if (Error E = emitORC(Entry.BF->getLayout().getMainFragment())) 816 return E; 817 Entry.BF->setHasORC(false); 818 } 819 } 820 821 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: emitted " << NumEmitted 822 << " ORC entries\n"); 823 824 // Populate ORC tables with a terminator entry with max address to match the 825 // original table sizes. 826 const uint64_t LastIP = std::numeric_limits<uint64_t>::max(); 827 while (UnwindWriter.bytesRemaining()) { 828 if (Error E = emitORCEntry(LastIP, NullORC, nullptr, /*Force*/ true)) 829 return E; 830 } 831 832 return Error::success(); 833 } 834 835 Error LinuxKernelRewriter::validateORCTables() { 836 if (!ORCUnwindIPSection) 837 return Error::success(); 838 839 const uint64_t IPSectionAddress = ORCUnwindIPSection->getAddress(); 840 DataExtractor IPDE = DataExtractor(ORCUnwindIPSection->getOutputContents(), 841 BC.AsmInfo->isLittleEndian(), 842 BC.AsmInfo->getCodePointerSize()); 843 DataExtractor::Cursor IPCursor(0); 844 uint64_t PrevIP = 0; 845 for (uint32_t Index = 0; Index < NumORCEntries; ++Index) { 846 const uint64_t IP = 847 IPSectionAddress + IPCursor.tell() + (int32_t)IPDE.getU32(IPCursor); 848 if (!IPCursor) 849 return createStringError(errc::executable_format_error, 850 "out of bounds while reading ORC IP table: %s", 851 toString(IPCursor.takeError()).c_str()); 852 853 assert(IP >= PrevIP && "Unsorted ORC table detected"); 854 (void)PrevIP; 855 PrevIP = IP; 856 } 857 858 return Error::success(); 859 } 860 861 /// The static call site table is created by objtool and contains entries in the 862 /// following format: 863 /// 864 /// struct static_call_site { 865 /// s32 addr; 866 /// s32 key; 867 /// }; 868 /// 869 Error LinuxKernelRewriter::readStaticCalls() { 870 const BinaryData *StaticCallTable = 871 BC.getBinaryDataByName("__start_static_call_sites"); 872 if (!StaticCallTable) 873 return Error::success(); 874 875 StaticCallTableAddress = StaticCallTable->getAddress(); 876 877 const BinaryData *Stop = BC.getBinaryDataByName("__stop_static_call_sites"); 878 if (!Stop) 879 return createStringError(errc::executable_format_error, 880 "missing __stop_static_call_sites symbol"); 881 882 ErrorOr<BinarySection &> ErrorOrSection = 883 BC.getSectionForAddress(StaticCallTableAddress); 884 if (!ErrorOrSection) 885 return createStringError(errc::executable_format_error, 886 "no section matching __start_static_call_sites"); 887 888 StaticCallSection = *ErrorOrSection; 889 if (!StaticCallSection->containsAddress(Stop->getAddress() - 1)) 890 return createStringError(errc::executable_format_error, 891 "__stop_static_call_sites not in the same section " 892 "as __start_static_call_sites"); 893 894 if ((Stop->getAddress() - StaticCallTableAddress) % STATIC_CALL_ENTRY_SIZE) 895 return createStringError(errc::executable_format_error, 896 "static call table size error"); 897 898 const uint64_t SectionAddress = StaticCallSection->getAddress(); 899 DataExtractor DE(StaticCallSection->getContents(), 900 BC.AsmInfo->isLittleEndian(), 901 BC.AsmInfo->getCodePointerSize()); 902 DataExtractor::Cursor Cursor(StaticCallTableAddress - SectionAddress); 903 uint32_t EntryID = 0; 904 while (Cursor && Cursor.tell() < Stop->getAddress() - SectionAddress) { 905 const uint64_t CallAddress = 906 SectionAddress + Cursor.tell() + (int32_t)DE.getU32(Cursor); 907 const uint64_t KeyAddress = 908 SectionAddress + Cursor.tell() + (int32_t)DE.getU32(Cursor); 909 910 // Consume the status of the cursor. 
911 if (!Cursor) 912 return createStringError(errc::executable_format_error, 913 "out of bounds while reading static calls: %s", 914 toString(Cursor.takeError()).c_str()); 915 916 ++EntryID; 917 918 if (opts::DumpStaticCalls) { 919 BC.outs() << "Static Call Site: " << EntryID << '\n'; 920 BC.outs() << "\tCallAddress: 0x" << Twine::utohexstr(CallAddress) 921 << "\n\tKeyAddress: 0x" << Twine::utohexstr(KeyAddress) 922 << '\n'; 923 } 924 925 BinaryFunction *BF = BC.getBinaryFunctionContainingAddress(CallAddress); 926 if (!BF) 927 continue; 928 929 if (!BC.shouldEmit(*BF)) 930 continue; 931 932 if (!BF->hasInstructions()) 933 continue; 934 935 MCInst *Inst = BF->getInstructionAtOffset(CallAddress - BF->getAddress()); 936 if (!Inst) 937 return createStringError(errc::executable_format_error, 938 "no instruction at call site address 0x%" PRIx64, 939 CallAddress); 940 941 // Check for duplicate entries. 942 if (BC.MIB->hasAnnotation(*Inst, "StaticCall")) 943 return createStringError(errc::executable_format_error, 944 "duplicate static call site at 0x%" PRIx64, 945 CallAddress); 946 947 BC.MIB->addAnnotation(*Inst, "StaticCall", EntryID); 948 949 MCSymbol *Label = 950 BC.MIB->getOrCreateInstLabel(*Inst, "__SC_", BC.Ctx.get()); 951 952 StaticCallEntries.push_back({EntryID, BF, Label}); 953 } 954 955 BC.outs() << "BOLT-INFO: parsed " << StaticCallEntries.size() 956 << " static call entries\n"; 957 958 return Error::success(); 959 } 960 961 /// The static call table is sorted during boot time in 962 /// static_call_sort_entries(). This makes it possible to update existing 963 /// entries in-place ignoring their relative order. 964 Error LinuxKernelRewriter::rewriteStaticCalls() { 965 if (!StaticCallTableAddress || !StaticCallSection) 966 return Error::success(); 967 968 for (auto &Entry : StaticCallEntries) { 969 if (!Entry.Function) 970 continue; 971 972 BinaryFunction &BF = *Entry.Function; 973 if (!BC.shouldEmit(BF)) 974 continue; 975 976 // Create a relocation against the label. 977 const uint64_t EntryOffset = StaticCallTableAddress - 978 StaticCallSection->getAddress() + 979 (Entry.ID - 1) * STATIC_CALL_ENTRY_SIZE; 980 StaticCallSection->addRelocation(EntryOffset, Entry.Label, 981 ELF::R_X86_64_PC32, /*Addend*/ 0); 982 } 983 984 return Error::success(); 985 } 986 987 /// Instructions that access user-space memory can cause page faults. These 988 /// faults will be handled by the kernel and execution will resume at the fixup 989 /// code location if the address was invalid. The kernel uses the exception 990 /// table to match the faulting instruction to its fixup. 
The table consists of 991 /// the following entries: 992 /// 993 /// struct exception_table_entry { 994 /// int insn; 995 /// int fixup; 996 /// int data; 997 /// }; 998 /// 999 /// More info at: 1000 /// https://www.kernel.org/doc/Documentation/x86/exception-tables.txt 1001 Error LinuxKernelRewriter::readExceptionTable() { 1002 ExceptionsSection = BC.getUniqueSectionByName("__ex_table"); 1003 if (!ExceptionsSection) 1004 return Error::success(); 1005 1006 if (ExceptionsSection->getSize() % EXCEPTION_TABLE_ENTRY_SIZE) 1007 return createStringError(errc::executable_format_error, 1008 "exception table size error"); 1009 1010 const uint64_t SectionAddress = ExceptionsSection->getAddress(); 1011 DataExtractor DE(ExceptionsSection->getContents(), 1012 BC.AsmInfo->isLittleEndian(), 1013 BC.AsmInfo->getCodePointerSize()); 1014 DataExtractor::Cursor Cursor(0); 1015 uint32_t EntryID = 0; 1016 while (Cursor && Cursor.tell() < ExceptionsSection->getSize()) { 1017 const uint64_t InstAddress = 1018 SectionAddress + Cursor.tell() + (int32_t)DE.getU32(Cursor); 1019 const uint64_t FixupAddress = 1020 SectionAddress + Cursor.tell() + (int32_t)DE.getU32(Cursor); 1021 const uint64_t Data = DE.getU32(Cursor); 1022 1023 // Consume the status of the cursor. 1024 if (!Cursor) 1025 return createStringError( 1026 errc::executable_format_error, 1027 "out of bounds while reading exception table: %s", 1028 toString(Cursor.takeError()).c_str()); 1029 1030 ++EntryID; 1031 1032 if (opts::DumpExceptions) { 1033 BC.outs() << "Exception Entry: " << EntryID << '\n'; 1034 BC.outs() << "\tInsn: 0x" << Twine::utohexstr(InstAddress) << '\n' 1035 << "\tFixup: 0x" << Twine::utohexstr(FixupAddress) << '\n' 1036 << "\tData: 0x" << Twine::utohexstr(Data) << '\n'; 1037 } 1038 1039 MCInst *Inst = nullptr; 1040 MCSymbol *FixupLabel = nullptr; 1041 1042 BinaryFunction *InstBF = BC.getBinaryFunctionContainingAddress(InstAddress); 1043 if (InstBF && BC.shouldEmit(*InstBF)) { 1044 Inst = InstBF->getInstructionAtOffset(InstAddress - InstBF->getAddress()); 1045 if (!Inst) 1046 return createStringError(errc::executable_format_error, 1047 "no instruction at address 0x%" PRIx64 1048 " in exception table", 1049 InstAddress); 1050 BC.MIB->addAnnotation(*Inst, "ExceptionEntry", EntryID); 1051 FunctionsWithExceptions.insert(InstBF); 1052 } 1053 1054 if (!InstBF && opts::Verbosity) { 1055 BC.outs() << "BOLT-INFO: no function matches instruction at 0x" 1056 << Twine::utohexstr(InstAddress) 1057 << " referenced by Linux exception table\n"; 1058 } 1059 1060 BinaryFunction *FixupBF = 1061 BC.getBinaryFunctionContainingAddress(FixupAddress); 1062 if (FixupBF && BC.shouldEmit(*FixupBF)) { 1063 const uint64_t Offset = FixupAddress - FixupBF->getAddress(); 1064 if (!FixupBF->getInstructionAtOffset(Offset)) 1065 return createStringError(errc::executable_format_error, 1066 "no instruction at fixup address 0x%" PRIx64 1067 " in exception table", 1068 FixupAddress); 1069 FixupLabel = Offset ? 
FixupBF->addEntryPointAtOffset(Offset) 1070 : FixupBF->getSymbol(); 1071 if (Inst) 1072 BC.MIB->addAnnotation(*Inst, "Fixup", FixupLabel->getName()); 1073 FunctionsWithExceptions.insert(FixupBF); 1074 } 1075 1076 if (!FixupBF && opts::Verbosity) { 1077 BC.outs() << "BOLT-INFO: no function matches fixup code at 0x" 1078 << Twine::utohexstr(FixupAddress) 1079 << " referenced by Linux exception table\n"; 1080 } 1081 } 1082 1083 BC.outs() << "BOLT-INFO: parsed " 1084 << ExceptionsSection->getSize() / EXCEPTION_TABLE_ENTRY_SIZE 1085 << " exception table entries\n"; 1086 1087 return Error::success(); 1088 } 1089 1090 /// Depending on the value of CONFIG_BUILDTIME_TABLE_SORT, the kernel expects 1091 /// the exception table to be sorted. Hence we have to sort it after code 1092 /// reordering. 1093 Error LinuxKernelRewriter::rewriteExceptionTable() { 1094 // Disable output of functions with exceptions before rewrite support is 1095 // added. 1096 for (BinaryFunction *BF : FunctionsWithExceptions) 1097 BF->setSimple(false); 1098 1099 return Error::success(); 1100 } 1101 1102 /// The .parainstructions section contains information for patching paravirtual 1103 /// call instructions at runtime. The entries in the section are in the form: 1104 /// 1105 /// struct paravirt_patch_site { 1106 /// u8 *instr; /* original instructions */ 1107 /// u8 type; /* type of this instruction */ 1108 /// u8 len; /* length of original instruction */ 1109 /// }; 1110 /// 1111 /// Note that the structures are aligned at an 8-byte boundary. 1112 Error LinuxKernelRewriter::readParaInstructions() { 1113 ParavirtualPatchSection = BC.getUniqueSectionByName(".parainstructions"); 1114 if (!ParavirtualPatchSection) 1115 return Error::success(); 1116 1117 DataExtractor DE = DataExtractor(ParavirtualPatchSection->getContents(), 1118 BC.AsmInfo->isLittleEndian(), 1119 BC.AsmInfo->getCodePointerSize()); 1120 uint32_t EntryID = 0; 1121 DataExtractor::Cursor Cursor(0); 1122 while (Cursor && !DE.eof(Cursor)) { 1123 const uint64_t NextOffset = alignTo(Cursor.tell(), Align(PARA_PATCH_ALIGN)); 1124 if (!DE.isValidOffset(NextOffset)) 1125 break; 1126 1127 Cursor.seek(NextOffset); 1128 1129 const uint64_t InstrLocation = DE.getU64(Cursor); 1130 const uint8_t Type = DE.getU8(Cursor); 1131 const uint8_t Len = DE.getU8(Cursor); 1132 1133 if (!Cursor) 1134 return createStringError( 1135 errc::executable_format_error, 1136 "out of bounds while reading .parainstructions: %s", 1137 toString(Cursor.takeError()).c_str()); 1138 1139 ++EntryID; 1140 1141 if (opts::DumpParavirtualPatchSites) { 1142 BC.outs() << "Paravirtual patch site: " << EntryID << '\n'; 1143 BC.outs() << "\tInstr: 0x" << Twine::utohexstr(InstrLocation) 1144 << "\n\tType: 0x" << Twine::utohexstr(Type) << "\n\tLen: 0x" 1145 << Twine::utohexstr(Len) << '\n'; 1146 } 1147 1148 BinaryFunction *BF = BC.getBinaryFunctionContainingAddress(InstrLocation); 1149 if (!BF && opts::Verbosity) { 1150 BC.outs() << "BOLT-INFO: no function matches address 0x" 1151 << Twine::utohexstr(InstrLocation) 1152 << " referenced by paravirtual patch site\n"; 1153 } 1154 1155 if (BF && BC.shouldEmit(*BF)) { 1156 MCInst *Inst = 1157 BF->getInstructionAtOffset(InstrLocation - BF->getAddress()); 1158 if (!Inst) 1159 return createStringError(errc::executable_format_error, 1160 "no instruction at address 0x%" PRIx64 1161 " in paravirtual call site %d", 1162 InstrLocation, EntryID); 1163 BC.MIB->addAnnotation(*Inst, "ParaSite", EntryID); 1164 } 1165 } 1166 1167 BC.outs() << "BOLT-INFO: parsed " << EntryID << " 
paravirtual patch sites\n"; 1168 1169 return Error::success(); 1170 } 1171 1172 void LinuxKernelRewriter::skipFunctionsWithAnnotation( 1173 StringRef Annotation) const { 1174 for (BinaryFunction &BF : llvm::make_second_range(BC.getBinaryFunctions())) { 1175 if (!BC.shouldEmit(BF)) 1176 continue; 1177 for (const BinaryBasicBlock &BB : BF) { 1178 const bool HasAnnotation = llvm::any_of(BB, [&](const MCInst &Inst) { 1179 return BC.MIB->hasAnnotation(Inst, Annotation); 1180 }); 1181 if (HasAnnotation) { 1182 BF.setSimple(false); 1183 break; 1184 } 1185 } 1186 } 1187 } 1188 1189 Error LinuxKernelRewriter::rewriteParaInstructions() { 1190 // Disable output of functions with paravirtual instructions before the 1191 // rewrite support is complete. 1192 skipFunctionsWithAnnotation("ParaSite"); 1193 1194 return Error::success(); 1195 } 1196 1197 /// Process __bug_table section. 1198 /// This section contains information useful for kernel debugging, mostly 1199 /// utilized by WARN()/WARN_ON() macros and deprecated BUG()/BUG_ON(). 1200 /// 1201 /// Each entry in the section is a struct bug_entry that contains a pointer to 1202 /// the ud2 instruction corresponding to the bug, corresponding file name (both 1203 /// pointers use PC relative offset addressing), line number, and flags. 1204 /// The definition of the struct bug_entry can be found in 1205 /// `include/asm-generic/bug.h`. The first entry in the struct is an instruction 1206 /// address encoded as a PC-relative offset. In theory, it could be an absolute 1207 /// address if CONFIG_GENERIC_BUG_RELATIVE_POINTERS is not set, but in practice 1208 /// the kernel code relies on it being a relative offset on x86-64. 1209 Error LinuxKernelRewriter::readBugTable() { 1210 BugTableSection = BC.getUniqueSectionByName("__bug_table"); 1211 if (!BugTableSection) 1212 return Error::success(); 1213 1214 if (BugTableSection->getSize() % BUG_TABLE_ENTRY_SIZE) 1215 return createStringError(errc::executable_format_error, 1216 "bug table size error"); 1217 1218 const uint64_t SectionAddress = BugTableSection->getAddress(); 1219 DataExtractor DE(BugTableSection->getContents(), BC.AsmInfo->isLittleEndian(), 1220 BC.AsmInfo->getCodePointerSize()); 1221 DataExtractor::Cursor Cursor(0); 1222 uint32_t EntryID = 0; 1223 while (Cursor && Cursor.tell() < BugTableSection->getSize()) { 1224 const uint64_t Pos = Cursor.tell(); 1225 const uint64_t InstAddress = 1226 SectionAddress + Pos + (int32_t)DE.getU32(Cursor); 1227 Cursor.seek(Pos + BUG_TABLE_ENTRY_SIZE); 1228 1229 if (!Cursor) 1230 return createStringError(errc::executable_format_error, 1231 "out of bounds while reading __bug_table: %s", 1232 toString(Cursor.takeError()).c_str()); 1233 1234 ++EntryID; 1235 1236 BinaryFunction *BF = BC.getBinaryFunctionContainingAddress(InstAddress); 1237 if (!BF && opts::Verbosity) { 1238 BC.outs() << "BOLT-INFO: no function matches address 0x" 1239 << Twine::utohexstr(InstAddress) 1240 << " referenced by bug table\n"; 1241 } 1242 1243 if (BF && BC.shouldEmit(*BF)) { 1244 MCInst *Inst = BF->getInstructionAtOffset(InstAddress - BF->getAddress()); 1245 if (!Inst) 1246 return createStringError(errc::executable_format_error, 1247 "no instruction at address 0x%" PRIx64 1248 " referenced by bug table entry %d", 1249 InstAddress, EntryID); 1250 BC.MIB->addAnnotation(*Inst, "BugEntry", EntryID); 1251 1252 FunctionBugList[BF].push_back(EntryID); 1253 } 1254 } 1255 1256 BC.outs() << "BOLT-INFO: parsed " << EntryID << " bug table entries\n"; 1257 1258 return Error::success(); 1259 } 1260 1261 /// 
find_bug() uses linear search to match an address to an entry in the bug 1262 /// table. Hence, there is no need to sort entries when rewriting the table. 1263 /// When we need to erase an entry, we set its instruction address to zero. 1264 Error LinuxKernelRewriter::rewriteBugTable() { 1265 if (!BugTableSection) 1266 return Error::success(); 1267 1268 for (BinaryFunction &BF : llvm::make_second_range(BC.getBinaryFunctions())) { 1269 if (!BC.shouldEmit(BF)) 1270 continue; 1271 1272 if (!FunctionBugList.count(&BF)) 1273 continue; 1274 1275 // Bugs that will be emitted for this function. 1276 DenseSet<uint32_t> EmittedIDs; 1277 for (BinaryBasicBlock &BB : BF) { 1278 for (MCInst &Inst : BB) { 1279 if (!BC.MIB->hasAnnotation(Inst, "BugEntry")) 1280 continue; 1281 const uint32_t ID = BC.MIB->getAnnotationAs<uint32_t>(Inst, "BugEntry"); 1282 EmittedIDs.insert(ID); 1283 1284 // Create a relocation entry for this bug entry. 1285 MCSymbol *Label = 1286 BC.MIB->getOrCreateInstLabel(Inst, "__BUG_", BC.Ctx.get()); 1287 const uint64_t EntryOffset = (ID - 1) * BUG_TABLE_ENTRY_SIZE; 1288 BugTableSection->addRelocation(EntryOffset, Label, ELF::R_X86_64_PC32, 1289 /*Addend*/ 0); 1290 } 1291 } 1292 1293 // Clear bug entries that were not emitted for this function, e.g. as a 1294 // result of DCE, by setting their instruction address to zero. 1295 for (const uint32_t ID : FunctionBugList[&BF]) { 1296 if (!EmittedIDs.count(ID)) { 1297 const uint64_t EntryOffset = (ID - 1) * BUG_TABLE_ENTRY_SIZE; 1298 BugTableSection->addRelocation(EntryOffset, nullptr, ELF::R_X86_64_PC32, 1299 /*Addend*/ 0); 1300 } 1301 } 1302 } 1303 1304 return Error::success(); 1305 } 1306 1307 /// The kernel can replace certain instruction sequences depending on the 1308 /// hardware it is running on and features specified during boot time. The 1309 /// information about alternative instruction sequences is stored in 1310 /// .altinstructions section. The format of entries in this section is defined 1311 /// in arch/x86/include/asm/alternative.h: 1312 /// 1313 /// struct alt_instr { 1314 /// s32 instr_offset; 1315 /// s32 repl_offset; 1316 /// uXX feature; 1317 /// u8 instrlen; 1318 /// u8 replacementlen; 1319 /// u8 padlen; // present in older kernels 1320 /// } __packed; 1321 /// 1322 /// Note that the structure is packed. 1323 Error LinuxKernelRewriter::readAltInstructions() { 1324 AltInstrSection = BC.getUniqueSectionByName(".altinstructions"); 1325 if (!AltInstrSection) 1326 return Error::success(); 1327 1328 const uint64_t Address = AltInstrSection->getAddress(); 1329 DataExtractor DE = DataExtractor(AltInstrSection->getContents(), 1330 BC.AsmInfo->isLittleEndian(), 1331 BC.AsmInfo->getCodePointerSize()); 1332 uint64_t EntryID = 0; 1333 DataExtractor::Cursor Cursor(0); 1334 while (Cursor && !DE.eof(Cursor)) { 1335 const uint64_t OrgInstAddress = 1336 Address + Cursor.tell() + (int32_t)DE.getU32(Cursor); 1337 const uint64_t AltInstAddress = 1338 Address + Cursor.tell() + (int32_t)DE.getU32(Cursor); 1339 const uint64_t Feature = DE.getUnsigned(Cursor, opts::AltInstFeatureSize); 1340 const uint8_t OrgSize = DE.getU8(Cursor); 1341 const uint8_t AltSize = DE.getU8(Cursor); 1342 1343 // Older kernels may have the padlen field. 1344 const uint8_t PadLen = opts::AltInstHasPadLen ? 
DE.getU8(Cursor) : 0; 1345 1346 if (!Cursor) 1347 return createStringError( 1348 errc::executable_format_error, 1349 "out of bounds while reading .altinstructions: %s", 1350 toString(Cursor.takeError()).c_str()); 1351 1352 ++EntryID; 1353 1354 if (opts::DumpAltInstructions) { 1355 BC.outs() << "Alternative instruction entry: " << EntryID 1356 << "\n\tOrg: 0x" << Twine::utohexstr(OrgInstAddress) 1357 << "\n\tAlt: 0x" << Twine::utohexstr(AltInstAddress) 1358 << "\n\tFeature: 0x" << Twine::utohexstr(Feature) 1359 << "\n\tOrgSize: " << (int)OrgSize 1360 << "\n\tAltSize: " << (int)AltSize << '\n'; 1361 if (opts::AltInstHasPadLen) 1362 BC.outs() << "\tPadLen: " << (int)PadLen << '\n'; 1363 } 1364 1365 if (AltSize > OrgSize) 1366 return createStringError(errc::executable_format_error, 1367 "error reading .altinstructions"); 1368 1369 BinaryFunction *BF = BC.getBinaryFunctionContainingAddress(OrgInstAddress); 1370 if (!BF && opts::Verbosity) { 1371 BC.outs() << "BOLT-INFO: no function matches address 0x" 1372 << Twine::utohexstr(OrgInstAddress) 1373 << " of instruction from .altinstructions\n"; 1374 } 1375 1376 BinaryFunction *AltBF = 1377 BC.getBinaryFunctionContainingAddress(AltInstAddress); 1378 if (AltBF && BC.shouldEmit(*AltBF)) { 1379 BC.errs() 1380 << "BOLT-WARNING: alternative instruction sequence found in function " 1381 << *AltBF << '\n'; 1382 AltBF->setIgnored(); 1383 } 1384 1385 if (!BF || !BC.shouldEmit(*BF)) 1386 continue; 1387 1388 if (OrgInstAddress + OrgSize > BF->getAddress() + BF->getSize()) 1389 return createStringError(errc::executable_format_error, 1390 "error reading .altinstructions"); 1391 1392 MCInst *Inst = 1393 BF->getInstructionAtOffset(OrgInstAddress - BF->getAddress()); 1394 if (!Inst) 1395 return createStringError(errc::executable_format_error, 1396 "no instruction at address 0x%" PRIx64 1397 " referenced by .altinstructions entry %d", 1398 OrgInstAddress, EntryID); 1399 1400 // There could be more than one alternative instruction sequences for the 1401 // same original instruction. Annotate each alternative separately. 1402 std::string AnnotationName = "AltInst"; 1403 unsigned N = 2; 1404 while (BC.MIB->hasAnnotation(*Inst, AnnotationName)) 1405 AnnotationName = "AltInst" + std::to_string(N++); 1406 1407 BC.MIB->addAnnotation(*Inst, AnnotationName, EntryID); 1408 1409 // Annotate all instructions from the original sequence. Note that it's not 1410 // the most efficient way to look for instructions in the address range, 1411 // but since alternative instructions are uncommon, it will do for now. 1412 for (uint32_t Offset = 1; Offset < OrgSize; ++Offset) { 1413 Inst = BF->getInstructionAtOffset(OrgInstAddress + Offset - 1414 BF->getAddress()); 1415 if (Inst) 1416 BC.MIB->addAnnotation(*Inst, AnnotationName, EntryID); 1417 } 1418 } 1419 1420 BC.outs() << "BOLT-INFO: parsed " << EntryID 1421 << " alternative instruction entries\n"; 1422 1423 return Error::success(); 1424 } 1425 1426 Error LinuxKernelRewriter::rewriteAltInstructions() { 1427 // Disable output of functions with alt instructions before the rewrite 1428 // support is complete. 1429 skipFunctionsWithAnnotation("AltInst"); 1430 1431 return Error::success(); 1432 } 1433 1434 /// When the Linux kernel needs to handle an error associated with a given PCI 1435 /// device, it uses a table stored in .pci_fixup section to locate a fixup code 1436 /// specific to the vendor and the problematic device. 
The section contains a 1437 /// list of the following structures defined in include/linux/pci.h: 1438 /// 1439 /// struct pci_fixup { 1440 /// u16 vendor; /* Or PCI_ANY_ID */ 1441 /// u16 device; /* Or PCI_ANY_ID */ 1442 /// u32 class; /* Or PCI_ANY_ID */ 1443 /// unsigned int class_shift; /* should be 0, 8, 16 */ 1444 /// int hook_offset; 1445 /// }; 1446 /// 1447 /// Normally, the hook will point to a function start and we don't have to 1448 /// update the pointer if we are not relocating functions. Hence, while reading 1449 /// the table we validate this assumption. If a function has a fixup code in the 1450 /// middle of its body, we issue a warning and ignore it. 1451 Error LinuxKernelRewriter::readPCIFixupTable() { 1452 PCIFixupSection = BC.getUniqueSectionByName(".pci_fixup"); 1453 if (!PCIFixupSection) 1454 return Error::success(); 1455 1456 if (PCIFixupSection->getSize() % PCI_FIXUP_ENTRY_SIZE) 1457 return createStringError(errc::executable_format_error, 1458 "PCI fixup table size error"); 1459 1460 const uint64_t Address = PCIFixupSection->getAddress(); 1461 DataExtractor DE = DataExtractor(PCIFixupSection->getContents(), 1462 BC.AsmInfo->isLittleEndian(), 1463 BC.AsmInfo->getCodePointerSize()); 1464 uint64_t EntryID = 0; 1465 DataExtractor::Cursor Cursor(0); 1466 while (Cursor && !DE.eof(Cursor)) { 1467 const uint16_t Vendor = DE.getU16(Cursor); 1468 const uint16_t Device = DE.getU16(Cursor); 1469 const uint32_t Class = DE.getU32(Cursor); 1470 const uint32_t ClassShift = DE.getU32(Cursor); 1471 const uint64_t HookAddress = 1472 Address + Cursor.tell() + (int32_t)DE.getU32(Cursor); 1473 1474 if (!Cursor) 1475 return createStringError(errc::executable_format_error, 1476 "out of bounds while reading .pci_fixup: %s", 1477 toString(Cursor.takeError()).c_str()); 1478 1479 ++EntryID; 1480 1481 if (opts::DumpPCIFixups) { 1482 BC.outs() << "PCI fixup entry: " << EntryID << "\n\tVendor 0x" 1483 << Twine::utohexstr(Vendor) << "\n\tDevice: 0x" 1484 << Twine::utohexstr(Device) << "\n\tClass: 0x" 1485 << Twine::utohexstr(Class) << "\n\tClassShift: 0x" 1486 << Twine::utohexstr(ClassShift) << "\n\tHookAddress: 0x" 1487 << Twine::utohexstr(HookAddress) << '\n'; 1488 } 1489 1490 BinaryFunction *BF = BC.getBinaryFunctionContainingAddress(HookAddress); 1491 if (!BF && opts::Verbosity) { 1492 BC.outs() << "BOLT-INFO: no function matches address 0x" 1493 << Twine::utohexstr(HookAddress) 1494 << " of hook from .pci_fixup\n"; 1495 } 1496 1497 if (!BF || !BC.shouldEmit(*BF)) 1498 continue; 1499 1500 if (const uint64_t Offset = HookAddress - BF->getAddress()) { 1501 BC.errs() << "BOLT-WARNING: PCI fixup detected in the middle of function " 1502 << *BF << " at offset 0x" << Twine::utohexstr(Offset) << '\n'; 1503 BF->setSimple(false); 1504 } 1505 } 1506 1507 BC.outs() << "BOLT-INFO: parsed " << EntryID << " PCI fixup entries\n"; 1508 1509 return Error::success(); 1510 } 1511 1512 /// Runtime code modification used by static keys is the most ubiquitous 1513 /// self-modifying feature of the Linux kernel. The idea is to eliminate the 1514 /// condition check and associated conditional jump on a hot path if that 1515 /// condition (based on a boolean value of a static key) does not change often. 1516 /// Whenever the condition changes, the kernel runtime modifies all code paths 1517 /// associated with that key flipping the code between nop and (unconditional) 1518 /// jump. 
The information about the code is stored in a static key jump table 1519 /// and contains the list of entries of the following type from 1520 /// include/linux/jump_label.h: 1521 // 1522 /// struct jump_entry { 1523 /// s32 code; 1524 /// s32 target; 1525 /// long key; // key may be far away from the core kernel under KASLR 1526 /// }; 1527 /// 1528 /// The list does not have to be stored in any sorted way, but it is sorted at 1529 /// boot time (or module initialization time) first by "key" and then by "code". 1530 /// jump_label_sort_entries() is responsible for sorting the table. 1531 /// 1532 /// The key in jump_entry structure uses lower two bits of the key address 1533 /// (which itself is aligned) to store extra information. We are interested in 1534 /// the lower bit which indicates if the key is likely to be set on the code 1535 /// path associated with this jump_entry. 1536 /// 1537 /// static_key_{enable,disable}() functions modify the code based on key and 1538 /// jump table entries. 1539 /// 1540 /// jump_label_update() updates all code entries for a given key. Batch mode is 1541 /// used for x86. 1542 /// 1543 /// The actual patching happens in text_poke_bp_batch() that overrides the first 1544 /// byte of the sequence with int3 before proceeding with actual code 1545 /// replacement. 1546 Error LinuxKernelRewriter::readStaticKeysJumpTable() { 1547 const BinaryData *StaticKeysJumpTable = 1548 BC.getBinaryDataByName("__start___jump_table"); 1549 if (!StaticKeysJumpTable) 1550 return Error::success(); 1551 1552 StaticKeysJumpTableAddress = StaticKeysJumpTable->getAddress(); 1553 1554 const BinaryData *Stop = BC.getBinaryDataByName("__stop___jump_table"); 1555 if (!Stop) 1556 return createStringError(errc::executable_format_error, 1557 "missing __stop___jump_table symbol"); 1558 1559 ErrorOr<BinarySection &> ErrorOrSection = 1560 BC.getSectionForAddress(StaticKeysJumpTableAddress); 1561 if (!ErrorOrSection) 1562 return createStringError(errc::executable_format_error, 1563 "no section matching __start___jump_table"); 1564 1565 StaticKeysJumpSection = *ErrorOrSection; 1566 if (!StaticKeysJumpSection->containsAddress(Stop->getAddress() - 1)) 1567 return createStringError(errc::executable_format_error, 1568 "__stop___jump_table not in the same section " 1569 "as __start___jump_table"); 1570 1571 if ((Stop->getAddress() - StaticKeysJumpTableAddress) % 1572 STATIC_KEYS_JUMP_ENTRY_SIZE) 1573 return createStringError(errc::executable_format_error, 1574 "static keys jump table size error"); 1575 1576 const uint64_t SectionAddress = StaticKeysJumpSection->getAddress(); 1577 DataExtractor DE(StaticKeysJumpSection->getContents(), 1578 BC.AsmInfo->isLittleEndian(), 1579 BC.AsmInfo->getCodePointerSize()); 1580 DataExtractor::Cursor Cursor(StaticKeysJumpTableAddress - SectionAddress); 1581 uint32_t EntryID = 0; 1582 while (Cursor && Cursor.tell() < Stop->getAddress() - SectionAddress) { 1583 const uint64_t JumpAddress = 1584 SectionAddress + Cursor.tell() + (int32_t)DE.getU32(Cursor); 1585 const uint64_t TargetAddress = 1586 SectionAddress + Cursor.tell() + (int32_t)DE.getU32(Cursor); 1587 const uint64_t KeyAddress = 1588 SectionAddress + Cursor.tell() + (int64_t)DE.getU64(Cursor); 1589 1590 // Consume the status of the cursor. 
1591     if (!Cursor)
1592       return createStringError(
1593           errc::executable_format_error,
1594           "out of bounds while reading static keys jump table: %s",
1595           toString(Cursor.takeError()).c_str());
1596
1597     ++EntryID;
1598
1599     JumpInfo.push_back(JumpInfoEntry());
1600     JumpInfoEntry &Info = JumpInfo.back();
1601     Info.Likely = KeyAddress & 1;
1602
1603     if (opts::DumpStaticKeys) {
1604       BC.outs() << "Static key jump entry: " << EntryID
1605                 << "\n\tJumpAddress: 0x" << Twine::utohexstr(JumpAddress)
1606                 << "\n\tTargetAddress: 0x" << Twine::utohexstr(TargetAddress)
1607                 << "\n\tKeyAddress: 0x" << Twine::utohexstr(KeyAddress)
1608                 << "\n\tIsLikely: " << Info.Likely << '\n';
1609     }
1610
1611     BinaryFunction *BF = BC.getBinaryFunctionContainingAddress(JumpAddress);
1612     if (!BF && opts::Verbosity) {
1613       BC.outs()
1614           << "BOLT-INFO: no function matches address 0x"
1615           << Twine::utohexstr(JumpAddress)
1616           << " of jump instruction referenced from static keys jump table\n";
1617     }
1618
1619     if (!BF || !BC.shouldEmit(*BF))
1620       continue;
1621
1622     MCInst *Inst = BF->getInstructionAtOffset(JumpAddress - BF->getAddress());
1623     if (!Inst)
1624       return createStringError(
1625           errc::executable_format_error,
1626           "no instruction at static keys jump site address 0x%" PRIx64,
1627           JumpAddress);
1628
1629     if (!BF->containsAddress(TargetAddress))
1630       return createStringError(
1631           errc::executable_format_error,
1632           "invalid target of static keys jump at 0x%" PRIx64 " : 0x%" PRIx64,
1633           JumpAddress, TargetAddress);
1634
1635     const bool IsBranch = BC.MIB->isBranch(*Inst);
1636     if (!IsBranch && !BC.MIB->isNoop(*Inst))
1637       return createStringError(errc::executable_format_error,
1638                                "jump or nop expected at address 0x%" PRIx64,
1639                                JumpAddress);
1640
1641     const uint64_t Size = BC.computeInstructionSize(*Inst);
1642     if (Size != 2 && Size != 5) {
1643       return createStringError(
1644           errc::executable_format_error,
1645           "unexpected static keys jump size at address 0x%" PRIx64,
1646           JumpAddress);
1647     }
1648
1649     MCSymbol *Target = BF->registerBranch(JumpAddress, TargetAddress);
1650     MCInst StaticKeyBranch;
1651
1652     // Create a conditional branch instruction. The actual condition code
1653     // should not matter as long as it's valid. The instruction should be
1654     // treated as a conditional branch for control-flow purposes. Before we emit
1655     // the code, it will be converted to a different instruction in
1656     // rewriteStaticKeysJumpTable().
1657     //
1658     // NB: for older kernels, under the LongJumpLabels option, we create a long
1659     // conditional branch to guarantee that code size estimation takes
1660     // into account the extra bytes needed for the long branch that will be used
1661     // by the kernel patching code. Newer kernels can work with both short
1662     // and long branches. The code for a long conditional branch is larger
1663     // than for an unconditional one, so we are pessimistic in our estimate.
1664     if (opts::LongJumpLabels)
1665       BC.MIB->createLongCondBranch(StaticKeyBranch, Target, 0, BC.Ctx.get());
1666     else
1667       BC.MIB->createCondBranch(StaticKeyBranch, Target, 0, BC.Ctx.get());
1668     BC.MIB->moveAnnotations(std::move(*Inst), StaticKeyBranch);
1669     BC.MIB->setDynamicBranch(StaticKeyBranch, EntryID);
1670     *Inst = StaticKeyBranch;
1671
1672     // IsBranch = InitialValue ^ LIKELY
1673     //    IsBranch   InitialValue   LIKELY
1674     //       0            0            0
1675     //       1            0            1
1676     //       1            1            0
1677     //       0            1            1
1678     //
1679     // => InitialValue = IsBranch ^ LIKELY
1680     Info.InitValue = IsBranch ^ Info.Likely;
1681
1682     // Add annotations to facilitate manual code analysis.
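    // For example, a nop found at a likely-biased site (IsBranch = 0,
    // Likely = 1) is annotated as {Likely: 1, InitValue: 1}, whereas a jump at
    // the same site would be {Likely: 1, InitValue: 0}.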
1683     BC.MIB->addAnnotation(*Inst, "Likely", Info.Likely);
1684     BC.MIB->addAnnotation(*Inst, "InitValue", Info.InitValue);
1685     if (!BC.MIB->getSize(*Inst))
1686       BC.MIB->setSize(*Inst, Size);
1687
1688     if (!BC.MIB->getOffset(*Inst))
1689       BC.MIB->setOffset(*Inst, JumpAddress - BF->getAddress());
1690
1691     if (opts::LongJumpLabels)
1692       BC.MIB->setSize(*Inst, 5);
1693   }
1694
1695   BC.outs() << "BOLT-INFO: parsed " << EntryID << " static keys jump entries\n";
1696
1697   return Error::success();
1698 }
1699
1700 // Pre-emit pass. Convert dynamic branch instructions into jumps that can be
1701 // relaxed. In the post-emit pass we will convert those jumps into nops when
1702 // necessary. We do the unconditional conversion into jumps so that the jumps
1703 // can be relaxed and the optimal size of the jump/nop instruction is selected.
1704 Error LinuxKernelRewriter::rewriteStaticKeysJumpTable() {
1705   if (!StaticKeysJumpSection)
1706     return Error::success();
1707
1708   uint64_t NumShort = 0;
1709   uint64_t NumLong = 0;
1710   for (BinaryFunction &BF : llvm::make_second_range(BC.getBinaryFunctions())) {
1711     if (!BC.shouldEmit(BF))
1712       continue;
1713
1714     for (BinaryBasicBlock &BB : BF) {
1715       for (MCInst &Inst : BB) {
1716         if (!BC.MIB->isDynamicBranch(Inst))
1717           continue;
1718
1719         const uint32_t EntryID = *BC.MIB->getDynamicBranchID(Inst);
1720         MCSymbol *Target =
1721             const_cast<MCSymbol *>(BC.MIB->getTargetSymbol(Inst));
1722         assert(Target && "Target symbol should be set.");
1723
1724         const JumpInfoEntry &Info = JumpInfo[EntryID - 1];
1725         const bool IsBranch = Info.Likely ^ Info.InitValue;
1726
1727         uint32_t Size = *BC.MIB->getSize(Inst);
1728         if (Size == 2)
1729           ++NumShort;
1730         else if (Size == 5)
1731           ++NumLong;
1732         else
1733           llvm_unreachable("Wrong size for static keys jump instruction.");
1734
1735         MCInst NewInst;
1736         // Replace the instruction with an unconditional jump even if it needs
1737         // to be a nop in the binary.
1738         if (opts::LongJumpLabels) {
1739           BC.MIB->createLongUncondBranch(NewInst, Target, BC.Ctx.get());
1740         } else {
1741           // Newer kernels can handle short and long jumps for static keys.
1742           // Optimistically, emit a short jump and check if it gets relaxed
1743           // into a long one during post-emit. Only then convert the jump to a nop.
1744           BC.MIB->createUncondBranch(NewInst, Target, BC.Ctx.get());
1745         }
1746
1747         BC.MIB->moveAnnotations(std::move(Inst), NewInst);
1748         Inst = NewInst;
1749
1750         // Mark the instruction for nop conversion.
1751         if (!IsBranch)
1752           NopIDs.insert(EntryID);
1753
1754         MCSymbol *Label =
1755             BC.MIB->getOrCreateInstLabel(Inst, "__SK_", BC.Ctx.get());
1756
1757         // Create a relocation against the label.
1758         const uint64_t EntryOffset = StaticKeysJumpTableAddress -
1759                                      StaticKeysJumpSection->getAddress() +
1760                                      (EntryID - 1) * 16;
1761         StaticKeysJumpSection->addRelocation(EntryOffset, Label,
1762                                              ELF::R_X86_64_PC32,
1763                                              /*Addend*/ 0);
1764         StaticKeysJumpSection->addRelocation(EntryOffset + 4, Target,
1765                                              ELF::R_X86_64_PC32, /*Addend*/ 0);
1766       }
1767     }
1768   }
1769
1770   BC.outs() << "BOLT-INFO: the input contains " << NumShort << " short and "
1771             << NumLong << " long static keys jumps in optimized functions\n";
1772
1773   return Error::success();
1774 }
1775
1776 // Post-emit pass of static keys jump section. Convert jumps to nops.
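//
// For a site whose key requires a nop (its entry is recorded in NopIDs), the
// overall flow is roughly:
//
//   read:      input 2- or 5-byte jmp/nop -> conditional branch (readStaticKeysJumpTable)
//   pre-emit:  conditional branch         -> unconditional jmp  (rewriteStaticKeysJumpTable)
//   emission:  the jmp is relaxed to its final 2- or 5-byte form
//   post-emit: the jmp is overwritten in place with a same-size nop (this pass)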
1777 Error LinuxKernelRewriter::updateStaticKeysJumpTablePostEmit() {
1778   if (!StaticKeysJumpSection || !StaticKeysJumpSection->isFinalized())
1779     return Error::success();
1780
1781   const uint64_t SectionAddress = StaticKeysJumpSection->getAddress();
1782   DataExtractor DE(StaticKeysJumpSection->getOutputContents(),
1783                    BC.AsmInfo->isLittleEndian(),
1784                    BC.AsmInfo->getCodePointerSize());
1785   DataExtractor::Cursor Cursor(StaticKeysJumpTableAddress - SectionAddress);
1786   const BinaryData *Stop = BC.getBinaryDataByName("__stop___jump_table");
1787   uint32_t EntryID = 0;
1788   uint64_t NumShort = 0;
1789   uint64_t NumLong = 0;
1790   while (Cursor && Cursor.tell() < Stop->getAddress() - SectionAddress) {
1791     const uint64_t JumpAddress =
1792         SectionAddress + Cursor.tell() + (int32_t)DE.getU32(Cursor);
1793     const uint64_t TargetAddress =
1794         SectionAddress + Cursor.tell() + (int32_t)DE.getU32(Cursor);
1795     const uint64_t KeyAddress =
1796         SectionAddress + Cursor.tell() + (int64_t)DE.getU64(Cursor);
1797
1798     // Consume the status of the cursor.
1799     if (!Cursor)
1800       return createStringError(errc::executable_format_error,
1801                                "out of bounds while updating static keys: %s",
1802                                toString(Cursor.takeError()).c_str());
1803
1804     ++EntryID;
1805
1806     LLVM_DEBUG({
1807       dbgs() << "\n\tJumpAddress: 0x" << Twine::utohexstr(JumpAddress)
1808              << "\n\tTargetAddress: 0x" << Twine::utohexstr(TargetAddress)
1809              << "\n\tKeyAddress: 0x" << Twine::utohexstr(KeyAddress) << '\n';
1810     });
1811     (void)TargetAddress;
1812     (void)KeyAddress;
1813
1814     BinaryFunction *BF =
1815         BC.getBinaryFunctionContainingAddress(JumpAddress,
1816                                               /*CheckPastEnd*/ false,
1817                                               /*UseMaxSize*/ true);
1818     assert(BF && "Cannot get function for modified static key.");
1819
1820     if (!BF->isEmitted())
1821       continue;
1822
1823     // Disassemble instruction to collect stats even if nop-conversion is
1824     // unnecessary.
1825     MutableArrayRef<uint8_t> Contents = MutableArrayRef<uint8_t>(
1826         reinterpret_cast<uint8_t *>(BF->getImageAddress()), BF->getImageSize());
1827     assert(Contents.size() && "Non-empty function image expected.");
1828
1829     MCInst Inst;
1830     uint64_t Size;
1831     const uint64_t JumpOffset = JumpAddress - BF->getAddress();
1832     if (!BC.DisAsm->getInstruction(Inst, Size, Contents.slice(JumpOffset), 0,
1833                                    nulls())) {
1834       llvm_unreachable("Unable to disassemble jump instruction.");
1835     }
1836     assert(BC.MIB->isBranch(Inst) && "Branch instruction expected.");
1837
1838     if (Size == 2)
1839       ++NumShort;
1840     else if (Size == 5)
1841       ++NumLong;
1842     else
1843       llvm_unreachable("Unexpected size for static keys jump instruction.");
1844
1845     // Check if we need to convert jump instruction into a nop.
1846     if (!NopIDs.contains(EntryID))
1847       continue;
1848
1849     SmallString<15> NopCode;
1850     raw_svector_ostream VecOS(NopCode);
1851     BC.MAB->writeNopData(VecOS, Size, BC.STI.get());
1852     for (uint64_t I = 0; I < Size; ++I)
1853       Contents[JumpOffset + I] = NopCode[I];
1854   }
1855
1856   BC.outs() << "BOLT-INFO: written " << NumShort << " short and " << NumLong
1857             << " long static keys jumps in optimized functions\n";
1858
1859   return Error::success();
1860 }
1861
1862 } // namespace
1863
1864 std::unique_ptr<MetadataRewriter>
1865 llvm::bolt::createLinuxKernelRewriter(BinaryContext &BC) {
1866   return std::make_unique<LinuxKernelRewriter>(BC);
1867 }
1868