//===- bolt/Rewrite/LinuxKernelRewriter.cpp -------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// Support for updating Linux Kernel metadata.
//
//===----------------------------------------------------------------------===//

#include "bolt/Core/BinaryFunction.h"
#include "bolt/Rewrite/MetadataRewriter.h"
#include "bolt/Rewrite/MetadataRewriters.h"
#include "bolt/Utils/CommandLineOpts.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/MC/MCDisassembler/MCDisassembler.h"
#include "llvm/Support/BinaryStreamWriter.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/Errc.h"

#define DEBUG_TYPE "bolt-linux"

using namespace llvm;
using namespace bolt;

namespace opts {

static cl::opt<bool>
    AltInstHasPadLen("alt-inst-has-padlen",
                     cl::desc("specify that .altinstructions has padlen field"),
                     cl::init(false), cl::Hidden, cl::cat(BoltCategory));

static cl::opt<uint32_t>
    AltInstFeatureSize("alt-inst-feature-size",
                       cl::desc("size of feature field in .altinstructions"),
                       cl::init(2), cl::Hidden, cl::cat(BoltCategory));

static cl::opt<bool>
    DumpAltInstructions("dump-alt-instructions",
                        cl::desc("dump Linux alternative instructions info"),
                        cl::init(false), cl::Hidden, cl::cat(BoltCategory));

static cl::opt<bool>
    DumpExceptions("dump-linux-exceptions",
                   cl::desc("dump Linux kernel exception table"),
                   cl::init(false), cl::Hidden, cl::cat(BoltCategory));

static cl::opt<bool>
    DumpORC("dump-orc", cl::desc("dump raw ORC unwind information (sorted)"),
            cl::init(false), cl::Hidden, cl::cat(BoltCategory));

static cl::opt<bool> DumpParavirtualPatchSites(
    "dump-para-sites", cl::desc("dump Linux kernel paravirtual patch sites"),
    cl::init(false), cl::Hidden, cl::cat(BoltCategory));

static cl::opt<bool>
    DumpPCIFixups("dump-pci-fixups",
                  cl::desc("dump Linux kernel PCI fixup table"),
                  cl::init(false), cl::Hidden, cl::cat(BoltCategory));

static cl::opt<bool> DumpStaticCalls("dump-static-calls",
                                     cl::desc("dump Linux kernel static calls"),
                                     cl::init(false), cl::Hidden,
                                     cl::cat(BoltCategory));

static cl::opt<bool>
    DumpStaticKeys("dump-static-keys",
                   cl::desc("dump Linux kernel static keys jump table"),
                   cl::init(false), cl::Hidden, cl::cat(BoltCategory));

static cl::opt<bool> LongJumpLabels(
    "long-jump-labels",
    cl::desc("always use long jumps/nops for Linux kernel static keys"),
    cl::init(false), cl::Hidden, cl::cat(BoltCategory));

static cl::opt<bool>
    PrintORC("print-orc",
             cl::desc("print ORC unwind information for instructions"),
             cl::init(true), cl::Hidden, cl::cat(BoltCategory));

} // namespace opts

/// Linux Kernel supports stack unwinding using ORC (oops rewind capability).
/// ORC state at every IP can be described by the following data structure.
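/// Note: the three 16-bit fields below mirror the 6-byte on-disk layout of a
/// .orc_unwind entry (see ORC_UNWIND_ENTRY_SIZE) as this rewriter reads it in
/// readORCTables(); any further field semantics are kernel-defined.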
89 struct ORCState { 90 int16_t SPOffset; 91 int16_t BPOffset; 92 int16_t Info; 93 94 bool operator==(const ORCState &Other) const { 95 return SPOffset == Other.SPOffset && BPOffset == Other.BPOffset && 96 Info == Other.Info; 97 } 98 99 bool operator!=(const ORCState &Other) const { return !(*this == Other); } 100 }; 101 102 /// Section terminator ORC entry. 103 static ORCState NullORC = {0, 0, 0}; 104 105 /// Basic printer for ORC entry. It does not provide the same level of 106 /// information as objtool (for now). 107 inline raw_ostream &operator<<(raw_ostream &OS, const ORCState &E) { 108 if (!opts::PrintORC) 109 return OS; 110 if (E != NullORC) 111 OS << format("{sp: %d, bp: %d, info: 0x%x}", E.SPOffset, E.BPOffset, 112 E.Info); 113 else 114 OS << "{terminator}"; 115 116 return OS; 117 } 118 119 namespace { 120 121 class LinuxKernelRewriter final : public MetadataRewriter { 122 /// Linux Kernel special sections point to a specific instruction in many 123 /// cases. Unlike SDTMarkerInfo, these markers can come from different 124 /// sections. 125 struct LKInstructionMarkerInfo { 126 uint64_t SectionOffset; 127 int32_t PCRelativeOffset; 128 bool IsPCRelative; 129 StringRef SectionName; 130 }; 131 132 /// Map linux kernel program locations/instructions to their pointers in 133 /// special linux kernel sections 134 std::unordered_map<uint64_t, std::vector<LKInstructionMarkerInfo>> LKMarkers; 135 136 /// Linux ORC sections. 137 ErrorOr<BinarySection &> ORCUnwindSection = std::errc::bad_address; 138 ErrorOr<BinarySection &> ORCUnwindIPSection = std::errc::bad_address; 139 140 /// Size of entries in ORC sections. 141 static constexpr size_t ORC_UNWIND_ENTRY_SIZE = 6; 142 static constexpr size_t ORC_UNWIND_IP_ENTRY_SIZE = 4; 143 144 struct ORCListEntry { 145 uint64_t IP; /// Instruction address. 146 BinaryFunction *BF; /// Binary function corresponding to the entry. 147 ORCState ORC; /// Stack unwind info in ORC format. 148 149 /// ORC entries are sorted by their IPs. Terminator entries (NullORC) 150 /// should precede other entries with the same address. 151 bool operator<(const ORCListEntry &Other) const { 152 if (IP < Other.IP) 153 return 1; 154 if (IP > Other.IP) 155 return 0; 156 return ORC == NullORC && Other.ORC != NullORC; 157 } 158 }; 159 160 using ORCListType = std::vector<ORCListEntry>; 161 ORCListType ORCEntries; 162 163 /// Number of entries in the input file ORC sections. 164 uint64_t NumORCEntries = 0; 165 166 /// Section containing static keys jump table. 167 ErrorOr<BinarySection &> StaticKeysJumpSection = std::errc::bad_address; 168 uint64_t StaticKeysJumpTableAddress = 0; 169 static constexpr size_t STATIC_KEYS_JUMP_ENTRY_SIZE = 8; 170 171 struct JumpInfoEntry { 172 bool Likely; 173 bool InitValue; 174 }; 175 SmallVector<JumpInfoEntry, 16> JumpInfo; 176 177 /// Static key entries that need nop conversion. 178 DenseSet<uint32_t> NopIDs; 179 180 /// Section containing static call table. 181 ErrorOr<BinarySection &> StaticCallSection = std::errc::bad_address; 182 uint64_t StaticCallTableAddress = 0; 183 static constexpr size_t STATIC_CALL_ENTRY_SIZE = 8; 184 185 struct StaticCallInfo { 186 uint32_t ID; /// Identifier of the entry in the table. 187 BinaryFunction *Function; /// Function containing associated call. 188 MCSymbol *Label; /// Label attached to the call. 189 }; 190 using StaticCallListType = std::vector<StaticCallInfo>; 191 StaticCallListType StaticCallEntries; 192 193 /// Section containing the Linux exception table. 
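  /// Each entry is 12 bytes (EXCEPTION_TABLE_ENTRY_SIZE): three 32-bit fields
  /// (insn, fixup, data); see the comment before readExceptionTable().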
194 ErrorOr<BinarySection &> ExceptionsSection = std::errc::bad_address; 195 static constexpr size_t EXCEPTION_TABLE_ENTRY_SIZE = 12; 196 197 /// Functions with exception handling code. 198 DenseSet<BinaryFunction *> FunctionsWithExceptions; 199 200 /// Section with paravirtual patch sites. 201 ErrorOr<BinarySection &> ParavirtualPatchSection = std::errc::bad_address; 202 203 /// Alignment of paravirtual patch structures. 204 static constexpr size_t PARA_PATCH_ALIGN = 8; 205 206 /// .altinstructions section. 207 ErrorOr<BinarySection &> AltInstrSection = std::errc::bad_address; 208 209 /// Section containing Linux bug table. 210 ErrorOr<BinarySection &> BugTableSection = std::errc::bad_address; 211 212 /// Size of bug_entry struct. 213 static constexpr size_t BUG_TABLE_ENTRY_SIZE = 12; 214 215 /// List of bug entries per function. 216 using FunctionBugListType = 217 DenseMap<BinaryFunction *, SmallVector<uint32_t, 2>>; 218 FunctionBugListType FunctionBugList; 219 220 /// .pci_fixup section. 221 ErrorOr<BinarySection &> PCIFixupSection = std::errc::bad_address; 222 static constexpr size_t PCI_FIXUP_ENTRY_SIZE = 16; 223 224 /// Insert an LKMarker for a given code pointer \p PC from a non-code section 225 /// \p SectionName. 226 void insertLKMarker(uint64_t PC, uint64_t SectionOffset, 227 int32_t PCRelativeOffset, bool IsPCRelative, 228 StringRef SectionName); 229 230 /// Process linux kernel special sections and their relocations. 231 void processLKSections(); 232 233 /// Process __ksymtab and __ksymtab_gpl. 234 void processLKKSymtab(bool IsGPL = false); 235 236 /// Process special linux kernel section, .smp_locks. 237 void processLKSMPLocks(); 238 239 /// Update LKMarkers' locations for the output binary. 240 void updateLKMarkers(); 241 242 /// Read ORC unwind information and annotate instructions. 243 Error readORCTables(); 244 245 /// Update ORC for functions once CFG is constructed. 246 Error processORCPostCFG(); 247 248 /// Update ORC data in the binary. 249 Error rewriteORCTables(); 250 251 /// Validate written ORC tables after binary emission. 252 Error validateORCTables(); 253 254 /// Static call table handling. 255 Error readStaticCalls(); 256 Error rewriteStaticCalls(); 257 258 Error readExceptionTable(); 259 Error rewriteExceptionTable(); 260 261 /// Paravirtual instruction patch sites. 262 Error readParaInstructions(); 263 Error rewriteParaInstructions(); 264 265 /// __bug_table section handling. 266 Error readBugTable(); 267 Error rewriteBugTable(); 268 269 /// Do no process functions containing instruction annotated with 270 /// \p Annotation. 271 void skipFunctionsWithAnnotation(StringRef Annotation) const; 272 273 /// Handle alternative instruction info from .altinstructions. 274 Error readAltInstructions(); 275 Error rewriteAltInstructions(); 276 277 /// Read .pci_fixup 278 Error readPCIFixupTable(); 279 280 /// Handle static keys jump table. 281 Error readStaticKeysJumpTable(); 282 Error rewriteStaticKeysJumpTable(); 283 Error updateStaticKeysJumpTablePostEmit(); 284 285 /// Mark instructions referenced by kernel metadata. 
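  /// Functions with marked instructions are flagged via setHasSDTMarker(), and
  /// each marked instruction keeps its input offset via MIB->setOffset()
  /// (see markInstructions() below).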
  Error markInstructions();

public:
  LinuxKernelRewriter(BinaryContext &BC)
      : MetadataRewriter("linux-kernel-rewriter", BC) {}

  Error preCFGInitializer() override {
    processLKSections();
    if (Error E = markInstructions())
      return E;

    if (Error E = readORCTables())
      return E;

    if (Error E = readStaticCalls())
      return E;

    if (Error E = readExceptionTable())
      return E;

    if (Error E = readParaInstructions())
      return E;

    if (Error E = readBugTable())
      return E;

    if (Error E = readAltInstructions())
      return E;

    if (Error E = readPCIFixupTable())
      return E;

    if (Error E = readStaticKeysJumpTable())
      return E;

    return Error::success();
  }

  Error postCFGInitializer() override {
    if (Error E = processORCPostCFG())
      return E;

    return Error::success();
  }

  Error preEmitFinalizer() override {
    // Since rewriteExceptionTable() can mark functions as non-simple, run it
    // before other rewriters that depend on simple/emit status.
    if (Error E = rewriteExceptionTable())
      return E;

    if (Error E = rewriteAltInstructions())
      return E;

    if (Error E = rewriteParaInstructions())
      return E;

    if (Error E = rewriteORCTables())
      return E;

    if (Error E = rewriteStaticCalls())
      return E;

    if (Error E = rewriteStaticKeysJumpTable())
      return E;

    if (Error E = rewriteBugTable())
      return E;

    return Error::success();
  }

  Error postEmitFinalizer() override {
    updateLKMarkers();

    if (Error E = updateStaticKeysJumpTablePostEmit())
      return E;

    if (Error E = validateORCTables())
      return E;

    return Error::success();
  }
};

Error LinuxKernelRewriter::markInstructions() {
  for (const uint64_t PC : llvm::make_first_range(LKMarkers)) {
    BinaryFunction *BF = BC.getBinaryFunctionContainingAddress(PC);

    if (!BF || !BC.shouldEmit(*BF))
      continue;

    const uint64_t Offset = PC - BF->getAddress();
    MCInst *Inst = BF->getInstructionAtOffset(Offset);
    if (!Inst)
      return createStringError(errc::executable_format_error,
                               "no instruction matches kernel marker offset");

    BC.MIB->setOffset(*Inst, static_cast<uint32_t>(Offset));

    BF->setHasSDTMarker(true);
  }

  return Error::success();
}

void LinuxKernelRewriter::insertLKMarker(uint64_t PC, uint64_t SectionOffset,
                                         int32_t PCRelativeOffset,
                                         bool IsPCRelative,
                                         StringRef SectionName) {
  LKMarkers[PC].emplace_back(LKInstructionMarkerInfo{
      SectionOffset, PCRelativeOffset, IsPCRelative, SectionName});
}

void LinuxKernelRewriter::processLKSections() {
  processLKKSymtab();
  processLKKSymtab(true);
  processLKSMPLocks();
}

/// Process __ksymtab[_gpl] sections of the Linux Kernel.
/// These sections list all the vmlinux symbols that kernel modules can access.
///
/// All entries are 4 bytes each, so we can read them one by one and ignore
/// the ones that do not point to the .text section. Every entry is a
/// PC-relative offset that always points to the beginning of a function.
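///
/// For example (illustrative addresses only): an entry at 0xffffffff81a00000
/// holding the 32-bit value -0x900000 refers to the function at
/// 0xffffffff81100000; a PC32 relocation against that function's symbol is
/// then recorded for the entry.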
412 void LinuxKernelRewriter::processLKKSymtab(bool IsGPL) { 413 StringRef SectionName = "__ksymtab"; 414 if (IsGPL) 415 SectionName = "__ksymtab_gpl"; 416 ErrorOr<BinarySection &> SectionOrError = 417 BC.getUniqueSectionByName(SectionName); 418 assert(SectionOrError && 419 "__ksymtab[_gpl] section not found in Linux Kernel binary"); 420 const uint64_t SectionSize = SectionOrError->getSize(); 421 const uint64_t SectionAddress = SectionOrError->getAddress(); 422 assert((SectionSize % 4) == 0 && 423 "The size of the __ksymtab[_gpl] section should be a multiple of 4"); 424 425 for (uint64_t I = 0; I < SectionSize; I += 4) { 426 const uint64_t EntryAddress = SectionAddress + I; 427 ErrorOr<uint64_t> Offset = BC.getSignedValueAtAddress(EntryAddress, 4); 428 assert(Offset && "Reading valid PC-relative offset for a ksymtab entry"); 429 const int32_t SignedOffset = *Offset; 430 const uint64_t RefAddress = EntryAddress + SignedOffset; 431 BinaryFunction *BF = BC.getBinaryFunctionAtAddress(RefAddress); 432 if (!BF) 433 continue; 434 435 BC.addRelocation(EntryAddress, BF->getSymbol(), Relocation::getPC32(), 0, 436 *Offset); 437 } 438 } 439 440 /// .smp_locks section contains PC-relative references to instructions with LOCK 441 /// prefix. The prefix can be converted to NOP at boot time on non-SMP systems. 442 void LinuxKernelRewriter::processLKSMPLocks() { 443 ErrorOr<BinarySection &> SectionOrError = 444 BC.getUniqueSectionByName(".smp_locks"); 445 if (!SectionOrError) 446 return; 447 448 uint64_t SectionSize = SectionOrError->getSize(); 449 const uint64_t SectionAddress = SectionOrError->getAddress(); 450 assert((SectionSize % 4) == 0 && 451 "The size of the .smp_locks section should be a multiple of 4"); 452 453 for (uint64_t I = 0; I < SectionSize; I += 4) { 454 const uint64_t EntryAddress = SectionAddress + I; 455 ErrorOr<uint64_t> Offset = BC.getSignedValueAtAddress(EntryAddress, 4); 456 assert(Offset && "Reading valid PC-relative offset for a .smp_locks entry"); 457 int32_t SignedOffset = *Offset; 458 uint64_t RefAddress = EntryAddress + SignedOffset; 459 460 BinaryFunction *ContainingBF = 461 BC.getBinaryFunctionContainingAddress(RefAddress); 462 if (!ContainingBF) 463 continue; 464 465 insertLKMarker(RefAddress, I, SignedOffset, true, ".smp_locks"); 466 } 467 } 468 469 void LinuxKernelRewriter::updateLKMarkers() { 470 if (LKMarkers.size() == 0) 471 return; 472 473 std::unordered_map<std::string, uint64_t> PatchCounts; 474 for (std::pair<const uint64_t, std::vector<LKInstructionMarkerInfo>> 475 &LKMarkerInfoKV : LKMarkers) { 476 const uint64_t OriginalAddress = LKMarkerInfoKV.first; 477 const BinaryFunction *BF = 478 BC.getBinaryFunctionContainingAddress(OriginalAddress, false, true); 479 if (!BF) 480 continue; 481 482 uint64_t NewAddress = BF->translateInputToOutputAddress(OriginalAddress); 483 if (NewAddress == 0) 484 continue; 485 486 // Apply base address. 
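    // translateInputToOutputAddress() may return an address without the
    // kernel's high virtual base. If the original address lies in the upper
    // range but the new one does not, the check below adds the base back so
    // the patched value stays in the kernel address space.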
487 if (OriginalAddress >= 0xffffffff00000000 && NewAddress < 0xffffffff) 488 NewAddress = NewAddress + 0xffffffff00000000; 489 490 if (OriginalAddress == NewAddress) 491 continue; 492 493 for (LKInstructionMarkerInfo &LKMarkerInfo : LKMarkerInfoKV.second) { 494 StringRef SectionName = LKMarkerInfo.SectionName; 495 SimpleBinaryPatcher *LKPatcher; 496 ErrorOr<BinarySection &> BSec = BC.getUniqueSectionByName(SectionName); 497 assert(BSec && "missing section info for kernel section"); 498 if (!BSec->getPatcher()) 499 BSec->registerPatcher(std::make_unique<SimpleBinaryPatcher>()); 500 LKPatcher = static_cast<SimpleBinaryPatcher *>(BSec->getPatcher()); 501 PatchCounts[std::string(SectionName)]++; 502 if (LKMarkerInfo.IsPCRelative) 503 LKPatcher->addLE32Patch(LKMarkerInfo.SectionOffset, 504 NewAddress - OriginalAddress + 505 LKMarkerInfo.PCRelativeOffset); 506 else 507 LKPatcher->addLE64Patch(LKMarkerInfo.SectionOffset, NewAddress); 508 } 509 } 510 BC.outs() << "BOLT-INFO: patching linux kernel sections. Total patches per " 511 "section are as follows:\n"; 512 for (const std::pair<const std::string, uint64_t> &KV : PatchCounts) 513 BC.outs() << " Section: " << KV.first << ", patch-counts: " << KV.second 514 << '\n'; 515 } 516 517 Error LinuxKernelRewriter::readORCTables() { 518 // NOTE: we should ignore relocations for orc tables as the tables are sorted 519 // post-link time and relocations are not updated. 520 ORCUnwindSection = BC.getUniqueSectionByName(".orc_unwind"); 521 ORCUnwindIPSection = BC.getUniqueSectionByName(".orc_unwind_ip"); 522 523 if (!ORCUnwindSection && !ORCUnwindIPSection) 524 return Error::success(); 525 526 if (!ORCUnwindSection || !ORCUnwindIPSection) 527 return createStringError(errc::executable_format_error, 528 "missing ORC section"); 529 530 NumORCEntries = ORCUnwindIPSection->getSize() / ORC_UNWIND_IP_ENTRY_SIZE; 531 if (ORCUnwindSection->getSize() != NumORCEntries * ORC_UNWIND_ENTRY_SIZE || 532 ORCUnwindIPSection->getSize() != NumORCEntries * ORC_UNWIND_IP_ENTRY_SIZE) 533 return createStringError(errc::executable_format_error, 534 "ORC entries number mismatch detected"); 535 536 const uint64_t IPSectionAddress = ORCUnwindIPSection->getAddress(); 537 DataExtractor OrcDE = DataExtractor(ORCUnwindSection->getContents(), 538 BC.AsmInfo->isLittleEndian(), 539 BC.AsmInfo->getCodePointerSize()); 540 DataExtractor IPDE = DataExtractor(ORCUnwindIPSection->getContents(), 541 BC.AsmInfo->isLittleEndian(), 542 BC.AsmInfo->getCodePointerSize()); 543 DataExtractor::Cursor ORCCursor(0); 544 DataExtractor::Cursor IPCursor(0); 545 uint64_t PrevIP = 0; 546 for (uint32_t Index = 0; Index < NumORCEntries; ++Index) { 547 const uint64_t IP = 548 IPSectionAddress + IPCursor.tell() + (int32_t)IPDE.getU32(IPCursor); 549 550 // Consume the status of the cursor. 551 if (!IPCursor) 552 return createStringError(errc::executable_format_error, 553 "out of bounds while reading ORC IP table: %s", 554 toString(IPCursor.takeError()).c_str()); 555 556 if (IP < PrevIP && opts::Verbosity) 557 BC.errs() << "BOLT-WARNING: out of order IP 0x" << Twine::utohexstr(IP) 558 << " detected while reading ORC\n"; 559 560 PrevIP = IP; 561 562 // Store all entries, includes those we are not going to update as the 563 // tables need to be sorted globally before being written out. 
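    // Each .orc_unwind record read below via OrcDE consists of three
    // consecutive 16-bit values (SP offset, BP offset, info); the matching IP
    // was computed above from the parallel .orc_unwind_ip entry.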
564 ORCEntries.push_back(ORCListEntry()); 565 ORCListEntry &Entry = ORCEntries.back(); 566 567 Entry.IP = IP; 568 Entry.ORC.SPOffset = (int16_t)OrcDE.getU16(ORCCursor); 569 Entry.ORC.BPOffset = (int16_t)OrcDE.getU16(ORCCursor); 570 Entry.ORC.Info = (int16_t)OrcDE.getU16(ORCCursor); 571 Entry.BF = nullptr; 572 573 // Consume the status of the cursor. 574 if (!ORCCursor) 575 return createStringError(errc::executable_format_error, 576 "out of bounds while reading ORC: %s", 577 toString(ORCCursor.takeError()).c_str()); 578 579 if (Entry.ORC == NullORC) 580 continue; 581 582 BinaryFunction *&BF = Entry.BF; 583 BF = BC.getBinaryFunctionContainingAddress(IP, /*CheckPastEnd*/ true); 584 585 // If the entry immediately pointing past the end of the function is not 586 // the terminator entry, then it does not belong to this function. 587 if (BF && BF->getAddress() + BF->getSize() == IP) 588 BF = 0; 589 590 if (!BF) { 591 if (opts::Verbosity) 592 BC.errs() << "BOLT-WARNING: no binary function found matching ORC 0x" 593 << Twine::utohexstr(IP) << ": " << Entry.ORC << '\n'; 594 continue; 595 } 596 597 BF->setHasORC(true); 598 599 if (!BF->hasInstructions()) 600 continue; 601 602 MCInst *Inst = BF->getInstructionAtOffset(IP - BF->getAddress()); 603 if (!Inst) 604 return createStringError( 605 errc::executable_format_error, 606 "no instruction at address 0x%" PRIx64 " in .orc_unwind_ip", IP); 607 608 // Some addresses will have two entries associated with them. The first 609 // one being a "weak" section terminator. Since we ignore the terminator, 610 // we should only assign one entry per instruction. 611 if (BC.MIB->hasAnnotation(*Inst, "ORC")) 612 return createStringError( 613 errc::executable_format_error, 614 "duplicate non-terminal ORC IP 0x%" PRIx64 " in .orc_unwind_ip", IP); 615 616 BC.MIB->addAnnotation(*Inst, "ORC", Entry.ORC); 617 } 618 619 BC.outs() << "BOLT-INFO: parsed " << NumORCEntries << " ORC entries\n"; 620 621 if (opts::DumpORC) { 622 BC.outs() << "BOLT-INFO: ORC unwind information:\n"; 623 for (const ORCListEntry &E : ORCEntries) { 624 BC.outs() << "0x" << Twine::utohexstr(E.IP) << ": " << E.ORC; 625 if (E.BF) 626 BC.outs() << ": " << *E.BF; 627 BC.outs() << '\n'; 628 } 629 } 630 631 // Add entries for functions that don't have explicit ORC info at the start. 632 // We'll have the correct info for them even if ORC for the preceding function 633 // changes. 634 ORCListType NewEntries; 635 for (BinaryFunction &BF : llvm::make_second_range(BC.getBinaryFunctions())) { 636 auto It = llvm::partition_point(ORCEntries, [&](const ORCListEntry &E) { 637 return E.IP <= BF.getAddress(); 638 }); 639 if (It != ORCEntries.begin()) 640 --It; 641 642 if (It->BF == &BF) 643 continue; 644 645 if (It->ORC == NullORC && It->IP == BF.getAddress()) { 646 assert(!It->BF); 647 It->BF = &BF; 648 continue; 649 } 650 651 NewEntries.push_back({BF.getAddress(), &BF, It->ORC}); 652 if (It->ORC != NullORC) 653 BF.setHasORC(true); 654 } 655 656 llvm::copy(NewEntries, std::back_inserter(ORCEntries)); 657 llvm::sort(ORCEntries); 658 659 if (opts::DumpORC) { 660 BC.outs() << "BOLT-INFO: amended ORC unwind information:\n"; 661 for (const ORCListEntry &E : ORCEntries) { 662 BC.outs() << "0x" << Twine::utohexstr(E.IP) << ": " << E.ORC; 663 if (E.BF) 664 BC.outs() << ": " << *E.BF; 665 BC.outs() << '\n'; 666 } 667 } 668 669 return Error::success(); 670 } 671 672 Error LinuxKernelRewriter::processORCPostCFG() { 673 if (!NumORCEntries) 674 return Error::success(); 675 676 // Propagate ORC to the rest of the function. 
We can annotate every 677 // instruction in every function, but to minimize the overhead, we annotate 678 // the first instruction in every basic block to reflect the state at the 679 // entry. This way, the ORC state can be calculated based on annotations 680 // regardless of the basic block layout. Note that if we insert/delete 681 // instructions, we must take care to attach ORC info to the new/deleted ones. 682 for (BinaryFunction &BF : llvm::make_second_range(BC.getBinaryFunctions())) { 683 684 std::optional<ORCState> CurrentState; 685 for (BinaryBasicBlock &BB : BF) { 686 for (MCInst &Inst : BB) { 687 ErrorOr<ORCState> State = 688 BC.MIB->tryGetAnnotationAs<ORCState>(Inst, "ORC"); 689 690 if (State) { 691 CurrentState = *State; 692 continue; 693 } 694 695 // Get state for the start of the function. 696 if (!CurrentState) { 697 // A terminator entry (NullORC) can match the function address. If 698 // there's also a non-terminator entry, it will be placed after the 699 // terminator. Hence, we are looking for the last ORC entry that 700 // matches the address. 701 auto It = 702 llvm::partition_point(ORCEntries, [&](const ORCListEntry &E) { 703 return E.IP <= BF.getAddress(); 704 }); 705 if (It != ORCEntries.begin()) 706 --It; 707 708 assert(It->IP == BF.getAddress() && (!It->BF || It->BF == &BF) && 709 "ORC info at function entry expected."); 710 711 if (It->ORC == NullORC && BF.hasORC()) { 712 BC.errs() << "BOLT-WARNING: ORC unwind info excludes prologue for " 713 << BF << '\n'; 714 } 715 716 It->BF = &BF; 717 718 CurrentState = It->ORC; 719 if (It->ORC != NullORC) 720 BF.setHasORC(true); 721 } 722 723 // While printing ORC, attach info to every instruction for convenience. 724 if (opts::PrintORC || &Inst == &BB.front()) 725 BC.MIB->addAnnotation(Inst, "ORC", *CurrentState); 726 } 727 } 728 } 729 730 return Error::success(); 731 } 732 733 Error LinuxKernelRewriter::rewriteORCTables() { 734 if (!NumORCEntries) 735 return Error::success(); 736 737 // Update ORC sections in-place. As we change the code, the number of ORC 738 // entries may increase for some functions. However, as we remove terminator 739 // redundancy (see below), more space is freed up and we should always be able 740 // to fit new ORC tables in the reserved space. 741 auto createInPlaceWriter = [&](BinarySection &Section) -> BinaryStreamWriter { 742 const size_t Size = Section.getSize(); 743 uint8_t *NewContents = new uint8_t[Size]; 744 Section.updateContents(NewContents, Size); 745 Section.setOutputFileOffset(Section.getInputFileOffset()); 746 return BinaryStreamWriter({NewContents, Size}, BC.AsmInfo->isLittleEndian() 747 ? 
endianness::little 748 : endianness::big); 749 }; 750 BinaryStreamWriter UnwindWriter = createInPlaceWriter(*ORCUnwindSection); 751 BinaryStreamWriter UnwindIPWriter = createInPlaceWriter(*ORCUnwindIPSection); 752 753 uint64_t NumEmitted = 0; 754 std::optional<ORCState> LastEmittedORC; 755 auto emitORCEntry = [&](const uint64_t IP, const ORCState &ORC, 756 MCSymbol *Label = 0, bool Force = false) -> Error { 757 if (LastEmittedORC && ORC == *LastEmittedORC && !Force) 758 return Error::success(); 759 760 LastEmittedORC = ORC; 761 762 if (++NumEmitted > NumORCEntries) 763 return createStringError(errc::executable_format_error, 764 "exceeded the number of allocated ORC entries"); 765 766 if (Label) 767 ORCUnwindIPSection->addRelocation(UnwindIPWriter.getOffset(), Label, 768 Relocation::getPC32(), /*Addend*/ 0); 769 770 const int32_t IPValue = 771 IP - ORCUnwindIPSection->getAddress() - UnwindIPWriter.getOffset(); 772 if (Error E = UnwindIPWriter.writeInteger(IPValue)) 773 return E; 774 775 if (Error E = UnwindWriter.writeInteger(ORC.SPOffset)) 776 return E; 777 if (Error E = UnwindWriter.writeInteger(ORC.BPOffset)) 778 return E; 779 if (Error E = UnwindWriter.writeInteger(ORC.Info)) 780 return E; 781 782 return Error::success(); 783 }; 784 785 // Emit new ORC entries for the emitted function. 786 auto emitORC = [&](const BinaryFunction &BF) -> Error { 787 assert(!BF.isSplit() && "Split functions not supported by ORC writer yet."); 788 789 ORCState CurrentState = NullORC; 790 for (BinaryBasicBlock *BB : BF.getLayout().blocks()) { 791 for (MCInst &Inst : *BB) { 792 ErrorOr<ORCState> ErrorOrState = 793 BC.MIB->tryGetAnnotationAs<ORCState>(Inst, "ORC"); 794 if (!ErrorOrState || *ErrorOrState == CurrentState) 795 continue; 796 797 // Issue label for the instruction. 798 MCSymbol *Label = 799 BC.MIB->getOrCreateInstLabel(Inst, "__ORC_", BC.Ctx.get()); 800 801 if (Error E = emitORCEntry(0, *ErrorOrState, Label)) 802 return E; 803 804 CurrentState = *ErrorOrState; 805 } 806 } 807 808 return Error::success(); 809 }; 810 811 for (ORCListEntry &Entry : ORCEntries) { 812 // Emit original entries for functions that we haven't modified. 813 if (!Entry.BF || !BC.shouldEmit(*Entry.BF)) { 814 // Emit terminator only if it marks the start of a function. 815 if (Entry.ORC == NullORC && !Entry.BF) 816 continue; 817 if (Error E = emitORCEntry(Entry.IP, Entry.ORC)) 818 return E; 819 continue; 820 } 821 822 // Emit all ORC entries for a function referenced by an entry and skip over 823 // the rest of entries for this function by resetting its ORC attribute. 824 if (Entry.BF->hasORC()) { 825 if (Error E = emitORC(*Entry.BF)) 826 return E; 827 Entry.BF->setHasORC(false); 828 } 829 } 830 831 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: emitted " << NumEmitted 832 << " ORC entries\n"); 833 834 // Replicate terminator entry at the end of sections to match the original 835 // table sizes. 
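  // The rewrite is done in place and the section sizes stay the same, so any
  // slack left after dropping redundant entries is filled with terminators
  // whose IP points past the last function.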
836 const BinaryFunction &LastBF = BC.getBinaryFunctions().rbegin()->second; 837 const uint64_t LastIP = LastBF.getAddress() + LastBF.getMaxSize(); 838 while (UnwindWriter.bytesRemaining()) { 839 if (Error E = emitORCEntry(LastIP, NullORC, nullptr, /*Force*/ true)) 840 return E; 841 } 842 843 return Error::success(); 844 } 845 846 Error LinuxKernelRewriter::validateORCTables() { 847 if (!ORCUnwindIPSection) 848 return Error::success(); 849 850 const uint64_t IPSectionAddress = ORCUnwindIPSection->getAddress(); 851 DataExtractor IPDE = DataExtractor(ORCUnwindIPSection->getOutputContents(), 852 BC.AsmInfo->isLittleEndian(), 853 BC.AsmInfo->getCodePointerSize()); 854 DataExtractor::Cursor IPCursor(0); 855 uint64_t PrevIP = 0; 856 for (uint32_t Index = 0; Index < NumORCEntries; ++Index) { 857 const uint64_t IP = 858 IPSectionAddress + IPCursor.tell() + (int32_t)IPDE.getU32(IPCursor); 859 if (!IPCursor) 860 return createStringError(errc::executable_format_error, 861 "out of bounds while reading ORC IP table: %s", 862 toString(IPCursor.takeError()).c_str()); 863 864 assert(IP >= PrevIP && "Unsorted ORC table detected"); 865 (void)PrevIP; 866 PrevIP = IP; 867 } 868 869 return Error::success(); 870 } 871 872 /// The static call site table is created by objtool and contains entries in the 873 /// following format: 874 /// 875 /// struct static_call_site { 876 /// s32 addr; 877 /// s32 key; 878 /// }; 879 /// 880 Error LinuxKernelRewriter::readStaticCalls() { 881 const BinaryData *StaticCallTable = 882 BC.getBinaryDataByName("__start_static_call_sites"); 883 if (!StaticCallTable) 884 return Error::success(); 885 886 StaticCallTableAddress = StaticCallTable->getAddress(); 887 888 const BinaryData *Stop = BC.getBinaryDataByName("__stop_static_call_sites"); 889 if (!Stop) 890 return createStringError(errc::executable_format_error, 891 "missing __stop_static_call_sites symbol"); 892 893 ErrorOr<BinarySection &> ErrorOrSection = 894 BC.getSectionForAddress(StaticCallTableAddress); 895 if (!ErrorOrSection) 896 return createStringError(errc::executable_format_error, 897 "no section matching __start_static_call_sites"); 898 899 StaticCallSection = *ErrorOrSection; 900 if (!StaticCallSection->containsAddress(Stop->getAddress() - 1)) 901 return createStringError(errc::executable_format_error, 902 "__stop_static_call_sites not in the same section " 903 "as __start_static_call_sites"); 904 905 if ((Stop->getAddress() - StaticCallTableAddress) % STATIC_CALL_ENTRY_SIZE) 906 return createStringError(errc::executable_format_error, 907 "static call table size error"); 908 909 const uint64_t SectionAddress = StaticCallSection->getAddress(); 910 DataExtractor DE(StaticCallSection->getContents(), 911 BC.AsmInfo->isLittleEndian(), 912 BC.AsmInfo->getCodePointerSize()); 913 DataExtractor::Cursor Cursor(StaticCallTableAddress - SectionAddress); 914 uint32_t EntryID = 0; 915 while (Cursor && Cursor.tell() < Stop->getAddress() - SectionAddress) { 916 const uint64_t CallAddress = 917 SectionAddress + Cursor.tell() + (int32_t)DE.getU32(Cursor); 918 const uint64_t KeyAddress = 919 SectionAddress + Cursor.tell() + (int32_t)DE.getU32(Cursor); 920 921 // Consume the status of the cursor. 
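    // DataExtractor accumulates any out-of-bounds error in the cursor, so a
    // single check after both getU32() calls above is sufficient.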
922 if (!Cursor) 923 return createStringError(errc::executable_format_error, 924 "out of bounds while reading static calls: %s", 925 toString(Cursor.takeError()).c_str()); 926 927 ++EntryID; 928 929 if (opts::DumpStaticCalls) { 930 BC.outs() << "Static Call Site: " << EntryID << '\n'; 931 BC.outs() << "\tCallAddress: 0x" << Twine::utohexstr(CallAddress) 932 << "\n\tKeyAddress: 0x" << Twine::utohexstr(KeyAddress) 933 << '\n'; 934 } 935 936 BinaryFunction *BF = BC.getBinaryFunctionContainingAddress(CallAddress); 937 if (!BF) 938 continue; 939 940 if (!BC.shouldEmit(*BF)) 941 continue; 942 943 if (!BF->hasInstructions()) 944 continue; 945 946 MCInst *Inst = BF->getInstructionAtOffset(CallAddress - BF->getAddress()); 947 if (!Inst) 948 return createStringError(errc::executable_format_error, 949 "no instruction at call site address 0x%" PRIx64, 950 CallAddress); 951 952 // Check for duplicate entries. 953 if (BC.MIB->hasAnnotation(*Inst, "StaticCall")) 954 return createStringError(errc::executable_format_error, 955 "duplicate static call site at 0x%" PRIx64, 956 CallAddress); 957 958 BC.MIB->addAnnotation(*Inst, "StaticCall", EntryID); 959 960 MCSymbol *Label = 961 BC.MIB->getOrCreateInstLabel(*Inst, "__SC_", BC.Ctx.get()); 962 963 StaticCallEntries.push_back({EntryID, BF, Label}); 964 } 965 966 BC.outs() << "BOLT-INFO: parsed " << StaticCallEntries.size() 967 << " static call entries\n"; 968 969 return Error::success(); 970 } 971 972 /// The static call table is sorted during boot time in 973 /// static_call_sort_entries(). This makes it possible to update existing 974 /// entries in-place ignoring their relative order. 975 Error LinuxKernelRewriter::rewriteStaticCalls() { 976 if (!StaticCallTableAddress || !StaticCallSection) 977 return Error::success(); 978 979 for (auto &Entry : StaticCallEntries) { 980 if (!Entry.Function) 981 continue; 982 983 BinaryFunction &BF = *Entry.Function; 984 if (!BC.shouldEmit(BF)) 985 continue; 986 987 // Create a relocation against the label. 988 const uint64_t EntryOffset = StaticCallTableAddress - 989 StaticCallSection->getAddress() + 990 (Entry.ID - 1) * STATIC_CALL_ENTRY_SIZE; 991 StaticCallSection->addRelocation(EntryOffset, Entry.Label, 992 ELF::R_X86_64_PC32, /*Addend*/ 0); 993 } 994 995 return Error::success(); 996 } 997 998 /// Instructions that access user-space memory can cause page faults. These 999 /// faults will be handled by the kernel and execution will resume at the fixup 1000 /// code location if the address was invalid. The kernel uses the exception 1001 /// table to match the faulting instruction to its fixup. 
The table consists of 1002 /// the following entries: 1003 /// 1004 /// struct exception_table_entry { 1005 /// int insn; 1006 /// int fixup; 1007 /// int data; 1008 /// }; 1009 /// 1010 /// More info at: 1011 /// https://www.kernel.org/doc/Documentation/x86/exception-tables.txt 1012 Error LinuxKernelRewriter::readExceptionTable() { 1013 ExceptionsSection = BC.getUniqueSectionByName("__ex_table"); 1014 if (!ExceptionsSection) 1015 return Error::success(); 1016 1017 if (ExceptionsSection->getSize() % EXCEPTION_TABLE_ENTRY_SIZE) 1018 return createStringError(errc::executable_format_error, 1019 "exception table size error"); 1020 1021 const uint64_t SectionAddress = ExceptionsSection->getAddress(); 1022 DataExtractor DE(ExceptionsSection->getContents(), 1023 BC.AsmInfo->isLittleEndian(), 1024 BC.AsmInfo->getCodePointerSize()); 1025 DataExtractor::Cursor Cursor(0); 1026 uint32_t EntryID = 0; 1027 while (Cursor && Cursor.tell() < ExceptionsSection->getSize()) { 1028 const uint64_t InstAddress = 1029 SectionAddress + Cursor.tell() + (int32_t)DE.getU32(Cursor); 1030 const uint64_t FixupAddress = 1031 SectionAddress + Cursor.tell() + (int32_t)DE.getU32(Cursor); 1032 const uint64_t Data = DE.getU32(Cursor); 1033 1034 // Consume the status of the cursor. 1035 if (!Cursor) 1036 return createStringError( 1037 errc::executable_format_error, 1038 "out of bounds while reading exception table: %s", 1039 toString(Cursor.takeError()).c_str()); 1040 1041 ++EntryID; 1042 1043 if (opts::DumpExceptions) { 1044 BC.outs() << "Exception Entry: " << EntryID << '\n'; 1045 BC.outs() << "\tInsn: 0x" << Twine::utohexstr(InstAddress) << '\n' 1046 << "\tFixup: 0x" << Twine::utohexstr(FixupAddress) << '\n' 1047 << "\tData: 0x" << Twine::utohexstr(Data) << '\n'; 1048 } 1049 1050 MCInst *Inst = nullptr; 1051 MCSymbol *FixupLabel = nullptr; 1052 1053 BinaryFunction *InstBF = BC.getBinaryFunctionContainingAddress(InstAddress); 1054 if (InstBF && BC.shouldEmit(*InstBF)) { 1055 Inst = InstBF->getInstructionAtOffset(InstAddress - InstBF->getAddress()); 1056 if (!Inst) 1057 return createStringError(errc::executable_format_error, 1058 "no instruction at address 0x%" PRIx64 1059 " in exception table", 1060 InstAddress); 1061 BC.MIB->addAnnotation(*Inst, "ExceptionEntry", EntryID); 1062 FunctionsWithExceptions.insert(InstBF); 1063 } 1064 1065 if (!InstBF && opts::Verbosity) { 1066 BC.outs() << "BOLT-INFO: no function matches instruction at 0x" 1067 << Twine::utohexstr(InstAddress) 1068 << " referenced by Linux exception table\n"; 1069 } 1070 1071 BinaryFunction *FixupBF = 1072 BC.getBinaryFunctionContainingAddress(FixupAddress); 1073 if (FixupBF && BC.shouldEmit(*FixupBF)) { 1074 const uint64_t Offset = FixupAddress - FixupBF->getAddress(); 1075 if (!FixupBF->getInstructionAtOffset(Offset)) 1076 return createStringError(errc::executable_format_error, 1077 "no instruction at fixup address 0x%" PRIx64 1078 " in exception table", 1079 FixupAddress); 1080 FixupLabel = Offset ? 
FixupBF->addEntryPointAtOffset(Offset)
                          : FixupBF->getSymbol();
      if (Inst)
        BC.MIB->addAnnotation(*Inst, "Fixup", FixupLabel->getName());
      FunctionsWithExceptions.insert(FixupBF);
    }

    if (!FixupBF && opts::Verbosity) {
      BC.outs() << "BOLT-INFO: no function matches fixup code at 0x"
                << Twine::utohexstr(FixupAddress)
                << " referenced by Linux exception table\n";
    }
  }

  BC.outs() << "BOLT-INFO: parsed "
            << ExceptionsSection->getSize() / EXCEPTION_TABLE_ENTRY_SIZE
            << " exception table entries\n";

  return Error::success();
}

/// Depending on the value of CONFIG_BUILDTIME_TABLE_SORT, the kernel expects
/// the exception table to be sorted. Hence we have to sort it after code
/// reordering.
Error LinuxKernelRewriter::rewriteExceptionTable() {
  // Disable output of functions with exceptions before rewrite support is
  // added.
  for (BinaryFunction *BF : FunctionsWithExceptions)
    BF->setSimple(false);

  return Error::success();
}

/// The .parainstructions section contains information for patching paravirtual
/// call instructions at runtime. The entries in the section are in the form:
///
/// struct paravirt_patch_site {
///   u8 *instr;  /* original instructions */
///   u8 type;    /* type of this instruction */
///   u8 len;     /* length of original instruction */
/// };
///
/// Note that the structures are aligned on an 8-byte boundary.
Error LinuxKernelRewriter::readParaInstructions() {
  ParavirtualPatchSection = BC.getUniqueSectionByName(".parainstructions");
  if (!ParavirtualPatchSection)
    return Error::success();

  DataExtractor DE = DataExtractor(ParavirtualPatchSection->getContents(),
                                   BC.AsmInfo->isLittleEndian(),
                                   BC.AsmInfo->getCodePointerSize());
  uint32_t EntryID = 0;
  DataExtractor::Cursor Cursor(0);
  while (Cursor && !DE.eof(Cursor)) {
    const uint64_t NextOffset = alignTo(Cursor.tell(), Align(PARA_PATCH_ALIGN));
    if (!DE.isValidOffset(NextOffset))
      break;

    Cursor.seek(NextOffset);

    const uint64_t InstrLocation = DE.getU64(Cursor);
    const uint8_t Type = DE.getU8(Cursor);
    const uint8_t Len = DE.getU8(Cursor);

    if (!Cursor)
      return createStringError(
          errc::executable_format_error,
          "out of bounds while reading .parainstructions: %s",
          toString(Cursor.takeError()).c_str());

    ++EntryID;

    if (opts::DumpParavirtualPatchSites) {
      BC.outs() << "Paravirtual patch site: " << EntryID << '\n';
      BC.outs() << "\tInstr: 0x" << Twine::utohexstr(InstrLocation)
                << "\n\tType: 0x" << Twine::utohexstr(Type) << "\n\tLen: 0x"
                << Twine::utohexstr(Len) << '\n';
    }

    BinaryFunction *BF = BC.getBinaryFunctionContainingAddress(InstrLocation);
    if (!BF && opts::Verbosity) {
      BC.outs() << "BOLT-INFO: no function matches address 0x"
                << Twine::utohexstr(InstrLocation)
                << " referenced by paravirtual patch site\n";
    }

    if (BF && BC.shouldEmit(*BF)) {
      MCInst *Inst =
          BF->getInstructionAtOffset(InstrLocation - BF->getAddress());
      if (!Inst)
        return createStringError(errc::executable_format_error,
                                 "no instruction at address 0x%" PRIx64
                                 " in paravirtual call site %d",
                                 InstrLocation, EntryID);
      BC.MIB->addAnnotation(*Inst, "ParaSite", EntryID);
    }
  }

  BC.outs() << "BOLT-INFO: parsed " << EntryID << "
paravirtual patch sites\n"; 1179 1180 return Error::success(); 1181 } 1182 1183 void LinuxKernelRewriter::skipFunctionsWithAnnotation( 1184 StringRef Annotation) const { 1185 for (BinaryFunction &BF : llvm::make_second_range(BC.getBinaryFunctions())) { 1186 if (!BC.shouldEmit(BF)) 1187 continue; 1188 for (const BinaryBasicBlock &BB : BF) { 1189 const bool HasAnnotation = llvm::any_of(BB, [&](const MCInst &Inst) { 1190 return BC.MIB->hasAnnotation(Inst, Annotation); 1191 }); 1192 if (HasAnnotation) { 1193 BF.setSimple(false); 1194 break; 1195 } 1196 } 1197 } 1198 } 1199 1200 Error LinuxKernelRewriter::rewriteParaInstructions() { 1201 // Disable output of functions with paravirtual instructions before the 1202 // rewrite support is complete. 1203 skipFunctionsWithAnnotation("ParaSite"); 1204 1205 return Error::success(); 1206 } 1207 1208 /// Process __bug_table section. 1209 /// This section contains information useful for kernel debugging, mostly 1210 /// utilized by WARN()/WARN_ON() macros and deprecated BUG()/BUG_ON(). 1211 /// 1212 /// Each entry in the section is a struct bug_entry that contains a pointer to 1213 /// the ud2 instruction corresponding to the bug, corresponding file name (both 1214 /// pointers use PC relative offset addressing), line number, and flags. 1215 /// The definition of the struct bug_entry can be found in 1216 /// `include/asm-generic/bug.h`. The first entry in the struct is an instruction 1217 /// address encoded as a PC-relative offset. In theory, it could be an absolute 1218 /// address if CONFIG_GENERIC_BUG_RELATIVE_POINTERS is not set, but in practice 1219 /// the kernel code relies on it being a relative offset on x86-64. 1220 Error LinuxKernelRewriter::readBugTable() { 1221 BugTableSection = BC.getUniqueSectionByName("__bug_table"); 1222 if (!BugTableSection) 1223 return Error::success(); 1224 1225 if (BugTableSection->getSize() % BUG_TABLE_ENTRY_SIZE) 1226 return createStringError(errc::executable_format_error, 1227 "bug table size error"); 1228 1229 const uint64_t SectionAddress = BugTableSection->getAddress(); 1230 DataExtractor DE(BugTableSection->getContents(), BC.AsmInfo->isLittleEndian(), 1231 BC.AsmInfo->getCodePointerSize()); 1232 DataExtractor::Cursor Cursor(0); 1233 uint32_t EntryID = 0; 1234 while (Cursor && Cursor.tell() < BugTableSection->getSize()) { 1235 const uint64_t Pos = Cursor.tell(); 1236 const uint64_t InstAddress = 1237 SectionAddress + Pos + (int32_t)DE.getU32(Cursor); 1238 Cursor.seek(Pos + BUG_TABLE_ENTRY_SIZE); 1239 1240 if (!Cursor) 1241 return createStringError(errc::executable_format_error, 1242 "out of bounds while reading __bug_table: %s", 1243 toString(Cursor.takeError()).c_str()); 1244 1245 ++EntryID; 1246 1247 BinaryFunction *BF = BC.getBinaryFunctionContainingAddress(InstAddress); 1248 if (!BF && opts::Verbosity) { 1249 BC.outs() << "BOLT-INFO: no function matches address 0x" 1250 << Twine::utohexstr(InstAddress) 1251 << " referenced by bug table\n"; 1252 } 1253 1254 if (BF && BC.shouldEmit(*BF)) { 1255 MCInst *Inst = BF->getInstructionAtOffset(InstAddress - BF->getAddress()); 1256 if (!Inst) 1257 return createStringError(errc::executable_format_error, 1258 "no instruction at address 0x%" PRIx64 1259 " referenced by bug table entry %d", 1260 InstAddress, EntryID); 1261 BC.MIB->addAnnotation(*Inst, "BugEntry", EntryID); 1262 1263 FunctionBugList[BF].push_back(EntryID); 1264 } 1265 } 1266 1267 BC.outs() << "BOLT-INFO: parsed " << EntryID << " bug table entries\n"; 1268 1269 return Error::success(); 1270 } 1271 1272 /// 
find_bug() uses linear search to match an address to an entry in the bug
/// table. Hence, there is no need to sort entries when rewriting the table.
/// When we need to erase an entry, we set its instruction address to zero.
Error LinuxKernelRewriter::rewriteBugTable() {
  if (!BugTableSection)
    return Error::success();

  for (BinaryFunction &BF : llvm::make_second_range(BC.getBinaryFunctions())) {
    if (!BC.shouldEmit(BF))
      continue;

    if (!FunctionBugList.count(&BF))
      continue;

    // Bugs that will be emitted for this function.
    DenseSet<uint32_t> EmittedIDs;
    for (BinaryBasicBlock &BB : BF) {
      for (MCInst &Inst : BB) {
        if (!BC.MIB->hasAnnotation(Inst, "BugEntry"))
          continue;
        const uint32_t ID = BC.MIB->getAnnotationAs<uint32_t>(Inst, "BugEntry");
        EmittedIDs.insert(ID);

        // Create a relocation entry for this bug entry.
        MCSymbol *Label =
            BC.MIB->getOrCreateInstLabel(Inst, "__BUG_", BC.Ctx.get());
        const uint64_t EntryOffset = (ID - 1) * BUG_TABLE_ENTRY_SIZE;
        BugTableSection->addRelocation(EntryOffset, Label, ELF::R_X86_64_PC32,
                                       /*Addend*/ 0);
      }
    }

    // Clear bug entries that were not emitted for this function, e.g., as a
    // result of DCE, by setting their instruction address to zero.
    for (const uint32_t ID : FunctionBugList[&BF]) {
      if (!EmittedIDs.count(ID)) {
        const uint64_t EntryOffset = (ID - 1) * BUG_TABLE_ENTRY_SIZE;
        BugTableSection->addRelocation(EntryOffset, nullptr, ELF::R_X86_64_PC32,
                                       /*Addend*/ 0);
      }
    }
  }

  return Error::success();
}

/// The kernel can replace certain instruction sequences depending on the
/// hardware it is running on and the features specified at boot time. The
/// information about alternative instruction sequences is stored in the
/// .altinstructions section. The format of entries in this section is defined
/// in arch/x86/include/asm/alternative.h:
///
/// struct alt_instr {
///   s32 instr_offset;
///   s32 repl_offset;
///   uXX feature;
///   u8  instrlen;
///   u8  replacementlen;
///   u8  padlen;        // present in older kernels
/// } __packed;
///
/// Note that the structure is packed.
Error LinuxKernelRewriter::readAltInstructions() {
  AltInstrSection = BC.getUniqueSectionByName(".altinstructions");
  if (!AltInstrSection)
    return Error::success();

  const uint64_t Address = AltInstrSection->getAddress();
  DataExtractor DE = DataExtractor(AltInstrSection->getContents(),
                                   BC.AsmInfo->isLittleEndian(),
                                   BC.AsmInfo->getCodePointerSize());
  uint64_t EntryID = 0;
  DataExtractor::Cursor Cursor(0);
  while (Cursor && !DE.eof(Cursor)) {
    const uint64_t OrgInstAddress =
        Address + Cursor.tell() + (int32_t)DE.getU32(Cursor);
    const uint64_t AltInstAddress =
        Address + Cursor.tell() + (int32_t)DE.getU32(Cursor);
    const uint64_t Feature = DE.getUnsigned(Cursor, opts::AltInstFeatureSize);
    const uint8_t OrgSize = DE.getU8(Cursor);
    const uint8_t AltSize = DE.getU8(Cursor);

    // Older kernels may have the padlen field.
    const uint8_t PadLen = opts::AltInstHasPadLen ?
DE.getU8(Cursor) : 0; 1356 1357 if (!Cursor) 1358 return createStringError( 1359 errc::executable_format_error, 1360 "out of bounds while reading .altinstructions: %s", 1361 toString(Cursor.takeError()).c_str()); 1362 1363 ++EntryID; 1364 1365 if (opts::DumpAltInstructions) { 1366 BC.outs() << "Alternative instruction entry: " << EntryID 1367 << "\n\tOrg: 0x" << Twine::utohexstr(OrgInstAddress) 1368 << "\n\tAlt: 0x" << Twine::utohexstr(AltInstAddress) 1369 << "\n\tFeature: 0x" << Twine::utohexstr(Feature) 1370 << "\n\tOrgSize: " << (int)OrgSize 1371 << "\n\tAltSize: " << (int)AltSize << '\n'; 1372 if (opts::AltInstHasPadLen) 1373 BC.outs() << "\tPadLen: " << (int)PadLen << '\n'; 1374 } 1375 1376 if (AltSize > OrgSize) 1377 return createStringError(errc::executable_format_error, 1378 "error reading .altinstructions"); 1379 1380 BinaryFunction *BF = BC.getBinaryFunctionContainingAddress(OrgInstAddress); 1381 if (!BF && opts::Verbosity) { 1382 BC.outs() << "BOLT-INFO: no function matches address 0x" 1383 << Twine::utohexstr(OrgInstAddress) 1384 << " of instruction from .altinstructions\n"; 1385 } 1386 1387 BinaryFunction *AltBF = 1388 BC.getBinaryFunctionContainingAddress(AltInstAddress); 1389 if (AltBF && BC.shouldEmit(*AltBF)) { 1390 BC.errs() 1391 << "BOLT-WARNING: alternative instruction sequence found in function " 1392 << *AltBF << '\n'; 1393 AltBF->setIgnored(); 1394 } 1395 1396 if (!BF || !BC.shouldEmit(*BF)) 1397 continue; 1398 1399 if (OrgInstAddress + OrgSize > BF->getAddress() + BF->getSize()) 1400 return createStringError(errc::executable_format_error, 1401 "error reading .altinstructions"); 1402 1403 MCInst *Inst = 1404 BF->getInstructionAtOffset(OrgInstAddress - BF->getAddress()); 1405 if (!Inst) 1406 return createStringError(errc::executable_format_error, 1407 "no instruction at address 0x%" PRIx64 1408 " referenced by .altinstructions entry %d", 1409 OrgInstAddress, EntryID); 1410 1411 // There could be more than one alternative instruction sequences for the 1412 // same original instruction. Annotate each alternative separately. 1413 std::string AnnotationName = "AltInst"; 1414 unsigned N = 2; 1415 while (BC.MIB->hasAnnotation(*Inst, AnnotationName)) 1416 AnnotationName = "AltInst" + std::to_string(N++); 1417 1418 BC.MIB->addAnnotation(*Inst, AnnotationName, EntryID); 1419 1420 // Annotate all instructions from the original sequence. Note that it's not 1421 // the most efficient way to look for instructions in the address range, 1422 // but since alternative instructions are uncommon, it will do for now. 1423 for (uint32_t Offset = 1; Offset < OrgSize; ++Offset) { 1424 Inst = BF->getInstructionAtOffset(OrgInstAddress + Offset - 1425 BF->getAddress()); 1426 if (Inst) 1427 BC.MIB->addAnnotation(*Inst, AnnotationName, EntryID); 1428 } 1429 } 1430 1431 BC.outs() << "BOLT-INFO: parsed " << EntryID 1432 << " alternative instruction entries\n"; 1433 1434 return Error::success(); 1435 } 1436 1437 Error LinuxKernelRewriter::rewriteAltInstructions() { 1438 // Disable output of functions with alt instructions before the rewrite 1439 // support is complete. 1440 skipFunctionsWithAnnotation("AltInst"); 1441 1442 return Error::success(); 1443 } 1444 1445 /// When the Linux kernel needs to handle an error associated with a given PCI 1446 /// device, it uses a table stored in .pci_fixup section to locate a fixup code 1447 /// specific to the vendor and the problematic device. 
The section contains a 1448 /// list of the following structures defined in include/linux/pci.h: 1449 /// 1450 /// struct pci_fixup { 1451 /// u16 vendor; /* Or PCI_ANY_ID */ 1452 /// u16 device; /* Or PCI_ANY_ID */ 1453 /// u32 class; /* Or PCI_ANY_ID */ 1454 /// unsigned int class_shift; /* should be 0, 8, 16 */ 1455 /// int hook_offset; 1456 /// }; 1457 /// 1458 /// Normally, the hook will point to a function start and we don't have to 1459 /// update the pointer if we are not relocating functions. Hence, while reading 1460 /// the table we validate this assumption. If a function has a fixup code in the 1461 /// middle of its body, we issue a warning and ignore it. 1462 Error LinuxKernelRewriter::readPCIFixupTable() { 1463 PCIFixupSection = BC.getUniqueSectionByName(".pci_fixup"); 1464 if (!PCIFixupSection) 1465 return Error::success(); 1466 1467 if (PCIFixupSection->getSize() % PCI_FIXUP_ENTRY_SIZE) 1468 return createStringError(errc::executable_format_error, 1469 "PCI fixup table size error"); 1470 1471 const uint64_t Address = PCIFixupSection->getAddress(); 1472 DataExtractor DE = DataExtractor(PCIFixupSection->getContents(), 1473 BC.AsmInfo->isLittleEndian(), 1474 BC.AsmInfo->getCodePointerSize()); 1475 uint64_t EntryID = 0; 1476 DataExtractor::Cursor Cursor(0); 1477 while (Cursor && !DE.eof(Cursor)) { 1478 const uint16_t Vendor = DE.getU16(Cursor); 1479 const uint16_t Device = DE.getU16(Cursor); 1480 const uint32_t Class = DE.getU32(Cursor); 1481 const uint32_t ClassShift = DE.getU32(Cursor); 1482 const uint64_t HookAddress = 1483 Address + Cursor.tell() + (int32_t)DE.getU32(Cursor); 1484 1485 if (!Cursor) 1486 return createStringError(errc::executable_format_error, 1487 "out of bounds while reading .pci_fixup: %s", 1488 toString(Cursor.takeError()).c_str()); 1489 1490 ++EntryID; 1491 1492 if (opts::DumpPCIFixups) { 1493 BC.outs() << "PCI fixup entry: " << EntryID << "\n\tVendor 0x" 1494 << Twine::utohexstr(Vendor) << "\n\tDevice: 0x" 1495 << Twine::utohexstr(Device) << "\n\tClass: 0x" 1496 << Twine::utohexstr(Class) << "\n\tClassShift: 0x" 1497 << Twine::utohexstr(ClassShift) << "\n\tHookAddress: 0x" 1498 << Twine::utohexstr(HookAddress) << '\n'; 1499 } 1500 1501 BinaryFunction *BF = BC.getBinaryFunctionContainingAddress(HookAddress); 1502 if (!BF && opts::Verbosity) { 1503 BC.outs() << "BOLT-INFO: no function matches address 0x" 1504 << Twine::utohexstr(HookAddress) 1505 << " of hook from .pci_fixup\n"; 1506 } 1507 1508 if (!BF || !BC.shouldEmit(*BF)) 1509 continue; 1510 1511 if (const uint64_t Offset = HookAddress - BF->getAddress()) { 1512 BC.errs() << "BOLT-WARNING: PCI fixup detected in the middle of function " 1513 << *BF << " at offset 0x" << Twine::utohexstr(Offset) << '\n'; 1514 BF->setSimple(false); 1515 } 1516 } 1517 1518 BC.outs() << "BOLT-INFO: parsed " << EntryID << " PCI fixup entries\n"; 1519 1520 return Error::success(); 1521 } 1522 1523 /// Runtime code modification used by static keys is the most ubiquitous 1524 /// self-modifying feature of the Linux kernel. The idea is to eliminate the 1525 /// condition check and associated conditional jump on a hot path if that 1526 /// condition (based on a boolean value of a static key) does not change often. 1527 /// Whenever the condition changes, the kernel runtime modifies all code paths 1528 /// associated with that key flipping the code between nop and (unconditional) 1529 /// jump. 
The information about the code is stored in a static key jump table 1530 /// and contains the list of entries of the following type from 1531 /// include/linux/jump_label.h: 1532 // 1533 /// struct jump_entry { 1534 /// s32 code; 1535 /// s32 target; 1536 /// long key; // key may be far away from the core kernel under KASLR 1537 /// }; 1538 /// 1539 /// The list does not have to be stored in any sorted way, but it is sorted at 1540 /// boot time (or module initialization time) first by "key" and then by "code". 1541 /// jump_label_sort_entries() is responsible for sorting the table. 1542 /// 1543 /// The key in jump_entry structure uses lower two bits of the key address 1544 /// (which itself is aligned) to store extra information. We are interested in 1545 /// the lower bit which indicates if the key is likely to be set on the code 1546 /// path associated with this jump_entry. 1547 /// 1548 /// static_key_{enable,disable}() functions modify the code based on key and 1549 /// jump table entries. 1550 /// 1551 /// jump_label_update() updates all code entries for a given key. Batch mode is 1552 /// used for x86. 1553 /// 1554 /// The actual patching happens in text_poke_bp_batch() that overrides the first 1555 /// byte of the sequence with int3 before proceeding with actual code 1556 /// replacement. 1557 Error LinuxKernelRewriter::readStaticKeysJumpTable() { 1558 const BinaryData *StaticKeysJumpTable = 1559 BC.getBinaryDataByName("__start___jump_table"); 1560 if (!StaticKeysJumpTable) 1561 return Error::success(); 1562 1563 StaticKeysJumpTableAddress = StaticKeysJumpTable->getAddress(); 1564 1565 const BinaryData *Stop = BC.getBinaryDataByName("__stop___jump_table"); 1566 if (!Stop) 1567 return createStringError(errc::executable_format_error, 1568 "missing __stop___jump_table symbol"); 1569 1570 ErrorOr<BinarySection &> ErrorOrSection = 1571 BC.getSectionForAddress(StaticKeysJumpTableAddress); 1572 if (!ErrorOrSection) 1573 return createStringError(errc::executable_format_error, 1574 "no section matching __start___jump_table"); 1575 1576 StaticKeysJumpSection = *ErrorOrSection; 1577 if (!StaticKeysJumpSection->containsAddress(Stop->getAddress() - 1)) 1578 return createStringError(errc::executable_format_error, 1579 "__stop___jump_table not in the same section " 1580 "as __start___jump_table"); 1581 1582 if ((Stop->getAddress() - StaticKeysJumpTableAddress) % 1583 STATIC_KEYS_JUMP_ENTRY_SIZE) 1584 return createStringError(errc::executable_format_error, 1585 "static keys jump table size error"); 1586 1587 const uint64_t SectionAddress = StaticKeysJumpSection->getAddress(); 1588 DataExtractor DE(StaticKeysJumpSection->getContents(), 1589 BC.AsmInfo->isLittleEndian(), 1590 BC.AsmInfo->getCodePointerSize()); 1591 DataExtractor::Cursor Cursor(StaticKeysJumpTableAddress - SectionAddress); 1592 uint32_t EntryID = 0; 1593 while (Cursor && Cursor.tell() < Stop->getAddress() - SectionAddress) { 1594 const uint64_t JumpAddress = 1595 SectionAddress + Cursor.tell() + (int32_t)DE.getU32(Cursor); 1596 const uint64_t TargetAddress = 1597 SectionAddress + Cursor.tell() + (int32_t)DE.getU32(Cursor); 1598 const uint64_t KeyAddress = 1599 SectionAddress + Cursor.tell() + (int64_t)DE.getU64(Cursor); 1600 1601 // Consume the status of the cursor. 
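    // All three fields are self-relative: code and target are 32-bit offsets,
    // while key is a 64-bit offset since the key may be far from the core
    // kernel under KASLR (see the comment above).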

    // Consume the status of the cursor.
    if (!Cursor)
      return createStringError(
          errc::executable_format_error,
          "out of bounds while reading static keys jump table: %s",
          toString(Cursor.takeError()).c_str());

    ++EntryID;

    JumpInfo.push_back(JumpInfoEntry());
    JumpInfoEntry &Info = JumpInfo.back();
    Info.Likely = KeyAddress & 1;

    if (opts::DumpStaticKeys) {
      BC.outs() << "Static key jump entry: " << EntryID
                << "\n\tJumpAddress: 0x" << Twine::utohexstr(JumpAddress)
                << "\n\tTargetAddress: 0x" << Twine::utohexstr(TargetAddress)
                << "\n\tKeyAddress: 0x" << Twine::utohexstr(KeyAddress)
                << "\n\tIsLikely: " << Info.Likely << '\n';
    }

    BinaryFunction *BF = BC.getBinaryFunctionContainingAddress(JumpAddress);
    if (!BF && opts::Verbosity) {
      BC.outs()
          << "BOLT-INFO: no function matches address 0x"
          << Twine::utohexstr(JumpAddress)
          << " of jump instruction referenced from static keys jump table\n";
    }

    if (!BF || !BC.shouldEmit(*BF))
      continue;

    MCInst *Inst = BF->getInstructionAtOffset(JumpAddress - BF->getAddress());
    if (!Inst)
      return createStringError(
          errc::executable_format_error,
          "no instruction at static keys jump site address 0x%" PRIx64,
          JumpAddress);

    if (!BF->containsAddress(TargetAddress))
      return createStringError(
          errc::executable_format_error,
          "invalid target of static keys jump at 0x%" PRIx64 " : 0x%" PRIx64,
          JumpAddress, TargetAddress);

    const bool IsBranch = BC.MIB->isBranch(*Inst);
    if (!IsBranch && !BC.MIB->isNoop(*Inst))
      return createStringError(errc::executable_format_error,
                               "jump or nop expected at address 0x%" PRIx64,
                               JumpAddress);

    const uint64_t Size = BC.computeInstructionSize(*Inst);
    if (Size != 2 && Size != 5) {
      return createStringError(
          errc::executable_format_error,
          "unexpected static keys jump size at address 0x%" PRIx64,
          JumpAddress);
    }

    MCSymbol *Target = BF->registerBranch(JumpAddress, TargetAddress);
    MCInst StaticKeyBranch;

    // Create a conditional branch instruction. The actual condition code
    // should not matter as long as it's a valid one. The instruction should be
    // treated as a conditional branch for control-flow purposes. Before we
    // emit the code, it will be converted to a different instruction in
    // rewriteStaticKeysJumpTable().
    //
    // NB: for older kernels, under the LongJumpLabels option, we create a long
    //     conditional branch to guarantee that code size estimation takes into
    //     account the extra bytes needed for the long branch that will be used
    //     by the kernel patching code. Newer kernels can work with both short
    //     and long branches. The code for a long conditional branch is larger
    //     than an unconditional one, so we are pessimistic in our estimations.
    if (opts::LongJumpLabels)
      BC.MIB->createLongCondBranch(StaticKeyBranch, Target, 0, BC.Ctx.get());
    else
      BC.MIB->createCondBranch(StaticKeyBranch, Target, 0, BC.Ctx.get());
    BC.MIB->moveAnnotations(std::move(*Inst), StaticKeyBranch);
    BC.MIB->setDynamicBranch(StaticKeyBranch, EntryID);
    *Inst = StaticKeyBranch;

    // IsBranch = InitialValue ^ LIKELY
    //
    //   IsBranch  InitialValue  LIKELY
    //      0           0          0
    //      1           0          1
    //      1           1          0
    //      0           1          1
    //
    // => InitialValue = IsBranch ^ LIKELY
    Info.InitValue = IsBranch ^ Info.Likely;
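
    // Inferred example from the table above (not additional metadata from the
    // input): a site generated by static_branch_likely() has Likely == 1, so
    // if the instruction at the site is currently a nop (IsBranch == 0), the
    // key is expected to start out enabled: InitValue = 0 ^ 1 = 1.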

    // Add annotations to facilitate manual code analysis.
    BC.MIB->addAnnotation(*Inst, "Likely", Info.Likely);
    BC.MIB->addAnnotation(*Inst, "InitValue", Info.InitValue);
    if (!BC.MIB->getSize(*Inst))
      BC.MIB->setSize(*Inst, Size);

    if (opts::LongJumpLabels)
      BC.MIB->setSize(*Inst, 5);
  }

  BC.outs() << "BOLT-INFO: parsed " << EntryID << " static keys jump entries\n";

  return Error::success();
}

// Pre-emit pass. Convert dynamic branch instructions into jumps that could be
// relaxed. In the post-emit pass we will convert those jumps into nops when
// necessary. We do the unconditional conversion into jumps so that the jumps
// can be relaxed and the optimal size of the jump/nop instruction is selected.
Error LinuxKernelRewriter::rewriteStaticKeysJumpTable() {
  if (!StaticKeysJumpSection)
    return Error::success();

  uint64_t NumShort = 0;
  uint64_t NumLong = 0;
  for (BinaryFunction &BF : llvm::make_second_range(BC.getBinaryFunctions())) {
    if (!BC.shouldEmit(BF))
      continue;

    for (BinaryBasicBlock &BB : BF) {
      for (MCInst &Inst : BB) {
        if (!BC.MIB->isDynamicBranch(Inst))
          continue;

        const uint32_t EntryID = *BC.MIB->getDynamicBranchID(Inst);
        MCSymbol *Target =
            const_cast<MCSymbol *>(BC.MIB->getTargetSymbol(Inst));
        assert(Target && "Target symbol should be set.");

        const JumpInfoEntry &Info = JumpInfo[EntryID - 1];
        const bool IsBranch = Info.Likely ^ Info.InitValue;

        uint32_t Size = *BC.MIB->getSize(Inst);
        if (Size == 2)
          ++NumShort;
        else if (Size == 5)
          ++NumLong;
        else
          llvm_unreachable("Wrong size for static keys jump instruction.");

        MCInst NewInst;
        // Replace the instruction with an unconditional jump even if it needs
        // to be a nop in the binary.
        if (opts::LongJumpLabels) {
          BC.MIB->createLongUncondBranch(NewInst, Target, BC.Ctx.get());
        } else {
          // Newer kernels can handle short and long jumps for static keys.
          // Optimistically, emit a short jump and check if it gets relaxed
          // into a long one during post-emit. Only then convert the jump to a
          // nop.
          BC.MIB->createUncondBranch(NewInst, Target, BC.Ctx.get());
        }

        BC.MIB->moveAnnotations(std::move(Inst), NewInst);
        Inst = NewInst;

        // Mark the instruction for nop conversion.
        if (!IsBranch)
          NopIDs.insert(EntryID);

        MCSymbol *Label =
            BC.MIB->getOrCreateInstLabel(Inst, "__SK_", BC.Ctx.get());

        // Create a relocation against the label.
        const uint64_t EntryOffset = StaticKeysJumpTableAddress -
                                     StaticKeysJumpSection->getAddress() +
                                     (EntryID - 1) * 16;
        StaticKeysJumpSection->addRelocation(EntryOffset, Label,
                                             ELF::R_X86_64_PC32,
                                             /*Addend*/ 0);
        StaticKeysJumpSection->addRelocation(EntryOffset + 4, Target,
                                             ELF::R_X86_64_PC32, /*Addend*/ 0);
      }
    }
  }

  BC.outs() << "BOLT-INFO: the input contains " << NumShort << " short and "
            << NumLong << " long static keys jumps in optimized functions\n";

  return Error::success();
}
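
// Note on the relocations added above: each __jump_table entry occupies
// 16 bytes (s32 code, s32 target, long key), so entry N starts at
// (N - 1) * 16 from the table base. The R_X86_64_PC32 relocation at that
// offset repoints the "code" field at the __SK_ label of the (possibly moved)
// jump site, and the one 4 bytes later repoints the "target" field; the "key"
// field is left untouched. For example, entry 3 gets relocations at table
// offsets 0x20 and 0x24. EntryOffset above additionally biases these offsets
// by the table's position within the containing section.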

// Post-emit pass over the static keys jump section. Convert jumps to nops.
Error LinuxKernelRewriter::updateStaticKeysJumpTablePostEmit() {
  if (!StaticKeysJumpSection || !StaticKeysJumpSection->isFinalized())
    return Error::success();

  const uint64_t SectionAddress = StaticKeysJumpSection->getAddress();
  DataExtractor DE(StaticKeysJumpSection->getOutputContents(),
                   BC.AsmInfo->isLittleEndian(),
                   BC.AsmInfo->getCodePointerSize());
  DataExtractor::Cursor Cursor(StaticKeysJumpTableAddress - SectionAddress);
  const BinaryData *Stop = BC.getBinaryDataByName("__stop___jump_table");
  uint32_t EntryID = 0;
  uint64_t NumShort = 0;
  uint64_t NumLong = 0;
  while (Cursor && Cursor.tell() < Stop->getAddress() - SectionAddress) {
    const uint64_t JumpAddress =
        SectionAddress + Cursor.tell() + (int32_t)DE.getU32(Cursor);
    const uint64_t TargetAddress =
        SectionAddress + Cursor.tell() + (int32_t)DE.getU32(Cursor);
    const uint64_t KeyAddress =
        SectionAddress + Cursor.tell() + (int64_t)DE.getU64(Cursor);

    // Consume the status of the cursor.
    if (!Cursor)
      return createStringError(errc::executable_format_error,
                               "out of bounds while updating static keys: %s",
                               toString(Cursor.takeError()).c_str());

    ++EntryID;

    LLVM_DEBUG({
      dbgs() << "\n\tJumpAddress: 0x" << Twine::utohexstr(JumpAddress)
             << "\n\tTargetAddress: 0x" << Twine::utohexstr(TargetAddress)
             << "\n\tKeyAddress: 0x" << Twine::utohexstr(KeyAddress) << '\n';
    });
    (void)TargetAddress;
    (void)KeyAddress;

    BinaryFunction *BF =
        BC.getBinaryFunctionContainingAddress(JumpAddress,
                                              /*CheckPastEnd*/ false,
                                              /*UseMaxSize*/ true);
    assert(BF && "Cannot get function for modified static key.");

    if (!BF->isEmitted())
      continue;

    // Disassemble the instruction to collect stats even if nop-conversion is
    // unnecessary.
    MutableArrayRef<uint8_t> Contents = MutableArrayRef<uint8_t>(
        reinterpret_cast<uint8_t *>(BF->getImageAddress()), BF->getImageSize());
    assert(Contents.size() && "Non-empty function image expected.");

    MCInst Inst;
    uint64_t Size;
    const uint64_t JumpOffset = JumpAddress - BF->getAddress();
    if (!BC.DisAsm->getInstruction(Inst, Size, Contents.slice(JumpOffset), 0,
                                   nulls())) {
      llvm_unreachable("Unable to disassemble jump instruction.");
    }
    assert(BC.MIB->isBranch(Inst) && "Branch instruction expected.");

    if (Size == 2)
      ++NumShort;
    else if (Size == 5)
      ++NumLong;
    else
      llvm_unreachable("Unexpected size for static keys jump instruction.");

    // Check if we need to convert the jump instruction into a nop.
    if (!NopIDs.contains(EntryID))
      continue;

    SmallString<15> NopCode;
    raw_svector_ostream VecOS(NopCode);
    BC.MAB->writeNopData(VecOS, Size, BC.STI.get());
    for (uint64_t I = 0; I < Size; ++I)
      Contents[JumpOffset + I] = NopCode[I];
  }

  BC.outs() << "BOLT-INFO: written " << NumShort << " short and " << NumLong
            << " long static keys jumps in optimized functions\n";

  return Error::success();
}

} // namespace

std::unique_ptr<MetadataRewriter>
llvm::bolt::createLinuxKernelRewriter(BinaryContext &BC) {
  return std::make_unique<LinuxKernelRewriter>(BC);
}