1 //===- bolt/Rewrite/LinuxKernelRewriter.cpp -------------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // Support for updating Linux Kernel metadata. 10 // 11 //===----------------------------------------------------------------------===// 12 13 #include "bolt/Core/BinaryFunction.h" 14 #include "bolt/Rewrite/MetadataRewriter.h" 15 #include "bolt/Rewrite/MetadataRewriters.h" 16 #include "bolt/Utils/CommandLineOpts.h" 17 #include "llvm/ADT/ArrayRef.h" 18 #include "llvm/ADT/DenseSet.h" 19 #include "llvm/MC/MCDisassembler/MCDisassembler.h" 20 #include "llvm/Support/BinaryStreamWriter.h" 21 #include "llvm/Support/CommandLine.h" 22 #include "llvm/Support/Debug.h" 23 #include "llvm/Support/Errc.h" 24 25 #define DEBUG_TYPE "bolt-linux" 26 27 using namespace llvm; 28 using namespace bolt; 29 30 namespace opts { 31 32 static cl::opt<bool> 33 AltInstHasPadLen("alt-inst-has-padlen", 34 cl::desc("specify that .altinstructions has padlen field"), 35 cl::init(false), cl::Hidden, cl::cat(BoltCategory)); 36 37 static cl::opt<uint32_t> 38 AltInstFeatureSize("alt-inst-feature-size", 39 cl::desc("size of feature field in .altinstructions"), 40 cl::init(2), cl::Hidden, cl::cat(BoltCategory)); 41 42 static cl::opt<bool> 43 DumpAltInstructions("dump-alt-instructions", 44 cl::desc("dump Linux alternative instructions info"), 45 cl::init(false), cl::Hidden, cl::cat(BoltCategory)); 46 47 static cl::opt<bool> 48 DumpExceptions("dump-linux-exceptions", 49 cl::desc("dump Linux kernel exception table"), 50 cl::init(false), cl::Hidden, cl::cat(BoltCategory)); 51 52 static cl::opt<bool> 53 DumpORC("dump-orc", cl::desc("dump raw ORC unwind information (sorted)"), 54 cl::init(false), cl::Hidden, cl::cat(BoltCategory)); 55 56 static cl::opt<bool> DumpParavirtualPatchSites( 57 "dump-para-sites", cl::desc("dump Linux kernel paravitual patch sites"), 58 cl::init(false), cl::Hidden, cl::cat(BoltCategory)); 59 60 static cl::opt<bool> 61 DumpPCIFixups("dump-pci-fixups", 62 cl::desc("dump Linux kernel PCI fixup table"), 63 cl::init(false), cl::Hidden, cl::cat(BoltCategory)); 64 65 static cl::opt<bool> DumpStaticCalls("dump-static-calls", 66 cl::desc("dump Linux kernel static calls"), 67 cl::init(false), cl::Hidden, 68 cl::cat(BoltCategory)); 69 70 static cl::opt<bool> 71 DumpStaticKeys("dump-static-keys", 72 cl::desc("dump Linux kernel static keys jump table"), 73 cl::init(false), cl::Hidden, cl::cat(BoltCategory)); 74 75 static cl::opt<bool> LongJumpLabels( 76 "long-jump-labels", 77 cl::desc("always use long jumps/nops for Linux kernel static keys"), 78 cl::init(false), cl::Hidden, cl::cat(BoltCategory)); 79 80 static cl::opt<bool> 81 PrintORC("print-orc", 82 cl::desc("print ORC unwind information for instructions"), 83 cl::init(true), cl::Hidden, cl::cat(BoltCategory)); 84 85 } // namespace opts 86 87 /// Linux Kernel supports stack unwinding using ORC (oops rewind capability). 88 /// ORC state at every IP can be described by the following data structure. 89 struct ORCState { 90 int16_t SPOffset; 91 int16_t BPOffset; 92 int16_t Info; 93 94 bool operator==(const ORCState &Other) const { 95 return SPOffset == Other.SPOffset && BPOffset == Other.BPOffset && 96 Info == Other.Info; 97 } 98 99 bool operator!=(const ORCState &Other) const { return !(*this == Other); } 100 }; 101 102 /// Section terminator ORC entry. 103 static ORCState NullORC = {0, 0, 0}; 104 105 /// Basic printer for ORC entry. It does not provide the same level of 106 /// information as objtool (for now). 107 inline raw_ostream &operator<<(raw_ostream &OS, const ORCState &E) { 108 if (!opts::PrintORC) 109 return OS; 110 if (E != NullORC) 111 OS << format("{sp: %d, bp: %d, info: 0x%x}", E.SPOffset, E.BPOffset, 112 E.Info); 113 else 114 OS << "{terminator}"; 115 116 return OS; 117 } 118 119 namespace { 120 121 class LinuxKernelRewriter final : public MetadataRewriter { 122 /// Linux Kernel special sections point to a specific instruction in many 123 /// cases. Unlike SDTMarkerInfo, these markers can come from different 124 /// sections. 125 struct LKInstructionMarkerInfo { 126 uint64_t SectionOffset; 127 int32_t PCRelativeOffset; 128 bool IsPCRelative; 129 StringRef SectionName; 130 }; 131 132 /// Map linux kernel program locations/instructions to their pointers in 133 /// special linux kernel sections 134 std::unordered_map<uint64_t, std::vector<LKInstructionMarkerInfo>> LKMarkers; 135 136 /// Linux ORC sections. 137 ErrorOr<BinarySection &> ORCUnwindSection = std::errc::bad_address; 138 ErrorOr<BinarySection &> ORCUnwindIPSection = std::errc::bad_address; 139 140 /// Size of entries in ORC sections. 141 static constexpr size_t ORC_UNWIND_ENTRY_SIZE = 6; 142 static constexpr size_t ORC_UNWIND_IP_ENTRY_SIZE = 4; 143 144 struct ORCListEntry { 145 uint64_t IP; /// Instruction address. 146 BinaryFunction *BF; /// Binary function corresponding to the entry. 147 ORCState ORC; /// Stack unwind info in ORC format. 148 149 /// ORC entries are sorted by their IPs. Terminator entries (NullORC) 150 /// should precede other entries with the same address. 151 bool operator<(const ORCListEntry &Other) const { 152 if (IP < Other.IP) 153 return 1; 154 if (IP > Other.IP) 155 return 0; 156 return ORC == NullORC && Other.ORC != NullORC; 157 } 158 }; 159 160 using ORCListType = std::vector<ORCListEntry>; 161 ORCListType ORCEntries; 162 163 /// Number of entries in the input file ORC sections. 164 uint64_t NumORCEntries = 0; 165 166 /// Section containing static keys jump table. 167 ErrorOr<BinarySection &> StaticKeysJumpSection = std::errc::bad_address; 168 uint64_t StaticKeysJumpTableAddress = 0; 169 static constexpr size_t STATIC_KEYS_JUMP_ENTRY_SIZE = 8; 170 171 struct JumpInfoEntry { 172 bool Likely; 173 bool InitValue; 174 }; 175 SmallVector<JumpInfoEntry, 16> JumpInfo; 176 177 /// Static key entries that need nop conversion. 178 DenseSet<uint32_t> NopIDs; 179 180 /// Section containing static call table. 181 ErrorOr<BinarySection &> StaticCallSection = std::errc::bad_address; 182 uint64_t StaticCallTableAddress = 0; 183 static constexpr size_t STATIC_CALL_ENTRY_SIZE = 8; 184 185 struct StaticCallInfo { 186 uint32_t ID; /// Identifier of the entry in the table. 187 BinaryFunction *Function; /// Function containing associated call. 188 MCSymbol *Label; /// Label attached to the call. 189 }; 190 using StaticCallListType = std::vector<StaticCallInfo>; 191 StaticCallListType StaticCallEntries; 192 193 /// Section containing the Linux exception table. 194 ErrorOr<BinarySection &> ExceptionsSection = std::errc::bad_address; 195 static constexpr size_t EXCEPTION_TABLE_ENTRY_SIZE = 12; 196 197 /// Functions with exception handling code. 198 DenseSet<BinaryFunction *> FunctionsWithExceptions; 199 200 /// Section with paravirtual patch sites. 201 ErrorOr<BinarySection &> ParavirtualPatchSection = std::errc::bad_address; 202 203 /// Alignment of paravirtual patch structures. 204 static constexpr size_t PARA_PATCH_ALIGN = 8; 205 206 /// .altinstructions section. 207 ErrorOr<BinarySection &> AltInstrSection = std::errc::bad_address; 208 209 /// Section containing Linux bug table. 210 ErrorOr<BinarySection &> BugTableSection = std::errc::bad_address; 211 212 /// Size of bug_entry struct. 213 static constexpr size_t BUG_TABLE_ENTRY_SIZE = 12; 214 215 /// List of bug entries per function. 216 using FunctionBugListType = 217 DenseMap<BinaryFunction *, SmallVector<uint32_t, 2>>; 218 FunctionBugListType FunctionBugList; 219 220 /// .pci_fixup section. 221 ErrorOr<BinarySection &> PCIFixupSection = std::errc::bad_address; 222 static constexpr size_t PCI_FIXUP_ENTRY_SIZE = 16; 223 224 /// Insert an LKMarker for a given code pointer \p PC from a non-code section 225 /// \p SectionName. 226 void insertLKMarker(uint64_t PC, uint64_t SectionOffset, 227 int32_t PCRelativeOffset, bool IsPCRelative, 228 StringRef SectionName); 229 230 /// Process linux kernel special sections and their relocations. 231 void processLKSections(); 232 233 /// Process __ksymtab and __ksymtab_gpl. 234 void processLKKSymtab(bool IsGPL = false); 235 236 /// Process special linux kernel section, .smp_locks. 237 void processLKSMPLocks(); 238 239 /// Update LKMarkers' locations for the output binary. 240 void updateLKMarkers(); 241 242 /// Read ORC unwind information and annotate instructions. 243 Error readORCTables(); 244 245 /// Update ORC for functions once CFG is constructed. 246 Error processORCPostCFG(); 247 248 /// Update ORC data in the binary. 249 Error rewriteORCTables(); 250 251 /// Validate written ORC tables after binary emission. 252 Error validateORCTables(); 253 254 /// Static call table handling. 255 Error readStaticCalls(); 256 Error rewriteStaticCalls(); 257 258 Error readExceptionTable(); 259 Error rewriteExceptionTable(); 260 261 /// Paravirtual instruction patch sites. 262 Error readParaInstructions(); 263 Error rewriteParaInstructions(); 264 265 /// __bug_table section handling. 266 Error readBugTable(); 267 Error rewriteBugTable(); 268 269 /// Do no process functions containing instruction annotated with 270 /// \p Annotation. 271 void skipFunctionsWithAnnotation(StringRef Annotation) const; 272 273 /// Handle alternative instruction info from .altinstructions. 274 Error readAltInstructions(); 275 Error rewriteAltInstructions(); 276 277 /// Read .pci_fixup 278 Error readPCIFixupTable(); 279 280 /// Handle static keys jump table. 281 Error readStaticKeysJumpTable(); 282 Error rewriteStaticKeysJumpTable(); 283 Error updateStaticKeysJumpTablePostEmit(); 284 285 /// Mark instructions referenced by kernel metadata. 286 Error markInstructions(); 287 288 public: 289 LinuxKernelRewriter(BinaryContext &BC) 290 : MetadataRewriter("linux-kernel-rewriter", BC) {} 291 292 Error preCFGInitializer() override { 293 processLKSections(); 294 if (Error E = markInstructions()) 295 return E; 296 297 if (Error E = readORCTables()) 298 return E; 299 300 if (Error E = readStaticCalls()) 301 return E; 302 303 if (Error E = readExceptionTable()) 304 return E; 305 306 if (Error E = readParaInstructions()) 307 return E; 308 309 if (Error E = readBugTable()) 310 return E; 311 312 if (Error E = readAltInstructions()) 313 return E; 314 315 if (Error E = readPCIFixupTable()) 316 return E; 317 318 if (Error E = readStaticKeysJumpTable()) 319 return E; 320 321 return Error::success(); 322 } 323 324 Error postCFGInitializer() override { 325 if (Error E = processORCPostCFG()) 326 return E; 327 328 return Error::success(); 329 } 330 331 Error preEmitFinalizer() override { 332 // Since rewriteExceptionTable() can mark functions as non-simple, run it 333 // before other rewriters that depend on simple/emit status. 334 if (Error E = rewriteExceptionTable()) 335 return E; 336 337 if (Error E = rewriteAltInstructions()) 338 return E; 339 340 if (Error E = rewriteParaInstructions()) 341 return E; 342 343 if (Error E = rewriteORCTables()) 344 return E; 345 346 if (Error E = rewriteStaticCalls()) 347 return E; 348 349 if (Error E = rewriteStaticKeysJumpTable()) 350 return E; 351 352 if (Error E = rewriteBugTable()) 353 return E; 354 355 return Error::success(); 356 } 357 358 Error postEmitFinalizer() override { 359 updateLKMarkers(); 360 361 if (Error E = updateStaticKeysJumpTablePostEmit()) 362 return E; 363 364 if (Error E = validateORCTables()) 365 return E; 366 367 return Error::success(); 368 } 369 }; 370 371 Error LinuxKernelRewriter::markInstructions() { 372 for (const uint64_t PC : llvm::make_first_range(LKMarkers)) { 373 BinaryFunction *BF = BC.getBinaryFunctionContainingAddress(PC); 374 375 if (!BF || !BC.shouldEmit(*BF)) 376 continue; 377 378 const uint64_t Offset = PC - BF->getAddress(); 379 MCInst *Inst = BF->getInstructionAtOffset(Offset); 380 if (!Inst) 381 return createStringError(errc::executable_format_error, 382 "no instruction matches kernel marker offset"); 383 384 BC.MIB->setOffset(*Inst, static_cast<uint32_t>(Offset)); 385 386 BF->setHasSDTMarker(true); 387 } 388 389 return Error::success(); 390 } 391 392 void LinuxKernelRewriter::insertLKMarker(uint64_t PC, uint64_t SectionOffset, 393 int32_t PCRelativeOffset, 394 bool IsPCRelative, 395 StringRef SectionName) { 396 LKMarkers[PC].emplace_back(LKInstructionMarkerInfo{ 397 SectionOffset, PCRelativeOffset, IsPCRelative, SectionName}); 398 } 399 400 void LinuxKernelRewriter::processLKSections() { 401 processLKKSymtab(); 402 processLKKSymtab(true); 403 processLKSMPLocks(); 404 } 405 406 /// Process __ksymtab[_gpl] sections of Linux Kernel. 407 /// This section lists all the vmlinux symbols that kernel modules can access. 408 /// 409 /// All the entries are 4 bytes each and hence we can read them by one by one 410 /// and ignore the ones that are not pointing to the .text section. All pointers 411 /// are PC relative offsets. Always, points to the beginning of the function. 412 void LinuxKernelRewriter::processLKKSymtab(bool IsGPL) { 413 StringRef SectionName = "__ksymtab"; 414 if (IsGPL) 415 SectionName = "__ksymtab_gpl"; 416 ErrorOr<BinarySection &> SectionOrError = 417 BC.getUniqueSectionByName(SectionName); 418 assert(SectionOrError && 419 "__ksymtab[_gpl] section not found in Linux Kernel binary"); 420 const uint64_t SectionSize = SectionOrError->getSize(); 421 const uint64_t SectionAddress = SectionOrError->getAddress(); 422 assert((SectionSize % 4) == 0 && 423 "The size of the __ksymtab[_gpl] section should be a multiple of 4"); 424 425 for (uint64_t I = 0; I < SectionSize; I += 4) { 426 const uint64_t EntryAddress = SectionAddress + I; 427 ErrorOr<uint64_t> Offset = BC.getSignedValueAtAddress(EntryAddress, 4); 428 assert(Offset && "Reading valid PC-relative offset for a ksymtab entry"); 429 const int32_t SignedOffset = *Offset; 430 const uint64_t RefAddress = EntryAddress + SignedOffset; 431 BinaryFunction *BF = BC.getBinaryFunctionAtAddress(RefAddress); 432 if (!BF) 433 continue; 434 435 BC.addRelocation(EntryAddress, BF->getSymbol(), Relocation::getPC32(), 0, 436 *Offset); 437 } 438 } 439 440 /// .smp_locks section contains PC-relative references to instructions with LOCK 441 /// prefix. The prefix can be converted to NOP at boot time on non-SMP systems. 442 void LinuxKernelRewriter::processLKSMPLocks() { 443 ErrorOr<BinarySection &> SectionOrError = 444 BC.getUniqueSectionByName(".smp_locks"); 445 if (!SectionOrError) 446 return; 447 448 uint64_t SectionSize = SectionOrError->getSize(); 449 const uint64_t SectionAddress = SectionOrError->getAddress(); 450 assert((SectionSize % 4) == 0 && 451 "The size of the .smp_locks section should be a multiple of 4"); 452 453 for (uint64_t I = 0; I < SectionSize; I += 4) { 454 const uint64_t EntryAddress = SectionAddress + I; 455 ErrorOr<uint64_t> Offset = BC.getSignedValueAtAddress(EntryAddress, 4); 456 assert(Offset && "Reading valid PC-relative offset for a .smp_locks entry"); 457 int32_t SignedOffset = *Offset; 458 uint64_t RefAddress = EntryAddress + SignedOffset; 459 460 BinaryFunction *ContainingBF = 461 BC.getBinaryFunctionContainingAddress(RefAddress); 462 if (!ContainingBF) 463 continue; 464 465 insertLKMarker(RefAddress, I, SignedOffset, true, ".smp_locks"); 466 } 467 } 468 469 void LinuxKernelRewriter::updateLKMarkers() { 470 if (LKMarkers.size() == 0) 471 return; 472 473 std::unordered_map<std::string, uint64_t> PatchCounts; 474 for (std::pair<const uint64_t, std::vector<LKInstructionMarkerInfo>> 475 &LKMarkerInfoKV : LKMarkers) { 476 const uint64_t OriginalAddress = LKMarkerInfoKV.first; 477 const BinaryFunction *BF = 478 BC.getBinaryFunctionContainingAddress(OriginalAddress, false, true); 479 if (!BF) 480 continue; 481 482 uint64_t NewAddress = BF->translateInputToOutputAddress(OriginalAddress); 483 if (NewAddress == 0) 484 continue; 485 486 // Apply base address. 487 if (OriginalAddress >= 0xffffffff00000000 && NewAddress < 0xffffffff) 488 NewAddress = NewAddress + 0xffffffff00000000; 489 490 if (OriginalAddress == NewAddress) 491 continue; 492 493 for (LKInstructionMarkerInfo &LKMarkerInfo : LKMarkerInfoKV.second) { 494 StringRef SectionName = LKMarkerInfo.SectionName; 495 SimpleBinaryPatcher *LKPatcher; 496 ErrorOr<BinarySection &> BSec = BC.getUniqueSectionByName(SectionName); 497 assert(BSec && "missing section info for kernel section"); 498 if (!BSec->getPatcher()) 499 BSec->registerPatcher(std::make_unique<SimpleBinaryPatcher>()); 500 LKPatcher = static_cast<SimpleBinaryPatcher *>(BSec->getPatcher()); 501 PatchCounts[std::string(SectionName)]++; 502 if (LKMarkerInfo.IsPCRelative) 503 LKPatcher->addLE32Patch(LKMarkerInfo.SectionOffset, 504 NewAddress - OriginalAddress + 505 LKMarkerInfo.PCRelativeOffset); 506 else 507 LKPatcher->addLE64Patch(LKMarkerInfo.SectionOffset, NewAddress); 508 } 509 } 510 BC.outs() << "BOLT-INFO: patching linux kernel sections. Total patches per " 511 "section are as follows:\n"; 512 for (const std::pair<const std::string, uint64_t> &KV : PatchCounts) 513 BC.outs() << " Section: " << KV.first << ", patch-counts: " << KV.second 514 << '\n'; 515 } 516 517 Error LinuxKernelRewriter::readORCTables() { 518 // NOTE: we should ignore relocations for orc tables as the tables are sorted 519 // post-link time and relocations are not updated. 520 ORCUnwindSection = BC.getUniqueSectionByName(".orc_unwind"); 521 ORCUnwindIPSection = BC.getUniqueSectionByName(".orc_unwind_ip"); 522 523 if (!ORCUnwindSection && !ORCUnwindIPSection) 524 return Error::success(); 525 526 if (!ORCUnwindSection || !ORCUnwindIPSection) 527 return createStringError(errc::executable_format_error, 528 "missing ORC section"); 529 530 NumORCEntries = ORCUnwindIPSection->getSize() / ORC_UNWIND_IP_ENTRY_SIZE; 531 if (ORCUnwindSection->getSize() != NumORCEntries * ORC_UNWIND_ENTRY_SIZE || 532 ORCUnwindIPSection->getSize() != NumORCEntries * ORC_UNWIND_IP_ENTRY_SIZE) 533 return createStringError(errc::executable_format_error, 534 "ORC entries number mismatch detected"); 535 536 const uint64_t IPSectionAddress = ORCUnwindIPSection->getAddress(); 537 DataExtractor OrcDE = DataExtractor(ORCUnwindSection->getContents(), 538 BC.AsmInfo->isLittleEndian(), 539 BC.AsmInfo->getCodePointerSize()); 540 DataExtractor IPDE = DataExtractor(ORCUnwindIPSection->getContents(), 541 BC.AsmInfo->isLittleEndian(), 542 BC.AsmInfo->getCodePointerSize()); 543 DataExtractor::Cursor ORCCursor(0); 544 DataExtractor::Cursor IPCursor(0); 545 uint64_t PrevIP = 0; 546 for (uint32_t Index = 0; Index < NumORCEntries; ++Index) { 547 const uint64_t IP = 548 IPSectionAddress + IPCursor.tell() + (int32_t)IPDE.getU32(IPCursor); 549 550 // Consume the status of the cursor. 551 if (!IPCursor) 552 return createStringError(errc::executable_format_error, 553 "out of bounds while reading ORC IP table: %s", 554 toString(IPCursor.takeError()).c_str()); 555 556 if (IP < PrevIP && opts::Verbosity) 557 BC.errs() << "BOLT-WARNING: out of order IP 0x" << Twine::utohexstr(IP) 558 << " detected while reading ORC\n"; 559 560 PrevIP = IP; 561 562 // Store all entries, includes those we are not going to update as the 563 // tables need to be sorted globally before being written out. 564 ORCEntries.push_back(ORCListEntry()); 565 ORCListEntry &Entry = ORCEntries.back(); 566 567 Entry.IP = IP; 568 Entry.ORC.SPOffset = (int16_t)OrcDE.getU16(ORCCursor); 569 Entry.ORC.BPOffset = (int16_t)OrcDE.getU16(ORCCursor); 570 Entry.ORC.Info = (int16_t)OrcDE.getU16(ORCCursor); 571 Entry.BF = nullptr; 572 573 // Consume the status of the cursor. 574 if (!ORCCursor) 575 return createStringError(errc::executable_format_error, 576 "out of bounds while reading ORC: %s", 577 toString(ORCCursor.takeError()).c_str()); 578 579 if (Entry.ORC == NullORC) 580 continue; 581 582 BinaryFunction *&BF = Entry.BF; 583 BF = BC.getBinaryFunctionContainingAddress(IP, /*CheckPastEnd*/ true); 584 585 // If the entry immediately pointing past the end of the function is not 586 // the terminator entry, then it does not belong to this function. 587 if (BF && BF->getAddress() + BF->getSize() == IP) 588 BF = 0; 589 590 if (!BF) { 591 if (opts::Verbosity) 592 BC.errs() << "BOLT-WARNING: no binary function found matching ORC 0x" 593 << Twine::utohexstr(IP) << ": " << Entry.ORC << '\n'; 594 continue; 595 } 596 597 BF->setHasORC(true); 598 599 if (!BF->hasInstructions()) 600 continue; 601 602 MCInst *Inst = BF->getInstructionAtOffset(IP - BF->getAddress()); 603 if (!Inst) 604 return createStringError( 605 errc::executable_format_error, 606 "no instruction at address 0x%" PRIx64 " in .orc_unwind_ip", IP); 607 608 // Some addresses will have two entries associated with them. The first 609 // one being a "weak" section terminator. Since we ignore the terminator, 610 // we should only assign one entry per instruction. 611 if (BC.MIB->hasAnnotation(*Inst, "ORC")) 612 return createStringError( 613 errc::executable_format_error, 614 "duplicate non-terminal ORC IP 0x%" PRIx64 " in .orc_unwind_ip", IP); 615 616 BC.MIB->addAnnotation(*Inst, "ORC", Entry.ORC); 617 } 618 619 BC.outs() << "BOLT-INFO: parsed " << NumORCEntries << " ORC entries\n"; 620 621 if (opts::DumpORC) { 622 BC.outs() << "BOLT-INFO: ORC unwind information:\n"; 623 for (const ORCListEntry &E : ORCEntries) { 624 BC.outs() << "0x" << Twine::utohexstr(E.IP) << ": " << E.ORC; 625 if (E.BF) 626 BC.outs() << ": " << *E.BF; 627 BC.outs() << '\n'; 628 } 629 } 630 631 // Add entries for functions that don't have explicit ORC info at the start. 632 // We'll have the correct info for them even if ORC for the preceding function 633 // changes. 634 ORCListType NewEntries; 635 for (BinaryFunction &BF : llvm::make_second_range(BC.getBinaryFunctions())) { 636 auto It = llvm::partition_point(ORCEntries, [&](const ORCListEntry &E) { 637 return E.IP <= BF.getAddress(); 638 }); 639 if (It != ORCEntries.begin()) 640 --It; 641 642 if (It->BF == &BF) 643 continue; 644 645 if (It->ORC == NullORC && It->IP == BF.getAddress()) { 646 assert(!It->BF); 647 It->BF = &BF; 648 continue; 649 } 650 651 NewEntries.push_back({BF.getAddress(), &BF, It->ORC}); 652 if (It->ORC != NullORC) 653 BF.setHasORC(true); 654 } 655 656 llvm::copy(NewEntries, std::back_inserter(ORCEntries)); 657 llvm::sort(ORCEntries); 658 659 if (opts::DumpORC) { 660 BC.outs() << "BOLT-INFO: amended ORC unwind information:\n"; 661 for (const ORCListEntry &E : ORCEntries) { 662 BC.outs() << "0x" << Twine::utohexstr(E.IP) << ": " << E.ORC; 663 if (E.BF) 664 BC.outs() << ": " << *E.BF; 665 BC.outs() << '\n'; 666 } 667 } 668 669 return Error::success(); 670 } 671 672 Error LinuxKernelRewriter::processORCPostCFG() { 673 if (!NumORCEntries) 674 return Error::success(); 675 676 // Propagate ORC to the rest of the function. We can annotate every 677 // instruction in every function, but to minimize the overhead, we annotate 678 // the first instruction in every basic block to reflect the state at the 679 // entry. This way, the ORC state can be calculated based on annotations 680 // regardless of the basic block layout. Note that if we insert/delete 681 // instructions, we must take care to attach ORC info to the new/deleted ones. 682 for (BinaryFunction &BF : llvm::make_second_range(BC.getBinaryFunctions())) { 683 684 std::optional<ORCState> CurrentState; 685 for (BinaryBasicBlock &BB : BF) { 686 for (MCInst &Inst : BB) { 687 ErrorOr<ORCState> State = 688 BC.MIB->tryGetAnnotationAs<ORCState>(Inst, "ORC"); 689 690 if (State) { 691 CurrentState = *State; 692 continue; 693 } 694 695 // Get state for the start of the function. 696 if (!CurrentState) { 697 // A terminator entry (NullORC) can match the function address. If 698 // there's also a non-terminator entry, it will be placed after the 699 // terminator. Hence, we are looking for the last ORC entry that 700 // matches the address. 701 auto It = 702 llvm::partition_point(ORCEntries, [&](const ORCListEntry &E) { 703 return E.IP <= BF.getAddress(); 704 }); 705 if (It != ORCEntries.begin()) 706 --It; 707 708 assert(It->IP == BF.getAddress() && (!It->BF || It->BF == &BF) && 709 "ORC info at function entry expected."); 710 711 if (It->ORC == NullORC && BF.hasORC()) { 712 BC.errs() << "BOLT-WARNING: ORC unwind info excludes prologue for " 713 << BF << '\n'; 714 } 715 716 It->BF = &BF; 717 718 CurrentState = It->ORC; 719 if (It->ORC != NullORC) 720 BF.setHasORC(true); 721 } 722 723 // While printing ORC, attach info to every instruction for convenience. 724 if (opts::PrintORC || &Inst == &BB.front()) 725 BC.MIB->addAnnotation(Inst, "ORC", *CurrentState); 726 } 727 } 728 } 729 730 return Error::success(); 731 } 732 733 Error LinuxKernelRewriter::rewriteORCTables() { 734 if (!NumORCEntries) 735 return Error::success(); 736 737 // Update ORC sections in-place. As we change the code, the number of ORC 738 // entries may increase for some functions. However, as we remove terminator 739 // redundancy (see below), more space is freed up and we should always be able 740 // to fit new ORC tables in the reserved space. 741 auto createInPlaceWriter = [&](BinarySection &Section) -> BinaryStreamWriter { 742 const size_t Size = Section.getSize(); 743 uint8_t *NewContents = new uint8_t[Size]; 744 Section.updateContents(NewContents, Size); 745 Section.setOutputFileOffset(Section.getInputFileOffset()); 746 return BinaryStreamWriter({NewContents, Size}, BC.AsmInfo->isLittleEndian() 747 ? endianness::little 748 : endianness::big); 749 }; 750 BinaryStreamWriter UnwindWriter = createInPlaceWriter(*ORCUnwindSection); 751 BinaryStreamWriter UnwindIPWriter = createInPlaceWriter(*ORCUnwindIPSection); 752 753 uint64_t NumEmitted = 0; 754 std::optional<ORCState> LastEmittedORC; 755 auto emitORCEntry = [&](const uint64_t IP, const ORCState &ORC, 756 MCSymbol *Label = 0, bool Force = false) -> Error { 757 if (LastEmittedORC && ORC == *LastEmittedORC && !Force) 758 return Error::success(); 759 760 LastEmittedORC = ORC; 761 762 if (++NumEmitted > NumORCEntries) 763 return createStringError(errc::executable_format_error, 764 "exceeded the number of allocated ORC entries"); 765 766 if (Label) 767 ORCUnwindIPSection->addRelocation(UnwindIPWriter.getOffset(), Label, 768 Relocation::getPC32(), /*Addend*/ 0); 769 770 const int32_t IPValue = 771 IP - ORCUnwindIPSection->getAddress() - UnwindIPWriter.getOffset(); 772 if (Error E = UnwindIPWriter.writeInteger(IPValue)) 773 return E; 774 775 if (Error E = UnwindWriter.writeInteger(ORC.SPOffset)) 776 return E; 777 if (Error E = UnwindWriter.writeInteger(ORC.BPOffset)) 778 return E; 779 if (Error E = UnwindWriter.writeInteger(ORC.Info)) 780 return E; 781 782 return Error::success(); 783 }; 784 785 // Emit new ORC entries for the emitted function. 786 auto emitORC = [&](const FunctionFragment &FF) -> Error { 787 ORCState CurrentState = NullORC; 788 for (BinaryBasicBlock *BB : FF) { 789 for (MCInst &Inst : *BB) { 790 ErrorOr<ORCState> ErrorOrState = 791 BC.MIB->tryGetAnnotationAs<ORCState>(Inst, "ORC"); 792 if (!ErrorOrState || *ErrorOrState == CurrentState) 793 continue; 794 795 // Issue label for the instruction. 796 MCSymbol *Label = 797 BC.MIB->getOrCreateInstLabel(Inst, "__ORC_", BC.Ctx.get()); 798 799 if (Error E = emitORCEntry(0, *ErrorOrState, Label)) 800 return E; 801 802 CurrentState = *ErrorOrState; 803 } 804 } 805 806 return Error::success(); 807 }; 808 809 // Emit ORC entries for cold fragments. We assume that these fragments are 810 // emitted contiguously in memory using reserved space in the kernel. This 811 // assumption is validated in post-emit pass validateORCTables() where we 812 // check that ORC entries are sorted by their addresses. 813 auto emitColdORC = [&]() -> Error { 814 for (BinaryFunction &BF : 815 llvm::make_second_range(BC.getBinaryFunctions())) { 816 if (!BC.shouldEmit(BF)) 817 continue; 818 for (FunctionFragment &FF : BF.getLayout().getSplitFragments()) 819 if (Error E = emitORC(FF)) 820 return E; 821 } 822 823 return Error::success(); 824 }; 825 826 bool ShouldEmitCold = !BC.BOLTReserved.empty(); 827 for (ORCListEntry &Entry : ORCEntries) { 828 if (ShouldEmitCold && Entry.IP > BC.BOLTReserved.start()) { 829 if (Error E = emitColdORC()) 830 return E; 831 832 // Emit terminator entry at the end of the reserved region. 833 if (Error E = emitORCEntry(BC.BOLTReserved.end(), NullORC)) 834 return E; 835 836 ShouldEmitCold = false; 837 } 838 839 // Emit original entries for functions that we haven't modified. 840 if (!Entry.BF || !BC.shouldEmit(*Entry.BF)) { 841 // Emit terminator only if it marks the start of a function. 842 if (Entry.ORC == NullORC && !Entry.BF) 843 continue; 844 if (Error E = emitORCEntry(Entry.IP, Entry.ORC)) 845 return E; 846 continue; 847 } 848 849 // Emit all ORC entries for a function referenced by an entry and skip over 850 // the rest of entries for this function by resetting its ORC attribute. 851 if (Entry.BF->hasORC()) { 852 if (Error E = emitORC(Entry.BF->getLayout().getMainFragment())) 853 return E; 854 Entry.BF->setHasORC(false); 855 } 856 } 857 858 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: emitted " << NumEmitted 859 << " ORC entries\n"); 860 861 // Populate ORC tables with a terminator entry with max address to match the 862 // original table sizes. 863 const uint64_t LastIP = std::numeric_limits<uint64_t>::max(); 864 while (UnwindWriter.bytesRemaining()) { 865 if (Error E = emitORCEntry(LastIP, NullORC, nullptr, /*Force*/ true)) 866 return E; 867 } 868 869 return Error::success(); 870 } 871 872 Error LinuxKernelRewriter::validateORCTables() { 873 if (!ORCUnwindIPSection) 874 return Error::success(); 875 876 const uint64_t IPSectionAddress = ORCUnwindIPSection->getAddress(); 877 DataExtractor IPDE = DataExtractor(ORCUnwindIPSection->getOutputContents(), 878 BC.AsmInfo->isLittleEndian(), 879 BC.AsmInfo->getCodePointerSize()); 880 DataExtractor::Cursor IPCursor(0); 881 uint64_t PrevIP = 0; 882 for (uint32_t Index = 0; Index < NumORCEntries; ++Index) { 883 const uint64_t IP = 884 IPSectionAddress + IPCursor.tell() + (int32_t)IPDE.getU32(IPCursor); 885 if (!IPCursor) 886 return createStringError(errc::executable_format_error, 887 "out of bounds while reading ORC IP table: %s", 888 toString(IPCursor.takeError()).c_str()); 889 890 assert(IP >= PrevIP && "Unsorted ORC table detected"); 891 (void)PrevIP; 892 PrevIP = IP; 893 } 894 895 return Error::success(); 896 } 897 898 /// The static call site table is created by objtool and contains entries in the 899 /// following format: 900 /// 901 /// struct static_call_site { 902 /// s32 addr; 903 /// s32 key; 904 /// }; 905 /// 906 Error LinuxKernelRewriter::readStaticCalls() { 907 const BinaryData *StaticCallTable = 908 BC.getBinaryDataByName("__start_static_call_sites"); 909 if (!StaticCallTable) 910 return Error::success(); 911 912 StaticCallTableAddress = StaticCallTable->getAddress(); 913 914 const BinaryData *Stop = BC.getBinaryDataByName("__stop_static_call_sites"); 915 if (!Stop) 916 return createStringError(errc::executable_format_error, 917 "missing __stop_static_call_sites symbol"); 918 919 ErrorOr<BinarySection &> ErrorOrSection = 920 BC.getSectionForAddress(StaticCallTableAddress); 921 if (!ErrorOrSection) 922 return createStringError(errc::executable_format_error, 923 "no section matching __start_static_call_sites"); 924 925 StaticCallSection = *ErrorOrSection; 926 if (!StaticCallSection->containsAddress(Stop->getAddress() - 1)) 927 return createStringError(errc::executable_format_error, 928 "__stop_static_call_sites not in the same section " 929 "as __start_static_call_sites"); 930 931 if ((Stop->getAddress() - StaticCallTableAddress) % STATIC_CALL_ENTRY_SIZE) 932 return createStringError(errc::executable_format_error, 933 "static call table size error"); 934 935 const uint64_t SectionAddress = StaticCallSection->getAddress(); 936 DataExtractor DE(StaticCallSection->getContents(), 937 BC.AsmInfo->isLittleEndian(), 938 BC.AsmInfo->getCodePointerSize()); 939 DataExtractor::Cursor Cursor(StaticCallTableAddress - SectionAddress); 940 uint32_t EntryID = 0; 941 while (Cursor && Cursor.tell() < Stop->getAddress() - SectionAddress) { 942 const uint64_t CallAddress = 943 SectionAddress + Cursor.tell() + (int32_t)DE.getU32(Cursor); 944 const uint64_t KeyAddress = 945 SectionAddress + Cursor.tell() + (int32_t)DE.getU32(Cursor); 946 947 // Consume the status of the cursor. 948 if (!Cursor) 949 return createStringError(errc::executable_format_error, 950 "out of bounds while reading static calls: %s", 951 toString(Cursor.takeError()).c_str()); 952 953 ++EntryID; 954 955 if (opts::DumpStaticCalls) { 956 BC.outs() << "Static Call Site: " << EntryID << '\n'; 957 BC.outs() << "\tCallAddress: 0x" << Twine::utohexstr(CallAddress) 958 << "\n\tKeyAddress: 0x" << Twine::utohexstr(KeyAddress) 959 << '\n'; 960 } 961 962 BinaryFunction *BF = BC.getBinaryFunctionContainingAddress(CallAddress); 963 if (!BF) 964 continue; 965 966 if (!BC.shouldEmit(*BF)) 967 continue; 968 969 if (!BF->hasInstructions()) 970 continue; 971 972 MCInst *Inst = BF->getInstructionAtOffset(CallAddress - BF->getAddress()); 973 if (!Inst) 974 return createStringError(errc::executable_format_error, 975 "no instruction at call site address 0x%" PRIx64, 976 CallAddress); 977 978 // Check for duplicate entries. 979 if (BC.MIB->hasAnnotation(*Inst, "StaticCall")) 980 return createStringError(errc::executable_format_error, 981 "duplicate static call site at 0x%" PRIx64, 982 CallAddress); 983 984 BC.MIB->addAnnotation(*Inst, "StaticCall", EntryID); 985 986 MCSymbol *Label = 987 BC.MIB->getOrCreateInstLabel(*Inst, "__SC_", BC.Ctx.get()); 988 989 StaticCallEntries.push_back({EntryID, BF, Label}); 990 } 991 992 BC.outs() << "BOLT-INFO: parsed " << StaticCallEntries.size() 993 << " static call entries\n"; 994 995 return Error::success(); 996 } 997 998 /// The static call table is sorted during boot time in 999 /// static_call_sort_entries(). This makes it possible to update existing 1000 /// entries in-place ignoring their relative order. 1001 Error LinuxKernelRewriter::rewriteStaticCalls() { 1002 if (!StaticCallTableAddress || !StaticCallSection) 1003 return Error::success(); 1004 1005 for (auto &Entry : StaticCallEntries) { 1006 if (!Entry.Function) 1007 continue; 1008 1009 BinaryFunction &BF = *Entry.Function; 1010 if (!BC.shouldEmit(BF)) 1011 continue; 1012 1013 // Create a relocation against the label. 1014 const uint64_t EntryOffset = StaticCallTableAddress - 1015 StaticCallSection->getAddress() + 1016 (Entry.ID - 1) * STATIC_CALL_ENTRY_SIZE; 1017 StaticCallSection->addRelocation(EntryOffset, Entry.Label, 1018 ELF::R_X86_64_PC32, /*Addend*/ 0); 1019 } 1020 1021 return Error::success(); 1022 } 1023 1024 /// Instructions that access user-space memory can cause page faults. These 1025 /// faults will be handled by the kernel and execution will resume at the fixup 1026 /// code location if the address was invalid. The kernel uses the exception 1027 /// table to match the faulting instruction to its fixup. The table consists of 1028 /// the following entries: 1029 /// 1030 /// struct exception_table_entry { 1031 /// int insn; 1032 /// int fixup; 1033 /// int data; 1034 /// }; 1035 /// 1036 /// More info at: 1037 /// https://www.kernel.org/doc/Documentation/x86/exception-tables.txt 1038 Error LinuxKernelRewriter::readExceptionTable() { 1039 ExceptionsSection = BC.getUniqueSectionByName("__ex_table"); 1040 if (!ExceptionsSection) 1041 return Error::success(); 1042 1043 if (ExceptionsSection->getSize() % EXCEPTION_TABLE_ENTRY_SIZE) 1044 return createStringError(errc::executable_format_error, 1045 "exception table size error"); 1046 1047 const uint64_t SectionAddress = ExceptionsSection->getAddress(); 1048 DataExtractor DE(ExceptionsSection->getContents(), 1049 BC.AsmInfo->isLittleEndian(), 1050 BC.AsmInfo->getCodePointerSize()); 1051 DataExtractor::Cursor Cursor(0); 1052 uint32_t EntryID = 0; 1053 while (Cursor && Cursor.tell() < ExceptionsSection->getSize()) { 1054 const uint64_t InstAddress = 1055 SectionAddress + Cursor.tell() + (int32_t)DE.getU32(Cursor); 1056 const uint64_t FixupAddress = 1057 SectionAddress + Cursor.tell() + (int32_t)DE.getU32(Cursor); 1058 const uint64_t Data = DE.getU32(Cursor); 1059 1060 // Consume the status of the cursor. 1061 if (!Cursor) 1062 return createStringError( 1063 errc::executable_format_error, 1064 "out of bounds while reading exception table: %s", 1065 toString(Cursor.takeError()).c_str()); 1066 1067 ++EntryID; 1068 1069 if (opts::DumpExceptions) { 1070 BC.outs() << "Exception Entry: " << EntryID << '\n'; 1071 BC.outs() << "\tInsn: 0x" << Twine::utohexstr(InstAddress) << '\n' 1072 << "\tFixup: 0x" << Twine::utohexstr(FixupAddress) << '\n' 1073 << "\tData: 0x" << Twine::utohexstr(Data) << '\n'; 1074 } 1075 1076 MCInst *Inst = nullptr; 1077 MCSymbol *FixupLabel = nullptr; 1078 1079 BinaryFunction *InstBF = BC.getBinaryFunctionContainingAddress(InstAddress); 1080 if (InstBF && BC.shouldEmit(*InstBF)) { 1081 Inst = InstBF->getInstructionAtOffset(InstAddress - InstBF->getAddress()); 1082 if (!Inst) 1083 return createStringError(errc::executable_format_error, 1084 "no instruction at address 0x%" PRIx64 1085 " in exception table", 1086 InstAddress); 1087 BC.MIB->addAnnotation(*Inst, "ExceptionEntry", EntryID); 1088 FunctionsWithExceptions.insert(InstBF); 1089 } 1090 1091 if (!InstBF && opts::Verbosity) { 1092 BC.outs() << "BOLT-INFO: no function matches instruction at 0x" 1093 << Twine::utohexstr(InstAddress) 1094 << " referenced by Linux exception table\n"; 1095 } 1096 1097 BinaryFunction *FixupBF = 1098 BC.getBinaryFunctionContainingAddress(FixupAddress); 1099 if (FixupBF && BC.shouldEmit(*FixupBF)) { 1100 const uint64_t Offset = FixupAddress - FixupBF->getAddress(); 1101 if (!FixupBF->getInstructionAtOffset(Offset)) 1102 return createStringError(errc::executable_format_error, 1103 "no instruction at fixup address 0x%" PRIx64 1104 " in exception table", 1105 FixupAddress); 1106 FixupLabel = Offset ? FixupBF->addEntryPointAtOffset(Offset) 1107 : FixupBF->getSymbol(); 1108 if (Inst) 1109 BC.MIB->addAnnotation(*Inst, "Fixup", FixupLabel->getName()); 1110 FunctionsWithExceptions.insert(FixupBF); 1111 } 1112 1113 if (!FixupBF && opts::Verbosity) { 1114 BC.outs() << "BOLT-INFO: no function matches fixup code at 0x" 1115 << Twine::utohexstr(FixupAddress) 1116 << " referenced by Linux exception table\n"; 1117 } 1118 } 1119 1120 BC.outs() << "BOLT-INFO: parsed " 1121 << ExceptionsSection->getSize() / EXCEPTION_TABLE_ENTRY_SIZE 1122 << " exception table entries\n"; 1123 1124 return Error::success(); 1125 } 1126 1127 /// Depending on the value of CONFIG_BUILDTIME_TABLE_SORT, the kernel expects 1128 /// the exception table to be sorted. Hence we have to sort it after code 1129 /// reordering. 1130 Error LinuxKernelRewriter::rewriteExceptionTable() { 1131 // Disable output of functions with exceptions before rewrite support is 1132 // added. 1133 for (BinaryFunction *BF : FunctionsWithExceptions) 1134 BF->setSimple(false); 1135 1136 return Error::success(); 1137 } 1138 1139 /// .parainsrtuctions section contains information for patching parvirtual call 1140 /// instructions during runtime. The entries in the section are in the form: 1141 /// 1142 /// struct paravirt_patch_site { 1143 /// u8 *instr; /* original instructions */ 1144 /// u8 type; /* type of this instruction */ 1145 /// u8 len; /* length of original instruction */ 1146 /// }; 1147 /// 1148 /// Note that the structures are aligned at 8-byte boundary. 1149 Error LinuxKernelRewriter::readParaInstructions() { 1150 ParavirtualPatchSection = BC.getUniqueSectionByName(".parainstructions"); 1151 if (!ParavirtualPatchSection) 1152 return Error::success(); 1153 1154 DataExtractor DE = DataExtractor(ParavirtualPatchSection->getContents(), 1155 BC.AsmInfo->isLittleEndian(), 1156 BC.AsmInfo->getCodePointerSize()); 1157 uint32_t EntryID = 0; 1158 DataExtractor::Cursor Cursor(0); 1159 while (Cursor && !DE.eof(Cursor)) { 1160 const uint64_t NextOffset = alignTo(Cursor.tell(), Align(PARA_PATCH_ALIGN)); 1161 if (!DE.isValidOffset(NextOffset)) 1162 break; 1163 1164 Cursor.seek(NextOffset); 1165 1166 const uint64_t InstrLocation = DE.getU64(Cursor); 1167 const uint8_t Type = DE.getU8(Cursor); 1168 const uint8_t Len = DE.getU8(Cursor); 1169 1170 if (!Cursor) 1171 return createStringError( 1172 errc::executable_format_error, 1173 "out of bounds while reading .parainstructions: %s", 1174 toString(Cursor.takeError()).c_str()); 1175 1176 ++EntryID; 1177 1178 if (opts::DumpParavirtualPatchSites) { 1179 BC.outs() << "Paravirtual patch site: " << EntryID << '\n'; 1180 BC.outs() << "\tInstr: 0x" << Twine::utohexstr(InstrLocation) 1181 << "\n\tType: 0x" << Twine::utohexstr(Type) << "\n\tLen: 0x" 1182 << Twine::utohexstr(Len) << '\n'; 1183 } 1184 1185 BinaryFunction *BF = BC.getBinaryFunctionContainingAddress(InstrLocation); 1186 if (!BF && opts::Verbosity) { 1187 BC.outs() << "BOLT-INFO: no function matches address 0x" 1188 << Twine::utohexstr(InstrLocation) 1189 << " referenced by paravirutal patch site\n"; 1190 } 1191 1192 if (BF && BC.shouldEmit(*BF)) { 1193 MCInst *Inst = 1194 BF->getInstructionAtOffset(InstrLocation - BF->getAddress()); 1195 if (!Inst) 1196 return createStringError(errc::executable_format_error, 1197 "no instruction at address 0x%" PRIx64 1198 " in paravirtual call site %d", 1199 InstrLocation, EntryID); 1200 BC.MIB->addAnnotation(*Inst, "ParaSite", EntryID); 1201 } 1202 } 1203 1204 BC.outs() << "BOLT-INFO: parsed " << EntryID << " paravirtual patch sites\n"; 1205 1206 return Error::success(); 1207 } 1208 1209 void LinuxKernelRewriter::skipFunctionsWithAnnotation( 1210 StringRef Annotation) const { 1211 for (BinaryFunction &BF : llvm::make_second_range(BC.getBinaryFunctions())) { 1212 if (!BC.shouldEmit(BF)) 1213 continue; 1214 for (const BinaryBasicBlock &BB : BF) { 1215 const bool HasAnnotation = llvm::any_of(BB, [&](const MCInst &Inst) { 1216 return BC.MIB->hasAnnotation(Inst, Annotation); 1217 }); 1218 if (HasAnnotation) { 1219 BF.setSimple(false); 1220 break; 1221 } 1222 } 1223 } 1224 } 1225 1226 Error LinuxKernelRewriter::rewriteParaInstructions() { 1227 // Disable output of functions with paravirtual instructions before the 1228 // rewrite support is complete. 1229 skipFunctionsWithAnnotation("ParaSite"); 1230 1231 return Error::success(); 1232 } 1233 1234 /// Process __bug_table section. 1235 /// This section contains information useful for kernel debugging, mostly 1236 /// utilized by WARN()/WARN_ON() macros and deprecated BUG()/BUG_ON(). 1237 /// 1238 /// Each entry in the section is a struct bug_entry that contains a pointer to 1239 /// the ud2 instruction corresponding to the bug, corresponding file name (both 1240 /// pointers use PC relative offset addressing), line number, and flags. 1241 /// The definition of the struct bug_entry can be found in 1242 /// `include/asm-generic/bug.h`. The first entry in the struct is an instruction 1243 /// address encoded as a PC-relative offset. In theory, it could be an absolute 1244 /// address if CONFIG_GENERIC_BUG_RELATIVE_POINTERS is not set, but in practice 1245 /// the kernel code relies on it being a relative offset on x86-64. 1246 Error LinuxKernelRewriter::readBugTable() { 1247 BugTableSection = BC.getUniqueSectionByName("__bug_table"); 1248 if (!BugTableSection) 1249 return Error::success(); 1250 1251 if (BugTableSection->getSize() % BUG_TABLE_ENTRY_SIZE) 1252 return createStringError(errc::executable_format_error, 1253 "bug table size error"); 1254 1255 const uint64_t SectionAddress = BugTableSection->getAddress(); 1256 DataExtractor DE(BugTableSection->getContents(), BC.AsmInfo->isLittleEndian(), 1257 BC.AsmInfo->getCodePointerSize()); 1258 DataExtractor::Cursor Cursor(0); 1259 uint32_t EntryID = 0; 1260 while (Cursor && Cursor.tell() < BugTableSection->getSize()) { 1261 const uint64_t Pos = Cursor.tell(); 1262 const uint64_t InstAddress = 1263 SectionAddress + Pos + (int32_t)DE.getU32(Cursor); 1264 Cursor.seek(Pos + BUG_TABLE_ENTRY_SIZE); 1265 1266 if (!Cursor) 1267 return createStringError(errc::executable_format_error, 1268 "out of bounds while reading __bug_table: %s", 1269 toString(Cursor.takeError()).c_str()); 1270 1271 ++EntryID; 1272 1273 BinaryFunction *BF = BC.getBinaryFunctionContainingAddress(InstAddress); 1274 if (!BF && opts::Verbosity) { 1275 BC.outs() << "BOLT-INFO: no function matches address 0x" 1276 << Twine::utohexstr(InstAddress) 1277 << " referenced by bug table\n"; 1278 } 1279 1280 if (BF && BC.shouldEmit(*BF)) { 1281 MCInst *Inst = BF->getInstructionAtOffset(InstAddress - BF->getAddress()); 1282 if (!Inst) 1283 return createStringError(errc::executable_format_error, 1284 "no instruction at address 0x%" PRIx64 1285 " referenced by bug table entry %d", 1286 InstAddress, EntryID); 1287 BC.MIB->addAnnotation(*Inst, "BugEntry", EntryID); 1288 1289 FunctionBugList[BF].push_back(EntryID); 1290 } 1291 } 1292 1293 BC.outs() << "BOLT-INFO: parsed " << EntryID << " bug table entries\n"; 1294 1295 return Error::success(); 1296 } 1297 1298 /// find_bug() uses linear search to match an address to an entry in the bug 1299 /// table. Hence, there is no need to sort entries when rewriting the table. 1300 /// When we need to erase an entry, we set its instruction address to zero. 1301 Error LinuxKernelRewriter::rewriteBugTable() { 1302 if (!BugTableSection) 1303 return Error::success(); 1304 1305 for (BinaryFunction &BF : llvm::make_second_range(BC.getBinaryFunctions())) { 1306 if (!BC.shouldEmit(BF)) 1307 continue; 1308 1309 if (!FunctionBugList.count(&BF)) 1310 continue; 1311 1312 // Bugs that will be emitted for this function. 1313 DenseSet<uint32_t> EmittedIDs; 1314 for (BinaryBasicBlock &BB : BF) { 1315 for (MCInst &Inst : BB) { 1316 if (!BC.MIB->hasAnnotation(Inst, "BugEntry")) 1317 continue; 1318 const uint32_t ID = BC.MIB->getAnnotationAs<uint32_t>(Inst, "BugEntry"); 1319 EmittedIDs.insert(ID); 1320 1321 // Create a relocation entry for this bug entry. 1322 MCSymbol *Label = 1323 BC.MIB->getOrCreateInstLabel(Inst, "__BUG_", BC.Ctx.get()); 1324 const uint64_t EntryOffset = (ID - 1) * BUG_TABLE_ENTRY_SIZE; 1325 BugTableSection->addRelocation(EntryOffset, Label, ELF::R_X86_64_PC32, 1326 /*Addend*/ 0); 1327 } 1328 } 1329 1330 // Clear bug entries that were not emitted for this function, e.g. as a 1331 // result of DCE, but setting their instruction address to zero. 1332 for (const uint32_t ID : FunctionBugList[&BF]) { 1333 if (!EmittedIDs.count(ID)) { 1334 const uint64_t EntryOffset = (ID - 1) * BUG_TABLE_ENTRY_SIZE; 1335 BugTableSection->addRelocation(EntryOffset, nullptr, ELF::R_X86_64_PC32, 1336 /*Addend*/ 0); 1337 } 1338 } 1339 } 1340 1341 return Error::success(); 1342 } 1343 1344 /// The kernel can replace certain instruction sequences depending on hardware 1345 /// it is running on and features specified during boot time. The information 1346 /// about alternative instruction sequences is stored in .altinstructions 1347 /// section. The format of entries in this section is defined in 1348 /// arch/x86/include/asm/alternative.h: 1349 /// 1350 /// struct alt_instr { 1351 /// s32 instr_offset; 1352 /// s32 repl_offset; 1353 /// uXX feature; 1354 /// u8 instrlen; 1355 /// u8 replacementlen; 1356 /// u8 padlen; // present in older kernels 1357 /// } __packed; 1358 /// 1359 /// Note the structures is packed. 1360 Error LinuxKernelRewriter::readAltInstructions() { 1361 AltInstrSection = BC.getUniqueSectionByName(".altinstructions"); 1362 if (!AltInstrSection) 1363 return Error::success(); 1364 1365 const uint64_t Address = AltInstrSection->getAddress(); 1366 DataExtractor DE = DataExtractor(AltInstrSection->getContents(), 1367 BC.AsmInfo->isLittleEndian(), 1368 BC.AsmInfo->getCodePointerSize()); 1369 uint64_t EntryID = 0; 1370 DataExtractor::Cursor Cursor(0); 1371 while (Cursor && !DE.eof(Cursor)) { 1372 const uint64_t OrgInstAddress = 1373 Address + Cursor.tell() + (int32_t)DE.getU32(Cursor); 1374 const uint64_t AltInstAddress = 1375 Address + Cursor.tell() + (int32_t)DE.getU32(Cursor); 1376 const uint64_t Feature = DE.getUnsigned(Cursor, opts::AltInstFeatureSize); 1377 const uint8_t OrgSize = DE.getU8(Cursor); 1378 const uint8_t AltSize = DE.getU8(Cursor); 1379 1380 // Older kernels may have the padlen field. 1381 const uint8_t PadLen = opts::AltInstHasPadLen ? DE.getU8(Cursor) : 0; 1382 1383 if (!Cursor) 1384 return createStringError( 1385 errc::executable_format_error, 1386 "out of bounds while reading .altinstructions: %s", 1387 toString(Cursor.takeError()).c_str()); 1388 1389 ++EntryID; 1390 1391 if (opts::DumpAltInstructions) { 1392 BC.outs() << "Alternative instruction entry: " << EntryID 1393 << "\n\tOrg: 0x" << Twine::utohexstr(OrgInstAddress) 1394 << "\n\tAlt: 0x" << Twine::utohexstr(AltInstAddress) 1395 << "\n\tFeature: 0x" << Twine::utohexstr(Feature) 1396 << "\n\tOrgSize: " << (int)OrgSize 1397 << "\n\tAltSize: " << (int)AltSize << '\n'; 1398 if (opts::AltInstHasPadLen) 1399 BC.outs() << "\tPadLen: " << (int)PadLen << '\n'; 1400 } 1401 1402 if (AltSize > OrgSize) 1403 return createStringError(errc::executable_format_error, 1404 "error reading .altinstructions"); 1405 1406 BinaryFunction *BF = BC.getBinaryFunctionContainingAddress(OrgInstAddress); 1407 if (!BF && opts::Verbosity) { 1408 BC.outs() << "BOLT-INFO: no function matches address 0x" 1409 << Twine::utohexstr(OrgInstAddress) 1410 << " of instruction from .altinstructions\n"; 1411 } 1412 1413 BinaryFunction *AltBF = 1414 BC.getBinaryFunctionContainingAddress(AltInstAddress); 1415 if (AltBF && BC.shouldEmit(*AltBF)) { 1416 BC.errs() 1417 << "BOLT-WARNING: alternative instruction sequence found in function " 1418 << *AltBF << '\n'; 1419 AltBF->setIgnored(); 1420 } 1421 1422 if (!BF || !BC.shouldEmit(*BF)) 1423 continue; 1424 1425 if (OrgInstAddress + OrgSize > BF->getAddress() + BF->getSize()) 1426 return createStringError(errc::executable_format_error, 1427 "error reading .altinstructions"); 1428 1429 MCInst *Inst = 1430 BF->getInstructionAtOffset(OrgInstAddress - BF->getAddress()); 1431 if (!Inst) 1432 return createStringError(errc::executable_format_error, 1433 "no instruction at address 0x%" PRIx64 1434 " referenced by .altinstructions entry %d", 1435 OrgInstAddress, EntryID); 1436 1437 // There could be more than one alternative instruction sequences for the 1438 // same original instruction. Annotate each alternative separately. 1439 std::string AnnotationName = "AltInst"; 1440 unsigned N = 2; 1441 while (BC.MIB->hasAnnotation(*Inst, AnnotationName)) 1442 AnnotationName = "AltInst" + std::to_string(N++); 1443 1444 BC.MIB->addAnnotation(*Inst, AnnotationName, EntryID); 1445 1446 // Annotate all instructions from the original sequence. Note that it's not 1447 // the most efficient way to look for instructions in the address range, 1448 // but since alternative instructions are uncommon, it will do for now. 1449 for (uint32_t Offset = 1; Offset < OrgSize; ++Offset) { 1450 Inst = BF->getInstructionAtOffset(OrgInstAddress + Offset - 1451 BF->getAddress()); 1452 if (Inst) 1453 BC.MIB->addAnnotation(*Inst, AnnotationName, EntryID); 1454 } 1455 } 1456 1457 BC.outs() << "BOLT-INFO: parsed " << EntryID 1458 << " alternative instruction entries\n"; 1459 1460 return Error::success(); 1461 } 1462 1463 Error LinuxKernelRewriter::rewriteAltInstructions() { 1464 // Disable output of functions with alt instructions before the rewrite 1465 // support is complete. 1466 skipFunctionsWithAnnotation("AltInst"); 1467 1468 return Error::success(); 1469 } 1470 1471 /// When the Linux kernel needs to handle an error associated with a given PCI 1472 /// device, it uses a table stored in .pci_fixup section to locate a fixup code 1473 /// specific to the vendor and the problematic device. The section contains a 1474 /// list of the following structures defined in include/linux/pci.h: 1475 /// 1476 /// struct pci_fixup { 1477 /// u16 vendor; /* Or PCI_ANY_ID */ 1478 /// u16 device; /* Or PCI_ANY_ID */ 1479 /// u32 class; /* Or PCI_ANY_ID */ 1480 /// unsigned int class_shift; /* should be 0, 8, 16 */ 1481 /// int hook_offset; 1482 /// }; 1483 /// 1484 /// Normally, the hook will point to a function start and we don't have to 1485 /// update the pointer if we are not relocating functions. Hence, while reading 1486 /// the table we validate this assumption. If a function has a fixup code in the 1487 /// middle of its body, we issue a warning and ignore it. 1488 Error LinuxKernelRewriter::readPCIFixupTable() { 1489 PCIFixupSection = BC.getUniqueSectionByName(".pci_fixup"); 1490 if (!PCIFixupSection) 1491 return Error::success(); 1492 1493 if (PCIFixupSection->getSize() % PCI_FIXUP_ENTRY_SIZE) 1494 return createStringError(errc::executable_format_error, 1495 "PCI fixup table size error"); 1496 1497 const uint64_t Address = PCIFixupSection->getAddress(); 1498 DataExtractor DE = DataExtractor(PCIFixupSection->getContents(), 1499 BC.AsmInfo->isLittleEndian(), 1500 BC.AsmInfo->getCodePointerSize()); 1501 uint64_t EntryID = 0; 1502 DataExtractor::Cursor Cursor(0); 1503 while (Cursor && !DE.eof(Cursor)) { 1504 const uint16_t Vendor = DE.getU16(Cursor); 1505 const uint16_t Device = DE.getU16(Cursor); 1506 const uint32_t Class = DE.getU32(Cursor); 1507 const uint32_t ClassShift = DE.getU32(Cursor); 1508 const uint64_t HookAddress = 1509 Address + Cursor.tell() + (int32_t)DE.getU32(Cursor); 1510 1511 if (!Cursor) 1512 return createStringError(errc::executable_format_error, 1513 "out of bounds while reading .pci_fixup: %s", 1514 toString(Cursor.takeError()).c_str()); 1515 1516 ++EntryID; 1517 1518 if (opts::DumpPCIFixups) { 1519 BC.outs() << "PCI fixup entry: " << EntryID << "\n\tVendor 0x" 1520 << Twine::utohexstr(Vendor) << "\n\tDevice: 0x" 1521 << Twine::utohexstr(Device) << "\n\tClass: 0x" 1522 << Twine::utohexstr(Class) << "\n\tClassShift: 0x" 1523 << Twine::utohexstr(ClassShift) << "\n\tHookAddress: 0x" 1524 << Twine::utohexstr(HookAddress) << '\n'; 1525 } 1526 1527 BinaryFunction *BF = BC.getBinaryFunctionContainingAddress(HookAddress); 1528 if (!BF && opts::Verbosity) { 1529 BC.outs() << "BOLT-INFO: no function matches address 0x" 1530 << Twine::utohexstr(HookAddress) 1531 << " of hook from .pci_fixup\n"; 1532 } 1533 1534 if (!BF || !BC.shouldEmit(*BF)) 1535 continue; 1536 1537 if (const uint64_t Offset = HookAddress - BF->getAddress()) { 1538 BC.errs() << "BOLT-WARNING: PCI fixup detected in the middle of function " 1539 << *BF << " at offset 0x" << Twine::utohexstr(Offset) << '\n'; 1540 BF->setSimple(false); 1541 } 1542 } 1543 1544 BC.outs() << "BOLT-INFO: parsed " << EntryID << " PCI fixup entries\n"; 1545 1546 return Error::success(); 1547 } 1548 1549 /// Runtime code modification used by static keys is the most ubiquitous 1550 /// self-modifying feature of the Linux kernel. The idea is to eliminate the 1551 /// condition check and associated conditional jump on a hot path if that 1552 /// condition (based on a boolean value of a static key) does not change often. 1553 /// Whenever the condition changes, the kernel runtime modifies all code paths 1554 /// associated with that key flipping the code between nop and (unconditional) 1555 /// jump. The information about the code is stored in a static key jump table 1556 /// and contains the list of entries of the following type from 1557 /// include/linux/jump_label.h: 1558 // 1559 /// struct jump_entry { 1560 /// s32 code; 1561 /// s32 target; 1562 /// long key; // key may be far away from the core kernel under KASLR 1563 /// }; 1564 /// 1565 /// The list does not have to be stored in any sorted way, but it is sorted at 1566 /// boot time (or module initialization time) first by "key" and then by "code". 1567 /// jump_label_sort_entries() is responsible for sorting the table. 1568 /// 1569 /// The key in jump_entry structure uses lower two bits of the key address 1570 /// (which itself is aligned) to store extra information. We are interested in 1571 /// the lower bit which indicates if the key is likely to be set on the code 1572 /// path associated with this jump_entry. 1573 /// 1574 /// static_key_{enable,disable}() functions modify the code based on key and 1575 /// jump table entries. 1576 /// 1577 /// jump_label_update() updates all code entries for a given key. Batch mode is 1578 /// used for x86. 1579 /// 1580 /// The actual patching happens in text_poke_bp_batch() that overrides the first 1581 /// byte of the sequence with int3 before proceeding with actual code 1582 /// replacement. 1583 Error LinuxKernelRewriter::readStaticKeysJumpTable() { 1584 const BinaryData *StaticKeysJumpTable = 1585 BC.getBinaryDataByName("__start___jump_table"); 1586 if (!StaticKeysJumpTable) 1587 return Error::success(); 1588 1589 StaticKeysJumpTableAddress = StaticKeysJumpTable->getAddress(); 1590 1591 const BinaryData *Stop = BC.getBinaryDataByName("__stop___jump_table"); 1592 if (!Stop) 1593 return createStringError(errc::executable_format_error, 1594 "missing __stop___jump_table symbol"); 1595 1596 ErrorOr<BinarySection &> ErrorOrSection = 1597 BC.getSectionForAddress(StaticKeysJumpTableAddress); 1598 if (!ErrorOrSection) 1599 return createStringError(errc::executable_format_error, 1600 "no section matching __start___jump_table"); 1601 1602 StaticKeysJumpSection = *ErrorOrSection; 1603 if (!StaticKeysJumpSection->containsAddress(Stop->getAddress() - 1)) 1604 return createStringError(errc::executable_format_error, 1605 "__stop___jump_table not in the same section " 1606 "as __start___jump_table"); 1607 1608 if ((Stop->getAddress() - StaticKeysJumpTableAddress) % 1609 STATIC_KEYS_JUMP_ENTRY_SIZE) 1610 return createStringError(errc::executable_format_error, 1611 "static keys jump table size error"); 1612 1613 const uint64_t SectionAddress = StaticKeysJumpSection->getAddress(); 1614 DataExtractor DE(StaticKeysJumpSection->getContents(), 1615 BC.AsmInfo->isLittleEndian(), 1616 BC.AsmInfo->getCodePointerSize()); 1617 DataExtractor::Cursor Cursor(StaticKeysJumpTableAddress - SectionAddress); 1618 uint32_t EntryID = 0; 1619 while (Cursor && Cursor.tell() < Stop->getAddress() - SectionAddress) { 1620 const uint64_t JumpAddress = 1621 SectionAddress + Cursor.tell() + (int32_t)DE.getU32(Cursor); 1622 const uint64_t TargetAddress = 1623 SectionAddress + Cursor.tell() + (int32_t)DE.getU32(Cursor); 1624 const uint64_t KeyAddress = 1625 SectionAddress + Cursor.tell() + (int64_t)DE.getU64(Cursor); 1626 1627 // Consume the status of the cursor. 1628 if (!Cursor) 1629 return createStringError( 1630 errc::executable_format_error, 1631 "out of bounds while reading static keys jump table: %s", 1632 toString(Cursor.takeError()).c_str()); 1633 1634 ++EntryID; 1635 1636 JumpInfo.push_back(JumpInfoEntry()); 1637 JumpInfoEntry &Info = JumpInfo.back(); 1638 Info.Likely = KeyAddress & 1; 1639 1640 if (opts::DumpStaticKeys) { 1641 BC.outs() << "Static key jump entry: " << EntryID 1642 << "\n\tJumpAddress: 0x" << Twine::utohexstr(JumpAddress) 1643 << "\n\tTargetAddress: 0x" << Twine::utohexstr(TargetAddress) 1644 << "\n\tKeyAddress: 0x" << Twine::utohexstr(KeyAddress) 1645 << "\n\tIsLikely: " << Info.Likely << '\n'; 1646 } 1647 1648 BinaryFunction *BF = BC.getBinaryFunctionContainingAddress(JumpAddress); 1649 if (!BF && opts::Verbosity) { 1650 BC.outs() 1651 << "BOLT-INFO: no function matches address 0x" 1652 << Twine::utohexstr(JumpAddress) 1653 << " of jump instruction referenced from static keys jump table\n"; 1654 } 1655 1656 if (!BF || !BC.shouldEmit(*BF)) 1657 continue; 1658 1659 MCInst *Inst = BF->getInstructionAtOffset(JumpAddress - BF->getAddress()); 1660 if (!Inst) 1661 return createStringError( 1662 errc::executable_format_error, 1663 "no instruction at static keys jump site address 0x%" PRIx64, 1664 JumpAddress); 1665 1666 if (!BF->containsAddress(TargetAddress)) 1667 return createStringError( 1668 errc::executable_format_error, 1669 "invalid target of static keys jump at 0x%" PRIx64 " : 0x%" PRIx64, 1670 JumpAddress, TargetAddress); 1671 1672 const bool IsBranch = BC.MIB->isBranch(*Inst); 1673 if (!IsBranch && !BC.MIB->isNoop(*Inst)) 1674 return createStringError(errc::executable_format_error, 1675 "jump or nop expected at address 0x%" PRIx64, 1676 JumpAddress); 1677 1678 const uint64_t Size = BC.computeInstructionSize(*Inst); 1679 if (Size != 2 && Size != 5) { 1680 return createStringError( 1681 errc::executable_format_error, 1682 "unexpected static keys jump size at address 0x%" PRIx64, 1683 JumpAddress); 1684 } 1685 1686 MCSymbol *Target = BF->registerBranch(JumpAddress, TargetAddress); 1687 MCInst StaticKeyBranch; 1688 1689 // Create a conditional branch instruction. The actual conditional code type 1690 // should not matter as long as it's a valid code. The instruction should be 1691 // treated as a conditional branch for control-flow purposes. Before we emit 1692 // the code, it will be converted to a different instruction in 1693 // rewriteStaticKeysJumpTable(). 1694 // 1695 // NB: for older kernels, under LongJumpLabels option, we create long 1696 // conditional branch to guarantee that code size estimation takes 1697 // into account the extra bytes needed for long branch that will be used 1698 // by the kernel patching code. Newer kernels can work with both short 1699 // and long branches. The code for long conditional branch is larger 1700 // than unconditional one, so we are pessimistic in our estimations. 1701 if (opts::LongJumpLabels) 1702 BC.MIB->createLongCondBranch(StaticKeyBranch, Target, 0, BC.Ctx.get()); 1703 else 1704 BC.MIB->createCondBranch(StaticKeyBranch, Target, 0, BC.Ctx.get()); 1705 BC.MIB->moveAnnotations(std::move(*Inst), StaticKeyBranch); 1706 BC.MIB->setDynamicBranch(StaticKeyBranch, EntryID); 1707 *Inst = StaticKeyBranch; 1708 1709 // IsBranch = InitialValue ^ LIKELY 1710 // 1711 // 0 0 0 1712 // 1 0 1 1713 // 1 1 0 1714 // 0 1 1 1715 // 1716 // => InitialValue = IsBranch ^ LIKELY 1717 Info.InitValue = IsBranch ^ Info.Likely; 1718 1719 // Add annotations to facilitate manual code analysis. 1720 BC.MIB->addAnnotation(*Inst, "Likely", Info.Likely); 1721 BC.MIB->addAnnotation(*Inst, "InitValue", Info.InitValue); 1722 if (!BC.MIB->getSize(*Inst)) 1723 BC.MIB->setSize(*Inst, Size); 1724 1725 if (opts::LongJumpLabels) 1726 BC.MIB->setSize(*Inst, 5); 1727 } 1728 1729 BC.outs() << "BOLT-INFO: parsed " << EntryID << " static keys jump entries\n"; 1730 1731 return Error::success(); 1732 } 1733 1734 // Pre-emit pass. Convert dynamic branch instructions into jumps that could be 1735 // relaxed. In post-emit pass we will convert those jumps into nops when 1736 // necessary. We do the unconditional conversion into jumps so that the jumps 1737 // can be relaxed and the optimal size of jump/nop instruction is selected. 1738 Error LinuxKernelRewriter::rewriteStaticKeysJumpTable() { 1739 if (!StaticKeysJumpSection) 1740 return Error::success(); 1741 1742 uint64_t NumShort = 0; 1743 uint64_t NumLong = 0; 1744 for (BinaryFunction &BF : llvm::make_second_range(BC.getBinaryFunctions())) { 1745 if (!BC.shouldEmit(BF)) 1746 continue; 1747 1748 for (BinaryBasicBlock &BB : BF) { 1749 for (MCInst &Inst : BB) { 1750 if (!BC.MIB->isDynamicBranch(Inst)) 1751 continue; 1752 1753 const uint32_t EntryID = *BC.MIB->getDynamicBranchID(Inst); 1754 MCSymbol *Target = 1755 const_cast<MCSymbol *>(BC.MIB->getTargetSymbol(Inst)); 1756 assert(Target && "Target symbol should be set."); 1757 1758 const JumpInfoEntry &Info = JumpInfo[EntryID - 1]; 1759 const bool IsBranch = Info.Likely ^ Info.InitValue; 1760 1761 uint32_t Size = *BC.MIB->getSize(Inst); 1762 if (Size == 2) 1763 ++NumShort; 1764 else if (Size == 5) 1765 ++NumLong; 1766 else 1767 llvm_unreachable("Wrong size for static keys jump instruction."); 1768 1769 MCInst NewInst; 1770 // Replace the instruction with unconditional jump even if it needs to 1771 // be nop in the binary. 1772 if (opts::LongJumpLabels) { 1773 BC.MIB->createLongUncondBranch(NewInst, Target, BC.Ctx.get()); 1774 } else { 1775 // Newer kernels can handle short and long jumps for static keys. 1776 // Optimistically, emit short jump and check if it gets relaxed into 1777 // a long one during post-emit. Only then convert the jump to a nop. 1778 BC.MIB->createUncondBranch(NewInst, Target, BC.Ctx.get()); 1779 } 1780 1781 BC.MIB->moveAnnotations(std::move(Inst), NewInst); 1782 Inst = NewInst; 1783 1784 // Mark the instruction for nop conversion. 1785 if (!IsBranch) 1786 NopIDs.insert(EntryID); 1787 1788 MCSymbol *Label = 1789 BC.MIB->getOrCreateInstLabel(Inst, "__SK_", BC.Ctx.get()); 1790 1791 // Create a relocation against the label. 1792 const uint64_t EntryOffset = StaticKeysJumpTableAddress - 1793 StaticKeysJumpSection->getAddress() + 1794 (EntryID - 1) * 16; 1795 StaticKeysJumpSection->addRelocation(EntryOffset, Label, 1796 ELF::R_X86_64_PC32, 1797 /*Addend*/ 0); 1798 StaticKeysJumpSection->addRelocation(EntryOffset + 4, Target, 1799 ELF::R_X86_64_PC32, /*Addend*/ 0); 1800 } 1801 } 1802 } 1803 1804 BC.outs() << "BOLT-INFO: the input contains " << NumShort << " short and " 1805 << NumLong << " long static keys jumps in optimized functions\n"; 1806 1807 return Error::success(); 1808 } 1809 1810 // Post-emit pass of static keys jump section. Convert jumps to nops. 1811 Error LinuxKernelRewriter::updateStaticKeysJumpTablePostEmit() { 1812 if (!StaticKeysJumpSection || !StaticKeysJumpSection->isFinalized()) 1813 return Error::success(); 1814 1815 const uint64_t SectionAddress = StaticKeysJumpSection->getAddress(); 1816 DataExtractor DE(StaticKeysJumpSection->getOutputContents(), 1817 BC.AsmInfo->isLittleEndian(), 1818 BC.AsmInfo->getCodePointerSize()); 1819 DataExtractor::Cursor Cursor(StaticKeysJumpTableAddress - SectionAddress); 1820 const BinaryData *Stop = BC.getBinaryDataByName("__stop___jump_table"); 1821 uint32_t EntryID = 0; 1822 uint64_t NumShort = 0; 1823 uint64_t NumLong = 0; 1824 while (Cursor && Cursor.tell() < Stop->getAddress() - SectionAddress) { 1825 const uint64_t JumpAddress = 1826 SectionAddress + Cursor.tell() + (int32_t)DE.getU32(Cursor); 1827 const uint64_t TargetAddress = 1828 SectionAddress + Cursor.tell() + (int32_t)DE.getU32(Cursor); 1829 const uint64_t KeyAddress = 1830 SectionAddress + Cursor.tell() + (int64_t)DE.getU64(Cursor); 1831 1832 // Consume the status of the cursor. 1833 if (!Cursor) 1834 return createStringError(errc::executable_format_error, 1835 "out of bounds while updating static keys: %s", 1836 toString(Cursor.takeError()).c_str()); 1837 1838 ++EntryID; 1839 1840 LLVM_DEBUG({ 1841 dbgs() << "\n\tJumpAddress: 0x" << Twine::utohexstr(JumpAddress) 1842 << "\n\tTargetAddress: 0x" << Twine::utohexstr(TargetAddress) 1843 << "\n\tKeyAddress: 0x" << Twine::utohexstr(KeyAddress) << '\n'; 1844 }); 1845 (void)TargetAddress; 1846 (void)KeyAddress; 1847 1848 BinaryFunction *BF = 1849 BC.getBinaryFunctionContainingAddress(JumpAddress, 1850 /*CheckPastEnd*/ false, 1851 /*UseMaxSize*/ true); 1852 assert(BF && "Cannot get function for modified static key."); 1853 1854 if (!BF->isEmitted()) 1855 continue; 1856 1857 // Disassemble instruction to collect stats even if nop-conversion is 1858 // unnecessary. 1859 MutableArrayRef<uint8_t> Contents = MutableArrayRef<uint8_t>( 1860 reinterpret_cast<uint8_t *>(BF->getImageAddress()), BF->getImageSize()); 1861 assert(Contents.size() && "Non-empty function image expected."); 1862 1863 MCInst Inst; 1864 uint64_t Size; 1865 const uint64_t JumpOffset = JumpAddress - BF->getAddress(); 1866 if (!BC.DisAsm->getInstruction(Inst, Size, Contents.slice(JumpOffset), 0, 1867 nulls())) { 1868 llvm_unreachable("Unable to disassemble jump instruction."); 1869 } 1870 assert(BC.MIB->isBranch(Inst) && "Branch instruction expected."); 1871 1872 if (Size == 2) 1873 ++NumShort; 1874 else if (Size == 5) 1875 ++NumLong; 1876 else 1877 llvm_unreachable("Unexpected size for static keys jump instruction."); 1878 1879 // Check if we need to convert jump instruction into a nop. 1880 if (!NopIDs.contains(EntryID)) 1881 continue; 1882 1883 SmallString<15> NopCode; 1884 raw_svector_ostream VecOS(NopCode); 1885 BC.MAB->writeNopData(VecOS, Size, BC.STI.get()); 1886 for (uint64_t I = 0; I < Size; ++I) 1887 Contents[JumpOffset + I] = NopCode[I]; 1888 } 1889 1890 BC.outs() << "BOLT-INFO: written " << NumShort << " short and " << NumLong 1891 << " long static keys jumps in optimized functions\n"; 1892 1893 return Error::success(); 1894 } 1895 1896 } // namespace 1897 1898 std::unique_ptr<MetadataRewriter> 1899 llvm::bolt::createLinuxKernelRewriter(BinaryContext &BC) { 1900 return std::make_unique<LinuxKernelRewriter>(BC); 1901 } 1902