1 //===- bolt/Rewrite/LinuxKernelRewriter.cpp -------------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // Support for updating Linux Kernel metadata. 10 // 11 //===----------------------------------------------------------------------===// 12 13 #include "bolt/Core/BinaryFunction.h" 14 #include "bolt/Rewrite/MetadataRewriter.h" 15 #include "bolt/Rewrite/MetadataRewriters.h" 16 #include "bolt/Utils/CommandLineOpts.h" 17 #include "llvm/ADT/DenseSet.h" 18 #include "llvm/Support/BinaryStreamWriter.h" 19 #include "llvm/Support/CommandLine.h" 20 #include "llvm/Support/Debug.h" 21 #include "llvm/Support/Errc.h" 22 23 #define DEBUG_TYPE "bolt-linux" 24 25 using namespace llvm; 26 using namespace bolt; 27 28 namespace opts { 29 30 static cl::opt<bool> 31 DumpExceptions("dump-linux-exceptions", 32 cl::desc("dump Linux kernel exception table"), 33 cl::init(false), cl::Hidden, cl::cat(BoltCategory)); 34 35 static cl::opt<bool> 36 DumpORC("dump-orc", cl::desc("dump raw ORC unwind information (sorted)"), 37 cl::init(false), cl::Hidden, cl::cat(BoltCategory)); 38 39 static cl::opt<bool> DumpParavirtualPatchSites( 40 "dump-para-sites", cl::desc("dump Linux kernel paravitual patch sites"), 41 cl::init(false), cl::Hidden, cl::cat(BoltCategory)); 42 43 static cl::opt<bool> DumpStaticCalls("dump-static-calls", 44 cl::desc("dump Linux kernel static calls"), 45 cl::init(false), cl::Hidden, 46 cl::cat(BoltCategory)); 47 48 static cl::opt<bool> 49 PrintORC("print-orc", 50 cl::desc("print ORC unwind information for instructions"), 51 cl::init(true), cl::Hidden, cl::cat(BoltCategory)); 52 53 } // namespace opts 54 55 /// Linux Kernel supports stack unwinding using ORC (oops rewind capability). 56 /// ORC state at every IP can be described by the following data structure. 57 struct ORCState { 58 int16_t SPOffset; 59 int16_t BPOffset; 60 int16_t Info; 61 62 bool operator==(const ORCState &Other) const { 63 return SPOffset == Other.SPOffset && BPOffset == Other.BPOffset && 64 Info == Other.Info; 65 } 66 67 bool operator!=(const ORCState &Other) const { return !(*this == Other); } 68 }; 69 70 /// Section terminator ORC entry. 71 static ORCState NullORC = {0, 0, 0}; 72 73 /// Basic printer for ORC entry. It does not provide the same level of 74 /// information as objtool (for now). 75 inline raw_ostream &operator<<(raw_ostream &OS, const ORCState &E) { 76 if (!opts::PrintORC) 77 return OS; 78 if (E != NullORC) 79 OS << format("{sp: %d, bp: %d, info: 0x%x}", E.SPOffset, E.BPOffset, 80 E.Info); 81 else 82 OS << "{terminator}"; 83 84 return OS; 85 } 86 87 namespace { 88 89 class LinuxKernelRewriter final : public MetadataRewriter { 90 /// Linux Kernel special sections point to a specific instruction in many 91 /// cases. Unlike SDTMarkerInfo, these markers can come from different 92 /// sections. 93 struct LKInstructionMarkerInfo { 94 uint64_t SectionOffset; 95 int32_t PCRelativeOffset; 96 bool IsPCRelative; 97 StringRef SectionName; 98 }; 99 100 /// Map linux kernel program locations/instructions to their pointers in 101 /// special linux kernel sections 102 std::unordered_map<uint64_t, std::vector<LKInstructionMarkerInfo>> LKMarkers; 103 104 /// Linux ORC sections. 105 ErrorOr<BinarySection &> ORCUnwindSection = std::errc::bad_address; 106 ErrorOr<BinarySection &> ORCUnwindIPSection = std::errc::bad_address; 107 108 /// Size of entries in ORC sections. 109 static constexpr size_t ORC_UNWIND_ENTRY_SIZE = 6; 110 static constexpr size_t ORC_UNWIND_IP_ENTRY_SIZE = 4; 111 112 struct ORCListEntry { 113 uint64_t IP; /// Instruction address. 114 BinaryFunction *BF; /// Binary function corresponding to the entry. 115 ORCState ORC; /// Stack unwind info in ORC format. 116 117 /// ORC entries are sorted by their IPs. Terminator entries (NullORC) 118 /// should precede other entries with the same address. 119 bool operator<(const ORCListEntry &Other) const { 120 if (IP < Other.IP) 121 return 1; 122 if (IP > Other.IP) 123 return 0; 124 return ORC == NullORC && Other.ORC != NullORC; 125 } 126 }; 127 128 using ORCListType = std::vector<ORCListEntry>; 129 ORCListType ORCEntries; 130 131 /// Number of entries in the input file ORC sections. 132 uint64_t NumORCEntries = 0; 133 134 /// Section containing static call table. 135 ErrorOr<BinarySection &> StaticCallSection = std::errc::bad_address; 136 uint64_t StaticCallTableAddress = 0; 137 static constexpr size_t STATIC_CALL_ENTRY_SIZE = 8; 138 139 struct StaticCallInfo { 140 uint32_t ID; /// Identifier of the entry in the table. 141 BinaryFunction *Function; /// Function containing associated call. 142 MCSymbol *Label; /// Label attached to the call. 143 }; 144 using StaticCallListType = std::vector<StaticCallInfo>; 145 StaticCallListType StaticCallEntries; 146 147 /// Section containing the Linux exception table. 148 ErrorOr<BinarySection &> ExceptionsSection = std::errc::bad_address; 149 static constexpr size_t EXCEPTION_TABLE_ENTRY_SIZE = 12; 150 151 /// Functions with exception handling code. 152 DenseSet<BinaryFunction *> FunctionsWithExceptions; 153 154 /// Section with paravirtual patch sites. 155 ErrorOr<BinarySection &> ParavirtualPatchSection = std::errc::bad_address; 156 157 /// Alignment of paravirtual patch structures. 158 static constexpr size_t PARA_PATCH_ALIGN = 8; 159 160 /// Section containing Linux bug table. 161 ErrorOr<BinarySection &> BugTableSection = std::errc::bad_address; 162 163 /// Size of bug_entry struct. 164 static constexpr size_t BUG_TABLE_ENTRY_SIZE = 12; 165 166 /// Insert an LKMarker for a given code pointer \p PC from a non-code section 167 /// \p SectionName. 168 void insertLKMarker(uint64_t PC, uint64_t SectionOffset, 169 int32_t PCRelativeOffset, bool IsPCRelative, 170 StringRef SectionName); 171 172 /// Process linux kernel special sections and their relocations. 173 void processLKSections(); 174 175 /// Process special linux kernel section, .pci_fixup. 176 void processLKPCIFixup(); 177 178 /// Process __ksymtab and __ksymtab_gpl. 179 void processLKKSymtab(bool IsGPL = false); 180 181 /// Process special linux kernel section, .smp_locks. 182 void processLKSMPLocks(); 183 184 /// Update LKMarkers' locations for the output binary. 185 void updateLKMarkers(); 186 187 /// Read ORC unwind information and annotate instructions. 188 Error readORCTables(); 189 190 /// Update ORC for functions once CFG is constructed. 191 Error processORCPostCFG(); 192 193 /// Update ORC data in the binary. 194 Error rewriteORCTables(); 195 196 /// Static call table handling. 197 Error readStaticCalls(); 198 Error rewriteStaticCalls(); 199 200 Error readExceptionTable(); 201 Error rewriteExceptionTable(); 202 203 /// Paravirtual instruction patch sites. 204 Error readParaInstructions(); 205 206 Error readBugTable(); 207 208 /// Mark instructions referenced by kernel metadata. 209 Error markInstructions(); 210 211 public: 212 LinuxKernelRewriter(BinaryContext &BC) 213 : MetadataRewriter("linux-kernel-rewriter", BC) {} 214 215 Error preCFGInitializer() override { 216 processLKSections(); 217 if (Error E = markInstructions()) 218 return E; 219 220 if (Error E = readORCTables()) 221 return E; 222 223 if (Error E = readStaticCalls()) 224 return E; 225 226 if (Error E = readExceptionTable()) 227 return E; 228 229 if (Error E = readParaInstructions()) 230 return E; 231 232 if (Error E = readBugTable()) 233 return E; 234 235 return Error::success(); 236 } 237 238 Error postCFGInitializer() override { 239 if (Error E = processORCPostCFG()) 240 return E; 241 242 return Error::success(); 243 } 244 245 Error preEmitFinalizer() override { 246 // Since rewriteExceptionTable() can mark functions as non-simple, run it 247 // before other rewriters that depend on simple/emit status. 248 if (Error E = rewriteExceptionTable()) 249 return E; 250 251 if (Error E = rewriteORCTables()) 252 return E; 253 254 if (Error E = rewriteStaticCalls()) 255 return E; 256 257 return Error::success(); 258 } 259 260 Error postEmitFinalizer() override { 261 updateLKMarkers(); 262 263 return Error::success(); 264 } 265 }; 266 267 Error LinuxKernelRewriter::markInstructions() { 268 for (const uint64_t PC : llvm::make_first_range(LKMarkers)) { 269 BinaryFunction *BF = BC.getBinaryFunctionContainingAddress(PC); 270 271 if (!BF || !BC.shouldEmit(*BF)) 272 continue; 273 274 const uint64_t Offset = PC - BF->getAddress(); 275 MCInst *Inst = BF->getInstructionAtOffset(Offset); 276 if (!Inst) 277 return createStringError(errc::executable_format_error, 278 "no instruction matches kernel marker offset"); 279 280 BC.MIB->setOffset(*Inst, static_cast<uint32_t>(Offset)); 281 282 BF->setHasSDTMarker(true); 283 } 284 285 return Error::success(); 286 } 287 288 void LinuxKernelRewriter::insertLKMarker(uint64_t PC, uint64_t SectionOffset, 289 int32_t PCRelativeOffset, 290 bool IsPCRelative, 291 StringRef SectionName) { 292 LKMarkers[PC].emplace_back(LKInstructionMarkerInfo{ 293 SectionOffset, PCRelativeOffset, IsPCRelative, SectionName}); 294 } 295 296 void LinuxKernelRewriter::processLKSections() { 297 processLKPCIFixup(); 298 processLKKSymtab(); 299 processLKKSymtab(true); 300 processLKSMPLocks(); 301 } 302 303 /// Process .pci_fixup section of Linux Kernel. 304 /// This section contains a list of entries for different PCI devices and their 305 /// corresponding hook handler (code pointer where the fixup 306 /// code resides, usually on x86_64 it is an entry PC relative 32 bit offset). 307 /// Documentation is in include/linux/pci.h. 308 void LinuxKernelRewriter::processLKPCIFixup() { 309 ErrorOr<BinarySection &> SectionOrError = 310 BC.getUniqueSectionByName(".pci_fixup"); 311 if (!SectionOrError) 312 return; 313 314 const uint64_t SectionSize = SectionOrError->getSize(); 315 const uint64_t SectionAddress = SectionOrError->getAddress(); 316 assert((SectionSize % 16) == 0 && ".pci_fixup size is not a multiple of 16"); 317 318 for (uint64_t I = 12; I + 4 <= SectionSize; I += 16) { 319 const uint64_t PC = SectionAddress + I; 320 ErrorOr<uint64_t> Offset = BC.getSignedValueAtAddress(PC, 4); 321 assert(Offset && "cannot read value from .pci_fixup"); 322 const int32_t SignedOffset = *Offset; 323 const uint64_t HookupAddress = PC + SignedOffset; 324 BinaryFunction *HookupFunction = 325 BC.getBinaryFunctionAtAddress(HookupAddress); 326 assert(HookupFunction && "expected function for entry in .pci_fixup"); 327 BC.addRelocation(PC, HookupFunction->getSymbol(), Relocation::getPC32(), 0, 328 *Offset); 329 } 330 } 331 332 /// Process __ksymtab[_gpl] sections of Linux Kernel. 333 /// This section lists all the vmlinux symbols that kernel modules can access. 334 /// 335 /// All the entries are 4 bytes each and hence we can read them by one by one 336 /// and ignore the ones that are not pointing to the .text section. All pointers 337 /// are PC relative offsets. Always, points to the beginning of the function. 338 void LinuxKernelRewriter::processLKKSymtab(bool IsGPL) { 339 StringRef SectionName = "__ksymtab"; 340 if (IsGPL) 341 SectionName = "__ksymtab_gpl"; 342 ErrorOr<BinarySection &> SectionOrError = 343 BC.getUniqueSectionByName(SectionName); 344 assert(SectionOrError && 345 "__ksymtab[_gpl] section not found in Linux Kernel binary"); 346 const uint64_t SectionSize = SectionOrError->getSize(); 347 const uint64_t SectionAddress = SectionOrError->getAddress(); 348 assert((SectionSize % 4) == 0 && 349 "The size of the __ksymtab[_gpl] section should be a multiple of 4"); 350 351 for (uint64_t I = 0; I < SectionSize; I += 4) { 352 const uint64_t EntryAddress = SectionAddress + I; 353 ErrorOr<uint64_t> Offset = BC.getSignedValueAtAddress(EntryAddress, 4); 354 assert(Offset && "Reading valid PC-relative offset for a ksymtab entry"); 355 const int32_t SignedOffset = *Offset; 356 const uint64_t RefAddress = EntryAddress + SignedOffset; 357 BinaryFunction *BF = BC.getBinaryFunctionAtAddress(RefAddress); 358 if (!BF) 359 continue; 360 361 BC.addRelocation(EntryAddress, BF->getSymbol(), Relocation::getPC32(), 0, 362 *Offset); 363 } 364 } 365 366 /// .smp_locks section contains PC-relative references to instructions with LOCK 367 /// prefix. The prefix can be converted to NOP at boot time on non-SMP systems. 368 void LinuxKernelRewriter::processLKSMPLocks() { 369 ErrorOr<BinarySection &> SectionOrError = 370 BC.getUniqueSectionByName(".smp_locks"); 371 if (!SectionOrError) 372 return; 373 374 uint64_t SectionSize = SectionOrError->getSize(); 375 const uint64_t SectionAddress = SectionOrError->getAddress(); 376 assert((SectionSize % 4) == 0 && 377 "The size of the .smp_locks section should be a multiple of 4"); 378 379 for (uint64_t I = 0; I < SectionSize; I += 4) { 380 const uint64_t EntryAddress = SectionAddress + I; 381 ErrorOr<uint64_t> Offset = BC.getSignedValueAtAddress(EntryAddress, 4); 382 assert(Offset && "Reading valid PC-relative offset for a .smp_locks entry"); 383 int32_t SignedOffset = *Offset; 384 uint64_t RefAddress = EntryAddress + SignedOffset; 385 386 BinaryFunction *ContainingBF = 387 BC.getBinaryFunctionContainingAddress(RefAddress); 388 if (!ContainingBF) 389 continue; 390 391 insertLKMarker(RefAddress, I, SignedOffset, true, ".smp_locks"); 392 } 393 } 394 395 void LinuxKernelRewriter::updateLKMarkers() { 396 if (LKMarkers.size() == 0) 397 return; 398 399 std::unordered_map<std::string, uint64_t> PatchCounts; 400 for (std::pair<const uint64_t, std::vector<LKInstructionMarkerInfo>> 401 &LKMarkerInfoKV : LKMarkers) { 402 const uint64_t OriginalAddress = LKMarkerInfoKV.first; 403 const BinaryFunction *BF = 404 BC.getBinaryFunctionContainingAddress(OriginalAddress, false, true); 405 if (!BF) 406 continue; 407 408 uint64_t NewAddress = BF->translateInputToOutputAddress(OriginalAddress); 409 if (NewAddress == 0) 410 continue; 411 412 // Apply base address. 413 if (OriginalAddress >= 0xffffffff00000000 && NewAddress < 0xffffffff) 414 NewAddress = NewAddress + 0xffffffff00000000; 415 416 if (OriginalAddress == NewAddress) 417 continue; 418 419 for (LKInstructionMarkerInfo &LKMarkerInfo : LKMarkerInfoKV.second) { 420 StringRef SectionName = LKMarkerInfo.SectionName; 421 SimpleBinaryPatcher *LKPatcher; 422 ErrorOr<BinarySection &> BSec = BC.getUniqueSectionByName(SectionName); 423 assert(BSec && "missing section info for kernel section"); 424 if (!BSec->getPatcher()) 425 BSec->registerPatcher(std::make_unique<SimpleBinaryPatcher>()); 426 LKPatcher = static_cast<SimpleBinaryPatcher *>(BSec->getPatcher()); 427 PatchCounts[std::string(SectionName)]++; 428 if (LKMarkerInfo.IsPCRelative) 429 LKPatcher->addLE32Patch(LKMarkerInfo.SectionOffset, 430 NewAddress - OriginalAddress + 431 LKMarkerInfo.PCRelativeOffset); 432 else 433 LKPatcher->addLE64Patch(LKMarkerInfo.SectionOffset, NewAddress); 434 } 435 } 436 BC.outs() << "BOLT-INFO: patching linux kernel sections. Total patches per " 437 "section are as follows:\n"; 438 for (const std::pair<const std::string, uint64_t> &KV : PatchCounts) 439 BC.outs() << " Section: " << KV.first << ", patch-counts: " << KV.second 440 << '\n'; 441 } 442 443 Error LinuxKernelRewriter::readORCTables() { 444 // NOTE: we should ignore relocations for orc tables as the tables are sorted 445 // post-link time and relocations are not updated. 446 ORCUnwindSection = BC.getUniqueSectionByName(".orc_unwind"); 447 ORCUnwindIPSection = BC.getUniqueSectionByName(".orc_unwind_ip"); 448 449 if (!ORCUnwindSection && !ORCUnwindIPSection) 450 return Error::success(); 451 452 if (!ORCUnwindSection || !ORCUnwindIPSection) 453 return createStringError(errc::executable_format_error, 454 "missing ORC section"); 455 456 NumORCEntries = ORCUnwindIPSection->getSize() / ORC_UNWIND_IP_ENTRY_SIZE; 457 if (ORCUnwindSection->getSize() != NumORCEntries * ORC_UNWIND_ENTRY_SIZE || 458 ORCUnwindIPSection->getSize() != NumORCEntries * ORC_UNWIND_IP_ENTRY_SIZE) 459 return createStringError(errc::executable_format_error, 460 "ORC entries number mismatch detected"); 461 462 const uint64_t IPSectionAddress = ORCUnwindIPSection->getAddress(); 463 DataExtractor OrcDE = DataExtractor(ORCUnwindSection->getContents(), 464 BC.AsmInfo->isLittleEndian(), 465 BC.AsmInfo->getCodePointerSize()); 466 DataExtractor IPDE = DataExtractor(ORCUnwindIPSection->getContents(), 467 BC.AsmInfo->isLittleEndian(), 468 BC.AsmInfo->getCodePointerSize()); 469 DataExtractor::Cursor ORCCursor(0); 470 DataExtractor::Cursor IPCursor(0); 471 uint64_t PrevIP = 0; 472 for (uint32_t Index = 0; Index < NumORCEntries; ++Index) { 473 const uint64_t IP = 474 IPSectionAddress + IPCursor.tell() + (int32_t)IPDE.getU32(IPCursor); 475 476 // Consume the status of the cursor. 477 if (!IPCursor) 478 return createStringError(errc::executable_format_error, 479 "out of bounds while reading ORC IP table"); 480 481 if (IP < PrevIP && opts::Verbosity) 482 BC.errs() << "BOLT-WARNING: out of order IP 0x" << Twine::utohexstr(IP) 483 << " detected while reading ORC\n"; 484 485 PrevIP = IP; 486 487 // Store all entries, includes those we are not going to update as the 488 // tables need to be sorted globally before being written out. 489 ORCEntries.push_back(ORCListEntry()); 490 ORCListEntry &Entry = ORCEntries.back(); 491 492 Entry.IP = IP; 493 Entry.ORC.SPOffset = (int16_t)OrcDE.getU16(ORCCursor); 494 Entry.ORC.BPOffset = (int16_t)OrcDE.getU16(ORCCursor); 495 Entry.ORC.Info = (int16_t)OrcDE.getU16(ORCCursor); 496 Entry.BF = nullptr; 497 498 // Consume the status of the cursor. 499 if (!ORCCursor) 500 return createStringError(errc::executable_format_error, 501 "out of bounds while reading ORC"); 502 503 if (Entry.ORC == NullORC) 504 continue; 505 506 BinaryFunction *&BF = Entry.BF; 507 BF = BC.getBinaryFunctionContainingAddress(IP, /*CheckPastEnd*/ true); 508 509 // If the entry immediately pointing past the end of the function is not 510 // the terminator entry, then it does not belong to this function. 511 if (BF && BF->getAddress() + BF->getSize() == IP) 512 BF = 0; 513 514 if (!BF) { 515 if (opts::Verbosity) 516 BC.errs() << "BOLT-WARNING: no binary function found matching ORC 0x" 517 << Twine::utohexstr(IP) << ": " << Entry.ORC << '\n'; 518 continue; 519 } 520 521 BF->setHasORC(true); 522 523 if (!BF->hasInstructions()) 524 continue; 525 526 MCInst *Inst = BF->getInstructionAtOffset(IP - BF->getAddress()); 527 if (!Inst) 528 return createStringError( 529 errc::executable_format_error, 530 "no instruction at address 0x%" PRIx64 " in .orc_unwind_ip", IP); 531 532 // Some addresses will have two entries associated with them. The first 533 // one being a "weak" section terminator. Since we ignore the terminator, 534 // we should only assign one entry per instruction. 535 if (BC.MIB->hasAnnotation(*Inst, "ORC")) 536 return createStringError( 537 errc::executable_format_error, 538 "duplicate non-terminal ORC IP 0x%" PRIx64 " in .orc_unwind_ip", IP); 539 540 BC.MIB->addAnnotation(*Inst, "ORC", Entry.ORC); 541 } 542 543 BC.outs() << "BOLT-INFO: parsed " << NumORCEntries << " ORC entries\n"; 544 545 if (opts::DumpORC) { 546 BC.outs() << "BOLT-INFO: ORC unwind information:\n"; 547 for (const ORCListEntry &E : ORCEntries) { 548 BC.outs() << "0x" << Twine::utohexstr(E.IP) << ": " << E.ORC; 549 if (E.BF) 550 BC.outs() << ": " << *E.BF; 551 BC.outs() << '\n'; 552 } 553 } 554 555 // Add entries for functions that don't have explicit ORC info at the start. 556 // We'll have the correct info for them even if ORC for the preceding function 557 // changes. 558 ORCListType NewEntries; 559 for (BinaryFunction &BF : llvm::make_second_range(BC.getBinaryFunctions())) { 560 auto It = llvm::partition_point(ORCEntries, [&](const ORCListEntry &E) { 561 return E.IP <= BF.getAddress(); 562 }); 563 if (It != ORCEntries.begin()) 564 --It; 565 566 if (It->BF == &BF) 567 continue; 568 569 if (It->ORC == NullORC && It->IP == BF.getAddress()) { 570 assert(!It->BF); 571 It->BF = &BF; 572 continue; 573 } 574 575 NewEntries.push_back({BF.getAddress(), &BF, It->ORC}); 576 if (It->ORC != NullORC) 577 BF.setHasORC(true); 578 } 579 580 llvm::copy(NewEntries, std::back_inserter(ORCEntries)); 581 llvm::sort(ORCEntries); 582 583 if (opts::DumpORC) { 584 BC.outs() << "BOLT-INFO: amended ORC unwind information:\n"; 585 for (const ORCListEntry &E : ORCEntries) { 586 BC.outs() << "0x" << Twine::utohexstr(E.IP) << ": " << E.ORC; 587 if (E.BF) 588 BC.outs() << ": " << *E.BF; 589 BC.outs() << '\n'; 590 } 591 } 592 593 return Error::success(); 594 } 595 596 Error LinuxKernelRewriter::processORCPostCFG() { 597 if (!NumORCEntries) 598 return Error::success(); 599 600 // Propagate ORC to the rest of the function. We can annotate every 601 // instruction in every function, but to minimize the overhead, we annotate 602 // the first instruction in every basic block to reflect the state at the 603 // entry. This way, the ORC state can be calculated based on annotations 604 // regardless of the basic block layout. Note that if we insert/delete 605 // instructions, we must take care to attach ORC info to the new/deleted ones. 606 for (BinaryFunction &BF : llvm::make_second_range(BC.getBinaryFunctions())) { 607 608 std::optional<ORCState> CurrentState; 609 for (BinaryBasicBlock &BB : BF) { 610 for (MCInst &Inst : BB) { 611 ErrorOr<ORCState> State = 612 BC.MIB->tryGetAnnotationAs<ORCState>(Inst, "ORC"); 613 614 if (State) { 615 CurrentState = *State; 616 continue; 617 } 618 619 // Get state for the start of the function. 620 if (!CurrentState) { 621 // A terminator entry (NullORC) can match the function address. If 622 // there's also a non-terminator entry, it will be placed after the 623 // terminator. Hence, we are looking for the last ORC entry that 624 // matches the address. 625 auto It = 626 llvm::partition_point(ORCEntries, [&](const ORCListEntry &E) { 627 return E.IP <= BF.getAddress(); 628 }); 629 if (It != ORCEntries.begin()) 630 --It; 631 632 assert(It->IP == BF.getAddress() && (!It->BF || It->BF == &BF) && 633 "ORC info at function entry expected."); 634 635 if (It->ORC == NullORC && BF.hasORC()) { 636 BC.errs() << "BOLT-WARNING: ORC unwind info excludes prologue for " 637 << BF << '\n'; 638 } 639 640 It->BF = &BF; 641 642 CurrentState = It->ORC; 643 if (It->ORC != NullORC) 644 BF.setHasORC(true); 645 } 646 647 // While printing ORC, attach info to every instruction for convenience. 648 if (opts::PrintORC || &Inst == &BB.front()) 649 BC.MIB->addAnnotation(Inst, "ORC", *CurrentState); 650 } 651 } 652 } 653 654 return Error::success(); 655 } 656 657 Error LinuxKernelRewriter::rewriteORCTables() { 658 if (!NumORCEntries) 659 return Error::success(); 660 661 // Update ORC sections in-place. As we change the code, the number of ORC 662 // entries may increase for some functions. However, as we remove terminator 663 // redundancy (see below), more space is freed up and we should always be able 664 // to fit new ORC tables in the reserved space. 665 auto createInPlaceWriter = [&](BinarySection &Section) -> BinaryStreamWriter { 666 const size_t Size = Section.getSize(); 667 uint8_t *NewContents = new uint8_t[Size]; 668 Section.updateContents(NewContents, Size); 669 Section.setOutputFileOffset(Section.getInputFileOffset()); 670 return BinaryStreamWriter({NewContents, Size}, BC.AsmInfo->isLittleEndian() 671 ? endianness::little 672 : endianness::big); 673 }; 674 BinaryStreamWriter UnwindWriter = createInPlaceWriter(*ORCUnwindSection); 675 BinaryStreamWriter UnwindIPWriter = createInPlaceWriter(*ORCUnwindIPSection); 676 677 uint64_t NumEmitted = 0; 678 std::optional<ORCState> LastEmittedORC; 679 auto emitORCEntry = [&](const uint64_t IP, const ORCState &ORC, 680 MCSymbol *Label = 0, bool Force = false) -> Error { 681 if (LastEmittedORC && ORC == *LastEmittedORC && !Force) 682 return Error::success(); 683 684 LastEmittedORC = ORC; 685 686 if (++NumEmitted > NumORCEntries) 687 return createStringError(errc::executable_format_error, 688 "exceeded the number of allocated ORC entries"); 689 690 if (Label) 691 ORCUnwindIPSection->addRelocation(UnwindIPWriter.getOffset(), Label, 692 Relocation::getPC32(), /*Addend*/ 0); 693 694 const int32_t IPValue = 695 IP - ORCUnwindIPSection->getAddress() - UnwindIPWriter.getOffset(); 696 if (Error E = UnwindIPWriter.writeInteger(IPValue)) 697 return E; 698 699 if (Error E = UnwindWriter.writeInteger(ORC.SPOffset)) 700 return E; 701 if (Error E = UnwindWriter.writeInteger(ORC.BPOffset)) 702 return E; 703 if (Error E = UnwindWriter.writeInteger(ORC.Info)) 704 return E; 705 706 return Error::success(); 707 }; 708 709 // Emit new ORC entries for the emitted function. 710 auto emitORC = [&](const BinaryFunction &BF) -> Error { 711 assert(!BF.isSplit() && "Split functions not supported by ORC writer yet."); 712 713 ORCState CurrentState = NullORC; 714 for (BinaryBasicBlock *BB : BF.getLayout().blocks()) { 715 for (MCInst &Inst : *BB) { 716 ErrorOr<ORCState> ErrorOrState = 717 BC.MIB->tryGetAnnotationAs<ORCState>(Inst, "ORC"); 718 if (!ErrorOrState || *ErrorOrState == CurrentState) 719 continue; 720 721 // Issue label for the instruction. 722 MCSymbol *Label = 723 BC.MIB->getOrCreateInstLabel(Inst, "__ORC_", BC.Ctx.get()); 724 725 if (Error E = emitORCEntry(0, *ErrorOrState, Label)) 726 return E; 727 728 CurrentState = *ErrorOrState; 729 } 730 } 731 732 return Error::success(); 733 }; 734 735 for (ORCListEntry &Entry : ORCEntries) { 736 // Emit original entries for functions that we haven't modified. 737 if (!Entry.BF || !BC.shouldEmit(*Entry.BF)) { 738 // Emit terminator only if it marks the start of a function. 739 if (Entry.ORC == NullORC && !Entry.BF) 740 continue; 741 if (Error E = emitORCEntry(Entry.IP, Entry.ORC)) 742 return E; 743 continue; 744 } 745 746 // Emit all ORC entries for a function referenced by an entry and skip over 747 // the rest of entries for this function by resetting its ORC attribute. 748 if (Entry.BF->hasORC()) { 749 if (Error E = emitORC(*Entry.BF)) 750 return E; 751 Entry.BF->setHasORC(false); 752 } 753 } 754 755 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: emitted " << NumEmitted 756 << " ORC entries\n"); 757 758 // Replicate terminator entry at the end of sections to match the original 759 // table sizes. 760 const BinaryFunction &LastBF = BC.getBinaryFunctions().rbegin()->second; 761 const uint64_t LastIP = LastBF.getAddress() + LastBF.getMaxSize(); 762 while (UnwindWriter.bytesRemaining()) { 763 if (Error E = emitORCEntry(LastIP, NullORC, nullptr, /*Force*/ true)) 764 return E; 765 } 766 767 return Error::success(); 768 } 769 770 /// The static call site table is created by objtool and contains entries in the 771 /// following format: 772 /// 773 /// struct static_call_site { 774 /// s32 addr; 775 /// s32 key; 776 /// }; 777 /// 778 Error LinuxKernelRewriter::readStaticCalls() { 779 const BinaryData *StaticCallTable = 780 BC.getBinaryDataByName("__start_static_call_sites"); 781 if (!StaticCallTable) 782 return Error::success(); 783 784 StaticCallTableAddress = StaticCallTable->getAddress(); 785 786 const BinaryData *Stop = BC.getBinaryDataByName("__stop_static_call_sites"); 787 if (!Stop) 788 return createStringError(errc::executable_format_error, 789 "missing __stop_static_call_sites symbol"); 790 791 ErrorOr<BinarySection &> ErrorOrSection = 792 BC.getSectionForAddress(StaticCallTableAddress); 793 if (!ErrorOrSection) 794 return createStringError(errc::executable_format_error, 795 "no section matching __start_static_call_sites"); 796 797 StaticCallSection = *ErrorOrSection; 798 if (!StaticCallSection->containsAddress(Stop->getAddress() - 1)) 799 return createStringError(errc::executable_format_error, 800 "__stop_static_call_sites not in the same section " 801 "as __start_static_call_sites"); 802 803 if ((Stop->getAddress() - StaticCallTableAddress) % STATIC_CALL_ENTRY_SIZE) 804 return createStringError(errc::executable_format_error, 805 "static call table size error"); 806 807 const uint64_t SectionAddress = StaticCallSection->getAddress(); 808 DataExtractor DE(StaticCallSection->getContents(), 809 BC.AsmInfo->isLittleEndian(), 810 BC.AsmInfo->getCodePointerSize()); 811 DataExtractor::Cursor Cursor(StaticCallTableAddress - SectionAddress); 812 uint32_t EntryID = 0; 813 while (Cursor && Cursor.tell() < Stop->getAddress() - SectionAddress) { 814 const uint64_t CallAddress = 815 SectionAddress + Cursor.tell() + (int32_t)DE.getU32(Cursor); 816 const uint64_t KeyAddress = 817 SectionAddress + Cursor.tell() + (int32_t)DE.getU32(Cursor); 818 819 // Consume the status of the cursor. 820 if (!Cursor) 821 return createStringError(errc::executable_format_error, 822 "out of bounds while reading static calls"); 823 824 ++EntryID; 825 826 if (opts::DumpStaticCalls) { 827 BC.outs() << "Static Call Site: " << EntryID << '\n'; 828 BC.outs() << "\tCallAddress: 0x" << Twine::utohexstr(CallAddress) 829 << "\n\tKeyAddress: 0x" << Twine::utohexstr(KeyAddress) 830 << '\n'; 831 } 832 833 BinaryFunction *BF = BC.getBinaryFunctionContainingAddress(CallAddress); 834 if (!BF) 835 continue; 836 837 if (!BC.shouldEmit(*BF)) 838 continue; 839 840 if (!BF->hasInstructions()) 841 continue; 842 843 MCInst *Inst = BF->getInstructionAtOffset(CallAddress - BF->getAddress()); 844 if (!Inst) 845 return createStringError(errc::executable_format_error, 846 "no instruction at call site address 0x%" PRIx64, 847 CallAddress); 848 849 // Check for duplicate entries. 850 if (BC.MIB->hasAnnotation(*Inst, "StaticCall")) 851 return createStringError(errc::executable_format_error, 852 "duplicate static call site at 0x%" PRIx64, 853 CallAddress); 854 855 BC.MIB->addAnnotation(*Inst, "StaticCall", EntryID); 856 857 MCSymbol *Label = 858 BC.MIB->getOrCreateInstLabel(*Inst, "__SC_", BC.Ctx.get()); 859 860 StaticCallEntries.push_back({EntryID, BF, Label}); 861 } 862 863 BC.outs() << "BOLT-INFO: parsed " << StaticCallEntries.size() 864 << " static call entries\n"; 865 866 return Error::success(); 867 } 868 869 /// The static call table is sorted during boot time in 870 /// static_call_sort_entries(). This makes it possible to update existing 871 /// entries in-place ignoring their relative order. 872 Error LinuxKernelRewriter::rewriteStaticCalls() { 873 if (!StaticCallTableAddress || !StaticCallSection) 874 return Error::success(); 875 876 for (auto &Entry : StaticCallEntries) { 877 if (!Entry.Function) 878 continue; 879 880 BinaryFunction &BF = *Entry.Function; 881 if (!BC.shouldEmit(BF)) 882 continue; 883 884 // Create a relocation against the label. 885 const uint64_t EntryOffset = StaticCallTableAddress - 886 StaticCallSection->getAddress() + 887 (Entry.ID - 1) * STATIC_CALL_ENTRY_SIZE; 888 StaticCallSection->addRelocation(EntryOffset, Entry.Label, 889 ELF::R_X86_64_PC32, /*Addend*/ 0); 890 } 891 892 return Error::success(); 893 } 894 895 /// Instructions that access user-space memory can cause page faults. These 896 /// faults will be handled by the kernel and execution will resume at the fixup 897 /// code location if the address was invalid. The kernel uses the exception 898 /// table to match the faulting instruction to its fixup. The table consists of 899 /// the following entries: 900 /// 901 /// struct exception_table_entry { 902 /// int insn; 903 /// int fixup; 904 /// int data; 905 /// }; 906 /// 907 /// More info at: 908 /// https://www.kernel.org/doc/Documentation/x86/exception-tables.txt 909 Error LinuxKernelRewriter::readExceptionTable() { 910 ExceptionsSection = BC.getUniqueSectionByName("__ex_table"); 911 if (!ExceptionsSection) 912 return Error::success(); 913 914 if (ExceptionsSection->getSize() % EXCEPTION_TABLE_ENTRY_SIZE) 915 return createStringError(errc::executable_format_error, 916 "exception table size error"); 917 918 const uint64_t SectionAddress = ExceptionsSection->getAddress(); 919 DataExtractor DE(ExceptionsSection->getContents(), 920 BC.AsmInfo->isLittleEndian(), 921 BC.AsmInfo->getCodePointerSize()); 922 DataExtractor::Cursor Cursor(0); 923 uint32_t EntryID = 0; 924 while (Cursor && Cursor.tell() < ExceptionsSection->getSize()) { 925 const uint64_t InstAddress = 926 SectionAddress + Cursor.tell() + (int32_t)DE.getU32(Cursor); 927 const uint64_t FixupAddress = 928 SectionAddress + Cursor.tell() + (int32_t)DE.getU32(Cursor); 929 const uint64_t Data = DE.getU32(Cursor); 930 931 // Consume the status of the cursor. 932 if (!Cursor) 933 return createStringError(errc::executable_format_error, 934 "out of bounds while reading exception table"); 935 936 ++EntryID; 937 938 if (opts::DumpExceptions) { 939 BC.outs() << "Exception Entry: " << EntryID << '\n'; 940 BC.outs() << "\tInsn: 0x" << Twine::utohexstr(InstAddress) << '\n' 941 << "\tFixup: 0x" << Twine::utohexstr(FixupAddress) << '\n' 942 << "\tData: 0x" << Twine::utohexstr(Data) << '\n'; 943 } 944 945 MCInst *Inst = nullptr; 946 MCSymbol *FixupLabel = nullptr; 947 948 BinaryFunction *InstBF = BC.getBinaryFunctionContainingAddress(InstAddress); 949 if (InstBF && BC.shouldEmit(*InstBF)) { 950 Inst = InstBF->getInstructionAtOffset(InstAddress - InstBF->getAddress()); 951 if (!Inst) 952 return createStringError(errc::executable_format_error, 953 "no instruction at address 0x%" PRIx64 954 " in exception table", 955 InstAddress); 956 BC.MIB->addAnnotation(*Inst, "ExceptionEntry", EntryID); 957 FunctionsWithExceptions.insert(InstBF); 958 } 959 960 if (!InstBF && opts::Verbosity) { 961 BC.outs() << "BOLT-INFO: no function matches instruction at 0x" 962 << Twine::utohexstr(InstAddress) 963 << " referenced by Linux exception table\n"; 964 } 965 966 BinaryFunction *FixupBF = 967 BC.getBinaryFunctionContainingAddress(FixupAddress); 968 if (FixupBF && BC.shouldEmit(*FixupBF)) { 969 const uint64_t Offset = FixupAddress - FixupBF->getAddress(); 970 if (!FixupBF->getInstructionAtOffset(Offset)) 971 return createStringError(errc::executable_format_error, 972 "no instruction at fixup address 0x%" PRIx64 973 " in exception table", 974 FixupAddress); 975 FixupLabel = Offset ? FixupBF->addEntryPointAtOffset(Offset) 976 : FixupBF->getSymbol(); 977 if (Inst) 978 BC.MIB->addAnnotation(*Inst, "Fixup", FixupLabel->getName()); 979 FunctionsWithExceptions.insert(FixupBF); 980 } 981 982 if (!FixupBF && opts::Verbosity) { 983 BC.outs() << "BOLT-INFO: no function matches fixup code at 0x" 984 << Twine::utohexstr(FixupAddress) 985 << " referenced by Linux exception table\n"; 986 } 987 } 988 989 BC.outs() << "BOLT-INFO: parsed " 990 << ExceptionsSection->getSize() / EXCEPTION_TABLE_ENTRY_SIZE 991 << " exception table entries\n"; 992 993 return Error::success(); 994 } 995 996 /// Depending on the value of CONFIG_BUILDTIME_TABLE_SORT, the kernel expects 997 /// the exception table to be sorted. Hence we have to sort it after code 998 /// reordering. 999 Error LinuxKernelRewriter::rewriteExceptionTable() { 1000 // Disable output of functions with exceptions before rewrite support is 1001 // added. 1002 for (BinaryFunction *BF : FunctionsWithExceptions) 1003 BF->setSimple(false); 1004 1005 return Error::success(); 1006 } 1007 1008 /// .parainsrtuctions section contains information for patching parvirtual call 1009 /// instructions during runtime. The entries in the section are in the form: 1010 /// 1011 /// struct paravirt_patch_site { 1012 /// u8 *instr; /* original instructions */ 1013 /// u8 type; /* type of this instruction */ 1014 /// u8 len; /* length of original instruction */ 1015 /// }; 1016 /// 1017 /// Note that the structures are aligned at 8-byte boundary. 1018 Error LinuxKernelRewriter::readParaInstructions() { 1019 ParavirtualPatchSection = BC.getUniqueSectionByName(".parainstructions"); 1020 if (!ParavirtualPatchSection) 1021 return Error::success(); 1022 1023 DataExtractor DE = DataExtractor(ParavirtualPatchSection->getContents(), 1024 BC.AsmInfo->isLittleEndian(), 1025 BC.AsmInfo->getCodePointerSize()); 1026 uint32_t EntryID = 0; 1027 DataExtractor::Cursor Cursor(0); 1028 while (Cursor && !DE.eof(Cursor)) { 1029 const uint64_t NextOffset = alignTo(Cursor.tell(), Align(PARA_PATCH_ALIGN)); 1030 if (!DE.isValidOffset(NextOffset)) 1031 break; 1032 1033 Cursor.seek(NextOffset); 1034 1035 const uint64_t InstrLocation = DE.getU64(Cursor); 1036 const uint8_t Type = DE.getU8(Cursor); 1037 const uint8_t Len = DE.getU8(Cursor); 1038 1039 if (!Cursor) 1040 return createStringError(errc::executable_format_error, 1041 "out of bounds while reading .parainstructions"); 1042 1043 ++EntryID; 1044 1045 if (opts::DumpParavirtualPatchSites) { 1046 BC.outs() << "Paravirtual patch site: " << EntryID << '\n'; 1047 BC.outs() << "\tInstr: 0x" << Twine::utohexstr(InstrLocation) 1048 << "\n\tType: 0x" << Twine::utohexstr(Type) << "\n\tLen: 0x" 1049 << Twine::utohexstr(Len) << '\n'; 1050 } 1051 1052 BinaryFunction *BF = BC.getBinaryFunctionContainingAddress(InstrLocation); 1053 if (!BF && opts::Verbosity) { 1054 BC.outs() << "BOLT-INFO: no function matches address 0x" 1055 << Twine::utohexstr(InstrLocation) 1056 << " referenced by paravirutal patch site\n"; 1057 } 1058 1059 if (BF && BC.shouldEmit(*BF)) { 1060 MCInst *Inst = 1061 BF->getInstructionAtOffset(InstrLocation - BF->getAddress()); 1062 if (!Inst) 1063 return createStringError(errc::executable_format_error, 1064 "no instruction at address 0x%" PRIx64 1065 " in paravirtual call site %d", 1066 InstrLocation, EntryID); 1067 BC.MIB->addAnnotation(*Inst, "ParaSite", EntryID); 1068 } 1069 } 1070 1071 BC.outs() << "BOLT-INFO: parsed " << EntryID << " paravirtual patch sites\n"; 1072 1073 return Error::success(); 1074 } 1075 1076 /// Process __bug_table section. 1077 /// This section contains information useful for kernel debugging. 1078 /// Each entry in the section is a struct bug_entry that contains a pointer to 1079 /// the ud2 instruction corresponding to the bug, corresponding file name (both 1080 /// pointers use PC relative offset addressing), line number, and flags. 1081 /// The definition of the struct bug_entry can be found in 1082 /// `include/asm-generic/bug.h` 1083 /// 1084 /// NB: find_bug() uses linear search to match an address to an entry in the bug 1085 /// table. Hence there is no need to sort entries when rewriting the table. 1086 Error LinuxKernelRewriter::readBugTable() { 1087 BugTableSection = BC.getUniqueSectionByName("__bug_table"); 1088 if (!BugTableSection) 1089 return Error::success(); 1090 1091 if (BugTableSection->getSize() % BUG_TABLE_ENTRY_SIZE) 1092 return createStringError(errc::executable_format_error, 1093 "bug table size error"); 1094 1095 const uint64_t SectionAddress = BugTableSection->getAddress(); 1096 DataExtractor DE(BugTableSection->getContents(), BC.AsmInfo->isLittleEndian(), 1097 BC.AsmInfo->getCodePointerSize()); 1098 DataExtractor::Cursor Cursor(0); 1099 uint32_t EntryID = 0; 1100 while (Cursor && Cursor.tell() < BugTableSection->getSize()) { 1101 const uint64_t Pos = Cursor.tell(); 1102 const uint64_t InstAddress = 1103 SectionAddress + Pos + (int32_t)DE.getU32(Cursor); 1104 Cursor.seek(Pos + BUG_TABLE_ENTRY_SIZE); 1105 1106 if (!Cursor) 1107 return createStringError(errc::executable_format_error, 1108 "out of bounds while reading __bug_table"); 1109 1110 ++EntryID; 1111 1112 BinaryFunction *BF = BC.getBinaryFunctionContainingAddress(InstAddress); 1113 if (!BF && opts::Verbosity) { 1114 BC.outs() << "BOLT-INFO: no function matches address 0x" 1115 << Twine::utohexstr(InstAddress) 1116 << " referenced by bug table\n"; 1117 } 1118 1119 if (BF && BC.shouldEmit(*BF)) { 1120 MCInst *Inst = BF->getInstructionAtOffset(InstAddress - BF->getAddress()); 1121 if (!Inst) 1122 return createStringError(errc::executable_format_error, 1123 "no instruction at address 0x%" PRIx64 1124 " referenced by bug table entry %d", 1125 InstAddress, EntryID); 1126 BC.MIB->addAnnotation(*Inst, "BugEntry", EntryID); 1127 } 1128 } 1129 1130 BC.outs() << "BOLT-INFO: parsed " << EntryID << " bug table entries\n"; 1131 1132 return Error::success(); 1133 } 1134 1135 } // namespace 1136 1137 std::unique_ptr<MetadataRewriter> 1138 llvm::bolt::createLinuxKernelRewriter(BinaryContext &BC) { 1139 return std::make_unique<LinuxKernelRewriter>(BC); 1140 } 1141