xref: /llvm-project/bolt/lib/Rewrite/LinuxKernelRewriter.cpp (revision 02629793a4581e79cda342eaae007704b4daa8ce)
1 //===- bolt/Rewrite/LinuxKernelRewriter.cpp -------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // Support for updating Linux Kernel metadata.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "bolt/Core/BinaryFunction.h"
14 #include "bolt/Rewrite/MetadataRewriter.h"
15 #include "bolt/Rewrite/MetadataRewriters.h"
16 #include "bolt/Utils/CommandLineOpts.h"
17 #include "llvm/ADT/DenseSet.h"
18 #include "llvm/Support/BinaryStreamWriter.h"
19 #include "llvm/Support/CommandLine.h"
20 #include "llvm/Support/Debug.h"
21 #include "llvm/Support/Errc.h"
22 
23 #define DEBUG_TYPE "bolt-linux"
24 
25 using namespace llvm;
26 using namespace bolt;
27 
28 namespace opts {
29 
30 static cl::opt<bool>
31     DumpExceptions("dump-linux-exceptions",
32                    cl::desc("dump Linux kernel exception table"),
33                    cl::init(false), cl::Hidden, cl::cat(BoltCategory));
34 
35 static cl::opt<bool>
36     DumpORC("dump-orc", cl::desc("dump raw ORC unwind information (sorted)"),
37             cl::init(false), cl::Hidden, cl::cat(BoltCategory));
38 
39 static cl::opt<bool> DumpParavirtualPatchSites(
40     "dump-para-sites", cl::desc("dump Linux kernel paravitual patch sites"),
41     cl::init(false), cl::Hidden, cl::cat(BoltCategory));
42 
43 static cl::opt<bool> DumpStaticCalls("dump-static-calls",
44                                      cl::desc("dump Linux kernel static calls"),
45                                      cl::init(false), cl::Hidden,
46                                      cl::cat(BoltCategory));
47 
48 static cl::opt<bool>
49     PrintORC("print-orc",
50              cl::desc("print ORC unwind information for instructions"),
51              cl::init(true), cl::Hidden, cl::cat(BoltCategory));
52 
53 } // namespace opts
54 
55 /// Linux Kernel supports stack unwinding using ORC (oops rewind capability).
56 /// ORC state at every IP can be described by the following data structure.
57 struct ORCState {
58   int16_t SPOffset;
59   int16_t BPOffset;
60   int16_t Info;
61 
62   bool operator==(const ORCState &Other) const {
63     return SPOffset == Other.SPOffset && BPOffset == Other.BPOffset &&
64            Info == Other.Info;
65   }
66 
67   bool operator!=(const ORCState &Other) const { return !(*this == Other); }
68 };
69 
70 /// Section terminator ORC entry.
71 static ORCState NullORC = {0, 0, 0};
72 
73 /// Basic printer for ORC entry. It does not provide the same level of
74 /// information as objtool (for now).
75 inline raw_ostream &operator<<(raw_ostream &OS, const ORCState &E) {
76   if (!opts::PrintORC)
77     return OS;
78   if (E != NullORC)
79     OS << format("{sp: %d, bp: %d, info: 0x%x}", E.SPOffset, E.BPOffset,
80                  E.Info);
81   else
82     OS << "{terminator}";
83 
84   return OS;
85 }
86 
87 namespace {
88 
89 class LinuxKernelRewriter final : public MetadataRewriter {
90   /// Linux Kernel special sections point to a specific instruction in many
91   /// cases. Unlike SDTMarkerInfo, these markers can come from different
92   /// sections.
93   struct LKInstructionMarkerInfo {
94     uint64_t SectionOffset;
95     int32_t PCRelativeOffset;
96     bool IsPCRelative;
97     StringRef SectionName;
98   };
99 
100   /// Map linux kernel program locations/instructions to their pointers in
101   /// special linux kernel sections
102   std::unordered_map<uint64_t, std::vector<LKInstructionMarkerInfo>> LKMarkers;
103 
104   /// Linux ORC sections.
105   ErrorOr<BinarySection &> ORCUnwindSection = std::errc::bad_address;
106   ErrorOr<BinarySection &> ORCUnwindIPSection = std::errc::bad_address;
107 
108   /// Size of entries in ORC sections.
109   static constexpr size_t ORC_UNWIND_ENTRY_SIZE = 6;
110   static constexpr size_t ORC_UNWIND_IP_ENTRY_SIZE = 4;
111 
112   struct ORCListEntry {
113     uint64_t IP;        /// Instruction address.
114     BinaryFunction *BF; /// Binary function corresponding to the entry.
115     ORCState ORC;       /// Stack unwind info in ORC format.
116 
117     /// ORC entries are sorted by their IPs. Terminator entries (NullORC)
118     /// should precede other entries with the same address.
119     bool operator<(const ORCListEntry &Other) const {
120       if (IP < Other.IP)
121         return 1;
122       if (IP > Other.IP)
123         return 0;
124       return ORC == NullORC && Other.ORC != NullORC;
125     }
126   };
127 
128   using ORCListType = std::vector<ORCListEntry>;
129   ORCListType ORCEntries;
130 
131   /// Number of entries in the input file ORC sections.
132   uint64_t NumORCEntries = 0;
133 
134   /// Section containing static call table.
135   ErrorOr<BinarySection &> StaticCallSection = std::errc::bad_address;
136   uint64_t StaticCallTableAddress = 0;
137   static constexpr size_t STATIC_CALL_ENTRY_SIZE = 8;
138 
139   struct StaticCallInfo {
140     uint32_t ID;              /// Identifier of the entry in the table.
141     BinaryFunction *Function; /// Function containing associated call.
142     MCSymbol *Label;          /// Label attached to the call.
143   };
144   using StaticCallListType = std::vector<StaticCallInfo>;
145   StaticCallListType StaticCallEntries;
146 
147   /// Section containing the Linux exception table.
148   ErrorOr<BinarySection &> ExceptionsSection = std::errc::bad_address;
149   static constexpr size_t EXCEPTION_TABLE_ENTRY_SIZE = 12;
150 
151   /// Functions with exception handling code.
152   DenseSet<BinaryFunction *> FunctionsWithExceptions;
153 
154   /// Section with paravirtual patch sites.
155   ErrorOr<BinarySection &> ParavirtualPatchSection = std::errc::bad_address;
156 
157   /// Alignment of paravirtual patch structures.
158   static constexpr size_t PARA_PATCH_ALIGN = 8;
159 
160   /// Section containing Linux bug table.
161   ErrorOr<BinarySection &> BugTableSection = std::errc::bad_address;
162 
163   /// Size of bug_entry struct.
164   static constexpr size_t BUG_TABLE_ENTRY_SIZE = 12;
165 
166   /// Insert an LKMarker for a given code pointer \p PC from a non-code section
167   /// \p SectionName.
168   void insertLKMarker(uint64_t PC, uint64_t SectionOffset,
169                       int32_t PCRelativeOffset, bool IsPCRelative,
170                       StringRef SectionName);
171 
172   /// Process linux kernel special sections and their relocations.
173   void processLKSections();
174 
175   /// Process special linux kernel section, .pci_fixup.
176   void processLKPCIFixup();
177 
178   /// Process __ksymtab and __ksymtab_gpl.
179   void processLKKSymtab(bool IsGPL = false);
180 
181   /// Process special linux kernel section, .smp_locks.
182   void processLKSMPLocks();
183 
184   /// Update LKMarkers' locations for the output binary.
185   void updateLKMarkers();
186 
187   /// Read ORC unwind information and annotate instructions.
188   Error readORCTables();
189 
190   /// Update ORC for functions once CFG is constructed.
191   Error processORCPostCFG();
192 
193   /// Update ORC data in the binary.
194   Error rewriteORCTables();
195 
196   /// Static call table handling.
197   Error readStaticCalls();
198   Error rewriteStaticCalls();
199 
200   Error readExceptionTable();
201   Error rewriteExceptionTable();
202 
203   /// Paravirtual instruction patch sites.
204   Error readParaInstructions();
205 
206   Error readBugTable();
207 
208   /// Mark instructions referenced by kernel metadata.
209   Error markInstructions();
210 
211 public:
212   LinuxKernelRewriter(BinaryContext &BC)
213       : MetadataRewriter("linux-kernel-rewriter", BC) {}
214 
215   Error preCFGInitializer() override {
216     processLKSections();
217     if (Error E = markInstructions())
218       return E;
219 
220     if (Error E = readORCTables())
221       return E;
222 
223     if (Error E = readStaticCalls())
224       return E;
225 
226     if (Error E = readExceptionTable())
227       return E;
228 
229     if (Error E = readParaInstructions())
230       return E;
231 
232     if (Error E = readBugTable())
233       return E;
234 
235     return Error::success();
236   }
237 
238   Error postCFGInitializer() override {
239     if (Error E = processORCPostCFG())
240       return E;
241 
242     return Error::success();
243   }
244 
245   Error preEmitFinalizer() override {
246     // Since rewriteExceptionTable() can mark functions as non-simple, run it
247     // before other rewriters that depend on simple/emit status.
248     if (Error E = rewriteExceptionTable())
249       return E;
250 
251     if (Error E = rewriteORCTables())
252       return E;
253 
254     if (Error E = rewriteStaticCalls())
255       return E;
256 
257     return Error::success();
258   }
259 
260   Error postEmitFinalizer() override {
261     updateLKMarkers();
262 
263     return Error::success();
264   }
265 };
266 
267 Error LinuxKernelRewriter::markInstructions() {
268   for (const uint64_t PC : llvm::make_first_range(LKMarkers)) {
269     BinaryFunction *BF = BC.getBinaryFunctionContainingAddress(PC);
270 
271     if (!BF || !BC.shouldEmit(*BF))
272       continue;
273 
274     const uint64_t Offset = PC - BF->getAddress();
275     MCInst *Inst = BF->getInstructionAtOffset(Offset);
276     if (!Inst)
277       return createStringError(errc::executable_format_error,
278                                "no instruction matches kernel marker offset");
279 
280     BC.MIB->setOffset(*Inst, static_cast<uint32_t>(Offset));
281 
282     BF->setHasSDTMarker(true);
283   }
284 
285   return Error::success();
286 }
287 
288 void LinuxKernelRewriter::insertLKMarker(uint64_t PC, uint64_t SectionOffset,
289                                          int32_t PCRelativeOffset,
290                                          bool IsPCRelative,
291                                          StringRef SectionName) {
292   LKMarkers[PC].emplace_back(LKInstructionMarkerInfo{
293       SectionOffset, PCRelativeOffset, IsPCRelative, SectionName});
294 }
295 
296 void LinuxKernelRewriter::processLKSections() {
297   processLKPCIFixup();
298   processLKKSymtab();
299   processLKKSymtab(true);
300   processLKSMPLocks();
301 }
302 
303 /// Process .pci_fixup section of Linux Kernel.
304 /// This section contains a list of entries for different PCI devices and their
305 /// corresponding hook handler (code pointer where the fixup
306 /// code resides, usually on x86_64 it is an entry PC relative 32 bit offset).
307 /// Documentation is in include/linux/pci.h.
308 void LinuxKernelRewriter::processLKPCIFixup() {
309   ErrorOr<BinarySection &> SectionOrError =
310       BC.getUniqueSectionByName(".pci_fixup");
311   if (!SectionOrError)
312     return;
313 
314   const uint64_t SectionSize = SectionOrError->getSize();
315   const uint64_t SectionAddress = SectionOrError->getAddress();
316   assert((SectionSize % 16) == 0 && ".pci_fixup size is not a multiple of 16");
317 
318   for (uint64_t I = 12; I + 4 <= SectionSize; I += 16) {
319     const uint64_t PC = SectionAddress + I;
320     ErrorOr<uint64_t> Offset = BC.getSignedValueAtAddress(PC, 4);
321     assert(Offset && "cannot read value from .pci_fixup");
322     const int32_t SignedOffset = *Offset;
323     const uint64_t HookupAddress = PC + SignedOffset;
324     BinaryFunction *HookupFunction =
325         BC.getBinaryFunctionAtAddress(HookupAddress);
326     assert(HookupFunction && "expected function for entry in .pci_fixup");
327     BC.addRelocation(PC, HookupFunction->getSymbol(), Relocation::getPC32(), 0,
328                      *Offset);
329   }
330 }
331 
332 /// Process __ksymtab[_gpl] sections of Linux Kernel.
333 /// This section lists all the vmlinux symbols that kernel modules can access.
334 ///
335 /// All the entries are 4 bytes each and hence we can read them by one by one
336 /// and ignore the ones that are not pointing to the .text section. All pointers
337 /// are PC relative offsets. Always, points to the beginning of the function.
338 void LinuxKernelRewriter::processLKKSymtab(bool IsGPL) {
339   StringRef SectionName = "__ksymtab";
340   if (IsGPL)
341     SectionName = "__ksymtab_gpl";
342   ErrorOr<BinarySection &> SectionOrError =
343       BC.getUniqueSectionByName(SectionName);
344   assert(SectionOrError &&
345          "__ksymtab[_gpl] section not found in Linux Kernel binary");
346   const uint64_t SectionSize = SectionOrError->getSize();
347   const uint64_t SectionAddress = SectionOrError->getAddress();
348   assert((SectionSize % 4) == 0 &&
349          "The size of the __ksymtab[_gpl] section should be a multiple of 4");
350 
351   for (uint64_t I = 0; I < SectionSize; I += 4) {
352     const uint64_t EntryAddress = SectionAddress + I;
353     ErrorOr<uint64_t> Offset = BC.getSignedValueAtAddress(EntryAddress, 4);
354     assert(Offset && "Reading valid PC-relative offset for a ksymtab entry");
355     const int32_t SignedOffset = *Offset;
356     const uint64_t RefAddress = EntryAddress + SignedOffset;
357     BinaryFunction *BF = BC.getBinaryFunctionAtAddress(RefAddress);
358     if (!BF)
359       continue;
360 
361     BC.addRelocation(EntryAddress, BF->getSymbol(), Relocation::getPC32(), 0,
362                      *Offset);
363   }
364 }
365 
366 /// .smp_locks section contains PC-relative references to instructions with LOCK
367 /// prefix. The prefix can be converted to NOP at boot time on non-SMP systems.
368 void LinuxKernelRewriter::processLKSMPLocks() {
369   ErrorOr<BinarySection &> SectionOrError =
370       BC.getUniqueSectionByName(".smp_locks");
371   if (!SectionOrError)
372     return;
373 
374   uint64_t SectionSize = SectionOrError->getSize();
375   const uint64_t SectionAddress = SectionOrError->getAddress();
376   assert((SectionSize % 4) == 0 &&
377          "The size of the .smp_locks section should be a multiple of 4");
378 
379   for (uint64_t I = 0; I < SectionSize; I += 4) {
380     const uint64_t EntryAddress = SectionAddress + I;
381     ErrorOr<uint64_t> Offset = BC.getSignedValueAtAddress(EntryAddress, 4);
382     assert(Offset && "Reading valid PC-relative offset for a .smp_locks entry");
383     int32_t SignedOffset = *Offset;
384     uint64_t RefAddress = EntryAddress + SignedOffset;
385 
386     BinaryFunction *ContainingBF =
387         BC.getBinaryFunctionContainingAddress(RefAddress);
388     if (!ContainingBF)
389       continue;
390 
391     insertLKMarker(RefAddress, I, SignedOffset, true, ".smp_locks");
392   }
393 }
394 
395 void LinuxKernelRewriter::updateLKMarkers() {
396   if (LKMarkers.size() == 0)
397     return;
398 
399   std::unordered_map<std::string, uint64_t> PatchCounts;
400   for (std::pair<const uint64_t, std::vector<LKInstructionMarkerInfo>>
401            &LKMarkerInfoKV : LKMarkers) {
402     const uint64_t OriginalAddress = LKMarkerInfoKV.first;
403     const BinaryFunction *BF =
404         BC.getBinaryFunctionContainingAddress(OriginalAddress, false, true);
405     if (!BF)
406       continue;
407 
408     uint64_t NewAddress = BF->translateInputToOutputAddress(OriginalAddress);
409     if (NewAddress == 0)
410       continue;
411 
412     // Apply base address.
413     if (OriginalAddress >= 0xffffffff00000000 && NewAddress < 0xffffffff)
414       NewAddress = NewAddress + 0xffffffff00000000;
415 
416     if (OriginalAddress == NewAddress)
417       continue;
418 
419     for (LKInstructionMarkerInfo &LKMarkerInfo : LKMarkerInfoKV.second) {
420       StringRef SectionName = LKMarkerInfo.SectionName;
421       SimpleBinaryPatcher *LKPatcher;
422       ErrorOr<BinarySection &> BSec = BC.getUniqueSectionByName(SectionName);
423       assert(BSec && "missing section info for kernel section");
424       if (!BSec->getPatcher())
425         BSec->registerPatcher(std::make_unique<SimpleBinaryPatcher>());
426       LKPatcher = static_cast<SimpleBinaryPatcher *>(BSec->getPatcher());
427       PatchCounts[std::string(SectionName)]++;
428       if (LKMarkerInfo.IsPCRelative)
429         LKPatcher->addLE32Patch(LKMarkerInfo.SectionOffset,
430                                 NewAddress - OriginalAddress +
431                                     LKMarkerInfo.PCRelativeOffset);
432       else
433         LKPatcher->addLE64Patch(LKMarkerInfo.SectionOffset, NewAddress);
434     }
435   }
436   BC.outs() << "BOLT-INFO: patching linux kernel sections. Total patches per "
437                "section are as follows:\n";
438   for (const std::pair<const std::string, uint64_t> &KV : PatchCounts)
439     BC.outs() << "  Section: " << KV.first << ", patch-counts: " << KV.second
440               << '\n';
441 }
442 
443 Error LinuxKernelRewriter::readORCTables() {
444   // NOTE: we should ignore relocations for orc tables as the tables are sorted
445   // post-link time and relocations are not updated.
446   ORCUnwindSection = BC.getUniqueSectionByName(".orc_unwind");
447   ORCUnwindIPSection = BC.getUniqueSectionByName(".orc_unwind_ip");
448 
449   if (!ORCUnwindSection && !ORCUnwindIPSection)
450     return Error::success();
451 
452   if (!ORCUnwindSection || !ORCUnwindIPSection)
453     return createStringError(errc::executable_format_error,
454                              "missing ORC section");
455 
456   NumORCEntries = ORCUnwindIPSection->getSize() / ORC_UNWIND_IP_ENTRY_SIZE;
457   if (ORCUnwindSection->getSize() != NumORCEntries * ORC_UNWIND_ENTRY_SIZE ||
458       ORCUnwindIPSection->getSize() != NumORCEntries * ORC_UNWIND_IP_ENTRY_SIZE)
459     return createStringError(errc::executable_format_error,
460                              "ORC entries number mismatch detected");
461 
462   const uint64_t IPSectionAddress = ORCUnwindIPSection->getAddress();
463   DataExtractor OrcDE = DataExtractor(ORCUnwindSection->getContents(),
464                                       BC.AsmInfo->isLittleEndian(),
465                                       BC.AsmInfo->getCodePointerSize());
466   DataExtractor IPDE = DataExtractor(ORCUnwindIPSection->getContents(),
467                                      BC.AsmInfo->isLittleEndian(),
468                                      BC.AsmInfo->getCodePointerSize());
469   DataExtractor::Cursor ORCCursor(0);
470   DataExtractor::Cursor IPCursor(0);
471   uint64_t PrevIP = 0;
472   for (uint32_t Index = 0; Index < NumORCEntries; ++Index) {
473     const uint64_t IP =
474         IPSectionAddress + IPCursor.tell() + (int32_t)IPDE.getU32(IPCursor);
475 
476     // Consume the status of the cursor.
477     if (!IPCursor)
478       return createStringError(errc::executable_format_error,
479                                "out of bounds while reading ORC IP table");
480 
481     if (IP < PrevIP && opts::Verbosity)
482       BC.errs() << "BOLT-WARNING: out of order IP 0x" << Twine::utohexstr(IP)
483                 << " detected while reading ORC\n";
484 
485     PrevIP = IP;
486 
487     // Store all entries, includes those we are not going to update as the
488     // tables need to be sorted globally before being written out.
489     ORCEntries.push_back(ORCListEntry());
490     ORCListEntry &Entry = ORCEntries.back();
491 
492     Entry.IP = IP;
493     Entry.ORC.SPOffset = (int16_t)OrcDE.getU16(ORCCursor);
494     Entry.ORC.BPOffset = (int16_t)OrcDE.getU16(ORCCursor);
495     Entry.ORC.Info = (int16_t)OrcDE.getU16(ORCCursor);
496     Entry.BF = nullptr;
497 
498     // Consume the status of the cursor.
499     if (!ORCCursor)
500       return createStringError(errc::executable_format_error,
501                                "out of bounds while reading ORC");
502 
503     if (Entry.ORC == NullORC)
504       continue;
505 
506     BinaryFunction *&BF = Entry.BF;
507     BF = BC.getBinaryFunctionContainingAddress(IP, /*CheckPastEnd*/ true);
508 
509     // If the entry immediately pointing past the end of the function is not
510     // the terminator entry, then it does not belong to this function.
511     if (BF && BF->getAddress() + BF->getSize() == IP)
512       BF = 0;
513 
514     if (!BF) {
515       if (opts::Verbosity)
516         BC.errs() << "BOLT-WARNING: no binary function found matching ORC 0x"
517                   << Twine::utohexstr(IP) << ": " << Entry.ORC << '\n';
518       continue;
519     }
520 
521     BF->setHasORC(true);
522 
523     if (!BF->hasInstructions())
524       continue;
525 
526     MCInst *Inst = BF->getInstructionAtOffset(IP - BF->getAddress());
527     if (!Inst)
528       return createStringError(
529           errc::executable_format_error,
530           "no instruction at address 0x%" PRIx64 " in .orc_unwind_ip", IP);
531 
532     // Some addresses will have two entries associated with them. The first
533     // one being a "weak" section terminator. Since we ignore the terminator,
534     // we should only assign one entry per instruction.
535     if (BC.MIB->hasAnnotation(*Inst, "ORC"))
536       return createStringError(
537           errc::executable_format_error,
538           "duplicate non-terminal ORC IP 0x%" PRIx64 " in .orc_unwind_ip", IP);
539 
540     BC.MIB->addAnnotation(*Inst, "ORC", Entry.ORC);
541   }
542 
543   BC.outs() << "BOLT-INFO: parsed " << NumORCEntries << " ORC entries\n";
544 
545   if (opts::DumpORC) {
546     BC.outs() << "BOLT-INFO: ORC unwind information:\n";
547     for (const ORCListEntry &E : ORCEntries) {
548       BC.outs() << "0x" << Twine::utohexstr(E.IP) << ": " << E.ORC;
549       if (E.BF)
550         BC.outs() << ": " << *E.BF;
551       BC.outs() << '\n';
552     }
553   }
554 
555   // Add entries for functions that don't have explicit ORC info at the start.
556   // We'll have the correct info for them even if ORC for the preceding function
557   // changes.
558   ORCListType NewEntries;
559   for (BinaryFunction &BF : llvm::make_second_range(BC.getBinaryFunctions())) {
560     auto It = llvm::partition_point(ORCEntries, [&](const ORCListEntry &E) {
561       return E.IP <= BF.getAddress();
562     });
563     if (It != ORCEntries.begin())
564       --It;
565 
566     if (It->BF == &BF)
567       continue;
568 
569     if (It->ORC == NullORC && It->IP == BF.getAddress()) {
570       assert(!It->BF);
571       It->BF = &BF;
572       continue;
573     }
574 
575     NewEntries.push_back({BF.getAddress(), &BF, It->ORC});
576     if (It->ORC != NullORC)
577       BF.setHasORC(true);
578   }
579 
580   llvm::copy(NewEntries, std::back_inserter(ORCEntries));
581   llvm::sort(ORCEntries);
582 
583   if (opts::DumpORC) {
584     BC.outs() << "BOLT-INFO: amended ORC unwind information:\n";
585     for (const ORCListEntry &E : ORCEntries) {
586       BC.outs() << "0x" << Twine::utohexstr(E.IP) << ": " << E.ORC;
587       if (E.BF)
588         BC.outs() << ": " << *E.BF;
589       BC.outs() << '\n';
590     }
591   }
592 
593   return Error::success();
594 }
595 
596 Error LinuxKernelRewriter::processORCPostCFG() {
597   if (!NumORCEntries)
598     return Error::success();
599 
600   // Propagate ORC to the rest of the function. We can annotate every
601   // instruction in every function, but to minimize the overhead, we annotate
602   // the first instruction in every basic block to reflect the state at the
603   // entry. This way, the ORC state can be calculated based on annotations
604   // regardless of the basic block layout. Note that if we insert/delete
605   // instructions, we must take care to attach ORC info to the new/deleted ones.
606   for (BinaryFunction &BF : llvm::make_second_range(BC.getBinaryFunctions())) {
607 
608     std::optional<ORCState> CurrentState;
609     for (BinaryBasicBlock &BB : BF) {
610       for (MCInst &Inst : BB) {
611         ErrorOr<ORCState> State =
612             BC.MIB->tryGetAnnotationAs<ORCState>(Inst, "ORC");
613 
614         if (State) {
615           CurrentState = *State;
616           continue;
617         }
618 
619         // Get state for the start of the function.
620         if (!CurrentState) {
621           // A terminator entry (NullORC) can match the function address. If
622           // there's also a non-terminator entry, it will be placed after the
623           // terminator. Hence, we are looking for the last ORC entry that
624           // matches the address.
625           auto It =
626               llvm::partition_point(ORCEntries, [&](const ORCListEntry &E) {
627                 return E.IP <= BF.getAddress();
628               });
629           if (It != ORCEntries.begin())
630             --It;
631 
632           assert(It->IP == BF.getAddress() && (!It->BF || It->BF == &BF) &&
633                  "ORC info at function entry expected.");
634 
635           if (It->ORC == NullORC && BF.hasORC()) {
636             BC.errs() << "BOLT-WARNING: ORC unwind info excludes prologue for "
637                       << BF << '\n';
638           }
639 
640           It->BF = &BF;
641 
642           CurrentState = It->ORC;
643           if (It->ORC != NullORC)
644             BF.setHasORC(true);
645         }
646 
647         // While printing ORC, attach info to every instruction for convenience.
648         if (opts::PrintORC || &Inst == &BB.front())
649           BC.MIB->addAnnotation(Inst, "ORC", *CurrentState);
650       }
651     }
652   }
653 
654   return Error::success();
655 }
656 
657 Error LinuxKernelRewriter::rewriteORCTables() {
658   if (!NumORCEntries)
659     return Error::success();
660 
661   // Update ORC sections in-place. As we change the code, the number of ORC
662   // entries may increase for some functions. However, as we remove terminator
663   // redundancy (see below), more space is freed up and we should always be able
664   // to fit new ORC tables in the reserved space.
665   auto createInPlaceWriter = [&](BinarySection &Section) -> BinaryStreamWriter {
666     const size_t Size = Section.getSize();
667     uint8_t *NewContents = new uint8_t[Size];
668     Section.updateContents(NewContents, Size);
669     Section.setOutputFileOffset(Section.getInputFileOffset());
670     return BinaryStreamWriter({NewContents, Size}, BC.AsmInfo->isLittleEndian()
671                                                        ? endianness::little
672                                                        : endianness::big);
673   };
674   BinaryStreamWriter UnwindWriter = createInPlaceWriter(*ORCUnwindSection);
675   BinaryStreamWriter UnwindIPWriter = createInPlaceWriter(*ORCUnwindIPSection);
676 
677   uint64_t NumEmitted = 0;
678   std::optional<ORCState> LastEmittedORC;
679   auto emitORCEntry = [&](const uint64_t IP, const ORCState &ORC,
680                           MCSymbol *Label = 0, bool Force = false) -> Error {
681     if (LastEmittedORC && ORC == *LastEmittedORC && !Force)
682       return Error::success();
683 
684     LastEmittedORC = ORC;
685 
686     if (++NumEmitted > NumORCEntries)
687       return createStringError(errc::executable_format_error,
688                                "exceeded the number of allocated ORC entries");
689 
690     if (Label)
691       ORCUnwindIPSection->addRelocation(UnwindIPWriter.getOffset(), Label,
692                                         Relocation::getPC32(), /*Addend*/ 0);
693 
694     const int32_t IPValue =
695         IP - ORCUnwindIPSection->getAddress() - UnwindIPWriter.getOffset();
696     if (Error E = UnwindIPWriter.writeInteger(IPValue))
697       return E;
698 
699     if (Error E = UnwindWriter.writeInteger(ORC.SPOffset))
700       return E;
701     if (Error E = UnwindWriter.writeInteger(ORC.BPOffset))
702       return E;
703     if (Error E = UnwindWriter.writeInteger(ORC.Info))
704       return E;
705 
706     return Error::success();
707   };
708 
709   // Emit new ORC entries for the emitted function.
710   auto emitORC = [&](const BinaryFunction &BF) -> Error {
711     assert(!BF.isSplit() && "Split functions not supported by ORC writer yet.");
712 
713     ORCState CurrentState = NullORC;
714     for (BinaryBasicBlock *BB : BF.getLayout().blocks()) {
715       for (MCInst &Inst : *BB) {
716         ErrorOr<ORCState> ErrorOrState =
717             BC.MIB->tryGetAnnotationAs<ORCState>(Inst, "ORC");
718         if (!ErrorOrState || *ErrorOrState == CurrentState)
719           continue;
720 
721         // Issue label for the instruction.
722         MCSymbol *Label =
723             BC.MIB->getOrCreateInstLabel(Inst, "__ORC_", BC.Ctx.get());
724 
725         if (Error E = emitORCEntry(0, *ErrorOrState, Label))
726           return E;
727 
728         CurrentState = *ErrorOrState;
729       }
730     }
731 
732     return Error::success();
733   };
734 
735   for (ORCListEntry &Entry : ORCEntries) {
736     // Emit original entries for functions that we haven't modified.
737     if (!Entry.BF || !BC.shouldEmit(*Entry.BF)) {
738       // Emit terminator only if it marks the start of a function.
739       if (Entry.ORC == NullORC && !Entry.BF)
740         continue;
741       if (Error E = emitORCEntry(Entry.IP, Entry.ORC))
742         return E;
743       continue;
744     }
745 
746     // Emit all ORC entries for a function referenced by an entry and skip over
747     // the rest of entries for this function by resetting its ORC attribute.
748     if (Entry.BF->hasORC()) {
749       if (Error E = emitORC(*Entry.BF))
750         return E;
751       Entry.BF->setHasORC(false);
752     }
753   }
754 
755   LLVM_DEBUG(dbgs() << "BOLT-DEBUG: emitted " << NumEmitted
756                     << " ORC entries\n");
757 
758   // Replicate terminator entry at the end of sections to match the original
759   // table sizes.
760   const BinaryFunction &LastBF = BC.getBinaryFunctions().rbegin()->second;
761   const uint64_t LastIP = LastBF.getAddress() + LastBF.getMaxSize();
762   while (UnwindWriter.bytesRemaining()) {
763     if (Error E = emitORCEntry(LastIP, NullORC, nullptr, /*Force*/ true))
764       return E;
765   }
766 
767   return Error::success();
768 }
769 
770 /// The static call site table is created by objtool and contains entries in the
771 /// following format:
772 ///
773 ///    struct static_call_site {
774 ///      s32 addr;
775 ///      s32 key;
776 ///    };
777 ///
778 Error LinuxKernelRewriter::readStaticCalls() {
779   const BinaryData *StaticCallTable =
780       BC.getBinaryDataByName("__start_static_call_sites");
781   if (!StaticCallTable)
782     return Error::success();
783 
784   StaticCallTableAddress = StaticCallTable->getAddress();
785 
786   const BinaryData *Stop = BC.getBinaryDataByName("__stop_static_call_sites");
787   if (!Stop)
788     return createStringError(errc::executable_format_error,
789                              "missing __stop_static_call_sites symbol");
790 
791   ErrorOr<BinarySection &> ErrorOrSection =
792       BC.getSectionForAddress(StaticCallTableAddress);
793   if (!ErrorOrSection)
794     return createStringError(errc::executable_format_error,
795                              "no section matching __start_static_call_sites");
796 
797   StaticCallSection = *ErrorOrSection;
798   if (!StaticCallSection->containsAddress(Stop->getAddress() - 1))
799     return createStringError(errc::executable_format_error,
800                              "__stop_static_call_sites not in the same section "
801                              "as __start_static_call_sites");
802 
803   if ((Stop->getAddress() - StaticCallTableAddress) % STATIC_CALL_ENTRY_SIZE)
804     return createStringError(errc::executable_format_error,
805                              "static call table size error");
806 
807   const uint64_t SectionAddress = StaticCallSection->getAddress();
808   DataExtractor DE(StaticCallSection->getContents(),
809                    BC.AsmInfo->isLittleEndian(),
810                    BC.AsmInfo->getCodePointerSize());
811   DataExtractor::Cursor Cursor(StaticCallTableAddress - SectionAddress);
812   uint32_t EntryID = 0;
813   while (Cursor && Cursor.tell() < Stop->getAddress() - SectionAddress) {
814     const uint64_t CallAddress =
815         SectionAddress + Cursor.tell() + (int32_t)DE.getU32(Cursor);
816     const uint64_t KeyAddress =
817         SectionAddress + Cursor.tell() + (int32_t)DE.getU32(Cursor);
818 
819     // Consume the status of the cursor.
820     if (!Cursor)
821       return createStringError(errc::executable_format_error,
822                                "out of bounds while reading static calls");
823 
824     ++EntryID;
825 
826     if (opts::DumpStaticCalls) {
827       BC.outs() << "Static Call Site: " << EntryID << '\n';
828       BC.outs() << "\tCallAddress:   0x" << Twine::utohexstr(CallAddress)
829                 << "\n\tKeyAddress:    0x" << Twine::utohexstr(KeyAddress)
830                 << '\n';
831     }
832 
833     BinaryFunction *BF = BC.getBinaryFunctionContainingAddress(CallAddress);
834     if (!BF)
835       continue;
836 
837     if (!BC.shouldEmit(*BF))
838       continue;
839 
840     if (!BF->hasInstructions())
841       continue;
842 
843     MCInst *Inst = BF->getInstructionAtOffset(CallAddress - BF->getAddress());
844     if (!Inst)
845       return createStringError(errc::executable_format_error,
846                                "no instruction at call site address 0x%" PRIx64,
847                                CallAddress);
848 
849     // Check for duplicate entries.
850     if (BC.MIB->hasAnnotation(*Inst, "StaticCall"))
851       return createStringError(errc::executable_format_error,
852                                "duplicate static call site at 0x%" PRIx64,
853                                CallAddress);
854 
855     BC.MIB->addAnnotation(*Inst, "StaticCall", EntryID);
856 
857     MCSymbol *Label =
858         BC.MIB->getOrCreateInstLabel(*Inst, "__SC_", BC.Ctx.get());
859 
860     StaticCallEntries.push_back({EntryID, BF, Label});
861   }
862 
863   BC.outs() << "BOLT-INFO: parsed " << StaticCallEntries.size()
864             << " static call entries\n";
865 
866   return Error::success();
867 }
868 
869 /// The static call table is sorted during boot time in
870 /// static_call_sort_entries(). This makes it possible to update existing
871 /// entries in-place ignoring their relative order.
872 Error LinuxKernelRewriter::rewriteStaticCalls() {
873   if (!StaticCallTableAddress || !StaticCallSection)
874     return Error::success();
875 
876   for (auto &Entry : StaticCallEntries) {
877     if (!Entry.Function)
878       continue;
879 
880     BinaryFunction &BF = *Entry.Function;
881     if (!BC.shouldEmit(BF))
882       continue;
883 
884     // Create a relocation against the label.
885     const uint64_t EntryOffset = StaticCallTableAddress -
886                                  StaticCallSection->getAddress() +
887                                  (Entry.ID - 1) * STATIC_CALL_ENTRY_SIZE;
888     StaticCallSection->addRelocation(EntryOffset, Entry.Label,
889                                      ELF::R_X86_64_PC32, /*Addend*/ 0);
890   }
891 
892   return Error::success();
893 }
894 
895 /// Instructions that access user-space memory can cause page faults. These
896 /// faults will be handled by the kernel and execution will resume at the fixup
897 /// code location if the address was invalid. The kernel uses the exception
898 /// table to match the faulting instruction to its fixup. The table consists of
899 /// the following entries:
900 ///
901 ///   struct exception_table_entry {
902 ///     int insn;
903 ///     int fixup;
904 ///     int data;
905 ///   };
906 ///
907 /// More info at:
908 /// https://www.kernel.org/doc/Documentation/x86/exception-tables.txt
909 Error LinuxKernelRewriter::readExceptionTable() {
910   ExceptionsSection = BC.getUniqueSectionByName("__ex_table");
911   if (!ExceptionsSection)
912     return Error::success();
913 
914   if (ExceptionsSection->getSize() % EXCEPTION_TABLE_ENTRY_SIZE)
915     return createStringError(errc::executable_format_error,
916                              "exception table size error");
917 
918   const uint64_t SectionAddress = ExceptionsSection->getAddress();
919   DataExtractor DE(ExceptionsSection->getContents(),
920                    BC.AsmInfo->isLittleEndian(),
921                    BC.AsmInfo->getCodePointerSize());
922   DataExtractor::Cursor Cursor(0);
923   uint32_t EntryID = 0;
924   while (Cursor && Cursor.tell() < ExceptionsSection->getSize()) {
925     const uint64_t InstAddress =
926         SectionAddress + Cursor.tell() + (int32_t)DE.getU32(Cursor);
927     const uint64_t FixupAddress =
928         SectionAddress + Cursor.tell() + (int32_t)DE.getU32(Cursor);
929     const uint64_t Data = DE.getU32(Cursor);
930 
931     // Consume the status of the cursor.
932     if (!Cursor)
933       return createStringError(errc::executable_format_error,
934                                "out of bounds while reading exception table");
935 
936     ++EntryID;
937 
938     if (opts::DumpExceptions) {
939       BC.outs() << "Exception Entry: " << EntryID << '\n';
940       BC.outs() << "\tInsn:  0x" << Twine::utohexstr(InstAddress) << '\n'
941                 << "\tFixup: 0x" << Twine::utohexstr(FixupAddress) << '\n'
942                 << "\tData:  0x" << Twine::utohexstr(Data) << '\n';
943     }
944 
945     MCInst *Inst = nullptr;
946     MCSymbol *FixupLabel = nullptr;
947 
948     BinaryFunction *InstBF = BC.getBinaryFunctionContainingAddress(InstAddress);
949     if (InstBF && BC.shouldEmit(*InstBF)) {
950       Inst = InstBF->getInstructionAtOffset(InstAddress - InstBF->getAddress());
951       if (!Inst)
952         return createStringError(errc::executable_format_error,
953                                  "no instruction at address 0x%" PRIx64
954                                  " in exception table",
955                                  InstAddress);
956       BC.MIB->addAnnotation(*Inst, "ExceptionEntry", EntryID);
957       FunctionsWithExceptions.insert(InstBF);
958     }
959 
960     if (!InstBF && opts::Verbosity) {
961       BC.outs() << "BOLT-INFO: no function matches instruction at 0x"
962                 << Twine::utohexstr(InstAddress)
963                 << " referenced by Linux exception table\n";
964     }
965 
966     BinaryFunction *FixupBF =
967         BC.getBinaryFunctionContainingAddress(FixupAddress);
968     if (FixupBF && BC.shouldEmit(*FixupBF)) {
969       const uint64_t Offset = FixupAddress - FixupBF->getAddress();
970       if (!FixupBF->getInstructionAtOffset(Offset))
971         return createStringError(errc::executable_format_error,
972                                  "no instruction at fixup address 0x%" PRIx64
973                                  " in exception table",
974                                  FixupAddress);
975       FixupLabel = Offset ? FixupBF->addEntryPointAtOffset(Offset)
976                           : FixupBF->getSymbol();
977       if (Inst)
978         BC.MIB->addAnnotation(*Inst, "Fixup", FixupLabel->getName());
979       FunctionsWithExceptions.insert(FixupBF);
980     }
981 
982     if (!FixupBF && opts::Verbosity) {
983       BC.outs() << "BOLT-INFO: no function matches fixup code at 0x"
984                 << Twine::utohexstr(FixupAddress)
985                 << " referenced by Linux exception table\n";
986     }
987   }
988 
989   BC.outs() << "BOLT-INFO: parsed "
990             << ExceptionsSection->getSize() / EXCEPTION_TABLE_ENTRY_SIZE
991             << " exception table entries\n";
992 
993   return Error::success();
994 }
995 
996 /// Depending on the value of CONFIG_BUILDTIME_TABLE_SORT, the kernel expects
997 /// the exception table to be sorted. Hence we have to sort it after code
998 /// reordering.
999 Error LinuxKernelRewriter::rewriteExceptionTable() {
1000   // Disable output of functions with exceptions before rewrite support is
1001   // added.
1002   for (BinaryFunction *BF : FunctionsWithExceptions)
1003     BF->setSimple(false);
1004 
1005   return Error::success();
1006 }
1007 
1008 /// .parainsrtuctions section contains information for patching parvirtual call
1009 /// instructions during runtime. The entries in the section are in the form:
1010 ///
1011 ///    struct paravirt_patch_site {
1012 ///      u8 *instr;    /* original instructions */
1013 ///      u8 type;      /* type of this instruction */
1014 ///      u8 len;       /* length of original instruction */
1015 ///    };
1016 ///
1017 /// Note that the structures are aligned at 8-byte boundary.
1018 Error LinuxKernelRewriter::readParaInstructions() {
1019   ParavirtualPatchSection = BC.getUniqueSectionByName(".parainstructions");
1020   if (!ParavirtualPatchSection)
1021     return Error::success();
1022 
1023   DataExtractor DE = DataExtractor(ParavirtualPatchSection->getContents(),
1024                                    BC.AsmInfo->isLittleEndian(),
1025                                    BC.AsmInfo->getCodePointerSize());
1026   uint32_t EntryID = 0;
1027   DataExtractor::Cursor Cursor(0);
1028   while (Cursor && !DE.eof(Cursor)) {
1029     const uint64_t NextOffset = alignTo(Cursor.tell(), Align(PARA_PATCH_ALIGN));
1030     if (!DE.isValidOffset(NextOffset))
1031       break;
1032 
1033     Cursor.seek(NextOffset);
1034 
1035     const uint64_t InstrLocation = DE.getU64(Cursor);
1036     const uint8_t Type = DE.getU8(Cursor);
1037     const uint8_t Len = DE.getU8(Cursor);
1038 
1039     if (!Cursor)
1040       return createStringError(errc::executable_format_error,
1041                                "out of bounds while reading .parainstructions");
1042 
1043     ++EntryID;
1044 
1045     if (opts::DumpParavirtualPatchSites) {
1046       BC.outs() << "Paravirtual patch site: " << EntryID << '\n';
1047       BC.outs() << "\tInstr: 0x" << Twine::utohexstr(InstrLocation)
1048                 << "\n\tType:  0x" << Twine::utohexstr(Type) << "\n\tLen:   0x"
1049                 << Twine::utohexstr(Len) << '\n';
1050     }
1051 
1052     BinaryFunction *BF = BC.getBinaryFunctionContainingAddress(InstrLocation);
1053     if (!BF && opts::Verbosity) {
1054       BC.outs() << "BOLT-INFO: no function matches address 0x"
1055                 << Twine::utohexstr(InstrLocation)
1056                 << " referenced by paravirutal patch site\n";
1057     }
1058 
1059     if (BF && BC.shouldEmit(*BF)) {
1060       MCInst *Inst =
1061           BF->getInstructionAtOffset(InstrLocation - BF->getAddress());
1062       if (!Inst)
1063         return createStringError(errc::executable_format_error,
1064                                  "no instruction at address 0x%" PRIx64
1065                                  " in paravirtual call site %d",
1066                                  InstrLocation, EntryID);
1067       BC.MIB->addAnnotation(*Inst, "ParaSite", EntryID);
1068     }
1069   }
1070 
1071   BC.outs() << "BOLT-INFO: parsed " << EntryID << " paravirtual patch sites\n";
1072 
1073   return Error::success();
1074 }
1075 
1076 /// Process __bug_table section.
1077 /// This section contains information useful for kernel debugging.
1078 /// Each entry in the section is a struct bug_entry that contains a pointer to
1079 /// the ud2 instruction corresponding to the bug, corresponding file name (both
1080 /// pointers use PC relative offset addressing), line number, and flags.
1081 /// The definition of the struct bug_entry can be found in
1082 /// `include/asm-generic/bug.h`
1083 ///
1084 /// NB: find_bug() uses linear search to match an address to an entry in the bug
1085 ///     table. Hence there is no need to sort entries when rewriting the table.
1086 Error LinuxKernelRewriter::readBugTable() {
1087   BugTableSection = BC.getUniqueSectionByName("__bug_table");
1088   if (!BugTableSection)
1089     return Error::success();
1090 
1091   if (BugTableSection->getSize() % BUG_TABLE_ENTRY_SIZE)
1092     return createStringError(errc::executable_format_error,
1093                              "bug table size error");
1094 
1095   const uint64_t SectionAddress = BugTableSection->getAddress();
1096   DataExtractor DE(BugTableSection->getContents(), BC.AsmInfo->isLittleEndian(),
1097                    BC.AsmInfo->getCodePointerSize());
1098   DataExtractor::Cursor Cursor(0);
1099   uint32_t EntryID = 0;
1100   while (Cursor && Cursor.tell() < BugTableSection->getSize()) {
1101     const uint64_t Pos = Cursor.tell();
1102     const uint64_t InstAddress =
1103         SectionAddress + Pos + (int32_t)DE.getU32(Cursor);
1104     Cursor.seek(Pos + BUG_TABLE_ENTRY_SIZE);
1105 
1106     if (!Cursor)
1107       return createStringError(errc::executable_format_error,
1108                                "out of bounds while reading __bug_table");
1109 
1110     ++EntryID;
1111 
1112     BinaryFunction *BF = BC.getBinaryFunctionContainingAddress(InstAddress);
1113     if (!BF && opts::Verbosity) {
1114       BC.outs() << "BOLT-INFO: no function matches address 0x"
1115                 << Twine::utohexstr(InstAddress)
1116                 << " referenced by bug table\n";
1117     }
1118 
1119     if (BF && BC.shouldEmit(*BF)) {
1120       MCInst *Inst = BF->getInstructionAtOffset(InstAddress - BF->getAddress());
1121       if (!Inst)
1122         return createStringError(errc::executable_format_error,
1123                                  "no instruction at address 0x%" PRIx64
1124                                  " referenced by bug table entry %d",
1125                                  InstAddress, EntryID);
1126       BC.MIB->addAnnotation(*Inst, "BugEntry", EntryID);
1127     }
1128   }
1129 
1130   BC.outs() << "BOLT-INFO: parsed " << EntryID << " bug table entries\n";
1131 
1132   return Error::success();
1133 }
1134 
1135 } // namespace
1136 
1137 std::unique_ptr<MetadataRewriter>
1138 llvm::bolt::createLinuxKernelRewriter(BinaryContext &BC) {
1139   return std::make_unique<LinuxKernelRewriter>(BC);
1140 }
1141