xref: /llvm-project/bolt/lib/Rewrite/LinuxKernelRewriter.cpp (revision 805e08ef26a4dd4c3048a836a49219545a211686)
1 //===- bolt/Rewrite/LinuxKernelRewriter.cpp -------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // Support for updating Linux Kernel metadata.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "bolt/Core/BinaryFunction.h"
14 #include "bolt/Rewrite/MetadataRewriter.h"
15 #include "bolt/Rewrite/MetadataRewriters.h"
16 #include "bolt/Utils/CommandLineOpts.h"
17 #include "llvm/ADT/ArrayRef.h"
18 #include "llvm/ADT/DenseSet.h"
19 #include "llvm/MC/MCDisassembler/MCDisassembler.h"
20 #include "llvm/Support/BinaryStreamWriter.h"
21 #include "llvm/Support/CommandLine.h"
22 #include "llvm/Support/Debug.h"
23 #include "llvm/Support/Errc.h"
24 
25 #define DEBUG_TYPE "bolt-linux"
26 
27 using namespace llvm;
28 using namespace bolt;
29 
30 namespace opts {
31 
32 static cl::opt<bool>
33     AltInstHasPadLen("alt-inst-has-padlen",
34                      cl::desc("specify that .altinstructions has padlen field"),
35                      cl::init(false), cl::Hidden, cl::cat(BoltCategory));
36 
37 static cl::opt<uint32_t>
38     AltInstFeatureSize("alt-inst-feature-size",
39                        cl::desc("size of feature field in .altinstructions"),
40                        cl::init(2), cl::Hidden, cl::cat(BoltCategory));
41 
42 static cl::opt<bool>
43     DumpAltInstructions("dump-alt-instructions",
44                         cl::desc("dump Linux alternative instructions info"),
45                         cl::init(false), cl::Hidden, cl::cat(BoltCategory));
46 
47 static cl::opt<bool>
48     DumpExceptions("dump-linux-exceptions",
49                    cl::desc("dump Linux kernel exception table"),
50                    cl::init(false), cl::Hidden, cl::cat(BoltCategory));
51 
52 static cl::opt<bool>
53     DumpORC("dump-orc", cl::desc("dump raw ORC unwind information (sorted)"),
54             cl::init(false), cl::Hidden, cl::cat(BoltCategory));
55 
56 static cl::opt<bool> DumpParavirtualPatchSites(
57     "dump-para-sites", cl::desc("dump Linux kernel paravitual patch sites"),
58     cl::init(false), cl::Hidden, cl::cat(BoltCategory));
59 
60 static cl::opt<bool>
61     DumpPCIFixups("dump-pci-fixups",
62                   cl::desc("dump Linux kernel PCI fixup table"),
63                   cl::init(false), cl::Hidden, cl::cat(BoltCategory));
64 
65 static cl::opt<bool> DumpStaticCalls("dump-static-calls",
66                                      cl::desc("dump Linux kernel static calls"),
67                                      cl::init(false), cl::Hidden,
68                                      cl::cat(BoltCategory));
69 
70 static cl::opt<bool>
71     DumpStaticKeys("dump-static-keys",
72                    cl::desc("dump Linux kernel static keys jump table"),
73                    cl::init(false), cl::Hidden, cl::cat(BoltCategory));
74 
75 static cl::opt<bool> LongJumpLabels(
76     "long-jump-labels",
77     cl::desc("always use long jumps/nops for Linux kernel static keys"),
78     cl::init(false), cl::Hidden, cl::cat(BoltCategory));
79 
80 static cl::opt<bool>
81     PrintORC("print-orc",
82              cl::desc("print ORC unwind information for instructions"),
83              cl::init(true), cl::Hidden, cl::cat(BoltCategory));
84 
85 } // namespace opts
86 
/// The Linux Kernel unwinds stacks using ORC (oops rewind capability) data.
/// The unwind state that applies at a given IP is captured by this structure.
struct ORCState {
  int16_t SPOffset;
  int16_t BPOffset;
  int16_t Info;

  /// Two states are equal only when all three fields match.
  bool operator==(const ORCState &Other) const {
    if (SPOffset != Other.SPOffset)
      return false;
    if (BPOffset != Other.BPOffset)
      return false;
    return Info == Other.Info;
  }

  bool operator!=(const ORCState &Other) const {
    return !operator==(Other);
  }
};
101 
102 /// Section terminator ORC entry.
103 static ORCState NullORC = {0, 0, 0};
104 
105 /// Basic printer for ORC entry. It does not provide the same level of
106 /// information as objtool (for now).
107 inline raw_ostream &operator<<(raw_ostream &OS, const ORCState &E) {
108   if (!opts::PrintORC)
109     return OS;
110   if (E != NullORC)
111     OS << format("{sp: %d, bp: %d, info: 0x%x}", E.SPOffset, E.BPOffset,
112                  E.Info);
113   else
114     OS << "{terminator}";
115 
116   return OS;
117 }
118 
119 namespace {
120 
121 class LinuxKernelRewriter final : public MetadataRewriter {
122   /// Linux Kernel special sections point to a specific instruction in many
123   /// cases. Unlike SDTMarkerInfo, these markers can come from different
124   /// sections.
125   struct LKInstructionMarkerInfo {
126     uint64_t SectionOffset;
127     int32_t PCRelativeOffset;
128     bool IsPCRelative;
129     StringRef SectionName;
130   };
131 
132   /// Map linux kernel program locations/instructions to their pointers in
133   /// special linux kernel sections
134   std::unordered_map<uint64_t, std::vector<LKInstructionMarkerInfo>> LKMarkers;
135 
136   /// Linux ORC sections.
137   ErrorOr<BinarySection &> ORCUnwindSection = std::errc::bad_address;
138   ErrorOr<BinarySection &> ORCUnwindIPSection = std::errc::bad_address;
139 
140   /// Size of entries in ORC sections.
141   static constexpr size_t ORC_UNWIND_ENTRY_SIZE = 6;
142   static constexpr size_t ORC_UNWIND_IP_ENTRY_SIZE = 4;
143 
144   struct ORCListEntry {
145     uint64_t IP;        /// Instruction address.
146     BinaryFunction *BF; /// Binary function corresponding to the entry.
147     ORCState ORC;       /// Stack unwind info in ORC format.
148 
149     /// ORC entries are sorted by their IPs. Terminator entries (NullORC)
150     /// should precede other entries with the same address.
151     bool operator<(const ORCListEntry &Other) const {
152       if (IP < Other.IP)
153         return 1;
154       if (IP > Other.IP)
155         return 0;
156       return ORC == NullORC && Other.ORC != NullORC;
157     }
158   };
159 
160   using ORCListType = std::vector<ORCListEntry>;
161   ORCListType ORCEntries;
162 
163   /// Number of entries in the input file ORC sections.
164   uint64_t NumORCEntries = 0;
165 
166   /// Section containing static keys jump table.
167   ErrorOr<BinarySection &> StaticKeysJumpSection = std::errc::bad_address;
168   uint64_t StaticKeysJumpTableAddress = 0;
169   static constexpr size_t STATIC_KEYS_JUMP_ENTRY_SIZE = 8;
170 
171   struct JumpInfoEntry {
172     bool Likely;
173     bool InitValue;
174   };
175   SmallVector<JumpInfoEntry, 16> JumpInfo;
176 
177   /// Static key entries that need nop conversion.
178   DenseSet<uint32_t> NopIDs;
179 
180   /// Section containing static call table.
181   ErrorOr<BinarySection &> StaticCallSection = std::errc::bad_address;
182   uint64_t StaticCallTableAddress = 0;
183   static constexpr size_t STATIC_CALL_ENTRY_SIZE = 8;
184 
185   struct StaticCallInfo {
186     uint32_t ID;              /// Identifier of the entry in the table.
187     BinaryFunction *Function; /// Function containing associated call.
188     MCSymbol *Label;          /// Label attached to the call.
189   };
190   using StaticCallListType = std::vector<StaticCallInfo>;
191   StaticCallListType StaticCallEntries;
192 
193   /// Section containing the Linux exception table.
194   ErrorOr<BinarySection &> ExceptionsSection = std::errc::bad_address;
195   static constexpr size_t EXCEPTION_TABLE_ENTRY_SIZE = 12;
196 
197   /// Functions with exception handling code.
198   DenseSet<BinaryFunction *> FunctionsWithExceptions;
199 
200   /// Section with paravirtual patch sites.
201   ErrorOr<BinarySection &> ParavirtualPatchSection = std::errc::bad_address;
202 
203   /// Alignment of paravirtual patch structures.
204   static constexpr size_t PARA_PATCH_ALIGN = 8;
205 
206   /// .altinstructions section.
207   ErrorOr<BinarySection &> AltInstrSection = std::errc::bad_address;
208 
209   /// Section containing Linux bug table.
210   ErrorOr<BinarySection &> BugTableSection = std::errc::bad_address;
211 
212   /// Size of bug_entry struct.
213   static constexpr size_t BUG_TABLE_ENTRY_SIZE = 12;
214 
215   /// List of bug entries per function.
216   using FunctionBugListType =
217       DenseMap<BinaryFunction *, SmallVector<uint32_t, 2>>;
218   FunctionBugListType FunctionBugList;
219 
220   /// .pci_fixup section.
221   ErrorOr<BinarySection &> PCIFixupSection = std::errc::bad_address;
222   static constexpr size_t PCI_FIXUP_ENTRY_SIZE = 16;
223 
224   /// Insert an LKMarker for a given code pointer \p PC from a non-code section
225   /// \p SectionName.
226   void insertLKMarker(uint64_t PC, uint64_t SectionOffset,
227                       int32_t PCRelativeOffset, bool IsPCRelative,
228                       StringRef SectionName);
229 
230   /// Process linux kernel special sections and their relocations.
231   void processLKSections();
232 
233   /// Process __ksymtab and __ksymtab_gpl.
234   void processLKKSymtab(bool IsGPL = false);
235 
236   /// Process special linux kernel section, .smp_locks.
237   void processLKSMPLocks();
238 
239   /// Update LKMarkers' locations for the output binary.
240   void updateLKMarkers();
241 
242   /// Read ORC unwind information and annotate instructions.
243   Error readORCTables();
244 
245   /// Update ORC for functions once CFG is constructed.
246   Error processORCPostCFG();
247 
248   /// Update ORC data in the binary.
249   Error rewriteORCTables();
250 
251   /// Validate written ORC tables after binary emission.
252   Error validateORCTables();
253 
254   /// Static call table handling.
255   Error readStaticCalls();
256   Error rewriteStaticCalls();
257 
258   Error readExceptionTable();
259   Error rewriteExceptionTable();
260 
261   /// Paravirtual instruction patch sites.
262   Error readParaInstructions();
263   Error rewriteParaInstructions();
264 
265   /// __bug_table section handling.
266   Error readBugTable();
267   Error rewriteBugTable();
268 
269   /// Do no process functions containing instruction annotated with
270   /// \p Annotation.
271   void skipFunctionsWithAnnotation(StringRef Annotation) const;
272 
273   /// Handle alternative instruction info from .altinstructions.
274   Error readAltInstructions();
275   Error rewriteAltInstructions();
276 
277   /// Read .pci_fixup
278   Error readPCIFixupTable();
279 
280   /// Handle static keys jump table.
281   Error readStaticKeysJumpTable();
282   Error rewriteStaticKeysJumpTable();
283   Error updateStaticKeysJumpTablePostEmit();
284 
285   /// Mark instructions referenced by kernel metadata.
286   Error markInstructions();
287 
288 public:
289   LinuxKernelRewriter(BinaryContext &BC)
290       : MetadataRewriter("linux-kernel-rewriter", BC) {}
291 
292   Error preCFGInitializer() override {
293     processLKSections();
294     if (Error E = markInstructions())
295       return E;
296 
297     if (Error E = readORCTables())
298       return E;
299 
300     if (Error E = readStaticCalls())
301       return E;
302 
303     if (Error E = readExceptionTable())
304       return E;
305 
306     if (Error E = readParaInstructions())
307       return E;
308 
309     if (Error E = readBugTable())
310       return E;
311 
312     if (Error E = readAltInstructions())
313       return E;
314 
315     if (Error E = readPCIFixupTable())
316       return E;
317 
318     if (Error E = readStaticKeysJumpTable())
319       return E;
320 
321     return Error::success();
322   }
323 
324   Error postCFGInitializer() override {
325     if (Error E = processORCPostCFG())
326       return E;
327 
328     return Error::success();
329   }
330 
331   Error preEmitFinalizer() override {
332     // Since rewriteExceptionTable() can mark functions as non-simple, run it
333     // before other rewriters that depend on simple/emit status.
334     if (Error E = rewriteExceptionTable())
335       return E;
336 
337     if (Error E = rewriteAltInstructions())
338       return E;
339 
340     if (Error E = rewriteParaInstructions())
341       return E;
342 
343     if (Error E = rewriteORCTables())
344       return E;
345 
346     if (Error E = rewriteStaticCalls())
347       return E;
348 
349     if (Error E = rewriteStaticKeysJumpTable())
350       return E;
351 
352     if (Error E = rewriteBugTable())
353       return E;
354 
355     return Error::success();
356   }
357 
358   Error postEmitFinalizer() override {
359     updateLKMarkers();
360 
361     if (Error E = updateStaticKeysJumpTablePostEmit())
362       return E;
363 
364     if (Error E = validateORCTables())
365       return E;
366 
367     return Error::success();
368   }
369 };
370 
371 Error LinuxKernelRewriter::markInstructions() {
372   for (const uint64_t PC : llvm::make_first_range(LKMarkers)) {
373     BinaryFunction *BF = BC.getBinaryFunctionContainingAddress(PC);
374 
375     if (!BF || !BC.shouldEmit(*BF))
376       continue;
377 
378     const uint64_t Offset = PC - BF->getAddress();
379     MCInst *Inst = BF->getInstructionAtOffset(Offset);
380     if (!Inst)
381       return createStringError(errc::executable_format_error,
382                                "no instruction matches kernel marker offset");
383 
384     BC.MIB->setOffset(*Inst, static_cast<uint32_t>(Offset));
385 
386     BF->setHasSDTMarker(true);
387   }
388 
389   return Error::success();
390 }
391 
392 void LinuxKernelRewriter::insertLKMarker(uint64_t PC, uint64_t SectionOffset,
393                                          int32_t PCRelativeOffset,
394                                          bool IsPCRelative,
395                                          StringRef SectionName) {
396   LKMarkers[PC].emplace_back(LKInstructionMarkerInfo{
397       SectionOffset, PCRelativeOffset, IsPCRelative, SectionName});
398 }
399 
400 void LinuxKernelRewriter::processLKSections() {
401   processLKKSymtab();
402   processLKKSymtab(true);
403   processLKSMPLocks();
404 }
405 
406 /// Process __ksymtab[_gpl] sections of Linux Kernel.
407 /// This section lists all the vmlinux symbols that kernel modules can access.
408 ///
409 /// All the entries are 4 bytes each and hence we can read them by one by one
410 /// and ignore the ones that are not pointing to the .text section. All pointers
411 /// are PC relative offsets. Always, points to the beginning of the function.
412 void LinuxKernelRewriter::processLKKSymtab(bool IsGPL) {
413   StringRef SectionName = "__ksymtab";
414   if (IsGPL)
415     SectionName = "__ksymtab_gpl";
416   ErrorOr<BinarySection &> SectionOrError =
417       BC.getUniqueSectionByName(SectionName);
418   assert(SectionOrError &&
419          "__ksymtab[_gpl] section not found in Linux Kernel binary");
420   const uint64_t SectionSize = SectionOrError->getSize();
421   const uint64_t SectionAddress = SectionOrError->getAddress();
422   assert((SectionSize % 4) == 0 &&
423          "The size of the __ksymtab[_gpl] section should be a multiple of 4");
424 
425   for (uint64_t I = 0; I < SectionSize; I += 4) {
426     const uint64_t EntryAddress = SectionAddress + I;
427     ErrorOr<uint64_t> Offset = BC.getSignedValueAtAddress(EntryAddress, 4);
428     assert(Offset && "Reading valid PC-relative offset for a ksymtab entry");
429     const int32_t SignedOffset = *Offset;
430     const uint64_t RefAddress = EntryAddress + SignedOffset;
431     BinaryFunction *BF = BC.getBinaryFunctionAtAddress(RefAddress);
432     if (!BF)
433       continue;
434 
435     BC.addRelocation(EntryAddress, BF->getSymbol(), Relocation::getPC32(), 0,
436                      *Offset);
437   }
438 }
439 
440 /// .smp_locks section contains PC-relative references to instructions with LOCK
441 /// prefix. The prefix can be converted to NOP at boot time on non-SMP systems.
442 void LinuxKernelRewriter::processLKSMPLocks() {
443   ErrorOr<BinarySection &> SectionOrError =
444       BC.getUniqueSectionByName(".smp_locks");
445   if (!SectionOrError)
446     return;
447 
448   uint64_t SectionSize = SectionOrError->getSize();
449   const uint64_t SectionAddress = SectionOrError->getAddress();
450   assert((SectionSize % 4) == 0 &&
451          "The size of the .smp_locks section should be a multiple of 4");
452 
453   for (uint64_t I = 0; I < SectionSize; I += 4) {
454     const uint64_t EntryAddress = SectionAddress + I;
455     ErrorOr<uint64_t> Offset = BC.getSignedValueAtAddress(EntryAddress, 4);
456     assert(Offset && "Reading valid PC-relative offset for a .smp_locks entry");
457     int32_t SignedOffset = *Offset;
458     uint64_t RefAddress = EntryAddress + SignedOffset;
459 
460     BinaryFunction *ContainingBF =
461         BC.getBinaryFunctionContainingAddress(RefAddress);
462     if (!ContainingBF)
463       continue;
464 
465     insertLKMarker(RefAddress, I, SignedOffset, true, ".smp_locks");
466   }
467 }
468 
469 void LinuxKernelRewriter::updateLKMarkers() {
470   if (LKMarkers.size() == 0)
471     return;
472 
473   std::unordered_map<std::string, uint64_t> PatchCounts;
474   for (std::pair<const uint64_t, std::vector<LKInstructionMarkerInfo>>
475            &LKMarkerInfoKV : LKMarkers) {
476     const uint64_t OriginalAddress = LKMarkerInfoKV.first;
477     const BinaryFunction *BF =
478         BC.getBinaryFunctionContainingAddress(OriginalAddress, false, true);
479     if (!BF)
480       continue;
481 
482     uint64_t NewAddress = BF->translateInputToOutputAddress(OriginalAddress);
483     if (NewAddress == 0)
484       continue;
485 
486     // Apply base address.
487     if (OriginalAddress >= 0xffffffff00000000 && NewAddress < 0xffffffff)
488       NewAddress = NewAddress + 0xffffffff00000000;
489 
490     if (OriginalAddress == NewAddress)
491       continue;
492 
493     for (LKInstructionMarkerInfo &LKMarkerInfo : LKMarkerInfoKV.second) {
494       StringRef SectionName = LKMarkerInfo.SectionName;
495       SimpleBinaryPatcher *LKPatcher;
496       ErrorOr<BinarySection &> BSec = BC.getUniqueSectionByName(SectionName);
497       assert(BSec && "missing section info for kernel section");
498       if (!BSec->getPatcher())
499         BSec->registerPatcher(std::make_unique<SimpleBinaryPatcher>());
500       LKPatcher = static_cast<SimpleBinaryPatcher *>(BSec->getPatcher());
501       PatchCounts[std::string(SectionName)]++;
502       if (LKMarkerInfo.IsPCRelative)
503         LKPatcher->addLE32Patch(LKMarkerInfo.SectionOffset,
504                                 NewAddress - OriginalAddress +
505                                     LKMarkerInfo.PCRelativeOffset);
506       else
507         LKPatcher->addLE64Patch(LKMarkerInfo.SectionOffset, NewAddress);
508     }
509   }
510   BC.outs() << "BOLT-INFO: patching linux kernel sections. Total patches per "
511                "section are as follows:\n";
512   for (const std::pair<const std::string, uint64_t> &KV : PatchCounts)
513     BC.outs() << "  Section: " << KV.first << ", patch-counts: " << KV.second
514               << '\n';
515 }
516 
517 Error LinuxKernelRewriter::readORCTables() {
518   // NOTE: we should ignore relocations for orc tables as the tables are sorted
519   // post-link time and relocations are not updated.
520   ORCUnwindSection = BC.getUniqueSectionByName(".orc_unwind");
521   ORCUnwindIPSection = BC.getUniqueSectionByName(".orc_unwind_ip");
522 
523   if (!ORCUnwindSection && !ORCUnwindIPSection)
524     return Error::success();
525 
526   if (!ORCUnwindSection || !ORCUnwindIPSection)
527     return createStringError(errc::executable_format_error,
528                              "missing ORC section");
529 
530   NumORCEntries = ORCUnwindIPSection->getSize() / ORC_UNWIND_IP_ENTRY_SIZE;
531   if (ORCUnwindSection->getSize() != NumORCEntries * ORC_UNWIND_ENTRY_SIZE ||
532       ORCUnwindIPSection->getSize() != NumORCEntries * ORC_UNWIND_IP_ENTRY_SIZE)
533     return createStringError(errc::executable_format_error,
534                              "ORC entries number mismatch detected");
535 
536   const uint64_t IPSectionAddress = ORCUnwindIPSection->getAddress();
537   DataExtractor OrcDE = DataExtractor(ORCUnwindSection->getContents(),
538                                       BC.AsmInfo->isLittleEndian(),
539                                       BC.AsmInfo->getCodePointerSize());
540   DataExtractor IPDE = DataExtractor(ORCUnwindIPSection->getContents(),
541                                      BC.AsmInfo->isLittleEndian(),
542                                      BC.AsmInfo->getCodePointerSize());
543   DataExtractor::Cursor ORCCursor(0);
544   DataExtractor::Cursor IPCursor(0);
545   uint64_t PrevIP = 0;
546   for (uint32_t Index = 0; Index < NumORCEntries; ++Index) {
547     const uint64_t IP =
548         IPSectionAddress + IPCursor.tell() + (int32_t)IPDE.getU32(IPCursor);
549 
550     // Consume the status of the cursor.
551     if (!IPCursor)
552       return createStringError(errc::executable_format_error,
553                                "out of bounds while reading ORC IP table: %s",
554                                toString(IPCursor.takeError()).c_str());
555 
556     if (IP < PrevIP && opts::Verbosity)
557       BC.errs() << "BOLT-WARNING: out of order IP 0x" << Twine::utohexstr(IP)
558                 << " detected while reading ORC\n";
559 
560     PrevIP = IP;
561 
562     // Store all entries, includes those we are not going to update as the
563     // tables need to be sorted globally before being written out.
564     ORCEntries.push_back(ORCListEntry());
565     ORCListEntry &Entry = ORCEntries.back();
566 
567     Entry.IP = IP;
568     Entry.ORC.SPOffset = (int16_t)OrcDE.getU16(ORCCursor);
569     Entry.ORC.BPOffset = (int16_t)OrcDE.getU16(ORCCursor);
570     Entry.ORC.Info = (int16_t)OrcDE.getU16(ORCCursor);
571     Entry.BF = nullptr;
572 
573     // Consume the status of the cursor.
574     if (!ORCCursor)
575       return createStringError(errc::executable_format_error,
576                                "out of bounds while reading ORC: %s",
577                                toString(ORCCursor.takeError()).c_str());
578 
579     if (Entry.ORC == NullORC)
580       continue;
581 
582     BinaryFunction *&BF = Entry.BF;
583     BF = BC.getBinaryFunctionContainingAddress(IP, /*CheckPastEnd*/ true);
584 
585     // If the entry immediately pointing past the end of the function is not
586     // the terminator entry, then it does not belong to this function.
587     if (BF && BF->getAddress() + BF->getSize() == IP)
588       BF = 0;
589 
590     if (!BF) {
591       if (opts::Verbosity)
592         BC.errs() << "BOLT-WARNING: no binary function found matching ORC 0x"
593                   << Twine::utohexstr(IP) << ": " << Entry.ORC << '\n';
594       continue;
595     }
596 
597     BF->setHasORC(true);
598 
599     if (!BF->hasInstructions())
600       continue;
601 
602     MCInst *Inst = BF->getInstructionAtOffset(IP - BF->getAddress());
603     if (!Inst)
604       return createStringError(
605           errc::executable_format_error,
606           "no instruction at address 0x%" PRIx64 " in .orc_unwind_ip", IP);
607 
608     // Some addresses will have two entries associated with them. The first
609     // one being a "weak" section terminator. Since we ignore the terminator,
610     // we should only assign one entry per instruction.
611     if (BC.MIB->hasAnnotation(*Inst, "ORC"))
612       return createStringError(
613           errc::executable_format_error,
614           "duplicate non-terminal ORC IP 0x%" PRIx64 " in .orc_unwind_ip", IP);
615 
616     BC.MIB->addAnnotation(*Inst, "ORC", Entry.ORC);
617   }
618 
619   BC.outs() << "BOLT-INFO: parsed " << NumORCEntries << " ORC entries\n";
620 
621   if (opts::DumpORC) {
622     BC.outs() << "BOLT-INFO: ORC unwind information:\n";
623     for (const ORCListEntry &E : ORCEntries) {
624       BC.outs() << "0x" << Twine::utohexstr(E.IP) << ": " << E.ORC;
625       if (E.BF)
626         BC.outs() << ": " << *E.BF;
627       BC.outs() << '\n';
628     }
629   }
630 
631   // Add entries for functions that don't have explicit ORC info at the start.
632   // We'll have the correct info for them even if ORC for the preceding function
633   // changes.
634   ORCListType NewEntries;
635   for (BinaryFunction &BF : llvm::make_second_range(BC.getBinaryFunctions())) {
636     auto It = llvm::partition_point(ORCEntries, [&](const ORCListEntry &E) {
637       return E.IP <= BF.getAddress();
638     });
639     if (It != ORCEntries.begin())
640       --It;
641 
642     if (It->BF == &BF)
643       continue;
644 
645     if (It->ORC == NullORC && It->IP == BF.getAddress()) {
646       assert(!It->BF);
647       It->BF = &BF;
648       continue;
649     }
650 
651     NewEntries.push_back({BF.getAddress(), &BF, It->ORC});
652     if (It->ORC != NullORC)
653       BF.setHasORC(true);
654   }
655 
656   llvm::copy(NewEntries, std::back_inserter(ORCEntries));
657   llvm::sort(ORCEntries);
658 
659   if (opts::DumpORC) {
660     BC.outs() << "BOLT-INFO: amended ORC unwind information:\n";
661     for (const ORCListEntry &E : ORCEntries) {
662       BC.outs() << "0x" << Twine::utohexstr(E.IP) << ": " << E.ORC;
663       if (E.BF)
664         BC.outs() << ": " << *E.BF;
665       BC.outs() << '\n';
666     }
667   }
668 
669   return Error::success();
670 }
671 
672 Error LinuxKernelRewriter::processORCPostCFG() {
673   if (!NumORCEntries)
674     return Error::success();
675 
676   // Propagate ORC to the rest of the function. We can annotate every
677   // instruction in every function, but to minimize the overhead, we annotate
678   // the first instruction in every basic block to reflect the state at the
679   // entry. This way, the ORC state can be calculated based on annotations
680   // regardless of the basic block layout. Note that if we insert/delete
681   // instructions, we must take care to attach ORC info to the new/deleted ones.
682   for (BinaryFunction &BF : llvm::make_second_range(BC.getBinaryFunctions())) {
683 
684     std::optional<ORCState> CurrentState;
685     for (BinaryBasicBlock &BB : BF) {
686       for (MCInst &Inst : BB) {
687         ErrorOr<ORCState> State =
688             BC.MIB->tryGetAnnotationAs<ORCState>(Inst, "ORC");
689 
690         if (State) {
691           CurrentState = *State;
692           continue;
693         }
694 
695         // Get state for the start of the function.
696         if (!CurrentState) {
697           // A terminator entry (NullORC) can match the function address. If
698           // there's also a non-terminator entry, it will be placed after the
699           // terminator. Hence, we are looking for the last ORC entry that
700           // matches the address.
701           auto It =
702               llvm::partition_point(ORCEntries, [&](const ORCListEntry &E) {
703                 return E.IP <= BF.getAddress();
704               });
705           if (It != ORCEntries.begin())
706             --It;
707 
708           assert(It->IP == BF.getAddress() && (!It->BF || It->BF == &BF) &&
709                  "ORC info at function entry expected.");
710 
711           if (It->ORC == NullORC && BF.hasORC()) {
712             BC.errs() << "BOLT-WARNING: ORC unwind info excludes prologue for "
713                       << BF << '\n';
714           }
715 
716           It->BF = &BF;
717 
718           CurrentState = It->ORC;
719           if (It->ORC != NullORC)
720             BF.setHasORC(true);
721         }
722 
723         // While printing ORC, attach info to every instruction for convenience.
724         if (opts::PrintORC || &Inst == &BB.front())
725           BC.MIB->addAnnotation(Inst, "ORC", *CurrentState);
726       }
727     }
728   }
729 
730   return Error::success();
731 }
732 
733 Error LinuxKernelRewriter::rewriteORCTables() {
734   if (!NumORCEntries)
735     return Error::success();
736 
737   // Update ORC sections in-place. As we change the code, the number of ORC
738   // entries may increase for some functions. However, as we remove terminator
739   // redundancy (see below), more space is freed up and we should always be able
740   // to fit new ORC tables in the reserved space.
741   auto createInPlaceWriter = [&](BinarySection &Section) -> BinaryStreamWriter {
742     const size_t Size = Section.getSize();
743     uint8_t *NewContents = new uint8_t[Size];
744     Section.updateContents(NewContents, Size);
745     Section.setOutputFileOffset(Section.getInputFileOffset());
746     return BinaryStreamWriter({NewContents, Size}, BC.AsmInfo->isLittleEndian()
747                                                        ? endianness::little
748                                                        : endianness::big);
749   };
750   BinaryStreamWriter UnwindWriter = createInPlaceWriter(*ORCUnwindSection);
751   BinaryStreamWriter UnwindIPWriter = createInPlaceWriter(*ORCUnwindIPSection);
752 
753   uint64_t NumEmitted = 0;
754   std::optional<ORCState> LastEmittedORC;
755   auto emitORCEntry = [&](const uint64_t IP, const ORCState &ORC,
756                           MCSymbol *Label = 0, bool Force = false) -> Error {
757     if (LastEmittedORC && ORC == *LastEmittedORC && !Force)
758       return Error::success();
759 
760     LastEmittedORC = ORC;
761 
762     if (++NumEmitted > NumORCEntries)
763       return createStringError(errc::executable_format_error,
764                                "exceeded the number of allocated ORC entries");
765 
766     if (Label)
767       ORCUnwindIPSection->addRelocation(UnwindIPWriter.getOffset(), Label,
768                                         Relocation::getPC32(), /*Addend*/ 0);
769 
770     const int32_t IPValue =
771         IP - ORCUnwindIPSection->getAddress() - UnwindIPWriter.getOffset();
772     if (Error E = UnwindIPWriter.writeInteger(IPValue))
773       return E;
774 
775     if (Error E = UnwindWriter.writeInteger(ORC.SPOffset))
776       return E;
777     if (Error E = UnwindWriter.writeInteger(ORC.BPOffset))
778       return E;
779     if (Error E = UnwindWriter.writeInteger(ORC.Info))
780       return E;
781 
782     return Error::success();
783   };
784 
785   // Emit new ORC entries for the emitted function.
786   auto emitORC = [&](const BinaryFunction &BF) -> Error {
787     assert(!BF.isSplit() && "Split functions not supported by ORC writer yet.");
788 
789     ORCState CurrentState = NullORC;
790     for (BinaryBasicBlock *BB : BF.getLayout().blocks()) {
791       for (MCInst &Inst : *BB) {
792         ErrorOr<ORCState> ErrorOrState =
793             BC.MIB->tryGetAnnotationAs<ORCState>(Inst, "ORC");
794         if (!ErrorOrState || *ErrorOrState == CurrentState)
795           continue;
796 
797         // Issue label for the instruction.
798         MCSymbol *Label =
799             BC.MIB->getOrCreateInstLabel(Inst, "__ORC_", BC.Ctx.get());
800 
801         if (Error E = emitORCEntry(0, *ErrorOrState, Label))
802           return E;
803 
804         CurrentState = *ErrorOrState;
805       }
806     }
807 
808     return Error::success();
809   };
810 
811   for (ORCListEntry &Entry : ORCEntries) {
812     // Emit original entries for functions that we haven't modified.
813     if (!Entry.BF || !BC.shouldEmit(*Entry.BF)) {
814       // Emit terminator only if it marks the start of a function.
815       if (Entry.ORC == NullORC && !Entry.BF)
816         continue;
817       if (Error E = emitORCEntry(Entry.IP, Entry.ORC))
818         return E;
819       continue;
820     }
821 
822     // Emit all ORC entries for a function referenced by an entry and skip over
823     // the rest of entries for this function by resetting its ORC attribute.
824     if (Entry.BF->hasORC()) {
825       if (Error E = emitORC(*Entry.BF))
826         return E;
827       Entry.BF->setHasORC(false);
828     }
829   }
830 
831   LLVM_DEBUG(dbgs() << "BOLT-DEBUG: emitted " << NumEmitted
832                     << " ORC entries\n");
833 
834   // Replicate terminator entry at the end of sections to match the original
835   // table sizes.
836   const BinaryFunction &LastBF = BC.getBinaryFunctions().rbegin()->second;
837   const uint64_t LastIP = LastBF.getAddress() + LastBF.getMaxSize();
838   while (UnwindWriter.bytesRemaining()) {
839     if (Error E = emitORCEntry(LastIP, NullORC, nullptr, /*Force*/ true))
840       return E;
841   }
842 
843   return Error::success();
844 }
845 
846 Error LinuxKernelRewriter::validateORCTables() {
847   if (!ORCUnwindIPSection)
848     return Error::success();
849 
850   const uint64_t IPSectionAddress = ORCUnwindIPSection->getAddress();
851   DataExtractor IPDE = DataExtractor(ORCUnwindIPSection->getOutputContents(),
852                                      BC.AsmInfo->isLittleEndian(),
853                                      BC.AsmInfo->getCodePointerSize());
854   DataExtractor::Cursor IPCursor(0);
855   uint64_t PrevIP = 0;
856   for (uint32_t Index = 0; Index < NumORCEntries; ++Index) {
857     const uint64_t IP =
858         IPSectionAddress + IPCursor.tell() + (int32_t)IPDE.getU32(IPCursor);
859     if (!IPCursor)
860       return createStringError(errc::executable_format_error,
861                                "out of bounds while reading ORC IP table: %s",
862                                toString(IPCursor.takeError()).c_str());
863 
864     assert(IP >= PrevIP && "Unsorted ORC table detected");
865     (void)PrevIP;
866     PrevIP = IP;
867   }
868 
869   return Error::success();
870 }
871 
872 /// The static call site table is created by objtool and contains entries in the
873 /// following format:
874 ///
875 ///    struct static_call_site {
876 ///      s32 addr;
877 ///      s32 key;
878 ///    };
879 ///
880 Error LinuxKernelRewriter::readStaticCalls() {
881   const BinaryData *StaticCallTable =
882       BC.getBinaryDataByName("__start_static_call_sites");
883   if (!StaticCallTable)
884     return Error::success();
885 
886   StaticCallTableAddress = StaticCallTable->getAddress();
887 
888   const BinaryData *Stop = BC.getBinaryDataByName("__stop_static_call_sites");
889   if (!Stop)
890     return createStringError(errc::executable_format_error,
891                              "missing __stop_static_call_sites symbol");
892 
893   ErrorOr<BinarySection &> ErrorOrSection =
894       BC.getSectionForAddress(StaticCallTableAddress);
895   if (!ErrorOrSection)
896     return createStringError(errc::executable_format_error,
897                              "no section matching __start_static_call_sites");
898 
899   StaticCallSection = *ErrorOrSection;
900   if (!StaticCallSection->containsAddress(Stop->getAddress() - 1))
901     return createStringError(errc::executable_format_error,
902                              "__stop_static_call_sites not in the same section "
903                              "as __start_static_call_sites");
904 
905   if ((Stop->getAddress() - StaticCallTableAddress) % STATIC_CALL_ENTRY_SIZE)
906     return createStringError(errc::executable_format_error,
907                              "static call table size error");
908 
909   const uint64_t SectionAddress = StaticCallSection->getAddress();
910   DataExtractor DE(StaticCallSection->getContents(),
911                    BC.AsmInfo->isLittleEndian(),
912                    BC.AsmInfo->getCodePointerSize());
913   DataExtractor::Cursor Cursor(StaticCallTableAddress - SectionAddress);
914   uint32_t EntryID = 0;
915   while (Cursor && Cursor.tell() < Stop->getAddress() - SectionAddress) {
916     const uint64_t CallAddress =
917         SectionAddress + Cursor.tell() + (int32_t)DE.getU32(Cursor);
918     const uint64_t KeyAddress =
919         SectionAddress + Cursor.tell() + (int32_t)DE.getU32(Cursor);
920 
921     // Consume the status of the cursor.
922     if (!Cursor)
923       return createStringError(errc::executable_format_error,
924                                "out of bounds while reading static calls: %s",
925                                toString(Cursor.takeError()).c_str());
926 
927     ++EntryID;
928 
929     if (opts::DumpStaticCalls) {
930       BC.outs() << "Static Call Site: " << EntryID << '\n';
931       BC.outs() << "\tCallAddress:   0x" << Twine::utohexstr(CallAddress)
932                 << "\n\tKeyAddress:    0x" << Twine::utohexstr(KeyAddress)
933                 << '\n';
934     }
935 
936     BinaryFunction *BF = BC.getBinaryFunctionContainingAddress(CallAddress);
937     if (!BF)
938       continue;
939 
940     if (!BC.shouldEmit(*BF))
941       continue;
942 
943     if (!BF->hasInstructions())
944       continue;
945 
946     MCInst *Inst = BF->getInstructionAtOffset(CallAddress - BF->getAddress());
947     if (!Inst)
948       return createStringError(errc::executable_format_error,
949                                "no instruction at call site address 0x%" PRIx64,
950                                CallAddress);
951 
952     // Check for duplicate entries.
953     if (BC.MIB->hasAnnotation(*Inst, "StaticCall"))
954       return createStringError(errc::executable_format_error,
955                                "duplicate static call site at 0x%" PRIx64,
956                                CallAddress);
957 
958     BC.MIB->addAnnotation(*Inst, "StaticCall", EntryID);
959 
960     MCSymbol *Label =
961         BC.MIB->getOrCreateInstLabel(*Inst, "__SC_", BC.Ctx.get());
962 
963     StaticCallEntries.push_back({EntryID, BF, Label});
964   }
965 
966   BC.outs() << "BOLT-INFO: parsed " << StaticCallEntries.size()
967             << " static call entries\n";
968 
969   return Error::success();
970 }
971 
972 /// The static call table is sorted during boot time in
973 /// static_call_sort_entries(). This makes it possible to update existing
974 /// entries in-place ignoring their relative order.
975 Error LinuxKernelRewriter::rewriteStaticCalls() {
976   if (!StaticCallTableAddress || !StaticCallSection)
977     return Error::success();
978 
979   for (auto &Entry : StaticCallEntries) {
980     if (!Entry.Function)
981       continue;
982 
983     BinaryFunction &BF = *Entry.Function;
984     if (!BC.shouldEmit(BF))
985       continue;
986 
987     // Create a relocation against the label.
988     const uint64_t EntryOffset = StaticCallTableAddress -
989                                  StaticCallSection->getAddress() +
990                                  (Entry.ID - 1) * STATIC_CALL_ENTRY_SIZE;
991     StaticCallSection->addRelocation(EntryOffset, Entry.Label,
992                                      ELF::R_X86_64_PC32, /*Addend*/ 0);
993   }
994 
995   return Error::success();
996 }
997 
998 /// Instructions that access user-space memory can cause page faults. These
999 /// faults will be handled by the kernel and execution will resume at the fixup
1000 /// code location if the address was invalid. The kernel uses the exception
1001 /// table to match the faulting instruction to its fixup. The table consists of
1002 /// the following entries:
1003 ///
1004 ///   struct exception_table_entry {
1005 ///     int insn;
1006 ///     int fixup;
1007 ///     int data;
1008 ///   };
1009 ///
1010 /// More info at:
1011 /// https://www.kernel.org/doc/Documentation/x86/exception-tables.txt
1012 Error LinuxKernelRewriter::readExceptionTable() {
1013   ExceptionsSection = BC.getUniqueSectionByName("__ex_table");
1014   if (!ExceptionsSection)
1015     return Error::success();
1016 
1017   if (ExceptionsSection->getSize() % EXCEPTION_TABLE_ENTRY_SIZE)
1018     return createStringError(errc::executable_format_error,
1019                              "exception table size error");
1020 
1021   const uint64_t SectionAddress = ExceptionsSection->getAddress();
1022   DataExtractor DE(ExceptionsSection->getContents(),
1023                    BC.AsmInfo->isLittleEndian(),
1024                    BC.AsmInfo->getCodePointerSize());
1025   DataExtractor::Cursor Cursor(0);
1026   uint32_t EntryID = 0;
1027   while (Cursor && Cursor.tell() < ExceptionsSection->getSize()) {
1028     const uint64_t InstAddress =
1029         SectionAddress + Cursor.tell() + (int32_t)DE.getU32(Cursor);
1030     const uint64_t FixupAddress =
1031         SectionAddress + Cursor.tell() + (int32_t)DE.getU32(Cursor);
1032     const uint64_t Data = DE.getU32(Cursor);
1033 
1034     // Consume the status of the cursor.
1035     if (!Cursor)
1036       return createStringError(
1037           errc::executable_format_error,
1038           "out of bounds while reading exception table: %s",
1039           toString(Cursor.takeError()).c_str());
1040 
1041     ++EntryID;
1042 
1043     if (opts::DumpExceptions) {
1044       BC.outs() << "Exception Entry: " << EntryID << '\n';
1045       BC.outs() << "\tInsn:  0x" << Twine::utohexstr(InstAddress) << '\n'
1046                 << "\tFixup: 0x" << Twine::utohexstr(FixupAddress) << '\n'
1047                 << "\tData:  0x" << Twine::utohexstr(Data) << '\n';
1048     }
1049 
1050     MCInst *Inst = nullptr;
1051     MCSymbol *FixupLabel = nullptr;
1052 
1053     BinaryFunction *InstBF = BC.getBinaryFunctionContainingAddress(InstAddress);
1054     if (InstBF && BC.shouldEmit(*InstBF)) {
1055       Inst = InstBF->getInstructionAtOffset(InstAddress - InstBF->getAddress());
1056       if (!Inst)
1057         return createStringError(errc::executable_format_error,
1058                                  "no instruction at address 0x%" PRIx64
1059                                  " in exception table",
1060                                  InstAddress);
1061       BC.MIB->addAnnotation(*Inst, "ExceptionEntry", EntryID);
1062       FunctionsWithExceptions.insert(InstBF);
1063     }
1064 
1065     if (!InstBF && opts::Verbosity) {
1066       BC.outs() << "BOLT-INFO: no function matches instruction at 0x"
1067                 << Twine::utohexstr(InstAddress)
1068                 << " referenced by Linux exception table\n";
1069     }
1070 
1071     BinaryFunction *FixupBF =
1072         BC.getBinaryFunctionContainingAddress(FixupAddress);
1073     if (FixupBF && BC.shouldEmit(*FixupBF)) {
1074       const uint64_t Offset = FixupAddress - FixupBF->getAddress();
1075       if (!FixupBF->getInstructionAtOffset(Offset))
1076         return createStringError(errc::executable_format_error,
1077                                  "no instruction at fixup address 0x%" PRIx64
1078                                  " in exception table",
1079                                  FixupAddress);
1080       FixupLabel = Offset ? FixupBF->addEntryPointAtOffset(Offset)
1081                           : FixupBF->getSymbol();
1082       if (Inst)
1083         BC.MIB->addAnnotation(*Inst, "Fixup", FixupLabel->getName());
1084       FunctionsWithExceptions.insert(FixupBF);
1085     }
1086 
1087     if (!FixupBF && opts::Verbosity) {
1088       BC.outs() << "BOLT-INFO: no function matches fixup code at 0x"
1089                 << Twine::utohexstr(FixupAddress)
1090                 << " referenced by Linux exception table\n";
1091     }
1092   }
1093 
1094   BC.outs() << "BOLT-INFO: parsed "
1095             << ExceptionsSection->getSize() / EXCEPTION_TABLE_ENTRY_SIZE
1096             << " exception table entries\n";
1097 
1098   return Error::success();
1099 }
1100 
1101 /// Depending on the value of CONFIG_BUILDTIME_TABLE_SORT, the kernel expects
1102 /// the exception table to be sorted. Hence we have to sort it after code
1103 /// reordering.
1104 Error LinuxKernelRewriter::rewriteExceptionTable() {
1105   // Disable output of functions with exceptions before rewrite support is
1106   // added.
1107   for (BinaryFunction *BF : FunctionsWithExceptions)
1108     BF->setSimple(false);
1109 
1110   return Error::success();
1111 }
1112 
1113 /// .parainsrtuctions section contains information for patching parvirtual call
1114 /// instructions during runtime. The entries in the section are in the form:
1115 ///
1116 ///    struct paravirt_patch_site {
1117 ///      u8 *instr;    /* original instructions */
1118 ///      u8 type;      /* type of this instruction */
1119 ///      u8 len;       /* length of original instruction */
1120 ///    };
1121 ///
1122 /// Note that the structures are aligned at 8-byte boundary.
1123 Error LinuxKernelRewriter::readParaInstructions() {
1124   ParavirtualPatchSection = BC.getUniqueSectionByName(".parainstructions");
1125   if (!ParavirtualPatchSection)
1126     return Error::success();
1127 
1128   DataExtractor DE = DataExtractor(ParavirtualPatchSection->getContents(),
1129                                    BC.AsmInfo->isLittleEndian(),
1130                                    BC.AsmInfo->getCodePointerSize());
1131   uint32_t EntryID = 0;
1132   DataExtractor::Cursor Cursor(0);
1133   while (Cursor && !DE.eof(Cursor)) {
1134     const uint64_t NextOffset = alignTo(Cursor.tell(), Align(PARA_PATCH_ALIGN));
1135     if (!DE.isValidOffset(NextOffset))
1136       break;
1137 
1138     Cursor.seek(NextOffset);
1139 
1140     const uint64_t InstrLocation = DE.getU64(Cursor);
1141     const uint8_t Type = DE.getU8(Cursor);
1142     const uint8_t Len = DE.getU8(Cursor);
1143 
1144     if (!Cursor)
1145       return createStringError(
1146           errc::executable_format_error,
1147           "out of bounds while reading .parainstructions: %s",
1148           toString(Cursor.takeError()).c_str());
1149 
1150     ++EntryID;
1151 
1152     if (opts::DumpParavirtualPatchSites) {
1153       BC.outs() << "Paravirtual patch site: " << EntryID << '\n';
1154       BC.outs() << "\tInstr: 0x" << Twine::utohexstr(InstrLocation)
1155                 << "\n\tType:  0x" << Twine::utohexstr(Type) << "\n\tLen:   0x"
1156                 << Twine::utohexstr(Len) << '\n';
1157     }
1158 
1159     BinaryFunction *BF = BC.getBinaryFunctionContainingAddress(InstrLocation);
1160     if (!BF && opts::Verbosity) {
1161       BC.outs() << "BOLT-INFO: no function matches address 0x"
1162                 << Twine::utohexstr(InstrLocation)
1163                 << " referenced by paravirutal patch site\n";
1164     }
1165 
1166     if (BF && BC.shouldEmit(*BF)) {
1167       MCInst *Inst =
1168           BF->getInstructionAtOffset(InstrLocation - BF->getAddress());
1169       if (!Inst)
1170         return createStringError(errc::executable_format_error,
1171                                  "no instruction at address 0x%" PRIx64
1172                                  " in paravirtual call site %d",
1173                                  InstrLocation, EntryID);
1174       BC.MIB->addAnnotation(*Inst, "ParaSite", EntryID);
1175     }
1176   }
1177 
1178   BC.outs() << "BOLT-INFO: parsed " << EntryID << " paravirtual patch sites\n";
1179 
1180   return Error::success();
1181 }
1182 
1183 void LinuxKernelRewriter::skipFunctionsWithAnnotation(
1184     StringRef Annotation) const {
1185   for (BinaryFunction &BF : llvm::make_second_range(BC.getBinaryFunctions())) {
1186     if (!BC.shouldEmit(BF))
1187       continue;
1188     for (const BinaryBasicBlock &BB : BF) {
1189       const bool HasAnnotation = llvm::any_of(BB, [&](const MCInst &Inst) {
1190         return BC.MIB->hasAnnotation(Inst, Annotation);
1191       });
1192       if (HasAnnotation) {
1193         BF.setSimple(false);
1194         break;
1195       }
1196     }
1197   }
1198 }
1199 
1200 Error LinuxKernelRewriter::rewriteParaInstructions() {
1201   // Disable output of functions with paravirtual instructions before the
1202   // rewrite support is complete.
1203   skipFunctionsWithAnnotation("ParaSite");
1204 
1205   return Error::success();
1206 }
1207 
1208 /// Process __bug_table section.
1209 /// This section contains information useful for kernel debugging, mostly
1210 /// utilized by WARN()/WARN_ON() macros and deprecated BUG()/BUG_ON().
1211 ///
1212 /// Each entry in the section is a struct bug_entry that contains a pointer to
1213 /// the ud2 instruction corresponding to the bug, corresponding file name (both
1214 /// pointers use PC relative offset addressing), line number, and flags.
1215 /// The definition of the struct bug_entry can be found in
1216 /// `include/asm-generic/bug.h`. The first entry in the struct is an instruction
1217 /// address encoded as a PC-relative offset. In theory, it could be an absolute
1218 /// address if CONFIG_GENERIC_BUG_RELATIVE_POINTERS is not set, but in practice
1219 /// the kernel code relies on it being a relative offset on x86-64.
1220 Error LinuxKernelRewriter::readBugTable() {
1221   BugTableSection = BC.getUniqueSectionByName("__bug_table");
1222   if (!BugTableSection)
1223     return Error::success();
1224 
1225   if (BugTableSection->getSize() % BUG_TABLE_ENTRY_SIZE)
1226     return createStringError(errc::executable_format_error,
1227                              "bug table size error");
1228 
1229   const uint64_t SectionAddress = BugTableSection->getAddress();
1230   DataExtractor DE(BugTableSection->getContents(), BC.AsmInfo->isLittleEndian(),
1231                    BC.AsmInfo->getCodePointerSize());
1232   DataExtractor::Cursor Cursor(0);
1233   uint32_t EntryID = 0;
1234   while (Cursor && Cursor.tell() < BugTableSection->getSize()) {
1235     const uint64_t Pos = Cursor.tell();
1236     const uint64_t InstAddress =
1237         SectionAddress + Pos + (int32_t)DE.getU32(Cursor);
1238     Cursor.seek(Pos + BUG_TABLE_ENTRY_SIZE);
1239 
1240     if (!Cursor)
1241       return createStringError(errc::executable_format_error,
1242                                "out of bounds while reading __bug_table: %s",
1243                                toString(Cursor.takeError()).c_str());
1244 
1245     ++EntryID;
1246 
1247     BinaryFunction *BF = BC.getBinaryFunctionContainingAddress(InstAddress);
1248     if (!BF && opts::Verbosity) {
1249       BC.outs() << "BOLT-INFO: no function matches address 0x"
1250                 << Twine::utohexstr(InstAddress)
1251                 << " referenced by bug table\n";
1252     }
1253 
1254     if (BF && BC.shouldEmit(*BF)) {
1255       MCInst *Inst = BF->getInstructionAtOffset(InstAddress - BF->getAddress());
1256       if (!Inst)
1257         return createStringError(errc::executable_format_error,
1258                                  "no instruction at address 0x%" PRIx64
1259                                  " referenced by bug table entry %d",
1260                                  InstAddress, EntryID);
1261       BC.MIB->addAnnotation(*Inst, "BugEntry", EntryID);
1262 
1263       FunctionBugList[BF].push_back(EntryID);
1264     }
1265   }
1266 
1267   BC.outs() << "BOLT-INFO: parsed " << EntryID << " bug table entries\n";
1268 
1269   return Error::success();
1270 }
1271 
1272 /// find_bug() uses linear search to match an address to an entry in the bug
1273 /// table. Hence, there is no need to sort entries when rewriting the table.
1274 /// When we need to erase an entry, we set its instruction address to zero.
1275 Error LinuxKernelRewriter::rewriteBugTable() {
1276   if (!BugTableSection)
1277     return Error::success();
1278 
1279   for (BinaryFunction &BF : llvm::make_second_range(BC.getBinaryFunctions())) {
1280     if (!BC.shouldEmit(BF))
1281       continue;
1282 
1283     if (!FunctionBugList.count(&BF))
1284       continue;
1285 
1286     // Bugs that will be emitted for this function.
1287     DenseSet<uint32_t> EmittedIDs;
1288     for (BinaryBasicBlock &BB : BF) {
1289       for (MCInst &Inst : BB) {
1290         if (!BC.MIB->hasAnnotation(Inst, "BugEntry"))
1291           continue;
1292         const uint32_t ID = BC.MIB->getAnnotationAs<uint32_t>(Inst, "BugEntry");
1293         EmittedIDs.insert(ID);
1294 
1295         // Create a relocation entry for this bug entry.
1296         MCSymbol *Label =
1297             BC.MIB->getOrCreateInstLabel(Inst, "__BUG_", BC.Ctx.get());
1298         const uint64_t EntryOffset = (ID - 1) * BUG_TABLE_ENTRY_SIZE;
1299         BugTableSection->addRelocation(EntryOffset, Label, ELF::R_X86_64_PC32,
1300                                        /*Addend*/ 0);
1301       }
1302     }
1303 
1304     // Clear bug entries that were not emitted for this function, e.g. as a
1305     // result of DCE, but setting their instruction address to zero.
1306     for (const uint32_t ID : FunctionBugList[&BF]) {
1307       if (!EmittedIDs.count(ID)) {
1308         const uint64_t EntryOffset = (ID - 1) * BUG_TABLE_ENTRY_SIZE;
1309         BugTableSection->addRelocation(EntryOffset, nullptr, ELF::R_X86_64_PC32,
1310                                        /*Addend*/ 0);
1311       }
1312     }
1313   }
1314 
1315   return Error::success();
1316 }
1317 
1318 /// The kernel can replace certain instruction sequences depending on hardware
1319 /// it is running on and features specified during boot time. The information
1320 /// about alternative instruction sequences is stored in .altinstructions
1321 /// section. The format of entries in this section is defined in
1322 /// arch/x86/include/asm/alternative.h:
1323 ///
1324 ///   struct alt_instr {
1325 ///     s32 instr_offset;
1326 ///     s32 repl_offset;
1327 ///     uXX feature;
1328 ///     u8  instrlen;
1329 ///     u8  replacementlen;
1330 ///	    u8  padlen;         // present in older kernels
1331 ///   } __packed;
1332 ///
1333 /// Note the structures is packed.
1334 Error LinuxKernelRewriter::readAltInstructions() {
1335   AltInstrSection = BC.getUniqueSectionByName(".altinstructions");
1336   if (!AltInstrSection)
1337     return Error::success();
1338 
1339   const uint64_t Address = AltInstrSection->getAddress();
1340   DataExtractor DE = DataExtractor(AltInstrSection->getContents(),
1341                                    BC.AsmInfo->isLittleEndian(),
1342                                    BC.AsmInfo->getCodePointerSize());
1343   uint64_t EntryID = 0;
1344   DataExtractor::Cursor Cursor(0);
1345   while (Cursor && !DE.eof(Cursor)) {
1346     const uint64_t OrgInstAddress =
1347         Address + Cursor.tell() + (int32_t)DE.getU32(Cursor);
1348     const uint64_t AltInstAddress =
1349         Address + Cursor.tell() + (int32_t)DE.getU32(Cursor);
1350     const uint64_t Feature = DE.getUnsigned(Cursor, opts::AltInstFeatureSize);
1351     const uint8_t OrgSize = DE.getU8(Cursor);
1352     const uint8_t AltSize = DE.getU8(Cursor);
1353 
1354     // Older kernels may have the padlen field.
1355     const uint8_t PadLen = opts::AltInstHasPadLen ? DE.getU8(Cursor) : 0;
1356 
1357     if (!Cursor)
1358       return createStringError(
1359           errc::executable_format_error,
1360           "out of bounds while reading .altinstructions: %s",
1361           toString(Cursor.takeError()).c_str());
1362 
1363     ++EntryID;
1364 
1365     if (opts::DumpAltInstructions) {
1366       BC.outs() << "Alternative instruction entry: " << EntryID
1367                 << "\n\tOrg:     0x" << Twine::utohexstr(OrgInstAddress)
1368                 << "\n\tAlt:     0x" << Twine::utohexstr(AltInstAddress)
1369                 << "\n\tFeature: 0x" << Twine::utohexstr(Feature)
1370                 << "\n\tOrgSize: " << (int)OrgSize
1371                 << "\n\tAltSize: " << (int)AltSize << '\n';
1372       if (opts::AltInstHasPadLen)
1373         BC.outs() << "\tPadLen:  " << (int)PadLen << '\n';
1374     }
1375 
1376     if (AltSize > OrgSize)
1377       return createStringError(errc::executable_format_error,
1378                                "error reading .altinstructions");
1379 
1380     BinaryFunction *BF = BC.getBinaryFunctionContainingAddress(OrgInstAddress);
1381     if (!BF && opts::Verbosity) {
1382       BC.outs() << "BOLT-INFO: no function matches address 0x"
1383                 << Twine::utohexstr(OrgInstAddress)
1384                 << " of instruction from .altinstructions\n";
1385     }
1386 
1387     BinaryFunction *AltBF =
1388         BC.getBinaryFunctionContainingAddress(AltInstAddress);
1389     if (AltBF && BC.shouldEmit(*AltBF)) {
1390       BC.errs()
1391           << "BOLT-WARNING: alternative instruction sequence found in function "
1392           << *AltBF << '\n';
1393       AltBF->setIgnored();
1394     }
1395 
1396     if (!BF || !BC.shouldEmit(*BF))
1397       continue;
1398 
1399     if (OrgInstAddress + OrgSize > BF->getAddress() + BF->getSize())
1400       return createStringError(errc::executable_format_error,
1401                                "error reading .altinstructions");
1402 
1403     MCInst *Inst =
1404         BF->getInstructionAtOffset(OrgInstAddress - BF->getAddress());
1405     if (!Inst)
1406       return createStringError(errc::executable_format_error,
1407                                "no instruction at address 0x%" PRIx64
1408                                " referenced by .altinstructions entry %d",
1409                                OrgInstAddress, EntryID);
1410 
1411     // There could be more than one alternative instruction sequences for the
1412     // same original instruction. Annotate each alternative separately.
1413     std::string AnnotationName = "AltInst";
1414     unsigned N = 2;
1415     while (BC.MIB->hasAnnotation(*Inst, AnnotationName))
1416       AnnotationName = "AltInst" + std::to_string(N++);
1417 
1418     BC.MIB->addAnnotation(*Inst, AnnotationName, EntryID);
1419 
1420     // Annotate all instructions from the original sequence. Note that it's not
1421     // the most efficient way to look for instructions in the address range,
1422     // but since alternative instructions are uncommon, it will do for now.
1423     for (uint32_t Offset = 1; Offset < OrgSize; ++Offset) {
1424       Inst = BF->getInstructionAtOffset(OrgInstAddress + Offset -
1425                                         BF->getAddress());
1426       if (Inst)
1427         BC.MIB->addAnnotation(*Inst, AnnotationName, EntryID);
1428     }
1429   }
1430 
1431   BC.outs() << "BOLT-INFO: parsed " << EntryID
1432             << " alternative instruction entries\n";
1433 
1434   return Error::success();
1435 }
1436 
1437 Error LinuxKernelRewriter::rewriteAltInstructions() {
1438   // Disable output of functions with alt instructions before the rewrite
1439   // support is complete.
1440   skipFunctionsWithAnnotation("AltInst");
1441 
1442   return Error::success();
1443 }
1444 
1445 /// When the Linux kernel needs to handle an error associated with a given PCI
1446 /// device, it uses a table stored in .pci_fixup section to locate a fixup code
1447 /// specific to the vendor and the problematic device. The section contains a
1448 /// list of the following structures defined in include/linux/pci.h:
1449 ///
1450 ///   struct pci_fixup {
1451 ///     u16 vendor;     /* Or PCI_ANY_ID */
1452 ///     u16 device;     /* Or PCI_ANY_ID */
1453 ///     u32 class;      /* Or PCI_ANY_ID */
1454 ///     unsigned int class_shift; /* should be 0, 8, 16 */
1455 ///     int hook_offset;
1456 ///   };
1457 ///
1458 /// Normally, the hook will point to a function start and we don't have to
1459 /// update the pointer if we are not relocating functions. Hence, while reading
1460 /// the table we validate this assumption. If a function has a fixup code in the
1461 /// middle of its body, we issue a warning and ignore it.
1462 Error LinuxKernelRewriter::readPCIFixupTable() {
1463   PCIFixupSection = BC.getUniqueSectionByName(".pci_fixup");
1464   if (!PCIFixupSection)
1465     return Error::success();
1466 
1467   if (PCIFixupSection->getSize() % PCI_FIXUP_ENTRY_SIZE)
1468     return createStringError(errc::executable_format_error,
1469                              "PCI fixup table size error");
1470 
1471   const uint64_t Address = PCIFixupSection->getAddress();
1472   DataExtractor DE = DataExtractor(PCIFixupSection->getContents(),
1473                                    BC.AsmInfo->isLittleEndian(),
1474                                    BC.AsmInfo->getCodePointerSize());
1475   uint64_t EntryID = 0;
1476   DataExtractor::Cursor Cursor(0);
1477   while (Cursor && !DE.eof(Cursor)) {
1478     const uint16_t Vendor = DE.getU16(Cursor);
1479     const uint16_t Device = DE.getU16(Cursor);
1480     const uint32_t Class = DE.getU32(Cursor);
1481     const uint32_t ClassShift = DE.getU32(Cursor);
1482     const uint64_t HookAddress =
1483         Address + Cursor.tell() + (int32_t)DE.getU32(Cursor);
1484 
1485     if (!Cursor)
1486       return createStringError(errc::executable_format_error,
1487                                "out of bounds while reading .pci_fixup: %s",
1488                                toString(Cursor.takeError()).c_str());
1489 
1490     ++EntryID;
1491 
1492     if (opts::DumpPCIFixups) {
1493       BC.outs() << "PCI fixup entry: " << EntryID << "\n\tVendor       0x"
1494                 << Twine::utohexstr(Vendor) << "\n\tDevice:      0x"
1495                 << Twine::utohexstr(Device) << "\n\tClass:       0x"
1496                 << Twine::utohexstr(Class) << "\n\tClassShift:  0x"
1497                 << Twine::utohexstr(ClassShift) << "\n\tHookAddress: 0x"
1498                 << Twine::utohexstr(HookAddress) << '\n';
1499     }
1500 
1501     BinaryFunction *BF = BC.getBinaryFunctionContainingAddress(HookAddress);
1502     if (!BF && opts::Verbosity) {
1503       BC.outs() << "BOLT-INFO: no function matches address 0x"
1504                 << Twine::utohexstr(HookAddress)
1505                 << " of hook from .pci_fixup\n";
1506     }
1507 
1508     if (!BF || !BC.shouldEmit(*BF))
1509       continue;
1510 
1511     if (const uint64_t Offset = HookAddress - BF->getAddress()) {
1512       BC.errs() << "BOLT-WARNING: PCI fixup detected in the middle of function "
1513                 << *BF << " at offset 0x" << Twine::utohexstr(Offset) << '\n';
1514       BF->setSimple(false);
1515     }
1516   }
1517 
1518   BC.outs() << "BOLT-INFO: parsed " << EntryID << " PCI fixup entries\n";
1519 
1520   return Error::success();
1521 }
1522 
1523 /// Runtime code modification used by static keys is the most ubiquitous
1524 /// self-modifying feature of the Linux kernel. The idea is to eliminate the
1525 /// condition check and associated conditional jump on a hot path if that
1526 /// condition (based on a boolean value of a static key) does not change often.
1527 /// Whenever the condition changes, the kernel runtime modifies all code paths
1528 /// associated with that key flipping the code between nop and (unconditional)
1529 /// jump. The information about the code is stored in a static key jump table
1530 /// and contains the list of entries of the following type from
1531 /// include/linux/jump_label.h:
///
1533 ///   struct jump_entry {
1534 ///     s32 code;
1535 ///     s32 target;
1536 ///     long key; // key may be far away from the core kernel under KASLR
1537 ///   };
1538 ///
1539 /// The list does not have to be stored in any sorted way, but it is sorted at
1540 /// boot time (or module initialization time) first by "key" and then by "code".
1541 /// jump_label_sort_entries() is responsible for sorting the table.
1542 ///
1543 /// The key in jump_entry structure uses lower two bits of the key address
1544 /// (which itself is aligned) to store extra information. We are interested in
1545 /// the lower bit which indicates if the key is likely to be set on the code
1546 /// path associated with this jump_entry.
1547 ///
1548 /// static_key_{enable,disable}() functions modify the code based on key and
1549 /// jump table entries.
1550 ///
1551 /// jump_label_update() updates all code entries for a given key. Batch mode is
1552 /// used for x86.
1553 ///
1554 /// The actual patching happens in text_poke_bp_batch() that overrides the first
1555 /// byte of the sequence with int3 before proceeding with actual code
1556 /// replacement.
1557 Error LinuxKernelRewriter::readStaticKeysJumpTable() {
1558   const BinaryData *StaticKeysJumpTable =
1559       BC.getBinaryDataByName("__start___jump_table");
1560   if (!StaticKeysJumpTable)
1561     return Error::success();
1562 
1563   StaticKeysJumpTableAddress = StaticKeysJumpTable->getAddress();
1564 
1565   const BinaryData *Stop = BC.getBinaryDataByName("__stop___jump_table");
1566   if (!Stop)
1567     return createStringError(errc::executable_format_error,
1568                              "missing __stop___jump_table symbol");
1569 
1570   ErrorOr<BinarySection &> ErrorOrSection =
1571       BC.getSectionForAddress(StaticKeysJumpTableAddress);
1572   if (!ErrorOrSection)
1573     return createStringError(errc::executable_format_error,
1574                              "no section matching __start___jump_table");
1575 
1576   StaticKeysJumpSection = *ErrorOrSection;
1577   if (!StaticKeysJumpSection->containsAddress(Stop->getAddress() - 1))
1578     return createStringError(errc::executable_format_error,
1579                              "__stop___jump_table not in the same section "
1580                              "as __start___jump_table");
1581 
1582   if ((Stop->getAddress() - StaticKeysJumpTableAddress) %
1583       STATIC_KEYS_JUMP_ENTRY_SIZE)
1584     return createStringError(errc::executable_format_error,
1585                              "static keys jump table size error");
1586 
1587   const uint64_t SectionAddress = StaticKeysJumpSection->getAddress();
1588   DataExtractor DE(StaticKeysJumpSection->getContents(),
1589                    BC.AsmInfo->isLittleEndian(),
1590                    BC.AsmInfo->getCodePointerSize());
1591   DataExtractor::Cursor Cursor(StaticKeysJumpTableAddress - SectionAddress);
1592   uint32_t EntryID = 0;
1593   while (Cursor && Cursor.tell() < Stop->getAddress() - SectionAddress) {
1594     const uint64_t JumpAddress =
1595         SectionAddress + Cursor.tell() + (int32_t)DE.getU32(Cursor);
1596     const uint64_t TargetAddress =
1597         SectionAddress + Cursor.tell() + (int32_t)DE.getU32(Cursor);
1598     const uint64_t KeyAddress =
1599         SectionAddress + Cursor.tell() + (int64_t)DE.getU64(Cursor);
1600 
1601     // Consume the status of the cursor.
1602     if (!Cursor)
1603       return createStringError(
1604           errc::executable_format_error,
1605           "out of bounds while reading static keys jump table: %s",
1606           toString(Cursor.takeError()).c_str());
1607 
1608     ++EntryID;
1609 
1610     JumpInfo.push_back(JumpInfoEntry());
1611     JumpInfoEntry &Info = JumpInfo.back();
1612     Info.Likely = KeyAddress & 1;
1613 
1614     if (opts::DumpStaticKeys) {
1615       BC.outs() << "Static key jump entry: " << EntryID
1616                 << "\n\tJumpAddress:   0x" << Twine::utohexstr(JumpAddress)
1617                 << "\n\tTargetAddress: 0x" << Twine::utohexstr(TargetAddress)
1618                 << "\n\tKeyAddress:    0x" << Twine::utohexstr(KeyAddress)
1619                 << "\n\tIsLikely:      " << Info.Likely << '\n';
1620     }
1621 
1622     BinaryFunction *BF = BC.getBinaryFunctionContainingAddress(JumpAddress);
1623     if (!BF && opts::Verbosity) {
1624       BC.outs()
1625           << "BOLT-INFO: no function matches address 0x"
1626           << Twine::utohexstr(JumpAddress)
1627           << " of jump instruction referenced from static keys jump table\n";
1628     }
1629 
1630     if (!BF || !BC.shouldEmit(*BF))
1631       continue;
1632 
1633     MCInst *Inst = BF->getInstructionAtOffset(JumpAddress - BF->getAddress());
1634     if (!Inst)
1635       return createStringError(
1636           errc::executable_format_error,
1637           "no instruction at static keys jump site address 0x%" PRIx64,
1638           JumpAddress);
1639 
1640     if (!BF->containsAddress(TargetAddress))
1641       return createStringError(
1642           errc::executable_format_error,
1643           "invalid target of static keys jump at 0x%" PRIx64 " : 0x%" PRIx64,
1644           JumpAddress, TargetAddress);
1645 
1646     const bool IsBranch = BC.MIB->isBranch(*Inst);
1647     if (!IsBranch && !BC.MIB->isNoop(*Inst))
1648       return createStringError(errc::executable_format_error,
1649                                "jump or nop expected at address 0x%" PRIx64,
1650                                JumpAddress);
1651 
1652     const uint64_t Size = BC.computeInstructionSize(*Inst);
1653     if (Size != 2 && Size != 5) {
1654       return createStringError(
1655           errc::executable_format_error,
1656           "unexpected static keys jump size at address 0x%" PRIx64,
1657           JumpAddress);
1658     }
1659 
1660     MCSymbol *Target = BF->registerBranch(JumpAddress, TargetAddress);
1661     MCInst StaticKeyBranch;
1662 
1663     // Create a conditional branch instruction. The actual conditional code type
1664     // should not matter as long as it's a valid code. The instruction should be
1665     // treated as a conditional branch for control-flow purposes. Before we emit
1666     // the code, it will be converted to a different instruction in
1667     // rewriteStaticKeysJumpTable().
1668     //
1669     // NB: for older kernels, under LongJumpLabels option, we create long
1670     //     conditional branch to guarantee that code size estimation takes
1671     //     into account the extra bytes needed for long branch that will be used
1672     //     by the kernel patching code. Newer kernels can work with both short
1673     //     and long branches. The code for long conditional branch is larger
1674     //     than unconditional one, so we are pessimistic in our estimations.
1675     if (opts::LongJumpLabels)
1676       BC.MIB->createLongCondBranch(StaticKeyBranch, Target, 0, BC.Ctx.get());
1677     else
1678       BC.MIB->createCondBranch(StaticKeyBranch, Target, 0, BC.Ctx.get());
1679     BC.MIB->moveAnnotations(std::move(*Inst), StaticKeyBranch);
1680     BC.MIB->setDynamicBranch(StaticKeyBranch, EntryID);
1681     *Inst = StaticKeyBranch;
1682 
1683     // IsBranch = InitialValue ^ LIKELY
1684     //
1685     //    0 0 0
1686     //    1 0 1
1687     //    1 1 0
1688     //    0 1 1
1689     //
1690     // => InitialValue = IsBranch ^ LIKELY
1691     Info.InitValue = IsBranch ^ Info.Likely;
1692 
1693     // Add annotations to facilitate manual code analysis.
1694     BC.MIB->addAnnotation(*Inst, "Likely", Info.Likely);
1695     BC.MIB->addAnnotation(*Inst, "InitValue", Info.InitValue);
1696     if (!BC.MIB->getSize(*Inst))
1697       BC.MIB->setSize(*Inst, Size);
1698 
1699     if (opts::LongJumpLabels)
1700       BC.MIB->setSize(*Inst, 5);
1701   }
1702 
1703   BC.outs() << "BOLT-INFO: parsed " << EntryID << " static keys jump entries\n";
1704 
1705   return Error::success();
1706 }
1707 
1708 // Pre-emit pass. Convert dynamic branch instructions into jumps that could be
1709 // relaxed. In post-emit pass we will convert those jumps into nops when
1710 // necessary. We do the unconditional conversion into jumps so that the jumps
1711 // can be relaxed and the optimal size of jump/nop instruction is selected.
1712 Error LinuxKernelRewriter::rewriteStaticKeysJumpTable() {
1713   if (!StaticKeysJumpSection)
1714     return Error::success();
1715 
1716   uint64_t NumShort = 0;
1717   uint64_t NumLong = 0;
1718   for (BinaryFunction &BF : llvm::make_second_range(BC.getBinaryFunctions())) {
1719     if (!BC.shouldEmit(BF))
1720       continue;
1721 
1722     for (BinaryBasicBlock &BB : BF) {
1723       for (MCInst &Inst : BB) {
1724         if (!BC.MIB->isDynamicBranch(Inst))
1725           continue;
1726 
1727         const uint32_t EntryID = *BC.MIB->getDynamicBranchID(Inst);
1728         MCSymbol *Target =
1729             const_cast<MCSymbol *>(BC.MIB->getTargetSymbol(Inst));
1730         assert(Target && "Target symbol should be set.");
1731 
1732         const JumpInfoEntry &Info = JumpInfo[EntryID - 1];
1733         const bool IsBranch = Info.Likely ^ Info.InitValue;
1734 
1735         uint32_t Size = *BC.MIB->getSize(Inst);
1736         if (Size == 2)
1737           ++NumShort;
1738         else if (Size == 5)
1739           ++NumLong;
1740         else
1741           llvm_unreachable("Wrong size for static keys jump instruction.");
1742 
1743         MCInst NewInst;
1744         // Replace the instruction with unconditional jump even if it needs to
1745         // be nop in the binary.
1746         if (opts::LongJumpLabels) {
1747           BC.MIB->createLongUncondBranch(NewInst, Target, BC.Ctx.get());
1748         } else {
1749           // Newer kernels can handle short and long jumps for static keys.
1750           // Optimistically, emit short jump and check if it gets relaxed into
1751           // a long one during post-emit. Only then convert the jump to a nop.
1752           BC.MIB->createUncondBranch(NewInst, Target, BC.Ctx.get());
1753         }
1754 
1755         BC.MIB->moveAnnotations(std::move(Inst), NewInst);
1756         Inst = NewInst;
1757 
1758         // Mark the instruction for nop conversion.
1759         if (!IsBranch)
1760           NopIDs.insert(EntryID);
1761 
1762         MCSymbol *Label =
1763             BC.MIB->getOrCreateInstLabel(Inst, "__SK_", BC.Ctx.get());
1764 
1765         // Create a relocation against the label.
1766         const uint64_t EntryOffset = StaticKeysJumpTableAddress -
1767                                      StaticKeysJumpSection->getAddress() +
1768                                      (EntryID - 1) * 16;
1769         StaticKeysJumpSection->addRelocation(EntryOffset, Label,
1770                                              ELF::R_X86_64_PC32,
1771                                              /*Addend*/ 0);
1772         StaticKeysJumpSection->addRelocation(EntryOffset + 4, Target,
1773                                              ELF::R_X86_64_PC32, /*Addend*/ 0);
1774       }
1775     }
1776   }
1777 
1778   BC.outs() << "BOLT-INFO: the input contains " << NumShort << " short and "
1779             << NumLong << " long static keys jumps in optimized functions\n";
1780 
1781   return Error::success();
1782 }
1783 
1784 // Post-emit pass of static keys jump section. Convert jumps to nops.
1785 Error LinuxKernelRewriter::updateStaticKeysJumpTablePostEmit() {
1786   if (!StaticKeysJumpSection || !StaticKeysJumpSection->isFinalized())
1787     return Error::success();
1788 
1789   const uint64_t SectionAddress = StaticKeysJumpSection->getAddress();
1790   DataExtractor DE(StaticKeysJumpSection->getOutputContents(),
1791                    BC.AsmInfo->isLittleEndian(),
1792                    BC.AsmInfo->getCodePointerSize());
1793   DataExtractor::Cursor Cursor(StaticKeysJumpTableAddress - SectionAddress);
1794   const BinaryData *Stop = BC.getBinaryDataByName("__stop___jump_table");
1795   uint32_t EntryID = 0;
1796   uint64_t NumShort = 0;
1797   uint64_t NumLong = 0;
1798   while (Cursor && Cursor.tell() < Stop->getAddress() - SectionAddress) {
1799     const uint64_t JumpAddress =
1800         SectionAddress + Cursor.tell() + (int32_t)DE.getU32(Cursor);
1801     const uint64_t TargetAddress =
1802         SectionAddress + Cursor.tell() + (int32_t)DE.getU32(Cursor);
1803     const uint64_t KeyAddress =
1804         SectionAddress + Cursor.tell() + (int64_t)DE.getU64(Cursor);
1805 
1806     // Consume the status of the cursor.
1807     if (!Cursor)
1808       return createStringError(errc::executable_format_error,
1809                                "out of bounds while updating static keys: %s",
1810                                toString(Cursor.takeError()).c_str());
1811 
1812     ++EntryID;
1813 
1814     LLVM_DEBUG({
1815       dbgs() << "\n\tJumpAddress:   0x" << Twine::utohexstr(JumpAddress)
1816              << "\n\tTargetAddress: 0x" << Twine::utohexstr(TargetAddress)
1817              << "\n\tKeyAddress:    0x" << Twine::utohexstr(KeyAddress) << '\n';
1818     });
1819     (void)TargetAddress;
1820     (void)KeyAddress;
1821 
1822     BinaryFunction *BF =
1823         BC.getBinaryFunctionContainingAddress(JumpAddress,
1824                                               /*CheckPastEnd*/ false,
1825                                               /*UseMaxSize*/ true);
1826     assert(BF && "Cannot get function for modified static key.");
1827 
1828     if (!BF->isEmitted())
1829       continue;
1830 
1831     // Disassemble instruction to collect stats even if nop-conversion is
1832     // unnecessary.
1833     MutableArrayRef<uint8_t> Contents = MutableArrayRef<uint8_t>(
1834         reinterpret_cast<uint8_t *>(BF->getImageAddress()), BF->getImageSize());
1835     assert(Contents.size() && "Non-empty function image expected.");
1836 
1837     MCInst Inst;
1838     uint64_t Size;
1839     const uint64_t JumpOffset = JumpAddress - BF->getAddress();
1840     if (!BC.DisAsm->getInstruction(Inst, Size, Contents.slice(JumpOffset), 0,
1841                                    nulls())) {
1842       llvm_unreachable("Unable to disassemble jump instruction.");
1843     }
1844     assert(BC.MIB->isBranch(Inst) && "Branch instruction expected.");
1845 
1846     if (Size == 2)
1847       ++NumShort;
1848     else if (Size == 5)
1849       ++NumLong;
1850     else
1851       llvm_unreachable("Unexpected size for static keys jump instruction.");
1852 
1853     // Check if we need to convert jump instruction into a nop.
1854     if (!NopIDs.contains(EntryID))
1855       continue;
1856 
1857     SmallString<15> NopCode;
1858     raw_svector_ostream VecOS(NopCode);
1859     BC.MAB->writeNopData(VecOS, Size, BC.STI.get());
1860     for (uint64_t I = 0; I < Size; ++I)
1861       Contents[JumpOffset + I] = NopCode[I];
1862   }
1863 
1864   BC.outs() << "BOLT-INFO: written " << NumShort << " short and " << NumLong
1865             << " long static keys jumps in optimized functions\n";
1866 
1867   return Error::success();
1868 }
1869 
1870 } // namespace
1871 
1872 std::unique_ptr<MetadataRewriter>
1873 llvm::bolt::createLinuxKernelRewriter(BinaryContext &BC) {
1874   return std::make_unique<LinuxKernelRewriter>(BC);
1875 }
1876