xref: /llvm-project/bolt/lib/Rewrite/LinuxKernelRewriter.cpp (revision 99b4532b8b724db5fcbb80b86053a4c7371f2c1e)
1 //===- bolt/Rewrite/LinuxKernelRewriter.cpp -------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // Support for updating Linux Kernel metadata.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "bolt/Core/BinaryFunction.h"
14 #include "bolt/Rewrite/MetadataRewriter.h"
15 #include "bolt/Rewrite/MetadataRewriters.h"
16 #include "bolt/Utils/CommandLineOpts.h"
17 #include "llvm/ADT/ArrayRef.h"
18 #include "llvm/ADT/DenseSet.h"
19 #include "llvm/MC/MCDisassembler/MCDisassembler.h"
20 #include "llvm/Support/BinaryStreamWriter.h"
21 #include "llvm/Support/CommandLine.h"
22 #include "llvm/Support/Debug.h"
23 #include "llvm/Support/Errc.h"
24 
25 #define DEBUG_TYPE "bolt-linux"
26 
27 using namespace llvm;
28 using namespace bolt;
29 
30 namespace opts {
31 
32 static cl::opt<bool>
33     AltInstHasPadLen("alt-inst-has-padlen",
34                      cl::desc("specify that .altinstructions has padlen field"),
35                      cl::init(false), cl::Hidden, cl::cat(BoltCategory));
36 
37 static cl::opt<uint32_t>
38     AltInstFeatureSize("alt-inst-feature-size",
39                        cl::desc("size of feature field in .altinstructions"),
40                        cl::init(2), cl::Hidden, cl::cat(BoltCategory));
41 
42 static cl::opt<bool>
43     DumpAltInstructions("dump-alt-instructions",
44                         cl::desc("dump Linux alternative instructions info"),
45                         cl::init(false), cl::Hidden, cl::cat(BoltCategory));
46 
47 static cl::opt<bool>
48     DumpExceptions("dump-linux-exceptions",
49                    cl::desc("dump Linux kernel exception table"),
50                    cl::init(false), cl::Hidden, cl::cat(BoltCategory));
51 
52 static cl::opt<bool>
53     DumpORC("dump-orc", cl::desc("dump raw ORC unwind information (sorted)"),
54             cl::init(false), cl::Hidden, cl::cat(BoltCategory));
55 
56 static cl::opt<bool> DumpParavirtualPatchSites(
57     "dump-para-sites", cl::desc("dump Linux kernel paravitual patch sites"),
58     cl::init(false), cl::Hidden, cl::cat(BoltCategory));
59 
60 static cl::opt<bool>
61     DumpPCIFixups("dump-pci-fixups",
62                   cl::desc("dump Linux kernel PCI fixup table"),
63                   cl::init(false), cl::Hidden, cl::cat(BoltCategory));
64 
65 static cl::opt<bool> DumpSMPLocks("dump-smp-locks",
66                                   cl::desc("dump Linux kernel SMP locks"),
67                                   cl::init(false), cl::Hidden,
68                                   cl::cat(BoltCategory));
69 
70 static cl::opt<bool> DumpStaticCalls("dump-static-calls",
71                                      cl::desc("dump Linux kernel static calls"),
72                                      cl::init(false), cl::Hidden,
73                                      cl::cat(BoltCategory));
74 
75 static cl::opt<bool>
76     DumpStaticKeys("dump-static-keys",
77                    cl::desc("dump Linux kernel static keys jump table"),
78                    cl::init(false), cl::Hidden, cl::cat(BoltCategory));
79 
80 static cl::opt<bool> LongJumpLabels(
81     "long-jump-labels",
82     cl::desc("always use long jumps/nops for Linux kernel static keys"),
83     cl::init(false), cl::Hidden, cl::cat(BoltCategory));
84 
85 static cl::opt<bool>
86     PrintORC("print-orc",
87              cl::desc("print ORC unwind information for instructions"),
88              cl::init(true), cl::Hidden, cl::cat(BoltCategory));
89 
90 } // namespace opts
91 
/// The Linux kernel unwinds stacks using ORC (oops rewind capability) data.
/// This structure holds the ORC state that applies at a given IP.
struct ORCState {
  int16_t SPOffset;
  int16_t BPOffset;
  int16_t Info;

  /// Two states are equal only when all three fields match.
  bool operator==(const ORCState &Other) const {
    if (SPOffset != Other.SPOffset)
      return false;
    if (BPOffset != Other.BPOffset)
      return false;
    return Info == Other.Info;
  }

  /// States differ as soon as any single field differs.
  bool operator!=(const ORCState &Other) const {
    return SPOffset != Other.SPOffset || BPOffset != Other.BPOffset ||
           Info != Other.Info;
  }
};
106 
107 /// Section terminator ORC entry.
108 static ORCState NullORC = {0, 0, 0};
109 
110 /// Basic printer for ORC entry. It does not provide the same level of
111 /// information as objtool (for now).
112 inline raw_ostream &operator<<(raw_ostream &OS, const ORCState &E) {
113   if (!opts::PrintORC)
114     return OS;
115   if (E != NullORC)
116     OS << format("{sp: %d, bp: %d, info: 0x%x}", E.SPOffset, E.BPOffset,
117                  E.Info);
118   else
119     OS << "{terminator}";
120 
121   return OS;
122 }
123 
124 namespace {
125 
126 class LinuxKernelRewriter final : public MetadataRewriter {
127   /// Information required for updating metadata referencing an instruction.
128   struct InstructionFixup {
129     BinarySection &Section; // Section referencing the instruction.
130     uint64_t Offset;        // Offset in the section above.
131     BinaryFunction &BF;     // Function containing the instruction.
132     MCSymbol &Label;        // Label marking the instruction.
133     bool IsPCRelative;      // If the reference type is relative.
134   };
135   std::vector<InstructionFixup> Fixups;
136 
137   /// Size of an entry in .smp_locks section.
138   static constexpr size_t SMP_LOCKS_ENTRY_SIZE = 4;
139 
140   /// Linux ORC sections.
141   ErrorOr<BinarySection &> ORCUnwindSection = std::errc::bad_address;
142   ErrorOr<BinarySection &> ORCUnwindIPSection = std::errc::bad_address;
143 
144   /// Size of entries in ORC sections.
145   static constexpr size_t ORC_UNWIND_ENTRY_SIZE = 6;
146   static constexpr size_t ORC_UNWIND_IP_ENTRY_SIZE = 4;
147 
148   struct ORCListEntry {
149     uint64_t IP;        /// Instruction address.
150     BinaryFunction *BF; /// Binary function corresponding to the entry.
151     ORCState ORC;       /// Stack unwind info in ORC format.
152 
153     /// ORC entries are sorted by their IPs. Terminator entries (NullORC)
154     /// should precede other entries with the same address.
155     bool operator<(const ORCListEntry &Other) const {
156       if (IP < Other.IP)
157         return 1;
158       if (IP > Other.IP)
159         return 0;
160       return ORC == NullORC && Other.ORC != NullORC;
161     }
162   };
163 
164   using ORCListType = std::vector<ORCListEntry>;
165   ORCListType ORCEntries;
166 
167   /// Number of entries in the input file ORC sections.
168   uint64_t NumORCEntries = 0;
169 
170   /// Section containing static keys jump table.
171   ErrorOr<BinarySection &> StaticKeysJumpSection = std::errc::bad_address;
172   uint64_t StaticKeysJumpTableAddress = 0;
173   static constexpr size_t STATIC_KEYS_JUMP_ENTRY_SIZE = 8;
174 
175   struct JumpInfoEntry {
176     bool Likely;
177     bool InitValue;
178   };
179   SmallVector<JumpInfoEntry, 16> JumpInfo;
180 
181   /// Static key entries that need nop conversion.
182   DenseSet<uint32_t> NopIDs;
183 
184   /// Section containing static call table.
185   ErrorOr<BinarySection &> StaticCallSection = std::errc::bad_address;
186   uint64_t StaticCallTableAddress = 0;
187   static constexpr size_t STATIC_CALL_ENTRY_SIZE = 8;
188 
189   struct StaticCallInfo {
190     uint32_t ID;              /// Identifier of the entry in the table.
191     BinaryFunction *Function; /// Function containing associated call.
192     MCSymbol *Label;          /// Label attached to the call.
193   };
194   using StaticCallListType = std::vector<StaticCallInfo>;
195   StaticCallListType StaticCallEntries;
196 
197   /// Section containing the Linux exception table.
198   ErrorOr<BinarySection &> ExceptionsSection = std::errc::bad_address;
199   static constexpr size_t EXCEPTION_TABLE_ENTRY_SIZE = 12;
200 
201   /// Functions with exception handling code.
202   DenseSet<BinaryFunction *> FunctionsWithExceptions;
203 
204   /// Section with paravirtual patch sites.
205   ErrorOr<BinarySection &> ParavirtualPatchSection = std::errc::bad_address;
206 
207   /// Alignment of paravirtual patch structures.
208   static constexpr size_t PARA_PATCH_ALIGN = 8;
209 
210   /// .altinstructions section.
211   ErrorOr<BinarySection &> AltInstrSection = std::errc::bad_address;
212 
213   /// Section containing Linux bug table.
214   ErrorOr<BinarySection &> BugTableSection = std::errc::bad_address;
215 
216   /// Size of bug_entry struct.
217   static constexpr size_t BUG_TABLE_ENTRY_SIZE = 12;
218 
219   /// List of bug entries per function.
220   using FunctionBugListType =
221       DenseMap<BinaryFunction *, SmallVector<uint32_t, 2>>;
222   FunctionBugListType FunctionBugList;
223 
224   /// .pci_fixup section.
225   ErrorOr<BinarySection &> PCIFixupSection = std::errc::bad_address;
226   static constexpr size_t PCI_FIXUP_ENTRY_SIZE = 16;
227 
228   /// Process linux kernel special sections and their relocations.
229   void processLKSections();
230 
231   /// Process __ksymtab and __ksymtab_gpl.
232   void processLKKSymtab(bool IsGPL = false);
233 
234   // Create relocations in sections requiring fixups.
235   //
236   // Make sure functions that will not be emitted are marked as such before this
237   // function is executed.
238   void processInstructionFixups();
239 
240   /// Process .smp_locks section.
241   Error processSMPLocks();
242 
243   /// Read ORC unwind information and annotate instructions.
244   Error readORCTables();
245 
246   /// Update ORC for functions once CFG is constructed.
247   Error processORCPostCFG();
248 
249   /// Update ORC data in the binary.
250   Error rewriteORCTables();
251 
252   /// Validate written ORC tables after binary emission.
253   Error validateORCTables();
254 
255   /// Static call table handling.
256   Error readStaticCalls();
257   Error rewriteStaticCalls();
258 
259   Error readExceptionTable();
260   Error rewriteExceptionTable();
261 
262   /// Paravirtual instruction patch sites.
263   Error readParaInstructions();
264   Error rewriteParaInstructions();
265 
266   /// __bug_table section handling.
267   Error readBugTable();
268   Error rewriteBugTable();
269 
270   /// Do no process functions containing instruction annotated with
271   /// \p Annotation.
272   void skipFunctionsWithAnnotation(StringRef Annotation) const;
273 
274   /// Handle alternative instruction info from .altinstructions.
275   Error readAltInstructions();
276   Error rewriteAltInstructions();
277 
278   /// Read .pci_fixup
279   Error readPCIFixupTable();
280 
281   /// Handle static keys jump table.
282   Error readStaticKeysJumpTable();
283   Error rewriteStaticKeysJumpTable();
284   Error updateStaticKeysJumpTablePostEmit();
285 
286 public:
287   LinuxKernelRewriter(BinaryContext &BC)
288       : MetadataRewriter("linux-kernel-rewriter", BC) {}
289 
290   Error preCFGInitializer() override {
291     processLKSections();
292 
293     if (Error E = processSMPLocks())
294       return E;
295 
296     if (Error E = readORCTables())
297       return E;
298 
299     if (Error E = readStaticCalls())
300       return E;
301 
302     if (Error E = readExceptionTable())
303       return E;
304 
305     if (Error E = readParaInstructions())
306       return E;
307 
308     if (Error E = readBugTable())
309       return E;
310 
311     if (Error E = readAltInstructions())
312       return E;
313 
314     if (Error E = readPCIFixupTable())
315       return E;
316 
317     if (Error E = readStaticKeysJumpTable())
318       return E;
319 
320     return Error::success();
321   }
322 
323   Error postCFGInitializer() override {
324     if (Error E = processORCPostCFG())
325       return E;
326 
327     return Error::success();
328   }
329 
330   Error preEmitFinalizer() override {
331     // Since rewriteExceptionTable() can mark functions as non-simple, run it
332     // before other rewriters that depend on simple/emit status.
333     if (Error E = rewriteExceptionTable())
334       return E;
335 
336     if (Error E = rewriteAltInstructions())
337       return E;
338 
339     if (Error E = rewriteParaInstructions())
340       return E;
341 
342     if (Error E = rewriteORCTables())
343       return E;
344 
345     if (Error E = rewriteStaticCalls())
346       return E;
347 
348     if (Error E = rewriteStaticKeysJumpTable())
349       return E;
350 
351     if (Error E = rewriteBugTable())
352       return E;
353 
354     processInstructionFixups();
355 
356     return Error::success();
357   }
358 
359   Error postEmitFinalizer() override {
360     if (Error E = updateStaticKeysJumpTablePostEmit())
361       return E;
362 
363     if (Error E = validateORCTables())
364       return E;
365 
366     return Error::success();
367   }
368 };
369 
370 void LinuxKernelRewriter::processLKSections() {
371   processLKKSymtab();
372   processLKKSymtab(true);
373 }
374 
375 /// Process __ksymtab[_gpl] sections of Linux Kernel.
376 /// This section lists all the vmlinux symbols that kernel modules can access.
377 ///
378 /// All the entries are 4 bytes each and hence we can read them by one by one
379 /// and ignore the ones that are not pointing to the .text section. All pointers
380 /// are PC relative offsets. Always, points to the beginning of the function.
381 void LinuxKernelRewriter::processLKKSymtab(bool IsGPL) {
382   StringRef SectionName = "__ksymtab";
383   if (IsGPL)
384     SectionName = "__ksymtab_gpl";
385   ErrorOr<BinarySection &> SectionOrError =
386       BC.getUniqueSectionByName(SectionName);
387   assert(SectionOrError &&
388          "__ksymtab[_gpl] section not found in Linux Kernel binary");
389   const uint64_t SectionSize = SectionOrError->getSize();
390   const uint64_t SectionAddress = SectionOrError->getAddress();
391   assert((SectionSize % 4) == 0 &&
392          "The size of the __ksymtab[_gpl] section should be a multiple of 4");
393 
394   for (uint64_t I = 0; I < SectionSize; I += 4) {
395     const uint64_t EntryAddress = SectionAddress + I;
396     ErrorOr<uint64_t> Offset = BC.getSignedValueAtAddress(EntryAddress, 4);
397     assert(Offset && "Reading valid PC-relative offset for a ksymtab entry");
398     const int32_t SignedOffset = *Offset;
399     const uint64_t RefAddress = EntryAddress + SignedOffset;
400     BinaryFunction *BF = BC.getBinaryFunctionAtAddress(RefAddress);
401     if (!BF)
402       continue;
403 
404     BC.addRelocation(EntryAddress, BF->getSymbol(), Relocation::getPC32(), 0,
405                      *Offset);
406   }
407 }
408 
409 /// .smp_locks section contains PC-relative references to instructions with LOCK
410 /// prefix. The prefix can be converted to NOP at boot time on non-SMP systems.
411 Error LinuxKernelRewriter::processSMPLocks() {
412   ErrorOr<BinarySection &> SMPLocksSection =
413       BC.getUniqueSectionByName(".smp_locks");
414   if (!SMPLocksSection)
415     return Error::success();
416 
417   const uint64_t SectionSize = SMPLocksSection->getSize();
418   const uint64_t SectionAddress = SMPLocksSection->getAddress();
419   if (SectionSize % SMP_LOCKS_ENTRY_SIZE)
420     return createStringError(errc::executable_format_error,
421                              "bad size of .smp_locks section");
422 
423   DataExtractor DE = DataExtractor(SMPLocksSection->getContents(),
424                                    BC.AsmInfo->isLittleEndian(),
425                                    BC.AsmInfo->getCodePointerSize());
426   DataExtractor::Cursor Cursor(0);
427   while (Cursor && Cursor.tell() < SectionSize) {
428     const uint64_t Offset = Cursor.tell();
429     const uint64_t IP = SectionAddress + Offset + (int32_t)DE.getU32(Cursor);
430 
431     // Consume the status of the cursor.
432     if (!Cursor)
433       return createStringError(errc::executable_format_error,
434                                "error while reading .smp_locks: %s",
435                                toString(Cursor.takeError()).c_str());
436 
437     if (opts::DumpSMPLocks)
438       BC.outs() << "SMP lock at 0x: " << Twine::utohexstr(IP) << '\n';
439 
440     BinaryFunction *BF = BC.getBinaryFunctionContainingAddress(IP);
441     if (!BF || !BC.shouldEmit(*BF))
442       continue;
443 
444     MCInst *Inst = BF->getInstructionAtOffset(IP - BF->getAddress());
445     if (!Inst)
446       return createStringError(errc::executable_format_error,
447                                "no instruction matches lock at 0x%" PRIx64, IP);
448 
449     // Check for duplicate entries.
450     if (BC.MIB->hasAnnotation(*Inst, "SMPLock"))
451       return createStringError(errc::executable_format_error,
452                                "duplicate SMP lock at 0x%" PRIx64, IP);
453 
454     BC.MIB->addAnnotation(*Inst, "SMPLock", true);
455     MCSymbol *Label =
456         BC.MIB->getOrCreateInstLabel(*Inst, "__SMPLock_", BC.Ctx.get());
457 
458     Fixups.push_back({*SMPLocksSection, Offset, *BF, *Label,
459                       /*IsPCRelative*/ true});
460   }
461 
462   const uint64_t NumEntries = SectionSize / SMP_LOCKS_ENTRY_SIZE;
463   BC.outs() << "BOLT-INFO: parsed " << NumEntries << " SMP lock entries\n";
464 
465   return Error::success();
466 }
467 
468 void LinuxKernelRewriter::processInstructionFixups() {
469   for (InstructionFixup &Fixup : Fixups) {
470     if (!BC.shouldEmit(Fixup.BF))
471       continue;
472 
473     Fixup.Section.addRelocation(Fixup.Offset, &Fixup.Label,
474                                 Fixup.IsPCRelative ? ELF::R_X86_64_PC32
475                                                    : ELF::R_X86_64_64,
476                                 /*Addend*/ 0);
477   }
478 }
479 
480 Error LinuxKernelRewriter::readORCTables() {
481   // NOTE: we should ignore relocations for orc tables as the tables are sorted
482   // post-link time and relocations are not updated.
483   ORCUnwindSection = BC.getUniqueSectionByName(".orc_unwind");
484   ORCUnwindIPSection = BC.getUniqueSectionByName(".orc_unwind_ip");
485 
486   if (!ORCUnwindSection && !ORCUnwindIPSection)
487     return Error::success();
488 
489   if (!ORCUnwindSection || !ORCUnwindIPSection)
490     return createStringError(errc::executable_format_error,
491                              "missing ORC section");
492 
493   NumORCEntries = ORCUnwindIPSection->getSize() / ORC_UNWIND_IP_ENTRY_SIZE;
494   if (ORCUnwindSection->getSize() != NumORCEntries * ORC_UNWIND_ENTRY_SIZE ||
495       ORCUnwindIPSection->getSize() != NumORCEntries * ORC_UNWIND_IP_ENTRY_SIZE)
496     return createStringError(errc::executable_format_error,
497                              "ORC entries number mismatch detected");
498 
499   const uint64_t IPSectionAddress = ORCUnwindIPSection->getAddress();
500   DataExtractor OrcDE = DataExtractor(ORCUnwindSection->getContents(),
501                                       BC.AsmInfo->isLittleEndian(),
502                                       BC.AsmInfo->getCodePointerSize());
503   DataExtractor IPDE = DataExtractor(ORCUnwindIPSection->getContents(),
504                                      BC.AsmInfo->isLittleEndian(),
505                                      BC.AsmInfo->getCodePointerSize());
506   DataExtractor::Cursor ORCCursor(0);
507   DataExtractor::Cursor IPCursor(0);
508   uint64_t PrevIP = 0;
509   for (uint32_t Index = 0; Index < NumORCEntries; ++Index) {
510     const uint64_t IP =
511         IPSectionAddress + IPCursor.tell() + (int32_t)IPDE.getU32(IPCursor);
512 
513     // Consume the status of the cursor.
514     if (!IPCursor)
515       return createStringError(errc::executable_format_error,
516                                "out of bounds while reading ORC IP table: %s",
517                                toString(IPCursor.takeError()).c_str());
518 
519     if (IP < PrevIP && opts::Verbosity)
520       BC.errs() << "BOLT-WARNING: out of order IP 0x" << Twine::utohexstr(IP)
521                 << " detected while reading ORC\n";
522 
523     PrevIP = IP;
524 
525     // Store all entries, includes those we are not going to update as the
526     // tables need to be sorted globally before being written out.
527     ORCEntries.push_back(ORCListEntry());
528     ORCListEntry &Entry = ORCEntries.back();
529 
530     Entry.IP = IP;
531     Entry.ORC.SPOffset = (int16_t)OrcDE.getU16(ORCCursor);
532     Entry.ORC.BPOffset = (int16_t)OrcDE.getU16(ORCCursor);
533     Entry.ORC.Info = (int16_t)OrcDE.getU16(ORCCursor);
534     Entry.BF = nullptr;
535 
536     // Consume the status of the cursor.
537     if (!ORCCursor)
538       return createStringError(errc::executable_format_error,
539                                "out of bounds while reading ORC: %s",
540                                toString(ORCCursor.takeError()).c_str());
541 
542     if (Entry.ORC == NullORC)
543       continue;
544 
545     BinaryFunction *&BF = Entry.BF;
546     BF = BC.getBinaryFunctionContainingAddress(IP, /*CheckPastEnd*/ true);
547 
548     // If the entry immediately pointing past the end of the function is not
549     // the terminator entry, then it does not belong to this function.
550     if (BF && BF->getAddress() + BF->getSize() == IP)
551       BF = 0;
552 
553     if (!BF) {
554       if (opts::Verbosity)
555         BC.errs() << "BOLT-WARNING: no binary function found matching ORC 0x"
556                   << Twine::utohexstr(IP) << ": " << Entry.ORC << '\n';
557       continue;
558     }
559 
560     BF->setHasORC(true);
561 
562     if (!BF->hasInstructions())
563       continue;
564 
565     MCInst *Inst = BF->getInstructionAtOffset(IP - BF->getAddress());
566     if (!Inst)
567       return createStringError(
568           errc::executable_format_error,
569           "no instruction at address 0x%" PRIx64 " in .orc_unwind_ip", IP);
570 
571     // Some addresses will have two entries associated with them. The first
572     // one being a "weak" section terminator. Since we ignore the terminator,
573     // we should only assign one entry per instruction.
574     if (BC.MIB->hasAnnotation(*Inst, "ORC"))
575       return createStringError(
576           errc::executable_format_error,
577           "duplicate non-terminal ORC IP 0x%" PRIx64 " in .orc_unwind_ip", IP);
578 
579     BC.MIB->addAnnotation(*Inst, "ORC", Entry.ORC);
580   }
581 
582   BC.outs() << "BOLT-INFO: parsed " << NumORCEntries << " ORC entries\n";
583 
584   if (opts::DumpORC) {
585     BC.outs() << "BOLT-INFO: ORC unwind information:\n";
586     for (const ORCListEntry &E : ORCEntries) {
587       BC.outs() << "0x" << Twine::utohexstr(E.IP) << ": " << E.ORC;
588       if (E.BF)
589         BC.outs() << ": " << *E.BF;
590       BC.outs() << '\n';
591     }
592   }
593 
594   // Add entries for functions that don't have explicit ORC info at the start.
595   // We'll have the correct info for them even if ORC for the preceding function
596   // changes.
597   ORCListType NewEntries;
598   for (BinaryFunction &BF : llvm::make_second_range(BC.getBinaryFunctions())) {
599     auto It = llvm::partition_point(ORCEntries, [&](const ORCListEntry &E) {
600       return E.IP <= BF.getAddress();
601     });
602     if (It != ORCEntries.begin())
603       --It;
604 
605     if (It->BF == &BF)
606       continue;
607 
608     if (It->ORC == NullORC && It->IP == BF.getAddress()) {
609       assert(!It->BF);
610       It->BF = &BF;
611       continue;
612     }
613 
614     NewEntries.push_back({BF.getAddress(), &BF, It->ORC});
615     if (It->ORC != NullORC)
616       BF.setHasORC(true);
617   }
618 
619   llvm::copy(NewEntries, std::back_inserter(ORCEntries));
620   llvm::sort(ORCEntries);
621 
622   if (opts::DumpORC) {
623     BC.outs() << "BOLT-INFO: amended ORC unwind information:\n";
624     for (const ORCListEntry &E : ORCEntries) {
625       BC.outs() << "0x" << Twine::utohexstr(E.IP) << ": " << E.ORC;
626       if (E.BF)
627         BC.outs() << ": " << *E.BF;
628       BC.outs() << '\n';
629     }
630   }
631 
632   return Error::success();
633 }
634 
635 Error LinuxKernelRewriter::processORCPostCFG() {
636   if (!NumORCEntries)
637     return Error::success();
638 
639   // Propagate ORC to the rest of the function. We can annotate every
640   // instruction in every function, but to minimize the overhead, we annotate
641   // the first instruction in every basic block to reflect the state at the
642   // entry. This way, the ORC state can be calculated based on annotations
643   // regardless of the basic block layout. Note that if we insert/delete
644   // instructions, we must take care to attach ORC info to the new/deleted ones.
645   for (BinaryFunction &BF : llvm::make_second_range(BC.getBinaryFunctions())) {
646 
647     std::optional<ORCState> CurrentState;
648     for (BinaryBasicBlock &BB : BF) {
649       for (MCInst &Inst : BB) {
650         ErrorOr<ORCState> State =
651             BC.MIB->tryGetAnnotationAs<ORCState>(Inst, "ORC");
652 
653         if (State) {
654           CurrentState = *State;
655           continue;
656         }
657 
658         // Get state for the start of the function.
659         if (!CurrentState) {
660           // A terminator entry (NullORC) can match the function address. If
661           // there's also a non-terminator entry, it will be placed after the
662           // terminator. Hence, we are looking for the last ORC entry that
663           // matches the address.
664           auto It =
665               llvm::partition_point(ORCEntries, [&](const ORCListEntry &E) {
666                 return E.IP <= BF.getAddress();
667               });
668           if (It != ORCEntries.begin())
669             --It;
670 
671           assert(It->IP == BF.getAddress() && (!It->BF || It->BF == &BF) &&
672                  "ORC info at function entry expected.");
673 
674           if (It->ORC == NullORC && BF.hasORC()) {
675             BC.errs() << "BOLT-WARNING: ORC unwind info excludes prologue for "
676                       << BF << '\n';
677           }
678 
679           It->BF = &BF;
680 
681           CurrentState = It->ORC;
682           if (It->ORC != NullORC)
683             BF.setHasORC(true);
684         }
685 
686         // While printing ORC, attach info to every instruction for convenience.
687         if (opts::PrintORC || &Inst == &BB.front())
688           BC.MIB->addAnnotation(Inst, "ORC", *CurrentState);
689       }
690     }
691   }
692 
693   return Error::success();
694 }
695 
696 Error LinuxKernelRewriter::rewriteORCTables() {
697   if (!NumORCEntries)
698     return Error::success();
699 
700   // Update ORC sections in-place. As we change the code, the number of ORC
701   // entries may increase for some functions. However, as we remove terminator
702   // redundancy (see below), more space is freed up and we should always be able
703   // to fit new ORC tables in the reserved space.
704   auto createInPlaceWriter = [&](BinarySection &Section) -> BinaryStreamWriter {
705     const size_t Size = Section.getSize();
706     uint8_t *NewContents = new uint8_t[Size];
707     Section.updateContents(NewContents, Size);
708     Section.setOutputFileOffset(Section.getInputFileOffset());
709     return BinaryStreamWriter({NewContents, Size}, BC.AsmInfo->isLittleEndian()
710                                                        ? endianness::little
711                                                        : endianness::big);
712   };
713   BinaryStreamWriter UnwindWriter = createInPlaceWriter(*ORCUnwindSection);
714   BinaryStreamWriter UnwindIPWriter = createInPlaceWriter(*ORCUnwindIPSection);
715 
716   uint64_t NumEmitted = 0;
717   std::optional<ORCState> LastEmittedORC;
718   auto emitORCEntry = [&](const uint64_t IP, const ORCState &ORC,
719                           MCSymbol *Label = 0, bool Force = false) -> Error {
720     if (LastEmittedORC && ORC == *LastEmittedORC && !Force)
721       return Error::success();
722 
723     LastEmittedORC = ORC;
724 
725     if (++NumEmitted > NumORCEntries)
726       return createStringError(errc::executable_format_error,
727                                "exceeded the number of allocated ORC entries");
728 
729     if (Label)
730       ORCUnwindIPSection->addRelocation(UnwindIPWriter.getOffset(), Label,
731                                         Relocation::getPC32(), /*Addend*/ 0);
732 
733     const int32_t IPValue =
734         IP - ORCUnwindIPSection->getAddress() - UnwindIPWriter.getOffset();
735     if (Error E = UnwindIPWriter.writeInteger(IPValue))
736       return E;
737 
738     if (Error E = UnwindWriter.writeInteger(ORC.SPOffset))
739       return E;
740     if (Error E = UnwindWriter.writeInteger(ORC.BPOffset))
741       return E;
742     if (Error E = UnwindWriter.writeInteger(ORC.Info))
743       return E;
744 
745     return Error::success();
746   };
747 
748   // Emit new ORC entries for the emitted function.
749   auto emitORC = [&](const FunctionFragment &FF) -> Error {
750     ORCState CurrentState = NullORC;
751     for (BinaryBasicBlock *BB : FF) {
752       for (MCInst &Inst : *BB) {
753         ErrorOr<ORCState> ErrorOrState =
754             BC.MIB->tryGetAnnotationAs<ORCState>(Inst, "ORC");
755         if (!ErrorOrState || *ErrorOrState == CurrentState)
756           continue;
757 
758         // Issue label for the instruction.
759         MCSymbol *Label =
760             BC.MIB->getOrCreateInstLabel(Inst, "__ORC_", BC.Ctx.get());
761 
762         if (Error E = emitORCEntry(0, *ErrorOrState, Label))
763           return E;
764 
765         CurrentState = *ErrorOrState;
766       }
767     }
768 
769     return Error::success();
770   };
771 
772   // Emit ORC entries for cold fragments. We assume that these fragments are
773   // emitted contiguously in memory using reserved space in the kernel. This
774   // assumption is validated in post-emit pass validateORCTables() where we
775   // check that ORC entries are sorted by their addresses.
776   auto emitColdORC = [&]() -> Error {
777     for (BinaryFunction &BF :
778          llvm::make_second_range(BC.getBinaryFunctions())) {
779       if (!BC.shouldEmit(BF))
780         continue;
781       for (FunctionFragment &FF : BF.getLayout().getSplitFragments())
782         if (Error E = emitORC(FF))
783           return E;
784     }
785 
786     return Error::success();
787   };
788 
789   bool ShouldEmitCold = !BC.BOLTReserved.empty();
790   for (ORCListEntry &Entry : ORCEntries) {
791     if (ShouldEmitCold && Entry.IP > BC.BOLTReserved.start()) {
792       if (Error E = emitColdORC())
793         return E;
794 
795       // Emit terminator entry at the end of the reserved region.
796       if (Error E = emitORCEntry(BC.BOLTReserved.end(), NullORC))
797         return E;
798 
799       ShouldEmitCold = false;
800     }
801 
802     // Emit original entries for functions that we haven't modified.
803     if (!Entry.BF || !BC.shouldEmit(*Entry.BF)) {
804       // Emit terminator only if it marks the start of a function.
805       if (Entry.ORC == NullORC && !Entry.BF)
806         continue;
807       if (Error E = emitORCEntry(Entry.IP, Entry.ORC))
808         return E;
809       continue;
810     }
811 
812     // Emit all ORC entries for a function referenced by an entry and skip over
813     // the rest of entries for this function by resetting its ORC attribute.
814     if (Entry.BF->hasORC()) {
815       if (Error E = emitORC(Entry.BF->getLayout().getMainFragment()))
816         return E;
817       Entry.BF->setHasORC(false);
818     }
819   }
820 
821   LLVM_DEBUG(dbgs() << "BOLT-DEBUG: emitted " << NumEmitted
822                     << " ORC entries\n");
823 
824   // Populate ORC tables with a terminator entry with max address to match the
825   // original table sizes.
826   const uint64_t LastIP = std::numeric_limits<uint64_t>::max();
827   while (UnwindWriter.bytesRemaining()) {
828     if (Error E = emitORCEntry(LastIP, NullORC, nullptr, /*Force*/ true))
829       return E;
830   }
831 
832   return Error::success();
833 }
834 
835 Error LinuxKernelRewriter::validateORCTables() {
836   if (!ORCUnwindIPSection)
837     return Error::success();
838 
839   const uint64_t IPSectionAddress = ORCUnwindIPSection->getAddress();
840   DataExtractor IPDE = DataExtractor(ORCUnwindIPSection->getOutputContents(),
841                                      BC.AsmInfo->isLittleEndian(),
842                                      BC.AsmInfo->getCodePointerSize());
843   DataExtractor::Cursor IPCursor(0);
844   uint64_t PrevIP = 0;
845   for (uint32_t Index = 0; Index < NumORCEntries; ++Index) {
846     const uint64_t IP =
847         IPSectionAddress + IPCursor.tell() + (int32_t)IPDE.getU32(IPCursor);
848     if (!IPCursor)
849       return createStringError(errc::executable_format_error,
850                                "out of bounds while reading ORC IP table: %s",
851                                toString(IPCursor.takeError()).c_str());
852 
853     assert(IP >= PrevIP && "Unsorted ORC table detected");
854     (void)PrevIP;
855     PrevIP = IP;
856   }
857 
858   return Error::success();
859 }
860 
861 /// The static call site table is created by objtool and contains entries in the
862 /// following format:
863 ///
864 ///    struct static_call_site {
865 ///      s32 addr;
866 ///      s32 key;
867 ///    };
868 ///
869 Error LinuxKernelRewriter::readStaticCalls() {
870   const BinaryData *StaticCallTable =
871       BC.getBinaryDataByName("__start_static_call_sites");
872   if (!StaticCallTable)
873     return Error::success();
874 
875   StaticCallTableAddress = StaticCallTable->getAddress();
876 
877   const BinaryData *Stop = BC.getBinaryDataByName("__stop_static_call_sites");
878   if (!Stop)
879     return createStringError(errc::executable_format_error,
880                              "missing __stop_static_call_sites symbol");
881 
882   ErrorOr<BinarySection &> ErrorOrSection =
883       BC.getSectionForAddress(StaticCallTableAddress);
884   if (!ErrorOrSection)
885     return createStringError(errc::executable_format_error,
886                              "no section matching __start_static_call_sites");
887 
888   StaticCallSection = *ErrorOrSection;
889   if (!StaticCallSection->containsAddress(Stop->getAddress() - 1))
890     return createStringError(errc::executable_format_error,
891                              "__stop_static_call_sites not in the same section "
892                              "as __start_static_call_sites");
893 
894   if ((Stop->getAddress() - StaticCallTableAddress) % STATIC_CALL_ENTRY_SIZE)
895     return createStringError(errc::executable_format_error,
896                              "static call table size error");
897 
898   const uint64_t SectionAddress = StaticCallSection->getAddress();
899   DataExtractor DE(StaticCallSection->getContents(),
900                    BC.AsmInfo->isLittleEndian(),
901                    BC.AsmInfo->getCodePointerSize());
902   DataExtractor::Cursor Cursor(StaticCallTableAddress - SectionAddress);
903   uint32_t EntryID = 0;
904   while (Cursor && Cursor.tell() < Stop->getAddress() - SectionAddress) {
905     const uint64_t CallAddress =
906         SectionAddress + Cursor.tell() + (int32_t)DE.getU32(Cursor);
907     const uint64_t KeyAddress =
908         SectionAddress + Cursor.tell() + (int32_t)DE.getU32(Cursor);
909 
910     // Consume the status of the cursor.
911     if (!Cursor)
912       return createStringError(errc::executable_format_error,
913                                "out of bounds while reading static calls: %s",
914                                toString(Cursor.takeError()).c_str());
915 
916     ++EntryID;
917 
918     if (opts::DumpStaticCalls) {
919       BC.outs() << "Static Call Site: " << EntryID << '\n';
920       BC.outs() << "\tCallAddress:   0x" << Twine::utohexstr(CallAddress)
921                 << "\n\tKeyAddress:    0x" << Twine::utohexstr(KeyAddress)
922                 << '\n';
923     }
924 
925     BinaryFunction *BF = BC.getBinaryFunctionContainingAddress(CallAddress);
926     if (!BF)
927       continue;
928 
929     if (!BC.shouldEmit(*BF))
930       continue;
931 
932     if (!BF->hasInstructions())
933       continue;
934 
935     MCInst *Inst = BF->getInstructionAtOffset(CallAddress - BF->getAddress());
936     if (!Inst)
937       return createStringError(errc::executable_format_error,
938                                "no instruction at call site address 0x%" PRIx64,
939                                CallAddress);
940 
941     // Check for duplicate entries.
942     if (BC.MIB->hasAnnotation(*Inst, "StaticCall"))
943       return createStringError(errc::executable_format_error,
944                                "duplicate static call site at 0x%" PRIx64,
945                                CallAddress);
946 
947     BC.MIB->addAnnotation(*Inst, "StaticCall", EntryID);
948 
949     MCSymbol *Label =
950         BC.MIB->getOrCreateInstLabel(*Inst, "__SC_", BC.Ctx.get());
951 
952     StaticCallEntries.push_back({EntryID, BF, Label});
953   }
954 
955   BC.outs() << "BOLT-INFO: parsed " << StaticCallEntries.size()
956             << " static call entries\n";
957 
958   return Error::success();
959 }
960 
961 /// The static call table is sorted during boot time in
962 /// static_call_sort_entries(). This makes it possible to update existing
963 /// entries in-place ignoring their relative order.
964 Error LinuxKernelRewriter::rewriteStaticCalls() {
965   if (!StaticCallTableAddress || !StaticCallSection)
966     return Error::success();
967 
968   for (auto &Entry : StaticCallEntries) {
969     if (!Entry.Function)
970       continue;
971 
972     BinaryFunction &BF = *Entry.Function;
973     if (!BC.shouldEmit(BF))
974       continue;
975 
976     // Create a relocation against the label.
977     const uint64_t EntryOffset = StaticCallTableAddress -
978                                  StaticCallSection->getAddress() +
979                                  (Entry.ID - 1) * STATIC_CALL_ENTRY_SIZE;
980     StaticCallSection->addRelocation(EntryOffset, Entry.Label,
981                                      ELF::R_X86_64_PC32, /*Addend*/ 0);
982   }
983 
984   return Error::success();
985 }
986 
987 /// Instructions that access user-space memory can cause page faults. These
988 /// faults will be handled by the kernel and execution will resume at the fixup
989 /// code location if the address was invalid. The kernel uses the exception
990 /// table to match the faulting instruction to its fixup. The table consists of
991 /// the following entries:
992 ///
993 ///   struct exception_table_entry {
994 ///     int insn;
995 ///     int fixup;
996 ///     int data;
997 ///   };
998 ///
999 /// More info at:
1000 /// https://www.kernel.org/doc/Documentation/x86/exception-tables.txt
1001 Error LinuxKernelRewriter::readExceptionTable() {
1002   ExceptionsSection = BC.getUniqueSectionByName("__ex_table");
1003   if (!ExceptionsSection)
1004     return Error::success();
1005 
1006   if (ExceptionsSection->getSize() % EXCEPTION_TABLE_ENTRY_SIZE)
1007     return createStringError(errc::executable_format_error,
1008                              "exception table size error");
1009 
1010   const uint64_t SectionAddress = ExceptionsSection->getAddress();
1011   DataExtractor DE(ExceptionsSection->getContents(),
1012                    BC.AsmInfo->isLittleEndian(),
1013                    BC.AsmInfo->getCodePointerSize());
1014   DataExtractor::Cursor Cursor(0);
1015   uint32_t EntryID = 0;
1016   while (Cursor && Cursor.tell() < ExceptionsSection->getSize()) {
1017     const uint64_t InstAddress =
1018         SectionAddress + Cursor.tell() + (int32_t)DE.getU32(Cursor);
1019     const uint64_t FixupAddress =
1020         SectionAddress + Cursor.tell() + (int32_t)DE.getU32(Cursor);
1021     const uint64_t Data = DE.getU32(Cursor);
1022 
1023     // Consume the status of the cursor.
1024     if (!Cursor)
1025       return createStringError(
1026           errc::executable_format_error,
1027           "out of bounds while reading exception table: %s",
1028           toString(Cursor.takeError()).c_str());
1029 
1030     ++EntryID;
1031 
1032     if (opts::DumpExceptions) {
1033       BC.outs() << "Exception Entry: " << EntryID << '\n';
1034       BC.outs() << "\tInsn:  0x" << Twine::utohexstr(InstAddress) << '\n'
1035                 << "\tFixup: 0x" << Twine::utohexstr(FixupAddress) << '\n'
1036                 << "\tData:  0x" << Twine::utohexstr(Data) << '\n';
1037     }
1038 
1039     MCInst *Inst = nullptr;
1040     MCSymbol *FixupLabel = nullptr;
1041 
1042     BinaryFunction *InstBF = BC.getBinaryFunctionContainingAddress(InstAddress);
1043     if (InstBF && BC.shouldEmit(*InstBF)) {
1044       Inst = InstBF->getInstructionAtOffset(InstAddress - InstBF->getAddress());
1045       if (!Inst)
1046         return createStringError(errc::executable_format_error,
1047                                  "no instruction at address 0x%" PRIx64
1048                                  " in exception table",
1049                                  InstAddress);
1050       BC.MIB->addAnnotation(*Inst, "ExceptionEntry", EntryID);
1051       FunctionsWithExceptions.insert(InstBF);
1052     }
1053 
1054     if (!InstBF && opts::Verbosity) {
1055       BC.outs() << "BOLT-INFO: no function matches instruction at 0x"
1056                 << Twine::utohexstr(InstAddress)
1057                 << " referenced by Linux exception table\n";
1058     }
1059 
1060     BinaryFunction *FixupBF =
1061         BC.getBinaryFunctionContainingAddress(FixupAddress);
1062     if (FixupBF && BC.shouldEmit(*FixupBF)) {
1063       const uint64_t Offset = FixupAddress - FixupBF->getAddress();
1064       if (!FixupBF->getInstructionAtOffset(Offset))
1065         return createStringError(errc::executable_format_error,
1066                                  "no instruction at fixup address 0x%" PRIx64
1067                                  " in exception table",
1068                                  FixupAddress);
1069       FixupLabel = Offset ? FixupBF->addEntryPointAtOffset(Offset)
1070                           : FixupBF->getSymbol();
1071       if (Inst)
1072         BC.MIB->addAnnotation(*Inst, "Fixup", FixupLabel->getName());
1073       FunctionsWithExceptions.insert(FixupBF);
1074     }
1075 
1076     if (!FixupBF && opts::Verbosity) {
1077       BC.outs() << "BOLT-INFO: no function matches fixup code at 0x"
1078                 << Twine::utohexstr(FixupAddress)
1079                 << " referenced by Linux exception table\n";
1080     }
1081   }
1082 
1083   BC.outs() << "BOLT-INFO: parsed "
1084             << ExceptionsSection->getSize() / EXCEPTION_TABLE_ENTRY_SIZE
1085             << " exception table entries\n";
1086 
1087   return Error::success();
1088 }
1089 
1090 /// Depending on the value of CONFIG_BUILDTIME_TABLE_SORT, the kernel expects
1091 /// the exception table to be sorted. Hence we have to sort it after code
1092 /// reordering.
1093 Error LinuxKernelRewriter::rewriteExceptionTable() {
1094   // Disable output of functions with exceptions before rewrite support is
1095   // added.
1096   for (BinaryFunction *BF : FunctionsWithExceptions)
1097     BF->setSimple(false);
1098 
1099   return Error::success();
1100 }
1101 
1102 /// .parainsrtuctions section contains information for patching parvirtual call
1103 /// instructions during runtime. The entries in the section are in the form:
1104 ///
1105 ///    struct paravirt_patch_site {
1106 ///      u8 *instr;    /* original instructions */
1107 ///      u8 type;      /* type of this instruction */
1108 ///      u8 len;       /* length of original instruction */
1109 ///    };
1110 ///
1111 /// Note that the structures are aligned at 8-byte boundary.
1112 Error LinuxKernelRewriter::readParaInstructions() {
1113   ParavirtualPatchSection = BC.getUniqueSectionByName(".parainstructions");
1114   if (!ParavirtualPatchSection)
1115     return Error::success();
1116 
1117   DataExtractor DE = DataExtractor(ParavirtualPatchSection->getContents(),
1118                                    BC.AsmInfo->isLittleEndian(),
1119                                    BC.AsmInfo->getCodePointerSize());
1120   uint32_t EntryID = 0;
1121   DataExtractor::Cursor Cursor(0);
1122   while (Cursor && !DE.eof(Cursor)) {
1123     const uint64_t NextOffset = alignTo(Cursor.tell(), Align(PARA_PATCH_ALIGN));
1124     if (!DE.isValidOffset(NextOffset))
1125       break;
1126 
1127     Cursor.seek(NextOffset);
1128 
1129     const uint64_t InstrLocation = DE.getU64(Cursor);
1130     const uint8_t Type = DE.getU8(Cursor);
1131     const uint8_t Len = DE.getU8(Cursor);
1132 
1133     if (!Cursor)
1134       return createStringError(
1135           errc::executable_format_error,
1136           "out of bounds while reading .parainstructions: %s",
1137           toString(Cursor.takeError()).c_str());
1138 
1139     ++EntryID;
1140 
1141     if (opts::DumpParavirtualPatchSites) {
1142       BC.outs() << "Paravirtual patch site: " << EntryID << '\n';
1143       BC.outs() << "\tInstr: 0x" << Twine::utohexstr(InstrLocation)
1144                 << "\n\tType:  0x" << Twine::utohexstr(Type) << "\n\tLen:   0x"
1145                 << Twine::utohexstr(Len) << '\n';
1146     }
1147 
1148     BinaryFunction *BF = BC.getBinaryFunctionContainingAddress(InstrLocation);
1149     if (!BF && opts::Verbosity) {
1150       BC.outs() << "BOLT-INFO: no function matches address 0x"
1151                 << Twine::utohexstr(InstrLocation)
1152                 << " referenced by paravirutal patch site\n";
1153     }
1154 
1155     if (BF && BC.shouldEmit(*BF)) {
1156       MCInst *Inst =
1157           BF->getInstructionAtOffset(InstrLocation - BF->getAddress());
1158       if (!Inst)
1159         return createStringError(errc::executable_format_error,
1160                                  "no instruction at address 0x%" PRIx64
1161                                  " in paravirtual call site %d",
1162                                  InstrLocation, EntryID);
1163       BC.MIB->addAnnotation(*Inst, "ParaSite", EntryID);
1164     }
1165   }
1166 
1167   BC.outs() << "BOLT-INFO: parsed " << EntryID << " paravirtual patch sites\n";
1168 
1169   return Error::success();
1170 }
1171 
1172 void LinuxKernelRewriter::skipFunctionsWithAnnotation(
1173     StringRef Annotation) const {
1174   for (BinaryFunction &BF : llvm::make_second_range(BC.getBinaryFunctions())) {
1175     if (!BC.shouldEmit(BF))
1176       continue;
1177     for (const BinaryBasicBlock &BB : BF) {
1178       const bool HasAnnotation = llvm::any_of(BB, [&](const MCInst &Inst) {
1179         return BC.MIB->hasAnnotation(Inst, Annotation);
1180       });
1181       if (HasAnnotation) {
1182         BF.setSimple(false);
1183         break;
1184       }
1185     }
1186   }
1187 }
1188 
1189 Error LinuxKernelRewriter::rewriteParaInstructions() {
1190   // Disable output of functions with paravirtual instructions before the
1191   // rewrite support is complete.
1192   skipFunctionsWithAnnotation("ParaSite");
1193 
1194   return Error::success();
1195 }
1196 
1197 /// Process __bug_table section.
1198 /// This section contains information useful for kernel debugging, mostly
1199 /// utilized by WARN()/WARN_ON() macros and deprecated BUG()/BUG_ON().
1200 ///
1201 /// Each entry in the section is a struct bug_entry that contains a pointer to
1202 /// the ud2 instruction corresponding to the bug, corresponding file name (both
1203 /// pointers use PC relative offset addressing), line number, and flags.
1204 /// The definition of the struct bug_entry can be found in
1205 /// `include/asm-generic/bug.h`. The first entry in the struct is an instruction
1206 /// address encoded as a PC-relative offset. In theory, it could be an absolute
1207 /// address if CONFIG_GENERIC_BUG_RELATIVE_POINTERS is not set, but in practice
1208 /// the kernel code relies on it being a relative offset on x86-64.
1209 Error LinuxKernelRewriter::readBugTable() {
1210   BugTableSection = BC.getUniqueSectionByName("__bug_table");
1211   if (!BugTableSection)
1212     return Error::success();
1213 
1214   if (BugTableSection->getSize() % BUG_TABLE_ENTRY_SIZE)
1215     return createStringError(errc::executable_format_error,
1216                              "bug table size error");
1217 
1218   const uint64_t SectionAddress = BugTableSection->getAddress();
1219   DataExtractor DE(BugTableSection->getContents(), BC.AsmInfo->isLittleEndian(),
1220                    BC.AsmInfo->getCodePointerSize());
1221   DataExtractor::Cursor Cursor(0);
1222   uint32_t EntryID = 0;
1223   while (Cursor && Cursor.tell() < BugTableSection->getSize()) {
1224     const uint64_t Pos = Cursor.tell();
1225     const uint64_t InstAddress =
1226         SectionAddress + Pos + (int32_t)DE.getU32(Cursor);
1227     Cursor.seek(Pos + BUG_TABLE_ENTRY_SIZE);
1228 
1229     if (!Cursor)
1230       return createStringError(errc::executable_format_error,
1231                                "out of bounds while reading __bug_table: %s",
1232                                toString(Cursor.takeError()).c_str());
1233 
1234     ++EntryID;
1235 
1236     BinaryFunction *BF = BC.getBinaryFunctionContainingAddress(InstAddress);
1237     if (!BF && opts::Verbosity) {
1238       BC.outs() << "BOLT-INFO: no function matches address 0x"
1239                 << Twine::utohexstr(InstAddress)
1240                 << " referenced by bug table\n";
1241     }
1242 
1243     if (BF && BC.shouldEmit(*BF)) {
1244       MCInst *Inst = BF->getInstructionAtOffset(InstAddress - BF->getAddress());
1245       if (!Inst)
1246         return createStringError(errc::executable_format_error,
1247                                  "no instruction at address 0x%" PRIx64
1248                                  " referenced by bug table entry %d",
1249                                  InstAddress, EntryID);
1250       BC.MIB->addAnnotation(*Inst, "BugEntry", EntryID);
1251 
1252       FunctionBugList[BF].push_back(EntryID);
1253     }
1254   }
1255 
1256   BC.outs() << "BOLT-INFO: parsed " << EntryID << " bug table entries\n";
1257 
1258   return Error::success();
1259 }
1260 
1261 /// find_bug() uses linear search to match an address to an entry in the bug
1262 /// table. Hence, there is no need to sort entries when rewriting the table.
1263 /// When we need to erase an entry, we set its instruction address to zero.
1264 Error LinuxKernelRewriter::rewriteBugTable() {
1265   if (!BugTableSection)
1266     return Error::success();
1267 
1268   for (BinaryFunction &BF : llvm::make_second_range(BC.getBinaryFunctions())) {
1269     if (!BC.shouldEmit(BF))
1270       continue;
1271 
1272     if (!FunctionBugList.count(&BF))
1273       continue;
1274 
1275     // Bugs that will be emitted for this function.
1276     DenseSet<uint32_t> EmittedIDs;
1277     for (BinaryBasicBlock &BB : BF) {
1278       for (MCInst &Inst : BB) {
1279         if (!BC.MIB->hasAnnotation(Inst, "BugEntry"))
1280           continue;
1281         const uint32_t ID = BC.MIB->getAnnotationAs<uint32_t>(Inst, "BugEntry");
1282         EmittedIDs.insert(ID);
1283 
1284         // Create a relocation entry for this bug entry.
1285         MCSymbol *Label =
1286             BC.MIB->getOrCreateInstLabel(Inst, "__BUG_", BC.Ctx.get());
1287         const uint64_t EntryOffset = (ID - 1) * BUG_TABLE_ENTRY_SIZE;
1288         BugTableSection->addRelocation(EntryOffset, Label, ELF::R_X86_64_PC32,
1289                                        /*Addend*/ 0);
1290       }
1291     }
1292 
1293     // Clear bug entries that were not emitted for this function, e.g. as a
1294     // result of DCE, but setting their instruction address to zero.
1295     for (const uint32_t ID : FunctionBugList[&BF]) {
1296       if (!EmittedIDs.count(ID)) {
1297         const uint64_t EntryOffset = (ID - 1) * BUG_TABLE_ENTRY_SIZE;
1298         BugTableSection->addRelocation(EntryOffset, nullptr, ELF::R_X86_64_PC32,
1299                                        /*Addend*/ 0);
1300       }
1301     }
1302   }
1303 
1304   return Error::success();
1305 }
1306 
1307 /// The kernel can replace certain instruction sequences depending on hardware
1308 /// it is running on and features specified during boot time. The information
1309 /// about alternative instruction sequences is stored in .altinstructions
1310 /// section. The format of entries in this section is defined in
1311 /// arch/x86/include/asm/alternative.h:
1312 ///
1313 ///   struct alt_instr {
1314 ///     s32 instr_offset;
1315 ///     s32 repl_offset;
1316 ///     uXX feature;
1317 ///     u8  instrlen;
1318 ///     u8  replacementlen;
1319 ///	    u8  padlen;         // present in older kernels
1320 ///   } __packed;
1321 ///
1322 /// Note the structures is packed.
1323 Error LinuxKernelRewriter::readAltInstructions() {
1324   AltInstrSection = BC.getUniqueSectionByName(".altinstructions");
1325   if (!AltInstrSection)
1326     return Error::success();
1327 
1328   const uint64_t Address = AltInstrSection->getAddress();
1329   DataExtractor DE = DataExtractor(AltInstrSection->getContents(),
1330                                    BC.AsmInfo->isLittleEndian(),
1331                                    BC.AsmInfo->getCodePointerSize());
1332   uint64_t EntryID = 0;
1333   DataExtractor::Cursor Cursor(0);
1334   while (Cursor && !DE.eof(Cursor)) {
1335     const uint64_t OrgInstAddress =
1336         Address + Cursor.tell() + (int32_t)DE.getU32(Cursor);
1337     const uint64_t AltInstAddress =
1338         Address + Cursor.tell() + (int32_t)DE.getU32(Cursor);
1339     const uint64_t Feature = DE.getUnsigned(Cursor, opts::AltInstFeatureSize);
1340     const uint8_t OrgSize = DE.getU8(Cursor);
1341     const uint8_t AltSize = DE.getU8(Cursor);
1342 
1343     // Older kernels may have the padlen field.
1344     const uint8_t PadLen = opts::AltInstHasPadLen ? DE.getU8(Cursor) : 0;
1345 
1346     if (!Cursor)
1347       return createStringError(
1348           errc::executable_format_error,
1349           "out of bounds while reading .altinstructions: %s",
1350           toString(Cursor.takeError()).c_str());
1351 
1352     ++EntryID;
1353 
1354     if (opts::DumpAltInstructions) {
1355       BC.outs() << "Alternative instruction entry: " << EntryID
1356                 << "\n\tOrg:     0x" << Twine::utohexstr(OrgInstAddress)
1357                 << "\n\tAlt:     0x" << Twine::utohexstr(AltInstAddress)
1358                 << "\n\tFeature: 0x" << Twine::utohexstr(Feature)
1359                 << "\n\tOrgSize: " << (int)OrgSize
1360                 << "\n\tAltSize: " << (int)AltSize << '\n';
1361       if (opts::AltInstHasPadLen)
1362         BC.outs() << "\tPadLen:  " << (int)PadLen << '\n';
1363     }
1364 
1365     if (AltSize > OrgSize)
1366       return createStringError(errc::executable_format_error,
1367                                "error reading .altinstructions");
1368 
1369     BinaryFunction *BF = BC.getBinaryFunctionContainingAddress(OrgInstAddress);
1370     if (!BF && opts::Verbosity) {
1371       BC.outs() << "BOLT-INFO: no function matches address 0x"
1372                 << Twine::utohexstr(OrgInstAddress)
1373                 << " of instruction from .altinstructions\n";
1374     }
1375 
1376     BinaryFunction *AltBF =
1377         BC.getBinaryFunctionContainingAddress(AltInstAddress);
1378     if (AltBF && BC.shouldEmit(*AltBF)) {
1379       BC.errs()
1380           << "BOLT-WARNING: alternative instruction sequence found in function "
1381           << *AltBF << '\n';
1382       AltBF->setIgnored();
1383     }
1384 
1385     if (!BF || !BC.shouldEmit(*BF))
1386       continue;
1387 
1388     if (OrgInstAddress + OrgSize > BF->getAddress() + BF->getSize())
1389       return createStringError(errc::executable_format_error,
1390                                "error reading .altinstructions");
1391 
1392     MCInst *Inst =
1393         BF->getInstructionAtOffset(OrgInstAddress - BF->getAddress());
1394     if (!Inst)
1395       return createStringError(errc::executable_format_error,
1396                                "no instruction at address 0x%" PRIx64
1397                                " referenced by .altinstructions entry %d",
1398                                OrgInstAddress, EntryID);
1399 
1400     // There could be more than one alternative instruction sequences for the
1401     // same original instruction. Annotate each alternative separately.
1402     std::string AnnotationName = "AltInst";
1403     unsigned N = 2;
1404     while (BC.MIB->hasAnnotation(*Inst, AnnotationName))
1405       AnnotationName = "AltInst" + std::to_string(N++);
1406 
1407     BC.MIB->addAnnotation(*Inst, AnnotationName, EntryID);
1408 
1409     // Annotate all instructions from the original sequence. Note that it's not
1410     // the most efficient way to look for instructions in the address range,
1411     // but since alternative instructions are uncommon, it will do for now.
1412     for (uint32_t Offset = 1; Offset < OrgSize; ++Offset) {
1413       Inst = BF->getInstructionAtOffset(OrgInstAddress + Offset -
1414                                         BF->getAddress());
1415       if (Inst)
1416         BC.MIB->addAnnotation(*Inst, AnnotationName, EntryID);
1417     }
1418   }
1419 
1420   BC.outs() << "BOLT-INFO: parsed " << EntryID
1421             << " alternative instruction entries\n";
1422 
1423   return Error::success();
1424 }
1425 
1426 Error LinuxKernelRewriter::rewriteAltInstructions() {
1427   // Disable output of functions with alt instructions before the rewrite
1428   // support is complete.
1429   skipFunctionsWithAnnotation("AltInst");
1430 
1431   return Error::success();
1432 }
1433 
1434 /// When the Linux kernel needs to handle an error associated with a given PCI
1435 /// device, it uses a table stored in .pci_fixup section to locate a fixup code
1436 /// specific to the vendor and the problematic device. The section contains a
1437 /// list of the following structures defined in include/linux/pci.h:
1438 ///
1439 ///   struct pci_fixup {
1440 ///     u16 vendor;     /* Or PCI_ANY_ID */
1441 ///     u16 device;     /* Or PCI_ANY_ID */
1442 ///     u32 class;      /* Or PCI_ANY_ID */
1443 ///     unsigned int class_shift; /* should be 0, 8, 16 */
1444 ///     int hook_offset;
1445 ///   };
1446 ///
1447 /// Normally, the hook will point to a function start and we don't have to
1448 /// update the pointer if we are not relocating functions. Hence, while reading
1449 /// the table we validate this assumption. If a function has a fixup code in the
1450 /// middle of its body, we issue a warning and ignore it.
1451 Error LinuxKernelRewriter::readPCIFixupTable() {
1452   PCIFixupSection = BC.getUniqueSectionByName(".pci_fixup");
1453   if (!PCIFixupSection)
1454     return Error::success();
1455 
1456   if (PCIFixupSection->getSize() % PCI_FIXUP_ENTRY_SIZE)
1457     return createStringError(errc::executable_format_error,
1458                              "PCI fixup table size error");
1459 
1460   const uint64_t Address = PCIFixupSection->getAddress();
1461   DataExtractor DE = DataExtractor(PCIFixupSection->getContents(),
1462                                    BC.AsmInfo->isLittleEndian(),
1463                                    BC.AsmInfo->getCodePointerSize());
1464   uint64_t EntryID = 0;
1465   DataExtractor::Cursor Cursor(0);
1466   while (Cursor && !DE.eof(Cursor)) {
1467     const uint16_t Vendor = DE.getU16(Cursor);
1468     const uint16_t Device = DE.getU16(Cursor);
1469     const uint32_t Class = DE.getU32(Cursor);
1470     const uint32_t ClassShift = DE.getU32(Cursor);
1471     const uint64_t HookAddress =
1472         Address + Cursor.tell() + (int32_t)DE.getU32(Cursor);
1473 
1474     if (!Cursor)
1475       return createStringError(errc::executable_format_error,
1476                                "out of bounds while reading .pci_fixup: %s",
1477                                toString(Cursor.takeError()).c_str());
1478 
1479     ++EntryID;
1480 
1481     if (opts::DumpPCIFixups) {
1482       BC.outs() << "PCI fixup entry: " << EntryID << "\n\tVendor       0x"
1483                 << Twine::utohexstr(Vendor) << "\n\tDevice:      0x"
1484                 << Twine::utohexstr(Device) << "\n\tClass:       0x"
1485                 << Twine::utohexstr(Class) << "\n\tClassShift:  0x"
1486                 << Twine::utohexstr(ClassShift) << "\n\tHookAddress: 0x"
1487                 << Twine::utohexstr(HookAddress) << '\n';
1488     }
1489 
1490     BinaryFunction *BF = BC.getBinaryFunctionContainingAddress(HookAddress);
1491     if (!BF && opts::Verbosity) {
1492       BC.outs() << "BOLT-INFO: no function matches address 0x"
1493                 << Twine::utohexstr(HookAddress)
1494                 << " of hook from .pci_fixup\n";
1495     }
1496 
1497     if (!BF || !BC.shouldEmit(*BF))
1498       continue;
1499 
1500     if (const uint64_t Offset = HookAddress - BF->getAddress()) {
1501       BC.errs() << "BOLT-WARNING: PCI fixup detected in the middle of function "
1502                 << *BF << " at offset 0x" << Twine::utohexstr(Offset) << '\n';
1503       BF->setSimple(false);
1504     }
1505   }
1506 
1507   BC.outs() << "BOLT-INFO: parsed " << EntryID << " PCI fixup entries\n";
1508 
1509   return Error::success();
1510 }
1511 
1512 /// Runtime code modification used by static keys is the most ubiquitous
1513 /// self-modifying feature of the Linux kernel. The idea is to eliminate the
1514 /// condition check and associated conditional jump on a hot path if that
1515 /// condition (based on a boolean value of a static key) does not change often.
1516 /// Whenever the condition changes, the kernel runtime modifies all code paths
1517 /// associated with that key flipping the code between nop and (unconditional)
1518 /// jump. The information about the code is stored in a static key jump table
1519 /// and contains the list of entries of the following type from
1520 /// include/linux/jump_label.h:
///
1522 ///   struct jump_entry {
1523 ///     s32 code;
1524 ///     s32 target;
1525 ///     long key; // key may be far away from the core kernel under KASLR
1526 ///   };
1527 ///
1528 /// The list does not have to be stored in any sorted way, but it is sorted at
1529 /// boot time (or module initialization time) first by "key" and then by "code".
1530 /// jump_label_sort_entries() is responsible for sorting the table.
1531 ///
1532 /// The key in jump_entry structure uses lower two bits of the key address
1533 /// (which itself is aligned) to store extra information. We are interested in
1534 /// the lower bit which indicates if the key is likely to be set on the code
1535 /// path associated with this jump_entry.
1536 ///
1537 /// static_key_{enable,disable}() functions modify the code based on key and
1538 /// jump table entries.
1539 ///
1540 /// jump_label_update() updates all code entries for a given key. Batch mode is
1541 /// used for x86.
1542 ///
1543 /// The actual patching happens in text_poke_bp_batch() that overrides the first
1544 /// byte of the sequence with int3 before proceeding with actual code
1545 /// replacement.
1546 Error LinuxKernelRewriter::readStaticKeysJumpTable() {
1547   const BinaryData *StaticKeysJumpTable =
1548       BC.getBinaryDataByName("__start___jump_table");
1549   if (!StaticKeysJumpTable)
1550     return Error::success();
1551 
1552   StaticKeysJumpTableAddress = StaticKeysJumpTable->getAddress();
1553 
1554   const BinaryData *Stop = BC.getBinaryDataByName("__stop___jump_table");
1555   if (!Stop)
1556     return createStringError(errc::executable_format_error,
1557                              "missing __stop___jump_table symbol");
1558 
1559   ErrorOr<BinarySection &> ErrorOrSection =
1560       BC.getSectionForAddress(StaticKeysJumpTableAddress);
1561   if (!ErrorOrSection)
1562     return createStringError(errc::executable_format_error,
1563                              "no section matching __start___jump_table");
1564 
1565   StaticKeysJumpSection = *ErrorOrSection;
1566   if (!StaticKeysJumpSection->containsAddress(Stop->getAddress() - 1))
1567     return createStringError(errc::executable_format_error,
1568                              "__stop___jump_table not in the same section "
1569                              "as __start___jump_table");
1570 
1571   if ((Stop->getAddress() - StaticKeysJumpTableAddress) %
1572       STATIC_KEYS_JUMP_ENTRY_SIZE)
1573     return createStringError(errc::executable_format_error,
1574                              "static keys jump table size error");
1575 
1576   const uint64_t SectionAddress = StaticKeysJumpSection->getAddress();
1577   DataExtractor DE(StaticKeysJumpSection->getContents(),
1578                    BC.AsmInfo->isLittleEndian(),
1579                    BC.AsmInfo->getCodePointerSize());
1580   DataExtractor::Cursor Cursor(StaticKeysJumpTableAddress - SectionAddress);
1581   uint32_t EntryID = 0;
1582   while (Cursor && Cursor.tell() < Stop->getAddress() - SectionAddress) {
1583     const uint64_t JumpAddress =
1584         SectionAddress + Cursor.tell() + (int32_t)DE.getU32(Cursor);
1585     const uint64_t TargetAddress =
1586         SectionAddress + Cursor.tell() + (int32_t)DE.getU32(Cursor);
1587     const uint64_t KeyAddress =
1588         SectionAddress + Cursor.tell() + (int64_t)DE.getU64(Cursor);
1589 
1590     // Consume the status of the cursor.
1591     if (!Cursor)
1592       return createStringError(
1593           errc::executable_format_error,
1594           "out of bounds while reading static keys jump table: %s",
1595           toString(Cursor.takeError()).c_str());
1596 
1597     ++EntryID;
1598 
1599     JumpInfo.push_back(JumpInfoEntry());
1600     JumpInfoEntry &Info = JumpInfo.back();
1601     Info.Likely = KeyAddress & 1;
1602 
1603     if (opts::DumpStaticKeys) {
1604       BC.outs() << "Static key jump entry: " << EntryID
1605                 << "\n\tJumpAddress:   0x" << Twine::utohexstr(JumpAddress)
1606                 << "\n\tTargetAddress: 0x" << Twine::utohexstr(TargetAddress)
1607                 << "\n\tKeyAddress:    0x" << Twine::utohexstr(KeyAddress)
1608                 << "\n\tIsLikely:      " << Info.Likely << '\n';
1609     }
1610 
1611     BinaryFunction *BF = BC.getBinaryFunctionContainingAddress(JumpAddress);
1612     if (!BF && opts::Verbosity) {
1613       BC.outs()
1614           << "BOLT-INFO: no function matches address 0x"
1615           << Twine::utohexstr(JumpAddress)
1616           << " of jump instruction referenced from static keys jump table\n";
1617     }
1618 
1619     if (!BF || !BC.shouldEmit(*BF))
1620       continue;
1621 
1622     MCInst *Inst = BF->getInstructionAtOffset(JumpAddress - BF->getAddress());
1623     if (!Inst)
1624       return createStringError(
1625           errc::executable_format_error,
1626           "no instruction at static keys jump site address 0x%" PRIx64,
1627           JumpAddress);
1628 
1629     if (!BF->containsAddress(TargetAddress))
1630       return createStringError(
1631           errc::executable_format_error,
1632           "invalid target of static keys jump at 0x%" PRIx64 " : 0x%" PRIx64,
1633           JumpAddress, TargetAddress);
1634 
1635     const bool IsBranch = BC.MIB->isBranch(*Inst);
1636     if (!IsBranch && !BC.MIB->isNoop(*Inst))
1637       return createStringError(errc::executable_format_error,
1638                                "jump or nop expected at address 0x%" PRIx64,
1639                                JumpAddress);
1640 
1641     const uint64_t Size = BC.computeInstructionSize(*Inst);
1642     if (Size != 2 && Size != 5) {
1643       return createStringError(
1644           errc::executable_format_error,
1645           "unexpected static keys jump size at address 0x%" PRIx64,
1646           JumpAddress);
1647     }
1648 
1649     MCSymbol *Target = BF->registerBranch(JumpAddress, TargetAddress);
1650     MCInst StaticKeyBranch;
1651 
1652     // Create a conditional branch instruction. The actual conditional code type
1653     // should not matter as long as it's a valid code. The instruction should be
1654     // treated as a conditional branch for control-flow purposes. Before we emit
1655     // the code, it will be converted to a different instruction in
1656     // rewriteStaticKeysJumpTable().
1657     //
1658     // NB: for older kernels, under LongJumpLabels option, we create long
1659     //     conditional branch to guarantee that code size estimation takes
1660     //     into account the extra bytes needed for long branch that will be used
1661     //     by the kernel patching code. Newer kernels can work with both short
1662     //     and long branches. The code for long conditional branch is larger
1663     //     than unconditional one, so we are pessimistic in our estimations.
1664     if (opts::LongJumpLabels)
1665       BC.MIB->createLongCondBranch(StaticKeyBranch, Target, 0, BC.Ctx.get());
1666     else
1667       BC.MIB->createCondBranch(StaticKeyBranch, Target, 0, BC.Ctx.get());
1668     BC.MIB->moveAnnotations(std::move(*Inst), StaticKeyBranch);
1669     BC.MIB->setDynamicBranch(StaticKeyBranch, EntryID);
1670     *Inst = StaticKeyBranch;
1671 
1672     // IsBranch = InitialValue ^ LIKELY
1673     //
1674     //    0 0 0
1675     //    1 0 1
1676     //    1 1 0
1677     //    0 1 1
1678     //
1679     // => InitialValue = IsBranch ^ LIKELY
1680     Info.InitValue = IsBranch ^ Info.Likely;
1681 
1682     // Add annotations to facilitate manual code analysis.
1683     BC.MIB->addAnnotation(*Inst, "Likely", Info.Likely);
1684     BC.MIB->addAnnotation(*Inst, "InitValue", Info.InitValue);
1685     if (!BC.MIB->getSize(*Inst))
1686       BC.MIB->setSize(*Inst, Size);
1687 
1688     if (!BC.MIB->getOffset(*Inst))
1689       BC.MIB->setOffset(*Inst, JumpAddress - BF->getAddress());
1690 
1691     if (opts::LongJumpLabels)
1692       BC.MIB->setSize(*Inst, 5);
1693   }
1694 
1695   BC.outs() << "BOLT-INFO: parsed " << EntryID << " static keys jump entries\n";
1696 
1697   return Error::success();
1698 }
1699 
1700 // Pre-emit pass. Convert dynamic branch instructions into jumps that could be
1701 // relaxed. In post-emit pass we will convert those jumps into nops when
1702 // necessary. We do the unconditional conversion into jumps so that the jumps
1703 // can be relaxed and the optimal size of jump/nop instruction is selected.
1704 Error LinuxKernelRewriter::rewriteStaticKeysJumpTable() {
1705   if (!StaticKeysJumpSection)
1706     return Error::success();
1707 
1708   uint64_t NumShort = 0;
1709   uint64_t NumLong = 0;
1710   for (BinaryFunction &BF : llvm::make_second_range(BC.getBinaryFunctions())) {
1711     if (!BC.shouldEmit(BF))
1712       continue;
1713 
1714     for (BinaryBasicBlock &BB : BF) {
1715       for (MCInst &Inst : BB) {
1716         if (!BC.MIB->isDynamicBranch(Inst))
1717           continue;
1718 
1719         const uint32_t EntryID = *BC.MIB->getDynamicBranchID(Inst);
1720         MCSymbol *Target =
1721             const_cast<MCSymbol *>(BC.MIB->getTargetSymbol(Inst));
1722         assert(Target && "Target symbol should be set.");
1723 
1724         const JumpInfoEntry &Info = JumpInfo[EntryID - 1];
1725         const bool IsBranch = Info.Likely ^ Info.InitValue;
1726 
1727         uint32_t Size = *BC.MIB->getSize(Inst);
1728         if (Size == 2)
1729           ++NumShort;
1730         else if (Size == 5)
1731           ++NumLong;
1732         else
1733           llvm_unreachable("Wrong size for static keys jump instruction.");
1734 
1735         MCInst NewInst;
1736         // Replace the instruction with unconditional jump even if it needs to
1737         // be nop in the binary.
1738         if (opts::LongJumpLabels) {
1739           BC.MIB->createLongUncondBranch(NewInst, Target, BC.Ctx.get());
1740         } else {
1741           // Newer kernels can handle short and long jumps for static keys.
1742           // Optimistically, emit short jump and check if it gets relaxed into
1743           // a long one during post-emit. Only then convert the jump to a nop.
1744           BC.MIB->createUncondBranch(NewInst, Target, BC.Ctx.get());
1745         }
1746 
1747         BC.MIB->moveAnnotations(std::move(Inst), NewInst);
1748         Inst = NewInst;
1749 
1750         // Mark the instruction for nop conversion.
1751         if (!IsBranch)
1752           NopIDs.insert(EntryID);
1753 
1754         MCSymbol *Label =
1755             BC.MIB->getOrCreateInstLabel(Inst, "__SK_", BC.Ctx.get());
1756 
1757         // Create a relocation against the label.
1758         const uint64_t EntryOffset = StaticKeysJumpTableAddress -
1759                                      StaticKeysJumpSection->getAddress() +
1760                                      (EntryID - 1) * 16;
1761         StaticKeysJumpSection->addRelocation(EntryOffset, Label,
1762                                              ELF::R_X86_64_PC32,
1763                                              /*Addend*/ 0);
1764         StaticKeysJumpSection->addRelocation(EntryOffset + 4, Target,
1765                                              ELF::R_X86_64_PC32, /*Addend*/ 0);
1766       }
1767     }
1768   }
1769 
1770   BC.outs() << "BOLT-INFO: the input contains " << NumShort << " short and "
1771             << NumLong << " long static keys jumps in optimized functions\n";
1772 
1773   return Error::success();
1774 }
1775 
1776 // Post-emit pass of static keys jump section. Convert jumps to nops.
1777 Error LinuxKernelRewriter::updateStaticKeysJumpTablePostEmit() {
1778   if (!StaticKeysJumpSection || !StaticKeysJumpSection->isFinalized())
1779     return Error::success();
1780 
1781   const uint64_t SectionAddress = StaticKeysJumpSection->getAddress();
1782   DataExtractor DE(StaticKeysJumpSection->getOutputContents(),
1783                    BC.AsmInfo->isLittleEndian(),
1784                    BC.AsmInfo->getCodePointerSize());
1785   DataExtractor::Cursor Cursor(StaticKeysJumpTableAddress - SectionAddress);
1786   const BinaryData *Stop = BC.getBinaryDataByName("__stop___jump_table");
1787   uint32_t EntryID = 0;
1788   uint64_t NumShort = 0;
1789   uint64_t NumLong = 0;
1790   while (Cursor && Cursor.tell() < Stop->getAddress() - SectionAddress) {
1791     const uint64_t JumpAddress =
1792         SectionAddress + Cursor.tell() + (int32_t)DE.getU32(Cursor);
1793     const uint64_t TargetAddress =
1794         SectionAddress + Cursor.tell() + (int32_t)DE.getU32(Cursor);
1795     const uint64_t KeyAddress =
1796         SectionAddress + Cursor.tell() + (int64_t)DE.getU64(Cursor);
1797 
1798     // Consume the status of the cursor.
1799     if (!Cursor)
1800       return createStringError(errc::executable_format_error,
1801                                "out of bounds while updating static keys: %s",
1802                                toString(Cursor.takeError()).c_str());
1803 
1804     ++EntryID;
1805 
1806     LLVM_DEBUG({
1807       dbgs() << "\n\tJumpAddress:   0x" << Twine::utohexstr(JumpAddress)
1808              << "\n\tTargetAddress: 0x" << Twine::utohexstr(TargetAddress)
1809              << "\n\tKeyAddress:    0x" << Twine::utohexstr(KeyAddress) << '\n';
1810     });
1811     (void)TargetAddress;
1812     (void)KeyAddress;
1813 
1814     BinaryFunction *BF =
1815         BC.getBinaryFunctionContainingAddress(JumpAddress,
1816                                               /*CheckPastEnd*/ false,
1817                                               /*UseMaxSize*/ true);
1818     assert(BF && "Cannot get function for modified static key.");
1819 
1820     if (!BF->isEmitted())
1821       continue;
1822 
1823     // Disassemble instruction to collect stats even if nop-conversion is
1824     // unnecessary.
1825     MutableArrayRef<uint8_t> Contents = MutableArrayRef<uint8_t>(
1826         reinterpret_cast<uint8_t *>(BF->getImageAddress()), BF->getImageSize());
1827     assert(Contents.size() && "Non-empty function image expected.");
1828 
1829     MCInst Inst;
1830     uint64_t Size;
1831     const uint64_t JumpOffset = JumpAddress - BF->getAddress();
1832     if (!BC.DisAsm->getInstruction(Inst, Size, Contents.slice(JumpOffset), 0,
1833                                    nulls())) {
1834       llvm_unreachable("Unable to disassemble jump instruction.");
1835     }
1836     assert(BC.MIB->isBranch(Inst) && "Branch instruction expected.");
1837 
1838     if (Size == 2)
1839       ++NumShort;
1840     else if (Size == 5)
1841       ++NumLong;
1842     else
1843       llvm_unreachable("Unexpected size for static keys jump instruction.");
1844 
1845     // Check if we need to convert jump instruction into a nop.
1846     if (!NopIDs.contains(EntryID))
1847       continue;
1848 
1849     SmallString<15> NopCode;
1850     raw_svector_ostream VecOS(NopCode);
1851     BC.MAB->writeNopData(VecOS, Size, BC.STI.get());
1852     for (uint64_t I = 0; I < Size; ++I)
1853       Contents[JumpOffset + I] = NopCode[I];
1854   }
1855 
1856   BC.outs() << "BOLT-INFO: written " << NumShort << " short and " << NumLong
1857             << " long static keys jumps in optimized functions\n";
1858 
1859   return Error::success();
1860 }
1861 
1862 } // namespace
1863 
1864 std::unique_ptr<MetadataRewriter>
1865 llvm::bolt::createLinuxKernelRewriter(BinaryContext &BC) {
1866   return std::make_unique<LinuxKernelRewriter>(BC);
1867 }
1868