xref: /llvm-project/lld/ELF/Arch/X86.cpp (revision c1a6defd9ff1540638d660888c5f32ea5cf4fa7d)
1 //===- X86.cpp ------------------------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "OutputSections.h"
10 #include "Symbols.h"
11 #include "SyntheticSections.h"
12 #include "Target.h"
13 #include "lld/Common/ErrorHandler.h"
14 #include "llvm/Support/Endian.h"
15 
16 using namespace llvm;
17 using namespace llvm::support::endian;
18 using namespace llvm::ELF;
19 using namespace lld;
20 using namespace lld::elf;
21 
22 namespace {
23 class X86 : public TargetInfo {
24 public:
25   X86(Ctx &);
26   int getTlsGdRelaxSkip(RelType type) const override;
27   RelExpr getRelExpr(RelType type, const Symbol &s,
28                      const uint8_t *loc) const override;
29   int64_t getImplicitAddend(const uint8_t *buf, RelType type) const override;
30   void writeGotPltHeader(uint8_t *buf) const override;
31   RelType getDynRel(RelType type) const override;
32   void writeGotPlt(uint8_t *buf, const Symbol &s) const override;
33   void writeIgotPlt(uint8_t *buf, const Symbol &s) const override;
34   void writePltHeader(uint8_t *buf) const override;
35   void writePlt(uint8_t *buf, const Symbol &sym,
36                 uint64_t pltEntryAddr) const override;
37   void relocate(uint8_t *loc, const Relocation &rel,
38                 uint64_t val) const override;
39 
40   RelExpr adjustTlsExpr(RelType type, RelExpr expr) const override;
41   void relocateAlloc(InputSectionBase &sec, uint8_t *buf) const override;
42 
43 private:
44   void relaxTlsGdToLe(uint8_t *loc, const Relocation &rel, uint64_t val) const;
45   void relaxTlsGdToIe(uint8_t *loc, const Relocation &rel, uint64_t val) const;
46   void relaxTlsLdToLe(uint8_t *loc, const Relocation &rel, uint64_t val) const;
47   void relaxTlsIeToLe(uint8_t *loc, const Relocation &rel, uint64_t val) const;
48 };
49 } // namespace
50 
51 X86::X86(Ctx &ctx) : TargetInfo(ctx) {
52   copyRel = R_386_COPY;
53   gotRel = R_386_GLOB_DAT;
54   pltRel = R_386_JUMP_SLOT;
55   iRelativeRel = R_386_IRELATIVE;
56   relativeRel = R_386_RELATIVE;
57   symbolicRel = R_386_32;
58   tlsDescRel = R_386_TLS_DESC;
59   tlsGotRel = R_386_TLS_TPOFF;
60   tlsModuleIndexRel = R_386_TLS_DTPMOD32;
61   tlsOffsetRel = R_386_TLS_DTPOFF32;
62   gotBaseSymInGotPlt = true;
63   pltHeaderSize = 16;
64   pltEntrySize = 16;
65   ipltEntrySize = 16;
66   trapInstr = {0xcc, 0xcc, 0xcc, 0xcc}; // 0xcc = INT3
67 
68   // Align to the non-PAE large page size (known as a superpage or huge page).
69   // FreeBSD automatically promotes large, superpage-aligned allocations.
70   defaultImageBase = 0x400000;
71 }
72 
73 int X86::getTlsGdRelaxSkip(RelType type) const {
74   // TLSDESC relocations are processed separately. See relaxTlsGdToLe below.
75   return type == R_386_TLS_GOTDESC || type == R_386_TLS_DESC_CALL ? 1 : 2;
76 }
77 
78 RelExpr X86::getRelExpr(RelType type, const Symbol &s,
79                         const uint8_t *loc) const {
80   switch (type) {
81   case R_386_8:
82   case R_386_16:
83   case R_386_32:
84     return R_ABS;
85   case R_386_TLS_LDO_32:
86     return R_DTPREL;
87   case R_386_TLS_GD:
88     return R_TLSGD_GOTPLT;
89   case R_386_TLS_LDM:
90     return R_TLSLD_GOTPLT;
91   case R_386_PLT32:
92     return R_PLT_PC;
93   case R_386_PC8:
94   case R_386_PC16:
95   case R_386_PC32:
96     return R_PC;
97   case R_386_GOTPC:
98     return R_GOTPLTONLY_PC;
99   case R_386_TLS_IE:
100     return R_GOT;
101   case R_386_GOT32:
102   case R_386_GOT32X:
103     // These relocations are arguably mis-designed because their calculations
104     // depend on the instructions they are applied to. This is bad because we
105     // usually don't care about whether the target section contains valid
106     // machine instructions or not. But this is part of the documented ABI, so
107     // we had to implement as the standard requires.
108     //
109     // x86 does not support PC-relative data access. Therefore, in order to
110     // access GOT contents, a GOT address needs to be known at link-time
111     // (which means non-PIC) or compilers have to emit code to get a GOT
112     // address at runtime (which means code is position-independent but
113     // compilers need to emit extra code for each GOT access.) This decision
114     // is made at compile-time. In the latter case, compilers emit code to
115     // load a GOT address to a register, which is usually %ebx.
116     //
117     // So, there are two ways to refer to symbol foo's GOT entry: foo@GOT or
118     // foo@GOT(%ebx).
119     //
120     // foo@GOT is not usable in PIC. If we are creating a PIC output and if we
121     // find such relocation, we should report an error. foo@GOT is resolved to
122     // an *absolute* address of foo's GOT entry, because both GOT address and
123     // foo's offset are known. In other words, it's G + A.
124     //
125     // foo@GOT(%ebx) needs to be resolved to a *relative* offset from a GOT to
126     // foo's GOT entry in the table, because GOT address is not known but foo's
127     // offset in the table is known. It's G + A - GOT.
128     //
129     // It's unfortunate that compilers emit the same relocation for these
130     // different use cases. In order to distinguish them, we have to read a
131     // machine instruction.
132     //
133     // The following code implements it. We assume that Loc[0] is the first byte
134     // of a displacement or an immediate field of a valid machine
135     // instruction. That means a ModRM byte is at Loc[-1]. By taking a look at
136     // the byte, we can determine whether the instruction uses the operand as an
137     // absolute address (R_GOT) or a register-relative address (R_GOTPLT).
138     return (loc[-1] & 0xc7) == 0x5 ? R_GOT : R_GOTPLT;
139   case R_386_TLS_GOTDESC:
140     return R_TLSDESC_GOTPLT;
141   case R_386_TLS_DESC_CALL:
142     return R_TLSDESC_CALL;
143   case R_386_TLS_GOTIE:
144     return R_GOTPLT;
145   case R_386_GOTOFF:
146     return R_GOTPLTREL;
147   case R_386_TLS_LE:
148     return R_TPREL;
149   case R_386_TLS_LE_32:
150     return R_TPREL_NEG;
151   case R_386_NONE:
152     return R_NONE;
153   default:
154     Err(ctx) << getErrorLoc(ctx, loc) << "unknown relocation (" << type.v
155              << ") against symbol " << &s;
156     return R_NONE;
157   }
158 }
159 
160 RelExpr X86::adjustTlsExpr(RelType type, RelExpr expr) const {
161   switch (expr) {
162   default:
163     return expr;
164   case R_RELAX_TLS_GD_TO_IE:
165     return R_RELAX_TLS_GD_TO_IE_GOTPLT;
166   case R_RELAX_TLS_GD_TO_LE:
167     return type == R_386_TLS_GD ? R_RELAX_TLS_GD_TO_LE_NEG
168                                 : R_RELAX_TLS_GD_TO_LE;
169   }
170 }
171 
172 void X86::writeGotPltHeader(uint8_t *buf) const {
173   write32le(buf, ctx.mainPart->dynamic->getVA());
174 }
175 
176 void X86::writeGotPlt(uint8_t *buf, const Symbol &s) const {
177   // Entries in .got.plt initially points back to the corresponding
178   // PLT entries with a fixed offset to skip the first instruction.
179   write32le(buf, s.getPltVA(ctx) + 6);
180 }
181 
182 void X86::writeIgotPlt(uint8_t *buf, const Symbol &s) const {
183   // An x86 entry is the address of the ifunc resolver function.
184   write32le(buf, s.getVA(ctx));
185 }
186 
187 RelType X86::getDynRel(RelType type) const {
188   if (type == R_386_TLS_LE)
189     return R_386_TLS_TPOFF;
190   if (type == R_386_TLS_LE_32)
191     return R_386_TLS_TPOFF32;
192   return type;
193 }
194 
195 void X86::writePltHeader(uint8_t *buf) const {
196   if (ctx.arg.isPic) {
197     const uint8_t v[] = {
198         0xff, 0xb3, 0x04, 0x00, 0x00, 0x00, // pushl 4(%ebx)
199         0xff, 0xa3, 0x08, 0x00, 0x00, 0x00, // jmp *8(%ebx)
200         0x90, 0x90, 0x90, 0x90              // nop
201     };
202     memcpy(buf, v, sizeof(v));
203     return;
204   }
205 
206   const uint8_t pltData[] = {
207       0xff, 0x35, 0, 0, 0, 0, // pushl (GOTPLT+4)
208       0xff, 0x25, 0, 0, 0, 0, // jmp *(GOTPLT+8)
209       0x90, 0x90, 0x90, 0x90, // nop
210   };
211   memcpy(buf, pltData, sizeof(pltData));
212   uint32_t gotPlt = ctx.in.gotPlt->getVA();
213   write32le(buf + 2, gotPlt + 4);
214   write32le(buf + 8, gotPlt + 8);
215 }
216 
217 void X86::writePlt(uint8_t *buf, const Symbol &sym,
218                    uint64_t pltEntryAddr) const {
219   unsigned relOff = ctx.in.relaPlt->entsize * sym.getPltIdx(ctx);
220   if (ctx.arg.isPic) {
221     const uint8_t inst[] = {
222         0xff, 0xa3, 0, 0, 0, 0, // jmp *foo@GOT(%ebx)
223         0x68, 0,    0, 0, 0,    // pushl $reloc_offset
224         0xe9, 0,    0, 0, 0,    // jmp .PLT0@PC
225     };
226     memcpy(buf, inst, sizeof(inst));
227     write32le(buf + 2, sym.getGotPltVA(ctx) - ctx.in.gotPlt->getVA());
228   } else {
229     const uint8_t inst[] = {
230         0xff, 0x25, 0, 0, 0, 0, // jmp *foo@GOT
231         0x68, 0,    0, 0, 0,    // pushl $reloc_offset
232         0xe9, 0,    0, 0, 0,    // jmp .PLT0@PC
233     };
234     memcpy(buf, inst, sizeof(inst));
235     write32le(buf + 2, sym.getGotPltVA(ctx));
236   }
237 
238   write32le(buf + 7, relOff);
239   write32le(buf + 12, ctx.in.plt->getVA() - pltEntryAddr - 16);
240 }
241 
242 int64_t X86::getImplicitAddend(const uint8_t *buf, RelType type) const {
243   switch (type) {
244   case R_386_8:
245   case R_386_PC8:
246     return SignExtend64<8>(*buf);
247   case R_386_16:
248   case R_386_PC16:
249     return SignExtend64<16>(read16le(buf));
250   case R_386_32:
251   case R_386_GLOB_DAT:
252   case R_386_GOT32:
253   case R_386_GOT32X:
254   case R_386_GOTOFF:
255   case R_386_GOTPC:
256   case R_386_IRELATIVE:
257   case R_386_PC32:
258   case R_386_PLT32:
259   case R_386_RELATIVE:
260   case R_386_TLS_GOTDESC:
261   case R_386_TLS_DESC_CALL:
262   case R_386_TLS_DTPMOD32:
263   case R_386_TLS_DTPOFF32:
264   case R_386_TLS_LDO_32:
265   case R_386_TLS_LDM:
266   case R_386_TLS_IE:
267   case R_386_TLS_IE_32:
268   case R_386_TLS_LE:
269   case R_386_TLS_LE_32:
270   case R_386_TLS_GD:
271   case R_386_TLS_GD_32:
272   case R_386_TLS_GOTIE:
273   case R_386_TLS_TPOFF:
274   case R_386_TLS_TPOFF32:
275     return SignExtend64<32>(read32le(buf));
276   case R_386_TLS_DESC:
277     return SignExtend64<32>(read32le(buf + 4));
278   case R_386_NONE:
279   case R_386_JUMP_SLOT:
280     // These relocations are defined as not having an implicit addend.
281     return 0;
282   default:
283     InternalErr(ctx, buf) << "cannot read addend for relocation " << type;
284     return 0;
285   }
286 }
287 
288 void X86::relocate(uint8_t *loc, const Relocation &rel, uint64_t val) const {
289   switch (rel.type) {
290   case R_386_8:
291     // R_386_{PC,}{8,16} are not part of the i386 psABI, but they are
292     // being used for some 16-bit programs such as boot loaders, so
293     // we want to support them.
294     checkIntUInt(ctx, loc, val, 8, rel);
295     *loc = val;
296     break;
297   case R_386_PC8:
298     checkInt(ctx, loc, val, 8, rel);
299     *loc = val;
300     break;
301   case R_386_16:
302     checkIntUInt(ctx, loc, val, 16, rel);
303     write16le(loc, val);
304     break;
305   case R_386_PC16:
306     // R_386_PC16 is normally used with 16 bit code. In that situation
307     // the PC is 16 bits, just like the addend. This means that it can
308     // point from any 16 bit address to any other if the possibility
309     // of wrapping is included.
310     // The only restriction we have to check then is that the destination
311     // address fits in 16 bits. That is impossible to do here. The problem is
312     // that we are passed the final value, which already had the
313     // current location subtracted from it.
314     // We just check that Val fits in 17 bits. This misses some cases, but
315     // should have no false positives.
316     checkInt(ctx, loc, val, 17, rel);
317     write16le(loc, val);
318     break;
319   case R_386_32:
320   case R_386_GOT32:
321   case R_386_GOT32X:
322   case R_386_GOTOFF:
323   case R_386_GOTPC:
324   case R_386_PC32:
325   case R_386_PLT32:
326   case R_386_RELATIVE:
327   case R_386_TLS_GOTDESC:
328   case R_386_TLS_DESC_CALL:
329   case R_386_TLS_DTPMOD32:
330   case R_386_TLS_DTPOFF32:
331   case R_386_TLS_GD:
332   case R_386_TLS_GOTIE:
333   case R_386_TLS_IE:
334   case R_386_TLS_LDM:
335   case R_386_TLS_LDO_32:
336   case R_386_TLS_LE:
337   case R_386_TLS_LE_32:
338   case R_386_TLS_TPOFF:
339   case R_386_TLS_TPOFF32:
340     checkInt(ctx, loc, val, 32, rel);
341     write32le(loc, val);
342     break;
343   case R_386_TLS_DESC:
344     // The addend is stored in the second 32-bit word.
345     write32le(loc + 4, val);
346     break;
347   default:
348     llvm_unreachable("unknown relocation");
349   }
350 }
351 
352 void X86::relaxTlsGdToLe(uint8_t *loc, const Relocation &rel,
353                          uint64_t val) const {
354   if (rel.type == R_386_TLS_GD) {
355     // Convert (loc[-2] == 0x04)
356     //   leal x@tlsgd(, %ebx, 1), %eax
357     //   call ___tls_get_addr@plt
358     // or
359     //   leal x@tlsgd(%reg), %eax
360     //   call *___tls_get_addr@got(%reg)
361     // to
362     const uint8_t inst[] = {
363         0x65, 0xa1, 0x00, 0x00, 0x00, 0x00, // movl %gs:0, %eax
364         0x81, 0xe8, 0,    0,    0,    0,    // subl x@ntpoff(%ebx), %eax
365     };
366     uint8_t *w = loc[-2] == 0x04 ? loc - 3 : loc - 2;
367     memcpy(w, inst, sizeof(inst));
368     write32le(w + 8, val);
369   } else if (rel.type == R_386_TLS_GOTDESC) {
370     // Convert leal x@tlsdesc(%ebx), %eax to leal x@ntpoff, %eax.
371     //
372     // Note: call *x@tlsdesc(%eax) may not immediately follow this instruction.
373     if (memcmp(loc - 2, "\x8d\x83", 2)) {
374       ErrAlways(ctx)
375           << getErrorLoc(ctx, loc - 2)
376           << "R_386_TLS_GOTDESC must be used in leal x@tlsdesc(%ebx), %eax";
377       return;
378     }
379     loc[-1] = 0x05;
380     write32le(loc, val);
381   } else {
382     // Convert call *x@tlsdesc(%eax) to xchg ax, ax.
383     assert(rel.type == R_386_TLS_DESC_CALL);
384     loc[0] = 0x66;
385     loc[1] = 0x90;
386   }
387 }
388 
389 void X86::relaxTlsGdToIe(uint8_t *loc, const Relocation &rel,
390                          uint64_t val) const {
391   if (rel.type == R_386_TLS_GD) {
392     // Convert (loc[-2] == 0x04)
393     //   leal x@tlsgd(, %ebx, 1), %eax
394     //   call ___tls_get_addr@plt
395     // or
396     //   leal x@tlsgd(%reg), %eax
397     //   call *___tls_get_addr@got(%reg)
398     const uint8_t inst[] = {
399         0x65, 0xa1, 0x00, 0x00, 0x00, 0x00, // movl %gs:0, %eax
400         0x03, 0x83, 0,    0,    0,    0,    // addl x@gottpoff(%ebx), %eax
401     };
402     uint8_t *w = loc[-2] == 0x04 ? loc - 3 : loc - 2;
403     memcpy(w, inst, sizeof(inst));
404     write32le(w + 8, val);
405   } else if (rel.type == R_386_TLS_GOTDESC) {
406     // Convert leal x@tlsdesc(%ebx), %eax to movl x@gotntpoff(%ebx), %eax.
407     if (memcmp(loc - 2, "\x8d\x83", 2)) {
408       ErrAlways(ctx)
409           << getErrorLoc(ctx, loc - 2)
410           << "R_386_TLS_GOTDESC must be used in leal x@tlsdesc(%ebx), %eax";
411       return;
412     }
413     loc[-2] = 0x8b;
414     write32le(loc, val);
415   } else {
416     // Convert call *x@tlsdesc(%eax) to xchg ax, ax.
417     assert(rel.type == R_386_TLS_DESC_CALL);
418     loc[0] = 0x66;
419     loc[1] = 0x90;
420   }
421 }
422 
423 // In some conditions, relocations can be optimized to avoid using GOT.
424 // This function does that for Initial Exec to Local Exec case.
425 void X86::relaxTlsIeToLe(uint8_t *loc, const Relocation &rel,
426                          uint64_t val) const {
427   // Ulrich's document section 6.2 says that @gotntpoff can
428   // be used with MOVL or ADDL instructions.
429   // @indntpoff is similar to @gotntpoff, but for use in
430   // position dependent code.
431   uint8_t reg = (loc[-1] >> 3) & 7;
432 
433   if (rel.type == R_386_TLS_IE) {
434     if (loc[-1] == 0xa1) {
435       // "movl foo@indntpoff,%eax" -> "movl $foo,%eax"
436       // This case is different from the generic case below because
437       // this is a 5 byte instruction while below is 6 bytes.
438       loc[-1] = 0xb8;
439     } else if (loc[-2] == 0x8b) {
440       // "movl foo@indntpoff,%reg" -> "movl $foo,%reg"
441       loc[-2] = 0xc7;
442       loc[-1] = 0xc0 | reg;
443     } else {
444       // "addl foo@indntpoff,%reg" -> "addl $foo,%reg"
445       loc[-2] = 0x81;
446       loc[-1] = 0xc0 | reg;
447     }
448   } else {
449     assert(rel.type == R_386_TLS_GOTIE);
450     if (loc[-2] == 0x8b) {
451       // "movl foo@gottpoff(%rip),%reg" -> "movl $foo,%reg"
452       loc[-2] = 0xc7;
453       loc[-1] = 0xc0 | reg;
454     } else {
455       // "addl foo@gotntpoff(%rip),%reg" -> "leal foo(%reg),%reg"
456       loc[-2] = 0x8d;
457       loc[-1] = 0x80 | (reg << 3) | reg;
458     }
459   }
460   write32le(loc, val);
461 }
462 
463 void X86::relaxTlsLdToLe(uint8_t *loc, const Relocation &rel,
464                          uint64_t val) const {
465   if (rel.type == R_386_TLS_LDO_32) {
466     write32le(loc, val);
467     return;
468   }
469 
470   if (loc[4] == 0xe8) {
471     // Convert
472     //   leal x(%reg),%eax
473     //   call ___tls_get_addr@plt
474     // to
475     const uint8_t inst[] = {
476         0x65, 0xa1, 0x00, 0x00, 0x00, 0x00, // movl %gs:0,%eax
477         0x90,                               // nop
478         0x8d, 0x74, 0x26, 0x00,             // leal 0(%esi,1),%esi
479     };
480     memcpy(loc - 2, inst, sizeof(inst));
481     return;
482   }
483 
484   // Convert
485   //   leal x(%reg),%eax
486   //   call *___tls_get_addr@got(%reg)
487   // to
488   const uint8_t inst[] = {
489       0x65, 0xa1, 0x00, 0x00, 0x00, 0x00, // movl %gs:0,%eax
490       0x8d, 0xb6, 0x00, 0x00, 0x00, 0x00, // leal (%esi),%esi
491   };
492   memcpy(loc - 2, inst, sizeof(inst));
493 }
494 
495 void X86::relocateAlloc(InputSectionBase &sec, uint8_t *buf) const {
496   uint64_t secAddr = sec.getOutputSection()->addr;
497   if (auto *s = dyn_cast<InputSection>(&sec))
498     secAddr += s->outSecOff;
499   for (const Relocation &rel : sec.relocs()) {
500     uint8_t *loc = buf + rel.offset;
501     const uint64_t val =
502         SignExtend64(sec.getRelocTargetVA(ctx, rel, secAddr + rel.offset), 32);
503     switch (rel.expr) {
504     case R_RELAX_TLS_GD_TO_IE_GOTPLT:
505       relaxTlsGdToIe(loc, rel, val);
506       continue;
507     case R_RELAX_TLS_GD_TO_LE:
508     case R_RELAX_TLS_GD_TO_LE_NEG:
509       relaxTlsGdToLe(loc, rel, val);
510       continue;
511     case R_RELAX_TLS_LD_TO_LE:
512       relaxTlsLdToLe(loc, rel, val);
513       break;
514     case R_RELAX_TLS_IE_TO_LE:
515       relaxTlsIeToLe(loc, rel, val);
516       continue;
517     default:
518       relocate(loc, rel, val);
519       break;
520     }
521   }
522 }
523 
524 // If Intel Indirect Branch Tracking is enabled, we have to emit special PLT
525 // entries containing endbr32 instructions. A PLT entry will be split into two
526 // parts, one in .plt.sec (writePlt), and the other in .plt (writeIBTPlt).
527 namespace {
528 class IntelIBT : public X86 {
529 public:
530   IntelIBT(Ctx &ctx) : X86(ctx) { pltHeaderSize = 0; }
531   void writeGotPlt(uint8_t *buf, const Symbol &s) const override;
532   void writePlt(uint8_t *buf, const Symbol &sym,
533                 uint64_t pltEntryAddr) const override;
534   void writeIBTPlt(uint8_t *buf, size_t numEntries) const override;
535 
536   static const unsigned IBTPltHeaderSize = 16;
537 };
538 } // namespace
539 
540 void IntelIBT::writeGotPlt(uint8_t *buf, const Symbol &s) const {
541   uint64_t va = ctx.in.ibtPlt->getVA() + IBTPltHeaderSize +
542                 s.getPltIdx(ctx) * pltEntrySize;
543   write32le(buf, va);
544 }
545 
546 void IntelIBT::writePlt(uint8_t *buf, const Symbol &sym,
547                         uint64_t /*pltEntryAddr*/) const {
548   if (ctx.arg.isPic) {
549     const uint8_t inst[] = {
550         0xf3, 0x0f, 0x1e, 0xfb,       // endbr32
551         0xff, 0xa3, 0,    0,    0, 0, // jmp *name@GOT(%ebx)
552         0x66, 0x0f, 0x1f, 0x44, 0, 0, // nop
553     };
554     memcpy(buf, inst, sizeof(inst));
555     write32le(buf + 6, sym.getGotPltVA(ctx) - ctx.in.gotPlt->getVA());
556     return;
557   }
558 
559   const uint8_t inst[] = {
560       0xf3, 0x0f, 0x1e, 0xfb,       // endbr32
561       0xff, 0x25, 0,    0,    0, 0, // jmp *foo@GOT
562       0x66, 0x0f, 0x1f, 0x44, 0, 0, // nop
563   };
564   memcpy(buf, inst, sizeof(inst));
565   write32le(buf + 6, sym.getGotPltVA(ctx));
566 }
567 
568 void IntelIBT::writeIBTPlt(uint8_t *buf, size_t numEntries) const {
569   writePltHeader(buf);
570   buf += IBTPltHeaderSize;
571 
572   const uint8_t inst[] = {
573       0xf3, 0x0f, 0x1e, 0xfb,    // endbr32
574       0x68, 0,    0,    0,    0, // pushl $reloc_offset
575       0xe9, 0,    0,    0,    0, // jmpq .PLT0@PC
576       0x66, 0x90,                // nop
577   };
578 
579   for (size_t i = 0; i < numEntries; ++i) {
580     memcpy(buf, inst, sizeof(inst));
581     write32le(buf + 5, i * sizeof(object::ELF32LE::Rel));
582     write32le(buf + 10, -pltHeaderSize - sizeof(inst) * i - 30);
583     buf += sizeof(inst);
584   }
585 }
586 
587 namespace {
588 class RetpolinePic : public X86 {
589 public:
590   RetpolinePic(Ctx &);
591   void writeGotPlt(uint8_t *buf, const Symbol &s) const override;
592   void writePltHeader(uint8_t *buf) const override;
593   void writePlt(uint8_t *buf, const Symbol &sym,
594                 uint64_t pltEntryAddr) const override;
595 };
596 
597 class RetpolineNoPic : public X86 {
598 public:
599   RetpolineNoPic(Ctx &);
600   void writeGotPlt(uint8_t *buf, const Symbol &s) const override;
601   void writePltHeader(uint8_t *buf) const override;
602   void writePlt(uint8_t *buf, const Symbol &sym,
603                 uint64_t pltEntryAddr) const override;
604 };
605 } // namespace
606 
607 RetpolinePic::RetpolinePic(Ctx &ctx) : X86(ctx) {
608   pltHeaderSize = 48;
609   pltEntrySize = 32;
610   ipltEntrySize = 32;
611 }
612 
613 void RetpolinePic::writeGotPlt(uint8_t *buf, const Symbol &s) const {
614   write32le(buf, s.getPltVA(ctx) + 17);
615 }
616 
617 void RetpolinePic::writePltHeader(uint8_t *buf) const {
618   const uint8_t insn[] = {
619       0xff, 0xb3, 4,    0,    0,    0,          // 0:    pushl 4(%ebx)
620       0x50,                                     // 6:    pushl %eax
621       0x8b, 0x83, 8,    0,    0,    0,          // 7:    mov 8(%ebx), %eax
622       0xe8, 0x0e, 0x00, 0x00, 0x00,             // d:    call next
623       0xf3, 0x90,                               // 12: loop: pause
624       0x0f, 0xae, 0xe8,                         // 14:   lfence
625       0xeb, 0xf9,                               // 17:   jmp loop
626       0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, // 19:   int3; .align 16
627       0x89, 0x0c, 0x24,                         // 20: next: mov %ecx, (%esp)
628       0x8b, 0x4c, 0x24, 0x04,                   // 23:   mov 0x4(%esp), %ecx
629       0x89, 0x44, 0x24, 0x04,                   // 27:   mov %eax ,0x4(%esp)
630       0x89, 0xc8,                               // 2b:   mov %ecx, %eax
631       0x59,                                     // 2d:   pop %ecx
632       0xc3,                                     // 2e:   ret
633       0xcc,                                     // 2f:   int3; padding
634   };
635   memcpy(buf, insn, sizeof(insn));
636 }
637 
638 void RetpolinePic::writePlt(uint8_t *buf, const Symbol &sym,
639                             uint64_t pltEntryAddr) const {
640   unsigned relOff = ctx.in.relaPlt->entsize * sym.getPltIdx(ctx);
641   const uint8_t insn[] = {
642       0x50,                            // pushl %eax
643       0x8b, 0x83, 0,    0,    0,    0, // mov foo@GOT(%ebx), %eax
644       0xe8, 0,    0,    0,    0,       // call plt+0x20
645       0xe9, 0,    0,    0,    0,       // jmp plt+0x12
646       0x68, 0,    0,    0,    0,       // pushl $reloc_offset
647       0xe9, 0,    0,    0,    0,       // jmp plt+0
648       0xcc, 0xcc, 0xcc, 0xcc, 0xcc,    // int3; padding
649   };
650   memcpy(buf, insn, sizeof(insn));
651 
652   uint32_t ebx = ctx.in.gotPlt->getVA();
653   unsigned off = pltEntryAddr - ctx.in.plt->getVA();
654   write32le(buf + 3, sym.getGotPltVA(ctx) - ebx);
655   write32le(buf + 8, -off - 12 + 32);
656   write32le(buf + 13, -off - 17 + 18);
657   write32le(buf + 18, relOff);
658   write32le(buf + 23, -off - 27);
659 }
660 
661 RetpolineNoPic::RetpolineNoPic(Ctx &ctx) : X86(ctx) {
662   pltHeaderSize = 48;
663   pltEntrySize = 32;
664   ipltEntrySize = 32;
665 }
666 
667 void RetpolineNoPic::writeGotPlt(uint8_t *buf, const Symbol &s) const {
668   write32le(buf, s.getPltVA(ctx) + 16);
669 }
670 
671 void RetpolineNoPic::writePltHeader(uint8_t *buf) const {
672   const uint8_t insn[] = {
673       0xff, 0x35, 0,    0,    0,    0, // 0:    pushl GOTPLT+4
674       0x50,                            // 6:    pushl %eax
675       0xa1, 0,    0,    0,    0,       // 7:    mov GOTPLT+8, %eax
676       0xe8, 0x0f, 0x00, 0x00, 0x00,    // c:    call next
677       0xf3, 0x90,                      // 11: loop: pause
678       0x0f, 0xae, 0xe8,                // 13:   lfence
679       0xeb, 0xf9,                      // 16:   jmp loop
680       0xcc, 0xcc, 0xcc, 0xcc, 0xcc,    // 18:   int3
681       0xcc, 0xcc, 0xcc,                // 1f:   int3; .align 16
682       0x89, 0x0c, 0x24,                // 20: next: mov %ecx, (%esp)
683       0x8b, 0x4c, 0x24, 0x04,          // 23:   mov 0x4(%esp), %ecx
684       0x89, 0x44, 0x24, 0x04,          // 27:   mov %eax ,0x4(%esp)
685       0x89, 0xc8,                      // 2b:   mov %ecx, %eax
686       0x59,                            // 2d:   pop %ecx
687       0xc3,                            // 2e:   ret
688       0xcc,                            // 2f:   int3; padding
689   };
690   memcpy(buf, insn, sizeof(insn));
691 
692   uint32_t gotPlt = ctx.in.gotPlt->getVA();
693   write32le(buf + 2, gotPlt + 4);
694   write32le(buf + 8, gotPlt + 8);
695 }
696 
697 void RetpolineNoPic::writePlt(uint8_t *buf, const Symbol &sym,
698                               uint64_t pltEntryAddr) const {
699   unsigned relOff = ctx.in.relaPlt->entsize * sym.getPltIdx(ctx);
700   const uint8_t insn[] = {
701       0x50,                         // 0:  pushl %eax
702       0xa1, 0,    0,    0,    0,    // 1:  mov foo_in_GOT, %eax
703       0xe8, 0,    0,    0,    0,    // 6:  call plt+0x20
704       0xe9, 0,    0,    0,    0,    // b:  jmp plt+0x11
705       0x68, 0,    0,    0,    0,    // 10: pushl $reloc_offset
706       0xe9, 0,    0,    0,    0,    // 15: jmp plt+0
707       0xcc, 0xcc, 0xcc, 0xcc, 0xcc, // 1a: int3; padding
708       0xcc,                         // 1f: int3; padding
709   };
710   memcpy(buf, insn, sizeof(insn));
711 
712   unsigned off = pltEntryAddr - ctx.in.plt->getVA();
713   write32le(buf + 2, sym.getGotPltVA(ctx));
714   write32le(buf + 7, -off - 11 + 32);
715   write32le(buf + 12, -off - 16 + 17);
716   write32le(buf + 17, relOff);
717   write32le(buf + 22, -off - 26);
718 }
719 
720 void elf::setX86TargetInfo(Ctx &ctx) {
721   if (ctx.arg.zRetpolineplt) {
722     if (ctx.arg.isPic)
723       ctx.target.reset(new RetpolinePic(ctx));
724     else
725       ctx.target.reset(new RetpolineNoPic(ctx));
726     return;
727   }
728 
729   if (ctx.arg.andFeatures & GNU_PROPERTY_X86_FEATURE_1_IBT)
730     ctx.target.reset(new IntelIBT(ctx));
731   else
732     ctx.target.reset(new X86(ctx));
733 }
734