//===- X86.cpp ------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "OutputSections.h"
#include "Symbols.h"
#include "SyntheticSections.h"
#include "Target.h"
#include "lld/Common/ErrorHandler.h"
#include "llvm/Support/Endian.h"

using namespace llvm;
using namespace llvm::support::endian;
using namespace llvm::ELF;
using namespace lld;
using namespace lld::elf;

namespace {
// TargetInfo implementation for i386 (32-bit x86). Provides relocation
// classification and application, .got.plt/PLT synthesis, and the TLS
// relaxation rewrites (GD->LE, GD->IE, LD->LE, IE->LE) used when the linker
// can statically resolve TLS accesses. Variants for IBT and retpoline PLTs
// derive from this class further below in this file.
class X86 : public TargetInfo {
public:
  X86(Ctx &);
  // Number of relocations to skip after relaxing a TLS GD sequence.
  int getTlsGdRelaxSkip(RelType type) const override;
  // Map an ELF relocation type to lld's internal RelExpr; may inspect the
  // instruction bytes around `loc` (see R_386_GOT32/R_386_GOT32X handling).
  RelExpr getRelExpr(RelType type, const Symbol &s,
                     const uint8_t *loc) const override;
  // REL-format i386 stores addends in the relocated field itself.
  int64_t getImplicitAddend(const uint8_t *buf, RelType type) const override;
  void writeGotPltHeader(uint8_t *buf) const override;
  RelType getDynRel(RelType type) const override;
  void writeGotPlt(uint8_t *buf, const Symbol &s) const override;
  void writeIgotPlt(uint8_t *buf, const Symbol &s) const override;
  void writePltHeader(uint8_t *buf) const override;
  void writePlt(uint8_t *buf, const Symbol &sym,
                uint64_t pltEntryAddr) const override;
  void relocate(uint8_t *loc, const Relocation &rel,
                uint64_t val) const override;

  RelExpr adjustTlsExpr(RelType type, RelExpr expr) const override;
  void relocateAlloc(InputSectionBase &sec, uint8_t *buf) const override;

private:
  // In-place instruction rewrites for TLS relaxation. Each takes the
  // relocated location, the relocation, and the final value to patch in.
  void relaxTlsGdToLe(uint8_t *loc, const Relocation &rel, uint64_t val) const;
  void relaxTlsGdToIe(uint8_t *loc, const Relocation &rel, uint64_t val) const;
  void relaxTlsLdToLe(uint8_t *loc, const Relocation &rel, uint64_t val) const;
  void relaxTlsIeToLe(uint8_t *loc, const Relocation &rel, uint64_t val) const;
};
} // namespace

X86::X86(Ctx &ctx) : TargetInfo(ctx) {
  copyRel = R_386_COPY;
  gotRel = R_386_GLOB_DAT;
  pltRel = R_386_JUMP_SLOT;
  iRelativeRel = R_386_IRELATIVE;
  relativeRel = R_386_RELATIVE;
  symbolicRel = R_386_32;
  tlsDescRel = R_386_TLS_DESC;
  tlsGotRel = R_386_TLS_TPOFF;
  tlsModuleIndexRel = R_386_TLS_DTPMOD32;
  tlsOffsetRel = R_386_TLS_DTPOFF32;
  gotBaseSymInGotPlt = true;
  // Both the PLT header and each PLT entry are 16 bytes on i386.
  pltHeaderSize = 16;
  pltEntrySize = 16;
  ipltEntrySize = 16;
  trapInstr = {0xcc, 0xcc, 0xcc, 0xcc}; // 0xcc = INT3

  // Align to the non-PAE large page size (known as a superpage or huge page).
  // FreeBSD automatically promotes large, superpage-aligned allocations.
  defaultImageBase = 0x400000;
}

int X86::getTlsGdRelaxSkip(RelType type) const {
  // TLSDESC relocations are processed separately. See relaxTlsGdToLe below.
  // The classic GD sequence consumes a pair of relocations, hence 2.
  return type == R_386_TLS_GOTDESC || type == R_386_TLS_DESC_CALL ? 1 : 2;
}

RelExpr X86::getRelExpr(RelType type, const Symbol &s,
                        const uint8_t *loc) const {
  switch (type) {
  case R_386_8:
  case R_386_16:
  case R_386_32:
    return R_ABS;
  case R_386_TLS_LDO_32:
    return R_DTPREL;
  case R_386_TLS_GD:
    return R_TLSGD_GOTPLT;
  case R_386_TLS_LDM:
    return R_TLSLD_GOTPLT;
  case R_386_PLT32:
    return R_PLT_PC;
  case R_386_PC8:
  case R_386_PC16:
  case R_386_PC32:
    return R_PC;
  case R_386_GOTPC:
    return R_GOTPLTONLY_PC;
  case R_386_TLS_IE:
    return R_GOT;
  case R_386_GOT32:
  case R_386_GOT32X:
    // These relocations are arguably mis-designed because their calculations
    // depend on the instructions they are applied to. This is bad because we
    // usually don't care about whether the target section contains valid
    // machine instructions or not. But this is part of the documented ABI, so
    // we had to implement as the standard requires.
    //
    // x86 does not support PC-relative data access. Therefore, in order to
    // access GOT contents, a GOT address needs to be known at link-time
    // (which means non-PIC) or compilers have to emit code to get a GOT
    // address at runtime (which means code is position-independent but
    // compilers need to emit extra code for each GOT access.) This decision
    // is made at compile-time. In the latter case, compilers emit code to
    // load a GOT address to a register, which is usually %ebx.
    //
    // So, there are two ways to refer to symbol foo's GOT entry: foo@GOT or
    // foo@GOT(%ebx).
    //
    // foo@GOT is not usable in PIC. If we are creating a PIC output and if we
    // find such relocation, we should report an error. foo@GOT is resolved to
    // an *absolute* address of foo's GOT entry, because both GOT address and
    // foo's offset are known. In other words, it's G + A.
    //
    // foo@GOT(%ebx) needs to be resolved to a *relative* offset from a GOT to
    // foo's GOT entry in the table, because GOT address is not known but foo's
    // offset in the table is known. It's G + A - GOT.
    //
    // It's unfortunate that compilers emit the same relocation for these
    // different use cases. In order to distinguish them, we have to read a
    // machine instruction.
    //
    // The following code implements it. We assume that Loc[0] is the first byte
    // of a displacement or an immediate field of a valid machine
    // instruction. That means a ModRM byte is at Loc[-1]. By taking a look at
    // the byte, we can determine whether the instruction uses the operand as an
    // absolute address (R_GOT) or a register-relative address (R_GOTPLT).
    // ModRM mod=00, r/m=101 (masked pattern 0x05) selects disp32 absolute
    // addressing on i386.
    return (loc[-1] & 0xc7) == 0x5 ? R_GOT : R_GOTPLT;
  case R_386_TLS_GOTDESC:
    return R_TLSDESC_GOTPLT;
  case R_386_TLS_DESC_CALL:
    return R_TLSDESC_CALL;
  case R_386_TLS_GOTIE:
    return R_GOTPLT;
  case R_386_GOTOFF:
    return R_GOTPLTREL;
  case R_386_TLS_LE:
    return R_TPREL;
  case R_386_TLS_LE_32:
    return R_TPREL_NEG;
  case R_386_NONE:
    return R_NONE;
  default:
    Err(ctx) << getErrorLoc(ctx, loc) << "unknown relocation (" << type.v
             << ") against symbol " << &s;
    return R_NONE;
  }
}

// Map generic TLS relaxation expressions to their i386-specific variants:
// the IE form is GOTPLT-relative, and the GD->LE form for R_386_TLS_GD
// needs the negated offset (the rewritten instruction subtracts it; see
// relaxTlsGdToLe).
RelExpr X86::adjustTlsExpr(RelType type, RelExpr expr) const {
  switch (expr) {
  default:
    return expr;
  case R_RELAX_TLS_GD_TO_IE:
    return R_RELAX_TLS_GD_TO_IE_GOTPLT;
  case R_RELAX_TLS_GD_TO_LE:
    return type == R_386_TLS_GD ? R_RELAX_TLS_GD_TO_LE_NEG
                                : R_RELAX_TLS_GD_TO_LE;
  }
}

// The first word of .got.plt holds the address of _DYNAMIC, used by the
// dynamic loader.
void X86::writeGotPltHeader(uint8_t *buf) const {
  write32le(buf, ctx.mainPart->dynamic->getVA());
}

void X86::writeGotPlt(uint8_t *buf, const Symbol &s) const {
  // Entries in .got.plt initially points back to the corresponding
  // PLT entries with a fixed offset to skip the first instruction.
  write32le(buf, s.getPltVA(ctx) + 6);
}

void X86::writeIgotPlt(uint8_t *buf, const Symbol &s) const {
  // An x86 entry is the address of the ifunc resolver function.
  write32le(buf, s.getVA(ctx));
}

// TLS LE relocations against preemptible symbols must be converted to their
// dynamic TPOFF counterparts; everything else passes through unchanged.
RelType X86::getDynRel(RelType type) const {
  if (type == R_386_TLS_LE)
    return R_386_TLS_TPOFF;
  if (type == R_386_TLS_LE_32)
    return R_386_TLS_TPOFF32;
  return type;
}

void X86::writePltHeader(uint8_t *buf) const {
  if (ctx.arg.isPic) {
    // PIC header addresses .got.plt relative to %ebx (the GOT base register).
    const uint8_t v[] = {
        0xff, 0xb3, 0x04, 0x00, 0x00, 0x00, // pushl 4(%ebx)
        0xff, 0xa3, 0x08, 0x00, 0x00, 0x00, // jmp *8(%ebx)
        0x90, 0x90, 0x90, 0x90              // nop
    };
    memcpy(buf, v, sizeof(v));
    return;
  }

  // Non-PIC header uses absolute .got.plt addresses, patched in below.
  const uint8_t pltData[] = {
      0xff, 0x35, 0, 0, 0, 0, // pushl (GOTPLT+4)
      0xff, 0x25, 0, 0, 0, 0, // jmp *(GOTPLT+8)
      0x90, 0x90, 0x90, 0x90, // nop
  };
  memcpy(buf, pltData, sizeof(pltData));
  uint32_t gotPlt = ctx.in.gotPlt->getVA();
  write32le(buf + 2, gotPlt + 4);
  write32le(buf + 8, gotPlt + 8);
}

void X86::writePlt(uint8_t *buf, const Symbol &sym,
                   uint64_t pltEntryAddr) const {
  // Byte offset of this symbol's entry in the .rel.plt section, pushed as the
  // argument for the lazy resolver.
  unsigned relOff = ctx.in.relaPlt->entsize * sym.getPltIdx(ctx);
  if (ctx.arg.isPic) {
    const uint8_t inst[] = {
        0xff, 0xa3, 0, 0, 0, 0, // jmp *foo@GOT(%ebx)
        0x68, 0, 0, 0, 0,       // pushl $reloc_offset
        0xe9, 0, 0, 0, 0,       // jmp .PLT0@PC
    };
    memcpy(buf, inst, sizeof(inst));
    write32le(buf + 2, sym.getGotPltVA(ctx) - ctx.in.gotPlt->getVA());
  } else {
    const uint8_t inst[] = {
        0xff, 0x25, 0, 0, 0, 0, // jmp *foo@GOT
        0x68, 0, 0, 0, 0,       // pushl $reloc_offset
        0xe9, 0, 0, 0, 0,       // jmp .PLT0@PC
    };
    memcpy(buf, inst, sizeof(inst));
    write32le(buf + 2, sym.getGotPltVA(ctx));
  }

  write32le(buf + 7, relOff);
  // Backward displacement from the end of the jmp (entry+16) to PLT entry 0.
  write32le(buf + 12, ctx.in.plt->getVA() - pltEntryAddr - 16);
}

// i386 uses REL relocations: the addend is stored in the field being
// relocated, so read it back out of the section contents.
int64_t X86::getImplicitAddend(const uint8_t *buf, RelType type) const {
  switch (type) {
  case R_386_8:
  case R_386_PC8:
    return SignExtend64<8>(*buf);
  case R_386_16:
  case R_386_PC16:
    return SignExtend64<16>(read16le(buf));
  case R_386_32:
  case R_386_GLOB_DAT:
  case R_386_GOT32:
  case R_386_GOT32X:
  case R_386_GOTOFF:
  case R_386_GOTPC:
  case R_386_IRELATIVE:
  case R_386_PC32:
  case R_386_PLT32:
  case R_386_RELATIVE:
  case R_386_TLS_GOTDESC:
  case R_386_TLS_DESC_CALL:
  case R_386_TLS_DTPMOD32:
  case R_386_TLS_DTPOFF32:
  case R_386_TLS_LDO_32:
  case R_386_TLS_LDM:
  case R_386_TLS_IE:
  case R_386_TLS_IE_32:
  case R_386_TLS_LE:
  case R_386_TLS_LE_32:
  case R_386_TLS_GD:
  case R_386_TLS_GD_32:
  case R_386_TLS_GOTIE:
  case R_386_TLS_TPOFF:
  case R_386_TLS_TPOFF32:
    return SignExtend64<32>(read32le(buf));
  case R_386_TLS_DESC:
    // A TLS descriptor is two words; the addend lives in the second one.
    return SignExtend64<32>(read32le(buf + 4));
  case R_386_NONE:
  case R_386_JUMP_SLOT:
    // These relocations are defined as not having an implicit addend.
    return 0;
  default:
    InternalErr(ctx, buf) << "cannot read addend for relocation " << type;
    return 0;
  }
}

void X86::relocate(uint8_t *loc, const Relocation &rel, uint64_t val) const {
  switch (rel.type) {
  case R_386_8:
    // R_386_{PC,}{8,16} are not part of the i386 psABI, but they are
    // being used for some 16-bit programs such as boot loaders, so
    // we want to support them.
    checkIntUInt(ctx, loc, val, 8, rel);
    *loc = val;
    break;
  case R_386_PC8:
    checkInt(ctx, loc, val, 8, rel);
    *loc = val;
    break;
  case R_386_16:
    checkIntUInt(ctx, loc, val, 16, rel);
    write16le(loc, val);
    break;
  case R_386_PC16:
    // R_386_PC16 is normally used with 16 bit code. In that situation
    // the PC is 16 bits, just like the addend. This means that it can
    // point from any 16 bit address to any other if the possibility
    // of wrapping is included.
    // The only restriction we have to check then is that the destination
    // address fits in 16 bits. That is impossible to do here. The problem is
    // that we are passed the final value, which already had the
    // current location subtracted from it.
    // We just check that Val fits in 17 bits. This misses some cases, but
    // should have no false positives.
    checkInt(ctx, loc, val, 17, rel);
    write16le(loc, val);
    break;
  case R_386_32:
  case R_386_GOT32:
  case R_386_GOT32X:
  case R_386_GOTOFF:
  case R_386_GOTPC:
  case R_386_PC32:
  case R_386_PLT32:
  case R_386_RELATIVE:
  case R_386_TLS_GOTDESC:
  case R_386_TLS_DESC_CALL:
  case R_386_TLS_DTPMOD32:
  case R_386_TLS_DTPOFF32:
  case R_386_TLS_GD:
  case R_386_TLS_GOTIE:
  case R_386_TLS_IE:
  case R_386_TLS_LDM:
  case R_386_TLS_LDO_32:
  case R_386_TLS_LE:
  case R_386_TLS_LE_32:
  case R_386_TLS_TPOFF:
  case R_386_TLS_TPOFF32:
    checkInt(ctx, loc, val, 32, rel);
    write32le(loc, val);
    break;
  case R_386_TLS_DESC:
    // The addend is stored in the second 32-bit word.
    write32le(loc + 4, val);
    break;
  default:
    llvm_unreachable("unknown relocation");
  }
}

void X86::relaxTlsGdToLe(uint8_t *loc, const Relocation &rel,
                         uint64_t val) const {
  if (rel.type == R_386_TLS_GD) {
    // Convert (loc[-2] == 0x04)
    //   leal x@tlsgd(, %ebx, 1), %eax
    //   call ___tls_get_addr@plt
    // or
    //   leal x@tlsgd(%reg), %eax
    //   call *___tls_get_addr@got(%reg)
    // to
    const uint8_t inst[] = {
        0x65, 0xa1, 0x00, 0x00, 0x00, 0x00, // movl %gs:0, %eax
        0x81, 0xe8, 0, 0, 0, 0,             // subl $x@ntpoff, %eax
    };
    // With a SIB byte (0x04) the leal is one byte longer, so the rewrite
    // starts one byte earlier.
    uint8_t *w = loc[-2] == 0x04 ? loc - 3 : loc - 2;
    memcpy(w, inst, sizeof(inst));
    // val is the negated offset here (see R_RELAX_TLS_GD_TO_LE_NEG in
    // adjustTlsExpr), patched into the subl immediate.
    write32le(w + 8, val);
  } else if (rel.type == R_386_TLS_GOTDESC) {
    // Convert leal x@tlsdesc(%ebx), %eax to leal x@ntpoff, %eax.
    //
    // Note: call *x@tlsdesc(%eax) may not immediately follow this instruction.
    if (memcmp(loc - 2, "\x8d\x83", 2)) {
      ErrAlways(ctx)
          << getErrorLoc(ctx, loc - 2)
          << "R_386_TLS_GOTDESC must be used in leal x@tlsdesc(%ebx), %eax";
      return;
    }
    // Switch the ModRM byte to disp32 absolute addressing.
    loc[-1] = 0x05;
    write32le(loc, val);
  } else {
    // Convert call *x@tlsdesc(%eax) to xchg ax, ax.
    assert(rel.type == R_386_TLS_DESC_CALL);
    loc[0] = 0x66;
    loc[1] = 0x90;
  }
}

void X86::relaxTlsGdToIe(uint8_t *loc, const Relocation &rel,
                         uint64_t val) const {
  if (rel.type == R_386_TLS_GD) {
    // Convert (loc[-2] == 0x04)
    //   leal x@tlsgd(, %ebx, 1), %eax
    //   call ___tls_get_addr@plt
    // or
    //   leal x@tlsgd(%reg), %eax
    //   call *___tls_get_addr@got(%reg)
    const uint8_t inst[] = {
        0x65, 0xa1, 0x00, 0x00, 0x00, 0x00, // movl %gs:0, %eax
        0x03, 0x83, 0, 0, 0, 0,             // addl x@gottpoff(%ebx), %eax
    };
    uint8_t *w = loc[-2] == 0x04 ? loc - 3 : loc - 2;
    memcpy(w, inst, sizeof(inst));
    write32le(w + 8, val);
  } else if (rel.type == R_386_TLS_GOTDESC) {
    // Convert leal x@tlsdesc(%ebx), %eax to movl x@gotntpoff(%ebx), %eax.
    if (memcmp(loc - 2, "\x8d\x83", 2)) {
      ErrAlways(ctx)
          << getErrorLoc(ctx, loc - 2)
          << "R_386_TLS_GOTDESC must be used in leal x@tlsdesc(%ebx), %eax";
      return;
    }
    // Opcode 0x8d (leal) -> 0x8b (movl); ModRM stays (%ebx)-relative.
    loc[-2] = 0x8b;
    write32le(loc, val);
  } else {
    // Convert call *x@tlsdesc(%eax) to xchg ax, ax.
    assert(rel.type == R_386_TLS_DESC_CALL);
    loc[0] = 0x66;
    loc[1] = 0x90;
  }
}

// In some conditions, relocations can be optimized to avoid using GOT.
// This function does that for Initial Exec to Local Exec case.
void X86::relaxTlsIeToLe(uint8_t *loc, const Relocation &rel,
                         uint64_t val) const {
  // Ulrich's document section 6.2 says that @gotntpoff can
  // be used with MOVL or ADDL instructions.
  // @indntpoff is similar to @gotntpoff, but for use in
  // position dependent code.
  // Extract the destination register from the ModRM byte's reg field.
  uint8_t reg = (loc[-1] >> 3) & 7;

  if (rel.type == R_386_TLS_IE) {
    if (loc[-1] == 0xa1) {
      // "movl foo@indntpoff,%eax" -> "movl $foo,%eax"
      // This case is different from the generic case below because
      // this is a 5 byte instruction while below is 6 bytes.
      loc[-1] = 0xb8;
    } else if (loc[-2] == 0x8b) {
      // "movl foo@indntpoff,%reg" -> "movl $foo,%reg"
      loc[-2] = 0xc7;
      loc[-1] = 0xc0 | reg;
    } else {
      // "addl foo@indntpoff,%reg" -> "addl $foo,%reg"
      loc[-2] = 0x81;
      loc[-1] = 0xc0 | reg;
    }
  } else {
    assert(rel.type == R_386_TLS_GOTIE);
    if (loc[-2] == 0x8b) {
      // "movl foo@gottpoff(%rip),%reg" -> "movl $foo,%reg"
      loc[-2] = 0xc7;
      loc[-1] = 0xc0 | reg;
    } else {
      // "addl foo@gotntpoff(%rip),%reg" -> "leal foo(%reg),%reg"
      loc[-2] = 0x8d;
      loc[-1] = 0x80 | (reg << 3) | reg;
    }
  }
  write32le(loc, val);
}

void X86::relaxTlsLdToLe(uint8_t *loc, const Relocation &rel,
                         uint64_t val) const {
  if (rel.type == R_386_TLS_LDO_32) {
    // The offset-from-module-base operand just gets the final value.
    write32le(loc, val);
    return;
  }

  // loc points at the leal's 4-byte operand; loc[4] is the opcode of the
  // following instruction (0xe8 = direct call rel32).
  if (loc[4] == 0xe8) {
    // Convert
    //   leal x(%reg),%eax
    //   call ___tls_get_addr@plt
    // to
    const uint8_t inst[] = {
        0x65, 0xa1, 0x00, 0x00, 0x00, 0x00, // movl %gs:0,%eax
        0x90,                               // nop
        0x8d, 0x74, 0x26, 0x00,             // leal 0(%esi,1),%esi
    };
    memcpy(loc - 2, inst, sizeof(inst));
    return;
  }

  // Convert
  //   leal x(%reg),%eax
  //   call *___tls_get_addr@got(%reg)
  // to
  const uint8_t inst[] = {
      0x65, 0xa1, 0x00, 0x00, 0x00, 0x00, // movl %gs:0,%eax
      0x8d, 0xb6, 0x00, 0x00, 0x00, 0x00, // leal (%esi),%esi
  };
  memcpy(loc - 2, inst, sizeof(inst));
}

// Apply all relocations of an allocated section, dispatching TLS relaxation
// expressions to the relax* rewrites above and everything else to relocate().
void X86::relocateAlloc(InputSectionBase &sec, uint8_t *buf) const {
  uint64_t secAddr = sec.getOutputSection()->addr;
  if (auto *s = dyn_cast<InputSection>(&sec))
    secAddr += s->outSecOff;
  for (const Relocation &rel : sec.relocs()) {
    uint8_t *loc = buf + rel.offset;
    const uint64_t val =
        SignExtend64(sec.getRelocTargetVA(ctx, rel, secAddr + rel.offset), 32);
    switch (rel.expr) {
    case R_RELAX_TLS_GD_TO_IE_GOTPLT:
      relaxTlsGdToIe(loc, rel, val);
      continue;
    case R_RELAX_TLS_GD_TO_LE:
    case R_RELAX_TLS_GD_TO_LE_NEG:
      relaxTlsGdToLe(loc, rel, val);
      continue;
    case R_RELAX_TLS_LD_TO_LE:
      relaxTlsLdToLe(loc, rel, val);
      break;
    case R_RELAX_TLS_IE_TO_LE:
      relaxTlsIeToLe(loc, rel, val);
      continue;
    default:
      relocate(loc, rel, val);
      break;
    }
  }
}

// If Intel Indirect Branch Tracking is enabled, we have to emit special PLT
// entries containing endbr32 instructions. A PLT entry will be split into two
// parts, one in .plt.sec (writePlt), and the other in .plt (writeIBTPlt).
namespace {
class IntelIBT : public X86 {
public:
  IntelIBT(Ctx &ctx) : X86(ctx) { pltHeaderSize = 0; }
  void writeGotPlt(uint8_t *buf, const Symbol &s) const override;
  void writePlt(uint8_t *buf, const Symbol &sym,
                uint64_t pltEntryAddr) const override;
  void writeIBTPlt(uint8_t *buf, size_t numEntries) const override;

  static const unsigned IBTPltHeaderSize = 16;
};
} // namespace

void IntelIBT::writeGotPlt(uint8_t *buf, const Symbol &s) const {
  // Point the .got.plt slot at this symbol's entry in the second (.plt) part,
  // past the 16-byte IBT PLT header.
  uint64_t va = ctx.in.ibtPlt->getVA() + IBTPltHeaderSize +
                s.getPltIdx(ctx) * pltEntrySize;
  write32le(buf, va);
}

void IntelIBT::writePlt(uint8_t *buf, const Symbol &sym,
                        uint64_t /*pltEntryAddr*/) const {
  if (ctx.arg.isPic) {
    const uint8_t inst[] = {
        0xf3, 0x0f, 0x1e, 0xfb,       // endbr32
        0xff, 0xa3, 0, 0, 0, 0,       // jmp *name@GOT(%ebx)
        0x66, 0x0f, 0x1f, 0x44, 0, 0, // nop
    };
    memcpy(buf, inst, sizeof(inst));
    write32le(buf + 6, sym.getGotPltVA(ctx) - ctx.in.gotPlt->getVA());
    return;
  }

  const uint8_t inst[] = {
      0xf3, 0x0f, 0x1e, 0xfb,       // endbr32
      0xff, 0x25, 0, 0, 0, 0,       // jmp *foo@GOT
      0x66, 0x0f, 0x1f, 0x44, 0, 0, // nop
  };
  memcpy(buf, inst, sizeof(inst));
  write32le(buf + 6, sym.getGotPltVA(ctx));
}

void IntelIBT::writeIBTPlt(uint8_t *buf, size_t numEntries) const {
  writePltHeader(buf);
  buf += IBTPltHeaderSize;

  const uint8_t inst[] = {
      0xf3, 0x0f, 0x1e, 0xfb, // endbr32
      0x68, 0, 0, 0, 0,       // pushl $reloc_offset
      0xe9, 0, 0, 0, 0,       // jmp .PLT0@PC
      0x66, 0x90,             // nop
  };

  for (size_t i = 0; i < numEntries; ++i) {
    memcpy(buf, inst, sizeof(inst));
    write32le(buf + 5, i * sizeof(object::ELF32LE::Rel));
    // Displacement of the jmp back to the PLT header at the start of .plt.
    write32le(buf + 10, -pltHeaderSize - sizeof(inst) * i - 30);
    buf += sizeof(inst);
  }
}

// Retpoline PLTs route all indirect branches through a speculation-safe
// "thunk" loop (pause/lfence) to mitigate Spectre v2. PIC and non-PIC
// variants differ only in how the GOT is addressed.
namespace {
class RetpolinePic : public X86 {
public:
  RetpolinePic(Ctx &);
  void writeGotPlt(uint8_t *buf, const Symbol &s) const override;
  void writePltHeader(uint8_t *buf) const override;
  void writePlt(uint8_t *buf, const Symbol &sym,
                uint64_t pltEntryAddr) const override;
};

class RetpolineNoPic : public X86 {
public:
  RetpolineNoPic(Ctx &);
  void writeGotPlt(uint8_t *buf, const Symbol &s) const override;
  void writePltHeader(uint8_t *buf) const override;
  void writePlt(uint8_t *buf, const Symbol &sym,
                uint64_t pltEntryAddr) const override;
};
} // namespace

RetpolinePic::RetpolinePic(Ctx &ctx) : X86(ctx) {
  // Retpoline thunks need larger header and entries than the plain PLT.
  pltHeaderSize = 48;
  pltEntrySize = 32;
  ipltEntrySize = 32;
}

void RetpolinePic::writeGotPlt(uint8_t *buf, const Symbol &s) const {
  // +17 skips to the lazy-resolution part of the retpoline entry.
  write32le(buf, s.getPltVA(ctx) + 17);
}

void RetpolinePic::writePltHeader(uint8_t *buf) const {
  const uint8_t insn[] = {
      0xff, 0xb3, 4,    0,    0,    0,          // 0:    pushl 4(%ebx)
      0x50,                                     // 6:    pushl %eax
      0x8b, 0x83, 8,    0,    0,    0,          // 7:    mov 8(%ebx), %eax
      0xe8, 0x0e, 0x00, 0x00, 0x00,             // d:    call next
      0xf3, 0x90,                               // 12: loop: pause
      0x0f, 0xae, 0xe8,                         // 14:   lfence
      0xeb, 0xf9,                               // 17:   jmp loop
      0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, // 19:   int3; .align 16
      0x89, 0x0c, 0x24,                         // 20: next: mov %ecx, (%esp)
      0x8b, 0x4c, 0x24, 0x04,                   // 23:   mov 0x4(%esp), %ecx
      0x89, 0x44, 0x24, 0x04,                   // 27:   mov %eax ,0x4(%esp)
      0x89, 0xc8,                               // 2b:   mov %ecx, %eax
      0x59,                                     // 2d:   pop %ecx
      0xc3,                                     // 2e:   ret
      0xcc,                                     // 2f:   int3; padding
  };
  memcpy(buf, insn, sizeof(insn));
}

void RetpolinePic::writePlt(uint8_t *buf, const Symbol &sym,
                            uint64_t pltEntryAddr) const {
  unsigned relOff = ctx.in.relaPlt->entsize * sym.getPltIdx(ctx);
  const uint8_t insn[] = {
      0x50,                         // pushl %eax
      0x8b, 0x83, 0, 0, 0, 0,       // mov foo@GOT(%ebx), %eax
      0xe8, 0, 0, 0, 0,             // call plt+0x20
      0xe9, 0, 0, 0, 0,             // jmp plt+0x12
      0x68, 0, 0, 0, 0,             // pushl $reloc_offset
      0xe9, 0, 0, 0, 0,             // jmp plt+0
      0xcc, 0xcc, 0xcc, 0xcc, 0xcc, // int3; padding
  };
  memcpy(buf, insn, sizeof(insn));

  uint32_t ebx = ctx.in.gotPlt->getVA();
  unsigned off = pltEntryAddr - ctx.in.plt->getVA();
  write32le(buf + 3, sym.getGotPltVA(ctx) - ebx);
  write32le(buf + 8, -off - 12 + 32);
  write32le(buf + 13, -off - 17 + 18);
  write32le(buf + 18, relOff);
  write32le(buf + 23, -off - 27);
}

RetpolineNoPic::RetpolineNoPic(Ctx &ctx) : X86(ctx) {
  pltHeaderSize = 48;
  pltEntrySize = 32;
  ipltEntrySize = 32;
}

void RetpolineNoPic::writeGotPlt(uint8_t *buf, const Symbol &s) const {
  // +16 skips to the lazy-resolution part of the retpoline entry.
  write32le(buf, s.getPltVA(ctx) + 16);
}

void RetpolineNoPic::writePltHeader(uint8_t *buf) const {
  const uint8_t insn[] = {
      0xff, 0x35, 0,    0,    0,    0, // 0:    pushl GOTPLT+4
      0x50,                            // 6:    pushl %eax
      0xa1, 0,    0,    0,    0,       // 7:    mov GOTPLT+8, %eax
      0xe8, 0x0f, 0x00, 0x00, 0x00,    // c:    call next
      0xf3, 0x90,                      // 11: loop: pause
      0x0f, 0xae, 0xe8,                // 13:   lfence
      0xeb, 0xf9,                      // 16:   jmp loop
      0xcc, 0xcc, 0xcc, 0xcc, 0xcc,    // 18:   int3
      0xcc, 0xcc, 0xcc,                // 1f:   int3; .align 16
      0x89, 0x0c, 0x24,                // 20: next: mov %ecx, (%esp)
      0x8b, 0x4c, 0x24, 0x04,          // 23:   mov 0x4(%esp), %ecx
      0x89, 0x44, 0x24, 0x04,          // 27:   mov %eax ,0x4(%esp)
      0x89, 0xc8,                      // 2b:   mov %ecx, %eax
      0x59,                            // 2d:   pop %ecx
      0xc3,                            // 2e:   ret
      0xcc,                            // 2f:   int3; padding
  };
  memcpy(buf, insn, sizeof(insn));

  uint32_t gotPlt = ctx.in.gotPlt->getVA();
  write32le(buf + 2, gotPlt + 4);
  write32le(buf + 8, gotPlt + 8);
}

void RetpolineNoPic::writePlt(uint8_t *buf, const Symbol &sym,
                              uint64_t pltEntryAddr) const {
  unsigned relOff = ctx.in.relaPlt->entsize * sym.getPltIdx(ctx);
  const uint8_t insn[] = {
      0x50,                         // 0:  pushl %eax
      0xa1, 0, 0, 0, 0,             // 1:  mov foo_in_GOT, %eax
      0xe8, 0, 0, 0, 0,             // 6:  call plt+0x20
      0xe9, 0, 0, 0, 0,             // b:  jmp plt+0x11
      0x68, 0, 0, 0, 0,             // 10: pushl $reloc_offset
      0xe9, 0, 0, 0, 0,             // 15: jmp plt+0
      0xcc, 0xcc, 0xcc, 0xcc, 0xcc, // 1a: int3; padding
      0xcc,                         // 1f: int3; padding
  };
  memcpy(buf, insn, sizeof(insn));

  unsigned off = pltEntryAddr - ctx.in.plt->getVA();
  write32le(buf + 2, sym.getGotPltVA(ctx));
  write32le(buf + 7, -off - 11 + 32);
  write32le(buf + 12, -off - 16 + 17);
  write32le(buf + 17, relOff);
  write32le(buf + 22, -off - 26);
}

// Select the concrete i386 TargetInfo implementation based on the link
// options: retpoline PLTs (-z retpolineplt) take priority, then IBT
// (from GNU_PROPERTY notes), otherwise the plain X86 target.
void elf::setX86TargetInfo(Ctx &ctx) {
  if (ctx.arg.zRetpolineplt) {
    if (ctx.arg.isPic)
      ctx.target.reset(new RetpolinePic(ctx));
    else
      ctx.target.reset(new RetpolineNoPic(ctx));
    return;
  }

  if (ctx.arg.andFeatures & GNU_PROPERTY_X86_FEATURE_1_IBT)
    ctx.target.reset(new IntelIBT(ctx));
  else
    ctx.target.reset(new X86(ctx));
}