187104faaSGreg McGary //===- ARM64.cpp ----------------------------------------------------------===// 287104faaSGreg McGary // 387104faaSGreg McGary // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 487104faaSGreg McGary // See https://llvm.org/LICENSE.txt for license information. 587104faaSGreg McGary // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 687104faaSGreg McGary // 787104faaSGreg McGary //===----------------------------------------------------------------------===// 887104faaSGreg McGary 93bc88eb3SJez Ng #include "Arch/ARM64Common.h" 1087104faaSGreg McGary #include "InputFiles.h" 1187104faaSGreg McGary #include "Symbols.h" 1287104faaSGreg McGary #include "SyntheticSections.h" 1387104faaSGreg McGary #include "Target.h" 1487104faaSGreg McGary 1587104faaSGreg McGary #include "lld/Common/ErrorHandler.h" 16e183bf8eSJez Ng #include "mach-o/compact_unwind_encoding.h" 1787104faaSGreg McGary #include "llvm/ADT/SmallVector.h" 1887104faaSGreg McGary #include "llvm/ADT/StringRef.h" 1987104faaSGreg McGary #include "llvm/BinaryFormat/MachO.h" 2087104faaSGreg McGary #include "llvm/Support/Endian.h" 21a8843ec9SDaniel Bertalan #include "llvm/Support/LEB128.h" 2287104faaSGreg McGary #include "llvm/Support/MathExtras.h" 2387104faaSGreg McGary 2493c8559bSGreg McGary using namespace llvm; 2587104faaSGreg McGary using namespace llvm::MachO; 2687104faaSGreg McGary using namespace llvm::support::endian; 2787104faaSGreg McGary using namespace lld; 2887104faaSGreg McGary using namespace lld::macho; 2987104faaSGreg McGary 3087104faaSGreg McGary namespace { 3187104faaSGreg McGary 323bc88eb3SJez Ng struct ARM64 : ARM64Common { 3387104faaSGreg McGary ARM64(); 340d30e92fSDaniel Bertalan void writeStub(uint8_t *buf, const Symbol &, uint64_t) const override; 3587104faaSGreg McGary void writeStubHelperHeader(uint8_t *buf) const override; 367f3ddf84SJez Ng void writeStubHelperEntry(uint8_t *buf, const Symbol &, 3787104faaSGreg McGary uint64_t entryAddr) const override; 383c24fae3SKeith Smiley 393c24fae3SKeith Smiley void writeObjCMsgSendStub(uint8_t *buf, Symbol *sym, uint64_t stubsAddr, 4039139317SKyungwoo Lee uint64_t &stubOffset, uint64_t selrefVA, 4177e204c7SKyungwoo Lee Symbol *objcMsgSend) const override; 4293c8559bSGreg McGary void populateThunk(InputSection *thunk, Symbol *funcSym) override; 43a8843ec9SDaniel Bertalan void applyOptimizationHints(uint8_t *, const ObjFile &) const override; 44d1756165Salx32 45d1756165Salx32 void initICFSafeThunkBody(InputSection *thunk, 46d1756165Salx32 InputSection *branchTarget) const override; 4774046855Salx32 InputSection *getThunkBranchTarget(InputSection *thunk) const override; 48d1756165Salx32 uint32_t getICFSafeThunkSize() const override; 4987104faaSGreg McGary }; 5087104faaSGreg McGary 5187104faaSGreg McGary } // namespace 5287104faaSGreg McGary 5387104faaSGreg McGary // Random notes on reloc types: 5487104faaSGreg McGary // ADDEND always pairs with BRANCH26, PAGE21, or PAGEOFF12 555e851733SJez Ng // POINTER_TO_GOT: ld64 supports a 4-byte pc-relative form as well as an 8-byte 565e851733SJez Ng // absolute version of this relocation. The semantics of the absolute relocation 575e851733SJez Ng // are weird -- it results in the value of the GOT slot being written, instead 585e851733SJez Ng // of the address. Let's not support it unless we find a real-world use case. 591fb9466cSDaniel Bertalan static constexpr std::array<RelocAttrs, 11> relocAttrsArray{{ 6087104faaSGreg McGary #define B(x) RelocAttrBits::x 6188cb786eSJez Ng {"UNSIGNED", 6288cb786eSJez Ng B(UNSIGNED) | B(ABSOLUTE) | B(EXTERN) | B(LOCAL) | B(BYTE4) | B(BYTE8)}, 631aa29dffSJez Ng {"SUBTRACTOR", B(SUBTRAHEND) | B(EXTERN) | B(BYTE4) | B(BYTE8)}, 6487104faaSGreg McGary {"BRANCH26", B(PCREL) | B(EXTERN) | B(BRANCH) | B(BYTE4)}, 6587104faaSGreg McGary {"PAGE21", B(PCREL) | B(EXTERN) | B(BYTE4)}, 6687104faaSGreg McGary {"PAGEOFF12", B(ABSOLUTE) | B(EXTERN) | B(BYTE4)}, 6787104faaSGreg McGary {"GOT_LOAD_PAGE21", B(PCREL) | B(EXTERN) | B(GOT) | B(BYTE4)}, 6887104faaSGreg McGary {"GOT_LOAD_PAGEOFF12", 6987104faaSGreg McGary B(ABSOLUTE) | B(EXTERN) | B(GOT) | B(LOAD) | B(BYTE4)}, 705e851733SJez Ng {"POINTER_TO_GOT", B(PCREL) | B(EXTERN) | B(GOT) | B(POINTER) | B(BYTE4)}, 7187104faaSGreg McGary {"TLVP_LOAD_PAGE21", B(PCREL) | B(EXTERN) | B(TLV) | B(BYTE4)}, 7287104faaSGreg McGary {"TLVP_LOAD_PAGEOFF12", 7387104faaSGreg McGary B(ABSOLUTE) | B(EXTERN) | B(TLV) | B(LOAD) | B(BYTE4)}, 7487104faaSGreg McGary {"ADDEND", B(ADDEND)}, 7587104faaSGreg McGary #undef B 7687104faaSGreg McGary }}; 7787104faaSGreg McGary 7887104faaSGreg McGary static constexpr uint32_t stubCode[] = { 7987104faaSGreg McGary 0x90000010, // 00: adrp x16, __la_symbol_ptr@page 8087104faaSGreg McGary 0xf9400210, // 04: ldr x16, [x16, __la_symbol_ptr@pageoff] 8187104faaSGreg McGary 0xd61f0200, // 08: br x16 8287104faaSGreg McGary }; 8387104faaSGreg McGary 840d30e92fSDaniel Bertalan void ARM64::writeStub(uint8_t *buf8, const Symbol &sym, 850d30e92fSDaniel Bertalan uint64_t pointerVA) const { 860d30e92fSDaniel Bertalan ::writeStub(buf8, stubCode, sym, pointerVA); 8787104faaSGreg McGary } 8887104faaSGreg McGary 8987104faaSGreg McGary static constexpr uint32_t stubHelperHeaderCode[] = { 9087104faaSGreg McGary 0x90000011, // 00: adrp x17, _dyld_private@page 9187104faaSGreg McGary 0x91000231, // 04: add x17, x17, _dyld_private@pageoff 9287104faaSGreg McGary 0xa9bf47f0, // 08: stp x16/x17, [sp, #-16]! 9387104faaSGreg McGary 0x90000010, // 0c: adrp x16, dyld_stub_binder@page 9487104faaSGreg McGary 0xf9400210, // 10: ldr x16, [x16, dyld_stub_binder@pageoff] 9587104faaSGreg McGary 0xd61f0200, // 14: br x16 9687104faaSGreg McGary }; 9787104faaSGreg McGary 9887104faaSGreg McGary void ARM64::writeStubHelperHeader(uint8_t *buf8) const { 994938b090SJez Ng ::writeStubHelperHeader<LP64>(buf8, stubHelperHeaderCode); 10087104faaSGreg McGary } 10187104faaSGreg McGary 10287104faaSGreg McGary static constexpr uint32_t stubHelperEntryCode[] = { 10387104faaSGreg McGary 0x18000050, // 00: ldr w16, l0 10487104faaSGreg McGary 0x14000000, // 04: b stubHelperHeader 10587104faaSGreg McGary 0x00000000, // 08: l0: .long 0 10687104faaSGreg McGary }; 10787104faaSGreg McGary 1087f3ddf84SJez Ng void ARM64::writeStubHelperEntry(uint8_t *buf8, const Symbol &sym, 10987104faaSGreg McGary uint64_t entryVA) const { 1104938b090SJez Ng ::writeStubHelperEntry(buf8, stubHelperEntryCode, sym, entryVA); 11187104faaSGreg McGary } 11287104faaSGreg McGary 1133c24fae3SKeith Smiley static constexpr uint32_t objcStubsFastCode[] = { 1143c24fae3SKeith Smiley 0x90000001, // adrp x1, __objc_selrefs@page 1153c24fae3SKeith Smiley 0xf9400021, // ldr x1, [x1, @selector("foo")@pageoff] 1163c24fae3SKeith Smiley 0x90000010, // adrp x16, _got@page 1173c24fae3SKeith Smiley 0xf9400210, // ldr x16, [x16, _objc_msgSend@pageoff] 1183c24fae3SKeith Smiley 0xd61f0200, // br x16 1193c24fae3SKeith Smiley 0xd4200020, // brk #0x1 1203c24fae3SKeith Smiley 0xd4200020, // brk #0x1 1213c24fae3SKeith Smiley 0xd4200020, // brk #0x1 1223c24fae3SKeith Smiley }; 1233c24fae3SKeith Smiley 12477e204c7SKyungwoo Lee static constexpr uint32_t objcStubsSmallCode[] = { 12577e204c7SKyungwoo Lee 0x90000001, // adrp x1, __objc_selrefs@page 12677e204c7SKyungwoo Lee 0xf9400021, // ldr x1, [x1, @selector("foo")@pageoff] 12777e204c7SKyungwoo Lee 0x14000000, // b _objc_msgSend 12877e204c7SKyungwoo Lee }; 12977e204c7SKyungwoo Lee 1303c24fae3SKeith Smiley void ARM64::writeObjCMsgSendStub(uint8_t *buf, Symbol *sym, uint64_t stubsAddr, 13139139317SKyungwoo Lee uint64_t &stubOffset, uint64_t selrefVA, 13277e204c7SKyungwoo Lee Symbol *objcMsgSend) const { 13377e204c7SKyungwoo Lee uint64_t objcMsgSendAddr; 13477e204c7SKyungwoo Lee uint64_t objcStubSize; 13577e204c7SKyungwoo Lee uint64_t objcMsgSendIndex; 13677e204c7SKyungwoo Lee 13777e204c7SKyungwoo Lee if (config->objcStubsMode == ObjCStubsMode::fast) { 13877e204c7SKyungwoo Lee objcStubSize = target->objcStubsFastSize; 13977e204c7SKyungwoo Lee objcMsgSendAddr = in.got->addr; 14077e204c7SKyungwoo Lee objcMsgSendIndex = objcMsgSend->gotIndex; 14177e204c7SKyungwoo Lee ::writeObjCMsgSendFastStub<LP64>(buf, objcStubsFastCode, sym, stubsAddr, 14239139317SKyungwoo Lee stubOffset, selrefVA, objcMsgSendAddr, 14339139317SKyungwoo Lee objcMsgSendIndex); 14477e204c7SKyungwoo Lee } else { 14577e204c7SKyungwoo Lee assert(config->objcStubsMode == ObjCStubsMode::small); 14677e204c7SKyungwoo Lee objcStubSize = target->objcStubsSmallSize; 14777e204c7SKyungwoo Lee if (auto *d = dyn_cast<Defined>(objcMsgSend)) { 14877e204c7SKyungwoo Lee objcMsgSendAddr = d->getVA(); 14977e204c7SKyungwoo Lee objcMsgSendIndex = 0; 15077e204c7SKyungwoo Lee } else { 15177e204c7SKyungwoo Lee objcMsgSendAddr = in.stubs->addr; 15277e204c7SKyungwoo Lee objcMsgSendIndex = objcMsgSend->stubsIndex; 15377e204c7SKyungwoo Lee } 15477e204c7SKyungwoo Lee ::writeObjCMsgSendSmallStub<LP64>(buf, objcStubsSmallCode, sym, stubsAddr, 15539139317SKyungwoo Lee stubOffset, selrefVA, objcMsgSendAddr, 15639139317SKyungwoo Lee objcMsgSendIndex); 15777e204c7SKyungwoo Lee } 15877e204c7SKyungwoo Lee stubOffset += objcStubSize; 1593c24fae3SKeith Smiley } 1603c24fae3SKeith Smiley 16193c8559bSGreg McGary // A thunk is the relaxed variation of stubCode. We don't need the 16293c8559bSGreg McGary // extra indirection through a lazy pointer because the target address 16393c8559bSGreg McGary // is known at link time. 16493c8559bSGreg McGary static constexpr uint32_t thunkCode[] = { 16593c8559bSGreg McGary 0x90000010, // 00: adrp x16, <thunk.ptr>@page 16693c8559bSGreg McGary 0x91000210, // 04: add x16, [x16,<thunk.ptr>@pageoff] 16793c8559bSGreg McGary 0xd61f0200, // 08: br x16 16893c8559bSGreg McGary }; 16993c8559bSGreg McGary 17093c8559bSGreg McGary void ARM64::populateThunk(InputSection *thunk, Symbol *funcSym) { 17193c8559bSGreg McGary thunk->align = 4; 17293c8559bSGreg McGary thunk->data = {reinterpret_cast<const uint8_t *>(thunkCode), 17393c8559bSGreg McGary sizeof(thunkCode)}; 1743df4c5a9SJez Ng thunk->relocs.emplace_back(/*type=*/ARM64_RELOC_PAGEOFF12, 17593c8559bSGreg McGary /*pcrel=*/false, /*length=*/2, 17693c8559bSGreg McGary /*offset=*/4, /*addend=*/0, 1773df4c5a9SJez Ng /*referent=*/funcSym); 1783df4c5a9SJez Ng thunk->relocs.emplace_back(/*type=*/ARM64_RELOC_PAGE21, 17993c8559bSGreg McGary /*pcrel=*/true, /*length=*/2, 18093c8559bSGreg McGary /*offset=*/0, /*addend=*/0, 1813df4c5a9SJez Ng /*referent=*/funcSym); 18293c8559bSGreg McGary } 183d1756165Salx32 // Just a single direct branch to the target function. 184d1756165Salx32 static constexpr uint32_t icfSafeThunkCode[] = { 185d1756165Salx32 0x14000000, // 08: b target 186d1756165Salx32 }; 187d1756165Salx32 188d1756165Salx32 void ARM64::initICFSafeThunkBody(InputSection *thunk, 189d1756165Salx32 InputSection *branchTarget) const { 190d1756165Salx32 // The base data here will not be itself modified, we'll just be adding a 191d1756165Salx32 // reloc below. So we can directly use the constexpr above as the data. 192d1756165Salx32 thunk->data = {reinterpret_cast<const uint8_t *>(icfSafeThunkCode), 193d1756165Salx32 sizeof(icfSafeThunkCode)}; 194d1756165Salx32 195d1756165Salx32 thunk->relocs.emplace_back(/*type=*/ARM64_RELOC_BRANCH26, 196d1756165Salx32 /*pcrel=*/true, /*length=*/2, 197d1756165Salx32 /*offset=*/0, /*addend=*/0, 198d1756165Salx32 /*referent=*/branchTarget); 199d1756165Salx32 } 200d1756165Salx32 20174046855Salx32 InputSection *ARM64::getThunkBranchTarget(InputSection *thunk) const { 20274046855Salx32 assert(thunk->relocs.size() == 1 && 20374046855Salx32 "expected a single reloc on ARM64 ICF thunk"); 20474046855Salx32 auto &reloc = thunk->relocs[0]; 205e04fde19SKazu Hirata assert(isa<InputSection *>(reloc.referent) && 20674046855Salx32 "ARM64 thunk reloc is expected to point to an InputSection"); 20774046855Salx32 208*a0ec3858SKazu Hirata return cast<InputSection *>(reloc.referent); 20974046855Salx32 } 21074046855Salx32 211d1756165Salx32 uint32_t ARM64::getICFSafeThunkSize() const { return sizeof(icfSafeThunkCode); } 21293c8559bSGreg McGary 2133bc88eb3SJez Ng ARM64::ARM64() : ARM64Common(LP64()) { 21487104faaSGreg McGary cpuType = CPU_TYPE_ARM64; 21587104faaSGreg McGary cpuSubtype = CPU_SUBTYPE_ARM64_ALL; 21687104faaSGreg McGary 21787104faaSGreg McGary stubSize = sizeof(stubCode); 21893c8559bSGreg McGary thunkSize = sizeof(thunkCode); 21997211975SNico Weber 2203c24fae3SKeith Smiley objcStubsFastSize = sizeof(objcStubsFastCode); 22177e204c7SKyungwoo Lee objcStubsFastAlignment = 32; 22277e204c7SKyungwoo Lee objcStubsSmallSize = sizeof(objcStubsSmallCode); 22377e204c7SKyungwoo Lee objcStubsSmallAlignment = 4; 2243c24fae3SKeith Smiley 22597211975SNico Weber // Branch immediate is two's complement 26 bits, which is implicitly 22697211975SNico Weber // multiplied by 4 (since all functions are 4-aligned: The branch range 22797211975SNico Weber // is -4*(2**(26-1))..4*(2**(26-1) - 1). 22897211975SNico Weber backwardBranchRange = 128 * 1024 * 1024; 22997211975SNico Weber forwardBranchRange = backwardBranchRange - 4; 23097211975SNico Weber 231e183bf8eSJez Ng modeDwarfEncoding = UNWIND_ARM64_MODE_DWARF; 232e183bf8eSJez Ng subtractorRelocType = ARM64_RELOC_SUBTRACTOR; 233e183bf8eSJez Ng unsignedRelocType = ARM64_RELOC_UNSIGNED; 234e183bf8eSJez Ng 23587104faaSGreg McGary stubHelperHeaderSize = sizeof(stubHelperHeaderCode); 23687104faaSGreg McGary stubHelperEntrySize = sizeof(stubHelperEntryCode); 2371fb9466cSDaniel Bertalan 2381fb9466cSDaniel Bertalan relocAttrs = {relocAttrsArray.data(), relocAttrsArray.size()}; 23987104faaSGreg McGary } 24087104faaSGreg McGary 241a3f67f09SDaniel Bertalan namespace { 242a3f67f09SDaniel Bertalan struct Adrp { 243a3f67f09SDaniel Bertalan uint32_t destRegister; 244a8ec90adSDaniel Bertalan int64_t addend; 245a3f67f09SDaniel Bertalan }; 246a3f67f09SDaniel Bertalan 247a3f67f09SDaniel Bertalan struct Add { 248a3f67f09SDaniel Bertalan uint8_t destRegister; 249a3f67f09SDaniel Bertalan uint8_t srcRegister; 250a3f67f09SDaniel Bertalan uint32_t addend; 251a3f67f09SDaniel Bertalan }; 252a3f67f09SDaniel Bertalan 253573c7e6bSDaniel Bertalan enum ExtendType { ZeroExtend = 1, Sign64 = 2, Sign32 = 3 }; 254573c7e6bSDaniel Bertalan 255573c7e6bSDaniel Bertalan struct Ldr { 256573c7e6bSDaniel Bertalan uint8_t destRegister; 257573c7e6bSDaniel Bertalan uint8_t baseRegister; 258ecb14fd8SDaniel Bertalan uint8_t p2Size; 259573c7e6bSDaniel Bertalan bool isFloat; 260573c7e6bSDaniel Bertalan ExtendType extendType; 261ecb14fd8SDaniel Bertalan int64_t offset; 262573c7e6bSDaniel Bertalan }; 263a3f67f09SDaniel Bertalan } // namespace 264a3f67f09SDaniel Bertalan 265a3f67f09SDaniel Bertalan static bool parseAdrp(uint32_t insn, Adrp &adrp) { 266a3f67f09SDaniel Bertalan if ((insn & 0x9f000000) != 0x90000000) 267a3f67f09SDaniel Bertalan return false; 268a3f67f09SDaniel Bertalan adrp.destRegister = insn & 0x1f; 269a8ec90adSDaniel Bertalan uint64_t immHi = (insn >> 5) & 0x7ffff; 270a8ec90adSDaniel Bertalan uint64_t immLo = (insn >> 29) & 0x3; 271a8ec90adSDaniel Bertalan adrp.addend = SignExtend64<21>(immLo | (immHi << 2)) * 4096; 272a3f67f09SDaniel Bertalan return true; 273a3f67f09SDaniel Bertalan } 274a3f67f09SDaniel Bertalan 275a3f67f09SDaniel Bertalan static bool parseAdd(uint32_t insn, Add &add) { 276a3f67f09SDaniel Bertalan if ((insn & 0xffc00000) != 0x91000000) 277a3f67f09SDaniel Bertalan return false; 278a3f67f09SDaniel Bertalan add.destRegister = insn & 0x1f; 279a3f67f09SDaniel Bertalan add.srcRegister = (insn >> 5) & 0x1f; 280a3f67f09SDaniel Bertalan add.addend = (insn >> 10) & 0xfff; 281a3f67f09SDaniel Bertalan return true; 282a3f67f09SDaniel Bertalan } 283a3f67f09SDaniel Bertalan 284573c7e6bSDaniel Bertalan static bool parseLdr(uint32_t insn, Ldr &ldr) { 285573c7e6bSDaniel Bertalan ldr.destRegister = insn & 0x1f; 286573c7e6bSDaniel Bertalan ldr.baseRegister = (insn >> 5) & 0x1f; 287573c7e6bSDaniel Bertalan uint8_t size = insn >> 30; 288573c7e6bSDaniel Bertalan uint8_t opc = (insn >> 22) & 3; 289573c7e6bSDaniel Bertalan 290573c7e6bSDaniel Bertalan if ((insn & 0x3fc00000) == 0x39400000) { 291573c7e6bSDaniel Bertalan // LDR (immediate), LDRB (immediate), LDRH (immediate) 292ecb14fd8SDaniel Bertalan ldr.p2Size = size; 293573c7e6bSDaniel Bertalan ldr.extendType = ZeroExtend; 294573c7e6bSDaniel Bertalan ldr.isFloat = false; 295573c7e6bSDaniel Bertalan } else if ((insn & 0x3f800000) == 0x39800000) { 296573c7e6bSDaniel Bertalan // LDRSB (immediate), LDRSH (immediate), LDRSW (immediate) 297ecb14fd8SDaniel Bertalan ldr.p2Size = size; 298573c7e6bSDaniel Bertalan ldr.extendType = static_cast<ExtendType>(opc); 299573c7e6bSDaniel Bertalan ldr.isFloat = false; 300573c7e6bSDaniel Bertalan } else if ((insn & 0x3f400000) == 0x3d400000) { 301573c7e6bSDaniel Bertalan // LDR (immediate, SIMD&FP) 302573c7e6bSDaniel Bertalan ldr.extendType = ZeroExtend; 303573c7e6bSDaniel Bertalan ldr.isFloat = true; 304ecb14fd8SDaniel Bertalan if (opc == 1) 305ecb14fd8SDaniel Bertalan ldr.p2Size = size; 306573c7e6bSDaniel Bertalan else if (size == 0 && opc == 3) 307ecb14fd8SDaniel Bertalan ldr.p2Size = 4; 308573c7e6bSDaniel Bertalan else 309573c7e6bSDaniel Bertalan return false; 310573c7e6bSDaniel Bertalan } else { 311573c7e6bSDaniel Bertalan return false; 312573c7e6bSDaniel Bertalan } 313ecb14fd8SDaniel Bertalan ldr.offset = ((insn >> 10) & 0xfff) << ldr.p2Size; 314573c7e6bSDaniel Bertalan return true; 315573c7e6bSDaniel Bertalan } 316573c7e6bSDaniel Bertalan 317ecb14fd8SDaniel Bertalan static bool isValidAdrOffset(int32_t delta) { return isInt<21>(delta); } 318ecb14fd8SDaniel Bertalan 319a3f67f09SDaniel Bertalan static void writeAdr(void *loc, uint32_t dest, int32_t delta) { 320ecb14fd8SDaniel Bertalan assert(isValidAdrOffset(delta)); 321a3f67f09SDaniel Bertalan uint32_t opcode = 0x10000000; 322a3f67f09SDaniel Bertalan uint32_t immHi = (delta & 0x001ffffc) << 3; 323a3f67f09SDaniel Bertalan uint32_t immLo = (delta & 0x00000003) << 29; 324a3f67f09SDaniel Bertalan write32le(loc, opcode | immHi | immLo | dest); 325a3f67f09SDaniel Bertalan } 326a3f67f09SDaniel Bertalan 327a3f67f09SDaniel Bertalan static void writeNop(void *loc) { write32le(loc, 0xd503201f); } 328a3f67f09SDaniel Bertalan 329ecb14fd8SDaniel Bertalan static bool isLiteralLdrEligible(const Ldr &ldr) { 330ecb14fd8SDaniel Bertalan return ldr.p2Size > 1 && isShiftedInt<19, 2>(ldr.offset); 331ecb14fd8SDaniel Bertalan } 332ecb14fd8SDaniel Bertalan 333ecb14fd8SDaniel Bertalan static void writeLiteralLdr(void *loc, const Ldr &ldr) { 334ecb14fd8SDaniel Bertalan assert(isLiteralLdrEligible(ldr)); 335ecb14fd8SDaniel Bertalan uint32_t imm19 = (ldr.offset / 4 & maskTrailingOnes<uint32_t>(19)) << 5; 336ecb14fd8SDaniel Bertalan uint32_t opcode; 337ecb14fd8SDaniel Bertalan switch (ldr.p2Size) { 338ecb14fd8SDaniel Bertalan case 2: 339ecb14fd8SDaniel Bertalan if (ldr.isFloat) 340573c7e6bSDaniel Bertalan opcode = 0x1c000000; 341573c7e6bSDaniel Bertalan else 342ecb14fd8SDaniel Bertalan opcode = ldr.extendType == Sign64 ? 0x98000000 : 0x18000000; 343573c7e6bSDaniel Bertalan break; 344ecb14fd8SDaniel Bertalan case 3: 345ecb14fd8SDaniel Bertalan opcode = ldr.isFloat ? 0x5c000000 : 0x58000000; 346573c7e6bSDaniel Bertalan break; 347ecb14fd8SDaniel Bertalan case 4: 348573c7e6bSDaniel Bertalan opcode = 0x9c000000; 349573c7e6bSDaniel Bertalan break; 350573c7e6bSDaniel Bertalan default: 351ecb14fd8SDaniel Bertalan llvm_unreachable("Invalid literal ldr size"); 352573c7e6bSDaniel Bertalan } 353ecb14fd8SDaniel Bertalan write32le(loc, opcode | imm19 | ldr.destRegister); 354ecb14fd8SDaniel Bertalan } 355ecb14fd8SDaniel Bertalan 356ecb14fd8SDaniel Bertalan static bool isImmediateLdrEligible(const Ldr &ldr) { 357ecb14fd8SDaniel Bertalan // Note: We deviate from ld64's behavior, which converts to immediate loads 358ecb14fd8SDaniel Bertalan // only if ldr.offset < 4096, even though the offset is divided by the load's 359ecb14fd8SDaniel Bertalan // size in the 12-bit immediate operand. Only the unsigned offset variant is 360ecb14fd8SDaniel Bertalan // supported. 361ecb14fd8SDaniel Bertalan 362ecb14fd8SDaniel Bertalan uint32_t size = 1 << ldr.p2Size; 363ecb14fd8SDaniel Bertalan return ldr.offset >= 0 && (ldr.offset % size) == 0 && 364ecb14fd8SDaniel Bertalan isUInt<12>(ldr.offset >> ldr.p2Size); 365ecb14fd8SDaniel Bertalan } 366ecb14fd8SDaniel Bertalan 367ecb14fd8SDaniel Bertalan static void writeImmediateLdr(void *loc, const Ldr &ldr) { 368ecb14fd8SDaniel Bertalan assert(isImmediateLdrEligible(ldr)); 369ecb14fd8SDaniel Bertalan uint32_t opcode = 0x39000000; 370ecb14fd8SDaniel Bertalan if (ldr.isFloat) { 371ecb14fd8SDaniel Bertalan opcode |= 0x04000000; 372ecb14fd8SDaniel Bertalan assert(ldr.extendType == ZeroExtend); 373ecb14fd8SDaniel Bertalan } 374ecb14fd8SDaniel Bertalan opcode |= ldr.destRegister; 375ecb14fd8SDaniel Bertalan opcode |= ldr.baseRegister << 5; 376ecb14fd8SDaniel Bertalan uint8_t size, opc; 377ecb14fd8SDaniel Bertalan if (ldr.p2Size == 4) { 378ecb14fd8SDaniel Bertalan size = 0; 379ecb14fd8SDaniel Bertalan opc = 3; 380ecb14fd8SDaniel Bertalan } else { 381ecb14fd8SDaniel Bertalan opc = ldr.extendType; 382ecb14fd8SDaniel Bertalan size = ldr.p2Size; 383ecb14fd8SDaniel Bertalan } 384ecb14fd8SDaniel Bertalan uint32_t immBits = ldr.offset >> ldr.p2Size; 385ecb14fd8SDaniel Bertalan write32le(loc, opcode | (immBits << 10) | (opc << 22) | (size << 30)); 386573c7e6bSDaniel Bertalan } 387573c7e6bSDaniel Bertalan 388a3f67f09SDaniel Bertalan // Transforms a pair of adrp+add instructions into an adr instruction if the 389a3f67f09SDaniel Bertalan // target is within the +/- 1 MiB range allowed by the adr's 21 bit signed 390a3f67f09SDaniel Bertalan // immediate offset. 391a3f67f09SDaniel Bertalan // 392a3f67f09SDaniel Bertalan // adrp xN, _foo@PAGE 393a3f67f09SDaniel Bertalan // add xM, xN, _foo@PAGEOFF 394a3f67f09SDaniel Bertalan // -> 395a3f67f09SDaniel Bertalan // adr xM, _foo 396a3f67f09SDaniel Bertalan // nop 397a8843ec9SDaniel Bertalan static void applyAdrpAdd(uint8_t *buf, const ConcatInputSection *isec, 398a8843ec9SDaniel Bertalan uint64_t offset1, uint64_t offset2) { 399a8ec90adSDaniel Bertalan uint32_t ins1 = read32le(buf + offset1); 400a8ec90adSDaniel Bertalan uint32_t ins2 = read32le(buf + offset2); 401a3f67f09SDaniel Bertalan Adrp adrp; 402a3f67f09SDaniel Bertalan Add add; 403a8ec90adSDaniel Bertalan if (!parseAdrp(ins1, adrp) || !parseAdd(ins2, add)) 404a3f67f09SDaniel Bertalan return; 405a3f67f09SDaniel Bertalan if (adrp.destRegister != add.srcRegister) 406a3f67f09SDaniel Bertalan return; 407a3f67f09SDaniel Bertalan 408a8ec90adSDaniel Bertalan uint64_t addr1 = isec->getVA() + offset1; 409a8ec90adSDaniel Bertalan uint64_t referent = pageBits(addr1) + adrp.addend + add.addend; 410a8ec90adSDaniel Bertalan int64_t delta = referent - addr1; 411ecb14fd8SDaniel Bertalan if (!isValidAdrOffset(delta)) 412a3f67f09SDaniel Bertalan return; 413a3f67f09SDaniel Bertalan 414a8ec90adSDaniel Bertalan writeAdr(buf + offset1, add.destRegister, delta); 415a8ec90adSDaniel Bertalan writeNop(buf + offset2); 416a3f67f09SDaniel Bertalan } 417a3f67f09SDaniel Bertalan 418a3f67f09SDaniel Bertalan // Transforms two adrp instructions into a single adrp if their referent 419a3f67f09SDaniel Bertalan // addresses are located on the same 4096 byte page. 420a3f67f09SDaniel Bertalan // 421a3f67f09SDaniel Bertalan // adrp xN, _foo@PAGE 422a3f67f09SDaniel Bertalan // adrp xN, _bar@PAGE 423a3f67f09SDaniel Bertalan // -> 424a3f67f09SDaniel Bertalan // adrp xN, _foo@PAGE 425a3f67f09SDaniel Bertalan // nop 426a8843ec9SDaniel Bertalan static void applyAdrpAdrp(uint8_t *buf, const ConcatInputSection *isec, 427a8843ec9SDaniel Bertalan uint64_t offset1, uint64_t offset2) { 428a8ec90adSDaniel Bertalan uint32_t ins1 = read32le(buf + offset1); 429a8ec90adSDaniel Bertalan uint32_t ins2 = read32le(buf + offset2); 430a3f67f09SDaniel Bertalan Adrp adrp1, adrp2; 431a3f67f09SDaniel Bertalan if (!parseAdrp(ins1, adrp1) || !parseAdrp(ins2, adrp2)) 432a3f67f09SDaniel Bertalan return; 433a3f67f09SDaniel Bertalan if (adrp1.destRegister != adrp2.destRegister) 434a3f67f09SDaniel Bertalan return; 435a3f67f09SDaniel Bertalan 436a8ec90adSDaniel Bertalan uint64_t page1 = pageBits(offset1 + isec->getVA()) + adrp1.addend; 437a8ec90adSDaniel Bertalan uint64_t page2 = pageBits(offset2 + isec->getVA()) + adrp2.addend; 438a8ec90adSDaniel Bertalan if (page1 != page2) 439a3f67f09SDaniel Bertalan return; 440a3f67f09SDaniel Bertalan 441a8ec90adSDaniel Bertalan writeNop(buf + offset2); 442a3f67f09SDaniel Bertalan } 443a3f67f09SDaniel Bertalan 444573c7e6bSDaniel Bertalan // Transforms a pair of adrp+ldr (immediate) instructions into an ldr (literal) 445573c7e6bSDaniel Bertalan // load from a PC-relative address if it is 4-byte aligned and within +/- 1 MiB, 446573c7e6bSDaniel Bertalan // as ldr can encode a signed 19-bit offset that gets multiplied by 4. 447573c7e6bSDaniel Bertalan // 448573c7e6bSDaniel Bertalan // adrp xN, _foo@PAGE 449573c7e6bSDaniel Bertalan // ldr xM, [xN, _foo@PAGEOFF] 450573c7e6bSDaniel Bertalan // -> 451573c7e6bSDaniel Bertalan // nop 452573c7e6bSDaniel Bertalan // ldr xM, _foo 453a8843ec9SDaniel Bertalan static void applyAdrpLdr(uint8_t *buf, const ConcatInputSection *isec, 454a8843ec9SDaniel Bertalan uint64_t offset1, uint64_t offset2) { 455a8ec90adSDaniel Bertalan uint32_t ins1 = read32le(buf + offset1); 456a8ec90adSDaniel Bertalan uint32_t ins2 = read32le(buf + offset2); 457573c7e6bSDaniel Bertalan Adrp adrp; 458573c7e6bSDaniel Bertalan Ldr ldr; 459a8ec90adSDaniel Bertalan if (!parseAdrp(ins1, adrp) || !parseLdr(ins2, ldr)) 460573c7e6bSDaniel Bertalan return; 461573c7e6bSDaniel Bertalan if (adrp.destRegister != ldr.baseRegister) 462573c7e6bSDaniel Bertalan return; 463573c7e6bSDaniel Bertalan 464a8ec90adSDaniel Bertalan uint64_t addr1 = isec->getVA() + offset1; 465a8ec90adSDaniel Bertalan uint64_t addr2 = isec->getVA() + offset2; 466a8ec90adSDaniel Bertalan uint64_t referent = pageBits(addr1) + adrp.addend + ldr.offset; 467a8ec90adSDaniel Bertalan ldr.offset = referent - addr2; 468ecb14fd8SDaniel Bertalan if (!isLiteralLdrEligible(ldr)) 469573c7e6bSDaniel Bertalan return; 470573c7e6bSDaniel Bertalan 471a8ec90adSDaniel Bertalan writeNop(buf + offset1); 472a8ec90adSDaniel Bertalan writeLiteralLdr(buf + offset2, ldr); 473573c7e6bSDaniel Bertalan } 474573c7e6bSDaniel Bertalan 4752028fe6fSDaniel Bertalan // GOT loads are emitted by the compiler as a pair of adrp and ldr instructions, 4762028fe6fSDaniel Bertalan // but they may be changed to adrp+add by relaxGotLoad(). This hint performs 4772028fe6fSDaniel Bertalan // the AdrpLdr or AdrpAdd transformation depending on whether it was relaxed. 478a8843ec9SDaniel Bertalan static void applyAdrpLdrGot(uint8_t *buf, const ConcatInputSection *isec, 479a8843ec9SDaniel Bertalan uint64_t offset1, uint64_t offset2) { 480a8ec90adSDaniel Bertalan uint32_t ins2 = read32le(buf + offset2); 4812028fe6fSDaniel Bertalan Add add; 4822028fe6fSDaniel Bertalan Ldr ldr; 4832028fe6fSDaniel Bertalan if (parseAdd(ins2, add)) 484a8843ec9SDaniel Bertalan applyAdrpAdd(buf, isec, offset1, offset2); 4852028fe6fSDaniel Bertalan else if (parseLdr(ins2, ldr)) 486a8843ec9SDaniel Bertalan applyAdrpLdr(buf, isec, offset1, offset2); 4872028fe6fSDaniel Bertalan } 4882028fe6fSDaniel Bertalan 489d1e40f4dSDaniel Bertalan // Optimizes an adrp+add+ldr sequence used for loading from a local symbol's 490d1e40f4dSDaniel Bertalan // address by loading directly if it's close enough, or to an adrp(p)+ldr 491d1e40f4dSDaniel Bertalan // sequence if it's not. 492d1e40f4dSDaniel Bertalan // 493d1e40f4dSDaniel Bertalan // adrp x0, _foo@PAGE 494d1e40f4dSDaniel Bertalan // add x1, x0, _foo@PAGEOFF 495d1e40f4dSDaniel Bertalan // ldr x2, [x1, #off] 496a8843ec9SDaniel Bertalan static void applyAdrpAddLdr(uint8_t *buf, const ConcatInputSection *isec, 497a8843ec9SDaniel Bertalan uint64_t offset1, uint64_t offset2, 498a8ec90adSDaniel Bertalan uint64_t offset3) { 499a8ec90adSDaniel Bertalan uint32_t ins1 = read32le(buf + offset1); 500ecb14fd8SDaniel Bertalan Adrp adrp; 501ecb14fd8SDaniel Bertalan if (!parseAdrp(ins1, adrp)) 502ecb14fd8SDaniel Bertalan return; 503a8ec90adSDaniel Bertalan uint32_t ins2 = read32le(buf + offset2); 504d1e40f4dSDaniel Bertalan Add add; 505d1e40f4dSDaniel Bertalan if (!parseAdd(ins2, add)) 506d1e40f4dSDaniel Bertalan return; 507a8ec90adSDaniel Bertalan uint32_t ins3 = read32le(buf + offset3); 508d1e40f4dSDaniel Bertalan Ldr ldr; 509d1e40f4dSDaniel Bertalan if (!parseLdr(ins3, ldr)) 510d1e40f4dSDaniel Bertalan return; 511d1e40f4dSDaniel Bertalan if (adrp.destRegister != add.srcRegister) 512ecb14fd8SDaniel Bertalan return; 513d1e40f4dSDaniel Bertalan if (add.destRegister != ldr.baseRegister) 514ecb14fd8SDaniel Bertalan return; 515ecb14fd8SDaniel Bertalan 516ecb14fd8SDaniel Bertalan // Load from the target address directly. 517ecb14fd8SDaniel Bertalan // nop 518ecb14fd8SDaniel Bertalan // nop 519ecb14fd8SDaniel Bertalan // ldr x2, [_foo + #off] 520a8ec90adSDaniel Bertalan uint64_t addr1 = isec->getVA() + offset1; 521a8ec90adSDaniel Bertalan uint64_t addr3 = isec->getVA() + offset3; 522a8ec90adSDaniel Bertalan uint64_t referent = pageBits(addr1) + adrp.addend + add.addend; 523d1e40f4dSDaniel Bertalan Ldr literalLdr = ldr; 524a8ec90adSDaniel Bertalan literalLdr.offset += referent - addr3; 525ecb14fd8SDaniel Bertalan if (isLiteralLdrEligible(literalLdr)) { 526a8ec90adSDaniel Bertalan writeNop(buf + offset1); 527a8ec90adSDaniel Bertalan writeNop(buf + offset2); 528a8ec90adSDaniel Bertalan writeLiteralLdr(buf + offset3, literalLdr); 529ecb14fd8SDaniel Bertalan return; 530ecb14fd8SDaniel Bertalan } 531ecb14fd8SDaniel Bertalan 532ecb14fd8SDaniel Bertalan // Load the target address into a register and load from there indirectly. 533ecb14fd8SDaniel Bertalan // adr x1, _foo 534ecb14fd8SDaniel Bertalan // nop 535ecb14fd8SDaniel Bertalan // ldr x2, [x1, #off] 536a8ec90adSDaniel Bertalan int64_t adrOffset = referent - addr1; 537ecb14fd8SDaniel Bertalan if (isValidAdrOffset(adrOffset)) { 538a8ec90adSDaniel Bertalan writeAdr(buf + offset1, ldr.baseRegister, adrOffset); 539d1e40f4dSDaniel Bertalan // Note: ld64 moves the offset into the adr instruction for AdrpAddLdr, but 540d1e40f4dSDaniel Bertalan // not for AdrpLdrGotLdr. Its effect is the same either way. 541a8ec90adSDaniel Bertalan writeNop(buf + offset2); 542ecb14fd8SDaniel Bertalan return; 543ecb14fd8SDaniel Bertalan } 544ecb14fd8SDaniel Bertalan 545ecb14fd8SDaniel Bertalan // Move the target's page offset into the ldr's immediate offset. 546ecb14fd8SDaniel Bertalan // adrp x0, _foo@PAGE 547ecb14fd8SDaniel Bertalan // nop 548ecb14fd8SDaniel Bertalan // ldr x2, [x0, _foo@PAGEOFF + #off] 549d1e40f4dSDaniel Bertalan Ldr immediateLdr = ldr; 550ecb14fd8SDaniel Bertalan immediateLdr.baseRegister = adrp.destRegister; 551d1e40f4dSDaniel Bertalan immediateLdr.offset += add.addend; 552ecb14fd8SDaniel Bertalan if (isImmediateLdrEligible(immediateLdr)) { 553a8ec90adSDaniel Bertalan writeNop(buf + offset2); 554a8ec90adSDaniel Bertalan writeImmediateLdr(buf + offset3, immediateLdr); 555ecb14fd8SDaniel Bertalan return; 556ecb14fd8SDaniel Bertalan } 557d1e40f4dSDaniel Bertalan } 558d1e40f4dSDaniel Bertalan 559d1e40f4dSDaniel Bertalan // Relaxes a GOT-indirect load. 560d1e40f4dSDaniel Bertalan // If the referenced symbol is external and its GOT entry is within +/- 1 MiB, 561d1e40f4dSDaniel Bertalan // the GOT entry can be loaded with a single literal ldr instruction. 562d1e40f4dSDaniel Bertalan // If the referenced symbol is local and thus has been relaxed to adrp+add+ldr, 563d1e40f4dSDaniel Bertalan // we perform the AdrpAddLdr transformation. 564a8843ec9SDaniel Bertalan static void applyAdrpLdrGotLdr(uint8_t *buf, const ConcatInputSection *isec, 565a8843ec9SDaniel Bertalan uint64_t offset1, uint64_t offset2, 566a8ec90adSDaniel Bertalan uint64_t offset3) { 567a8ec90adSDaniel Bertalan uint32_t ins2 = read32le(buf + offset2); 568d1e40f4dSDaniel Bertalan Add add; 569d1e40f4dSDaniel Bertalan Ldr ldr2; 570d1e40f4dSDaniel Bertalan 571d1e40f4dSDaniel Bertalan if (parseAdd(ins2, add)) { 572a8843ec9SDaniel Bertalan applyAdrpAddLdr(buf, isec, offset1, offset2, offset3); 573ecb14fd8SDaniel Bertalan } else if (parseLdr(ins2, ldr2)) { 574ecb14fd8SDaniel Bertalan // adrp x1, _foo@GOTPAGE 575ecb14fd8SDaniel Bertalan // ldr x2, [x1, _foo@GOTPAGEOFF] 576ecb14fd8SDaniel Bertalan // ldr x3, [x2, #off] 577d1e40f4dSDaniel Bertalan 578a8ec90adSDaniel Bertalan uint32_t ins1 = read32le(buf + offset1); 579d1e40f4dSDaniel Bertalan Adrp adrp; 580d1e40f4dSDaniel Bertalan if (!parseAdrp(ins1, adrp)) 581d1e40f4dSDaniel Bertalan return; 582a8ec90adSDaniel Bertalan uint32_t ins3 = read32le(buf + offset3); 583d1e40f4dSDaniel Bertalan Ldr ldr3; 584d1e40f4dSDaniel Bertalan if (!parseLdr(ins3, ldr3)) 585d1e40f4dSDaniel Bertalan return; 586d1e40f4dSDaniel Bertalan 587ecb14fd8SDaniel Bertalan if (ldr2.baseRegister != adrp.destRegister) 588ecb14fd8SDaniel Bertalan return; 589ecb14fd8SDaniel Bertalan if (ldr3.baseRegister != ldr2.destRegister) 590ecb14fd8SDaniel Bertalan return; 591ecb14fd8SDaniel Bertalan // Loads from the GOT must be pointer sized. 592ecb14fd8SDaniel Bertalan if (ldr2.p2Size != 3 || ldr2.isFloat) 593ecb14fd8SDaniel Bertalan return; 594ecb14fd8SDaniel Bertalan 595a8ec90adSDaniel Bertalan uint64_t addr1 = isec->getVA() + offset1; 596a8ec90adSDaniel Bertalan uint64_t addr2 = isec->getVA() + offset2; 597a8ec90adSDaniel Bertalan uint64_t referent = pageBits(addr1) + adrp.addend + ldr2.offset; 598ecb14fd8SDaniel Bertalan // Load the GOT entry's address directly. 599ecb14fd8SDaniel Bertalan // nop 600ecb14fd8SDaniel Bertalan // ldr x2, _foo@GOTPAGE + _foo@GOTPAGEOFF 601ecb14fd8SDaniel Bertalan // ldr x3, [x2, #off] 602ecb14fd8SDaniel Bertalan Ldr literalLdr = ldr2; 603a8ec90adSDaniel Bertalan literalLdr.offset = referent - addr2; 604ecb14fd8SDaniel Bertalan if (isLiteralLdrEligible(literalLdr)) { 605a8ec90adSDaniel Bertalan writeNop(buf + offset1); 606a8ec90adSDaniel Bertalan writeLiteralLdr(buf + offset2, literalLdr); 607ecb14fd8SDaniel Bertalan } 608ecb14fd8SDaniel Bertalan } 609ecb14fd8SDaniel Bertalan } 610ecb14fd8SDaniel Bertalan 611a8843ec9SDaniel Bertalan static uint64_t readValue(const uint8_t *&ptr, const uint8_t *end) { 612a8843ec9SDaniel Bertalan unsigned int n = 0; 613a8843ec9SDaniel Bertalan uint64_t value = decodeULEB128(ptr, &n, end); 614a8843ec9SDaniel Bertalan ptr += n; 615a8843ec9SDaniel Bertalan return value; 616a8843ec9SDaniel Bertalan } 617a3f67f09SDaniel Bertalan 618a8843ec9SDaniel Bertalan template <typename Callback> 619a8843ec9SDaniel Bertalan static void forEachHint(ArrayRef<uint8_t> data, Callback callback) { 620a8843ec9SDaniel Bertalan std::array<uint64_t, 3> args; 621a3f67f09SDaniel Bertalan 622a8843ec9SDaniel Bertalan for (const uint8_t *p = data.begin(), *end = data.end(); p < end;) { 623a8843ec9SDaniel Bertalan uint64_t type = readValue(p, end); 624a8843ec9SDaniel Bertalan if (type == 0) 625a8843ec9SDaniel Bertalan break; 626a8843ec9SDaniel Bertalan 627a8843ec9SDaniel Bertalan uint64_t argCount = readValue(p, end); 628a8843ec9SDaniel Bertalan // All known LOH types as of 2022-09 have 3 or fewer arguments; skip others. 629a8843ec9SDaniel Bertalan if (argCount > 3) { 630a8843ec9SDaniel Bertalan for (unsigned i = 0; i < argCount; ++i) 631a8843ec9SDaniel Bertalan readValue(p, end); 632a8843ec9SDaniel Bertalan continue; 633a8843ec9SDaniel Bertalan } 634a8843ec9SDaniel Bertalan 635a8843ec9SDaniel Bertalan for (unsigned i = 0; i < argCount; ++i) 636a8843ec9SDaniel Bertalan args[i] = readValue(p, end); 637a8843ec9SDaniel Bertalan callback(type, ArrayRef<uint64_t>(args.data(), argCount)); 638a8843ec9SDaniel Bertalan } 639a8843ec9SDaniel Bertalan } 640a8843ec9SDaniel Bertalan 641a8843ec9SDaniel Bertalan // On RISC architectures like arm64, materializing a memory address generally 642a8843ec9SDaniel Bertalan // takes multiple instructions. If the referenced symbol is located close enough 643a8843ec9SDaniel Bertalan // in memory, fewer instructions are needed. 644a8843ec9SDaniel Bertalan // 645a8843ec9SDaniel Bertalan // Linker optimization hints record where addresses are computed. After 646a8843ec9SDaniel Bertalan // addresses have been assigned, if possible, we change them to a shorter 647a8843ec9SDaniel Bertalan // sequence of instructions. The size of the binary is not modified; the 648a8843ec9SDaniel Bertalan // eliminated instructions are replaced with NOPs. This still leads to faster 649a8843ec9SDaniel Bertalan // code as the CPU can skip over NOPs quickly. 650a8843ec9SDaniel Bertalan // 651a8843ec9SDaniel Bertalan // LOHs are specified by the LC_LINKER_OPTIMIZATION_HINTS load command, which 652a8843ec9SDaniel Bertalan // points to a sequence of ULEB128-encoded numbers. Each entry specifies a 653a8843ec9SDaniel Bertalan // transformation kind, and 2 or 3 addresses where the instructions are located. 654a8843ec9SDaniel Bertalan void ARM64::applyOptimizationHints(uint8_t *outBuf, const ObjFile &obj) const { 655a8843ec9SDaniel Bertalan ArrayRef<uint8_t> data = obj.getOptimizationHints(); 656a8843ec9SDaniel Bertalan if (data.empty()) 657a8843ec9SDaniel Bertalan return; 658a8843ec9SDaniel Bertalan 659a8843ec9SDaniel Bertalan const ConcatInputSection *section = nullptr; 660a8843ec9SDaniel Bertalan uint64_t sectionAddr = 0; 661a8843ec9SDaniel Bertalan uint8_t *buf = nullptr; 662a8843ec9SDaniel Bertalan 663a8843ec9SDaniel Bertalan auto findSection = [&](uint64_t addr) { 664a8843ec9SDaniel Bertalan if (section && addr >= sectionAddr && 665a8843ec9SDaniel Bertalan addr < sectionAddr + section->getSize()) 666a8843ec9SDaniel Bertalan return true; 667a8843ec9SDaniel Bertalan 668595cd45aSVy Nguyen if (obj.sections.empty()) 669595cd45aSVy Nguyen return false; 670a8843ec9SDaniel Bertalan auto secIt = std::prev(llvm::upper_bound( 671a8843ec9SDaniel Bertalan obj.sections, addr, 672a8843ec9SDaniel Bertalan [](uint64_t off, const Section *sec) { return off < sec->addr; })); 673a8843ec9SDaniel Bertalan const Section *sec = *secIt; 674a8843ec9SDaniel Bertalan 675595cd45aSVy Nguyen if (sec->subsections.empty()) 676595cd45aSVy Nguyen return false; 677a8843ec9SDaniel Bertalan auto subsecIt = std::prev(llvm::upper_bound( 678a8843ec9SDaniel Bertalan sec->subsections, addr - sec->addr, 679a8843ec9SDaniel Bertalan [](uint64_t off, Subsection subsec) { return off < subsec.offset; })); 680a8843ec9SDaniel Bertalan const Subsection &subsec = *subsecIt; 681a8843ec9SDaniel Bertalan const ConcatInputSection *isec = 682a8843ec9SDaniel Bertalan dyn_cast_or_null<ConcatInputSection>(subsec.isec); 683a8843ec9SDaniel Bertalan if (!isec || isec->shouldOmitFromOutput()) 684a8843ec9SDaniel Bertalan return false; 685a8843ec9SDaniel Bertalan 686a8843ec9SDaniel Bertalan section = isec; 687a8843ec9SDaniel Bertalan sectionAddr = subsec.offset + sec->addr; 688a8843ec9SDaniel Bertalan buf = outBuf + section->outSecOff + section->parent->fileOff; 689a8843ec9SDaniel Bertalan return true; 690a8843ec9SDaniel Bertalan }; 691a8843ec9SDaniel Bertalan 692a8843ec9SDaniel Bertalan auto isValidOffset = [&](uint64_t offset) { 693a8843ec9SDaniel Bertalan if (offset < sectionAddr || offset >= sectionAddr + section->getSize()) { 6940e8d4980SJez Ng error(toString(&obj) + 6950e8d4980SJez Ng ": linker optimization hint spans multiple sections"); 696a8843ec9SDaniel Bertalan return false; 697a8843ec9SDaniel Bertalan } 698a8843ec9SDaniel Bertalan return true; 699a8843ec9SDaniel Bertalan }; 700a8843ec9SDaniel Bertalan 701a8843ec9SDaniel Bertalan bool hasAdrpAdrp = false; 702a8843ec9SDaniel Bertalan forEachHint(data, [&](uint64_t kind, ArrayRef<uint64_t> args) { 703a8843ec9SDaniel Bertalan if (kind == LOH_ARM64_ADRP_ADRP) { 704a8843ec9SDaniel Bertalan hasAdrpAdrp = true; 705a8843ec9SDaniel Bertalan return; 706a8843ec9SDaniel Bertalan } 707a8843ec9SDaniel Bertalan 708a8843ec9SDaniel Bertalan if (!findSection(args[0])) 709a8843ec9SDaniel Bertalan return; 710a8843ec9SDaniel Bertalan switch (kind) { 711a8843ec9SDaniel Bertalan case LOH_ARM64_ADRP_ADD: 712a8843ec9SDaniel Bertalan if (isValidOffset(args[1])) 713a8843ec9SDaniel Bertalan applyAdrpAdd(buf, section, args[0] - sectionAddr, 714a8843ec9SDaniel Bertalan args[1] - sectionAddr); 715a3f67f09SDaniel Bertalan break; 716a3f67f09SDaniel Bertalan case LOH_ARM64_ADRP_LDR: 717a8843ec9SDaniel Bertalan if (isValidOffset(args[1])) 718a8843ec9SDaniel Bertalan applyAdrpLdr(buf, section, args[0] - sectionAddr, 719a8843ec9SDaniel Bertalan args[1] - sectionAddr); 720a8843ec9SDaniel Bertalan break; 721a8843ec9SDaniel Bertalan case LOH_ARM64_ADRP_LDR_GOT: 722a8843ec9SDaniel Bertalan if (isValidOffset(args[1])) 723a8843ec9SDaniel Bertalan applyAdrpLdrGot(buf, section, args[0] - sectionAddr, 724a8843ec9SDaniel Bertalan args[1] - sectionAddr); 725573c7e6bSDaniel Bertalan break; 726a3f67f09SDaniel Bertalan case LOH_ARM64_ADRP_ADD_LDR: 727a8843ec9SDaniel Bertalan if (isValidOffset(args[1]) && isValidOffset(args[2])) 728a8843ec9SDaniel Bertalan applyAdrpAddLdr(buf, section, args[0] - sectionAddr, 729a8843ec9SDaniel Bertalan args[1] - sectionAddr, args[2] - sectionAddr); 730ecb14fd8SDaniel Bertalan break; 731a3f67f09SDaniel Bertalan case LOH_ARM64_ADRP_LDR_GOT_LDR: 732a8843ec9SDaniel Bertalan if (isValidOffset(args[1]) && isValidOffset(args[2])) 733a8843ec9SDaniel Bertalan applyAdrpLdrGotLdr(buf, section, args[0] - sectionAddr, 734a8843ec9SDaniel Bertalan args[1] - sectionAddr, args[2] - sectionAddr); 735ecb14fd8SDaniel Bertalan break; 736a3f67f09SDaniel Bertalan case LOH_ARM64_ADRP_ADD_STR: 737a3f67f09SDaniel Bertalan case LOH_ARM64_ADRP_LDR_GOT_STR: 738a3f67f09SDaniel Bertalan // TODO: Implement these 739a3f67f09SDaniel Bertalan break; 740a3f67f09SDaniel Bertalan } 741a8843ec9SDaniel Bertalan }); 742a3f67f09SDaniel Bertalan 743a8843ec9SDaniel Bertalan if (!hasAdrpAdrp) 744a8843ec9SDaniel Bertalan return; 745a8843ec9SDaniel Bertalan 746a8843ec9SDaniel Bertalan // AdrpAdrp optimization hints are performed in a second pass because they 747a8843ec9SDaniel Bertalan // might interfere with other transformations. For instance, consider the 748a8843ec9SDaniel Bertalan // following input: 749a8843ec9SDaniel Bertalan // 750a8843ec9SDaniel Bertalan // adrp x0, _foo@PAGE 751a8843ec9SDaniel Bertalan // add x1, x0, _foo@PAGEOFF 752a8843ec9SDaniel Bertalan // adrp x0, _bar@PAGE 753a8843ec9SDaniel Bertalan // add x2, x0, _bar@PAGEOFF 754a8843ec9SDaniel Bertalan // 755a8843ec9SDaniel Bertalan // If we perform the AdrpAdrp relaxation first, we get: 756a8843ec9SDaniel Bertalan // 757a8843ec9SDaniel Bertalan // adrp x0, _foo@PAGE 758a8843ec9SDaniel Bertalan // add x1, x0, _foo@PAGEOFF 759a8843ec9SDaniel Bertalan // nop 760a8843ec9SDaniel Bertalan // add x2, x0, _bar@PAGEOFF 761a8843ec9SDaniel Bertalan // 762a8843ec9SDaniel Bertalan // If we then apply AdrpAdd to the first two instructions, the add will have a 763a8843ec9SDaniel Bertalan // garbage value in x0: 764a8843ec9SDaniel Bertalan // 765a8843ec9SDaniel Bertalan // adr x1, _foo 766a8843ec9SDaniel Bertalan // nop 767a8843ec9SDaniel Bertalan // nop 768a8843ec9SDaniel Bertalan // add x2, x0, _bar@PAGEOFF 769a8843ec9SDaniel Bertalan forEachHint(data, [&](uint64_t kind, ArrayRef<uint64_t> args) { 770a8843ec9SDaniel Bertalan if (kind != LOH_ARM64_ADRP_ADRP) 771a8843ec9SDaniel Bertalan return; 772a8843ec9SDaniel Bertalan if (!findSection(args[0])) 773a8843ec9SDaniel Bertalan return; 774a8843ec9SDaniel Bertalan if (isValidOffset(args[1])) 775a8843ec9SDaniel Bertalan applyAdrpAdrp(buf, section, args[0] - sectionAddr, args[1] - sectionAddr); 776a8843ec9SDaniel Bertalan }); 777a3f67f09SDaniel Bertalan } 778a3f67f09SDaniel Bertalan 77987104faaSGreg McGary TargetInfo *macho::createARM64TargetInfo() { 78087104faaSGreg McGary static ARM64 t; 78187104faaSGreg McGary return &t; 78287104faaSGreg McGary } 783