15ffd83dbSDimitry Andric //===- SyntheticSections.h -------------------------------------*- C++ -*-===// 25ffd83dbSDimitry Andric // 35ffd83dbSDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 45ffd83dbSDimitry Andric // See https://llvm.org/LICENSE.txt for license information. 55ffd83dbSDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 65ffd83dbSDimitry Andric // 75ffd83dbSDimitry Andric //===----------------------------------------------------------------------===// 85ffd83dbSDimitry Andric 95ffd83dbSDimitry Andric #ifndef LLD_MACHO_SYNTHETIC_SECTIONS_H 105ffd83dbSDimitry Andric #define LLD_MACHO_SYNTHETIC_SECTIONS_H 115ffd83dbSDimitry Andric 125ffd83dbSDimitry Andric #include "Config.h" 135ffd83dbSDimitry Andric #include "ExportTrie.h" 145ffd83dbSDimitry Andric #include "InputSection.h" 155ffd83dbSDimitry Andric #include "OutputSection.h" 16e8d8bef9SDimitry Andric #include "OutputSegment.h" 175ffd83dbSDimitry Andric #include "Target.h" 18fe6060f1SDimitry Andric #include "Writer.h" 195ffd83dbSDimitry Andric 20fe6060f1SDimitry Andric #include "llvm/ADT/DenseMap.h" 21fe6060f1SDimitry Andric #include "llvm/ADT/Hashing.h" 22*0fca6ea1SDimitry Andric #include "llvm/ADT/MapVector.h" 235ffd83dbSDimitry Andric #include "llvm/ADT/SetVector.h" 24bdd1243dSDimitry Andric #include "llvm/BinaryFormat/MachO.h" 25fe6060f1SDimitry Andric #include "llvm/Support/MathExtras.h" 265ffd83dbSDimitry Andric #include "llvm/Support/raw_ostream.h" 275ffd83dbSDimitry Andric 28fe6060f1SDimitry Andric #include <unordered_map> 29fe6060f1SDimitry Andric 30e8d8bef9SDimitry Andric namespace llvm { 31e8d8bef9SDimitry Andric class DWARFUnit; 32e8d8bef9SDimitry Andric } // namespace llvm 33e8d8bef9SDimitry Andric 34bdd1243dSDimitry Andric namespace lld::macho { 355ffd83dbSDimitry Andric 36e8d8bef9SDimitry Andric class Defined; 375ffd83dbSDimitry Andric class DylibSymbol; 385ffd83dbSDimitry Andric class LoadCommand; 39e8d8bef9SDimitry Andric class ObjFile; 40fe6060f1SDimitry Andric class UnwindInfoSection; 415ffd83dbSDimitry Andric 425ffd83dbSDimitry Andric class SyntheticSection : public OutputSection { 435ffd83dbSDimitry Andric public: 445ffd83dbSDimitry Andric SyntheticSection(const char *segname, const char *name); 455ffd83dbSDimitry Andric virtual ~SyntheticSection() = default; 465ffd83dbSDimitry Andric 475ffd83dbSDimitry Andric static bool classof(const OutputSection *sec) { 485ffd83dbSDimitry Andric return sec->kind() == SyntheticKind; 495ffd83dbSDimitry Andric } 505ffd83dbSDimitry Andric 51fe6060f1SDimitry Andric StringRef segname; 52fe6060f1SDimitry Andric // This fake InputSection makes it easier for us to write code that applies 53fe6060f1SDimitry Andric // generically to both user inputs and synthetics. 54fe6060f1SDimitry Andric InputSection *isec; 555ffd83dbSDimitry Andric }; 565ffd83dbSDimitry Andric 57e8d8bef9SDimitry Andric // All sections in __LINKEDIT should inherit from this. 58e8d8bef9SDimitry Andric class LinkEditSection : public SyntheticSection { 59e8d8bef9SDimitry Andric public: 60e8d8bef9SDimitry Andric LinkEditSection(const char *segname, const char *name) 61e8d8bef9SDimitry Andric : SyntheticSection(segname, name) { 62fe6060f1SDimitry Andric align = target->wordSize; 63e8d8bef9SDimitry Andric } 64e8d8bef9SDimitry Andric 651fd87a68SDimitry Andric // Implementations of this method can assume that the regular (non-__LINKEDIT) 661fd87a68SDimitry Andric // sections already have their addresses assigned. 67fe6060f1SDimitry Andric virtual void finalizeContents() {} 68fe6060f1SDimitry Andric 69e8d8bef9SDimitry Andric // Sections in __LINKEDIT are special: their offsets are recorded in the 70e8d8bef9SDimitry Andric // load commands like LC_DYLD_INFO_ONLY and LC_SYMTAB, instead of in section 71e8d8bef9SDimitry Andric // headers. 72972a253aSDimitry Andric bool isHidden() const final { return true; } 73e8d8bef9SDimitry Andric 74e8d8bef9SDimitry Andric virtual uint64_t getRawSize() const = 0; 75e8d8bef9SDimitry Andric 76e8d8bef9SDimitry Andric // codesign (or more specifically libstuff) checks that each section in 77e8d8bef9SDimitry Andric // __LINKEDIT ends where the next one starts -- no gaps are permitted. We 78e8d8bef9SDimitry Andric // therefore align every section's start and end points to WordSize. 79e8d8bef9SDimitry Andric // 80e8d8bef9SDimitry Andric // NOTE: This assumes that the extra bytes required for alignment can be 81e8d8bef9SDimitry Andric // zero-valued bytes. 82972a253aSDimitry Andric uint64_t getSize() const final { return llvm::alignTo(getRawSize(), align); } 83e8d8bef9SDimitry Andric }; 84e8d8bef9SDimitry Andric 855ffd83dbSDimitry Andric // The header of the Mach-O file, which must have a file offset of zero. 86fe6060f1SDimitry Andric class MachHeaderSection final : public SyntheticSection { 875ffd83dbSDimitry Andric public: 885ffd83dbSDimitry Andric MachHeaderSection(); 895ffd83dbSDimitry Andric bool isHidden() const override { return true; } 905ffd83dbSDimitry Andric uint64_t getSize() const override; 915ffd83dbSDimitry Andric void writeTo(uint8_t *buf) const override; 925ffd83dbSDimitry Andric 93fe6060f1SDimitry Andric void addLoadCommand(LoadCommand *); 94fe6060f1SDimitry Andric 95fe6060f1SDimitry Andric protected: 965ffd83dbSDimitry Andric std::vector<LoadCommand *> loadCommands; 975ffd83dbSDimitry Andric uint32_t sizeOfCmds = 0; 985ffd83dbSDimitry Andric }; 995ffd83dbSDimitry Andric 1005ffd83dbSDimitry Andric // A hidden section that exists solely for the purpose of creating the 1015ffd83dbSDimitry Andric // __PAGEZERO segment, which is used to catch null pointer dereferences. 102fe6060f1SDimitry Andric class PageZeroSection final : public SyntheticSection { 1035ffd83dbSDimitry Andric public: 1045ffd83dbSDimitry Andric PageZeroSection(); 1055ffd83dbSDimitry Andric bool isHidden() const override { return true; } 10681ad6265SDimitry Andric bool isNeeded() const override { return target->pageZeroSize != 0; } 107fe6060f1SDimitry Andric uint64_t getSize() const override { return target->pageZeroSize; } 1085ffd83dbSDimitry Andric uint64_t getFileSize() const override { return 0; } 1095ffd83dbSDimitry Andric void writeTo(uint8_t *buf) const override {} 1105ffd83dbSDimitry Andric }; 1115ffd83dbSDimitry Andric 112e8d8bef9SDimitry Andric // This is the base class for the GOT and TLVPointer sections, which are nearly 113e8d8bef9SDimitry Andric // functionally identical -- they will both be populated by dyld with addresses 114e8d8bef9SDimitry Andric // to non-lazily-loaded dylib symbols. The main difference is that the 115e8d8bef9SDimitry Andric // TLVPointerSection stores references to thread-local variables. 116e8d8bef9SDimitry Andric class NonLazyPointerSectionBase : public SyntheticSection { 1175ffd83dbSDimitry Andric public: 118e8d8bef9SDimitry Andric NonLazyPointerSectionBase(const char *segname, const char *name); 1195ffd83dbSDimitry Andric const llvm::SetVector<const Symbol *> &getEntries() const { return entries; } 1205ffd83dbSDimitry Andric bool isNeeded() const override { return !entries.empty(); } 121fe6060f1SDimitry Andric uint64_t getSize() const override { 122fe6060f1SDimitry Andric return entries.size() * target->wordSize; 123fe6060f1SDimitry Andric } 1245ffd83dbSDimitry Andric void writeTo(uint8_t *buf) const override; 125e8d8bef9SDimitry Andric void addEntry(Symbol *sym); 126fe6060f1SDimitry Andric uint64_t getVA(uint32_t gotIndex) const { 127fe6060f1SDimitry Andric return addr + gotIndex * target->wordSize; 128fe6060f1SDimitry Andric } 1295ffd83dbSDimitry Andric 1305ffd83dbSDimitry Andric private: 1315ffd83dbSDimitry Andric llvm::SetVector<const Symbol *> entries; 1325ffd83dbSDimitry Andric }; 1335ffd83dbSDimitry Andric 134fe6060f1SDimitry Andric class GotSection final : public NonLazyPointerSectionBase { 135e8d8bef9SDimitry Andric public: 136fe6060f1SDimitry Andric GotSection(); 137e8d8bef9SDimitry Andric }; 138e8d8bef9SDimitry Andric 139fe6060f1SDimitry Andric class TlvPointerSection final : public NonLazyPointerSectionBase { 140e8d8bef9SDimitry Andric public: 141fe6060f1SDimitry Andric TlvPointerSection(); 142e8d8bef9SDimitry Andric }; 143e8d8bef9SDimitry Andric 144e8d8bef9SDimitry Andric struct Location { 145fe6060f1SDimitry Andric const InputSection *isec; 146fe6060f1SDimitry Andric uint64_t offset; 147e8d8bef9SDimitry Andric 148fe6060f1SDimitry Andric Location(const InputSection *isec, uint64_t offset) 149fe6060f1SDimitry Andric : isec(isec), offset(offset) {} 150fe6060f1SDimitry Andric uint64_t getVA() const { return isec->getVA(offset); } 151e8d8bef9SDimitry Andric }; 152e8d8bef9SDimitry Andric 153e8d8bef9SDimitry Andric // Stores rebase opcodes, which tell dyld where absolute addresses have been 154e8d8bef9SDimitry Andric // encoded in the binary. If the binary is not loaded at its preferred address, 155e8d8bef9SDimitry Andric // dyld has to rebase these addresses by adding an offset to them. 156fe6060f1SDimitry Andric class RebaseSection final : public LinkEditSection { 157e8d8bef9SDimitry Andric public: 158e8d8bef9SDimitry Andric RebaseSection(); 159fe6060f1SDimitry Andric void finalizeContents() override; 160e8d8bef9SDimitry Andric uint64_t getRawSize() const override { return contents.size(); } 161e8d8bef9SDimitry Andric bool isNeeded() const override { return !locations.empty(); } 162e8d8bef9SDimitry Andric void writeTo(uint8_t *buf) const override; 163e8d8bef9SDimitry Andric 164fe6060f1SDimitry Andric void addEntry(const InputSection *isec, uint64_t offset) { 165e8d8bef9SDimitry Andric if (config->isPic) 16606c3fb27SDimitry Andric locations.emplace_back(isec, offset); 167e8d8bef9SDimitry Andric } 168e8d8bef9SDimitry Andric 169e8d8bef9SDimitry Andric private: 170e8d8bef9SDimitry Andric std::vector<Location> locations; 171e8d8bef9SDimitry Andric SmallVector<char, 128> contents; 172e8d8bef9SDimitry Andric }; 173e8d8bef9SDimitry Andric 1745ffd83dbSDimitry Andric struct BindingEntry { 1755ffd83dbSDimitry Andric int64_t addend; 176e8d8bef9SDimitry Andric Location target; 177fe6060f1SDimitry Andric BindingEntry(int64_t addend, Location target) 17806c3fb27SDimitry Andric : addend(addend), target(target) {} 1795ffd83dbSDimitry Andric }; 1805ffd83dbSDimitry Andric 181fe6060f1SDimitry Andric template <class Sym> 182fe6060f1SDimitry Andric using BindingsMap = llvm::DenseMap<Sym, std::vector<BindingEntry>>; 183fe6060f1SDimitry Andric 1845ffd83dbSDimitry Andric // Stores bind opcodes for telling dyld which symbols to load non-lazily. 185fe6060f1SDimitry Andric class BindingSection final : public LinkEditSection { 1865ffd83dbSDimitry Andric public: 1875ffd83dbSDimitry Andric BindingSection(); 188fe6060f1SDimitry Andric void finalizeContents() override; 189e8d8bef9SDimitry Andric uint64_t getRawSize() const override { return contents.size(); } 190fe6060f1SDimitry Andric bool isNeeded() const override { return !bindingsMap.empty(); } 1915ffd83dbSDimitry Andric void writeTo(uint8_t *buf) const override; 1925ffd83dbSDimitry Andric 19381ad6265SDimitry Andric void addEntry(const Symbol *dysym, const InputSection *isec, uint64_t offset, 19481ad6265SDimitry Andric int64_t addend = 0) { 195fe6060f1SDimitry Andric bindingsMap[dysym].emplace_back(addend, Location(isec, offset)); 1965ffd83dbSDimitry Andric } 1975ffd83dbSDimitry Andric 1985ffd83dbSDimitry Andric private: 19981ad6265SDimitry Andric BindingsMap<const Symbol *> bindingsMap; 2005ffd83dbSDimitry Andric SmallVector<char, 128> contents; 2015ffd83dbSDimitry Andric }; 2025ffd83dbSDimitry Andric 203e8d8bef9SDimitry Andric // Stores bind opcodes for telling dyld which weak symbols need coalescing. 204e8d8bef9SDimitry Andric // There are two types of entries in this section: 205e8d8bef9SDimitry Andric // 206e8d8bef9SDimitry Andric // 1) Non-weak definitions: This is a symbol definition that weak symbols in 207e8d8bef9SDimitry Andric // other dylibs should coalesce to. 208e8d8bef9SDimitry Andric // 209e8d8bef9SDimitry Andric // 2) Weak bindings: These tell dyld that a given symbol reference should 210fe6060f1SDimitry Andric // coalesce to a non-weak definition if one is found. Note that unlike the 211e8d8bef9SDimitry Andric // entries in the BindingSection, the bindings here only refer to these 212e8d8bef9SDimitry Andric // symbols by name, but do not specify which dylib to load them from. 213fe6060f1SDimitry Andric class WeakBindingSection final : public LinkEditSection { 214e8d8bef9SDimitry Andric public: 215e8d8bef9SDimitry Andric WeakBindingSection(); 216fe6060f1SDimitry Andric void finalizeContents() override; 217e8d8bef9SDimitry Andric uint64_t getRawSize() const override { return contents.size(); } 218e8d8bef9SDimitry Andric bool isNeeded() const override { 219fe6060f1SDimitry Andric return !bindingsMap.empty() || !definitions.empty(); 220e8d8bef9SDimitry Andric } 221e8d8bef9SDimitry Andric 222e8d8bef9SDimitry Andric void writeTo(uint8_t *buf) const override; 223e8d8bef9SDimitry Andric 224fe6060f1SDimitry Andric void addEntry(const Symbol *symbol, const InputSection *isec, uint64_t offset, 225fe6060f1SDimitry Andric int64_t addend = 0) { 226fe6060f1SDimitry Andric bindingsMap[symbol].emplace_back(addend, Location(isec, offset)); 227e8d8bef9SDimitry Andric } 228e8d8bef9SDimitry Andric 229fe6060f1SDimitry Andric bool hasEntry() const { return !bindingsMap.empty(); } 230e8d8bef9SDimitry Andric 231e8d8bef9SDimitry Andric void addNonWeakDefinition(const Defined *defined) { 232e8d8bef9SDimitry Andric definitions.emplace_back(defined); 233e8d8bef9SDimitry Andric } 234e8d8bef9SDimitry Andric 235e8d8bef9SDimitry Andric bool hasNonWeakDefinition() const { return !definitions.empty(); } 236e8d8bef9SDimitry Andric 237e8d8bef9SDimitry Andric private: 238fe6060f1SDimitry Andric BindingsMap<const Symbol *> bindingsMap; 239e8d8bef9SDimitry Andric std::vector<const Defined *> definitions; 240e8d8bef9SDimitry Andric SmallVector<char, 128> contents; 241e8d8bef9SDimitry Andric }; 242e8d8bef9SDimitry Andric 2435ffd83dbSDimitry Andric // The following sections implement lazy symbol binding -- very similar to the 2445ffd83dbSDimitry Andric // PLT mechanism in ELF. 2455ffd83dbSDimitry Andric // 246e8d8bef9SDimitry Andric // ELF's .plt section is broken up into two sections in Mach-O: StubsSection 247e8d8bef9SDimitry Andric // and StubHelperSection. Calls to functions in dylibs will end up calling into 2485ffd83dbSDimitry Andric // StubsSection, which contains indirect jumps to addresses stored in the 2495ffd83dbSDimitry Andric // LazyPointerSection (the counterpart to ELF's .plt.got). 2505ffd83dbSDimitry Andric // 251e8d8bef9SDimitry Andric // We will first describe how non-weak symbols are handled. 252e8d8bef9SDimitry Andric // 253e8d8bef9SDimitry Andric // At program start, the LazyPointerSection contains addresses that point into 254e8d8bef9SDimitry Andric // one of the entry points in the middle of the StubHelperSection. The code in 2555ffd83dbSDimitry Andric // StubHelperSection will push on the stack an offset into the 2565ffd83dbSDimitry Andric // LazyBindingSection. The push is followed by a jump to the beginning of the 2575ffd83dbSDimitry Andric // StubHelperSection (similar to PLT0), which then calls into dyld_stub_binder. 2585ffd83dbSDimitry Andric // dyld_stub_binder is a non-lazily-bound symbol, so this call looks it up in 2595ffd83dbSDimitry Andric // the GOT. 2605ffd83dbSDimitry Andric // 2615ffd83dbSDimitry Andric // The stub binder will look up the bind opcodes in the LazyBindingSection at 262e8d8bef9SDimitry Andric // the given offset. The bind opcodes will tell the binder to update the 263e8d8bef9SDimitry Andric // address in the LazyPointerSection to point to the symbol, so that subsequent 264e8d8bef9SDimitry Andric // calls don't have to redo the symbol resolution. The binder will then jump to 265e8d8bef9SDimitry Andric // the resolved symbol. 266e8d8bef9SDimitry Andric // 267e8d8bef9SDimitry Andric // With weak symbols, the situation is slightly different. Since there is no 268e8d8bef9SDimitry Andric // "weak lazy" lookup, function calls to weak symbols are always non-lazily 269e8d8bef9SDimitry Andric // bound. We emit both regular non-lazy bindings as well as weak bindings, in 270e8d8bef9SDimitry Andric // order that the weak bindings may overwrite the non-lazy bindings if an 271e8d8bef9SDimitry Andric // appropriate symbol is found at runtime. However, the bound addresses will 272e8d8bef9SDimitry Andric // still be written (non-lazily) into the LazyPointerSection. 273bdd1243dSDimitry Andric // 274bdd1243dSDimitry Andric // Symbols are always bound eagerly when chained fixups are used. In that case, 275bdd1243dSDimitry Andric // StubsSection contains indirect jumps to addresses stored in the GotSection. 276bdd1243dSDimitry Andric // The GOT directly contains the fixup entries, which will be replaced by the 277bdd1243dSDimitry Andric // address of the target symbols on load. LazyPointerSection and 278bdd1243dSDimitry Andric // StubHelperSection are not used. 2795ffd83dbSDimitry Andric 280fe6060f1SDimitry Andric class StubsSection final : public SyntheticSection { 2815ffd83dbSDimitry Andric public: 2825ffd83dbSDimitry Andric StubsSection(); 2835ffd83dbSDimitry Andric uint64_t getSize() const override; 2845ffd83dbSDimitry Andric bool isNeeded() const override { return !entries.empty(); } 285fe6060f1SDimitry Andric void finalize() override; 2865ffd83dbSDimitry Andric void writeTo(uint8_t *buf) const override; 287e8d8bef9SDimitry Andric const llvm::SetVector<Symbol *> &getEntries() const { return entries; } 288bdd1243dSDimitry Andric // Creates a stub for the symbol and the corresponding entry in the 289bdd1243dSDimitry Andric // LazyPointerSection. 290bdd1243dSDimitry Andric void addEntry(Symbol *); 291fe6060f1SDimitry Andric uint64_t getVA(uint32_t stubsIndex) const { 292fe6060f1SDimitry Andric assert(isFinal || target->usesThunks()); 293fe6060f1SDimitry Andric // ConcatOutputSection::finalize() can seek the address of a 294fe6060f1SDimitry Andric // stub before its address is assigned. Before __stubs is 295fe6060f1SDimitry Andric // finalized, return a contrived out-of-range address. 296fe6060f1SDimitry Andric return isFinal ? addr + stubsIndex * target->stubSize 297fe6060f1SDimitry Andric : TargetInfo::outOfRangeVA; 298fe6060f1SDimitry Andric } 299fe6060f1SDimitry Andric 300fe6060f1SDimitry Andric bool isFinal = false; // is address assigned? 3015ffd83dbSDimitry Andric 3025ffd83dbSDimitry Andric private: 303e8d8bef9SDimitry Andric llvm::SetVector<Symbol *> entries; 3045ffd83dbSDimitry Andric }; 3055ffd83dbSDimitry Andric 306fe6060f1SDimitry Andric class StubHelperSection final : public SyntheticSection { 3075ffd83dbSDimitry Andric public: 3085ffd83dbSDimitry Andric StubHelperSection(); 3095ffd83dbSDimitry Andric uint64_t getSize() const override; 3105ffd83dbSDimitry Andric bool isNeeded() const override; 3115ffd83dbSDimitry Andric void writeTo(uint8_t *buf) const override; 3125ffd83dbSDimitry Andric 313bdd1243dSDimitry Andric void setUp(); 3145ffd83dbSDimitry Andric 3155ffd83dbSDimitry Andric DylibSymbol *stubBinder = nullptr; 316e8d8bef9SDimitry Andric Defined *dyldPrivate = nullptr; 3175ffd83dbSDimitry Andric }; 3185ffd83dbSDimitry Andric 319*0fca6ea1SDimitry Andric class ObjCSelRefsHelper { 320*0fca6ea1SDimitry Andric public: 321*0fca6ea1SDimitry Andric static void initialize(); 322*0fca6ea1SDimitry Andric static void cleanup(); 323*0fca6ea1SDimitry Andric 324*0fca6ea1SDimitry Andric static ConcatInputSection *getSelRef(StringRef methname); 325*0fca6ea1SDimitry Andric static ConcatInputSection *makeSelRef(StringRef methname); 326*0fca6ea1SDimitry Andric 327*0fca6ea1SDimitry Andric private: 328*0fca6ea1SDimitry Andric static llvm::DenseMap<llvm::CachedHashStringRef, ConcatInputSection *> 329*0fca6ea1SDimitry Andric methnameToSelref; 330*0fca6ea1SDimitry Andric }; 331*0fca6ea1SDimitry Andric 332bdd1243dSDimitry Andric // Objective-C stubs are hoisted objc_msgSend calls per selector called in the 333bdd1243dSDimitry Andric // program. Apple Clang produces undefined symbols to each stub, such as 334bdd1243dSDimitry Andric // '_objc_msgSend$foo', which are then synthesized by the linker. The stubs 335bdd1243dSDimitry Andric // load the particular selector 'foo' from __objc_selrefs, setting it to the 336bdd1243dSDimitry Andric // first argument of the objc_msgSend call, and then jumps to objc_msgSend. The 337bdd1243dSDimitry Andric // actual stub contents are mirrored from ld64. 338bdd1243dSDimitry Andric class ObjCStubsSection final : public SyntheticSection { 339bdd1243dSDimitry Andric public: 340bdd1243dSDimitry Andric ObjCStubsSection(); 341bdd1243dSDimitry Andric void addEntry(Symbol *sym); 342bdd1243dSDimitry Andric uint64_t getSize() const override; 343bdd1243dSDimitry Andric bool isNeeded() const override { return !symbols.empty(); } 344bdd1243dSDimitry Andric void finalize() override { isec->isFinal = true; } 345bdd1243dSDimitry Andric void writeTo(uint8_t *buf) const override; 346bdd1243dSDimitry Andric void setUp(); 347bdd1243dSDimitry Andric 348bdd1243dSDimitry Andric static constexpr llvm::StringLiteral symbolPrefix = "_objc_msgSend$"; 349*0fca6ea1SDimitry Andric static bool isObjCStubSymbol(Symbol *sym); 350*0fca6ea1SDimitry Andric static StringRef getMethname(Symbol *sym); 351bdd1243dSDimitry Andric 352bdd1243dSDimitry Andric private: 353bdd1243dSDimitry Andric std::vector<Defined *> symbols; 3547a6dacacSDimitry Andric Symbol *objcMsgSend = nullptr; 355bdd1243dSDimitry Andric }; 356bdd1243dSDimitry Andric 357e8d8bef9SDimitry Andric // Note that this section may also be targeted by non-lazy bindings. In 358e8d8bef9SDimitry Andric // particular, this happens when branch relocations target weak symbols. 359fe6060f1SDimitry Andric class LazyPointerSection final : public SyntheticSection { 3605ffd83dbSDimitry Andric public: 3615ffd83dbSDimitry Andric LazyPointerSection(); 3625ffd83dbSDimitry Andric uint64_t getSize() const override; 3635ffd83dbSDimitry Andric bool isNeeded() const override; 3645ffd83dbSDimitry Andric void writeTo(uint8_t *buf) const override; 365bdd1243dSDimitry Andric uint64_t getVA(uint32_t index) const { 366bdd1243dSDimitry Andric return addr + (index << target->p2WordSize); 367bdd1243dSDimitry Andric } 3685ffd83dbSDimitry Andric }; 3695ffd83dbSDimitry Andric 370fe6060f1SDimitry Andric class LazyBindingSection final : public LinkEditSection { 3715ffd83dbSDimitry Andric public: 3725ffd83dbSDimitry Andric LazyBindingSection(); 373fe6060f1SDimitry Andric void finalizeContents() override; 374e8d8bef9SDimitry Andric uint64_t getRawSize() const override { return contents.size(); } 375e8d8bef9SDimitry Andric bool isNeeded() const override { return !entries.empty(); } 3765ffd83dbSDimitry Andric void writeTo(uint8_t *buf) const override; 377e8d8bef9SDimitry Andric // Note that every entry here will by referenced by a corresponding entry in 378e8d8bef9SDimitry Andric // the StubHelperSection. 37981ad6265SDimitry Andric void addEntry(Symbol *dysym); 38081ad6265SDimitry Andric const llvm::SetVector<Symbol *> &getEntries() const { return entries; } 3815ffd83dbSDimitry Andric 3825ffd83dbSDimitry Andric private: 38381ad6265SDimitry Andric uint32_t encode(const Symbol &); 384e8d8bef9SDimitry Andric 38581ad6265SDimitry Andric llvm::SetVector<Symbol *> entries; 3865ffd83dbSDimitry Andric SmallVector<char, 128> contents; 3875ffd83dbSDimitry Andric llvm::raw_svector_ostream os{contents}; 3885ffd83dbSDimitry Andric }; 3895ffd83dbSDimitry Andric 3905ffd83dbSDimitry Andric // Stores a trie that describes the set of exported symbols. 391fe6060f1SDimitry Andric class ExportSection final : public LinkEditSection { 3925ffd83dbSDimitry Andric public: 3935ffd83dbSDimitry Andric ExportSection(); 394fe6060f1SDimitry Andric void finalizeContents() override; 395e8d8bef9SDimitry Andric uint64_t getRawSize() const override { return size; } 396349cc55cSDimitry Andric bool isNeeded() const override { return size; } 3975ffd83dbSDimitry Andric void writeTo(uint8_t *buf) const override; 3985ffd83dbSDimitry Andric 399e8d8bef9SDimitry Andric bool hasWeakSymbol = false; 400e8d8bef9SDimitry Andric 4015ffd83dbSDimitry Andric private: 4025ffd83dbSDimitry Andric TrieBuilder trieBuilder; 4035ffd83dbSDimitry Andric size_t size = 0; 4045ffd83dbSDimitry Andric }; 4055ffd83dbSDimitry Andric 406bdd1243dSDimitry Andric // Stores 'data in code' entries that describe the locations of data regions 407bdd1243dSDimitry Andric // inside code sections. This is used by llvm-objdump to distinguish jump tables 408bdd1243dSDimitry Andric // and stop them from being disassembled as instructions. 409fe6060f1SDimitry Andric class DataInCodeSection final : public LinkEditSection { 410fe6060f1SDimitry Andric public: 411fe6060f1SDimitry Andric DataInCodeSection(); 412fe6060f1SDimitry Andric void finalizeContents() override; 413fe6060f1SDimitry Andric uint64_t getRawSize() const override { 414fe6060f1SDimitry Andric return sizeof(llvm::MachO::data_in_code_entry) * entries.size(); 415fe6060f1SDimitry Andric } 416fe6060f1SDimitry Andric void writeTo(uint8_t *buf) const override; 417fe6060f1SDimitry Andric 418fe6060f1SDimitry Andric private: 419fe6060f1SDimitry Andric std::vector<llvm::MachO::data_in_code_entry> entries; 420fe6060f1SDimitry Andric }; 421fe6060f1SDimitry Andric 422fe6060f1SDimitry Andric // Stores ULEB128 delta encoded addresses of functions. 423fe6060f1SDimitry Andric class FunctionStartsSection final : public LinkEditSection { 424fe6060f1SDimitry Andric public: 425fe6060f1SDimitry Andric FunctionStartsSection(); 426fe6060f1SDimitry Andric void finalizeContents() override; 427fe6060f1SDimitry Andric uint64_t getRawSize() const override { return contents.size(); } 428fe6060f1SDimitry Andric void writeTo(uint8_t *buf) const override; 429fe6060f1SDimitry Andric 430fe6060f1SDimitry Andric private: 431fe6060f1SDimitry Andric SmallVector<char, 128> contents; 432fe6060f1SDimitry Andric }; 433fe6060f1SDimitry Andric 4345ffd83dbSDimitry Andric // Stores the strings referenced by the symbol table. 435fe6060f1SDimitry Andric class StringTableSection final : public LinkEditSection { 4365ffd83dbSDimitry Andric public: 4375ffd83dbSDimitry Andric StringTableSection(); 4385ffd83dbSDimitry Andric // Returns the start offset of the added string. 4395ffd83dbSDimitry Andric uint32_t addString(StringRef); 440e8d8bef9SDimitry Andric uint64_t getRawSize() const override { return size; } 4415ffd83dbSDimitry Andric void writeTo(uint8_t *buf) const override; 4425ffd83dbSDimitry Andric 443fe6060f1SDimitry Andric static constexpr size_t emptyStringIndex = 1; 444fe6060f1SDimitry Andric 4455ffd83dbSDimitry Andric private: 446e8d8bef9SDimitry Andric // ld64 emits string tables which start with a space and a zero byte. We 447e8d8bef9SDimitry Andric // match its behavior here since some tools depend on it. 448fe6060f1SDimitry Andric // Consequently, the empty string will be at index 1, not zero. 449e8d8bef9SDimitry Andric std::vector<StringRef> strings{" "}; 450e8d8bef9SDimitry Andric size_t size = 2; 4515ffd83dbSDimitry Andric }; 4525ffd83dbSDimitry Andric 4535ffd83dbSDimitry Andric struct SymtabEntry { 4545ffd83dbSDimitry Andric Symbol *sym; 4555ffd83dbSDimitry Andric size_t strx; 4565ffd83dbSDimitry Andric }; 4575ffd83dbSDimitry Andric 458e8d8bef9SDimitry Andric struct StabsEntry { 459e8d8bef9SDimitry Andric uint8_t type = 0; 460fe6060f1SDimitry Andric uint32_t strx = StringTableSection::emptyStringIndex; 461e8d8bef9SDimitry Andric uint8_t sect = 0; 462e8d8bef9SDimitry Andric uint16_t desc = 0; 463e8d8bef9SDimitry Andric uint64_t value = 0; 464e8d8bef9SDimitry Andric 465e8d8bef9SDimitry Andric StabsEntry() = default; 466e8d8bef9SDimitry Andric explicit StabsEntry(uint8_t type) : type(type) {} 467e8d8bef9SDimitry Andric }; 468e8d8bef9SDimitry Andric 469e8d8bef9SDimitry Andric // Symbols of the same type must be laid out contiguously: we choose to emit 470e8d8bef9SDimitry Andric // all local symbols first, then external symbols, and finally undefined 471e8d8bef9SDimitry Andric // symbols. For each symbol type, the LC_DYSYMTAB load command will record the 472e8d8bef9SDimitry Andric // range (start index and total number) of those symbols in the symbol table. 473e8d8bef9SDimitry Andric class SymtabSection : public LinkEditSection { 4745ffd83dbSDimitry Andric public: 475fe6060f1SDimitry Andric void finalizeContents() override; 476e8d8bef9SDimitry Andric uint32_t getNumSymbols() const; 477e8d8bef9SDimitry Andric uint32_t getNumLocalSymbols() const { 478e8d8bef9SDimitry Andric return stabs.size() + localSymbols.size(); 479e8d8bef9SDimitry Andric } 480e8d8bef9SDimitry Andric uint32_t getNumExternalSymbols() const { return externalSymbols.size(); } 481e8d8bef9SDimitry Andric uint32_t getNumUndefinedSymbols() const { return undefinedSymbols.size(); } 4825ffd83dbSDimitry Andric 4835ffd83dbSDimitry Andric private: 48481ad6265SDimitry Andric void emitBeginSourceStab(StringRef); 485e8d8bef9SDimitry Andric void emitEndSourceStab(); 486e8d8bef9SDimitry Andric void emitObjectFileStab(ObjFile *); 487e8d8bef9SDimitry Andric void emitEndFunStab(Defined *); 488e8d8bef9SDimitry Andric void emitStabs(); 489e8d8bef9SDimitry Andric 490fe6060f1SDimitry Andric protected: 491fe6060f1SDimitry Andric SymtabSection(StringTableSection &); 492fe6060f1SDimitry Andric 4935ffd83dbSDimitry Andric StringTableSection &stringTableSection; 494e8d8bef9SDimitry Andric // STABS symbols are always local symbols, but we represent them with special 495e8d8bef9SDimitry Andric // entries because they may use fields like n_sect and n_desc differently. 496e8d8bef9SDimitry Andric std::vector<StabsEntry> stabs; 497e8d8bef9SDimitry Andric std::vector<SymtabEntry> localSymbols; 498e8d8bef9SDimitry Andric std::vector<SymtabEntry> externalSymbols; 499e8d8bef9SDimitry Andric std::vector<SymtabEntry> undefinedSymbols; 500e8d8bef9SDimitry Andric }; 501e8d8bef9SDimitry Andric 502fe6060f1SDimitry Andric template <class LP> SymtabSection *makeSymtabSection(StringTableSection &); 503fe6060f1SDimitry Andric 504e8d8bef9SDimitry Andric // The indirect symbol table is a list of 32-bit integers that serve as indices 505e8d8bef9SDimitry Andric // into the (actual) symbol table. The indirect symbol table is a 506e8d8bef9SDimitry Andric // concatenation of several sub-arrays of indices, each sub-array belonging to 507e8d8bef9SDimitry Andric // a separate section. The starting offset of each sub-array is stored in the 508e8d8bef9SDimitry Andric // reserved1 header field of the respective section. 509e8d8bef9SDimitry Andric // 510e8d8bef9SDimitry Andric // These sub-arrays provide symbol information for sections that store 511e8d8bef9SDimitry Andric // contiguous sequences of symbol references. These references can be pointers 512e8d8bef9SDimitry Andric // (e.g. those in the GOT and TLVP sections) or assembly sequences (e.g. 513e8d8bef9SDimitry Andric // function stubs). 514fe6060f1SDimitry Andric class IndirectSymtabSection final : public LinkEditSection { 515e8d8bef9SDimitry Andric public: 516e8d8bef9SDimitry Andric IndirectSymtabSection(); 517fe6060f1SDimitry Andric void finalizeContents() override; 518e8d8bef9SDimitry Andric uint32_t getNumSymbols() const; 519e8d8bef9SDimitry Andric uint64_t getRawSize() const override { 520e8d8bef9SDimitry Andric return getNumSymbols() * sizeof(uint32_t); 521e8d8bef9SDimitry Andric } 522e8d8bef9SDimitry Andric bool isNeeded() const override; 523e8d8bef9SDimitry Andric void writeTo(uint8_t *buf) const override; 5245ffd83dbSDimitry Andric }; 5255ffd83dbSDimitry Andric 526fe6060f1SDimitry Andric // The code signature comes at the very end of the linked output file. 527fe6060f1SDimitry Andric class CodeSignatureSection final : public LinkEditSection { 528fe6060f1SDimitry Andric public: 529349cc55cSDimitry Andric // NOTE: These values are duplicated in llvm-objcopy's MachO/Object.h file 530349cc55cSDimitry Andric // and any changes here, should be repeated there. 531fe6060f1SDimitry Andric static constexpr uint8_t blockSizeShift = 12; 532fe6060f1SDimitry Andric static constexpr size_t blockSize = (1 << blockSizeShift); // 4 KiB 533fe6060f1SDimitry Andric static constexpr size_t hashSize = 256 / 8; 534fe6060f1SDimitry Andric static constexpr size_t blobHeadersSize = llvm::alignTo<8>( 535fe6060f1SDimitry Andric sizeof(llvm::MachO::CS_SuperBlob) + sizeof(llvm::MachO::CS_BlobIndex)); 536fe6060f1SDimitry Andric static constexpr uint32_t fixedHeadersSize = 537fe6060f1SDimitry Andric blobHeadersSize + sizeof(llvm::MachO::CS_CodeDirectory); 538fe6060f1SDimitry Andric 539fe6060f1SDimitry Andric uint32_t fileNamePad = 0; 540fe6060f1SDimitry Andric uint32_t allHeadersSize = 0; 541fe6060f1SDimitry Andric StringRef fileName; 542fe6060f1SDimitry Andric 543fe6060f1SDimitry Andric CodeSignatureSection(); 544fe6060f1SDimitry Andric uint64_t getRawSize() const override; 545fe6060f1SDimitry Andric bool isNeeded() const override { return true; } 546fe6060f1SDimitry Andric void writeTo(uint8_t *buf) const override; 547fe6060f1SDimitry Andric uint32_t getBlockCount() const; 548fe6060f1SDimitry Andric void writeHashes(uint8_t *buf) const; 549fe6060f1SDimitry Andric }; 550fe6060f1SDimitry Andric 551fe6060f1SDimitry Andric class CStringSection : public SyntheticSection { 552fe6060f1SDimitry Andric public: 553bdd1243dSDimitry Andric CStringSection(const char *name); 554fe6060f1SDimitry Andric void addInput(CStringInputSection *); 555fe6060f1SDimitry Andric uint64_t getSize() const override { return size; } 556fe6060f1SDimitry Andric virtual void finalizeContents(); 557fe6060f1SDimitry Andric bool isNeeded() const override { return !inputs.empty(); } 558fe6060f1SDimitry Andric void writeTo(uint8_t *buf) const override; 559fe6060f1SDimitry Andric 560fe6060f1SDimitry Andric std::vector<CStringInputSection *> inputs; 561fe6060f1SDimitry Andric 562fe6060f1SDimitry Andric private: 563fe6060f1SDimitry Andric uint64_t size; 564fe6060f1SDimitry Andric }; 565fe6060f1SDimitry Andric 566fe6060f1SDimitry Andric class DeduplicatedCStringSection final : public CStringSection { 567fe6060f1SDimitry Andric public: 568bdd1243dSDimitry Andric DeduplicatedCStringSection(const char *name) : CStringSection(name){}; 56981ad6265SDimitry Andric uint64_t getSize() const override { return size; } 570fe6060f1SDimitry Andric void finalizeContents() override; 57181ad6265SDimitry Andric void writeTo(uint8_t *buf) const override; 572fe6060f1SDimitry Andric 57381ad6265SDimitry Andric struct StringOffset { 57481ad6265SDimitry Andric uint8_t trailingZeros; 57581ad6265SDimitry Andric uint64_t outSecOff = UINT64_MAX; 57681ad6265SDimitry Andric 57781ad6265SDimitry Andric explicit StringOffset(uint8_t zeros) : trailingZeros(zeros) {} 57881ad6265SDimitry Andric }; 579bdd1243dSDimitry Andric 580bdd1243dSDimitry Andric StringOffset getStringOffset(StringRef str) const; 581bdd1243dSDimitry Andric 582bdd1243dSDimitry Andric private: 58381ad6265SDimitry Andric llvm::DenseMap<llvm::CachedHashStringRef, StringOffset> stringOffsetMap; 58481ad6265SDimitry Andric size_t size = 0; 585fe6060f1SDimitry Andric }; 586fe6060f1SDimitry Andric 587fe6060f1SDimitry Andric /* 588fe6060f1SDimitry Andric * This section contains deduplicated literal values. The 16-byte values are 589fe6060f1SDimitry Andric * laid out first, followed by the 8- and then the 4-byte ones. 590fe6060f1SDimitry Andric */ 591fe6060f1SDimitry Andric class WordLiteralSection final : public SyntheticSection { 592fe6060f1SDimitry Andric public: 593fe6060f1SDimitry Andric using UInt128 = std::pair<uint64_t, uint64_t>; 594fe6060f1SDimitry Andric // I don't think the standard guarantees the size of a pair, so let's make 595fe6060f1SDimitry Andric // sure it's exact -- that way we can construct it via `mmap`. 596bdd1243dSDimitry Andric static_assert(sizeof(UInt128) == 16); 597fe6060f1SDimitry Andric 598fe6060f1SDimitry Andric WordLiteralSection(); 599fe6060f1SDimitry Andric void addInput(WordLiteralInputSection *); 600fe6060f1SDimitry Andric void finalizeContents(); 601fe6060f1SDimitry Andric void writeTo(uint8_t *buf) const override; 602fe6060f1SDimitry Andric 603fe6060f1SDimitry Andric uint64_t getSize() const override { 604fe6060f1SDimitry Andric return literal16Map.size() * 16 + literal8Map.size() * 8 + 605fe6060f1SDimitry Andric literal4Map.size() * 4; 606fe6060f1SDimitry Andric } 607fe6060f1SDimitry Andric 608fe6060f1SDimitry Andric bool isNeeded() const override { 609fe6060f1SDimitry Andric return !literal16Map.empty() || !literal4Map.empty() || 610fe6060f1SDimitry Andric !literal8Map.empty(); 611fe6060f1SDimitry Andric } 612fe6060f1SDimitry Andric 613349cc55cSDimitry Andric uint64_t getLiteral16Offset(uintptr_t buf) const { 614fe6060f1SDimitry Andric return literal16Map.at(*reinterpret_cast<const UInt128 *>(buf)) * 16; 615fe6060f1SDimitry Andric } 616fe6060f1SDimitry Andric 617349cc55cSDimitry Andric uint64_t getLiteral8Offset(uintptr_t buf) const { 618fe6060f1SDimitry Andric return literal16Map.size() * 16 + 619fe6060f1SDimitry Andric literal8Map.at(*reinterpret_cast<const uint64_t *>(buf)) * 8; 620fe6060f1SDimitry Andric } 621fe6060f1SDimitry Andric 622349cc55cSDimitry Andric uint64_t getLiteral4Offset(uintptr_t buf) const { 623fe6060f1SDimitry Andric return literal16Map.size() * 16 + literal8Map.size() * 8 + 624fe6060f1SDimitry Andric literal4Map.at(*reinterpret_cast<const uint32_t *>(buf)) * 4; 625fe6060f1SDimitry Andric } 626fe6060f1SDimitry Andric 627fe6060f1SDimitry Andric private: 628fe6060f1SDimitry Andric std::vector<WordLiteralInputSection *> inputs; 629fe6060f1SDimitry Andric 630fe6060f1SDimitry Andric template <class T> struct Hasher { 631fe6060f1SDimitry Andric llvm::hash_code operator()(T v) const { return llvm::hash_value(v); } 632fe6060f1SDimitry Andric }; 633fe6060f1SDimitry Andric // We're using unordered_map instead of DenseMap here because we need to 634fe6060f1SDimitry Andric // support all possible integer values -- there are no suitable tombstone 635fe6060f1SDimitry Andric // values for DenseMap. 636fe6060f1SDimitry Andric std::unordered_map<UInt128, uint64_t, Hasher<UInt128>> literal16Map; 637fe6060f1SDimitry Andric std::unordered_map<uint64_t, uint64_t> literal8Map; 638fe6060f1SDimitry Andric std::unordered_map<uint32_t, uint64_t> literal4Map; 639fe6060f1SDimitry Andric }; 640fe6060f1SDimitry Andric 641fcaf7f86SDimitry Andric class ObjCImageInfoSection final : public SyntheticSection { 642fcaf7f86SDimitry Andric public: 643fcaf7f86SDimitry Andric ObjCImageInfoSection(); 644fcaf7f86SDimitry Andric bool isNeeded() const override { return !files.empty(); } 645fcaf7f86SDimitry Andric uint64_t getSize() const override { return 8; } 646fcaf7f86SDimitry Andric void addFile(const InputFile *file) { 647fcaf7f86SDimitry Andric assert(!file->objCImageInfo.empty()); 648fcaf7f86SDimitry Andric files.push_back(file); 649fcaf7f86SDimitry Andric } 650fcaf7f86SDimitry Andric void finalizeContents(); 651fcaf7f86SDimitry Andric void writeTo(uint8_t *buf) const override; 652fcaf7f86SDimitry Andric 653fcaf7f86SDimitry Andric private: 654fcaf7f86SDimitry Andric struct ImageInfo { 655fcaf7f86SDimitry Andric uint8_t swiftVersion = 0; 656fcaf7f86SDimitry Andric bool hasCategoryClassProperties = false; 657fcaf7f86SDimitry Andric } info; 658fcaf7f86SDimitry Andric static ImageInfo parseImageInfo(const InputFile *); 659fcaf7f86SDimitry Andric std::vector<const InputFile *> files; // files with image info 660fcaf7f86SDimitry Andric }; 661fcaf7f86SDimitry Andric 662bdd1243dSDimitry Andric // This section stores 32-bit __TEXT segment offsets of initializer functions. 663bdd1243dSDimitry Andric // 664bdd1243dSDimitry Andric // The compiler stores pointers to initializers in __mod_init_func. These need 665bdd1243dSDimitry Andric // to be fixed up at load time, which takes time and dirties memory. By 666bdd1243dSDimitry Andric // synthesizing InitOffsetsSection from them, this data can live in the 667bdd1243dSDimitry Andric // read-only __TEXT segment instead. This section is used by default when 668bdd1243dSDimitry Andric // chained fixups are enabled. 669bdd1243dSDimitry Andric // 670bdd1243dSDimitry Andric // There is no similar counterpart to __mod_term_func, as that section is 671bdd1243dSDimitry Andric // deprecated, and static destructors are instead handled by registering them 672bdd1243dSDimitry Andric // via __cxa_atexit from an autogenerated initializer function (see D121736). 673bdd1243dSDimitry Andric class InitOffsetsSection final : public SyntheticSection { 674bdd1243dSDimitry Andric public: 675bdd1243dSDimitry Andric InitOffsetsSection(); 676bdd1243dSDimitry Andric bool isNeeded() const override { return !sections.empty(); } 677bdd1243dSDimitry Andric uint64_t getSize() const override; 678bdd1243dSDimitry Andric void writeTo(uint8_t *buf) const override; 679bdd1243dSDimitry Andric void setUp(); 680bdd1243dSDimitry Andric 681bdd1243dSDimitry Andric void addInput(ConcatInputSection *isec) { sections.push_back(isec); } 682bdd1243dSDimitry Andric const std::vector<ConcatInputSection *> &inputs() const { return sections; } 683bdd1243dSDimitry Andric 684bdd1243dSDimitry Andric private: 685bdd1243dSDimitry Andric std::vector<ConcatInputSection *> sections; 686bdd1243dSDimitry Andric }; 687bdd1243dSDimitry Andric 688*0fca6ea1SDimitry Andric // This SyntheticSection is for the __objc_methlist section, which contains 689*0fca6ea1SDimitry Andric // relative method lists if the -objc_relative_method_lists option is enabled. 690*0fca6ea1SDimitry Andric class ObjCMethListSection final : public SyntheticSection { 691*0fca6ea1SDimitry Andric public: 692*0fca6ea1SDimitry Andric ObjCMethListSection(); 693*0fca6ea1SDimitry Andric 694*0fca6ea1SDimitry Andric static bool isMethodList(const ConcatInputSection *isec); 695*0fca6ea1SDimitry Andric void addInput(ConcatInputSection *isec) { inputs.push_back(isec); } 696*0fca6ea1SDimitry Andric std::vector<ConcatInputSection *> getInputs() { return inputs; } 697*0fca6ea1SDimitry Andric 698*0fca6ea1SDimitry Andric void setUp(); 699*0fca6ea1SDimitry Andric void finalize() override; 700*0fca6ea1SDimitry Andric bool isNeeded() const override { return !inputs.empty(); } 701*0fca6ea1SDimitry Andric uint64_t getSize() const override { return sectionSize; } 702*0fca6ea1SDimitry Andric void writeTo(uint8_t *bufStart) const override; 703*0fca6ea1SDimitry Andric 704*0fca6ea1SDimitry Andric private: 705*0fca6ea1SDimitry Andric void readMethodListHeader(const uint8_t *buf, uint32_t &structSizeAndFlags, 706*0fca6ea1SDimitry Andric uint32_t &structCount) const; 707*0fca6ea1SDimitry Andric void writeMethodListHeader(uint8_t *buf, uint32_t structSizeAndFlags, 708*0fca6ea1SDimitry Andric uint32_t structCount) const; 709*0fca6ea1SDimitry Andric uint32_t computeRelativeMethodListSize(uint32_t absoluteMethodListSize) const; 710*0fca6ea1SDimitry Andric void writeRelativeOffsetForIsec(const ConcatInputSection *isec, uint8_t *buf, 711*0fca6ea1SDimitry Andric uint32_t &inSecOff, uint32_t &outSecOff, 712*0fca6ea1SDimitry Andric bool useSelRef) const; 713*0fca6ea1SDimitry Andric uint32_t writeRelativeMethodList(const ConcatInputSection *isec, 714*0fca6ea1SDimitry Andric uint8_t *buf) const; 715*0fca6ea1SDimitry Andric 716*0fca6ea1SDimitry Andric static constexpr uint32_t methodListHeaderSize = 717*0fca6ea1SDimitry Andric /*structSizeAndFlags*/ sizeof(uint32_t) + 718*0fca6ea1SDimitry Andric /*structCount*/ sizeof(uint32_t); 719*0fca6ea1SDimitry Andric // Relative method lists are supported only for 3-pointer method lists 720*0fca6ea1SDimitry Andric static constexpr uint32_t pointersPerStruct = 3; 721*0fca6ea1SDimitry Andric // The runtime identifies relative method lists via this magic value 722*0fca6ea1SDimitry Andric static constexpr uint32_t relMethodHeaderFlag = 0x80000000; 723*0fca6ea1SDimitry Andric // In the method list header, the first 2 bytes are the size of struct 724*0fca6ea1SDimitry Andric static constexpr uint32_t structSizeMask = 0x0000FFFF; 725*0fca6ea1SDimitry Andric // In the method list header, the last 2 bytes are the flags for the struct 726*0fca6ea1SDimitry Andric static constexpr uint32_t structFlagsMask = 0xFFFF0000; 727*0fca6ea1SDimitry Andric // Relative method lists have 4 byte alignment as all data in the InputSection 728*0fca6ea1SDimitry Andric // is 4 byte 729*0fca6ea1SDimitry Andric static constexpr uint32_t relativeOffsetSize = sizeof(uint32_t); 730*0fca6ea1SDimitry Andric 731*0fca6ea1SDimitry Andric // The output size of the __objc_methlist section, computed during finalize() 732*0fca6ea1SDimitry Andric uint32_t sectionSize = 0; 733*0fca6ea1SDimitry Andric std::vector<ConcatInputSection *> inputs; 734*0fca6ea1SDimitry Andric }; 735*0fca6ea1SDimitry Andric 736bdd1243dSDimitry Andric // Chained fixups are a replacement for classic dyld opcodes. In this format, 737bdd1243dSDimitry Andric // most of the metadata necessary for binding symbols and rebasing addresses is 738bdd1243dSDimitry Andric // stored directly in the memory location that will have the fixup applied. 739bdd1243dSDimitry Andric // 740bdd1243dSDimitry Andric // The fixups form singly linked lists; each one covering a single page in 741bdd1243dSDimitry Andric // memory. The __LINKEDIT,__chainfixups section stores the page offset of the 742bdd1243dSDimitry Andric // first fixup of each page; the rest can be found by walking the chain using 743bdd1243dSDimitry Andric // the offset that is embedded in each entry. 744bdd1243dSDimitry Andric // 745bdd1243dSDimitry Andric // This setup allows pages to be relocated lazily at page-in time and without 746bdd1243dSDimitry Andric // being dirtied. The kernel can discard and load them again as needed. This 747bdd1243dSDimitry Andric // technique, called page-in linking, was introduced in macOS 13. 748bdd1243dSDimitry Andric // 749bdd1243dSDimitry Andric // The benefits of this format are: 750bdd1243dSDimitry Andric // - smaller __LINKEDIT segment, as most of the fixup information is stored in 751bdd1243dSDimitry Andric // the data segment 752bdd1243dSDimitry Andric // - faster startup, since not all relocations need to be done upfront 753bdd1243dSDimitry Andric // - slightly lower memory usage, as fewer pages are dirtied 754bdd1243dSDimitry Andric // 755bdd1243dSDimitry Andric // Userspace x86_64 and arm64 binaries have two types of fixup entries: 756bdd1243dSDimitry Andric // - Rebase entries contain an absolute address, to which the object's load 757bdd1243dSDimitry Andric // address will be added to get the final value. This is used for loading 758bdd1243dSDimitry Andric // the address of a symbol defined in the same binary. 759bdd1243dSDimitry Andric // - Binding entries are mostly used for symbols imported from other dylibs, 760bdd1243dSDimitry Andric // but for weakly bound and interposable symbols as well. They are looked up 761bdd1243dSDimitry Andric // by a (symbol name, library) pair stored in __chainfixups. This import 762bdd1243dSDimitry Andric // entry also encodes whether the import is weak (i.e. if the symbol is 763bdd1243dSDimitry Andric // missing, it should be set to null instead of producing a load error). 764bdd1243dSDimitry Andric // The fixup encodes an ordinal associated with the import, and an optional 765bdd1243dSDimitry Andric // addend. 766bdd1243dSDimitry Andric // 767bdd1243dSDimitry Andric // The entries are tightly packed 64-bit bitfields. One of the bits specifies 768bdd1243dSDimitry Andric // which kind of fixup to interpret them as. 769bdd1243dSDimitry Andric // 770bdd1243dSDimitry Andric // LLD generates the fixup data in 5 stages: 771bdd1243dSDimitry Andric // 1. While scanning relocations, we make a note of each location that needs 772bdd1243dSDimitry Andric // a fixup by calling addRebase() or addBinding(). During this, we assign 773bdd1243dSDimitry Andric // a unique ordinal for each (symbol name, library, addend) import tuple. 774bdd1243dSDimitry Andric // 2. After addresses have been assigned to all sections, and thus the memory 775bdd1243dSDimitry Andric // layout of the linked image is final; finalizeContents() is called. Here, 776bdd1243dSDimitry Andric // the page offsets of the chain start entries are calculated. 777bdd1243dSDimitry Andric // 3. ChainedFixupsSection::writeTo() writes the page start offsets and the 778bdd1243dSDimitry Andric // imports table to the output file. 779bdd1243dSDimitry Andric // 4. Each section's fixup entries are encoded and written to disk in 780bdd1243dSDimitry Andric // ConcatInputSection::writeTo(), but without writing the offsets that form 781bdd1243dSDimitry Andric // the chain. 782bdd1243dSDimitry Andric // 5. Finally, each page's (which might correspond to multiple sections) 783bdd1243dSDimitry Andric // fixups are linked together in Writer::buildFixupChains(). 784bdd1243dSDimitry Andric class ChainedFixupsSection final : public LinkEditSection { 785bdd1243dSDimitry Andric public: 786bdd1243dSDimitry Andric ChainedFixupsSection(); 787bdd1243dSDimitry Andric void finalizeContents() override; 788bdd1243dSDimitry Andric uint64_t getRawSize() const override { return size; } 789bdd1243dSDimitry Andric bool isNeeded() const override; 790bdd1243dSDimitry Andric void writeTo(uint8_t *buf) const override; 791bdd1243dSDimitry Andric 792bdd1243dSDimitry Andric void addRebase(const InputSection *isec, uint64_t offset) { 793bdd1243dSDimitry Andric locations.emplace_back(isec, offset); 794bdd1243dSDimitry Andric } 795bdd1243dSDimitry Andric void addBinding(const Symbol *dysym, const InputSection *isec, 796bdd1243dSDimitry Andric uint64_t offset, int64_t addend = 0); 797bdd1243dSDimitry Andric 798bdd1243dSDimitry Andric void setHasNonWeakDefinition() { hasNonWeakDef = true; } 799bdd1243dSDimitry Andric 800bdd1243dSDimitry Andric // Returns an (ordinal, inline addend) tuple used by dyld_chained_ptr_64_bind. 801bdd1243dSDimitry Andric std::pair<uint32_t, uint8_t> getBinding(const Symbol *sym, 802bdd1243dSDimitry Andric int64_t addend) const; 803bdd1243dSDimitry Andric 804bdd1243dSDimitry Andric const std::vector<Location> &getLocations() const { return locations; } 805bdd1243dSDimitry Andric 806bdd1243dSDimitry Andric bool hasWeakBinding() const { return hasWeakBind; } 807bdd1243dSDimitry Andric bool hasNonWeakDefinition() const { return hasNonWeakDef; } 808bdd1243dSDimitry Andric 809bdd1243dSDimitry Andric private: 810bdd1243dSDimitry Andric // Location::offset initially stores the offset within an InputSection, but 811bdd1243dSDimitry Andric // contains output segment offsets after finalizeContents(). 812bdd1243dSDimitry Andric std::vector<Location> locations; 813bdd1243dSDimitry Andric // (target symbol, addend) => import ordinal 814bdd1243dSDimitry Andric llvm::MapVector<std::pair<const Symbol *, int64_t>, uint32_t> bindings; 815bdd1243dSDimitry Andric 816bdd1243dSDimitry Andric struct SegmentInfo { 817bdd1243dSDimitry Andric SegmentInfo(const OutputSegment *oseg) : oseg(oseg) {} 818bdd1243dSDimitry Andric 819bdd1243dSDimitry Andric const OutputSegment *oseg; 820bdd1243dSDimitry Andric // (page index, fixup starts offset) 821bdd1243dSDimitry Andric llvm::SmallVector<std::pair<uint16_t, uint16_t>> pageStarts; 822bdd1243dSDimitry Andric 823bdd1243dSDimitry Andric size_t getSize() const; 824bdd1243dSDimitry Andric size_t writeTo(uint8_t *buf) const; 825bdd1243dSDimitry Andric }; 826bdd1243dSDimitry Andric llvm::SmallVector<SegmentInfo, 4> fixupSegments; 827bdd1243dSDimitry Andric 828bdd1243dSDimitry Andric size_t symtabSize = 0; 829bdd1243dSDimitry Andric size_t size = 0; 830bdd1243dSDimitry Andric 831bdd1243dSDimitry Andric bool needsAddend = false; 832bdd1243dSDimitry Andric bool needsLargeAddend = false; 833bdd1243dSDimitry Andric bool hasWeakBind = false; 834bdd1243dSDimitry Andric bool hasNonWeakDef = false; 835bdd1243dSDimitry Andric llvm::MachO::ChainedImportFormat importFormat; 836bdd1243dSDimitry Andric }; 837bdd1243dSDimitry Andric 838bdd1243dSDimitry Andric void writeChainedRebase(uint8_t *buf, uint64_t targetVA); 839bdd1243dSDimitry Andric void writeChainedFixup(uint8_t *buf, const Symbol *sym, int64_t addend); 840bdd1243dSDimitry Andric 8415ffd83dbSDimitry Andric struct InStruct { 84281ad6265SDimitry Andric const uint8_t *bufferStart = nullptr; 843e8d8bef9SDimitry Andric MachHeaderSection *header = nullptr; 844fe6060f1SDimitry Andric CStringSection *cStringSection = nullptr; 845bdd1243dSDimitry Andric DeduplicatedCStringSection *objcMethnameSection = nullptr; 846fe6060f1SDimitry Andric WordLiteralSection *wordLiteralSection = nullptr; 847e8d8bef9SDimitry Andric RebaseSection *rebase = nullptr; 8485ffd83dbSDimitry Andric BindingSection *binding = nullptr; 849e8d8bef9SDimitry Andric WeakBindingSection *weakBinding = nullptr; 850e8d8bef9SDimitry Andric LazyBindingSection *lazyBinding = nullptr; 851e8d8bef9SDimitry Andric ExportSection *exports = nullptr; 8525ffd83dbSDimitry Andric GotSection *got = nullptr; 853e8d8bef9SDimitry Andric TlvPointerSection *tlvPointers = nullptr; 8545ffd83dbSDimitry Andric LazyPointerSection *lazyPointers = nullptr; 8555ffd83dbSDimitry Andric StubsSection *stubs = nullptr; 8565ffd83dbSDimitry Andric StubHelperSection *stubHelper = nullptr; 857bdd1243dSDimitry Andric ObjCStubsSection *objcStubs = nullptr; 858fe6060f1SDimitry Andric UnwindInfoSection *unwindInfo = nullptr; 859fcaf7f86SDimitry Andric ObjCImageInfoSection *objCImageInfo = nullptr; 860fe6060f1SDimitry Andric ConcatInputSection *imageLoaderCache = nullptr; 861bdd1243dSDimitry Andric InitOffsetsSection *initOffsets = nullptr; 862*0fca6ea1SDimitry Andric ObjCMethListSection *objcMethList = nullptr; 863bdd1243dSDimitry Andric ChainedFixupsSection *chainedFixups = nullptr; 8645ffd83dbSDimitry Andric }; 8655ffd83dbSDimitry Andric 8665ffd83dbSDimitry Andric extern InStruct in; 8675ffd83dbSDimitry Andric extern std::vector<SyntheticSection *> syntheticSections; 8685ffd83dbSDimitry Andric 869fe6060f1SDimitry Andric void createSyntheticSymbols(); 870fe6060f1SDimitry Andric 871bdd1243dSDimitry Andric } // namespace lld::macho 8725ffd83dbSDimitry Andric 8735ffd83dbSDimitry Andric #endif 874