xref: /freebsd-src/contrib/llvm-project/lld/MachO/SyntheticSections.h (revision 0fca6ea1d4eea4c934cfff25ac9ee8ad6fe95583)
15ffd83dbSDimitry Andric //===- SyntheticSections.h -------------------------------------*- C++ -*-===//
25ffd83dbSDimitry Andric //
35ffd83dbSDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
45ffd83dbSDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
55ffd83dbSDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
65ffd83dbSDimitry Andric //
75ffd83dbSDimitry Andric //===----------------------------------------------------------------------===//
85ffd83dbSDimitry Andric 
95ffd83dbSDimitry Andric #ifndef LLD_MACHO_SYNTHETIC_SECTIONS_H
105ffd83dbSDimitry Andric #define LLD_MACHO_SYNTHETIC_SECTIONS_H
115ffd83dbSDimitry Andric 
125ffd83dbSDimitry Andric #include "Config.h"
135ffd83dbSDimitry Andric #include "ExportTrie.h"
145ffd83dbSDimitry Andric #include "InputSection.h"
155ffd83dbSDimitry Andric #include "OutputSection.h"
16e8d8bef9SDimitry Andric #include "OutputSegment.h"
175ffd83dbSDimitry Andric #include "Target.h"
18fe6060f1SDimitry Andric #include "Writer.h"
195ffd83dbSDimitry Andric 
20fe6060f1SDimitry Andric #include "llvm/ADT/DenseMap.h"
21fe6060f1SDimitry Andric #include "llvm/ADT/Hashing.h"
22*0fca6ea1SDimitry Andric #include "llvm/ADT/MapVector.h"
235ffd83dbSDimitry Andric #include "llvm/ADT/SetVector.h"
24bdd1243dSDimitry Andric #include "llvm/BinaryFormat/MachO.h"
25fe6060f1SDimitry Andric #include "llvm/Support/MathExtras.h"
265ffd83dbSDimitry Andric #include "llvm/Support/raw_ostream.h"
275ffd83dbSDimitry Andric 
28fe6060f1SDimitry Andric #include <unordered_map>
29fe6060f1SDimitry Andric 
30e8d8bef9SDimitry Andric namespace llvm {
31e8d8bef9SDimitry Andric class DWARFUnit;
32e8d8bef9SDimitry Andric } // namespace llvm
33e8d8bef9SDimitry Andric 
34bdd1243dSDimitry Andric namespace lld::macho {
355ffd83dbSDimitry Andric 
36e8d8bef9SDimitry Andric class Defined;
375ffd83dbSDimitry Andric class DylibSymbol;
385ffd83dbSDimitry Andric class LoadCommand;
39e8d8bef9SDimitry Andric class ObjFile;
40fe6060f1SDimitry Andric class UnwindInfoSection;
415ffd83dbSDimitry Andric 
425ffd83dbSDimitry Andric class SyntheticSection : public OutputSection {
435ffd83dbSDimitry Andric public:
445ffd83dbSDimitry Andric   SyntheticSection(const char *segname, const char *name);
455ffd83dbSDimitry Andric   virtual ~SyntheticSection() = default;
465ffd83dbSDimitry Andric 
475ffd83dbSDimitry Andric   static bool classof(const OutputSection *sec) {
485ffd83dbSDimitry Andric     return sec->kind() == SyntheticKind;
495ffd83dbSDimitry Andric   }
505ffd83dbSDimitry Andric 
51fe6060f1SDimitry Andric   StringRef segname;
52fe6060f1SDimitry Andric   // This fake InputSection makes it easier for us to write code that applies
53fe6060f1SDimitry Andric   // generically to both user inputs and synthetics.
54fe6060f1SDimitry Andric   InputSection *isec;
555ffd83dbSDimitry Andric };
565ffd83dbSDimitry Andric 
57e8d8bef9SDimitry Andric // All sections in __LINKEDIT should inherit from this.
58e8d8bef9SDimitry Andric class LinkEditSection : public SyntheticSection {
59e8d8bef9SDimitry Andric public:
60e8d8bef9SDimitry Andric   LinkEditSection(const char *segname, const char *name)
61e8d8bef9SDimitry Andric       : SyntheticSection(segname, name) {
62fe6060f1SDimitry Andric     align = target->wordSize;
63e8d8bef9SDimitry Andric   }
64e8d8bef9SDimitry Andric 
651fd87a68SDimitry Andric   // Implementations of this method can assume that the regular (non-__LINKEDIT)
661fd87a68SDimitry Andric   // sections already have their addresses assigned.
67fe6060f1SDimitry Andric   virtual void finalizeContents() {}
68fe6060f1SDimitry Andric 
69e8d8bef9SDimitry Andric   // Sections in __LINKEDIT are special: their offsets are recorded in the
70e8d8bef9SDimitry Andric   // load commands like LC_DYLD_INFO_ONLY and LC_SYMTAB, instead of in section
71e8d8bef9SDimitry Andric   // headers.
72972a253aSDimitry Andric   bool isHidden() const final { return true; }
73e8d8bef9SDimitry Andric 
74e8d8bef9SDimitry Andric   virtual uint64_t getRawSize() const = 0;
75e8d8bef9SDimitry Andric 
76e8d8bef9SDimitry Andric   // codesign (or more specifically libstuff) checks that each section in
77e8d8bef9SDimitry Andric   // __LINKEDIT ends where the next one starts -- no gaps are permitted. We
78e8d8bef9SDimitry Andric   // therefore align every section's start and end points to WordSize.
79e8d8bef9SDimitry Andric   //
80e8d8bef9SDimitry Andric   // NOTE: This assumes that the extra bytes required for alignment can be
81e8d8bef9SDimitry Andric   // zero-valued bytes.
82972a253aSDimitry Andric   uint64_t getSize() const final { return llvm::alignTo(getRawSize(), align); }
83e8d8bef9SDimitry Andric };
84e8d8bef9SDimitry Andric 
855ffd83dbSDimitry Andric // The header of the Mach-O file, which must have a file offset of zero.
86fe6060f1SDimitry Andric class MachHeaderSection final : public SyntheticSection {
875ffd83dbSDimitry Andric public:
885ffd83dbSDimitry Andric   MachHeaderSection();
895ffd83dbSDimitry Andric   bool isHidden() const override { return true; }
905ffd83dbSDimitry Andric   uint64_t getSize() const override;
915ffd83dbSDimitry Andric   void writeTo(uint8_t *buf) const override;
925ffd83dbSDimitry Andric 
93fe6060f1SDimitry Andric   void addLoadCommand(LoadCommand *);
94fe6060f1SDimitry Andric 
95fe6060f1SDimitry Andric protected:
965ffd83dbSDimitry Andric   std::vector<LoadCommand *> loadCommands;
975ffd83dbSDimitry Andric   uint32_t sizeOfCmds = 0;
985ffd83dbSDimitry Andric };
995ffd83dbSDimitry Andric 
1005ffd83dbSDimitry Andric // A hidden section that exists solely for the purpose of creating the
1015ffd83dbSDimitry Andric // __PAGEZERO segment, which is used to catch null pointer dereferences.
102fe6060f1SDimitry Andric class PageZeroSection final : public SyntheticSection {
1035ffd83dbSDimitry Andric public:
1045ffd83dbSDimitry Andric   PageZeroSection();
1055ffd83dbSDimitry Andric   bool isHidden() const override { return true; }
10681ad6265SDimitry Andric   bool isNeeded() const override { return target->pageZeroSize != 0; }
107fe6060f1SDimitry Andric   uint64_t getSize() const override { return target->pageZeroSize; }
1085ffd83dbSDimitry Andric   uint64_t getFileSize() const override { return 0; }
1095ffd83dbSDimitry Andric   void writeTo(uint8_t *buf) const override {}
1105ffd83dbSDimitry Andric };
1115ffd83dbSDimitry Andric 
112e8d8bef9SDimitry Andric // This is the base class for the GOT and TLVPointer sections, which are nearly
113e8d8bef9SDimitry Andric // functionally identical -- they will both be populated by dyld with addresses
114e8d8bef9SDimitry Andric // to non-lazily-loaded dylib symbols. The main difference is that the
115e8d8bef9SDimitry Andric // TLVPointerSection stores references to thread-local variables.
116e8d8bef9SDimitry Andric class NonLazyPointerSectionBase : public SyntheticSection {
1175ffd83dbSDimitry Andric public:
118e8d8bef9SDimitry Andric   NonLazyPointerSectionBase(const char *segname, const char *name);
1195ffd83dbSDimitry Andric   const llvm::SetVector<const Symbol *> &getEntries() const { return entries; }
1205ffd83dbSDimitry Andric   bool isNeeded() const override { return !entries.empty(); }
121fe6060f1SDimitry Andric   uint64_t getSize() const override {
122fe6060f1SDimitry Andric     return entries.size() * target->wordSize;
123fe6060f1SDimitry Andric   }
1245ffd83dbSDimitry Andric   void writeTo(uint8_t *buf) const override;
125e8d8bef9SDimitry Andric   void addEntry(Symbol *sym);
126fe6060f1SDimitry Andric   uint64_t getVA(uint32_t gotIndex) const {
127fe6060f1SDimitry Andric     return addr + gotIndex * target->wordSize;
128fe6060f1SDimitry Andric   }
1295ffd83dbSDimitry Andric 
1305ffd83dbSDimitry Andric private:
1315ffd83dbSDimitry Andric   llvm::SetVector<const Symbol *> entries;
1325ffd83dbSDimitry Andric };
1335ffd83dbSDimitry Andric 
134fe6060f1SDimitry Andric class GotSection final : public NonLazyPointerSectionBase {
135e8d8bef9SDimitry Andric public:
136fe6060f1SDimitry Andric   GotSection();
137e8d8bef9SDimitry Andric };
138e8d8bef9SDimitry Andric 
139fe6060f1SDimitry Andric class TlvPointerSection final : public NonLazyPointerSectionBase {
140e8d8bef9SDimitry Andric public:
141fe6060f1SDimitry Andric   TlvPointerSection();
142e8d8bef9SDimitry Andric };
143e8d8bef9SDimitry Andric 
144e8d8bef9SDimitry Andric struct Location {
145fe6060f1SDimitry Andric   const InputSection *isec;
146fe6060f1SDimitry Andric   uint64_t offset;
147e8d8bef9SDimitry Andric 
148fe6060f1SDimitry Andric   Location(const InputSection *isec, uint64_t offset)
149fe6060f1SDimitry Andric       : isec(isec), offset(offset) {}
150fe6060f1SDimitry Andric   uint64_t getVA() const { return isec->getVA(offset); }
151e8d8bef9SDimitry Andric };
152e8d8bef9SDimitry Andric 
153e8d8bef9SDimitry Andric // Stores rebase opcodes, which tell dyld where absolute addresses have been
154e8d8bef9SDimitry Andric // encoded in the binary. If the binary is not loaded at its preferred address,
155e8d8bef9SDimitry Andric // dyld has to rebase these addresses by adding an offset to them.
156fe6060f1SDimitry Andric class RebaseSection final : public LinkEditSection {
157e8d8bef9SDimitry Andric public:
158e8d8bef9SDimitry Andric   RebaseSection();
159fe6060f1SDimitry Andric   void finalizeContents() override;
160e8d8bef9SDimitry Andric   uint64_t getRawSize() const override { return contents.size(); }
161e8d8bef9SDimitry Andric   bool isNeeded() const override { return !locations.empty(); }
162e8d8bef9SDimitry Andric   void writeTo(uint8_t *buf) const override;
163e8d8bef9SDimitry Andric 
164fe6060f1SDimitry Andric   void addEntry(const InputSection *isec, uint64_t offset) {
165e8d8bef9SDimitry Andric     if (config->isPic)
16606c3fb27SDimitry Andric       locations.emplace_back(isec, offset);
167e8d8bef9SDimitry Andric   }
168e8d8bef9SDimitry Andric 
169e8d8bef9SDimitry Andric private:
170e8d8bef9SDimitry Andric   std::vector<Location> locations;
171e8d8bef9SDimitry Andric   SmallVector<char, 128> contents;
172e8d8bef9SDimitry Andric };
173e8d8bef9SDimitry Andric 
1745ffd83dbSDimitry Andric struct BindingEntry {
1755ffd83dbSDimitry Andric   int64_t addend;
176e8d8bef9SDimitry Andric   Location target;
177fe6060f1SDimitry Andric   BindingEntry(int64_t addend, Location target)
17806c3fb27SDimitry Andric       : addend(addend), target(target) {}
1795ffd83dbSDimitry Andric };
1805ffd83dbSDimitry Andric 
181fe6060f1SDimitry Andric template <class Sym>
182fe6060f1SDimitry Andric using BindingsMap = llvm::DenseMap<Sym, std::vector<BindingEntry>>;
183fe6060f1SDimitry Andric 
1845ffd83dbSDimitry Andric // Stores bind opcodes for telling dyld which symbols to load non-lazily.
185fe6060f1SDimitry Andric class BindingSection final : public LinkEditSection {
1865ffd83dbSDimitry Andric public:
1875ffd83dbSDimitry Andric   BindingSection();
188fe6060f1SDimitry Andric   void finalizeContents() override;
189e8d8bef9SDimitry Andric   uint64_t getRawSize() const override { return contents.size(); }
190fe6060f1SDimitry Andric   bool isNeeded() const override { return !bindingsMap.empty(); }
1915ffd83dbSDimitry Andric   void writeTo(uint8_t *buf) const override;
1925ffd83dbSDimitry Andric 
19381ad6265SDimitry Andric   void addEntry(const Symbol *dysym, const InputSection *isec, uint64_t offset,
19481ad6265SDimitry Andric                 int64_t addend = 0) {
195fe6060f1SDimitry Andric     bindingsMap[dysym].emplace_back(addend, Location(isec, offset));
1965ffd83dbSDimitry Andric   }
1975ffd83dbSDimitry Andric 
1985ffd83dbSDimitry Andric private:
19981ad6265SDimitry Andric   BindingsMap<const Symbol *> bindingsMap;
2005ffd83dbSDimitry Andric   SmallVector<char, 128> contents;
2015ffd83dbSDimitry Andric };
2025ffd83dbSDimitry Andric 
203e8d8bef9SDimitry Andric // Stores bind opcodes for telling dyld which weak symbols need coalescing.
204e8d8bef9SDimitry Andric // There are two types of entries in this section:
205e8d8bef9SDimitry Andric //
206e8d8bef9SDimitry Andric //   1) Non-weak definitions: This is a symbol definition that weak symbols in
207e8d8bef9SDimitry Andric //   other dylibs should coalesce to.
208e8d8bef9SDimitry Andric //
209e8d8bef9SDimitry Andric //   2) Weak bindings: These tell dyld that a given symbol reference should
210fe6060f1SDimitry Andric //   coalesce to a non-weak definition if one is found. Note that unlike the
211e8d8bef9SDimitry Andric //   entries in the BindingSection, the bindings here only refer to these
212e8d8bef9SDimitry Andric //   symbols by name, but do not specify which dylib to load them from.
213fe6060f1SDimitry Andric class WeakBindingSection final : public LinkEditSection {
214e8d8bef9SDimitry Andric public:
215e8d8bef9SDimitry Andric   WeakBindingSection();
216fe6060f1SDimitry Andric   void finalizeContents() override;
217e8d8bef9SDimitry Andric   uint64_t getRawSize() const override { return contents.size(); }
218e8d8bef9SDimitry Andric   bool isNeeded() const override {
219fe6060f1SDimitry Andric     return !bindingsMap.empty() || !definitions.empty();
220e8d8bef9SDimitry Andric   }
221e8d8bef9SDimitry Andric 
222e8d8bef9SDimitry Andric   void writeTo(uint8_t *buf) const override;
223e8d8bef9SDimitry Andric 
224fe6060f1SDimitry Andric   void addEntry(const Symbol *symbol, const InputSection *isec, uint64_t offset,
225fe6060f1SDimitry Andric                 int64_t addend = 0) {
226fe6060f1SDimitry Andric     bindingsMap[symbol].emplace_back(addend, Location(isec, offset));
227e8d8bef9SDimitry Andric   }
228e8d8bef9SDimitry Andric 
229fe6060f1SDimitry Andric   bool hasEntry() const { return !bindingsMap.empty(); }
230e8d8bef9SDimitry Andric 
231e8d8bef9SDimitry Andric   void addNonWeakDefinition(const Defined *defined) {
232e8d8bef9SDimitry Andric     definitions.emplace_back(defined);
233e8d8bef9SDimitry Andric   }
234e8d8bef9SDimitry Andric 
235e8d8bef9SDimitry Andric   bool hasNonWeakDefinition() const { return !definitions.empty(); }
236e8d8bef9SDimitry Andric 
237e8d8bef9SDimitry Andric private:
238fe6060f1SDimitry Andric   BindingsMap<const Symbol *> bindingsMap;
239e8d8bef9SDimitry Andric   std::vector<const Defined *> definitions;
240e8d8bef9SDimitry Andric   SmallVector<char, 128> contents;
241e8d8bef9SDimitry Andric };
242e8d8bef9SDimitry Andric 
2435ffd83dbSDimitry Andric // The following sections implement lazy symbol binding -- very similar to the
2445ffd83dbSDimitry Andric // PLT mechanism in ELF.
2455ffd83dbSDimitry Andric //
246e8d8bef9SDimitry Andric // ELF's .plt section is broken up into two sections in Mach-O: StubsSection
247e8d8bef9SDimitry Andric // and StubHelperSection. Calls to functions in dylibs will end up calling into
2485ffd83dbSDimitry Andric // StubsSection, which contains indirect jumps to addresses stored in the
2495ffd83dbSDimitry Andric // LazyPointerSection (the counterpart to ELF's .plt.got).
2505ffd83dbSDimitry Andric //
251e8d8bef9SDimitry Andric // We will first describe how non-weak symbols are handled.
252e8d8bef9SDimitry Andric //
253e8d8bef9SDimitry Andric // At program start, the LazyPointerSection contains addresses that point into
254e8d8bef9SDimitry Andric // one of the entry points in the middle of the StubHelperSection. The code in
2555ffd83dbSDimitry Andric // StubHelperSection will push on the stack an offset into the
2565ffd83dbSDimitry Andric // LazyBindingSection. The push is followed by a jump to the beginning of the
2575ffd83dbSDimitry Andric // StubHelperSection (similar to PLT0), which then calls into dyld_stub_binder.
2585ffd83dbSDimitry Andric // dyld_stub_binder is a non-lazily-bound symbol, so this call looks it up in
2595ffd83dbSDimitry Andric // the GOT.
2605ffd83dbSDimitry Andric //
2615ffd83dbSDimitry Andric // The stub binder will look up the bind opcodes in the LazyBindingSection at
262e8d8bef9SDimitry Andric // the given offset. The bind opcodes will tell the binder to update the
263e8d8bef9SDimitry Andric // address in the LazyPointerSection to point to the symbol, so that subsequent
264e8d8bef9SDimitry Andric // calls don't have to redo the symbol resolution. The binder will then jump to
265e8d8bef9SDimitry Andric // the resolved symbol.
266e8d8bef9SDimitry Andric //
267e8d8bef9SDimitry Andric // With weak symbols, the situation is slightly different. Since there is no
268e8d8bef9SDimitry Andric // "weak lazy" lookup, function calls to weak symbols are always non-lazily
269e8d8bef9SDimitry Andric // bound. We emit both regular non-lazy bindings as well as weak bindings, in
270e8d8bef9SDimitry Andric // order that the weak bindings may overwrite the non-lazy bindings if an
271e8d8bef9SDimitry Andric // appropriate symbol is found at runtime. However, the bound addresses will
272e8d8bef9SDimitry Andric // still be written (non-lazily) into the LazyPointerSection.
273bdd1243dSDimitry Andric //
274bdd1243dSDimitry Andric // Symbols are always bound eagerly when chained fixups are used. In that case,
275bdd1243dSDimitry Andric // StubsSection contains indirect jumps to addresses stored in the GotSection.
276bdd1243dSDimitry Andric // The GOT directly contains the fixup entries, which will be replaced by the
277bdd1243dSDimitry Andric // address of the target symbols on load. LazyPointerSection and
278bdd1243dSDimitry Andric // StubHelperSection are not used.
2795ffd83dbSDimitry Andric 
280fe6060f1SDimitry Andric class StubsSection final : public SyntheticSection {
2815ffd83dbSDimitry Andric public:
2825ffd83dbSDimitry Andric   StubsSection();
2835ffd83dbSDimitry Andric   uint64_t getSize() const override;
2845ffd83dbSDimitry Andric   bool isNeeded() const override { return !entries.empty(); }
285fe6060f1SDimitry Andric   void finalize() override;
2865ffd83dbSDimitry Andric   void writeTo(uint8_t *buf) const override;
287e8d8bef9SDimitry Andric   const llvm::SetVector<Symbol *> &getEntries() const { return entries; }
288bdd1243dSDimitry Andric   // Creates a stub for the symbol and the corresponding entry in the
289bdd1243dSDimitry Andric   // LazyPointerSection.
290bdd1243dSDimitry Andric   void addEntry(Symbol *);
291fe6060f1SDimitry Andric   uint64_t getVA(uint32_t stubsIndex) const {
292fe6060f1SDimitry Andric     assert(isFinal || target->usesThunks());
293fe6060f1SDimitry Andric     // ConcatOutputSection::finalize() can seek the address of a
294fe6060f1SDimitry Andric     // stub before its address is assigned. Before __stubs is
295fe6060f1SDimitry Andric     // finalized, return a contrived out-of-range address.
296fe6060f1SDimitry Andric     return isFinal ? addr + stubsIndex * target->stubSize
297fe6060f1SDimitry Andric                    : TargetInfo::outOfRangeVA;
298fe6060f1SDimitry Andric   }
299fe6060f1SDimitry Andric 
300fe6060f1SDimitry Andric   bool isFinal = false; // is address assigned?
3015ffd83dbSDimitry Andric 
3025ffd83dbSDimitry Andric private:
303e8d8bef9SDimitry Andric   llvm::SetVector<Symbol *> entries;
3045ffd83dbSDimitry Andric };
3055ffd83dbSDimitry Andric 
306fe6060f1SDimitry Andric class StubHelperSection final : public SyntheticSection {
3075ffd83dbSDimitry Andric public:
3085ffd83dbSDimitry Andric   StubHelperSection();
3095ffd83dbSDimitry Andric   uint64_t getSize() const override;
3105ffd83dbSDimitry Andric   bool isNeeded() const override;
3115ffd83dbSDimitry Andric   void writeTo(uint8_t *buf) const override;
3125ffd83dbSDimitry Andric 
313bdd1243dSDimitry Andric   void setUp();
3145ffd83dbSDimitry Andric 
3155ffd83dbSDimitry Andric   DylibSymbol *stubBinder = nullptr;
316e8d8bef9SDimitry Andric   Defined *dyldPrivate = nullptr;
3175ffd83dbSDimitry Andric };
3185ffd83dbSDimitry Andric 
319*0fca6ea1SDimitry Andric class ObjCSelRefsHelper {
320*0fca6ea1SDimitry Andric public:
321*0fca6ea1SDimitry Andric   static void initialize();
322*0fca6ea1SDimitry Andric   static void cleanup();
323*0fca6ea1SDimitry Andric 
324*0fca6ea1SDimitry Andric   static ConcatInputSection *getSelRef(StringRef methname);
325*0fca6ea1SDimitry Andric   static ConcatInputSection *makeSelRef(StringRef methname);
326*0fca6ea1SDimitry Andric 
327*0fca6ea1SDimitry Andric private:
328*0fca6ea1SDimitry Andric   static llvm::DenseMap<llvm::CachedHashStringRef, ConcatInputSection *>
329*0fca6ea1SDimitry Andric       methnameToSelref;
330*0fca6ea1SDimitry Andric };
331*0fca6ea1SDimitry Andric 
332bdd1243dSDimitry Andric // Objective-C stubs are hoisted objc_msgSend calls per selector called in the
333bdd1243dSDimitry Andric // program. Apple Clang produces undefined symbols to each stub, such as
334bdd1243dSDimitry Andric // '_objc_msgSend$foo', which are then synthesized by the linker. The stubs
335bdd1243dSDimitry Andric // load the particular selector 'foo' from __objc_selrefs, setting it to the
336bdd1243dSDimitry Andric // first argument of the objc_msgSend call, and then jumps to objc_msgSend. The
337bdd1243dSDimitry Andric // actual stub contents are mirrored from ld64.
338bdd1243dSDimitry Andric class ObjCStubsSection final : public SyntheticSection {
339bdd1243dSDimitry Andric public:
340bdd1243dSDimitry Andric   ObjCStubsSection();
341bdd1243dSDimitry Andric   void addEntry(Symbol *sym);
342bdd1243dSDimitry Andric   uint64_t getSize() const override;
343bdd1243dSDimitry Andric   bool isNeeded() const override { return !symbols.empty(); }
344bdd1243dSDimitry Andric   void finalize() override { isec->isFinal = true; }
345bdd1243dSDimitry Andric   void writeTo(uint8_t *buf) const override;
346bdd1243dSDimitry Andric   void setUp();
347bdd1243dSDimitry Andric 
348bdd1243dSDimitry Andric   static constexpr llvm::StringLiteral symbolPrefix = "_objc_msgSend$";
349*0fca6ea1SDimitry Andric   static bool isObjCStubSymbol(Symbol *sym);
350*0fca6ea1SDimitry Andric   static StringRef getMethname(Symbol *sym);
351bdd1243dSDimitry Andric 
352bdd1243dSDimitry Andric private:
353bdd1243dSDimitry Andric   std::vector<Defined *> symbols;
3547a6dacacSDimitry Andric   Symbol *objcMsgSend = nullptr;
355bdd1243dSDimitry Andric };
356bdd1243dSDimitry Andric 
357e8d8bef9SDimitry Andric // Note that this section may also be targeted by non-lazy bindings. In
358e8d8bef9SDimitry Andric // particular, this happens when branch relocations target weak symbols.
359fe6060f1SDimitry Andric class LazyPointerSection final : public SyntheticSection {
3605ffd83dbSDimitry Andric public:
3615ffd83dbSDimitry Andric   LazyPointerSection();
3625ffd83dbSDimitry Andric   uint64_t getSize() const override;
3635ffd83dbSDimitry Andric   bool isNeeded() const override;
3645ffd83dbSDimitry Andric   void writeTo(uint8_t *buf) const override;
365bdd1243dSDimitry Andric   uint64_t getVA(uint32_t index) const {
366bdd1243dSDimitry Andric     return addr + (index << target->p2WordSize);
367bdd1243dSDimitry Andric   }
3685ffd83dbSDimitry Andric };
3695ffd83dbSDimitry Andric 
370fe6060f1SDimitry Andric class LazyBindingSection final : public LinkEditSection {
3715ffd83dbSDimitry Andric public:
3725ffd83dbSDimitry Andric   LazyBindingSection();
373fe6060f1SDimitry Andric   void finalizeContents() override;
374e8d8bef9SDimitry Andric   uint64_t getRawSize() const override { return contents.size(); }
375e8d8bef9SDimitry Andric   bool isNeeded() const override { return !entries.empty(); }
3765ffd83dbSDimitry Andric   void writeTo(uint8_t *buf) const override;
377e8d8bef9SDimitry Andric   // Note that every entry here will by referenced by a corresponding entry in
378e8d8bef9SDimitry Andric   // the StubHelperSection.
37981ad6265SDimitry Andric   void addEntry(Symbol *dysym);
38081ad6265SDimitry Andric   const llvm::SetVector<Symbol *> &getEntries() const { return entries; }
3815ffd83dbSDimitry Andric 
3825ffd83dbSDimitry Andric private:
38381ad6265SDimitry Andric   uint32_t encode(const Symbol &);
384e8d8bef9SDimitry Andric 
38581ad6265SDimitry Andric   llvm::SetVector<Symbol *> entries;
3865ffd83dbSDimitry Andric   SmallVector<char, 128> contents;
3875ffd83dbSDimitry Andric   llvm::raw_svector_ostream os{contents};
3885ffd83dbSDimitry Andric };
3895ffd83dbSDimitry Andric 
3905ffd83dbSDimitry Andric // Stores a trie that describes the set of exported symbols.
391fe6060f1SDimitry Andric class ExportSection final : public LinkEditSection {
3925ffd83dbSDimitry Andric public:
3935ffd83dbSDimitry Andric   ExportSection();
394fe6060f1SDimitry Andric   void finalizeContents() override;
395e8d8bef9SDimitry Andric   uint64_t getRawSize() const override { return size; }
396349cc55cSDimitry Andric   bool isNeeded() const override { return size; }
3975ffd83dbSDimitry Andric   void writeTo(uint8_t *buf) const override;
3985ffd83dbSDimitry Andric 
399e8d8bef9SDimitry Andric   bool hasWeakSymbol = false;
400e8d8bef9SDimitry Andric 
4015ffd83dbSDimitry Andric private:
4025ffd83dbSDimitry Andric   TrieBuilder trieBuilder;
4035ffd83dbSDimitry Andric   size_t size = 0;
4045ffd83dbSDimitry Andric };
4055ffd83dbSDimitry Andric 
406bdd1243dSDimitry Andric // Stores 'data in code' entries that describe the locations of data regions
407bdd1243dSDimitry Andric // inside code sections. This is used by llvm-objdump to distinguish jump tables
408bdd1243dSDimitry Andric // and stop them from being disassembled as instructions.
409fe6060f1SDimitry Andric class DataInCodeSection final : public LinkEditSection {
410fe6060f1SDimitry Andric public:
411fe6060f1SDimitry Andric   DataInCodeSection();
412fe6060f1SDimitry Andric   void finalizeContents() override;
413fe6060f1SDimitry Andric   uint64_t getRawSize() const override {
414fe6060f1SDimitry Andric     return sizeof(llvm::MachO::data_in_code_entry) * entries.size();
415fe6060f1SDimitry Andric   }
416fe6060f1SDimitry Andric   void writeTo(uint8_t *buf) const override;
417fe6060f1SDimitry Andric 
418fe6060f1SDimitry Andric private:
419fe6060f1SDimitry Andric   std::vector<llvm::MachO::data_in_code_entry> entries;
420fe6060f1SDimitry Andric };
421fe6060f1SDimitry Andric 
422fe6060f1SDimitry Andric // Stores ULEB128 delta encoded addresses of functions.
423fe6060f1SDimitry Andric class FunctionStartsSection final : public LinkEditSection {
424fe6060f1SDimitry Andric public:
425fe6060f1SDimitry Andric   FunctionStartsSection();
426fe6060f1SDimitry Andric   void finalizeContents() override;
427fe6060f1SDimitry Andric   uint64_t getRawSize() const override { return contents.size(); }
428fe6060f1SDimitry Andric   void writeTo(uint8_t *buf) const override;
429fe6060f1SDimitry Andric 
430fe6060f1SDimitry Andric private:
431fe6060f1SDimitry Andric   SmallVector<char, 128> contents;
432fe6060f1SDimitry Andric };
433fe6060f1SDimitry Andric 
4345ffd83dbSDimitry Andric // Stores the strings referenced by the symbol table.
435fe6060f1SDimitry Andric class StringTableSection final : public LinkEditSection {
4365ffd83dbSDimitry Andric public:
4375ffd83dbSDimitry Andric   StringTableSection();
4385ffd83dbSDimitry Andric   // Returns the start offset of the added string.
4395ffd83dbSDimitry Andric   uint32_t addString(StringRef);
440e8d8bef9SDimitry Andric   uint64_t getRawSize() const override { return size; }
4415ffd83dbSDimitry Andric   void writeTo(uint8_t *buf) const override;
4425ffd83dbSDimitry Andric 
443fe6060f1SDimitry Andric   static constexpr size_t emptyStringIndex = 1;
444fe6060f1SDimitry Andric 
4455ffd83dbSDimitry Andric private:
446e8d8bef9SDimitry Andric   // ld64 emits string tables which start with a space and a zero byte. We
447e8d8bef9SDimitry Andric   // match its behavior here since some tools depend on it.
448fe6060f1SDimitry Andric   // Consequently, the empty string will be at index 1, not zero.
449e8d8bef9SDimitry Andric   std::vector<StringRef> strings{" "};
450e8d8bef9SDimitry Andric   size_t size = 2;
4515ffd83dbSDimitry Andric };
4525ffd83dbSDimitry Andric 
4535ffd83dbSDimitry Andric struct SymtabEntry {
4545ffd83dbSDimitry Andric   Symbol *sym;
4555ffd83dbSDimitry Andric   size_t strx;
4565ffd83dbSDimitry Andric };
4575ffd83dbSDimitry Andric 
458e8d8bef9SDimitry Andric struct StabsEntry {
459e8d8bef9SDimitry Andric   uint8_t type = 0;
460fe6060f1SDimitry Andric   uint32_t strx = StringTableSection::emptyStringIndex;
461e8d8bef9SDimitry Andric   uint8_t sect = 0;
462e8d8bef9SDimitry Andric   uint16_t desc = 0;
463e8d8bef9SDimitry Andric   uint64_t value = 0;
464e8d8bef9SDimitry Andric 
465e8d8bef9SDimitry Andric   StabsEntry() = default;
466e8d8bef9SDimitry Andric   explicit StabsEntry(uint8_t type) : type(type) {}
467e8d8bef9SDimitry Andric };
468e8d8bef9SDimitry Andric 
469e8d8bef9SDimitry Andric // Symbols of the same type must be laid out contiguously: we choose to emit
470e8d8bef9SDimitry Andric // all local symbols first, then external symbols, and finally undefined
471e8d8bef9SDimitry Andric // symbols. For each symbol type, the LC_DYSYMTAB load command will record the
472e8d8bef9SDimitry Andric // range (start index and total number) of those symbols in the symbol table.
473e8d8bef9SDimitry Andric class SymtabSection : public LinkEditSection {
4745ffd83dbSDimitry Andric public:
475fe6060f1SDimitry Andric   void finalizeContents() override;
476e8d8bef9SDimitry Andric   uint32_t getNumSymbols() const;
477e8d8bef9SDimitry Andric   uint32_t getNumLocalSymbols() const {
478e8d8bef9SDimitry Andric     return stabs.size() + localSymbols.size();
479e8d8bef9SDimitry Andric   }
480e8d8bef9SDimitry Andric   uint32_t getNumExternalSymbols() const { return externalSymbols.size(); }
481e8d8bef9SDimitry Andric   uint32_t getNumUndefinedSymbols() const { return undefinedSymbols.size(); }
4825ffd83dbSDimitry Andric 
4835ffd83dbSDimitry Andric private:
48481ad6265SDimitry Andric   void emitBeginSourceStab(StringRef);
485e8d8bef9SDimitry Andric   void emitEndSourceStab();
486e8d8bef9SDimitry Andric   void emitObjectFileStab(ObjFile *);
487e8d8bef9SDimitry Andric   void emitEndFunStab(Defined *);
488e8d8bef9SDimitry Andric   void emitStabs();
489e8d8bef9SDimitry Andric 
490fe6060f1SDimitry Andric protected:
491fe6060f1SDimitry Andric   SymtabSection(StringTableSection &);
492fe6060f1SDimitry Andric 
4935ffd83dbSDimitry Andric   StringTableSection &stringTableSection;
494e8d8bef9SDimitry Andric   // STABS symbols are always local symbols, but we represent them with special
495e8d8bef9SDimitry Andric   // entries because they may use fields like n_sect and n_desc differently.
496e8d8bef9SDimitry Andric   std::vector<StabsEntry> stabs;
497e8d8bef9SDimitry Andric   std::vector<SymtabEntry> localSymbols;
498e8d8bef9SDimitry Andric   std::vector<SymtabEntry> externalSymbols;
499e8d8bef9SDimitry Andric   std::vector<SymtabEntry> undefinedSymbols;
500e8d8bef9SDimitry Andric };
501e8d8bef9SDimitry Andric 
502fe6060f1SDimitry Andric template <class LP> SymtabSection *makeSymtabSection(StringTableSection &);
503fe6060f1SDimitry Andric 
504e8d8bef9SDimitry Andric // The indirect symbol table is a list of 32-bit integers that serve as indices
505e8d8bef9SDimitry Andric // into the (actual) symbol table. The indirect symbol table is a
506e8d8bef9SDimitry Andric // concatenation of several sub-arrays of indices, each sub-array belonging to
507e8d8bef9SDimitry Andric // a separate section. The starting offset of each sub-array is stored in the
508e8d8bef9SDimitry Andric // reserved1 header field of the respective section.
509e8d8bef9SDimitry Andric //
510e8d8bef9SDimitry Andric // These sub-arrays provide symbol information for sections that store
511e8d8bef9SDimitry Andric // contiguous sequences of symbol references. These references can be pointers
512e8d8bef9SDimitry Andric // (e.g. those in the GOT and TLVP sections) or assembly sequences (e.g.
513e8d8bef9SDimitry Andric // function stubs).
514fe6060f1SDimitry Andric class IndirectSymtabSection final : public LinkEditSection {
515e8d8bef9SDimitry Andric public:
516e8d8bef9SDimitry Andric   IndirectSymtabSection();
517fe6060f1SDimitry Andric   void finalizeContents() override;
518e8d8bef9SDimitry Andric   uint32_t getNumSymbols() const;
519e8d8bef9SDimitry Andric   uint64_t getRawSize() const override {
520e8d8bef9SDimitry Andric     return getNumSymbols() * sizeof(uint32_t);
521e8d8bef9SDimitry Andric   }
522e8d8bef9SDimitry Andric   bool isNeeded() const override;
523e8d8bef9SDimitry Andric   void writeTo(uint8_t *buf) const override;
5245ffd83dbSDimitry Andric };
5255ffd83dbSDimitry Andric 
526fe6060f1SDimitry Andric // The code signature comes at the very end of the linked output file.
527fe6060f1SDimitry Andric class CodeSignatureSection final : public LinkEditSection {
528fe6060f1SDimitry Andric public:
529349cc55cSDimitry Andric   // NOTE: These values are duplicated in llvm-objcopy's MachO/Object.h file
530349cc55cSDimitry Andric   // and any changes here, should be repeated there.
531fe6060f1SDimitry Andric   static constexpr uint8_t blockSizeShift = 12;
532fe6060f1SDimitry Andric   static constexpr size_t blockSize = (1 << blockSizeShift); // 4 KiB
533fe6060f1SDimitry Andric   static constexpr size_t hashSize = 256 / 8;
534fe6060f1SDimitry Andric   static constexpr size_t blobHeadersSize = llvm::alignTo<8>(
535fe6060f1SDimitry Andric       sizeof(llvm::MachO::CS_SuperBlob) + sizeof(llvm::MachO::CS_BlobIndex));
536fe6060f1SDimitry Andric   static constexpr uint32_t fixedHeadersSize =
537fe6060f1SDimitry Andric       blobHeadersSize + sizeof(llvm::MachO::CS_CodeDirectory);
538fe6060f1SDimitry Andric 
539fe6060f1SDimitry Andric   uint32_t fileNamePad = 0;
540fe6060f1SDimitry Andric   uint32_t allHeadersSize = 0;
541fe6060f1SDimitry Andric   StringRef fileName;
542fe6060f1SDimitry Andric 
543fe6060f1SDimitry Andric   CodeSignatureSection();
544fe6060f1SDimitry Andric   uint64_t getRawSize() const override;
545fe6060f1SDimitry Andric   bool isNeeded() const override { return true; }
546fe6060f1SDimitry Andric   void writeTo(uint8_t *buf) const override;
547fe6060f1SDimitry Andric   uint32_t getBlockCount() const;
548fe6060f1SDimitry Andric   void writeHashes(uint8_t *buf) const;
549fe6060f1SDimitry Andric };
550fe6060f1SDimitry Andric 
551fe6060f1SDimitry Andric class CStringSection : public SyntheticSection {
552fe6060f1SDimitry Andric public:
553bdd1243dSDimitry Andric   CStringSection(const char *name);
554fe6060f1SDimitry Andric   void addInput(CStringInputSection *);
555fe6060f1SDimitry Andric   uint64_t getSize() const override { return size; }
556fe6060f1SDimitry Andric   virtual void finalizeContents();
557fe6060f1SDimitry Andric   bool isNeeded() const override { return !inputs.empty(); }
558fe6060f1SDimitry Andric   void writeTo(uint8_t *buf) const override;
559fe6060f1SDimitry Andric 
560fe6060f1SDimitry Andric   std::vector<CStringInputSection *> inputs;
561fe6060f1SDimitry Andric 
562fe6060f1SDimitry Andric private:
563fe6060f1SDimitry Andric   uint64_t size;
564fe6060f1SDimitry Andric };
565fe6060f1SDimitry Andric 
566fe6060f1SDimitry Andric class DeduplicatedCStringSection final : public CStringSection {
567fe6060f1SDimitry Andric public:
568bdd1243dSDimitry Andric   DeduplicatedCStringSection(const char *name) : CStringSection(name){};
56981ad6265SDimitry Andric   uint64_t getSize() const override { return size; }
570fe6060f1SDimitry Andric   void finalizeContents() override;
57181ad6265SDimitry Andric   void writeTo(uint8_t *buf) const override;
572fe6060f1SDimitry Andric 
57381ad6265SDimitry Andric   struct StringOffset {
57481ad6265SDimitry Andric     uint8_t trailingZeros;
57581ad6265SDimitry Andric     uint64_t outSecOff = UINT64_MAX;
57681ad6265SDimitry Andric 
57781ad6265SDimitry Andric     explicit StringOffset(uint8_t zeros) : trailingZeros(zeros) {}
57881ad6265SDimitry Andric   };
579bdd1243dSDimitry Andric 
580bdd1243dSDimitry Andric   StringOffset getStringOffset(StringRef str) const;
581bdd1243dSDimitry Andric 
582bdd1243dSDimitry Andric private:
58381ad6265SDimitry Andric   llvm::DenseMap<llvm::CachedHashStringRef, StringOffset> stringOffsetMap;
58481ad6265SDimitry Andric   size_t size = 0;
585fe6060f1SDimitry Andric };
586fe6060f1SDimitry Andric 
587fe6060f1SDimitry Andric /*
588fe6060f1SDimitry Andric  * This section contains deduplicated literal values. The 16-byte values are
589fe6060f1SDimitry Andric  * laid out first, followed by the 8- and then the 4-byte ones.
590fe6060f1SDimitry Andric  */
591fe6060f1SDimitry Andric class WordLiteralSection final : public SyntheticSection {
592fe6060f1SDimitry Andric public:
593fe6060f1SDimitry Andric   using UInt128 = std::pair<uint64_t, uint64_t>;
594fe6060f1SDimitry Andric   // I don't think the standard guarantees the size of a pair, so let's make
595fe6060f1SDimitry Andric   // sure it's exact -- that way we can construct it via `mmap`.
596bdd1243dSDimitry Andric   static_assert(sizeof(UInt128) == 16);
597fe6060f1SDimitry Andric 
598fe6060f1SDimitry Andric   WordLiteralSection();
599fe6060f1SDimitry Andric   void addInput(WordLiteralInputSection *);
600fe6060f1SDimitry Andric   void finalizeContents();
601fe6060f1SDimitry Andric   void writeTo(uint8_t *buf) const override;
602fe6060f1SDimitry Andric 
603fe6060f1SDimitry Andric   uint64_t getSize() const override {
604fe6060f1SDimitry Andric     return literal16Map.size() * 16 + literal8Map.size() * 8 +
605fe6060f1SDimitry Andric            literal4Map.size() * 4;
606fe6060f1SDimitry Andric   }
607fe6060f1SDimitry Andric 
608fe6060f1SDimitry Andric   bool isNeeded() const override {
609fe6060f1SDimitry Andric     return !literal16Map.empty() || !literal4Map.empty() ||
610fe6060f1SDimitry Andric            !literal8Map.empty();
611fe6060f1SDimitry Andric   }
612fe6060f1SDimitry Andric 
613349cc55cSDimitry Andric   uint64_t getLiteral16Offset(uintptr_t buf) const {
614fe6060f1SDimitry Andric     return literal16Map.at(*reinterpret_cast<const UInt128 *>(buf)) * 16;
615fe6060f1SDimitry Andric   }
616fe6060f1SDimitry Andric 
617349cc55cSDimitry Andric   uint64_t getLiteral8Offset(uintptr_t buf) const {
618fe6060f1SDimitry Andric     return literal16Map.size() * 16 +
619fe6060f1SDimitry Andric            literal8Map.at(*reinterpret_cast<const uint64_t *>(buf)) * 8;
620fe6060f1SDimitry Andric   }
621fe6060f1SDimitry Andric 
622349cc55cSDimitry Andric   uint64_t getLiteral4Offset(uintptr_t buf) const {
623fe6060f1SDimitry Andric     return literal16Map.size() * 16 + literal8Map.size() * 8 +
624fe6060f1SDimitry Andric            literal4Map.at(*reinterpret_cast<const uint32_t *>(buf)) * 4;
625fe6060f1SDimitry Andric   }
626fe6060f1SDimitry Andric 
627fe6060f1SDimitry Andric private:
628fe6060f1SDimitry Andric   std::vector<WordLiteralInputSection *> inputs;
629fe6060f1SDimitry Andric 
630fe6060f1SDimitry Andric   template <class T> struct Hasher {
631fe6060f1SDimitry Andric     llvm::hash_code operator()(T v) const { return llvm::hash_value(v); }
632fe6060f1SDimitry Andric   };
633fe6060f1SDimitry Andric   // We're using unordered_map instead of DenseMap here because we need to
634fe6060f1SDimitry Andric   // support all possible integer values -- there are no suitable tombstone
635fe6060f1SDimitry Andric   // values for DenseMap.
636fe6060f1SDimitry Andric   std::unordered_map<UInt128, uint64_t, Hasher<UInt128>> literal16Map;
637fe6060f1SDimitry Andric   std::unordered_map<uint64_t, uint64_t> literal8Map;
638fe6060f1SDimitry Andric   std::unordered_map<uint32_t, uint64_t> literal4Map;
639fe6060f1SDimitry Andric };
640fe6060f1SDimitry Andric 
641fcaf7f86SDimitry Andric class ObjCImageInfoSection final : public SyntheticSection {
642fcaf7f86SDimitry Andric public:
643fcaf7f86SDimitry Andric   ObjCImageInfoSection();
644fcaf7f86SDimitry Andric   bool isNeeded() const override { return !files.empty(); }
645fcaf7f86SDimitry Andric   uint64_t getSize() const override { return 8; }
646fcaf7f86SDimitry Andric   void addFile(const InputFile *file) {
647fcaf7f86SDimitry Andric     assert(!file->objCImageInfo.empty());
648fcaf7f86SDimitry Andric     files.push_back(file);
649fcaf7f86SDimitry Andric   }
650fcaf7f86SDimitry Andric   void finalizeContents();
651fcaf7f86SDimitry Andric   void writeTo(uint8_t *buf) const override;
652fcaf7f86SDimitry Andric 
653fcaf7f86SDimitry Andric private:
654fcaf7f86SDimitry Andric   struct ImageInfo {
655fcaf7f86SDimitry Andric     uint8_t swiftVersion = 0;
656fcaf7f86SDimitry Andric     bool hasCategoryClassProperties = false;
657fcaf7f86SDimitry Andric   } info;
658fcaf7f86SDimitry Andric   static ImageInfo parseImageInfo(const InputFile *);
659fcaf7f86SDimitry Andric   std::vector<const InputFile *> files; // files with image info
660fcaf7f86SDimitry Andric };
661fcaf7f86SDimitry Andric 
662bdd1243dSDimitry Andric // This section stores 32-bit __TEXT segment offsets of initializer functions.
663bdd1243dSDimitry Andric //
664bdd1243dSDimitry Andric // The compiler stores pointers to initializers in __mod_init_func. These need
665bdd1243dSDimitry Andric // to be fixed up at load time, which takes time and dirties memory. By
666bdd1243dSDimitry Andric // synthesizing InitOffsetsSection from them, this data can live in the
667bdd1243dSDimitry Andric // read-only __TEXT segment instead. This section is used by default when
668bdd1243dSDimitry Andric // chained fixups are enabled.
669bdd1243dSDimitry Andric //
670bdd1243dSDimitry Andric // There is no similar counterpart to __mod_term_func, as that section is
671bdd1243dSDimitry Andric // deprecated, and static destructors are instead handled by registering them
672bdd1243dSDimitry Andric // via __cxa_atexit from an autogenerated initializer function (see D121736).
673bdd1243dSDimitry Andric class InitOffsetsSection final : public SyntheticSection {
674bdd1243dSDimitry Andric public:
675bdd1243dSDimitry Andric   InitOffsetsSection();
676bdd1243dSDimitry Andric   bool isNeeded() const override { return !sections.empty(); }
677bdd1243dSDimitry Andric   uint64_t getSize() const override;
678bdd1243dSDimitry Andric   void writeTo(uint8_t *buf) const override;
679bdd1243dSDimitry Andric   void setUp();
680bdd1243dSDimitry Andric 
681bdd1243dSDimitry Andric   void addInput(ConcatInputSection *isec) { sections.push_back(isec); }
682bdd1243dSDimitry Andric   const std::vector<ConcatInputSection *> &inputs() const { return sections; }
683bdd1243dSDimitry Andric 
684bdd1243dSDimitry Andric private:
685bdd1243dSDimitry Andric   std::vector<ConcatInputSection *> sections;
686bdd1243dSDimitry Andric };
687bdd1243dSDimitry Andric 
688*0fca6ea1SDimitry Andric // This SyntheticSection is for the __objc_methlist section, which contains
689*0fca6ea1SDimitry Andric // relative method lists if the -objc_relative_method_lists option is enabled.
690*0fca6ea1SDimitry Andric class ObjCMethListSection final : public SyntheticSection {
691*0fca6ea1SDimitry Andric public:
692*0fca6ea1SDimitry Andric   ObjCMethListSection();
693*0fca6ea1SDimitry Andric 
694*0fca6ea1SDimitry Andric   static bool isMethodList(const ConcatInputSection *isec);
695*0fca6ea1SDimitry Andric   void addInput(ConcatInputSection *isec) { inputs.push_back(isec); }
696*0fca6ea1SDimitry Andric   std::vector<ConcatInputSection *> getInputs() { return inputs; }
697*0fca6ea1SDimitry Andric 
698*0fca6ea1SDimitry Andric   void setUp();
699*0fca6ea1SDimitry Andric   void finalize() override;
700*0fca6ea1SDimitry Andric   bool isNeeded() const override { return !inputs.empty(); }
701*0fca6ea1SDimitry Andric   uint64_t getSize() const override { return sectionSize; }
702*0fca6ea1SDimitry Andric   void writeTo(uint8_t *bufStart) const override;
703*0fca6ea1SDimitry Andric 
704*0fca6ea1SDimitry Andric private:
705*0fca6ea1SDimitry Andric   void readMethodListHeader(const uint8_t *buf, uint32_t &structSizeAndFlags,
706*0fca6ea1SDimitry Andric                             uint32_t &structCount) const;
707*0fca6ea1SDimitry Andric   void writeMethodListHeader(uint8_t *buf, uint32_t structSizeAndFlags,
708*0fca6ea1SDimitry Andric                              uint32_t structCount) const;
709*0fca6ea1SDimitry Andric   uint32_t computeRelativeMethodListSize(uint32_t absoluteMethodListSize) const;
710*0fca6ea1SDimitry Andric   void writeRelativeOffsetForIsec(const ConcatInputSection *isec, uint8_t *buf,
711*0fca6ea1SDimitry Andric                                   uint32_t &inSecOff, uint32_t &outSecOff,
712*0fca6ea1SDimitry Andric                                   bool useSelRef) const;
713*0fca6ea1SDimitry Andric   uint32_t writeRelativeMethodList(const ConcatInputSection *isec,
714*0fca6ea1SDimitry Andric                                    uint8_t *buf) const;
715*0fca6ea1SDimitry Andric 
716*0fca6ea1SDimitry Andric   static constexpr uint32_t methodListHeaderSize =
717*0fca6ea1SDimitry Andric       /*structSizeAndFlags*/ sizeof(uint32_t) +
718*0fca6ea1SDimitry Andric       /*structCount*/ sizeof(uint32_t);
719*0fca6ea1SDimitry Andric   // Relative method lists are supported only for 3-pointer method lists
720*0fca6ea1SDimitry Andric   static constexpr uint32_t pointersPerStruct = 3;
721*0fca6ea1SDimitry Andric   // The runtime identifies relative method lists via this magic value
722*0fca6ea1SDimitry Andric   static constexpr uint32_t relMethodHeaderFlag = 0x80000000;
723*0fca6ea1SDimitry Andric   // In the method list header, the first 2 bytes are the size of struct
724*0fca6ea1SDimitry Andric   static constexpr uint32_t structSizeMask = 0x0000FFFF;
725*0fca6ea1SDimitry Andric   // In the method list header, the last 2 bytes are the flags for the struct
726*0fca6ea1SDimitry Andric   static constexpr uint32_t structFlagsMask = 0xFFFF0000;
727*0fca6ea1SDimitry Andric   // Relative method lists have 4 byte alignment as all data in the InputSection
728*0fca6ea1SDimitry Andric   // is 4 byte
729*0fca6ea1SDimitry Andric   static constexpr uint32_t relativeOffsetSize = sizeof(uint32_t);
730*0fca6ea1SDimitry Andric 
731*0fca6ea1SDimitry Andric   // The output size of the __objc_methlist section, computed during finalize()
732*0fca6ea1SDimitry Andric   uint32_t sectionSize = 0;
733*0fca6ea1SDimitry Andric   std::vector<ConcatInputSection *> inputs;
734*0fca6ea1SDimitry Andric };
735*0fca6ea1SDimitry Andric 
736bdd1243dSDimitry Andric // Chained fixups are a replacement for classic dyld opcodes. In this format,
737bdd1243dSDimitry Andric // most of the metadata necessary for binding symbols and rebasing addresses is
738bdd1243dSDimitry Andric // stored directly in the memory location that will have the fixup applied.
739bdd1243dSDimitry Andric //
740bdd1243dSDimitry Andric // The fixups form singly linked lists; each one covering a single page in
741bdd1243dSDimitry Andric // memory. The __LINKEDIT,__chainfixups section stores the page offset of the
742bdd1243dSDimitry Andric // first fixup of each page; the rest can be found by walking the chain using
743bdd1243dSDimitry Andric // the offset that is embedded in each entry.
744bdd1243dSDimitry Andric //
745bdd1243dSDimitry Andric // This setup allows pages to be relocated lazily at page-in time and without
746bdd1243dSDimitry Andric // being dirtied. The kernel can discard and load them again as needed. This
747bdd1243dSDimitry Andric // technique, called page-in linking, was introduced in macOS 13.
748bdd1243dSDimitry Andric //
749bdd1243dSDimitry Andric // The benefits of this format are:
750bdd1243dSDimitry Andric //  - smaller __LINKEDIT segment, as most of the fixup information is stored in
751bdd1243dSDimitry Andric //    the data segment
752bdd1243dSDimitry Andric //  - faster startup, since not all relocations need to be done upfront
753bdd1243dSDimitry Andric //  - slightly lower memory usage, as fewer pages are dirtied
754bdd1243dSDimitry Andric //
755bdd1243dSDimitry Andric // Userspace x86_64 and arm64 binaries have two types of fixup entries:
756bdd1243dSDimitry Andric //   - Rebase entries contain an absolute address, to which the object's load
757bdd1243dSDimitry Andric //     address will be added to get the final value. This is used for loading
758bdd1243dSDimitry Andric //     the address of a symbol defined in the same binary.
759bdd1243dSDimitry Andric //   - Binding entries are mostly used for symbols imported from other dylibs,
760bdd1243dSDimitry Andric //     but for weakly bound and interposable symbols as well. They are looked up
761bdd1243dSDimitry Andric //     by a (symbol name, library) pair stored in __chainfixups. This import
762bdd1243dSDimitry Andric //     entry also encodes whether the import is weak (i.e. if the symbol is
763bdd1243dSDimitry Andric //     missing, it should be set to null instead of producing a load error).
764bdd1243dSDimitry Andric //     The fixup encodes an ordinal associated with the import, and an optional
765bdd1243dSDimitry Andric //     addend.
766bdd1243dSDimitry Andric //
767bdd1243dSDimitry Andric // The entries are tightly packed 64-bit bitfields. One of the bits specifies
768bdd1243dSDimitry Andric // which kind of fixup to interpret them as.
769bdd1243dSDimitry Andric //
770bdd1243dSDimitry Andric // LLD generates the fixup data in 5 stages:
771bdd1243dSDimitry Andric //   1. While scanning relocations, we make a note of each location that needs
772bdd1243dSDimitry Andric //      a fixup by calling addRebase() or addBinding(). During this, we assign
773bdd1243dSDimitry Andric //      a unique ordinal for each (symbol name, library, addend) import tuple.
774bdd1243dSDimitry Andric //   2. After addresses have been assigned to all sections, and thus the memory
775bdd1243dSDimitry Andric //      layout of the linked image is final; finalizeContents() is called. Here,
776bdd1243dSDimitry Andric //      the page offsets of the chain start entries are calculated.
777bdd1243dSDimitry Andric //   3. ChainedFixupsSection::writeTo() writes the page start offsets and the
778bdd1243dSDimitry Andric //      imports table to the output file.
779bdd1243dSDimitry Andric //   4. Each section's fixup entries are encoded and written to disk in
780bdd1243dSDimitry Andric //      ConcatInputSection::writeTo(), but without writing the offsets that form
781bdd1243dSDimitry Andric //      the chain.
782bdd1243dSDimitry Andric //   5. Finally, each page's (which might correspond to multiple sections)
783bdd1243dSDimitry Andric //      fixups are linked together in Writer::buildFixupChains().
784bdd1243dSDimitry Andric class ChainedFixupsSection final : public LinkEditSection {
785bdd1243dSDimitry Andric public:
786bdd1243dSDimitry Andric   ChainedFixupsSection();
787bdd1243dSDimitry Andric   void finalizeContents() override;
788bdd1243dSDimitry Andric   uint64_t getRawSize() const override { return size; }
789bdd1243dSDimitry Andric   bool isNeeded() const override;
790bdd1243dSDimitry Andric   void writeTo(uint8_t *buf) const override;
791bdd1243dSDimitry Andric 
792bdd1243dSDimitry Andric   void addRebase(const InputSection *isec, uint64_t offset) {
793bdd1243dSDimitry Andric     locations.emplace_back(isec, offset);
794bdd1243dSDimitry Andric   }
795bdd1243dSDimitry Andric   void addBinding(const Symbol *dysym, const InputSection *isec,
796bdd1243dSDimitry Andric                   uint64_t offset, int64_t addend = 0);
797bdd1243dSDimitry Andric 
798bdd1243dSDimitry Andric   void setHasNonWeakDefinition() { hasNonWeakDef = true; }
799bdd1243dSDimitry Andric 
800bdd1243dSDimitry Andric   // Returns an (ordinal, inline addend) tuple used by dyld_chained_ptr_64_bind.
801bdd1243dSDimitry Andric   std::pair<uint32_t, uint8_t> getBinding(const Symbol *sym,
802bdd1243dSDimitry Andric                                           int64_t addend) const;
803bdd1243dSDimitry Andric 
804bdd1243dSDimitry Andric   const std::vector<Location> &getLocations() const { return locations; }
805bdd1243dSDimitry Andric 
806bdd1243dSDimitry Andric   bool hasWeakBinding() const { return hasWeakBind; }
807bdd1243dSDimitry Andric   bool hasNonWeakDefinition() const { return hasNonWeakDef; }
808bdd1243dSDimitry Andric 
809bdd1243dSDimitry Andric private:
810bdd1243dSDimitry Andric   // Location::offset initially stores the offset within an InputSection, but
811bdd1243dSDimitry Andric   // contains output segment offsets after finalizeContents().
812bdd1243dSDimitry Andric   std::vector<Location> locations;
813bdd1243dSDimitry Andric   // (target symbol, addend) => import ordinal
814bdd1243dSDimitry Andric   llvm::MapVector<std::pair<const Symbol *, int64_t>, uint32_t> bindings;
815bdd1243dSDimitry Andric 
816bdd1243dSDimitry Andric   struct SegmentInfo {
817bdd1243dSDimitry Andric     SegmentInfo(const OutputSegment *oseg) : oseg(oseg) {}
818bdd1243dSDimitry Andric 
819bdd1243dSDimitry Andric     const OutputSegment *oseg;
820bdd1243dSDimitry Andric     // (page index, fixup starts offset)
821bdd1243dSDimitry Andric     llvm::SmallVector<std::pair<uint16_t, uint16_t>> pageStarts;
822bdd1243dSDimitry Andric 
823bdd1243dSDimitry Andric     size_t getSize() const;
824bdd1243dSDimitry Andric     size_t writeTo(uint8_t *buf) const;
825bdd1243dSDimitry Andric   };
826bdd1243dSDimitry Andric   llvm::SmallVector<SegmentInfo, 4> fixupSegments;
827bdd1243dSDimitry Andric 
828bdd1243dSDimitry Andric   size_t symtabSize = 0;
829bdd1243dSDimitry Andric   size_t size = 0;
830bdd1243dSDimitry Andric 
831bdd1243dSDimitry Andric   bool needsAddend = false;
832bdd1243dSDimitry Andric   bool needsLargeAddend = false;
833bdd1243dSDimitry Andric   bool hasWeakBind = false;
834bdd1243dSDimitry Andric   bool hasNonWeakDef = false;
835bdd1243dSDimitry Andric   llvm::MachO::ChainedImportFormat importFormat;
836bdd1243dSDimitry Andric };
837bdd1243dSDimitry Andric 
838bdd1243dSDimitry Andric void writeChainedRebase(uint8_t *buf, uint64_t targetVA);
839bdd1243dSDimitry Andric void writeChainedFixup(uint8_t *buf, const Symbol *sym, int64_t addend);
840bdd1243dSDimitry Andric 
8415ffd83dbSDimitry Andric struct InStruct {
84281ad6265SDimitry Andric   const uint8_t *bufferStart = nullptr;
843e8d8bef9SDimitry Andric   MachHeaderSection *header = nullptr;
844fe6060f1SDimitry Andric   CStringSection *cStringSection = nullptr;
845bdd1243dSDimitry Andric   DeduplicatedCStringSection *objcMethnameSection = nullptr;
846fe6060f1SDimitry Andric   WordLiteralSection *wordLiteralSection = nullptr;
847e8d8bef9SDimitry Andric   RebaseSection *rebase = nullptr;
8485ffd83dbSDimitry Andric   BindingSection *binding = nullptr;
849e8d8bef9SDimitry Andric   WeakBindingSection *weakBinding = nullptr;
850e8d8bef9SDimitry Andric   LazyBindingSection *lazyBinding = nullptr;
851e8d8bef9SDimitry Andric   ExportSection *exports = nullptr;
8525ffd83dbSDimitry Andric   GotSection *got = nullptr;
853e8d8bef9SDimitry Andric   TlvPointerSection *tlvPointers = nullptr;
8545ffd83dbSDimitry Andric   LazyPointerSection *lazyPointers = nullptr;
8555ffd83dbSDimitry Andric   StubsSection *stubs = nullptr;
8565ffd83dbSDimitry Andric   StubHelperSection *stubHelper = nullptr;
857bdd1243dSDimitry Andric   ObjCStubsSection *objcStubs = nullptr;
858fe6060f1SDimitry Andric   UnwindInfoSection *unwindInfo = nullptr;
859fcaf7f86SDimitry Andric   ObjCImageInfoSection *objCImageInfo = nullptr;
860fe6060f1SDimitry Andric   ConcatInputSection *imageLoaderCache = nullptr;
861bdd1243dSDimitry Andric   InitOffsetsSection *initOffsets = nullptr;
862*0fca6ea1SDimitry Andric   ObjCMethListSection *objcMethList = nullptr;
863bdd1243dSDimitry Andric   ChainedFixupsSection *chainedFixups = nullptr;
8645ffd83dbSDimitry Andric };
8655ffd83dbSDimitry Andric 
8665ffd83dbSDimitry Andric extern InStruct in;
8675ffd83dbSDimitry Andric extern std::vector<SyntheticSection *> syntheticSections;
8685ffd83dbSDimitry Andric 
869fe6060f1SDimitry Andric void createSyntheticSymbols();
870fe6060f1SDimitry Andric 
871bdd1243dSDimitry Andric } // namespace lld::macho
8725ffd83dbSDimitry Andric 
8735ffd83dbSDimitry Andric #endif
874