xref: /llvm-project/lld/MachO/Symbols.h (revision 691e3c64d08c32955c8f5f740d4ce0db00ee2307)
1 //===- Symbols.h ------------------------------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #ifndef LLD_MACHO_SYMBOLS_H
10 #define LLD_MACHO_SYMBOLS_H
11 
#include "Config.h"
#include "InputFiles.h"
#include "Target.h"

#include "llvm/Object/Archive.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/MathExtras.h"

#include <type_traits>
19 
20 namespace lld {
21 namespace macho {
22 
23 class MachHeaderSection;
24 
25 class Symbol {
26 public:
27   enum Kind {
28     DefinedKind,
29     UndefinedKind,
30     CommonKind,
31     DylibKind,
32     LazyArchiveKind,
33     LazyObjectKind,
34     AliasKind,
35   };
36 
37   // Enum that describes the type of Identical Code Folding (ICF) applied to a
38   // symbol. This information is crucial for accurately representing symbol
39   // sizes in the map file.
40   enum ICFFoldKind {
41     None, // No folding is applied.
42     Body, // The entire body (function or data) is folded.
43     Thunk // The function body is folded into a single branch thunk.
44   };
45 
46   virtual ~Symbol() {}
47 
48   Kind kind() const { return symbolKind; }
49 
50   StringRef getName() const { return {nameData, nameSize}; }
51 
52   bool isLive() const { return used; }
53   bool isLazy() const {
54     return symbolKind == LazyArchiveKind || symbolKind == LazyObjectKind;
55   }
56 
57   virtual uint64_t getVA() const { return 0; }
58 
59   virtual bool isWeakDef() const { return false; }
60 
61   // Only undefined or dylib symbols can be weak references. A weak reference
62   // need not be satisfied at runtime, e.g. due to the symbol not being
63   // available on a given target platform.
64   virtual bool isWeakRef() const { return false; }
65 
66   virtual bool isTlv() const { return false; }
67 
68   // Whether this symbol is in the GOT or TLVPointer sections.
69   bool isInGot() const { return gotIndex != UINT32_MAX; }
70 
71   // Whether this symbol is in the StubsSection.
72   bool isInStubs() const { return stubsIndex != UINT32_MAX; }
73 
74   uint64_t getStubVA() const;
75   uint64_t getLazyPtrVA() const;
76   uint64_t getGotVA() const;
77   uint64_t getTlvVA() const;
78   uint64_t resolveBranchVA() const {
79     assert(isa<Defined>(this) || isa<DylibSymbol>(this));
80     return isInStubs() ? getStubVA() : getVA();
81   }
82   uint64_t resolveGotVA() const { return isInGot() ? getGotVA() : getVA(); }
83   uint64_t resolveTlvVA() const { return isInGot() ? getTlvVA() : getVA(); }
84 
85   // The index of this symbol in the GOT or the TLVPointer section, depending
86   // on whether it is a thread-local. A given symbol cannot be referenced by
87   // both these sections at once.
88   uint32_t gotIndex = UINT32_MAX;
89   uint32_t lazyBindOffset = UINT32_MAX;
90   uint32_t stubsHelperIndex = UINT32_MAX;
91   uint32_t stubsIndex = UINT32_MAX;
92   uint32_t symtabIndex = UINT32_MAX;
93 
94   InputFile *getFile() const { return file; }
95 
96 protected:
97   Symbol(Kind k, StringRef name, InputFile *file)
98       : symbolKind(k), nameData(name.data()), file(file), nameSize(name.size()),
99         isUsedInRegularObj(!file || isa<ObjFile>(file)),
100         used(!config->deadStrip) {}
101 
102   Kind symbolKind;
103   const char *nameData;
104   InputFile *file;
105   uint32_t nameSize;
106 
107 public:
108   // True if this symbol was referenced by a regular (non-bitcode) object.
109   bool isUsedInRegularObj : 1;
110 
111   // True if this symbol is used from a live section.
112   bool used : 1;
113 };
114 
115 class Defined : public Symbol {
116 public:
117   Defined(StringRef name, InputFile *file, InputSection *isec, uint64_t value,
118           uint64_t size, bool isWeakDef, bool isExternal, bool isPrivateExtern,
119           bool includeInSymtab, bool isReferencedDynamically, bool noDeadStrip,
120           bool canOverrideWeakDef = false, bool isWeakDefCanBeHidden = false,
121           bool interposable = false);
122 
123   bool isWeakDef() const override { return weakDef; }
124   bool isExternalWeakDef() const {
125     return isWeakDef() && isExternal() && !privateExtern;
126   }
127   bool isTlv() const override;
128 
129   bool isExternal() const { return external; }
130   bool isAbsolute() const { return originalIsec == nullptr; }
131 
132   uint64_t getVA() const override;
133 
134   // Returns the object file that this symbol was defined in. This value differs
135   // from `getFile()` if the symbol originated from a bitcode file.
136   ObjFile *getObjectFile() const;
137 
138   std::string getSourceLocation();
139 
140   // Get the canonical InputSection of the symbol.
141   InputSection *isec() const;
142 
143   // Get the canonical unwind entry of the symbol.
144   ConcatInputSection *unwindEntry() const;
145 
146   static bool classof(const Symbol *s) { return s->kind() == DefinedKind; }
147 
148   // Place the bitfields first so that they can get placed in the tail padding
149   // of the parent class, on platforms which support it.
150   bool overridesWeakDef : 1;
151   // Whether this symbol should appear in the output binary's export trie.
152   bool privateExtern : 1;
153   // Whether this symbol should appear in the output symbol table.
154   bool includeInSymtab : 1;
155   // The ICF folding kind of this symbol: None / Body / Thunk.
156   LLVM_PREFERRED_TYPE(ICFFoldKind)
157   uint8_t identicalCodeFoldingKind : 2;
158   // Symbols marked referencedDynamically won't be removed from the output's
159   // symbol table by tools like strip. In theory, this could be set on arbitrary
160   // symbols in input object files. In practice, it's used solely for the
161   // synthetic __mh_execute_header symbol.
162   // This is information for the static linker, and it's also written to the
163   // output file's symbol table for tools running later (such as `strip`).
164   bool referencedDynamically : 1;
165   // Set on symbols that should not be removed by dead code stripping.
166   // Set for example on `__attribute__((used))` globals, or on some Objective-C
167   // metadata. This is information only for the static linker and not written
168   // to the output.
169   bool noDeadStrip : 1;
170   // Whether references to this symbol can be interposed at runtime to point to
171   // a different symbol definition (with the same name). For example, if both
172   // dylib A and B define an interposable symbol _foo, and we load A before B at
173   // runtime, then all references to _foo within dylib B will point to the
174   // definition in dylib A.
175   //
176   // Only extern symbols may be interposable.
177   bool interposable : 1;
178 
179   bool weakDefCanBeHidden : 1;
180 
181 private:
182   const bool weakDef : 1;
183   const bool external : 1;
184 
185 public:
186   // The native InputSection of the symbol. The symbol may be moved to another
187   // InputSection in which case originalIsec->canonical() will point to the new
188   // InputSection
189   InputSection *originalIsec;
190   // Contains the offset from the containing subsection. Note that this is
191   // different from nlist::n_value, which is the absolute address of the symbol.
192   uint64_t value;
193   // size is only calculated for regular (non-bitcode) symbols.
194   uint64_t size;
195   // This can be a subsection of either __compact_unwind or __eh_frame.
196   ConcatInputSection *originalUnwindEntry = nullptr;
197 };
198 
// This enum serves two purposes. As a property of a symbol it records whether
// and how a dylib symbol is referenced. As a property of a DylibFile it
// records the kind of referenced symbols the file contains; a file with both
// weak and strong references counts as strongly-referenced.
//
// The enumerators are ordered by strength (Unreferenced < Weak < Strong);
// code in this header compares them with relational operators.
enum class RefState : uint8_t {
  Unreferenced = 0,
  Weak = 1,
  Strong = 2,
};
205 
206 class Undefined : public Symbol {
207 public:
208   Undefined(StringRef name, InputFile *file, RefState refState,
209             bool wasBitcodeSymbol)
210       : Symbol(UndefinedKind, name, file), refState(refState),
211         wasBitcodeSymbol(wasBitcodeSymbol) {
212     assert(refState != RefState::Unreferenced);
213   }
214 
215   bool isWeakRef() const override { return refState == RefState::Weak; }
216 
217   static bool classof(const Symbol *s) { return s->kind() == UndefinedKind; }
218 
219   RefState refState : 2;
220   bool wasBitcodeSymbol;
221 };
222 
223 // On Unix, it is traditionally allowed to write variable definitions without
224 // initialization expressions (such as "int foo;") to header files. These are
225 // called tentative definitions.
226 //
227 // Using tentative definitions is usually considered a bad practice; you should
228 // write only declarations (such as "extern int foo;") to header files.
229 // Nevertheless, the linker and the compiler have to do something to support
230 // bad code by allowing duplicate definitions for this particular case.
231 //
232 // The compiler creates common symbols when it sees tentative definitions.
233 // (You can suppress this behavior and let the compiler create a regular
234 // defined symbol by passing -fno-common. -fno-common is the default in clang
235 // as of LLVM 11.0.) When linking the final binary, if there are remaining
236 // common symbols after name resolution is complete, the linker converts them
237 // to regular defined symbols in a __common section.
238 class CommonSymbol : public Symbol {
239 public:
240   CommonSymbol(StringRef name, InputFile *file, uint64_t size, uint32_t align,
241                bool isPrivateExtern)
242       : Symbol(CommonKind, name, file), size(size),
243         align(align != 1 ? align : llvm::PowerOf2Ceil(size)),
244         privateExtern(isPrivateExtern) {
245     // TODO: cap maximum alignment
246   }
247 
248   static bool classof(const Symbol *s) { return s->kind() == CommonKind; }
249 
250   const uint64_t size;
251   const uint32_t align;
252   const bool privateExtern;
253 };
254 
255 class DylibSymbol : public Symbol {
256 public:
257   DylibSymbol(DylibFile *file, StringRef name, bool isWeakDef,
258               RefState refState, bool isTlv)
259       : Symbol(DylibKind, name, file), shouldReexport(false),
260         refState(refState), weakDef(isWeakDef), tlv(isTlv) {
261     if (file && refState > RefState::Unreferenced)
262       file->numReferencedSymbols++;
263   }
264 
265   uint64_t getVA() const override;
266   bool isWeakDef() const override { return weakDef; }
267 
268   // Symbols from weak libraries/frameworks are also weakly-referenced.
269   bool isWeakRef() const override {
270     return refState == RefState::Weak ||
271            (file && getFile()->umbrella->forceWeakImport);
272   }
273   bool isReferenced() const { return refState != RefState::Unreferenced; }
274   bool isTlv() const override { return tlv; }
275   bool isDynamicLookup() const { return file == nullptr; }
276   bool hasStubsHelper() const { return stubsHelperIndex != UINT32_MAX; }
277 
278   DylibFile *getFile() const {
279     assert(!isDynamicLookup());
280     return cast<DylibFile>(file);
281   }
282 
283   static bool classof(const Symbol *s) { return s->kind() == DylibKind; }
284 
285   RefState getRefState() const { return refState; }
286 
287   void reference(RefState newState) {
288     assert(newState > RefState::Unreferenced);
289     if (refState == RefState::Unreferenced && file)
290       getFile()->numReferencedSymbols++;
291     refState = std::max(refState, newState);
292   }
293 
294   void unreference() {
295     // dynamic_lookup symbols have no file.
296     if (refState > RefState::Unreferenced && file) {
297       assert(getFile()->numReferencedSymbols > 0);
298       getFile()->numReferencedSymbols--;
299     }
300   }
301 
302   bool shouldReexport : 1;
303 
304 private:
305   RefState refState : 2;
306   const bool weakDef : 1;
307   const bool tlv : 1;
308 };
309 
310 class LazyArchive : public Symbol {
311 public:
312   LazyArchive(ArchiveFile *file, const llvm::object::Archive::Symbol &sym)
313       : Symbol(LazyArchiveKind, sym.getName(), file), sym(sym) {}
314 
315   ArchiveFile *getFile() const { return cast<ArchiveFile>(file); }
316   void fetchArchiveMember();
317 
318   static bool classof(const Symbol *s) { return s->kind() == LazyArchiveKind; }
319 
320 private:
321   const llvm::object::Archive::Symbol sym;
322 };
323 
324 // A defined symbol in an ObjFile/BitcodeFile surrounded by --start-lib and
325 // --end-lib.
326 class LazyObject : public Symbol {
327 public:
328   LazyObject(InputFile &file, StringRef name)
329       : Symbol(LazyObjectKind, name, &file) {
330     isUsedInRegularObj = false;
331   }
332 
333   static bool classof(const Symbol *s) { return s->kind() == LazyObjectKind; }
334 };
335 
336 // Represents N_INDR symbols. Note that if we are given valid, linkable inputs,
337 // then all AliasSymbol instances will be converted into one of the other Symbol
338 // types after `createAliases()` runs.
339 class AliasSymbol final : public Symbol {
340 public:
341   AliasSymbol(InputFile *file, StringRef name, StringRef aliasedName,
342               bool isPrivateExtern)
343       : Symbol(AliasKind, name, file), privateExtern(isPrivateExtern),
344         aliasedName(aliasedName) {}
345 
346   StringRef getAliasedName() const { return aliasedName; }
347 
348   static bool classof(const Symbol *s) { return s->kind() == AliasKind; }
349 
350   const bool privateExtern;
351 
352 private:
353   StringRef aliasedName;
354 };
355 
356 union SymbolUnion {
357   alignas(Defined) char a[sizeof(Defined)];
358   alignas(Undefined) char b[sizeof(Undefined)];
359   alignas(CommonSymbol) char c[sizeof(CommonSymbol)];
360   alignas(DylibSymbol) char d[sizeof(DylibSymbol)];
361   alignas(LazyArchive) char e[sizeof(LazyArchive)];
362   alignas(LazyObject) char f[sizeof(LazyObject)];
363   alignas(AliasSymbol) char g[sizeof(AliasSymbol)];
364 };
365 
366 template <typename T, typename... ArgT>
367 T *replaceSymbol(Symbol *s, ArgT &&...arg) {
368   static_assert(sizeof(T) <= sizeof(SymbolUnion), "SymbolUnion too small");
369   static_assert(alignof(T) <= alignof(SymbolUnion),
370                 "SymbolUnion not aligned enough");
371   assert(static_cast<Symbol *>(static_cast<T *>(nullptr)) == nullptr &&
372          "Not a Symbol");
373 
374   bool isUsedInRegularObj = s->isUsedInRegularObj;
375   bool used = s->used;
376   T *sym = new (s) T(std::forward<ArgT>(arg)...);
377   sym->isUsedInRegularObj |= isUsedInRegularObj;
378   sym->used |= used;
379   return sym;
380 }
381 
382 // Can a symbol's address only be resolved at runtime?
383 inline bool needsBinding(const Symbol *sym) {
384   if (isa<DylibSymbol>(sym))
385     return true;
386   if (const auto *defined = dyn_cast<Defined>(sym))
387     return defined->isExternalWeakDef() || defined->interposable;
388   return false;
389 }
390 
391 // Symbols with `l` or `L` as a prefix are linker-private and never appear in
392 // the output.
393 inline bool isPrivateLabel(StringRef name) {
394   return name.starts_with("l") || name.starts_with("L");
395 }
396 } // namespace macho
397 
398 std::string toString(const macho::Symbol &);
399 std::string toMachOString(const llvm::object::Archive::Symbol &);
400 
401 } // namespace lld
402 
403 #endif
404