1 //===- Symbols.h ------------------------------------------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #ifndef LLD_MACHO_SYMBOLS_H 10 #define LLD_MACHO_SYMBOLS_H 11 12 #include "Config.h" 13 #include "InputFiles.h" 14 #include "Target.h" 15 16 #include "llvm/Object/Archive.h" 17 #include "llvm/Support/Compiler.h" 18 #include "llvm/Support/MathExtras.h" 19 20 namespace lld { 21 namespace macho { 22 23 class MachHeaderSection; 24 25 class Symbol { 26 public: 27 enum Kind { 28 DefinedKind, 29 UndefinedKind, 30 CommonKind, 31 DylibKind, 32 LazyArchiveKind, 33 LazyObjectKind, 34 AliasKind, 35 }; 36 37 // Enum that describes the type of Identical Code Folding (ICF) applied to a 38 // symbol. This information is crucial for accurately representing symbol 39 // sizes in the map file. 40 enum ICFFoldKind { 41 None, // No folding is applied. 42 Body, // The entire body (function or data) is folded. 43 Thunk // The function body is folded into a single branch thunk. 44 }; 45 46 virtual ~Symbol() {} 47 48 Kind kind() const { return symbolKind; } 49 50 StringRef getName() const { return {nameData, nameSize}; } 51 52 bool isLive() const { return used; } 53 bool isLazy() const { 54 return symbolKind == LazyArchiveKind || symbolKind == LazyObjectKind; 55 } 56 57 virtual uint64_t getVA() const { return 0; } 58 59 virtual bool isWeakDef() const { return false; } 60 61 // Only undefined or dylib symbols can be weak references. A weak reference 62 // need not be satisfied at runtime, e.g. due to the symbol not being 63 // available on a given target platform. 64 virtual bool isWeakRef() const { return false; } 65 66 virtual bool isTlv() const { return false; } 67 68 // Whether this symbol is in the GOT or TLVPointer sections. 69 bool isInGot() const { return gotIndex != UINT32_MAX; } 70 71 // Whether this symbol is in the StubsSection. 72 bool isInStubs() const { return stubsIndex != UINT32_MAX; } 73 74 uint64_t getStubVA() const; 75 uint64_t getLazyPtrVA() const; 76 uint64_t getGotVA() const; 77 uint64_t getTlvVA() const; 78 uint64_t resolveBranchVA() const { 79 assert(isa<Defined>(this) || isa<DylibSymbol>(this)); 80 return isInStubs() ? getStubVA() : getVA(); 81 } 82 uint64_t resolveGotVA() const { return isInGot() ? getGotVA() : getVA(); } 83 uint64_t resolveTlvVA() const { return isInGot() ? getTlvVA() : getVA(); } 84 85 // The index of this symbol in the GOT or the TLVPointer section, depending 86 // on whether it is a thread-local. A given symbol cannot be referenced by 87 // both these sections at once. 88 uint32_t gotIndex = UINT32_MAX; 89 uint32_t lazyBindOffset = UINT32_MAX; 90 uint32_t stubsHelperIndex = UINT32_MAX; 91 uint32_t stubsIndex = UINT32_MAX; 92 uint32_t symtabIndex = UINT32_MAX; 93 94 InputFile *getFile() const { return file; } 95 96 protected: 97 Symbol(Kind k, StringRef name, InputFile *file) 98 : symbolKind(k), nameData(name.data()), file(file), nameSize(name.size()), 99 isUsedInRegularObj(!file || isa<ObjFile>(file)), 100 used(!config->deadStrip) {} 101 102 Kind symbolKind; 103 const char *nameData; 104 InputFile *file; 105 uint32_t nameSize; 106 107 public: 108 // True if this symbol was referenced by a regular (non-bitcode) object. 109 bool isUsedInRegularObj : 1; 110 111 // True if this symbol is used from a live section. 112 bool used : 1; 113 }; 114 115 class Defined : public Symbol { 116 public: 117 Defined(StringRef name, InputFile *file, InputSection *isec, uint64_t value, 118 uint64_t size, bool isWeakDef, bool isExternal, bool isPrivateExtern, 119 bool includeInSymtab, bool isReferencedDynamically, bool noDeadStrip, 120 bool canOverrideWeakDef = false, bool isWeakDefCanBeHidden = false, 121 bool interposable = false); 122 123 bool isWeakDef() const override { return weakDef; } 124 bool isExternalWeakDef() const { 125 return isWeakDef() && isExternal() && !privateExtern; 126 } 127 bool isTlv() const override; 128 129 bool isExternal() const { return external; } 130 bool isAbsolute() const { return originalIsec == nullptr; } 131 132 uint64_t getVA() const override; 133 134 // Returns the object file that this symbol was defined in. This value differs 135 // from `getFile()` if the symbol originated from a bitcode file. 136 ObjFile *getObjectFile() const; 137 138 std::string getSourceLocation(); 139 140 // Get the canonical InputSection of the symbol. 141 InputSection *isec() const; 142 143 // Get the canonical unwind entry of the symbol. 144 ConcatInputSection *unwindEntry() const; 145 146 static bool classof(const Symbol *s) { return s->kind() == DefinedKind; } 147 148 // Place the bitfields first so that they can get placed in the tail padding 149 // of the parent class, on platforms which support it. 150 bool overridesWeakDef : 1; 151 // Whether this symbol should appear in the output binary's export trie. 152 bool privateExtern : 1; 153 // Whether this symbol should appear in the output symbol table. 154 bool includeInSymtab : 1; 155 // The ICF folding kind of this symbol: None / Body / Thunk. 156 LLVM_PREFERRED_TYPE(ICFFoldKind) 157 uint8_t identicalCodeFoldingKind : 2; 158 // Symbols marked referencedDynamically won't be removed from the output's 159 // symbol table by tools like strip. In theory, this could be set on arbitrary 160 // symbols in input object files. In practice, it's used solely for the 161 // synthetic __mh_execute_header symbol. 162 // This is information for the static linker, and it's also written to the 163 // output file's symbol table for tools running later (such as `strip`). 164 bool referencedDynamically : 1; 165 // Set on symbols that should not be removed by dead code stripping. 166 // Set for example on `__attribute__((used))` globals, or on some Objective-C 167 // metadata. This is information only for the static linker and not written 168 // to the output. 169 bool noDeadStrip : 1; 170 // Whether references to this symbol can be interposed at runtime to point to 171 // a different symbol definition (with the same name). For example, if both 172 // dylib A and B define an interposable symbol _foo, and we load A before B at 173 // runtime, then all references to _foo within dylib B will point to the 174 // definition in dylib A. 175 // 176 // Only extern symbols may be interposable. 177 bool interposable : 1; 178 179 bool weakDefCanBeHidden : 1; 180 181 private: 182 const bool weakDef : 1; 183 const bool external : 1; 184 185 public: 186 // The native InputSection of the symbol. The symbol may be moved to another 187 // InputSection in which case originalIsec->canonical() will point to the new 188 // InputSection 189 InputSection *originalIsec; 190 // Contains the offset from the containing subsection. Note that this is 191 // different from nlist::n_value, which is the absolute address of the symbol. 192 uint64_t value; 193 // size is only calculated for regular (non-bitcode) symbols. 194 uint64_t size; 195 // This can be a subsection of either __compact_unwind or __eh_frame. 196 ConcatInputSection *originalUnwindEntry = nullptr; 197 }; 198 199 // This enum does double-duty: as a symbol property, it indicates whether & how 200 // a dylib symbol is referenced. As a DylibFile property, it indicates the kind 201 // of referenced symbols contained within the file. If there are both weak 202 // and strong references to the same file, we will count the file as 203 // strongly-referenced. 204 enum class RefState : uint8_t { Unreferenced = 0, Weak = 1, Strong = 2 }; 205 206 class Undefined : public Symbol { 207 public: 208 Undefined(StringRef name, InputFile *file, RefState refState, 209 bool wasBitcodeSymbol) 210 : Symbol(UndefinedKind, name, file), refState(refState), 211 wasBitcodeSymbol(wasBitcodeSymbol) { 212 assert(refState != RefState::Unreferenced); 213 } 214 215 bool isWeakRef() const override { return refState == RefState::Weak; } 216 217 static bool classof(const Symbol *s) { return s->kind() == UndefinedKind; } 218 219 RefState refState : 2; 220 bool wasBitcodeSymbol; 221 }; 222 223 // On Unix, it is traditionally allowed to write variable definitions without 224 // initialization expressions (such as "int foo;") to header files. These are 225 // called tentative definitions. 226 // 227 // Using tentative definitions is usually considered a bad practice; you should 228 // write only declarations (such as "extern int foo;") to header files. 229 // Nevertheless, the linker and the compiler have to do something to support 230 // bad code by allowing duplicate definitions for this particular case. 231 // 232 // The compiler creates common symbols when it sees tentative definitions. 233 // (You can suppress this behavior and let the compiler create a regular 234 // defined symbol by passing -fno-common. -fno-common is the default in clang 235 // as of LLVM 11.0.) When linking the final binary, if there are remaining 236 // common symbols after name resolution is complete, the linker converts them 237 // to regular defined symbols in a __common section. 238 class CommonSymbol : public Symbol { 239 public: 240 CommonSymbol(StringRef name, InputFile *file, uint64_t size, uint32_t align, 241 bool isPrivateExtern) 242 : Symbol(CommonKind, name, file), size(size), 243 align(align != 1 ? align : llvm::PowerOf2Ceil(size)), 244 privateExtern(isPrivateExtern) { 245 // TODO: cap maximum alignment 246 } 247 248 static bool classof(const Symbol *s) { return s->kind() == CommonKind; } 249 250 const uint64_t size; 251 const uint32_t align; 252 const bool privateExtern; 253 }; 254 255 class DylibSymbol : public Symbol { 256 public: 257 DylibSymbol(DylibFile *file, StringRef name, bool isWeakDef, 258 RefState refState, bool isTlv) 259 : Symbol(DylibKind, name, file), shouldReexport(false), 260 refState(refState), weakDef(isWeakDef), tlv(isTlv) { 261 if (file && refState > RefState::Unreferenced) 262 file->numReferencedSymbols++; 263 } 264 265 uint64_t getVA() const override; 266 bool isWeakDef() const override { return weakDef; } 267 268 // Symbols from weak libraries/frameworks are also weakly-referenced. 269 bool isWeakRef() const override { 270 return refState == RefState::Weak || 271 (file && getFile()->umbrella->forceWeakImport); 272 } 273 bool isReferenced() const { return refState != RefState::Unreferenced; } 274 bool isTlv() const override { return tlv; } 275 bool isDynamicLookup() const { return file == nullptr; } 276 bool hasStubsHelper() const { return stubsHelperIndex != UINT32_MAX; } 277 278 DylibFile *getFile() const { 279 assert(!isDynamicLookup()); 280 return cast<DylibFile>(file); 281 } 282 283 static bool classof(const Symbol *s) { return s->kind() == DylibKind; } 284 285 RefState getRefState() const { return refState; } 286 287 void reference(RefState newState) { 288 assert(newState > RefState::Unreferenced); 289 if (refState == RefState::Unreferenced && file) 290 getFile()->numReferencedSymbols++; 291 refState = std::max(refState, newState); 292 } 293 294 void unreference() { 295 // dynamic_lookup symbols have no file. 296 if (refState > RefState::Unreferenced && file) { 297 assert(getFile()->numReferencedSymbols > 0); 298 getFile()->numReferencedSymbols--; 299 } 300 } 301 302 bool shouldReexport : 1; 303 304 private: 305 RefState refState : 2; 306 const bool weakDef : 1; 307 const bool tlv : 1; 308 }; 309 310 class LazyArchive : public Symbol { 311 public: 312 LazyArchive(ArchiveFile *file, const llvm::object::Archive::Symbol &sym) 313 : Symbol(LazyArchiveKind, sym.getName(), file), sym(sym) {} 314 315 ArchiveFile *getFile() const { return cast<ArchiveFile>(file); } 316 void fetchArchiveMember(); 317 318 static bool classof(const Symbol *s) { return s->kind() == LazyArchiveKind; } 319 320 private: 321 const llvm::object::Archive::Symbol sym; 322 }; 323 324 // A defined symbol in an ObjFile/BitcodeFile surrounded by --start-lib and 325 // --end-lib. 326 class LazyObject : public Symbol { 327 public: 328 LazyObject(InputFile &file, StringRef name) 329 : Symbol(LazyObjectKind, name, &file) { 330 isUsedInRegularObj = false; 331 } 332 333 static bool classof(const Symbol *s) { return s->kind() == LazyObjectKind; } 334 }; 335 336 // Represents N_INDR symbols. Note that if we are given valid, linkable inputs, 337 // then all AliasSymbol instances will be converted into one of the other Symbol 338 // types after `createAliases()` runs. 339 class AliasSymbol final : public Symbol { 340 public: 341 AliasSymbol(InputFile *file, StringRef name, StringRef aliasedName, 342 bool isPrivateExtern) 343 : Symbol(AliasKind, name, file), privateExtern(isPrivateExtern), 344 aliasedName(aliasedName) {} 345 346 StringRef getAliasedName() const { return aliasedName; } 347 348 static bool classof(const Symbol *s) { return s->kind() == AliasKind; } 349 350 const bool privateExtern; 351 352 private: 353 StringRef aliasedName; 354 }; 355 356 union SymbolUnion { 357 alignas(Defined) char a[sizeof(Defined)]; 358 alignas(Undefined) char b[sizeof(Undefined)]; 359 alignas(CommonSymbol) char c[sizeof(CommonSymbol)]; 360 alignas(DylibSymbol) char d[sizeof(DylibSymbol)]; 361 alignas(LazyArchive) char e[sizeof(LazyArchive)]; 362 alignas(LazyObject) char f[sizeof(LazyObject)]; 363 alignas(AliasSymbol) char g[sizeof(AliasSymbol)]; 364 }; 365 366 template <typename T, typename... ArgT> 367 T *replaceSymbol(Symbol *s, ArgT &&...arg) { 368 static_assert(sizeof(T) <= sizeof(SymbolUnion), "SymbolUnion too small"); 369 static_assert(alignof(T) <= alignof(SymbolUnion), 370 "SymbolUnion not aligned enough"); 371 assert(static_cast<Symbol *>(static_cast<T *>(nullptr)) == nullptr && 372 "Not a Symbol"); 373 374 bool isUsedInRegularObj = s->isUsedInRegularObj; 375 bool used = s->used; 376 T *sym = new (s) T(std::forward<ArgT>(arg)...); 377 sym->isUsedInRegularObj |= isUsedInRegularObj; 378 sym->used |= used; 379 return sym; 380 } 381 382 // Can a symbol's address only be resolved at runtime? 383 inline bool needsBinding(const Symbol *sym) { 384 if (isa<DylibSymbol>(sym)) 385 return true; 386 if (const auto *defined = dyn_cast<Defined>(sym)) 387 return defined->isExternalWeakDef() || defined->interposable; 388 return false; 389 } 390 391 // Symbols with `l` or `L` as a prefix are linker-private and never appear in 392 // the output. 393 inline bool isPrivateLabel(StringRef name) { 394 return name.starts_with("l") || name.starts_with("L"); 395 } 396 } // namespace macho 397 398 std::string toString(const macho::Symbol &); 399 std::string toMachOString(const llvm::object::Archive::Symbol &); 400 401 } // namespace lld 402 403 #endif 404