1 //===- Symbols.h ------------------------------------------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file defines various types of Symbols. 10 // 11 //===----------------------------------------------------------------------===// 12 13 #ifndef LLD_ELF_SYMBOLS_H 14 #define LLD_ELF_SYMBOLS_H 15 16 #include "Config.h" 17 #include "lld/Common/LLVM.h" 18 #include "lld/Common/Memory.h" 19 #include "llvm/ADT/DenseMap.h" 20 #include "llvm/Object/ELF.h" 21 #include "llvm/Support/Compiler.h" 22 #include <tuple> 23 24 namespace lld::elf { 25 class CommonSymbol; 26 class Defined; 27 class OutputSection; 28 class SectionBase; 29 class InputSectionBase; 30 class SharedSymbol; 31 class Symbol; 32 class Undefined; 33 class LazySymbol; 34 class InputFile; 35 36 // Returns a string representation for a symbol for diagnostics. 37 std::string toStr(Ctx &, const Symbol &); 38 const ELFSyncStream &operator<<(const ELFSyncStream &, const Symbol *); 39 40 void printTraceSymbol(const Symbol &sym, StringRef name); 41 42 enum { 43 NEEDS_GOT = 1 << 0, 44 NEEDS_PLT = 1 << 1, 45 HAS_DIRECT_RELOC = 1 << 2, 46 // True if this symbol needs a canonical PLT entry, or (during 47 // postScanRelocations) a copy relocation. 48 NEEDS_COPY = 1 << 3, 49 NEEDS_TLSDESC = 1 << 4, 50 NEEDS_TLSGD = 1 << 5, 51 NEEDS_TLSGD_TO_IE = 1 << 6, 52 NEEDS_GOT_DTPREL = 1 << 7, 53 NEEDS_TLSIE = 1 << 8, 54 NEEDS_GOT_AUTH = 1 << 9, 55 NEEDS_GOT_NONAUTH = 1 << 10, 56 NEEDS_TLSDESC_AUTH = 1 << 11, 57 NEEDS_TLSDESC_NONAUTH = 1 << 12, 58 }; 59 60 // The base class for real symbol classes. 
// Symbol holds the state shared by every kind of symbol. The declaration
// order of the data members is significant: small fields and one-bit flags
// are grouped together, the copy constructor copies the object bytewise, and
// makeDefined() zero-fills sizeof(Symbol) bytes before constructing.
class Symbol {
public:
  // Discriminator stored in symbolKind; it identifies the concrete class.
  enum Kind {
    PlaceholderKind,
    DefinedKind,
    CommonKind,
    SharedKind,
    UndefinedKind,
    LazyKind,
  };

  // Returns symbolKind converted back to the Kind enumeration.
  Kind kind() const { return static_cast<Kind>(symbolKind); }

  // The file from which this symbol was created.
  InputFile *file;

  // The default copy constructor is deleted due to atomic flags. Define one for
  // places where no atomic is needed.
  //
  // Note that exactly sizeof(Symbol) bytes are copied, i.e. only the
  // base-class portion of `o`.
  Symbol(const Symbol &o) { memcpy(static_cast<void *>(this), &o, sizeof(o)); }

protected:
  // Start of the symbol name; together with nameSize this forms getName().
  const char *nameData;
  // 32-bit size saves space.
  uint32_t nameSize;

public:
  // The next three fields have the same meaning as the ELF symbol attributes.
  // type and binding are placed in this order to optimize generating st_info,
  // which is defined as (binding << 4) + (type & 0xf), on a little-endian
  // system.
  uint8_t type : 4; // symbol type

  // Symbol binding. This is not overwritten by replace() to track
  // changes during resolution. In particular:
  //  - An undefined weak is still weak when it resolves to a shared library.
  //  - An undefined weak will not extract archive members, but we have to
  //    remember it is weak.
  uint8_t binding : 4;

  uint8_t stOther; // st_other field value

  // Holds a Kind value; read it through kind() or the is*() predicates.
  uint8_t symbolKind;

  // The partition whose dynamic symbol table contains this symbol's definition.
  uint8_t partition;

  // True if this symbol is preemptible at load time.
  LLVM_PREFERRED_TYPE(bool)
  uint8_t isPreemptible : 1;

  // True if the symbol was used for linking and thus need to be added to the
  // output file's symbol table. This is true for all symbols except for
  // unreferenced DSO symbols, lazy (archive) symbols, and bitcode symbols that
  // are unreferenced except by other bitcode objects.
  LLVM_PREFERRED_TYPE(bool)
  uint8_t isUsedInRegularObj : 1;

  // True if an undefined or shared symbol is used from a live section.
  //
  // NOTE: In Writer.cpp the field is used to mark local defined symbols
  // which are referenced by relocations when -r or --emit-relocs is given.
  LLVM_PREFERRED_TYPE(bool)
  uint8_t used : 1;

  // Used by a Defined symbol with protected or default visibility, to record
  // whether it is required to be exported into .dynsym. This is set when any of
  // the following conditions hold:
  //
  // - If there is an interposable symbol from a DSO. Note: We also do this for
  //   STV_PROTECTED symbols which can't be interposed (to match BFD behavior).
  // - If -shared or --export-dynamic is specified, any symbol in an object
  //   file/bitcode sets this property, unless suppressed by LTO
  //   canBeOmittedFromSymbolTable().
  //
  // Primarily set in two locations, (a) after parseSymbolVersion and
  // (b) during demoteSymbols.
  LLVM_PREFERRED_TYPE(bool)
  uint8_t isExported : 1;

  // Used to compute isExported. Set when defined or referenced by a SharedFile.
  LLVM_PREFERRED_TYPE(bool)
  uint8_t exportDynamic : 1;

  // NOTE(review): not documented in this header; presumably records the LTO
  // canBeOmittedFromSymbolTable() result mentioned for isExported -- confirm.
  LLVM_PREFERRED_TYPE(bool)
  uint8_t ltoCanOmit : 1;

  // True if this symbol is specified by --trace-symbol option.
  LLVM_PREFERRED_TYPE(bool)
  uint8_t traced : 1;

  // True if the name contains '@'.
  LLVM_PREFERRED_TYPE(bool)
  uint8_t hasVersionSuffix : 1;

  // Symbol visibility. This is the computed minimum visibility of all
  // observed non-DSO symbols.
  //
  // The visibility occupies the low two bits of stOther.
  uint8_t visibility() const { return stOther & 3; }
  // Replaces the low two bits of stOther. The argument is OR-ed in without
  // masking, so it must not have any higher bit set.
  void setVisibility(uint8_t visibility) {
    stOther = (stOther & ~3) | visibility;
  }

  bool includeInDynsym(Ctx &) const;
  uint8_t computeBinding(Ctx &) const;
  bool isGlobal() const { return binding == llvm::ELF::STB_GLOBAL; }
  bool isWeak() const { return binding == llvm::ELF::STB_WEAK; }

  // Kind predicates; each compares symbolKind against one Kind value.
  bool isUndefined() const { return symbolKind == UndefinedKind; }
  bool isCommon() const { return symbolKind == CommonKind; }
  bool isDefined() const { return symbolKind == DefinedKind; }
  bool isShared() const { return symbolKind == SharedKind; }
  bool isPlaceholder() const { return symbolKind == PlaceholderKind; }

  bool isLocal() const { return binding == llvm::ELF::STB_LOCAL; }

  bool isLazy() const { return symbolKind == LazyKind; }

  // True if this is an undefined weak symbol. This only works once
  // all input files have been added.
  bool isUndefWeak() const { return isWeak() && isUndefined(); }

  // Returns the name as a (pointer, length) view; no copy is made.
  StringRef getName() const { return {nameData, nameSize}; }

  // Points the symbol at a new name. Only the pointer and length are stored,
  // so the characters must outlive the symbol.
  void setName(StringRef s) {
    nameData = s.data();
    nameSize = s.size();
  }

  void parseSymbolVersion(Ctx &);

  // Get the NUL-terminated version suffix ("", "@...", or "@@...").
  //
  // For @@, the name has been truncated by insert(). For @, the name has been
  // truncated by Symbol::parseSymbolVersion(ctx).
  //
  // The suffix is read from the bytes immediately following the name.
  const char *getVersionSuffix() const { return nameData + nameSize; }

  // Index accessors: each reads one field of this symbol's ctx.symAux entry,
  // selected by auxIdx.
  uint32_t getGotIdx(Ctx &ctx) const { return ctx.symAux[auxIdx].gotIdx; }
  uint32_t getPltIdx(Ctx &ctx) const { return ctx.symAux[auxIdx].pltIdx; }
  uint32_t getTlsDescIdx(Ctx &ctx) const {
    return ctx.symAux[auxIdx].tlsDescIdx;
  }
  uint32_t getTlsGdIdx(Ctx &ctx) const { return ctx.symAux[auxIdx].tlsGdIdx; }

  // An index equal to uint32_t(-1) is treated as "no entry".
  bool isInGot(Ctx &ctx) const { return getGotIdx(ctx) != uint32_t(-1); }
  bool isInPlt(Ctx &ctx) const { return getPltIdx(ctx) != uint32_t(-1); }

  uint64_t getVA(Ctx &, int64_t addend = 0) const;

  uint64_t getGotOffset(Ctx &) const;
  uint64_t getGotVA(Ctx &) const;
  uint64_t getGotPltOffset(Ctx &) const;
  uint64_t getGotPltVA(Ctx &) const;
  uint64_t getPltVA(Ctx &) const;
  uint64_t getSize() const;
  OutputSection *getOutputSection() const;

  // The following two functions are used for symbol resolution.
  //
  // You are expected to call mergeProperties for all symbols in input
  // files so that attributes that are attached to names rather than
  // individual symbols (such as visibility) are merged together.
  //
  // Every time you read a new symbol from an input, you are supposed
  // to call resolve() with the new symbol. That function replaces
  // "this" object as a result of name resolution if the new symbol is
  // more appropriate to be included in the output.
  //
  // For example, if "this" is an undefined symbol and a new symbol is
  // a defined symbol, "this" is replaced with the new symbol.
  void mergeProperties(const Symbol &other);
  void resolve(Ctx &, const Undefined &other);
  void resolve(Ctx &, const CommonSymbol &other);
  void resolve(Ctx &, const Defined &other);
  void resolve(Ctx &, const LazySymbol &other);
  void resolve(Ctx &, const SharedSymbol &other);

  // If this is a lazy symbol, extract an input file and add the symbol
  // in the file to the symbol table. Calling this function on
  // non-lazy object causes a runtime error.
  void extract(Ctx &) const;

  void checkDuplicate(Ctx &, const Defined &other) const;

private:
  bool shouldReplace(Ctx &, const Defined &other) const;

protected:
  // Sets only the members named in the initializer list; every other member
  // keeps whatever the underlying storage held (makeDefined() zero-fills
  // sizeof(Symbol) bytes before constructing).
  Symbol(Kind k, InputFile *file, StringRef name, uint8_t binding,
         uint8_t stOther, uint8_t type)
      : file(file), nameData(name.data()), nameSize(name.size()), type(type),
        binding(binding), stOther(stOther), symbolKind(k), exportDynamic(false),
        ltoCanOmit(false), archSpecificBit(false) {}

  // Copies file, type, binding and the non-visibility bits of stOther from
  // this symbol into `sym`, and sets sym's kind to `k`. sym keeps its own
  // name and its own visibility bits. If sym is traced, the change is reported
  // through printTraceSymbol() before anything is written.
  void overwrite(Symbol &sym, Kind k) const {
    if (sym.traced)
      printTraceSymbol(*this, sym.getName());
    sym.file = file;
    sym.type = type;
    sym.binding = binding;
    sym.stOther = (stOther & ~3) | sym.visibility();
    sym.symbolKind = k;
  }

public:
  // True if this symbol is in the Iplt sub-section of the Plt and the Igot
  // sub-section of the .got.plt or .got.
  LLVM_PREFERRED_TYPE(bool)
  uint8_t isInIplt : 1;

  // True if this symbol needs a GOT entry and its GOT entry is actually in
  // Igot. This will be true only for certain non-preemptible ifuncs.
  LLVM_PREFERRED_TYPE(bool)
  uint8_t gotInIgot : 1;

  // True if defined relative to a section discarded by ICF.
  LLVM_PREFERRED_TYPE(bool)
  uint8_t folded : 1;

  // Allow reuse of a bit between architecture-exclusive symbol flags.
  // - needsTocRestore(): On PPC64, true if a call to this symbol needs to be
  //   followed by a restore of the toc pointer.
  // - isTagged(): On AArch64, true if the symbol needs special relocation and
  //   metadata semantics because it's tagged, under the AArch64 MemtagABI.
  LLVM_PREFERRED_TYPE(bool)
  uint8_t archSpecificBit : 1;
  // Both accessor pairs below read and write the same archSpecificBit.
  bool needsTocRestore() const { return archSpecificBit; }
  bool isTagged() const { return archSpecificBit; }
  void setNeedsTocRestore(bool v) { archSpecificBit = v; }
  void setIsTagged(bool v) {
    archSpecificBit = v;
  }

  // True if this symbol is defined by a symbol assignment or wrapped by --wrap.
  //
  // LTO shouldn't inline the symbol because it doesn't know the final content
  // of the symbol.
  LLVM_PREFERRED_TYPE(bool)
  uint8_t scriptDefined : 1;

  // True if defined in a DSO. There may also be a definition in a relocatable
  // object file.
  LLVM_PREFERRED_TYPE(bool)
  uint8_t dsoDefined : 1;

  // True if defined in a DSO as protected visibility.
  LLVM_PREFERRED_TYPE(bool)
  uint8_t dsoProtected : 1;

  // Temporary flags used to communicate which symbol entries need PLT and GOT
  // entries during postScanRelocations();
  std::atomic<uint16_t> flags;

  // A ctx.symAux index used to access GOT/PLT entry indexes. This is allocated
  // in postScanRelocations().
  uint32_t auxIdx;
  // NOTE(review): not documented in this header; presumably the symbol's
  // index in the output dynamic symbol table -- confirm.
  uint32_t dynsymIndex;

  // If `file` is SharedFile (for SharedSymbol or copy-relocated Defined), this
  // represents the Verdef index within the input DSO, which will be converted
  // to a Verneed index in the output. Otherwise, this represents the Verdef
  // index (VER_NDX_LOCAL, VER_NDX_GLOBAL, or a named version).
  uint16_t versionId;
  LLVM_PREFERRED_TYPE(bool)
  uint8_t versionScriptAssigned : 1;

  // True if targeted by a range extension thunk.
  LLVM_PREFERRED_TYPE(bool)
  uint8_t thunkAccessed : 1;

  // True if the symbol is in the --dynamic-list file. A Defined symbol with
  // protected or default visibility with this property is required to be
  // exported into .dynsym.
  LLVM_PREFERRED_TYPE(bool)
  uint8_t inDynamicList : 1;

  // Used to track if there has been at least one undefined reference to the
  // symbol. For Undefined and SharedSymbol, the binding may change to STB_WEAK
  // if the first undefined reference from a non-shared object is weak.
  LLVM_PREFERRED_TYPE(bool)
  uint8_t referenced : 1;

  // Used to track if this symbol will be referenced after wrapping is performed
  // (i.e. this will be true for foo if __real_foo is referenced, and will be
  // true for __wrap_foo if foo is referenced).
  LLVM_PREFERRED_TYPE(bool)
  uint8_t referencedAfterWrap : 1;

  // Atomically ORs `bits` into flags using relaxed memory ordering.
  void setFlags(uint16_t bits) {
    flags.fetch_or(bits, std::memory_order_relaxed);
  }
  // Tests a single flag. `bit` must have exactly one bit set; this is checked
  // by the assertion.
  bool hasFlag(uint16_t bit) const {
    assert(bit && (bit & (bit - 1)) == 0 && "bit must be a power of 2");
    return flags.load(std::memory_order_relaxed) & bit;
  }

  // True if any flag in the mask below is set. The mask leaves out
  // HAS_DIRECT_RELOC and the *_AUTH / *_NONAUTH flags.
  bool needsDynReloc() const {
    return flags.load(std::memory_order_relaxed) &
           (NEEDS_COPY | NEEDS_GOT | NEEDS_PLT | NEEDS_TLSDESC | NEEDS_TLSGD |
            NEEDS_TLSGD_TO_IE | NEEDS_GOT_DTPREL | NEEDS_TLSIE);
  }
  // Appends a default-constructed entry to ctx.symAux and stores its index in
  // auxIdx. Asserts that auxIdx is still 0 on entry.
  void allocateAux(Ctx &ctx) {
    assert(auxIdx == 0);
    auxIdx = ctx.symAux.size();
    ctx.symAux.emplace_back();
  }

  // ELF symbol-type predicates; each compares `type` against one STT_* value.
  bool isSection() const { return type == llvm::ELF::STT_SECTION; }
  bool isTls() const { return type == llvm::ELF::STT_TLS; }
  bool isFunc() const { return type == llvm::ELF::STT_FUNC; }
  bool isGnuIFunc() const { return type == llvm::ELF::STT_GNU_IFUNC; }
  bool isObject() const { return type == llvm::ELF::STT_OBJECT; }
  bool isFile() const { return type == llvm::ELF::STT_FILE; }
};

// Represents a symbol that is defined in the current output file.
375 class Defined : public Symbol { 376 public: 377 Defined(Ctx &ctx, InputFile *file, StringRef name, uint8_t binding, 378 uint8_t stOther, uint8_t type, uint64_t value, uint64_t size, 379 SectionBase *section) 380 : Symbol(DefinedKind, file, name, binding, stOther, type), value(value), 381 size(size), section(section) { 382 } 383 void overwrite(Symbol &sym) const; 384 385 static bool classof(const Symbol *s) { return s->isDefined(); } 386 387 uint64_t value; 388 uint64_t size; 389 SectionBase *section; 390 }; 391 392 // Represents a common symbol. 393 // 394 // On Unix, it is traditionally allowed to write variable definitions 395 // without initialization expressions (such as "int foo;") to header 396 // files. Such definition is called "tentative definition". 397 // 398 // Using tentative definition is usually considered a bad practice 399 // because you should write only declarations (such as "extern int 400 // foo;") to header files. Nevertheless, the linker and the compiler 401 // have to do something to support bad code by allowing duplicate 402 // definitions for this particular case. 403 // 404 // Common symbols represent variable definitions without initializations. 405 // The compiler creates common symbols when it sees variable definitions 406 // without initialization (you can suppress this behavior and let the 407 // compiler create a regular defined symbol by -fno-common). 408 // 409 // The linker allows common symbols to be replaced by regular defined 410 // symbols. If there are remaining common symbols after name resolution is 411 // complete, they are converted to regular defined symbols in a .bss 412 // section. (Therefore, the later passes don't see any CommonSymbols.) 
413 class CommonSymbol : public Symbol { 414 public: 415 CommonSymbol(Ctx &ctx, InputFile *file, StringRef name, uint8_t binding, 416 uint8_t stOther, uint8_t type, uint64_t alignment, uint64_t size) 417 : Symbol(CommonKind, file, name, binding, stOther, type), 418 alignment(alignment), size(size) { 419 } 420 void overwrite(Symbol &sym) const { 421 Symbol::overwrite(sym, CommonKind); 422 auto &s = static_cast<CommonSymbol &>(sym); 423 s.alignment = alignment; 424 s.size = size; 425 } 426 427 static bool classof(const Symbol *s) { return s->isCommon(); } 428 429 uint32_t alignment; 430 uint64_t size; 431 }; 432 433 class Undefined : public Symbol { 434 public: 435 Undefined(InputFile *file, StringRef name, uint8_t binding, uint8_t stOther, 436 uint8_t type, uint32_t discardedSecIdx = 0) 437 : Symbol(UndefinedKind, file, name, binding, stOther, type), 438 discardedSecIdx(discardedSecIdx) {} 439 void overwrite(Symbol &sym) const { 440 Symbol::overwrite(sym, UndefinedKind); 441 auto &s = static_cast<Undefined &>(sym); 442 s.discardedSecIdx = discardedSecIdx; 443 s.nonPrevailing = nonPrevailing; 444 } 445 446 static bool classof(const Symbol *s) { return s->kind() == UndefinedKind; } 447 448 // The section index if in a discarded section, 0 otherwise. 449 uint32_t discardedSecIdx; 450 bool nonPrevailing = false; 451 }; 452 453 class SharedSymbol : public Symbol { 454 public: 455 static bool classof(const Symbol *s) { return s->kind() == SharedKind; } 456 457 SharedSymbol(InputFile &file, StringRef name, uint8_t binding, 458 uint8_t stOther, uint8_t type, uint64_t value, uint64_t size, 459 uint32_t alignment) 460 : Symbol(SharedKind, &file, name, binding, stOther, type), value(value), 461 size(size), alignment(alignment) { 462 dsoProtected = visibility() == llvm::ELF::STV_PROTECTED; 463 // GNU ifunc is a mechanism to allow user-supplied functions to 464 // resolve PLT slot values at load-time. 
This is contrary to the 465 // regular symbol resolution scheme in which symbols are resolved just 466 // by name. Using this hook, you can program how symbols are solved 467 // for you program. For example, you can make "memcpy" to be resolved 468 // to a SSE-enabled version of memcpy only when a machine running the 469 // program supports the SSE instruction set. 470 // 471 // Naturally, such symbols should always be called through their PLT 472 // slots. What GNU ifunc symbols point to are resolver functions, and 473 // calling them directly doesn't make sense (unless you are writing a 474 // loader). 475 // 476 // For DSO symbols, we always call them through PLT slots anyway. 477 // So there's no difference between GNU ifunc and regular function 478 // symbols if they are in DSOs. So we can handle GNU_IFUNC as FUNC. 479 if (this->type == llvm::ELF::STT_GNU_IFUNC) 480 this->type = llvm::ELF::STT_FUNC; 481 } 482 void overwrite(Symbol &sym) const { 483 Symbol::overwrite(sym, SharedKind); 484 auto &s = static_cast<SharedSymbol &>(sym); 485 s.dsoProtected = dsoProtected; 486 s.value = value; 487 s.size = size; 488 s.alignment = alignment; 489 } 490 491 uint64_t value; // st_value 492 uint64_t size; // st_size 493 uint32_t alignment; 494 }; 495 496 // LazySymbol symbols represent symbols in object files between --start-lib and 497 // --end-lib options. LLD also handles traditional archives as if all the files 498 // in the archive are surrounded by --start-lib and --end-lib. 499 // 500 // A special complication is the handling of weak undefined symbols. They should 501 // not load a file, but we have to remember we have seen both the weak undefined 502 // and the lazy. We represent that with a lazy symbol with a weak binding. This 503 // means that code looking for undefined symbols normally also has to take lazy 504 // symbols into consideration. 
505 class LazySymbol : public Symbol { 506 public: 507 LazySymbol(InputFile &file) 508 : Symbol(LazyKind, &file, {}, llvm::ELF::STB_GLOBAL, 509 llvm::ELF::STV_DEFAULT, llvm::ELF::STT_NOTYPE) {} 510 void overwrite(Symbol &sym) const { Symbol::overwrite(sym, LazyKind); } 511 512 static bool classof(const Symbol *s) { return s->kind() == LazyKind; } 513 }; 514 515 // A buffer class that is large enough to hold any Symbol-derived 516 // object. We allocate memory using this class and instantiate a symbol 517 // using the placement new. 518 519 // It is important to keep the size of SymbolUnion small for performance and 520 // memory usage reasons. 64 bytes is a soft limit based on the size of Defined 521 // on a 64-bit system. This is enforced by a static_assert in Symbols.cpp. 522 union SymbolUnion { 523 alignas(Defined) char a[sizeof(Defined)]; 524 alignas(CommonSymbol) char b[sizeof(CommonSymbol)]; 525 alignas(Undefined) char c[sizeof(Undefined)]; 526 alignas(SharedSymbol) char d[sizeof(SharedSymbol)]; 527 alignas(LazySymbol) char e[sizeof(LazySymbol)]; 528 }; 529 530 template <typename... T> Defined *makeDefined(T &&...args) { 531 auto *sym = getSpecificAllocSingleton<SymbolUnion>().Allocate(); 532 memset(sym, 0, sizeof(Symbol)); 533 auto &s = *new (reinterpret_cast<Defined *>(sym)) Defined(std::forward<T>(args)...); 534 return &s; 535 } 536 537 void reportDuplicate(Ctx &, const Symbol &sym, const InputFile *newFile, 538 InputSectionBase *errSec, uint64_t errOffset); 539 void maybeWarnUnorderableSymbol(Ctx &, const Symbol *sym); 540 bool computeIsPreemptible(Ctx &, const Symbol &sym); 541 void parseVersionAndComputeIsPreemptible(Ctx &); 542 543 } // namespace lld::elf 544 545 #endif 546