1 //===- Symbols.h ------------------------------------------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #ifndef LLD_COFF_SYMBOLS_H 10 #define LLD_COFF_SYMBOLS_H 11 12 #include "Chunks.h" 13 #include "Config.h" 14 #include "lld/Common/LLVM.h" 15 #include "lld/Common/Memory.h" 16 #include "llvm/ADT/ArrayRef.h" 17 #include "llvm/Object/Archive.h" 18 #include "llvm/Object/COFF.h" 19 #include <atomic> 20 #include <memory> 21 #include <vector> 22 23 namespace lld { 24 25 namespace coff { 26 27 using llvm::object::Archive; 28 using llvm::object::COFFSymbolRef; 29 using llvm::object::coff_import_header; 30 using llvm::object::coff_symbol_generic; 31 32 class ArchiveFile; 33 class COFFLinkerContext; 34 class InputFile; 35 class ObjFile; 36 class Symbol; 37 class SymbolTable; 38 39 const COFFSyncStream &operator<<(const COFFSyncStream &, 40 const llvm::object::Archive::Symbol *); 41 const COFFSyncStream &operator<<(const COFFSyncStream &, Symbol *); 42 43 // The base class for real symbol classes. 44 class Symbol { 45 public: 46 enum Kind { 47 // The order of these is significant. We start with the regular defined 48 // symbols as those are the most prevalent and the zero tag is the cheapest 49 // to set. Among the defined kinds, the lower the kind is preferred over 50 // the higher kind when testing whether one symbol should take precedence 51 // over another. 52 DefinedRegularKind = 0, 53 DefinedCommonKind, 54 DefinedLocalImportKind, 55 DefinedImportThunkKind, 56 DefinedImportDataKind, 57 DefinedAbsoluteKind, 58 DefinedSyntheticKind, 59 60 UndefinedKind, 61 LazyArchiveKind, 62 LazyObjectKind, 63 LazyDLLSymbolKind, 64 65 LastDefinedCOFFKind = DefinedCommonKind, 66 LastDefinedKind = DefinedSyntheticKind, 67 }; 68 69 Kind kind() const { return static_cast<Kind>(symbolKind); } 70 71 // Returns the symbol name. 72 StringRef getName() { 73 // COFF symbol names are read lazily for a performance reason. 74 // Non-external symbol names are never used by the linker except for logging 75 // or debugging. Their internal references are resolved not by name but by 76 // symbol index. And because they are not external, no one can refer them by 77 // name. Object files contain lots of non-external symbols, and creating 78 // StringRefs for them (which involves lots of strlen() on the string table) 79 // is a waste of time. 80 if (nameData == nullptr) 81 computeName(); 82 return StringRef(nameData, nameSize); 83 } 84 85 void replaceKeepingName(Symbol *other, size_t size); 86 87 // Returns the file from which this symbol was created. 88 InputFile *getFile(); 89 90 // Indicates that this symbol will be included in the final image. Only valid 91 // after calling markLive. 92 bool isLive() const; 93 94 bool isLazy() const { 95 return symbolKind == LazyArchiveKind || symbolKind == LazyObjectKind || 96 symbolKind == LazyDLLSymbolKind; 97 } 98 99 private: 100 void computeName(); 101 102 protected: 103 friend SymbolTable; 104 explicit Symbol(Kind k, StringRef n = "") 105 : symbolKind(k), isExternal(true), isCOMDAT(false), 106 writtenToSymtab(false), isUsedInRegularObj(false), 107 pendingArchiveLoad(false), isGCRoot(false), isRuntimePseudoReloc(false), 108 deferUndefined(false), canInline(true), isWeak(false), isAntiDep(false), 109 nameSize(n.size()), nameData(n.empty() ? nullptr : n.data()) { 110 assert((!n.empty() || k <= LastDefinedCOFFKind) && 111 "If the name is empty, the Symbol must be a DefinedCOFF."); 112 } 113 114 unsigned symbolKind : 8; 115 unsigned isExternal : 1; 116 117 public: 118 // This bit is used by the \c DefinedRegular subclass. 119 unsigned isCOMDAT : 1; 120 121 // This bit is used by Writer::createSymbolAndStringTable() to prevent 122 // symbols from being written to the symbol table more than once. 123 unsigned writtenToSymtab : 1; 124 125 // True if this symbol was referenced by a regular (non-bitcode) object. 126 unsigned isUsedInRegularObj : 1; 127 128 // True if we've seen both a lazy and an undefined symbol with this symbol 129 // name, which means that we have enqueued an archive member load and should 130 // not load any more archive members to resolve the same symbol. 131 unsigned pendingArchiveLoad : 1; 132 133 /// True if we've already added this symbol to the list of GC roots. 134 unsigned isGCRoot : 1; 135 136 unsigned isRuntimePseudoReloc : 1; 137 138 // True if we want to allow this symbol to be undefined in the early 139 // undefined check pass in SymbolTable::reportUnresolvable(), as it 140 // might be fixed up later. 141 unsigned deferUndefined : 1; 142 143 // False if LTO shouldn't inline whatever this symbol points to. If a symbol 144 // is overwritten after LTO, LTO shouldn't inline the symbol because it 145 // doesn't know the final contents of the symbol. 146 unsigned canInline : 1; 147 148 // True if the symbol is weak. This is only tracked for bitcode/LTO symbols. 149 // This information isn't written to the output; rather, it's used for 150 // managing weak symbol overrides. 151 unsigned isWeak : 1; 152 153 // True if the symbol is an anti-dependency. 154 unsigned isAntiDep : 1; 155 156 protected: 157 // Symbol name length. Assume symbol lengths fit in a 32-bit integer. 158 uint32_t nameSize; 159 160 const char *nameData; 161 }; 162 163 // The base class for any defined symbols, including absolute symbols, 164 // etc. 165 class Defined : public Symbol { 166 public: 167 Defined(Kind k, StringRef n) : Symbol(k, n) {} 168 169 static bool classof(const Symbol *s) { return s->kind() <= LastDefinedKind; } 170 171 // Returns the RVA (relative virtual address) of this symbol. The 172 // writer sets and uses RVAs. 173 uint64_t getRVA(); 174 175 // Returns the chunk containing this symbol. Absolute symbols and __ImageBase 176 // do not have chunks, so this may return null. 177 Chunk *getChunk(); 178 }; 179 180 // Symbols defined via a COFF object file or bitcode file. For COFF files, this 181 // stores a coff_symbol_generic*, and names of internal symbols are lazily 182 // loaded through that. For bitcode files, Sym is nullptr and the name is stored 183 // as a decomposed StringRef. 184 class DefinedCOFF : public Defined { 185 friend Symbol; 186 187 public: 188 DefinedCOFF(Kind k, InputFile *f, StringRef n, const coff_symbol_generic *s) 189 : Defined(k, n), file(f), sym(s) {} 190 191 static bool classof(const Symbol *s) { 192 return s->kind() <= LastDefinedCOFFKind; 193 } 194 195 InputFile *getFile() { return file; } 196 197 COFFSymbolRef getCOFFSymbol(); 198 199 InputFile *file; 200 201 protected: 202 const coff_symbol_generic *sym; 203 }; 204 205 // Regular defined symbols read from object file symbol tables. 206 class DefinedRegular : public DefinedCOFF { 207 public: 208 DefinedRegular(InputFile *f, StringRef n, bool isCOMDAT, 209 bool isExternal = false, 210 const coff_symbol_generic *s = nullptr, 211 SectionChunk *c = nullptr, bool isWeak = false) 212 : DefinedCOFF(DefinedRegularKind, f, n, s), data(c ? &c->repl : nullptr) { 213 this->isExternal = isExternal; 214 this->isCOMDAT = isCOMDAT; 215 this->isWeak = isWeak; 216 } 217 218 static bool classof(const Symbol *s) { 219 return s->kind() == DefinedRegularKind; 220 } 221 222 uint64_t getRVA() const { return (*data)->getRVA() + sym->Value; } 223 SectionChunk *getChunk() const { return *data; } 224 uint32_t getValue() const { return sym->Value; } 225 226 SectionChunk **data; 227 }; 228 229 class DefinedCommon : public DefinedCOFF { 230 public: 231 DefinedCommon(InputFile *f, StringRef n, uint64_t size, 232 const coff_symbol_generic *s = nullptr, 233 CommonChunk *c = nullptr) 234 : DefinedCOFF(DefinedCommonKind, f, n, s), data(c), size(size) { 235 this->isExternal = true; 236 } 237 238 static bool classof(const Symbol *s) { 239 return s->kind() == DefinedCommonKind; 240 } 241 242 uint64_t getRVA() { return data->getRVA(); } 243 CommonChunk *getChunk() { return data; } 244 245 private: 246 friend SymbolTable; 247 uint64_t getSize() const { return size; } 248 CommonChunk *data; 249 uint64_t size; 250 }; 251 252 // Absolute symbols. 253 class DefinedAbsolute : public Defined { 254 public: 255 DefinedAbsolute(const COFFLinkerContext &c, StringRef n, COFFSymbolRef s) 256 : Defined(DefinedAbsoluteKind, n), va(s.getValue()), ctx(c) { 257 isExternal = s.isExternal(); 258 } 259 260 DefinedAbsolute(const COFFLinkerContext &c, StringRef n, uint64_t v) 261 : Defined(DefinedAbsoluteKind, n), va(v), ctx(c) {} 262 263 static bool classof(const Symbol *s) { 264 return s->kind() == DefinedAbsoluteKind; 265 } 266 267 uint64_t getRVA(); 268 void setVA(uint64_t v) { va = v; } 269 uint64_t getVA() const { return va; } 270 271 private: 272 uint64_t va; 273 const COFFLinkerContext &ctx; 274 }; 275 276 // This symbol is used for linker-synthesized symbols like __ImageBase and 277 // __safe_se_handler_table. 278 class DefinedSynthetic : public Defined { 279 public: 280 explicit DefinedSynthetic(StringRef name, Chunk *c, uint32_t offset = 0) 281 : Defined(DefinedSyntheticKind, name), c(c), offset(offset) {} 282 283 static bool classof(const Symbol *s) { 284 return s->kind() == DefinedSyntheticKind; 285 } 286 287 // A null chunk indicates that this is __ImageBase. Otherwise, this is some 288 // other synthesized chunk, like SEHTableChunk. 289 uint32_t getRVA() { return c ? c->getRVA() + offset : 0; } 290 Chunk *getChunk() { return c; } 291 292 private: 293 Chunk *c; 294 uint32_t offset; 295 }; 296 297 // This class represents a symbol defined in an archive file. It is 298 // created from an archive file header, and it knows how to load an 299 // object file from an archive to replace itself with a defined 300 // symbol. If the resolver finds both Undefined and LazyArchive for 301 // the same name, it will ask the LazyArchive to load a file. 302 class LazyArchive : public Symbol { 303 public: 304 LazyArchive(ArchiveFile *f, const Archive::Symbol s) 305 : Symbol(LazyArchiveKind, s.getName()), file(f), sym(s) {} 306 307 static bool classof(const Symbol *s) { return s->kind() == LazyArchiveKind; } 308 309 MemoryBufferRef getMemberBuffer(); 310 311 ArchiveFile *file; 312 const Archive::Symbol sym; 313 }; 314 315 class LazyObject : public Symbol { 316 public: 317 LazyObject(InputFile *f, StringRef n) : Symbol(LazyObjectKind, n), file(f) {} 318 static bool classof(const Symbol *s) { return s->kind() == LazyObjectKind; } 319 InputFile *file; 320 }; 321 322 // MinGW only. 323 class LazyDLLSymbol : public Symbol { 324 public: 325 LazyDLLSymbol(DLLFile *f, DLLFile::Symbol *s, StringRef n) 326 : Symbol(LazyDLLSymbolKind, n), file(f), sym(s) {} 327 static bool classof(const Symbol *s) { 328 return s->kind() == LazyDLLSymbolKind; 329 } 330 331 DLLFile *file; 332 DLLFile::Symbol *sym; 333 }; 334 335 // Undefined symbols. 336 class Undefined : public Symbol { 337 public: 338 explicit Undefined(StringRef n) : Symbol(UndefinedKind, n) {} 339 340 static bool classof(const Symbol *s) { return s->kind() == UndefinedKind; } 341 342 // An undefined symbol can have a fallback symbol which gives an 343 // undefined symbol a second chance if it would remain undefined. 344 // If it remains undefined, it'll be replaced with whatever the 345 // Alias pointer points to. 346 Symbol *weakAlias = nullptr; 347 348 // If this symbol is external weak, try to resolve it to a defined 349 // symbol by searching the chain of fallback symbols. Returns the symbol if 350 // successful, otherwise returns null. 351 Symbol *getWeakAlias(); 352 Defined *getDefinedWeakAlias() { 353 return dyn_cast_or_null<Defined>(getWeakAlias()); 354 } 355 356 void setWeakAlias(Symbol *sym, bool antiDep = false) { 357 weakAlias = sym; 358 isAntiDep = antiDep; 359 } 360 361 bool isECAlias(MachineTypes machine) const { 362 return weakAlias && isAntiDep && isArm64EC(machine); 363 } 364 365 // If this symbol is external weak, replace this object with aliased symbol. 366 bool resolveWeakAlias(); 367 }; 368 369 // Windows-specific classes. 370 371 // This class represents a symbol imported from a DLL. This has two 372 // names for internal use and external use. The former is used for 373 // name resolution, and the latter is used for the import descriptor 374 // table in an output. The former has "__imp_" prefix. 375 class DefinedImportData : public Defined { 376 public: 377 DefinedImportData(StringRef n, ImportFile *file, Chunk *&location) 378 : Defined(DefinedImportDataKind, n), file(file), location(location) {} 379 380 static bool classof(const Symbol *s) { 381 return s->kind() == DefinedImportDataKind; 382 } 383 384 uint64_t getRVA() { return getChunk()->getRVA(); } 385 Chunk *getChunk() { return location; } 386 void setLocation(Chunk *addressTable) { location = addressTable; } 387 388 StringRef getDLLName() { return file->dllName; } 389 StringRef getExternalName() { return file->externalName; } 390 uint16_t getOrdinal() { return file->hdr->OrdinalHint; } 391 392 ImportFile *file; 393 Chunk *&location; 394 395 // This is a pointer to the synthetic symbol associated with the load thunk 396 // for this symbol that will be called if the DLL is delay-loaded. This is 397 // needed for Control Flow Guard because if this DefinedImportData symbol is a 398 // valid call target, the corresponding load thunk must also be marked as a 399 // valid call target. 400 DefinedSynthetic *loadThunkSym = nullptr; 401 }; 402 403 // This class represents a symbol for a jump table entry which jumps 404 // to a function in a DLL. Linker are supposed to create such symbols 405 // without "__imp_" prefix for all function symbols exported from 406 // DLLs, so that you can call DLL functions as regular functions with 407 // a regular name. A function pointer is given as a DefinedImportData. 408 class DefinedImportThunk : public Defined { 409 public: 410 DefinedImportThunk(COFFLinkerContext &ctx, StringRef name, 411 DefinedImportData *s, ImportThunkChunk *chunk); 412 413 static bool classof(const Symbol *s) { 414 return s->kind() == DefinedImportThunkKind; 415 } 416 417 uint64_t getRVA() { return data->getRVA(); } 418 ImportThunkChunk *getChunk() const { return data; } 419 420 DefinedImportData *wrappedSym; 421 422 private: 423 ImportThunkChunk *data; 424 }; 425 426 // If you have a symbol "foo" in your object file, a symbol name 427 // "__imp_foo" becomes automatically available as a pointer to "foo". 428 // This class is for such automatically-created symbols. 429 // Yes, this is an odd feature. We didn't intend to implement that. 430 // This is here just for compatibility with MSVC. 431 class DefinedLocalImport : public Defined { 432 public: 433 DefinedLocalImport(COFFLinkerContext &ctx, StringRef n, Defined *s) 434 : Defined(DefinedLocalImportKind, n), 435 data(make<LocalImportChunk>(ctx, s)) {} 436 437 static bool classof(const Symbol *s) { 438 return s->kind() == DefinedLocalImportKind; 439 } 440 441 uint64_t getRVA() { return data->getRVA(); } 442 Chunk *getChunk() { return data; } 443 444 private: 445 LocalImportChunk *data; 446 }; 447 448 inline uint64_t Defined::getRVA() { 449 switch (kind()) { 450 case DefinedAbsoluteKind: 451 return cast<DefinedAbsolute>(this)->getRVA(); 452 case DefinedSyntheticKind: 453 return cast<DefinedSynthetic>(this)->getRVA(); 454 case DefinedImportDataKind: 455 return cast<DefinedImportData>(this)->getRVA(); 456 case DefinedImportThunkKind: 457 return cast<DefinedImportThunk>(this)->getRVA(); 458 case DefinedLocalImportKind: 459 return cast<DefinedLocalImport>(this)->getRVA(); 460 case DefinedCommonKind: 461 return cast<DefinedCommon>(this)->getRVA(); 462 case DefinedRegularKind: 463 return cast<DefinedRegular>(this)->getRVA(); 464 case LazyArchiveKind: 465 case LazyObjectKind: 466 case LazyDLLSymbolKind: 467 case UndefinedKind: 468 llvm_unreachable("Cannot get the address for an undefined symbol."); 469 } 470 llvm_unreachable("unknown symbol kind"); 471 } 472 473 inline Chunk *Defined::getChunk() { 474 switch (kind()) { 475 case DefinedRegularKind: 476 return cast<DefinedRegular>(this)->getChunk(); 477 case DefinedAbsoluteKind: 478 return nullptr; 479 case DefinedSyntheticKind: 480 return cast<DefinedSynthetic>(this)->getChunk(); 481 case DefinedImportDataKind: 482 return cast<DefinedImportData>(this)->getChunk(); 483 case DefinedImportThunkKind: 484 return cast<DefinedImportThunk>(this)->getChunk(); 485 case DefinedLocalImportKind: 486 return cast<DefinedLocalImport>(this)->getChunk(); 487 case DefinedCommonKind: 488 return cast<DefinedCommon>(this)->getChunk(); 489 case LazyArchiveKind: 490 case LazyObjectKind: 491 case LazyDLLSymbolKind: 492 case UndefinedKind: 493 llvm_unreachable("Cannot get the chunk of an undefined symbol."); 494 } 495 llvm_unreachable("unknown symbol kind"); 496 } 497 498 // A buffer class that is large enough to hold any Symbol-derived 499 // object. We allocate memory using this class and instantiate a symbol 500 // using the placement new. 501 union SymbolUnion { 502 alignas(DefinedRegular) char a[sizeof(DefinedRegular)]; 503 alignas(DefinedCommon) char b[sizeof(DefinedCommon)]; 504 alignas(DefinedAbsolute) char c[sizeof(DefinedAbsolute)]; 505 alignas(DefinedSynthetic) char d[sizeof(DefinedSynthetic)]; 506 alignas(LazyArchive) char e[sizeof(LazyArchive)]; 507 alignas(Undefined) char f[sizeof(Undefined)]; 508 alignas(DefinedImportData) char g[sizeof(DefinedImportData)]; 509 alignas(DefinedImportThunk) char h[sizeof(DefinedImportThunk)]; 510 alignas(DefinedLocalImport) char i[sizeof(DefinedLocalImport)]; 511 alignas(LazyObject) char j[sizeof(LazyObject)]; 512 alignas(LazyDLLSymbol) char k[sizeof(LazyDLLSymbol)]; 513 }; 514 515 template <typename T, typename... ArgT> 516 void replaceSymbol(Symbol *s, ArgT &&... arg) { 517 static_assert(std::is_trivially_destructible<T>(), 518 "Symbol types must be trivially destructible"); 519 static_assert(sizeof(T) <= sizeof(SymbolUnion), "Symbol too small"); 520 static_assert(alignof(T) <= alignof(SymbolUnion), 521 "SymbolUnion not aligned enough"); 522 assert(static_cast<Symbol *>(static_cast<T *>(nullptr)) == nullptr && 523 "Not a Symbol"); 524 bool canInline = s->canInline; 525 bool isUsedInRegularObj = s->isUsedInRegularObj; 526 new (s) T(std::forward<ArgT>(arg)...); 527 s->canInline = canInline; 528 s->isUsedInRegularObj = isUsedInRegularObj; 529 } 530 } // namespace coff 531 532 std::string toString(const coff::COFFLinkerContext &ctx, coff::Symbol &b); 533 std::string toCOFFString(const coff::COFFLinkerContext &ctx, 534 const llvm::object::Archive::Symbol &b); 535 536 // Returns a symbol name for an error message. 537 std::string maybeDemangleSymbol(const coff::COFFLinkerContext &ctx, 538 StringRef symName); 539 540 } // namespace lld 541 542 #endif 543