//===- Symbols.h ------------------------------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#ifndef LLD_WASM_SYMBOLS_H
#define LLD_WASM_SYMBOLS_H

#include "Config.h"
#include "lld/Common/LLVM.h"
#include "llvm/Object/Archive.h"
#include "llvm/Object/Wasm.h"
#include <type_traits>

namespace lld {
namespace wasm {

// Shared string constants

// The default module name to use for symbol imports.
extern const char *defaultModule;

// The name under which to import or export the wasm table.
extern const char *functionTableName;

using llvm::wasm::WasmSymbolType;

class InputFile;
class InputChunk;
class InputSegment;
class InputFunction;
class InputGlobal;
class InputEvent;
class InputSection;
class OutputSection;

#define INVALID_INDEX UINT32_MAX

// The base class for real symbol classes.
42 class Symbol { 43 public: 44 enum Kind : uint8_t { 45 DefinedFunctionKind, 46 DefinedDataKind, 47 DefinedGlobalKind, 48 DefinedEventKind, 49 SectionKind, 50 OutputSectionKind, 51 UndefinedFunctionKind, 52 UndefinedDataKind, 53 UndefinedGlobalKind, 54 LazyKind, 55 }; 56 57 Kind kind() const { return symbolKind; } 58 59 bool isDefined() const { return !isLazy() && !isUndefined(); } 60 61 bool isUndefined() const { 62 return symbolKind == UndefinedFunctionKind || 63 symbolKind == UndefinedDataKind || symbolKind == UndefinedGlobalKind; 64 } 65 66 bool isLazy() const { return symbolKind == LazyKind; } 67 68 bool isLocal() const; 69 bool isWeak() const; 70 bool isHidden() const; 71 72 // Returns true if this symbol exists in a discarded (due to COMDAT) section 73 bool isDiscarded() const; 74 75 // True if this is an undefined weak symbol. This only works once 76 // all input files have been added. 77 bool isUndefWeak() const { 78 // See comment on lazy symbols for details. 79 return isWeak() && (isUndefined() || isLazy()); 80 } 81 82 // Returns the symbol name. 83 StringRef getName() const { return name; } 84 85 // Returns the file from which this symbol was created. 86 InputFile *getFile() const { return file; } 87 88 uint32_t getFlags() const { return flags; } 89 90 InputChunk *getChunk() const; 91 92 // Indicates that the section or import for this symbol will be included in 93 // the final image. 94 bool isLive() const; 95 96 // Marks the symbol's InputChunk as Live, so that it will be included in the 97 // final image. 98 void markLive(); 99 100 void setHidden(bool isHidden); 101 102 // Get/set the index in the output symbol table. This is only used for 103 // relocatable output. 104 uint32_t getOutputSymbolIndex() const; 105 void setOutputSymbolIndex(uint32_t index); 106 107 WasmSymbolType getWasmType() const; 108 bool isExported() const; 109 110 // Indicates that the symbol is used in an __attribute__((used)) directive 111 // or similar. 
112 bool isNoStrip() const; 113 114 const WasmSignature* getSignature() const; 115 116 uint32_t getGOTIndex() const { 117 assert(gotIndex != INVALID_INDEX); 118 return gotIndex; 119 } 120 121 void setGOTIndex(uint32_t index); 122 bool hasGOTIndex() const { return gotIndex != INVALID_INDEX; } 123 124 protected: 125 Symbol(StringRef name, Kind k, uint32_t flags, InputFile *f) 126 : name(name), file(f), flags(flags), symbolKind(k), 127 referenced(!config->gcSections), requiresGOT(false), 128 isUsedInRegularObj(false), forceExport(false), canInline(false), 129 traced(false) {} 130 131 StringRef name; 132 InputFile *file; 133 uint32_t flags; 134 uint32_t outputSymbolIndex = INVALID_INDEX; 135 uint32_t gotIndex = INVALID_INDEX; 136 Kind symbolKind; 137 138 public: 139 bool referenced : 1; 140 141 // True for data symbols that needs a dummy GOT entry. Used for static 142 // linking of GOT accesses. 143 bool requiresGOT : 1; 144 145 // True if the symbol was used for linking and thus need to be added to the 146 // output file's symbol table. This is true for all symbols except for 147 // unreferenced DSO symbols, lazy (archive) symbols, and bitcode symbols that 148 // are unreferenced except by other bitcode objects. 149 bool isUsedInRegularObj : 1; 150 151 // True if ths symbol is explicitly marked for export (i.e. via the 152 // -e/--export command line flag) 153 bool forceExport : 1; 154 155 // False if LTO shouldn't inline whatever this symbol points to. If a symbol 156 // is overwritten after LTO, LTO shouldn't inline the symbol because it 157 // doesn't know the final contents of the symbol. 158 bool canInline : 1; 159 160 // True if this symbol is specified by --trace-symbol option. 
161 bool traced : 1; 162 }; 163 164 class FunctionSymbol : public Symbol { 165 public: 166 static bool classof(const Symbol *s) { 167 return s->kind() == DefinedFunctionKind || 168 s->kind() == UndefinedFunctionKind; 169 } 170 171 // Get/set the table index 172 void setTableIndex(uint32_t index); 173 uint32_t getTableIndex() const; 174 bool hasTableIndex() const; 175 176 // Get/set the function index 177 uint32_t getFunctionIndex() const; 178 void setFunctionIndex(uint32_t index); 179 bool hasFunctionIndex() const; 180 181 const WasmSignature *signature; 182 183 protected: 184 FunctionSymbol(StringRef name, Kind k, uint32_t flags, InputFile *f, 185 const WasmSignature *sig) 186 : Symbol(name, k, flags, f), signature(sig) {} 187 188 uint32_t tableIndex = INVALID_INDEX; 189 uint32_t functionIndex = INVALID_INDEX; 190 }; 191 192 class DefinedFunction : public FunctionSymbol { 193 public: 194 DefinedFunction(StringRef name, uint32_t flags, InputFile *f, 195 InputFunction *function); 196 197 static bool classof(const Symbol *s) { 198 return s->kind() == DefinedFunctionKind; 199 } 200 201 InputFunction *function; 202 }; 203 204 class UndefinedFunction : public FunctionSymbol { 205 public: 206 UndefinedFunction(StringRef name, StringRef importName, 207 StringRef importModule, uint32_t flags, 208 InputFile *file = nullptr, 209 const WasmSignature *type = nullptr, 210 bool isCalledDirectly = true) 211 : FunctionSymbol(name, UndefinedFunctionKind, flags, file, type), 212 importName(importName), importModule(importModule), isCalledDirectly(isCalledDirectly) {} 213 214 static bool classof(const Symbol *s) { 215 return s->kind() == UndefinedFunctionKind; 216 } 217 218 StringRef importName; 219 StringRef importModule; 220 bool isCalledDirectly; 221 }; 222 223 // Section symbols for output sections are different from those for input 224 // section. These are generated by the linker and point the OutputSection 225 // rather than an InputSection. 
226 class OutputSectionSymbol : public Symbol { 227 public: 228 OutputSectionSymbol(const OutputSection *s) 229 : Symbol("", OutputSectionKind, llvm::wasm::WASM_SYMBOL_BINDING_LOCAL, 230 nullptr), 231 section(s) {} 232 233 static bool classof(const Symbol *s) { 234 return s->kind() == OutputSectionKind; 235 } 236 237 const OutputSection *section; 238 }; 239 240 class SectionSymbol : public Symbol { 241 public: 242 SectionSymbol(uint32_t flags, const InputSection *s, InputFile *f = nullptr) 243 : Symbol("", SectionKind, flags, f), section(s) {} 244 245 static bool classof(const Symbol *s) { return s->kind() == SectionKind; } 246 247 const OutputSectionSymbol *getOutputSectionSymbol() const; 248 249 const InputSection *section; 250 }; 251 252 class DataSymbol : public Symbol { 253 public: 254 static bool classof(const Symbol *s) { 255 return s->kind() == DefinedDataKind || s->kind() == UndefinedDataKind; 256 } 257 258 protected: 259 DataSymbol(StringRef name, Kind k, uint32_t flags, InputFile *f) 260 : Symbol(name, k, flags, f) {} 261 }; 262 263 class DefinedData : public DataSymbol { 264 public: 265 // Constructor for regular data symbols originating from input files. 266 DefinedData(StringRef name, uint32_t flags, InputFile *f, 267 InputSegment *segment, uint32_t offset, uint32_t size) 268 : DataSymbol(name, DefinedDataKind, flags, f), segment(segment), 269 offset(offset), size(size) {} 270 271 // Constructor for linker synthetic data symbols. 272 DefinedData(StringRef name, uint32_t flags) 273 : DataSymbol(name, DefinedDataKind, flags, nullptr) {} 274 275 static bool classof(const Symbol *s) { return s->kind() == DefinedDataKind; } 276 277 // Returns the output virtual address of a defined data symbol. 278 uint32_t getVirtualAddress() const; 279 void setVirtualAddress(uint32_t va); 280 281 // Returns the offset of a defined data symbol within its OutputSegment. 
282 uint32_t getOutputSegmentOffset() const; 283 uint32_t getOutputSegmentIndex() const; 284 uint32_t getSize() const { return size; } 285 286 InputSegment *segment = nullptr; 287 288 protected: 289 uint32_t offset = 0; 290 uint32_t size = 0; 291 }; 292 293 class UndefinedData : public DataSymbol { 294 public: 295 UndefinedData(StringRef name, uint32_t flags, InputFile *file = nullptr) 296 : DataSymbol(name, UndefinedDataKind, flags, file) {} 297 static bool classof(const Symbol *s) { 298 return s->kind() == UndefinedDataKind; 299 } 300 }; 301 302 class GlobalSymbol : public Symbol { 303 public: 304 static bool classof(const Symbol *s) { 305 return s->kind() == DefinedGlobalKind || s->kind() == UndefinedGlobalKind; 306 } 307 308 const WasmGlobalType *getGlobalType() const { return globalType; } 309 310 // Get/set the global index 311 uint32_t getGlobalIndex() const; 312 void setGlobalIndex(uint32_t index); 313 bool hasGlobalIndex() const; 314 315 protected: 316 GlobalSymbol(StringRef name, Kind k, uint32_t flags, InputFile *f, 317 const WasmGlobalType *globalType) 318 : Symbol(name, k, flags, f), globalType(globalType) {} 319 320 const WasmGlobalType *globalType; 321 uint32_t globalIndex = INVALID_INDEX; 322 }; 323 324 class DefinedGlobal : public GlobalSymbol { 325 public: 326 DefinedGlobal(StringRef name, uint32_t flags, InputFile *file, 327 InputGlobal *global); 328 329 static bool classof(const Symbol *s) { 330 return s->kind() == DefinedGlobalKind; 331 } 332 333 InputGlobal *global; 334 }; 335 336 class UndefinedGlobal : public GlobalSymbol { 337 public: 338 UndefinedGlobal(StringRef name, StringRef importName, StringRef importModule, 339 uint32_t flags, InputFile *file = nullptr, 340 const WasmGlobalType *type = nullptr) 341 : GlobalSymbol(name, UndefinedGlobalKind, flags, file, type), 342 importName(importName), importModule(importModule) {} 343 344 static bool classof(const Symbol *s) { 345 return s->kind() == UndefinedGlobalKind; 346 } 347 348 StringRef 
importName; 349 StringRef importModule; 350 }; 351 352 // Wasm events are features that suspend the current execution and transfer the 353 // control flow to a corresponding handler. Currently the only supported event 354 // kind is exceptions. 355 // 356 // Event tags are values to distinguish different events. For exceptions, they 357 // can be used to distinguish different language's exceptions, i.e., all C++ 358 // exceptions have the same tag. Wasm can generate code capable of doing 359 // different handling actions based on the tag of caught exceptions. 360 // 361 // A single EventSymbol object represents a single tag. C++ exception event 362 // symbol is a weak symbol generated in every object file in which exceptions 363 // are used, and has name '__cpp_exception' for linking. 364 class EventSymbol : public Symbol { 365 public: 366 static bool classof(const Symbol *s) { return s->kind() == DefinedEventKind; } 367 368 const WasmEventType *getEventType() const { return eventType; } 369 370 // Get/set the event index 371 uint32_t getEventIndex() const; 372 void setEventIndex(uint32_t index); 373 bool hasEventIndex() const; 374 375 const WasmSignature *signature; 376 377 protected: 378 EventSymbol(StringRef name, Kind k, uint32_t flags, InputFile *f, 379 const WasmEventType *eventType, const WasmSignature *sig) 380 : Symbol(name, k, flags, f), signature(sig), eventType(eventType) {} 381 382 const WasmEventType *eventType; 383 uint32_t eventIndex = INVALID_INDEX; 384 }; 385 386 class DefinedEvent : public EventSymbol { 387 public: 388 DefinedEvent(StringRef name, uint32_t flags, InputFile *file, 389 InputEvent *event); 390 391 static bool classof(const Symbol *s) { return s->kind() == DefinedEventKind; } 392 393 InputEvent *event; 394 }; 395 396 // LazySymbol represents a symbol that is not yet in the link, but we know where 397 // to find it if needed. If the resolver finds both Undefined and Lazy for the 398 // same name, it will ask the Lazy to load a file. 
399 // 400 // A special complication is the handling of weak undefined symbols. They should 401 // not load a file, but we have to remember we have seen both the weak undefined 402 // and the lazy. We represent that with a lazy symbol with a weak binding. This 403 // means that code looking for undefined symbols normally also has to take lazy 404 // symbols into consideration. 405 class LazySymbol : public Symbol { 406 public: 407 LazySymbol(StringRef name, uint32_t flags, InputFile *file, 408 const llvm::object::Archive::Symbol &sym) 409 : Symbol(name, LazyKind, flags, file), archiveSymbol(sym) {} 410 411 static bool classof(const Symbol *s) { return s->kind() == LazyKind; } 412 void fetch(); 413 MemoryBufferRef getMemberBuffer(); 414 415 // Lazy symbols can have a signature because they can replace an 416 // UndefinedFunction which which case we need to be able to preserve the 417 // signture. 418 // TODO(sbc): This repetition of the signature field is inelegant. Revisit 419 // the use of class hierarchy to represent symbol taxonomy. 420 const WasmSignature *signature = nullptr; 421 422 private: 423 llvm::object::Archive::Symbol archiveSymbol; 424 }; 425 426 // linker-generated symbols 427 struct WasmSym { 428 // __global_base 429 // Symbol marking the start of the global section. 430 static DefinedData *globalBase; 431 432 // __stack_pointer 433 // Global that holds the address of the top of the explicit value stack in 434 // linear memory. 435 static GlobalSymbol *stackPointer; 436 437 // __tls_base 438 // Global that holds the address of the base of the current thread's 439 // TLS block. 440 static GlobalSymbol *tlsBase; 441 442 // __tls_size 443 // Symbol whose value is the size of the TLS block. 444 static GlobalSymbol *tlsSize; 445 446 // __tls_size 447 // Symbol whose value is the alignment of the TLS block. 448 static GlobalSymbol *tlsAlign; 449 450 // __data_end 451 // Symbol marking the end of the data and bss. 
452 static DefinedData *dataEnd; 453 454 // __heap_base 455 // Symbol marking the end of the data, bss and explicit stack. Any linear 456 // memory following this address is not used by the linked code and can 457 // therefore be used as a backing store for brk()/malloc() implementations. 458 static DefinedData *heapBase; 459 460 // __wasm_init_memory_flag 461 // Symbol whose contents are nonzero iff memory has already been initialized. 462 static DefinedData *initMemoryFlag; 463 464 // __wasm_init_memory 465 // Function that initializes passive data segments during instantiation. 466 static DefinedFunction *initMemory; 467 468 // __wasm_call_ctors 469 // Function that directly calls all ctors in priority order. 470 static DefinedFunction *callCtors; 471 472 // __wasm_apply_relocs 473 // Function that applies relocations to data segment post-instantiation. 474 static DefinedFunction *applyRelocs; 475 476 // __wasm_init_tls 477 // Function that allocates thread-local storage and initializes it. 478 static DefinedFunction *initTLS; 479 480 // __dso_handle 481 // Symbol used in calls to __cxa_atexit to determine current DLL 482 static DefinedData *dsoHandle; 483 484 // __table_base 485 // Used in PIC code for offset of indirect function table 486 static UndefinedGlobal *tableBase; 487 static DefinedData *definedTableBase; 488 489 // __memory_base 490 // Used in PIC code for offset of global data 491 static UndefinedGlobal *memoryBase; 492 static DefinedData *definedMemoryBase; 493 }; 494 495 // A buffer class that is large enough to hold any Symbol-derived 496 // object. We allocate memory using this class and instantiate a symbol 497 // using the placement new. 
498 union SymbolUnion { 499 alignas(DefinedFunction) char a[sizeof(DefinedFunction)]; 500 alignas(DefinedData) char b[sizeof(DefinedData)]; 501 alignas(DefinedGlobal) char c[sizeof(DefinedGlobal)]; 502 alignas(DefinedEvent) char d[sizeof(DefinedEvent)]; 503 alignas(LazySymbol) char e[sizeof(LazySymbol)]; 504 alignas(UndefinedFunction) char f[sizeof(UndefinedFunction)]; 505 alignas(UndefinedData) char g[sizeof(UndefinedData)]; 506 alignas(UndefinedGlobal) char h[sizeof(UndefinedGlobal)]; 507 alignas(SectionSymbol) char i[sizeof(SectionSymbol)]; 508 }; 509 510 // It is important to keep the size of SymbolUnion small for performance and 511 // memory usage reasons. 96 bytes is a soft limit based on the size of 512 // UndefinedFunction on a 64-bit system. 513 static_assert(sizeof(SymbolUnion) <= 96, "SymbolUnion too large"); 514 515 void printTraceSymbol(Symbol *sym); 516 void printTraceSymbolUndefined(StringRef name, const InputFile* file); 517 518 template <typename T, typename... ArgT> 519 T *replaceSymbol(Symbol *s, ArgT &&... arg) { 520 static_assert(std::is_trivially_destructible<T>(), 521 "Symbol types must be trivially destructible"); 522 static_assert(sizeof(T) <= sizeof(SymbolUnion), "SymbolUnion too small"); 523 static_assert(alignof(T) <= alignof(SymbolUnion), 524 "SymbolUnion not aligned enough"); 525 assert(static_cast<Symbol *>(static_cast<T *>(nullptr)) == nullptr && 526 "Not a Symbol"); 527 528 Symbol symCopy = *s; 529 530 T *s2 = new (s) T(std::forward<ArgT>(arg)...); 531 s2->isUsedInRegularObj = symCopy.isUsedInRegularObj; 532 s2->forceExport = symCopy.forceExport; 533 s2->canInline = symCopy.canInline; 534 s2->traced = symCopy.traced; 535 536 // Print out a log message if --trace-symbol was specified. 537 // This is for debugging. 538 if (s2->traced) 539 printTraceSymbol(s2); 540 541 return s2; 542 } 543 544 } // namespace wasm 545 546 // Returns a symbol name for an error message. 
547 std::string toString(const wasm::Symbol &sym); 548 std::string toString(wasm::Symbol::Kind kind); 549 std::string maybeDemangleSymbol(StringRef name); 550 551 } // namespace lld 552 553 #endif 554