1 //===- SymbolTable.cpp ----------------------------------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "SymbolTable.h" 10 #include "Config.h" 11 #include "InputChunks.h" 12 #include "InputElement.h" 13 #include "WriterUtils.h" 14 #include "lld/Common/CommonLinkerContext.h" 15 #include <optional> 16 17 #define DEBUG_TYPE "lld" 18 19 using namespace llvm; 20 using namespace llvm::wasm; 21 using namespace llvm::object; 22 23 namespace lld::wasm { 24 SymbolTable *symtab; 25 26 void SymbolTable::addFile(InputFile *file, StringRef symName) { 27 log("Processing: " + toString(file)); 28 29 // Lazy object file 30 if (file->lazy) { 31 if (auto *f = dyn_cast<BitcodeFile>(file)) { 32 f->parseLazy(); 33 } else { 34 cast<ObjFile>(file)->parseLazy(); 35 } 36 return; 37 } 38 39 // .so file 40 if (auto *f = dyn_cast<SharedFile>(file)) { 41 ctx.sharedFiles.push_back(f); 42 return; 43 } 44 45 // stub file 46 if (auto *f = dyn_cast<StubFile>(file)) { 47 f->parse(); 48 ctx.stubFiles.push_back(f); 49 return; 50 } 51 52 if (config->trace) 53 message(toString(file)); 54 55 // LLVM bitcode file 56 if (auto *f = dyn_cast<BitcodeFile>(file)) { 57 // This order, first adding to `bitcodeFiles` and then parsing is necessary. 58 // See https://github.com/llvm/llvm-project/pull/73095 59 ctx.bitcodeFiles.push_back(f); 60 f->parse(symName); 61 return; 62 } 63 64 // Regular object file 65 auto *f = cast<ObjFile>(file); 66 f->parse(false); 67 ctx.objectFiles.push_back(f); 68 } 69 70 // This function is where all the optimizations of link-time 71 // optimization happens. When LTO is in use, some input files are 72 // not in native object file format but in the LLVM bitcode format. 73 // This function compiles bitcode files into a few big native files 74 // using LLVM functions and replaces bitcode symbols with the results. 75 // Because all bitcode files that the program consists of are passed 76 // to the compiler at once, it can do whole-program optimization. 77 void SymbolTable::compileBitcodeFiles() { 78 // Prevent further LTO objects being included 79 BitcodeFile::doneLTO = true; 80 81 if (ctx.bitcodeFiles.empty()) 82 return; 83 84 // Compile bitcode files and replace bitcode symbols. 85 lto.reset(new BitcodeCompiler); 86 for (BitcodeFile *f : ctx.bitcodeFiles) 87 lto->add(*f); 88 89 for (StringRef filename : lto->compile()) { 90 auto *obj = make<ObjFile>(MemoryBufferRef(filename, "lto.tmp"), ""); 91 obj->parse(true); 92 ctx.objectFiles.push_back(obj); 93 } 94 } 95 96 Symbol *SymbolTable::find(StringRef name) { 97 auto it = symMap.find(CachedHashStringRef(name)); 98 if (it == symMap.end() || it->second == -1) 99 return nullptr; 100 return symVector[it->second]; 101 } 102 103 void SymbolTable::replace(StringRef name, Symbol* sym) { 104 auto it = symMap.find(CachedHashStringRef(name)); 105 symVector[it->second] = sym; 106 } 107 108 std::pair<Symbol *, bool> SymbolTable::insertName(StringRef name) { 109 bool trace = false; 110 auto p = symMap.insert({CachedHashStringRef(name), (int)symVector.size()}); 111 int &symIndex = p.first->second; 112 bool isNew = p.second; 113 if (symIndex == -1) { 114 symIndex = symVector.size(); 115 trace = true; 116 isNew = true; 117 } 118 119 if (!isNew) 120 return {symVector[symIndex], false}; 121 122 Symbol *sym = reinterpret_cast<Symbol *>(make<SymbolUnion>()); 123 sym->isUsedInRegularObj = false; 124 sym->canInline = true; 125 sym->traced = trace; 126 sym->forceExport = false; 127 sym->referenced = !config->gcSections; 128 symVector.emplace_back(sym); 129 return {sym, true}; 130 } 131 132 std::pair<Symbol *, bool> SymbolTable::insert(StringRef name, 133 const InputFile *file) { 134 Symbol *s; 135 bool wasInserted; 136 std::tie(s, wasInserted) = insertName(name); 137 138 if (!file || file->kind() == InputFile::ObjectKind) 139 s->isUsedInRegularObj = true; 140 141 return {s, wasInserted}; 142 } 143 144 static void reportTypeError(const Symbol *existing, const InputFile *file, 145 llvm::wasm::WasmSymbolType type) { 146 error("symbol type mismatch: " + toString(*existing) + "\n>>> defined as " + 147 toString(existing->getWasmType()) + " in " + 148 toString(existing->getFile()) + "\n>>> defined as " + toString(type) + 149 " in " + toString(file)); 150 } 151 152 // Check the type of new symbol matches that of the symbol is replacing. 153 // Returns true if the function types match, false is there is a signature 154 // mismatch. 155 static bool signatureMatches(FunctionSymbol *existing, 156 const WasmSignature *newSig) { 157 const WasmSignature *oldSig = existing->signature; 158 159 // If either function is missing a signature (this happens for bitcode 160 // symbols) then assume they match. Any mismatch will be reported later 161 // when the LTO objects are added. 162 if (!newSig || !oldSig) 163 return true; 164 165 return *newSig == *oldSig; 166 } 167 168 static void checkGlobalType(const Symbol *existing, const InputFile *file, 169 const WasmGlobalType *newType) { 170 if (!isa<GlobalSymbol>(existing)) { 171 reportTypeError(existing, file, WASM_SYMBOL_TYPE_GLOBAL); 172 return; 173 } 174 175 const WasmGlobalType *oldType = cast<GlobalSymbol>(existing)->getGlobalType(); 176 if (*newType != *oldType) { 177 error("Global type mismatch: " + existing->getName() + "\n>>> defined as " + 178 toString(*oldType) + " in " + toString(existing->getFile()) + 179 "\n>>> defined as " + toString(*newType) + " in " + toString(file)); 180 } 181 } 182 183 static void checkTagType(const Symbol *existing, const InputFile *file, 184 const WasmSignature *newSig) { 185 const auto *existingTag = dyn_cast<TagSymbol>(existing); 186 if (!isa<TagSymbol>(existing)) { 187 reportTypeError(existing, file, WASM_SYMBOL_TYPE_TAG); 188 return; 189 } 190 191 const WasmSignature *oldSig = existingTag->signature; 192 if (*newSig != *oldSig) 193 warn("Tag signature mismatch: " + existing->getName() + 194 "\n>>> defined as " + toString(*oldSig) + " in " + 195 toString(existing->getFile()) + "\n>>> defined as " + 196 toString(*newSig) + " in " + toString(file)); 197 } 198 199 static void checkTableType(const Symbol *existing, const InputFile *file, 200 const WasmTableType *newType) { 201 if (!isa<TableSymbol>(existing)) { 202 reportTypeError(existing, file, WASM_SYMBOL_TYPE_TABLE); 203 return; 204 } 205 206 const WasmTableType *oldType = cast<TableSymbol>(existing)->getTableType(); 207 if (newType->ElemType != oldType->ElemType) { 208 error("Table type mismatch: " + existing->getName() + "\n>>> defined as " + 209 toString(*oldType) + " in " + toString(existing->getFile()) + 210 "\n>>> defined as " + toString(*newType) + " in " + toString(file)); 211 } 212 // FIXME: No assertions currently on the limits. 213 } 214 215 static void checkDataType(const Symbol *existing, const InputFile *file) { 216 if (!isa<DataSymbol>(existing)) 217 reportTypeError(existing, file, WASM_SYMBOL_TYPE_DATA); 218 } 219 220 DefinedFunction *SymbolTable::addSyntheticFunction(StringRef name, 221 uint32_t flags, 222 InputFunction *function) { 223 LLVM_DEBUG(dbgs() << "addSyntheticFunction: " << name << "\n"); 224 assert(!find(name)); 225 ctx.syntheticFunctions.emplace_back(function); 226 return replaceSymbol<DefinedFunction>(insertName(name).first, name, 227 flags, nullptr, function); 228 } 229 230 // Adds an optional, linker generated, data symbol. The symbol will only be 231 // added if there is an undefine reference to it, or if it is explicitly 232 // exported via the --export flag. Otherwise we don't add the symbol and return 233 // nullptr. 234 DefinedData *SymbolTable::addOptionalDataSymbol(StringRef name, 235 uint64_t value) { 236 Symbol *s = find(name); 237 if (!s && (config->exportAll || config->exportedSymbols.count(name) != 0)) 238 s = insertName(name).first; 239 else if (!s || s->isDefined()) 240 return nullptr; 241 LLVM_DEBUG(dbgs() << "addOptionalDataSymbol: " << name << "\n"); 242 auto *rtn = replaceSymbol<DefinedData>( 243 s, name, WASM_SYMBOL_VISIBILITY_HIDDEN | WASM_SYMBOL_ABSOLUTE); 244 rtn->setVA(value); 245 rtn->referenced = true; 246 return rtn; 247 } 248 249 DefinedData *SymbolTable::addSyntheticDataSymbol(StringRef name, 250 uint32_t flags) { 251 LLVM_DEBUG(dbgs() << "addSyntheticDataSymbol: " << name << "\n"); 252 assert(!find(name)); 253 return replaceSymbol<DefinedData>(insertName(name).first, name, 254 flags | WASM_SYMBOL_ABSOLUTE); 255 } 256 257 DefinedGlobal *SymbolTable::addSyntheticGlobal(StringRef name, uint32_t flags, 258 InputGlobal *global) { 259 LLVM_DEBUG(dbgs() << "addSyntheticGlobal: " << name << " -> " << global 260 << "\n"); 261 assert(!find(name)); 262 ctx.syntheticGlobals.emplace_back(global); 263 return replaceSymbol<DefinedGlobal>(insertName(name).first, name, flags, 264 nullptr, global); 265 } 266 267 DefinedGlobal *SymbolTable::addOptionalGlobalSymbol(StringRef name, 268 InputGlobal *global) { 269 Symbol *s = find(name); 270 if (!s || s->isDefined()) 271 return nullptr; 272 LLVM_DEBUG(dbgs() << "addOptionalGlobalSymbol: " << name << " -> " << global 273 << "\n"); 274 ctx.syntheticGlobals.emplace_back(global); 275 return replaceSymbol<DefinedGlobal>(s, name, WASM_SYMBOL_VISIBILITY_HIDDEN, 276 nullptr, global); 277 } 278 279 DefinedTable *SymbolTable::addSyntheticTable(StringRef name, uint32_t flags, 280 InputTable *table) { 281 LLVM_DEBUG(dbgs() << "addSyntheticTable: " << name << " -> " << table 282 << "\n"); 283 Symbol *s = find(name); 284 assert(!s || s->isUndefined()); 285 if (!s) 286 s = insertName(name).first; 287 ctx.syntheticTables.emplace_back(table); 288 return replaceSymbol<DefinedTable>(s, name, flags, nullptr, table); 289 } 290 291 static bool shouldReplace(const Symbol *existing, InputFile *newFile, 292 uint32_t newFlags) { 293 // If existing symbol is undefined, replace it. 294 if (!existing->isDefined()) { 295 LLVM_DEBUG(dbgs() << "resolving existing undefined symbol: " 296 << existing->getName() << "\n"); 297 return true; 298 } 299 300 // Now we have two defined symbols. If the new one is weak, we can ignore it. 301 if ((newFlags & WASM_SYMBOL_BINDING_MASK) == WASM_SYMBOL_BINDING_WEAK) { 302 LLVM_DEBUG(dbgs() << "existing symbol takes precedence\n"); 303 return false; 304 } 305 306 // If the existing symbol is weak, we should replace it. 307 if (existing->isWeak()) { 308 LLVM_DEBUG(dbgs() << "replacing existing weak symbol\n"); 309 return true; 310 } 311 312 // Neither symbol is week. They conflict. 313 error("duplicate symbol: " + toString(*existing) + "\n>>> defined in " + 314 toString(existing->getFile()) + "\n>>> defined in " + 315 toString(newFile)); 316 return true; 317 } 318 319 Symbol *SymbolTable::addDefinedFunction(StringRef name, uint32_t flags, 320 InputFile *file, 321 InputFunction *function) { 322 LLVM_DEBUG(dbgs() << "addDefinedFunction: " << name << " [" 323 << (function ? toString(function->signature) : "none") 324 << "]\n"); 325 Symbol *s; 326 bool wasInserted; 327 std::tie(s, wasInserted) = insert(name, file); 328 329 auto replaceSym = [&](Symbol *sym) { 330 // If the new defined function doesn't have signature (i.e. bitcode 331 // functions) but the old symbol does, then preserve the old signature 332 const WasmSignature *oldSig = s->getSignature(); 333 auto* newSym = replaceSymbol<DefinedFunction>(sym, name, flags, file, function); 334 if (!newSym->signature) 335 newSym->signature = oldSig; 336 }; 337 338 if (wasInserted || s->isLazy()) { 339 replaceSym(s); 340 return s; 341 } 342 343 auto existingFunction = dyn_cast<FunctionSymbol>(s); 344 if (!existingFunction) { 345 reportTypeError(s, file, WASM_SYMBOL_TYPE_FUNCTION); 346 return s; 347 } 348 349 bool checkSig = true; 350 if (auto ud = dyn_cast<UndefinedFunction>(existingFunction)) 351 checkSig = ud->isCalledDirectly; 352 353 if (checkSig && function && !signatureMatches(existingFunction, &function->signature)) { 354 Symbol* variant; 355 if (getFunctionVariant(s, &function->signature, file, &variant)) 356 // New variant, always replace 357 replaceSym(variant); 358 else if (shouldReplace(s, file, flags)) 359 // Variant already exists, replace it after checking shouldReplace 360 replaceSym(variant); 361 362 // This variant we found take the place in the symbol table as the primary 363 // variant. 364 replace(name, variant); 365 return variant; 366 } 367 368 // Existing function with matching signature. 369 if (shouldReplace(s, file, flags)) 370 replaceSym(s); 371 372 return s; 373 } 374 375 Symbol *SymbolTable::addDefinedData(StringRef name, uint32_t flags, 376 InputFile *file, InputChunk *segment, 377 uint64_t address, uint64_t size) { 378 LLVM_DEBUG(dbgs() << "addDefinedData:" << name << " addr:" << address 379 << "\n"); 380 Symbol *s; 381 bool wasInserted; 382 std::tie(s, wasInserted) = insert(name, file); 383 384 auto replaceSym = [&]() { 385 replaceSymbol<DefinedData>(s, name, flags, file, segment, address, size); 386 }; 387 388 if (wasInserted || s->isLazy()) { 389 replaceSym(); 390 return s; 391 } 392 393 checkDataType(s, file); 394 395 if (shouldReplace(s, file, flags)) 396 replaceSym(); 397 return s; 398 } 399 400 Symbol *SymbolTable::addDefinedGlobal(StringRef name, uint32_t flags, 401 InputFile *file, InputGlobal *global) { 402 LLVM_DEBUG(dbgs() << "addDefinedGlobal:" << name << "\n"); 403 404 Symbol *s; 405 bool wasInserted; 406 std::tie(s, wasInserted) = insert(name, file); 407 408 auto replaceSym = [&]() { 409 replaceSymbol<DefinedGlobal>(s, name, flags, file, global); 410 }; 411 412 if (wasInserted || s->isLazy()) { 413 replaceSym(); 414 return s; 415 } 416 417 checkGlobalType(s, file, &global->getType()); 418 419 if (shouldReplace(s, file, flags)) 420 replaceSym(); 421 return s; 422 } 423 424 Symbol *SymbolTable::addDefinedTag(StringRef name, uint32_t flags, 425 InputFile *file, InputTag *tag) { 426 LLVM_DEBUG(dbgs() << "addDefinedTag:" << name << "\n"); 427 428 Symbol *s; 429 bool wasInserted; 430 std::tie(s, wasInserted) = insert(name, file); 431 432 auto replaceSym = [&]() { 433 replaceSymbol<DefinedTag>(s, name, flags, file, tag); 434 }; 435 436 if (wasInserted || s->isLazy()) { 437 replaceSym(); 438 return s; 439 } 440 441 checkTagType(s, file, &tag->signature); 442 443 if (shouldReplace(s, file, flags)) 444 replaceSym(); 445 return s; 446 } 447 448 Symbol *SymbolTable::addDefinedTable(StringRef name, uint32_t flags, 449 InputFile *file, InputTable *table) { 450 LLVM_DEBUG(dbgs() << "addDefinedTable:" << name << "\n"); 451 452 Symbol *s; 453 bool wasInserted; 454 std::tie(s, wasInserted) = insert(name, file); 455 456 auto replaceSym = [&]() { 457 replaceSymbol<DefinedTable>(s, name, flags, file, table); 458 }; 459 460 if (wasInserted || s->isLazy()) { 461 replaceSym(); 462 return s; 463 } 464 465 checkTableType(s, file, &table->getType()); 466 467 if (shouldReplace(s, file, flags)) 468 replaceSym(); 469 return s; 470 } 471 472 // This function get called when an undefined symbol is added, and there is 473 // already an existing one in the symbols table. In this case we check that 474 // custom 'import-module' and 'import-field' symbol attributes agree. 475 // With LTO these attributes are not available when the bitcode is read and only 476 // become available when the LTO object is read. In this case we silently 477 // replace the empty attributes with the valid ones. 478 template <typename T> 479 static void setImportAttributes(T *existing, 480 std::optional<StringRef> importName, 481 std::optional<StringRef> importModule, 482 uint32_t flags, InputFile *file) { 483 if (importName) { 484 if (!existing->importName) 485 existing->importName = importName; 486 if (existing->importName != importName) 487 error("import name mismatch for symbol: " + toString(*existing) + 488 "\n>>> defined as " + *existing->importName + " in " + 489 toString(existing->getFile()) + "\n>>> defined as " + *importName + 490 " in " + toString(file)); 491 } 492 493 if (importModule) { 494 if (!existing->importModule) 495 existing->importModule = importModule; 496 if (existing->importModule != importModule) 497 error("import module mismatch for symbol: " + toString(*existing) + 498 "\n>>> defined as " + *existing->importModule + " in " + 499 toString(existing->getFile()) + "\n>>> defined as " + 500 *importModule + " in " + toString(file)); 501 } 502 503 // Update symbol binding, if the existing symbol is weak 504 uint32_t binding = flags & WASM_SYMBOL_BINDING_MASK; 505 if (existing->isWeak() && binding != WASM_SYMBOL_BINDING_WEAK) { 506 existing->flags = (existing->flags & ~WASM_SYMBOL_BINDING_MASK) | binding; 507 } 508 } 509 510 Symbol *SymbolTable::addUndefinedFunction(StringRef name, 511 std::optional<StringRef> importName, 512 std::optional<StringRef> importModule, 513 uint32_t flags, InputFile *file, 514 const WasmSignature *sig, 515 bool isCalledDirectly) { 516 LLVM_DEBUG(dbgs() << "addUndefinedFunction: " << name << " [" 517 << (sig ? toString(*sig) : "none") 518 << "] IsCalledDirectly:" << isCalledDirectly << " flags=0x" 519 << utohexstr(flags) << "\n"); 520 assert(flags & WASM_SYMBOL_UNDEFINED); 521 522 Symbol *s; 523 bool wasInserted; 524 std::tie(s, wasInserted) = insert(name, file); 525 if (s->traced) 526 printTraceSymbolUndefined(name, file); 527 528 auto replaceSym = [&]() { 529 replaceSymbol<UndefinedFunction>(s, name, importName, importModule, flags, 530 file, sig, isCalledDirectly); 531 }; 532 533 if (wasInserted) { 534 replaceSym(); 535 } else if (auto *lazy = dyn_cast<LazySymbol>(s)) { 536 if ((flags & WASM_SYMBOL_BINDING_MASK) == WASM_SYMBOL_BINDING_WEAK) { 537 lazy->setWeak(); 538 lazy->signature = sig; 539 } else { 540 lazy->extract(); 541 if (!config->whyExtract.empty()) 542 ctx.whyExtractRecords.emplace_back(toString(file), s->getFile(), *s); 543 } 544 } else { 545 auto existingFunction = dyn_cast<FunctionSymbol>(s); 546 if (!existingFunction) { 547 reportTypeError(s, file, WASM_SYMBOL_TYPE_FUNCTION); 548 return s; 549 } 550 if (!existingFunction->signature && sig) 551 existingFunction->signature = sig; 552 auto *existingUndefined = dyn_cast<UndefinedFunction>(existingFunction); 553 if (isCalledDirectly && !signatureMatches(existingFunction, sig)) { 554 // If the existing undefined functions is not called directly then let 555 // this one take precedence. Otherwise the existing function is either 556 // directly called or defined, in which case we need a function variant. 557 if (existingUndefined && !existingUndefined->isCalledDirectly) 558 replaceSym(); 559 else if (getFunctionVariant(s, sig, file, &s)) 560 replaceSym(); 561 } 562 if (existingUndefined) { 563 setImportAttributes(existingUndefined, importName, importModule, flags, 564 file); 565 if (isCalledDirectly) 566 existingUndefined->isCalledDirectly = true; 567 if (s->isWeak()) 568 s->flags = flags; 569 } 570 } 571 572 return s; 573 } 574 575 Symbol *SymbolTable::addUndefinedData(StringRef name, uint32_t flags, 576 InputFile *file) { 577 LLVM_DEBUG(dbgs() << "addUndefinedData: " << name << "\n"); 578 assert(flags & WASM_SYMBOL_UNDEFINED); 579 580 Symbol *s; 581 bool wasInserted; 582 std::tie(s, wasInserted) = insert(name, file); 583 if (s->traced) 584 printTraceSymbolUndefined(name, file); 585 586 if (wasInserted) { 587 replaceSymbol<UndefinedData>(s, name, flags, file); 588 } else if (auto *lazy = dyn_cast<LazySymbol>(s)) { 589 if ((flags & WASM_SYMBOL_BINDING_MASK) == WASM_SYMBOL_BINDING_WEAK) 590 lazy->setWeak(); 591 else 592 lazy->extract(); 593 } else if (s->isDefined()) { 594 checkDataType(s, file); 595 } else if (s->isWeak()) { 596 s->flags = flags; 597 } 598 return s; 599 } 600 601 Symbol *SymbolTable::addUndefinedGlobal(StringRef name, 602 std::optional<StringRef> importName, 603 std::optional<StringRef> importModule, 604 uint32_t flags, InputFile *file, 605 const WasmGlobalType *type) { 606 LLVM_DEBUG(dbgs() << "addUndefinedGlobal: " << name << "\n"); 607 assert(flags & WASM_SYMBOL_UNDEFINED); 608 609 Symbol *s; 610 bool wasInserted; 611 std::tie(s, wasInserted) = insert(name, file); 612 if (s->traced) 613 printTraceSymbolUndefined(name, file); 614 615 if (wasInserted) 616 replaceSymbol<UndefinedGlobal>(s, name, importName, importModule, flags, 617 file, type); 618 else if (auto *lazy = dyn_cast<LazySymbol>(s)) 619 lazy->extract(); 620 else if (s->isDefined()) 621 checkGlobalType(s, file, type); 622 else if (s->isWeak()) 623 s->flags = flags; 624 return s; 625 } 626 627 Symbol *SymbolTable::addUndefinedTable(StringRef name, 628 std::optional<StringRef> importName, 629 std::optional<StringRef> importModule, 630 uint32_t flags, InputFile *file, 631 const WasmTableType *type) { 632 LLVM_DEBUG(dbgs() << "addUndefinedTable: " << name << "\n"); 633 assert(flags & WASM_SYMBOL_UNDEFINED); 634 635 Symbol *s; 636 bool wasInserted; 637 std::tie(s, wasInserted) = insert(name, file); 638 if (s->traced) 639 printTraceSymbolUndefined(name, file); 640 641 if (wasInserted) 642 replaceSymbol<UndefinedTable>(s, name, importName, importModule, flags, 643 file, type); 644 else if (auto *lazy = dyn_cast<LazySymbol>(s)) 645 lazy->extract(); 646 else if (s->isDefined()) 647 checkTableType(s, file, type); 648 else if (s->isWeak()) 649 s->flags = flags; 650 return s; 651 } 652 653 Symbol *SymbolTable::addUndefinedTag(StringRef name, 654 std::optional<StringRef> importName, 655 std::optional<StringRef> importModule, 656 uint32_t flags, InputFile *file, 657 const WasmSignature *sig) { 658 LLVM_DEBUG(dbgs() << "addUndefinedTag: " << name << "\n"); 659 assert(flags & WASM_SYMBOL_UNDEFINED); 660 661 Symbol *s; 662 bool wasInserted; 663 std::tie(s, wasInserted) = insert(name, file); 664 if (s->traced) 665 printTraceSymbolUndefined(name, file); 666 667 if (wasInserted) 668 replaceSymbol<UndefinedTag>(s, name, importName, importModule, flags, file, 669 sig); 670 else if (auto *lazy = dyn_cast<LazySymbol>(s)) 671 lazy->extract(); 672 else if (s->isDefined()) 673 checkTagType(s, file, sig); 674 else if (s->isWeak()) 675 s->flags = flags; 676 return s; 677 } 678 679 TableSymbol *SymbolTable::createUndefinedIndirectFunctionTable(StringRef name) { 680 WasmLimits limits{0, 0, 0}; // Set by the writer. 681 WasmTableType *type = make<WasmTableType>(); 682 type->ElemType = ValType::FUNCREF; 683 type->Limits = limits; 684 uint32_t flags = config->exportTable ? 0 : WASM_SYMBOL_VISIBILITY_HIDDEN; 685 flags |= WASM_SYMBOL_UNDEFINED; 686 Symbol *sym = 687 addUndefinedTable(name, name, defaultModule, flags, nullptr, type); 688 sym->markLive(); 689 sym->forceExport = config->exportTable; 690 return cast<TableSymbol>(sym); 691 } 692 693 TableSymbol *SymbolTable::createDefinedIndirectFunctionTable(StringRef name) { 694 const uint32_t invalidIndex = -1; 695 WasmLimits limits{0, 0, 0}; // Set by the writer. 696 WasmTableType type{ValType::FUNCREF, limits}; 697 WasmTable desc{invalidIndex, type, name}; 698 InputTable *table = make<InputTable>(desc, nullptr); 699 uint32_t flags = config->exportTable ? 0 : WASM_SYMBOL_VISIBILITY_HIDDEN; 700 TableSymbol *sym = addSyntheticTable(name, flags, table); 701 sym->markLive(); 702 sym->forceExport = config->exportTable; 703 return sym; 704 } 705 706 // Whether or not we need an indirect function table is usually a function of 707 // whether an input declares a need for it. However sometimes it's possible for 708 // no input to need the indirect function table, but then a late 709 // addInternalGOTEntry causes a function to be allocated an address. In that 710 // case address we synthesize a definition at the last minute. 711 TableSymbol *SymbolTable::resolveIndirectFunctionTable(bool required) { 712 Symbol *existing = find(functionTableName); 713 if (existing) { 714 if (!isa<TableSymbol>(existing)) { 715 error(Twine("reserved symbol must be of type table: `") + 716 functionTableName + "`"); 717 return nullptr; 718 } 719 if (existing->isDefined()) { 720 error(Twine("reserved symbol must not be defined in input files: `") + 721 functionTableName + "`"); 722 return nullptr; 723 } 724 } 725 726 if (config->importTable) { 727 if (existing) { 728 existing->importModule = defaultModule; 729 existing->importName = functionTableName; 730 return cast<TableSymbol>(existing); 731 } 732 if (required) 733 return createUndefinedIndirectFunctionTable(functionTableName); 734 } else if ((existing && existing->isLive()) || config->exportTable || 735 required) { 736 // A defined table is required. Either because the user request an exported 737 // table or because the table symbol is already live. The existing table is 738 // guaranteed to be undefined due to the check above. 739 return createDefinedIndirectFunctionTable(functionTableName); 740 } 741 742 // An indirect function table will only be present in the symbol table if 743 // needed by a reloc; if we get here, we don't need one. 744 return nullptr; 745 } 746 747 void SymbolTable::addLazy(StringRef name, InputFile *file) { 748 LLVM_DEBUG(dbgs() << "addLazy: " << name << "\n"); 749 750 Symbol *s; 751 bool wasInserted; 752 std::tie(s, wasInserted) = insertName(name); 753 754 if (wasInserted) { 755 replaceSymbol<LazySymbol>(s, name, 0, file); 756 return; 757 } 758 759 if (!s->isUndefined()) 760 return; 761 762 // The existing symbol is undefined, load a new one from the archive, 763 // unless the existing symbol is weak in which case replace the undefined 764 // symbols with a LazySymbol. 765 if (s->isWeak()) { 766 const WasmSignature *oldSig = nullptr; 767 // In the case of an UndefinedFunction we need to preserve the expected 768 // signature. 769 if (auto *f = dyn_cast<UndefinedFunction>(s)) 770 oldSig = f->signature; 771 LLVM_DEBUG(dbgs() << "replacing existing weak undefined symbol\n"); 772 auto newSym = 773 replaceSymbol<LazySymbol>(s, name, WASM_SYMBOL_BINDING_WEAK, file); 774 newSym->signature = oldSig; 775 return; 776 } 777 778 LLVM_DEBUG(dbgs() << "replacing existing undefined\n"); 779 const InputFile *oldFile = s->getFile(); 780 LazySymbol(name, 0, file).extract(); 781 if (!config->whyExtract.empty()) 782 ctx.whyExtractRecords.emplace_back(toString(oldFile), s->getFile(), *s); 783 } 784 785 bool SymbolTable::addComdat(StringRef name) { 786 return comdatGroups.insert(CachedHashStringRef(name)).second; 787 } 788 789 // The new signature doesn't match. Create a variant to the symbol with the 790 // signature encoded in the name and return that instead. These symbols are 791 // then unified later in handleSymbolVariants. 792 bool SymbolTable::getFunctionVariant(Symbol* sym, const WasmSignature *sig, 793 const InputFile *file, Symbol **out) { 794 LLVM_DEBUG(dbgs() << "getFunctionVariant: " << sym->getName() << " -> " 795 << " " << toString(*sig) << "\n"); 796 Symbol *variant = nullptr; 797 798 // Linear search through symbol variants. Should never be more than two 799 // or three entries here. 800 auto &variants = symVariants[CachedHashStringRef(sym->getName())]; 801 if (variants.empty()) 802 variants.push_back(sym); 803 804 for (Symbol* v : variants) { 805 if (*v->getSignature() == *sig) { 806 variant = v; 807 break; 808 } 809 } 810 811 bool wasAdded = !variant; 812 if (wasAdded) { 813 // Create a new variant; 814 LLVM_DEBUG(dbgs() << "added new variant\n"); 815 variant = reinterpret_cast<Symbol *>(make<SymbolUnion>()); 816 variant->isUsedInRegularObj = 817 !file || file->kind() == InputFile::ObjectKind; 818 variant->canInline = true; 819 variant->traced = false; 820 variant->forceExport = false; 821 variants.push_back(variant); 822 } else { 823 LLVM_DEBUG(dbgs() << "variant already exists: " << toString(*variant) << "\n"); 824 assert(*variant->getSignature() == *sig); 825 } 826 827 *out = variant; 828 return wasAdded; 829 } 830 831 // Set a flag for --trace-symbol so that we can print out a log message 832 // if a new symbol with the same name is inserted into the symbol table. 833 void SymbolTable::trace(StringRef name) { 834 symMap.insert({CachedHashStringRef(name), -1}); 835 } 836 837 void SymbolTable::wrap(Symbol *sym, Symbol *real, Symbol *wrap) { 838 // Swap symbols as instructed by -wrap. 839 int &origIdx = symMap[CachedHashStringRef(sym->getName())]; 840 int &realIdx= symMap[CachedHashStringRef(real->getName())]; 841 int &wrapIdx = symMap[CachedHashStringRef(wrap->getName())]; 842 LLVM_DEBUG(dbgs() << "wrap: " << sym->getName() << "\n"); 843 844 // Anyone looking up __real symbols should get the original 845 realIdx = origIdx; 846 // Anyone looking up the original should get the __wrap symbol 847 origIdx = wrapIdx; 848 } 849 850 static const uint8_t unreachableFn[] = { 851 0x03 /* ULEB length */, 0x00 /* ULEB num locals */, 852 0x00 /* opcode unreachable */, 0x0b /* opcode end */ 853 }; 854 855 // Replace the given symbol body with an unreachable function. 856 // This is used by handleWeakUndefines in order to generate a callable 857 // equivalent of an undefined function and also handleSymbolVariants for 858 // undefined functions that don't match the signature of the definition. 859 InputFunction *SymbolTable::replaceWithUnreachable(Symbol *sym, 860 const WasmSignature &sig, 861 StringRef debugName) { 862 auto *func = make<SyntheticFunction>(sig, sym->getName(), debugName); 863 func->setBody(unreachableFn); 864 ctx.syntheticFunctions.emplace_back(func); 865 // Mark new symbols as local. For relocatable output we don't want them 866 // to be exported outside the object file. 867 replaceSymbol<DefinedFunction>(sym, debugName, WASM_SYMBOL_BINDING_LOCAL, 868 nullptr, func); 869 // Ensure the stub function doesn't get a table entry. Its address 870 // should always compare equal to the null pointer. 871 sym->isStub = true; 872 return func; 873 } 874 875 void SymbolTable::replaceWithUndefined(Symbol *sym) { 876 // Add a synthetic dummy for weak undefined functions. These dummies will 877 // be GC'd if not used as the target of any "call" instructions. 878 StringRef debugName = saver().save("undefined_weak:" + toString(*sym)); 879 replaceWithUnreachable(sym, *sym->getSignature(), debugName); 880 // Hide our dummy to prevent export. 881 sym->setHidden(true); 882 } 883 884 // For weak undefined functions, there may be "call" instructions that reference 885 // the symbol. In this case, we need to synthesise a dummy/stub function that 886 // will abort at runtime, so that relocations can still provided an operand to 887 // the call instruction that passes Wasm validation. 888 void SymbolTable::handleWeakUndefines() { 889 for (Symbol *sym : symbols()) { 890 if (sym->isUndefWeak() && sym->isUsedInRegularObj) { 891 if (sym->getSignature()) { 892 replaceWithUndefined(sym); 893 } else { 894 // It is possible for undefined functions not to have a signature (eg. 895 // if added via "--undefined"), but weak undefined ones do have a 896 // signature. Lazy symbols may not be functions and therefore Sig can 897 // still be null in some circumstance. 898 assert(!isa<FunctionSymbol>(sym)); 899 } 900 } 901 } 902 } 903 904 DefinedFunction *SymbolTable::createUndefinedStub(const WasmSignature &sig) { 905 if (stubFunctions.count(sig)) 906 return stubFunctions[sig]; 907 LLVM_DEBUG(dbgs() << "createUndefinedStub: " << toString(sig) << "\n"); 908 auto *sym = reinterpret_cast<DefinedFunction *>(make<SymbolUnion>()); 909 sym->isUsedInRegularObj = true; 910 sym->canInline = true; 911 sym->traced = false; 912 sym->forceExport = false; 913 sym->signature = &sig; 914 replaceSymbol<DefinedFunction>( 915 sym, "undefined_stub", WASM_SYMBOL_VISIBILITY_HIDDEN, nullptr, nullptr); 916 replaceWithUnreachable(sym, sig, "undefined_stub"); 917 stubFunctions[sig] = sym; 918 return sym; 919 } 920 921 static void reportFunctionSignatureMismatch(StringRef symName, 922 FunctionSymbol *a, 923 FunctionSymbol *b, bool isError) { 924 std::string msg = ("function signature mismatch: " + symName + 925 "\n>>> defined as " + toString(*a->signature) + " in " + 926 toString(a->getFile()) + "\n>>> defined as " + 927 toString(*b->signature) + " in " + toString(b->getFile())) 928 .str(); 929 if (isError) 930 error(msg); 931 else 932 warn(msg); 933 } 934 935 // Remove any variant symbols that were created due to function signature 936 // mismatches. 937 void SymbolTable::handleSymbolVariants() { 938 for (auto pair : symVariants) { 939 // Push the initial symbol onto the list of variants. 940 StringRef symName = pair.first.val(); 941 std::vector<Symbol *> &variants = pair.second; 942 943 #ifndef NDEBUG 944 LLVM_DEBUG(dbgs() << "symbol with (" << variants.size() 945 << ") variants: " << symName << "\n"); 946 for (auto *s: variants) { 947 auto *f = cast<FunctionSymbol>(s); 948 LLVM_DEBUG(dbgs() << " variant: " + f->getName() << " " 949 << toString(*f->signature) << "\n"); 950 } 951 #endif 952 953 // Find the one definition. 954 DefinedFunction *defined = nullptr; 955 for (auto *symbol : variants) { 956 if (auto f = dyn_cast<DefinedFunction>(symbol)) { 957 defined = f; 958 break; 959 } 960 } 961 962 // If there are no definitions, and the undefined symbols disagree on 963 // the signature, there is not we can do since we don't know which one 964 // to use as the signature on the import. 965 if (!defined) { 966 reportFunctionSignatureMismatch(symName, 967 cast<FunctionSymbol>(variants[0]), 968 cast<FunctionSymbol>(variants[1]), true); 969 return; 970 } 971 972 for (auto *symbol : variants) { 973 if (symbol != defined) { 974 auto *f = cast<FunctionSymbol>(symbol); 975 reportFunctionSignatureMismatch(symName, f, defined, false); 976 StringRef debugName = 977 saver().save("signature_mismatch:" + toString(*f)); 978 replaceWithUnreachable(f, *f->signature, debugName); 979 } 980 } 981 } 982 } 983 984 } // namespace wasm::lld 985