1 //===- SymbolTable.cpp ----------------------------------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "SymbolTable.h" 10 #include "Config.h" 11 #include "InputChunks.h" 12 #include "InputElement.h" 13 #include "WriterUtils.h" 14 #include "lld/Common/CommonLinkerContext.h" 15 #include <optional> 16 17 #define DEBUG_TYPE "lld" 18 19 using namespace llvm; 20 using namespace llvm::wasm; 21 using namespace llvm::object; 22 23 namespace lld::wasm { 24 SymbolTable *symtab; 25 26 void SymbolTable::addFile(InputFile *file, StringRef symName) { 27 log("Processing: " + toString(file)); 28 29 // Lazy object file 30 if (file->lazy) { 31 if (auto *f = dyn_cast<BitcodeFile>(file)) { 32 ctx.lazyBitcodeFiles.push_back(f); 33 f->parseLazy(); 34 } else { 35 cast<ObjFile>(file)->parseLazy(); 36 } 37 return; 38 } 39 40 // .so file 41 if (auto *f = dyn_cast<SharedFile>(file)) { 42 // If we are not reporting undefined symbols that we don't actualy 43 // parse the shared library symbol table. 44 f->parse(); 45 ctx.sharedFiles.push_back(f); 46 return; 47 } 48 49 // stub file 50 if (auto *f = dyn_cast<StubFile>(file)) { 51 f->parse(); 52 ctx.stubFiles.push_back(f); 53 return; 54 } 55 56 if (ctx.arg.trace) 57 message(toString(file)); 58 59 // LLVM bitcode file 60 if (auto *f = dyn_cast<BitcodeFile>(file)) { 61 // This order, first adding to `bitcodeFiles` and then parsing is necessary. 62 // See https://github.com/llvm/llvm-project/pull/73095 63 ctx.bitcodeFiles.push_back(f); 64 f->parse(symName); 65 return; 66 } 67 68 // Regular object file 69 auto *f = cast<ObjFile>(file); 70 f->parse(false); 71 ctx.objectFiles.push_back(f); 72 } 73 74 // This function is where all the optimizations of link-time 75 // optimization happens. When LTO is in use, some input files are 76 // not in native object file format but in the LLVM bitcode format. 77 // This function compiles bitcode files into a few big native files 78 // using LLVM functions and replaces bitcode symbols with the results. 79 // Because all bitcode files that the program consists of are passed 80 // to the compiler at once, it can do whole-program optimization. 81 void SymbolTable::compileBitcodeFiles() { 82 // Prevent further LTO objects being included 83 BitcodeFile::doneLTO = true; 84 85 // Compile bitcode files and replace bitcode symbols. 86 lto.reset(new BitcodeCompiler); 87 for (BitcodeFile *f : ctx.bitcodeFiles) 88 lto->add(*f); 89 90 for (StringRef filename : lto->compile()) { 91 auto *obj = make<ObjFile>(MemoryBufferRef(filename, "lto.tmp"), ""); 92 obj->parse(true); 93 ctx.objectFiles.push_back(obj); 94 } 95 } 96 97 Symbol *SymbolTable::find(StringRef name) { 98 auto it = symMap.find(CachedHashStringRef(name)); 99 if (it == symMap.end() || it->second == -1) 100 return nullptr; 101 return symVector[it->second]; 102 } 103 104 void SymbolTable::replace(StringRef name, Symbol* sym) { 105 auto it = symMap.find(CachedHashStringRef(name)); 106 symVector[it->second] = sym; 107 } 108 109 std::pair<Symbol *, bool> SymbolTable::insertName(StringRef name) { 110 bool trace = false; 111 auto p = symMap.insert({CachedHashStringRef(name), (int)symVector.size()}); 112 int &symIndex = p.first->second; 113 bool isNew = p.second; 114 if (symIndex == -1) { 115 symIndex = symVector.size(); 116 trace = true; 117 isNew = true; 118 } 119 120 if (!isNew) 121 return {symVector[symIndex], false}; 122 123 Symbol *sym = reinterpret_cast<Symbol *>(make<SymbolUnion>()); 124 sym->isUsedInRegularObj = false; 125 sym->canInline = true; 126 sym->traced = trace; 127 sym->forceExport = false; 128 sym->referenced = !ctx.arg.gcSections; 129 symVector.emplace_back(sym); 130 return {sym, true}; 131 } 132 133 std::pair<Symbol *, bool> SymbolTable::insert(StringRef name, 134 const InputFile *file) { 135 Symbol *s; 136 bool wasInserted; 137 std::tie(s, wasInserted) = insertName(name); 138 139 if (!file || file->kind() == InputFile::ObjectKind) 140 s->isUsedInRegularObj = true; 141 142 return {s, wasInserted}; 143 } 144 145 static void reportTypeError(const Symbol *existing, const InputFile *file, 146 llvm::wasm::WasmSymbolType type) { 147 error("symbol type mismatch: " + toString(*existing) + "\n>>> defined as " + 148 toString(existing->getWasmType()) + " in " + 149 toString(existing->getFile()) + "\n>>> defined as " + toString(type) + 150 " in " + toString(file)); 151 } 152 153 // Check the type of new symbol matches that of the symbol is replacing. 154 // Returns true if the function types match, false is there is a signature 155 // mismatch. 156 static bool signatureMatches(FunctionSymbol *existing, 157 const WasmSignature *newSig) { 158 const WasmSignature *oldSig = existing->signature; 159 160 // If either function is missing a signature (this happens for bitcode 161 // symbols) then assume they match. Any mismatch will be reported later 162 // when the LTO objects are added. 163 if (!newSig || !oldSig) 164 return true; 165 166 return *newSig == *oldSig; 167 } 168 169 static void checkGlobalType(const Symbol *existing, const InputFile *file, 170 const WasmGlobalType *newType) { 171 if (!isa<GlobalSymbol>(existing)) { 172 reportTypeError(existing, file, WASM_SYMBOL_TYPE_GLOBAL); 173 return; 174 } 175 176 const WasmGlobalType *oldType = cast<GlobalSymbol>(existing)->getGlobalType(); 177 if (*newType != *oldType) { 178 error("Global type mismatch: " + existing->getName() + "\n>>> defined as " + 179 toString(*oldType) + " in " + toString(existing->getFile()) + 180 "\n>>> defined as " + toString(*newType) + " in " + toString(file)); 181 } 182 } 183 184 static void checkTagType(const Symbol *existing, const InputFile *file, 185 const WasmSignature *newSig) { 186 const auto *existingTag = dyn_cast<TagSymbol>(existing); 187 if (!isa<TagSymbol>(existing)) { 188 reportTypeError(existing, file, WASM_SYMBOL_TYPE_TAG); 189 return; 190 } 191 192 const WasmSignature *oldSig = existingTag->signature; 193 if (*newSig != *oldSig) 194 warn("Tag signature mismatch: " + existing->getName() + 195 "\n>>> defined as " + toString(*oldSig) + " in " + 196 toString(existing->getFile()) + "\n>>> defined as " + 197 toString(*newSig) + " in " + toString(file)); 198 } 199 200 static void checkTableType(const Symbol *existing, const InputFile *file, 201 const WasmTableType *newType) { 202 if (!isa<TableSymbol>(existing)) { 203 reportTypeError(existing, file, WASM_SYMBOL_TYPE_TABLE); 204 return; 205 } 206 207 const WasmTableType *oldType = cast<TableSymbol>(existing)->getTableType(); 208 if (newType->ElemType != oldType->ElemType) { 209 error("Table type mismatch: " + existing->getName() + "\n>>> defined as " + 210 toString(*oldType) + " in " + toString(existing->getFile()) + 211 "\n>>> defined as " + toString(*newType) + " in " + toString(file)); 212 } 213 // FIXME: No assertions currently on the limits. 214 } 215 216 static void checkDataType(const Symbol *existing, const InputFile *file) { 217 if (!isa<DataSymbol>(existing)) 218 reportTypeError(existing, file, WASM_SYMBOL_TYPE_DATA); 219 } 220 221 DefinedFunction *SymbolTable::addSyntheticFunction(StringRef name, 222 uint32_t flags, 223 InputFunction *function) { 224 LLVM_DEBUG(dbgs() << "addSyntheticFunction: " << name << "\n"); 225 assert(!find(name)); 226 ctx.syntheticFunctions.emplace_back(function); 227 return replaceSymbol<DefinedFunction>(insertName(name).first, name, 228 flags, nullptr, function); 229 } 230 231 // Adds an optional, linker generated, data symbol. The symbol will only be 232 // added if there is an undefine reference to it, or if it is explicitly 233 // exported via the --export flag. Otherwise we don't add the symbol and return 234 // nullptr. 235 DefinedData *SymbolTable::addOptionalDataSymbol(StringRef name, 236 uint64_t value) { 237 Symbol *s = find(name); 238 if (!s && (ctx.arg.exportAll || ctx.arg.exportedSymbols.count(name) != 0)) 239 s = insertName(name).first; 240 else if (!s || s->isDefined()) 241 return nullptr; 242 LLVM_DEBUG(dbgs() << "addOptionalDataSymbol: " << name << "\n"); 243 auto *rtn = replaceSymbol<DefinedData>( 244 s, name, WASM_SYMBOL_VISIBILITY_HIDDEN | WASM_SYMBOL_ABSOLUTE); 245 rtn->setVA(value); 246 rtn->referenced = true; 247 return rtn; 248 } 249 250 DefinedData *SymbolTable::addSyntheticDataSymbol(StringRef name, 251 uint32_t flags) { 252 LLVM_DEBUG(dbgs() << "addSyntheticDataSymbol: " << name << "\n"); 253 assert(!find(name)); 254 return replaceSymbol<DefinedData>(insertName(name).first, name, 255 flags | WASM_SYMBOL_ABSOLUTE); 256 } 257 258 DefinedGlobal *SymbolTable::addSyntheticGlobal(StringRef name, uint32_t flags, 259 InputGlobal *global) { 260 LLVM_DEBUG(dbgs() << "addSyntheticGlobal: " << name << " -> " << global 261 << "\n"); 262 assert(!find(name)); 263 ctx.syntheticGlobals.emplace_back(global); 264 return replaceSymbol<DefinedGlobal>(insertName(name).first, name, flags, 265 nullptr, global); 266 } 267 268 DefinedGlobal *SymbolTable::addOptionalGlobalSymbol(StringRef name, 269 InputGlobal *global) { 270 Symbol *s = find(name); 271 if (!s || s->isDefined()) 272 return nullptr; 273 LLVM_DEBUG(dbgs() << "addOptionalGlobalSymbol: " << name << " -> " << global 274 << "\n"); 275 ctx.syntheticGlobals.emplace_back(global); 276 return replaceSymbol<DefinedGlobal>(s, name, WASM_SYMBOL_VISIBILITY_HIDDEN, 277 nullptr, global); 278 } 279 280 DefinedTable *SymbolTable::addSyntheticTable(StringRef name, uint32_t flags, 281 InputTable *table) { 282 LLVM_DEBUG(dbgs() << "addSyntheticTable: " << name << " -> " << table 283 << "\n"); 284 Symbol *s = find(name); 285 assert(!s || s->isUndefined()); 286 if (!s) 287 s = insertName(name).first; 288 ctx.syntheticTables.emplace_back(table); 289 return replaceSymbol<DefinedTable>(s, name, flags, nullptr, table); 290 } 291 292 static bool shouldReplace(const Symbol *existing, InputFile *newFile, 293 uint32_t newFlags) { 294 // If existing symbol is undefined, replace it. 295 if (!existing->isDefined()) { 296 LLVM_DEBUG(dbgs() << "resolving existing undefined symbol: " 297 << existing->getName() << "\n"); 298 return true; 299 } 300 301 // Now we have two defined symbols. If the new one is weak, we can ignore it. 302 if ((newFlags & WASM_SYMBOL_BINDING_MASK) == WASM_SYMBOL_BINDING_WEAK) { 303 LLVM_DEBUG(dbgs() << "existing symbol takes precedence\n"); 304 return false; 305 } 306 307 // If the existing symbol is weak, we should replace it. 308 if (existing->isWeak()) { 309 LLVM_DEBUG(dbgs() << "replacing existing weak symbol\n"); 310 return true; 311 } 312 313 // Similarly with shared symbols 314 if (existing->isShared()) { 315 LLVM_DEBUG(dbgs() << "replacing existing shared symbol\n"); 316 return true; 317 } 318 319 // Neither symbol is week. They conflict. 320 if (ctx.arg.allowMultipleDefinition) 321 return false; 322 323 errorOrWarn("duplicate symbol: " + toString(*existing) + "\n>>> defined in " + 324 toString(existing->getFile()) + "\n>>> defined in " + 325 toString(newFile)); 326 return true; 327 } 328 329 static void reportFunctionSignatureMismatch(StringRef symName, 330 FunctionSymbol *sym, 331 const WasmSignature *signature, 332 InputFile *file, 333 bool isError = true) { 334 std::string msg = 335 ("function signature mismatch: " + symName + "\n>>> defined as " + 336 toString(*sym->signature) + " in " + toString(sym->getFile()) + 337 "\n>>> defined as " + toString(*signature) + " in " + toString(file)) 338 .str(); 339 if (isError) 340 error(msg); 341 else 342 warn(msg); 343 } 344 345 static void reportFunctionSignatureMismatch(StringRef symName, 346 FunctionSymbol *a, 347 FunctionSymbol *b, 348 bool isError = true) { 349 reportFunctionSignatureMismatch(symName, a, b->signature, b->getFile(), 350 isError); 351 } 352 353 Symbol *SymbolTable::addSharedFunction(StringRef name, uint32_t flags, 354 InputFile *file, 355 const WasmSignature *sig) { 356 LLVM_DEBUG(dbgs() << "addSharedFunction: " << name << " [" << toString(*sig) 357 << "]\n"); 358 Symbol *s; 359 bool wasInserted; 360 std::tie(s, wasInserted) = insert(name, file); 361 362 auto replaceSym = [&](Symbol *sym) { 363 replaceSymbol<SharedFunctionSymbol>(sym, name, flags, file, sig); 364 }; 365 366 if (wasInserted || s->isLazy()) { 367 replaceSym(s); 368 return s; 369 } 370 371 auto existingFunction = dyn_cast<FunctionSymbol>(s); 372 if (!existingFunction) { 373 reportTypeError(s, file, WASM_SYMBOL_TYPE_FUNCTION); 374 return s; 375 } 376 377 // Shared symbols should never replace locally-defined ones 378 if (s->isDefined()) { 379 return s; 380 } 381 382 LLVM_DEBUG(dbgs() << "resolving existing undefined symbol: " << s->getName() 383 << "\n"); 384 385 bool checkSig = true; 386 if (auto ud = dyn_cast<UndefinedFunction>(existingFunction)) 387 checkSig = ud->isCalledDirectly; 388 389 if (checkSig && !signatureMatches(existingFunction, sig)) { 390 if (ctx.arg.shlibSigCheck) { 391 reportFunctionSignatureMismatch(name, existingFunction, sig, file); 392 } else { 393 // With --no-shlib-sigcheck we ignore the signature of the function as 394 // defined by the shared library and instead use the signature as 395 // expected by the program being linked. 396 sig = existingFunction->signature; 397 } 398 } 399 400 replaceSym(s); 401 return s; 402 } 403 404 Symbol *SymbolTable::addSharedData(StringRef name, uint32_t flags, 405 InputFile *file) { 406 LLVM_DEBUG(dbgs() << "addSharedData: " << name << "\n"); 407 Symbol *s; 408 bool wasInserted; 409 std::tie(s, wasInserted) = insert(name, file); 410 411 if (wasInserted || s->isLazy()) { 412 replaceSymbol<SharedData>(s, name, flags, file); 413 return s; 414 } 415 416 // Shared symbols should never replace locally-defined ones 417 if (s->isDefined()) { 418 return s; 419 } 420 421 checkDataType(s, file); 422 replaceSymbol<SharedData>(s, name, flags, file); 423 return s; 424 } 425 426 Symbol *SymbolTable::addDefinedFunction(StringRef name, uint32_t flags, 427 InputFile *file, 428 InputFunction *function) { 429 LLVM_DEBUG(dbgs() << "addDefinedFunction: " << name << " [" 430 << (function ? toString(function->signature) : "none") 431 << "]\n"); 432 Symbol *s; 433 bool wasInserted; 434 std::tie(s, wasInserted) = insert(name, file); 435 436 auto replaceSym = [&](Symbol *sym) { 437 // If the new defined function doesn't have signature (i.e. bitcode 438 // functions) but the old symbol does, then preserve the old signature 439 const WasmSignature *oldSig = s->getSignature(); 440 auto* newSym = replaceSymbol<DefinedFunction>(sym, name, flags, file, function); 441 if (!newSym->signature) 442 newSym->signature = oldSig; 443 }; 444 445 if (wasInserted || s->isLazy()) { 446 replaceSym(s); 447 return s; 448 } 449 450 auto existingFunction = dyn_cast<FunctionSymbol>(s); 451 if (!existingFunction) { 452 reportTypeError(s, file, WASM_SYMBOL_TYPE_FUNCTION); 453 return s; 454 } 455 456 bool checkSig = true; 457 if (auto ud = dyn_cast<UndefinedFunction>(existingFunction)) 458 checkSig = ud->isCalledDirectly; 459 460 if (checkSig && function && !signatureMatches(existingFunction, &function->signature)) { 461 Symbol* variant; 462 if (getFunctionVariant(s, &function->signature, file, &variant)) 463 // New variant, always replace 464 replaceSym(variant); 465 else if (shouldReplace(s, file, flags)) 466 // Variant already exists, replace it after checking shouldReplace 467 replaceSym(variant); 468 469 // This variant we found take the place in the symbol table as the primary 470 // variant. 471 replace(name, variant); 472 return variant; 473 } 474 475 // Existing function with matching signature. 476 if (shouldReplace(s, file, flags)) 477 replaceSym(s); 478 479 return s; 480 } 481 482 Symbol *SymbolTable::addDefinedData(StringRef name, uint32_t flags, 483 InputFile *file, InputChunk *segment, 484 uint64_t address, uint64_t size) { 485 LLVM_DEBUG(dbgs() << "addDefinedData:" << name << " addr:" << address 486 << "\n"); 487 Symbol *s; 488 bool wasInserted; 489 std::tie(s, wasInserted) = insert(name, file); 490 491 auto replaceSym = [&]() { 492 replaceSymbol<DefinedData>(s, name, flags, file, segment, address, size); 493 }; 494 495 if (wasInserted || s->isLazy()) { 496 replaceSym(); 497 return s; 498 } 499 500 checkDataType(s, file); 501 502 if (shouldReplace(s, file, flags)) 503 replaceSym(); 504 return s; 505 } 506 507 Symbol *SymbolTable::addDefinedGlobal(StringRef name, uint32_t flags, 508 InputFile *file, InputGlobal *global) { 509 LLVM_DEBUG(dbgs() << "addDefinedGlobal:" << name << "\n"); 510 511 Symbol *s; 512 bool wasInserted; 513 std::tie(s, wasInserted) = insert(name, file); 514 515 auto replaceSym = [&]() { 516 replaceSymbol<DefinedGlobal>(s, name, flags, file, global); 517 }; 518 519 if (wasInserted || s->isLazy()) { 520 replaceSym(); 521 return s; 522 } 523 524 checkGlobalType(s, file, &global->getType()); 525 526 if (shouldReplace(s, file, flags)) 527 replaceSym(); 528 return s; 529 } 530 531 Symbol *SymbolTable::addDefinedTag(StringRef name, uint32_t flags, 532 InputFile *file, InputTag *tag) { 533 LLVM_DEBUG(dbgs() << "addDefinedTag:" << name << "\n"); 534 535 Symbol *s; 536 bool wasInserted; 537 std::tie(s, wasInserted) = insert(name, file); 538 539 auto replaceSym = [&]() { 540 replaceSymbol<DefinedTag>(s, name, flags, file, tag); 541 }; 542 543 if (wasInserted || s->isLazy()) { 544 replaceSym(); 545 return s; 546 } 547 548 checkTagType(s, file, &tag->signature); 549 550 if (shouldReplace(s, file, flags)) 551 replaceSym(); 552 return s; 553 } 554 555 Symbol *SymbolTable::addDefinedTable(StringRef name, uint32_t flags, 556 InputFile *file, InputTable *table) { 557 LLVM_DEBUG(dbgs() << "addDefinedTable:" << name << "\n"); 558 559 Symbol *s; 560 bool wasInserted; 561 std::tie(s, wasInserted) = insert(name, file); 562 563 auto replaceSym = [&]() { 564 replaceSymbol<DefinedTable>(s, name, flags, file, table); 565 }; 566 567 if (wasInserted || s->isLazy()) { 568 replaceSym(); 569 return s; 570 } 571 572 checkTableType(s, file, &table->getType()); 573 574 if (shouldReplace(s, file, flags)) 575 replaceSym(); 576 return s; 577 } 578 579 // This function get called when an undefined symbol is added, and there is 580 // already an existing one in the symbols table. In this case we check that 581 // custom 'import-module' and 'import-field' symbol attributes agree. 582 // With LTO these attributes are not available when the bitcode is read and only 583 // become available when the LTO object is read. In this case we silently 584 // replace the empty attributes with the valid ones. 585 template <typename T> 586 static void setImportAttributes(T *existing, 587 std::optional<StringRef> importName, 588 std::optional<StringRef> importModule, 589 uint32_t flags, InputFile *file) { 590 if (importName) { 591 if (!existing->importName) 592 existing->importName = importName; 593 if (existing->importName != importName) 594 error("import name mismatch for symbol: " + toString(*existing) + 595 "\n>>> defined as " + *existing->importName + " in " + 596 toString(existing->getFile()) + "\n>>> defined as " + *importName + 597 " in " + toString(file)); 598 } 599 600 if (importModule) { 601 if (!existing->importModule) 602 existing->importModule = importModule; 603 if (existing->importModule != importModule) 604 error("import module mismatch for symbol: " + toString(*existing) + 605 "\n>>> defined as " + *existing->importModule + " in " + 606 toString(existing->getFile()) + "\n>>> defined as " + 607 *importModule + " in " + toString(file)); 608 } 609 610 // Update symbol binding, if the existing symbol is weak 611 uint32_t binding = flags & WASM_SYMBOL_BINDING_MASK; 612 if (existing->isWeak() && binding != WASM_SYMBOL_BINDING_WEAK) { 613 existing->flags = (existing->flags & ~WASM_SYMBOL_BINDING_MASK) | binding; 614 } 615 } 616 617 Symbol *SymbolTable::addUndefinedFunction(StringRef name, 618 std::optional<StringRef> importName, 619 std::optional<StringRef> importModule, 620 uint32_t flags, InputFile *file, 621 const WasmSignature *sig, 622 bool isCalledDirectly) { 623 LLVM_DEBUG(dbgs() << "addUndefinedFunction: " << name << " [" 624 << (sig ? toString(*sig) : "none") 625 << "] IsCalledDirectly:" << isCalledDirectly << " flags=0x" 626 << utohexstr(flags) << "\n"); 627 assert(flags & WASM_SYMBOL_UNDEFINED); 628 629 Symbol *s; 630 bool wasInserted; 631 std::tie(s, wasInserted) = insert(name, file); 632 if (s->traced) 633 printTraceSymbolUndefined(name, file); 634 635 auto replaceSym = [&]() { 636 replaceSymbol<UndefinedFunction>(s, name, importName, importModule, flags, 637 file, sig, isCalledDirectly); 638 }; 639 640 if (wasInserted) { 641 replaceSym(); 642 } else if (auto *lazy = dyn_cast<LazySymbol>(s)) { 643 if ((flags & WASM_SYMBOL_BINDING_MASK) == WASM_SYMBOL_BINDING_WEAK) { 644 lazy->setWeak(); 645 lazy->signature = sig; 646 } else { 647 lazy->extract(); 648 if (!ctx.arg.whyExtract.empty()) 649 ctx.whyExtractRecords.emplace_back(toString(file), s->getFile(), *s); 650 } 651 } else { 652 auto existingFunction = dyn_cast<FunctionSymbol>(s); 653 if (!existingFunction) { 654 reportTypeError(s, file, WASM_SYMBOL_TYPE_FUNCTION); 655 return s; 656 } 657 if (!existingFunction->signature && sig) 658 existingFunction->signature = sig; 659 auto *existingUndefined = dyn_cast<UndefinedFunction>(existingFunction); 660 if (isCalledDirectly && !signatureMatches(existingFunction, sig)) { 661 if (existingFunction->isShared()) { 662 // Special handling for when the existing function is a shared symbol 663 if (ctx.arg.shlibSigCheck) { 664 reportFunctionSignatureMismatch(name, existingFunction, sig, file); 665 } else { 666 existingFunction->signature = sig; 667 } 668 } 669 // If the existing undefined functions is not called directly then let 670 // this one take precedence. Otherwise the existing function is either 671 // directly called or defined, in which case we need a function variant. 672 else if (existingUndefined && !existingUndefined->isCalledDirectly) 673 replaceSym(); 674 else if (getFunctionVariant(s, sig, file, &s)) 675 replaceSym(); 676 } 677 if (existingUndefined) { 678 setImportAttributes(existingUndefined, importName, importModule, flags, 679 file); 680 if (isCalledDirectly) 681 existingUndefined->isCalledDirectly = true; 682 if (s->isWeak()) 683 s->flags = flags; 684 } 685 } 686 687 return s; 688 } 689 690 Symbol *SymbolTable::addUndefinedData(StringRef name, uint32_t flags, 691 InputFile *file) { 692 LLVM_DEBUG(dbgs() << "addUndefinedData: " << name << "\n"); 693 assert(flags & WASM_SYMBOL_UNDEFINED); 694 695 Symbol *s; 696 bool wasInserted; 697 std::tie(s, wasInserted) = insert(name, file); 698 if (s->traced) 699 printTraceSymbolUndefined(name, file); 700 701 if (wasInserted) { 702 replaceSymbol<UndefinedData>(s, name, flags, file); 703 } else if (auto *lazy = dyn_cast<LazySymbol>(s)) { 704 if ((flags & WASM_SYMBOL_BINDING_MASK) == WASM_SYMBOL_BINDING_WEAK) 705 lazy->setWeak(); 706 else 707 lazy->extract(); 708 } else if (s->isDefined()) { 709 checkDataType(s, file); 710 } else if (s->isWeak()) { 711 s->flags = flags; 712 } 713 return s; 714 } 715 716 Symbol *SymbolTable::addUndefinedGlobal(StringRef name, 717 std::optional<StringRef> importName, 718 std::optional<StringRef> importModule, 719 uint32_t flags, InputFile *file, 720 const WasmGlobalType *type) { 721 LLVM_DEBUG(dbgs() << "addUndefinedGlobal: " << name << "\n"); 722 assert(flags & WASM_SYMBOL_UNDEFINED); 723 724 Symbol *s; 725 bool wasInserted; 726 std::tie(s, wasInserted) = insert(name, file); 727 if (s->traced) 728 printTraceSymbolUndefined(name, file); 729 730 if (wasInserted) 731 replaceSymbol<UndefinedGlobal>(s, name, importName, importModule, flags, 732 file, type); 733 else if (auto *lazy = dyn_cast<LazySymbol>(s)) 734 lazy->extract(); 735 else if (s->isDefined()) 736 checkGlobalType(s, file, type); 737 else if (s->isWeak()) 738 s->flags = flags; 739 return s; 740 } 741 742 Symbol *SymbolTable::addUndefinedTable(StringRef name, 743 std::optional<StringRef> importName, 744 std::optional<StringRef> importModule, 745 uint32_t flags, InputFile *file, 746 const WasmTableType *type) { 747 LLVM_DEBUG(dbgs() << "addUndefinedTable: " << name << "\n"); 748 assert(flags & WASM_SYMBOL_UNDEFINED); 749 750 Symbol *s; 751 bool wasInserted; 752 std::tie(s, wasInserted) = insert(name, file); 753 if (s->traced) 754 printTraceSymbolUndefined(name, file); 755 756 if (wasInserted) 757 replaceSymbol<UndefinedTable>(s, name, importName, importModule, flags, 758 file, type); 759 else if (auto *lazy = dyn_cast<LazySymbol>(s)) 760 lazy->extract(); 761 else if (s->isDefined()) 762 checkTableType(s, file, type); 763 else if (s->isWeak()) 764 s->flags = flags; 765 return s; 766 } 767 768 Symbol *SymbolTable::addUndefinedTag(StringRef name, 769 std::optional<StringRef> importName, 770 std::optional<StringRef> importModule, 771 uint32_t flags, InputFile *file, 772 const WasmSignature *sig) { 773 LLVM_DEBUG(dbgs() << "addUndefinedTag: " << name << "\n"); 774 assert(flags & WASM_SYMBOL_UNDEFINED); 775 776 Symbol *s; 777 bool wasInserted; 778 std::tie(s, wasInserted) = insert(name, file); 779 if (s->traced) 780 printTraceSymbolUndefined(name, file); 781 782 if (wasInserted) 783 replaceSymbol<UndefinedTag>(s, name, importName, importModule, flags, file, 784 sig); 785 else if (auto *lazy = dyn_cast<LazySymbol>(s)) 786 lazy->extract(); 787 else if (s->isDefined()) 788 checkTagType(s, file, sig); 789 else if (s->isWeak()) 790 s->flags = flags; 791 return s; 792 } 793 794 TableSymbol *SymbolTable::createUndefinedIndirectFunctionTable(StringRef name) { 795 WasmLimits limits{0, 0, 0}; // Set by the writer. 796 WasmTableType *type = make<WasmTableType>(); 797 type->ElemType = ValType::FUNCREF; 798 type->Limits = limits; 799 uint32_t flags = ctx.arg.exportTable ? 0 : WASM_SYMBOL_VISIBILITY_HIDDEN; 800 flags |= WASM_SYMBOL_UNDEFINED; 801 Symbol *sym = 802 addUndefinedTable(name, name, defaultModule, flags, nullptr, type); 803 sym->markLive(); 804 sym->forceExport = ctx.arg.exportTable; 805 return cast<TableSymbol>(sym); 806 } 807 808 TableSymbol *SymbolTable::createDefinedIndirectFunctionTable(StringRef name) { 809 const uint32_t invalidIndex = -1; 810 WasmLimits limits{0, 0, 0}; // Set by the writer. 811 WasmTableType type{ValType::FUNCREF, limits}; 812 WasmTable desc{invalidIndex, type, name}; 813 InputTable *table = make<InputTable>(desc, nullptr); 814 uint32_t flags = ctx.arg.exportTable ? 0 : WASM_SYMBOL_VISIBILITY_HIDDEN; 815 TableSymbol *sym = addSyntheticTable(name, flags, table); 816 sym->markLive(); 817 sym->forceExport = ctx.arg.exportTable; 818 return sym; 819 } 820 821 // Whether or not we need an indirect function table is usually a function of 822 // whether an input declares a need for it. However sometimes it's possible for 823 // no input to need the indirect function table, but then a late 824 // addInternalGOTEntry causes a function to be allocated an address. In that 825 // case address we synthesize a definition at the last minute. 826 TableSymbol *SymbolTable::resolveIndirectFunctionTable(bool required) { 827 Symbol *existing = find(functionTableName); 828 if (existing) { 829 if (!isa<TableSymbol>(existing)) { 830 error(Twine("reserved symbol must be of type table: `") + 831 functionTableName + "`"); 832 return nullptr; 833 } 834 if (existing->isDefined()) { 835 error(Twine("reserved symbol must not be defined in input files: `") + 836 functionTableName + "`"); 837 return nullptr; 838 } 839 } 840 841 if (ctx.arg.importTable) { 842 if (existing) { 843 existing->importModule = defaultModule; 844 existing->importName = functionTableName; 845 return cast<TableSymbol>(existing); 846 } 847 if (required) 848 return createUndefinedIndirectFunctionTable(functionTableName); 849 } else if ((existing && existing->isLive()) || ctx.arg.exportTable || 850 required) { 851 // A defined table is required. Either because the user request an exported 852 // table or because the table symbol is already live. The existing table is 853 // guaranteed to be undefined due to the check above. 854 return createDefinedIndirectFunctionTable(functionTableName); 855 } 856 857 // An indirect function table will only be present in the symbol table if 858 // needed by a reloc; if we get here, we don't need one. 859 return nullptr; 860 } 861 862 void SymbolTable::addLazy(StringRef name, InputFile *file) { 863 LLVM_DEBUG(dbgs() << "addLazy: " << name << "\n"); 864 865 Symbol *s; 866 bool wasInserted; 867 std::tie(s, wasInserted) = insertName(name); 868 869 if (wasInserted) { 870 replaceSymbol<LazySymbol>(s, name, 0, file); 871 return; 872 } 873 874 if (!s->isUndefined()) 875 return; 876 877 // The existing symbol is undefined, load a new one from the archive, 878 // unless the existing symbol is weak in which case replace the undefined 879 // symbols with a LazySymbol. 880 if (s->isWeak()) { 881 const WasmSignature *oldSig = nullptr; 882 // In the case of an UndefinedFunction we need to preserve the expected 883 // signature. 884 if (auto *f = dyn_cast<UndefinedFunction>(s)) 885 oldSig = f->signature; 886 LLVM_DEBUG(dbgs() << "replacing existing weak undefined symbol\n"); 887 auto newSym = 888 replaceSymbol<LazySymbol>(s, name, WASM_SYMBOL_BINDING_WEAK, file); 889 newSym->signature = oldSig; 890 return; 891 } 892 893 LLVM_DEBUG(dbgs() << "replacing existing undefined\n"); 894 const InputFile *oldFile = s->getFile(); 895 LazySymbol(name, 0, file).extract(); 896 if (!ctx.arg.whyExtract.empty()) 897 ctx.whyExtractRecords.emplace_back(toString(oldFile), s->getFile(), *s); 898 } 899 900 bool SymbolTable::addComdat(StringRef name) { 901 return comdatGroups.insert(CachedHashStringRef(name)).second; 902 } 903 904 // The new signature doesn't match. Create a variant to the symbol with the 905 // signature encoded in the name and return that instead. These symbols are 906 // then unified later in handleSymbolVariants. 907 bool SymbolTable::getFunctionVariant(Symbol* sym, const WasmSignature *sig, 908 const InputFile *file, Symbol **out) { 909 LLVM_DEBUG(dbgs() << "getFunctionVariant: " << sym->getName() << " -> " 910 << " " << toString(*sig) << "\n"); 911 Symbol *variant = nullptr; 912 913 // Linear search through symbol variants. Should never be more than two 914 // or three entries here. 915 auto &variants = symVariants[CachedHashStringRef(sym->getName())]; 916 if (variants.empty()) 917 variants.push_back(sym); 918 919 for (Symbol* v : variants) { 920 if (*v->getSignature() == *sig) { 921 variant = v; 922 break; 923 } 924 } 925 926 bool wasAdded = !variant; 927 if (wasAdded) { 928 // Create a new variant; 929 LLVM_DEBUG(dbgs() << "added new variant\n"); 930 variant = reinterpret_cast<Symbol *>(make<SymbolUnion>()); 931 variant->isUsedInRegularObj = 932 !file || file->kind() == InputFile::ObjectKind; 933 variant->canInline = true; 934 variant->traced = false; 935 variant->forceExport = false; 936 variants.push_back(variant); 937 } else { 938 LLVM_DEBUG(dbgs() << "variant already exists: " << toString(*variant) << "\n"); 939 assert(*variant->getSignature() == *sig); 940 } 941 942 *out = variant; 943 return wasAdded; 944 } 945 946 // Set a flag for --trace-symbol so that we can print out a log message 947 // if a new symbol with the same name is inserted into the symbol table. 948 void SymbolTable::trace(StringRef name) { 949 symMap.insert({CachedHashStringRef(name), -1}); 950 } 951 952 void SymbolTable::wrap(Symbol *sym, Symbol *real, Symbol *wrap) { 953 // Swap symbols as instructed by -wrap. 954 int &origIdx = symMap[CachedHashStringRef(sym->getName())]; 955 int &realIdx= symMap[CachedHashStringRef(real->getName())]; 956 int &wrapIdx = symMap[CachedHashStringRef(wrap->getName())]; 957 LLVM_DEBUG(dbgs() << "wrap: " << sym->getName() << "\n"); 958 959 // Anyone looking up __real symbols should get the original 960 realIdx = origIdx; 961 // Anyone looking up the original should get the __wrap symbol 962 origIdx = wrapIdx; 963 } 964 965 static const uint8_t unreachableFn[] = { 966 0x03 /* ULEB length */, 0x00 /* ULEB num locals */, 967 0x00 /* opcode unreachable */, 0x0b /* opcode end */ 968 }; 969 970 // Replace the given symbol body with an unreachable function. 971 // This is used by handleWeakUndefines in order to generate a callable 972 // equivalent of an undefined function and also handleSymbolVariants for 973 // undefined functions that don't match the signature of the definition. 974 InputFunction *SymbolTable::replaceWithUnreachable(Symbol *sym, 975 const WasmSignature &sig, 976 StringRef debugName) { 977 auto *func = make<SyntheticFunction>(sig, sym->getName(), debugName); 978 func->setBody(unreachableFn); 979 ctx.syntheticFunctions.emplace_back(func); 980 // Mark new symbols as local. For relocatable output we don't want them 981 // to be exported outside the object file. 982 replaceSymbol<DefinedFunction>(sym, debugName, WASM_SYMBOL_BINDING_LOCAL, 983 nullptr, func); 984 // Ensure the stub function doesn't get a table entry. Its address 985 // should always compare equal to the null pointer. 986 sym->isStub = true; 987 return func; 988 } 989 990 void SymbolTable::replaceWithUndefined(Symbol *sym) { 991 // Add a synthetic dummy for weak undefined functions. These dummies will 992 // be GC'd if not used as the target of any "call" instructions. 993 StringRef debugName = saver().save("undefined_weak:" + toString(*sym)); 994 replaceWithUnreachable(sym, *sym->getSignature(), debugName); 995 // Hide our dummy to prevent export. 996 sym->setHidden(true); 997 } 998 999 // For weak undefined functions, there may be "call" instructions that reference 1000 // the symbol. In this case, we need to synthesise a dummy/stub function that 1001 // will abort at runtime, so that relocations can still provided an operand to 1002 // the call instruction that passes Wasm validation. 1003 void SymbolTable::handleWeakUndefines() { 1004 for (Symbol *sym : symbols()) { 1005 if (sym->isUndefWeak() && sym->isUsedInRegularObj) { 1006 if (sym->getSignature()) { 1007 replaceWithUndefined(sym); 1008 } else { 1009 // It is possible for undefined functions not to have a signature (eg. 1010 // if added via "--undefined"), but weak undefined ones do have a 1011 // signature. Lazy symbols may not be functions and therefore Sig can 1012 // still be null in some circumstance. 1013 assert(!isa<FunctionSymbol>(sym)); 1014 } 1015 } 1016 } 1017 } 1018 1019 DefinedFunction *SymbolTable::createUndefinedStub(const WasmSignature &sig) { 1020 if (auto it = stubFunctions.find(sig); it != stubFunctions.end()) 1021 return it->second; 1022 LLVM_DEBUG(dbgs() << "createUndefinedStub: " << toString(sig) << "\n"); 1023 auto *sym = reinterpret_cast<DefinedFunction *>(make<SymbolUnion>()); 1024 sym->isUsedInRegularObj = true; 1025 sym->canInline = true; 1026 sym->traced = false; 1027 sym->forceExport = false; 1028 sym->signature = &sig; 1029 replaceSymbol<DefinedFunction>( 1030 sym, "undefined_stub", WASM_SYMBOL_VISIBILITY_HIDDEN, nullptr, nullptr); 1031 replaceWithUnreachable(sym, sig, "undefined_stub"); 1032 stubFunctions[sig] = sym; 1033 return sym; 1034 } 1035 1036 // Remove any variant symbols that were created due to function signature 1037 // mismatches. 1038 void SymbolTable::handleSymbolVariants() { 1039 for (auto pair : symVariants) { 1040 // Push the initial symbol onto the list of variants. 1041 StringRef symName = pair.first.val(); 1042 std::vector<Symbol *> &variants = pair.second; 1043 1044 #ifndef NDEBUG 1045 LLVM_DEBUG(dbgs() << "symbol with (" << variants.size() 1046 << ") variants: " << symName << "\n"); 1047 for (auto *s: variants) { 1048 auto *f = cast<FunctionSymbol>(s); 1049 LLVM_DEBUG(dbgs() << " variant: " + f->getName() << " " 1050 << toString(*f->signature) << "\n"); 1051 } 1052 #endif 1053 1054 // Find the one definition. 1055 DefinedFunction *defined = nullptr; 1056 for (auto *symbol : variants) { 1057 if (auto f = dyn_cast<DefinedFunction>(symbol)) { 1058 defined = f; 1059 break; 1060 } 1061 } 1062 1063 // If there are no definitions, and the undefined symbols disagree on 1064 // the signature, there is not we can do since we don't know which one 1065 // to use as the signature on the import. 1066 if (!defined) { 1067 reportFunctionSignatureMismatch(symName, 1068 cast<FunctionSymbol>(variants[0]), 1069 cast<FunctionSymbol>(variants[1])); 1070 return; 1071 } 1072 1073 for (auto *symbol : variants) { 1074 if (symbol != defined) { 1075 auto *f = cast<FunctionSymbol>(symbol); 1076 reportFunctionSignatureMismatch(symName, f, defined, false); 1077 StringRef debugName = 1078 saver().save("signature_mismatch:" + toString(*f)); 1079 replaceWithUnreachable(f, *f->signature, debugName); 1080 } 1081 } 1082 } 1083 } 1084 1085 } // namespace wasm::lld 1086