xref: /llvm-project/lld/wasm/SymbolTable.cpp (revision 617278e7b0c937fccbf7d67d14f053c3409bc33f)
1 //===- SymbolTable.cpp ----------------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "SymbolTable.h"
10 #include "Config.h"
11 #include "InputChunks.h"
12 #include "InputElement.h"
13 #include "WriterUtils.h"
14 #include "lld/Common/CommonLinkerContext.h"
15 #include <optional>
16 
17 #define DEBUG_TYPE "lld"
18 
19 using namespace llvm;
20 using namespace llvm::wasm;
21 using namespace llvm::object;
22 
23 namespace lld::wasm {
// Namespace-scope pointer to the wasm linker's SymbolTable.  It is defined
// here without an initializer and is not assigned anywhere in this part of
// the file.
24 SymbolTable *symtab;
25 
26 void SymbolTable::addFile(InputFile *file, StringRef symName) {
27   log("Processing: " + toString(file));
28 
29   // Lazy object file
30   if (file->lazy) {
31     if (auto *f = dyn_cast<BitcodeFile>(file)) {
32       ctx.lazyBitcodeFiles.push_back(f);
33       f->parseLazy();
34     } else {
35       cast<ObjFile>(file)->parseLazy();
36     }
37     return;
38   }
39 
40   // .so file
41   if (auto *f = dyn_cast<SharedFile>(file)) {
42     // If we are not reporting undefined symbols that we don't actualy
43     // parse the shared library symbol table.
44     f->parse();
45     ctx.sharedFiles.push_back(f);
46     return;
47   }
48 
49   // stub file
50   if (auto *f = dyn_cast<StubFile>(file)) {
51     f->parse();
52     ctx.stubFiles.push_back(f);
53     return;
54   }
55 
56   if (ctx.arg.trace)
57     message(toString(file));
58 
59   // LLVM bitcode file
60   if (auto *f = dyn_cast<BitcodeFile>(file)) {
61     // This order, first adding to `bitcodeFiles` and then parsing is necessary.
62     // See https://github.com/llvm/llvm-project/pull/73095
63     ctx.bitcodeFiles.push_back(f);
64     f->parse(symName);
65     return;
66   }
67 
68   // Regular object file
69   auto *f = cast<ObjFile>(file);
70   f->parse(false);
71   ctx.objectFiles.push_back(f);
72 }
73 
74 // This function is where all the optimizations of link-time
75 // optimization happens. When LTO is in use, some input files are
76 // not in native object file format but in the LLVM bitcode format.
77 // This function compiles bitcode files into a few big native files
78 // using LLVM functions and replaces bitcode symbols with the results.
79 // Because all bitcode files that the program consists of are passed
80 // to the compiler at once, it can do whole-program optimization.
81 void SymbolTable::compileBitcodeFiles() {
82   // Prevent further LTO objects being included
83   BitcodeFile::doneLTO = true;
84 
85   // Compile bitcode files and replace bitcode symbols.
86   lto.reset(new BitcodeCompiler);
87   for (BitcodeFile *f : ctx.bitcodeFiles)
88     lto->add(*f);
89 
90   for (StringRef filename : lto->compile()) {
91     auto *obj = make<ObjFile>(MemoryBufferRef(filename, "lto.tmp"), "");
92     obj->parse(true);
93     ctx.objectFiles.push_back(obj);
94   }
95 }
96 
97 Symbol *SymbolTable::find(StringRef name) {
98   auto it = symMap.find(CachedHashStringRef(name));
99   if (it == symMap.end() || it->second == -1)
100     return nullptr;
101   return symVector[it->second];
102 }
103 
104 void SymbolTable::replace(StringRef name, Symbol* sym) {
105   auto it = symMap.find(CachedHashStringRef(name));
106   symVector[it->second] = sym;
107 }
108 
109 std::pair<Symbol *, bool> SymbolTable::insertName(StringRef name) {
110   bool trace = false;
111   auto p = symMap.insert({CachedHashStringRef(name), (int)symVector.size()});
112   int &symIndex = p.first->second;
113   bool isNew = p.second;
114   if (symIndex == -1) {
115     symIndex = symVector.size();
116     trace = true;
117     isNew = true;
118   }
119 
120   if (!isNew)
121     return {symVector[symIndex], false};
122 
123   Symbol *sym = reinterpret_cast<Symbol *>(make<SymbolUnion>());
124   sym->isUsedInRegularObj = false;
125   sym->canInline = true;
126   sym->traced = trace;
127   sym->forceExport = false;
128   sym->referenced = !ctx.arg.gcSections;
129   symVector.emplace_back(sym);
130   return {sym, true};
131 }
132 
133 std::pair<Symbol *, bool> SymbolTable::insert(StringRef name,
134                                               const InputFile *file) {
135   Symbol *s;
136   bool wasInserted;
137   std::tie(s, wasInserted) = insertName(name);
138 
139   if (!file || file->kind() == InputFile::ObjectKind)
140     s->isUsedInRegularObj = true;
141 
142   return {s, wasInserted};
143 }
144 
145 static void reportTypeError(const Symbol *existing, const InputFile *file,
146                             llvm::wasm::WasmSymbolType type) {
147   error("symbol type mismatch: " + toString(*existing) + "\n>>> defined as " +
148         toString(existing->getWasmType()) + " in " +
149         toString(existing->getFile()) + "\n>>> defined as " + toString(type) +
150         " in " + toString(file));
151 }
152 
153 // Check the type of new symbol matches that of the symbol is replacing.
154 // Returns true if the function types match, false is there is a signature
155 // mismatch.
156 static bool signatureMatches(FunctionSymbol *existing,
157                              const WasmSignature *newSig) {
158   const WasmSignature *oldSig = existing->signature;
159 
160   // If either function is missing a signature (this happens for bitcode
161   // symbols) then assume they match.  Any mismatch will be reported later
162   // when the LTO objects are added.
163   if (!newSig || !oldSig)
164     return true;
165 
166   return *newSig == *oldSig;
167 }
168 
169 static void checkGlobalType(const Symbol *existing, const InputFile *file,
170                             const WasmGlobalType *newType) {
171   if (!isa<GlobalSymbol>(existing)) {
172     reportTypeError(existing, file, WASM_SYMBOL_TYPE_GLOBAL);
173     return;
174   }
175 
176   const WasmGlobalType *oldType = cast<GlobalSymbol>(existing)->getGlobalType();
177   if (*newType != *oldType) {
178     error("Global type mismatch: " + existing->getName() + "\n>>> defined as " +
179           toString(*oldType) + " in " + toString(existing->getFile()) +
180           "\n>>> defined as " + toString(*newType) + " in " + toString(file));
181   }
182 }
183 
184 static void checkTagType(const Symbol *existing, const InputFile *file,
185                          const WasmSignature *newSig) {
186   const auto *existingTag = dyn_cast<TagSymbol>(existing);
187   if (!isa<TagSymbol>(existing)) {
188     reportTypeError(existing, file, WASM_SYMBOL_TYPE_TAG);
189     return;
190   }
191 
192   const WasmSignature *oldSig = existingTag->signature;
193   if (*newSig != *oldSig)
194     warn("Tag signature mismatch: " + existing->getName() +
195          "\n>>> defined as " + toString(*oldSig) + " in " +
196          toString(existing->getFile()) + "\n>>> defined as " +
197          toString(*newSig) + " in " + toString(file));
198 }
199 
200 static void checkTableType(const Symbol *existing, const InputFile *file,
201                            const WasmTableType *newType) {
202   if (!isa<TableSymbol>(existing)) {
203     reportTypeError(existing, file, WASM_SYMBOL_TYPE_TABLE);
204     return;
205   }
206 
207   const WasmTableType *oldType = cast<TableSymbol>(existing)->getTableType();
208   if (newType->ElemType != oldType->ElemType) {
209     error("Table type mismatch: " + existing->getName() + "\n>>> defined as " +
210           toString(*oldType) + " in " + toString(existing->getFile()) +
211           "\n>>> defined as " + toString(*newType) + " in " + toString(file));
212   }
213   // FIXME: No assertions currently on the limits.
214 }
215 
216 static void checkDataType(const Symbol *existing, const InputFile *file) {
217   if (!isa<DataSymbol>(existing))
218     reportTypeError(existing, file, WASM_SYMBOL_TYPE_DATA);
219 }
220 
221 DefinedFunction *SymbolTable::addSyntheticFunction(StringRef name,
222                                                    uint32_t flags,
223                                                    InputFunction *function) {
224   LLVM_DEBUG(dbgs() << "addSyntheticFunction: " << name << "\n");
225   assert(!find(name));
226   ctx.syntheticFunctions.emplace_back(function);
227   return replaceSymbol<DefinedFunction>(insertName(name).first, name,
228                                         flags, nullptr, function);
229 }
230 
231 // Adds an optional, linker generated, data symbol.  The symbol will only be
232 // added if there is an undefine reference to it, or if it is explicitly
233 // exported via the --export flag.  Otherwise we don't add the symbol and return
234 // nullptr.
235 DefinedData *SymbolTable::addOptionalDataSymbol(StringRef name,
236                                                 uint64_t value) {
237   Symbol *s = find(name);
238   if (!s && (ctx.arg.exportAll || ctx.arg.exportedSymbols.count(name) != 0))
239     s = insertName(name).first;
240   else if (!s || s->isDefined())
241     return nullptr;
242   LLVM_DEBUG(dbgs() << "addOptionalDataSymbol: " << name << "\n");
243   auto *rtn = replaceSymbol<DefinedData>(
244       s, name, WASM_SYMBOL_VISIBILITY_HIDDEN | WASM_SYMBOL_ABSOLUTE);
245   rtn->setVA(value);
246   rtn->referenced = true;
247   return rtn;
248 }
249 
250 DefinedData *SymbolTable::addSyntheticDataSymbol(StringRef name,
251                                                  uint32_t flags) {
252   LLVM_DEBUG(dbgs() << "addSyntheticDataSymbol: " << name << "\n");
253   assert(!find(name));
254   return replaceSymbol<DefinedData>(insertName(name).first, name,
255                                     flags | WASM_SYMBOL_ABSOLUTE);
256 }
257 
258 DefinedGlobal *SymbolTable::addSyntheticGlobal(StringRef name, uint32_t flags,
259                                                InputGlobal *global) {
260   LLVM_DEBUG(dbgs() << "addSyntheticGlobal: " << name << " -> " << global
261                     << "\n");
262   assert(!find(name));
263   ctx.syntheticGlobals.emplace_back(global);
264   return replaceSymbol<DefinedGlobal>(insertName(name).first, name, flags,
265                                       nullptr, global);
266 }
267 
268 DefinedGlobal *SymbolTable::addOptionalGlobalSymbol(StringRef name,
269                                                     InputGlobal *global) {
270   Symbol *s = find(name);
271   if (!s || s->isDefined())
272     return nullptr;
273   LLVM_DEBUG(dbgs() << "addOptionalGlobalSymbol: " << name << " -> " << global
274                     << "\n");
275   ctx.syntheticGlobals.emplace_back(global);
276   return replaceSymbol<DefinedGlobal>(s, name, WASM_SYMBOL_VISIBILITY_HIDDEN,
277                                       nullptr, global);
278 }
279 
280 DefinedTable *SymbolTable::addSyntheticTable(StringRef name, uint32_t flags,
281                                              InputTable *table) {
282   LLVM_DEBUG(dbgs() << "addSyntheticTable: " << name << " -> " << table
283                     << "\n");
284   Symbol *s = find(name);
285   assert(!s || s->isUndefined());
286   if (!s)
287     s = insertName(name).first;
288   ctx.syntheticTables.emplace_back(table);
289   return replaceSymbol<DefinedTable>(s, name, flags, nullptr, table);
290 }
291 
292 static bool shouldReplace(const Symbol *existing, InputFile *newFile,
293                           uint32_t newFlags) {
294   // If existing symbol is undefined, replace it.
295   if (!existing->isDefined()) {
296     LLVM_DEBUG(dbgs() << "resolving existing undefined symbol: "
297                       << existing->getName() << "\n");
298     return true;
299   }
300 
301   // Now we have two defined symbols. If the new one is weak, we can ignore it.
302   if ((newFlags & WASM_SYMBOL_BINDING_MASK) == WASM_SYMBOL_BINDING_WEAK) {
303     LLVM_DEBUG(dbgs() << "existing symbol takes precedence\n");
304     return false;
305   }
306 
307   // If the existing symbol is weak, we should replace it.
308   if (existing->isWeak()) {
309     LLVM_DEBUG(dbgs() << "replacing existing weak symbol\n");
310     return true;
311   }
312 
313   // Similarly with shared symbols
314   if (existing->isShared()) {
315     LLVM_DEBUG(dbgs() << "replacing existing shared symbol\n");
316     return true;
317   }
318 
319   // Neither symbol is week. They conflict.
320   if (ctx.arg.allowMultipleDefinition)
321     return false;
322 
323   errorOrWarn("duplicate symbol: " + toString(*existing) + "\n>>> defined in " +
324               toString(existing->getFile()) + "\n>>> defined in " +
325               toString(newFile));
326   return true;
327 }
328 
329 static void reportFunctionSignatureMismatch(StringRef symName,
330                                             FunctionSymbol *sym,
331                                             const WasmSignature *signature,
332                                             InputFile *file,
333                                             bool isError = true) {
334   std::string msg =
335       ("function signature mismatch: " + symName + "\n>>> defined as " +
336        toString(*sym->signature) + " in " + toString(sym->getFile()) +
337        "\n>>> defined as " + toString(*signature) + " in " + toString(file))
338           .str();
339   if (isError)
340     error(msg);
341   else
342     warn(msg);
343 }
344 
345 static void reportFunctionSignatureMismatch(StringRef symName,
346                                             FunctionSymbol *a,
347                                             FunctionSymbol *b,
348                                             bool isError = true) {
349   reportFunctionSignatureMismatch(symName, a, b->signature, b->getFile(),
350                                   isError);
351 }
352 
353 Symbol *SymbolTable::addSharedFunction(StringRef name, uint32_t flags,
354                                        InputFile *file,
355                                        const WasmSignature *sig) {
356   LLVM_DEBUG(dbgs() << "addSharedFunction: " << name << " [" << toString(*sig)
357                     << "]\n");
358   Symbol *s;
359   bool wasInserted;
360   std::tie(s, wasInserted) = insert(name, file);
361 
362   auto replaceSym = [&](Symbol *sym) {
363     replaceSymbol<SharedFunctionSymbol>(sym, name, flags, file, sig);
364   };
365 
366   if (wasInserted || s->isLazy()) {
367     replaceSym(s);
368     return s;
369   }
370 
371   auto existingFunction = dyn_cast<FunctionSymbol>(s);
372   if (!existingFunction) {
373     reportTypeError(s, file, WASM_SYMBOL_TYPE_FUNCTION);
374     return s;
375   }
376 
377   // Shared symbols should never replace locally-defined ones
378   if (s->isDefined()) {
379     return s;
380   }
381 
382   LLVM_DEBUG(dbgs() << "resolving existing undefined symbol: " << s->getName()
383                     << "\n");
384 
385   bool checkSig = true;
386   if (auto ud = dyn_cast<UndefinedFunction>(existingFunction))
387     checkSig = ud->isCalledDirectly;
388 
389   if (checkSig && !signatureMatches(existingFunction, sig)) {
390     if (ctx.arg.shlibSigCheck) {
391       reportFunctionSignatureMismatch(name, existingFunction, sig, file);
392     } else {
393       // With --no-shlib-sigcheck we ignore the signature of the function as
394       // defined by the shared library and instead use the signature as
395       // expected by the program being linked.
396       sig = existingFunction->signature;
397     }
398   }
399 
400   replaceSym(s);
401   return s;
402 }
403 
404 Symbol *SymbolTable::addSharedData(StringRef name, uint32_t flags,
405                                    InputFile *file) {
406   LLVM_DEBUG(dbgs() << "addSharedData: " << name << "\n");
407   Symbol *s;
408   bool wasInserted;
409   std::tie(s, wasInserted) = insert(name, file);
410 
411   if (wasInserted || s->isLazy()) {
412     replaceSymbol<SharedData>(s, name, flags, file);
413     return s;
414   }
415 
416   // Shared symbols should never replace locally-defined ones
417   if (s->isDefined()) {
418     return s;
419   }
420 
421   checkDataType(s, file);
422   replaceSymbol<SharedData>(s, name, flags, file);
423   return s;
424 }
425 
426 Symbol *SymbolTable::addDefinedFunction(StringRef name, uint32_t flags,
427                                         InputFile *file,
428                                         InputFunction *function) {
429   LLVM_DEBUG(dbgs() << "addDefinedFunction: " << name << " ["
430                     << (function ? toString(function->signature) : "none")
431                     << "]\n");
432   Symbol *s;
433   bool wasInserted;
434   std::tie(s, wasInserted) = insert(name, file);
435 
436   auto replaceSym = [&](Symbol *sym) {
437     // If the new defined function doesn't have signature (i.e. bitcode
438     // functions) but the old symbol does, then preserve the old signature
439     const WasmSignature *oldSig = s->getSignature();
440     auto* newSym = replaceSymbol<DefinedFunction>(sym, name, flags, file, function);
441     if (!newSym->signature)
442       newSym->signature = oldSig;
443   };
444 
445   if (wasInserted || s->isLazy()) {
446     replaceSym(s);
447     return s;
448   }
449 
450   auto existingFunction = dyn_cast<FunctionSymbol>(s);
451   if (!existingFunction) {
452     reportTypeError(s, file, WASM_SYMBOL_TYPE_FUNCTION);
453     return s;
454   }
455 
456   bool checkSig = true;
457   if (auto ud = dyn_cast<UndefinedFunction>(existingFunction))
458     checkSig = ud->isCalledDirectly;
459 
460   if (checkSig && function && !signatureMatches(existingFunction, &function->signature)) {
461     Symbol* variant;
462     if (getFunctionVariant(s, &function->signature, file, &variant))
463       // New variant, always replace
464       replaceSym(variant);
465     else if (shouldReplace(s, file, flags))
466       // Variant already exists, replace it after checking shouldReplace
467       replaceSym(variant);
468 
469     // This variant we found take the place in the symbol table as the primary
470     // variant.
471     replace(name, variant);
472     return variant;
473   }
474 
475   // Existing function with matching signature.
476   if (shouldReplace(s, file, flags))
477     replaceSym(s);
478 
479   return s;
480 }
481 
482 Symbol *SymbolTable::addDefinedData(StringRef name, uint32_t flags,
483                                     InputFile *file, InputChunk *segment,
484                                     uint64_t address, uint64_t size) {
485   LLVM_DEBUG(dbgs() << "addDefinedData:" << name << " addr:" << address
486                     << "\n");
487   Symbol *s;
488   bool wasInserted;
489   std::tie(s, wasInserted) = insert(name, file);
490 
491   auto replaceSym = [&]() {
492     replaceSymbol<DefinedData>(s, name, flags, file, segment, address, size);
493   };
494 
495   if (wasInserted || s->isLazy()) {
496     replaceSym();
497     return s;
498   }
499 
500   checkDataType(s, file);
501 
502   if (shouldReplace(s, file, flags))
503     replaceSym();
504   return s;
505 }
506 
507 Symbol *SymbolTable::addDefinedGlobal(StringRef name, uint32_t flags,
508                                       InputFile *file, InputGlobal *global) {
509   LLVM_DEBUG(dbgs() << "addDefinedGlobal:" << name << "\n");
510 
511   Symbol *s;
512   bool wasInserted;
513   std::tie(s, wasInserted) = insert(name, file);
514 
515   auto replaceSym = [&]() {
516     replaceSymbol<DefinedGlobal>(s, name, flags, file, global);
517   };
518 
519   if (wasInserted || s->isLazy()) {
520     replaceSym();
521     return s;
522   }
523 
524   checkGlobalType(s, file, &global->getType());
525 
526   if (shouldReplace(s, file, flags))
527     replaceSym();
528   return s;
529 }
530 
531 Symbol *SymbolTable::addDefinedTag(StringRef name, uint32_t flags,
532                                    InputFile *file, InputTag *tag) {
533   LLVM_DEBUG(dbgs() << "addDefinedTag:" << name << "\n");
534 
535   Symbol *s;
536   bool wasInserted;
537   std::tie(s, wasInserted) = insert(name, file);
538 
539   auto replaceSym = [&]() {
540     replaceSymbol<DefinedTag>(s, name, flags, file, tag);
541   };
542 
543   if (wasInserted || s->isLazy()) {
544     replaceSym();
545     return s;
546   }
547 
548   checkTagType(s, file, &tag->signature);
549 
550   if (shouldReplace(s, file, flags))
551     replaceSym();
552   return s;
553 }
554 
555 Symbol *SymbolTable::addDefinedTable(StringRef name, uint32_t flags,
556                                      InputFile *file, InputTable *table) {
557   LLVM_DEBUG(dbgs() << "addDefinedTable:" << name << "\n");
558 
559   Symbol *s;
560   bool wasInserted;
561   std::tie(s, wasInserted) = insert(name, file);
562 
563   auto replaceSym = [&]() {
564     replaceSymbol<DefinedTable>(s, name, flags, file, table);
565   };
566 
567   if (wasInserted || s->isLazy()) {
568     replaceSym();
569     return s;
570   }
571 
572   checkTableType(s, file, &table->getType());
573 
574   if (shouldReplace(s, file, flags))
575     replaceSym();
576   return s;
577 }
578 
579 // This function get called when an undefined symbol is added, and there is
580 // already an existing one in the symbols table.  In this case we check that
581 // custom 'import-module' and 'import-field' symbol attributes agree.
582 // With LTO these attributes are not available when the bitcode is read and only
583 // become available when the LTO object is read.  In this case we silently
584 // replace the empty attributes with the valid ones.
585 template <typename T>
586 static void setImportAttributes(T *existing,
587                                 std::optional<StringRef> importName,
588                                 std::optional<StringRef> importModule,
589                                 uint32_t flags, InputFile *file) {
590   if (importName) {
591     if (!existing->importName)
592       existing->importName = importName;
593     if (existing->importName != importName)
594       error("import name mismatch for symbol: " + toString(*existing) +
595             "\n>>> defined as " + *existing->importName + " in " +
596             toString(existing->getFile()) + "\n>>> defined as " + *importName +
597             " in " + toString(file));
598   }
599 
600   if (importModule) {
601     if (!existing->importModule)
602       existing->importModule = importModule;
603     if (existing->importModule != importModule)
604       error("import module mismatch for symbol: " + toString(*existing) +
605             "\n>>> defined as " + *existing->importModule + " in " +
606             toString(existing->getFile()) + "\n>>> defined as " +
607             *importModule + " in " + toString(file));
608   }
609 
610   // Update symbol binding, if the existing symbol is weak
611   uint32_t binding = flags & WASM_SYMBOL_BINDING_MASK;
612   if (existing->isWeak() && binding != WASM_SYMBOL_BINDING_WEAK) {
613     existing->flags = (existing->flags & ~WASM_SYMBOL_BINDING_MASK) | binding;
614   }
615 }
616 
617 Symbol *SymbolTable::addUndefinedFunction(StringRef name,
618                                           std::optional<StringRef> importName,
619                                           std::optional<StringRef> importModule,
620                                           uint32_t flags, InputFile *file,
621                                           const WasmSignature *sig,
622                                           bool isCalledDirectly) {
623   LLVM_DEBUG(dbgs() << "addUndefinedFunction: " << name << " ["
624                     << (sig ? toString(*sig) : "none")
625                     << "] IsCalledDirectly:" << isCalledDirectly << " flags=0x"
626                     << utohexstr(flags) << "\n");
627   assert(flags & WASM_SYMBOL_UNDEFINED);
628 
629   Symbol *s;
630   bool wasInserted;
631   std::tie(s, wasInserted) = insert(name, file);
632   if (s->traced)
633     printTraceSymbolUndefined(name, file);
634 
635   auto replaceSym = [&]() {
636     replaceSymbol<UndefinedFunction>(s, name, importName, importModule, flags,
637                                      file, sig, isCalledDirectly);
638   };
639 
640   if (wasInserted) {
641     replaceSym();
642   } else if (auto *lazy = dyn_cast<LazySymbol>(s)) {
643     if ((flags & WASM_SYMBOL_BINDING_MASK) == WASM_SYMBOL_BINDING_WEAK) {
644       lazy->setWeak();
645       lazy->signature = sig;
646     } else {
647       lazy->extract();
648       if (!ctx.arg.whyExtract.empty())
649         ctx.whyExtractRecords.emplace_back(toString(file), s->getFile(), *s);
650     }
651   } else {
652     auto existingFunction = dyn_cast<FunctionSymbol>(s);
653     if (!existingFunction) {
654       reportTypeError(s, file, WASM_SYMBOL_TYPE_FUNCTION);
655       return s;
656     }
657     if (!existingFunction->signature && sig)
658       existingFunction->signature = sig;
659     auto *existingUndefined = dyn_cast<UndefinedFunction>(existingFunction);
660     if (isCalledDirectly && !signatureMatches(existingFunction, sig)) {
661       if (existingFunction->isShared()) {
662         // Special handling for when the existing function is a shared symbol
663         if (ctx.arg.shlibSigCheck) {
664           reportFunctionSignatureMismatch(name, existingFunction, sig, file);
665         } else {
666           existingFunction->signature = sig;
667         }
668       }
669       // If the existing undefined functions is not called directly then let
670       // this one take precedence.  Otherwise the existing function is either
671       // directly called or defined, in which case we need a function variant.
672       else if (existingUndefined && !existingUndefined->isCalledDirectly)
673         replaceSym();
674       else if (getFunctionVariant(s, sig, file, &s))
675         replaceSym();
676     }
677     if (existingUndefined) {
678       setImportAttributes(existingUndefined, importName, importModule, flags,
679                           file);
680       if (isCalledDirectly)
681         existingUndefined->isCalledDirectly = true;
682       if (s->isWeak())
683         s->flags = flags;
684     }
685   }
686 
687   return s;
688 }
689 
690 Symbol *SymbolTable::addUndefinedData(StringRef name, uint32_t flags,
691                                       InputFile *file) {
692   LLVM_DEBUG(dbgs() << "addUndefinedData: " << name << "\n");
693   assert(flags & WASM_SYMBOL_UNDEFINED);
694 
695   Symbol *s;
696   bool wasInserted;
697   std::tie(s, wasInserted) = insert(name, file);
698   if (s->traced)
699     printTraceSymbolUndefined(name, file);
700 
701   if (wasInserted) {
702     replaceSymbol<UndefinedData>(s, name, flags, file);
703   } else if (auto *lazy = dyn_cast<LazySymbol>(s)) {
704     if ((flags & WASM_SYMBOL_BINDING_MASK) == WASM_SYMBOL_BINDING_WEAK)
705       lazy->setWeak();
706     else
707       lazy->extract();
708   } else if (s->isDefined()) {
709     checkDataType(s, file);
710   } else if (s->isWeak()) {
711     s->flags = flags;
712   }
713   return s;
714 }
715 
716 Symbol *SymbolTable::addUndefinedGlobal(StringRef name,
717                                         std::optional<StringRef> importName,
718                                         std::optional<StringRef> importModule,
719                                         uint32_t flags, InputFile *file,
720                                         const WasmGlobalType *type) {
721   LLVM_DEBUG(dbgs() << "addUndefinedGlobal: " << name << "\n");
722   assert(flags & WASM_SYMBOL_UNDEFINED);
723 
724   Symbol *s;
725   bool wasInserted;
726   std::tie(s, wasInserted) = insert(name, file);
727   if (s->traced)
728     printTraceSymbolUndefined(name, file);
729 
730   if (wasInserted)
731     replaceSymbol<UndefinedGlobal>(s, name, importName, importModule, flags,
732                                    file, type);
733   else if (auto *lazy = dyn_cast<LazySymbol>(s))
734     lazy->extract();
735   else if (s->isDefined())
736     checkGlobalType(s, file, type);
737   else if (s->isWeak())
738     s->flags = flags;
739   return s;
740 }
741 
742 Symbol *SymbolTable::addUndefinedTable(StringRef name,
743                                        std::optional<StringRef> importName,
744                                        std::optional<StringRef> importModule,
745                                        uint32_t flags, InputFile *file,
746                                        const WasmTableType *type) {
747   LLVM_DEBUG(dbgs() << "addUndefinedTable: " << name << "\n");
748   assert(flags & WASM_SYMBOL_UNDEFINED);
749 
750   Symbol *s;
751   bool wasInserted;
752   std::tie(s, wasInserted) = insert(name, file);
753   if (s->traced)
754     printTraceSymbolUndefined(name, file);
755 
756   if (wasInserted)
757     replaceSymbol<UndefinedTable>(s, name, importName, importModule, flags,
758                                   file, type);
759   else if (auto *lazy = dyn_cast<LazySymbol>(s))
760     lazy->extract();
761   else if (s->isDefined())
762     checkTableType(s, file, type);
763   else if (s->isWeak())
764     s->flags = flags;
765   return s;
766 }
767 
768 Symbol *SymbolTable::addUndefinedTag(StringRef name,
769                                      std::optional<StringRef> importName,
770                                      std::optional<StringRef> importModule,
771                                      uint32_t flags, InputFile *file,
772                                      const WasmSignature *sig) {
773   LLVM_DEBUG(dbgs() << "addUndefinedTag: " << name << "\n");
774   assert(flags & WASM_SYMBOL_UNDEFINED);
775 
776   Symbol *s;
777   bool wasInserted;
778   std::tie(s, wasInserted) = insert(name, file);
779   if (s->traced)
780     printTraceSymbolUndefined(name, file);
781 
782   if (wasInserted)
783     replaceSymbol<UndefinedTag>(s, name, importName, importModule, flags, file,
784                                 sig);
785   else if (auto *lazy = dyn_cast<LazySymbol>(s))
786     lazy->extract();
787   else if (s->isDefined())
788     checkTagType(s, file, sig);
789   else if (s->isWeak())
790     s->flags = flags;
791   return s;
792 }
793 
794 TableSymbol *SymbolTable::createUndefinedIndirectFunctionTable(StringRef name) {
795   WasmLimits limits{0, 0, 0}; // Set by the writer.
796   WasmTableType *type = make<WasmTableType>();
797   type->ElemType = ValType::FUNCREF;
798   type->Limits = limits;
799   uint32_t flags = ctx.arg.exportTable ? 0 : WASM_SYMBOL_VISIBILITY_HIDDEN;
800   flags |= WASM_SYMBOL_UNDEFINED;
801   Symbol *sym =
802       addUndefinedTable(name, name, defaultModule, flags, nullptr, type);
803   sym->markLive();
804   sym->forceExport = ctx.arg.exportTable;
805   return cast<TableSymbol>(sym);
806 }
807 
808 TableSymbol *SymbolTable::createDefinedIndirectFunctionTable(StringRef name) {
809   const uint32_t invalidIndex = -1;
810   WasmLimits limits{0, 0, 0}; // Set by the writer.
811   WasmTableType type{ValType::FUNCREF, limits};
812   WasmTable desc{invalidIndex, type, name};
813   InputTable *table = make<InputTable>(desc, nullptr);
814   uint32_t flags = ctx.arg.exportTable ? 0 : WASM_SYMBOL_VISIBILITY_HIDDEN;
815   TableSymbol *sym = addSyntheticTable(name, flags, table);
816   sym->markLive();
817   sym->forceExport = ctx.arg.exportTable;
818   return sym;
819 }
820 
821 // Whether or not we need an indirect function table is usually a function of
822 // whether an input declares a need for it.  However sometimes it's possible for
823 // no input to need the indirect function table, but then a late
824 // addInternalGOTEntry causes a function to be allocated an address.  In that
825 // case address we synthesize a definition at the last minute.
826 TableSymbol *SymbolTable::resolveIndirectFunctionTable(bool required) {
827   Symbol *existing = find(functionTableName);
828   if (existing) {
829     if (!isa<TableSymbol>(existing)) {
830       error(Twine("reserved symbol must be of type table: `") +
831             functionTableName + "`");
832       return nullptr;
833     }
834     if (existing->isDefined()) {
835       error(Twine("reserved symbol must not be defined in input files: `") +
836             functionTableName + "`");
837       return nullptr;
838     }
839   }
840 
841   if (ctx.arg.importTable) {
842     if (existing) {
843       existing->importModule = defaultModule;
844       existing->importName = functionTableName;
845       return cast<TableSymbol>(existing);
846     }
847     if (required)
848       return createUndefinedIndirectFunctionTable(functionTableName);
849   } else if ((existing && existing->isLive()) || ctx.arg.exportTable ||
850              required) {
851     // A defined table is required.  Either because the user request an exported
852     // table or because the table symbol is already live.  The existing table is
853     // guaranteed to be undefined due to the check above.
854     return createDefinedIndirectFunctionTable(functionTableName);
855   }
856 
857   // An indirect function table will only be present in the symbol table if
858   // needed by a reloc; if we get here, we don't need one.
859   return nullptr;
860 }
861 
862 void SymbolTable::addLazy(StringRef name, InputFile *file) {
863   LLVM_DEBUG(dbgs() << "addLazy: " << name << "\n");
864 
865   Symbol *s;
866   bool wasInserted;
867   std::tie(s, wasInserted) = insertName(name);
868 
869   if (wasInserted) {
870     replaceSymbol<LazySymbol>(s, name, 0, file);
871     return;
872   }
873 
874   if (!s->isUndefined())
875     return;
876 
877   // The existing symbol is undefined, load a new one from the archive,
878   // unless the existing symbol is weak in which case replace the undefined
879   // symbols with a LazySymbol.
880   if (s->isWeak()) {
881     const WasmSignature *oldSig = nullptr;
882     // In the case of an UndefinedFunction we need to preserve the expected
883     // signature.
884     if (auto *f = dyn_cast<UndefinedFunction>(s))
885       oldSig = f->signature;
886     LLVM_DEBUG(dbgs() << "replacing existing weak undefined symbol\n");
887     auto newSym =
888         replaceSymbol<LazySymbol>(s, name, WASM_SYMBOL_BINDING_WEAK, file);
889     newSym->signature = oldSig;
890     return;
891   }
892 
893   LLVM_DEBUG(dbgs() << "replacing existing undefined\n");
894   const InputFile *oldFile = s->getFile();
895   LazySymbol(name, 0, file).extract();
896   if (!ctx.arg.whyExtract.empty())
897     ctx.whyExtractRecords.emplace_back(toString(oldFile), s->getFile(), *s);
898 }
899 
900 bool SymbolTable::addComdat(StringRef name) {
901   return comdatGroups.insert(CachedHashStringRef(name)).second;
902 }
903 
904 // The new signature doesn't match.  Create a variant to the symbol with the
905 // signature encoded in the name and return that instead.  These symbols are
906 // then unified later in handleSymbolVariants.
907 bool SymbolTable::getFunctionVariant(Symbol* sym, const WasmSignature *sig,
908                                      const InputFile *file, Symbol **out) {
909   LLVM_DEBUG(dbgs() << "getFunctionVariant: " << sym->getName() << " -> "
910                     << " " << toString(*sig) << "\n");
911   Symbol *variant = nullptr;
912 
913   // Linear search through symbol variants.  Should never be more than two
914   // or three entries here.
915   auto &variants = symVariants[CachedHashStringRef(sym->getName())];
916   if (variants.empty())
917     variants.push_back(sym);
918 
919   for (Symbol* v : variants) {
920     if (*v->getSignature() == *sig) {
921       variant = v;
922       break;
923     }
924   }
925 
926   bool wasAdded = !variant;
927   if (wasAdded) {
928     // Create a new variant;
929     LLVM_DEBUG(dbgs() << "added new variant\n");
930     variant = reinterpret_cast<Symbol *>(make<SymbolUnion>());
931     variant->isUsedInRegularObj =
932         !file || file->kind() == InputFile::ObjectKind;
933     variant->canInline = true;
934     variant->traced = false;
935     variant->forceExport = false;
936     variants.push_back(variant);
937   } else {
938     LLVM_DEBUG(dbgs() << "variant already exists: " << toString(*variant) << "\n");
939     assert(*variant->getSignature() == *sig);
940   }
941 
942   *out = variant;
943   return wasAdded;
944 }
945 
946 // Set a flag for --trace-symbol so that we can print out a log message
947 // if a new symbol with the same name is inserted into the symbol table.
948 void SymbolTable::trace(StringRef name) {
949   symMap.insert({CachedHashStringRef(name), -1});
950 }
951 
952 void SymbolTable::wrap(Symbol *sym, Symbol *real, Symbol *wrap) {
953   // Swap symbols as instructed by -wrap.
954   int &origIdx = symMap[CachedHashStringRef(sym->getName())];
955   int &realIdx= symMap[CachedHashStringRef(real->getName())];
956   int &wrapIdx = symMap[CachedHashStringRef(wrap->getName())];
957   LLVM_DEBUG(dbgs() << "wrap: " << sym->getName() << "\n");
958 
959   // Anyone looking up __real symbols should get the original
960   realIdx = origIdx;
961   // Anyone looking up the original should get the __wrap symbol
962   origIdx = wrapIdx;
963 }
964 
// Encoded body of a function that consists of a single `unreachable`.
static const uint8_t unreachableFn[] = {
    0x03, // ULEB length
    0x00, // ULEB num locals
    0x00, // opcode unreachable
    0x0b, // opcode end
};
969 
970 // Replace the given symbol body with an unreachable function.
971 // This is used by handleWeakUndefines in order to generate a callable
972 // equivalent of an undefined function and also handleSymbolVariants for
973 // undefined functions that don't match the signature of the definition.
974 InputFunction *SymbolTable::replaceWithUnreachable(Symbol *sym,
975                                                    const WasmSignature &sig,
976                                                    StringRef debugName) {
977   auto *func = make<SyntheticFunction>(sig, sym->getName(), debugName);
978   func->setBody(unreachableFn);
979   ctx.syntheticFunctions.emplace_back(func);
980   // Mark new symbols as local. For relocatable output we don't want them
981   // to be exported outside the object file.
982   replaceSymbol<DefinedFunction>(sym, debugName, WASM_SYMBOL_BINDING_LOCAL,
983                                  nullptr, func);
984   // Ensure the stub function doesn't get a table entry.  Its address
985   // should always compare equal to the null pointer.
986   sym->isStub = true;
987   return func;
988 }
989 
990 void SymbolTable::replaceWithUndefined(Symbol *sym) {
991   // Add a synthetic dummy for weak undefined functions.  These dummies will
992   // be GC'd if not used as the target of any "call" instructions.
993   StringRef debugName = saver().save("undefined_weak:" + toString(*sym));
994   replaceWithUnreachable(sym, *sym->getSignature(), debugName);
995   // Hide our dummy to prevent export.
996   sym->setHidden(true);
997 }
998 
999 // For weak undefined functions, there may be "call" instructions that reference
1000 // the symbol. In this case, we need to synthesise a dummy/stub function that
1001 // will abort at runtime, so that relocations can still provided an operand to
1002 // the call instruction that passes Wasm validation.
1003 void SymbolTable::handleWeakUndefines() {
1004   for (Symbol *sym : symbols()) {
1005     if (sym->isUndefWeak() && sym->isUsedInRegularObj) {
1006       if (sym->getSignature()) {
1007         replaceWithUndefined(sym);
1008       } else {
1009         // It is possible for undefined functions not to have a signature (eg.
1010         // if added via "--undefined"), but weak undefined ones do have a
1011         // signature.  Lazy symbols may not be functions and therefore Sig can
1012         // still be null in some circumstance.
1013         assert(!isa<FunctionSymbol>(sym));
1014       }
1015     }
1016   }
1017 }
1018 
1019 DefinedFunction *SymbolTable::createUndefinedStub(const WasmSignature &sig) {
1020   if (auto it = stubFunctions.find(sig); it != stubFunctions.end())
1021     return it->second;
1022   LLVM_DEBUG(dbgs() << "createUndefinedStub: " << toString(sig) << "\n");
1023   auto *sym = reinterpret_cast<DefinedFunction *>(make<SymbolUnion>());
1024   sym->isUsedInRegularObj = true;
1025   sym->canInline = true;
1026   sym->traced = false;
1027   sym->forceExport = false;
1028   sym->signature = &sig;
1029   replaceSymbol<DefinedFunction>(
1030       sym, "undefined_stub", WASM_SYMBOL_VISIBILITY_HIDDEN, nullptr, nullptr);
1031   replaceWithUnreachable(sym, sig, "undefined_stub");
1032   stubFunctions[sig] = sym;
1033   return sym;
1034 }
1035 
1036 // Remove any variant symbols that were created due to function signature
1037 // mismatches.
1038 void SymbolTable::handleSymbolVariants() {
1039   for (auto pair : symVariants) {
1040     // Push the initial symbol onto the list of variants.
1041     StringRef symName = pair.first.val();
1042     std::vector<Symbol *> &variants = pair.second;
1043 
1044 #ifndef NDEBUG
1045     LLVM_DEBUG(dbgs() << "symbol with (" << variants.size()
1046                       << ") variants: " << symName << "\n");
1047     for (auto *s: variants) {
1048       auto *f = cast<FunctionSymbol>(s);
1049       LLVM_DEBUG(dbgs() << " variant: " + f->getName() << " "
1050                         << toString(*f->signature) << "\n");
1051     }
1052 #endif
1053 
1054     // Find the one definition.
1055     DefinedFunction *defined = nullptr;
1056     for (auto *symbol : variants) {
1057       if (auto f = dyn_cast<DefinedFunction>(symbol)) {
1058         defined = f;
1059         break;
1060       }
1061     }
1062 
1063     // If there are no definitions, and the undefined symbols disagree on
1064     // the signature, there is not we can do since we don't know which one
1065     // to use as the signature on the import.
1066     if (!defined) {
1067       reportFunctionSignatureMismatch(symName,
1068                                       cast<FunctionSymbol>(variants[0]),
1069                                       cast<FunctionSymbol>(variants[1]));
1070       return;
1071     }
1072 
1073     for (auto *symbol : variants) {
1074       if (symbol != defined) {
1075         auto *f = cast<FunctionSymbol>(symbol);
1076         reportFunctionSignatureMismatch(symName, f, defined, false);
1077         StringRef debugName =
1078             saver().save("signature_mismatch:" + toString(*f));
1079         replaceWithUnreachable(f, *f->signature, debugName);
1080       }
1081     }
1082   }
1083 }
1084 
} // namespace lld::wasm
1086