xref: /llvm-project/lld/wasm/InputFiles.cpp (revision 3792b36234b6c87d728f0a905543e284bf961460)
1c94d393aSSam Clegg //===- InputFiles.cpp -----------------------------------------------------===//
2c94d393aSSam Clegg //
32946cd70SChandler Carruth // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
42946cd70SChandler Carruth // See https://llvm.org/LICENSE.txt for license information.
52946cd70SChandler Carruth // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6c94d393aSSam Clegg //
7c94d393aSSam Clegg //===----------------------------------------------------------------------===//
8c94d393aSSam Clegg 
9c94d393aSSam Clegg #include "InputFiles.h"
10c94d393aSSam Clegg #include "Config.h"
115fa274beSSam Clegg #include "InputChunks.h"
12a56e5749SAndy Wingo #include "InputElement.h"
13a28a4662SSam Clegg #include "OutputSegment.h"
14c94d393aSSam Clegg #include "SymbolTable.h"
153111784fSSam Clegg #include "lld/Common/Args.h"
1683d59e05SAlexandre Ganea #include "lld/Common/CommonLinkerContext.h"
1735150bb5SRui Ueyama #include "lld/Common/Reproduce.h"
18ef1f999eSDerek Schuff #include "llvm/BinaryFormat/Wasm.h"
19c94d393aSSam Clegg #include "llvm/Object/Binary.h"
20c94d393aSSam Clegg #include "llvm/Object/Wasm.h"
21d4efc3e0SYuta Saito #include "llvm/ProfileData/InstrProf.h"
2228848e9eSSam Clegg #include "llvm/Support/Path.h"
2335150bb5SRui Ueyama #include "llvm/Support/TarWriter.h"
24c94d393aSSam Clegg #include "llvm/Support/raw_ostream.h"
25b9ef5648SKazu Hirata #include <optional>
26c94d393aSSam Clegg 
27c94d393aSSam Clegg #define DEBUG_TYPE "lld"
28c94d393aSSam Clegg 
29c94d393aSSam Clegg using namespace llvm;
30c94d393aSSam Clegg using namespace llvm::object;
31c94d393aSSam Clegg using namespace llvm::wasm;
3228848e9eSSam Clegg using namespace llvm::sys;
33c94d393aSSam Clegg 
3433c59abfSFangrui Song namespace lld {
3535150bb5SRui Ueyama 
3633c59abfSFangrui Song // Returns a string in the format of "foo.o" or "foo.a(bar.o)".
3733c59abfSFangrui Song std::string toString(const wasm::InputFile *file) {
3833c59abfSFangrui Song   if (!file)
3933c59abfSFangrui Song     return "<internal>";
4033c59abfSFangrui Song 
4133c59abfSFangrui Song   if (file->archiveName.empty())
42adcd0268SBenjamin Kramer     return std::string(file->getName());
4333c59abfSFangrui Song 
4433c59abfSFangrui Song   return (file->archiveName + "(" + file->getName() + ")").str();
4533c59abfSFangrui Song }
4633c59abfSFangrui Song 
4733c59abfSFangrui Song namespace wasm {
48b8c2d60dSWouter van Oortmerssen 
49b70eb863SSam Clegg std::string replaceThinLTOSuffix(StringRef path) {
50*3792b362SFangrui Song   auto [suffix, repl] = ctx.arg.thinLTOObjectSuffixReplace;
51b70eb863SSam Clegg   if (path.consume_back(suffix))
52b70eb863SSam Clegg     return (path + repl).str();
53b70eb863SSam Clegg   return std::string(path);
54b70eb863SSam Clegg }
55b70eb863SSam Clegg 
56b8c2d60dSWouter van Oortmerssen void InputFile::checkArch(Triple::ArchType arch) const {
57b8c2d60dSWouter van Oortmerssen   bool is64 = arch == Triple::wasm64;
58*3792b362SFangrui Song   if (is64 && !ctx.arg.is64) {
59b8c2d60dSWouter van Oortmerssen     fatal(toString(this) +
60b8c2d60dSWouter van Oortmerssen           ": must specify -mwasm64 to process wasm64 object files");
61*3792b362SFangrui Song   } else if (ctx.arg.is64.value_or(false) != is64) {
62b8c2d60dSWouter van Oortmerssen     fatal(toString(this) +
63b8c2d60dSWouter van Oortmerssen           ": wasm32 object file can't be linked in wasm64 mode");
64b8c2d60dSWouter van Oortmerssen   }
65b8c2d60dSWouter van Oortmerssen }
66b8c2d60dSWouter van Oortmerssen 
6733c59abfSFangrui Song std::unique_ptr<llvm::TarWriter> tar;
6833c59abfSFangrui Song 
69b9ef5648SKazu Hirata std::optional<MemoryBufferRef> readFile(StringRef path) {
70136d27abSRui Ueyama   log("Loading: " + path);
71c94d393aSSam Clegg 
72136d27abSRui Ueyama   auto mbOrErr = MemoryBuffer::getFile(path);
73136d27abSRui Ueyama   if (auto ec = mbOrErr.getError()) {
74136d27abSRui Ueyama     error("cannot open " + path + ": " + ec.message());
75c68af42fSKazu Hirata     return std::nullopt;
76c94d393aSSam Clegg   }
77136d27abSRui Ueyama   std::unique_ptr<MemoryBuffer> &mb = *mbOrErr;
78136d27abSRui Ueyama   MemoryBufferRef mbref = mb->getMemBufferRef();
79136d27abSRui Ueyama   make<std::unique_ptr<MemoryBuffer>>(std::move(mb)); // take MB ownership
80c94d393aSSam Clegg 
81136d27abSRui Ueyama   if (tar)
82136d27abSRui Ueyama     tar->append(relativeToRoot(path), mbref.getBuffer());
83136d27abSRui Ueyama   return mbref;
84c94d393aSSam Clegg }
85c94d393aSSam Clegg 
8628848e9eSSam Clegg InputFile *createObjectFile(MemoryBufferRef mb, StringRef archiveName,
87bcc9b9d8SSam Clegg                             uint64_t offsetInArchive, bool lazy) {
88136d27abSRui Ueyama   file_magic magic = identify_magic(mb.getBuffer());
89136d27abSRui Ueyama   if (magic == file_magic::wasm_object) {
90136d27abSRui Ueyama     std::unique_ptr<Binary> bin =
91136d27abSRui Ueyama         CHECK(createBinary(mb), mb.getBufferIdentifier());
92136d27abSRui Ueyama     auto *obj = cast<WasmObjectFile>(bin.get());
937f409cd8SDerek Schuff     if (obj->hasUnmodeledTypes())
947f409cd8SDerek Schuff       fatal(toString(mb.getBufferIdentifier()) +
957f409cd8SDerek Schuff             "file has unmodeled reference or GC types");
96136d27abSRui Ueyama     if (obj->isSharedObject())
97136d27abSRui Ueyama       return make<SharedFile>(mb);
98bcc9b9d8SSam Clegg     return make<ObjFile>(mb, archiveName, lazy);
99a688a42cSSam Clegg   }
1008adf7ac5SSam Clegg 
101bcc9b9d8SSam Clegg   assert(magic == file_magic::bitcode);
102bcc9b9d8SSam Clegg   return make<BitcodeFile>(mb, archiveName, offsetInArchive, lazy);
1038adf7ac5SSam Clegg }
1048adf7ac5SSam Clegg 
105c1be8230SSam Clegg // Relocations contain either symbol or type indices.  This function takes a
106c1be8230SSam Clegg // relocation and returns relocated index (i.e. translates from the input
1076f4286fbSHeejin Ahn // symbol/type space to the output symbol/type space).
108136d27abSRui Ueyama uint32_t ObjFile::calcNewIndex(const WasmRelocation &reloc) const {
109136d27abSRui Ueyama   if (reloc.Type == R_WASM_TYPE_INDEX_LEB) {
110136d27abSRui Ueyama     assert(typeIsUsed[reloc.Index]);
111136d27abSRui Ueyama     return typeMap[reloc.Index];
112d1063bb9SRui Ueyama   }
113136d27abSRui Ueyama   const Symbol *sym = symbols[reloc.Index];
114136d27abSRui Ueyama   if (auto *ss = dyn_cast<SectionSymbol>(sym))
115136d27abSRui Ueyama     sym = ss->getOutputSectionSymbol();
116136d27abSRui Ueyama   return sym->getOutputSymbolIndex();
117d96d9357SSam Clegg }
118d96d9357SSam Clegg 
119d177ab2aSSam Clegg // Relocations can contain addend for combined sections. This function takes a
120d177ab2aSSam Clegg // relocation and returns updated addend by offset in the output section.
1214c75521cSSam Clegg int64_t ObjFile::calcNewAddend(const WasmRelocation &reloc) const {
122136d27abSRui Ueyama   switch (reloc.Type) {
12379e33171SSam Clegg   case R_WASM_MEMORY_ADDR_LEB:
1243b29376eSWouter van Oortmerssen   case R_WASM_MEMORY_ADDR_LEB64:
1253b29376eSWouter van Oortmerssen   case R_WASM_MEMORY_ADDR_SLEB64:
12679e33171SSam Clegg   case R_WASM_MEMORY_ADDR_SLEB:
1275bb0dcd9SKeno Fischer   case R_WASM_MEMORY_ADDR_REL_SLEB:
1283b29376eSWouter van Oortmerssen   case R_WASM_MEMORY_ADDR_REL_SLEB64:
12979e33171SSam Clegg   case R_WASM_MEMORY_ADDR_I32:
1303b29376eSWouter van Oortmerssen   case R_WASM_MEMORY_ADDR_I64:
13107b6aeb5SSam Clegg   case R_WASM_MEMORY_ADDR_TLS_SLEB:
132670944fbSWouter van Oortmerssen   case R_WASM_MEMORY_ADDR_TLS_SLEB64:
13379e33171SSam Clegg   case R_WASM_FUNCTION_OFFSET_I32:
13416f02431SWouter van Oortmerssen   case R_WASM_FUNCTION_OFFSET_I64:
135aa0c571aSYuta Saito   case R_WASM_MEMORY_ADDR_LOCREL_I32:
136136d27abSRui Ueyama     return reloc.Addend;
13779e33171SSam Clegg   case R_WASM_SECTION_OFFSET_I32:
13814ffbb84SSam Clegg     return getSectionSymbol(reloc.Index)->section->getOffset(reloc.Addend);
139d177ab2aSSam Clegg   default:
140d177ab2aSSam Clegg     llvm_unreachable("unexpected relocation type");
141d177ab2aSSam Clegg   }
142d177ab2aSSam Clegg }
143d177ab2aSSam Clegg 
144ab604a98SSam Clegg // Translate from the relocation's index into the final linked output value.
145aa0c571aSYuta Saito uint64_t ObjFile::calcNewValue(const WasmRelocation &reloc, uint64_t tombstone,
146aa0c571aSYuta Saito                                const InputChunk *chunk) const {
147136d27abSRui Ueyama   const Symbol* sym = nullptr;
148136d27abSRui Ueyama   if (reloc.Type != R_WASM_TYPE_INDEX_LEB) {
149136d27abSRui Ueyama     sym = symbols[reloc.Index];
1507cdec273SSam Clegg 
1517cdec273SSam Clegg     // We can end up with relocations against non-live symbols.  For example
1528b8088acSEric Leese     // in debug sections. We return a tombstone value in debug symbol sections
1538b8088acSEric Leese     // so this will not produce a valid range conflicting with ranges of actual
1548b8088acSEric Leese     // code. In other sections we return reloc.Addend.
1558b8088acSEric Leese 
1563e7bc0daSSam Clegg     if (!isa<SectionSymbol>(sym) && !sym->isLive())
1578b8088acSEric Leese       return tombstone ? tombstone : reloc.Addend;
1587cdec273SSam Clegg   }
1597cdec273SSam Clegg 
160136d27abSRui Ueyama   switch (reloc.Type) {
16179e33171SSam Clegg   case R_WASM_TABLE_INDEX_I32:
162cc1b9b68SWouter van Oortmerssen   case R_WASM_TABLE_INDEX_I64:
16379e33171SSam Clegg   case R_WASM_TABLE_INDEX_SLEB:
164cc1b9b68SWouter van Oortmerssen   case R_WASM_TABLE_INDEX_SLEB64:
1653a293cbfSWouter van Oortmerssen   case R_WASM_TABLE_INDEX_REL_SLEB:
1663a293cbfSWouter van Oortmerssen   case R_WASM_TABLE_INDEX_REL_SLEB64: {
167cf2b8722SSam Clegg     if (!getFunctionSymbol(reloc.Index)->hasTableIndex())
168ea38ac5bSSam Clegg       return 0;
169937b9558SSam Clegg     uint32_t index = getFunctionSymbol(reloc.Index)->getTableIndex();
1703a293cbfSWouter van Oortmerssen     if (reloc.Type == R_WASM_TABLE_INDEX_REL_SLEB ||
1713a293cbfSWouter van Oortmerssen         reloc.Type == R_WASM_TABLE_INDEX_REL_SLEB64)
172*3792b362SFangrui Song       index -= ctx.arg.tableBase;
173937b9558SSam Clegg     return index;
174937b9558SSam Clegg   }
17579e33171SSam Clegg   case R_WASM_MEMORY_ADDR_LEB:
1763b29376eSWouter van Oortmerssen   case R_WASM_MEMORY_ADDR_LEB64:
1773b29376eSWouter van Oortmerssen   case R_WASM_MEMORY_ADDR_SLEB:
1783b29376eSWouter van Oortmerssen   case R_WASM_MEMORY_ADDR_SLEB64:
1792a7cac93SSam Clegg   case R_WASM_MEMORY_ADDR_REL_SLEB:
1803b29376eSWouter van Oortmerssen   case R_WASM_MEMORY_ADDR_REL_SLEB64:
1813b29376eSWouter van Oortmerssen   case R_WASM_MEMORY_ADDR_I32:
182aa0c571aSYuta Saito   case R_WASM_MEMORY_ADDR_I64:
183fad05465SSam Clegg   case R_WASM_MEMORY_ADDR_TLS_SLEB:
184fad05465SSam Clegg   case R_WASM_MEMORY_ADDR_TLS_SLEB64:
185aa0c571aSYuta Saito   case R_WASM_MEMORY_ADDR_LOCREL_I32: {
18622b7b848SSam Clegg     if (isa<UndefinedData>(sym) || sym->isShared() || sym->isUndefWeak())
1879abe8c48SSam Clegg       return 0;
188a28a4662SSam Clegg     auto D = cast<DefinedData>(sym);
18919cedd3cSSam Clegg     uint64_t value = D->getVA() + reloc.Addend;
190aa0c571aSYuta Saito     if (reloc.Type == R_WASM_MEMORY_ADDR_LOCREL_I32) {
191aa0c571aSYuta Saito       const auto *segment = cast<InputSegment>(chunk);
192aa0c571aSYuta Saito       uint64_t p = segment->outputSeg->startVA + segment->outputSegmentOffset +
193aa0c571aSYuta Saito                    reloc.Offset - segment->getInputSectionOffset();
194aa0c571aSYuta Saito       value -= p;
195aa0c571aSYuta Saito     }
196aa0c571aSYuta Saito     return value;
197a28a4662SSam Clegg   }
19879e33171SSam Clegg   case R_WASM_TYPE_INDEX_LEB:
199136d27abSRui Ueyama     return typeMap[reloc.Index];
20079e33171SSam Clegg   case R_WASM_FUNCTION_INDEX_LEB:
201220fe00aSBrendan Dahl   case R_WASM_FUNCTION_INDEX_I32:
202136d27abSRui Ueyama     return getFunctionSymbol(reloc.Index)->getFunctionIndex();
2037cdec273SSam Clegg   case R_WASM_GLOBAL_INDEX_LEB:
20448139ebcSWouter van Oortmerssen   case R_WASM_GLOBAL_INDEX_I32:
205136d27abSRui Ueyama     if (auto gs = dyn_cast<GlobalSymbol>(sym))
206136d27abSRui Ueyama       return gs->getGlobalIndex();
207136d27abSRui Ueyama     return sym->getGOTIndex();
2081d891d44SHeejin Ahn   case R_WASM_TAG_INDEX_LEB:
2091d891d44SHeejin Ahn     return getTagSymbol(reloc.Index)->getTagIndex();
21016f02431SWouter van Oortmerssen   case R_WASM_FUNCTION_OFFSET_I32:
21116f02431SWouter van Oortmerssen   case R_WASM_FUNCTION_OFFSET_I64: {
2126f5c5cbeSSam Clegg     if (isa<UndefinedFunction>(sym)) {
2136f5c5cbeSSam Clegg       return tombstone ? tombstone : reloc.Addend;
2146f5c5cbeSSam Clegg     }
215136d27abSRui Ueyama     auto *f = cast<DefinedFunction>(sym);
21614ffbb84SSam Clegg     return f->function->getOffset(f->function->getFunctionCodeOffset() +
21714ffbb84SSam Clegg                                   reloc.Addend);
218d177ab2aSSam Clegg   }
21979e33171SSam Clegg   case R_WASM_SECTION_OFFSET_I32:
22014ffbb84SSam Clegg     return getSectionSymbol(reloc.Index)->section->getOffset(reloc.Addend);
22153e3b81fSAndy Wingo   case R_WASM_TABLE_NUMBER_LEB:
22253e3b81fSAndy Wingo     return getTableSymbol(reloc.Index)->getTableNumber();
223ab604a98SSam Clegg   default:
224ab604a98SSam Clegg     llvm_unreachable("unknown relocation type");
225ab604a98SSam Clegg   }
226ab604a98SSam Clegg }
227ab604a98SSam Clegg 
22847078f56SSam Clegg template <class T>
229136d27abSRui Ueyama static void setRelocs(const std::vector<T *> &chunks,
230136d27abSRui Ueyama                       const WasmSection *section) {
231136d27abSRui Ueyama   if (!section)
23247078f56SSam Clegg     return;
23347078f56SSam Clegg 
234136d27abSRui Ueyama   ArrayRef<WasmRelocation> relocs = section->Relocations;
2351647ff6eSGeorgii Rymar   assert(llvm::is_sorted(
2361647ff6eSGeorgii Rymar       relocs, [](const WasmRelocation &r1, const WasmRelocation &r2) {
237136d27abSRui Ueyama         return r1.Offset < r2.Offset;
23847078f56SSam Clegg       }));
2391647ff6eSGeorgii Rymar   assert(llvm::is_sorted(chunks, [](InputChunk *c1, InputChunk *c2) {
240136d27abSRui Ueyama     return c1->getInputSectionOffset() < c2->getInputSectionOffset();
24147078f56SSam Clegg   }));
24247078f56SSam Clegg 
243136d27abSRui Ueyama   auto relocsNext = relocs.begin();
244136d27abSRui Ueyama   auto relocsEnd = relocs.end();
245136d27abSRui Ueyama   auto relocLess = [](const WasmRelocation &r, uint32_t val) {
246136d27abSRui Ueyama     return r.Offset < val;
24747078f56SSam Clegg   };
248136d27abSRui Ueyama   for (InputChunk *c : chunks) {
249136d27abSRui Ueyama     auto relocsStart = std::lower_bound(relocsNext, relocsEnd,
250136d27abSRui Ueyama                                         c->getInputSectionOffset(), relocLess);
251136d27abSRui Ueyama     relocsNext = std::lower_bound(
252136d27abSRui Ueyama         relocsStart, relocsEnd, c->getInputSectionOffset() + c->getInputSize(),
253136d27abSRui Ueyama         relocLess);
254136d27abSRui Ueyama     c->setRelocations(ArrayRef<WasmRelocation>(relocsStart, relocsNext));
25547078f56SSam Clegg   }
25647078f56SSam Clegg }
25747078f56SSam Clegg 
258c3536b26SDan Gohman // An object file can have two approaches to tables.  With the
259c3536b26SDan Gohman // reference-types feature or call-indirect-overlong feature enabled
260c3536b26SDan Gohman // (explicitly, or implied by the reference-types feature), input files that
261c3536b26SDan Gohman // define or use tables declare the tables using symbols, and record each use
262c3536b26SDan Gohman // with a relocation.  This way when the linker combines inputs, it can collate
263c3536b26SDan Gohman // the tables used by the inputs, assigning them distinct table numbers, and
264c3536b26SDan Gohman // renumber all the uses as appropriate.  At the same time, the linker has
265c3536b26SDan Gohman // special logic to build the indirect function table if it is needed.
26663393828SAndy Wingo //
2674fc25573SAndy Wingo // However, MVP object files (those that target WebAssembly 1.0, the "minimum
2684fc25573SAndy Wingo // viable product" version of WebAssembly) neither write table symbols nor
2694fc25573SAndy Wingo // record relocations.  These files can have at most one table, the indirect
2704fc25573SAndy Wingo // function table used by call_indirect and which is the address space for
2714fc25573SAndy Wingo // function pointers.  If this table is present, it is always an import.  If we
2724fc25573SAndy Wingo // have a file with a table import but no table symbols, it is an MVP object
2734fc25573SAndy Wingo // file.  synthesizeMVPIndirectFunctionTableSymbolIfNeeded serves as a shim when
2744fc25573SAndy Wingo // loading these input files, defining the missing symbol to allow the indirect
2754fc25573SAndy Wingo // function table to be built.
27663393828SAndy Wingo //
2774fc25573SAndy Wingo // As indirect function table table usage in MVP objects cannot be relocated,
2784fc25573SAndy Wingo // the linker must ensure that this table gets assigned index zero.
2794fc25573SAndy Wingo void ObjFile::addLegacyIndirectFunctionTableIfNeeded(
2804fc25573SAndy Wingo     uint32_t tableSymbolCount) {
2814fc25573SAndy Wingo   uint32_t tableCount = wasmObj->getNumImportedTables() + tables.size();
2824fc25573SAndy Wingo 
2834fc25573SAndy Wingo   // If there are symbols for all tables, then all is good.
2844fc25573SAndy Wingo   if (tableCount == tableSymbolCount)
2854fc25573SAndy Wingo     return;
2864fc25573SAndy Wingo 
2874fc25573SAndy Wingo   // It's possible for an input to define tables and also use the indirect
288c3536b26SDan Gohman   // function table, but forget to compile with -mattr=+call-indirect-overlong
289c3536b26SDan Gohman   // or -mattr=+reference-types. For these newer files, we require symbols for
290c3536b26SDan Gohman   // all tables, and relocations for all of their uses.
2914fc25573SAndy Wingo   if (tableSymbolCount != 0) {
2924fc25573SAndy Wingo     error(toString(this) +
2934fc25573SAndy Wingo           ": expected one symbol table entry for each of the " +
2944fc25573SAndy Wingo           Twine(tableCount) + " table(s) present, but got " +
2954fc25573SAndy Wingo           Twine(tableSymbolCount) + " symbol(s) instead.");
2964fc25573SAndy Wingo     return;
2974fc25573SAndy Wingo   }
2984fc25573SAndy Wingo 
2994fc25573SAndy Wingo   // An MVP object file can have up to one table import, for the indirect
3004fc25573SAndy Wingo   // function table, but will have no table definitions.
3014fc25573SAndy Wingo   if (tables.size()) {
3024fc25573SAndy Wingo     error(toString(this) +
3034fc25573SAndy Wingo           ": unexpected table definition(s) without corresponding "
3044fc25573SAndy Wingo           "symbol-table entries.");
3054fc25573SAndy Wingo     return;
3064fc25573SAndy Wingo   }
3074fc25573SAndy Wingo 
3084fc25573SAndy Wingo   // An MVP object file can have only one table import.
3094fc25573SAndy Wingo   if (tableCount != 1) {
3104fc25573SAndy Wingo     error(toString(this) +
3114fc25573SAndy Wingo           ": multiple table imports, but no corresponding symbol-table "
3124fc25573SAndy Wingo           "entries.");
3134fc25573SAndy Wingo     return;
3144fc25573SAndy Wingo   }
3154fc25573SAndy Wingo 
3164fc25573SAndy Wingo   const WasmImport *tableImport = nullptr;
3174fc25573SAndy Wingo   for (const auto &import : wasmObj->imports()) {
3184fc25573SAndy Wingo     if (import.Kind == WASM_EXTERNAL_TABLE) {
3194fc25573SAndy Wingo       assert(!tableImport);
3204fc25573SAndy Wingo       tableImport = &import;
3214fc25573SAndy Wingo     }
3224fc25573SAndy Wingo   }
3234fc25573SAndy Wingo   assert(tableImport);
3244fc25573SAndy Wingo 
3254fc25573SAndy Wingo   // We can only synthesize a symtab entry for the indirect function table; if
3264fc25573SAndy Wingo   // it has an unexpected name or type, assume that it's not actually the
3274fc25573SAndy Wingo   // indirect function table.
3284fc25573SAndy Wingo   if (tableImport->Field != functionTableName ||
329103fa325SDerek Schuff       tableImport->Table.ElemType != ValType::FUNCREF) {
3304fc25573SAndy Wingo     error(toString(this) + ": table import " + Twine(tableImport->Field) +
3314fc25573SAndy Wingo           " is missing a symbol table entry.");
3324fc25573SAndy Wingo     return;
3334fc25573SAndy Wingo   }
3344fc25573SAndy Wingo 
335ef1f999eSDerek Schuff   WasmSymbolInfo info;
336ef1f999eSDerek Schuff   info.Name = tableImport->Field;
337ef1f999eSDerek Schuff   info.Kind = WASM_SYMBOL_TYPE_TABLE;
338ef1f999eSDerek Schuff   info.ImportModule = tableImport->Module;
339ef1f999eSDerek Schuff   info.ImportName = tableImport->Field;
340ef1f999eSDerek Schuff   info.Flags = WASM_SYMBOL_UNDEFINED | WASM_SYMBOL_NO_STRIP;
341ef1f999eSDerek Schuff   info.ElementIndex = 0;
342ef1f999eSDerek Schuff   LLVM_DEBUG(dbgs() << "Synthesizing symbol for table import: " << info.Name
3434fc25573SAndy Wingo                     << "\n");
34463393828SAndy Wingo   const WasmGlobalType *globalType = nullptr;
34563393828SAndy Wingo   const WasmSignature *signature = nullptr;
3463ec1760dSHeejin Ahn   auto *wasmSym =
347ef1f999eSDerek Schuff       make<WasmSymbol>(info, globalType, &tableImport->Table, signature);
3484fc25573SAndy Wingo   Symbol *sym = createUndefined(*wasmSym, false);
3494fc25573SAndy Wingo   // We're only sure it's a TableSymbol if the createUndefined succeeded.
3504fc25573SAndy Wingo   if (errorCount())
3514fc25573SAndy Wingo     return;
3524fc25573SAndy Wingo   symbols.push_back(sym);
3534fc25573SAndy Wingo   // Because there are no TABLE_NUMBER relocs, we can't compute accurate
3544fc25573SAndy Wingo   // liveness info; instead, just mark the symbol as always live.
3554fc25573SAndy Wingo   sym->markLive();
3564fc25573SAndy Wingo 
3574fc25573SAndy Wingo   // We assume that this compilation unit has unrelocatable references to
3584fc25573SAndy Wingo   // this table.
359184c22ddSSam Clegg   ctx.legacyFunctionTable = true;
36063393828SAndy Wingo }
36163393828SAndy Wingo 
36245b7cf99SSam Clegg static bool shouldMerge(const WasmSection &sec) {
363*3792b362SFangrui Song   if (ctx.arg.optimize == 0)
36445b7cf99SSam Clegg     return false;
36545b7cf99SSam Clegg   // Sadly we don't have section attributes yet for custom sections, so we
36645b7cf99SSam Clegg   // currently go by the name alone.
36745b7cf99SSam Clegg   // TODO(sbc): Add ability for wasm sections to carry flags so we don't
36845b7cf99SSam Clegg   // need to use names here.
369c1a59fa5SSam Clegg   // For now, keep in sync with uses of wasm::WASM_SEG_FLAG_STRINGS in
370c1a59fa5SSam Clegg   // MCObjectFileInfo::initWasmMCObjectFileInfo which creates these custom
371c1a59fa5SSam Clegg   // sections.
372c1a59fa5SSam Clegg   return sec.Name == ".debug_str" || sec.Name == ".debug_str.dwo" ||
373c1a59fa5SSam Clegg          sec.Name == ".debug_line_str";
37445b7cf99SSam Clegg }
37545b7cf99SSam Clegg 
3763b8d2be5SSam Clegg static bool shouldMerge(const WasmSegment &seg) {
3773b8d2be5SSam Clegg   // As of now we only support merging strings, and only with single byte
3783b8d2be5SSam Clegg   // alignment (2^0).
3793b8d2be5SSam Clegg   if (!(seg.Data.LinkingFlags & WASM_SEG_FLAG_STRINGS) ||
3803b8d2be5SSam Clegg       (seg.Data.Alignment != 0))
3813b8d2be5SSam Clegg     return false;
3823b8d2be5SSam Clegg 
3833b8d2be5SSam Clegg   // On a regular link we don't merge sections if -O0 (default is -O1). This
3843b8d2be5SSam Clegg   // sometimes makes the linker significantly faster, although the output will
3853b8d2be5SSam Clegg   // be bigger.
386*3792b362SFangrui Song   if (ctx.arg.optimize == 0)
3873b8d2be5SSam Clegg     return false;
3883b8d2be5SSam Clegg 
3893b8d2be5SSam Clegg   // A mergeable section with size 0 is useless because they don't have
3903b8d2be5SSam Clegg   // any data to merge. A mergeable string section with size 0 can be
3913b8d2be5SSam Clegg   // argued as invalid because it doesn't end with a null character.
3923b8d2be5SSam Clegg   // We'll avoid a mess by handling them as if they were non-mergeable.
3933b8d2be5SSam Clegg   if (seg.Data.Content.size() == 0)
3943b8d2be5SSam Clegg     return false;
3953b8d2be5SSam Clegg 
3963b8d2be5SSam Clegg   return true;
3973b8d2be5SSam Clegg }
3983b8d2be5SSam Clegg 
399bcc9b9d8SSam Clegg void ObjFile::parseLazy() {
40022b7b848SSam Clegg   LLVM_DEBUG(dbgs() << "ObjFile::parseLazy: " << toString(this) << " "
40122b7b848SSam Clegg                     << wasmObj.get() << "\n");
402bcc9b9d8SSam Clegg   for (const SymbolRef &sym : wasmObj->symbols()) {
403bcc9b9d8SSam Clegg     const WasmSymbol &wasmSym = wasmObj->getWasmSymbol(sym.getRawDataRefImpl());
40454031231SSam Clegg     if (wasmSym.isUndefined() || wasmSym.isBindingLocal())
405bcc9b9d8SSam Clegg       continue;
406bcc9b9d8SSam Clegg     symtab->addLazy(wasmSym.Info.Name, this);
407bcc9b9d8SSam Clegg     // addLazy() may trigger this->extract() if an existing symbol is an
408bcc9b9d8SSam Clegg     // undefined symbol. If that happens, this function has served its purpose,
409bcc9b9d8SSam Clegg     // and we can exit from the loop early.
410bcc9b9d8SSam Clegg     if (!lazy)
411bcc9b9d8SSam Clegg       break;
412bcc9b9d8SSam Clegg   }
413bcc9b9d8SSam Clegg }
414bcc9b9d8SSam Clegg 
415bcc9b9d8SSam Clegg ObjFile::ObjFile(MemoryBufferRef m, StringRef archiveName, bool lazy)
41622b7b848SSam Clegg     : WasmFileBase(ObjectKind, m) {
417bcc9b9d8SSam Clegg   this->lazy = lazy;
418bcc9b9d8SSam Clegg   this->archiveName = std::string(archiveName);
419bcc9b9d8SSam Clegg 
420ad2ff172SSam Clegg   // Currently we only do this check for regular object file, and not for shared
421ad2ff172SSam Clegg   // object files.  This is because architecture detection for shared objects is
422ad2ff172SSam Clegg   // currently based on a heuristic, which is fallable:
423ad2ff172SSam Clegg   // https://github.com/llvm/llvm-project/issues/98778
424ad2ff172SSam Clegg   checkArch(wasmObj->getArch());
425ad2ff172SSam Clegg 
426bcc9b9d8SSam Clegg   // If this isn't part of an archive, it's eagerly linked, so mark it live.
427bcc9b9d8SSam Clegg   if (archiveName.empty())
428bcc9b9d8SSam Clegg     markLive();
42922b7b848SSam Clegg }
430bcc9b9d8SSam Clegg 
43122b7b848SSam Clegg void SharedFile::parse() {
43222b7b848SSam Clegg   assert(wasmObj->isSharedObject());
43322b7b848SSam Clegg 
43422b7b848SSam Clegg   for (const SymbolRef &sym : wasmObj->symbols()) {
43522b7b848SSam Clegg     const WasmSymbol &wasmSym = wasmObj->getWasmSymbol(sym.getRawDataRefImpl());
43622b7b848SSam Clegg     if (wasmSym.isDefined()) {
43722b7b848SSam Clegg       StringRef name = wasmSym.Info.Name;
43822b7b848SSam Clegg       // Certain shared library exports are known to be DSO-local so we
43922b7b848SSam Clegg       // don't want to add them to the symbol table.
44022b7b848SSam Clegg       // TODO(sbc): Instead of hardcoding these here perhaps we could add
44122b7b848SSam Clegg       // this as extra metadata in the `dylink` section.
44222b7b848SSam Clegg       if (name == "__wasm_apply_data_relocs" || name == "__wasm_call_ctors" ||
44322b7b848SSam Clegg           name.starts_with("__start_") || name.starts_with("__stop_"))
44422b7b848SSam Clegg         continue;
44522b7b848SSam Clegg       uint32_t flags = wasmSym.Info.Flags;
44622b7b848SSam Clegg       Symbol *s;
44722b7b848SSam Clegg       LLVM_DEBUG(dbgs() << "shared symbol: " << name << "\n");
44822b7b848SSam Clegg       switch (wasmSym.Info.Kind) {
44922b7b848SSam Clegg       case WASM_SYMBOL_TYPE_FUNCTION:
45022b7b848SSam Clegg         s = symtab->addSharedFunction(name, flags, this, wasmSym.Signature);
45122b7b848SSam Clegg         break;
45222b7b848SSam Clegg       case WASM_SYMBOL_TYPE_DATA:
45322b7b848SSam Clegg         s = symtab->addSharedData(name, flags, this);
45422b7b848SSam Clegg         break;
45522b7b848SSam Clegg       default:
45622b7b848SSam Clegg         continue;
45722b7b848SSam Clegg       }
45822b7b848SSam Clegg       symbols.push_back(s);
45922b7b848SSam Clegg     }
46022b7b848SSam Clegg   }
46122b7b848SSam Clegg }
46222b7b848SSam Clegg 
463d4efc3e0SYuta Saito // Returns the alignment for a custom section. This is used to concatenate
464d4efc3e0SYuta Saito // custom sections with the same name into a single custom section.
465d4efc3e0SYuta Saito static uint32_t getCustomSectionAlignment(const WasmSection &sec) {
466d4efc3e0SYuta Saito   // TODO: Add a section attribute for alignment in the linking spec.
467d4efc3e0SYuta Saito   if (sec.Name == getInstrProfSectionName(IPSK_covfun, Triple::Wasm) ||
468d4efc3e0SYuta Saito       sec.Name == getInstrProfSectionName(IPSK_covmap, Triple::Wasm)) {
469d4efc3e0SYuta Saito     // llvm-cov assumes that coverage metadata sections are 8-byte aligned.
470d4efc3e0SYuta Saito     return 8;
471d4efc3e0SYuta Saito   }
472d4efc3e0SYuta Saito   return 1;
473d4efc3e0SYuta Saito }
474d4efc3e0SYuta Saito 
47522b7b848SSam Clegg WasmFileBase::WasmFileBase(Kind k, MemoryBufferRef m) : InputFile(k, m) {
47622b7b848SSam Clegg   // Parse a memory buffer as a wasm file.
47722b7b848SSam Clegg   LLVM_DEBUG(dbgs() << "Reading object: " << toString(this) << "\n");
478136d27abSRui Ueyama   std::unique_ptr<Binary> bin = CHECK(createBinary(mb), toString(this));
479c94d393aSSam Clegg 
480136d27abSRui Ueyama   auto *obj = dyn_cast<WasmObjectFile>(bin.get());
481136d27abSRui Ueyama   if (!obj)
482c94d393aSSam Clegg     fatal(toString(this) + ": not a wasm file");
483c94d393aSSam Clegg 
484136d27abSRui Ueyama   bin.release();
485136d27abSRui Ueyama   wasmObj.reset(obj);
486bcc9b9d8SSam Clegg }
487bcc9b9d8SSam Clegg 
488bcc9b9d8SSam Clegg void ObjFile::parse(bool ignoreComdats) {
489bcc9b9d8SSam Clegg   // Parse a memory buffer as a wasm file.
490bcc9b9d8SSam Clegg   LLVM_DEBUG(dbgs() << "ObjFile::parse: " << toString(this) << "\n");
491b8c2d60dSWouter van Oortmerssen 
49222b7b848SSam Clegg   if (!wasmObj->isRelocatableObject())
49322b7b848SSam Clegg     fatal(toString(this) + ": not a relocatable wasm file");
49422b7b848SSam Clegg 
495dbd33b80SSam Clegg   // Build up a map of function indices to table indices for use when
496dbd33b80SSam Clegg   // verifying the existing table index relocations
497136d27abSRui Ueyama   uint32_t totalFunctions =
498136d27abSRui Ueyama       wasmObj->getNumImportedFunctions() + wasmObj->functions().size();
49981443ac1SSam Clegg   tableEntriesRel.resize(totalFunctions);
500136d27abSRui Ueyama   tableEntries.resize(totalFunctions);
501136d27abSRui Ueyama   for (const WasmElemSegment &seg : wasmObj->elements()) {
5023b29376eSWouter van Oortmerssen     int64_t offset;
5039504ab32SSam Clegg     if (seg.Offset.Extended)
5049504ab32SSam Clegg       fatal(toString(this) + ": extended init exprs not supported");
5059504ab32SSam Clegg     else if (seg.Offset.Inst.Opcode == WASM_OPCODE_I32_CONST)
5069504ab32SSam Clegg       offset = seg.Offset.Inst.Value.Int32;
5079504ab32SSam Clegg     else if (seg.Offset.Inst.Opcode == WASM_OPCODE_I64_CONST)
5089504ab32SSam Clegg       offset = seg.Offset.Inst.Value.Int64;
5093b29376eSWouter van Oortmerssen     else
510dbd33b80SSam Clegg       fatal(toString(this) + ": invalid table elements");
5113b29376eSWouter van Oortmerssen     for (size_t index = 0; index < seg.Functions.size(); index++) {
5123b29376eSWouter van Oortmerssen       auto functionIndex = seg.Functions[index];
51381443ac1SSam Clegg       tableEntriesRel[functionIndex] = index;
514136d27abSRui Ueyama       tableEntries[functionIndex] = offset + index;
515dbd33b80SSam Clegg     }
516dbd33b80SSam Clegg   }
517dbd33b80SSam Clegg 
518dd6412c0SDerek Schuff   ArrayRef<StringRef> comdats = wasmObj->linkingData().Comdats;
519dd6412c0SDerek Schuff   for (StringRef comdat : comdats) {
520dd6412c0SDerek Schuff     bool isNew = ignoreComdats || symtab->addComdat(comdat);
521dd6412c0SDerek Schuff     keptComdats.push_back(isNew);
522dd6412c0SDerek Schuff   }
523dd6412c0SDerek Schuff 
524136d27abSRui Ueyama   uint32_t sectionIndex = 0;
52556e970d4SSam Clegg 
52656e970d4SSam Clegg   // Bool for each symbol, true if called directly.  This allows us to implement
52756e970d4SSam Clegg   // a weaker form of signature checking where undefined functions that are not
52856e970d4SSam Clegg   // called directly (i.e. only address taken) don't have to match the defined
52956e970d4SSam Clegg   // function's signature.  We cannot do this for directly called functions
53056e970d4SSam Clegg   // because those signatures are checked at validation times.
5314e844a14SHeejin Ahn   // See https://github.com/llvm/llvm-project/issues/39758
532136d27abSRui Ueyama   std::vector<bool> isCalledDirectly(wasmObj->getNumberOfSymbols(), false);
533136d27abSRui Ueyama   for (const SectionRef &sec : wasmObj->sections()) {
534136d27abSRui Ueyama     const WasmSection &section = wasmObj->getWasmSection(sec);
53559f959ffSSam Clegg     // Wasm objects can have at most one code and one data section.
536136d27abSRui Ueyama     if (section.Type == WASM_SEC_CODE) {
537136d27abSRui Ueyama       assert(!codeSection);
538136d27abSRui Ueyama       codeSection = &section;
539136d27abSRui Ueyama     } else if (section.Type == WASM_SEC_DATA) {
540136d27abSRui Ueyama       assert(!dataSection);
541136d27abSRui Ueyama       dataSection = &section;
542136d27abSRui Ueyama     } else if (section.Type == WASM_SEC_CUSTOM) {
54345b7cf99SSam Clegg       InputChunk *customSec;
544d4efc3e0SYuta Saito       uint32_t alignment = getCustomSectionAlignment(section);
54545b7cf99SSam Clegg       if (shouldMerge(section))
546d4efc3e0SYuta Saito         customSec = make<MergeInputChunk>(section, this, alignment);
54745b7cf99SSam Clegg       else
548d4efc3e0SYuta Saito         customSec = make<InputSection>(section, this, alignment);
549dd6412c0SDerek Schuff       customSec->discarded = isExcludedByComdat(customSec);
550dd6412c0SDerek Schuff       customSections.emplace_back(customSec);
551136d27abSRui Ueyama       customSections.back()->setRelocations(section.Relocations);
552136d27abSRui Ueyama       customSectionsByIndex[sectionIndex] = customSections.back();
553d177ab2aSSam Clegg     }
554136d27abSRui Ueyama     sectionIndex++;
5557ae3d335SKazuaki Ishizaki     // Scans relocations to determine if a function symbol is called directly.
556136d27abSRui Ueyama     for (const WasmRelocation &reloc : section.Relocations)
557136d27abSRui Ueyama       if (reloc.Type == R_WASM_FUNCTION_INDEX_LEB)
558136d27abSRui Ueyama         isCalledDirectly[reloc.Index] = true;
559c94d393aSSam Clegg   }
560c94d393aSSam Clegg 
561136d27abSRui Ueyama   typeMap.resize(getWasmObj()->types().size());
562136d27abSRui Ueyama   typeIsUsed.resize(getWasmObj()->types().size(), false);
5638f6d2defSSam Clegg 
564c4d9aa1bSNicholas Wilson 
5650a9583ceSRui Ueyama   // Populate `Segments`.
566136d27abSRui Ueyama   for (const WasmSegment &s : wasmObj->dataSegments()) {
5675a9b25e1SSam Clegg     InputChunk *seg;
568875ee937SSam Clegg     if (shouldMerge(s))
56945b7cf99SSam Clegg       seg = make<MergeInputChunk>(s, this);
570875ee937SSam Clegg     else
57145b7cf99SSam Clegg       seg = make<InputSegment>(s, this);
572136d27abSRui Ueyama     seg->discarded = isExcludedByComdat(seg);
57344177e5fSSam Clegg     // Older object files did not include WASM_SEG_FLAG_TLS and instead
57444177e5fSSam Clegg     // relied on the naming convention.  To maintain compat with such objects
57544177e5fSSam Clegg     // we still imply the TLS flag based on the name of the segment.
57644177e5fSSam Clegg     if (!seg->isTLS() &&
5778d85c96eSFangrui Song         (seg->name.starts_with(".tdata") || seg->name.starts_with(".tbss")))
57844177e5fSSam Clegg       seg->flags |= WASM_SEG_FLAG_TLS;
579136d27abSRui Ueyama     segments.emplace_back(seg);
580fd54fa5dSSam Clegg   }
581136d27abSRui Ueyama   setRelocs(segments, dataSection);
582c94d393aSSam Clegg 
5830a9583ceSRui Ueyama   // Populate `Functions`.
584136d27abSRui Ueyama   ArrayRef<WasmFunction> funcs = wasmObj->functions();
585136d27abSRui Ueyama   ArrayRef<WasmSignature> types = wasmObj->types();
586136d27abSRui Ueyama   functions.reserve(funcs.size());
5870a9583ceSRui Ueyama 
588c0039de2SSam Clegg   for (auto &f : funcs) {
589c0039de2SSam Clegg     auto *func = make<InputFunction>(types[f.SigIndex], &f, this);
590136d27abSRui Ueyama     func->discarded = isExcludedByComdat(func);
591136d27abSRui Ueyama     functions.emplace_back(func);
592fd54fa5dSSam Clegg   }
593136d27abSRui Ueyama   setRelocs(functions, codeSection);
5948d146bbcSSam Clegg 
59553e3b81fSAndy Wingo   // Populate `Tables`.
59653e3b81fSAndy Wingo   for (const WasmTable &t : wasmObj->tables())
59753e3b81fSAndy Wingo     tables.emplace_back(make<InputTable>(t, this));
59853e3b81fSAndy Wingo 
5990a9583ceSRui Ueyama   // Populate `Globals`.
600136d27abSRui Ueyama   for (const WasmGlobal &g : wasmObj->globals())
601136d27abSRui Ueyama     globals.emplace_back(make<InputGlobal>(g, this));
6028d146bbcSSam Clegg 
6031d891d44SHeejin Ahn   // Populate `Tags`.
6041d891d44SHeejin Ahn   for (const WasmTag &t : wasmObj->tags())
6053ec1760dSHeejin Ahn     tags.emplace_back(make<InputTag>(types[t.SigIndex], t, this));
606e915a71fSHeejin Ahn 
60733fdf82dSFangrui Song   // Populate `Symbols` based on the symbols in the object.
608136d27abSRui Ueyama   symbols.reserve(wasmObj->getNumberOfSymbols());
6094fc25573SAndy Wingo   uint32_t tableSymbolCount = 0;
610136d27abSRui Ueyama   for (const SymbolRef &sym : wasmObj->symbols()) {
611136d27abSRui Ueyama     const WasmSymbol &wasmSym = wasmObj->getWasmSymbol(sym.getRawDataRefImpl());
61263393828SAndy Wingo     if (wasmSym.isTypeTable())
6134fc25573SAndy Wingo       tableSymbolCount++;
614136d27abSRui Ueyama     if (wasmSym.isDefined()) {
61559f959ffSSam Clegg       // createDefined may fail if the symbol is comdat excluded in which case
61659f959ffSSam Clegg       // we fall back to creating an undefined symbol
617136d27abSRui Ueyama       if (Symbol *d = createDefined(wasmSym)) {
618136d27abSRui Ueyama         symbols.push_back(d);
61959f959ffSSam Clegg         continue;
62059f959ffSSam Clegg       }
62159f959ffSSam Clegg     }
622136d27abSRui Ueyama     size_t idx = symbols.size();
623136d27abSRui Ueyama     symbols.push_back(createUndefined(wasmSym, isCalledDirectly[idx]));
6240a9583ceSRui Ueyama   }
62563393828SAndy Wingo 
6264fc25573SAndy Wingo   addLegacyIndirectFunctionTableIfNeeded(tableSymbolCount);
62793102974SSam Clegg }
62893102974SSam Clegg 
629c0039de2SSam Clegg bool ObjFile::isExcludedByComdat(const InputChunk *chunk) const {
630136d27abSRui Ueyama   uint32_t c = chunk->getComdat();
631136d27abSRui Ueyama   if (c == UINT32_MAX)
632dcf6234dSRui Ueyama     return false;
633136d27abSRui Ueyama   return !keptComdats[c];
634e0f6fcd0SSam Clegg }
635e0f6fcd0SSam Clegg 
636136d27abSRui Ueyama FunctionSymbol *ObjFile::getFunctionSymbol(uint32_t index) const {
637136d27abSRui Ueyama   return cast<FunctionSymbol>(symbols[index]);
63893102974SSam Clegg }
63993102974SSam Clegg 
640136d27abSRui Ueyama GlobalSymbol *ObjFile::getGlobalSymbol(uint32_t index) const {
641136d27abSRui Ueyama   return cast<GlobalSymbol>(symbols[index]);
64293102974SSam Clegg }
64393102974SSam Clegg 
6441d891d44SHeejin Ahn TagSymbol *ObjFile::getTagSymbol(uint32_t index) const {
6451d891d44SHeejin Ahn   return cast<TagSymbol>(symbols[index]);
646e915a71fSHeejin Ahn }
647e915a71fSHeejin Ahn 
64853e3b81fSAndy Wingo TableSymbol *ObjFile::getTableSymbol(uint32_t index) const {
64953e3b81fSAndy Wingo   return cast<TableSymbol>(symbols[index]);
65053e3b81fSAndy Wingo }
65153e3b81fSAndy Wingo 
652136d27abSRui Ueyama SectionSymbol *ObjFile::getSectionSymbol(uint32_t index) const {
653136d27abSRui Ueyama   return cast<SectionSymbol>(symbols[index]);
654d177ab2aSSam Clegg }
655d177ab2aSSam Clegg 
656136d27abSRui Ueyama DataSymbol *ObjFile::getDataSymbol(uint32_t index) const {
657136d27abSRui Ueyama   return cast<DataSymbol>(symbols[index]);
65893102974SSam Clegg }
65993102974SSam Clegg 
660136d27abSRui Ueyama Symbol *ObjFile::createDefined(const WasmSymbol &sym) {
661136d27abSRui Ueyama   StringRef name = sym.Info.Name;
662136d27abSRui Ueyama   uint32_t flags = sym.Info.Flags;
663e89b0ef0SRui Ueyama 
664136d27abSRui Ueyama   switch (sym.Info.Kind) {
6654b56adceSRui Ueyama   case WASM_SYMBOL_TYPE_FUNCTION: {
666136d27abSRui Ueyama     InputFunction *func =
667136d27abSRui Ueyama         functions[sym.Info.ElementIndex - wasmObj->getNumImportedFunctions()];
668136d27abSRui Ueyama     if (sym.isBindingLocal())
669136d27abSRui Ueyama       return make<DefinedFunction>(name, flags, this, func);
670accad76cSSam Clegg     if (func->discarded)
671accad76cSSam Clegg       return nullptr;
672136d27abSRui Ueyama     return symtab->addDefinedFunction(name, flags, this, func);
6734b56adceSRui Ueyama   }
6744b56adceSRui Ueyama   case WASM_SYMBOL_TYPE_DATA: {
6755a9b25e1SSam Clegg     InputChunk *seg = segments[sym.Info.DataRef.Segment];
6763b29376eSWouter van Oortmerssen     auto offset = sym.Info.DataRef.Offset;
6773b29376eSWouter van Oortmerssen     auto size = sym.Info.DataRef.Size;
678875ee937SSam Clegg     // Support older (e.g. llvm 13) object files that pre-date the per-symbol
679875ee937SSam Clegg     // TLS flag, and symbols were assumed to be TLS by being defined in a TLS
680875ee937SSam Clegg     // segment.
681875ee937SSam Clegg     if (!(flags & WASM_SYMBOL_TLS) && seg->isTLS())
68244177e5fSSam Clegg       flags |= WASM_SYMBOL_TLS;
683136d27abSRui Ueyama     if (sym.isBindingLocal())
684136d27abSRui Ueyama       return make<DefinedData>(name, flags, this, seg, offset, size);
685accad76cSSam Clegg     if (seg->discarded)
686accad76cSSam Clegg       return nullptr;
687136d27abSRui Ueyama     return symtab->addDefinedData(name, flags, this, seg, offset, size);
6884b56adceSRui Ueyama   }
689d177ab2aSSam Clegg   case WASM_SYMBOL_TYPE_GLOBAL: {
690136d27abSRui Ueyama     InputGlobal *global =
691136d27abSRui Ueyama         globals[sym.Info.ElementIndex - wasmObj->getNumImportedGlobals()];
692136d27abSRui Ueyama     if (sym.isBindingLocal())
693136d27abSRui Ueyama       return make<DefinedGlobal>(name, flags, this, global);
694136d27abSRui Ueyama     return symtab->addDefinedGlobal(name, flags, this, global);
6954b56adceSRui Ueyama   }
696d177ab2aSSam Clegg   case WASM_SYMBOL_TYPE_SECTION: {
69745b7cf99SSam Clegg     InputChunk *section = customSectionsByIndex[sym.Info.ElementIndex];
698136d27abSRui Ueyama     assert(sym.isBindingLocal());
699dd6412c0SDerek Schuff     // Need to return null if discarded here? data and func only do that when
700dd6412c0SDerek Schuff     // binding is not local.
701dd6412c0SDerek Schuff     if (section->discarded)
702dd6412c0SDerek Schuff       return nullptr;
703136d27abSRui Ueyama     return make<SectionSymbol>(flags, section, this);
704d177ab2aSSam Clegg   }
7051d891d44SHeejin Ahn   case WASM_SYMBOL_TYPE_TAG: {
7061d891d44SHeejin Ahn     InputTag *tag = tags[sym.Info.ElementIndex - wasmObj->getNumImportedTags()];
707136d27abSRui Ueyama     if (sym.isBindingLocal())
7081d891d44SHeejin Ahn       return make<DefinedTag>(name, flags, this, tag);
7091d891d44SHeejin Ahn     return symtab->addDefinedTag(name, flags, this, tag);
710e915a71fSHeejin Ahn   }
71153e3b81fSAndy Wingo   case WASM_SYMBOL_TYPE_TABLE: {
71253e3b81fSAndy Wingo     InputTable *table =
71353e3b81fSAndy Wingo         tables[sym.Info.ElementIndex - wasmObj->getNumImportedTables()];
71453e3b81fSAndy Wingo     if (sym.isBindingLocal())
71553e3b81fSAndy Wingo       return make<DefinedTable>(name, flags, this, table);
71653e3b81fSAndy Wingo     return symtab->addDefinedTable(name, flags, this, table);
71753e3b81fSAndy Wingo   }
718d177ab2aSSam Clegg   }
719d177ab2aSSam Clegg   llvm_unreachable("unknown symbol kind");
7204b56adceSRui Ueyama }
7214b56adceSRui Ueyama 
722136d27abSRui Ueyama Symbol *ObjFile::createUndefined(const WasmSymbol &sym, bool isCalledDirectly) {
723136d27abSRui Ueyama   StringRef name = sym.Info.Name;
724f6f4b98fSSam Clegg   uint32_t flags = sym.Info.Flags | WASM_SYMBOL_UNDEFINED;
725e3498ec5SRui Ueyama 
726136d27abSRui Ueyama   switch (sym.Info.Kind) {
727e3498ec5SRui Ueyama   case WASM_SYMBOL_TYPE_FUNCTION:
728136d27abSRui Ueyama     if (sym.isBindingLocal())
729136d27abSRui Ueyama       return make<UndefinedFunction>(name, sym.Info.ImportName,
730136d27abSRui Ueyama                                      sym.Info.ImportModule, flags, this,
731136d27abSRui Ueyama                                      sym.Signature, isCalledDirectly);
732136d27abSRui Ueyama     return symtab->addUndefinedFunction(name, sym.Info.ImportName,
733136d27abSRui Ueyama                                         sym.Info.ImportModule, flags, this,
734136d27abSRui Ueyama                                         sym.Signature, isCalledDirectly);
735e3498ec5SRui Ueyama   case WASM_SYMBOL_TYPE_DATA:
736136d27abSRui Ueyama     if (sym.isBindingLocal())
737136d27abSRui Ueyama       return make<UndefinedData>(name, flags, this);
738136d27abSRui Ueyama     return symtab->addUndefinedData(name, flags, this);
739e3498ec5SRui Ueyama   case WASM_SYMBOL_TYPE_GLOBAL:
740136d27abSRui Ueyama     if (sym.isBindingLocal())
741136d27abSRui Ueyama       return make<UndefinedGlobal>(name, sym.Info.ImportName,
742136d27abSRui Ueyama                                    sym.Info.ImportModule, flags, this,
743136d27abSRui Ueyama                                    sym.GlobalType);
744136d27abSRui Ueyama     return symtab->addUndefinedGlobal(name, sym.Info.ImportName,
745136d27abSRui Ueyama                                       sym.Info.ImportModule, flags, this,
746136d27abSRui Ueyama                                       sym.GlobalType);
74753e3b81fSAndy Wingo   case WASM_SYMBOL_TYPE_TABLE:
74853e3b81fSAndy Wingo     if (sym.isBindingLocal())
74953e3b81fSAndy Wingo       return make<UndefinedTable>(name, sym.Info.ImportName,
75053e3b81fSAndy Wingo                                   sym.Info.ImportModule, flags, this,
75153e3b81fSAndy Wingo                                   sym.TableType);
75253e3b81fSAndy Wingo     return symtab->addUndefinedTable(name, sym.Info.ImportName,
75353e3b81fSAndy Wingo                                      sym.Info.ImportModule, flags, this,
75453e3b81fSAndy Wingo                                      sym.TableType);
7559261ee32SHeejin Ahn   case WASM_SYMBOL_TYPE_TAG:
7569261ee32SHeejin Ahn     if (sym.isBindingLocal())
7579261ee32SHeejin Ahn       return make<UndefinedTag>(name, sym.Info.ImportName,
7589261ee32SHeejin Ahn                                 sym.Info.ImportModule, flags, this,
7599261ee32SHeejin Ahn                                 sym.Signature);
7609261ee32SHeejin Ahn     return symtab->addUndefinedTag(name, sym.Info.ImportName,
7619261ee32SHeejin Ahn                                    sym.Info.ImportModule, flags, this,
7629261ee32SHeejin Ahn                                    sym.Signature);
763d177ab2aSSam Clegg   case WASM_SYMBOL_TYPE_SECTION:
764d177ab2aSSam Clegg     llvm_unreachable("section symbols cannot be undefined");
765e3498ec5SRui Ueyama   }
766d177ab2aSSam Clegg   llvm_unreachable("unknown symbol kind");
767c94d393aSSam Clegg }
768c94d393aSSam Clegg 
769e7efa323SSam Clegg static StringRef strip(StringRef s) { return s.trim(' '); }
7703111784fSSam Clegg 
7713111784fSSam Clegg void StubFile::parse() {
772d9d840cdSSam Clegg   bool first = true;
7733111784fSSam Clegg 
774d9d840cdSSam Clegg   SmallVector<StringRef> lines;
775d9d840cdSSam Clegg   mb.getBuffer().split(lines, '\n');
776d9d840cdSSam Clegg   for (StringRef line : lines) {
777d9d840cdSSam Clegg     line = line.trim();
778d9d840cdSSam Clegg 
7793111784fSSam Clegg     // File must begin with #STUB
7803111784fSSam Clegg     if (first) {
781d9d840cdSSam Clegg       assert(line == "#STUB");
7823111784fSSam Clegg       first = false;
7833111784fSSam Clegg     }
7843111784fSSam Clegg 
7853111784fSSam Clegg     // Lines starting with # are considered comments
786e7efa323SSam Clegg     if (line.starts_with("#") || !line.size())
7873111784fSSam Clegg       continue;
7883111784fSSam Clegg 
7893111784fSSam Clegg     StringRef sym;
7903111784fSSam Clegg     StringRef rest;
7913111784fSSam Clegg     std::tie(sym, rest) = line.split(':');
7923111784fSSam Clegg     sym = strip(sym);
7933111784fSSam Clegg     rest = strip(rest);
7943111784fSSam Clegg 
7953111784fSSam Clegg     symbolDependencies[sym] = {};
7963111784fSSam Clegg 
7973111784fSSam Clegg     while (rest.size()) {
798d9d840cdSSam Clegg       StringRef dep;
799d9d840cdSSam Clegg       std::tie(dep, rest) = rest.split(',');
800d9d840cdSSam Clegg       dep = strip(dep);
801d9d840cdSSam Clegg       symbolDependencies[sym].push_back(dep);
8023111784fSSam Clegg     }
8033111784fSSam Clegg   }
8043111784fSSam Clegg }
8053111784fSSam Clegg 
806136d27abSRui Ueyama static uint8_t mapVisibility(GlobalValue::VisibilityTypes gvVisibility) {
807136d27abSRui Ueyama   switch (gvVisibility) {
808c729c1b4SSam Clegg   case GlobalValue::DefaultVisibility:
809c729c1b4SSam Clegg     return WASM_SYMBOL_VISIBILITY_DEFAULT;
810c729c1b4SSam Clegg   case GlobalValue::HiddenVisibility:
811c729c1b4SSam Clegg   case GlobalValue::ProtectedVisibility:
812c729c1b4SSam Clegg     return WASM_SYMBOL_VISIBILITY_HIDDEN;
813c729c1b4SSam Clegg   }
814c729c1b4SSam Clegg   llvm_unreachable("unknown visibility");
815c729c1b4SSam Clegg }
816c729c1b4SSam Clegg 
817136d27abSRui Ueyama static Symbol *createBitcodeSymbol(const std::vector<bool> &keptComdats,
818136d27abSRui Ueyama                                    const lto::InputFile::Symbol &objSym,
819136d27abSRui Ueyama                                    BitcodeFile &f) {
82083d59e05SAlexandre Ganea   StringRef name = saver().save(objSym.getName());
821c729c1b4SSam Clegg 
822136d27abSRui Ueyama   uint32_t flags = objSym.isWeak() ? WASM_SYMBOL_BINDING_WEAK : 0;
823136d27abSRui Ueyama   flags |= mapVisibility(objSym.getVisibility());
824c729c1b4SSam Clegg 
825136d27abSRui Ueyama   int c = objSym.getComdatIndex();
826136d27abSRui Ueyama   bool excludedByComdat = c != -1 && !keptComdats[c];
827697f2149SSam Clegg 
828136d27abSRui Ueyama   if (objSym.isUndefined() || excludedByComdat) {
829f6f4b98fSSam Clegg     flags |= WASM_SYMBOL_UNDEFINED;
830136d27abSRui Ueyama     if (objSym.isExecutable())
831c68af42fSKazu Hirata       return symtab->addUndefinedFunction(name, std::nullopt, std::nullopt,
832c68af42fSKazu Hirata                                           flags, &f, nullptr, true);
833136d27abSRui Ueyama     return symtab->addUndefinedData(name, flags, &f);
834c729c1b4SSam Clegg   }
835c729c1b4SSam Clegg 
836136d27abSRui Ueyama   if (objSym.isExecutable())
837136d27abSRui Ueyama     return symtab->addDefinedFunction(name, flags, &f, nullptr);
838136d27abSRui Ueyama   return symtab->addDefinedData(name, flags, &f, nullptr, 0, 0);
839c729c1b4SSam Clegg }
840c729c1b4SSam Clegg 
84128848e9eSSam Clegg BitcodeFile::BitcodeFile(MemoryBufferRef m, StringRef archiveName,
842bcc9b9d8SSam Clegg                          uint64_t offsetInArchive, bool lazy)
84328848e9eSSam Clegg     : InputFile(BitcodeKind, m) {
844bcc9b9d8SSam Clegg   this->lazy = lazy;
84528848e9eSSam Clegg   this->archiveName = std::string(archiveName);
84628848e9eSSam Clegg 
84728848e9eSSam Clegg   std::string path = mb.getBufferIdentifier().str();
848*3792b362SFangrui Song   if (ctx.arg.thinLTOIndexOnly)
849b70eb863SSam Clegg     path = replaceThinLTOSuffix(mb.getBufferIdentifier());
85028848e9eSSam Clegg 
85128848e9eSSam Clegg   // ThinLTO assumes that all MemoryBufferRefs given to it have a unique
85228848e9eSSam Clegg   // name. If two archives define two members with the same name, this
85328848e9eSSam Clegg   // causes a collision which result in only one of the objects being taken
85428848e9eSSam Clegg   // into consideration at LTO time (which very likely causes undefined
85528848e9eSSam Clegg   // symbols later in the link stage). So we append file offset to make
85628848e9eSSam Clegg   // filename unique.
85728848e9eSSam Clegg   StringRef name = archiveName.empty()
85883d59e05SAlexandre Ganea                        ? saver().save(path)
85983d59e05SAlexandre Ganea                        : saver().save(archiveName + "(" + path::filename(path) +
86028848e9eSSam Clegg                                       " at " + utostr(offsetInArchive) + ")");
86128848e9eSSam Clegg   MemoryBufferRef mbref(mb.getBuffer(), name);
86228848e9eSSam Clegg 
86328848e9eSSam Clegg   obj = check(lto::InputFile::create(mbref));
86428848e9eSSam Clegg 
86528848e9eSSam Clegg   // If this isn't part of an archive, it's eagerly linked, so mark it live.
86628848e9eSSam Clegg   if (archiveName.empty())
86728848e9eSSam Clegg     markLive();
86828848e9eSSam Clegg }
86928848e9eSSam Clegg 
870b062fe18SSam Clegg bool BitcodeFile::doneLTO = false;
871b062fe18SSam Clegg 
872bcc9b9d8SSam Clegg void BitcodeFile::parseLazy() {
873bcc9b9d8SSam Clegg   for (auto [i, irSym] : llvm::enumerate(obj->symbols())) {
874bcc9b9d8SSam Clegg     if (irSym.isUndefined())
875bcc9b9d8SSam Clegg       continue;
876bcc9b9d8SSam Clegg     StringRef name = saver().save(irSym.getName());
877bcc9b9d8SSam Clegg     symtab->addLazy(name, this);
878bcc9b9d8SSam Clegg     // addLazy() may trigger this->extract() if an existing symbol is an
879bcc9b9d8SSam Clegg     // undefined symbol. If that happens, this function has served its purpose,
880bcc9b9d8SSam Clegg     // and we can exit from the loop early.
881bcc9b9d8SSam Clegg     if (!lazy)
882bcc9b9d8SSam Clegg       break;
883bcc9b9d8SSam Clegg   }
884bcc9b9d8SSam Clegg }
885bcc9b9d8SSam Clegg 
8867bac0bc1SSam Clegg void BitcodeFile::parse(StringRef symName) {
887b062fe18SSam Clegg   if (doneLTO) {
8887bac0bc1SSam Clegg     error(toString(this) + ": attempt to add bitcode file after LTO (" + symName + ")");
889b062fe18SSam Clegg     return;
890b062fe18SSam Clegg   }
891b062fe18SSam Clegg 
892136d27abSRui Ueyama   Triple t(obj->getTargetTriple());
89329f8c9f6SWouter van Oortmerssen   if (!t.isWasm()) {
89429f8c9f6SWouter van Oortmerssen     error(toString(this) + ": machine type must be wasm32 or wasm64");
895c729c1b4SSam Clegg     return;
896c729c1b4SSam Clegg   }
897b8c2d60dSWouter van Oortmerssen   checkArch(t.getArch());
898136d27abSRui Ueyama   std::vector<bool> keptComdats;
8994e844a14SHeejin Ahn   // TODO Support nodeduplicate
9004e844a14SHeejin Ahn   // https://github.com/llvm/llvm-project/issues/49875
901db5e0786SFangrui Song   for (std::pair<StringRef, Comdat::SelectionKind> s : obj->getComdatTable())
902db5e0786SFangrui Song     keptComdats.push_back(symtab->addComdat(s.first));
903c729c1b4SSam Clegg 
904136d27abSRui Ueyama   for (const lto::InputFile::Symbol &objSym : obj->symbols())
905136d27abSRui Ueyama     symbols.push_back(createBitcodeSymbol(keptComdats, objSym, *this));
906c729c1b4SSam Clegg }
907c729c1b4SSam Clegg 
90833c59abfSFangrui Song } // namespace wasm
90933c59abfSFangrui Song } // namespace lld
910