//===- InputFiles.cpp -----------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "InputFiles.h"
#include "Config.h"
#include "InputChunks.h"
#include "InputElement.h"
#include "OutputSegment.h"
#include "SymbolTable.h"
#include "lld/Common/Args.h"
#include "lld/Common/CommonLinkerContext.h"
#include "lld/Common/Reproduce.h"
#include "llvm/BinaryFormat/Wasm.h"
#include "llvm/Object/Binary.h"
#include "llvm/Object/Wasm.h"
#include "llvm/ProfileData/InstrProf.h"
#include "llvm/Support/Path.h"
#include "llvm/Support/TarWriter.h"
#include "llvm/Support/raw_ostream.h"
#include <optional>

#define DEBUG_TYPE "lld"

using namespace llvm;
using namespace llvm::object;
using namespace llvm::wasm;
using namespace llvm::sys;

namespace lld {

// Returns a string in the format of "foo.o" or "foo.a(bar.o)".
3733c59abfSFangrui Song std::string toString(const wasm::InputFile *file) { 3833c59abfSFangrui Song if (!file) 3933c59abfSFangrui Song return "<internal>"; 4033c59abfSFangrui Song 4133c59abfSFangrui Song if (file->archiveName.empty()) 42adcd0268SBenjamin Kramer return std::string(file->getName()); 4333c59abfSFangrui Song 4433c59abfSFangrui Song return (file->archiveName + "(" + file->getName() + ")").str(); 4533c59abfSFangrui Song } 4633c59abfSFangrui Song 4733c59abfSFangrui Song namespace wasm { 48b8c2d60dSWouter van Oortmerssen 49b70eb863SSam Clegg std::string replaceThinLTOSuffix(StringRef path) { 50*3792b362SFangrui Song auto [suffix, repl] = ctx.arg.thinLTOObjectSuffixReplace; 51b70eb863SSam Clegg if (path.consume_back(suffix)) 52b70eb863SSam Clegg return (path + repl).str(); 53b70eb863SSam Clegg return std::string(path); 54b70eb863SSam Clegg } 55b70eb863SSam Clegg 56b8c2d60dSWouter van Oortmerssen void InputFile::checkArch(Triple::ArchType arch) const { 57b8c2d60dSWouter van Oortmerssen bool is64 = arch == Triple::wasm64; 58*3792b362SFangrui Song if (is64 && !ctx.arg.is64) { 59b8c2d60dSWouter van Oortmerssen fatal(toString(this) + 60b8c2d60dSWouter van Oortmerssen ": must specify -mwasm64 to process wasm64 object files"); 61*3792b362SFangrui Song } else if (ctx.arg.is64.value_or(false) != is64) { 62b8c2d60dSWouter van Oortmerssen fatal(toString(this) + 63b8c2d60dSWouter van Oortmerssen ": wasm32 object file can't be linked in wasm64 mode"); 64b8c2d60dSWouter van Oortmerssen } 65b8c2d60dSWouter van Oortmerssen } 66b8c2d60dSWouter van Oortmerssen 6733c59abfSFangrui Song std::unique_ptr<llvm::TarWriter> tar; 6833c59abfSFangrui Song 69b9ef5648SKazu Hirata std::optional<MemoryBufferRef> readFile(StringRef path) { 70136d27abSRui Ueyama log("Loading: " + path); 71c94d393aSSam Clegg 72136d27abSRui Ueyama auto mbOrErr = MemoryBuffer::getFile(path); 73136d27abSRui Ueyama if (auto ec = mbOrErr.getError()) { 74136d27abSRui Ueyama error("cannot open " + path + ": " + 
ec.message()); 75c68af42fSKazu Hirata return std::nullopt; 76c94d393aSSam Clegg } 77136d27abSRui Ueyama std::unique_ptr<MemoryBuffer> &mb = *mbOrErr; 78136d27abSRui Ueyama MemoryBufferRef mbref = mb->getMemBufferRef(); 79136d27abSRui Ueyama make<std::unique_ptr<MemoryBuffer>>(std::move(mb)); // take MB ownership 80c94d393aSSam Clegg 81136d27abSRui Ueyama if (tar) 82136d27abSRui Ueyama tar->append(relativeToRoot(path), mbref.getBuffer()); 83136d27abSRui Ueyama return mbref; 84c94d393aSSam Clegg } 85c94d393aSSam Clegg 8628848e9eSSam Clegg InputFile *createObjectFile(MemoryBufferRef mb, StringRef archiveName, 87bcc9b9d8SSam Clegg uint64_t offsetInArchive, bool lazy) { 88136d27abSRui Ueyama file_magic magic = identify_magic(mb.getBuffer()); 89136d27abSRui Ueyama if (magic == file_magic::wasm_object) { 90136d27abSRui Ueyama std::unique_ptr<Binary> bin = 91136d27abSRui Ueyama CHECK(createBinary(mb), mb.getBufferIdentifier()); 92136d27abSRui Ueyama auto *obj = cast<WasmObjectFile>(bin.get()); 937f409cd8SDerek Schuff if (obj->hasUnmodeledTypes()) 947f409cd8SDerek Schuff fatal(toString(mb.getBufferIdentifier()) + 957f409cd8SDerek Schuff "file has unmodeled reference or GC types"); 96136d27abSRui Ueyama if (obj->isSharedObject()) 97136d27abSRui Ueyama return make<SharedFile>(mb); 98bcc9b9d8SSam Clegg return make<ObjFile>(mb, archiveName, lazy); 99a688a42cSSam Clegg } 1008adf7ac5SSam Clegg 101bcc9b9d8SSam Clegg assert(magic == file_magic::bitcode); 102bcc9b9d8SSam Clegg return make<BitcodeFile>(mb, archiveName, offsetInArchive, lazy); 1038adf7ac5SSam Clegg } 1048adf7ac5SSam Clegg 105c1be8230SSam Clegg // Relocations contain either symbol or type indices. This function takes a 106c1be8230SSam Clegg // relocation and returns relocated index (i.e. translates from the input 1076f4286fbSHeejin Ahn // symbol/type space to the output symbol/type space). 
108136d27abSRui Ueyama uint32_t ObjFile::calcNewIndex(const WasmRelocation &reloc) const { 109136d27abSRui Ueyama if (reloc.Type == R_WASM_TYPE_INDEX_LEB) { 110136d27abSRui Ueyama assert(typeIsUsed[reloc.Index]); 111136d27abSRui Ueyama return typeMap[reloc.Index]; 112d1063bb9SRui Ueyama } 113136d27abSRui Ueyama const Symbol *sym = symbols[reloc.Index]; 114136d27abSRui Ueyama if (auto *ss = dyn_cast<SectionSymbol>(sym)) 115136d27abSRui Ueyama sym = ss->getOutputSectionSymbol(); 116136d27abSRui Ueyama return sym->getOutputSymbolIndex(); 117d96d9357SSam Clegg } 118d96d9357SSam Clegg 119d177ab2aSSam Clegg // Relocations can contain addend for combined sections. This function takes a 120d177ab2aSSam Clegg // relocation and returns updated addend by offset in the output section. 1214c75521cSSam Clegg int64_t ObjFile::calcNewAddend(const WasmRelocation &reloc) const { 122136d27abSRui Ueyama switch (reloc.Type) { 12379e33171SSam Clegg case R_WASM_MEMORY_ADDR_LEB: 1243b29376eSWouter van Oortmerssen case R_WASM_MEMORY_ADDR_LEB64: 1253b29376eSWouter van Oortmerssen case R_WASM_MEMORY_ADDR_SLEB64: 12679e33171SSam Clegg case R_WASM_MEMORY_ADDR_SLEB: 1275bb0dcd9SKeno Fischer case R_WASM_MEMORY_ADDR_REL_SLEB: 1283b29376eSWouter van Oortmerssen case R_WASM_MEMORY_ADDR_REL_SLEB64: 12979e33171SSam Clegg case R_WASM_MEMORY_ADDR_I32: 1303b29376eSWouter van Oortmerssen case R_WASM_MEMORY_ADDR_I64: 13107b6aeb5SSam Clegg case R_WASM_MEMORY_ADDR_TLS_SLEB: 132670944fbSWouter van Oortmerssen case R_WASM_MEMORY_ADDR_TLS_SLEB64: 13379e33171SSam Clegg case R_WASM_FUNCTION_OFFSET_I32: 13416f02431SWouter van Oortmerssen case R_WASM_FUNCTION_OFFSET_I64: 135aa0c571aSYuta Saito case R_WASM_MEMORY_ADDR_LOCREL_I32: 136136d27abSRui Ueyama return reloc.Addend; 13779e33171SSam Clegg case R_WASM_SECTION_OFFSET_I32: 13814ffbb84SSam Clegg return getSectionSymbol(reloc.Index)->section->getOffset(reloc.Addend); 139d177ab2aSSam Clegg default: 140d177ab2aSSam Clegg llvm_unreachable("unexpected relocation 
type"); 141d177ab2aSSam Clegg } 142d177ab2aSSam Clegg } 143d177ab2aSSam Clegg 144ab604a98SSam Clegg // Translate from the relocation's index into the final linked output value. 145aa0c571aSYuta Saito uint64_t ObjFile::calcNewValue(const WasmRelocation &reloc, uint64_t tombstone, 146aa0c571aSYuta Saito const InputChunk *chunk) const { 147136d27abSRui Ueyama const Symbol* sym = nullptr; 148136d27abSRui Ueyama if (reloc.Type != R_WASM_TYPE_INDEX_LEB) { 149136d27abSRui Ueyama sym = symbols[reloc.Index]; 1507cdec273SSam Clegg 1517cdec273SSam Clegg // We can end up with relocations against non-live symbols. For example 1528b8088acSEric Leese // in debug sections. We return a tombstone value in debug symbol sections 1538b8088acSEric Leese // so this will not produce a valid range conflicting with ranges of actual 1548b8088acSEric Leese // code. In other sections we return reloc.Addend. 1558b8088acSEric Leese 1563e7bc0daSSam Clegg if (!isa<SectionSymbol>(sym) && !sym->isLive()) 1578b8088acSEric Leese return tombstone ? 
tombstone : reloc.Addend; 1587cdec273SSam Clegg } 1597cdec273SSam Clegg 160136d27abSRui Ueyama switch (reloc.Type) { 16179e33171SSam Clegg case R_WASM_TABLE_INDEX_I32: 162cc1b9b68SWouter van Oortmerssen case R_WASM_TABLE_INDEX_I64: 16379e33171SSam Clegg case R_WASM_TABLE_INDEX_SLEB: 164cc1b9b68SWouter van Oortmerssen case R_WASM_TABLE_INDEX_SLEB64: 1653a293cbfSWouter van Oortmerssen case R_WASM_TABLE_INDEX_REL_SLEB: 1663a293cbfSWouter van Oortmerssen case R_WASM_TABLE_INDEX_REL_SLEB64: { 167cf2b8722SSam Clegg if (!getFunctionSymbol(reloc.Index)->hasTableIndex()) 168ea38ac5bSSam Clegg return 0; 169937b9558SSam Clegg uint32_t index = getFunctionSymbol(reloc.Index)->getTableIndex(); 1703a293cbfSWouter van Oortmerssen if (reloc.Type == R_WASM_TABLE_INDEX_REL_SLEB || 1713a293cbfSWouter van Oortmerssen reloc.Type == R_WASM_TABLE_INDEX_REL_SLEB64) 172*3792b362SFangrui Song index -= ctx.arg.tableBase; 173937b9558SSam Clegg return index; 174937b9558SSam Clegg } 17579e33171SSam Clegg case R_WASM_MEMORY_ADDR_LEB: 1763b29376eSWouter van Oortmerssen case R_WASM_MEMORY_ADDR_LEB64: 1773b29376eSWouter van Oortmerssen case R_WASM_MEMORY_ADDR_SLEB: 1783b29376eSWouter van Oortmerssen case R_WASM_MEMORY_ADDR_SLEB64: 1792a7cac93SSam Clegg case R_WASM_MEMORY_ADDR_REL_SLEB: 1803b29376eSWouter van Oortmerssen case R_WASM_MEMORY_ADDR_REL_SLEB64: 1813b29376eSWouter van Oortmerssen case R_WASM_MEMORY_ADDR_I32: 182aa0c571aSYuta Saito case R_WASM_MEMORY_ADDR_I64: 183fad05465SSam Clegg case R_WASM_MEMORY_ADDR_TLS_SLEB: 184fad05465SSam Clegg case R_WASM_MEMORY_ADDR_TLS_SLEB64: 185aa0c571aSYuta Saito case R_WASM_MEMORY_ADDR_LOCREL_I32: { 18622b7b848SSam Clegg if (isa<UndefinedData>(sym) || sym->isShared() || sym->isUndefWeak()) 1879abe8c48SSam Clegg return 0; 188a28a4662SSam Clegg auto D = cast<DefinedData>(sym); 18919cedd3cSSam Clegg uint64_t value = D->getVA() + reloc.Addend; 190aa0c571aSYuta Saito if (reloc.Type == R_WASM_MEMORY_ADDR_LOCREL_I32) { 191aa0c571aSYuta Saito const auto *segment = 
cast<InputSegment>(chunk); 192aa0c571aSYuta Saito uint64_t p = segment->outputSeg->startVA + segment->outputSegmentOffset + 193aa0c571aSYuta Saito reloc.Offset - segment->getInputSectionOffset(); 194aa0c571aSYuta Saito value -= p; 195aa0c571aSYuta Saito } 196aa0c571aSYuta Saito return value; 197a28a4662SSam Clegg } 19879e33171SSam Clegg case R_WASM_TYPE_INDEX_LEB: 199136d27abSRui Ueyama return typeMap[reloc.Index]; 20079e33171SSam Clegg case R_WASM_FUNCTION_INDEX_LEB: 201220fe00aSBrendan Dahl case R_WASM_FUNCTION_INDEX_I32: 202136d27abSRui Ueyama return getFunctionSymbol(reloc.Index)->getFunctionIndex(); 2037cdec273SSam Clegg case R_WASM_GLOBAL_INDEX_LEB: 20448139ebcSWouter van Oortmerssen case R_WASM_GLOBAL_INDEX_I32: 205136d27abSRui Ueyama if (auto gs = dyn_cast<GlobalSymbol>(sym)) 206136d27abSRui Ueyama return gs->getGlobalIndex(); 207136d27abSRui Ueyama return sym->getGOTIndex(); 2081d891d44SHeejin Ahn case R_WASM_TAG_INDEX_LEB: 2091d891d44SHeejin Ahn return getTagSymbol(reloc.Index)->getTagIndex(); 21016f02431SWouter van Oortmerssen case R_WASM_FUNCTION_OFFSET_I32: 21116f02431SWouter van Oortmerssen case R_WASM_FUNCTION_OFFSET_I64: { 2126f5c5cbeSSam Clegg if (isa<UndefinedFunction>(sym)) { 2136f5c5cbeSSam Clegg return tombstone ? 
tombstone : reloc.Addend; 2146f5c5cbeSSam Clegg } 215136d27abSRui Ueyama auto *f = cast<DefinedFunction>(sym); 21614ffbb84SSam Clegg return f->function->getOffset(f->function->getFunctionCodeOffset() + 21714ffbb84SSam Clegg reloc.Addend); 218d177ab2aSSam Clegg } 21979e33171SSam Clegg case R_WASM_SECTION_OFFSET_I32: 22014ffbb84SSam Clegg return getSectionSymbol(reloc.Index)->section->getOffset(reloc.Addend); 22153e3b81fSAndy Wingo case R_WASM_TABLE_NUMBER_LEB: 22253e3b81fSAndy Wingo return getTableSymbol(reloc.Index)->getTableNumber(); 223ab604a98SSam Clegg default: 224ab604a98SSam Clegg llvm_unreachable("unknown relocation type"); 225ab604a98SSam Clegg } 226ab604a98SSam Clegg } 227ab604a98SSam Clegg 22847078f56SSam Clegg template <class T> 229136d27abSRui Ueyama static void setRelocs(const std::vector<T *> &chunks, 230136d27abSRui Ueyama const WasmSection *section) { 231136d27abSRui Ueyama if (!section) 23247078f56SSam Clegg return; 23347078f56SSam Clegg 234136d27abSRui Ueyama ArrayRef<WasmRelocation> relocs = section->Relocations; 2351647ff6eSGeorgii Rymar assert(llvm::is_sorted( 2361647ff6eSGeorgii Rymar relocs, [](const WasmRelocation &r1, const WasmRelocation &r2) { 237136d27abSRui Ueyama return r1.Offset < r2.Offset; 23847078f56SSam Clegg })); 2391647ff6eSGeorgii Rymar assert(llvm::is_sorted(chunks, [](InputChunk *c1, InputChunk *c2) { 240136d27abSRui Ueyama return c1->getInputSectionOffset() < c2->getInputSectionOffset(); 24147078f56SSam Clegg })); 24247078f56SSam Clegg 243136d27abSRui Ueyama auto relocsNext = relocs.begin(); 244136d27abSRui Ueyama auto relocsEnd = relocs.end(); 245136d27abSRui Ueyama auto relocLess = [](const WasmRelocation &r, uint32_t val) { 246136d27abSRui Ueyama return r.Offset < val; 24747078f56SSam Clegg }; 248136d27abSRui Ueyama for (InputChunk *c : chunks) { 249136d27abSRui Ueyama auto relocsStart = std::lower_bound(relocsNext, relocsEnd, 250136d27abSRui Ueyama c->getInputSectionOffset(), relocLess); 251136d27abSRui Ueyama relocsNext 
= std::lower_bound( 252136d27abSRui Ueyama relocsStart, relocsEnd, c->getInputSectionOffset() + c->getInputSize(), 253136d27abSRui Ueyama relocLess); 254136d27abSRui Ueyama c->setRelocations(ArrayRef<WasmRelocation>(relocsStart, relocsNext)); 25547078f56SSam Clegg } 25647078f56SSam Clegg } 25747078f56SSam Clegg 258c3536b26SDan Gohman // An object file can have two approaches to tables. With the 259c3536b26SDan Gohman // reference-types feature or call-indirect-overlong feature enabled 260c3536b26SDan Gohman // (explicitly, or implied by the reference-types feature), input files that 261c3536b26SDan Gohman // define or use tables declare the tables using symbols, and record each use 262c3536b26SDan Gohman // with a relocation. This way when the linker combines inputs, it can collate 263c3536b26SDan Gohman // the tables used by the inputs, assigning them distinct table numbers, and 264c3536b26SDan Gohman // renumber all the uses as appropriate. At the same time, the linker has 265c3536b26SDan Gohman // special logic to build the indirect function table if it is needed. 26663393828SAndy Wingo // 2674fc25573SAndy Wingo // However, MVP object files (those that target WebAssembly 1.0, the "minimum 2684fc25573SAndy Wingo // viable product" version of WebAssembly) neither write table symbols nor 2694fc25573SAndy Wingo // record relocations. These files can have at most one table, the indirect 2704fc25573SAndy Wingo // function table used by call_indirect and which is the address space for 2714fc25573SAndy Wingo // function pointers. If this table is present, it is always an import. If we 2724fc25573SAndy Wingo // have a file with a table import but no table symbols, it is an MVP object 2734fc25573SAndy Wingo // file. synthesizeMVPIndirectFunctionTableSymbolIfNeeded serves as a shim when 2744fc25573SAndy Wingo // loading these input files, defining the missing symbol to allow the indirect 2754fc25573SAndy Wingo // function table to be built. 
27663393828SAndy Wingo // 2774fc25573SAndy Wingo // As indirect function table table usage in MVP objects cannot be relocated, 2784fc25573SAndy Wingo // the linker must ensure that this table gets assigned index zero. 2794fc25573SAndy Wingo void ObjFile::addLegacyIndirectFunctionTableIfNeeded( 2804fc25573SAndy Wingo uint32_t tableSymbolCount) { 2814fc25573SAndy Wingo uint32_t tableCount = wasmObj->getNumImportedTables() + tables.size(); 2824fc25573SAndy Wingo 2834fc25573SAndy Wingo // If there are symbols for all tables, then all is good. 2844fc25573SAndy Wingo if (tableCount == tableSymbolCount) 2854fc25573SAndy Wingo return; 2864fc25573SAndy Wingo 2874fc25573SAndy Wingo // It's possible for an input to define tables and also use the indirect 288c3536b26SDan Gohman // function table, but forget to compile with -mattr=+call-indirect-overlong 289c3536b26SDan Gohman // or -mattr=+reference-types. For these newer files, we require symbols for 290c3536b26SDan Gohman // all tables, and relocations for all of their uses. 2914fc25573SAndy Wingo if (tableSymbolCount != 0) { 2924fc25573SAndy Wingo error(toString(this) + 2934fc25573SAndy Wingo ": expected one symbol table entry for each of the " + 2944fc25573SAndy Wingo Twine(tableCount) + " table(s) present, but got " + 2954fc25573SAndy Wingo Twine(tableSymbolCount) + " symbol(s) instead."); 2964fc25573SAndy Wingo return; 2974fc25573SAndy Wingo } 2984fc25573SAndy Wingo 2994fc25573SAndy Wingo // An MVP object file can have up to one table import, for the indirect 3004fc25573SAndy Wingo // function table, but will have no table definitions. 3014fc25573SAndy Wingo if (tables.size()) { 3024fc25573SAndy Wingo error(toString(this) + 3034fc25573SAndy Wingo ": unexpected table definition(s) without corresponding " 3044fc25573SAndy Wingo "symbol-table entries."); 3054fc25573SAndy Wingo return; 3064fc25573SAndy Wingo } 3074fc25573SAndy Wingo 3084fc25573SAndy Wingo // An MVP object file can have only one table import. 
3094fc25573SAndy Wingo if (tableCount != 1) { 3104fc25573SAndy Wingo error(toString(this) + 3114fc25573SAndy Wingo ": multiple table imports, but no corresponding symbol-table " 3124fc25573SAndy Wingo "entries."); 3134fc25573SAndy Wingo return; 3144fc25573SAndy Wingo } 3154fc25573SAndy Wingo 3164fc25573SAndy Wingo const WasmImport *tableImport = nullptr; 3174fc25573SAndy Wingo for (const auto &import : wasmObj->imports()) { 3184fc25573SAndy Wingo if (import.Kind == WASM_EXTERNAL_TABLE) { 3194fc25573SAndy Wingo assert(!tableImport); 3204fc25573SAndy Wingo tableImport = &import; 3214fc25573SAndy Wingo } 3224fc25573SAndy Wingo } 3234fc25573SAndy Wingo assert(tableImport); 3244fc25573SAndy Wingo 3254fc25573SAndy Wingo // We can only synthesize a symtab entry for the indirect function table; if 3264fc25573SAndy Wingo // it has an unexpected name or type, assume that it's not actually the 3274fc25573SAndy Wingo // indirect function table. 3284fc25573SAndy Wingo if (tableImport->Field != functionTableName || 329103fa325SDerek Schuff tableImport->Table.ElemType != ValType::FUNCREF) { 3304fc25573SAndy Wingo error(toString(this) + ": table import " + Twine(tableImport->Field) + 3314fc25573SAndy Wingo " is missing a symbol table entry."); 3324fc25573SAndy Wingo return; 3334fc25573SAndy Wingo } 3344fc25573SAndy Wingo 335ef1f999eSDerek Schuff WasmSymbolInfo info; 336ef1f999eSDerek Schuff info.Name = tableImport->Field; 337ef1f999eSDerek Schuff info.Kind = WASM_SYMBOL_TYPE_TABLE; 338ef1f999eSDerek Schuff info.ImportModule = tableImport->Module; 339ef1f999eSDerek Schuff info.ImportName = tableImport->Field; 340ef1f999eSDerek Schuff info.Flags = WASM_SYMBOL_UNDEFINED | WASM_SYMBOL_NO_STRIP; 341ef1f999eSDerek Schuff info.ElementIndex = 0; 342ef1f999eSDerek Schuff LLVM_DEBUG(dbgs() << "Synthesizing symbol for table import: " << info.Name 3434fc25573SAndy Wingo << "\n"); 34463393828SAndy Wingo const WasmGlobalType *globalType = nullptr; 34563393828SAndy Wingo const WasmSignature 
*signature = nullptr; 3463ec1760dSHeejin Ahn auto *wasmSym = 347ef1f999eSDerek Schuff make<WasmSymbol>(info, globalType, &tableImport->Table, signature); 3484fc25573SAndy Wingo Symbol *sym = createUndefined(*wasmSym, false); 3494fc25573SAndy Wingo // We're only sure it's a TableSymbol if the createUndefined succeeded. 3504fc25573SAndy Wingo if (errorCount()) 3514fc25573SAndy Wingo return; 3524fc25573SAndy Wingo symbols.push_back(sym); 3534fc25573SAndy Wingo // Because there are no TABLE_NUMBER relocs, we can't compute accurate 3544fc25573SAndy Wingo // liveness info; instead, just mark the symbol as always live. 3554fc25573SAndy Wingo sym->markLive(); 3564fc25573SAndy Wingo 3574fc25573SAndy Wingo // We assume that this compilation unit has unrelocatable references to 3584fc25573SAndy Wingo // this table. 359184c22ddSSam Clegg ctx.legacyFunctionTable = true; 36063393828SAndy Wingo } 36163393828SAndy Wingo 36245b7cf99SSam Clegg static bool shouldMerge(const WasmSection &sec) { 363*3792b362SFangrui Song if (ctx.arg.optimize == 0) 36445b7cf99SSam Clegg return false; 36545b7cf99SSam Clegg // Sadly we don't have section attributes yet for custom sections, so we 36645b7cf99SSam Clegg // currently go by the name alone. 36745b7cf99SSam Clegg // TODO(sbc): Add ability for wasm sections to carry flags so we don't 36845b7cf99SSam Clegg // need to use names here. 369c1a59fa5SSam Clegg // For now, keep in sync with uses of wasm::WASM_SEG_FLAG_STRINGS in 370c1a59fa5SSam Clegg // MCObjectFileInfo::initWasmMCObjectFileInfo which creates these custom 371c1a59fa5SSam Clegg // sections. 372c1a59fa5SSam Clegg return sec.Name == ".debug_str" || sec.Name == ".debug_str.dwo" || 373c1a59fa5SSam Clegg sec.Name == ".debug_line_str"; 37445b7cf99SSam Clegg } 37545b7cf99SSam Clegg 3763b8d2be5SSam Clegg static bool shouldMerge(const WasmSegment &seg) { 3773b8d2be5SSam Clegg // As of now we only support merging strings, and only with single byte 3783b8d2be5SSam Clegg // alignment (2^0). 
3793b8d2be5SSam Clegg if (!(seg.Data.LinkingFlags & WASM_SEG_FLAG_STRINGS) || 3803b8d2be5SSam Clegg (seg.Data.Alignment != 0)) 3813b8d2be5SSam Clegg return false; 3823b8d2be5SSam Clegg 3833b8d2be5SSam Clegg // On a regular link we don't merge sections if -O0 (default is -O1). This 3843b8d2be5SSam Clegg // sometimes makes the linker significantly faster, although the output will 3853b8d2be5SSam Clegg // be bigger. 386*3792b362SFangrui Song if (ctx.arg.optimize == 0) 3873b8d2be5SSam Clegg return false; 3883b8d2be5SSam Clegg 3893b8d2be5SSam Clegg // A mergeable section with size 0 is useless because they don't have 3903b8d2be5SSam Clegg // any data to merge. A mergeable string section with size 0 can be 3913b8d2be5SSam Clegg // argued as invalid because it doesn't end with a null character. 3923b8d2be5SSam Clegg // We'll avoid a mess by handling them as if they were non-mergeable. 3933b8d2be5SSam Clegg if (seg.Data.Content.size() == 0) 3943b8d2be5SSam Clegg return false; 3953b8d2be5SSam Clegg 3963b8d2be5SSam Clegg return true; 3973b8d2be5SSam Clegg } 3983b8d2be5SSam Clegg 399bcc9b9d8SSam Clegg void ObjFile::parseLazy() { 40022b7b848SSam Clegg LLVM_DEBUG(dbgs() << "ObjFile::parseLazy: " << toString(this) << " " 40122b7b848SSam Clegg << wasmObj.get() << "\n"); 402bcc9b9d8SSam Clegg for (const SymbolRef &sym : wasmObj->symbols()) { 403bcc9b9d8SSam Clegg const WasmSymbol &wasmSym = wasmObj->getWasmSymbol(sym.getRawDataRefImpl()); 40454031231SSam Clegg if (wasmSym.isUndefined() || wasmSym.isBindingLocal()) 405bcc9b9d8SSam Clegg continue; 406bcc9b9d8SSam Clegg symtab->addLazy(wasmSym.Info.Name, this); 407bcc9b9d8SSam Clegg // addLazy() may trigger this->extract() if an existing symbol is an 408bcc9b9d8SSam Clegg // undefined symbol. If that happens, this function has served its purpose, 409bcc9b9d8SSam Clegg // and we can exit from the loop early. 
410bcc9b9d8SSam Clegg if (!lazy) 411bcc9b9d8SSam Clegg break; 412bcc9b9d8SSam Clegg } 413bcc9b9d8SSam Clegg } 414bcc9b9d8SSam Clegg 415bcc9b9d8SSam Clegg ObjFile::ObjFile(MemoryBufferRef m, StringRef archiveName, bool lazy) 41622b7b848SSam Clegg : WasmFileBase(ObjectKind, m) { 417bcc9b9d8SSam Clegg this->lazy = lazy; 418bcc9b9d8SSam Clegg this->archiveName = std::string(archiveName); 419bcc9b9d8SSam Clegg 420ad2ff172SSam Clegg // Currently we only do this check for regular object file, and not for shared 421ad2ff172SSam Clegg // object files. This is because architecture detection for shared objects is 422ad2ff172SSam Clegg // currently based on a heuristic, which is fallable: 423ad2ff172SSam Clegg // https://github.com/llvm/llvm-project/issues/98778 424ad2ff172SSam Clegg checkArch(wasmObj->getArch()); 425ad2ff172SSam Clegg 426bcc9b9d8SSam Clegg // If this isn't part of an archive, it's eagerly linked, so mark it live. 427bcc9b9d8SSam Clegg if (archiveName.empty()) 428bcc9b9d8SSam Clegg markLive(); 42922b7b848SSam Clegg } 430bcc9b9d8SSam Clegg 43122b7b848SSam Clegg void SharedFile::parse() { 43222b7b848SSam Clegg assert(wasmObj->isSharedObject()); 43322b7b848SSam Clegg 43422b7b848SSam Clegg for (const SymbolRef &sym : wasmObj->symbols()) { 43522b7b848SSam Clegg const WasmSymbol &wasmSym = wasmObj->getWasmSymbol(sym.getRawDataRefImpl()); 43622b7b848SSam Clegg if (wasmSym.isDefined()) { 43722b7b848SSam Clegg StringRef name = wasmSym.Info.Name; 43822b7b848SSam Clegg // Certain shared library exports are known to be DSO-local so we 43922b7b848SSam Clegg // don't want to add them to the symbol table. 44022b7b848SSam Clegg // TODO(sbc): Instead of hardcoding these here perhaps we could add 44122b7b848SSam Clegg // this as extra metadata in the `dylink` section. 
44222b7b848SSam Clegg if (name == "__wasm_apply_data_relocs" || name == "__wasm_call_ctors" || 44322b7b848SSam Clegg name.starts_with("__start_") || name.starts_with("__stop_")) 44422b7b848SSam Clegg continue; 44522b7b848SSam Clegg uint32_t flags = wasmSym.Info.Flags; 44622b7b848SSam Clegg Symbol *s; 44722b7b848SSam Clegg LLVM_DEBUG(dbgs() << "shared symbol: " << name << "\n"); 44822b7b848SSam Clegg switch (wasmSym.Info.Kind) { 44922b7b848SSam Clegg case WASM_SYMBOL_TYPE_FUNCTION: 45022b7b848SSam Clegg s = symtab->addSharedFunction(name, flags, this, wasmSym.Signature); 45122b7b848SSam Clegg break; 45222b7b848SSam Clegg case WASM_SYMBOL_TYPE_DATA: 45322b7b848SSam Clegg s = symtab->addSharedData(name, flags, this); 45422b7b848SSam Clegg break; 45522b7b848SSam Clegg default: 45622b7b848SSam Clegg continue; 45722b7b848SSam Clegg } 45822b7b848SSam Clegg symbols.push_back(s); 45922b7b848SSam Clegg } 46022b7b848SSam Clegg } 46122b7b848SSam Clegg } 46222b7b848SSam Clegg 463d4efc3e0SYuta Saito // Returns the alignment for a custom section. This is used to concatenate 464d4efc3e0SYuta Saito // custom sections with the same name into a single custom section. 465d4efc3e0SYuta Saito static uint32_t getCustomSectionAlignment(const WasmSection &sec) { 466d4efc3e0SYuta Saito // TODO: Add a section attribute for alignment in the linking spec. 467d4efc3e0SYuta Saito if (sec.Name == getInstrProfSectionName(IPSK_covfun, Triple::Wasm) || 468d4efc3e0SYuta Saito sec.Name == getInstrProfSectionName(IPSK_covmap, Triple::Wasm)) { 469d4efc3e0SYuta Saito // llvm-cov assumes that coverage metadata sections are 8-byte aligned. 470d4efc3e0SYuta Saito return 8; 471d4efc3e0SYuta Saito } 472d4efc3e0SYuta Saito return 1; 473d4efc3e0SYuta Saito } 474d4efc3e0SYuta Saito 47522b7b848SSam Clegg WasmFileBase::WasmFileBase(Kind k, MemoryBufferRef m) : InputFile(k, m) { 47622b7b848SSam Clegg // Parse a memory buffer as a wasm file. 
47722b7b848SSam Clegg LLVM_DEBUG(dbgs() << "Reading object: " << toString(this) << "\n"); 478136d27abSRui Ueyama std::unique_ptr<Binary> bin = CHECK(createBinary(mb), toString(this)); 479c94d393aSSam Clegg 480136d27abSRui Ueyama auto *obj = dyn_cast<WasmObjectFile>(bin.get()); 481136d27abSRui Ueyama if (!obj) 482c94d393aSSam Clegg fatal(toString(this) + ": not a wasm file"); 483c94d393aSSam Clegg 484136d27abSRui Ueyama bin.release(); 485136d27abSRui Ueyama wasmObj.reset(obj); 486bcc9b9d8SSam Clegg } 487bcc9b9d8SSam Clegg 488bcc9b9d8SSam Clegg void ObjFile::parse(bool ignoreComdats) { 489bcc9b9d8SSam Clegg // Parse a memory buffer as a wasm file. 490bcc9b9d8SSam Clegg LLVM_DEBUG(dbgs() << "ObjFile::parse: " << toString(this) << "\n"); 491b8c2d60dSWouter van Oortmerssen 49222b7b848SSam Clegg if (!wasmObj->isRelocatableObject()) 49322b7b848SSam Clegg fatal(toString(this) + ": not a relocatable wasm file"); 49422b7b848SSam Clegg 495dbd33b80SSam Clegg // Build up a map of function indices to table indices for use when 496dbd33b80SSam Clegg // verifying the existing table index relocations 497136d27abSRui Ueyama uint32_t totalFunctions = 498136d27abSRui Ueyama wasmObj->getNumImportedFunctions() + wasmObj->functions().size(); 49981443ac1SSam Clegg tableEntriesRel.resize(totalFunctions); 500136d27abSRui Ueyama tableEntries.resize(totalFunctions); 501136d27abSRui Ueyama for (const WasmElemSegment &seg : wasmObj->elements()) { 5023b29376eSWouter van Oortmerssen int64_t offset; 5039504ab32SSam Clegg if (seg.Offset.Extended) 5049504ab32SSam Clegg fatal(toString(this) + ": extended init exprs not supported"); 5059504ab32SSam Clegg else if (seg.Offset.Inst.Opcode == WASM_OPCODE_I32_CONST) 5069504ab32SSam Clegg offset = seg.Offset.Inst.Value.Int32; 5079504ab32SSam Clegg else if (seg.Offset.Inst.Opcode == WASM_OPCODE_I64_CONST) 5089504ab32SSam Clegg offset = seg.Offset.Inst.Value.Int64; 5093b29376eSWouter van Oortmerssen else 510dbd33b80SSam Clegg fatal(toString(this) + ": invalid 
table elements"); 5113b29376eSWouter van Oortmerssen for (size_t index = 0; index < seg.Functions.size(); index++) { 5123b29376eSWouter van Oortmerssen auto functionIndex = seg.Functions[index]; 51381443ac1SSam Clegg tableEntriesRel[functionIndex] = index; 514136d27abSRui Ueyama tableEntries[functionIndex] = offset + index; 515dbd33b80SSam Clegg } 516dbd33b80SSam Clegg } 517dbd33b80SSam Clegg 518dd6412c0SDerek Schuff ArrayRef<StringRef> comdats = wasmObj->linkingData().Comdats; 519dd6412c0SDerek Schuff for (StringRef comdat : comdats) { 520dd6412c0SDerek Schuff bool isNew = ignoreComdats || symtab->addComdat(comdat); 521dd6412c0SDerek Schuff keptComdats.push_back(isNew); 522dd6412c0SDerek Schuff } 523dd6412c0SDerek Schuff 524136d27abSRui Ueyama uint32_t sectionIndex = 0; 52556e970d4SSam Clegg 52656e970d4SSam Clegg // Bool for each symbol, true if called directly. This allows us to implement 52756e970d4SSam Clegg // a weaker form of signature checking where undefined functions that are not 52856e970d4SSam Clegg // called directly (i.e. only address taken) don't have to match the defined 52956e970d4SSam Clegg // function's signature. We cannot do this for directly called functions 53056e970d4SSam Clegg // because those signatures are checked at validation times. 5314e844a14SHeejin Ahn // See https://github.com/llvm/llvm-project/issues/39758 532136d27abSRui Ueyama std::vector<bool> isCalledDirectly(wasmObj->getNumberOfSymbols(), false); 533136d27abSRui Ueyama for (const SectionRef &sec : wasmObj->sections()) { 534136d27abSRui Ueyama const WasmSection §ion = wasmObj->getWasmSection(sec); 53559f959ffSSam Clegg // Wasm objects can have at most one code and one data section. 
536136d27abSRui Ueyama if (section.Type == WASM_SEC_CODE) { 537136d27abSRui Ueyama assert(!codeSection); 538136d27abSRui Ueyama codeSection = §ion; 539136d27abSRui Ueyama } else if (section.Type == WASM_SEC_DATA) { 540136d27abSRui Ueyama assert(!dataSection); 541136d27abSRui Ueyama dataSection = §ion; 542136d27abSRui Ueyama } else if (section.Type == WASM_SEC_CUSTOM) { 54345b7cf99SSam Clegg InputChunk *customSec; 544d4efc3e0SYuta Saito uint32_t alignment = getCustomSectionAlignment(section); 54545b7cf99SSam Clegg if (shouldMerge(section)) 546d4efc3e0SYuta Saito customSec = make<MergeInputChunk>(section, this, alignment); 54745b7cf99SSam Clegg else 548d4efc3e0SYuta Saito customSec = make<InputSection>(section, this, alignment); 549dd6412c0SDerek Schuff customSec->discarded = isExcludedByComdat(customSec); 550dd6412c0SDerek Schuff customSections.emplace_back(customSec); 551136d27abSRui Ueyama customSections.back()->setRelocations(section.Relocations); 552136d27abSRui Ueyama customSectionsByIndex[sectionIndex] = customSections.back(); 553d177ab2aSSam Clegg } 554136d27abSRui Ueyama sectionIndex++; 5557ae3d335SKazuaki Ishizaki // Scans relocations to determine if a function symbol is called directly. 556136d27abSRui Ueyama for (const WasmRelocation &reloc : section.Relocations) 557136d27abSRui Ueyama if (reloc.Type == R_WASM_FUNCTION_INDEX_LEB) 558136d27abSRui Ueyama isCalledDirectly[reloc.Index] = true; 559c94d393aSSam Clegg } 560c94d393aSSam Clegg 561136d27abSRui Ueyama typeMap.resize(getWasmObj()->types().size()); 562136d27abSRui Ueyama typeIsUsed.resize(getWasmObj()->types().size(), false); 5638f6d2defSSam Clegg 564c4d9aa1bSNicholas Wilson 5650a9583ceSRui Ueyama // Populate `Segments`. 
566136d27abSRui Ueyama for (const WasmSegment &s : wasmObj->dataSegments()) { 5675a9b25e1SSam Clegg InputChunk *seg; 568875ee937SSam Clegg if (shouldMerge(s)) 56945b7cf99SSam Clegg seg = make<MergeInputChunk>(s, this); 570875ee937SSam Clegg else 57145b7cf99SSam Clegg seg = make<InputSegment>(s, this); 572136d27abSRui Ueyama seg->discarded = isExcludedByComdat(seg); 57344177e5fSSam Clegg // Older object files did not include WASM_SEG_FLAG_TLS and instead 57444177e5fSSam Clegg // relied on the naming convention. To maintain compat with such objects 57544177e5fSSam Clegg // we still imply the TLS flag based on the name of the segment. 57644177e5fSSam Clegg if (!seg->isTLS() && 5778d85c96eSFangrui Song (seg->name.starts_with(".tdata") || seg->name.starts_with(".tbss"))) 57844177e5fSSam Clegg seg->flags |= WASM_SEG_FLAG_TLS; 579136d27abSRui Ueyama segments.emplace_back(seg); 580fd54fa5dSSam Clegg } 581136d27abSRui Ueyama setRelocs(segments, dataSection); 582c94d393aSSam Clegg 5830a9583ceSRui Ueyama // Populate `Functions`. 584136d27abSRui Ueyama ArrayRef<WasmFunction> funcs = wasmObj->functions(); 585136d27abSRui Ueyama ArrayRef<WasmSignature> types = wasmObj->types(); 586136d27abSRui Ueyama functions.reserve(funcs.size()); 5870a9583ceSRui Ueyama 588c0039de2SSam Clegg for (auto &f : funcs) { 589c0039de2SSam Clegg auto *func = make<InputFunction>(types[f.SigIndex], &f, this); 590136d27abSRui Ueyama func->discarded = isExcludedByComdat(func); 591136d27abSRui Ueyama functions.emplace_back(func); 592fd54fa5dSSam Clegg } 593136d27abSRui Ueyama setRelocs(functions, codeSection); 5948d146bbcSSam Clegg 59553e3b81fSAndy Wingo // Populate `Tables`. 59653e3b81fSAndy Wingo for (const WasmTable &t : wasmObj->tables()) 59753e3b81fSAndy Wingo tables.emplace_back(make<InputTable>(t, this)); 59853e3b81fSAndy Wingo 5990a9583ceSRui Ueyama // Populate `Globals`. 
600136d27abSRui Ueyama for (const WasmGlobal &g : wasmObj->globals()) 601136d27abSRui Ueyama globals.emplace_back(make<InputGlobal>(g, this)); 6028d146bbcSSam Clegg 6031d891d44SHeejin Ahn // Populate `Tags`. 6041d891d44SHeejin Ahn for (const WasmTag &t : wasmObj->tags()) 6053ec1760dSHeejin Ahn tags.emplace_back(make<InputTag>(types[t.SigIndex], t, this)); 606e915a71fSHeejin Ahn 60733fdf82dSFangrui Song // Populate `Symbols` based on the symbols in the object. 608136d27abSRui Ueyama symbols.reserve(wasmObj->getNumberOfSymbols()); 6094fc25573SAndy Wingo uint32_t tableSymbolCount = 0; 610136d27abSRui Ueyama for (const SymbolRef &sym : wasmObj->symbols()) { 611136d27abSRui Ueyama const WasmSymbol &wasmSym = wasmObj->getWasmSymbol(sym.getRawDataRefImpl()); 61263393828SAndy Wingo if (wasmSym.isTypeTable()) 6134fc25573SAndy Wingo tableSymbolCount++; 614136d27abSRui Ueyama if (wasmSym.isDefined()) { 61559f959ffSSam Clegg // createDefined may fail if the symbol is comdat excluded in which case 61659f959ffSSam Clegg // we fall back to creating an undefined symbol 617136d27abSRui Ueyama if (Symbol *d = createDefined(wasmSym)) { 618136d27abSRui Ueyama symbols.push_back(d); 61959f959ffSSam Clegg continue; 62059f959ffSSam Clegg } 62159f959ffSSam Clegg } 622136d27abSRui Ueyama size_t idx = symbols.size(); 623136d27abSRui Ueyama symbols.push_back(createUndefined(wasmSym, isCalledDirectly[idx])); 6240a9583ceSRui Ueyama } 62563393828SAndy Wingo 6264fc25573SAndy Wingo addLegacyIndirectFunctionTableIfNeeded(tableSymbolCount); 62793102974SSam Clegg } 62893102974SSam Clegg 629c0039de2SSam Clegg bool ObjFile::isExcludedByComdat(const InputChunk *chunk) const { 630136d27abSRui Ueyama uint32_t c = chunk->getComdat(); 631136d27abSRui Ueyama if (c == UINT32_MAX) 632dcf6234dSRui Ueyama return false; 633136d27abSRui Ueyama return !keptComdats[c]; 634e0f6fcd0SSam Clegg } 635e0f6fcd0SSam Clegg 636136d27abSRui Ueyama FunctionSymbol *ObjFile::getFunctionSymbol(uint32_t index) const { 
637136d27abSRui Ueyama return cast<FunctionSymbol>(symbols[index]); 63893102974SSam Clegg } 63993102974SSam Clegg 640136d27abSRui Ueyama GlobalSymbol *ObjFile::getGlobalSymbol(uint32_t index) const { 641136d27abSRui Ueyama return cast<GlobalSymbol>(symbols[index]); 64293102974SSam Clegg } 64393102974SSam Clegg 6441d891d44SHeejin Ahn TagSymbol *ObjFile::getTagSymbol(uint32_t index) const { 6451d891d44SHeejin Ahn return cast<TagSymbol>(symbols[index]); 646e915a71fSHeejin Ahn } 647e915a71fSHeejin Ahn 64853e3b81fSAndy Wingo TableSymbol *ObjFile::getTableSymbol(uint32_t index) const { 64953e3b81fSAndy Wingo return cast<TableSymbol>(symbols[index]); 65053e3b81fSAndy Wingo } 65153e3b81fSAndy Wingo 652136d27abSRui Ueyama SectionSymbol *ObjFile::getSectionSymbol(uint32_t index) const { 653136d27abSRui Ueyama return cast<SectionSymbol>(symbols[index]); 654d177ab2aSSam Clegg } 655d177ab2aSSam Clegg 656136d27abSRui Ueyama DataSymbol *ObjFile::getDataSymbol(uint32_t index) const { 657136d27abSRui Ueyama return cast<DataSymbol>(symbols[index]); 65893102974SSam Clegg } 65993102974SSam Clegg 660136d27abSRui Ueyama Symbol *ObjFile::createDefined(const WasmSymbol &sym) { 661136d27abSRui Ueyama StringRef name = sym.Info.Name; 662136d27abSRui Ueyama uint32_t flags = sym.Info.Flags; 663e89b0ef0SRui Ueyama 664136d27abSRui Ueyama switch (sym.Info.Kind) { 6654b56adceSRui Ueyama case WASM_SYMBOL_TYPE_FUNCTION: { 666136d27abSRui Ueyama InputFunction *func = 667136d27abSRui Ueyama functions[sym.Info.ElementIndex - wasmObj->getNumImportedFunctions()]; 668136d27abSRui Ueyama if (sym.isBindingLocal()) 669136d27abSRui Ueyama return make<DefinedFunction>(name, flags, this, func); 670accad76cSSam Clegg if (func->discarded) 671accad76cSSam Clegg return nullptr; 672136d27abSRui Ueyama return symtab->addDefinedFunction(name, flags, this, func); 6734b56adceSRui Ueyama } 6744b56adceSRui Ueyama case WASM_SYMBOL_TYPE_DATA: { 6755a9b25e1SSam Clegg InputChunk *seg = segments[sym.Info.DataRef.Segment]; 
6763b29376eSWouter van Oortmerssen auto offset = sym.Info.DataRef.Offset; 6773b29376eSWouter van Oortmerssen auto size = sym.Info.DataRef.Size; 678875ee937SSam Clegg // Support older (e.g. llvm 13) object files that pre-date the per-symbol 679875ee937SSam Clegg // TLS flag, and symbols were assumed to be TLS by being defined in a TLS 680875ee937SSam Clegg // segment. 681875ee937SSam Clegg if (!(flags & WASM_SYMBOL_TLS) && seg->isTLS()) 68244177e5fSSam Clegg flags |= WASM_SYMBOL_TLS; 683136d27abSRui Ueyama if (sym.isBindingLocal()) 684136d27abSRui Ueyama return make<DefinedData>(name, flags, this, seg, offset, size); 685accad76cSSam Clegg if (seg->discarded) 686accad76cSSam Clegg return nullptr; 687136d27abSRui Ueyama return symtab->addDefinedData(name, flags, this, seg, offset, size); 6884b56adceSRui Ueyama } 689d177ab2aSSam Clegg case WASM_SYMBOL_TYPE_GLOBAL: { 690136d27abSRui Ueyama InputGlobal *global = 691136d27abSRui Ueyama globals[sym.Info.ElementIndex - wasmObj->getNumImportedGlobals()]; 692136d27abSRui Ueyama if (sym.isBindingLocal()) 693136d27abSRui Ueyama return make<DefinedGlobal>(name, flags, this, global); 694136d27abSRui Ueyama return symtab->addDefinedGlobal(name, flags, this, global); 6954b56adceSRui Ueyama } 696d177ab2aSSam Clegg case WASM_SYMBOL_TYPE_SECTION: { 69745b7cf99SSam Clegg InputChunk *section = customSectionsByIndex[sym.Info.ElementIndex]; 698136d27abSRui Ueyama assert(sym.isBindingLocal()); 699dd6412c0SDerek Schuff // Need to return null if discarded here? data and func only do that when 700dd6412c0SDerek Schuff // binding is not local. 
701dd6412c0SDerek Schuff if (section->discarded) 702dd6412c0SDerek Schuff return nullptr; 703136d27abSRui Ueyama return make<SectionSymbol>(flags, section, this); 704d177ab2aSSam Clegg } 7051d891d44SHeejin Ahn case WASM_SYMBOL_TYPE_TAG: { 7061d891d44SHeejin Ahn InputTag *tag = tags[sym.Info.ElementIndex - wasmObj->getNumImportedTags()]; 707136d27abSRui Ueyama if (sym.isBindingLocal()) 7081d891d44SHeejin Ahn return make<DefinedTag>(name, flags, this, tag); 7091d891d44SHeejin Ahn return symtab->addDefinedTag(name, flags, this, tag); 710e915a71fSHeejin Ahn } 71153e3b81fSAndy Wingo case WASM_SYMBOL_TYPE_TABLE: { 71253e3b81fSAndy Wingo InputTable *table = 71353e3b81fSAndy Wingo tables[sym.Info.ElementIndex - wasmObj->getNumImportedTables()]; 71453e3b81fSAndy Wingo if (sym.isBindingLocal()) 71553e3b81fSAndy Wingo return make<DefinedTable>(name, flags, this, table); 71653e3b81fSAndy Wingo return symtab->addDefinedTable(name, flags, this, table); 71753e3b81fSAndy Wingo } 718d177ab2aSSam Clegg } 719d177ab2aSSam Clegg llvm_unreachable("unknown symbol kind"); 7204b56adceSRui Ueyama } 7214b56adceSRui Ueyama 722136d27abSRui Ueyama Symbol *ObjFile::createUndefined(const WasmSymbol &sym, bool isCalledDirectly) { 723136d27abSRui Ueyama StringRef name = sym.Info.Name; 724f6f4b98fSSam Clegg uint32_t flags = sym.Info.Flags | WASM_SYMBOL_UNDEFINED; 725e3498ec5SRui Ueyama 726136d27abSRui Ueyama switch (sym.Info.Kind) { 727e3498ec5SRui Ueyama case WASM_SYMBOL_TYPE_FUNCTION: 728136d27abSRui Ueyama if (sym.isBindingLocal()) 729136d27abSRui Ueyama return make<UndefinedFunction>(name, sym.Info.ImportName, 730136d27abSRui Ueyama sym.Info.ImportModule, flags, this, 731136d27abSRui Ueyama sym.Signature, isCalledDirectly); 732136d27abSRui Ueyama return symtab->addUndefinedFunction(name, sym.Info.ImportName, 733136d27abSRui Ueyama sym.Info.ImportModule, flags, this, 734136d27abSRui Ueyama sym.Signature, isCalledDirectly); 735e3498ec5SRui Ueyama case WASM_SYMBOL_TYPE_DATA: 736136d27abSRui Ueyama 
if (sym.isBindingLocal()) 737136d27abSRui Ueyama return make<UndefinedData>(name, flags, this); 738136d27abSRui Ueyama return symtab->addUndefinedData(name, flags, this); 739e3498ec5SRui Ueyama case WASM_SYMBOL_TYPE_GLOBAL: 740136d27abSRui Ueyama if (sym.isBindingLocal()) 741136d27abSRui Ueyama return make<UndefinedGlobal>(name, sym.Info.ImportName, 742136d27abSRui Ueyama sym.Info.ImportModule, flags, this, 743136d27abSRui Ueyama sym.GlobalType); 744136d27abSRui Ueyama return symtab->addUndefinedGlobal(name, sym.Info.ImportName, 745136d27abSRui Ueyama sym.Info.ImportModule, flags, this, 746136d27abSRui Ueyama sym.GlobalType); 74753e3b81fSAndy Wingo case WASM_SYMBOL_TYPE_TABLE: 74853e3b81fSAndy Wingo if (sym.isBindingLocal()) 74953e3b81fSAndy Wingo return make<UndefinedTable>(name, sym.Info.ImportName, 75053e3b81fSAndy Wingo sym.Info.ImportModule, flags, this, 75153e3b81fSAndy Wingo sym.TableType); 75253e3b81fSAndy Wingo return symtab->addUndefinedTable(name, sym.Info.ImportName, 75353e3b81fSAndy Wingo sym.Info.ImportModule, flags, this, 75453e3b81fSAndy Wingo sym.TableType); 7559261ee32SHeejin Ahn case WASM_SYMBOL_TYPE_TAG: 7569261ee32SHeejin Ahn if (sym.isBindingLocal()) 7579261ee32SHeejin Ahn return make<UndefinedTag>(name, sym.Info.ImportName, 7589261ee32SHeejin Ahn sym.Info.ImportModule, flags, this, 7599261ee32SHeejin Ahn sym.Signature); 7609261ee32SHeejin Ahn return symtab->addUndefinedTag(name, sym.Info.ImportName, 7619261ee32SHeejin Ahn sym.Info.ImportModule, flags, this, 7629261ee32SHeejin Ahn sym.Signature); 763d177ab2aSSam Clegg case WASM_SYMBOL_TYPE_SECTION: 764d177ab2aSSam Clegg llvm_unreachable("section symbols cannot be undefined"); 765e3498ec5SRui Ueyama } 766d177ab2aSSam Clegg llvm_unreachable("unknown symbol kind"); 767c94d393aSSam Clegg } 768c94d393aSSam Clegg 769e7efa323SSam Clegg static StringRef strip(StringRef s) { return s.trim(' '); } 7703111784fSSam Clegg 7713111784fSSam Clegg void StubFile::parse() { 772d9d840cdSSam Clegg bool first = 
true; 7733111784fSSam Clegg 774d9d840cdSSam Clegg SmallVector<StringRef> lines; 775d9d840cdSSam Clegg mb.getBuffer().split(lines, '\n'); 776d9d840cdSSam Clegg for (StringRef line : lines) { 777d9d840cdSSam Clegg line = line.trim(); 778d9d840cdSSam Clegg 7793111784fSSam Clegg // File must begin with #STUB 7803111784fSSam Clegg if (first) { 781d9d840cdSSam Clegg assert(line == "#STUB"); 7823111784fSSam Clegg first = false; 7833111784fSSam Clegg } 7843111784fSSam Clegg 7853111784fSSam Clegg // Lines starting with # are considered comments 786e7efa323SSam Clegg if (line.starts_with("#") || !line.size()) 7873111784fSSam Clegg continue; 7883111784fSSam Clegg 7893111784fSSam Clegg StringRef sym; 7903111784fSSam Clegg StringRef rest; 7913111784fSSam Clegg std::tie(sym, rest) = line.split(':'); 7923111784fSSam Clegg sym = strip(sym); 7933111784fSSam Clegg rest = strip(rest); 7943111784fSSam Clegg 7953111784fSSam Clegg symbolDependencies[sym] = {}; 7963111784fSSam Clegg 7973111784fSSam Clegg while (rest.size()) { 798d9d840cdSSam Clegg StringRef dep; 799d9d840cdSSam Clegg std::tie(dep, rest) = rest.split(','); 800d9d840cdSSam Clegg dep = strip(dep); 801d9d840cdSSam Clegg symbolDependencies[sym].push_back(dep); 8023111784fSSam Clegg } 8033111784fSSam Clegg } 8043111784fSSam Clegg } 8053111784fSSam Clegg 806136d27abSRui Ueyama static uint8_t mapVisibility(GlobalValue::VisibilityTypes gvVisibility) { 807136d27abSRui Ueyama switch (gvVisibility) { 808c729c1b4SSam Clegg case GlobalValue::DefaultVisibility: 809c729c1b4SSam Clegg return WASM_SYMBOL_VISIBILITY_DEFAULT; 810c729c1b4SSam Clegg case GlobalValue::HiddenVisibility: 811c729c1b4SSam Clegg case GlobalValue::ProtectedVisibility: 812c729c1b4SSam Clegg return WASM_SYMBOL_VISIBILITY_HIDDEN; 813c729c1b4SSam Clegg } 814c729c1b4SSam Clegg llvm_unreachable("unknown visibility"); 815c729c1b4SSam Clegg } 816c729c1b4SSam Clegg 817136d27abSRui Ueyama static Symbol *createBitcodeSymbol(const std::vector<bool> &keptComdats, 818136d27abSRui 
Ueyama const lto::InputFile::Symbol &objSym, 819136d27abSRui Ueyama BitcodeFile &f) { 82083d59e05SAlexandre Ganea StringRef name = saver().save(objSym.getName()); 821c729c1b4SSam Clegg 822136d27abSRui Ueyama uint32_t flags = objSym.isWeak() ? WASM_SYMBOL_BINDING_WEAK : 0; 823136d27abSRui Ueyama flags |= mapVisibility(objSym.getVisibility()); 824c729c1b4SSam Clegg 825136d27abSRui Ueyama int c = objSym.getComdatIndex(); 826136d27abSRui Ueyama bool excludedByComdat = c != -1 && !keptComdats[c]; 827697f2149SSam Clegg 828136d27abSRui Ueyama if (objSym.isUndefined() || excludedByComdat) { 829f6f4b98fSSam Clegg flags |= WASM_SYMBOL_UNDEFINED; 830136d27abSRui Ueyama if (objSym.isExecutable()) 831c68af42fSKazu Hirata return symtab->addUndefinedFunction(name, std::nullopt, std::nullopt, 832c68af42fSKazu Hirata flags, &f, nullptr, true); 833136d27abSRui Ueyama return symtab->addUndefinedData(name, flags, &f); 834c729c1b4SSam Clegg } 835c729c1b4SSam Clegg 836136d27abSRui Ueyama if (objSym.isExecutable()) 837136d27abSRui Ueyama return symtab->addDefinedFunction(name, flags, &f, nullptr); 838136d27abSRui Ueyama return symtab->addDefinedData(name, flags, &f, nullptr, 0, 0); 839c729c1b4SSam Clegg } 840c729c1b4SSam Clegg 84128848e9eSSam Clegg BitcodeFile::BitcodeFile(MemoryBufferRef m, StringRef archiveName, 842bcc9b9d8SSam Clegg uint64_t offsetInArchive, bool lazy) 84328848e9eSSam Clegg : InputFile(BitcodeKind, m) { 844bcc9b9d8SSam Clegg this->lazy = lazy; 84528848e9eSSam Clegg this->archiveName = std::string(archiveName); 84628848e9eSSam Clegg 84728848e9eSSam Clegg std::string path = mb.getBufferIdentifier().str(); 848*3792b362SFangrui Song if (ctx.arg.thinLTOIndexOnly) 849b70eb863SSam Clegg path = replaceThinLTOSuffix(mb.getBufferIdentifier()); 85028848e9eSSam Clegg 85128848e9eSSam Clegg // ThinLTO assumes that all MemoryBufferRefs given to it have a unique 85228848e9eSSam Clegg // name. 
If two archives define two members with the same name, this 85328848e9eSSam Clegg // causes a collision which result in only one of the objects being taken 85428848e9eSSam Clegg // into consideration at LTO time (which very likely causes undefined 85528848e9eSSam Clegg // symbols later in the link stage). So we append file offset to make 85628848e9eSSam Clegg // filename unique. 85728848e9eSSam Clegg StringRef name = archiveName.empty() 85883d59e05SAlexandre Ganea ? saver().save(path) 85983d59e05SAlexandre Ganea : saver().save(archiveName + "(" + path::filename(path) + 86028848e9eSSam Clegg " at " + utostr(offsetInArchive) + ")"); 86128848e9eSSam Clegg MemoryBufferRef mbref(mb.getBuffer(), name); 86228848e9eSSam Clegg 86328848e9eSSam Clegg obj = check(lto::InputFile::create(mbref)); 86428848e9eSSam Clegg 86528848e9eSSam Clegg // If this isn't part of an archive, it's eagerly linked, so mark it live. 86628848e9eSSam Clegg if (archiveName.empty()) 86728848e9eSSam Clegg markLive(); 86828848e9eSSam Clegg } 86928848e9eSSam Clegg 870b062fe18SSam Clegg bool BitcodeFile::doneLTO = false; 871b062fe18SSam Clegg 872bcc9b9d8SSam Clegg void BitcodeFile::parseLazy() { 873bcc9b9d8SSam Clegg for (auto [i, irSym] : llvm::enumerate(obj->symbols())) { 874bcc9b9d8SSam Clegg if (irSym.isUndefined()) 875bcc9b9d8SSam Clegg continue; 876bcc9b9d8SSam Clegg StringRef name = saver().save(irSym.getName()); 877bcc9b9d8SSam Clegg symtab->addLazy(name, this); 878bcc9b9d8SSam Clegg // addLazy() may trigger this->extract() if an existing symbol is an 879bcc9b9d8SSam Clegg // undefined symbol. If that happens, this function has served its purpose, 880bcc9b9d8SSam Clegg // and we can exit from the loop early. 
881bcc9b9d8SSam Clegg if (!lazy) 882bcc9b9d8SSam Clegg break; 883bcc9b9d8SSam Clegg } 884bcc9b9d8SSam Clegg } 885bcc9b9d8SSam Clegg 8867bac0bc1SSam Clegg void BitcodeFile::parse(StringRef symName) { 887b062fe18SSam Clegg if (doneLTO) { 8887bac0bc1SSam Clegg error(toString(this) + ": attempt to add bitcode file after LTO (" + symName + ")"); 889b062fe18SSam Clegg return; 890b062fe18SSam Clegg } 891b062fe18SSam Clegg 892136d27abSRui Ueyama Triple t(obj->getTargetTriple()); 89329f8c9f6SWouter van Oortmerssen if (!t.isWasm()) { 89429f8c9f6SWouter van Oortmerssen error(toString(this) + ": machine type must be wasm32 or wasm64"); 895c729c1b4SSam Clegg return; 896c729c1b4SSam Clegg } 897b8c2d60dSWouter van Oortmerssen checkArch(t.getArch()); 898136d27abSRui Ueyama std::vector<bool> keptComdats; 8994e844a14SHeejin Ahn // TODO Support nodeduplicate 9004e844a14SHeejin Ahn // https://github.com/llvm/llvm-project/issues/49875 901db5e0786SFangrui Song for (std::pair<StringRef, Comdat::SelectionKind> s : obj->getComdatTable()) 902db5e0786SFangrui Song keptComdats.push_back(symtab->addComdat(s.first)); 903c729c1b4SSam Clegg 904136d27abSRui Ueyama for (const lto::InputFile::Symbol &objSym : obj->symbols()) 905136d27abSRui Ueyama symbols.push_back(createBitcodeSymbol(keptComdats, objSym, *this)); 906c729c1b4SSam Clegg } 907c729c1b4SSam Clegg 90833c59abfSFangrui Song } // namespace wasm 90933c59abfSFangrui Song } // namespace lld 910