//===- SymbolTable.cpp ----------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// #include "SymbolTable.h" #include "COFFLinkerContext.h" #include "Config.h" #include "Driver.h" #include "LTO.h" #include "PDB.h" #include "Symbols.h" #include "lld/Common/ErrorHandler.h" #include "lld/Common/Memory.h" #include "lld/Common/Timer.h" #include "llvm/DebugInfo/DIContext.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Mangler.h" #include "llvm/LTO/LTO.h" #include "llvm/Object/COFFModuleDefinition.h" #include "llvm/Support/Debug.h" #include "llvm/Support/GlobPattern.h" #include "llvm/Support/Parallel.h" #include "llvm/Support/TimeProfiler.h" #include "llvm/Support/raw_ostream.h" #include using namespace llvm; using namespace llvm::COFF; using namespace llvm::object; using namespace llvm::support; namespace lld::coff { StringRef ltrim1(StringRef s, const char *chars) { if (!s.empty() && strchr(chars, s[0])) return s.substr(1); return s; } static COFFSyncStream errorOrWarn(COFFLinkerContext &ctx) { return {ctx, ctx.config.forceUnresolved ? DiagLevel::Warn : DiagLevel::Err}; } // Causes the file associated with a lazy symbol to be linked in. static void forceLazy(Symbol *s) { s->pendingArchiveLoad = true; switch (s->kind()) { case Symbol::Kind::LazyArchiveKind: { auto *l = cast(s); l->file->addMember(l->sym); break; } case Symbol::Kind::LazyObjectKind: { InputFile *file = cast(s)->file; // FIXME: Remove this once we resolve all defineds before all undefineds in // ObjFile::initializeSymbols(). if (!file->lazy) return; file->lazy = false; file->symtab.ctx.driver.addFile(file); break; } case Symbol::Kind::LazyDLLSymbolKind: { auto *l = cast(s); l->file->makeImport(l->sym); break; } default: llvm_unreachable( "symbol passed to forceLazy is not a LazyArchive or LazyObject"); } } // Returns the symbol in SC whose value is <= Addr that is closest to Addr. // This is generally the global variable or function whose definition contains // Addr. static Symbol *getSymbol(SectionChunk *sc, uint32_t addr) { DefinedRegular *candidate = nullptr; for (Symbol *s : sc->file->getSymbols()) { auto *d = dyn_cast_or_null(s); if (!d || !d->data || d->file != sc->file || d->getChunk() != sc || d->getValue() > addr || (candidate && d->getValue() < candidate->getValue())) continue; candidate = d; } return candidate; } static std::vector getSymbolLocations(BitcodeFile *file) { std::string res("\n>>> referenced by "); StringRef source = file->obj->getSourceFileName(); if (!source.empty()) res += source.str() + "\n>>> "; res += toString(file); return {res}; } static std::optional> getFileLineDwarf(const SectionChunk *c, uint32_t addr) { std::optional optionalLineInfo = c->file->getDILineInfo(addr, c->getSectionNumber() - 1); if (!optionalLineInfo) return std::nullopt; const DILineInfo &lineInfo = *optionalLineInfo; if (lineInfo.FileName == DILineInfo::BadString) return std::nullopt; return std::make_pair(saver().save(lineInfo.FileName), lineInfo.Line); } static std::optional> getFileLine(const SectionChunk *c, uint32_t addr) { // MinGW can optionally use codeview, even if the default is dwarf. std::optional> fileLine = getFileLineCodeView(c, addr); // If codeview didn't yield any result, check dwarf in MinGW mode. if (!fileLine && c->file->symtab.ctx.config.mingw) fileLine = getFileLineDwarf(c, addr); return fileLine; } // Given a file and the index of a symbol in that file, returns a description // of all references to that symbol from that file. If no debug information is // available, returns just the name of the file, else one string per actual // reference as described in the debug info. // Returns up to maxStrings string descriptions, along with the total number of // locations found. static std::pair, size_t> getSymbolLocations(ObjFile *file, uint32_t symIndex, size_t maxStrings) { struct Location { Symbol *sym; std::pair fileLine; }; std::vector locations; size_t numLocations = 0; for (Chunk *c : file->getChunks()) { auto *sc = dyn_cast(c); if (!sc) continue; for (const coff_relocation &r : sc->getRelocs()) { if (r.SymbolTableIndex != symIndex) continue; numLocations++; if (locations.size() >= maxStrings) continue; std::optional> fileLine = getFileLine(sc, r.VirtualAddress); Symbol *sym = getSymbol(sc, r.VirtualAddress); if (fileLine) locations.push_back({sym, *fileLine}); else if (sym) locations.push_back({sym, {"", 0}}); } } if (maxStrings == 0) return std::make_pair(std::vector(), numLocations); if (numLocations == 0) return std::make_pair( std::vector{"\n>>> referenced by " + toString(file)}, 1); std::vector symbolLocations(locations.size()); size_t i = 0; for (Location loc : locations) { llvm::raw_string_ostream os(symbolLocations[i++]); os << "\n>>> referenced by "; if (!loc.fileLine.first.empty()) os << loc.fileLine.first << ":" << loc.fileLine.second << "\n>>> "; os << toString(file); if (loc.sym) os << ":(" << toString(file->symtab.ctx, *loc.sym) << ')'; } return std::make_pair(symbolLocations, numLocations); } std::vector getSymbolLocations(ObjFile *file, uint32_t symIndex) { return getSymbolLocations(file, symIndex, SIZE_MAX).first; } static std::pair, size_t> getSymbolLocations(InputFile *file, uint32_t symIndex, size_t maxStrings) { if (auto *o = dyn_cast(file)) return getSymbolLocations(o, symIndex, maxStrings); if (auto *b = dyn_cast(file)) { std::vector symbolLocations = getSymbolLocations(b); size_t numLocations = symbolLocations.size(); if (symbolLocations.size() > maxStrings) symbolLocations.resize(maxStrings); return std::make_pair(symbolLocations, numLocations); } llvm_unreachable("unsupported file type passed to getSymbolLocations"); return std::make_pair(std::vector(), (size_t)0); } // For an undefined symbol, stores all files referencing it and the index of // the undefined symbol in each file. struct UndefinedDiag { Symbol *sym; struct File { InputFile *file; uint32_t symIndex; }; std::vector files; }; static void reportUndefinedSymbol(COFFLinkerContext &ctx, const UndefinedDiag &undefDiag) { auto diag = errorOrWarn(ctx); diag << "undefined symbol: " << undefDiag.sym; const size_t maxUndefReferences = 3; size_t numDisplayedRefs = 0, numRefs = 0; for (const UndefinedDiag::File &ref : undefDiag.files) { auto [symbolLocations, totalLocations] = getSymbolLocations( ref.file, ref.symIndex, maxUndefReferences - numDisplayedRefs); numRefs += totalLocations; numDisplayedRefs += symbolLocations.size(); for (const std::string &s : symbolLocations) diag << s; } if (numDisplayedRefs < numRefs) diag << "\n>>> referenced " << numRefs - numDisplayedRefs << " more times"; } void SymbolTable::loadMinGWSymbols() { for (auto &i : symMap) { Symbol *sym = i.second; auto *undef = dyn_cast(sym); if (!undef) continue; if (undef->getWeakAlias()) continue; StringRef name = undef->getName(); if (machine == I386 && ctx.config.stdcallFixup) { // Check if we can resolve an undefined decorated symbol by finding // the intended target as an undecorated symbol (only with a leading // underscore). StringRef origName = name; StringRef baseName = name; // Trim down stdcall/fastcall/vectorcall symbols to the base name. baseName = ltrim1(baseName, "_@"); baseName = baseName.substr(0, baseName.find('@')); // Add a leading underscore, as it would be in cdecl form. std::string newName = ("_" + baseName).str(); Symbol *l; if (newName != origName && (l = find(newName)) != nullptr) { // If we found a symbol and it is lazy; load it. if (l->isLazy() && !l->pendingArchiveLoad) { Log(ctx) << "Loading lazy " << l->getName() << " from " << l->getFile()->getName() << " for stdcall fixup"; forceLazy(l); } // If it's lazy or already defined, hook it up as weak alias. if (l->isLazy() || isa(l)) { if (ctx.config.warnStdcallFixup) Warn(ctx) << "Resolving " << origName << " by linking to " << newName; else Log(ctx) << "Resolving " << origName << " by linking to " << newName; undef->setWeakAlias(l); continue; } } } if (ctx.config.autoImport) { if (name.starts_with("__imp_")) continue; // If we have an undefined symbol, but we have a lazy symbol we could // load, load it. Symbol *l = find(("__imp_" + name).str()); if (!l || l->pendingArchiveLoad || !l->isLazy()) continue; Log(ctx) << "Loading lazy " << l->getName() << " from " << l->getFile()->getName() << " for automatic import"; forceLazy(l); } } } Defined *SymbolTable::impSymbol(StringRef name) { if (name.starts_with("__imp_")) return nullptr; return dyn_cast_or_null(find(("__imp_" + name).str())); } bool SymbolTable::handleMinGWAutomaticImport(Symbol *sym, StringRef name) { Defined *imp = impSymbol(name); if (!imp) return false; // Replace the reference directly to a variable with a reference // to the import address table instead. This obviously isn't right, // but we mark the symbol as isRuntimePseudoReloc, and a later pass // will add runtime pseudo relocations for every relocation against // this Symbol. The runtime pseudo relocation framework expects the // reference itself to point at the IAT entry. size_t impSize = 0; if (isa(imp)) { Log(ctx) << "Automatically importing " << name << " from " << cast(imp)->getDLLName(); impSize = sizeof(DefinedImportData); } else if (isa(imp)) { Log(ctx) << "Automatically importing " << name << " from " << toString(cast(imp)->file); impSize = sizeof(DefinedRegular); } else { Warn(ctx) << "unable to automatically import " << name << " from " << imp->getName() << " from " << cast(imp)->file << "; unexpected symbol type"; return false; } sym->replaceKeepingName(imp, impSize); sym->isRuntimePseudoReloc = true; // There may exist symbols named .refptr. which only consist // of a single pointer to . If it turns out is // automatically imported, we don't need to keep the .refptr. // pointer at all, but redirect all accesses to it to the IAT entry // for __imp_ instead, and drop the whole .refptr. chunk. DefinedRegular *refptr = dyn_cast_or_null(find((".refptr." + name).str())); if (refptr && refptr->getChunk()->getSize() == ctx.config.wordsize) { SectionChunk *sc = dyn_cast_or_null(refptr->getChunk()); if (sc && sc->getRelocs().size() == 1 && *sc->symbols().begin() == sym) { Log(ctx) << "Replacing .refptr." << name << " with " << imp->getName(); refptr->getChunk()->live = false; refptr->replaceKeepingName(imp, impSize); } } return true; } /// Helper function for reportUnresolvable and resolveRemainingUndefines. /// This function emits an "undefined symbol" diagnostic for each symbol in /// undefs. If localImports is not nullptr, it also emits a "locally /// defined symbol imported" diagnostic for symbols in localImports. /// objFiles and bitcodeFiles (if not nullptr) are used to report where /// undefined symbols are referenced. void SymbolTable::reportProblemSymbols( const SmallPtrSetImpl &undefs, const DenseMap *localImports, bool needBitcodeFiles) { // Return early if there is nothing to report (which should be // the common case). if (undefs.empty() && (!localImports || localImports->empty())) return; for (Symbol *b : ctx.config.gcroot) { if (undefs.count(b)) errorOrWarn(ctx) << ": undefined symbol: " << b; if (localImports) if (Symbol *imp = localImports->lookup(b)) Warn(ctx) << ": locally defined symbol imported: " << imp << " (defined in " << toString(imp->getFile()) << ") [LNK4217]"; } std::vector undefDiags; DenseMap firstDiag; auto processFile = [&](InputFile *file, ArrayRef symbols) { uint32_t symIndex = (uint32_t)-1; for (Symbol *sym : symbols) { ++symIndex; if (!sym) continue; if (undefs.count(sym)) { auto [it, inserted] = firstDiag.try_emplace(sym, undefDiags.size()); if (inserted) undefDiags.push_back({sym, {{file, symIndex}}}); else undefDiags[it->second].files.push_back({file, symIndex}); } if (localImports) if (Symbol *imp = localImports->lookup(sym)) Warn(ctx) << file << ": locally defined symbol imported: " << imp << " (defined in " << imp->getFile() << ") [LNK4217]"; } }; for (ObjFile *file : ctx.objFileInstances) processFile(file, file->getSymbols()); if (needBitcodeFiles) for (BitcodeFile *file : bitcodeFileInstances) processFile(file, file->getSymbols()); for (const UndefinedDiag &undefDiag : undefDiags) reportUndefinedSymbol(ctx, undefDiag); } void SymbolTable::reportUnresolvable() { SmallPtrSet undefs; for (auto &i : symMap) { Symbol *sym = i.second; auto *undef = dyn_cast(sym); if (!undef || sym->deferUndefined) continue; if (undef->getWeakAlias()) continue; StringRef name = undef->getName(); if (name.starts_with("__imp_")) { Symbol *imp = find(name.substr(strlen("__imp_"))); if (Defined *def = dyn_cast_or_null(imp)) { def->isUsedInRegularObj = true; continue; } } if (name.contains("_PchSym_")) continue; if (ctx.config.autoImport && impSymbol(name)) continue; undefs.insert(sym); } reportProblemSymbols(undefs, /*localImports=*/nullptr, true); } bool SymbolTable::resolveRemainingUndefines() { llvm::TimeTraceScope timeScope("Resolve remaining undefined symbols"); SmallPtrSet undefs; DenseMap localImports; bool foundLazy = false; for (auto &i : symMap) { Symbol *sym = i.second; auto *undef = dyn_cast(sym); if (!undef) continue; if (!sym->isUsedInRegularObj) continue; StringRef name = undef->getName(); // A weak alias may have been resolved, so check for that. if (undef->resolveWeakAlias()) continue; // If we can resolve a symbol by removing __imp_ prefix, do that. // This odd rule is for compatibility with MSVC linker. if (name.starts_with("__imp_")) { auto findLocalSym = [&](StringRef n) { Symbol *sym = find(n); if (auto undef = dyn_cast_or_null(sym)) { // The unprefixed symbol might come later in symMap, so handle it now // if needed. if (!undef->resolveWeakAlias()) sym = nullptr; } return sym; }; StringRef impName = name.substr(strlen("__imp_")); Symbol *imp = findLocalSym(impName); if (!imp && isEC()) { // Try to use the mangled symbol on ARM64EC. std::optional mangledName = getArm64ECMangledFunctionName(impName); if (mangledName) imp = findLocalSym(*mangledName); if (!imp && impName.consume_front("aux_")) { // If it's a __imp_aux_ symbol, try skipping the aux_ prefix. imp = findLocalSym(impName); if (!imp && (mangledName = getArm64ECMangledFunctionName(impName))) imp = findLocalSym(*mangledName); } } if (imp && imp->isLazy()) { forceLazy(imp); foundLazy = true; continue; } if (imp && isa(imp)) { auto *d = cast(imp); replaceSymbol(sym, ctx, name, d); localImportChunks.push_back(cast(sym)->getChunk()); localImports[sym] = d; continue; } } // We don't want to report missing Microsoft precompiled headers symbols. // A proper message will be emitted instead in PDBLinker::aquirePrecompObj if (name.contains("_PchSym_")) continue; if (ctx.config.autoImport && handleMinGWAutomaticImport(sym, name)) continue; // Remaining undefined symbols are not fatal if /force is specified. // They are replaced with dummy defined symbols. if (ctx.config.forceUnresolved) replaceSymbol(sym, ctx, name, 0); undefs.insert(sym); } reportProblemSymbols( undefs, ctx.config.warnLocallyDefinedImported ? &localImports : nullptr, false); return foundLazy; } std::pair SymbolTable::insert(StringRef name) { bool inserted = false; Symbol *&sym = symMap[CachedHashStringRef(name)]; if (!sym) { sym = reinterpret_cast(make()); sym->isUsedInRegularObj = false; sym->pendingArchiveLoad = false; sym->canInline = true; inserted = true; if (isEC() && name.starts_with("EXP+")) expSymbols.push_back(sym); } return {sym, inserted}; } std::pair SymbolTable::insert(StringRef name, InputFile *file) { std::pair result = insert(name); if (!file || !isa(file)) result.first->isUsedInRegularObj = true; return result; } void SymbolTable::initializeLoadConfig() { auto sym = dyn_cast_or_null(findUnderscore("_load_config_used")); if (!sym) { if (isEC()) { Warn(ctx) << "EC version of '_load_config_used' is missing"; return; } if (ctx.hybridSymtab) { Warn(ctx) << "native version of '_load_config_used' is missing for " "ARM64X target"; return; } if (ctx.config.guardCF != GuardCFLevel::Off) Warn(ctx) << "Control Flow Guard is enabled but '_load_config_used' is missing"; if (ctx.config.dependentLoadFlags) Warn(ctx) << "_load_config_used not found, /dependentloadflag will have " "no effect"; return; } SectionChunk *sc = sym->getChunk(); if (!sc->hasData) { Err(ctx) << "_load_config_used points to uninitialized data"; return; } uint64_t offsetInChunk = sym->getValue(); if (offsetInChunk + 4 > sc->getSize()) { Err(ctx) << "_load_config_used section chunk is too small"; return; } ArrayRef secContents = sc->getContents(); loadConfigSize = *reinterpret_cast(&secContents[offsetInChunk]); if (offsetInChunk + loadConfigSize > sc->getSize()) { Err(ctx) << "_load_config_used specifies a size larger than its containing " "section chunk"; return; } uint32_t expectedAlign = ctx.config.is64() ? 8 : 4; if (sc->getAlignment() < expectedAlign) Warn(ctx) << "'_load_config_used' is misaligned (expected alignment to be " << expectedAlign << " bytes, got " << sc->getAlignment() << " instead)"; else if (!isAligned(Align(expectedAlign), offsetInChunk)) Warn(ctx) << "'_load_config_used' is misaligned (section offset is 0x" << Twine::utohexstr(sym->getValue()) << " not aligned to " << expectedAlign << " bytes)"; loadConfigSym = sym; } void SymbolTable::addEntryThunk(Symbol *from, Symbol *to) { entryThunks.push_back({from, to}); } void SymbolTable::addExitThunk(Symbol *from, Symbol *to) { exitThunks[from] = to; } void SymbolTable::initializeECThunks() { if (!isArm64EC(ctx.config.machine)) return; for (auto it : entryThunks) { auto *to = dyn_cast(it.second); if (!to) continue; auto *from = dyn_cast(it.first); // We need to be able to add padding to the function and fill it with an // offset to its entry thunks. To ensure that padding the function is // feasible, functions are required to be COMDAT symbols with no offset. if (!from || !from->getChunk()->isCOMDAT() || cast(from)->getValue()) { Err(ctx) << "non COMDAT symbol '" << from->getName() << "' in hybrid map"; continue; } from->getChunk()->setEntryThunk(to); } for (ImportFile *file : ctx.importFileInstances) { if (!file->impchkThunk) continue; Symbol *sym = exitThunks.lookup(file->thunkSym); if (!sym) sym = exitThunks.lookup(file->impECSym); file->impchkThunk->exitThunk = dyn_cast_or_null(sym); } // On ARM64EC, the __imp_ symbol references the auxiliary IAT, while the // __imp_aux_ symbol references the regular IAT. However, x86_64 code expects // both to reference the regular IAT, so adjust the symbol if necessary. parallelForEach(ctx.objFileInstances, [&](ObjFile *file) { if (file->getMachineType() != AMD64) return; for (auto &sym : file->getMutableSymbols()) { auto impSym = dyn_cast_or_null(sym); if (impSym && impSym->file->impchkThunk && sym == impSym->file->impECSym) sym = impSym->file->impSym; } }); } Symbol *SymbolTable::addUndefined(StringRef name, InputFile *f, bool overrideLazy) { auto [s, wasInserted] = insert(name, f); if (wasInserted || (s->isLazy() && overrideLazy)) { replaceSymbol(s, name); return s; } if (s->isLazy()) forceLazy(s); return s; } Symbol *SymbolTable::addGCRoot(StringRef name, bool aliasEC) { Symbol *b = addUndefined(name); if (!b->isGCRoot) { b->isGCRoot = true; ctx.config.gcroot.push_back(b); } // On ARM64EC, a symbol may be defined in either its mangled or demangled form // (or both). Define an anti-dependency symbol that binds both forms, similar // to how compiler-generated code references external functions. if (aliasEC && isEC()) { if (std::optional mangledName = getArm64ECMangledFunctionName(name)) { auto u = dyn_cast(b); if (u && !u->weakAlias) { Symbol *t = addUndefined(saver().save(*mangledName)); u->setWeakAlias(t, true); } } else if (std::optional demangledName = getArm64ECDemangledFunctionName(name)) { Symbol *us = addUndefined(saver().save(*demangledName)); auto u = dyn_cast(us); if (u && !u->weakAlias) u->setWeakAlias(b, true); } } return b; } // On ARM64EC, a function symbol may appear in both mangled and demangled forms: // - ARM64EC archives contain only the mangled name, while the demangled symbol // is defined by the object file as an alias. // - x86_64 archives contain only the demangled name (the mangled name is // usually defined by an object referencing the symbol as an alias to a guess // exit thunk). // - ARM64EC import files contain both the mangled and demangled names for // thunks. // If more than one archive defines the same function, this could lead // to different libraries being used for the same function depending on how they // are referenced. Avoid this by checking if the paired symbol is already // defined before adding a symbol to the table. template bool checkLazyECPair(SymbolTable *symtab, StringRef name, InputFile *f) { if (name.starts_with("__imp_")) return true; std::string pairName; if (std::optional mangledName = getArm64ECMangledFunctionName(name)) pairName = std::move(*mangledName); else if (std::optional demangledName = getArm64ECDemangledFunctionName(name)) pairName = std::move(*demangledName); else return true; Symbol *sym = symtab->find(pairName); if (!sym) return true; if (sym->pendingArchiveLoad) return false; if (auto u = dyn_cast(sym)) return !u->weakAlias || u->isAntiDep; // If the symbol is lazy, allow it only if it originates from the same // archive. auto lazy = dyn_cast(sym); return lazy && lazy->file == f; } void SymbolTable::addLazyArchive(ArchiveFile *f, const Archive::Symbol &sym) { StringRef name = sym.getName(); if (isEC() && !checkLazyECPair(this, name, f)) return; auto [s, wasInserted] = insert(name); if (wasInserted) { replaceSymbol(s, f, sym); return; } auto *u = dyn_cast(s); if (!u || (u->weakAlias && !u->isECAlias(machine)) || s->pendingArchiveLoad) return; s->pendingArchiveLoad = true; f->addMember(sym); } void SymbolTable::addLazyObject(InputFile *f, StringRef n) { assert(f->lazy); if (isEC() && !checkLazyECPair(this, n, f)) return; auto [s, wasInserted] = insert(n, f); if (wasInserted) { replaceSymbol(s, f, n); return; } auto *u = dyn_cast(s); if (!u || (u->weakAlias && !u->isECAlias(machine)) || s->pendingArchiveLoad) return; s->pendingArchiveLoad = true; f->lazy = false; ctx.driver.addFile(f); } void SymbolTable::addLazyDLLSymbol(DLLFile *f, DLLFile::Symbol *sym, StringRef n) { auto [s, wasInserted] = insert(n); if (wasInserted) { replaceSymbol(s, f, sym, n); return; } auto *u = dyn_cast(s); if (!u || u->weakAlias || s->pendingArchiveLoad) return; s->pendingArchiveLoad = true; f->makeImport(sym); } static std::string getSourceLocationBitcode(BitcodeFile *file) { std::string res("\n>>> defined at "); StringRef source = file->obj->getSourceFileName(); if (!source.empty()) res += source.str() + "\n>>> "; res += toString(file); return res; } static std::string getSourceLocationObj(ObjFile *file, SectionChunk *sc, uint32_t offset, StringRef name) { std::optional> fileLine; if (sc) fileLine = getFileLine(sc, offset); if (!fileLine) fileLine = file->getVariableLocation(name); std::string res; llvm::raw_string_ostream os(res); os << "\n>>> defined at "; if (fileLine) os << fileLine->first << ":" << fileLine->second << "\n>>> "; os << toString(file); return res; } static std::string getSourceLocation(InputFile *file, SectionChunk *sc, uint32_t offset, StringRef name) { if (!file) return ""; if (auto *o = dyn_cast(file)) return getSourceLocationObj(o, sc, offset, name); if (auto *b = dyn_cast(file)) return getSourceLocationBitcode(b); return "\n>>> defined at " + toString(file); } // Construct and print an error message in the form of: // // lld-link: error: duplicate symbol: foo // >>> defined at bar.c:30 // >>> bar.o // >>> defined at baz.c:563 // >>> baz.o void SymbolTable::reportDuplicate(Symbol *existing, InputFile *newFile, SectionChunk *newSc, uint32_t newSectionOffset) { COFFSyncStream diag(ctx, ctx.config.forceMultiple ? DiagLevel::Warn : DiagLevel::Err); diag << "duplicate symbol: " << existing; DefinedRegular *d = dyn_cast(existing); if (d && isa(d->getFile())) { diag << getSourceLocation(d->getFile(), d->getChunk(), d->getValue(), existing->getName()); } else { diag << getSourceLocation(existing->getFile(), nullptr, 0, ""); } diag << getSourceLocation(newFile, newSc, newSectionOffset, existing->getName()); } Symbol *SymbolTable::addAbsolute(StringRef n, COFFSymbolRef sym) { auto [s, wasInserted] = insert(n, nullptr); s->isUsedInRegularObj = true; if (wasInserted || isa(s) || s->isLazy()) replaceSymbol(s, ctx, n, sym); else if (auto *da = dyn_cast(s)) { if (da->getVA() != sym.getValue()) reportDuplicate(s, nullptr); } else if (!isa(s)) reportDuplicate(s, nullptr); return s; } Symbol *SymbolTable::addAbsolute(StringRef n, uint64_t va) { auto [s, wasInserted] = insert(n, nullptr); s->isUsedInRegularObj = true; if (wasInserted || isa(s) || s->isLazy()) replaceSymbol(s, ctx, n, va); else if (auto *da = dyn_cast(s)) { if (da->getVA() != va) reportDuplicate(s, nullptr); } else if (!isa(s)) reportDuplicate(s, nullptr); return s; } Symbol *SymbolTable::addSynthetic(StringRef n, Chunk *c) { auto [s, wasInserted] = insert(n, nullptr); s->isUsedInRegularObj = true; if (wasInserted || isa(s) || s->isLazy()) replaceSymbol(s, n, c); else if (!isa(s)) reportDuplicate(s, nullptr); return s; } Symbol *SymbolTable::addRegular(InputFile *f, StringRef n, const coff_symbol_generic *sym, SectionChunk *c, uint32_t sectionOffset, bool isWeak) { auto [s, wasInserted] = insert(n, f); if (wasInserted || !isa(s) || s->isWeak) replaceSymbol(s, f, n, /*IsCOMDAT*/ false, /*IsExternal*/ true, sym, c, isWeak); else if (!isWeak) reportDuplicate(s, f, c, sectionOffset); return s; } std::pair SymbolTable::addComdat(InputFile *f, StringRef n, const coff_symbol_generic *sym) { auto [s, wasInserted] = insert(n, f); if (wasInserted || !isa(s)) { replaceSymbol(s, f, n, /*IsCOMDAT*/ true, /*IsExternal*/ true, sym, nullptr); return {cast(s), true}; } auto *existingSymbol = cast(s); if (!existingSymbol->isCOMDAT) reportDuplicate(s, f); return {existingSymbol, false}; } Symbol *SymbolTable::addCommon(InputFile *f, StringRef n, uint64_t size, const coff_symbol_generic *sym, CommonChunk *c) { auto [s, wasInserted] = insert(n, f); if (wasInserted || !isa(s)) replaceSymbol(s, f, n, size, sym, c); else if (auto *dc = dyn_cast(s)) if (size > dc->getSize()) replaceSymbol(s, f, n, size, sym, c); return s; } DefinedImportData *SymbolTable::addImportData(StringRef n, ImportFile *f, Chunk *&location) { auto [s, wasInserted] = insert(n, nullptr); s->isUsedInRegularObj = true; if (wasInserted || isa(s) || s->isLazy()) { replaceSymbol(s, n, f, location); return cast(s); } reportDuplicate(s, f); return nullptr; } Defined *SymbolTable::addImportThunk(StringRef name, DefinedImportData *id, ImportThunkChunk *chunk) { auto [s, wasInserted] = insert(name, nullptr); s->isUsedInRegularObj = true; if (wasInserted || isa(s) || s->isLazy()) { replaceSymbol(s, ctx, name, id, chunk); return cast(s); } reportDuplicate(s, id->file); return nullptr; } void SymbolTable::addLibcall(StringRef name) { Symbol *sym = findUnderscore(name); if (!sym) return; if (auto *l = dyn_cast(sym)) { MemoryBufferRef mb = l->getMemberBuffer(); if (isBitcode(mb)) addUndefined(sym->getName()); } else if (LazyObject *o = dyn_cast(sym)) { if (isBitcode(o->file->mb)) addUndefined(sym->getName()); } } Symbol *SymbolTable::find(StringRef name) const { return symMap.lookup(CachedHashStringRef(name)); } Symbol *SymbolTable::findUnderscore(StringRef name) const { if (machine == I386) return find(("_" + name).str()); return find(name); } // Return all symbols that start with Prefix, possibly ignoring the first // character of Prefix or the first character symbol. std::vector SymbolTable::getSymsWithPrefix(StringRef prefix) { std::vector syms; for (auto pair : symMap) { StringRef name = pair.first.val(); if (name.starts_with(prefix) || name.starts_with(prefix.drop_front()) || name.drop_front().starts_with(prefix) || name.drop_front().starts_with(prefix.drop_front())) { syms.push_back(pair.second); } } return syms; } Symbol *SymbolTable::findMangle(StringRef name) { if (Symbol *sym = find(name)) { if (auto *u = dyn_cast(sym)) { // We're specifically looking for weak aliases that ultimately resolve to // defined symbols, hence the call to getWeakAlias() instead of just using // the weakAlias member variable. This matches link.exe's behavior. if (Symbol *weakAlias = u->getWeakAlias()) return weakAlias; } else { return sym; } } // Efficient fuzzy string lookup is impossible with a hash table, so iterate // the symbol table once and collect all possibly matching symbols into this // vector. Then compare each possibly matching symbol with each possible // mangling. std::vector syms = getSymsWithPrefix(name); auto findByPrefix = [&syms](const Twine &t) -> Symbol * { std::string prefix = t.str(); for (auto *s : syms) if (s->getName().starts_with(prefix)) return s; return nullptr; }; // For non-x86, just look for C++ functions. if (machine != I386) return findByPrefix("?" + name + "@@Y"); if (!name.starts_with("_")) return nullptr; // Search for x86 stdcall function. if (Symbol *s = findByPrefix(name + "@")) return s; // Search for x86 fastcall function. if (Symbol *s = findByPrefix("@" + name.substr(1) + "@")) return s; // Search for x86 vectorcall function. if (Symbol *s = findByPrefix(name.substr(1) + "@@")) return s; // Search for x86 C++ non-member function. return findByPrefix("?" + name.substr(1) + "@@Y"); } bool SymbolTable::findUnderscoreMangle(StringRef sym) { Symbol *s = findMangle(mangle(sym)); return s && !isa(s); } // Symbol names are mangled by prepending "_" on x86. StringRef SymbolTable::mangle(StringRef sym) { assert(machine != IMAGE_FILE_MACHINE_UNKNOWN); if (machine == I386) return saver().save("_" + sym); return sym; } StringRef SymbolTable::mangleMaybe(Symbol *s) { // If the plain symbol name has already been resolved, do nothing. Undefined *unmangled = dyn_cast(s); if (!unmangled) return ""; // Otherwise, see if a similar, mangled symbol exists in the symbol table. Symbol *mangled = findMangle(unmangled->getName()); if (!mangled) return ""; // If we find a similar mangled symbol, make this an alias to it and return // its name. Log(ctx) << unmangled->getName() << " aliased to " << mangled->getName(); unmangled->setWeakAlias(addUndefined(mangled->getName())); return mangled->getName(); } // Windows specific -- find default entry point name. // // There are four different entry point functions for Windows executables, // each of which corresponds to a user-defined "main" function. This function // infers an entry point from a user-defined "main" function. StringRef SymbolTable::findDefaultEntry() { assert(ctx.config.subsystem != IMAGE_SUBSYSTEM_UNKNOWN && "must handle /subsystem before calling this"); if (ctx.config.mingw) return mangle(ctx.config.subsystem == IMAGE_SUBSYSTEM_WINDOWS_GUI ? "WinMainCRTStartup" : "mainCRTStartup"); if (ctx.config.subsystem == IMAGE_SUBSYSTEM_WINDOWS_GUI) { if (findUnderscoreMangle("wWinMain")) { if (!findUnderscoreMangle("WinMain")) return mangle("wWinMainCRTStartup"); Warn(ctx) << "found both wWinMain and WinMain; using latter"; } return mangle("WinMainCRTStartup"); } if (findUnderscoreMangle("wmain")) { if (!findUnderscoreMangle("main")) return mangle("wmainCRTStartup"); Warn(ctx) << "found both wmain and main; using latter"; } return mangle("mainCRTStartup"); } WindowsSubsystem SymbolTable::inferSubsystem() { if (ctx.config.dll) return IMAGE_SUBSYSTEM_WINDOWS_GUI; if (ctx.config.mingw) return IMAGE_SUBSYSTEM_WINDOWS_CUI; // Note that link.exe infers the subsystem from the presence of these // functions even if /entry: or /nodefaultlib are passed which causes them // to not be called. bool haveMain = findUnderscoreMangle("main"); bool haveWMain = findUnderscoreMangle("wmain"); bool haveWinMain = findUnderscoreMangle("WinMain"); bool haveWWinMain = findUnderscoreMangle("wWinMain"); if (haveMain || haveWMain) { if (haveWinMain || haveWWinMain) { Warn(ctx) << "found " << (haveMain ? "main" : "wmain") << " and " << (haveWinMain ? "WinMain" : "wWinMain") << "; defaulting to /subsystem:console"; } return IMAGE_SUBSYSTEM_WINDOWS_CUI; } if (haveWinMain || haveWWinMain) return IMAGE_SUBSYSTEM_WINDOWS_GUI; return IMAGE_SUBSYSTEM_UNKNOWN; } void SymbolTable::addUndefinedGlob(StringRef arg) { Expected pat = GlobPattern::create(arg); if (!pat) { Err(ctx) << "/includeglob: " << toString(pat.takeError()); return; } SmallVector syms; forEachSymbol([&syms, &pat](Symbol *sym) { if (pat->match(sym->getName())) { syms.push_back(sym); } }); for (Symbol *sym : syms) addGCRoot(sym->getName()); } // Convert stdcall/fastcall style symbols into unsuffixed symbols, // with or without a leading underscore. (MinGW specific.) static StringRef killAt(StringRef sym, bool prefix) { if (sym.empty()) return sym; // Strip any trailing stdcall suffix sym = sym.substr(0, sym.find('@', 1)); if (!sym.starts_with("@")) { if (prefix && !sym.starts_with("_")) return saver().save("_" + sym); return sym; } // For fastcall, remove the leading @ and replace it with an // underscore, if prefixes are used. sym = sym.substr(1); if (prefix) sym = saver().save("_" + sym); return sym; } static StringRef exportSourceName(ExportSource s) { switch (s) { case ExportSource::Directives: return "source file (directives)"; case ExportSource::Export: return "/export"; case ExportSource::ModuleDefinition: return "/def"; default: llvm_unreachable("unknown ExportSource"); } } // Performs error checking on all /export arguments. // It also sets ordinals. void SymbolTable::fixupExports() { llvm::TimeTraceScope timeScope("Fixup exports"); // Symbol ordinals must be unique. std::set ords; for (Export &e : exports) { if (e.ordinal == 0) continue; if (!ords.insert(e.ordinal).second) Fatal(ctx) << "duplicate export ordinal: " << e.name; } for (Export &e : exports) { if (!e.exportAs.empty()) { e.exportName = e.exportAs; continue; } StringRef sym = !e.forwardTo.empty() || e.extName.empty() ? e.name : e.extName; if (machine == I386 && sym.starts_with("_")) { // In MSVC mode, a fully decorated stdcall function is exported // as-is with the leading underscore (with type IMPORT_NAME). // In MinGW mode, a decorated stdcall function gets the underscore // removed, just like normal cdecl functions. if (ctx.config.mingw || !sym.contains('@')) { e.exportName = sym.substr(1); continue; } } if (isEC() && !e.data && !e.constant) { if (std::optional demangledName = getArm64ECDemangledFunctionName(sym)) { e.exportName = saver().save(*demangledName); continue; } } e.exportName = sym; } if (ctx.config.killAt && machine == I386) { for (Export &e : exports) { e.name = killAt(e.name, true); e.exportName = killAt(e.exportName, false); e.extName = killAt(e.extName, true); e.symbolName = killAt(e.symbolName, true); } } // Uniquefy by name. DenseMap> map(exports.size()); std::vector v; for (Export &e : exports) { auto pair = map.insert(std::make_pair(e.exportName, std::make_pair(&e, 0))); bool inserted = pair.second; if (inserted) { pair.first->second.second = v.size(); v.push_back(e); continue; } Export *existing = pair.first->second.first; if (e == *existing || e.name != existing->name) continue; // If the existing export comes from .OBJ directives, we are allowed to // overwrite it with /DEF: or /EXPORT without any warning, as MSVC link.exe // does. if (existing->source == ExportSource::Directives) { *existing = e; v[pair.first->second.second] = e; continue; } if (existing->source == e.source) { Warn(ctx) << "duplicate " << exportSourceName(existing->source) << " option: " << e.name; } else { Warn(ctx) << "duplicate export: " << e.name << " first seen in " << exportSourceName(existing->source) << ", now in " << exportSourceName(e.source); } } exports = std::move(v); // Sort by name. llvm::sort(exports, [](const Export &a, const Export &b) { return a.exportName < b.exportName; }); } void SymbolTable::assignExportOrdinals() { // Assign unique ordinals if default (= 0). uint32_t max = 0; for (Export &e : exports) max = std::max(max, (uint32_t)e.ordinal); for (Export &e : exports) if (e.ordinal == 0) e.ordinal = ++max; if (max > std::numeric_limits::max()) Fatal(ctx) << "too many exported symbols (got " << max << ", max " << Twine(std::numeric_limits::max()) << ")"; } void SymbolTable::parseModuleDefs(StringRef path) { llvm::TimeTraceScope timeScope("Parse def file"); std::unique_ptr mb = CHECK(MemoryBuffer::getFile(path, /*IsText=*/false, /*RequiresNullTerminator=*/false, /*IsVolatile=*/true), "could not open " + path); COFFModuleDefinition m = check(parseCOFFModuleDefinition( mb->getMemBufferRef(), machine, ctx.config.mingw)); // Include in /reproduce: output if applicable. ctx.driver.takeBuffer(std::move(mb)); if (ctx.config.outputFile.empty()) ctx.config.outputFile = std::string(saver().save(m.OutputFile)); ctx.config.importName = std::string(saver().save(m.ImportName)); if (m.ImageBase) ctx.config.imageBase = m.ImageBase; if (m.StackReserve) ctx.config.stackReserve = m.StackReserve; if (m.StackCommit) ctx.config.stackCommit = m.StackCommit; if (m.HeapReserve) ctx.config.heapReserve = m.HeapReserve; if (m.HeapCommit) ctx.config.heapCommit = m.HeapCommit; if (m.MajorImageVersion) ctx.config.majorImageVersion = m.MajorImageVersion; if (m.MinorImageVersion) ctx.config.minorImageVersion = m.MinorImageVersion; if (m.MajorOSVersion) ctx.config.majorOSVersion = m.MajorOSVersion; if (m.MinorOSVersion) ctx.config.minorOSVersion = m.MinorOSVersion; for (COFFShortExport e1 : m.Exports) { Export e2; // Renamed exports are parsed and set as "ExtName = Name". If Name has // the form "OtherDll.Func", it shouldn't be a normal exported // function but a forward to another DLL instead. This is supported // by both MS and GNU linkers. if (!e1.ExtName.empty() && e1.ExtName != e1.Name && StringRef(e1.Name).contains('.')) { e2.name = saver().save(e1.ExtName); e2.forwardTo = saver().save(e1.Name); } else { e2.name = saver().save(e1.Name); e2.extName = saver().save(e1.ExtName); } e2.exportAs = saver().save(e1.ExportAs); e2.importName = saver().save(e1.ImportName); e2.ordinal = e1.Ordinal; e2.noname = e1.Noname; e2.data = e1.Data; e2.isPrivate = e1.Private; e2.constant = e1.Constant; e2.source = ExportSource::ModuleDefinition; exports.push_back(e2); } } Symbol *SymbolTable::addUndefined(StringRef name) { return addUndefined(name, nullptr, false); } void SymbolTable::compileBitcodeFiles() { if (bitcodeFileInstances.empty()) return; llvm::TimeTraceScope timeScope("Compile bitcode"); ScopedTimer t(ctx.ltoTimer); lto.reset(new BitcodeCompiler(ctx)); for (BitcodeFile *f : bitcodeFileInstances) lto->add(*f); for (InputFile *newObj : lto->compile()) { ObjFile *obj = cast(newObj); obj->parse(); ctx.objFileInstances.push_back(obj); } } } // namespace lld::coff