//===- SymbolTable.cpp ----------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// Symbol table is a bag of all known symbols. We put all symbols of
// all input files to the symbol table. The symbol table is basically
// a hash table with the logic to resolve symbol name conflicts using
// the symbol types.
//
//===----------------------------------------------------------------------===//
15ece8a530Spatrick
16ece8a530Spatrick #include "SymbolTable.h"
17ece8a530Spatrick #include "Config.h"
18*05edf1c1Srobert #include "InputFiles.h"
19ece8a530Spatrick #include "Symbols.h"
20ece8a530Spatrick #include "lld/Common/ErrorHandler.h"
21ece8a530Spatrick #include "lld/Common/Memory.h"
22ece8a530Spatrick #include "lld/Common/Strings.h"
23ece8a530Spatrick #include "llvm/ADT/STLExtras.h"
24*05edf1c1Srobert #include "llvm/Demangle/Demangle.h"
25ece8a530Spatrick
26ece8a530Spatrick using namespace llvm;
27ece8a530Spatrick using namespace llvm::object;
28ece8a530Spatrick using namespace llvm::ELF;
29bb684c34Spatrick using namespace lld;
30bb684c34Spatrick using namespace lld::elf;
31ece8a530Spatrick
32*05edf1c1Srobert SymbolTable elf::symtab;
33ece8a530Spatrick
wrap(Symbol * sym,Symbol * real,Symbol * wrap)34ece8a530Spatrick void SymbolTable::wrap(Symbol *sym, Symbol *real, Symbol *wrap) {
35*05edf1c1Srobert // Redirect __real_foo to the original foo and foo to the original __wrap_foo.
36ece8a530Spatrick int &idx1 = symMap[CachedHashStringRef(sym->getName())];
37ece8a530Spatrick int &idx2 = symMap[CachedHashStringRef(real->getName())];
38ece8a530Spatrick int &idx3 = symMap[CachedHashStringRef(wrap->getName())];
39ece8a530Spatrick
40ece8a530Spatrick idx2 = idx1;
41ece8a530Spatrick idx1 = idx3;
42ece8a530Spatrick
43*05edf1c1Srobert // Propagate symbol usage information to the redirected symbols.
44*05edf1c1Srobert if (sym->isUsedInRegularObj)
45*05edf1c1Srobert wrap->isUsedInRegularObj = true;
46*05edf1c1Srobert if (real->isUsedInRegularObj)
47*05edf1c1Srobert sym->isUsedInRegularObj = true;
48*05edf1c1Srobert else if (!sym->isDefined())
49*05edf1c1Srobert // Now that all references to sym have been redirected to wrap, if there are
50*05edf1c1Srobert // no references to real (which has been redirected to sym), we only need to
51*05edf1c1Srobert // keep sym if it was defined, otherwise it's unused and can be dropped.
521cf9926bSpatrick sym->isUsedInRegularObj = false;
53bb684c34Spatrick
54bb684c34Spatrick // Now renaming is complete, and no one refers to real. We drop real from
55bb684c34Spatrick // .symtab and .dynsym. If real is undefined, it is important that we don't
56bb684c34Spatrick // leave it in .dynsym, because otherwise it might lead to an undefined symbol
57bb684c34Spatrick // error in a subsequent link. If real is defined, we could emit real as an
58bb684c34Spatrick // alias for sym, but that could degrade the user experience of some tools
59bb684c34Spatrick // that can print out only one symbol for each location: sym is a preferred
60bb684c34Spatrick // name than real, but they might print out real instead.
61ece8a530Spatrick memcpy(real, sym, sizeof(SymbolUnion));
62bb684c34Spatrick real->isUsedInRegularObj = false;
63ece8a530Spatrick }
64ece8a530Spatrick
65ece8a530Spatrick // Find an existing symbol or create a new one.
insert(StringRef name)66ece8a530Spatrick Symbol *SymbolTable::insert(StringRef name) {
67ece8a530Spatrick // <name>@@<version> means the symbol is the default version. In that
68ece8a530Spatrick // case <name>@@<version> will be used to resolve references to <name>.
69ece8a530Spatrick //
70ece8a530Spatrick // Since this is a hot path, the following string search code is
71ece8a530Spatrick // optimized for speed. StringRef::find(char) is much faster than
72ece8a530Spatrick // StringRef::find(StringRef).
73*05edf1c1Srobert StringRef stem = name;
74ece8a530Spatrick size_t pos = name.find('@');
75ece8a530Spatrick if (pos != StringRef::npos && pos + 1 < name.size() && name[pos + 1] == '@')
76*05edf1c1Srobert stem = name.take_front(pos);
77ece8a530Spatrick
78*05edf1c1Srobert auto p = symMap.insert({CachedHashStringRef(stem), (int)symVector.size()});
79*05edf1c1Srobert if (!p.second) {
80*05edf1c1Srobert Symbol *sym = symVector[p.first->second];
81*05edf1c1Srobert if (stem.size() != name.size()) {
82*05edf1c1Srobert sym->setName(name);
83*05edf1c1Srobert sym->hasVersionSuffix = true;
84*05edf1c1Srobert }
85*05edf1c1Srobert return sym;
86*05edf1c1Srobert }
87ece8a530Spatrick
88ece8a530Spatrick Symbol *sym = reinterpret_cast<Symbol *>(make<SymbolUnion>());
89ece8a530Spatrick symVector.push_back(sym);
90ece8a530Spatrick
91*05edf1c1Srobert // *sym was not initialized by a constructor. Initialize all Symbol fields.
92*05edf1c1Srobert memset(sym, 0, sizeof(Symbol));
93ece8a530Spatrick sym->setName(name);
94ece8a530Spatrick sym->partition = 1;
95*05edf1c1Srobert sym->verdefIndex = -1;
96*05edf1c1Srobert sym->gwarn = false;
97*05edf1c1Srobert sym->versionId = VER_NDX_GLOBAL;
98*05edf1c1Srobert if (pos != StringRef::npos)
99*05edf1c1Srobert sym->hasVersionSuffix = true;
100ece8a530Spatrick return sym;
101ece8a530Spatrick }
102ece8a530Spatrick
103*05edf1c1Srobert // This variant of addSymbol is used by BinaryFile::parse to check duplicate
104*05edf1c1Srobert // symbol errors.
addAndCheckDuplicate(const Defined & newSym)105*05edf1c1Srobert Symbol *SymbolTable::addAndCheckDuplicate(const Defined &newSym) {
106bb684c34Spatrick Symbol *sym = insert(newSym.getName());
107*05edf1c1Srobert if (sym->isDefined())
108*05edf1c1Srobert sym->checkDuplicate(newSym);
109ece8a530Spatrick sym->resolve(newSym);
110*05edf1c1Srobert sym->isUsedInRegularObj = true;
111ece8a530Spatrick return sym;
112ece8a530Spatrick }
113ece8a530Spatrick
find(StringRef name)114ece8a530Spatrick Symbol *SymbolTable::find(StringRef name) {
115ece8a530Spatrick auto it = symMap.find(CachedHashStringRef(name));
116ece8a530Spatrick if (it == symMap.end())
117ece8a530Spatrick return nullptr;
118*05edf1c1Srobert return symVector[it->second];
119ece8a530Spatrick }
120ece8a530Spatrick
121bb684c34Spatrick // A version script/dynamic list is only meaningful for a Defined symbol.
122bb684c34Spatrick // A CommonSymbol will be converted to a Defined in replaceCommonSymbols().
123*05edf1c1Srobert // A lazy symbol may be made Defined if an LTO libcall extracts it.
canBeVersioned(const Symbol & sym)124bb684c34Spatrick static bool canBeVersioned(const Symbol &sym) {
125bb684c34Spatrick return sym.isDefined() || sym.isCommon() || sym.isLazy();
126bb684c34Spatrick }
127bb684c34Spatrick
128ece8a530Spatrick // Initialize demangledSyms with a map from demangled symbols to symbol
129ece8a530Spatrick // objects. Used to handle "extern C++" directive in version scripts.
130ece8a530Spatrick //
131ece8a530Spatrick // The map will contain all demangled symbols. That can be very large,
132ece8a530Spatrick // and in LLD we generally want to avoid do anything for each symbol.
133ece8a530Spatrick // Then, why are we doing this? Here's why.
134ece8a530Spatrick //
135ece8a530Spatrick // Users can use "extern C++ {}" directive to match against demangled
136ece8a530Spatrick // C++ symbols. For example, you can write a pattern such as
137ece8a530Spatrick // "llvm::*::foo(int, ?)". Obviously, there's no way to handle this
138ece8a530Spatrick // other than trying to match a pattern against all demangled symbols.
139ece8a530Spatrick // So, if "extern C++" feature is used, we need to demangle all known
140ece8a530Spatrick // symbols.
getDemangledSyms()141*05edf1c1Srobert StringMap<SmallVector<Symbol *, 0>> &SymbolTable::getDemangledSyms() {
142ece8a530Spatrick if (!demangledSyms) {
143ece8a530Spatrick demangledSyms.emplace();
1441cf9926bSpatrick std::string demangled;
145bb684c34Spatrick for (Symbol *sym : symVector)
1461cf9926bSpatrick if (canBeVersioned(*sym)) {
1471cf9926bSpatrick StringRef name = sym->getName();
1481cf9926bSpatrick size_t pos = name.find('@');
1491cf9926bSpatrick if (pos == std::string::npos)
150*05edf1c1Srobert demangled = demangle(name.str());
1511cf9926bSpatrick else if (pos + 1 == name.size() || name[pos + 1] == '@')
152*05edf1c1Srobert demangled = demangle(name.substr(0, pos).str());
1531cf9926bSpatrick else
1541cf9926bSpatrick demangled =
155*05edf1c1Srobert (demangle(name.substr(0, pos).str()) + name.substr(pos)).str();
1561cf9926bSpatrick (*demangledSyms)[demangled].push_back(sym);
1571cf9926bSpatrick }
158ece8a530Spatrick }
159ece8a530Spatrick return *demangledSyms;
160ece8a530Spatrick }
161ece8a530Spatrick
findByVersion(SymbolVersion ver)162*05edf1c1Srobert SmallVector<Symbol *, 0> SymbolTable::findByVersion(SymbolVersion ver) {
163ece8a530Spatrick if (ver.isExternCpp)
164ece8a530Spatrick return getDemangledSyms().lookup(ver.name);
165bb684c34Spatrick if (Symbol *sym = find(ver.name))
166bb684c34Spatrick if (canBeVersioned(*sym))
167bb684c34Spatrick return {sym};
168ece8a530Spatrick return {};
169ece8a530Spatrick }
170ece8a530Spatrick
findAllByVersion(SymbolVersion ver,bool includeNonDefault)171*05edf1c1Srobert SmallVector<Symbol *, 0> SymbolTable::findAllByVersion(SymbolVersion ver,
1721cf9926bSpatrick bool includeNonDefault) {
173*05edf1c1Srobert SmallVector<Symbol *, 0> res;
174bb684c34Spatrick SingleStringMatcher m(ver.name);
1751cf9926bSpatrick auto check = [&](StringRef name) {
1761cf9926bSpatrick size_t pos = name.find('@');
1771cf9926bSpatrick if (!includeNonDefault)
1781cf9926bSpatrick return pos == StringRef::npos;
1791cf9926bSpatrick return !(pos + 1 < name.size() && name[pos + 1] == '@');
1801cf9926bSpatrick };
181ece8a530Spatrick
182ece8a530Spatrick if (ver.isExternCpp) {
183ece8a530Spatrick for (auto &p : getDemangledSyms())
184ece8a530Spatrick if (m.match(p.first()))
1851cf9926bSpatrick for (Symbol *sym : p.second)
1861cf9926bSpatrick if (check(sym->getName()))
1871cf9926bSpatrick res.push_back(sym);
188ece8a530Spatrick return res;
189ece8a530Spatrick }
190ece8a530Spatrick
191ece8a530Spatrick for (Symbol *sym : symVector)
1921cf9926bSpatrick if (canBeVersioned(*sym) && check(sym->getName()) &&
1931cf9926bSpatrick m.match(sym->getName()))
194ece8a530Spatrick res.push_back(sym);
195ece8a530Spatrick return res;
196ece8a530Spatrick }
197ece8a530Spatrick
handleDynamicList()198ece8a530Spatrick void SymbolTable::handleDynamicList() {
199*05edf1c1Srobert SmallVector<Symbol *, 0> syms;
200ece8a530Spatrick for (SymbolVersion &ver : config->dynamicList) {
201ece8a530Spatrick if (ver.hasWildcard)
2021cf9926bSpatrick syms = findAllByVersion(ver, /*includeNonDefault=*/true);
203ece8a530Spatrick else
204ece8a530Spatrick syms = findByVersion(ver);
205ece8a530Spatrick
206ece8a530Spatrick for (Symbol *sym : syms)
207ece8a530Spatrick sym->inDynamicList = true;
208ece8a530Spatrick }
209ece8a530Spatrick }
210ece8a530Spatrick
2111cf9926bSpatrick // Set symbol versions to symbols. This function handles patterns containing no
2121cf9926bSpatrick // wildcard characters. Return false if no symbol definition matches ver.
assignExactVersion(SymbolVersion ver,uint16_t versionId,StringRef versionName,bool includeNonDefault)2131cf9926bSpatrick bool SymbolTable::assignExactVersion(SymbolVersion ver, uint16_t versionId,
2141cf9926bSpatrick StringRef versionName,
2151cf9926bSpatrick bool includeNonDefault) {
216ece8a530Spatrick // Get a list of symbols which we need to assign the version to.
217*05edf1c1Srobert SmallVector<Symbol *, 0> syms = findByVersion(ver);
218ece8a530Spatrick
219ece8a530Spatrick auto getName = [](uint16_t ver) -> std::string {
220ece8a530Spatrick if (ver == VER_NDX_LOCAL)
221ece8a530Spatrick return "VER_NDX_LOCAL";
222ece8a530Spatrick if (ver == VER_NDX_GLOBAL)
223ece8a530Spatrick return "VER_NDX_GLOBAL";
224ece8a530Spatrick return ("version '" + config->versionDefinitions[ver].name + "'").str();
225ece8a530Spatrick };
226ece8a530Spatrick
227ece8a530Spatrick // Assign the version.
228ece8a530Spatrick for (Symbol *sym : syms) {
2291cf9926bSpatrick // For a non-local versionId, skip symbols containing version info because
2301cf9926bSpatrick // symbol versions specified by symbol names take precedence over version
2311cf9926bSpatrick // scripts. See parseSymbolVersion().
2321cf9926bSpatrick if (!includeNonDefault && versionId != VER_NDX_LOCAL &&
2331cf9926bSpatrick sym->getName().contains('@'))
234ece8a530Spatrick continue;
235ece8a530Spatrick
236ece8a530Spatrick // If the version has not been assigned, verdefIndex is -1. Use an arbitrary
237ece8a530Spatrick // number (0) to indicate the version has been assigned.
238*05edf1c1Srobert if (sym->verdefIndex == uint16_t(-1)) {
239ece8a530Spatrick sym->verdefIndex = 0;
240ece8a530Spatrick sym->versionId = versionId;
241ece8a530Spatrick }
242ece8a530Spatrick if (sym->versionId == versionId)
243ece8a530Spatrick continue;
244ece8a530Spatrick
245ece8a530Spatrick warn("attempt to reassign symbol '" + ver.name + "' of " +
246ece8a530Spatrick getName(sym->versionId) + " to " + getName(versionId));
247ece8a530Spatrick }
2481cf9926bSpatrick return !syms.empty();
249ece8a530Spatrick }
250ece8a530Spatrick
assignWildcardVersion(SymbolVersion ver,uint16_t versionId,bool includeNonDefault)2511cf9926bSpatrick void SymbolTable::assignWildcardVersion(SymbolVersion ver, uint16_t versionId,
2521cf9926bSpatrick bool includeNonDefault) {
253ece8a530Spatrick // Exact matching takes precedence over fuzzy matching,
254ece8a530Spatrick // so we set a version to a symbol only if no version has been assigned
255ece8a530Spatrick // to the symbol. This behavior is compatible with GNU.
2561cf9926bSpatrick for (Symbol *sym : findAllByVersion(ver, includeNonDefault))
257*05edf1c1Srobert if (sym->verdefIndex == uint16_t(-1)) {
258ece8a530Spatrick sym->verdefIndex = 0;
259ece8a530Spatrick sym->versionId = versionId;
260ece8a530Spatrick }
261ece8a530Spatrick }
262ece8a530Spatrick
263ece8a530Spatrick // This function processes version scripts by updating the versionId
264ece8a530Spatrick // member of symbols.
265ece8a530Spatrick // If there's only one anonymous version definition in a version
266ece8a530Spatrick // script file, the script does not actually define any symbol version,
267ece8a530Spatrick // but just specifies symbols visibilities.
scanVersionScript()268ece8a530Spatrick void SymbolTable::scanVersionScript() {
2691cf9926bSpatrick SmallString<128> buf;
270ece8a530Spatrick // First, we assign versions to exact matching symbols,
271ece8a530Spatrick // i.e. version definitions not containing any glob meta-characters.
2721cf9926bSpatrick for (VersionDefinition &v : config->versionDefinitions) {
2731cf9926bSpatrick auto assignExact = [&](SymbolVersion pat, uint16_t id, StringRef ver) {
2741cf9926bSpatrick bool found =
2751cf9926bSpatrick assignExactVersion(pat, id, ver, /*includeNonDefault=*/false);
2761cf9926bSpatrick buf.clear();
2771cf9926bSpatrick found |= assignExactVersion({(pat.name + "@" + v.name).toStringRef(buf),
2781cf9926bSpatrick pat.isExternCpp, /*hasWildCard=*/false},
2791cf9926bSpatrick id, ver, /*includeNonDefault=*/true);
2801cf9926bSpatrick if (!found && !config->undefinedVersion)
281*05edf1c1Srobert warn("version script assignment of '" + ver + "' to symbol '" +
2821cf9926bSpatrick pat.name + "' failed: symbol not defined");
2831cf9926bSpatrick };
2841cf9926bSpatrick for (SymbolVersion &pat : v.nonLocalPatterns)
2851cf9926bSpatrick if (!pat.hasWildcard)
2861cf9926bSpatrick assignExact(pat, v.id, v.name);
2871cf9926bSpatrick for (SymbolVersion pat : v.localPatterns)
2881cf9926bSpatrick if (!pat.hasWildcard)
2891cf9926bSpatrick assignExact(pat, VER_NDX_LOCAL, "local");
2901cf9926bSpatrick }
291ece8a530Spatrick
292ece8a530Spatrick // Next, assign versions to wildcards that are not "*". Note that because the
293ece8a530Spatrick // last match takes precedence over previous matches, we iterate over the
294ece8a530Spatrick // definitions in the reverse order.
2951cf9926bSpatrick auto assignWildcard = [&](SymbolVersion pat, uint16_t id, StringRef ver) {
2961cf9926bSpatrick assignWildcardVersion(pat, id, /*includeNonDefault=*/false);
2971cf9926bSpatrick buf.clear();
2981cf9926bSpatrick assignWildcardVersion({(pat.name + "@" + ver).toStringRef(buf),
2991cf9926bSpatrick pat.isExternCpp, /*hasWildCard=*/true},
3001cf9926bSpatrick id,
3011cf9926bSpatrick /*includeNonDefault=*/true);
3021cf9926bSpatrick };
3031cf9926bSpatrick for (VersionDefinition &v : llvm::reverse(config->versionDefinitions)) {
3041cf9926bSpatrick for (SymbolVersion &pat : v.nonLocalPatterns)
305ece8a530Spatrick if (pat.hasWildcard && pat.name != "*")
3061cf9926bSpatrick assignWildcard(pat, v.id, v.name);
3071cf9926bSpatrick for (SymbolVersion &pat : v.localPatterns)
3081cf9926bSpatrick if (pat.hasWildcard && pat.name != "*")
3091cf9926bSpatrick assignWildcard(pat, VER_NDX_LOCAL, v.name);
3101cf9926bSpatrick }
311ece8a530Spatrick
312ece8a530Spatrick // Then, assign versions to "*". In GNU linkers they have lower priority than
313ece8a530Spatrick // other wildcards.
3141cf9926bSpatrick for (VersionDefinition &v : config->versionDefinitions) {
3151cf9926bSpatrick for (SymbolVersion &pat : v.nonLocalPatterns)
316ece8a530Spatrick if (pat.hasWildcard && pat.name == "*")
3171cf9926bSpatrick assignWildcard(pat, v.id, v.name);
3181cf9926bSpatrick for (SymbolVersion &pat : v.localPatterns)
3191cf9926bSpatrick if (pat.hasWildcard && pat.name == "*")
3201cf9926bSpatrick assignWildcard(pat, VER_NDX_LOCAL, v.name);
3211cf9926bSpatrick }
322ece8a530Spatrick
323ece8a530Spatrick // Symbol themselves might know their versions because symbols
324ece8a530Spatrick // can contain versions in the form of <name>@<version>.
325ece8a530Spatrick // Let them parse and update their names to exclude version suffix.
326ece8a530Spatrick for (Symbol *sym : symVector)
327*05edf1c1Srobert if (sym->hasVersionSuffix)
328ece8a530Spatrick sym->parseSymbolVersion();
329ece8a530Spatrick
330ece8a530Spatrick // isPreemptible is false at this point. To correctly compute the binding of a
331ece8a530Spatrick // Defined (which is used by includeInDynsym()), we need to know if it is
332ece8a530Spatrick // VER_NDX_LOCAL or not. Compute symbol versions before handling
333ece8a530Spatrick // --dynamic-list.
334ece8a530Spatrick handleDynamicList();
335ece8a530Spatrick }
336