xref: /llvm-project/lld/ELF/SymbolTable.cpp (revision 5aef8ab6ec3a5bcb224fee764bbc6914a76d7dbb)
1 //===- SymbolTable.cpp ----------------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // Symbol table is a bag of all known symbols. We put all symbols of
10 // all input files to the symbol table. The symbol table is basically
11 // a hash table with the logic to resolve symbol name conflicts using
12 // the symbol types.
13 //
14 //===----------------------------------------------------------------------===//
15 
16 #include "SymbolTable.h"
17 #include "Config.h"
18 #include "InputFiles.h"
19 #include "Symbols.h"
20 #include "lld/Common/ErrorHandler.h"
21 #include "lld/Common/Memory.h"
22 #include "lld/Common/Strings.h"
23 #include "llvm/ADT/STLExtras.h"
24 #include "llvm/Demangle/Demangle.h"
25 
26 using namespace llvm;
27 using namespace llvm::object;
28 using namespace llvm::ELF;
29 using namespace lld;
30 using namespace lld::elf;
31 
32 void SymbolTable::wrap(Symbol *sym, Symbol *real, Symbol *wrap) {
33   // Redirect __real_foo to the original foo and foo to the original __wrap_foo.
34   int &idx1 = symMap[CachedHashStringRef(sym->getName())];
35   int &idx2 = symMap[CachedHashStringRef(real->getName())];
36   int &idx3 = symMap[CachedHashStringRef(wrap->getName())];
37 
38   idx2 = idx1;
39   idx1 = idx3;
40 
41   // Propagate symbol usage information to the redirected symbols.
42   if (sym->isUsedInRegularObj)
43     wrap->isUsedInRegularObj = true;
44   if (real->isUsedInRegularObj)
45     sym->isUsedInRegularObj = true;
46   else if (!sym->isDefined())
47     // Now that all references to sym have been redirected to wrap, if there are
48     // no references to real (which has been redirected to sym), we only need to
49     // keep sym if it was defined, otherwise it's unused and can be dropped.
50     sym->isUsedInRegularObj = false;
51 
52   // Now renaming is complete, and no one refers to real. We drop real from
53   // .symtab and .dynsym. If real is undefined, it is important that we don't
54   // leave it in .dynsym, because otherwise it might lead to an undefined symbol
55   // error in a subsequent link. If real is defined, we could emit real as an
56   // alias for sym, but that could degrade the user experience of some tools
57   // that can print out only one symbol for each location: sym is a preferred
58   // name than real, but they might print out real instead.
59   memcpy(static_cast<void *>(real), sym, sizeof(SymbolUnion));
60   real->isUsedInRegularObj = false;
61 }
62 
63 // Find an existing symbol or create a new one.
64 Symbol *SymbolTable::insert(StringRef name) {
65   // <name>@@<version> means the symbol is the default version. In that
66   // case <name>@@<version> will be used to resolve references to <name>.
67   //
68   // Since this is a hot path, the following string search code is
69   // optimized for speed. StringRef::find(char) is much faster than
70   // StringRef::find(StringRef).
71   StringRef stem = name;
72   size_t pos = name.find('@');
73   if (pos != StringRef::npos && pos + 1 < name.size() && name[pos + 1] == '@')
74     stem = name.take_front(pos);
75 
76   auto p = symMap.insert({CachedHashStringRef(stem), (int)symVector.size()});
77   if (!p.second) {
78     Symbol *sym = symVector[p.first->second];
79     if (stem.size() != name.size()) {
80       sym->setName(name);
81       sym->hasVersionSuffix = true;
82     }
83     return sym;
84   }
85 
86   Symbol *sym = reinterpret_cast<Symbol *>(make<SymbolUnion>());
87   symVector.push_back(sym);
88 
89   // *sym was not initialized by a constructor. Initialize all Symbol fields.
90   memset(static_cast<void *>(sym), 0, sizeof(Symbol));
91   sym->setName(name);
92   sym->partition = 1;
93   sym->versionId = VER_NDX_GLOBAL;
94   if (pos != StringRef::npos)
95     sym->hasVersionSuffix = true;
96   return sym;
97 }
98 
99 // This variant of addSymbol is used by BinaryFile::parse to check duplicate
100 // symbol errors.
101 Symbol *SymbolTable::addAndCheckDuplicate(Ctx &ctx, const Defined &newSym) {
102   Symbol *sym = insert(newSym.getName());
103   if (sym->isDefined())
104     sym->checkDuplicate(ctx, newSym);
105   sym->resolve(ctx, newSym);
106   sym->isUsedInRegularObj = true;
107   return sym;
108 }
109 
110 Symbol *SymbolTable::find(StringRef name) {
111   auto it = symMap.find(CachedHashStringRef(name));
112   if (it == symMap.end())
113     return nullptr;
114   return symVector[it->second];
115 }
116 
117 // A version script/dynamic list is only meaningful for a Defined symbol.
118 // A CommonSymbol will be converted to a Defined in replaceCommonSymbols().
119 // A lazy symbol may be made Defined if an LTO libcall extracts it.
120 static bool canBeVersioned(const Symbol &sym) {
121   return sym.isDefined() || sym.isCommon() || sym.isLazy();
122 }
123 
124 // Initialize demangledSyms with a map from demangled symbols to symbol
125 // objects. Used to handle "extern C++" directive in version scripts.
126 //
127 // The map will contain all demangled symbols. That can be very large,
128 // and in LLD we generally want to avoid do anything for each symbol.
129 // Then, why are we doing this? Here's why.
130 //
131 // Users can use "extern C++ {}" directive to match against demangled
132 // C++ symbols. For example, you can write a pattern such as
133 // "llvm::*::foo(int, ?)". Obviously, there's no way to handle this
134 // other than trying to match a pattern against all demangled symbols.
135 // So, if "extern C++" feature is used, we need to demangle all known
136 // symbols.
137 StringMap<SmallVector<Symbol *, 0>> &SymbolTable::getDemangledSyms() {
138   if (!demangledSyms) {
139     demangledSyms.emplace();
140     std::string demangled;
141     for (Symbol *sym : symVector)
142       if (canBeVersioned(*sym)) {
143         StringRef name = sym->getName();
144         size_t pos = name.find('@');
145         std::string substr;
146         if (pos == std::string::npos)
147           demangled = demangle(name);
148         else if (pos + 1 == name.size() || name[pos + 1] == '@') {
149           substr = name.substr(0, pos);
150           demangled = demangle(substr);
151         } else {
152           substr = name.substr(0, pos);
153           demangled = (demangle(substr) + name.substr(pos)).str();
154         }
155         (*demangledSyms)[demangled].push_back(sym);
156       }
157   }
158   return *demangledSyms;
159 }
160 
161 SmallVector<Symbol *, 0> SymbolTable::findByVersion(SymbolVersion ver) {
162   if (ver.isExternCpp)
163     return getDemangledSyms().lookup(ver.name);
164   if (Symbol *sym = find(ver.name))
165     if (canBeVersioned(*sym))
166       return {sym};
167   return {};
168 }
169 
170 SmallVector<Symbol *, 0> SymbolTable::findAllByVersion(SymbolVersion ver,
171                                                        bool includeNonDefault) {
172   SmallVector<Symbol *, 0> res;
173   SingleStringMatcher m(ver.name);
174   auto check = [&](const Symbol &sym) -> bool {
175     if (!includeNonDefault)
176       return !sym.hasVersionSuffix;
177     StringRef name = sym.getName();
178     size_t pos = name.find('@');
179     return !(pos + 1 < name.size() && name[pos + 1] == '@');
180   };
181 
182   if (ver.isExternCpp) {
183     for (auto &p : getDemangledSyms())
184       if (m.match(p.first()))
185         for (Symbol *sym : p.second)
186           if (check(*sym))
187             res.push_back(sym);
188     return res;
189   }
190 
191   for (Symbol *sym : symVector)
192     if (canBeVersioned(*sym) && check(*sym) && m.match(sym->getName()))
193       res.push_back(sym);
194   return res;
195 }
196 
197 void SymbolTable::handleDynamicList() {
198   SmallVector<Symbol *, 0> syms;
199   for (SymbolVersion &ver : ctx.arg.dynamicList) {
200     if (ver.hasWildcard)
201       syms = findAllByVersion(ver, /*includeNonDefault=*/true);
202     else
203       syms = findByVersion(ver);
204 
205     for (Symbol *sym : syms)
206       sym->exportDynamic = sym->inDynamicList = true;
207   }
208 }
209 
210 // Set symbol versions to symbols. This function handles patterns containing no
211 // wildcard characters. Return false if no symbol definition matches ver.
212 bool SymbolTable::assignExactVersion(SymbolVersion ver, uint16_t versionId,
213                                      StringRef versionName,
214                                      bool includeNonDefault) {
215   // Get a list of symbols which we need to assign the version to.
216   SmallVector<Symbol *, 0> syms = findByVersion(ver);
217 
218   auto getName = [&ctx = ctx](uint16_t ver) -> std::string {
219     if (ver == VER_NDX_LOCAL)
220       return "VER_NDX_LOCAL";
221     if (ver == VER_NDX_GLOBAL)
222       return "VER_NDX_GLOBAL";
223     return ("version '" + ctx.arg.versionDefinitions[ver].name + "'").str();
224   };
225 
226   // Assign the version.
227   for (Symbol *sym : syms) {
228     // For a non-local versionId, skip symbols containing version info because
229     // symbol versions specified by symbol names take precedence over version
230     // scripts. See parseSymbolVersion(ctx).
231     if (!includeNonDefault && versionId != VER_NDX_LOCAL &&
232         sym->getName().contains('@'))
233       continue;
234 
235     // If the version has not been assigned, assign versionId to the symbol.
236     if (!sym->versionScriptAssigned) {
237       sym->versionScriptAssigned = true;
238       sym->versionId = versionId;
239     }
240     if (sym->versionId == versionId)
241       continue;
242 
243     Warn(ctx) << "attempt to reassign symbol '" << ver.name << "' of "
244               << getName(sym->versionId) << " to " << getName(versionId);
245   }
246   return !syms.empty();
247 }
248 
249 void SymbolTable::assignWildcardVersion(SymbolVersion ver, uint16_t versionId,
250                                         bool includeNonDefault) {
251   // Exact matching takes precedence over fuzzy matching,
252   // so we set a version to a symbol only if no version has been assigned
253   // to the symbol. This behavior is compatible with GNU.
254   for (Symbol *sym : findAllByVersion(ver, includeNonDefault))
255     if (!sym->versionScriptAssigned) {
256       sym->versionScriptAssigned = true;
257       sym->versionId = versionId;
258     }
259 }
260 
261 // This function processes version scripts by updating the versionId
262 // member of symbols.
263 // If there's only one anonymous version definition in a version
264 // script file, the script does not actually define any symbol version,
265 // but just specifies symbols visibilities.
266 void SymbolTable::scanVersionScript() {
267   SmallString<128> buf;
268   // First, we assign versions to exact matching symbols,
269   // i.e. version definitions not containing any glob meta-characters.
270   for (VersionDefinition &v : ctx.arg.versionDefinitions) {
271     auto assignExact = [&](SymbolVersion pat, uint16_t id, StringRef ver) {
272       bool found =
273           assignExactVersion(pat, id, ver, /*includeNonDefault=*/false);
274       buf.clear();
275       found |= assignExactVersion({(pat.name + "@" + v.name).toStringRef(buf),
276                                    pat.isExternCpp, /*hasWildCard=*/false},
277                                   id, ver, /*includeNonDefault=*/true);
278       if (!found && !ctx.arg.undefinedVersion)
279         Err(ctx) << "version script assignment of '" << ver << "' to symbol '"
280                  << pat.name << "' failed: symbol not defined";
281     };
282     for (SymbolVersion &pat : v.nonLocalPatterns)
283       if (!pat.hasWildcard)
284         assignExact(pat, v.id, v.name);
285     for (SymbolVersion pat : v.localPatterns)
286       if (!pat.hasWildcard)
287         assignExact(pat, VER_NDX_LOCAL, "local");
288   }
289 
290   // Next, assign versions to wildcards that are not "*". Note that because the
291   // last match takes precedence over previous matches, we iterate over the
292   // definitions in the reverse order.
293   auto assignWildcard = [&](SymbolVersion pat, uint16_t id, StringRef ver) {
294     assignWildcardVersion(pat, id, /*includeNonDefault=*/false);
295     buf.clear();
296     assignWildcardVersion({(pat.name + "@" + ver).toStringRef(buf),
297                            pat.isExternCpp, /*hasWildCard=*/true},
298                           id,
299                           /*includeNonDefault=*/true);
300   };
301   for (VersionDefinition &v : llvm::reverse(ctx.arg.versionDefinitions)) {
302     for (SymbolVersion &pat : v.nonLocalPatterns)
303       if (pat.hasWildcard && pat.name != "*")
304         assignWildcard(pat, v.id, v.name);
305     for (SymbolVersion &pat : v.localPatterns)
306       if (pat.hasWildcard && pat.name != "*")
307         assignWildcard(pat, VER_NDX_LOCAL, v.name);
308   }
309 
310   // Then, assign versions to "*". In GNU linkers they have lower priority than
311   // other wildcards.
312   bool globalAsteriskFound = false;
313   bool localAsteriskFound = false;
314   bool asteriskReported = false;
315   auto assignAsterisk = [&](SymbolVersion &pat, VersionDefinition *ver,
316                             bool isLocal) {
317     // Avoid issuing a warning if both '--retain-symbol-file' and a version
318     // script with `global: *` are used.
319     //
320     // '--retain-symbol-file' adds a "*" pattern to
321     // 'versionDefinitions[VER_NDX_LOCAL].nonLocalPatterns', see
322     // 'readConfigs()' in 'Driver.cpp'. Note that it is not '.localPatterns',
323     // and may seem counterintuitive, but still works as expected. Here we can
324     // exploit that and skip analyzing the pattern added for this option.
325     if (!asteriskReported && (isLocal || ver->id > VER_NDX_LOCAL)) {
326       if ((isLocal && globalAsteriskFound) ||
327           (!isLocal && localAsteriskFound)) {
328         Warn(ctx)
329             << "wildcard pattern '*' is used for both 'local' and 'global' "
330                "scopes in version script";
331         asteriskReported = true;
332       } else if (!isLocal && globalAsteriskFound) {
333         Warn(ctx) << "wildcard pattern '*' is used for multiple version "
334                      "definitions in "
335                      "version script";
336         asteriskReported = true;
337       } else {
338         localAsteriskFound = isLocal;
339         globalAsteriskFound = !isLocal;
340       }
341     }
342     assignWildcard(pat, isLocal ? (uint16_t)VER_NDX_LOCAL : ver->id, ver->name);
343   };
344   for (VersionDefinition &v : llvm::reverse(ctx.arg.versionDefinitions)) {
345     for (SymbolVersion &pat : v.nonLocalPatterns)
346       if (pat.hasWildcard && pat.name == "*")
347         assignAsterisk(pat, &v, false);
348     for (SymbolVersion &pat : v.localPatterns)
349       if (pat.hasWildcard && pat.name == "*")
350         assignAsterisk(pat, &v, true);
351   }
352 
353   // isPreemptible is false at this point. To correctly compute the binding of a
354   // Defined (which is used by includeInDynsym(ctx)), we need to know if it is
355   // VER_NDX_LOCAL or not. Compute symbol versions before handling
356   // --dynamic-list.
357   handleDynamicList();
358 }
359 
360 Symbol *SymbolTable::addUnusedUndefined(StringRef name, uint8_t binding) {
361   return addSymbol(Undefined{ctx.internalFile, name, binding, STV_DEFAULT, 0});
362 }
363