xref: /llvm-project/lldb/source/Core/Mangled.cpp (revision 3241d915b1b8257fd6234461b04e4775fc7ed9fb)
1 //===-- Mangled.cpp -------------------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "lldb/Core/Mangled.h"
10 
11 #include "lldb/Core/DataFileCache.h"
12 #include "lldb/Core/RichManglingContext.h"
13 #include "lldb/Target/Language.h"
14 #include "lldb/Utility/ConstString.h"
15 #include "lldb/Utility/DataEncoder.h"
16 #include "lldb/Utility/LLDBLog.h"
17 #include "lldb/Utility/Log.h"
18 #include "lldb/Utility/RegularExpression.h"
19 #include "lldb/Utility/Stream.h"
20 #include "lldb/lldb-enumerations.h"
21 
22 #include "llvm/ADT/StringExtras.h"
23 #include "llvm/ADT/StringRef.h"
24 #include "llvm/Demangle/Demangle.h"
25 #include "llvm/Support/Compiler.h"
26 
27 #include <mutex>
28 #include <string>
29 #include <string_view>
30 #include <utility>
31 
32 #include <cstdlib>
33 #include <cstring>
34 using namespace lldb_private;
35 
36 static inline bool cstring_is_mangled(llvm::StringRef s) {
37   return Mangled::GetManglingScheme(s) != Mangled::eManglingSchemeNone;
38 }
39 
40 #pragma mark Mangled
41 
42 Mangled::ManglingScheme Mangled::GetManglingScheme(llvm::StringRef const name) {
43   if (name.empty())
44     return Mangled::eManglingSchemeNone;
45 
46   if (name.starts_with("?"))
47     return Mangled::eManglingSchemeMSVC;
48 
49   if (name.starts_with("_R"))
50     return Mangled::eManglingSchemeRustV0;
51 
52   if (name.starts_with("_D")) {
53     // A dlang mangled name begins with `_D`, followed by a numeric length. One
54     // known exception is the symbol `_Dmain`.
55     // See `SymbolName` and `LName` in
56     // https://dlang.org/spec/abi.html#name_mangling
57     llvm::StringRef buf = name.drop_front(2);
58     if (!buf.empty() && (llvm::isDigit(buf.front()) || name == "_Dmain"))
59       return Mangled::eManglingSchemeD;
60   }
61 
62   if (name.starts_with("_Z"))
63     return Mangled::eManglingSchemeItanium;
64 
65   // ___Z is a clang extension of block invocations
66   if (name.starts_with("___Z"))
67     return Mangled::eManglingSchemeItanium;
68 
69   // Swift's older style of mangling used "_T" as a mangling prefix. This can
70   // lead to false positives with other symbols that just so happen to start
71   // with "_T". To minimize the chance of that happening, we only return true
72   // for select old-style swift mangled names. The known cases are ObjC classes
73   // and protocols. Classes are either prefixed with "_TtC" or "_TtGC".
74   // Protocols are prefixed with "_TtP".
75   if (name.starts_with("_TtC") || name.starts_with("_TtGC") ||
76       name.starts_with("_TtP"))
77     return Mangled::eManglingSchemeSwift;
78 
79   // Swift 4.2 used "$S" and "_$S".
80   // Swift 5 and onward uses "$s" and "_$s".
81   // Swift also uses "@__swiftmacro_" as a prefix for mangling filenames.
82   // Embedded Swift introduced "$e" and  "_$e" as Swift mangling prefixes.
83   if (name.starts_with("$S") || name.starts_with("_$S") ||
84       name.starts_with("$s") || name.starts_with("_$s") ||
85       name.starts_with("$e") || name.starts_with("_$e") ||
86       name.starts_with("@__swiftmacro_"))
87     return Mangled::eManglingSchemeSwift;
88 
89   return Mangled::eManglingSchemeNone;
90 }
91 
92 Mangled::Mangled(ConstString s) : m_mangled(), m_demangled() {
93   if (s)
94     SetValue(s);
95 }
96 
97 Mangled::Mangled(llvm::StringRef name) {
98   if (!name.empty())
99     SetValue(ConstString(name));
100 }
101 
102 // Convert to bool operator. This allows code to check any Mangled objects
103 // to see if they contain anything valid using code such as:
104 //
105 //  Mangled mangled(...);
106 //  if (mangled)
107 //  { ...
108 Mangled::operator bool() const { return m_mangled || m_demangled; }
109 
110 // Clear the mangled and demangled values.
111 void Mangled::Clear() {
112   m_mangled.Clear();
113   m_demangled.Clear();
114 }
115 
116 // Compare the string values.
117 int Mangled::Compare(const Mangled &a, const Mangled &b) {
118   return ConstString::Compare(a.GetName(ePreferMangled),
119                               b.GetName(ePreferMangled));
120 }
121 
122 void Mangled::SetValue(ConstString name) {
123   if (name) {
124     if (cstring_is_mangled(name.GetStringRef())) {
125       m_demangled.Clear();
126       m_mangled = name;
127     } else {
128       m_demangled = name;
129       m_mangled.Clear();
130     }
131   } else {
132     m_demangled.Clear();
133     m_mangled.Clear();
134   }
135 }
136 
137 // Local helpers for different demangling implementations.
138 static char *GetMSVCDemangledStr(llvm::StringRef M) {
139   char *demangled_cstr = llvm::microsoftDemangle(
140       M, nullptr, nullptr,
141       llvm::MSDemangleFlags(
142           llvm::MSDF_NoAccessSpecifier | llvm::MSDF_NoCallingConvention |
143           llvm::MSDF_NoMemberType | llvm::MSDF_NoVariableType));
144 
145   if (Log *log = GetLog(LLDBLog::Demangle)) {
146     if (demangled_cstr && demangled_cstr[0])
147       LLDB_LOGF(log, "demangled msvc: %s -> \"%s\"", M.data(), demangled_cstr);
148     else
149       LLDB_LOGF(log, "demangled msvc: %s -> error", M.data());
150   }
151 
152   return demangled_cstr;
153 }
154 
155 static char *GetItaniumDemangledStr(const char *M) {
156   char *demangled_cstr = nullptr;
157 
158   llvm::ItaniumPartialDemangler ipd;
159   bool err = ipd.partialDemangle(M);
160   if (!err) {
161     // Default buffer and size (will realloc in case it's too small).
162     size_t demangled_size = 80;
163     demangled_cstr = static_cast<char *>(std::malloc(demangled_size));
164     demangled_cstr = ipd.finishDemangle(demangled_cstr, &demangled_size);
165 
166     assert(demangled_cstr &&
167            "finishDemangle must always succeed if partialDemangle did");
168     assert(demangled_cstr[demangled_size - 1] == '\0' &&
169            "Expected demangled_size to return length including trailing null");
170   }
171 
172   if (Log *log = GetLog(LLDBLog::Demangle)) {
173     if (demangled_cstr)
174       LLDB_LOGF(log, "demangled itanium: %s -> \"%s\"", M, demangled_cstr);
175     else
176       LLDB_LOGF(log, "demangled itanium: %s -> error: failed to demangle", M);
177   }
178 
179   return demangled_cstr;
180 }
181 
182 static char *GetRustV0DemangledStr(llvm::StringRef M) {
183   char *demangled_cstr = llvm::rustDemangle(M);
184 
185   if (Log *log = GetLog(LLDBLog::Demangle)) {
186     if (demangled_cstr && demangled_cstr[0])
187       LLDB_LOG(log, "demangled rustv0: {0} -> \"{1}\"", M, demangled_cstr);
188     else
189       LLDB_LOG(log, "demangled rustv0: {0} -> error: failed to demangle",
190                static_cast<std::string_view>(M));
191   }
192 
193   return demangled_cstr;
194 }
195 
196 static char *GetDLangDemangledStr(llvm::StringRef M) {
197   char *demangled_cstr = llvm::dlangDemangle(M);
198 
199   if (Log *log = GetLog(LLDBLog::Demangle)) {
200     if (demangled_cstr && demangled_cstr[0])
201       LLDB_LOG(log, "demangled dlang: {0} -> \"{1}\"", M, demangled_cstr);
202     else
203       LLDB_LOG(log, "demangled dlang: {0} -> error: failed to demangle",
204                static_cast<std::string_view>(M));
205   }
206 
207   return demangled_cstr;
208 }
209 
210 // Explicit demangling for scheduled requests during batch processing. This
211 // makes use of ItaniumPartialDemangler's rich demangle info
212 bool Mangled::GetRichManglingInfo(RichManglingContext &context,
213                                   SkipMangledNameFn *skip_mangled_name) {
214   // Others are not meant to arrive here. ObjC names or C's main() for example
215   // have their names stored in m_demangled, while m_mangled is empty.
216   assert(m_mangled);
217 
218   // Check whether or not we are interested in this name at all.
219   ManglingScheme scheme = GetManglingScheme(m_mangled.GetStringRef());
220   if (skip_mangled_name && skip_mangled_name(m_mangled.GetStringRef(), scheme))
221     return false;
222 
223   switch (scheme) {
224   case eManglingSchemeNone:
225     // The current mangled_name_filter would allow llvm_unreachable here.
226     return false;
227 
228   case eManglingSchemeItanium:
229     // We want the rich mangling info here, so we don't care whether or not
230     // there is a demangled string in the pool already.
231     return context.FromItaniumName(m_mangled);
232 
233   case eManglingSchemeMSVC: {
234     // We have no rich mangling for MSVC-mangled names yet, so first try to
235     // demangle it if necessary.
236     if (!m_demangled && !m_mangled.GetMangledCounterpart(m_demangled)) {
237       if (char *d = GetMSVCDemangledStr(m_mangled)) {
238         // Without the rich mangling info we have to demangle the full name.
239         // Copy it to string pool and connect the counterparts to accelerate
240         // later access in GetDemangledName().
241         m_demangled.SetStringWithMangledCounterpart(llvm::StringRef(d),
242                                                     m_mangled);
243         ::free(d);
244       } else {
245         m_demangled.SetCString("");
246       }
247     }
248 
249     if (m_demangled.IsEmpty()) {
250       // Cannot demangle it, so don't try parsing.
251       return false;
252     } else {
253       // Demangled successfully, we can try and parse it with
254       // CPlusPlusLanguage::MethodName.
255       return context.FromCxxMethodName(m_demangled);
256     }
257   }
258 
259   case eManglingSchemeRustV0:
260   case eManglingSchemeD:
261   case eManglingSchemeSwift:
262     // Rich demangling scheme is not supported
263     return false;
264   }
265   llvm_unreachable("Fully covered switch above!");
266 }
267 
268 // Generate the demangled name on demand using this accessor. Code in this
269 // class will need to use this accessor if it wishes to decode the demangled
270 // name. The result is cached and will be kept until a new string value is
271 // supplied to this object, or until the end of the object's lifetime.
272 ConstString Mangled::GetDemangledName() const {
273   // Check to make sure we have a valid mangled name and that we haven't
274   // already decoded our mangled name.
275   if (m_mangled && m_demangled.IsNull()) {
276     // Don't bother running anything that isn't mangled
277     const char *mangled_name = m_mangled.GetCString();
278     ManglingScheme mangling_scheme =
279         GetManglingScheme(m_mangled.GetStringRef());
280     if (mangling_scheme != eManglingSchemeNone &&
281         !m_mangled.GetMangledCounterpart(m_demangled)) {
282       // We didn't already mangle this name, demangle it and if all goes well
283       // add it to our map.
284       char *demangled_name = nullptr;
285       switch (mangling_scheme) {
286       case eManglingSchemeMSVC:
287         demangled_name = GetMSVCDemangledStr(mangled_name);
288         break;
289       case eManglingSchemeItanium: {
290         demangled_name = GetItaniumDemangledStr(mangled_name);
291         break;
292       }
293       case eManglingSchemeRustV0:
294         demangled_name = GetRustV0DemangledStr(m_mangled);
295         break;
296       case eManglingSchemeD:
297         demangled_name = GetDLangDemangledStr(m_mangled);
298         break;
299       case eManglingSchemeSwift:
300         // Demangling a swift name requires the swift compiler. This is
301         // explicitly unsupported on llvm.org.
302         break;
303       case eManglingSchemeNone:
304         llvm_unreachable("eManglingSchemeNone was handled already");
305       }
306       if (demangled_name) {
307         m_demangled.SetStringWithMangledCounterpart(
308             llvm::StringRef(demangled_name), m_mangled);
309         free(demangled_name);
310       }
311     }
312     if (m_demangled.IsNull()) {
313       // Set the demangled string to the empty string to indicate we tried to
314       // parse it once and failed.
315       m_demangled.SetCString("");
316     }
317   }
318 
319   return m_demangled;
320 }
321 
322 ConstString Mangled::GetDisplayDemangledName() const {
323   if (Language *lang = Language::FindPlugin(GuessLanguage()))
324     return lang->GetDisplayDemangledName(*this);
325   return GetDemangledName();
326 }
327 
328 bool Mangled::NameMatches(const RegularExpression &regex) const {
329   if (m_mangled && regex.Execute(m_mangled.GetStringRef()))
330     return true;
331 
332   ConstString demangled = GetDemangledName();
333   return demangled && regex.Execute(demangled.GetStringRef());
334 }
335 
336 // Get the demangled name if there is one, else return the mangled name.
337 ConstString Mangled::GetName(Mangled::NamePreference preference) const {
338   if (preference == ePreferMangled && m_mangled)
339     return m_mangled;
340 
341   // Call the accessor to make sure we get a demangled name in case it hasn't
342   // been demangled yet...
343   ConstString demangled = GetDemangledName();
344 
345   if (preference == ePreferDemangledWithoutArguments) {
346     if (Language *lang = Language::FindPlugin(GuessLanguage())) {
347       return lang->GetDemangledFunctionNameWithoutArguments(*this);
348     }
349   }
350   if (preference == ePreferDemangled) {
351     if (demangled)
352       return demangled;
353     return m_mangled;
354   }
355   return demangled;
356 }
357 
358 // Dump a Mangled object to stream "s". We don't force our demangled name to be
359 // computed currently (we don't use the accessor).
360 void Mangled::Dump(Stream *s) const {
361   if (m_mangled) {
362     *s << ", mangled = " << m_mangled;
363   }
364   if (m_demangled) {
365     const char *demangled = m_demangled.AsCString();
366     s->Printf(", demangled = %s", demangled[0] ? demangled : "<error>");
367   }
368 }
369 
370 // Dumps a debug version of this string with extra object and state information
371 // to stream "s".
372 void Mangled::DumpDebug(Stream *s) const {
373   s->Printf("%*p: Mangled mangled = ", static_cast<int>(sizeof(void *) * 2),
374             static_cast<const void *>(this));
375   m_mangled.DumpDebug(s);
376   s->Printf(", demangled = ");
377   m_demangled.DumpDebug(s);
378 }
379 
380 // Return the size in byte that this object takes in memory. The size includes
381 // the size of the objects it owns, and not the strings that it references
382 // because they are shared strings.
383 size_t Mangled::MemorySize() const {
384   return m_mangled.MemorySize() + m_demangled.MemorySize();
385 }
386 
387 // We "guess" the language because we can't determine a symbol's language from
388 // it's name.  For example, a Pascal symbol can be mangled using the C++
389 // Itanium scheme, and defined in a compilation unit within the same module as
390 // other C++ units.  In addition, different targets could have different ways
391 // of mangling names from a given language, likewise the compilation units
392 // within those targets.
393 lldb::LanguageType Mangled::GuessLanguage() const {
394   lldb::LanguageType result = lldb::eLanguageTypeUnknown;
395   // Ask each language plugin to check if the mangled name belongs to it.
396   Language::ForEach([this, &result](Language *l) {
397     if (l->SymbolNameFitsToLanguage(*this)) {
398       result = l->GetLanguageType();
399       return false;
400     }
401     return true;
402   });
403   return result;
404 }
405 
406 // Dump OBJ to the supplied stream S.
407 Stream &operator<<(Stream &s, const Mangled &obj) {
408   if (obj.GetMangledName())
409     s << "mangled = '" << obj.GetMangledName() << "'";
410 
411   ConstString demangled = obj.GetDemangledName();
412   if (demangled)
413     s << ", demangled = '" << demangled << '\'';
414   else
415     s << ", demangled = <error>";
416   return s;
417 }
418 
// Encoding a Mangled object only needs to store as much information as is
// required to rebuild it. This enumeration names the possible layouts; its
// numeric values are what Mangled::Encode() writes as the leading byte and
// what Mangled::Decode() reads back.
enum MangledEncoding {
  /// The Mangled object has neither a mangled nor a demangled name, so it is
  /// encoded as this single zero byte.
  Empty = 0u,
  /// The Mangled object has a demangled name but no mangled name; only the
  /// demangled name is encoded.
  DemangledOnly = 1u,
  /// The demangled name can be recomputed from the mangled name (the two are
  /// mangled/demangled counterparts), so only the mangled name is encoded.
  MangledOnly = 2u,
  /// The Mangled object has two unrelated names and both must be saved. This
  /// happens for a name that is not a true mangled name but that should still
  /// be found when looking up a symbol by name and type in the symbol table.
  /// Objective C symbols such as "OBJC_CLASS_$_NSValue" are handled this way:
  /// the mangled name is set to "OBJC_CLASS_$_NSValue" and the demangled name
  /// is manually set to "NSValue". Trying to demangle "OBJC_CLASS_$_NSValue"
  /// would fail, so both names have to be preserved.
  MangledAndDemangled = 3u
};
444 
445 bool Mangled::Decode(const DataExtractor &data, lldb::offset_t *offset_ptr,
446                      const StringTableReader &strtab) {
447   m_mangled.Clear();
448   m_demangled.Clear();
449   MangledEncoding encoding = (MangledEncoding)data.GetU8(offset_ptr);
450   switch (encoding) {
451     case Empty:
452       return true;
453 
454     case DemangledOnly:
455       m_demangled.SetString(strtab.Get(data.GetU32(offset_ptr)));
456       return true;
457 
458     case MangledOnly:
459       m_mangled.SetString(strtab.Get(data.GetU32(offset_ptr)));
460       return true;
461 
462     case MangledAndDemangled:
463       m_mangled.SetString(strtab.Get(data.GetU32(offset_ptr)));
464       m_demangled.SetString(strtab.Get(data.GetU32(offset_ptr)));
465       return true;
466   }
467   return false;
468 }
469 /// The encoding format for the Mangled object is as follows:
470 ///
471 /// uint8_t encoding;
472 /// char str1[]; (only if DemangledOnly, MangledOnly)
473 /// char str2[]; (only if MangledAndDemangled)
474 ///
475 /// The strings are stored as NULL terminated UTF8 strings and str1 and str2
476 /// are only saved if we need them based on the encoding.
477 ///
478 /// Some mangled names have a mangled name that can be demangled by the built
479 /// in demanglers. These kinds of mangled objects know when the mangled and
480 /// demangled names are the counterparts for each other. This is done because
481 /// demangling is very expensive and avoiding demangling the same name twice
482 /// saves us a lot of compute time. For these kinds of names we only need to
483 /// save the mangled name and have the encoding set to "MangledOnly".
484 ///
485 /// If a mangled obejct has only a demangled name, then we save only that string
486 /// and have the encoding set to "DemangledOnly".
487 ///
488 /// Some mangled objects have both mangled and demangled names, but the
489 /// demangled name can not be computed from the mangled name. This is often used
490 /// for runtime named, like Objective C runtime V2 and V3 names. Both these
491 /// names must be saved and the encoding is set to "MangledAndDemangled".
492 ///
493 /// For a Mangled object with no names, we only need to set the encoding to
494 /// "Empty" and not store any string values.
495 void Mangled::Encode(DataEncoder &file, ConstStringTable &strtab) const {
496   MangledEncoding encoding = Empty;
497   if (m_mangled) {
498     encoding = MangledOnly;
499     if (m_demangled) {
500       // We have both mangled and demangled names. If the demangled name is the
501       // counterpart of the mangled name, then we only need to save the mangled
502       // named. If they are different, we need to save both.
503       ConstString s;
504       if (!(m_mangled.GetMangledCounterpart(s) && s == m_demangled))
505         encoding = MangledAndDemangled;
506     }
507   } else if (m_demangled) {
508     encoding = DemangledOnly;
509   }
510   file.AppendU8(encoding);
511   switch (encoding) {
512     case Empty:
513       break;
514     case DemangledOnly:
515       file.AppendU32(strtab.Add(m_demangled));
516       break;
517     case MangledOnly:
518       file.AppendU32(strtab.Add(m_mangled));
519       break;
520     case MangledAndDemangled:
521       file.AppendU32(strtab.Add(m_mangled));
522       file.AppendU32(strtab.Add(m_demangled));
523       break;
524   }
525 }
526