1 //===-- Mangled.cpp -------------------------------------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "lldb/Core/Mangled.h" 10 11 #include "lldb/Core/DataFileCache.h" 12 #include "lldb/Core/RichManglingContext.h" 13 #include "lldb/Target/Language.h" 14 #include "lldb/Utility/ConstString.h" 15 #include "lldb/Utility/DataEncoder.h" 16 #include "lldb/Utility/LLDBLog.h" 17 #include "lldb/Utility/Log.h" 18 #include "lldb/Utility/RegularExpression.h" 19 #include "lldb/Utility/Stream.h" 20 #include "lldb/lldb-enumerations.h" 21 22 #include "llvm/ADT/StringExtras.h" 23 #include "llvm/ADT/StringRef.h" 24 #include "llvm/Demangle/Demangle.h" 25 #include "llvm/Support/Compiler.h" 26 27 #include <mutex> 28 #include <string> 29 #include <string_view> 30 #include <utility> 31 32 #include <cstdlib> 33 #include <cstring> 34 using namespace lldb_private; 35 36 static inline bool cstring_is_mangled(llvm::StringRef s) { 37 return Mangled::GetManglingScheme(s) != Mangled::eManglingSchemeNone; 38 } 39 40 #pragma mark Mangled 41 42 Mangled::ManglingScheme Mangled::GetManglingScheme(llvm::StringRef const name) { 43 if (name.empty()) 44 return Mangled::eManglingSchemeNone; 45 46 if (name.starts_with("?")) 47 return Mangled::eManglingSchemeMSVC; 48 49 if (name.starts_with("_R")) 50 return Mangled::eManglingSchemeRustV0; 51 52 if (name.starts_with("_D")) { 53 // A dlang mangled name begins with `_D`, followed by a numeric length. One 54 // known exception is the symbol `_Dmain`. 55 // See `SymbolName` and `LName` in 56 // https://dlang.org/spec/abi.html#name_mangling 57 llvm::StringRef buf = name.drop_front(2); 58 if (!buf.empty() && (llvm::isDigit(buf.front()) || name == "_Dmain")) 59 return Mangled::eManglingSchemeD; 60 } 61 62 if (name.starts_with("_Z")) 63 return Mangled::eManglingSchemeItanium; 64 65 // ___Z is a clang extension of block invocations 66 if (name.starts_with("___Z")) 67 return Mangled::eManglingSchemeItanium; 68 69 // Swift's older style of mangling used "_T" as a mangling prefix. This can 70 // lead to false positives with other symbols that just so happen to start 71 // with "_T". To minimize the chance of that happening, we only return true 72 // for select old-style swift mangled names. The known cases are ObjC classes 73 // and protocols. Classes are either prefixed with "_TtC" or "_TtGC". 74 // Protocols are prefixed with "_TtP". 75 if (name.starts_with("_TtC") || name.starts_with("_TtGC") || 76 name.starts_with("_TtP")) 77 return Mangled::eManglingSchemeSwift; 78 79 // Swift 4.2 used "$S" and "_$S". 80 // Swift 5 and onward uses "$s" and "_$s". 81 // Swift also uses "@__swiftmacro_" as a prefix for mangling filenames. 82 // Embedded Swift introduced "$e" and "_$e" as Swift mangling prefixes. 83 if (name.starts_with("$S") || name.starts_with("_$S") || 84 name.starts_with("$s") || name.starts_with("_$s") || 85 name.starts_with("$e") || name.starts_with("_$e") || 86 name.starts_with("@__swiftmacro_")) 87 return Mangled::eManglingSchemeSwift; 88 89 return Mangled::eManglingSchemeNone; 90 } 91 92 Mangled::Mangled(ConstString s) : m_mangled(), m_demangled() { 93 if (s) 94 SetValue(s); 95 } 96 97 Mangled::Mangled(llvm::StringRef name) { 98 if (!name.empty()) 99 SetValue(ConstString(name)); 100 } 101 102 // Convert to bool operator. This allows code to check any Mangled objects 103 // to see if they contain anything valid using code such as: 104 // 105 // Mangled mangled(...); 106 // if (mangled) 107 // { ... 108 Mangled::operator bool() const { return m_mangled || m_demangled; } 109 110 // Clear the mangled and demangled values. 111 void Mangled::Clear() { 112 m_mangled.Clear(); 113 m_demangled.Clear(); 114 } 115 116 // Compare the string values. 117 int Mangled::Compare(const Mangled &a, const Mangled &b) { 118 return ConstString::Compare(a.GetName(ePreferMangled), 119 b.GetName(ePreferMangled)); 120 } 121 122 void Mangled::SetValue(ConstString name) { 123 if (name) { 124 if (cstring_is_mangled(name.GetStringRef())) { 125 m_demangled.Clear(); 126 m_mangled = name; 127 } else { 128 m_demangled = name; 129 m_mangled.Clear(); 130 } 131 } else { 132 m_demangled.Clear(); 133 m_mangled.Clear(); 134 } 135 } 136 137 // Local helpers for different demangling implementations. 138 static char *GetMSVCDemangledStr(llvm::StringRef M) { 139 char *demangled_cstr = llvm::microsoftDemangle( 140 M, nullptr, nullptr, 141 llvm::MSDemangleFlags( 142 llvm::MSDF_NoAccessSpecifier | llvm::MSDF_NoCallingConvention | 143 llvm::MSDF_NoMemberType | llvm::MSDF_NoVariableType)); 144 145 if (Log *log = GetLog(LLDBLog::Demangle)) { 146 if (demangled_cstr && demangled_cstr[0]) 147 LLDB_LOGF(log, "demangled msvc: %s -> \"%s\"", M.data(), demangled_cstr); 148 else 149 LLDB_LOGF(log, "demangled msvc: %s -> error", M.data()); 150 } 151 152 return demangled_cstr; 153 } 154 155 static char *GetItaniumDemangledStr(const char *M) { 156 char *demangled_cstr = nullptr; 157 158 llvm::ItaniumPartialDemangler ipd; 159 bool err = ipd.partialDemangle(M); 160 if (!err) { 161 // Default buffer and size (will realloc in case it's too small). 162 size_t demangled_size = 80; 163 demangled_cstr = static_cast<char *>(std::malloc(demangled_size)); 164 demangled_cstr = ipd.finishDemangle(demangled_cstr, &demangled_size); 165 166 assert(demangled_cstr && 167 "finishDemangle must always succeed if partialDemangle did"); 168 assert(demangled_cstr[demangled_size - 1] == '\0' && 169 "Expected demangled_size to return length including trailing null"); 170 } 171 172 if (Log *log = GetLog(LLDBLog::Demangle)) { 173 if (demangled_cstr) 174 LLDB_LOGF(log, "demangled itanium: %s -> \"%s\"", M, demangled_cstr); 175 else 176 LLDB_LOGF(log, "demangled itanium: %s -> error: failed to demangle", M); 177 } 178 179 return demangled_cstr; 180 } 181 182 static char *GetRustV0DemangledStr(llvm::StringRef M) { 183 char *demangled_cstr = llvm::rustDemangle(M); 184 185 if (Log *log = GetLog(LLDBLog::Demangle)) { 186 if (demangled_cstr && demangled_cstr[0]) 187 LLDB_LOG(log, "demangled rustv0: {0} -> \"{1}\"", M, demangled_cstr); 188 else 189 LLDB_LOG(log, "demangled rustv0: {0} -> error: failed to demangle", 190 static_cast<std::string_view>(M)); 191 } 192 193 return demangled_cstr; 194 } 195 196 static char *GetDLangDemangledStr(llvm::StringRef M) { 197 char *demangled_cstr = llvm::dlangDemangle(M); 198 199 if (Log *log = GetLog(LLDBLog::Demangle)) { 200 if (demangled_cstr && demangled_cstr[0]) 201 LLDB_LOG(log, "demangled dlang: {0} -> \"{1}\"", M, demangled_cstr); 202 else 203 LLDB_LOG(log, "demangled dlang: {0} -> error: failed to demangle", 204 static_cast<std::string_view>(M)); 205 } 206 207 return demangled_cstr; 208 } 209 210 // Explicit demangling for scheduled requests during batch processing. This 211 // makes use of ItaniumPartialDemangler's rich demangle info 212 bool Mangled::GetRichManglingInfo(RichManglingContext &context, 213 SkipMangledNameFn *skip_mangled_name) { 214 // Others are not meant to arrive here. ObjC names or C's main() for example 215 // have their names stored in m_demangled, while m_mangled is empty. 216 assert(m_mangled); 217 218 // Check whether or not we are interested in this name at all. 219 ManglingScheme scheme = GetManglingScheme(m_mangled.GetStringRef()); 220 if (skip_mangled_name && skip_mangled_name(m_mangled.GetStringRef(), scheme)) 221 return false; 222 223 switch (scheme) { 224 case eManglingSchemeNone: 225 // The current mangled_name_filter would allow llvm_unreachable here. 226 return false; 227 228 case eManglingSchemeItanium: 229 // We want the rich mangling info here, so we don't care whether or not 230 // there is a demangled string in the pool already. 231 return context.FromItaniumName(m_mangled); 232 233 case eManglingSchemeMSVC: { 234 // We have no rich mangling for MSVC-mangled names yet, so first try to 235 // demangle it if necessary. 236 if (!m_demangled && !m_mangled.GetMangledCounterpart(m_demangled)) { 237 if (char *d = GetMSVCDemangledStr(m_mangled)) { 238 // Without the rich mangling info we have to demangle the full name. 239 // Copy it to string pool and connect the counterparts to accelerate 240 // later access in GetDemangledName(). 241 m_demangled.SetStringWithMangledCounterpart(llvm::StringRef(d), 242 m_mangled); 243 ::free(d); 244 } else { 245 m_demangled.SetCString(""); 246 } 247 } 248 249 if (m_demangled.IsEmpty()) { 250 // Cannot demangle it, so don't try parsing. 251 return false; 252 } else { 253 // Demangled successfully, we can try and parse it with 254 // CPlusPlusLanguage::MethodName. 255 return context.FromCxxMethodName(m_demangled); 256 } 257 } 258 259 case eManglingSchemeRustV0: 260 case eManglingSchemeD: 261 case eManglingSchemeSwift: 262 // Rich demangling scheme is not supported 263 return false; 264 } 265 llvm_unreachable("Fully covered switch above!"); 266 } 267 268 // Generate the demangled name on demand using this accessor. Code in this 269 // class will need to use this accessor if it wishes to decode the demangled 270 // name. The result is cached and will be kept until a new string value is 271 // supplied to this object, or until the end of the object's lifetime. 272 ConstString Mangled::GetDemangledName() const { 273 // Check to make sure we have a valid mangled name and that we haven't 274 // already decoded our mangled name. 275 if (m_mangled && m_demangled.IsNull()) { 276 // Don't bother running anything that isn't mangled 277 const char *mangled_name = m_mangled.GetCString(); 278 ManglingScheme mangling_scheme = 279 GetManglingScheme(m_mangled.GetStringRef()); 280 if (mangling_scheme != eManglingSchemeNone && 281 !m_mangled.GetMangledCounterpart(m_demangled)) { 282 // We didn't already mangle this name, demangle it and if all goes well 283 // add it to our map. 284 char *demangled_name = nullptr; 285 switch (mangling_scheme) { 286 case eManglingSchemeMSVC: 287 demangled_name = GetMSVCDemangledStr(mangled_name); 288 break; 289 case eManglingSchemeItanium: { 290 demangled_name = GetItaniumDemangledStr(mangled_name); 291 break; 292 } 293 case eManglingSchemeRustV0: 294 demangled_name = GetRustV0DemangledStr(m_mangled); 295 break; 296 case eManglingSchemeD: 297 demangled_name = GetDLangDemangledStr(m_mangled); 298 break; 299 case eManglingSchemeSwift: 300 // Demangling a swift name requires the swift compiler. This is 301 // explicitly unsupported on llvm.org. 302 break; 303 case eManglingSchemeNone: 304 llvm_unreachable("eManglingSchemeNone was handled already"); 305 } 306 if (demangled_name) { 307 m_demangled.SetStringWithMangledCounterpart( 308 llvm::StringRef(demangled_name), m_mangled); 309 free(demangled_name); 310 } 311 } 312 if (m_demangled.IsNull()) { 313 // Set the demangled string to the empty string to indicate we tried to 314 // parse it once and failed. 315 m_demangled.SetCString(""); 316 } 317 } 318 319 return m_demangled; 320 } 321 322 ConstString Mangled::GetDisplayDemangledName() const { 323 if (Language *lang = Language::FindPlugin(GuessLanguage())) 324 return lang->GetDisplayDemangledName(*this); 325 return GetDemangledName(); 326 } 327 328 bool Mangled::NameMatches(const RegularExpression ®ex) const { 329 if (m_mangled && regex.Execute(m_mangled.GetStringRef())) 330 return true; 331 332 ConstString demangled = GetDemangledName(); 333 return demangled && regex.Execute(demangled.GetStringRef()); 334 } 335 336 // Get the demangled name if there is one, else return the mangled name. 337 ConstString Mangled::GetName(Mangled::NamePreference preference) const { 338 if (preference == ePreferMangled && m_mangled) 339 return m_mangled; 340 341 // Call the accessor to make sure we get a demangled name in case it hasn't 342 // been demangled yet... 343 ConstString demangled = GetDemangledName(); 344 345 if (preference == ePreferDemangledWithoutArguments) { 346 if (Language *lang = Language::FindPlugin(GuessLanguage())) { 347 return lang->GetDemangledFunctionNameWithoutArguments(*this); 348 } 349 } 350 if (preference == ePreferDemangled) { 351 if (demangled) 352 return demangled; 353 return m_mangled; 354 } 355 return demangled; 356 } 357 358 // Dump a Mangled object to stream "s". We don't force our demangled name to be 359 // computed currently (we don't use the accessor). 360 void Mangled::Dump(Stream *s) const { 361 if (m_mangled) { 362 *s << ", mangled = " << m_mangled; 363 } 364 if (m_demangled) { 365 const char *demangled = m_demangled.AsCString(); 366 s->Printf(", demangled = %s", demangled[0] ? demangled : "<error>"); 367 } 368 } 369 370 // Dumps a debug version of this string with extra object and state information 371 // to stream "s". 372 void Mangled::DumpDebug(Stream *s) const { 373 s->Printf("%*p: Mangled mangled = ", static_cast<int>(sizeof(void *) * 2), 374 static_cast<const void *>(this)); 375 m_mangled.DumpDebug(s); 376 s->Printf(", demangled = "); 377 m_demangled.DumpDebug(s); 378 } 379 380 // Return the size in byte that this object takes in memory. The size includes 381 // the size of the objects it owns, and not the strings that it references 382 // because they are shared strings. 383 size_t Mangled::MemorySize() const { 384 return m_mangled.MemorySize() + m_demangled.MemorySize(); 385 } 386 387 // We "guess" the language because we can't determine a symbol's language from 388 // it's name. For example, a Pascal symbol can be mangled using the C++ 389 // Itanium scheme, and defined in a compilation unit within the same module as 390 // other C++ units. In addition, different targets could have different ways 391 // of mangling names from a given language, likewise the compilation units 392 // within those targets. 393 lldb::LanguageType Mangled::GuessLanguage() const { 394 lldb::LanguageType result = lldb::eLanguageTypeUnknown; 395 // Ask each language plugin to check if the mangled name belongs to it. 396 Language::ForEach([this, &result](Language *l) { 397 if (l->SymbolNameFitsToLanguage(*this)) { 398 result = l->GetLanguageType(); 399 return false; 400 } 401 return true; 402 }); 403 return result; 404 } 405 406 // Dump OBJ to the supplied stream S. 407 Stream &operator<<(Stream &s, const Mangled &obj) { 408 if (obj.GetMangledName()) 409 s << "mangled = '" << obj.GetMangledName() << "'"; 410 411 ConstString demangled = obj.GetDemangledName(); 412 if (demangled) 413 s << ", demangled = '" << demangled << '\''; 414 else 415 s << ", demangled = <error>"; 416 return s; 417 } 418 419 // When encoding Mangled objects we can get away with encoding as little 420 // information as is required. The enumeration below helps us to efficiently 421 // encode Mangled objects. 422 enum MangledEncoding { 423 /// If the Mangled object has neither a mangled name or demangled name we can 424 /// encode the object with one zero byte using the Empty enumeration. 425 Empty = 0u, 426 /// If the Mangled object has only a demangled name and no mangled named, we 427 /// can encode only the demangled name. 428 DemangledOnly = 1u, 429 /// If the mangle name can calculate the demangled name (it is the 430 /// mangled/demangled counterpart), then we only need to encode the mangled 431 /// name as the demangled name can be recomputed. 432 MangledOnly = 2u, 433 /// If we have a Mangled object with two different names that are not related 434 /// then we need to save both strings. This can happen if we have a name that 435 /// isn't a true mangled name, but we want to be able to lookup a symbol by 436 /// name and type in the symbol table. We do this for Objective C symbols like 437 /// "OBJC_CLASS_$_NSValue" where the mangled named will be set to 438 /// "OBJC_CLASS_$_NSValue" and the demangled name will be manually set to 439 /// "NSValue". If we tried to demangled the name "OBJC_CLASS_$_NSValue" it 440 /// would fail, but in these cases we want these unrelated names to be 441 /// preserved. 442 MangledAndDemangled = 3u 443 }; 444 445 bool Mangled::Decode(const DataExtractor &data, lldb::offset_t *offset_ptr, 446 const StringTableReader &strtab) { 447 m_mangled.Clear(); 448 m_demangled.Clear(); 449 MangledEncoding encoding = (MangledEncoding)data.GetU8(offset_ptr); 450 switch (encoding) { 451 case Empty: 452 return true; 453 454 case DemangledOnly: 455 m_demangled.SetString(strtab.Get(data.GetU32(offset_ptr))); 456 return true; 457 458 case MangledOnly: 459 m_mangled.SetString(strtab.Get(data.GetU32(offset_ptr))); 460 return true; 461 462 case MangledAndDemangled: 463 m_mangled.SetString(strtab.Get(data.GetU32(offset_ptr))); 464 m_demangled.SetString(strtab.Get(data.GetU32(offset_ptr))); 465 return true; 466 } 467 return false; 468 } 469 /// The encoding format for the Mangled object is as follows: 470 /// 471 /// uint8_t encoding; 472 /// char str1[]; (only if DemangledOnly, MangledOnly) 473 /// char str2[]; (only if MangledAndDemangled) 474 /// 475 /// The strings are stored as NULL terminated UTF8 strings and str1 and str2 476 /// are only saved if we need them based on the encoding. 477 /// 478 /// Some mangled names have a mangled name that can be demangled by the built 479 /// in demanglers. These kinds of mangled objects know when the mangled and 480 /// demangled names are the counterparts for each other. This is done because 481 /// demangling is very expensive and avoiding demangling the same name twice 482 /// saves us a lot of compute time. For these kinds of names we only need to 483 /// save the mangled name and have the encoding set to "MangledOnly". 484 /// 485 /// If a mangled obejct has only a demangled name, then we save only that string 486 /// and have the encoding set to "DemangledOnly". 487 /// 488 /// Some mangled objects have both mangled and demangled names, but the 489 /// demangled name can not be computed from the mangled name. This is often used 490 /// for runtime named, like Objective C runtime V2 and V3 names. Both these 491 /// names must be saved and the encoding is set to "MangledAndDemangled". 492 /// 493 /// For a Mangled object with no names, we only need to set the encoding to 494 /// "Empty" and not store any string values. 495 void Mangled::Encode(DataEncoder &file, ConstStringTable &strtab) const { 496 MangledEncoding encoding = Empty; 497 if (m_mangled) { 498 encoding = MangledOnly; 499 if (m_demangled) { 500 // We have both mangled and demangled names. If the demangled name is the 501 // counterpart of the mangled name, then we only need to save the mangled 502 // named. If they are different, we need to save both. 503 ConstString s; 504 if (!(m_mangled.GetMangledCounterpart(s) && s == m_demangled)) 505 encoding = MangledAndDemangled; 506 } 507 } else if (m_demangled) { 508 encoding = DemangledOnly; 509 } 510 file.AppendU8(encoding); 511 switch (encoding) { 512 case Empty: 513 break; 514 case DemangledOnly: 515 file.AppendU32(strtab.Add(m_demangled)); 516 break; 517 case MangledOnly: 518 file.AppendU32(strtab.Add(m_mangled)); 519 break; 520 case MangledAndDemangled: 521 file.AppendU32(strtab.Add(m_mangled)); 522 file.AppendU32(strtab.Add(m_demangled)); 523 break; 524 } 525 } 526