1 //===-- Mangled.cpp -------------------------------------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "lldb/Core/Mangled.h" 10 11 #include "lldb/Core/DataFileCache.h" 12 #include "lldb/Core/RichManglingContext.h" 13 #include "lldb/Target/Language.h" 14 #include "lldb/Utility/ConstString.h" 15 #include "lldb/Utility/DataEncoder.h" 16 #include "lldb/Utility/LLDBLog.h" 17 #include "lldb/Utility/Log.h" 18 #include "lldb/Utility/RegularExpression.h" 19 #include "lldb/Utility/Stream.h" 20 #include "lldb/lldb-enumerations.h" 21 22 #include "llvm/ADT/StringExtras.h" 23 #include "llvm/ADT/StringRef.h" 24 #include "llvm/Demangle/Demangle.h" 25 #include "llvm/Support/Compiler.h" 26 27 #include <mutex> 28 #include <string> 29 #include <string_view> 30 #include <utility> 31 32 #include <cstdlib> 33 #include <cstring> 34 using namespace lldb_private; 35 36 static inline bool cstring_is_mangled(llvm::StringRef s) { 37 return Mangled::GetManglingScheme(s) != Mangled::eManglingSchemeNone; 38 } 39 40 #pragma mark Mangled 41 42 Mangled::ManglingScheme Mangled::GetManglingScheme(llvm::StringRef const name) { 43 if (name.empty()) 44 return Mangled::eManglingSchemeNone; 45 46 if (name.starts_with("?")) 47 return Mangled::eManglingSchemeMSVC; 48 49 if (name.starts_with("_R")) 50 return Mangled::eManglingSchemeRustV0; 51 52 if (name.starts_with("_D")) { 53 // A dlang mangled name begins with `_D`, followed by a numeric length. One 54 // known exception is the symbol `_Dmain`. 55 // See `SymbolName` and `LName` in 56 // https://dlang.org/spec/abi.html#name_mangling 57 llvm::StringRef buf = name.drop_front(2); 58 if (!buf.empty() && (llvm::isDigit(buf.front()) || name == "_Dmain")) 59 return Mangled::eManglingSchemeD; 60 } 61 62 if (name.starts_with("_Z")) 63 return Mangled::eManglingSchemeItanium; 64 65 // ___Z is a clang extension of block invocations 66 if (name.starts_with("___Z")) 67 return Mangled::eManglingSchemeItanium; 68 69 // Swift's older style of mangling used "_T" as a mangling prefix. This can 70 // lead to false positives with other symbols that just so happen to start 71 // with "_T". To minimize the chance of that happening, we only return true 72 // for select old-style swift mangled names. The known cases are ObjC classes 73 // and protocols. Classes are either prefixed with "_TtC" or "_TtGC". 74 // Protocols are prefixed with "_TtP". 75 if (name.starts_with("_TtC") || name.starts_with("_TtGC") || 76 name.starts_with("_TtP")) 77 return Mangled::eManglingSchemeSwift; 78 79 // Swift 4.2 used "$S" and "_$S". 80 // Swift 5 and onward uses "$s" and "_$s". 81 // Swift also uses "@__swiftmacro_" as a prefix for mangling filenames. 82 if (name.starts_with("$S") || name.starts_with("_$S") || 83 name.starts_with("$s") || name.starts_with("_$s") || 84 name.starts_with("@__swiftmacro_")) 85 return Mangled::eManglingSchemeSwift; 86 87 return Mangled::eManglingSchemeNone; 88 } 89 90 Mangled::Mangled(ConstString s) : m_mangled(), m_demangled() { 91 if (s) 92 SetValue(s); 93 } 94 95 Mangled::Mangled(llvm::StringRef name) { 96 if (!name.empty()) 97 SetValue(ConstString(name)); 98 } 99 100 // Convert to bool operator. This allows code to check any Mangled objects 101 // to see if they contain anything valid using code such as: 102 // 103 // Mangled mangled(...); 104 // if (mangled) 105 // { ... 106 Mangled::operator bool() const { return m_mangled || m_demangled; } 107 108 // Clear the mangled and demangled values. 109 void Mangled::Clear() { 110 m_mangled.Clear(); 111 m_demangled.Clear(); 112 } 113 114 // Compare the string values. 115 int Mangled::Compare(const Mangled &a, const Mangled &b) { 116 return ConstString::Compare(a.GetName(ePreferMangled), 117 b.GetName(ePreferMangled)); 118 } 119 120 void Mangled::SetValue(ConstString name) { 121 if (name) { 122 if (cstring_is_mangled(name.GetStringRef())) { 123 m_demangled.Clear(); 124 m_mangled = name; 125 } else { 126 m_demangled = name; 127 m_mangled.Clear(); 128 } 129 } else { 130 m_demangled.Clear(); 131 m_mangled.Clear(); 132 } 133 } 134 135 // Local helpers for different demangling implementations. 136 static char *GetMSVCDemangledStr(llvm::StringRef M) { 137 char *demangled_cstr = llvm::microsoftDemangle( 138 M, nullptr, nullptr, 139 llvm::MSDemangleFlags( 140 llvm::MSDF_NoAccessSpecifier | llvm::MSDF_NoCallingConvention | 141 llvm::MSDF_NoMemberType | llvm::MSDF_NoVariableType)); 142 143 if (Log *log = GetLog(LLDBLog::Demangle)) { 144 if (demangled_cstr && demangled_cstr[0]) 145 LLDB_LOGF(log, "demangled msvc: %s -> \"%s\"", M.data(), demangled_cstr); 146 else 147 LLDB_LOGF(log, "demangled msvc: %s -> error", M.data()); 148 } 149 150 return demangled_cstr; 151 } 152 153 static char *GetItaniumDemangledStr(const char *M) { 154 char *demangled_cstr = nullptr; 155 156 llvm::ItaniumPartialDemangler ipd; 157 bool err = ipd.partialDemangle(M); 158 if (!err) { 159 // Default buffer and size (will realloc in case it's too small). 160 size_t demangled_size = 80; 161 demangled_cstr = static_cast<char *>(std::malloc(demangled_size)); 162 demangled_cstr = ipd.finishDemangle(demangled_cstr, &demangled_size); 163 164 assert(demangled_cstr && 165 "finishDemangle must always succeed if partialDemangle did"); 166 assert(demangled_cstr[demangled_size - 1] == '\0' && 167 "Expected demangled_size to return length including trailing null"); 168 } 169 170 if (Log *log = GetLog(LLDBLog::Demangle)) { 171 if (demangled_cstr) 172 LLDB_LOGF(log, "demangled itanium: %s -> \"%s\"", M, demangled_cstr); 173 else 174 LLDB_LOGF(log, "demangled itanium: %s -> error: failed to demangle", M); 175 } 176 177 return demangled_cstr; 178 } 179 180 static char *GetRustV0DemangledStr(llvm::StringRef M) { 181 char *demangled_cstr = llvm::rustDemangle(M); 182 183 if (Log *log = GetLog(LLDBLog::Demangle)) { 184 if (demangled_cstr && demangled_cstr[0]) 185 LLDB_LOG(log, "demangled rustv0: {0} -> \"{1}\"", M, demangled_cstr); 186 else 187 LLDB_LOG(log, "demangled rustv0: {0} -> error: failed to demangle", 188 static_cast<std::string_view>(M)); 189 } 190 191 return demangled_cstr; 192 } 193 194 static char *GetDLangDemangledStr(llvm::StringRef M) { 195 char *demangled_cstr = llvm::dlangDemangle(M); 196 197 if (Log *log = GetLog(LLDBLog::Demangle)) { 198 if (demangled_cstr && demangled_cstr[0]) 199 LLDB_LOG(log, "demangled dlang: {0} -> \"{1}\"", M, demangled_cstr); 200 else 201 LLDB_LOG(log, "demangled dlang: {0} -> error: failed to demangle", 202 static_cast<std::string_view>(M)); 203 } 204 205 return demangled_cstr; 206 } 207 208 // Explicit demangling for scheduled requests during batch processing. This 209 // makes use of ItaniumPartialDemangler's rich demangle info 210 bool Mangled::GetRichManglingInfo(RichManglingContext &context, 211 SkipMangledNameFn *skip_mangled_name) { 212 // Others are not meant to arrive here. ObjC names or C's main() for example 213 // have their names stored in m_demangled, while m_mangled is empty. 214 assert(m_mangled); 215 216 // Check whether or not we are interested in this name at all. 217 ManglingScheme scheme = GetManglingScheme(m_mangled.GetStringRef()); 218 if (skip_mangled_name && skip_mangled_name(m_mangled.GetStringRef(), scheme)) 219 return false; 220 221 switch (scheme) { 222 case eManglingSchemeNone: 223 // The current mangled_name_filter would allow llvm_unreachable here. 224 return false; 225 226 case eManglingSchemeItanium: 227 // We want the rich mangling info here, so we don't care whether or not 228 // there is a demangled string in the pool already. 229 return context.FromItaniumName(m_mangled); 230 231 case eManglingSchemeMSVC: { 232 // We have no rich mangling for MSVC-mangled names yet, so first try to 233 // demangle it if necessary. 234 if (!m_demangled && !m_mangled.GetMangledCounterpart(m_demangled)) { 235 if (char *d = GetMSVCDemangledStr(m_mangled)) { 236 // Without the rich mangling info we have to demangle the full name. 237 // Copy it to string pool and connect the counterparts to accelerate 238 // later access in GetDemangledName(). 239 m_demangled.SetStringWithMangledCounterpart(llvm::StringRef(d), 240 m_mangled); 241 ::free(d); 242 } else { 243 m_demangled.SetCString(""); 244 } 245 } 246 247 if (m_demangled.IsEmpty()) { 248 // Cannot demangle it, so don't try parsing. 249 return false; 250 } else { 251 // Demangled successfully, we can try and parse it with 252 // CPlusPlusLanguage::MethodName. 253 return context.FromCxxMethodName(m_demangled); 254 } 255 } 256 257 case eManglingSchemeRustV0: 258 case eManglingSchemeD: 259 case eManglingSchemeSwift: 260 // Rich demangling scheme is not supported 261 return false; 262 } 263 llvm_unreachable("Fully covered switch above!"); 264 } 265 266 // Generate the demangled name on demand using this accessor. Code in this 267 // class will need to use this accessor if it wishes to decode the demangled 268 // name. The result is cached and will be kept until a new string value is 269 // supplied to this object, or until the end of the object's lifetime. 270 ConstString Mangled::GetDemangledName() const { 271 // Check to make sure we have a valid mangled name and that we haven't 272 // already decoded our mangled name. 273 if (m_mangled && m_demangled.IsNull()) { 274 // Don't bother running anything that isn't mangled 275 const char *mangled_name = m_mangled.GetCString(); 276 ManglingScheme mangling_scheme = 277 GetManglingScheme(m_mangled.GetStringRef()); 278 if (mangling_scheme != eManglingSchemeNone && 279 !m_mangled.GetMangledCounterpart(m_demangled)) { 280 // We didn't already mangle this name, demangle it and if all goes well 281 // add it to our map. 282 char *demangled_name = nullptr; 283 switch (mangling_scheme) { 284 case eManglingSchemeMSVC: 285 demangled_name = GetMSVCDemangledStr(mangled_name); 286 break; 287 case eManglingSchemeItanium: { 288 demangled_name = GetItaniumDemangledStr(mangled_name); 289 break; 290 } 291 case eManglingSchemeRustV0: 292 demangled_name = GetRustV0DemangledStr(m_mangled); 293 break; 294 case eManglingSchemeD: 295 demangled_name = GetDLangDemangledStr(m_mangled); 296 break; 297 case eManglingSchemeSwift: 298 // Demangling a swift name requires the swift compiler. This is 299 // explicitly unsupported on llvm.org. 300 break; 301 case eManglingSchemeNone: 302 llvm_unreachable("eManglingSchemeNone was handled already"); 303 } 304 if (demangled_name) { 305 m_demangled.SetStringWithMangledCounterpart( 306 llvm::StringRef(demangled_name), m_mangled); 307 free(demangled_name); 308 } 309 } 310 if (m_demangled.IsNull()) { 311 // Set the demangled string to the empty string to indicate we tried to 312 // parse it once and failed. 313 m_demangled.SetCString(""); 314 } 315 } 316 317 return m_demangled; 318 } 319 320 ConstString Mangled::GetDisplayDemangledName() const { 321 if (Language *lang = Language::FindPlugin(GuessLanguage())) 322 return lang->GetDisplayDemangledName(*this); 323 return GetDemangledName(); 324 } 325 326 bool Mangled::NameMatches(const RegularExpression ®ex) const { 327 if (m_mangled && regex.Execute(m_mangled.GetStringRef())) 328 return true; 329 330 ConstString demangled = GetDemangledName(); 331 return demangled && regex.Execute(demangled.GetStringRef()); 332 } 333 334 // Get the demangled name if there is one, else return the mangled name. 335 ConstString Mangled::GetName(Mangled::NamePreference preference) const { 336 if (preference == ePreferMangled && m_mangled) 337 return m_mangled; 338 339 // Call the accessor to make sure we get a demangled name in case it hasn't 340 // been demangled yet... 341 ConstString demangled = GetDemangledName(); 342 343 if (preference == ePreferDemangledWithoutArguments) { 344 if (Language *lang = Language::FindPlugin(GuessLanguage())) { 345 return lang->GetDemangledFunctionNameWithoutArguments(*this); 346 } 347 } 348 if (preference == ePreferDemangled) { 349 if (demangled) 350 return demangled; 351 return m_mangled; 352 } 353 return demangled; 354 } 355 356 // Dump a Mangled object to stream "s". We don't force our demangled name to be 357 // computed currently (we don't use the accessor). 358 void Mangled::Dump(Stream *s) const { 359 if (m_mangled) { 360 *s << ", mangled = " << m_mangled; 361 } 362 if (m_demangled) { 363 const char *demangled = m_demangled.AsCString(); 364 s->Printf(", demangled = %s", demangled[0] ? demangled : "<error>"); 365 } 366 } 367 368 // Dumps a debug version of this string with extra object and state information 369 // to stream "s". 370 void Mangled::DumpDebug(Stream *s) const { 371 s->Printf("%*p: Mangled mangled = ", static_cast<int>(sizeof(void *) * 2), 372 static_cast<const void *>(this)); 373 m_mangled.DumpDebug(s); 374 s->Printf(", demangled = "); 375 m_demangled.DumpDebug(s); 376 } 377 378 // Return the size in byte that this object takes in memory. The size includes 379 // the size of the objects it owns, and not the strings that it references 380 // because they are shared strings. 381 size_t Mangled::MemorySize() const { 382 return m_mangled.MemorySize() + m_demangled.MemorySize(); 383 } 384 385 // We "guess" the language because we can't determine a symbol's language from 386 // it's name. For example, a Pascal symbol can be mangled using the C++ 387 // Itanium scheme, and defined in a compilation unit within the same module as 388 // other C++ units. In addition, different targets could have different ways 389 // of mangling names from a given language, likewise the compilation units 390 // within those targets. 391 lldb::LanguageType Mangled::GuessLanguage() const { 392 lldb::LanguageType result = lldb::eLanguageTypeUnknown; 393 // Ask each language plugin to check if the mangled name belongs to it. 394 Language::ForEach([this, &result](Language *l) { 395 if (l->SymbolNameFitsToLanguage(*this)) { 396 result = l->GetLanguageType(); 397 return false; 398 } 399 return true; 400 }); 401 return result; 402 } 403 404 // Dump OBJ to the supplied stream S. 405 Stream &operator<<(Stream &s, const Mangled &obj) { 406 if (obj.GetMangledName()) 407 s << "mangled = '" << obj.GetMangledName() << "'"; 408 409 ConstString demangled = obj.GetDemangledName(); 410 if (demangled) 411 s << ", demangled = '" << demangled << '\''; 412 else 413 s << ", demangled = <error>"; 414 return s; 415 } 416 417 // When encoding Mangled objects we can get away with encoding as little 418 // information as is required. The enumeration below helps us to efficiently 419 // encode Mangled objects. 420 enum MangledEncoding { 421 /// If the Mangled object has neither a mangled name or demangled name we can 422 /// encode the object with one zero byte using the Empty enumeration. 423 Empty = 0u, 424 /// If the Mangled object has only a demangled name and no mangled named, we 425 /// can encode only the demangled name. 426 DemangledOnly = 1u, 427 /// If the mangle name can calculate the demangled name (it is the 428 /// mangled/demangled counterpart), then we only need to encode the mangled 429 /// name as the demangled name can be recomputed. 430 MangledOnly = 2u, 431 /// If we have a Mangled object with two different names that are not related 432 /// then we need to save both strings. This can happen if we have a name that 433 /// isn't a true mangled name, but we want to be able to lookup a symbol by 434 /// name and type in the symbol table. We do this for Objective C symbols like 435 /// "OBJC_CLASS_$_NSValue" where the mangled named will be set to 436 /// "OBJC_CLASS_$_NSValue" and the demangled name will be manually set to 437 /// "NSValue". If we tried to demangled the name "OBJC_CLASS_$_NSValue" it 438 /// would fail, but in these cases we want these unrelated names to be 439 /// preserved. 440 MangledAndDemangled = 3u 441 }; 442 443 bool Mangled::Decode(const DataExtractor &data, lldb::offset_t *offset_ptr, 444 const StringTableReader &strtab) { 445 m_mangled.Clear(); 446 m_demangled.Clear(); 447 MangledEncoding encoding = (MangledEncoding)data.GetU8(offset_ptr); 448 switch (encoding) { 449 case Empty: 450 return true; 451 452 case DemangledOnly: 453 m_demangled.SetString(strtab.Get(data.GetU32(offset_ptr))); 454 return true; 455 456 case MangledOnly: 457 m_mangled.SetString(strtab.Get(data.GetU32(offset_ptr))); 458 return true; 459 460 case MangledAndDemangled: 461 m_mangled.SetString(strtab.Get(data.GetU32(offset_ptr))); 462 m_demangled.SetString(strtab.Get(data.GetU32(offset_ptr))); 463 return true; 464 } 465 return false; 466 } 467 /// The encoding format for the Mangled object is as follows: 468 /// 469 /// uint8_t encoding; 470 /// char str1[]; (only if DemangledOnly, MangledOnly) 471 /// char str2[]; (only if MangledAndDemangled) 472 /// 473 /// The strings are stored as NULL terminated UTF8 strings and str1 and str2 474 /// are only saved if we need them based on the encoding. 475 /// 476 /// Some mangled names have a mangled name that can be demangled by the built 477 /// in demanglers. These kinds of mangled objects know when the mangled and 478 /// demangled names are the counterparts for each other. This is done because 479 /// demangling is very expensive and avoiding demangling the same name twice 480 /// saves us a lot of compute time. For these kinds of names we only need to 481 /// save the mangled name and have the encoding set to "MangledOnly". 482 /// 483 /// If a mangled obejct has only a demangled name, then we save only that string 484 /// and have the encoding set to "DemangledOnly". 485 /// 486 /// Some mangled objects have both mangled and demangled names, but the 487 /// demangled name can not be computed from the mangled name. This is often used 488 /// for runtime named, like Objective C runtime V2 and V3 names. Both these 489 /// names must be saved and the encoding is set to "MangledAndDemangled". 490 /// 491 /// For a Mangled object with no names, we only need to set the encoding to 492 /// "Empty" and not store any string values. 493 void Mangled::Encode(DataEncoder &file, ConstStringTable &strtab) const { 494 MangledEncoding encoding = Empty; 495 if (m_mangled) { 496 encoding = MangledOnly; 497 if (m_demangled) { 498 // We have both mangled and demangled names. If the demangled name is the 499 // counterpart of the mangled name, then we only need to save the mangled 500 // named. If they are different, we need to save both. 501 ConstString s; 502 if (!(m_mangled.GetMangledCounterpart(s) && s == m_demangled)) 503 encoding = MangledAndDemangled; 504 } 505 } else if (m_demangled) { 506 encoding = DemangledOnly; 507 } 508 file.AppendU8(encoding); 509 switch (encoding) { 510 case Empty: 511 break; 512 case DemangledOnly: 513 file.AppendU32(strtab.Add(m_demangled)); 514 break; 515 case MangledOnly: 516 file.AppendU32(strtab.Add(m_mangled)); 517 break; 518 case MangledAndDemangled: 519 file.AppendU32(strtab.Add(m_mangled)); 520 file.AppendU32(strtab.Add(m_demangled)); 521 break; 522 } 523 } 524