1 //===-- Mangled.cpp -------------------------------------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "lldb/Core/Mangled.h" 10 11 #include "lldb/Core/DataFileCache.h" 12 #include "lldb/Core/RichManglingContext.h" 13 #include "lldb/Target/Language.h" 14 #include "lldb/Utility/ConstString.h" 15 #include "lldb/Utility/DataEncoder.h" 16 #include "lldb/Utility/Log.h" 17 #include "lldb/Utility/Logging.h" 18 #include "lldb/Utility/RegularExpression.h" 19 #include "lldb/Utility/Stream.h" 20 #include "lldb/lldb-enumerations.h" 21 22 #include "llvm/ADT/StringRef.h" 23 #include "llvm/Demangle/Demangle.h" 24 #include "llvm/Support/Compiler.h" 25 26 #include <mutex> 27 #include <string> 28 #include <utility> 29 30 #include <cstdlib> 31 #include <cstring> 32 using namespace lldb_private; 33 34 static inline bool cstring_is_mangled(llvm::StringRef s) { 35 return Mangled::GetManglingScheme(s) != Mangled::eManglingSchemeNone; 36 } 37 38 #pragma mark Mangled 39 40 Mangled::ManglingScheme Mangled::GetManglingScheme(llvm::StringRef const name) { 41 if (name.empty()) 42 return Mangled::eManglingSchemeNone; 43 44 if (name.startswith("?")) 45 return Mangled::eManglingSchemeMSVC; 46 47 if (name.startswith("_R")) 48 return Mangled::eManglingSchemeRustV0; 49 50 if (name.startswith("_D")) 51 return Mangled::eManglingSchemeD; 52 53 if (name.startswith("_Z")) 54 return Mangled::eManglingSchemeItanium; 55 56 // ___Z is a clang extension of block invocations 57 if (name.startswith("___Z")) 58 return Mangled::eManglingSchemeItanium; 59 60 return Mangled::eManglingSchemeNone; 61 } 62 63 Mangled::Mangled(ConstString s) : m_mangled(), m_demangled() { 64 if (s) 65 SetValue(s); 66 } 67 68 Mangled::Mangled(llvm::StringRef name) { 69 if (!name.empty()) 70 SetValue(ConstString(name)); 71 } 72 73 // Convert to bool operator. This allows code to check any Mangled objects 74 // to see if they contain anything valid using code such as: 75 // 76 // Mangled mangled(...); 77 // if (mangled) 78 // { ... 79 Mangled::operator bool() const { return m_mangled || m_demangled; } 80 81 // Clear the mangled and demangled values. 82 void Mangled::Clear() { 83 m_mangled.Clear(); 84 m_demangled.Clear(); 85 } 86 87 // Compare the string values. 88 int Mangled::Compare(const Mangled &a, const Mangled &b) { 89 return ConstString::Compare(a.GetName(ePreferMangled), 90 b.GetName(ePreferMangled)); 91 } 92 93 // Set the string value in this objects. If "mangled" is true, then the mangled 94 // named is set with the new value in "s", else the demangled name is set. 95 void Mangled::SetValue(ConstString s, bool mangled) { 96 if (s) { 97 if (mangled) { 98 m_demangled.Clear(); 99 m_mangled = s; 100 } else { 101 m_demangled = s; 102 m_mangled.Clear(); 103 } 104 } else { 105 m_demangled.Clear(); 106 m_mangled.Clear(); 107 } 108 } 109 110 void Mangled::SetValue(ConstString name) { 111 if (name) { 112 if (cstring_is_mangled(name.GetStringRef())) { 113 m_demangled.Clear(); 114 m_mangled = name; 115 } else { 116 m_demangled = name; 117 m_mangled.Clear(); 118 } 119 } else { 120 m_demangled.Clear(); 121 m_mangled.Clear(); 122 } 123 } 124 125 // Local helpers for different demangling implementations. 126 static char *GetMSVCDemangledStr(const char *M) { 127 char *demangled_cstr = llvm::microsoftDemangle( 128 M, nullptr, nullptr, nullptr, nullptr, 129 llvm::MSDemangleFlags( 130 llvm::MSDF_NoAccessSpecifier | llvm::MSDF_NoCallingConvention | 131 llvm::MSDF_NoMemberType | llvm::MSDF_NoVariableType)); 132 133 if (Log *log = lldb_private::GetLogIfAllCategoriesSet(LIBLLDB_LOG_DEMANGLE)) { 134 if (demangled_cstr && demangled_cstr[0]) 135 LLDB_LOGF(log, "demangled msvc: %s -> \"%s\"", M, demangled_cstr); 136 else 137 LLDB_LOGF(log, "demangled msvc: %s -> error", M); 138 } 139 140 return demangled_cstr; 141 } 142 143 static char *GetItaniumDemangledStr(const char *M) { 144 char *demangled_cstr = nullptr; 145 146 llvm::ItaniumPartialDemangler ipd; 147 bool err = ipd.partialDemangle(M); 148 if (!err) { 149 // Default buffer and size (will realloc in case it's too small). 150 size_t demangled_size = 80; 151 demangled_cstr = static_cast<char *>(std::malloc(demangled_size)); 152 demangled_cstr = ipd.finishDemangle(demangled_cstr, &demangled_size); 153 154 assert(demangled_cstr && 155 "finishDemangle must always succeed if partialDemangle did"); 156 assert(demangled_cstr[demangled_size - 1] == '\0' && 157 "Expected demangled_size to return length including trailing null"); 158 } 159 160 if (Log *log = lldb_private::GetLogIfAllCategoriesSet(LIBLLDB_LOG_DEMANGLE)) { 161 if (demangled_cstr) 162 LLDB_LOGF(log, "demangled itanium: %s -> \"%s\"", M, demangled_cstr); 163 else 164 LLDB_LOGF(log, "demangled itanium: %s -> error: failed to demangle", M); 165 } 166 167 return demangled_cstr; 168 } 169 170 static char *GetRustV0DemangledStr(const char *M) { 171 char *demangled_cstr = llvm::rustDemangle(M, nullptr, nullptr, nullptr); 172 173 if (Log *log = lldb_private::GetLogIfAllCategoriesSet(LIBLLDB_LOG_DEMANGLE)) { 174 if (demangled_cstr && demangled_cstr[0]) 175 LLDB_LOG(log, "demangled rustv0: {0} -> \"{1}\"", M, demangled_cstr); 176 else 177 LLDB_LOG(log, "demangled rustv0: {0} -> error: failed to demangle", M); 178 } 179 180 return demangled_cstr; 181 } 182 183 static char *GetDLangDemangledStr(const char *M) { 184 char *demangled_cstr = llvm::dlangDemangle(M); 185 186 if (Log *log = lldb_private::GetLogIfAllCategoriesSet(LIBLLDB_LOG_DEMANGLE)) { 187 if (demangled_cstr && demangled_cstr[0]) 188 LLDB_LOG(log, "demangled dlang: {0} -> \"{1}\"", M, demangled_cstr); 189 else 190 LLDB_LOG(log, "demangled dlang: {0} -> error: failed to demangle", M); 191 } 192 193 return demangled_cstr; 194 } 195 196 // Explicit demangling for scheduled requests during batch processing. This 197 // makes use of ItaniumPartialDemangler's rich demangle info 198 bool Mangled::DemangleWithRichManglingInfo( 199 RichManglingContext &context, SkipMangledNameFn *skip_mangled_name) { 200 // Others are not meant to arrive here. ObjC names or C's main() for example 201 // have their names stored in m_demangled, while m_mangled is empty. 202 assert(m_mangled); 203 204 // Check whether or not we are interested in this name at all. 205 ManglingScheme scheme = GetManglingScheme(m_mangled.GetStringRef()); 206 if (skip_mangled_name && skip_mangled_name(m_mangled.GetStringRef(), scheme)) 207 return false; 208 209 switch (scheme) { 210 case eManglingSchemeNone: 211 // The current mangled_name_filter would allow llvm_unreachable here. 212 return false; 213 214 case eManglingSchemeItanium: 215 // We want the rich mangling info here, so we don't care whether or not 216 // there is a demangled string in the pool already. 217 if (context.FromItaniumName(m_mangled)) { 218 // If we got an info, we have a name. Copy to string pool and connect the 219 // counterparts to accelerate later access in GetDemangledName(). 220 context.ParseFullName(); 221 m_demangled.SetStringWithMangledCounterpart(context.GetBufferRef(), 222 m_mangled); 223 return true; 224 } else { 225 m_demangled.SetCString(""); 226 return false; 227 } 228 229 case eManglingSchemeMSVC: { 230 // We have no rich mangling for MSVC-mangled names yet, so first try to 231 // demangle it if necessary. 232 if (!m_demangled && !m_mangled.GetMangledCounterpart(m_demangled)) { 233 if (char *d = GetMSVCDemangledStr(m_mangled.GetCString())) { 234 // If we got an info, we have a name. Copy to string pool and connect 235 // the counterparts to accelerate later access in GetDemangledName(). 236 m_demangled.SetStringWithMangledCounterpart(llvm::StringRef(d), 237 m_mangled); 238 ::free(d); 239 } else { 240 m_demangled.SetCString(""); 241 } 242 } 243 244 if (m_demangled.IsEmpty()) { 245 // Cannot demangle it, so don't try parsing. 246 return false; 247 } else { 248 // Demangled successfully, we can try and parse it with 249 // CPlusPlusLanguage::MethodName. 250 return context.FromCxxMethodName(m_demangled); 251 } 252 } 253 254 case eManglingSchemeRustV0: 255 case eManglingSchemeD: 256 // Rich demangling scheme is not supported 257 return false; 258 } 259 llvm_unreachable("Fully covered switch above!"); 260 } 261 262 // Generate the demangled name on demand using this accessor. Code in this 263 // class will need to use this accessor if it wishes to decode the demangled 264 // name. The result is cached and will be kept until a new string value is 265 // supplied to this object, or until the end of the object's lifetime. 266 ConstString Mangled::GetDemangledName() const { 267 // Check to make sure we have a valid mangled name and that we haven't 268 // already decoded our mangled name. 269 if (m_mangled && m_demangled.IsNull()) { 270 // Don't bother running anything that isn't mangled 271 const char *mangled_name = m_mangled.GetCString(); 272 ManglingScheme mangling_scheme = 273 GetManglingScheme(m_mangled.GetStringRef()); 274 if (mangling_scheme != eManglingSchemeNone && 275 !m_mangled.GetMangledCounterpart(m_demangled)) { 276 // We didn't already mangle this name, demangle it and if all goes well 277 // add it to our map. 278 char *demangled_name = nullptr; 279 switch (mangling_scheme) { 280 case eManglingSchemeMSVC: 281 demangled_name = GetMSVCDemangledStr(mangled_name); 282 break; 283 case eManglingSchemeItanium: { 284 demangled_name = GetItaniumDemangledStr(mangled_name); 285 break; 286 } 287 case eManglingSchemeRustV0: 288 demangled_name = GetRustV0DemangledStr(mangled_name); 289 break; 290 case eManglingSchemeD: 291 demangled_name = GetDLangDemangledStr(mangled_name); 292 break; 293 case eManglingSchemeNone: 294 llvm_unreachable("eManglingSchemeNone was handled already"); 295 } 296 if (demangled_name) { 297 m_demangled.SetStringWithMangledCounterpart( 298 llvm::StringRef(demangled_name), m_mangled); 299 free(demangled_name); 300 } 301 } 302 if (m_demangled.IsNull()) { 303 // Set the demangled string to the empty string to indicate we tried to 304 // parse it once and failed. 305 m_demangled.SetCString(""); 306 } 307 } 308 309 return m_demangled; 310 } 311 312 ConstString Mangled::GetDisplayDemangledName() const { 313 return GetDemangledName(); 314 } 315 316 bool Mangled::NameMatches(const RegularExpression ®ex) const { 317 if (m_mangled && regex.Execute(m_mangled.GetStringRef())) 318 return true; 319 320 ConstString demangled = GetDemangledName(); 321 return demangled && regex.Execute(demangled.GetStringRef()); 322 } 323 324 // Get the demangled name if there is one, else return the mangled name. 325 ConstString Mangled::GetName(Mangled::NamePreference preference) const { 326 if (preference == ePreferMangled && m_mangled) 327 return m_mangled; 328 329 // Call the accessor to make sure we get a demangled name in case it hasn't 330 // been demangled yet... 331 ConstString demangled = GetDemangledName(); 332 333 if (preference == ePreferDemangledWithoutArguments) { 334 if (Language *lang = Language::FindPlugin(GuessLanguage())) { 335 return lang->GetDemangledFunctionNameWithoutArguments(*this); 336 } 337 } 338 if (preference == ePreferDemangled) { 339 if (demangled) 340 return demangled; 341 return m_mangled; 342 } 343 return demangled; 344 } 345 346 // Dump a Mangled object to stream "s". We don't force our demangled name to be 347 // computed currently (we don't use the accessor). 348 void Mangled::Dump(Stream *s) const { 349 if (m_mangled) { 350 *s << ", mangled = " << m_mangled; 351 } 352 if (m_demangled) { 353 const char *demangled = m_demangled.AsCString(); 354 s->Printf(", demangled = %s", demangled[0] ? demangled : "<error>"); 355 } 356 } 357 358 // Dumps a debug version of this string with extra object and state information 359 // to stream "s". 360 void Mangled::DumpDebug(Stream *s) const { 361 s->Printf("%*p: Mangled mangled = ", static_cast<int>(sizeof(void *) * 2), 362 static_cast<const void *>(this)); 363 m_mangled.DumpDebug(s); 364 s->Printf(", demangled = "); 365 m_demangled.DumpDebug(s); 366 } 367 368 // Return the size in byte that this object takes in memory. The size includes 369 // the size of the objects it owns, and not the strings that it references 370 // because they are shared strings. 371 size_t Mangled::MemorySize() const { 372 return m_mangled.MemorySize() + m_demangled.MemorySize(); 373 } 374 375 // We "guess" the language because we can't determine a symbol's language from 376 // it's name. For example, a Pascal symbol can be mangled using the C++ 377 // Itanium scheme, and defined in a compilation unit within the same module as 378 // other C++ units. In addition, different targets could have different ways 379 // of mangling names from a given language, likewise the compilation units 380 // within those targets. 381 lldb::LanguageType Mangled::GuessLanguage() const { 382 lldb::LanguageType result = lldb::eLanguageTypeUnknown; 383 // Ask each language plugin to check if the mangled name belongs to it. 384 Language::ForEach([this, &result](Language *l) { 385 if (l->SymbolNameFitsToLanguage(*this)) { 386 result = l->GetLanguageType(); 387 return false; 388 } 389 return true; 390 }); 391 return result; 392 } 393 394 // Dump OBJ to the supplied stream S. 395 Stream &operator<<(Stream &s, const Mangled &obj) { 396 if (obj.GetMangledName()) 397 s << "mangled = '" << obj.GetMangledName() << "'"; 398 399 ConstString demangled = obj.GetDemangledName(); 400 if (demangled) 401 s << ", demangled = '" << demangled << '\''; 402 else 403 s << ", demangled = <error>"; 404 return s; 405 } 406 407 // When encoding Mangled objects we can get away with encoding as little 408 // information as is required. The enumeration below helps us to efficiently 409 // encode Mangled objects. 410 enum MangledEncoding { 411 /// If the Mangled object has neither a mangled name or demangled name we can 412 /// encode the object with one zero byte using the Empty enumeration. 413 Empty = 0u, 414 /// If the Mangled object has only a demangled name and no mangled named, we 415 /// can encode only the demangled name. 416 DemangledOnly = 1u, 417 /// If the mangle name can calculate the demangled name (it is the 418 /// mangled/demangled counterpart), then we only need to encode the mangled 419 /// name as the demangled name can be recomputed. 420 MangledOnly = 2u, 421 /// If we have a Mangled object with two different names that are not related 422 /// then we need to save both strings. This can happen if we have a name that 423 /// isn't a true mangled name, but we want to be able to lookup a symbol by 424 /// name and type in the symbol table. We do this for Objective C symbols like 425 /// "OBJC_CLASS_$_NSValue" where the mangled named will be set to 426 /// "OBJC_CLASS_$_NSValue" and the demangled name will be manually set to 427 /// "NSValue". If we tried to demangled the name "OBJC_CLASS_$_NSValue" it 428 /// would fail, but in these cases we want these unrelated names to be 429 /// preserved. 430 MangledAndDemangled = 3u 431 }; 432 433 bool Mangled::Decode(const DataExtractor &data, lldb::offset_t *offset_ptr, 434 const StringTableReader &strtab) { 435 m_mangled.Clear(); 436 m_demangled.Clear(); 437 MangledEncoding encoding = (MangledEncoding)data.GetU8(offset_ptr); 438 switch (encoding) { 439 case Empty: 440 return true; 441 442 case DemangledOnly: 443 m_demangled.SetString(strtab.Get(data.GetU32(offset_ptr))); 444 return true; 445 446 case MangledOnly: 447 m_mangled.SetString(strtab.Get(data.GetU32(offset_ptr))); 448 return true; 449 450 case MangledAndDemangled: 451 m_mangled.SetString(strtab.Get(data.GetU32(offset_ptr))); 452 m_demangled.SetString(strtab.Get(data.GetU32(offset_ptr))); 453 return true; 454 } 455 return false; 456 } 457 /// The encoding format for the Mangled object is as follows: 458 /// 459 /// uint8_t encoding; 460 /// char str1[]; (only if DemangledOnly, MangledOnly) 461 /// char str2[]; (only if MangledAndDemangled) 462 /// 463 /// The strings are stored as NULL terminated UTF8 strings and str1 and str2 464 /// are only saved if we need them based on the encoding. 465 /// 466 /// Some mangled names have a mangled name that can be demangled by the built 467 /// in demanglers. These kinds of mangled objects know when the mangled and 468 /// demangled names are the counterparts for each other. This is done because 469 /// demangling is very expensive and avoiding demangling the same name twice 470 /// saves us a lot of compute time. For these kinds of names we only need to 471 /// save the mangled name and have the encoding set to "MangledOnly". 472 /// 473 /// If a mangled obejct has only a demangled name, then we save only that string 474 /// and have the encoding set to "DemangledOnly". 475 /// 476 /// Some mangled objects have both mangled and demangled names, but the 477 /// demangled name can not be computed from the mangled name. This is often used 478 /// for runtime named, like Objective C runtime V2 and V3 names. Both these 479 /// names must be saved and the encoding is set to "MangledAndDemangled". 480 /// 481 /// For a Mangled object with no names, we only need to set the encoding to 482 /// "Empty" and not store any string values. 483 void Mangled::Encode(DataEncoder &file, ConstStringTable &strtab) const { 484 MangledEncoding encoding = Empty; 485 if (m_mangled) { 486 encoding = MangledOnly; 487 if (m_demangled) { 488 // We have both mangled and demangled names. If the demangled name is the 489 // counterpart of the mangled name, then we only need to save the mangled 490 // named. If they are different, we need to save both. 491 ConstString s; 492 if (!(m_mangled.GetMangledCounterpart(s) && s == m_demangled)) 493 encoding = MangledAndDemangled; 494 } 495 } else if (m_demangled) { 496 encoding = DemangledOnly; 497 } 498 file.AppendU8(encoding); 499 switch (encoding) { 500 case Empty: 501 break; 502 case DemangledOnly: 503 file.AppendU32(strtab.Add(m_demangled)); 504 break; 505 case MangledOnly: 506 file.AppendU32(strtab.Add(m_mangled)); 507 break; 508 case MangledAndDemangled: 509 file.AppendU32(strtab.Add(m_mangled)); 510 file.AppendU32(strtab.Add(m_demangled)); 511 break; 512 } 513 } 514