1 //===- MicrosoftDemangle.cpp ----------------------------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file defines a demangler for MSVC-style mangled symbols. 10 // 11 // This file has no dependencies on the rest of LLVM so that it can be 12 // easily reused in other programs such as libcxxabi. 13 // 14 //===----------------------------------------------------------------------===// 15 16 #include "llvm/Demangle/MicrosoftDemangle.h" 17 18 #include "llvm/ADT/StringViewExtras.h" 19 #include "llvm/Demangle/Demangle.h" 20 #include "llvm/Demangle/DemangleConfig.h" 21 #include "llvm/Demangle/MicrosoftDemangleNodes.h" 22 #include "llvm/Demangle/Utility.h" 23 24 #include <array> 25 #include <cctype> 26 #include <cstdio> 27 #include <string_view> 28 #include <tuple> 29 30 using namespace llvm; 31 using namespace ms_demangle; 32 33 static bool startsWithDigit(std::string_view S) { 34 return !S.empty() && std::isdigit(S.front()); 35 } 36 37 struct NodeList { 38 Node *N = nullptr; 39 NodeList *Next = nullptr; 40 }; 41 42 static bool consumeFront(std::string_view &S, char C) { 43 if (!llvm::starts_with(S, C)) 44 return false; 45 S.remove_prefix(1); 46 return true; 47 } 48 49 static bool consumeFront(std::string_view &S, std::string_view C) { 50 if (!llvm::starts_with(S, C)) 51 return false; 52 S.remove_prefix(C.size()); 53 return true; 54 } 55 56 static bool isMemberPointer(std::string_view MangledName, bool &Error) { 57 Error = false; 58 const char F = MangledName.front(); 59 MangledName.remove_prefix(1); 60 switch (F) { 61 case '$': 62 // This is probably an rvalue reference (e.g. $$Q), and you cannot have an 63 // rvalue reference to a member. 
64 return false; 65 case 'A': 66 // 'A' indicates a reference, and you cannot have a reference to a member 67 // function or member. 68 return false; 69 case 'P': 70 case 'Q': 71 case 'R': 72 case 'S': 73 // These 4 values indicate some kind of pointer, but we still don't know 74 // what. 75 break; 76 default: 77 // isMemberPointer() is called only if isPointerType() returns true, 78 // and it rejects other prefixes. 79 DEMANGLE_UNREACHABLE; 80 } 81 82 // If it starts with a number, then 6 indicates a non-member function 83 // pointer, and 8 indicates a member function pointer. 84 if (startsWithDigit(MangledName)) { 85 if (MangledName[0] != '6' && MangledName[0] != '8') { 86 Error = true; 87 return false; 88 } 89 return (MangledName[0] == '8'); 90 } 91 92 // Remove ext qualifiers since those can appear on either type and are 93 // therefore not indicative. 94 consumeFront(MangledName, 'E'); // 64-bit 95 consumeFront(MangledName, 'I'); // restrict 96 consumeFront(MangledName, 'F'); // unaligned 97 98 if (MangledName.empty()) { 99 Error = true; 100 return false; 101 } 102 103 // The next value should be either ABCD (non-member) or QRST (member). 
104 switch (MangledName.front()) { 105 case 'A': 106 case 'B': 107 case 'C': 108 case 'D': 109 return false; 110 case 'Q': 111 case 'R': 112 case 'S': 113 case 'T': 114 return true; 115 default: 116 Error = true; 117 return false; 118 } 119 } 120 121 static SpecialIntrinsicKind 122 consumeSpecialIntrinsicKind(std::string_view &MangledName) { 123 if (consumeFront(MangledName, "?_7")) 124 return SpecialIntrinsicKind::Vftable; 125 if (consumeFront(MangledName, "?_8")) 126 return SpecialIntrinsicKind::Vbtable; 127 if (consumeFront(MangledName, "?_9")) 128 return SpecialIntrinsicKind::VcallThunk; 129 if (consumeFront(MangledName, "?_A")) 130 return SpecialIntrinsicKind::Typeof; 131 if (consumeFront(MangledName, "?_B")) 132 return SpecialIntrinsicKind::LocalStaticGuard; 133 if (consumeFront(MangledName, "?_C")) 134 return SpecialIntrinsicKind::StringLiteralSymbol; 135 if (consumeFront(MangledName, "?_P")) 136 return SpecialIntrinsicKind::UdtReturning; 137 if (consumeFront(MangledName, "?_R0")) 138 return SpecialIntrinsicKind::RttiTypeDescriptor; 139 if (consumeFront(MangledName, "?_R1")) 140 return SpecialIntrinsicKind::RttiBaseClassDescriptor; 141 if (consumeFront(MangledName, "?_R2")) 142 return SpecialIntrinsicKind::RttiBaseClassArray; 143 if (consumeFront(MangledName, "?_R3")) 144 return SpecialIntrinsicKind::RttiClassHierarchyDescriptor; 145 if (consumeFront(MangledName, "?_R4")) 146 return SpecialIntrinsicKind::RttiCompleteObjLocator; 147 if (consumeFront(MangledName, "?_S")) 148 return SpecialIntrinsicKind::LocalVftable; 149 if (consumeFront(MangledName, "?__E")) 150 return SpecialIntrinsicKind::DynamicInitializer; 151 if (consumeFront(MangledName, "?__F")) 152 return SpecialIntrinsicKind::DynamicAtexitDestructor; 153 if (consumeFront(MangledName, "?__J")) 154 return SpecialIntrinsicKind::LocalStaticThreadGuard; 155 return SpecialIntrinsicKind::None; 156 } 157 158 static bool startsWithLocalScopePattern(std::string_view S) { 159 if (!consumeFront(S, '?')) 160 
return false; 161 162 size_t End = S.find('?'); 163 if (End == std::string_view::npos) 164 return false; 165 std::string_view Candidate = S.substr(0, End); 166 if (Candidate.empty()) 167 return false; 168 169 // \?[0-9]\? 170 // ?@? is the discriminator 0. 171 if (Candidate.size() == 1) 172 return Candidate[0] == '@' || (Candidate[0] >= '0' && Candidate[0] <= '9'); 173 174 // If it's not 0-9, then it's an encoded number terminated with an @ 175 if (Candidate.back() != '@') 176 return false; 177 Candidate.remove_suffix(1); 178 179 // An encoded number starts with B-P and all subsequent digits are in A-P. 180 // Note that the reason the first digit cannot be A is two fold. First, it 181 // would create an ambiguity with ?A which delimits the beginning of an 182 // anonymous namespace. Second, A represents 0, and you don't start a multi 183 // digit number with a leading 0. Presumably the anonymous namespace 184 // ambiguity is also why single digit encoded numbers use 0-9 rather than A-J. 
185 if (Candidate[0] < 'B' || Candidate[0] > 'P') 186 return false; 187 Candidate.remove_prefix(1); 188 while (!Candidate.empty()) { 189 if (Candidate[0] < 'A' || Candidate[0] > 'P') 190 return false; 191 Candidate.remove_prefix(1); 192 } 193 194 return true; 195 } 196 197 static bool isTagType(std::string_view S) { 198 switch (S.front()) { 199 case 'T': // union 200 case 'U': // struct 201 case 'V': // class 202 case 'W': // enum 203 return true; 204 } 205 return false; 206 } 207 208 static bool isCustomType(std::string_view S) { return S[0] == '?'; } 209 210 static bool isPointerType(std::string_view S) { 211 if (llvm::starts_with(S, "$$Q")) // foo && 212 return true; 213 214 switch (S.front()) { 215 case 'A': // foo & 216 case 'P': // foo * 217 case 'Q': // foo *const 218 case 'R': // foo *volatile 219 case 'S': // foo *const volatile 220 return true; 221 } 222 return false; 223 } 224 225 static bool isArrayType(std::string_view S) { return S[0] == 'Y'; } 226 227 static bool isFunctionType(std::string_view S) { 228 return llvm::starts_with(S, "$$A8@@") || llvm::starts_with(S, "$$A6"); 229 } 230 231 static FunctionRefQualifier 232 demangleFunctionRefQualifier(std::string_view &MangledName) { 233 if (consumeFront(MangledName, 'G')) 234 return FunctionRefQualifier::Reference; 235 else if (consumeFront(MangledName, 'H')) 236 return FunctionRefQualifier::RValueReference; 237 return FunctionRefQualifier::None; 238 } 239 240 static std::pair<Qualifiers, PointerAffinity> 241 demanglePointerCVQualifiers(std::string_view &MangledName) { 242 if (consumeFront(MangledName, "$$Q")) 243 return std::make_pair(Q_None, PointerAffinity::RValueReference); 244 245 const char F = MangledName.front(); 246 MangledName.remove_prefix(1); 247 switch (F) { 248 case 'A': 249 return std::make_pair(Q_None, PointerAffinity::Reference); 250 case 'P': 251 return std::make_pair(Q_None, PointerAffinity::Pointer); 252 case 'Q': 253 return std::make_pair(Q_Const, PointerAffinity::Pointer); 254 case 
'R': 255 return std::make_pair(Q_Volatile, PointerAffinity::Pointer); 256 case 'S': 257 return std::make_pair(Qualifiers(Q_Const | Q_Volatile), 258 PointerAffinity::Pointer); 259 } 260 // This function is only called if isPointerType() returns true, 261 // and it only returns true for the six cases listed above. 262 DEMANGLE_UNREACHABLE; 263 } 264 265 std::string_view Demangler::copyString(std::string_view Borrowed) { 266 char *Stable = Arena.allocUnalignedBuffer(Borrowed.size()); 267 // This is not a micro-optimization, it avoids UB, should Borrowed be an null 268 // buffer. 269 if (Borrowed.size()) 270 std::memcpy(Stable, Borrowed.begin(), Borrowed.size()); 271 272 return {Stable, Borrowed.size()}; 273 } 274 275 SpecialTableSymbolNode * 276 Demangler::demangleSpecialTableSymbolNode(std::string_view &MangledName, 277 SpecialIntrinsicKind K) { 278 NamedIdentifierNode *NI = Arena.alloc<NamedIdentifierNode>(); 279 switch (K) { 280 case SpecialIntrinsicKind::Vftable: 281 NI->Name = "`vftable'"; 282 break; 283 case SpecialIntrinsicKind::Vbtable: 284 NI->Name = "`vbtable'"; 285 break; 286 case SpecialIntrinsicKind::LocalVftable: 287 NI->Name = "`local vftable'"; 288 break; 289 case SpecialIntrinsicKind::RttiCompleteObjLocator: 290 NI->Name = "`RTTI Complete Object Locator'"; 291 break; 292 default: 293 DEMANGLE_UNREACHABLE; 294 } 295 QualifiedNameNode *QN = demangleNameScopeChain(MangledName, NI); 296 SpecialTableSymbolNode *STSN = Arena.alloc<SpecialTableSymbolNode>(); 297 STSN->Name = QN; 298 bool IsMember = false; 299 if (MangledName.empty()) { 300 Error = true; 301 return nullptr; 302 } 303 char Front = MangledName.front(); 304 MangledName.remove_prefix(1); 305 if (Front != '6' && Front != '7') { 306 Error = true; 307 return nullptr; 308 } 309 310 std::tie(STSN->Quals, IsMember) = demangleQualifiers(MangledName); 311 if (!consumeFront(MangledName, '@')) 312 STSN->TargetName = demangleFullyQualifiedTypeName(MangledName); 313 return STSN; 314 } 315 316 
LocalStaticGuardVariableNode * 317 Demangler::demangleLocalStaticGuard(std::string_view &MangledName, 318 bool IsThread) { 319 LocalStaticGuardIdentifierNode *LSGI = 320 Arena.alloc<LocalStaticGuardIdentifierNode>(); 321 LSGI->IsThread = IsThread; 322 QualifiedNameNode *QN = demangleNameScopeChain(MangledName, LSGI); 323 LocalStaticGuardVariableNode *LSGVN = 324 Arena.alloc<LocalStaticGuardVariableNode>(); 325 LSGVN->Name = QN; 326 327 if (consumeFront(MangledName, "4IA")) 328 LSGVN->IsVisible = false; 329 else if (consumeFront(MangledName, "5")) 330 LSGVN->IsVisible = true; 331 else { 332 Error = true; 333 return nullptr; 334 } 335 336 if (!MangledName.empty()) 337 LSGI->ScopeIndex = demangleUnsigned(MangledName); 338 return LSGVN; 339 } 340 341 static NamedIdentifierNode *synthesizeNamedIdentifier(ArenaAllocator &Arena, 342 std::string_view Name) { 343 NamedIdentifierNode *Id = Arena.alloc<NamedIdentifierNode>(); 344 Id->Name = Name; 345 return Id; 346 } 347 348 static QualifiedNameNode *synthesizeQualifiedName(ArenaAllocator &Arena, 349 IdentifierNode *Identifier) { 350 QualifiedNameNode *QN = Arena.alloc<QualifiedNameNode>(); 351 QN->Components = Arena.alloc<NodeArrayNode>(); 352 QN->Components->Count = 1; 353 QN->Components->Nodes = Arena.allocArray<Node *>(1); 354 QN->Components->Nodes[0] = Identifier; 355 return QN; 356 } 357 358 static QualifiedNameNode *synthesizeQualifiedName(ArenaAllocator &Arena, 359 std::string_view Name) { 360 NamedIdentifierNode *Id = synthesizeNamedIdentifier(Arena, Name); 361 return synthesizeQualifiedName(Arena, Id); 362 } 363 364 static VariableSymbolNode *synthesizeVariable(ArenaAllocator &Arena, 365 TypeNode *Type, 366 std::string_view VariableName) { 367 VariableSymbolNode *VSN = Arena.alloc<VariableSymbolNode>(); 368 VSN->Type = Type; 369 VSN->Name = synthesizeQualifiedName(Arena, VariableName); 370 return VSN; 371 } 372 373 VariableSymbolNode * 374 Demangler::demangleUntypedVariable(ArenaAllocator &Arena, 375 
std::string_view &MangledName, 376 std::string_view VariableName) { 377 NamedIdentifierNode *NI = synthesizeNamedIdentifier(Arena, VariableName); 378 QualifiedNameNode *QN = demangleNameScopeChain(MangledName, NI); 379 VariableSymbolNode *VSN = Arena.alloc<VariableSymbolNode>(); 380 VSN->Name = QN; 381 if (consumeFront(MangledName, "8")) 382 return VSN; 383 384 Error = true; 385 return nullptr; 386 } 387 388 VariableSymbolNode * 389 Demangler::demangleRttiBaseClassDescriptorNode(ArenaAllocator &Arena, 390 std::string_view &MangledName) { 391 RttiBaseClassDescriptorNode *RBCDN = 392 Arena.alloc<RttiBaseClassDescriptorNode>(); 393 RBCDN->NVOffset = demangleUnsigned(MangledName); 394 RBCDN->VBPtrOffset = demangleSigned(MangledName); 395 RBCDN->VBTableOffset = demangleUnsigned(MangledName); 396 RBCDN->Flags = demangleUnsigned(MangledName); 397 if (Error) 398 return nullptr; 399 400 VariableSymbolNode *VSN = Arena.alloc<VariableSymbolNode>(); 401 VSN->Name = demangleNameScopeChain(MangledName, RBCDN); 402 consumeFront(MangledName, '8'); 403 return VSN; 404 } 405 406 FunctionSymbolNode * 407 Demangler::demangleInitFiniStub(std::string_view &MangledName, 408 bool IsDestructor) { 409 DynamicStructorIdentifierNode *DSIN = 410 Arena.alloc<DynamicStructorIdentifierNode>(); 411 DSIN->IsDestructor = IsDestructor; 412 413 bool IsKnownStaticDataMember = false; 414 if (consumeFront(MangledName, '?')) 415 IsKnownStaticDataMember = true; 416 417 SymbolNode *Symbol = demangleDeclarator(MangledName); 418 if (Error) 419 return nullptr; 420 421 FunctionSymbolNode *FSN = nullptr; 422 423 if (Symbol->kind() == NodeKind::VariableSymbol) { 424 DSIN->Variable = static_cast<VariableSymbolNode *>(Symbol); 425 426 // Older versions of clang mangled this type of symbol incorrectly. They 427 // would omit the leading ? and they would only emit a single @ at the end. 428 // The correct mangling is a leading ? and 2 trailing @ signs. Handle 429 // both cases. 
430 int AtCount = IsKnownStaticDataMember ? 2 : 1; 431 for (int I = 0; I < AtCount; ++I) { 432 if (consumeFront(MangledName, '@')) 433 continue; 434 Error = true; 435 return nullptr; 436 } 437 438 FSN = demangleFunctionEncoding(MangledName); 439 if (FSN) 440 FSN->Name = synthesizeQualifiedName(Arena, DSIN); 441 } else { 442 if (IsKnownStaticDataMember) { 443 // This was supposed to be a static data member, but we got a function. 444 Error = true; 445 return nullptr; 446 } 447 448 FSN = static_cast<FunctionSymbolNode *>(Symbol); 449 DSIN->Name = Symbol->Name; 450 FSN->Name = synthesizeQualifiedName(Arena, DSIN); 451 } 452 453 return FSN; 454 } 455 456 SymbolNode *Demangler::demangleSpecialIntrinsic(std::string_view &MangledName) { 457 SpecialIntrinsicKind SIK = consumeSpecialIntrinsicKind(MangledName); 458 459 switch (SIK) { 460 case SpecialIntrinsicKind::None: 461 return nullptr; 462 case SpecialIntrinsicKind::StringLiteralSymbol: 463 return demangleStringLiteral(MangledName); 464 case SpecialIntrinsicKind::Vftable: 465 case SpecialIntrinsicKind::Vbtable: 466 case SpecialIntrinsicKind::LocalVftable: 467 case SpecialIntrinsicKind::RttiCompleteObjLocator: 468 return demangleSpecialTableSymbolNode(MangledName, SIK); 469 case SpecialIntrinsicKind::VcallThunk: 470 return demangleVcallThunkNode(MangledName); 471 case SpecialIntrinsicKind::LocalStaticGuard: 472 return demangleLocalStaticGuard(MangledName, /*IsThread=*/false); 473 case SpecialIntrinsicKind::LocalStaticThreadGuard: 474 return demangleLocalStaticGuard(MangledName, /*IsThread=*/true); 475 case SpecialIntrinsicKind::RttiTypeDescriptor: { 476 TypeNode *T = demangleType(MangledName, QualifierMangleMode::Result); 477 if (Error) 478 break; 479 if (!consumeFront(MangledName, "@8")) 480 break; 481 if (!MangledName.empty()) 482 break; 483 return synthesizeVariable(Arena, T, "`RTTI Type Descriptor'"); 484 } 485 case SpecialIntrinsicKind::RttiBaseClassArray: 486 return demangleUntypedVariable(Arena, MangledName, 487 
"`RTTI Base Class Array'"); 488 case SpecialIntrinsicKind::RttiClassHierarchyDescriptor: 489 return demangleUntypedVariable(Arena, MangledName, 490 "`RTTI Class Hierarchy Descriptor'"); 491 case SpecialIntrinsicKind::RttiBaseClassDescriptor: 492 return demangleRttiBaseClassDescriptorNode(Arena, MangledName); 493 case SpecialIntrinsicKind::DynamicInitializer: 494 return demangleInitFiniStub(MangledName, /*IsDestructor=*/false); 495 case SpecialIntrinsicKind::DynamicAtexitDestructor: 496 return demangleInitFiniStub(MangledName, /*IsDestructor=*/true); 497 case SpecialIntrinsicKind::Typeof: 498 case SpecialIntrinsicKind::UdtReturning: 499 // It's unclear which tools produces these manglings, so demangling 500 // support is not (yet?) implemented. 501 break; 502 case SpecialIntrinsicKind::Unknown: 503 DEMANGLE_UNREACHABLE; // Never returned by consumeSpecialIntrinsicKind. 504 } 505 Error = true; 506 return nullptr; 507 } 508 509 IdentifierNode * 510 Demangler::demangleFunctionIdentifierCode(std::string_view &MangledName) { 511 assert(llvm::starts_with(MangledName, '?')); 512 MangledName.remove_prefix(1); 513 if (MangledName.empty()) { 514 Error = true; 515 return nullptr; 516 } 517 518 if (consumeFront(MangledName, "__")) 519 return demangleFunctionIdentifierCode( 520 MangledName, FunctionIdentifierCodeGroup::DoubleUnder); 521 if (consumeFront(MangledName, "_")) 522 return demangleFunctionIdentifierCode(MangledName, 523 FunctionIdentifierCodeGroup::Under); 524 return demangleFunctionIdentifierCode(MangledName, 525 FunctionIdentifierCodeGroup::Basic); 526 } 527 528 StructorIdentifierNode * 529 Demangler::demangleStructorIdentifier(std::string_view &MangledName, 530 bool IsDestructor) { 531 StructorIdentifierNode *N = Arena.alloc<StructorIdentifierNode>(); 532 N->IsDestructor = IsDestructor; 533 return N; 534 } 535 536 ConversionOperatorIdentifierNode * 537 Demangler::demangleConversionOperatorIdentifier(std::string_view &MangledName) { 538 
ConversionOperatorIdentifierNode *N = 539 Arena.alloc<ConversionOperatorIdentifierNode>(); 540 return N; 541 } 542 543 LiteralOperatorIdentifierNode * 544 Demangler::demangleLiteralOperatorIdentifier(std::string_view &MangledName) { 545 LiteralOperatorIdentifierNode *N = 546 Arena.alloc<LiteralOperatorIdentifierNode>(); 547 N->Name = demangleSimpleString(MangledName, /*Memorize=*/false); 548 return N; 549 } 550 551 IntrinsicFunctionKind 552 Demangler::translateIntrinsicFunctionCode(char CH, 553 FunctionIdentifierCodeGroup Group) { 554 using IFK = IntrinsicFunctionKind; 555 if (!(CH >= '0' && CH <= '9') && !(CH >= 'A' && CH <= 'Z')) { 556 Error = true; 557 return IFK::None; 558 } 559 560 // Not all ? identifiers are intrinsics *functions*. This function only maps 561 // operator codes for the special functions, all others are handled elsewhere, 562 // hence the IFK::None entries in the table. 563 static IFK Basic[36] = { 564 IFK::None, // ?0 # Foo::Foo() 565 IFK::None, // ?1 # Foo::~Foo() 566 IFK::New, // ?2 # operator new 567 IFK::Delete, // ?3 # operator delete 568 IFK::Assign, // ?4 # operator= 569 IFK::RightShift, // ?5 # operator>> 570 IFK::LeftShift, // ?6 # operator<< 571 IFK::LogicalNot, // ?7 # operator! 
572 IFK::Equals, // ?8 # operator== 573 IFK::NotEquals, // ?9 # operator!= 574 IFK::ArraySubscript, // ?A # operator[] 575 IFK::None, // ?B # Foo::operator <type>() 576 IFK::Pointer, // ?C # operator-> 577 IFK::Dereference, // ?D # operator* 578 IFK::Increment, // ?E # operator++ 579 IFK::Decrement, // ?F # operator-- 580 IFK::Minus, // ?G # operator- 581 IFK::Plus, // ?H # operator+ 582 IFK::BitwiseAnd, // ?I # operator& 583 IFK::MemberPointer, // ?J # operator->* 584 IFK::Divide, // ?K # operator/ 585 IFK::Modulus, // ?L # operator% 586 IFK::LessThan, // ?M operator< 587 IFK::LessThanEqual, // ?N operator<= 588 IFK::GreaterThan, // ?O operator> 589 IFK::GreaterThanEqual, // ?P operator>= 590 IFK::Comma, // ?Q operator, 591 IFK::Parens, // ?R operator() 592 IFK::BitwiseNot, // ?S operator~ 593 IFK::BitwiseXor, // ?T operator^ 594 IFK::BitwiseOr, // ?U operator| 595 IFK::LogicalAnd, // ?V operator&& 596 IFK::LogicalOr, // ?W operator|| 597 IFK::TimesEqual, // ?X operator*= 598 IFK::PlusEqual, // ?Y operator+= 599 IFK::MinusEqual, // ?Z operator-= 600 }; 601 static IFK Under[36] = { 602 IFK::DivEqual, // ?_0 operator/= 603 IFK::ModEqual, // ?_1 operator%= 604 IFK::RshEqual, // ?_2 operator>>= 605 IFK::LshEqual, // ?_3 operator<<= 606 IFK::BitwiseAndEqual, // ?_4 operator&= 607 IFK::BitwiseOrEqual, // ?_5 operator|= 608 IFK::BitwiseXorEqual, // ?_6 operator^= 609 IFK::None, // ?_7 # vftable 610 IFK::None, // ?_8 # vbtable 611 IFK::None, // ?_9 # vcall 612 IFK::None, // ?_A # typeof 613 IFK::None, // ?_B # local static guard 614 IFK::None, // ?_C # string literal 615 IFK::VbaseDtor, // ?_D # vbase destructor 616 IFK::VecDelDtor, // ?_E # vector deleting destructor 617 IFK::DefaultCtorClosure, // ?_F # default constructor closure 618 IFK::ScalarDelDtor, // ?_G # scalar deleting destructor 619 IFK::VecCtorIter, // ?_H # vector constructor iterator 620 IFK::VecDtorIter, // ?_I # vector destructor iterator 621 IFK::VecVbaseCtorIter, // ?_J # vector vbase constructor 
iterator 622 IFK::VdispMap, // ?_K # virtual displacement map 623 IFK::EHVecCtorIter, // ?_L # eh vector constructor iterator 624 IFK::EHVecDtorIter, // ?_M # eh vector destructor iterator 625 IFK::EHVecVbaseCtorIter, // ?_N # eh vector vbase constructor iterator 626 IFK::CopyCtorClosure, // ?_O # copy constructor closure 627 IFK::None, // ?_P<name> # udt returning <name> 628 IFK::None, // ?_Q # <unknown> 629 IFK::None, // ?_R0 - ?_R4 # RTTI Codes 630 IFK::None, // ?_S # local vftable 631 IFK::LocalVftableCtorClosure, // ?_T # local vftable constructor closure 632 IFK::ArrayNew, // ?_U operator new[] 633 IFK::ArrayDelete, // ?_V operator delete[] 634 IFK::None, // ?_W <unused> 635 IFK::None, // ?_X <unused> 636 IFK::None, // ?_Y <unused> 637 IFK::None, // ?_Z <unused> 638 }; 639 static IFK DoubleUnder[36] = { 640 IFK::None, // ?__0 <unused> 641 IFK::None, // ?__1 <unused> 642 IFK::None, // ?__2 <unused> 643 IFK::None, // ?__3 <unused> 644 IFK::None, // ?__4 <unused> 645 IFK::None, // ?__5 <unused> 646 IFK::None, // ?__6 <unused> 647 IFK::None, // ?__7 <unused> 648 IFK::None, // ?__8 <unused> 649 IFK::None, // ?__9 <unused> 650 IFK::ManVectorCtorIter, // ?__A managed vector ctor iterator 651 IFK::ManVectorDtorIter, // ?__B managed vector dtor iterator 652 IFK::EHVectorCopyCtorIter, // ?__C EH vector copy ctor iterator 653 IFK::EHVectorVbaseCopyCtorIter, // ?__D EH vector vbase copy ctor iter 654 IFK::None, // ?__E dynamic initializer for `T' 655 IFK::None, // ?__F dynamic atexit destructor for `T' 656 IFK::VectorCopyCtorIter, // ?__G vector copy constructor iter 657 IFK::VectorVbaseCopyCtorIter, // ?__H vector vbase copy ctor iter 658 IFK::ManVectorVbaseCopyCtorIter, // ?__I managed vector vbase copy ctor 659 // iter 660 IFK::None, // ?__J local static thread guard 661 IFK::None, // ?__K operator ""_name 662 IFK::CoAwait, // ?__L operator co_await 663 IFK::Spaceship, // ?__M operator<=> 664 IFK::None, // ?__N <unused> 665 IFK::None, // ?__O <unused> 666 IFK::None, 
// ?__P <unused> 667 IFK::None, // ?__Q <unused> 668 IFK::None, // ?__R <unused> 669 IFK::None, // ?__S <unused> 670 IFK::None, // ?__T <unused> 671 IFK::None, // ?__U <unused> 672 IFK::None, // ?__V <unused> 673 IFK::None, // ?__W <unused> 674 IFK::None, // ?__X <unused> 675 IFK::None, // ?__Y <unused> 676 IFK::None, // ?__Z <unused> 677 }; 678 679 int Index = (CH >= '0' && CH <= '9') ? (CH - '0') : (CH - 'A' + 10); 680 switch (Group) { 681 case FunctionIdentifierCodeGroup::Basic: 682 return Basic[Index]; 683 case FunctionIdentifierCodeGroup::Under: 684 return Under[Index]; 685 case FunctionIdentifierCodeGroup::DoubleUnder: 686 return DoubleUnder[Index]; 687 } 688 DEMANGLE_UNREACHABLE; 689 } 690 691 IdentifierNode * 692 Demangler::demangleFunctionIdentifierCode(std::string_view &MangledName, 693 FunctionIdentifierCodeGroup Group) { 694 if (MangledName.empty()) { 695 Error = true; 696 return nullptr; 697 } 698 const char CH = MangledName.front(); 699 switch (Group) { 700 case FunctionIdentifierCodeGroup::Basic: 701 MangledName.remove_prefix(1); 702 switch (CH) { 703 case '0': 704 case '1': 705 return demangleStructorIdentifier(MangledName, CH == '1'); 706 case 'B': 707 return demangleConversionOperatorIdentifier(MangledName); 708 default: 709 return Arena.alloc<IntrinsicFunctionIdentifierNode>( 710 translateIntrinsicFunctionCode(CH, Group)); 711 } 712 case FunctionIdentifierCodeGroup::Under: 713 MangledName.remove_prefix(1); 714 return Arena.alloc<IntrinsicFunctionIdentifierNode>( 715 translateIntrinsicFunctionCode(CH, Group)); 716 case FunctionIdentifierCodeGroup::DoubleUnder: 717 MangledName.remove_prefix(1); 718 switch (CH) { 719 case 'K': 720 return demangleLiteralOperatorIdentifier(MangledName); 721 default: 722 return Arena.alloc<IntrinsicFunctionIdentifierNode>( 723 translateIntrinsicFunctionCode(CH, Group)); 724 } 725 } 726 727 DEMANGLE_UNREACHABLE; 728 } 729 730 SymbolNode *Demangler::demangleEncodedSymbol(std::string_view &MangledName, 731 
QualifiedNameNode *Name) { 732 if (MangledName.empty()) { 733 Error = true; 734 return nullptr; 735 } 736 737 // Read a variable. 738 switch (MangledName.front()) { 739 case '0': 740 case '1': 741 case '2': 742 case '3': 743 case '4': { 744 StorageClass SC = demangleVariableStorageClass(MangledName); 745 return demangleVariableEncoding(MangledName, SC); 746 } 747 } 748 FunctionSymbolNode *FSN = demangleFunctionEncoding(MangledName); 749 750 IdentifierNode *UQN = Name->getUnqualifiedIdentifier(); 751 if (UQN->kind() == NodeKind::ConversionOperatorIdentifier) { 752 ConversionOperatorIdentifierNode *COIN = 753 static_cast<ConversionOperatorIdentifierNode *>(UQN); 754 if (FSN) 755 COIN->TargetType = FSN->Signature->ReturnType; 756 } 757 return FSN; 758 } 759 760 SymbolNode *Demangler::demangleDeclarator(std::string_view &MangledName) { 761 // What follows is a main symbol name. This may include namespaces or class 762 // back references. 763 QualifiedNameNode *QN = demangleFullyQualifiedSymbolName(MangledName); 764 if (Error) 765 return nullptr; 766 767 SymbolNode *Symbol = demangleEncodedSymbol(MangledName, QN); 768 if (Error) 769 return nullptr; 770 Symbol->Name = QN; 771 772 IdentifierNode *UQN = QN->getUnqualifiedIdentifier(); 773 if (UQN->kind() == NodeKind::ConversionOperatorIdentifier) { 774 ConversionOperatorIdentifierNode *COIN = 775 static_cast<ConversionOperatorIdentifierNode *>(UQN); 776 if (!COIN->TargetType) { 777 Error = true; 778 return nullptr; 779 } 780 } 781 return Symbol; 782 } 783 784 SymbolNode *Demangler::demangleMD5Name(std::string_view &MangledName) { 785 assert(llvm::starts_with(MangledName, "??@")); 786 // This is an MD5 mangled name. We can't demangle it, just return the 787 // mangled name. 788 // An MD5 mangled name is ??@ followed by 32 characters and a terminating @. 
789 size_t MD5Last = MangledName.find('@', strlen("??@")); 790 if (MD5Last == std::string_view::npos) { 791 Error = true; 792 return nullptr; 793 } 794 const char *Start = MangledName.begin(); 795 MangledName.remove_prefix(MD5Last + 1); 796 797 // There are two additional special cases for MD5 names: 798 // 1. For complete object locators where the object name is long enough 799 // for the object to have an MD5 name, the complete object locator is 800 // called ??@...@??_R4@ (with a trailing "??_R4@" instead of the usual 801 // leading "??_R4". This is handled here. 802 // 2. For catchable types, in versions of MSVC before 2015 (<1900) or after 803 // 2017.2 (>= 1914), the catchable type mangling is _CT??@...@??@...@8 804 // instead of_CT??@...@8 with just one MD5 name. Since we don't yet 805 // demangle catchable types anywhere, this isn't handled for MD5 names 806 // either. 807 consumeFront(MangledName, "??_R4@"); 808 809 std::string_view MD5(Start, MangledName.begin() - Start); 810 SymbolNode *S = Arena.alloc<SymbolNode>(NodeKind::Md5Symbol); 811 S->Name = synthesizeQualifiedName(Arena, MD5); 812 813 return S; 814 } 815 816 SymbolNode *Demangler::demangleTypeinfoName(std::string_view &MangledName) { 817 assert(llvm::starts_with(MangledName, '.')); 818 consumeFront(MangledName, '.'); 819 820 TypeNode *T = demangleType(MangledName, QualifierMangleMode::Result); 821 if (Error || !MangledName.empty()) { 822 Error = true; 823 return nullptr; 824 } 825 return synthesizeVariable(Arena, T, "`RTTI Type Descriptor Name'"); 826 } 827 828 // Parser entry point. 829 SymbolNode *Demangler::parse(std::string_view &MangledName) { 830 // Typeinfo names are strings stored in RTTI data. They're not symbol names. 831 // It's still useful to demangle them. They're the only demangled entity 832 // that doesn't start with a "?" but a ".". 
833 if (llvm::starts_with(MangledName, '.')) 834 return demangleTypeinfoName(MangledName); 835 836 if (llvm::starts_with(MangledName, "??@")) 837 return demangleMD5Name(MangledName); 838 839 // MSVC-style mangled symbols must start with '?'. 840 if (!llvm::starts_with(MangledName, '?')) { 841 Error = true; 842 return nullptr; 843 } 844 845 consumeFront(MangledName, '?'); 846 847 // ?$ is a template instantiation, but all other names that start with ? are 848 // operators / special names. 849 if (SymbolNode *SI = demangleSpecialIntrinsic(MangledName)) 850 return SI; 851 852 return demangleDeclarator(MangledName); 853 } 854 855 TagTypeNode *Demangler::parseTagUniqueName(std::string_view &MangledName) { 856 if (!consumeFront(MangledName, ".?A")) { 857 Error = true; 858 return nullptr; 859 } 860 consumeFront(MangledName, ".?A"); 861 if (MangledName.empty()) { 862 Error = true; 863 return nullptr; 864 } 865 866 return demangleClassType(MangledName); 867 } 868 869 // <type-encoding> ::= <storage-class> <variable-type> 870 // <storage-class> ::= 0 # private static member 871 // ::= 1 # protected static member 872 // ::= 2 # public static member 873 // ::= 3 # global 874 // ::= 4 # static local 875 876 VariableSymbolNode * 877 Demangler::demangleVariableEncoding(std::string_view &MangledName, 878 StorageClass SC) { 879 VariableSymbolNode *VSN = Arena.alloc<VariableSymbolNode>(); 880 881 VSN->Type = demangleType(MangledName, QualifierMangleMode::Drop); 882 VSN->SC = SC; 883 884 if (Error) 885 return nullptr; 886 887 // <variable-type> ::= <type> <cvr-qualifiers> 888 // ::= <type> <pointee-cvr-qualifiers> # pointers, references 889 switch (VSN->Type->kind()) { 890 case NodeKind::PointerType: { 891 PointerTypeNode *PTN = static_cast<PointerTypeNode *>(VSN->Type); 892 893 Qualifiers ExtraChildQuals = Q_None; 894 PTN->Quals = Qualifiers(VSN->Type->Quals | 895 demanglePointerExtQualifiers(MangledName)); 896 897 bool IsMember = false; 898 std::tie(ExtraChildQuals, IsMember) = 
demangleQualifiers(MangledName); 899 900 if (PTN->ClassParent) { 901 QualifiedNameNode *BackRefName = 902 demangleFullyQualifiedTypeName(MangledName); 903 (void)BackRefName; 904 } 905 PTN->Pointee->Quals = Qualifiers(PTN->Pointee->Quals | ExtraChildQuals); 906 907 break; 908 } 909 default: 910 VSN->Type->Quals = demangleQualifiers(MangledName).first; 911 break; 912 } 913 914 return VSN; 915 } 916 917 // Sometimes numbers are encoded in mangled symbols. For example, 918 // "int (*x)[20]" is a valid C type (x is a pointer to an array of 919 // length 20), so we need some way to embed numbers as part of symbols. 920 // This function parses it. 921 // 922 // <number> ::= [?] <non-negative integer> 923 // 924 // <non-negative integer> ::= <decimal digit> # when 1 <= Number <= 10 925 // ::= <hex digit>+ @ # when Number == 0 or >= 10 926 // 927 // <hex-digit> ::= [A-P] # A = 0, B = 1, ... 928 std::pair<uint64_t, bool> 929 Demangler::demangleNumber(std::string_view &MangledName) { 930 bool IsNegative = consumeFront(MangledName, '?'); 931 932 if (startsWithDigit(MangledName)) { 933 uint64_t Ret = MangledName[0] - '0' + 1; 934 MangledName.remove_prefix(1); 935 return {Ret, IsNegative}; 936 } 937 938 uint64_t Ret = 0; 939 for (size_t i = 0; i < MangledName.size(); ++i) { 940 char C = MangledName[i]; 941 if (C == '@') { 942 MangledName.remove_prefix(i + 1); 943 return {Ret, IsNegative}; 944 } 945 if ('A' <= C && C <= 'P') { 946 Ret = (Ret << 4) + (C - 'A'); 947 continue; 948 } 949 break; 950 } 951 952 Error = true; 953 return {0ULL, false}; 954 } 955 956 uint64_t Demangler::demangleUnsigned(std::string_view &MangledName) { 957 bool IsNegative = false; 958 uint64_t Number = 0; 959 std::tie(Number, IsNegative) = demangleNumber(MangledName); 960 if (IsNegative) 961 Error = true; 962 return Number; 963 } 964 965 int64_t Demangler::demangleSigned(std::string_view &MangledName) { 966 bool IsNegative = false; 967 uint64_t Number = 0; 968 std::tie(Number, IsNegative) = 
demangleNumber(MangledName); 969 if (Number > INT64_MAX) 970 Error = true; 971 int64_t I = static_cast<int64_t>(Number); 972 return IsNegative ? -I : I; 973 } 974 975 // First 10 strings can be referenced by special BackReferences ?0, ?1, ..., ?9. 976 // Memorize it. 977 void Demangler::memorizeString(std::string_view S) { 978 if (Backrefs.NamesCount >= BackrefContext::Max) 979 return; 980 for (size_t i = 0; i < Backrefs.NamesCount; ++i) 981 if (S == Backrefs.Names[i]->Name) 982 return; 983 NamedIdentifierNode *N = Arena.alloc<NamedIdentifierNode>(); 984 N->Name = S; 985 Backrefs.Names[Backrefs.NamesCount++] = N; 986 } 987 988 NamedIdentifierNode * 989 Demangler::demangleBackRefName(std::string_view &MangledName) { 990 assert(startsWithDigit(MangledName)); 991 992 size_t I = MangledName[0] - '0'; 993 if (I >= Backrefs.NamesCount) { 994 Error = true; 995 return nullptr; 996 } 997 998 MangledName.remove_prefix(1); 999 return Backrefs.Names[I]; 1000 } 1001 1002 void Demangler::memorizeIdentifier(IdentifierNode *Identifier) { 1003 // Render this class template name into a string buffer so that we can 1004 // memorize it for the purpose of back-referencing. 
1005 OutputBuffer OB; 1006 Identifier->output(OB, OF_Default); 1007 std::string_view Owned = copyString(OB); 1008 memorizeString(Owned); 1009 std::free(OB.getBuffer()); 1010 } 1011 1012 IdentifierNode * 1013 Demangler::demangleTemplateInstantiationName(std::string_view &MangledName, 1014 NameBackrefBehavior NBB) { 1015 assert(llvm::starts_with(MangledName, "?$")); 1016 consumeFront(MangledName, "?$"); 1017 1018 BackrefContext OuterContext; 1019 std::swap(OuterContext, Backrefs); 1020 1021 IdentifierNode *Identifier = 1022 demangleUnqualifiedSymbolName(MangledName, NBB_Simple); 1023 if (!Error) 1024 Identifier->TemplateParams = demangleTemplateParameterList(MangledName); 1025 1026 std::swap(OuterContext, Backrefs); 1027 if (Error) 1028 return nullptr; 1029 1030 if (NBB & NBB_Template) { 1031 // NBB_Template is only set for types and non-leaf names ("a::" in "a::b"). 1032 // Structors and conversion operators only makes sense in a leaf name, so 1033 // reject them in NBB_Template contexts. 1034 if (Identifier->kind() == NodeKind::ConversionOperatorIdentifier || 1035 Identifier->kind() == NodeKind::StructorIdentifier) { 1036 Error = true; 1037 return nullptr; 1038 } 1039 1040 memorizeIdentifier(Identifier); 1041 } 1042 1043 return Identifier; 1044 } 1045 1046 NamedIdentifierNode *Demangler::demangleSimpleName(std::string_view &MangledName, 1047 bool Memorize) { 1048 std::string_view S = demangleSimpleString(MangledName, Memorize); 1049 if (Error) 1050 return nullptr; 1051 1052 NamedIdentifierNode *Name = Arena.alloc<NamedIdentifierNode>(); 1053 Name->Name = S; 1054 return Name; 1055 } 1056 1057 static bool isRebasedHexDigit(char C) { return (C >= 'A' && C <= 'P'); } 1058 1059 static uint8_t rebasedHexDigitToNumber(char C) { 1060 assert(isRebasedHexDigit(C)); 1061 return (C <= 'J') ? 
(C - 'A') : (10 + C - 'K'); 1062 } 1063 1064 uint8_t Demangler::demangleCharLiteral(std::string_view &MangledName) { 1065 assert(!MangledName.empty()); 1066 if (!llvm::starts_with(MangledName, '?')) { 1067 const uint8_t F = MangledName.front(); 1068 MangledName.remove_prefix(1); 1069 return F; 1070 } 1071 1072 MangledName.remove_prefix(1); 1073 if (MangledName.empty()) 1074 goto CharLiteralError; 1075 1076 if (consumeFront(MangledName, '$')) { 1077 // Two hex digits 1078 if (MangledName.size() < 2) 1079 goto CharLiteralError; 1080 std::string_view Nibbles = MangledName.substr(0, 2); 1081 if (!isRebasedHexDigit(Nibbles[0]) || !isRebasedHexDigit(Nibbles[1])) 1082 goto CharLiteralError; 1083 // Don't append the null terminator. 1084 uint8_t C1 = rebasedHexDigitToNumber(Nibbles[0]); 1085 uint8_t C2 = rebasedHexDigitToNumber(Nibbles[1]); 1086 MangledName.remove_prefix(2); 1087 return (C1 << 4) | C2; 1088 } 1089 1090 if (startsWithDigit(MangledName)) { 1091 const char *Lookup = ",/\\:. \n\t'-"; 1092 char C = Lookup[MangledName[0] - '0']; 1093 MangledName.remove_prefix(1); 1094 return C; 1095 } 1096 1097 if (MangledName[0] >= 'a' && MangledName[0] <= 'z') { 1098 char Lookup[26] = {'\xE1', '\xE2', '\xE3', '\xE4', '\xE5', '\xE6', '\xE7', 1099 '\xE8', '\xE9', '\xEA', '\xEB', '\xEC', '\xED', '\xEE', 1100 '\xEF', '\xF0', '\xF1', '\xF2', '\xF3', '\xF4', '\xF5', 1101 '\xF6', '\xF7', '\xF8', '\xF9', '\xFA'}; 1102 char C = Lookup[MangledName[0] - 'a']; 1103 MangledName.remove_prefix(1); 1104 return C; 1105 } 1106 1107 if (MangledName[0] >= 'A' && MangledName[0] <= 'Z') { 1108 char Lookup[26] = {'\xC1', '\xC2', '\xC3', '\xC4', '\xC5', '\xC6', '\xC7', 1109 '\xC8', '\xC9', '\xCA', '\xCB', '\xCC', '\xCD', '\xCE', 1110 '\xCF', '\xD0', '\xD1', '\xD2', '\xD3', '\xD4', '\xD5', 1111 '\xD6', '\xD7', '\xD8', '\xD9', '\xDA'}; 1112 char C = Lookup[MangledName[0] - 'A']; 1113 MangledName.remove_prefix(1); 1114 return C; 1115 } 1116 1117 CharLiteralError: 1118 Error = true; 1119 return '\0'; 
1120 } 1121 1122 wchar_t Demangler::demangleWcharLiteral(std::string_view &MangledName) { 1123 uint8_t C1, C2; 1124 1125 C1 = demangleCharLiteral(MangledName); 1126 if (Error || MangledName.empty()) 1127 goto WCharLiteralError; 1128 C2 = demangleCharLiteral(MangledName); 1129 if (Error) 1130 goto WCharLiteralError; 1131 1132 return ((wchar_t)C1 << 8) | (wchar_t)C2; 1133 1134 WCharLiteralError: 1135 Error = true; 1136 return L'\0'; 1137 } 1138 1139 static void writeHexDigit(char *Buffer, uint8_t Digit) { 1140 assert(Digit <= 15); 1141 *Buffer = (Digit < 10) ? ('0' + Digit) : ('A' + Digit - 10); 1142 } 1143 1144 static void outputHex(OutputBuffer &OB, unsigned C) { 1145 assert (C != 0); 1146 1147 // It's easier to do the math if we can work from right to left, but we need 1148 // to print the numbers from left to right. So render this into a temporary 1149 // buffer first, then output the temporary buffer. Each byte is of the form 1150 // \xAB, which means that each byte needs 4 characters. Since there are at 1151 // most 4 bytes, we need a 4*4+1 = 17 character temporary buffer. 1152 char TempBuffer[17]; 1153 1154 ::memset(TempBuffer, 0, sizeof(TempBuffer)); 1155 constexpr int MaxPos = sizeof(TempBuffer) - 1; 1156 1157 int Pos = MaxPos - 1; // TempBuffer[MaxPos] is the terminating \0. 
1158 while (C != 0) { 1159 for (int I = 0; I < 2; ++I) { 1160 writeHexDigit(&TempBuffer[Pos--], C % 16); 1161 C /= 16; 1162 } 1163 } 1164 TempBuffer[Pos--] = 'x'; 1165 assert(Pos >= 0); 1166 TempBuffer[Pos--] = '\\'; 1167 OB << std::string_view(&TempBuffer[Pos + 1]); 1168 } 1169 1170 static void outputEscapedChar(OutputBuffer &OB, unsigned C) { 1171 switch (C) { 1172 case '\0': // nul 1173 OB << "\\0"; 1174 return; 1175 case '\'': // single quote 1176 OB << "\\\'"; 1177 return; 1178 case '\"': // double quote 1179 OB << "\\\""; 1180 return; 1181 case '\\': // backslash 1182 OB << "\\\\"; 1183 return; 1184 case '\a': // bell 1185 OB << "\\a"; 1186 return; 1187 case '\b': // backspace 1188 OB << "\\b"; 1189 return; 1190 case '\f': // form feed 1191 OB << "\\f"; 1192 return; 1193 case '\n': // new line 1194 OB << "\\n"; 1195 return; 1196 case '\r': // carriage return 1197 OB << "\\r"; 1198 return; 1199 case '\t': // tab 1200 OB << "\\t"; 1201 return; 1202 case '\v': // vertical tab 1203 OB << "\\v"; 1204 return; 1205 default: 1206 break; 1207 } 1208 1209 if (C > 0x1F && C < 0x7F) { 1210 // Standard ascii char. 1211 OB << (char)C; 1212 return; 1213 } 1214 1215 outputHex(OB, C); 1216 } 1217 1218 static unsigned countTrailingNullBytes(const uint8_t *StringBytes, int Length) { 1219 const uint8_t *End = StringBytes + Length - 1; 1220 unsigned Count = 0; 1221 while (Length > 0 && *End == 0) { 1222 --Length; 1223 --End; 1224 ++Count; 1225 } 1226 return Count; 1227 } 1228 1229 static unsigned countEmbeddedNulls(const uint8_t *StringBytes, 1230 unsigned Length) { 1231 unsigned Result = 0; 1232 for (unsigned I = 0; I < Length; ++I) { 1233 if (*StringBytes++ == 0) 1234 ++Result; 1235 } 1236 return Result; 1237 } 1238 1239 // A mangled (non-wide) string literal stores the total length of the string it 1240 // refers to (passed in NumBytes), and it contains up to 32 bytes of actual text 1241 // (passed in StringBytes, NumChars). 
1242 static unsigned guessCharByteSize(const uint8_t *StringBytes, unsigned NumChars, 1243 uint64_t NumBytes) { 1244 assert(NumBytes > 0); 1245 1246 // If the number of bytes is odd, this is guaranteed to be a char string. 1247 if (NumBytes % 2 == 1) 1248 return 1; 1249 1250 // All strings can encode at most 32 bytes of data. If it's less than that, 1251 // then we encoded the entire string. In this case we check for a 1-byte, 1252 // 2-byte, or 4-byte null terminator. 1253 if (NumBytes < 32) { 1254 unsigned TrailingNulls = countTrailingNullBytes(StringBytes, NumChars); 1255 if (TrailingNulls >= 4 && NumBytes % 4 == 0) 1256 return 4; 1257 if (TrailingNulls >= 2) 1258 return 2; 1259 return 1; 1260 } 1261 1262 // The whole string was not able to be encoded. Try to look at embedded null 1263 // terminators to guess. The heuristic is that we count all embedded null 1264 // terminators. If more than 2/3 are null, it's a char32. If more than 1/3 1265 // are null, it's a char16. Otherwise it's a char8. This obviously isn't 1266 // perfect and is biased towards languages that have ascii alphabets, but this 1267 // was always going to be best effort since the encoding is lossy. 
1268 unsigned Nulls = countEmbeddedNulls(StringBytes, NumChars); 1269 if (Nulls >= 2 * NumChars / 3 && NumBytes % 4 == 0) 1270 return 4; 1271 if (Nulls >= NumChars / 3) 1272 return 2; 1273 return 1; 1274 } 1275 1276 static unsigned decodeMultiByteChar(const uint8_t *StringBytes, 1277 unsigned CharIndex, unsigned CharBytes) { 1278 assert(CharBytes == 1 || CharBytes == 2 || CharBytes == 4); 1279 unsigned Offset = CharIndex * CharBytes; 1280 unsigned Result = 0; 1281 StringBytes = StringBytes + Offset; 1282 for (unsigned I = 0; I < CharBytes; ++I) { 1283 unsigned C = static_cast<unsigned>(StringBytes[I]); 1284 Result |= C << (8 * I); 1285 } 1286 return Result; 1287 } 1288 1289 FunctionSymbolNode * 1290 Demangler::demangleVcallThunkNode(std::string_view &MangledName) { 1291 FunctionSymbolNode *FSN = Arena.alloc<FunctionSymbolNode>(); 1292 VcallThunkIdentifierNode *VTIN = Arena.alloc<VcallThunkIdentifierNode>(); 1293 FSN->Signature = Arena.alloc<ThunkSignatureNode>(); 1294 FSN->Signature->FunctionClass = FC_NoParameterList; 1295 1296 FSN->Name = demangleNameScopeChain(MangledName, VTIN); 1297 if (!Error) 1298 Error = !consumeFront(MangledName, "$B"); 1299 if (!Error) 1300 VTIN->OffsetInVTable = demangleUnsigned(MangledName); 1301 if (!Error) 1302 Error = !consumeFront(MangledName, 'A'); 1303 if (!Error) 1304 FSN->Signature->CallConvention = demangleCallingConvention(MangledName); 1305 return (Error) ? nullptr : FSN; 1306 } 1307 1308 EncodedStringLiteralNode * 1309 Demangler::demangleStringLiteral(std::string_view &MangledName) { 1310 // This function uses goto, so declare all variables up front. 
1311 OutputBuffer OB; 1312 std::string_view CRC; 1313 uint64_t StringByteSize; 1314 bool IsWcharT = false; 1315 bool IsNegative = false; 1316 size_t CrcEndPos = 0; 1317 char F; 1318 1319 EncodedStringLiteralNode *Result = Arena.alloc<EncodedStringLiteralNode>(); 1320 1321 // Prefix indicating the beginning of a string literal 1322 if (!consumeFront(MangledName, "@_")) 1323 goto StringLiteralError; 1324 if (MangledName.empty()) 1325 goto StringLiteralError; 1326 1327 // Char Type (regular or wchar_t) 1328 F = MangledName.front(); 1329 MangledName.remove_prefix(1); 1330 switch (F) { 1331 case '1': 1332 IsWcharT = true; 1333 DEMANGLE_FALLTHROUGH; 1334 case '0': 1335 break; 1336 default: 1337 goto StringLiteralError; 1338 } 1339 1340 // Encoded Length 1341 std::tie(StringByteSize, IsNegative) = demangleNumber(MangledName); 1342 if (Error || IsNegative || StringByteSize < (IsWcharT ? 2 : 1)) 1343 goto StringLiteralError; 1344 1345 // CRC 32 (always 8 characters plus a terminator) 1346 CrcEndPos = MangledName.find('@'); 1347 if (CrcEndPos == std::string_view::npos) 1348 goto StringLiteralError; 1349 CRC = MangledName.substr(0, CrcEndPos); 1350 MangledName.remove_prefix(CrcEndPos + 1); 1351 if (MangledName.empty()) 1352 goto StringLiteralError; 1353 1354 if (IsWcharT) { 1355 Result->Char = CharKind::Wchar; 1356 if (StringByteSize > 64) 1357 Result->IsTruncated = true; 1358 1359 while (!consumeFront(MangledName, '@')) { 1360 if (MangledName.size() < 2) 1361 goto StringLiteralError; 1362 wchar_t W = demangleWcharLiteral(MangledName); 1363 if (StringByteSize != 2 || Result->IsTruncated) 1364 outputEscapedChar(OB, W); 1365 StringByteSize -= 2; 1366 if (Error) 1367 goto StringLiteralError; 1368 } 1369 } else { 1370 // The max byte length is actually 32, but some compilers mangled strings 1371 // incorrectly, so we have to assume it can go higher. 
1372 constexpr unsigned MaxStringByteLength = 32 * 4; 1373 uint8_t StringBytes[MaxStringByteLength]; 1374 1375 unsigned BytesDecoded = 0; 1376 while (!consumeFront(MangledName, '@')) { 1377 if (MangledName.size() < 1 || BytesDecoded >= MaxStringByteLength) 1378 goto StringLiteralError; 1379 StringBytes[BytesDecoded++] = demangleCharLiteral(MangledName); 1380 } 1381 1382 if (StringByteSize > BytesDecoded) 1383 Result->IsTruncated = true; 1384 1385 unsigned CharBytes = 1386 guessCharByteSize(StringBytes, BytesDecoded, StringByteSize); 1387 assert(StringByteSize % CharBytes == 0); 1388 switch (CharBytes) { 1389 case 1: 1390 Result->Char = CharKind::Char; 1391 break; 1392 case 2: 1393 Result->Char = CharKind::Char16; 1394 break; 1395 case 4: 1396 Result->Char = CharKind::Char32; 1397 break; 1398 default: 1399 DEMANGLE_UNREACHABLE; 1400 } 1401 const unsigned NumChars = BytesDecoded / CharBytes; 1402 for (unsigned CharIndex = 0; CharIndex < NumChars; ++CharIndex) { 1403 unsigned NextChar = 1404 decodeMultiByteChar(StringBytes, CharIndex, CharBytes); 1405 if (CharIndex + 1 < NumChars || Result->IsTruncated) 1406 outputEscapedChar(OB, NextChar); 1407 } 1408 } 1409 1410 Result->DecodedString = copyString(OB); 1411 std::free(OB.getBuffer()); 1412 return Result; 1413 1414 StringLiteralError: 1415 Error = true; 1416 std::free(OB.getBuffer()); 1417 return nullptr; 1418 } 1419 1420 // Returns MangledName's prefix before the first '@', or an error if 1421 // MangledName contains no '@' or the prefix has length 0. 
// On success the returned prefix and its terminating '@' are removed from
// MangledName, and the prefix is memorized as a back-reference candidate when
// Memorize is true. On failure Error is set and an empty view is returned.
std::string_view Demangler::demangleSimpleString(std::string_view &MangledName,
                                                 bool Memorize) {
  std::string_view S;
  for (size_t i = 0; i < MangledName.size(); ++i) {
    if (MangledName[i] != '@')
      continue;
    // A leading '@' means the name is empty, which is an error.
    if (i == 0)
      break;
    S = MangledName.substr(0, i);
    MangledName.remove_prefix(i + 1);

    if (Memorize)
      memorizeString(S);
    return S;
  }

  Error = true;
  return {};
}

// Parses an anonymous namespace piece: "?A" <key> '@'. The key is memorized
// for back-referencing but is not part of the returned node's name. Sets
// Error and returns nullptr if the terminating '@' is missing.
NamedIdentifierNode *
Demangler::demangleAnonymousNamespaceName(std::string_view &MangledName) {
  assert(llvm::starts_with(MangledName, "?A"));
  consumeFront(MangledName, "?A");

  NamedIdentifierNode *Node = Arena.alloc<NamedIdentifierNode>();
  Node->Name = "`anonymous namespace'";
  size_t EndPos = MangledName.find('@');
  if (EndPos == std::string_view::npos) {
    Error = true;
    return nullptr;
  }
  std::string_view NamespaceKey = MangledName.substr(0, EndPos);
  memorizeString(NamespaceKey);
  MangledName = MangledName.substr(EndPos + 1);
  return Node;
}

// Parses a locally scoped name piece: '?' <number> '?' <enclosing symbol>.
// The enclosing symbol is parsed recursively with parse() and the resulting
// identifier is rendered as `<enclosing symbol>'::`<number>'. Returns nullptr
// if parsing the enclosing symbol fails.
NamedIdentifierNode *
Demangler::demangleLocallyScopedNamePiece(std::string_view &MangledName) {
  assert(startsWithLocalScopePattern(MangledName));

  NamedIdentifierNode *Identifier = Arena.alloc<NamedIdentifierNode>();
  consumeFront(MangledName, '?');
  uint64_t Number = 0;
  bool IsNegative = false;
  std::tie(Number, IsNegative) = demangleNumber(MangledName);
  assert(!IsNegative);

  // One ? to terminate the number
  consumeFront(MangledName, '?');

  assert(!Error);
  Node *Scope = parse(MangledName);
  if (Error)
    return nullptr;

  // Render the parent symbol's name into a buffer.
  OutputBuffer OB;
  OB << '`';
  Scope->output(OB, OF_Default);
  OB << '\'';
  OB << "::`" << Number << "'";

  Identifier->Name = copyString(OB);
  std::free(OB.getBuffer());
  return Identifier;
}

// Parses a type name in the form of A@B@C@@ which represents C::B::A.
// Returns nullptr on error.
QualifiedNameNode *
Demangler::demangleFullyQualifiedTypeName(std::string_view &MangledName) {
  IdentifierNode *Identifier =
      demangleUnqualifiedTypeName(MangledName, /*Memorize=*/true);
  if (Error)
    return nullptr;
  assert(Identifier);

  QualifiedNameNode *QN = demangleNameScopeChain(MangledName, Identifier);
  if (Error)
    return nullptr;
  assert(QN);
  return QN;
}

// Parses a symbol name in the form of A@B@C@@ which represents C::B::A.
// Symbol names have slightly different rules regarding what can appear
// so we separate out the implementations for flexibility.
// Returns nullptr on error.
QualifiedNameNode *
Demangler::demangleFullyQualifiedSymbolName(std::string_view &MangledName) {
  // This is the final component of a symbol name (i.e. the leftmost component
  // of a mangled name.  Since the only possible template instantiation that
  // can appear in this context is a function template, and since those are
  // not saved for the purposes of name backreferences, only backref simple
  // names.
  IdentifierNode *Identifier =
      demangleUnqualifiedSymbolName(MangledName, NBB_Simple);
  if (Error)
    return nullptr;

  QualifiedNameNode *QN = demangleNameScopeChain(MangledName, Identifier);
  if (Error)
    return nullptr;

  if (Identifier->kind() == NodeKind::StructorIdentifier) {
    // A structor needs an enclosing class: link it to the second-to-last
    // component of the qualified name, and fail if there is none.
    if (QN->Components->Count < 2) {
      Error = true;
      return nullptr;
    }
    StructorIdentifierNode *SIN =
        static_cast<StructorIdentifierNode *>(Identifier);
    Node *ClassNode = QN->Components->Nodes[QN->Components->Count - 2];
    SIN->Class = static_cast<IdentifierNode *>(ClassNode);
  }
  assert(QN);
  return QN;
}

// Parses the innermost component of a type name: a back-reference (leading
// digit), a template instantiation ("?$"), or a simple name.
IdentifierNode *
Demangler::demangleUnqualifiedTypeName(std::string_view &MangledName,
                                       bool Memorize) {
  // An inner-most name can be a back-reference, because a fully-qualified name
  // (e.g. Scope + Inner) can contain other fully qualified names inside of
  // them (for example template parameters), and these nested parameters can
  // refer to previously mangled types.
  if (startsWithDigit(MangledName))
    return demangleBackRefName(MangledName);

  if (llvm::starts_with(MangledName, "?$"))
    return demangleTemplateInstantiationName(MangledName, NBB_Template);

  return demangleSimpleName(MangledName, Memorize);
}

// Parses the innermost component of a symbol name: a back-reference (leading
// digit), a template instantiation ("?$"), a function identifier code (any
// other leading '?'), or a simple name. A simple name is memorized only when
// NBB includes NBB_Simple.
IdentifierNode *
Demangler::demangleUnqualifiedSymbolName(std::string_view &MangledName,
                                         NameBackrefBehavior NBB) {
  if (startsWithDigit(MangledName))
    return demangleBackRefName(MangledName);
  if (llvm::starts_with(MangledName, "?$"))
    return demangleTemplateInstantiationName(MangledName, NBB);
  if (llvm::starts_with(MangledName, '?'))
    return demangleFunctionIdentifierCode(MangledName);
  return demangleSimpleName(MangledName, /*Memorize=*/(NBB & NBB_Simple) != 0);
}

// Parses one enclosing-scope component of a qualified name. The checks are
// ordered from most to least specific prefix: back-reference, template
// instantiation ("?$"), anonymous namespace ("?A"), locally scoped name, and
// finally a simple name.
IdentifierNode *
Demangler::demangleNameScopePiece(std::string_view &MangledName) {
  if (startsWithDigit(MangledName))
    return demangleBackRefName(MangledName);

  if (llvm::starts_with(MangledName, "?$"))
    return demangleTemplateInstantiationName(MangledName, NBB_Template);

  if (llvm::starts_with(MangledName, "?A"))
    return demangleAnonymousNamespaceName(MangledName);

  if (startsWithLocalScopePattern(MangledName))
    return demangleLocallyScopedNamePiece(MangledName);

  return demangleSimpleName(MangledName, /*Memorize=*/true);
}

// Copies the first Count nodes of the singly linked list starting at Head
// into a newly allocated node array, in list order.
static NodeArrayNode *nodeListToNodeArray(ArenaAllocator &Arena, NodeList *Head,
                                          size_t Count) {
  NodeArrayNode *N = Arena.alloc<NodeArrayNode>();
  N->Count = Count;
  N->Nodes = Arena.allocArray<Node *>(Count);
  for (size_t I = 0; I < Count; ++I) {
    N->Nodes[I] = Head->N;
    Head = Head->Next;
  }
  return N;
}

// Parses the enclosing scopes that follow UnqualifiedName, up to the
// terminating '@', and returns the complete qualified name. Each scope that
// is read is pushed onto the front of the list, so the resulting array lists
// the scope parsed last first and UnqualifiedName last. Returns nullptr on
// error.
QualifiedNameNode *
Demangler::demangleNameScopeChain(std::string_view &MangledName,
                                  IdentifierNode *UnqualifiedName) {
  NodeList *Head = Arena.alloc<NodeList>();

  Head->N = UnqualifiedName;

  size_t Count = 1;
  while (!consumeFront(MangledName, "@")) {
    ++Count;
    NodeList *NewHead = Arena.alloc<NodeList>();
    NewHead->Next = Head;
    Head = NewHead;

    // Running out of input before the terminating '@' is an error.
    if (MangledName.empty()) {
      Error = true;
      return nullptr;
    }

    assert(!Error);
    IdentifierNode *Elem = demangleNameScopePiece(MangledName);
    if (Error)
      return nullptr;

    Head->N = Elem;
  }

  QualifiedNameNode *QN = Arena.alloc<QualifiedNameNode>();
  QN->Components = nodeListToNodeArray(Arena, Head, Count);
  return QN;
}

// Decodes the function class code at the front of MangledName into a set of
// FuncClass flags. The first character is read unconditionally, so
// MangledName must not be empty. On an unrecognized code sets Error and
// returns FC_Public.
FuncClass Demangler::demangleFunctionClass(std::string_view &MangledName) {
  const char F = MangledName.front();
  MangledName.remove_prefix(1);
  switch (F) {
  case '9':
    return FuncClass(FC_ExternC | FC_NoParameterList);
  case 'A':
    return FC_Private;
  case 'B':
    return FuncClass(FC_Private | FC_Far);
  case 'C':
    return FuncClass(FC_Private | FC_Static);
  case 'D':
    return FuncClass(FC_Private | FC_Static | FC_Far);
  case 'E':
    return FuncClass(FC_Private | FC_Virtual);
  case 'F':
    return FuncClass(FC_Private | FC_Virtual | FC_Far);
  case 'G':
    return FuncClass(FC_Private | FC_StaticThisAdjust);
  case 'H':
    return FuncClass(FC_Private | FC_StaticThisAdjust | FC_Far);
  case 'I':
    return FuncClass(FC_Protected);
  case 'J':
    return FuncClass(FC_Protected | FC_Far);
  case 'K':
    return FuncClass(FC_Protected | FC_Static);
  case 'L':
    return FuncClass(FC_Protected | FC_Static | FC_Far);
  case 'M':
    return FuncClass(FC_Protected | FC_Virtual);
  case 'N':
    return FuncClass(FC_Protected | FC_Virtual | FC_Far);
  case 'O':
    return FuncClass(FC_Protected | FC_Virtual | FC_StaticThisAdjust);
  case 'P':
    return FuncClass(FC_Protected | FC_Virtual | FC_StaticThisAdjust | FC_Far);
  case 'Q':
    return FuncClass(FC_Public);
  case 'R':
    return FuncClass(FC_Public | FC_Far);
  case 'S':
    return FuncClass(FC_Public | FC_Static);
  case 'T':
    return FuncClass(FC_Public | FC_Static | FC_Far);
  case 'U':
    return FuncClass(FC_Public | FC_Virtual);
  case 'V':
    return FuncClass(FC_Public | FC_Virtual | FC_Far);
  case 'W':
    return FuncClass(FC_Public | FC_Virtual | FC_StaticThisAdjust);
  case 'X':
    return FuncClass(FC_Public | FC_Virtual | FC_StaticThisAdjust | FC_Far);
  case 'Y':
    return FuncClass(FC_Global);
  case 'Z':
    return FuncClass(FC_Global | FC_Far);
  case '$': {
    // '$' [R] <digit>: virtual this-adjusting function. The optional 'R'
    // selects the extended form; the digit selects access and far-ness.
    FuncClass VFlag = FC_VirtualThisAdjust;
    if (consumeFront(MangledName, 'R'))
      VFlag = FuncClass(VFlag | FC_VirtualThisAdjustEx);
    if (MangledName.empty())
      break;
    const char F = MangledName.front();
    MangledName.remove_prefix(1);
    switch (F) {
    case '0':
      return FuncClass(FC_Private | FC_Virtual | VFlag);
    case '1':
      return FuncClass(FC_Private | FC_Virtual | VFlag | FC_Far);
    case '2':
      return FuncClass(FC_Protected | FC_Virtual | VFlag);
    case '3':
      return FuncClass(FC_Protected | FC_Virtual | VFlag | FC_Far);
    case '4':
      return FuncClass(FC_Public | FC_Virtual | VFlag);
    case '5':
      return FuncClass(FC_Public | FC_Virtual | VFlag | FC_Far);
    }
  }
  }

  // Every recognized code returned above; anything else is malformed.
  Error = true;
  return FC_Public;
}

// Decodes the one-character calling convention code at the front of
// MangledName. Sets Error only when the input is empty; an unrecognized code
// is consumed and yields CallingConv::None without setting Error.
CallingConv Demangler::demangleCallingConvention(std::string_view &MangledName) {
  if (MangledName.empty()) {
    Error = true;
    return CallingConv::None;
  }

  const char F = MangledName.front();
  MangledName.remove_prefix(1);
  switch (F) {
  case 'A':
  case 'B':
    return CallingConv::Cdecl;
  case 'C':
  case 'D':
    return CallingConv::Pascal;
  case 'E':
  case 'F':
    return CallingConv::Thiscall;
  case 'G':
  case 'H':
    return CallingConv::Stdcall;
  case 'I':
  case 'J':
    return CallingConv::Fastcall;
  case 'M':
  case 'N':
    return CallingConv::Clrcall;
  case 'O':
  case 'P':
    return CallingConv::Eabi;
  case 'Q':
    return CallingConv::Vectorcall;
  case 'S':
    return CallingConv::Swift;
  case 'W':
    return CallingConv::SwiftAsync;
  }

  return CallingConv::None;
}

// Decodes the storage class digit ('0'..'4') at the front of MangledName.
// The caller must guarantee that the first character is in that range.
StorageClass
Demangler::demangleVariableStorageClass(std::string_view &MangledName) {
  assert(MangledName.front() >= '0' && MangledName.front() <= '4');

  const char F = MangledName.front();
  MangledName.remove_prefix(1);
  switch (F) {
  case '0':
    return StorageClass::PrivateStatic;
  case '1':
    return StorageClass::ProtectedStatic;
  case '2':
    return StorageClass::PublicStatic;
  case '3':
    return StorageClass::Global;
  case '4':
    return StorageClass::FunctionLocalStatic;
  }
  DEMANGLE_UNREACHABLE;
}

// Decodes the one-character cv-qualifier code at the front of MangledName.
// Returns the qualifiers together with a flag that is true for the member
// codes (Q, R, S, T) and false for the non-member codes (A, B, C, D). On
// empty input or an unrecognized code sets Error and returns {Q_None, false}.
std::pair<Qualifiers, bool>
Demangler::demangleQualifiers(std::string_view &MangledName) {
  if (MangledName.empty()) {
    Error = true;
    return std::make_pair(Q_None, false);
  }

  const char F = MangledName.front();
  MangledName.remove_prefix(1);
  switch (F) {
  // Member qualifiers
  case 'Q':
    return std::make_pair(Q_None, true);
  case 'R':
    return std::make_pair(Q_Const, true);
  case 'S':
    return std::make_pair(Q_Volatile, true);
  case 'T':
    return std::make_pair(Qualifiers(Q_Const | Q_Volatile), true);
  // Non-Member qualifiers
  case 'A':
    return std::make_pair(Q_None, false);
  case 'B':
    return std::make_pair(Q_Const, false);
  case 'C':
    return std::make_pair(Q_Volatile, false);
  case 'D':
    return std::make_pair(Qualifiers(Q_Const | Q_Volatile), false);
  }
  Error = true;
  return std::make_pair(Q_None, false);
}

// <variable-type> ::= <type> <cvr-qualifiers>
//                 ::= <type>
//                     <pointee-cvr-qualifiers> # pointers, references
//
// Parses a type. QMM controls whether leading cv-qualifiers are read:
// Mangle always reads a qualifier code, Result reads one only after a '?'
// marker, and any other mode reads none. The qualifiers read here are OR-ed
// into the resulting node. Returns nullptr (with Error set) on malformed
// input.
TypeNode *Demangler::demangleType(std::string_view &MangledName,
                                  QualifierMangleMode QMM) {
  Qualifiers Quals = Q_None;
  bool IsMember = false;
  if (QMM == QualifierMangleMode::Mangle) {
    std::tie(Quals, IsMember) = demangleQualifiers(MangledName);
  } else if (QMM == QualifierMangleMode::Result) {
    if (consumeFront(MangledName, '?'))
      std::tie(Quals, IsMember) = demangleQualifiers(MangledName);
  }

  if (MangledName.empty()) {
    Error = true;
    return nullptr;
  }

  // Dispatch on the kind of type that the remaining input starts with.
  TypeNode *Ty = nullptr;
  if (isTagType(MangledName))
    Ty = demangleClassType(MangledName);
  else if (isPointerType(MangledName)) {
    // isMemberPointer() reports malformed input through Error.
    if (isMemberPointer(MangledName, Error))
      Ty = demangleMemberPointerType(MangledName);
    else if (!Error)
      Ty = demanglePointerType(MangledName);
    else
      return nullptr;
  } else if (isArrayType(MangledName))
    Ty = demangleArrayType(MangledName);
  else if (isFunctionType(MangledName)) {
    // "$$A8@@" introduces a function type with this-qualifiers, "$$A6" one
    // without.
    if (consumeFront(MangledName, "$$A8@@"))
      Ty = demangleFunctionType(MangledName, true);
    else {
      assert(llvm::starts_with(MangledName, "$$A6"));
      consumeFront(MangledName, "$$A6");
      Ty = demangleFunctionType(MangledName, false);
    }
  } else if (isCustomType(MangledName)) {
    Ty = demangleCustomType(MangledName);
  } else {
    Ty = demanglePrimitiveType(MangledName);
  }

  if (!Ty || Error)
    return Ty;
  Ty->Quals = Qualifiers(Ty->Quals | Quals);
  return Ty;
}

// Parses the throw specification that ends a function type. Returns true for
// "_E" (noexcept) and false for 'Z'. Anything else sets Error and returns
// false.
bool Demangler::demangleThrowSpecification(std::string_view &MangledName) {
  if (consumeFront(MangledName, "_E"))
    return true;
  if (consumeFront(MangledName, 'Z'))
    return false;

  Error = true;
  return false;
}

// Parses a function type: optional this-qualifiers (only when HasThisQuals is
// true), calling convention, return type ('@' when there is none), parameter
// list and throw specification, in that order. The node is returned even if a
// step reported an error, so callers must check Error.
FunctionSignatureNode *
Demangler::demangleFunctionType(std::string_view &MangledName,
                                bool HasThisQuals) {
  FunctionSignatureNode *FTy = Arena.alloc<FunctionSignatureNode>();

  if (HasThisQuals) {
    FTy->Quals = demanglePointerExtQualifiers(MangledName);
    FTy->RefQualifier = demangleFunctionRefQualifier(MangledName);
    FTy->Quals = Qualifiers(FTy->Quals | demangleQualifiers(MangledName).first);
  }

  // Fields that appear on both member and non-member functions.
  FTy->CallConvention = demangleCallingConvention(MangledName);

  // <return-type> ::= <type>
  //               ::= @ # structors (they have no declared return type)
  bool IsStructor = consumeFront(MangledName, '@');
  if (!IsStructor)
    FTy->ReturnType = demangleType(MangledName, QualifierMangleMode::Result);

  FTy->Params = demangleFunctionParameterList(MangledName, FTy->IsVariadic);

  FTy->IsNoexcept = demangleThrowSpecification(MangledName);

  return FTy;
}

// Parses a function encoding: an optional "$$J0" marker (adds FC_ExternC),
// the function class, any this-adjustment offsets required by that class, and
// the function type. Returns a symbol node whose Signature is set, or nullptr
// on error.
FunctionSymbolNode *
Demangler::demangleFunctionEncoding(std::string_view &MangledName) {
  FuncClass ExtraFlags = FC_None;
  if (consumeFront(MangledName, "$$J0"))
    ExtraFlags = FC_ExternC;

  if (MangledName.empty()) {
    Error = true;
    return nullptr;
  }

  FuncClass FC = demangleFunctionClass(MangledName);
  FC = FuncClass(ExtraFlags | FC);

  // Adjustor thunks carry one or more signed offsets before the signature.
  FunctionSignatureNode *FSN = nullptr;
  ThunkSignatureNode *TTN = nullptr;
  if (FC & FC_StaticThisAdjust) {
    TTN = Arena.alloc<ThunkSignatureNode>();
    TTN->ThisAdjust.StaticOffset = demangleSigned(MangledName);
  } else if (FC & FC_VirtualThisAdjust) {
    TTN = Arena.alloc<ThunkSignatureNode>();
    if (FC & FC_VirtualThisAdjustEx) {
      TTN->ThisAdjust.VBPtrOffset = demangleSigned(MangledName);
      TTN->ThisAdjust.VBOffsetOffset = demangleSigned(MangledName);
    }
    TTN->ThisAdjust.VtordispOffset = demangleSigned(MangledName);
    TTN->ThisAdjust.StaticOffset = demangleSigned(MangledName);
  }

  if (FC & FC_NoParameterList) {
    // This is an extern "C" function whose full signature hasn't been mangled.
    // This happens when we need to mangle a local symbol inside of an extern
    // "C" function.
    FSN = Arena.alloc<FunctionSignatureNode>();
  } else {
    bool HasThisQuals = !(FC & (FC_Global | FC_Static));
    FSN = demangleFunctionType(MangledName, HasThisQuals);
  }

  if (Error)
    return nullptr;

  if (TTN) {
    // Copy the parsed signature into the thunk node's base subobject, then
    // use the thunk node as the signature.
    *static_cast<FunctionSignatureNode *>(TTN) = *FSN;
    FSN = TTN;
  }
  FSN->FunctionClass = FC;

  FunctionSymbolNode *Symbol = Arena.alloc<FunctionSymbolNode>();
  Symbol->Signature = FSN;
  return Symbol;
}

// Parses a custom type: '?' <unqualified type name> '@'. Returns nullptr (with
// Error set) if the name fails to parse or the terminating '@' is missing.
CustomTypeNode *Demangler::demangleCustomType(std::string_view &MangledName) {
  assert(llvm::starts_with(MangledName, '?'));
  MangledName.remove_prefix(1);

  CustomTypeNode *CTN = Arena.alloc<CustomTypeNode>();
  CTN->Identifier = demangleUnqualifiedTypeName(MangledName, /*Memorize=*/true);
  if (!consumeFront(MangledName, '@'))
    Error = true;
  if (Error)
    return nullptr;
  return CTN;
}

// Reads a primitive type.
1965 PrimitiveTypeNode * 1966 Demangler::demanglePrimitiveType(std::string_view &MangledName) { 1967 if (consumeFront(MangledName, "$$T")) 1968 return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Nullptr); 1969 1970 const char F = MangledName.front(); 1971 MangledName.remove_prefix(1); 1972 switch (F) { 1973 case 'X': 1974 return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Void); 1975 case 'D': 1976 return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Char); 1977 case 'C': 1978 return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Schar); 1979 case 'E': 1980 return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Uchar); 1981 case 'F': 1982 return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Short); 1983 case 'G': 1984 return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Ushort); 1985 case 'H': 1986 return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Int); 1987 case 'I': 1988 return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Uint); 1989 case 'J': 1990 return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Long); 1991 case 'K': 1992 return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Ulong); 1993 case 'M': 1994 return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Float); 1995 case 'N': 1996 return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Double); 1997 case 'O': 1998 return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Ldouble); 1999 case '_': { 2000 if (MangledName.empty()) { 2001 Error = true; 2002 return nullptr; 2003 } 2004 const char F = MangledName.front(); 2005 MangledName.remove_prefix(1); 2006 switch (F) { 2007 case 'N': 2008 return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Bool); 2009 case 'J': 2010 return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Int64); 2011 case 'K': 2012 return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Uint64); 2013 case 'W': 2014 return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Wchar); 2015 case 'Q': 2016 return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Char8); 2017 case 'S': 2018 return 
Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Char16); 2019 case 'U': 2020 return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Char32); 2021 } 2022 break; 2023 } 2024 } 2025 Error = true; 2026 return nullptr; 2027 } 2028 2029 TagTypeNode *Demangler::demangleClassType(std::string_view &MangledName) { 2030 TagTypeNode *TT = nullptr; 2031 2032 const char F = MangledName.front(); 2033 MangledName.remove_prefix(1); 2034 switch (F) { 2035 case 'T': 2036 TT = Arena.alloc<TagTypeNode>(TagKind::Union); 2037 break; 2038 case 'U': 2039 TT = Arena.alloc<TagTypeNode>(TagKind::Struct); 2040 break; 2041 case 'V': 2042 TT = Arena.alloc<TagTypeNode>(TagKind::Class); 2043 break; 2044 case 'W': 2045 if (!consumeFront(MangledName, '4')) { 2046 Error = true; 2047 return nullptr; 2048 } 2049 TT = Arena.alloc<TagTypeNode>(TagKind::Enum); 2050 break; 2051 default: 2052 assert(false); 2053 } 2054 2055 TT->QualifiedName = demangleFullyQualifiedTypeName(MangledName); 2056 return TT; 2057 } 2058 2059 // <pointer-type> ::= E? 
<pointer-cvr-qualifiers> <ext-qualifiers> <type> 2060 // # the E is required for 64-bit non-static pointers 2061 PointerTypeNode *Demangler::demanglePointerType(std::string_view &MangledName) { 2062 PointerTypeNode *Pointer = Arena.alloc<PointerTypeNode>(); 2063 2064 std::tie(Pointer->Quals, Pointer->Affinity) = 2065 demanglePointerCVQualifiers(MangledName); 2066 2067 if (consumeFront(MangledName, "6")) { 2068 Pointer->Pointee = demangleFunctionType(MangledName, false); 2069 return Pointer; 2070 } 2071 2072 Qualifiers ExtQuals = demanglePointerExtQualifiers(MangledName); 2073 Pointer->Quals = Qualifiers(Pointer->Quals | ExtQuals); 2074 2075 Pointer->Pointee = demangleType(MangledName, QualifierMangleMode::Mangle); 2076 return Pointer; 2077 } 2078 2079 PointerTypeNode * 2080 Demangler::demangleMemberPointerType(std::string_view &MangledName) { 2081 PointerTypeNode *Pointer = Arena.alloc<PointerTypeNode>(); 2082 2083 std::tie(Pointer->Quals, Pointer->Affinity) = 2084 demanglePointerCVQualifiers(MangledName); 2085 assert(Pointer->Affinity == PointerAffinity::Pointer); 2086 2087 Qualifiers ExtQuals = demanglePointerExtQualifiers(MangledName); 2088 Pointer->Quals = Qualifiers(Pointer->Quals | ExtQuals); 2089 2090 // isMemberPointer() only returns true if there is at least one character 2091 // after the qualifiers. 
2092 if (consumeFront(MangledName, "8")) { 2093 Pointer->ClassParent = demangleFullyQualifiedTypeName(MangledName); 2094 Pointer->Pointee = demangleFunctionType(MangledName, true); 2095 } else { 2096 Qualifiers PointeeQuals = Q_None; 2097 bool IsMember = false; 2098 std::tie(PointeeQuals, IsMember) = demangleQualifiers(MangledName); 2099 assert(IsMember || Error); 2100 Pointer->ClassParent = demangleFullyQualifiedTypeName(MangledName); 2101 2102 Pointer->Pointee = demangleType(MangledName, QualifierMangleMode::Drop); 2103 if (Pointer->Pointee) 2104 Pointer->Pointee->Quals = PointeeQuals; 2105 } 2106 2107 return Pointer; 2108 } 2109 2110 Qualifiers 2111 Demangler::demanglePointerExtQualifiers(std::string_view &MangledName) { 2112 Qualifiers Quals = Q_None; 2113 if (consumeFront(MangledName, 'E')) 2114 Quals = Qualifiers(Quals | Q_Pointer64); 2115 if (consumeFront(MangledName, 'I')) 2116 Quals = Qualifiers(Quals | Q_Restrict); 2117 if (consumeFront(MangledName, 'F')) 2118 Quals = Qualifiers(Quals | Q_Unaligned); 2119 2120 return Quals; 2121 } 2122 2123 ArrayTypeNode *Demangler::demangleArrayType(std::string_view &MangledName) { 2124 assert(MangledName.front() == 'Y'); 2125 MangledName.remove_prefix(1); 2126 2127 uint64_t Rank = 0; 2128 bool IsNegative = false; 2129 std::tie(Rank, IsNegative) = demangleNumber(MangledName); 2130 if (IsNegative || Rank == 0) { 2131 Error = true; 2132 return nullptr; 2133 } 2134 2135 ArrayTypeNode *ATy = Arena.alloc<ArrayTypeNode>(); 2136 NodeList *Head = Arena.alloc<NodeList>(); 2137 NodeList *Tail = Head; 2138 2139 for (uint64_t I = 0; I < Rank; ++I) { 2140 uint64_t D = 0; 2141 std::tie(D, IsNegative) = demangleNumber(MangledName); 2142 if (Error || IsNegative) { 2143 Error = true; 2144 return nullptr; 2145 } 2146 Tail->N = Arena.alloc<IntegerLiteralNode>(D, IsNegative); 2147 if (I + 1 < Rank) { 2148 Tail->Next = Arena.alloc<NodeList>(); 2149 Tail = Tail->Next; 2150 } 2151 } 2152 ATy->Dimensions = nodeListToNodeArray(Arena, Head, 
Rank); 2153 2154 if (consumeFront(MangledName, "$$C")) { 2155 bool IsMember = false; 2156 std::tie(ATy->Quals, IsMember) = demangleQualifiers(MangledName); 2157 if (IsMember) { 2158 Error = true; 2159 return nullptr; 2160 } 2161 } 2162 2163 ATy->ElementType = demangleType(MangledName, QualifierMangleMode::Drop); 2164 return ATy; 2165 } 2166 2167 // Reads a function's parameters. 2168 NodeArrayNode * 2169 Demangler::demangleFunctionParameterList(std::string_view &MangledName, 2170 bool &IsVariadic) { 2171 // Empty parameter list. 2172 if (consumeFront(MangledName, 'X')) 2173 return nullptr; 2174 2175 NodeList *Head = Arena.alloc<NodeList>(); 2176 NodeList **Current = &Head; 2177 size_t Count = 0; 2178 while (!Error && !llvm::starts_with(MangledName, '@') && 2179 !llvm::starts_with(MangledName, 'Z')) { 2180 ++Count; 2181 2182 if (startsWithDigit(MangledName)) { 2183 size_t N = MangledName[0] - '0'; 2184 if (N >= Backrefs.FunctionParamCount) { 2185 Error = true; 2186 return nullptr; 2187 } 2188 MangledName.remove_prefix(1); 2189 2190 *Current = Arena.alloc<NodeList>(); 2191 (*Current)->N = Backrefs.FunctionParams[N]; 2192 Current = &(*Current)->Next; 2193 continue; 2194 } 2195 2196 size_t OldSize = MangledName.size(); 2197 2198 *Current = Arena.alloc<NodeList>(); 2199 TypeNode *TN = demangleType(MangledName, QualifierMangleMode::Drop); 2200 if (!TN || Error) 2201 return nullptr; 2202 2203 (*Current)->N = TN; 2204 2205 size_t CharsConsumed = OldSize - MangledName.size(); 2206 assert(CharsConsumed != 0); 2207 2208 // Single-letter types are ignored for backreferences because memorizing 2209 // them doesn't save anything. 
2210 if (Backrefs.FunctionParamCount <= 9 && CharsConsumed > 1) 2211 Backrefs.FunctionParams[Backrefs.FunctionParamCount++] = TN; 2212 2213 Current = &(*Current)->Next; 2214 } 2215 2216 if (Error) 2217 return nullptr; 2218 2219 NodeArrayNode *NA = nodeListToNodeArray(Arena, Head, Count); 2220 // A non-empty parameter list is terminated by either 'Z' (variadic) parameter 2221 // list or '@' (non variadic). Careful not to consume "@Z", as in that case 2222 // the following Z could be a throw specifier. 2223 if (consumeFront(MangledName, '@')) 2224 return NA; 2225 2226 if (consumeFront(MangledName, 'Z')) { 2227 IsVariadic = true; 2228 return NA; 2229 } 2230 2231 DEMANGLE_UNREACHABLE; 2232 } 2233 2234 NodeArrayNode * 2235 Demangler::demangleTemplateParameterList(std::string_view &MangledName) { 2236 NodeList *Head = nullptr; 2237 NodeList **Current = &Head; 2238 size_t Count = 0; 2239 2240 while (!llvm::starts_with(MangledName, '@')) { 2241 if (consumeFront(MangledName, "$S") || consumeFront(MangledName, "$$V") || 2242 consumeFront(MangledName, "$$$V") || consumeFront(MangledName, "$$Z")) { 2243 // parameter pack separator 2244 continue; 2245 } 2246 2247 ++Count; 2248 2249 // Template parameter lists don't participate in back-referencing. 2250 *Current = Arena.alloc<NodeList>(); 2251 2252 NodeList &TP = **Current; 2253 2254 TemplateParameterReferenceNode *TPRN = nullptr; 2255 if (consumeFront(MangledName, "$$Y")) { 2256 // Template alias 2257 TP.N = demangleFullyQualifiedTypeName(MangledName); 2258 } else if (consumeFront(MangledName, "$$B")) { 2259 // Array 2260 TP.N = demangleType(MangledName, QualifierMangleMode::Drop); 2261 } else if (consumeFront(MangledName, "$$C")) { 2262 // Type has qualifiers. 
2263 TP.N = demangleType(MangledName, QualifierMangleMode::Mangle); 2264 } else if (llvm::starts_with(MangledName, "$1") || 2265 llvm::starts_with(MangledName, "$H") || 2266 llvm::starts_with(MangledName, "$I") || 2267 llvm::starts_with(MangledName, "$J")) { 2268 // Pointer to member 2269 TP.N = TPRN = Arena.alloc<TemplateParameterReferenceNode>(); 2270 TPRN->IsMemberPointer = true; 2271 2272 MangledName.remove_prefix(1); 2273 // 1 - single inheritance <name> 2274 // H - multiple inheritance <name> <number> 2275 // I - virtual inheritance <name> <number> <number> 2276 // J - unspecified inheritance <name> <number> <number> <number> 2277 char InheritanceSpecifier = MangledName.front(); 2278 MangledName.remove_prefix(1); 2279 SymbolNode *S = nullptr; 2280 if (llvm::starts_with(MangledName, '?')) { 2281 S = parse(MangledName); 2282 if (Error || !S->Name) { 2283 Error = true; 2284 return nullptr; 2285 } 2286 memorizeIdentifier(S->Name->getUnqualifiedIdentifier()); 2287 } 2288 2289 switch (InheritanceSpecifier) { 2290 case 'J': 2291 TPRN->ThunkOffsets[TPRN->ThunkOffsetCount++] = 2292 demangleSigned(MangledName); 2293 DEMANGLE_FALLTHROUGH; 2294 case 'I': 2295 TPRN->ThunkOffsets[TPRN->ThunkOffsetCount++] = 2296 demangleSigned(MangledName); 2297 DEMANGLE_FALLTHROUGH; 2298 case 'H': 2299 TPRN->ThunkOffsets[TPRN->ThunkOffsetCount++] = 2300 demangleSigned(MangledName); 2301 DEMANGLE_FALLTHROUGH; 2302 case '1': 2303 break; 2304 default: 2305 DEMANGLE_UNREACHABLE; 2306 } 2307 TPRN->Affinity = PointerAffinity::Pointer; 2308 TPRN->Symbol = S; 2309 } else if (llvm::starts_with(MangledName, "$E?")) { 2310 consumeFront(MangledName, "$E"); 2311 // Reference to symbol 2312 TP.N = TPRN = Arena.alloc<TemplateParameterReferenceNode>(); 2313 TPRN->Symbol = parse(MangledName); 2314 TPRN->Affinity = PointerAffinity::Reference; 2315 } else if (llvm::starts_with(MangledName, "$F") || 2316 llvm::starts_with(MangledName, "$G")) { 2317 TP.N = TPRN = Arena.alloc<TemplateParameterReferenceNode>(); 
2318 2319 // Data member pointer. 2320 MangledName.remove_prefix(1); 2321 char InheritanceSpecifier = MangledName.front(); 2322 MangledName.remove_prefix(1); 2323 2324 switch (InheritanceSpecifier) { 2325 case 'G': 2326 TPRN->ThunkOffsets[TPRN->ThunkOffsetCount++] = 2327 demangleSigned(MangledName); 2328 DEMANGLE_FALLTHROUGH; 2329 case 'F': 2330 TPRN->ThunkOffsets[TPRN->ThunkOffsetCount++] = 2331 demangleSigned(MangledName); 2332 TPRN->ThunkOffsets[TPRN->ThunkOffsetCount++] = 2333 demangleSigned(MangledName); 2334 break; 2335 default: 2336 DEMANGLE_UNREACHABLE; 2337 } 2338 TPRN->IsMemberPointer = true; 2339 2340 } else if (consumeFront(MangledName, "$0")) { 2341 // Integral non-type template parameter 2342 bool IsNegative = false; 2343 uint64_t Value = 0; 2344 std::tie(Value, IsNegative) = demangleNumber(MangledName); 2345 2346 TP.N = Arena.alloc<IntegerLiteralNode>(Value, IsNegative); 2347 } else { 2348 TP.N = demangleType(MangledName, QualifierMangleMode::Drop); 2349 } 2350 if (Error) 2351 return nullptr; 2352 2353 Current = &TP.Next; 2354 } 2355 2356 // The loop above returns nullptr on Error. 2357 assert(!Error); 2358 2359 // Template parameter lists cannot be variadic, so it can only be terminated 2360 // by @ (as opposed to 'Z' in the function parameter case). 2361 assert( 2362 llvm::starts_with(MangledName, '@')); // The above loop exits only on '@'. 2363 consumeFront(MangledName, '@'); 2364 return nodeListToNodeArray(Arena, Head, Count); 2365 } 2366 2367 void Demangler::dumpBackReferences() { 2368 std::printf("%d function parameter backreferences\n", 2369 (int)Backrefs.FunctionParamCount); 2370 2371 // Create an output stream so we can render each type. 
2372 OutputBuffer OB; 2373 for (size_t I = 0; I < Backrefs.FunctionParamCount; ++I) { 2374 OB.setCurrentPosition(0); 2375 2376 TypeNode *T = Backrefs.FunctionParams[I]; 2377 T->output(OB, OF_Default); 2378 2379 std::string_view B = OB; 2380 std::printf(" [%d] - %.*s\n", (int)I, (int)B.size(), B.begin()); 2381 } 2382 std::free(OB.getBuffer()); 2383 2384 if (Backrefs.FunctionParamCount > 0) 2385 std::printf("\n"); 2386 std::printf("%d name backreferences\n", (int)Backrefs.NamesCount); 2387 for (size_t I = 0; I < Backrefs.NamesCount; ++I) { 2388 std::printf(" [%d] - %.*s\n", (int)I, (int)Backrefs.Names[I]->Name.size(), 2389 Backrefs.Names[I]->Name.begin()); 2390 } 2391 if (Backrefs.NamesCount > 0) 2392 std::printf("\n"); 2393 } 2394 2395 char *llvm::microsoftDemangle(const char *MangledName, size_t *NMangled, 2396 char *Buf, size_t *N, 2397 int *Status, MSDemangleFlags Flags) { 2398 Demangler D; 2399 2400 std::string_view Name{MangledName}; 2401 SymbolNode *AST = D.parse(Name); 2402 if (!D.Error && NMangled) 2403 *NMangled = Name.begin() - MangledName; 2404 2405 if (Flags & MSDF_DumpBackrefs) 2406 D.dumpBackReferences(); 2407 2408 OutputFlags OF = OF_Default; 2409 if (Flags & MSDF_NoCallingConvention) 2410 OF = OutputFlags(OF | OF_NoCallingConvention); 2411 if (Flags & MSDF_NoAccessSpecifier) 2412 OF = OutputFlags(OF | OF_NoAccessSpecifier); 2413 if (Flags & MSDF_NoReturnType) 2414 OF = OutputFlags(OF | OF_NoReturnType); 2415 if (Flags & MSDF_NoMemberType) 2416 OF = OutputFlags(OF | OF_NoMemberType); 2417 if (Flags & MSDF_NoVariableType) 2418 OF = OutputFlags(OF | OF_NoVariableType); 2419 2420 int InternalStatus = demangle_success; 2421 if (D.Error) 2422 InternalStatus = demangle_invalid_mangled_name; 2423 else { 2424 OutputBuffer OB(Buf, N); 2425 AST->output(OB, OF); 2426 OB += '\0'; 2427 if (N != nullptr) 2428 *N = OB.getCurrentPosition(); 2429 Buf = OB.getBuffer(); 2430 } 2431 2432 if (Status) 2433 *Status = InternalStatus; 2434 return InternalStatus == 
demangle_success ? Buf : nullptr; 2435 } 2436