//===- MicrosoftDemangle.cpp ----------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines a demangler for MSVC-style mangled symbols.
//
// This file has no dependencies on the rest of LLVM so that it can be
// easily reused in other programs such as libcxxabi.
//
//===----------------------------------------------------------------------===//

#include "llvm/Demangle/MicrosoftDemangle.h"
#include "llvm/Demangle/Demangle.h"
#include "llvm/Demangle/MicrosoftDemangleNodes.h"

#include "llvm/Demangle/DemangleConfig.h"
#include "llvm/Demangle/StringView.h"
#include "llvm/Demangle/Utility.h"

// NOTE(review): this file also uses assert, std::memcpy, strlen, std::free and
// INT64_MAX without including <cassert>, <cstring>, <cstdlib> or <cstdint>
// directly; presumably they arrive through the headers above -- confirm.
#include <array>
#include <cctype>
#include <cstdio>
#include <tuple>

using namespace llvm;
using namespace ms_demangle;

// Returns true if S is non-empty and its first character is a decimal digit.
static bool startsWithDigit(StringView S) {
  return !S.empty() && std::isdigit(S.front());
}

// Node of a singly linked list of Node pointers.
struct NodeList {
  Node *N = nullptr;
  NodeList *Next = nullptr;
};

// If S begins with the character C, strips it from S and returns true.
// Otherwise leaves S untouched and returns false.
static bool consumeFront(StringView &S, char C) {
  if (!S.startsWith(C))
    return false;
  S.remove_prefix(1);
  return true;
}

// If S begins with the string C, strips it from S and returns true.
// Otherwise leaves S untouched and returns false.
static bool consumeFront(StringView &S, StringView C) {
  if (!S.startsWith(C))
    return false;
  S.remove_prefix(C.size());
  return true;
}

// Looks ahead in a pointer-type encoding to decide whether it describes a
// pointer to member. MangledName is taken by value, so the caller's view is
// not advanced. Error is always written: false on entry, then true if the
// lookahead runs into an unexpected character or the end of the input.
// The first character is read unconditionally, so MangledName must not be
// empty.
static bool isMemberPointer(StringView MangledName, bool &Error) {
  Error = false;
  const char F = MangledName.front();
  MangledName.remove_prefix(1);
  switch (F) {
  case '$':
    // This is probably an rvalue reference (e.g. $$Q), and you cannot have an
    // rvalue reference to a member.
    return false;
  case 'A':
    // 'A' indicates a reference, and you cannot have a reference to a member
    // function or member.
    return false;
  case 'P':
  case 'Q':
  case 'R':
  case 'S':
    // These 4 values indicate some kind of pointer, but we still don't know
    // what.
    break;
  default:
    // isMemberPointer() is called only if isPointerType() returns true,
    // and it rejects other prefixes.
    DEMANGLE_UNREACHABLE;
  }

  // If it starts with a number, then 6 indicates a non-member function
  // pointer, and 8 indicates a member function pointer.
  if (startsWithDigit(MangledName)) {
    if (MangledName[0] != '6' && MangledName[0] != '8') {
      Error = true;
      return false;
    }
    return (MangledName[0] == '8');
  }

  // Remove ext qualifiers since those can appear on either type and are
  // therefore not indicative.
  consumeFront(MangledName, 'E'); // 64-bit
  consumeFront(MangledName, 'I'); // restrict
  consumeFront(MangledName, 'F'); // unaligned

  if (MangledName.empty()) {
    Error = true;
    return false;
  }

  // The next value should be either ABCD (non-member) or QRST (member).
  switch (MangledName.front()) {
  case 'A':
  case 'B':
  case 'C':
  case 'D':
    return false;
  case 'Q':
  case 'R':
  case 'S':
  case 'T':
    return true;
  default:
    Error = true;
    return false;
  }
}

// Recognizes a special-name code at the front of MangledName, strips it, and
// returns the matching SpecialIntrinsicKind. If no code matches, nothing is
// consumed and SpecialIntrinsicKind::None is returned. Demangler::parse()
// strips the symbol's first '?' before this runs, so "?_7" here corresponds
// to a symbol that begins with "??_7".
static SpecialIntrinsicKind
consumeSpecialIntrinsicKind(StringView &MangledName) {
  if (consumeFront(MangledName, "?_7"))
    return SpecialIntrinsicKind::Vftable;
  if (consumeFront(MangledName, "?_8"))
    return SpecialIntrinsicKind::Vbtable;
  if (consumeFront(MangledName, "?_9"))
    return SpecialIntrinsicKind::VcallThunk;
  if (consumeFront(MangledName, "?_A"))
    return SpecialIntrinsicKind::Typeof;
  if (consumeFront(MangledName, "?_B"))
    return SpecialIntrinsicKind::LocalStaticGuard;
  if (consumeFront(MangledName, "?_C"))
    return SpecialIntrinsicKind::StringLiteralSymbol;
  if (consumeFront(MangledName, "?_P"))
    return SpecialIntrinsicKind::UdtReturning;
  if (consumeFront(MangledName, "?_R0"))
    return SpecialIntrinsicKind::RttiTypeDescriptor;
  if (consumeFront(MangledName, "?_R1"))
    return SpecialIntrinsicKind::RttiBaseClassDescriptor;
  if (consumeFront(MangledName, "?_R2"))
    return SpecialIntrinsicKind::RttiBaseClassArray;
  if (consumeFront(MangledName, "?_R3"))
    return SpecialIntrinsicKind::RttiClassHierarchyDescriptor;
  if (consumeFront(MangledName, "?_R4"))
    return SpecialIntrinsicKind::RttiCompleteObjLocator;
  if (consumeFront(MangledName, "?_S"))
    return SpecialIntrinsicKind::LocalVftable;
  if (consumeFront(MangledName, "?__E"))
    return SpecialIntrinsicKind::DynamicInitializer;
  if (consumeFront(MangledName, "?__F"))
    return SpecialIntrinsicKind::DynamicAtexitDestructor;
  if (consumeFront(MangledName, "?__J"))
    return SpecialIntrinsicKind::LocalStaticThreadGuard;
  return SpecialIntrinsicKind::None;
}

// Returns true if S begins with '?' <number> '?', where <number> is either a
// single character ('@' or a decimal digit) or an encoded multi-digit number
// terminated by '@'. S is taken by value and is not advanced for the caller.
static bool startsWithLocalScopePattern(StringView S) {
  if (!consumeFront(S, '?'))
    return false;

  size_t End = S.find('?');
  if (End == StringView::npos)
    return false;
  StringView Candidate = S.substr(0, End);
  if (Candidate.empty())
    return false;

  // \?[0-9]\?
  // ?@? is the discriminator 0.
  if (Candidate.size() == 1)
    return Candidate[0] == '@' || (Candidate[0] >= '0' && Candidate[0] <= '9');

  // If it's not 0-9, then it's an encoded number terminated with an @
  if (Candidate.back() != '@')
    return false;
  Candidate.remove_suffix(1);

  // An encoded number starts with B-P and all subsequent digits are in A-P.
  // Note that the reason the first digit cannot be A is two fold.  First, it
  // would create an ambiguity with ?A which delimits the beginning of an
  // anonymous namespace.  Second, A represents 0, and you don't start a multi
  // digit number with a leading 0.  Presumably the anonymous namespace
  // ambiguity is also why single digit encoded numbers use 0-9 rather than A-J.
  if (Candidate[0] < 'B' || Candidate[0] > 'P')
    return false;
  Candidate.remove_prefix(1);
  while (!Candidate.empty()) {
    if (Candidate[0] < 'A' || Candidate[0] > 'P')
      return false;
    Candidate.remove_prefix(1);
  }

  return true;
}

// Returns true if the first character of S is one of the tag-type codes
// below. S.front() is read unconditionally, so S must not be empty.
static bool isTagType(StringView S) {
  switch (S.front()) {
  case 'T': // union
  case 'U': // struct
  case 'V': // class
  case 'W': // enum
    return true;
  }
  return false;
}

// Returns true if S starts with '?'. S[0] is read unconditionally.
static bool isCustomType(StringView S) { return S[0] == '?'; }

// Returns true if S starts with one of the pointer/reference codes below.
// For any input not starting with "$$Q", S.front() is read unconditionally.
static bool isPointerType(StringView S) {
  if (S.startsWith("$$Q")) // foo &&
    return true;

  switch (S.front()) {
  case 'A': // foo &
  case 'P': // foo *
  case 'Q': // foo *const
  case 'R': // foo *volatile
  case 'S': // foo *const volatile
    return true;
  }
  return false;
}

// Returns true if S starts with 'Y'. S[0] is read unconditionally.
static bool isArrayType(StringView S) { return S[0] == 'Y'; }

// Returns true if S starts with one of the two function-type prefixes.
static bool isFunctionType(StringView S) {
  return S.startsWith("$$A8@@") || S.startsWith("$$A6");
}

// Consumes an optional ref-qualifier code: 'G' yields Reference, 'H' yields
// RValueReference. Anything else is left in place and yields None.
static FunctionRefQualifier
demangleFunctionRefQualifier(StringView &MangledName) {
  if (consumeFront(MangledName, 'G'))
    return FunctionRefQualifier::Reference;
  else if (consumeFront(MangledName, 'H'))
    return FunctionRefQualifier::RValueReference;
  return FunctionRefQualifier::None;
}

// Consumes the pointer/reference code at the front of MangledName and returns
// the qualifiers it implies for the pointer itself together with the kind of
// indirection.
static std::pair<Qualifiers, PointerAffinity>
demanglePointerCVQualifiers(StringView &MangledName) {
  if (consumeFront(MangledName, "$$Q"))
    return std::make_pair(Q_None, PointerAffinity::RValueReference);

  const char F = MangledName.front();
  MangledName.remove_prefix(1);
  switch (F) {
  case 'A':
    return std::make_pair(Q_None, PointerAffinity::Reference);
  case 'P':
    return std::make_pair(Q_None, PointerAffinity::Pointer);
  case 'Q':
    return std::make_pair(Q_Const, PointerAffinity::Pointer);
  case 'R':
    return std::make_pair(Q_Volatile, PointerAffinity::Pointer);
  case 'S':
    return std::make_pair(Qualifiers(Q_Const | Q_Volatile),
                          PointerAffinity::Pointer);
  }
  // This function is only called if isPointerType() returns true,
  // and it only returns true for the six cases listed above.
  DEMANGLE_UNREACHABLE;
}

// Copies the bytes of Borrowed into a buffer obtained from the arena and
// returns a view over that copy.
StringView Demangler::copyString(StringView Borrowed) {
  char *Stable = Arena.allocUnalignedBuffer(Borrowed.size());
  // This is not a micro-optimization, it avoids UB, should Borrowed be a null
  // buffer.
  if (Borrowed.size())
    std::memcpy(Stable, Borrowed.begin(), Borrowed.size());

  return {Stable, Borrowed.size()};
}

// Demangles the remainder of a vftable / vbtable / local vftable / RTTI
// complete object locator symbol. K selects the display name and must be one
// of those four kinds. Returns nullptr with Error set if the storage
// character after the scope chain is missing or is neither '6' nor '7'.
SpecialTableSymbolNode *
Demangler::demangleSpecialTableSymbolNode(StringView &MangledName,
                                          SpecialIntrinsicKind K) {
  NamedIdentifierNode *NI = Arena.alloc<NamedIdentifierNode>();
  switch (K) {
  case SpecialIntrinsicKind::Vftable:
    NI->Name = "`vftable'";
    break;
  case SpecialIntrinsicKind::Vbtable:
    NI->Name = "`vbtable'";
    break;
  case SpecialIntrinsicKind::LocalVftable:
    NI->Name = "`local vftable'";
    break;
  case SpecialIntrinsicKind::RttiCompleteObjLocator:
    NI->Name = "`RTTI Complete Object Locator'";
    break;
  default:
    DEMANGLE_UNREACHABLE;
  }
  QualifiedNameNode *QN = demangleNameScopeChain(MangledName, NI);
  SpecialTableSymbolNode *STSN = Arena.alloc<SpecialTableSymbolNode>();
  STSN->Name = QN;
  bool IsMember = false;
  if (MangledName.empty()) {
    Error = true;
    return nullptr;
  }
  char Front = MangledName.front();
  MangledName.remove_prefix(1);
  if (Front != '6' && Front != '7') {
    Error = true;
    return nullptr;
  }

  // IsMember is only a sink for the second element of the pair.
  std::tie(STSN->Quals, IsMember) = demangleQualifiers(MangledName);
  // A target name follows unless the qualifiers are immediately followed by
  // '@'.
  if (!consumeFront(MangledName, '@'))
    STSN->TargetName = demangleFullyQualifiedTypeName(MangledName);
  return STSN;
}

// Demangles the remainder of a local static guard symbol. IsThread is stored
// on the identifier node. After the scope chain, "4IA" marks the guard as not
// visible and "5" as visible; anything else is an error. Any remaining input
// is parsed as the unsigned scope index.
LocalStaticGuardVariableNode *
Demangler::demangleLocalStaticGuard(StringView &MangledName, bool IsThread) {
  LocalStaticGuardIdentifierNode *LSGI =
      Arena.alloc<LocalStaticGuardIdentifierNode>();
  LSGI->IsThread = IsThread;
  QualifiedNameNode *QN = demangleNameScopeChain(MangledName, LSGI);
  LocalStaticGuardVariableNode *LSGVN =
      Arena.alloc<LocalStaticGuardVariableNode>();
  LSGVN->Name = QN;

  if (consumeFront(MangledName, "4IA"))
    LSGVN->IsVisible = false;
  else if (consumeFront(MangledName, "5"))
    LSGVN->IsVisible = true;
  else {
    Error = true;
    return nullptr;
  }

  if (!MangledName.empty())
    LSGI->ScopeIndex = demangleUnsigned(MangledName);
  return LSGVN;
}

// Allocates a NamedIdentifierNode in Arena whose Name is the given view.
// The view is stored as-is, not copied.
static NamedIdentifierNode *synthesizeNamedIdentifier(ArenaAllocator &Arena,
                                                      StringView Name) {
  NamedIdentifierNode *Id = Arena.alloc<NamedIdentifierNode>();
  Id->Name = Name;
  return Id;
}

// Allocates a QualifiedNameNode whose only component is Identifier.
static QualifiedNameNode *synthesizeQualifiedName(ArenaAllocator &Arena,
                                                  IdentifierNode *Identifier) {
  QualifiedNameNode *QN = Arena.alloc<QualifiedNameNode>();
  QN->Components = Arena.alloc<NodeArrayNode>();
  QN->Components->Count = 1;
  QN->Components->Nodes = Arena.allocArray<Node *>(1);
  QN->Components->Nodes[0] = Identifier;
  return QN;
}

// Allocates a single-component QualifiedNameNode for the plain name Name.
static QualifiedNameNode *synthesizeQualifiedName(ArenaAllocator &Arena,
                                                  StringView Name) {
  NamedIdentifierNode *Id = synthesizeNamedIdentifier(Arena, Name);
  return synthesizeQualifiedName(Arena, Id);
}

// Allocates a VariableSymbolNode of the given type whose name is the single
// component VariableName.
static VariableSymbolNode *synthesizeVariable(ArenaAllocator &Arena,
                                              TypeNode *Type,
                                              StringView VariableName) {
  VariableSymbolNode *VSN = Arena.alloc<VariableSymbolNode>();
  VSN->Type = Type;
  VSN->Name = synthesizeQualifiedName(Arena, VariableName);
  return VSN;
}

// Demangles a variable that carries no type: a scope chain ending in the
// synthesized identifier VariableName, followed by a mandatory '8'.
// Returns nullptr with Error set if the '8' is missing.
VariableSymbolNode *Demangler::demangleUntypedVariable(
    ArenaAllocator &Arena, StringView &MangledName, StringView VariableName) {
  NamedIdentifierNode *NI = synthesizeNamedIdentifier(Arena, VariableName);
  QualifiedNameNode *QN = demangleNameScopeChain(MangledName, NI);
  VariableSymbolNode *VSN = Arena.alloc<VariableSymbolNode>();
  VSN->Name = QN;
  if (consumeFront(MangledName, "8"))
    return VSN;

  Error = true;
  return nullptr;
}

// Demangles the remainder of an RTTI base class descriptor: four numbers
// (NVOffset, VBPtrOffset, VBTableOffset, Flags) followed by the scope chain.
// Returns nullptr if any of the numbers failed to parse.
VariableSymbolNode *
Demangler::demangleRttiBaseClassDescriptorNode(ArenaAllocator &Arena,
                                               StringView &MangledName) {
  RttiBaseClassDescriptorNode *RBCDN =
      Arena.alloc<RttiBaseClassDescriptorNode>();
  RBCDN->NVOffset = demangleUnsigned(MangledName);
  RBCDN->VBPtrOffset = demangleSigned(MangledName);
  RBCDN->VBTableOffset = demangleUnsigned(MangledName);
  RBCDN->Flags = demangleUnsigned(MangledName);
  if (Error)
    return nullptr;

  VariableSymbolNode *VSN = Arena.alloc<VariableSymbolNode>();
  VSN->Name = demangleNameScopeChain(MangledName, RBCDN);
  // NOTE(review): the trailing '8' is consumed if present but its absence is
  // not an error here, unlike in demangleUntypedVariable() -- confirm this
  // leniency is intended.
  consumeFront(MangledName, '8');
  return VSN;
}

// Demangles the remainder of a dynamic initializer (IsDestructor == false) or
// dynamic atexit destructor (IsDestructor == true) stub. The stub wraps
// either a variable or a function; in both cases the returned function symbol
// is named by a DynamicStructorIdentifierNode referring to the wrapped entity.
FunctionSymbolNode *Demangler::demangleInitFiniStub(StringView &MangledName,
                                                    bool IsDestructor) {
  DynamicStructorIdentifierNode *DSIN =
      Arena.alloc<DynamicStructorIdentifierNode>();
  DSIN->IsDestructor = IsDestructor;

  bool IsKnownStaticDataMember = false;
  if (consumeFront(MangledName, '?'))
    IsKnownStaticDataMember = true;

  SymbolNode *Symbol = demangleDeclarator(MangledName);
  if (Error)
    return nullptr;

  FunctionSymbolNode *FSN = nullptr;

  if (Symbol->kind() == NodeKind::VariableSymbol) {
    DSIN->Variable = static_cast<VariableSymbolNode *>(Symbol);

    // Older versions of clang mangled this type of symbol incorrectly.  They
    // would omit the leading ? and they would only emit a single @ at the end.
    // The correct mangling is a leading ? and 2 trailing @ signs.  Handle
    // both cases.
    int AtCount = IsKnownStaticDataMember ? 2 : 1;
    for (int I = 0; I < AtCount; ++I) {
      if (consumeFront(MangledName, '@'))
        continue;
      Error = true;
      return nullptr;
    }

    FSN = demangleFunctionEncoding(MangledName);
    if (FSN)
      FSN->Name = synthesizeQualifiedName(Arena, DSIN);
  } else {
    if (IsKnownStaticDataMember) {
      // This was supposed to be a static data member, but we got a function.
      Error = true;
      return nullptr;
    }

    // Any symbol that is not a VariableSymbol is treated as a function here.
    FSN = static_cast<FunctionSymbolNode *>(Symbol);
    DSIN->Name = Symbol->Name;
    FSN->Name = synthesizeQualifiedName(Arena, DSIN);
  }

  return FSN;
}

// Tries to demangle MangledName as one of the special intrinsic symbols.
// Returns nullptr with Error left untouched when no special code is present,
// so the caller can fall back to ordinary declarator parsing. Returns nullptr
// with Error set when a code was recognized but is unsupported or malformed.
SymbolNode *Demangler::demangleSpecialIntrinsic(StringView &MangledName) {
  SpecialIntrinsicKind SIK = consumeSpecialIntrinsicKind(MangledName);

  switch (SIK) {
  case SpecialIntrinsicKind::None:
    return nullptr;
  case SpecialIntrinsicKind::StringLiteralSymbol:
    return demangleStringLiteral(MangledName);
  case SpecialIntrinsicKind::Vftable:
  case SpecialIntrinsicKind::Vbtable:
  case SpecialIntrinsicKind::LocalVftable:
  case SpecialIntrinsicKind::RttiCompleteObjLocator:
    return demangleSpecialTableSymbolNode(MangledName, SIK);
  case SpecialIntrinsicKind::VcallThunk:
    return demangleVcallThunkNode(MangledName);
  case SpecialIntrinsicKind::LocalStaticGuard:
    return demangleLocalStaticGuard(MangledName, /*IsThread=*/false);
  case SpecialIntrinsicKind::LocalStaticThreadGuard:
    return demangleLocalStaticGuard(MangledName, /*IsThread=*/true);
  case SpecialIntrinsicKind::RttiTypeDescriptor: {
    // The type must be followed by exactly "@8" and then the end of input;
    // every `break` below falls through to the error exit.
    TypeNode *T = demangleType(MangledName, QualifierMangleMode::Result);
    if (Error)
      break;
    if (!consumeFront(MangledName, "@8"))
      break;
    if (!MangledName.empty())
      break;
    return synthesizeVariable(Arena, T, "`RTTI Type Descriptor'");
  }
  case SpecialIntrinsicKind::RttiBaseClassArray:
    return demangleUntypedVariable(Arena, MangledName,
                                   "`RTTI Base Class Array'");
  case SpecialIntrinsicKind::RttiClassHierarchyDescriptor:
    return demangleUntypedVariable(Arena, MangledName,
                                   "`RTTI Class Hierarchy Descriptor'");
  case SpecialIntrinsicKind::RttiBaseClassDescriptor:
    return demangleRttiBaseClassDescriptorNode(Arena, MangledName);
  case SpecialIntrinsicKind::DynamicInitializer:
    return demangleInitFiniStub(MangledName, /*IsDestructor=*/false);
  case SpecialIntrinsicKind::DynamicAtexitDestructor:
    return demangleInitFiniStub(MangledName, /*IsDestructor=*/true);
  case SpecialIntrinsicKind::Typeof:
  case SpecialIntrinsicKind::UdtReturning:
    // It's unclear which tools produce these manglings, so demangling
    // support is not (yet?) implemented.
    break;
  case SpecialIntrinsicKind::Unknown:
    DEMANGLE_UNREACHABLE; // Never returned by consumeSpecialIntrinsicKind.
  }
  Error = true;
  return nullptr;
}

// Demangles a '?'-introduced function identifier code. The leading '?' is
// stripped, then an optional "__" or "_" prefix selects the code group and
// the group-specific overload handles the code character itself.
IdentifierNode *
Demangler::demangleFunctionIdentifierCode(StringView &MangledName) {
  assert(MangledName.startsWith('?'));
  MangledName.remove_prefix(1);
  if (MangledName.empty()) {
    Error = true;
    return nullptr;
  }

  // "__" must be tested before "_" because the latter is a prefix of it.
  if (consumeFront(MangledName, "__"))
    return demangleFunctionIdentifierCode(
        MangledName, FunctionIdentifierCodeGroup::DoubleUnder);
  if (consumeFront(MangledName, "_"))
    return demangleFunctionIdentifierCode(MangledName,
                                          FunctionIdentifierCodeGroup::Under);
  return demangleFunctionIdentifierCode(MangledName,
                                        FunctionIdentifierCodeGroup::Basic);
}

// Allocates a constructor/destructor identifier node. MangledName is not
// read or advanced here.
StructorIdentifierNode *
Demangler::demangleStructorIdentifier(StringView &MangledName,
                                      bool IsDestructor) {
  StructorIdentifierNode *N = Arena.alloc<StructorIdentifierNode>();
  N->IsDestructor = IsDestructor;
  return N;
}

// Allocates a conversion-operator identifier node. MangledName is not read or
// advanced here; the node's TargetType is filled in by
// demangleEncodedSymbol().
ConversionOperatorIdentifierNode *
Demangler::demangleConversionOperatorIdentifier(StringView &MangledName) {
  ConversionOperatorIdentifierNode *N =
      Arena.alloc<ConversionOperatorIdentifierNode>();
  return N;
}

// Allocates a literal-operator identifier node and reads its name as a simple
// string, without recording that string as a back-reference.
LiteralOperatorIdentifierNode *
Demangler::demangleLiteralOperatorIdentifier(StringView &MangledName) {
  LiteralOperatorIdentifierNode *N =
      Arena.alloc<LiteralOperatorIdentifierNode>();
  N->Name = demangleSimpleString(MangledName, /*Memorize=*/false);
  return N;
}

// Maps the code character CH within the given group to an
// IntrinsicFunctionKind. Sets Error and returns None if CH is not in [0-9A-Z].
// Codes that are valid but are not intrinsic functions map to None without
// setting Error.
IntrinsicFunctionKind
Demangler::translateIntrinsicFunctionCode(char CH,
                                          FunctionIdentifierCodeGroup Group) {
  using IFK = IntrinsicFunctionKind;
  if (!(CH >= '0' && CH <= '9') && !(CH >= 'A' && CH <= 'Z')) {
    Error = true;
    return IFK::None;
  }

  // Not all ? identifiers are intrinsics *functions*.  This function only maps
  // operator codes for the special functions, all others are handled elsewhere,
  // hence the IFK::None entries in the table.
  static IFK Basic[36] = {
      IFK::None,             // ?0 # Foo::Foo()
      IFK::None,             // ?1 # Foo::~Foo()
      IFK::New,              // ?2 # operator new
      IFK::Delete,           // ?3 # operator delete
      IFK::Assign,           // ?4 # operator=
      IFK::RightShift,       // ?5 # operator>>
      IFK::LeftShift,        // ?6 # operator<<
      IFK::LogicalNot,       // ?7 # operator!
      IFK::Equals,           // ?8 # operator==
      IFK::NotEquals,        // ?9 # operator!=
      IFK::ArraySubscript,   // ?A # operator[]
      IFK::None,             // ?B # Foo::operator <type>()
      IFK::Pointer,          // ?C # operator->
      IFK::Dereference,      // ?D # operator*
      IFK::Increment,        // ?E # operator++
      IFK::Decrement,        // ?F # operator--
      IFK::Minus,            // ?G # operator-
      IFK::Plus,             // ?H # operator+
      IFK::BitwiseAnd,       // ?I # operator&
      IFK::MemberPointer,    // ?J # operator->*
      IFK::Divide,           // ?K # operator/
      IFK::Modulus,          // ?L # operator%
      IFK::LessThan,         // ?M operator<
      IFK::LessThanEqual,    // ?N operator<=
      IFK::GreaterThan,      // ?O operator>
      IFK::GreaterThanEqual, // ?P operator>=
      IFK::Comma,            // ?Q operator,
      IFK::Parens,           // ?R operator()
      IFK::BitwiseNot,       // ?S operator~
      IFK::BitwiseXor,       // ?T operator^
      IFK::BitwiseOr,        // ?U operator|
      IFK::LogicalAnd,       // ?V operator&&
      IFK::LogicalOr,        // ?W operator||
      IFK::TimesEqual,       // ?X operator*=
      IFK::PlusEqual,        // ?Y operator+=
      IFK::MinusEqual,       // ?Z operator-=
  };
  static IFK Under[36] = {
      IFK::DivEqual,           // ?_0 operator/=
      IFK::ModEqual,           // ?_1 operator%=
      IFK::RshEqual,           // ?_2 operator>>=
      IFK::LshEqual,           // ?_3 operator<<=
      IFK::BitwiseAndEqual,    // ?_4 operator&=
      IFK::BitwiseOrEqual,     // ?_5 operator|=
      IFK::BitwiseXorEqual,    // ?_6 operator^=
      IFK::None,               // ?_7 # vftable
      IFK::None,               // ?_8 # vbtable
      IFK::None,               // ?_9 # vcall
      IFK::None,               // ?_A # typeof
      IFK::None,               // ?_B # local static guard
      IFK::None,               // ?_C # string literal
      IFK::VbaseDtor,          // ?_D # vbase destructor
      IFK::VecDelDtor,         // ?_E # vector deleting destructor
      IFK::DefaultCtorClosure, // ?_F # default constructor closure
      IFK::ScalarDelDtor,      // ?_G # scalar deleting destructor
      IFK::VecCtorIter,        // ?_H # vector constructor iterator
      IFK::VecDtorIter,        // ?_I # vector destructor iterator
      IFK::VecVbaseCtorIter,   // ?_J # vector vbase constructor iterator
      IFK::VdispMap,           // ?_K # virtual displacement map
      IFK::EHVecCtorIter,      // ?_L # eh vector constructor iterator
      IFK::EHVecDtorIter,      // ?_M # eh vector destructor iterator
      IFK::EHVecVbaseCtorIter, // ?_N # eh vector vbase constructor iterator
      IFK::CopyCtorClosure,    // ?_O # copy constructor closure
      IFK::None,               // ?_P<name> # udt returning <name>
      IFK::None,               // ?_Q # <unknown>
      IFK::None,               // ?_R0 - ?_R4 # RTTI Codes
      IFK::None,               // ?_S # local vftable
      IFK::LocalVftableCtorClosure, // ?_T # local vftable constructor closure
      IFK::ArrayNew,                // ?_U operator new[]
      IFK::ArrayDelete,             // ?_V operator delete[]
      IFK::None,                    // ?_W <unused>
      IFK::None,                    // ?_X <unused>
      IFK::None,                    // ?_Y <unused>
      IFK::None,                    // ?_Z <unused>
  };
  static IFK DoubleUnder[36] = {
      IFK::None,                       // ?__0 <unused>
      IFK::None,                       // ?__1 <unused>
      IFK::None,                       // ?__2 <unused>
      IFK::None,                       // ?__3 <unused>
      IFK::None,                       // ?__4 <unused>
      IFK::None,                       // ?__5 <unused>
      IFK::None,                       // ?__6 <unused>
      IFK::None,                       // ?__7 <unused>
      IFK::None,                       // ?__8 <unused>
      IFK::None,                       // ?__9 <unused>
      IFK::ManVectorCtorIter,          // ?__A managed vector ctor iterator
      IFK::ManVectorDtorIter,          // ?__B managed vector dtor iterator
      IFK::EHVectorCopyCtorIter,       // ?__C EH vector copy ctor iterator
      IFK::EHVectorVbaseCopyCtorIter,  // ?__D EH vector vbase copy ctor iter
      IFK::None,                       // ?__E dynamic initializer for `T'
      IFK::None,                       // ?__F dynamic atexit destructor for `T'
      IFK::VectorCopyCtorIter,         // ?__G vector copy constructor iter
      IFK::VectorVbaseCopyCtorIter,    // ?__H vector vbase copy ctor iter
      IFK::ManVectorVbaseCopyCtorIter, // ?__I managed vector vbase copy ctor
                                       // iter
      IFK::None,                       // ?__J local static thread guard
      IFK::None,                       // ?__K operator ""_name
      IFK::CoAwait,                    // ?__L operator co_await
      IFK::Spaceship,                  // ?__M operator<=>
      IFK::None,                       // ?__N <unused>
      IFK::None,                       // ?__O <unused>
      IFK::None,                       // ?__P <unused>
      IFK::None,                       // ?__Q <unused>
      IFK::None,                       // ?__R <unused>
      IFK::None,                       // ?__S <unused>
      IFK::None,                       // ?__T <unused>
      IFK::None,                       // ?__U <unused>
      IFK::None,                       // ?__V <unused>
      IFK::None,                       // ?__W <unused>
      IFK::None,                       // ?__X <unused>
      IFK::None,                       // ?__Y <unused>
      IFK::None,                       // ?__Z <unused>
  };

  // '0'-'9' map to slots 0-9 and 'A'-'Z' to slots 10-35; the range check at
  // the top guarantees the index stays inside the 36-entry tables.
  int Index = (CH >= '0' && CH <= '9') ? (CH - '0') : (CH - 'A' + 10);
  switch (Group) {
  case FunctionIdentifierCodeGroup::Basic:
    return Basic[Index];
  case FunctionIdentifierCodeGroup::Under:
    return Under[Index];
  case FunctionIdentifierCodeGroup::DoubleUnder:
    return DoubleUnder[Index];
  }
  DEMANGLE_UNREACHABLE;
}

// Demangles the single code character of a function identifier whose group
// prefix has already been consumed. Structors ('0'/'1'), conversion operators
// ('B') and literal operators ("__K") get dedicated node types; every other
// code becomes an IntrinsicFunctionIdentifierNode.
IdentifierNode *
Demangler::demangleFunctionIdentifierCode(StringView &MangledName,
                                          FunctionIdentifierCodeGroup Group) {
  if (MangledName.empty()) {
    Error = true;
    return nullptr;
  }
  const char CH = MangledName.front();
  switch (Group) {
  case FunctionIdentifierCodeGroup::Basic:
    MangledName.remove_prefix(1);
    switch (CH) {
    case '0':
    case '1':
      return demangleStructorIdentifier(MangledName, CH == '1');
    case 'B':
      return demangleConversionOperatorIdentifier(MangledName);
    default:
      return Arena.alloc<IntrinsicFunctionIdentifierNode>(
          translateIntrinsicFunctionCode(CH, Group));
    }
  case FunctionIdentifierCodeGroup::Under:
    MangledName.remove_prefix(1);
    return Arena.alloc<IntrinsicFunctionIdentifierNode>(
        translateIntrinsicFunctionCode(CH, Group));
  case FunctionIdentifierCodeGroup::DoubleUnder:
    MangledName.remove_prefix(1);
    switch (CH) {
    case 'K':
      return demangleLiteralOperatorIdentifier(MangledName);
    default:
      return Arena.alloc<IntrinsicFunctionIdentifierNode>(
          translateIntrinsicFunctionCode(CH, Group));
    }
  }

  DEMANGLE_UNREACHABLE;
}

// Demangles what follows a fully qualified symbol name: a variable encoding
// if the next character is a storage class ('0'-'4'), otherwise a function
// encoding. For conversion operators, the function's return type is recorded
// as the operator's target type. Name is dereferenced on the function path.
SymbolNode *Demangler::demangleEncodedSymbol(StringView &MangledName,
                                             QualifiedNameNode *Name) {
  if (MangledName.empty()) {
    Error = true;
    return nullptr;
  }

  // Read a variable.
  switch (MangledName.front()) {
  case '0':
  case '1':
  case '2':
  case '3':
  case '4': {
    StorageClass SC = demangleVariableStorageClass(MangledName);
    return demangleVariableEncoding(MangledName, SC);
  }
  }
  FunctionSymbolNode *FSN = demangleFunctionEncoding(MangledName);

  IdentifierNode *UQN = Name->getUnqualifiedIdentifier();
  if (UQN->kind() == NodeKind::ConversionOperatorIdentifier) {
    ConversionOperatorIdentifierNode *COIN =
        static_cast<ConversionOperatorIdentifierNode *>(UQN);
    if (FSN)
      COIN->TargetType = FSN->Signature->ReturnType;
  }
  return FSN;
}

// Demangles a qualified symbol name followed by its variable or function
// encoding, and attaches the name to the resulting symbol. Fails if the name
// is a conversion operator whose target type was never filled in.
SymbolNode *Demangler::demangleDeclarator(StringView &MangledName) {
  // What follows is a main symbol name. This may include namespaces or class
  // back references.
  QualifiedNameNode *QN = demangleFullyQualifiedSymbolName(MangledName);
  if (Error)
    return nullptr;

  SymbolNode *Symbol = demangleEncodedSymbol(MangledName, QN);
  if (Error)
    return nullptr;
  Symbol->Name = QN;

  IdentifierNode *UQN = QN->getUnqualifiedIdentifier();
  if (UQN->kind() == NodeKind::ConversionOperatorIdentifier) {
    ConversionOperatorIdentifierNode *COIN =
        static_cast<ConversionOperatorIdentifierNode *>(UQN);
    if (!COIN->TargetType) {
      Error = true;
      return nullptr;
    }
  }
  return Symbol;
}

// Handles an MD5-hashed name. The hash cannot be reversed, so the resulting
// symbol's name is simply the consumed portion of the mangled input.
SymbolNode *Demangler::demangleMD5Name(StringView &MangledName) {
  assert(MangledName.startsWith("??@"));
  // This is an MD5 mangled name.  We can't demangle it, just return the
  // mangled name.
  // An MD5 mangled name is ??@ followed by 32 characters and a terminating @.
  // Only the terminating '@' is searched for here; the length of the hash
  // itself is not checked.
  size_t MD5Last = MangledName.find('@', strlen("??@"));
  if (MD5Last == StringView::npos) {
    Error = true;
    return nullptr;
  }
  const char *Start = MangledName.begin();
  MangledName.remove_prefix(MD5Last + 1);

  // There are two additional special cases for MD5 names:
  // 1. For complete object locators where the object name is long enough
  //    for the object to have an MD5 name, the complete object locator is
  //    called ??@...@??_R4@ (with a trailing "??_R4@" instead of the usual
  //    leading "??_R4"). This is handled here.
  // 2. For catchable types, in versions of MSVC before 2015 (<1900) or after
  //    2017.2 (>= 1914), the catchable type mangling is _CT??@...@??@...@8
  //    instead of _CT??@...@8 with just one MD5 name. Since we don't yet
  //    demangle catchable types anywhere, this isn't handled for MD5 names
  //    either.
  consumeFront(MangledName, "??_R4@");

  // The name spans everything consumed so far, including an optional
  // "??_R4@" suffix.
  StringView MD5(Start, MangledName.begin() - Start);
  SymbolNode *S = Arena.alloc<SymbolNode>(NodeKind::Md5Symbol);
  S->Name = synthesizeQualifiedName(Arena, MD5);

  return S;
}

// Demangles a typeinfo name: a '.' followed by a type that must extend to the
// end of the input.
SymbolNode *Demangler::demangleTypeinfoName(StringView &MangledName) {
  assert(MangledName.startsWith('.'));
  consumeFront(MangledName, '.');

  TypeNode *T = demangleType(MangledName, QualifierMangleMode::Result);
  if (Error || !MangledName.empty()) {
    Error = true;
    return nullptr;
  }
  return synthesizeVariable(Arena, T, "`RTTI Type Descriptor Name'");
}

// Parser entry point.
SymbolNode *Demangler::parse(StringView &MangledName) {
  // Typeinfo names are strings stored in RTTI data. They're not symbol names.
  // It's still useful to demangle them. They're the only demangled entity
  // that doesn't start with a "?" but a ".".
  if (MangledName.startsWith('.'))
    return demangleTypeinfoName(MangledName);

  if (MangledName.startsWith("??@"))
    return demangleMD5Name(MangledName);

  // MSVC-style mangled symbols must start with '?'.
  if (!MangledName.startsWith('?')) {
    Error = true;
    return nullptr;
  }

  consumeFront(MangledName, '?');

  // ?$ is a template instantiation, but all other names that start with ? are
  // operators / special names.
  if (SymbolNode *SI = demangleSpecialIntrinsic(MangledName))
    return SI;

  return demangleDeclarator(MangledName);
}

// Parses a tag type's unique name: the prefix ".?A" followed by a class type
// encoding. Sets Error if the prefix is missing or nothing follows it.
TagTypeNode *Demangler::parseTagUniqueName(StringView &MangledName) {
  if (!consumeFront(MangledName, ".?A")) {
    Error = true;
    return nullptr;
  }
  // NOTE(review): this repeats the consume that just succeeded, so it only
  // has an effect on input that starts with ".?A.?A" -- confirm whether that
  // is intentional.
  consumeFront(MangledName, ".?A");
  if (MangledName.empty()) {
    Error = true;
    return nullptr;
  }

  return demangleClassType(MangledName);
}

// <type-encoding> ::= <storage-class> <variable-type>
// <storage-class> ::= 0  # private static member
//                 ::= 1  # protected static member
//                 ::= 2  # public static member
//                 ::= 3  # global
//                 ::= 4  # static local

// Demangles a variable's type and trailing qualifiers; the storage class SC
// has already been consumed by the caller. For pointer types the trailing
// qualifiers are OR-ed into the pointee's qualifiers; for everything else
// they replace the type's own qualifiers.
VariableSymbolNode *Demangler::demangleVariableEncoding(StringView &MangledName,
                                                        StorageClass SC) {
  VariableSymbolNode *VSN = Arena.alloc<VariableSymbolNode>();

  VSN->Type = demangleType(MangledName, QualifierMangleMode::Drop);
  VSN->SC = SC;

  if (Error)
    return nullptr;

  // <variable-type> ::= <type> <cvr-qualifiers>
  //                 ::= <type> <pointee-cvr-qualifiers> # pointers, references
  switch (VSN->Type->kind()) {
  case NodeKind::PointerType: {
    PointerTypeNode *PTN = static_cast<PointerTypeNode *>(VSN->Type);

    Qualifiers ExtraChildQuals = Q_None;
    PTN->Quals = Qualifiers(VSN->Type->Quals |
                            demanglePointerExtQualifiers(MangledName));

    bool IsMember = false;
    std::tie(ExtraChildQuals, IsMember) = demangleQualifiers(MangledName);

    if (PTN->ClassParent) {
      // The name is parsed only to advance MangledName; its value is unused.
      QualifiedNameNode *BackRefName =
          demangleFullyQualifiedTypeName(MangledName);
      (void)BackRefName;
    }
    PTN->Pointee->Quals = Qualifiers(PTN->Pointee->Quals | ExtraChildQuals);

    break;
  }
  default:
    VSN->Type->Quals = demangleQualifiers(MangledName).first;
    break;
  }

  return VSN;
}

// Sometimes numbers are encoded in mangled symbols.
For example, 912 // "int (*x)[20]" is a valid C type (x is a pointer to an array of 913 // length 20), so we need some way to embed numbers as part of symbols. 914 // This function parses it. 915 // 916 // <number> ::= [?] <non-negative integer> 917 // 918 // <non-negative integer> ::= <decimal digit> # when 1 <= Number <= 10 919 // ::= <hex digit>+ @ # when Number == 0 or >= 10 920 // 921 // <hex-digit> ::= [A-P] # A = 0, B = 1, ... 922 std::pair<uint64_t, bool> Demangler::demangleNumber(StringView &MangledName) { 923 bool IsNegative = consumeFront(MangledName, '?'); 924 925 if (startsWithDigit(MangledName)) { 926 uint64_t Ret = MangledName[0] - '0' + 1; 927 MangledName.remove_prefix(1); 928 return {Ret, IsNegative}; 929 } 930 931 uint64_t Ret = 0; 932 for (size_t i = 0; i < MangledName.size(); ++i) { 933 char C = MangledName[i]; 934 if (C == '@') { 935 MangledName.remove_prefix(i + 1); 936 return {Ret, IsNegative}; 937 } 938 if ('A' <= C && C <= 'P') { 939 Ret = (Ret << 4) + (C - 'A'); 940 continue; 941 } 942 break; 943 } 944 945 Error = true; 946 return {0ULL, false}; 947 } 948 949 uint64_t Demangler::demangleUnsigned(StringView &MangledName) { 950 bool IsNegative = false; 951 uint64_t Number = 0; 952 std::tie(Number, IsNegative) = demangleNumber(MangledName); 953 if (IsNegative) 954 Error = true; 955 return Number; 956 } 957 958 int64_t Demangler::demangleSigned(StringView &MangledName) { 959 bool IsNegative = false; 960 uint64_t Number = 0; 961 std::tie(Number, IsNegative) = demangleNumber(MangledName); 962 if (Number > INT64_MAX) 963 Error = true; 964 int64_t I = static_cast<int64_t>(Number); 965 return IsNegative ? -I : I; 966 } 967 968 // First 10 strings can be referenced by special BackReferences ?0, ?1, ..., ?9. 969 // Memorize it. 
970 void Demangler::memorizeString(StringView S) { 971 if (Backrefs.NamesCount >= BackrefContext::Max) 972 return; 973 for (size_t i = 0; i < Backrefs.NamesCount; ++i) 974 if (S == Backrefs.Names[i]->Name) 975 return; 976 NamedIdentifierNode *N = Arena.alloc<NamedIdentifierNode>(); 977 N->Name = S; 978 Backrefs.Names[Backrefs.NamesCount++] = N; 979 } 980 981 NamedIdentifierNode *Demangler::demangleBackRefName(StringView &MangledName) { 982 assert(startsWithDigit(MangledName)); 983 984 size_t I = MangledName[0] - '0'; 985 if (I >= Backrefs.NamesCount) { 986 Error = true; 987 return nullptr; 988 } 989 990 MangledName.remove_prefix(1); 991 return Backrefs.Names[I]; 992 } 993 994 void Demangler::memorizeIdentifier(IdentifierNode *Identifier) { 995 // Render this class template name into a string buffer so that we can 996 // memorize it for the purpose of back-referencing. 997 OutputBuffer OB; 998 Identifier->output(OB, OF_Default); 999 StringView Owned = copyString(OB); 1000 memorizeString(Owned); 1001 std::free(OB.getBuffer()); 1002 } 1003 1004 IdentifierNode * 1005 Demangler::demangleTemplateInstantiationName(StringView &MangledName, 1006 NameBackrefBehavior NBB) { 1007 assert(MangledName.startsWith("?$")); 1008 consumeFront(MangledName, "?$"); 1009 1010 BackrefContext OuterContext; 1011 std::swap(OuterContext, Backrefs); 1012 1013 IdentifierNode *Identifier = 1014 demangleUnqualifiedSymbolName(MangledName, NBB_Simple); 1015 if (!Error) 1016 Identifier->TemplateParams = demangleTemplateParameterList(MangledName); 1017 1018 std::swap(OuterContext, Backrefs); 1019 if (Error) 1020 return nullptr; 1021 1022 if (NBB & NBB_Template) { 1023 // NBB_Template is only set for types and non-leaf names ("a::" in "a::b"). 1024 // Structors and conversion operators only makes sense in a leaf name, so 1025 // reject them in NBB_Template contexts. 
1026 if (Identifier->kind() == NodeKind::ConversionOperatorIdentifier || 1027 Identifier->kind() == NodeKind::StructorIdentifier) { 1028 Error = true; 1029 return nullptr; 1030 } 1031 1032 memorizeIdentifier(Identifier); 1033 } 1034 1035 return Identifier; 1036 } 1037 1038 NamedIdentifierNode *Demangler::demangleSimpleName(StringView &MangledName, 1039 bool Memorize) { 1040 StringView S = demangleSimpleString(MangledName, Memorize); 1041 if (Error) 1042 return nullptr; 1043 1044 NamedIdentifierNode *Name = Arena.alloc<NamedIdentifierNode>(); 1045 Name->Name = S; 1046 return Name; 1047 } 1048 1049 static bool isRebasedHexDigit(char C) { return (C >= 'A' && C <= 'P'); } 1050 1051 static uint8_t rebasedHexDigitToNumber(char C) { 1052 assert(isRebasedHexDigit(C)); 1053 return (C <= 'J') ? (C - 'A') : (10 + C - 'K'); 1054 } 1055 1056 uint8_t Demangler::demangleCharLiteral(StringView &MangledName) { 1057 assert(!MangledName.empty()); 1058 if (!MangledName.startsWith('?')) { 1059 const uint8_t F = MangledName.front(); 1060 MangledName.remove_prefix(1); 1061 return F; 1062 } 1063 1064 MangledName.remove_prefix(1); 1065 if (MangledName.empty()) 1066 goto CharLiteralError; 1067 1068 if (consumeFront(MangledName, '$')) { 1069 // Two hex digits 1070 if (MangledName.size() < 2) 1071 goto CharLiteralError; 1072 StringView Nibbles = MangledName.substr(0, 2); 1073 if (!isRebasedHexDigit(Nibbles[0]) || !isRebasedHexDigit(Nibbles[1])) 1074 goto CharLiteralError; 1075 // Don't append the null terminator. 1076 uint8_t C1 = rebasedHexDigitToNumber(Nibbles[0]); 1077 uint8_t C2 = rebasedHexDigitToNumber(Nibbles[1]); 1078 MangledName.remove_prefix(2); 1079 return (C1 << 4) | C2; 1080 } 1081 1082 if (startsWithDigit(MangledName)) { 1083 const char *Lookup = ",/\\:. 
\n\t'-"; 1084 char C = Lookup[MangledName[0] - '0']; 1085 MangledName.remove_prefix(1); 1086 return C; 1087 } 1088 1089 if (MangledName[0] >= 'a' && MangledName[0] <= 'z') { 1090 char Lookup[26] = {'\xE1', '\xE2', '\xE3', '\xE4', '\xE5', '\xE6', '\xE7', 1091 '\xE8', '\xE9', '\xEA', '\xEB', '\xEC', '\xED', '\xEE', 1092 '\xEF', '\xF0', '\xF1', '\xF2', '\xF3', '\xF4', '\xF5', 1093 '\xF6', '\xF7', '\xF8', '\xF9', '\xFA'}; 1094 char C = Lookup[MangledName[0] - 'a']; 1095 MangledName.remove_prefix(1); 1096 return C; 1097 } 1098 1099 if (MangledName[0] >= 'A' && MangledName[0] <= 'Z') { 1100 char Lookup[26] = {'\xC1', '\xC2', '\xC3', '\xC4', '\xC5', '\xC6', '\xC7', 1101 '\xC8', '\xC9', '\xCA', '\xCB', '\xCC', '\xCD', '\xCE', 1102 '\xCF', '\xD0', '\xD1', '\xD2', '\xD3', '\xD4', '\xD5', 1103 '\xD6', '\xD7', '\xD8', '\xD9', '\xDA'}; 1104 char C = Lookup[MangledName[0] - 'A']; 1105 MangledName.remove_prefix(1); 1106 return C; 1107 } 1108 1109 CharLiteralError: 1110 Error = true; 1111 return '\0'; 1112 } 1113 1114 wchar_t Demangler::demangleWcharLiteral(StringView &MangledName) { 1115 uint8_t C1, C2; 1116 1117 C1 = demangleCharLiteral(MangledName); 1118 if (Error || MangledName.empty()) 1119 goto WCharLiteralError; 1120 C2 = demangleCharLiteral(MangledName); 1121 if (Error) 1122 goto WCharLiteralError; 1123 1124 return ((wchar_t)C1 << 8) | (wchar_t)C2; 1125 1126 WCharLiteralError: 1127 Error = true; 1128 return L'\0'; 1129 } 1130 1131 static void writeHexDigit(char *Buffer, uint8_t Digit) { 1132 assert(Digit <= 15); 1133 *Buffer = (Digit < 10) ? ('0' + Digit) : ('A' + Digit - 10); 1134 } 1135 1136 static void outputHex(OutputBuffer &OB, unsigned C) { 1137 assert (C != 0); 1138 1139 // It's easier to do the math if we can work from right to left, but we need 1140 // to print the numbers from left to right. So render this into a temporary 1141 // buffer first, then output the temporary buffer. Each byte is of the form 1142 // \xAB, which means that each byte needs 4 characters. 
Since there are at 1143 // most 4 bytes, we need a 4*4+1 = 17 character temporary buffer. 1144 char TempBuffer[17]; 1145 1146 ::memset(TempBuffer, 0, sizeof(TempBuffer)); 1147 constexpr int MaxPos = sizeof(TempBuffer) - 1; 1148 1149 int Pos = MaxPos - 1; // TempBuffer[MaxPos] is the terminating \0. 1150 while (C != 0) { 1151 for (int I = 0; I < 2; ++I) { 1152 writeHexDigit(&TempBuffer[Pos--], C % 16); 1153 C /= 16; 1154 } 1155 } 1156 TempBuffer[Pos--] = 'x'; 1157 assert(Pos >= 0); 1158 TempBuffer[Pos--] = '\\'; 1159 OB << StringView(&TempBuffer[Pos + 1]); 1160 } 1161 1162 static void outputEscapedChar(OutputBuffer &OB, unsigned C) { 1163 switch (C) { 1164 case '\0': // nul 1165 OB << "\\0"; 1166 return; 1167 case '\'': // single quote 1168 OB << "\\\'"; 1169 return; 1170 case '\"': // double quote 1171 OB << "\\\""; 1172 return; 1173 case '\\': // backslash 1174 OB << "\\\\"; 1175 return; 1176 case '\a': // bell 1177 OB << "\\a"; 1178 return; 1179 case '\b': // backspace 1180 OB << "\\b"; 1181 return; 1182 case '\f': // form feed 1183 OB << "\\f"; 1184 return; 1185 case '\n': // new line 1186 OB << "\\n"; 1187 return; 1188 case '\r': // carriage return 1189 OB << "\\r"; 1190 return; 1191 case '\t': // tab 1192 OB << "\\t"; 1193 return; 1194 case '\v': // vertical tab 1195 OB << "\\v"; 1196 return; 1197 default: 1198 break; 1199 } 1200 1201 if (C > 0x1F && C < 0x7F) { 1202 // Standard ascii char. 
1203 OB << (char)C; 1204 return; 1205 } 1206 1207 outputHex(OB, C); 1208 } 1209 1210 static unsigned countTrailingNullBytes(const uint8_t *StringBytes, int Length) { 1211 const uint8_t *End = StringBytes + Length - 1; 1212 unsigned Count = 0; 1213 while (Length > 0 && *End == 0) { 1214 --Length; 1215 --End; 1216 ++Count; 1217 } 1218 return Count; 1219 } 1220 1221 static unsigned countEmbeddedNulls(const uint8_t *StringBytes, 1222 unsigned Length) { 1223 unsigned Result = 0; 1224 for (unsigned I = 0; I < Length; ++I) { 1225 if (*StringBytes++ == 0) 1226 ++Result; 1227 } 1228 return Result; 1229 } 1230 1231 // A mangled (non-wide) string literal stores the total length of the string it 1232 // refers to (passed in NumBytes), and it contains up to 32 bytes of actual text 1233 // (passed in StringBytes, NumChars). 1234 static unsigned guessCharByteSize(const uint8_t *StringBytes, unsigned NumChars, 1235 uint64_t NumBytes) { 1236 assert(NumBytes > 0); 1237 1238 // If the number of bytes is odd, this is guaranteed to be a char string. 1239 if (NumBytes % 2 == 1) 1240 return 1; 1241 1242 // All strings can encode at most 32 bytes of data. If it's less than that, 1243 // then we encoded the entire string. In this case we check for a 1-byte, 1244 // 2-byte, or 4-byte null terminator. 1245 if (NumBytes < 32) { 1246 unsigned TrailingNulls = countTrailingNullBytes(StringBytes, NumChars); 1247 if (TrailingNulls >= 4 && NumBytes % 4 == 0) 1248 return 4; 1249 if (TrailingNulls >= 2) 1250 return 2; 1251 return 1; 1252 } 1253 1254 // The whole string was not able to be encoded. Try to look at embedded null 1255 // terminators to guess. The heuristic is that we count all embedded null 1256 // terminators. If more than 2/3 are null, it's a char32. If more than 1/3 1257 // are null, it's a char16. Otherwise it's a char8. 
This obviously isn't 1258 // perfect and is biased towards languages that have ascii alphabets, but this 1259 // was always going to be best effort since the encoding is lossy. 1260 unsigned Nulls = countEmbeddedNulls(StringBytes, NumChars); 1261 if (Nulls >= 2 * NumChars / 3 && NumBytes % 4 == 0) 1262 return 4; 1263 if (Nulls >= NumChars / 3) 1264 return 2; 1265 return 1; 1266 } 1267 1268 static unsigned decodeMultiByteChar(const uint8_t *StringBytes, 1269 unsigned CharIndex, unsigned CharBytes) { 1270 assert(CharBytes == 1 || CharBytes == 2 || CharBytes == 4); 1271 unsigned Offset = CharIndex * CharBytes; 1272 unsigned Result = 0; 1273 StringBytes = StringBytes + Offset; 1274 for (unsigned I = 0; I < CharBytes; ++I) { 1275 unsigned C = static_cast<unsigned>(StringBytes[I]); 1276 Result |= C << (8 * I); 1277 } 1278 return Result; 1279 } 1280 1281 FunctionSymbolNode *Demangler::demangleVcallThunkNode(StringView &MangledName) { 1282 FunctionSymbolNode *FSN = Arena.alloc<FunctionSymbolNode>(); 1283 VcallThunkIdentifierNode *VTIN = Arena.alloc<VcallThunkIdentifierNode>(); 1284 FSN->Signature = Arena.alloc<ThunkSignatureNode>(); 1285 FSN->Signature->FunctionClass = FC_NoParameterList; 1286 1287 FSN->Name = demangleNameScopeChain(MangledName, VTIN); 1288 if (!Error) 1289 Error = !consumeFront(MangledName, "$B"); 1290 if (!Error) 1291 VTIN->OffsetInVTable = demangleUnsigned(MangledName); 1292 if (!Error) 1293 Error = !consumeFront(MangledName, 'A'); 1294 if (!Error) 1295 FSN->Signature->CallConvention = demangleCallingConvention(MangledName); 1296 return (Error) ? nullptr : FSN; 1297 } 1298 1299 EncodedStringLiteralNode * 1300 Demangler::demangleStringLiteral(StringView &MangledName) { 1301 // This function uses goto, so declare all variables up front. 
1302 OutputBuffer OB; 1303 StringView CRC; 1304 uint64_t StringByteSize; 1305 bool IsWcharT = false; 1306 bool IsNegative = false; 1307 size_t CrcEndPos = 0; 1308 char F; 1309 1310 EncodedStringLiteralNode *Result = Arena.alloc<EncodedStringLiteralNode>(); 1311 1312 // Prefix indicating the beginning of a string literal 1313 if (!consumeFront(MangledName, "@_")) 1314 goto StringLiteralError; 1315 if (MangledName.empty()) 1316 goto StringLiteralError; 1317 1318 // Char Type (regular or wchar_t) 1319 F = MangledName.front(); 1320 MangledName.remove_prefix(1); 1321 switch (F) { 1322 case '1': 1323 IsWcharT = true; 1324 DEMANGLE_FALLTHROUGH; 1325 case '0': 1326 break; 1327 default: 1328 goto StringLiteralError; 1329 } 1330 1331 // Encoded Length 1332 std::tie(StringByteSize, IsNegative) = demangleNumber(MangledName); 1333 if (Error || IsNegative || StringByteSize < (IsWcharT ? 2 : 1)) 1334 goto StringLiteralError; 1335 1336 // CRC 32 (always 8 characters plus a terminator) 1337 CrcEndPos = MangledName.find('@'); 1338 if (CrcEndPos == StringView::npos) 1339 goto StringLiteralError; 1340 CRC = MangledName.substr(0, CrcEndPos); 1341 MangledName.remove_prefix(CrcEndPos + 1); 1342 if (MangledName.empty()) 1343 goto StringLiteralError; 1344 1345 if (IsWcharT) { 1346 Result->Char = CharKind::Wchar; 1347 if (StringByteSize > 64) 1348 Result->IsTruncated = true; 1349 1350 while (!consumeFront(MangledName, '@')) { 1351 if (MangledName.size() < 2) 1352 goto StringLiteralError; 1353 wchar_t W = demangleWcharLiteral(MangledName); 1354 if (StringByteSize != 2 || Result->IsTruncated) 1355 outputEscapedChar(OB, W); 1356 StringByteSize -= 2; 1357 if (Error) 1358 goto StringLiteralError; 1359 } 1360 } else { 1361 // The max byte length is actually 32, but some compilers mangled strings 1362 // incorrectly, so we have to assume it can go higher. 
1363 constexpr unsigned MaxStringByteLength = 32 * 4; 1364 uint8_t StringBytes[MaxStringByteLength]; 1365 1366 unsigned BytesDecoded = 0; 1367 while (!consumeFront(MangledName, '@')) { 1368 if (MangledName.size() < 1 || BytesDecoded >= MaxStringByteLength) 1369 goto StringLiteralError; 1370 StringBytes[BytesDecoded++] = demangleCharLiteral(MangledName); 1371 } 1372 1373 if (StringByteSize > BytesDecoded) 1374 Result->IsTruncated = true; 1375 1376 unsigned CharBytes = 1377 guessCharByteSize(StringBytes, BytesDecoded, StringByteSize); 1378 assert(StringByteSize % CharBytes == 0); 1379 switch (CharBytes) { 1380 case 1: 1381 Result->Char = CharKind::Char; 1382 break; 1383 case 2: 1384 Result->Char = CharKind::Char16; 1385 break; 1386 case 4: 1387 Result->Char = CharKind::Char32; 1388 break; 1389 default: 1390 DEMANGLE_UNREACHABLE; 1391 } 1392 const unsigned NumChars = BytesDecoded / CharBytes; 1393 for (unsigned CharIndex = 0; CharIndex < NumChars; ++CharIndex) { 1394 unsigned NextChar = 1395 decodeMultiByteChar(StringBytes, CharIndex, CharBytes); 1396 if (CharIndex + 1 < NumChars || Result->IsTruncated) 1397 outputEscapedChar(OB, NextChar); 1398 } 1399 } 1400 1401 Result->DecodedString = copyString(OB); 1402 std::free(OB.getBuffer()); 1403 return Result; 1404 1405 StringLiteralError: 1406 Error = true; 1407 std::free(OB.getBuffer()); 1408 return nullptr; 1409 } 1410 1411 // Returns MangledName's prefix before the first '@', or an error if 1412 // MangledName contains no '@' or the prefix has length 0. 
1413 StringView Demangler::demangleSimpleString(StringView &MangledName, 1414 bool Memorize) { 1415 StringView S; 1416 for (size_t i = 0; i < MangledName.size(); ++i) { 1417 if (MangledName[i] != '@') 1418 continue; 1419 if (i == 0) 1420 break; 1421 S = MangledName.substr(0, i); 1422 MangledName.remove_prefix(i + 1); 1423 1424 if (Memorize) 1425 memorizeString(S); 1426 return S; 1427 } 1428 1429 Error = true; 1430 return {}; 1431 } 1432 1433 NamedIdentifierNode * 1434 Demangler::demangleAnonymousNamespaceName(StringView &MangledName) { 1435 assert(MangledName.startsWith("?A")); 1436 consumeFront(MangledName, "?A"); 1437 1438 NamedIdentifierNode *Node = Arena.alloc<NamedIdentifierNode>(); 1439 Node->Name = "`anonymous namespace'"; 1440 size_t EndPos = MangledName.find('@'); 1441 if (EndPos == StringView::npos) { 1442 Error = true; 1443 return nullptr; 1444 } 1445 StringView NamespaceKey = MangledName.substr(0, EndPos); 1446 memorizeString(NamespaceKey); 1447 MangledName = MangledName.substr(EndPos + 1); 1448 return Node; 1449 } 1450 1451 NamedIdentifierNode * 1452 Demangler::demangleLocallyScopedNamePiece(StringView &MangledName) { 1453 assert(startsWithLocalScopePattern(MangledName)); 1454 1455 NamedIdentifierNode *Identifier = Arena.alloc<NamedIdentifierNode>(); 1456 consumeFront(MangledName, '?'); 1457 uint64_t Number = 0; 1458 bool IsNegative = false; 1459 std::tie(Number, IsNegative) = demangleNumber(MangledName); 1460 assert(!IsNegative); 1461 1462 // One ? to terminate the number 1463 consumeFront(MangledName, '?'); 1464 1465 assert(!Error); 1466 Node *Scope = parse(MangledName); 1467 if (Error) 1468 return nullptr; 1469 1470 // Render the parent symbol's name into a buffer. 
1471 OutputBuffer OB; 1472 OB << '`'; 1473 Scope->output(OB, OF_Default); 1474 OB << '\''; 1475 OB << "::`" << Number << "'"; 1476 1477 Identifier->Name = copyString(OB); 1478 std::free(OB.getBuffer()); 1479 return Identifier; 1480 } 1481 1482 // Parses a type name in the form of A@B@C@@ which represents C::B::A. 1483 QualifiedNameNode * 1484 Demangler::demangleFullyQualifiedTypeName(StringView &MangledName) { 1485 IdentifierNode *Identifier = 1486 demangleUnqualifiedTypeName(MangledName, /*Memorize=*/true); 1487 if (Error) 1488 return nullptr; 1489 assert(Identifier); 1490 1491 QualifiedNameNode *QN = demangleNameScopeChain(MangledName, Identifier); 1492 if (Error) 1493 return nullptr; 1494 assert(QN); 1495 return QN; 1496 } 1497 1498 // Parses a symbol name in the form of A@B@C@@ which represents C::B::A. 1499 // Symbol names have slightly different rules regarding what can appear 1500 // so we separate out the implementations for flexibility. 1501 QualifiedNameNode * 1502 Demangler::demangleFullyQualifiedSymbolName(StringView &MangledName) { 1503 // This is the final component of a symbol name (i.e. the leftmost component 1504 // of a mangled name. Since the only possible template instantiation that 1505 // can appear in this context is a function template, and since those are 1506 // not saved for the purposes of name backreferences, only backref simple 1507 // names. 
1508 IdentifierNode *Identifier = 1509 demangleUnqualifiedSymbolName(MangledName, NBB_Simple); 1510 if (Error) 1511 return nullptr; 1512 1513 QualifiedNameNode *QN = demangleNameScopeChain(MangledName, Identifier); 1514 if (Error) 1515 return nullptr; 1516 1517 if (Identifier->kind() == NodeKind::StructorIdentifier) { 1518 if (QN->Components->Count < 2) { 1519 Error = true; 1520 return nullptr; 1521 } 1522 StructorIdentifierNode *SIN = 1523 static_cast<StructorIdentifierNode *>(Identifier); 1524 Node *ClassNode = QN->Components->Nodes[QN->Components->Count - 2]; 1525 SIN->Class = static_cast<IdentifierNode *>(ClassNode); 1526 } 1527 assert(QN); 1528 return QN; 1529 } 1530 1531 IdentifierNode *Demangler::demangleUnqualifiedTypeName(StringView &MangledName, 1532 bool Memorize) { 1533 // An inner-most name can be a back-reference, because a fully-qualified name 1534 // (e.g. Scope + Inner) can contain other fully qualified names inside of 1535 // them (for example template parameters), and these nested parameters can 1536 // refer to previously mangled types. 
1537 if (startsWithDigit(MangledName)) 1538 return demangleBackRefName(MangledName); 1539 1540 if (MangledName.startsWith("?$")) 1541 return demangleTemplateInstantiationName(MangledName, NBB_Template); 1542 1543 return demangleSimpleName(MangledName, Memorize); 1544 } 1545 1546 IdentifierNode * 1547 Demangler::demangleUnqualifiedSymbolName(StringView &MangledName, 1548 NameBackrefBehavior NBB) { 1549 if (startsWithDigit(MangledName)) 1550 return demangleBackRefName(MangledName); 1551 if (MangledName.startsWith("?$")) 1552 return demangleTemplateInstantiationName(MangledName, NBB); 1553 if (MangledName.startsWith('?')) 1554 return demangleFunctionIdentifierCode(MangledName); 1555 return demangleSimpleName(MangledName, /*Memorize=*/(NBB & NBB_Simple) != 0); 1556 } 1557 1558 IdentifierNode *Demangler::demangleNameScopePiece(StringView &MangledName) { 1559 if (startsWithDigit(MangledName)) 1560 return demangleBackRefName(MangledName); 1561 1562 if (MangledName.startsWith("?$")) 1563 return demangleTemplateInstantiationName(MangledName, NBB_Template); 1564 1565 if (MangledName.startsWith("?A")) 1566 return demangleAnonymousNamespaceName(MangledName); 1567 1568 if (startsWithLocalScopePattern(MangledName)) 1569 return demangleLocallyScopedNamePiece(MangledName); 1570 1571 return demangleSimpleName(MangledName, /*Memorize=*/true); 1572 } 1573 1574 static NodeArrayNode *nodeListToNodeArray(ArenaAllocator &Arena, NodeList *Head, 1575 size_t Count) { 1576 NodeArrayNode *N = Arena.alloc<NodeArrayNode>(); 1577 N->Count = Count; 1578 N->Nodes = Arena.allocArray<Node *>(Count); 1579 for (size_t I = 0; I < Count; ++I) { 1580 N->Nodes[I] = Head->N; 1581 Head = Head->Next; 1582 } 1583 return N; 1584 } 1585 1586 QualifiedNameNode * 1587 Demangler::demangleNameScopeChain(StringView &MangledName, 1588 IdentifierNode *UnqualifiedName) { 1589 NodeList *Head = Arena.alloc<NodeList>(); 1590 1591 Head->N = UnqualifiedName; 1592 1593 size_t Count = 1; 1594 while (!consumeFront(MangledName, 
"@")) { 1595 ++Count; 1596 NodeList *NewHead = Arena.alloc<NodeList>(); 1597 NewHead->Next = Head; 1598 Head = NewHead; 1599 1600 if (MangledName.empty()) { 1601 Error = true; 1602 return nullptr; 1603 } 1604 1605 assert(!Error); 1606 IdentifierNode *Elem = demangleNameScopePiece(MangledName); 1607 if (Error) 1608 return nullptr; 1609 1610 Head->N = Elem; 1611 } 1612 1613 QualifiedNameNode *QN = Arena.alloc<QualifiedNameNode>(); 1614 QN->Components = nodeListToNodeArray(Arena, Head, Count); 1615 return QN; 1616 } 1617 1618 FuncClass Demangler::demangleFunctionClass(StringView &MangledName) { 1619 const char F = MangledName.front(); 1620 MangledName.remove_prefix(1); 1621 switch (F) { 1622 case '9': 1623 return FuncClass(FC_ExternC | FC_NoParameterList); 1624 case 'A': 1625 return FC_Private; 1626 case 'B': 1627 return FuncClass(FC_Private | FC_Far); 1628 case 'C': 1629 return FuncClass(FC_Private | FC_Static); 1630 case 'D': 1631 return FuncClass(FC_Private | FC_Static | FC_Far); 1632 case 'E': 1633 return FuncClass(FC_Private | FC_Virtual); 1634 case 'F': 1635 return FuncClass(FC_Private | FC_Virtual | FC_Far); 1636 case 'G': 1637 return FuncClass(FC_Private | FC_StaticThisAdjust); 1638 case 'H': 1639 return FuncClass(FC_Private | FC_StaticThisAdjust | FC_Far); 1640 case 'I': 1641 return FuncClass(FC_Protected); 1642 case 'J': 1643 return FuncClass(FC_Protected | FC_Far); 1644 case 'K': 1645 return FuncClass(FC_Protected | FC_Static); 1646 case 'L': 1647 return FuncClass(FC_Protected | FC_Static | FC_Far); 1648 case 'M': 1649 return FuncClass(FC_Protected | FC_Virtual); 1650 case 'N': 1651 return FuncClass(FC_Protected | FC_Virtual | FC_Far); 1652 case 'O': 1653 return FuncClass(FC_Protected | FC_Virtual | FC_StaticThisAdjust); 1654 case 'P': 1655 return FuncClass(FC_Protected | FC_Virtual | FC_StaticThisAdjust | FC_Far); 1656 case 'Q': 1657 return FuncClass(FC_Public); 1658 case 'R': 1659 return FuncClass(FC_Public | FC_Far); 1660 case 'S': 1661 return 
FuncClass(FC_Public | FC_Static); 1662 case 'T': 1663 return FuncClass(FC_Public | FC_Static | FC_Far); 1664 case 'U': 1665 return FuncClass(FC_Public | FC_Virtual); 1666 case 'V': 1667 return FuncClass(FC_Public | FC_Virtual | FC_Far); 1668 case 'W': 1669 return FuncClass(FC_Public | FC_Virtual | FC_StaticThisAdjust); 1670 case 'X': 1671 return FuncClass(FC_Public | FC_Virtual | FC_StaticThisAdjust | FC_Far); 1672 case 'Y': 1673 return FuncClass(FC_Global); 1674 case 'Z': 1675 return FuncClass(FC_Global | FC_Far); 1676 case '$': { 1677 FuncClass VFlag = FC_VirtualThisAdjust; 1678 if (consumeFront(MangledName, 'R')) 1679 VFlag = FuncClass(VFlag | FC_VirtualThisAdjustEx); 1680 if (MangledName.empty()) 1681 break; 1682 const char F = MangledName.front(); 1683 MangledName.remove_prefix(1); 1684 switch (F) { 1685 case '0': 1686 return FuncClass(FC_Private | FC_Virtual | VFlag); 1687 case '1': 1688 return FuncClass(FC_Private | FC_Virtual | VFlag | FC_Far); 1689 case '2': 1690 return FuncClass(FC_Protected | FC_Virtual | VFlag); 1691 case '3': 1692 return FuncClass(FC_Protected | FC_Virtual | VFlag | FC_Far); 1693 case '4': 1694 return FuncClass(FC_Public | FC_Virtual | VFlag); 1695 case '5': 1696 return FuncClass(FC_Public | FC_Virtual | VFlag | FC_Far); 1697 } 1698 } 1699 } 1700 1701 Error = true; 1702 return FC_Public; 1703 } 1704 1705 CallingConv Demangler::demangleCallingConvention(StringView &MangledName) { 1706 if (MangledName.empty()) { 1707 Error = true; 1708 return CallingConv::None; 1709 } 1710 1711 const char F = MangledName.front(); 1712 MangledName.remove_prefix(1); 1713 switch (F) { 1714 case 'A': 1715 case 'B': 1716 return CallingConv::Cdecl; 1717 case 'C': 1718 case 'D': 1719 return CallingConv::Pascal; 1720 case 'E': 1721 case 'F': 1722 return CallingConv::Thiscall; 1723 case 'G': 1724 case 'H': 1725 return CallingConv::Stdcall; 1726 case 'I': 1727 case 'J': 1728 return CallingConv::Fastcall; 1729 case 'M': 1730 case 'N': 1731 return 
CallingConv::Clrcall; 1732 case 'O': 1733 case 'P': 1734 return CallingConv::Eabi; 1735 case 'Q': 1736 return CallingConv::Vectorcall; 1737 case 'S': 1738 return CallingConv::Swift; 1739 case 'W': 1740 return CallingConv::SwiftAsync; 1741 } 1742 1743 return CallingConv::None; 1744 } 1745 1746 StorageClass Demangler::demangleVariableStorageClass(StringView &MangledName) { 1747 assert(MangledName.front() >= '0' && MangledName.front() <= '4'); 1748 1749 const char F = MangledName.front(); 1750 MangledName.remove_prefix(1); 1751 switch (F) { 1752 case '0': 1753 return StorageClass::PrivateStatic; 1754 case '1': 1755 return StorageClass::ProtectedStatic; 1756 case '2': 1757 return StorageClass::PublicStatic; 1758 case '3': 1759 return StorageClass::Global; 1760 case '4': 1761 return StorageClass::FunctionLocalStatic; 1762 } 1763 DEMANGLE_UNREACHABLE; 1764 } 1765 1766 std::pair<Qualifiers, bool> 1767 Demangler::demangleQualifiers(StringView &MangledName) { 1768 if (MangledName.empty()) { 1769 Error = true; 1770 return std::make_pair(Q_None, false); 1771 } 1772 1773 const char F = MangledName.front(); 1774 MangledName.remove_prefix(1); 1775 switch (F) { 1776 // Member qualifiers 1777 case 'Q': 1778 return std::make_pair(Q_None, true); 1779 case 'R': 1780 return std::make_pair(Q_Const, true); 1781 case 'S': 1782 return std::make_pair(Q_Volatile, true); 1783 case 'T': 1784 return std::make_pair(Qualifiers(Q_Const | Q_Volatile), true); 1785 // Non-Member qualifiers 1786 case 'A': 1787 return std::make_pair(Q_None, false); 1788 case 'B': 1789 return std::make_pair(Q_Const, false); 1790 case 'C': 1791 return std::make_pair(Q_Volatile, false); 1792 case 'D': 1793 return std::make_pair(Qualifiers(Q_Const | Q_Volatile), false); 1794 } 1795 Error = true; 1796 return std::make_pair(Q_None, false); 1797 } 1798 1799 // <variable-type> ::= <type> <cvr-qualifiers> 1800 // ::= <type> <pointee-cvr-qualifiers> # pointers, references 1801 TypeNode *Demangler::demangleType(StringView 
&MangledName, 1802 QualifierMangleMode QMM) { 1803 Qualifiers Quals = Q_None; 1804 bool IsMember = false; 1805 if (QMM == QualifierMangleMode::Mangle) { 1806 std::tie(Quals, IsMember) = demangleQualifiers(MangledName); 1807 } else if (QMM == QualifierMangleMode::Result) { 1808 if (consumeFront(MangledName, '?')) 1809 std::tie(Quals, IsMember) = demangleQualifiers(MangledName); 1810 } 1811 1812 if (MangledName.empty()) { 1813 Error = true; 1814 return nullptr; 1815 } 1816 1817 TypeNode *Ty = nullptr; 1818 if (isTagType(MangledName)) 1819 Ty = demangleClassType(MangledName); 1820 else if (isPointerType(MangledName)) { 1821 if (isMemberPointer(MangledName, Error)) 1822 Ty = demangleMemberPointerType(MangledName); 1823 else if (!Error) 1824 Ty = demanglePointerType(MangledName); 1825 else 1826 return nullptr; 1827 } else if (isArrayType(MangledName)) 1828 Ty = demangleArrayType(MangledName); 1829 else if (isFunctionType(MangledName)) { 1830 if (consumeFront(MangledName, "$$A8@@")) 1831 Ty = demangleFunctionType(MangledName, true); 1832 else { 1833 assert(MangledName.startsWith("$$A6")); 1834 consumeFront(MangledName, "$$A6"); 1835 Ty = demangleFunctionType(MangledName, false); 1836 } 1837 } else if (isCustomType(MangledName)) { 1838 Ty = demangleCustomType(MangledName); 1839 } else { 1840 Ty = demanglePrimitiveType(MangledName); 1841 } 1842 1843 if (!Ty || Error) 1844 return Ty; 1845 Ty->Quals = Qualifiers(Ty->Quals | Quals); 1846 return Ty; 1847 } 1848 1849 bool Demangler::demangleThrowSpecification(StringView &MangledName) { 1850 if (consumeFront(MangledName, "_E")) 1851 return true; 1852 if (consumeFront(MangledName, 'Z')) 1853 return false; 1854 1855 Error = true; 1856 return false; 1857 } 1858 1859 FunctionSignatureNode *Demangler::demangleFunctionType(StringView &MangledName, 1860 bool HasThisQuals) { 1861 FunctionSignatureNode *FTy = Arena.alloc<FunctionSignatureNode>(); 1862 1863 if (HasThisQuals) { 1864 FTy->Quals = demanglePointerExtQualifiers(MangledName); 
1865 FTy->RefQualifier = demangleFunctionRefQualifier(MangledName); 1866 FTy->Quals = Qualifiers(FTy->Quals | demangleQualifiers(MangledName).first); 1867 } 1868 1869 // Fields that appear on both member and non-member functions. 1870 FTy->CallConvention = demangleCallingConvention(MangledName); 1871 1872 // <return-type> ::= <type> 1873 // ::= @ # structors (they have no declared return type) 1874 bool IsStructor = consumeFront(MangledName, '@'); 1875 if (!IsStructor) 1876 FTy->ReturnType = demangleType(MangledName, QualifierMangleMode::Result); 1877 1878 FTy->Params = demangleFunctionParameterList(MangledName, FTy->IsVariadic); 1879 1880 FTy->IsNoexcept = demangleThrowSpecification(MangledName); 1881 1882 return FTy; 1883 } 1884 1885 FunctionSymbolNode * 1886 Demangler::demangleFunctionEncoding(StringView &MangledName) { 1887 FuncClass ExtraFlags = FC_None; 1888 if (consumeFront(MangledName, "$$J0")) 1889 ExtraFlags = FC_ExternC; 1890 1891 if (MangledName.empty()) { 1892 Error = true; 1893 return nullptr; 1894 } 1895 1896 FuncClass FC = demangleFunctionClass(MangledName); 1897 FC = FuncClass(ExtraFlags | FC); 1898 1899 FunctionSignatureNode *FSN = nullptr; 1900 ThunkSignatureNode *TTN = nullptr; 1901 if (FC & FC_StaticThisAdjust) { 1902 TTN = Arena.alloc<ThunkSignatureNode>(); 1903 TTN->ThisAdjust.StaticOffset = demangleSigned(MangledName); 1904 } else if (FC & FC_VirtualThisAdjust) { 1905 TTN = Arena.alloc<ThunkSignatureNode>(); 1906 if (FC & FC_VirtualThisAdjustEx) { 1907 TTN->ThisAdjust.VBPtrOffset = demangleSigned(MangledName); 1908 TTN->ThisAdjust.VBOffsetOffset = demangleSigned(MangledName); 1909 } 1910 TTN->ThisAdjust.VtordispOffset = demangleSigned(MangledName); 1911 TTN->ThisAdjust.StaticOffset = demangleSigned(MangledName); 1912 } 1913 1914 if (FC & FC_NoParameterList) { 1915 // This is an extern "C" function whose full signature hasn't been mangled. 1916 // This happens when we need to mangle a local symbol inside of an extern 1917 // "C" function. 
1918 FSN = Arena.alloc<FunctionSignatureNode>(); 1919 } else { 1920 bool HasThisQuals = !(FC & (FC_Global | FC_Static)); 1921 FSN = demangleFunctionType(MangledName, HasThisQuals); 1922 } 1923 1924 if (Error) 1925 return nullptr; 1926 1927 if (TTN) { 1928 *static_cast<FunctionSignatureNode *>(TTN) = *FSN; 1929 FSN = TTN; 1930 } 1931 FSN->FunctionClass = FC; 1932 1933 FunctionSymbolNode *Symbol = Arena.alloc<FunctionSymbolNode>(); 1934 Symbol->Signature = FSN; 1935 return Symbol; 1936 } 1937 1938 CustomTypeNode *Demangler::demangleCustomType(StringView &MangledName) { 1939 assert(MangledName.startsWith('?')); 1940 MangledName.remove_prefix(1); 1941 1942 CustomTypeNode *CTN = Arena.alloc<CustomTypeNode>(); 1943 CTN->Identifier = demangleUnqualifiedTypeName(MangledName, /*Memorize=*/true); 1944 if (!consumeFront(MangledName, '@')) 1945 Error = true; 1946 if (Error) 1947 return nullptr; 1948 return CTN; 1949 } 1950 1951 // Reads a primitive type. 1952 PrimitiveTypeNode *Demangler::demanglePrimitiveType(StringView &MangledName) { 1953 if (consumeFront(MangledName, "$$T")) 1954 return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Nullptr); 1955 1956 const char F = MangledName.front(); 1957 MangledName.remove_prefix(1); 1958 switch (F) { 1959 case 'X': 1960 return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Void); 1961 case 'D': 1962 return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Char); 1963 case 'C': 1964 return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Schar); 1965 case 'E': 1966 return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Uchar); 1967 case 'F': 1968 return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Short); 1969 case 'G': 1970 return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Ushort); 1971 case 'H': 1972 return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Int); 1973 case 'I': 1974 return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Uint); 1975 case 'J': 1976 return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Long); 1977 case 'K': 1978 
return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Ulong); 1979 case 'M': 1980 return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Float); 1981 case 'N': 1982 return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Double); 1983 case 'O': 1984 return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Ldouble); 1985 case '_': { 1986 if (MangledName.empty()) { 1987 Error = true; 1988 return nullptr; 1989 } 1990 const char F = MangledName.front(); 1991 MangledName.remove_prefix(1); 1992 switch (F) { 1993 case 'N': 1994 return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Bool); 1995 case 'J': 1996 return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Int64); 1997 case 'K': 1998 return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Uint64); 1999 case 'W': 2000 return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Wchar); 2001 case 'Q': 2002 return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Char8); 2003 case 'S': 2004 return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Char16); 2005 case 'U': 2006 return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Char32); 2007 } 2008 break; 2009 } 2010 } 2011 Error = true; 2012 return nullptr; 2013 } 2014 2015 TagTypeNode *Demangler::demangleClassType(StringView &MangledName) { 2016 TagTypeNode *TT = nullptr; 2017 2018 const char F = MangledName.front(); 2019 MangledName.remove_prefix(1); 2020 switch (F) { 2021 case 'T': 2022 TT = Arena.alloc<TagTypeNode>(TagKind::Union); 2023 break; 2024 case 'U': 2025 TT = Arena.alloc<TagTypeNode>(TagKind::Struct); 2026 break; 2027 case 'V': 2028 TT = Arena.alloc<TagTypeNode>(TagKind::Class); 2029 break; 2030 case 'W': 2031 if (!consumeFront(MangledName, '4')) { 2032 Error = true; 2033 return nullptr; 2034 } 2035 TT = Arena.alloc<TagTypeNode>(TagKind::Enum); 2036 break; 2037 default: 2038 assert(false); 2039 } 2040 2041 TT->QualifiedName = demangleFullyQualifiedTypeName(MangledName); 2042 return TT; 2043 } 2044 2045 // <pointer-type> ::= E? 
<pointer-cvr-qualifiers> <ext-qualifiers> <type> 2046 // # the E is required for 64-bit non-static pointers 2047 PointerTypeNode *Demangler::demanglePointerType(StringView &MangledName) { 2048 PointerTypeNode *Pointer = Arena.alloc<PointerTypeNode>(); 2049 2050 std::tie(Pointer->Quals, Pointer->Affinity) = 2051 demanglePointerCVQualifiers(MangledName); 2052 2053 if (consumeFront(MangledName, "6")) { 2054 Pointer->Pointee = demangleFunctionType(MangledName, false); 2055 return Pointer; 2056 } 2057 2058 Qualifiers ExtQuals = demanglePointerExtQualifiers(MangledName); 2059 Pointer->Quals = Qualifiers(Pointer->Quals | ExtQuals); 2060 2061 Pointer->Pointee = demangleType(MangledName, QualifierMangleMode::Mangle); 2062 return Pointer; 2063 } 2064 2065 PointerTypeNode *Demangler::demangleMemberPointerType(StringView &MangledName) { 2066 PointerTypeNode *Pointer = Arena.alloc<PointerTypeNode>(); 2067 2068 std::tie(Pointer->Quals, Pointer->Affinity) = 2069 demanglePointerCVQualifiers(MangledName); 2070 assert(Pointer->Affinity == PointerAffinity::Pointer); 2071 2072 Qualifiers ExtQuals = demanglePointerExtQualifiers(MangledName); 2073 Pointer->Quals = Qualifiers(Pointer->Quals | ExtQuals); 2074 2075 // isMemberPointer() only returns true if there is at least one character 2076 // after the qualifiers. 
2077 if (consumeFront(MangledName, "8")) { 2078 Pointer->ClassParent = demangleFullyQualifiedTypeName(MangledName); 2079 Pointer->Pointee = demangleFunctionType(MangledName, true); 2080 } else { 2081 Qualifiers PointeeQuals = Q_None; 2082 bool IsMember = false; 2083 std::tie(PointeeQuals, IsMember) = demangleQualifiers(MangledName); 2084 assert(IsMember || Error); 2085 Pointer->ClassParent = demangleFullyQualifiedTypeName(MangledName); 2086 2087 Pointer->Pointee = demangleType(MangledName, QualifierMangleMode::Drop); 2088 if (Pointer->Pointee) 2089 Pointer->Pointee->Quals = PointeeQuals; 2090 } 2091 2092 return Pointer; 2093 } 2094 2095 Qualifiers Demangler::demanglePointerExtQualifiers(StringView &MangledName) { 2096 Qualifiers Quals = Q_None; 2097 if (consumeFront(MangledName, 'E')) 2098 Quals = Qualifiers(Quals | Q_Pointer64); 2099 if (consumeFront(MangledName, 'I')) 2100 Quals = Qualifiers(Quals | Q_Restrict); 2101 if (consumeFront(MangledName, 'F')) 2102 Quals = Qualifiers(Quals | Q_Unaligned); 2103 2104 return Quals; 2105 } 2106 2107 ArrayTypeNode *Demangler::demangleArrayType(StringView &MangledName) { 2108 assert(MangledName.front() == 'Y'); 2109 MangledName.remove_prefix(1); 2110 2111 uint64_t Rank = 0; 2112 bool IsNegative = false; 2113 std::tie(Rank, IsNegative) = demangleNumber(MangledName); 2114 if (IsNegative || Rank == 0) { 2115 Error = true; 2116 return nullptr; 2117 } 2118 2119 ArrayTypeNode *ATy = Arena.alloc<ArrayTypeNode>(); 2120 NodeList *Head = Arena.alloc<NodeList>(); 2121 NodeList *Tail = Head; 2122 2123 for (uint64_t I = 0; I < Rank; ++I) { 2124 uint64_t D = 0; 2125 std::tie(D, IsNegative) = demangleNumber(MangledName); 2126 if (Error || IsNegative) { 2127 Error = true; 2128 return nullptr; 2129 } 2130 Tail->N = Arena.alloc<IntegerLiteralNode>(D, IsNegative); 2131 if (I + 1 < Rank) { 2132 Tail->Next = Arena.alloc<NodeList>(); 2133 Tail = Tail->Next; 2134 } 2135 } 2136 ATy->Dimensions = nodeListToNodeArray(Arena, Head, Rank); 2137 2138 if 
(consumeFront(MangledName, "$$C")) { 2139 bool IsMember = false; 2140 std::tie(ATy->Quals, IsMember) = demangleQualifiers(MangledName); 2141 if (IsMember) { 2142 Error = true; 2143 return nullptr; 2144 } 2145 } 2146 2147 ATy->ElementType = demangleType(MangledName, QualifierMangleMode::Drop); 2148 return ATy; 2149 } 2150 2151 // Reads a function's parameters. 2152 NodeArrayNode *Demangler::demangleFunctionParameterList(StringView &MangledName, 2153 bool &IsVariadic) { 2154 // Empty parameter list. 2155 if (consumeFront(MangledName, 'X')) 2156 return nullptr; 2157 2158 NodeList *Head = Arena.alloc<NodeList>(); 2159 NodeList **Current = &Head; 2160 size_t Count = 0; 2161 while (!Error && !MangledName.startsWith('@') && 2162 !MangledName.startsWith('Z')) { 2163 ++Count; 2164 2165 if (startsWithDigit(MangledName)) { 2166 size_t N = MangledName[0] - '0'; 2167 if (N >= Backrefs.FunctionParamCount) { 2168 Error = true; 2169 return nullptr; 2170 } 2171 MangledName.remove_prefix(1); 2172 2173 *Current = Arena.alloc<NodeList>(); 2174 (*Current)->N = Backrefs.FunctionParams[N]; 2175 Current = &(*Current)->Next; 2176 continue; 2177 } 2178 2179 size_t OldSize = MangledName.size(); 2180 2181 *Current = Arena.alloc<NodeList>(); 2182 TypeNode *TN = demangleType(MangledName, QualifierMangleMode::Drop); 2183 if (!TN || Error) 2184 return nullptr; 2185 2186 (*Current)->N = TN; 2187 2188 size_t CharsConsumed = OldSize - MangledName.size(); 2189 assert(CharsConsumed != 0); 2190 2191 // Single-letter types are ignored for backreferences because memorizing 2192 // them doesn't save anything. 
2193 if (Backrefs.FunctionParamCount <= 9 && CharsConsumed > 1) 2194 Backrefs.FunctionParams[Backrefs.FunctionParamCount++] = TN; 2195 2196 Current = &(*Current)->Next; 2197 } 2198 2199 if (Error) 2200 return nullptr; 2201 2202 NodeArrayNode *NA = nodeListToNodeArray(Arena, Head, Count); 2203 // A non-empty parameter list is terminated by either 'Z' (variadic) parameter 2204 // list or '@' (non variadic). Careful not to consume "@Z", as in that case 2205 // the following Z could be a throw specifier. 2206 if (consumeFront(MangledName, '@')) 2207 return NA; 2208 2209 if (consumeFront(MangledName, 'Z')) { 2210 IsVariadic = true; 2211 return NA; 2212 } 2213 2214 DEMANGLE_UNREACHABLE; 2215 } 2216 2217 NodeArrayNode * 2218 Demangler::demangleTemplateParameterList(StringView &MangledName) { 2219 NodeList *Head = nullptr; 2220 NodeList **Current = &Head; 2221 size_t Count = 0; 2222 2223 while (!MangledName.startsWith('@')) { 2224 if (consumeFront(MangledName, "$S") || consumeFront(MangledName, "$$V") || 2225 consumeFront(MangledName, "$$$V") || consumeFront(MangledName, "$$Z")) { 2226 // parameter pack separator 2227 continue; 2228 } 2229 2230 ++Count; 2231 2232 // Template parameter lists don't participate in back-referencing. 2233 *Current = Arena.alloc<NodeList>(); 2234 2235 NodeList &TP = **Current; 2236 2237 TemplateParameterReferenceNode *TPRN = nullptr; 2238 if (consumeFront(MangledName, "$$Y")) { 2239 // Template alias 2240 TP.N = demangleFullyQualifiedTypeName(MangledName); 2241 } else if (consumeFront(MangledName, "$$B")) { 2242 // Array 2243 TP.N = demangleType(MangledName, QualifierMangleMode::Drop); 2244 } else if (consumeFront(MangledName, "$$C")) { 2245 // Type has qualifiers. 
2246 TP.N = demangleType(MangledName, QualifierMangleMode::Mangle); 2247 } else if (MangledName.startsWith("$1") || MangledName.startsWith("$H") || 2248 MangledName.startsWith("$I") || MangledName.startsWith("$J")) { 2249 // Pointer to member 2250 TP.N = TPRN = Arena.alloc<TemplateParameterReferenceNode>(); 2251 TPRN->IsMemberPointer = true; 2252 2253 MangledName.remove_prefix(1); 2254 // 1 - single inheritance <name> 2255 // H - multiple inheritance <name> <number> 2256 // I - virtual inheritance <name> <number> <number> 2257 // J - unspecified inheritance <name> <number> <number> <number> 2258 char InheritanceSpecifier = MangledName.front(); 2259 MangledName.remove_prefix(1); 2260 SymbolNode *S = nullptr; 2261 if (MangledName.startsWith('?')) { 2262 S = parse(MangledName); 2263 if (Error || !S->Name) { 2264 Error = true; 2265 return nullptr; 2266 } 2267 memorizeIdentifier(S->Name->getUnqualifiedIdentifier()); 2268 } 2269 2270 switch (InheritanceSpecifier) { 2271 case 'J': 2272 TPRN->ThunkOffsets[TPRN->ThunkOffsetCount++] = 2273 demangleSigned(MangledName); 2274 DEMANGLE_FALLTHROUGH; 2275 case 'I': 2276 TPRN->ThunkOffsets[TPRN->ThunkOffsetCount++] = 2277 demangleSigned(MangledName); 2278 DEMANGLE_FALLTHROUGH; 2279 case 'H': 2280 TPRN->ThunkOffsets[TPRN->ThunkOffsetCount++] = 2281 demangleSigned(MangledName); 2282 DEMANGLE_FALLTHROUGH; 2283 case '1': 2284 break; 2285 default: 2286 DEMANGLE_UNREACHABLE; 2287 } 2288 TPRN->Affinity = PointerAffinity::Pointer; 2289 TPRN->Symbol = S; 2290 } else if (MangledName.startsWith("$E?")) { 2291 consumeFront(MangledName, "$E"); 2292 // Reference to symbol 2293 TP.N = TPRN = Arena.alloc<TemplateParameterReferenceNode>(); 2294 TPRN->Symbol = parse(MangledName); 2295 TPRN->Affinity = PointerAffinity::Reference; 2296 } else if (MangledName.startsWith("$F") || MangledName.startsWith("$G")) { 2297 TP.N = TPRN = Arena.alloc<TemplateParameterReferenceNode>(); 2298 2299 // Data member pointer. 
2300 MangledName.remove_prefix(1); 2301 char InheritanceSpecifier = MangledName.front(); 2302 MangledName.remove_prefix(1); 2303 2304 switch (InheritanceSpecifier) { 2305 case 'G': 2306 TPRN->ThunkOffsets[TPRN->ThunkOffsetCount++] = 2307 demangleSigned(MangledName); 2308 DEMANGLE_FALLTHROUGH; 2309 case 'F': 2310 TPRN->ThunkOffsets[TPRN->ThunkOffsetCount++] = 2311 demangleSigned(MangledName); 2312 TPRN->ThunkOffsets[TPRN->ThunkOffsetCount++] = 2313 demangleSigned(MangledName); 2314 break; 2315 default: 2316 DEMANGLE_UNREACHABLE; 2317 } 2318 TPRN->IsMemberPointer = true; 2319 2320 } else if (consumeFront(MangledName, "$0")) { 2321 // Integral non-type template parameter 2322 bool IsNegative = false; 2323 uint64_t Value = 0; 2324 std::tie(Value, IsNegative) = demangleNumber(MangledName); 2325 2326 TP.N = Arena.alloc<IntegerLiteralNode>(Value, IsNegative); 2327 } else { 2328 TP.N = demangleType(MangledName, QualifierMangleMode::Drop); 2329 } 2330 if (Error) 2331 return nullptr; 2332 2333 Current = &TP.Next; 2334 } 2335 2336 // The loop above returns nullptr on Error. 2337 assert(!Error); 2338 2339 // Template parameter lists cannot be variadic, so it can only be terminated 2340 // by @ (as opposed to 'Z' in the function parameter case). 2341 assert(MangledName.startsWith('@')); // The above loop exits only on '@'. 2342 consumeFront(MangledName, '@'); 2343 return nodeListToNodeArray(Arena, Head, Count); 2344 } 2345 2346 void Demangler::dumpBackReferences() { 2347 std::printf("%d function parameter backreferences\n", 2348 (int)Backrefs.FunctionParamCount); 2349 2350 // Create an output stream so we can render each type. 
2351 OutputBuffer OB; 2352 for (size_t I = 0; I < Backrefs.FunctionParamCount; ++I) { 2353 OB.setCurrentPosition(0); 2354 2355 TypeNode *T = Backrefs.FunctionParams[I]; 2356 T->output(OB, OF_Default); 2357 2358 StringView B = OB; 2359 std::printf(" [%d] - %.*s\n", (int)I, (int)B.size(), B.begin()); 2360 } 2361 std::free(OB.getBuffer()); 2362 2363 if (Backrefs.FunctionParamCount > 0) 2364 std::printf("\n"); 2365 std::printf("%d name backreferences\n", (int)Backrefs.NamesCount); 2366 for (size_t I = 0; I < Backrefs.NamesCount; ++I) { 2367 std::printf(" [%d] - %.*s\n", (int)I, (int)Backrefs.Names[I]->Name.size(), 2368 Backrefs.Names[I]->Name.begin()); 2369 } 2370 if (Backrefs.NamesCount > 0) 2371 std::printf("\n"); 2372 } 2373 2374 char *llvm::microsoftDemangle(const char *MangledName, size_t *NMangled, 2375 char *Buf, size_t *N, 2376 int *Status, MSDemangleFlags Flags) { 2377 Demangler D; 2378 2379 StringView Name{MangledName}; 2380 SymbolNode *AST = D.parse(Name); 2381 if (!D.Error && NMangled) 2382 *NMangled = Name.begin() - MangledName; 2383 2384 if (Flags & MSDF_DumpBackrefs) 2385 D.dumpBackReferences(); 2386 2387 OutputFlags OF = OF_Default; 2388 if (Flags & MSDF_NoCallingConvention) 2389 OF = OutputFlags(OF | OF_NoCallingConvention); 2390 if (Flags & MSDF_NoAccessSpecifier) 2391 OF = OutputFlags(OF | OF_NoAccessSpecifier); 2392 if (Flags & MSDF_NoReturnType) 2393 OF = OutputFlags(OF | OF_NoReturnType); 2394 if (Flags & MSDF_NoMemberType) 2395 OF = OutputFlags(OF | OF_NoMemberType); 2396 if (Flags & MSDF_NoVariableType) 2397 OF = OutputFlags(OF | OF_NoVariableType); 2398 2399 int InternalStatus = demangle_success; 2400 if (D.Error) 2401 InternalStatus = demangle_invalid_mangled_name; 2402 else { 2403 OutputBuffer OB(Buf, N); 2404 AST->output(OB, OF); 2405 OB += '\0'; 2406 if (N != nullptr) 2407 *N = OB.getCurrentPosition(); 2408 Buf = OB.getBuffer(); 2409 } 2410 2411 if (Status) 2412 *Status = InternalStatus; 2413 return InternalStatus == demangle_success ? 
Buf : nullptr; 2414 } 2415