1 //===- MicrosoftDemangle.cpp ----------------------------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file defines a demangler for MSVC-style mangled symbols. 10 // 11 // This file has no dependencies on the rest of LLVM so that it can be 12 // easily reused in other programs such as libcxxabi. 13 // 14 //===----------------------------------------------------------------------===// 15 16 #include "llvm/Demangle/MicrosoftDemangle.h" 17 #include "llvm/Demangle/Demangle.h" 18 #include "llvm/Demangle/MicrosoftDemangleNodes.h" 19 20 #include "llvm/Demangle/DemangleConfig.h" 21 #include "llvm/Demangle/StringView.h" 22 #include "llvm/Demangle/Utility.h" 23 24 #include <array> 25 #include <cctype> 26 #include <cstdio> 27 #include <tuple> 28 29 using namespace llvm; 30 using namespace ms_demangle; 31 32 static bool startsWithDigit(StringView S) { 33 return !S.empty() && std::isdigit(S.front()); 34 } 35 36 37 struct NodeList { 38 Node *N = nullptr; 39 NodeList *Next = nullptr; 40 }; 41 42 static bool consumeFront(StringView &S, char C) { 43 if (!S.startsWith(C)) 44 return false; 45 S.remove_prefix(1); 46 return true; 47 } 48 49 static bool consumeFront(StringView &S, StringView C) { 50 if (!S.startsWith(C)) 51 return false; 52 S.remove_prefix(C.size()); 53 return true; 54 } 55 56 static bool isMemberPointer(StringView MangledName, bool &Error) { 57 Error = false; 58 const char F = MangledName.front(); 59 MangledName.remove_prefix(1); 60 switch (F) { 61 case '$': 62 // This is probably an rvalue reference (e.g. $$Q), and you cannot have an 63 // rvalue reference to a member. 64 return false; 65 case 'A': 66 // 'A' indicates a reference, and you cannot have a reference to a member 67 // function or member. 68 return false; 69 case 'P': 70 case 'Q': 71 case 'R': 72 case 'S': 73 // These 4 values indicate some kind of pointer, but we still don't know 74 // what. 75 break; 76 default: 77 // isMemberPointer() is called only if isPointerType() returns true, 78 // and it rejects other prefixes. 79 DEMANGLE_UNREACHABLE; 80 } 81 82 // If it starts with a number, then 6 indicates a non-member function 83 // pointer, and 8 indicates a member function pointer. 84 if (startsWithDigit(MangledName)) { 85 if (MangledName[0] != '6' && MangledName[0] != '8') { 86 Error = true; 87 return false; 88 } 89 return (MangledName[0] == '8'); 90 } 91 92 // Remove ext qualifiers since those can appear on either type and are 93 // therefore not indicative. 94 consumeFront(MangledName, 'E'); // 64-bit 95 consumeFront(MangledName, 'I'); // restrict 96 consumeFront(MangledName, 'F'); // unaligned 97 98 if (MangledName.empty()) { 99 Error = true; 100 return false; 101 } 102 103 // The next value should be either ABCD (non-member) or QRST (member). 104 switch (MangledName.front()) { 105 case 'A': 106 case 'B': 107 case 'C': 108 case 'D': 109 return false; 110 case 'Q': 111 case 'R': 112 case 'S': 113 case 'T': 114 return true; 115 default: 116 Error = true; 117 return false; 118 } 119 } 120 121 static SpecialIntrinsicKind 122 consumeSpecialIntrinsicKind(StringView &MangledName) { 123 if (consumeFront(MangledName, "?_7")) 124 return SpecialIntrinsicKind::Vftable; 125 if (consumeFront(MangledName, "?_8")) 126 return SpecialIntrinsicKind::Vbtable; 127 if (consumeFront(MangledName, "?_9")) 128 return SpecialIntrinsicKind::VcallThunk; 129 if (consumeFront(MangledName, "?_A")) 130 return SpecialIntrinsicKind::Typeof; 131 if (consumeFront(MangledName, "?_B")) 132 return SpecialIntrinsicKind::LocalStaticGuard; 133 if (consumeFront(MangledName, "?_C")) 134 return SpecialIntrinsicKind::StringLiteralSymbol; 135 if (consumeFront(MangledName, "?_P")) 136 return SpecialIntrinsicKind::UdtReturning; 137 if (consumeFront(MangledName, "?_R0")) 138 return SpecialIntrinsicKind::RttiTypeDescriptor; 139 if (consumeFront(MangledName, "?_R1")) 140 return SpecialIntrinsicKind::RttiBaseClassDescriptor; 141 if (consumeFront(MangledName, "?_R2")) 142 return SpecialIntrinsicKind::RttiBaseClassArray; 143 if (consumeFront(MangledName, "?_R3")) 144 return SpecialIntrinsicKind::RttiClassHierarchyDescriptor; 145 if (consumeFront(MangledName, "?_R4")) 146 return SpecialIntrinsicKind::RttiCompleteObjLocator; 147 if (consumeFront(MangledName, "?_S")) 148 return SpecialIntrinsicKind::LocalVftable; 149 if (consumeFront(MangledName, "?__E")) 150 return SpecialIntrinsicKind::DynamicInitializer; 151 if (consumeFront(MangledName, "?__F")) 152 return SpecialIntrinsicKind::DynamicAtexitDestructor; 153 if (consumeFront(MangledName, "?__J")) 154 return SpecialIntrinsicKind::LocalStaticThreadGuard; 155 return SpecialIntrinsicKind::None; 156 } 157 158 static bool startsWithLocalScopePattern(StringView S) { 159 if (!consumeFront(S, '?')) 160 return false; 161 162 size_t End = S.find('?'); 163 if (End == StringView::npos) 164 return false; 165 StringView Candidate = S.substr(0, End); 166 if (Candidate.empty()) 167 return false; 168 169 // \?[0-9]\? 170 // ?@? is the discriminator 0. 171 if (Candidate.size() == 1) 172 return Candidate[0] == '@' || (Candidate[0] >= '0' && Candidate[0] <= '9'); 173 174 // If it's not 0-9, then it's an encoded number terminated with an @ 175 if (Candidate.back() != '@') 176 return false; 177 Candidate.remove_suffix(1); 178 179 // An encoded number starts with B-P and all subsequent digits are in A-P. 180 // Note that the reason the first digit cannot be A is two fold. First, it 181 // would create an ambiguity with ?A which delimits the beginning of an 182 // anonymous namespace. Second, A represents 0, and you don't start a multi 183 // digit number with a leading 0. Presumably the anonymous namespace 184 // ambiguity is also why single digit encoded numbers use 0-9 rather than A-J. 185 if (Candidate[0] < 'B' || Candidate[0] > 'P') 186 return false; 187 Candidate.remove_prefix(1); 188 while (!Candidate.empty()) { 189 if (Candidate[0] < 'A' || Candidate[0] > 'P') 190 return false; 191 Candidate.remove_prefix(1); 192 } 193 194 return true; 195 } 196 197 static bool isTagType(StringView S) { 198 switch (S.front()) { 199 case 'T': // union 200 case 'U': // struct 201 case 'V': // class 202 case 'W': // enum 203 return true; 204 } 205 return false; 206 } 207 208 static bool isCustomType(StringView S) { return S[0] == '?'; } 209 210 static bool isPointerType(StringView S) { 211 if (S.startsWith("$$Q")) // foo && 212 return true; 213 214 switch (S.front()) { 215 case 'A': // foo & 216 case 'P': // foo * 217 case 'Q': // foo *const 218 case 'R': // foo *volatile 219 case 'S': // foo *const volatile 220 return true; 221 } 222 return false; 223 } 224 225 static bool isArrayType(StringView S) { return S[0] == 'Y'; } 226 227 static bool isFunctionType(StringView S) { 228 return S.startsWith("$$A8@@") || S.startsWith("$$A6"); 229 } 230 231 static FunctionRefQualifier 232 demangleFunctionRefQualifier(StringView &MangledName) { 233 if (consumeFront(MangledName, 'G')) 234 return FunctionRefQualifier::Reference; 235 else if (consumeFront(MangledName, 'H')) 236 return FunctionRefQualifier::RValueReference; 237 return FunctionRefQualifier::None; 238 } 239 240 static std::pair<Qualifiers, PointerAffinity> 241 demanglePointerCVQualifiers(StringView &MangledName) { 242 if (consumeFront(MangledName, "$$Q")) 243 return std::make_pair(Q_None, PointerAffinity::RValueReference); 244 245 const char F = MangledName.front(); 246 MangledName.remove_prefix(1); 247 switch (F) { 248 case 'A': 249 return std::make_pair(Q_None, PointerAffinity::Reference); 250 case 'P': 251 return std::make_pair(Q_None, PointerAffinity::Pointer); 252 case 'Q': 253 return std::make_pair(Q_Const, PointerAffinity::Pointer); 254 case 'R': 255 return std::make_pair(Q_Volatile, PointerAffinity::Pointer); 256 case 'S': 257 return std::make_pair(Qualifiers(Q_Const | Q_Volatile), 258 PointerAffinity::Pointer); 259 } 260 // This function is only called if isPointerType() returns true, 261 // and it only returns true for the six cases listed above. 262 DEMANGLE_UNREACHABLE; 263 } 264 265 StringView Demangler::copyString(StringView Borrowed) { 266 char *Stable = Arena.allocUnalignedBuffer(Borrowed.size()); 267 // This is not a micro-optimization, it avoids UB, should Borrowed be an null 268 // buffer. 269 if (Borrowed.size()) 270 std::memcpy(Stable, Borrowed.begin(), Borrowed.size()); 271 272 return {Stable, Borrowed.size()}; 273 } 274 275 SpecialTableSymbolNode * 276 Demangler::demangleSpecialTableSymbolNode(StringView &MangledName, 277 SpecialIntrinsicKind K) { 278 NamedIdentifierNode *NI = Arena.alloc<NamedIdentifierNode>(); 279 switch (K) { 280 case SpecialIntrinsicKind::Vftable: 281 NI->Name = "`vftable'"; 282 break; 283 case SpecialIntrinsicKind::Vbtable: 284 NI->Name = "`vbtable'"; 285 break; 286 case SpecialIntrinsicKind::LocalVftable: 287 NI->Name = "`local vftable'"; 288 break; 289 case SpecialIntrinsicKind::RttiCompleteObjLocator: 290 NI->Name = "`RTTI Complete Object Locator'"; 291 break; 292 default: 293 DEMANGLE_UNREACHABLE; 294 } 295 QualifiedNameNode *QN = demangleNameScopeChain(MangledName, NI); 296 SpecialTableSymbolNode *STSN = Arena.alloc<SpecialTableSymbolNode>(); 297 STSN->Name = QN; 298 bool IsMember = false; 299 if (MangledName.empty()) { 300 Error = true; 301 return nullptr; 302 } 303 char Front = MangledName.front(); 304 MangledName.remove_prefix(1); 305 if (Front != '6' && Front != '7') { 306 Error = true; 307 return nullptr; 308 } 309 310 std::tie(STSN->Quals, IsMember) = demangleQualifiers(MangledName); 311 if (!consumeFront(MangledName, '@')) 312 STSN->TargetName = demangleFullyQualifiedTypeName(MangledName); 313 return STSN; 314 } 315 316 LocalStaticGuardVariableNode * 317 Demangler::demangleLocalStaticGuard(StringView &MangledName, bool IsThread) { 318 LocalStaticGuardIdentifierNode *LSGI = 319 Arena.alloc<LocalStaticGuardIdentifierNode>(); 320 LSGI->IsThread = IsThread; 321 QualifiedNameNode *QN = demangleNameScopeChain(MangledName, LSGI); 322 LocalStaticGuardVariableNode *LSGVN = 323 Arena.alloc<LocalStaticGuardVariableNode>(); 324 LSGVN->Name = QN; 325 326 if (consumeFront(MangledName, "4IA")) 327 LSGVN->IsVisible = false; 328 else if (consumeFront(MangledName, "5")) 329 LSGVN->IsVisible = true; 330 else { 331 Error = true; 332 return nullptr; 333 } 334 335 if (!MangledName.empty()) 336 LSGI->ScopeIndex = demangleUnsigned(MangledName); 337 return LSGVN; 338 } 339 340 static NamedIdentifierNode *synthesizeNamedIdentifier(ArenaAllocator &Arena, 341 StringView Name) { 342 NamedIdentifierNode *Id = Arena.alloc<NamedIdentifierNode>(); 343 Id->Name = Name; 344 return Id; 345 } 346 347 static QualifiedNameNode *synthesizeQualifiedName(ArenaAllocator &Arena, 348 IdentifierNode *Identifier) { 349 QualifiedNameNode *QN = Arena.alloc<QualifiedNameNode>(); 350 QN->Components = Arena.alloc<NodeArrayNode>(); 351 QN->Components->Count = 1; 352 QN->Components->Nodes = Arena.allocArray<Node *>(1); 353 QN->Components->Nodes[0] = Identifier; 354 return QN; 355 } 356 357 static QualifiedNameNode *synthesizeQualifiedName(ArenaAllocator &Arena, 358 StringView Name) { 359 NamedIdentifierNode *Id = synthesizeNamedIdentifier(Arena, Name); 360 return synthesizeQualifiedName(Arena, Id); 361 } 362 363 static VariableSymbolNode *synthesizeVariable(ArenaAllocator &Arena, 364 TypeNode *Type, 365 StringView VariableName) { 366 VariableSymbolNode *VSN = Arena.alloc<VariableSymbolNode>(); 367 VSN->Type = Type; 368 VSN->Name = synthesizeQualifiedName(Arena, VariableName); 369 return VSN; 370 } 371 372 VariableSymbolNode *Demangler::demangleUntypedVariable( 373 ArenaAllocator &Arena, StringView &MangledName, StringView VariableName) { 374 NamedIdentifierNode *NI = synthesizeNamedIdentifier(Arena, VariableName); 375 QualifiedNameNode *QN = demangleNameScopeChain(MangledName, NI); 376 VariableSymbolNode *VSN = Arena.alloc<VariableSymbolNode>(); 377 VSN->Name = QN; 378 if (consumeFront(MangledName, "8")) 379 return VSN; 380 381 Error = true; 382 return nullptr; 383 } 384 385 VariableSymbolNode * 386 Demangler::demangleRttiBaseClassDescriptorNode(ArenaAllocator &Arena, 387 StringView &MangledName) { 388 RttiBaseClassDescriptorNode *RBCDN = 389 Arena.alloc<RttiBaseClassDescriptorNode>(); 390 RBCDN->NVOffset = demangleUnsigned(MangledName); 391 RBCDN->VBPtrOffset = demangleSigned(MangledName); 392 RBCDN->VBTableOffset = demangleUnsigned(MangledName); 393 RBCDN->Flags = demangleUnsigned(MangledName); 394 if (Error) 395 return nullptr; 396 397 VariableSymbolNode *VSN = Arena.alloc<VariableSymbolNode>(); 398 VSN->Name = demangleNameScopeChain(MangledName, RBCDN); 399 consumeFront(MangledName, '8'); 400 return VSN; 401 } 402 403 FunctionSymbolNode *Demangler::demangleInitFiniStub(StringView &MangledName, 404 bool IsDestructor) { 405 DynamicStructorIdentifierNode *DSIN = 406 Arena.alloc<DynamicStructorIdentifierNode>(); 407 DSIN->IsDestructor = IsDestructor; 408 409 bool IsKnownStaticDataMember = false; 410 if (consumeFront(MangledName, '?')) 411 IsKnownStaticDataMember = true; 412 413 SymbolNode *Symbol = demangleDeclarator(MangledName); 414 if (Error) 415 return nullptr; 416 417 FunctionSymbolNode *FSN = nullptr; 418 419 if (Symbol->kind() == NodeKind::VariableSymbol) { 420 DSIN->Variable = static_cast<VariableSymbolNode *>(Symbol); 421 422 // Older versions of clang mangled this type of symbol incorrectly. They 423 // would omit the leading ? and they would only emit a single @ at the end. 424 // The correct mangling is a leading ? and 2 trailing @ signs. Handle 425 // both cases. 426 int AtCount = IsKnownStaticDataMember ? 2 : 1; 427 for (int I = 0; I < AtCount; ++I) { 428 if (consumeFront(MangledName, '@')) 429 continue; 430 Error = true; 431 return nullptr; 432 } 433 434 FSN = demangleFunctionEncoding(MangledName); 435 if (FSN) 436 FSN->Name = synthesizeQualifiedName(Arena, DSIN); 437 } else { 438 if (IsKnownStaticDataMember) { 439 // This was supposed to be a static data member, but we got a function. 440 Error = true; 441 return nullptr; 442 } 443 444 FSN = static_cast<FunctionSymbolNode *>(Symbol); 445 DSIN->Name = Symbol->Name; 446 FSN->Name = synthesizeQualifiedName(Arena, DSIN); 447 } 448 449 return FSN; 450 } 451 452 SymbolNode *Demangler::demangleSpecialIntrinsic(StringView &MangledName) { 453 SpecialIntrinsicKind SIK = consumeSpecialIntrinsicKind(MangledName); 454 455 switch (SIK) { 456 case SpecialIntrinsicKind::None: 457 return nullptr; 458 case SpecialIntrinsicKind::StringLiteralSymbol: 459 return demangleStringLiteral(MangledName); 460 case SpecialIntrinsicKind::Vftable: 461 case SpecialIntrinsicKind::Vbtable: 462 case SpecialIntrinsicKind::LocalVftable: 463 case SpecialIntrinsicKind::RttiCompleteObjLocator: 464 return demangleSpecialTableSymbolNode(MangledName, SIK); 465 case SpecialIntrinsicKind::VcallThunk: 466 return demangleVcallThunkNode(MangledName); 467 case SpecialIntrinsicKind::LocalStaticGuard: 468 return demangleLocalStaticGuard(MangledName, /*IsThread=*/false); 469 case SpecialIntrinsicKind::LocalStaticThreadGuard: 470 return demangleLocalStaticGuard(MangledName, /*IsThread=*/true); 471 case SpecialIntrinsicKind::RttiTypeDescriptor: { 472 TypeNode *T = demangleType(MangledName, QualifierMangleMode::Result); 473 if (Error) 474 break; 475 if (!consumeFront(MangledName, "@8")) 476 break; 477 if (!MangledName.empty()) 478 break; 479 return synthesizeVariable(Arena, T, "`RTTI Type Descriptor'"); 480 } 481 case SpecialIntrinsicKind::RttiBaseClassArray: 482 return demangleUntypedVariable(Arena, MangledName, 483 "`RTTI Base Class Array'"); 484 case SpecialIntrinsicKind::RttiClassHierarchyDescriptor: 485 return demangleUntypedVariable(Arena, MangledName, 486 "`RTTI Class Hierarchy Descriptor'"); 487 case SpecialIntrinsicKind::RttiBaseClassDescriptor: 488 return demangleRttiBaseClassDescriptorNode(Arena, MangledName); 489 case SpecialIntrinsicKind::DynamicInitializer: 490 return demangleInitFiniStub(MangledName, /*IsDestructor=*/false); 491 case SpecialIntrinsicKind::DynamicAtexitDestructor: 492 return demangleInitFiniStub(MangledName, /*IsDestructor=*/true); 493 case SpecialIntrinsicKind::Typeof: 494 case SpecialIntrinsicKind::UdtReturning: 495 // It's unclear which tools produces these manglings, so demangling 496 // support is not (yet?) implemented. 497 break; 498 case SpecialIntrinsicKind::Unknown: 499 DEMANGLE_UNREACHABLE; // Never returned by consumeSpecialIntrinsicKind. 500 } 501 Error = true; 502 return nullptr; 503 } 504 505 IdentifierNode * 506 Demangler::demangleFunctionIdentifierCode(StringView &MangledName) { 507 assert(MangledName.startsWith('?')); 508 MangledName.remove_prefix(1); 509 if (MangledName.empty()) { 510 Error = true; 511 return nullptr; 512 } 513 514 if (consumeFront(MangledName, "__")) 515 return demangleFunctionIdentifierCode( 516 MangledName, FunctionIdentifierCodeGroup::DoubleUnder); 517 if (consumeFront(MangledName, "_")) 518 return demangleFunctionIdentifierCode(MangledName, 519 FunctionIdentifierCodeGroup::Under); 520 return demangleFunctionIdentifierCode(MangledName, 521 FunctionIdentifierCodeGroup::Basic); 522 } 523 524 StructorIdentifierNode * 525 Demangler::demangleStructorIdentifier(StringView &MangledName, 526 bool IsDestructor) { 527 StructorIdentifierNode *N = Arena.alloc<StructorIdentifierNode>(); 528 N->IsDestructor = IsDestructor; 529 return N; 530 } 531 532 ConversionOperatorIdentifierNode * 533 Demangler::demangleConversionOperatorIdentifier(StringView &MangledName) { 534 ConversionOperatorIdentifierNode *N = 535 Arena.alloc<ConversionOperatorIdentifierNode>(); 536 return N; 537 } 538 539 LiteralOperatorIdentifierNode * 540 Demangler::demangleLiteralOperatorIdentifier(StringView &MangledName) { 541 LiteralOperatorIdentifierNode *N = 542 Arena.alloc<LiteralOperatorIdentifierNode>(); 543 N->Name = demangleSimpleString(MangledName, /*Memorize=*/false); 544 return N; 545 } 546 547 IntrinsicFunctionKind 548 Demangler::translateIntrinsicFunctionCode(char CH, 549 FunctionIdentifierCodeGroup Group) { 550 using IFK = IntrinsicFunctionKind; 551 if (!(CH >= '0' && CH <= '9') && !(CH >= 'A' && CH <= 'Z')) { 552 Error = true; 553 return IFK::None; 554 } 555 556 // Not all ? identifiers are intrinsics *functions*. This function only maps 557 // operator codes for the special functions, all others are handled elsewhere, 558 // hence the IFK::None entries in the table. 559 static IFK Basic[36] = { 560 IFK::None, // ?0 # Foo::Foo() 561 IFK::None, // ?1 # Foo::~Foo() 562 IFK::New, // ?2 # operator new 563 IFK::Delete, // ?3 # operator delete 564 IFK::Assign, // ?4 # operator= 565 IFK::RightShift, // ?5 # operator>> 566 IFK::LeftShift, // ?6 # operator<< 567 IFK::LogicalNot, // ?7 # operator! 568 IFK::Equals, // ?8 # operator== 569 IFK::NotEquals, // ?9 # operator!= 570 IFK::ArraySubscript, // ?A # operator[] 571 IFK::None, // ?B # Foo::operator <type>() 572 IFK::Pointer, // ?C # operator-> 573 IFK::Dereference, // ?D # operator* 574 IFK::Increment, // ?E # operator++ 575 IFK::Decrement, // ?F # operator-- 576 IFK::Minus, // ?G # operator- 577 IFK::Plus, // ?H # operator+ 578 IFK::BitwiseAnd, // ?I # operator& 579 IFK::MemberPointer, // ?J # operator->* 580 IFK::Divide, // ?K # operator/ 581 IFK::Modulus, // ?L # operator% 582 IFK::LessThan, // ?M operator< 583 IFK::LessThanEqual, // ?N operator<= 584 IFK::GreaterThan, // ?O operator> 585 IFK::GreaterThanEqual, // ?P operator>= 586 IFK::Comma, // ?Q operator, 587 IFK::Parens, // ?R operator() 588 IFK::BitwiseNot, // ?S operator~ 589 IFK::BitwiseXor, // ?T operator^ 590 IFK::BitwiseOr, // ?U operator| 591 IFK::LogicalAnd, // ?V operator&& 592 IFK::LogicalOr, // ?W operator|| 593 IFK::TimesEqual, // ?X operator*= 594 IFK::PlusEqual, // ?Y operator+= 595 IFK::MinusEqual, // ?Z operator-= 596 }; 597 static IFK Under[36] = { 598 IFK::DivEqual, // ?_0 operator/= 599 IFK::ModEqual, // ?_1 operator%= 600 IFK::RshEqual, // ?_2 operator>>= 601 IFK::LshEqual, // ?_3 operator<<= 602 IFK::BitwiseAndEqual, // ?_4 operator&= 603 IFK::BitwiseOrEqual, // ?_5 operator|= 604 IFK::BitwiseXorEqual, // ?_6 operator^= 605 IFK::None, // ?_7 # vftable 606 IFK::None, // ?_8 # vbtable 607 IFK::None, // ?_9 # vcall 608 IFK::None, // ?_A # typeof 609 IFK::None, // ?_B # local static guard 610 IFK::None, // ?_C # string literal 611 IFK::VbaseDtor, // ?_D # vbase destructor 612 IFK::VecDelDtor, // ?_E # vector deleting destructor 613 IFK::DefaultCtorClosure, // ?_F # default constructor closure 614 IFK::ScalarDelDtor, // ?_G # scalar deleting destructor 615 IFK::VecCtorIter, // ?_H # vector constructor iterator 616 IFK::VecDtorIter, // ?_I # vector destructor iterator 617 IFK::VecVbaseCtorIter, // ?_J # vector vbase constructor iterator 618 IFK::VdispMap, // ?_K # virtual displacement map 619 IFK::EHVecCtorIter, // ?_L # eh vector constructor iterator 620 IFK::EHVecDtorIter, // ?_M # eh vector destructor iterator 621 IFK::EHVecVbaseCtorIter, // ?_N # eh vector vbase constructor iterator 622 IFK::CopyCtorClosure, // ?_O # copy constructor closure 623 IFK::None, // ?_P<name> # udt returning <name> 624 IFK::None, // ?_Q # <unknown> 625 IFK::None, // ?_R0 - ?_R4 # RTTI Codes 626 IFK::None, // ?_S # local vftable 627 IFK::LocalVftableCtorClosure, // ?_T # local vftable constructor closure 628 IFK::ArrayNew, // ?_U operator new[] 629 IFK::ArrayDelete, // ?_V operator delete[] 630 IFK::None, // ?_W <unused> 631 IFK::None, // ?_X <unused> 632 IFK::None, // ?_Y <unused> 633 IFK::None, // ?_Z <unused> 634 }; 635 static IFK DoubleUnder[36] = { 636 IFK::None, // ?__0 <unused> 637 IFK::None, // ?__1 <unused> 638 IFK::None, // ?__2 <unused> 639 IFK::None, // ?__3 <unused> 640 IFK::None, // ?__4 <unused> 641 IFK::None, // ?__5 <unused> 642 IFK::None, // ?__6 <unused> 643 IFK::None, // ?__7 <unused> 644 IFK::None, // ?__8 <unused> 645 IFK::None, // ?__9 <unused> 646 IFK::ManVectorCtorIter, // ?__A managed vector ctor iterator 647 IFK::ManVectorDtorIter, // ?__B managed vector dtor iterator 648 IFK::EHVectorCopyCtorIter, // ?__C EH vector copy ctor iterator 649 IFK::EHVectorVbaseCopyCtorIter, // ?__D EH vector vbase copy ctor iter 650 IFK::None, // ?__E dynamic initializer for `T' 651 IFK::None, // ?__F dynamic atexit destructor for `T' 652 IFK::VectorCopyCtorIter, // ?__G vector copy constructor iter 653 IFK::VectorVbaseCopyCtorIter, // ?__H vector vbase copy ctor iter 654 IFK::ManVectorVbaseCopyCtorIter, // ?__I managed vector vbase copy ctor 655 // iter 656 IFK::None, // ?__J local static thread guard 657 IFK::None, // ?__K operator ""_name 658 IFK::CoAwait, // ?__L operator co_await 659 IFK::Spaceship, // ?__M operator<=> 660 IFK::None, // ?__N <unused> 661 IFK::None, // ?__O <unused> 662 IFK::None, // ?__P <unused> 663 IFK::None, // ?__Q <unused> 664 IFK::None, // ?__R <unused> 665 IFK::None, // ?__S <unused> 666 IFK::None, // ?__T <unused> 667 IFK::None, // ?__U <unused> 668 IFK::None, // ?__V <unused> 669 IFK::None, // ?__W <unused> 670 IFK::None, // ?__X <unused> 671 IFK::None, // ?__Y <unused> 672 IFK::None, // ?__Z <unused> 673 }; 674 675 int Index = (CH >= '0' && CH <= '9') ? (CH - '0') : (CH - 'A' + 10); 676 switch (Group) { 677 case FunctionIdentifierCodeGroup::Basic: 678 return Basic[Index]; 679 case FunctionIdentifierCodeGroup::Under: 680 return Under[Index]; 681 case FunctionIdentifierCodeGroup::DoubleUnder: 682 return DoubleUnder[Index]; 683 } 684 DEMANGLE_UNREACHABLE; 685 } 686 687 IdentifierNode * 688 Demangler::demangleFunctionIdentifierCode(StringView &MangledName, 689 FunctionIdentifierCodeGroup Group) { 690 if (MangledName.empty()) { 691 Error = true; 692 return nullptr; 693 } 694 const char CH = MangledName.front(); 695 switch (Group) { 696 case FunctionIdentifierCodeGroup::Basic: 697 MangledName.remove_prefix(1); 698 switch (CH) { 699 case '0': 700 case '1': 701 return demangleStructorIdentifier(MangledName, CH == '1'); 702 case 'B': 703 return demangleConversionOperatorIdentifier(MangledName); 704 default: 705 return Arena.alloc<IntrinsicFunctionIdentifierNode>( 706 translateIntrinsicFunctionCode(CH, Group)); 707 } 708 case FunctionIdentifierCodeGroup::Under: 709 MangledName.remove_prefix(1); 710 return Arena.alloc<IntrinsicFunctionIdentifierNode>( 711 translateIntrinsicFunctionCode(CH, Group)); 712 case FunctionIdentifierCodeGroup::DoubleUnder: 713 MangledName.remove_prefix(1); 714 switch (CH) { 715 case 'K': 716 return demangleLiteralOperatorIdentifier(MangledName); 717 default: 718 return Arena.alloc<IntrinsicFunctionIdentifierNode>( 719 translateIntrinsicFunctionCode(CH, Group)); 720 } 721 } 722 723 DEMANGLE_UNREACHABLE; 724 } 725 726 SymbolNode *Demangler::demangleEncodedSymbol(StringView &MangledName, 727 QualifiedNameNode *Name) { 728 if (MangledName.empty()) { 729 Error = true; 730 return nullptr; 731 } 732 733 // Read a variable. 734 switch (MangledName.front()) { 735 case '0': 736 case '1': 737 case '2': 738 case '3': 739 case '4': { 740 StorageClass SC = demangleVariableStorageClass(MangledName); 741 return demangleVariableEncoding(MangledName, SC); 742 } 743 } 744 FunctionSymbolNode *FSN = demangleFunctionEncoding(MangledName); 745 746 IdentifierNode *UQN = Name->getUnqualifiedIdentifier(); 747 if (UQN->kind() == NodeKind::ConversionOperatorIdentifier) { 748 ConversionOperatorIdentifierNode *COIN = 749 static_cast<ConversionOperatorIdentifierNode *>(UQN); 750 if (FSN) 751 COIN->TargetType = FSN->Signature->ReturnType; 752 } 753 return FSN; 754 } 755 756 SymbolNode *Demangler::demangleDeclarator(StringView &MangledName) { 757 // What follows is a main symbol name. This may include namespaces or class 758 // back references. 759 QualifiedNameNode *QN = demangleFullyQualifiedSymbolName(MangledName); 760 if (Error) 761 return nullptr; 762 763 SymbolNode *Symbol = demangleEncodedSymbol(MangledName, QN); 764 if (Error) 765 return nullptr; 766 Symbol->Name = QN; 767 768 IdentifierNode *UQN = QN->getUnqualifiedIdentifier(); 769 if (UQN->kind() == NodeKind::ConversionOperatorIdentifier) { 770 ConversionOperatorIdentifierNode *COIN = 771 static_cast<ConversionOperatorIdentifierNode *>(UQN); 772 if (!COIN->TargetType) { 773 Error = true; 774 return nullptr; 775 } 776 } 777 return Symbol; 778 } 779 780 SymbolNode *Demangler::demangleMD5Name(StringView &MangledName) { 781 assert(MangledName.startsWith("??@")); 782 // This is an MD5 mangled name. We can't demangle it, just return the 783 // mangled name. 784 // An MD5 mangled name is ??@ followed by 32 characters and a terminating @. 785 size_t MD5Last = MangledName.find('@', strlen("??@")); 786 if (MD5Last == StringView::npos) { 787 Error = true; 788 return nullptr; 789 } 790 const char *Start = MangledName.begin(); 791 MangledName.remove_prefix(MD5Last + 1); 792 793 // There are two additional special cases for MD5 names: 794 // 1. For complete object locators where the object name is long enough 795 // for the object to have an MD5 name, the complete object locator is 796 // called ??@...@??_R4@ (with a trailing "??_R4@" instead of the usual 797 // leading "??_R4". This is handled here. 798 // 2. For catchable types, in versions of MSVC before 2015 (<1900) or after 799 // 2017.2 (>= 1914), the catchable type mangling is _CT??@...@??@...@8 800 // instead of_CT??@...@8 with just one MD5 name. Since we don't yet 801 // demangle catchable types anywhere, this isn't handled for MD5 names 802 // either. 803 consumeFront(MangledName, "??_R4@"); 804 805 StringView MD5(Start, MangledName.begin() - Start); 806 SymbolNode *S = Arena.alloc<SymbolNode>(NodeKind::Md5Symbol); 807 S->Name = synthesizeQualifiedName(Arena, MD5); 808 809 return S; 810 } 811 812 SymbolNode *Demangler::demangleTypeinfoName(StringView &MangledName) { 813 assert(MangledName.startsWith('.')); 814 consumeFront(MangledName, '.'); 815 816 TypeNode *T = demangleType(MangledName, QualifierMangleMode::Result); 817 if (Error || !MangledName.empty()) { 818 Error = true; 819 return nullptr; 820 } 821 return synthesizeVariable(Arena, T, "`RTTI Type Descriptor Name'"); 822 } 823 824 // Parser entry point. 825 SymbolNode *Demangler::parse(StringView &MangledName) { 826 // Typeinfo names are strings stored in RTTI data. They're not symbol names. 827 // It's still useful to demangle them. They're the only demangled entity 828 // that doesn't start with a "?" but a ".". 829 if (MangledName.startsWith('.')) 830 return demangleTypeinfoName(MangledName); 831 832 if (MangledName.startsWith("??@")) 833 return demangleMD5Name(MangledName); 834 835 // MSVC-style mangled symbols must start with '?'. 836 if (!MangledName.startsWith('?')) { 837 Error = true; 838 return nullptr; 839 } 840 841 consumeFront(MangledName, '?'); 842 843 // ?$ is a template instantiation, but all other names that start with ? are 844 // operators / special names. 845 if (SymbolNode *SI = demangleSpecialIntrinsic(MangledName)) 846 return SI; 847 848 return demangleDeclarator(MangledName); 849 } 850 851 TagTypeNode *Demangler::parseTagUniqueName(StringView &MangledName) { 852 if (!consumeFront(MangledName, ".?A")) { 853 Error = true; 854 return nullptr; 855 } 856 consumeFront(MangledName, ".?A"); 857 if (MangledName.empty()) { 858 Error = true; 859 return nullptr; 860 } 861 862 return demangleClassType(MangledName); 863 } 864 865 // <type-encoding> ::= <storage-class> <variable-type> 866 // <storage-class> ::= 0 # private static member 867 // ::= 1 # protected static member 868 // ::= 2 # public static member 869 // ::= 3 # global 870 // ::= 4 # static local 871 872 VariableSymbolNode *Demangler::demangleVariableEncoding(StringView &MangledName, 873 StorageClass SC) { 874 VariableSymbolNode *VSN = Arena.alloc<VariableSymbolNode>(); 875 876 VSN->Type = demangleType(MangledName, QualifierMangleMode::Drop); 877 VSN->SC = SC; 878 879 if (Error) 880 return nullptr; 881 882 // <variable-type> ::= <type> <cvr-qualifiers> 883 // ::= <type> <pointee-cvr-qualifiers> # pointers, references 884 switch (VSN->Type->kind()) { 885 case NodeKind::PointerType: { 886 PointerTypeNode *PTN = static_cast<PointerTypeNode *>(VSN->Type); 887 888 Qualifiers ExtraChildQuals = Q_None; 889 PTN->Quals = Qualifiers(VSN->Type->Quals | 890 demanglePointerExtQualifiers(MangledName)); 891 892 bool IsMember = false; 893 std::tie(ExtraChildQuals, IsMember) = demangleQualifiers(MangledName); 894 895 if (PTN->ClassParent) { 896 QualifiedNameNode *BackRefName = 897 demangleFullyQualifiedTypeName(MangledName); 898 (void)BackRefName; 899 } 900 PTN->Pointee->Quals = Qualifiers(PTN->Pointee->Quals | ExtraChildQuals); 901 902 break; 903 } 904 default: 905 VSN->Type->Quals = demangleQualifiers(MangledName).first; 906 break; 907 } 908 909 return VSN; 910 } 911 912 // Sometimes numbers are encoded in mangled symbols. For example, 913 // "int (*x)[20]" is a valid C type (x is a pointer to an array of 914 // length 20), so we need some way to embed numbers as part of symbols. 915 // This function parses it. 916 // 917 // <number> ::= [?] <non-negative integer> 918 // 919 // <non-negative integer> ::= <decimal digit> # when 1 <= Number <= 10 920 // ::= <hex digit>+ @ # when Number == 0 or >= 10 921 // 922 // <hex-digit> ::= [A-P] # A = 0, B = 1, ... 923 std::pair<uint64_t, bool> Demangler::demangleNumber(StringView &MangledName) { 924 bool IsNegative = consumeFront(MangledName, '?'); 925 926 if (startsWithDigit(MangledName)) { 927 uint64_t Ret = MangledName[0] - '0' + 1; 928 MangledName.remove_prefix(1); 929 return {Ret, IsNegative}; 930 } 931 932 uint64_t Ret = 0; 933 for (size_t i = 0; i < MangledName.size(); ++i) { 934 char C = MangledName[i]; 935 if (C == '@') { 936 MangledName.remove_prefix(i + 1); 937 return {Ret, IsNegative}; 938 } 939 if ('A' <= C && C <= 'P') { 940 Ret = (Ret << 4) + (C - 'A'); 941 continue; 942 } 943 break; 944 } 945 946 Error = true; 947 return {0ULL, false}; 948 } 949 950 uint64_t Demangler::demangleUnsigned(StringView &MangledName) { 951 bool IsNegative = false; 952 uint64_t Number = 0; 953 std::tie(Number, IsNegative) = demangleNumber(MangledName); 954 if (IsNegative) 955 Error = true; 956 return Number; 957 } 958 959 int64_t Demangler::demangleSigned(StringView &MangledName) { 960 bool IsNegative = false; 961 uint64_t Number = 0; 962 std::tie(Number, IsNegative) = demangleNumber(MangledName); 963 if (Number > INT64_MAX) 964 Error = true; 965 int64_t I = static_cast<int64_t>(Number); 966 return IsNegative ? -I : I; 967 } 968 969 // First 10 strings can be referenced by special BackReferences ?0, ?1, ..., ?9. 970 // Memorize it. 971 void Demangler::memorizeString(StringView S) { 972 if (Backrefs.NamesCount >= BackrefContext::Max) 973 return; 974 for (size_t i = 0; i < Backrefs.NamesCount; ++i) 975 if (S == Backrefs.Names[i]->Name) 976 return; 977 NamedIdentifierNode *N = Arena.alloc<NamedIdentifierNode>(); 978 N->Name = S; 979 Backrefs.Names[Backrefs.NamesCount++] = N; 980 } 981 982 NamedIdentifierNode *Demangler::demangleBackRefName(StringView &MangledName) { 983 assert(startsWithDigit(MangledName)); 984 985 size_t I = MangledName[0] - '0'; 986 if (I >= Backrefs.NamesCount) { 987 Error = true; 988 return nullptr; 989 } 990 991 MangledName.remove_prefix(1); 992 return Backrefs.Names[I]; 993 } 994 995 void Demangler::memorizeIdentifier(IdentifierNode *Identifier) { 996 // Render this class template name into a string buffer so that we can 997 // memorize it for the purpose of back-referencing. 998 OutputBuffer OB; 999 Identifier->output(OB, OF_Default); 1000 StringView Owned = copyString(OB); 1001 memorizeString(Owned); 1002 std::free(OB.getBuffer()); 1003 } 1004 1005 IdentifierNode * 1006 Demangler::demangleTemplateInstantiationName(StringView &MangledName, 1007 NameBackrefBehavior NBB) { 1008 assert(MangledName.startsWith("?$")); 1009 consumeFront(MangledName, "?$"); 1010 1011 BackrefContext OuterContext; 1012 std::swap(OuterContext, Backrefs); 1013 1014 IdentifierNode *Identifier = 1015 demangleUnqualifiedSymbolName(MangledName, NBB_Simple); 1016 if (!Error) 1017 Identifier->TemplateParams = demangleTemplateParameterList(MangledName); 1018 1019 std::swap(OuterContext, Backrefs); 1020 if (Error) 1021 return nullptr; 1022 1023 if (NBB & NBB_Template) { 1024 // NBB_Template is only set for types and non-leaf names ("a::" in "a::b"). 1025 // Structors and conversion operators only makes sense in a leaf name, so 1026 // reject them in NBB_Template contexts. 1027 if (Identifier->kind() == NodeKind::ConversionOperatorIdentifier || 1028 Identifier->kind() == NodeKind::StructorIdentifier) { 1029 Error = true; 1030 return nullptr; 1031 } 1032 1033 memorizeIdentifier(Identifier); 1034 } 1035 1036 return Identifier; 1037 } 1038 1039 NamedIdentifierNode *Demangler::demangleSimpleName(StringView &MangledName, 1040 bool Memorize) { 1041 StringView S = demangleSimpleString(MangledName, Memorize); 1042 if (Error) 1043 return nullptr; 1044 1045 NamedIdentifierNode *Name = Arena.alloc<NamedIdentifierNode>(); 1046 Name->Name = S; 1047 return Name; 1048 } 1049 1050 static bool isRebasedHexDigit(char C) { return (C >= 'A' && C <= 'P'); } 1051 1052 static uint8_t rebasedHexDigitToNumber(char C) { 1053 assert(isRebasedHexDigit(C)); 1054 return (C <= 'J') ? (C - 'A') : (10 + C - 'K'); 1055 } 1056 1057 uint8_t Demangler::demangleCharLiteral(StringView &MangledName) { 1058 assert(!MangledName.empty()); 1059 if (!MangledName.startsWith('?')) { 1060 const uint8_t F = MangledName.front(); 1061 MangledName.remove_prefix(1); 1062 return F; 1063 } 1064 1065 MangledName.remove_prefix(1); 1066 if (MangledName.empty()) 1067 goto CharLiteralError; 1068 1069 if (consumeFront(MangledName, '$')) { 1070 // Two hex digits 1071 if (MangledName.size() < 2) 1072 goto CharLiteralError; 1073 StringView Nibbles = MangledName.substr(0, 2); 1074 if (!isRebasedHexDigit(Nibbles[0]) || !isRebasedHexDigit(Nibbles[1])) 1075 goto CharLiteralError; 1076 // Don't append the null terminator. 1077 uint8_t C1 = rebasedHexDigitToNumber(Nibbles[0]); 1078 uint8_t C2 = rebasedHexDigitToNumber(Nibbles[1]); 1079 MangledName.remove_prefix(2); 1080 return (C1 << 4) | C2; 1081 } 1082 1083 if (startsWithDigit(MangledName)) { 1084 const char *Lookup = ",/\\:. \n\t'-"; 1085 char C = Lookup[MangledName[0] - '0']; 1086 MangledName.remove_prefix(1); 1087 return C; 1088 } 1089 1090 if (MangledName[0] >= 'a' && MangledName[0] <= 'z') { 1091 char Lookup[26] = {'\xE1', '\xE2', '\xE3', '\xE4', '\xE5', '\xE6', '\xE7', 1092 '\xE8', '\xE9', '\xEA', '\xEB', '\xEC', '\xED', '\xEE', 1093 '\xEF', '\xF0', '\xF1', '\xF2', '\xF3', '\xF4', '\xF5', 1094 '\xF6', '\xF7', '\xF8', '\xF9', '\xFA'}; 1095 char C = Lookup[MangledName[0] - 'a']; 1096 MangledName.remove_prefix(1); 1097 return C; 1098 } 1099 1100 if (MangledName[0] >= 'A' && MangledName[0] <= 'Z') { 1101 char Lookup[26] = {'\xC1', '\xC2', '\xC3', '\xC4', '\xC5', '\xC6', '\xC7', 1102 '\xC8', '\xC9', '\xCA', '\xCB', '\xCC', '\xCD', '\xCE', 1103 '\xCF', '\xD0', '\xD1', '\xD2', '\xD3', '\xD4', '\xD5', 1104 '\xD6', '\xD7', '\xD8', '\xD9', '\xDA'}; 1105 char C = Lookup[MangledName[0] - 'A']; 1106 MangledName.remove_prefix(1); 1107 return C; 1108 } 1109 1110 CharLiteralError: 1111 Error = true; 1112 return '\0'; 1113 } 1114 1115 wchar_t Demangler::demangleWcharLiteral(StringView &MangledName) { 1116 uint8_t C1, C2; 1117 1118 C1 = demangleCharLiteral(MangledName); 1119 if (Error || MangledName.empty()) 1120 goto WCharLiteralError; 1121 C2 = demangleCharLiteral(MangledName); 1122 if (Error) 1123 goto WCharLiteralError; 1124 1125 return ((wchar_t)C1 << 8) | (wchar_t)C2; 1126 1127 WCharLiteralError: 1128 Error = true; 1129 return L'\0'; 1130 } 1131 1132 static void writeHexDigit(char *Buffer, uint8_t Digit) { 1133 assert(Digit <= 15); 1134 *Buffer = (Digit < 10) ? ('0' + Digit) : ('A' + Digit - 10); 1135 } 1136 1137 static void outputHex(OutputBuffer &OB, unsigned C) { 1138 assert (C != 0); 1139 1140 // It's easier to do the math if we can work from right to left, but we need 1141 // to print the numbers from left to right. So render this into a temporary 1142 // buffer first, then output the temporary buffer. Each byte is of the form 1143 // \xAB, which means that each byte needs 4 characters. Since there are at 1144 // most 4 bytes, we need a 4*4+1 = 17 character temporary buffer. 1145 char TempBuffer[17]; 1146 1147 ::memset(TempBuffer, 0, sizeof(TempBuffer)); 1148 constexpr int MaxPos = sizeof(TempBuffer) - 1; 1149 1150 int Pos = MaxPos - 1; // TempBuffer[MaxPos] is the terminating \0. 1151 while (C != 0) { 1152 for (int I = 0; I < 2; ++I) { 1153 writeHexDigit(&TempBuffer[Pos--], C % 16); 1154 C /= 16; 1155 } 1156 } 1157 TempBuffer[Pos--] = 'x'; 1158 assert(Pos >= 0); 1159 TempBuffer[Pos--] = '\\'; 1160 OB << StringView(&TempBuffer[Pos + 1]); 1161 } 1162 1163 static void outputEscapedChar(OutputBuffer &OB, unsigned C) { 1164 switch (C) { 1165 case '\0': // nul 1166 OB << "\\0"; 1167 return; 1168 case '\'': // single quote 1169 OB << "\\\'"; 1170 return; 1171 case '\"': // double quote 1172 OB << "\\\""; 1173 return; 1174 case '\\': // backslash 1175 OB << "\\\\"; 1176 return; 1177 case '\a': // bell 1178 OB << "\\a"; 1179 return; 1180 case '\b': // backspace 1181 OB << "\\b"; 1182 return; 1183 case '\f': // form feed 1184 OB << "\\f"; 1185 return; 1186 case '\n': // new line 1187 OB << "\\n"; 1188 return; 1189 case '\r': // carriage return 1190 OB << "\\r"; 1191 return; 1192 case '\t': // tab 1193 OB << "\\t"; 1194 return; 1195 case '\v': // vertical tab 1196 OB << "\\v"; 1197 return; 1198 default: 1199 break; 1200 } 1201 1202 if (C > 0x1F && C < 0x7F) { 1203 // Standard ascii char. 1204 OB << (char)C; 1205 return; 1206 } 1207 1208 outputHex(OB, C); 1209 } 1210 1211 static unsigned countTrailingNullBytes(const uint8_t *StringBytes, int Length) { 1212 const uint8_t *End = StringBytes + Length - 1; 1213 unsigned Count = 0; 1214 while (Length > 0 && *End == 0) { 1215 --Length; 1216 --End; 1217 ++Count; 1218 } 1219 return Count; 1220 } 1221 1222 static unsigned countEmbeddedNulls(const uint8_t *StringBytes, 1223 unsigned Length) { 1224 unsigned Result = 0; 1225 for (unsigned I = 0; I < Length; ++I) { 1226 if (*StringBytes++ == 0) 1227 ++Result; 1228 } 1229 return Result; 1230 } 1231 1232 // A mangled (non-wide) string literal stores the total length of the string it 1233 // refers to (passed in NumBytes), and it contains up to 32 bytes of actual text 1234 // (passed in StringBytes, NumChars). 1235 static unsigned guessCharByteSize(const uint8_t *StringBytes, unsigned NumChars, 1236 uint64_t NumBytes) { 1237 assert(NumBytes > 0); 1238 1239 // If the number of bytes is odd, this is guaranteed to be a char string. 1240 if (NumBytes % 2 == 1) 1241 return 1; 1242 1243 // All strings can encode at most 32 bytes of data. If it's less than that, 1244 // then we encoded the entire string. In this case we check for a 1-byte, 1245 // 2-byte, or 4-byte null terminator. 1246 if (NumBytes < 32) { 1247 unsigned TrailingNulls = countTrailingNullBytes(StringBytes, NumChars); 1248 if (TrailingNulls >= 4 && NumBytes % 4 == 0) 1249 return 4; 1250 if (TrailingNulls >= 2) 1251 return 2; 1252 return 1; 1253 } 1254 1255 // The whole string was not able to be encoded. Try to look at embedded null 1256 // terminators to guess. The heuristic is that we count all embedded null 1257 // terminators. If more than 2/3 are null, it's a char32. If more than 1/3 1258 // are null, it's a char16. Otherwise it's a char8. This obviously isn't 1259 // perfect and is biased towards languages that have ascii alphabets, but this 1260 // was always going to be best effort since the encoding is lossy. 1261 unsigned Nulls = countEmbeddedNulls(StringBytes, NumChars); 1262 if (Nulls >= 2 * NumChars / 3 && NumBytes % 4 == 0) 1263 return 4; 1264 if (Nulls >= NumChars / 3) 1265 return 2; 1266 return 1; 1267 } 1268 1269 static unsigned decodeMultiByteChar(const uint8_t *StringBytes, 1270 unsigned CharIndex, unsigned CharBytes) { 1271 assert(CharBytes == 1 || CharBytes == 2 || CharBytes == 4); 1272 unsigned Offset = CharIndex * CharBytes; 1273 unsigned Result = 0; 1274 StringBytes = StringBytes + Offset; 1275 for (unsigned I = 0; I < CharBytes; ++I) { 1276 unsigned C = static_cast<unsigned>(StringBytes[I]); 1277 Result |= C << (8 * I); 1278 } 1279 return Result; 1280 } 1281 1282 FunctionSymbolNode *Demangler::demangleVcallThunkNode(StringView &MangledName) { 1283 FunctionSymbolNode *FSN = Arena.alloc<FunctionSymbolNode>(); 1284 VcallThunkIdentifierNode *VTIN = Arena.alloc<VcallThunkIdentifierNode>(); 1285 FSN->Signature = Arena.alloc<ThunkSignatureNode>(); 1286 FSN->Signature->FunctionClass = FC_NoParameterList; 1287 1288 FSN->Name = demangleNameScopeChain(MangledName, VTIN); 1289 if (!Error) 1290 Error = !consumeFront(MangledName, "$B"); 1291 if (!Error) 1292 VTIN->OffsetInVTable = demangleUnsigned(MangledName); 1293 if (!Error) 1294 Error = !consumeFront(MangledName, 'A'); 1295 if (!Error) 1296 FSN->Signature->CallConvention = demangleCallingConvention(MangledName); 1297 return (Error) ? nullptr : FSN; 1298 } 1299 1300 EncodedStringLiteralNode * 1301 Demangler::demangleStringLiteral(StringView &MangledName) { 1302 // This function uses goto, so declare all variables up front. 1303 OutputBuffer OB; 1304 StringView CRC; 1305 uint64_t StringByteSize; 1306 bool IsWcharT = false; 1307 bool IsNegative = false; 1308 size_t CrcEndPos = 0; 1309 char F; 1310 1311 EncodedStringLiteralNode *Result = Arena.alloc<EncodedStringLiteralNode>(); 1312 1313 // Prefix indicating the beginning of a string literal 1314 if (!consumeFront(MangledName, "@_")) 1315 goto StringLiteralError; 1316 if (MangledName.empty()) 1317 goto StringLiteralError; 1318 1319 // Char Type (regular or wchar_t) 1320 F = MangledName.front(); 1321 MangledName.remove_prefix(1); 1322 switch (F) { 1323 case '1': 1324 IsWcharT = true; 1325 DEMANGLE_FALLTHROUGH; 1326 case '0': 1327 break; 1328 default: 1329 goto StringLiteralError; 1330 } 1331 1332 // Encoded Length 1333 std::tie(StringByteSize, IsNegative) = demangleNumber(MangledName); 1334 if (Error || IsNegative || StringByteSize < (IsWcharT ? 2 : 1)) 1335 goto StringLiteralError; 1336 1337 // CRC 32 (always 8 characters plus a terminator) 1338 CrcEndPos = MangledName.find('@'); 1339 if (CrcEndPos == StringView::npos) 1340 goto StringLiteralError; 1341 CRC = MangledName.substr(0, CrcEndPos); 1342 MangledName.remove_prefix(CrcEndPos + 1); 1343 if (MangledName.empty()) 1344 goto StringLiteralError; 1345 1346 if (IsWcharT) { 1347 Result->Char = CharKind::Wchar; 1348 if (StringByteSize > 64) 1349 Result->IsTruncated = true; 1350 1351 while (!consumeFront(MangledName, '@')) { 1352 if (MangledName.size() < 2) 1353 goto StringLiteralError; 1354 wchar_t W = demangleWcharLiteral(MangledName); 1355 if (StringByteSize != 2 || Result->IsTruncated) 1356 outputEscapedChar(OB, W); 1357 StringByteSize -= 2; 1358 if (Error) 1359 goto StringLiteralError; 1360 } 1361 } else { 1362 // The max byte length is actually 32, but some compilers mangled strings 1363 // incorrectly, so we have to assume it can go higher. 1364 constexpr unsigned MaxStringByteLength = 32 * 4; 1365 uint8_t StringBytes[MaxStringByteLength]; 1366 1367 unsigned BytesDecoded = 0; 1368 while (!consumeFront(MangledName, '@')) { 1369 if (MangledName.size() < 1 || BytesDecoded >= MaxStringByteLength) 1370 goto StringLiteralError; 1371 StringBytes[BytesDecoded++] = demangleCharLiteral(MangledName); 1372 } 1373 1374 if (StringByteSize > BytesDecoded) 1375 Result->IsTruncated = true; 1376 1377 unsigned CharBytes = 1378 guessCharByteSize(StringBytes, BytesDecoded, StringByteSize); 1379 assert(StringByteSize % CharBytes == 0); 1380 switch (CharBytes) { 1381 case 1: 1382 Result->Char = CharKind::Char; 1383 break; 1384 case 2: 1385 Result->Char = CharKind::Char16; 1386 break; 1387 case 4: 1388 Result->Char = CharKind::Char32; 1389 break; 1390 default: 1391 DEMANGLE_UNREACHABLE; 1392 } 1393 const unsigned NumChars = BytesDecoded / CharBytes; 1394 for (unsigned CharIndex = 0; CharIndex < NumChars; ++CharIndex) { 1395 unsigned NextChar = 1396 decodeMultiByteChar(StringBytes, CharIndex, CharBytes); 1397 if (CharIndex + 1 < NumChars || Result->IsTruncated) 1398 outputEscapedChar(OB, NextChar); 1399 } 1400 } 1401 1402 Result->DecodedString = copyString(OB); 1403 std::free(OB.getBuffer()); 1404 return Result; 1405 1406 StringLiteralError: 1407 Error = true; 1408 std::free(OB.getBuffer()); 1409 return nullptr; 1410 } 1411 1412 // Returns MangledName's prefix before the first '@', or an error if 1413 // MangledName contains no '@' or the prefix has length 0. 1414 StringView Demangler::demangleSimpleString(StringView &MangledName, 1415 bool Memorize) { 1416 StringView S; 1417 for (size_t i = 0; i < MangledName.size(); ++i) { 1418 if (MangledName[i] != '@') 1419 continue; 1420 if (i == 0) 1421 break; 1422 S = MangledName.substr(0, i); 1423 MangledName.remove_prefix(i + 1); 1424 1425 if (Memorize) 1426 memorizeString(S); 1427 return S; 1428 } 1429 1430 Error = true; 1431 return {}; 1432 } 1433 1434 NamedIdentifierNode * 1435 Demangler::demangleAnonymousNamespaceName(StringView &MangledName) { 1436 assert(MangledName.startsWith("?A")); 1437 consumeFront(MangledName, "?A"); 1438 1439 NamedIdentifierNode *Node = Arena.alloc<NamedIdentifierNode>(); 1440 Node->Name = "`anonymous namespace'"; 1441 size_t EndPos = MangledName.find('@'); 1442 if (EndPos == StringView::npos) { 1443 Error = true; 1444 return nullptr; 1445 } 1446 StringView NamespaceKey = MangledName.substr(0, EndPos); 1447 memorizeString(NamespaceKey); 1448 MangledName = MangledName.substr(EndPos + 1); 1449 return Node; 1450 } 1451 1452 NamedIdentifierNode * 1453 Demangler::demangleLocallyScopedNamePiece(StringView &MangledName) { 1454 assert(startsWithLocalScopePattern(MangledName)); 1455 1456 NamedIdentifierNode *Identifier = Arena.alloc<NamedIdentifierNode>(); 1457 consumeFront(MangledName, '?'); 1458 uint64_t Number = 0; 1459 bool IsNegative = false; 1460 std::tie(Number, IsNegative) = demangleNumber(MangledName); 1461 assert(!IsNegative); 1462 1463 // One ? to terminate the number 1464 consumeFront(MangledName, '?'); 1465 1466 assert(!Error); 1467 Node *Scope = parse(MangledName); 1468 if (Error) 1469 return nullptr; 1470 1471 // Render the parent symbol's name into a buffer. 1472 OutputBuffer OB; 1473 OB << '`'; 1474 Scope->output(OB, OF_Default); 1475 OB << '\''; 1476 OB << "::`" << Number << "'"; 1477 1478 Identifier->Name = copyString(OB); 1479 std::free(OB.getBuffer()); 1480 return Identifier; 1481 } 1482 1483 // Parses a type name in the form of A@B@C@@ which represents C::B::A. 1484 QualifiedNameNode * 1485 Demangler::demangleFullyQualifiedTypeName(StringView &MangledName) { 1486 IdentifierNode *Identifier = 1487 demangleUnqualifiedTypeName(MangledName, /*Memorize=*/true); 1488 if (Error) 1489 return nullptr; 1490 assert(Identifier); 1491 1492 QualifiedNameNode *QN = demangleNameScopeChain(MangledName, Identifier); 1493 if (Error) 1494 return nullptr; 1495 assert(QN); 1496 return QN; 1497 } 1498 1499 // Parses a symbol name in the form of A@B@C@@ which represents C::B::A. 1500 // Symbol names have slightly different rules regarding what can appear 1501 // so we separate out the implementations for flexibility. 1502 QualifiedNameNode * 1503 Demangler::demangleFullyQualifiedSymbolName(StringView &MangledName) { 1504 // This is the final component of a symbol name (i.e. the leftmost component 1505 // of a mangled name. Since the only possible template instantiation that 1506 // can appear in this context is a function template, and since those are 1507 // not saved for the purposes of name backreferences, only backref simple 1508 // names. 1509 IdentifierNode *Identifier = 1510 demangleUnqualifiedSymbolName(MangledName, NBB_Simple); 1511 if (Error) 1512 return nullptr; 1513 1514 QualifiedNameNode *QN = demangleNameScopeChain(MangledName, Identifier); 1515 if (Error) 1516 return nullptr; 1517 1518 if (Identifier->kind() == NodeKind::StructorIdentifier) { 1519 if (QN->Components->Count < 2) { 1520 Error = true; 1521 return nullptr; 1522 } 1523 StructorIdentifierNode *SIN = 1524 static_cast<StructorIdentifierNode *>(Identifier); 1525 Node *ClassNode = QN->Components->Nodes[QN->Components->Count - 2]; 1526 SIN->Class = static_cast<IdentifierNode *>(ClassNode); 1527 } 1528 assert(QN); 1529 return QN; 1530 } 1531 1532 IdentifierNode *Demangler::demangleUnqualifiedTypeName(StringView &MangledName, 1533 bool Memorize) { 1534 // An inner-most name can be a back-reference, because a fully-qualified name 1535 // (e.g. Scope + Inner) can contain other fully qualified names inside of 1536 // them (for example template parameters), and these nested parameters can 1537 // refer to previously mangled types. 1538 if (startsWithDigit(MangledName)) 1539 return demangleBackRefName(MangledName); 1540 1541 if (MangledName.startsWith("?$")) 1542 return demangleTemplateInstantiationName(MangledName, NBB_Template); 1543 1544 return demangleSimpleName(MangledName, Memorize); 1545 } 1546 1547 IdentifierNode * 1548 Demangler::demangleUnqualifiedSymbolName(StringView &MangledName, 1549 NameBackrefBehavior NBB) { 1550 if (startsWithDigit(MangledName)) 1551 return demangleBackRefName(MangledName); 1552 if (MangledName.startsWith("?$")) 1553 return demangleTemplateInstantiationName(MangledName, NBB); 1554 if (MangledName.startsWith('?')) 1555 return demangleFunctionIdentifierCode(MangledName); 1556 return demangleSimpleName(MangledName, /*Memorize=*/(NBB & NBB_Simple) != 0); 1557 } 1558 1559 IdentifierNode *Demangler::demangleNameScopePiece(StringView &MangledName) { 1560 if (startsWithDigit(MangledName)) 1561 return demangleBackRefName(MangledName); 1562 1563 if (MangledName.startsWith("?$")) 1564 return demangleTemplateInstantiationName(MangledName, NBB_Template); 1565 1566 if (MangledName.startsWith("?A")) 1567 return demangleAnonymousNamespaceName(MangledName); 1568 1569 if (startsWithLocalScopePattern(MangledName)) 1570 return demangleLocallyScopedNamePiece(MangledName); 1571 1572 return demangleSimpleName(MangledName, /*Memorize=*/true); 1573 } 1574 1575 static NodeArrayNode *nodeListToNodeArray(ArenaAllocator &Arena, NodeList *Head, 1576 size_t Count) { 1577 NodeArrayNode *N = Arena.alloc<NodeArrayNode>(); 1578 N->Count = Count; 1579 N->Nodes = Arena.allocArray<Node *>(Count); 1580 for (size_t I = 0; I < Count; ++I) { 1581 N->Nodes[I] = Head->N; 1582 Head = Head->Next; 1583 } 1584 return N; 1585 } 1586 1587 QualifiedNameNode * 1588 Demangler::demangleNameScopeChain(StringView &MangledName, 1589 IdentifierNode *UnqualifiedName) { 1590 NodeList *Head = Arena.alloc<NodeList>(); 1591 1592 Head->N = UnqualifiedName; 1593 1594 size_t Count = 1; 1595 while (!consumeFront(MangledName, "@")) { 1596 ++Count; 1597 NodeList *NewHead = Arena.alloc<NodeList>(); 1598 NewHead->Next = Head; 1599 Head = NewHead; 1600 1601 if (MangledName.empty()) { 1602 Error = true; 1603 return nullptr; 1604 } 1605 1606 assert(!Error); 1607 IdentifierNode *Elem = demangleNameScopePiece(MangledName); 1608 if (Error) 1609 return nullptr; 1610 1611 Head->N = Elem; 1612 } 1613 1614 QualifiedNameNode *QN = Arena.alloc<QualifiedNameNode>(); 1615 QN->Components = nodeListToNodeArray(Arena, Head, Count); 1616 return QN; 1617 } 1618 1619 FuncClass Demangler::demangleFunctionClass(StringView &MangledName) { 1620 const char F = MangledName.front(); 1621 MangledName.remove_prefix(1); 1622 switch (F) { 1623 case '9': 1624 return FuncClass(FC_ExternC | FC_NoParameterList); 1625 case 'A': 1626 return FC_Private; 1627 case 'B': 1628 return FuncClass(FC_Private | FC_Far); 1629 case 'C': 1630 return FuncClass(FC_Private | FC_Static); 1631 case 'D': 1632 return FuncClass(FC_Private | FC_Static | FC_Far); 1633 case 'E': 1634 return FuncClass(FC_Private | FC_Virtual); 1635 case 'F': 1636 return FuncClass(FC_Private | FC_Virtual | FC_Far); 1637 case 'G': 1638 return FuncClass(FC_Private | FC_StaticThisAdjust); 1639 case 'H': 1640 return FuncClass(FC_Private | FC_StaticThisAdjust | FC_Far); 1641 case 'I': 1642 return FuncClass(FC_Protected); 1643 case 'J': 1644 return FuncClass(FC_Protected | FC_Far); 1645 case 'K': 1646 return FuncClass(FC_Protected | FC_Static); 1647 case 'L': 1648 return FuncClass(FC_Protected | FC_Static | FC_Far); 1649 case 'M': 1650 return FuncClass(FC_Protected | FC_Virtual); 1651 case 'N': 1652 return FuncClass(FC_Protected | FC_Virtual | FC_Far); 1653 case 'O': 1654 return FuncClass(FC_Protected | FC_Virtual | FC_StaticThisAdjust); 1655 case 'P': 1656 return FuncClass(FC_Protected | FC_Virtual | FC_StaticThisAdjust | FC_Far); 1657 case 'Q': 1658 return FuncClass(FC_Public); 1659 case 'R': 1660 return FuncClass(FC_Public | FC_Far); 1661 case 'S': 1662 return FuncClass(FC_Public | FC_Static); 1663 case 'T': 1664 return FuncClass(FC_Public | FC_Static | FC_Far); 1665 case 'U': 1666 return FuncClass(FC_Public | FC_Virtual); 1667 case 'V': 1668 return FuncClass(FC_Public | FC_Virtual | FC_Far); 1669 case 'W': 1670 return FuncClass(FC_Public | FC_Virtual | FC_StaticThisAdjust); 1671 case 'X': 1672 return FuncClass(FC_Public | FC_Virtual | FC_StaticThisAdjust | FC_Far); 1673 case 'Y': 1674 return FuncClass(FC_Global); 1675 case 'Z': 1676 return FuncClass(FC_Global | FC_Far); 1677 case '$': { 1678 FuncClass VFlag = FC_VirtualThisAdjust; 1679 if (consumeFront(MangledName, 'R')) 1680 VFlag = FuncClass(VFlag | FC_VirtualThisAdjustEx); 1681 if (MangledName.empty()) 1682 break; 1683 const char F = MangledName.front(); 1684 MangledName.remove_prefix(1); 1685 switch (F) { 1686 case '0': 1687 return FuncClass(FC_Private | FC_Virtual | VFlag); 1688 case '1': 1689 return FuncClass(FC_Private | FC_Virtual | VFlag | FC_Far); 1690 case '2': 1691 return FuncClass(FC_Protected | FC_Virtual | VFlag); 1692 case '3': 1693 return FuncClass(FC_Protected | FC_Virtual | VFlag | FC_Far); 1694 case '4': 1695 return FuncClass(FC_Public | FC_Virtual | VFlag); 1696 case '5': 1697 return FuncClass(FC_Public | FC_Virtual | VFlag | FC_Far); 1698 } 1699 } 1700 } 1701 1702 Error = true; 1703 return FC_Public; 1704 } 1705 1706 CallingConv Demangler::demangleCallingConvention(StringView &MangledName) { 1707 if (MangledName.empty()) { 1708 Error = true; 1709 return CallingConv::None; 1710 } 1711 1712 const char F = MangledName.front(); 1713 MangledName.remove_prefix(1); 1714 switch (F) { 1715 case 'A': 1716 case 'B': 1717 return CallingConv::Cdecl; 1718 case 'C': 1719 case 'D': 1720 return CallingConv::Pascal; 1721 case 'E': 1722 case 'F': 1723 return CallingConv::Thiscall; 1724 case 'G': 1725 case 'H': 1726 return CallingConv::Stdcall; 1727 case 'I': 1728 case 'J': 1729 return CallingConv::Fastcall; 1730 case 'M': 1731 case 'N': 1732 return CallingConv::Clrcall; 1733 case 'O': 1734 case 'P': 1735 return CallingConv::Eabi; 1736 case 'Q': 1737 return CallingConv::Vectorcall; 1738 case 'S': 1739 return CallingConv::Swift; 1740 case 'W': 1741 return CallingConv::SwiftAsync; 1742 } 1743 1744 return CallingConv::None; 1745 } 1746 1747 StorageClass Demangler::demangleVariableStorageClass(StringView &MangledName) { 1748 assert(MangledName.front() >= '0' && MangledName.front() <= '4'); 1749 1750 const char F = MangledName.front(); 1751 MangledName.remove_prefix(1); 1752 switch (F) { 1753 case '0': 1754 return StorageClass::PrivateStatic; 1755 case '1': 1756 return StorageClass::ProtectedStatic; 1757 case '2': 1758 return StorageClass::PublicStatic; 1759 case '3': 1760 return StorageClass::Global; 1761 case '4': 1762 return StorageClass::FunctionLocalStatic; 1763 } 1764 DEMANGLE_UNREACHABLE; 1765 } 1766 1767 std::pair<Qualifiers, bool> 1768 Demangler::demangleQualifiers(StringView &MangledName) { 1769 if (MangledName.empty()) { 1770 Error = true; 1771 return std::make_pair(Q_None, false); 1772 } 1773 1774 const char F = MangledName.front(); 1775 MangledName.remove_prefix(1); 1776 switch (F) { 1777 // Member qualifiers 1778 case 'Q': 1779 return std::make_pair(Q_None, true); 1780 case 'R': 1781 return std::make_pair(Q_Const, true); 1782 case 'S': 1783 return std::make_pair(Q_Volatile, true); 1784 case 'T': 1785 return std::make_pair(Qualifiers(Q_Const | Q_Volatile), true); 1786 // Non-Member qualifiers 1787 case 'A': 1788 return std::make_pair(Q_None, false); 1789 case 'B': 1790 return std::make_pair(Q_Const, false); 1791 case 'C': 1792 return std::make_pair(Q_Volatile, false); 1793 case 'D': 1794 return std::make_pair(Qualifiers(Q_Const | Q_Volatile), false); 1795 } 1796 Error = true; 1797 return std::make_pair(Q_None, false); 1798 } 1799 1800 // <variable-type> ::= <type> <cvr-qualifiers> 1801 // ::= <type> <pointee-cvr-qualifiers> # pointers, references 1802 TypeNode *Demangler::demangleType(StringView &MangledName, 1803 QualifierMangleMode QMM) { 1804 Qualifiers Quals = Q_None; 1805 bool IsMember = false; 1806 if (QMM == QualifierMangleMode::Mangle) { 1807 std::tie(Quals, IsMember) = demangleQualifiers(MangledName); 1808 } else if (QMM == QualifierMangleMode::Result) { 1809 if (consumeFront(MangledName, '?')) 1810 std::tie(Quals, IsMember) = demangleQualifiers(MangledName); 1811 } 1812 1813 if (MangledName.empty()) { 1814 Error = true; 1815 return nullptr; 1816 } 1817 1818 TypeNode *Ty = nullptr; 1819 if (isTagType(MangledName)) 1820 Ty = demangleClassType(MangledName); 1821 else if (isPointerType(MangledName)) { 1822 if (isMemberPointer(MangledName, Error)) 1823 Ty = demangleMemberPointerType(MangledName); 1824 else if (!Error) 1825 Ty = demanglePointerType(MangledName); 1826 else 1827 return nullptr; 1828 } else if (isArrayType(MangledName)) 1829 Ty = demangleArrayType(MangledName); 1830 else if (isFunctionType(MangledName)) { 1831 if (consumeFront(MangledName, "$$A8@@")) 1832 Ty = demangleFunctionType(MangledName, true); 1833 else { 1834 assert(MangledName.startsWith("$$A6")); 1835 consumeFront(MangledName, "$$A6"); 1836 Ty = demangleFunctionType(MangledName, false); 1837 } 1838 } else if (isCustomType(MangledName)) { 1839 Ty = demangleCustomType(MangledName); 1840 } else { 1841 Ty = demanglePrimitiveType(MangledName); 1842 } 1843 1844 if (!Ty || Error) 1845 return Ty; 1846 Ty->Quals = Qualifiers(Ty->Quals | Quals); 1847 return Ty; 1848 } 1849 1850 bool Demangler::demangleThrowSpecification(StringView &MangledName) { 1851 if (consumeFront(MangledName, "_E")) 1852 return true; 1853 if (consumeFront(MangledName, 'Z')) 1854 return false; 1855 1856 Error = true; 1857 return false; 1858 } 1859 1860 FunctionSignatureNode *Demangler::demangleFunctionType(StringView &MangledName, 1861 bool HasThisQuals) { 1862 FunctionSignatureNode *FTy = Arena.alloc<FunctionSignatureNode>(); 1863 1864 if (HasThisQuals) { 1865 FTy->Quals = demanglePointerExtQualifiers(MangledName); 1866 FTy->RefQualifier = demangleFunctionRefQualifier(MangledName); 1867 FTy->Quals = Qualifiers(FTy->Quals | demangleQualifiers(MangledName).first); 1868 } 1869 1870 // Fields that appear on both member and non-member functions. 1871 FTy->CallConvention = demangleCallingConvention(MangledName); 1872 1873 // <return-type> ::= <type> 1874 // ::= @ # structors (they have no declared return type) 1875 bool IsStructor = consumeFront(MangledName, '@'); 1876 if (!IsStructor) 1877 FTy->ReturnType = demangleType(MangledName, QualifierMangleMode::Result); 1878 1879 FTy->Params = demangleFunctionParameterList(MangledName, FTy->IsVariadic); 1880 1881 FTy->IsNoexcept = demangleThrowSpecification(MangledName); 1882 1883 return FTy; 1884 } 1885 1886 FunctionSymbolNode * 1887 Demangler::demangleFunctionEncoding(StringView &MangledName) { 1888 FuncClass ExtraFlags = FC_None; 1889 if (consumeFront(MangledName, "$$J0")) 1890 ExtraFlags = FC_ExternC; 1891 1892 if (MangledName.empty()) { 1893 Error = true; 1894 return nullptr; 1895 } 1896 1897 FuncClass FC = demangleFunctionClass(MangledName); 1898 FC = FuncClass(ExtraFlags | FC); 1899 1900 FunctionSignatureNode *FSN = nullptr; 1901 ThunkSignatureNode *TTN = nullptr; 1902 if (FC & FC_StaticThisAdjust) { 1903 TTN = Arena.alloc<ThunkSignatureNode>(); 1904 TTN->ThisAdjust.StaticOffset = demangleSigned(MangledName); 1905 } else if (FC & FC_VirtualThisAdjust) { 1906 TTN = Arena.alloc<ThunkSignatureNode>(); 1907 if (FC & FC_VirtualThisAdjustEx) { 1908 TTN->ThisAdjust.VBPtrOffset = demangleSigned(MangledName); 1909 TTN->ThisAdjust.VBOffsetOffset = demangleSigned(MangledName); 1910 } 1911 TTN->ThisAdjust.VtordispOffset = demangleSigned(MangledName); 1912 TTN->ThisAdjust.StaticOffset = demangleSigned(MangledName); 1913 } 1914 1915 if (FC & FC_NoParameterList) { 1916 // This is an extern "C" function whose full signature hasn't been mangled. 1917 // This happens when we need to mangle a local symbol inside of an extern 1918 // "C" function. 1919 FSN = Arena.alloc<FunctionSignatureNode>(); 1920 } else { 1921 bool HasThisQuals = !(FC & (FC_Global | FC_Static)); 1922 FSN = demangleFunctionType(MangledName, HasThisQuals); 1923 } 1924 1925 if (Error) 1926 return nullptr; 1927 1928 if (TTN) { 1929 *static_cast<FunctionSignatureNode *>(TTN) = *FSN; 1930 FSN = TTN; 1931 } 1932 FSN->FunctionClass = FC; 1933 1934 FunctionSymbolNode *Symbol = Arena.alloc<FunctionSymbolNode>(); 1935 Symbol->Signature = FSN; 1936 return Symbol; 1937 } 1938 1939 CustomTypeNode *Demangler::demangleCustomType(StringView &MangledName) { 1940 assert(MangledName.startsWith('?')); 1941 MangledName.remove_prefix(1); 1942 1943 CustomTypeNode *CTN = Arena.alloc<CustomTypeNode>(); 1944 CTN->Identifier = demangleUnqualifiedTypeName(MangledName, /*Memorize=*/true); 1945 if (!consumeFront(MangledName, '@')) 1946 Error = true; 1947 if (Error) 1948 return nullptr; 1949 return CTN; 1950 } 1951 1952 // Reads a primitive type. 1953 PrimitiveTypeNode *Demangler::demanglePrimitiveType(StringView &MangledName) { 1954 if (consumeFront(MangledName, "$$T")) 1955 return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Nullptr); 1956 1957 const char F = MangledName.front(); 1958 MangledName.remove_prefix(1); 1959 switch (F) { 1960 case 'X': 1961 return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Void); 1962 case 'D': 1963 return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Char); 1964 case 'C': 1965 return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Schar); 1966 case 'E': 1967 return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Uchar); 1968 case 'F': 1969 return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Short); 1970 case 'G': 1971 return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Ushort); 1972 case 'H': 1973 return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Int); 1974 case 'I': 1975 return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Uint); 1976 case 'J': 1977 return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Long); 1978 case 'K': 1979 return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Ulong); 1980 case 'M': 1981 return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Float); 1982 case 'N': 1983 return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Double); 1984 case 'O': 1985 return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Ldouble); 1986 case '_': { 1987 if (MangledName.empty()) { 1988 Error = true; 1989 return nullptr; 1990 } 1991 const char F = MangledName.front(); 1992 MangledName.remove_prefix(1); 1993 switch (F) { 1994 case 'N': 1995 return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Bool); 1996 case 'J': 1997 return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Int64); 1998 case 'K': 1999 return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Uint64); 2000 case 'W': 2001 return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Wchar); 2002 case 'Q': 2003 return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Char8); 2004 case 'S': 2005 return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Char16); 2006 case 'U': 2007 return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Char32); 2008 } 2009 break; 2010 } 2011 } 2012 Error = true; 2013 return nullptr; 2014 } 2015 2016 TagTypeNode *Demangler::demangleClassType(StringView &MangledName) { 2017 TagTypeNode *TT = nullptr; 2018 2019 const char F = MangledName.front(); 2020 MangledName.remove_prefix(1); 2021 switch (F) { 2022 case 'T': 2023 TT = Arena.alloc<TagTypeNode>(TagKind::Union); 2024 break; 2025 case 'U': 2026 TT = Arena.alloc<TagTypeNode>(TagKind::Struct); 2027 break; 2028 case 'V': 2029 TT = Arena.alloc<TagTypeNode>(TagKind::Class); 2030 break; 2031 case 'W': 2032 if (!consumeFront(MangledName, '4')) { 2033 Error = true; 2034 return nullptr; 2035 } 2036 TT = Arena.alloc<TagTypeNode>(TagKind::Enum); 2037 break; 2038 default: 2039 assert(false); 2040 } 2041 2042 TT->QualifiedName = demangleFullyQualifiedTypeName(MangledName); 2043 return TT; 2044 } 2045 2046 // <pointer-type> ::= E? <pointer-cvr-qualifiers> <ext-qualifiers> <type> 2047 // # the E is required for 64-bit non-static pointers 2048 PointerTypeNode *Demangler::demanglePointerType(StringView &MangledName) { 2049 PointerTypeNode *Pointer = Arena.alloc<PointerTypeNode>(); 2050 2051 std::tie(Pointer->Quals, Pointer->Affinity) = 2052 demanglePointerCVQualifiers(MangledName); 2053 2054 if (consumeFront(MangledName, "6")) { 2055 Pointer->Pointee = demangleFunctionType(MangledName, false); 2056 return Pointer; 2057 } 2058 2059 Qualifiers ExtQuals = demanglePointerExtQualifiers(MangledName); 2060 Pointer->Quals = Qualifiers(Pointer->Quals | ExtQuals); 2061 2062 Pointer->Pointee = demangleType(MangledName, QualifierMangleMode::Mangle); 2063 return Pointer; 2064 } 2065 2066 PointerTypeNode *Demangler::demangleMemberPointerType(StringView &MangledName) { 2067 PointerTypeNode *Pointer = Arena.alloc<PointerTypeNode>(); 2068 2069 std::tie(Pointer->Quals, Pointer->Affinity) = 2070 demanglePointerCVQualifiers(MangledName); 2071 assert(Pointer->Affinity == PointerAffinity::Pointer); 2072 2073 Qualifiers ExtQuals = demanglePointerExtQualifiers(MangledName); 2074 Pointer->Quals = Qualifiers(Pointer->Quals | ExtQuals); 2075 2076 // isMemberPointer() only returns true if there is at least one character 2077 // after the qualifiers. 2078 if (consumeFront(MangledName, "8")) { 2079 Pointer->ClassParent = demangleFullyQualifiedTypeName(MangledName); 2080 Pointer->Pointee = demangleFunctionType(MangledName, true); 2081 } else { 2082 Qualifiers PointeeQuals = Q_None; 2083 bool IsMember = false; 2084 std::tie(PointeeQuals, IsMember) = demangleQualifiers(MangledName); 2085 assert(IsMember || Error); 2086 Pointer->ClassParent = demangleFullyQualifiedTypeName(MangledName); 2087 2088 Pointer->Pointee = demangleType(MangledName, QualifierMangleMode::Drop); 2089 if (Pointer->Pointee) 2090 Pointer->Pointee->Quals = PointeeQuals; 2091 } 2092 2093 return Pointer; 2094 } 2095 2096 Qualifiers Demangler::demanglePointerExtQualifiers(StringView &MangledName) { 2097 Qualifiers Quals = Q_None; 2098 if (consumeFront(MangledName, 'E')) 2099 Quals = Qualifiers(Quals | Q_Pointer64); 2100 if (consumeFront(MangledName, 'I')) 2101 Quals = Qualifiers(Quals | Q_Restrict); 2102 if (consumeFront(MangledName, 'F')) 2103 Quals = Qualifiers(Quals | Q_Unaligned); 2104 2105 return Quals; 2106 } 2107 2108 ArrayTypeNode *Demangler::demangleArrayType(StringView &MangledName) { 2109 assert(MangledName.front() == 'Y'); 2110 MangledName.remove_prefix(1); 2111 2112 uint64_t Rank = 0; 2113 bool IsNegative = false; 2114 std::tie(Rank, IsNegative) = demangleNumber(MangledName); 2115 if (IsNegative || Rank == 0) { 2116 Error = true; 2117 return nullptr; 2118 } 2119 2120 ArrayTypeNode *ATy = Arena.alloc<ArrayTypeNode>(); 2121 NodeList *Head = Arena.alloc<NodeList>(); 2122 NodeList *Tail = Head; 2123 2124 for (uint64_t I = 0; I < Rank; ++I) { 2125 uint64_t D = 0; 2126 std::tie(D, IsNegative) = demangleNumber(MangledName); 2127 if (Error || IsNegative) { 2128 Error = true; 2129 return nullptr; 2130 } 2131 Tail->N = Arena.alloc<IntegerLiteralNode>(D, IsNegative); 2132 if (I + 1 < Rank) { 2133 Tail->Next = Arena.alloc<NodeList>(); 2134 Tail = Tail->Next; 2135 } 2136 } 2137 ATy->Dimensions = nodeListToNodeArray(Arena, Head, Rank); 2138 2139 if (consumeFront(MangledName, "$$C")) { 2140 bool IsMember = false; 2141 std::tie(ATy->Quals, IsMember) = demangleQualifiers(MangledName); 2142 if (IsMember) { 2143 Error = true; 2144 return nullptr; 2145 } 2146 } 2147 2148 ATy->ElementType = demangleType(MangledName, QualifierMangleMode::Drop); 2149 return ATy; 2150 } 2151 2152 // Reads a function's parameters. 2153 NodeArrayNode *Demangler::demangleFunctionParameterList(StringView &MangledName, 2154 bool &IsVariadic) { 2155 // Empty parameter list. 2156 if (consumeFront(MangledName, 'X')) 2157 return nullptr; 2158 2159 NodeList *Head = Arena.alloc<NodeList>(); 2160 NodeList **Current = &Head; 2161 size_t Count = 0; 2162 while (!Error && !MangledName.startsWith('@') && 2163 !MangledName.startsWith('Z')) { 2164 ++Count; 2165 2166 if (startsWithDigit(MangledName)) { 2167 size_t N = MangledName[0] - '0'; 2168 if (N >= Backrefs.FunctionParamCount) { 2169 Error = true; 2170 return nullptr; 2171 } 2172 MangledName.remove_prefix(1); 2173 2174 *Current = Arena.alloc<NodeList>(); 2175 (*Current)->N = Backrefs.FunctionParams[N]; 2176 Current = &(*Current)->Next; 2177 continue; 2178 } 2179 2180 size_t OldSize = MangledName.size(); 2181 2182 *Current = Arena.alloc<NodeList>(); 2183 TypeNode *TN = demangleType(MangledName, QualifierMangleMode::Drop); 2184 if (!TN || Error) 2185 return nullptr; 2186 2187 (*Current)->N = TN; 2188 2189 size_t CharsConsumed = OldSize - MangledName.size(); 2190 assert(CharsConsumed != 0); 2191 2192 // Single-letter types are ignored for backreferences because memorizing 2193 // them doesn't save anything. 2194 if (Backrefs.FunctionParamCount <= 9 && CharsConsumed > 1) 2195 Backrefs.FunctionParams[Backrefs.FunctionParamCount++] = TN; 2196 2197 Current = &(*Current)->Next; 2198 } 2199 2200 if (Error) 2201 return nullptr; 2202 2203 NodeArrayNode *NA = nodeListToNodeArray(Arena, Head, Count); 2204 // A non-empty parameter list is terminated by either 'Z' (variadic) parameter 2205 // list or '@' (non variadic). Careful not to consume "@Z", as in that case 2206 // the following Z could be a throw specifier. 2207 if (consumeFront(MangledName, '@')) 2208 return NA; 2209 2210 if (consumeFront(MangledName, 'Z')) { 2211 IsVariadic = true; 2212 return NA; 2213 } 2214 2215 DEMANGLE_UNREACHABLE; 2216 } 2217 2218 NodeArrayNode * 2219 Demangler::demangleTemplateParameterList(StringView &MangledName) { 2220 NodeList *Head = nullptr; 2221 NodeList **Current = &Head; 2222 size_t Count = 0; 2223 2224 while (!MangledName.startsWith('@')) { 2225 if (consumeFront(MangledName, "$S") || consumeFront(MangledName, "$$V") || 2226 consumeFront(MangledName, "$$$V") || consumeFront(MangledName, "$$Z")) { 2227 // parameter pack separator 2228 continue; 2229 } 2230 2231 ++Count; 2232 2233 // Template parameter lists don't participate in back-referencing. 2234 *Current = Arena.alloc<NodeList>(); 2235 2236 NodeList &TP = **Current; 2237 2238 TemplateParameterReferenceNode *TPRN = nullptr; 2239 if (consumeFront(MangledName, "$$Y")) { 2240 // Template alias 2241 TP.N = demangleFullyQualifiedTypeName(MangledName); 2242 } else if (consumeFront(MangledName, "$$B")) { 2243 // Array 2244 TP.N = demangleType(MangledName, QualifierMangleMode::Drop); 2245 } else if (consumeFront(MangledName, "$$C")) { 2246 // Type has qualifiers. 2247 TP.N = demangleType(MangledName, QualifierMangleMode::Mangle); 2248 } else if (MangledName.startsWith("$1") || MangledName.startsWith("$H") || 2249 MangledName.startsWith("$I") || MangledName.startsWith("$J")) { 2250 // Pointer to member 2251 TP.N = TPRN = Arena.alloc<TemplateParameterReferenceNode>(); 2252 TPRN->IsMemberPointer = true; 2253 2254 MangledName.remove_prefix(1); 2255 // 1 - single inheritance <name> 2256 // H - multiple inheritance <name> <number> 2257 // I - virtual inheritance <name> <number> <number> 2258 // J - unspecified inheritance <name> <number> <number> <number> 2259 char InheritanceSpecifier = MangledName.front(); 2260 MangledName.remove_prefix(1); 2261 SymbolNode *S = nullptr; 2262 if (MangledName.startsWith('?')) { 2263 S = parse(MangledName); 2264 if (Error || !S->Name) { 2265 Error = true; 2266 return nullptr; 2267 } 2268 memorizeIdentifier(S->Name->getUnqualifiedIdentifier()); 2269 } 2270 2271 switch (InheritanceSpecifier) { 2272 case 'J': 2273 TPRN->ThunkOffsets[TPRN->ThunkOffsetCount++] = 2274 demangleSigned(MangledName); 2275 DEMANGLE_FALLTHROUGH; 2276 case 'I': 2277 TPRN->ThunkOffsets[TPRN->ThunkOffsetCount++] = 2278 demangleSigned(MangledName); 2279 DEMANGLE_FALLTHROUGH; 2280 case 'H': 2281 TPRN->ThunkOffsets[TPRN->ThunkOffsetCount++] = 2282 demangleSigned(MangledName); 2283 DEMANGLE_FALLTHROUGH; 2284 case '1': 2285 break; 2286 default: 2287 DEMANGLE_UNREACHABLE; 2288 } 2289 TPRN->Affinity = PointerAffinity::Pointer; 2290 TPRN->Symbol = S; 2291 } else if (MangledName.startsWith("$E?")) { 2292 consumeFront(MangledName, "$E"); 2293 // Reference to symbol 2294 TP.N = TPRN = Arena.alloc<TemplateParameterReferenceNode>(); 2295 TPRN->Symbol = parse(MangledName); 2296 TPRN->Affinity = PointerAffinity::Reference; 2297 } else if (MangledName.startsWith("$F") || MangledName.startsWith("$G")) { 2298 TP.N = TPRN = Arena.alloc<TemplateParameterReferenceNode>(); 2299 2300 // Data member pointer. 2301 MangledName.remove_prefix(1); 2302 char InheritanceSpecifier = MangledName.front(); 2303 MangledName.remove_prefix(1); 2304 2305 switch (InheritanceSpecifier) { 2306 case 'G': 2307 TPRN->ThunkOffsets[TPRN->ThunkOffsetCount++] = 2308 demangleSigned(MangledName); 2309 DEMANGLE_FALLTHROUGH; 2310 case 'F': 2311 TPRN->ThunkOffsets[TPRN->ThunkOffsetCount++] = 2312 demangleSigned(MangledName); 2313 TPRN->ThunkOffsets[TPRN->ThunkOffsetCount++] = 2314 demangleSigned(MangledName); 2315 break; 2316 default: 2317 DEMANGLE_UNREACHABLE; 2318 } 2319 TPRN->IsMemberPointer = true; 2320 2321 } else if (consumeFront(MangledName, "$0")) { 2322 // Integral non-type template parameter 2323 bool IsNegative = false; 2324 uint64_t Value = 0; 2325 std::tie(Value, IsNegative) = demangleNumber(MangledName); 2326 2327 TP.N = Arena.alloc<IntegerLiteralNode>(Value, IsNegative); 2328 } else { 2329 TP.N = demangleType(MangledName, QualifierMangleMode::Drop); 2330 } 2331 if (Error) 2332 return nullptr; 2333 2334 Current = &TP.Next; 2335 } 2336 2337 // The loop above returns nullptr on Error. 2338 assert(!Error); 2339 2340 // Template parameter lists cannot be variadic, so it can only be terminated 2341 // by @ (as opposed to 'Z' in the function parameter case). 2342 assert(MangledName.startsWith('@')); // The above loop exits only on '@'. 2343 consumeFront(MangledName, '@'); 2344 return nodeListToNodeArray(Arena, Head, Count); 2345 } 2346 2347 void Demangler::dumpBackReferences() { 2348 std::printf("%d function parameter backreferences\n", 2349 (int)Backrefs.FunctionParamCount); 2350 2351 // Create an output stream so we can render each type. 2352 OutputBuffer OB; 2353 for (size_t I = 0; I < Backrefs.FunctionParamCount; ++I) { 2354 OB.setCurrentPosition(0); 2355 2356 TypeNode *T = Backrefs.FunctionParams[I]; 2357 T->output(OB, OF_Default); 2358 2359 StringView B = OB; 2360 std::printf(" [%d] - %.*s\n", (int)I, (int)B.size(), B.begin()); 2361 } 2362 std::free(OB.getBuffer()); 2363 2364 if (Backrefs.FunctionParamCount > 0) 2365 std::printf("\n"); 2366 std::printf("%d name backreferences\n", (int)Backrefs.NamesCount); 2367 for (size_t I = 0; I < Backrefs.NamesCount; ++I) { 2368 std::printf(" [%d] - %.*s\n", (int)I, (int)Backrefs.Names[I]->Name.size(), 2369 Backrefs.Names[I]->Name.begin()); 2370 } 2371 if (Backrefs.NamesCount > 0) 2372 std::printf("\n"); 2373 } 2374 2375 char *llvm::microsoftDemangle(const char *MangledName, size_t *NMangled, 2376 char *Buf, size_t *N, 2377 int *Status, MSDemangleFlags Flags) { 2378 Demangler D; 2379 2380 StringView Name{MangledName}; 2381 SymbolNode *AST = D.parse(Name); 2382 if (!D.Error && NMangled) 2383 *NMangled = Name.begin() - MangledName; 2384 2385 if (Flags & MSDF_DumpBackrefs) 2386 D.dumpBackReferences(); 2387 2388 OutputFlags OF = OF_Default; 2389 if (Flags & MSDF_NoCallingConvention) 2390 OF = OutputFlags(OF | OF_NoCallingConvention); 2391 if (Flags & MSDF_NoAccessSpecifier) 2392 OF = OutputFlags(OF | OF_NoAccessSpecifier); 2393 if (Flags & MSDF_NoReturnType) 2394 OF = OutputFlags(OF | OF_NoReturnType); 2395 if (Flags & MSDF_NoMemberType) 2396 OF = OutputFlags(OF | OF_NoMemberType); 2397 if (Flags & MSDF_NoVariableType) 2398 OF = OutputFlags(OF | OF_NoVariableType); 2399 2400 int InternalStatus = demangle_success; 2401 if (D.Error) 2402 InternalStatus = demangle_invalid_mangled_name; 2403 else { 2404 OutputBuffer OB(Buf, N); 2405 AST->output(OB, OF); 2406 OB += '\0'; 2407 if (N != nullptr) 2408 *N = OB.getCurrentPosition(); 2409 Buf = OB.getBuffer(); 2410 } 2411 2412 if (Status) 2413 *Status = InternalStatus; 2414 return InternalStatus == demangle_success ? Buf : nullptr; 2415 } 2416