xref: /llvm-project/llvm/lib/Demangle/MicrosoftDemangle.cpp (revision d81cdb49d74064e88843733e7da92db865943509)
1 //===- MicrosoftDemangle.cpp ----------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file defines a demangler for MSVC-style mangled symbols.
10 //
11 // This file has no dependencies on the rest of LLVM so that it can be
12 // easily reused in other programs such as libcxxabi.
13 //
14 //===----------------------------------------------------------------------===//
15 
16 #include "llvm/Demangle/MicrosoftDemangle.h"
17 #include "llvm/Demangle/Demangle.h"
18 #include "llvm/Demangle/MicrosoftDemangleNodes.h"
19 
20 #include "llvm/Demangle/DemangleConfig.h"
21 #include "llvm/Demangle/StringView.h"
22 #include "llvm/Demangle/Utility.h"
23 
24 #include <array>
25 #include <cctype>
26 #include <cstdio>
27 #include <tuple>
28 
29 using namespace llvm;
30 using namespace ms_demangle;
31 
32 static bool startsWithDigit(StringView S) {
33   return !S.empty() && std::isdigit(S.front());
34 }
35 
36 struct NodeList {
37   Node *N = nullptr;
38   NodeList *Next = nullptr;
39 };
40 
41 static bool consumeFront(StringView &S, char C) {
42   if (!S.startsWith(C))
43     return false;
44   S.remove_prefix(1);
45   return true;
46 }
47 
48 static bool consumeFront(StringView &S, StringView C) {
49   if (!S.startsWith(C))
50     return false;
51   S.remove_prefix(C.size());
52   return true;
53 }
54 
55 static bool isMemberPointer(StringView MangledName, bool &Error) {
56   Error = false;
57   const char F = MangledName.front();
58   MangledName.remove_prefix(1);
59   switch (F) {
60   case '$':
61     // This is probably an rvalue reference (e.g. $$Q), and you cannot have an
62     // rvalue reference to a member.
63     return false;
64   case 'A':
65     // 'A' indicates a reference, and you cannot have a reference to a member
66     // function or member.
67     return false;
68   case 'P':
69   case 'Q':
70   case 'R':
71   case 'S':
72     // These 4 values indicate some kind of pointer, but we still don't know
73     // what.
74     break;
75   default:
76     // isMemberPointer() is called only if isPointerType() returns true,
77     // and it rejects other prefixes.
78     DEMANGLE_UNREACHABLE;
79   }
80 
81   // If it starts with a number, then 6 indicates a non-member function
82   // pointer, and 8 indicates a member function pointer.
83   if (startsWithDigit(MangledName)) {
84     if (MangledName[0] != '6' && MangledName[0] != '8') {
85       Error = true;
86       return false;
87     }
88     return (MangledName[0] == '8');
89   }
90 
91   // Remove ext qualifiers since those can appear on either type and are
92   // therefore not indicative.
93   consumeFront(MangledName, 'E'); // 64-bit
94   consumeFront(MangledName, 'I'); // restrict
95   consumeFront(MangledName, 'F'); // unaligned
96 
97   if (MangledName.empty()) {
98     Error = true;
99     return false;
100   }
101 
102   // The next value should be either ABCD (non-member) or QRST (member).
103   switch (MangledName.front()) {
104   case 'A':
105   case 'B':
106   case 'C':
107   case 'D':
108     return false;
109   case 'Q':
110   case 'R':
111   case 'S':
112   case 'T':
113     return true;
114   default:
115     Error = true;
116     return false;
117   }
118 }
119 
120 static SpecialIntrinsicKind
121 consumeSpecialIntrinsicKind(StringView &MangledName) {
122   if (consumeFront(MangledName, "?_7"))
123     return SpecialIntrinsicKind::Vftable;
124   if (consumeFront(MangledName, "?_8"))
125     return SpecialIntrinsicKind::Vbtable;
126   if (consumeFront(MangledName, "?_9"))
127     return SpecialIntrinsicKind::VcallThunk;
128   if (consumeFront(MangledName, "?_A"))
129     return SpecialIntrinsicKind::Typeof;
130   if (consumeFront(MangledName, "?_B"))
131     return SpecialIntrinsicKind::LocalStaticGuard;
132   if (consumeFront(MangledName, "?_C"))
133     return SpecialIntrinsicKind::StringLiteralSymbol;
134   if (consumeFront(MangledName, "?_P"))
135     return SpecialIntrinsicKind::UdtReturning;
136   if (consumeFront(MangledName, "?_R0"))
137     return SpecialIntrinsicKind::RttiTypeDescriptor;
138   if (consumeFront(MangledName, "?_R1"))
139     return SpecialIntrinsicKind::RttiBaseClassDescriptor;
140   if (consumeFront(MangledName, "?_R2"))
141     return SpecialIntrinsicKind::RttiBaseClassArray;
142   if (consumeFront(MangledName, "?_R3"))
143     return SpecialIntrinsicKind::RttiClassHierarchyDescriptor;
144   if (consumeFront(MangledName, "?_R4"))
145     return SpecialIntrinsicKind::RttiCompleteObjLocator;
146   if (consumeFront(MangledName, "?_S"))
147     return SpecialIntrinsicKind::LocalVftable;
148   if (consumeFront(MangledName, "?__E"))
149     return SpecialIntrinsicKind::DynamicInitializer;
150   if (consumeFront(MangledName, "?__F"))
151     return SpecialIntrinsicKind::DynamicAtexitDestructor;
152   if (consumeFront(MangledName, "?__J"))
153     return SpecialIntrinsicKind::LocalStaticThreadGuard;
154   return SpecialIntrinsicKind::None;
155 }
156 
157 static bool startsWithLocalScopePattern(StringView S) {
158   if (!consumeFront(S, '?'))
159     return false;
160 
161   size_t End = S.find('?');
162   if (End == StringView::npos)
163     return false;
164   StringView Candidate = S.substr(0, End);
165   if (Candidate.empty())
166     return false;
167 
168   // \?[0-9]\?
169   // ?@? is the discriminator 0.
170   if (Candidate.size() == 1)
171     return Candidate[0] == '@' || (Candidate[0] >= '0' && Candidate[0] <= '9');
172 
173   // If it's not 0-9, then it's an encoded number terminated with an @
174   if (Candidate.back() != '@')
175     return false;
176   Candidate.remove_suffix(1);
177 
178   // An encoded number starts with B-P and all subsequent digits are in A-P.
179   // Note that the reason the first digit cannot be A is two fold.  First, it
180   // would create an ambiguity with ?A which delimits the beginning of an
181   // anonymous namespace.  Second, A represents 0, and you don't start a multi
182   // digit number with a leading 0.  Presumably the anonymous namespace
183   // ambiguity is also why single digit encoded numbers use 0-9 rather than A-J.
184   if (Candidate[0] < 'B' || Candidate[0] > 'P')
185     return false;
186   Candidate.remove_prefix(1);
187   while (!Candidate.empty()) {
188     if (Candidate[0] < 'A' || Candidate[0] > 'P')
189       return false;
190     Candidate.remove_prefix(1);
191   }
192 
193   return true;
194 }
195 
196 static bool isTagType(StringView S) {
197   switch (S.front()) {
198   case 'T': // union
199   case 'U': // struct
200   case 'V': // class
201   case 'W': // enum
202     return true;
203   }
204   return false;
205 }
206 
207 static bool isCustomType(StringView S) { return S[0] == '?'; }
208 
209 static bool isPointerType(StringView S) {
210   if (S.startsWith("$$Q")) // foo &&
211     return true;
212 
213   switch (S.front()) {
214   case 'A': // foo &
215   case 'P': // foo *
216   case 'Q': // foo *const
217   case 'R': // foo *volatile
218   case 'S': // foo *const volatile
219     return true;
220   }
221   return false;
222 }
223 
224 static bool isArrayType(StringView S) { return S[0] == 'Y'; }
225 
226 static bool isFunctionType(StringView S) {
227   return S.startsWith("$$A8@@") || S.startsWith("$$A6");
228 }
229 
230 static FunctionRefQualifier
231 demangleFunctionRefQualifier(StringView &MangledName) {
232   if (consumeFront(MangledName, 'G'))
233     return FunctionRefQualifier::Reference;
234   else if (consumeFront(MangledName, 'H'))
235     return FunctionRefQualifier::RValueReference;
236   return FunctionRefQualifier::None;
237 }
238 
239 static std::pair<Qualifiers, PointerAffinity>
240 demanglePointerCVQualifiers(StringView &MangledName) {
241   if (consumeFront(MangledName, "$$Q"))
242     return std::make_pair(Q_None, PointerAffinity::RValueReference);
243 
244   const char F = MangledName.front();
245   MangledName.remove_prefix(1);
246   switch (F) {
247   case 'A':
248     return std::make_pair(Q_None, PointerAffinity::Reference);
249   case 'P':
250     return std::make_pair(Q_None, PointerAffinity::Pointer);
251   case 'Q':
252     return std::make_pair(Q_Const, PointerAffinity::Pointer);
253   case 'R':
254     return std::make_pair(Q_Volatile, PointerAffinity::Pointer);
255   case 'S':
256     return std::make_pair(Qualifiers(Q_Const | Q_Volatile),
257                           PointerAffinity::Pointer);
258   }
259   // This function is only called if isPointerType() returns true,
260   // and it only returns true for the six cases listed above.
261   DEMANGLE_UNREACHABLE;
262 }
263 
264 StringView Demangler::copyString(StringView Borrowed) {
265   char *Stable = Arena.allocUnalignedBuffer(Borrowed.size());
266   // This is not a micro-optimization, it avoids UB, should Borrowed be an null
267   // buffer.
268   if (Borrowed.size())
269     std::memcpy(Stable, Borrowed.begin(), Borrowed.size());
270 
271   return {Stable, Borrowed.size()};
272 }
273 
274 SpecialTableSymbolNode *
275 Demangler::demangleSpecialTableSymbolNode(StringView &MangledName,
276                                           SpecialIntrinsicKind K) {
277   NamedIdentifierNode *NI = Arena.alloc<NamedIdentifierNode>();
278   switch (K) {
279   case SpecialIntrinsicKind::Vftable:
280     NI->Name = "`vftable'";
281     break;
282   case SpecialIntrinsicKind::Vbtable:
283     NI->Name = "`vbtable'";
284     break;
285   case SpecialIntrinsicKind::LocalVftable:
286     NI->Name = "`local vftable'";
287     break;
288   case SpecialIntrinsicKind::RttiCompleteObjLocator:
289     NI->Name = "`RTTI Complete Object Locator'";
290     break;
291   default:
292     DEMANGLE_UNREACHABLE;
293   }
294   QualifiedNameNode *QN = demangleNameScopeChain(MangledName, NI);
295   SpecialTableSymbolNode *STSN = Arena.alloc<SpecialTableSymbolNode>();
296   STSN->Name = QN;
297   bool IsMember = false;
298   if (MangledName.empty()) {
299     Error = true;
300     return nullptr;
301   }
302   char Front = MangledName.front();
303   MangledName.remove_prefix(1);
304   if (Front != '6' && Front != '7') {
305     Error = true;
306     return nullptr;
307   }
308 
309   std::tie(STSN->Quals, IsMember) = demangleQualifiers(MangledName);
310   if (!consumeFront(MangledName, '@'))
311     STSN->TargetName = demangleFullyQualifiedTypeName(MangledName);
312   return STSN;
313 }
314 
315 LocalStaticGuardVariableNode *
316 Demangler::demangleLocalStaticGuard(StringView &MangledName, bool IsThread) {
317   LocalStaticGuardIdentifierNode *LSGI =
318       Arena.alloc<LocalStaticGuardIdentifierNode>();
319   LSGI->IsThread = IsThread;
320   QualifiedNameNode *QN = demangleNameScopeChain(MangledName, LSGI);
321   LocalStaticGuardVariableNode *LSGVN =
322       Arena.alloc<LocalStaticGuardVariableNode>();
323   LSGVN->Name = QN;
324 
325   if (consumeFront(MangledName, "4IA"))
326     LSGVN->IsVisible = false;
327   else if (consumeFront(MangledName, "5"))
328     LSGVN->IsVisible = true;
329   else {
330     Error = true;
331     return nullptr;
332   }
333 
334   if (!MangledName.empty())
335     LSGI->ScopeIndex = demangleUnsigned(MangledName);
336   return LSGVN;
337 }
338 
339 static NamedIdentifierNode *synthesizeNamedIdentifier(ArenaAllocator &Arena,
340                                                       StringView Name) {
341   NamedIdentifierNode *Id = Arena.alloc<NamedIdentifierNode>();
342   Id->Name = Name;
343   return Id;
344 }
345 
346 static QualifiedNameNode *synthesizeQualifiedName(ArenaAllocator &Arena,
347                                                   IdentifierNode *Identifier) {
348   QualifiedNameNode *QN = Arena.alloc<QualifiedNameNode>();
349   QN->Components = Arena.alloc<NodeArrayNode>();
350   QN->Components->Count = 1;
351   QN->Components->Nodes = Arena.allocArray<Node *>(1);
352   QN->Components->Nodes[0] = Identifier;
353   return QN;
354 }
355 
356 static QualifiedNameNode *synthesizeQualifiedName(ArenaAllocator &Arena,
357                                                   StringView Name) {
358   NamedIdentifierNode *Id = synthesizeNamedIdentifier(Arena, Name);
359   return synthesizeQualifiedName(Arena, Id);
360 }
361 
362 static VariableSymbolNode *synthesizeVariable(ArenaAllocator &Arena,
363                                               TypeNode *Type,
364                                               StringView VariableName) {
365   VariableSymbolNode *VSN = Arena.alloc<VariableSymbolNode>();
366   VSN->Type = Type;
367   VSN->Name = synthesizeQualifiedName(Arena, VariableName);
368   return VSN;
369 }
370 
371 VariableSymbolNode *Demangler::demangleUntypedVariable(
372     ArenaAllocator &Arena, StringView &MangledName, StringView VariableName) {
373   NamedIdentifierNode *NI = synthesizeNamedIdentifier(Arena, VariableName);
374   QualifiedNameNode *QN = demangleNameScopeChain(MangledName, NI);
375   VariableSymbolNode *VSN = Arena.alloc<VariableSymbolNode>();
376   VSN->Name = QN;
377   if (consumeFront(MangledName, "8"))
378     return VSN;
379 
380   Error = true;
381   return nullptr;
382 }
383 
384 VariableSymbolNode *
385 Demangler::demangleRttiBaseClassDescriptorNode(ArenaAllocator &Arena,
386                                                StringView &MangledName) {
387   RttiBaseClassDescriptorNode *RBCDN =
388       Arena.alloc<RttiBaseClassDescriptorNode>();
389   RBCDN->NVOffset = demangleUnsigned(MangledName);
390   RBCDN->VBPtrOffset = demangleSigned(MangledName);
391   RBCDN->VBTableOffset = demangleUnsigned(MangledName);
392   RBCDN->Flags = demangleUnsigned(MangledName);
393   if (Error)
394     return nullptr;
395 
396   VariableSymbolNode *VSN = Arena.alloc<VariableSymbolNode>();
397   VSN->Name = demangleNameScopeChain(MangledName, RBCDN);
398   consumeFront(MangledName, '8');
399   return VSN;
400 }
401 
402 FunctionSymbolNode *Demangler::demangleInitFiniStub(StringView &MangledName,
403                                                     bool IsDestructor) {
404   DynamicStructorIdentifierNode *DSIN =
405       Arena.alloc<DynamicStructorIdentifierNode>();
406   DSIN->IsDestructor = IsDestructor;
407 
408   bool IsKnownStaticDataMember = false;
409   if (consumeFront(MangledName, '?'))
410     IsKnownStaticDataMember = true;
411 
412   SymbolNode *Symbol = demangleDeclarator(MangledName);
413   if (Error)
414     return nullptr;
415 
416   FunctionSymbolNode *FSN = nullptr;
417 
418   if (Symbol->kind() == NodeKind::VariableSymbol) {
419     DSIN->Variable = static_cast<VariableSymbolNode *>(Symbol);
420 
421     // Older versions of clang mangled this type of symbol incorrectly.  They
422     // would omit the leading ? and they would only emit a single @ at the end.
423     // The correct mangling is a leading ? and 2 trailing @ signs.  Handle
424     // both cases.
425     int AtCount = IsKnownStaticDataMember ? 2 : 1;
426     for (int I = 0; I < AtCount; ++I) {
427       if (consumeFront(MangledName, '@'))
428         continue;
429       Error = true;
430       return nullptr;
431     }
432 
433     FSN = demangleFunctionEncoding(MangledName);
434     if (FSN)
435       FSN->Name = synthesizeQualifiedName(Arena, DSIN);
436   } else {
437     if (IsKnownStaticDataMember) {
438       // This was supposed to be a static data member, but we got a function.
439       Error = true;
440       return nullptr;
441     }
442 
443     FSN = static_cast<FunctionSymbolNode *>(Symbol);
444     DSIN->Name = Symbol->Name;
445     FSN->Name = synthesizeQualifiedName(Arena, DSIN);
446   }
447 
448   return FSN;
449 }
450 
451 SymbolNode *Demangler::demangleSpecialIntrinsic(StringView &MangledName) {
452   SpecialIntrinsicKind SIK = consumeSpecialIntrinsicKind(MangledName);
453 
454   switch (SIK) {
455   case SpecialIntrinsicKind::None:
456     return nullptr;
457   case SpecialIntrinsicKind::StringLiteralSymbol:
458     return demangleStringLiteral(MangledName);
459   case SpecialIntrinsicKind::Vftable:
460   case SpecialIntrinsicKind::Vbtable:
461   case SpecialIntrinsicKind::LocalVftable:
462   case SpecialIntrinsicKind::RttiCompleteObjLocator:
463     return demangleSpecialTableSymbolNode(MangledName, SIK);
464   case SpecialIntrinsicKind::VcallThunk:
465     return demangleVcallThunkNode(MangledName);
466   case SpecialIntrinsicKind::LocalStaticGuard:
467     return demangleLocalStaticGuard(MangledName, /*IsThread=*/false);
468   case SpecialIntrinsicKind::LocalStaticThreadGuard:
469     return demangleLocalStaticGuard(MangledName, /*IsThread=*/true);
470   case SpecialIntrinsicKind::RttiTypeDescriptor: {
471     TypeNode *T = demangleType(MangledName, QualifierMangleMode::Result);
472     if (Error)
473       break;
474     if (!consumeFront(MangledName, "@8"))
475       break;
476     if (!MangledName.empty())
477       break;
478     return synthesizeVariable(Arena, T, "`RTTI Type Descriptor'");
479   }
480   case SpecialIntrinsicKind::RttiBaseClassArray:
481     return demangleUntypedVariable(Arena, MangledName,
482                                    "`RTTI Base Class Array'");
483   case SpecialIntrinsicKind::RttiClassHierarchyDescriptor:
484     return demangleUntypedVariable(Arena, MangledName,
485                                    "`RTTI Class Hierarchy Descriptor'");
486   case SpecialIntrinsicKind::RttiBaseClassDescriptor:
487     return demangleRttiBaseClassDescriptorNode(Arena, MangledName);
488   case SpecialIntrinsicKind::DynamicInitializer:
489     return demangleInitFiniStub(MangledName, /*IsDestructor=*/false);
490   case SpecialIntrinsicKind::DynamicAtexitDestructor:
491     return demangleInitFiniStub(MangledName, /*IsDestructor=*/true);
492   case SpecialIntrinsicKind::Typeof:
493   case SpecialIntrinsicKind::UdtReturning:
494     // It's unclear which tools produces these manglings, so demangling
495     // support is not (yet?) implemented.
496     break;
497   case SpecialIntrinsicKind::Unknown:
498     DEMANGLE_UNREACHABLE; // Never returned by consumeSpecialIntrinsicKind.
499   }
500   Error = true;
501   return nullptr;
502 }
503 
504 IdentifierNode *
505 Demangler::demangleFunctionIdentifierCode(StringView &MangledName) {
506   assert(MangledName.startsWith('?'));
507   MangledName.remove_prefix(1);
508   if (MangledName.empty()) {
509     Error = true;
510     return nullptr;
511   }
512 
513   if (consumeFront(MangledName, "__"))
514     return demangleFunctionIdentifierCode(
515         MangledName, FunctionIdentifierCodeGroup::DoubleUnder);
516   if (consumeFront(MangledName, "_"))
517     return demangleFunctionIdentifierCode(MangledName,
518                                           FunctionIdentifierCodeGroup::Under);
519   return demangleFunctionIdentifierCode(MangledName,
520                                         FunctionIdentifierCodeGroup::Basic);
521 }
522 
523 StructorIdentifierNode *
524 Demangler::demangleStructorIdentifier(StringView &MangledName,
525                                       bool IsDestructor) {
526   StructorIdentifierNode *N = Arena.alloc<StructorIdentifierNode>();
527   N->IsDestructor = IsDestructor;
528   return N;
529 }
530 
531 ConversionOperatorIdentifierNode *
532 Demangler::demangleConversionOperatorIdentifier(StringView &MangledName) {
533   ConversionOperatorIdentifierNode *N =
534       Arena.alloc<ConversionOperatorIdentifierNode>();
535   return N;
536 }
537 
538 LiteralOperatorIdentifierNode *
539 Demangler::demangleLiteralOperatorIdentifier(StringView &MangledName) {
540   LiteralOperatorIdentifierNode *N =
541       Arena.alloc<LiteralOperatorIdentifierNode>();
542   N->Name = demangleSimpleString(MangledName, /*Memorize=*/false);
543   return N;
544 }
545 
546 IntrinsicFunctionKind
547 Demangler::translateIntrinsicFunctionCode(char CH,
548                                           FunctionIdentifierCodeGroup Group) {
549   using IFK = IntrinsicFunctionKind;
550   if (!(CH >= '0' && CH <= '9') && !(CH >= 'A' && CH <= 'Z')) {
551     Error = true;
552     return IFK::None;
553   }
554 
555   // Not all ? identifiers are intrinsics *functions*.  This function only maps
556   // operator codes for the special functions, all others are handled elsewhere,
557   // hence the IFK::None entries in the table.
558   static IFK Basic[36] = {
559       IFK::None,             // ?0 # Foo::Foo()
560       IFK::None,             // ?1 # Foo::~Foo()
561       IFK::New,              // ?2 # operator new
562       IFK::Delete,           // ?3 # operator delete
563       IFK::Assign,           // ?4 # operator=
564       IFK::RightShift,       // ?5 # operator>>
565       IFK::LeftShift,        // ?6 # operator<<
566       IFK::LogicalNot,       // ?7 # operator!
567       IFK::Equals,           // ?8 # operator==
568       IFK::NotEquals,        // ?9 # operator!=
569       IFK::ArraySubscript,   // ?A # operator[]
570       IFK::None,             // ?B # Foo::operator <type>()
571       IFK::Pointer,          // ?C # operator->
572       IFK::Dereference,      // ?D # operator*
573       IFK::Increment,        // ?E # operator++
574       IFK::Decrement,        // ?F # operator--
575       IFK::Minus,            // ?G # operator-
576       IFK::Plus,             // ?H # operator+
577       IFK::BitwiseAnd,       // ?I # operator&
578       IFK::MemberPointer,    // ?J # operator->*
579       IFK::Divide,           // ?K # operator/
580       IFK::Modulus,          // ?L # operator%
581       IFK::LessThan,         // ?M operator<
582       IFK::LessThanEqual,    // ?N operator<=
583       IFK::GreaterThan,      // ?O operator>
584       IFK::GreaterThanEqual, // ?P operator>=
585       IFK::Comma,            // ?Q operator,
586       IFK::Parens,           // ?R operator()
587       IFK::BitwiseNot,       // ?S operator~
588       IFK::BitwiseXor,       // ?T operator^
589       IFK::BitwiseOr,        // ?U operator|
590       IFK::LogicalAnd,       // ?V operator&&
591       IFK::LogicalOr,        // ?W operator||
592       IFK::TimesEqual,       // ?X operator*=
593       IFK::PlusEqual,        // ?Y operator+=
594       IFK::MinusEqual,       // ?Z operator-=
595   };
596   static IFK Under[36] = {
597       IFK::DivEqual,           // ?_0 operator/=
598       IFK::ModEqual,           // ?_1 operator%=
599       IFK::RshEqual,           // ?_2 operator>>=
600       IFK::LshEqual,           // ?_3 operator<<=
601       IFK::BitwiseAndEqual,    // ?_4 operator&=
602       IFK::BitwiseOrEqual,     // ?_5 operator|=
603       IFK::BitwiseXorEqual,    // ?_6 operator^=
604       IFK::None,               // ?_7 # vftable
605       IFK::None,               // ?_8 # vbtable
606       IFK::None,               // ?_9 # vcall
607       IFK::None,               // ?_A # typeof
608       IFK::None,               // ?_B # local static guard
609       IFK::None,               // ?_C # string literal
610       IFK::VbaseDtor,          // ?_D # vbase destructor
611       IFK::VecDelDtor,         // ?_E # vector deleting destructor
612       IFK::DefaultCtorClosure, // ?_F # default constructor closure
613       IFK::ScalarDelDtor,      // ?_G # scalar deleting destructor
614       IFK::VecCtorIter,        // ?_H # vector constructor iterator
615       IFK::VecDtorIter,        // ?_I # vector destructor iterator
616       IFK::VecVbaseCtorIter,   // ?_J # vector vbase constructor iterator
617       IFK::VdispMap,           // ?_K # virtual displacement map
618       IFK::EHVecCtorIter,      // ?_L # eh vector constructor iterator
619       IFK::EHVecDtorIter,      // ?_M # eh vector destructor iterator
620       IFK::EHVecVbaseCtorIter, // ?_N # eh vector vbase constructor iterator
621       IFK::CopyCtorClosure,    // ?_O # copy constructor closure
622       IFK::None,               // ?_P<name> # udt returning <name>
623       IFK::None,               // ?_Q # <unknown>
624       IFK::None,               // ?_R0 - ?_R4 # RTTI Codes
625       IFK::None,               // ?_S # local vftable
626       IFK::LocalVftableCtorClosure, // ?_T # local vftable constructor closure
627       IFK::ArrayNew,                // ?_U operator new[]
628       IFK::ArrayDelete,             // ?_V operator delete[]
629       IFK::None,                    // ?_W <unused>
630       IFK::None,                    // ?_X <unused>
631       IFK::None,                    // ?_Y <unused>
632       IFK::None,                    // ?_Z <unused>
633   };
634   static IFK DoubleUnder[36] = {
635       IFK::None,                       // ?__0 <unused>
636       IFK::None,                       // ?__1 <unused>
637       IFK::None,                       // ?__2 <unused>
638       IFK::None,                       // ?__3 <unused>
639       IFK::None,                       // ?__4 <unused>
640       IFK::None,                       // ?__5 <unused>
641       IFK::None,                       // ?__6 <unused>
642       IFK::None,                       // ?__7 <unused>
643       IFK::None,                       // ?__8 <unused>
644       IFK::None,                       // ?__9 <unused>
645       IFK::ManVectorCtorIter,          // ?__A managed vector ctor iterator
646       IFK::ManVectorDtorIter,          // ?__B managed vector dtor iterator
647       IFK::EHVectorCopyCtorIter,       // ?__C EH vector copy ctor iterator
648       IFK::EHVectorVbaseCopyCtorIter,  // ?__D EH vector vbase copy ctor iter
649       IFK::None,                       // ?__E dynamic initializer for `T'
650       IFK::None,                       // ?__F dynamic atexit destructor for `T'
651       IFK::VectorCopyCtorIter,         // ?__G vector copy constructor iter
652       IFK::VectorVbaseCopyCtorIter,    // ?__H vector vbase copy ctor iter
653       IFK::ManVectorVbaseCopyCtorIter, // ?__I managed vector vbase copy ctor
654                                        // iter
655       IFK::None,                       // ?__J local static thread guard
656       IFK::None,                       // ?__K operator ""_name
657       IFK::CoAwait,                    // ?__L operator co_await
658       IFK::Spaceship,                  // ?__M operator<=>
659       IFK::None,                       // ?__N <unused>
660       IFK::None,                       // ?__O <unused>
661       IFK::None,                       // ?__P <unused>
662       IFK::None,                       // ?__Q <unused>
663       IFK::None,                       // ?__R <unused>
664       IFK::None,                       // ?__S <unused>
665       IFK::None,                       // ?__T <unused>
666       IFK::None,                       // ?__U <unused>
667       IFK::None,                       // ?__V <unused>
668       IFK::None,                       // ?__W <unused>
669       IFK::None,                       // ?__X <unused>
670       IFK::None,                       // ?__Y <unused>
671       IFK::None,                       // ?__Z <unused>
672   };
673 
674   int Index = (CH >= '0' && CH <= '9') ? (CH - '0') : (CH - 'A' + 10);
675   switch (Group) {
676   case FunctionIdentifierCodeGroup::Basic:
677     return Basic[Index];
678   case FunctionIdentifierCodeGroup::Under:
679     return Under[Index];
680   case FunctionIdentifierCodeGroup::DoubleUnder:
681     return DoubleUnder[Index];
682   }
683   DEMANGLE_UNREACHABLE;
684 }
685 
686 IdentifierNode *
687 Demangler::demangleFunctionIdentifierCode(StringView &MangledName,
688                                           FunctionIdentifierCodeGroup Group) {
689   if (MangledName.empty()) {
690     Error = true;
691     return nullptr;
692   }
693   const char CH = MangledName.front();
694   switch (Group) {
695   case FunctionIdentifierCodeGroup::Basic:
696     MangledName.remove_prefix(1);
697     switch (CH) {
698     case '0':
699     case '1':
700       return demangleStructorIdentifier(MangledName, CH == '1');
701     case 'B':
702       return demangleConversionOperatorIdentifier(MangledName);
703     default:
704       return Arena.alloc<IntrinsicFunctionIdentifierNode>(
705           translateIntrinsicFunctionCode(CH, Group));
706     }
707   case FunctionIdentifierCodeGroup::Under:
708     MangledName.remove_prefix(1);
709     return Arena.alloc<IntrinsicFunctionIdentifierNode>(
710         translateIntrinsicFunctionCode(CH, Group));
711   case FunctionIdentifierCodeGroup::DoubleUnder:
712     MangledName.remove_prefix(1);
713     switch (CH) {
714     case 'K':
715       return demangleLiteralOperatorIdentifier(MangledName);
716     default:
717       return Arena.alloc<IntrinsicFunctionIdentifierNode>(
718           translateIntrinsicFunctionCode(CH, Group));
719     }
720   }
721 
722   DEMANGLE_UNREACHABLE;
723 }
724 
725 SymbolNode *Demangler::demangleEncodedSymbol(StringView &MangledName,
726                                              QualifiedNameNode *Name) {
727   if (MangledName.empty()) {
728     Error = true;
729     return nullptr;
730   }
731 
732   // Read a variable.
733   switch (MangledName.front()) {
734   case '0':
735   case '1':
736   case '2':
737   case '3':
738   case '4': {
739     StorageClass SC = demangleVariableStorageClass(MangledName);
740     return demangleVariableEncoding(MangledName, SC);
741   }
742   }
743   FunctionSymbolNode *FSN = demangleFunctionEncoding(MangledName);
744 
745   IdentifierNode *UQN = Name->getUnqualifiedIdentifier();
746   if (UQN->kind() == NodeKind::ConversionOperatorIdentifier) {
747     ConversionOperatorIdentifierNode *COIN =
748         static_cast<ConversionOperatorIdentifierNode *>(UQN);
749     if (FSN)
750       COIN->TargetType = FSN->Signature->ReturnType;
751   }
752   return FSN;
753 }
754 
755 SymbolNode *Demangler::demangleDeclarator(StringView &MangledName) {
756   // What follows is a main symbol name. This may include namespaces or class
757   // back references.
758   QualifiedNameNode *QN = demangleFullyQualifiedSymbolName(MangledName);
759   if (Error)
760     return nullptr;
761 
762   SymbolNode *Symbol = demangleEncodedSymbol(MangledName, QN);
763   if (Error)
764     return nullptr;
765   Symbol->Name = QN;
766 
767   IdentifierNode *UQN = QN->getUnqualifiedIdentifier();
768   if (UQN->kind() == NodeKind::ConversionOperatorIdentifier) {
769     ConversionOperatorIdentifierNode *COIN =
770         static_cast<ConversionOperatorIdentifierNode *>(UQN);
771     if (!COIN->TargetType) {
772       Error = true;
773       return nullptr;
774     }
775   }
776   return Symbol;
777 }
778 
779 SymbolNode *Demangler::demangleMD5Name(StringView &MangledName) {
780   assert(MangledName.startsWith("??@"));
781   // This is an MD5 mangled name.  We can't demangle it, just return the
782   // mangled name.
783   // An MD5 mangled name is ??@ followed by 32 characters and a terminating @.
784   size_t MD5Last = MangledName.find('@', strlen("??@"));
785   if (MD5Last == StringView::npos) {
786     Error = true;
787     return nullptr;
788   }
789   const char *Start = MangledName.begin();
790   MangledName.remove_prefix(MD5Last + 1);
791 
792   // There are two additional special cases for MD5 names:
793   // 1. For complete object locators where the object name is long enough
794   //    for the object to have an MD5 name, the complete object locator is
795   //    called ??@...@??_R4@ (with a trailing "??_R4@" instead of the usual
796   //    leading "??_R4". This is handled here.
797   // 2. For catchable types, in versions of MSVC before 2015 (<1900) or after
798   //    2017.2 (>= 1914), the catchable type mangling is _CT??@...@??@...@8
799   //    instead of_CT??@...@8 with just one MD5 name. Since we don't yet
800   //    demangle catchable types anywhere, this isn't handled for MD5 names
801   //    either.
802   consumeFront(MangledName, "??_R4@");
803 
804   StringView MD5(Start, MangledName.begin() - Start);
805   SymbolNode *S = Arena.alloc<SymbolNode>(NodeKind::Md5Symbol);
806   S->Name = synthesizeQualifiedName(Arena, MD5);
807 
808   return S;
809 }
810 
811 SymbolNode *Demangler::demangleTypeinfoName(StringView &MangledName) {
812   assert(MangledName.startsWith('.'));
813   consumeFront(MangledName, '.');
814 
815   TypeNode *T = demangleType(MangledName, QualifierMangleMode::Result);
816   if (Error || !MangledName.empty()) {
817     Error = true;
818     return nullptr;
819   }
820   return synthesizeVariable(Arena, T, "`RTTI Type Descriptor Name'");
821 }
822 
823 // Parser entry point.
824 SymbolNode *Demangler::parse(StringView &MangledName) {
825   // Typeinfo names are strings stored in RTTI data. They're not symbol names.
826   // It's still useful to demangle them. They're the only demangled entity
827   // that doesn't start with a "?" but a ".".
828   if (MangledName.startsWith('.'))
829     return demangleTypeinfoName(MangledName);
830 
831   if (MangledName.startsWith("??@"))
832     return demangleMD5Name(MangledName);
833 
834   // MSVC-style mangled symbols must start with '?'.
835   if (!MangledName.startsWith('?')) {
836     Error = true;
837     return nullptr;
838   }
839 
840   consumeFront(MangledName, '?');
841 
842   // ?$ is a template instantiation, but all other names that start with ? are
843   // operators / special names.
844   if (SymbolNode *SI = demangleSpecialIntrinsic(MangledName))
845     return SI;
846 
847   return demangleDeclarator(MangledName);
848 }
849 
850 TagTypeNode *Demangler::parseTagUniqueName(StringView &MangledName) {
851   if (!consumeFront(MangledName, ".?A")) {
852     Error = true;
853     return nullptr;
854   }
855   consumeFront(MangledName, ".?A");
856   if (MangledName.empty()) {
857     Error = true;
858     return nullptr;
859   }
860 
861   return demangleClassType(MangledName);
862 }
863 
864 // <type-encoding> ::= <storage-class> <variable-type>
865 // <storage-class> ::= 0  # private static member
866 //                 ::= 1  # protected static member
867 //                 ::= 2  # public static member
868 //                 ::= 3  # global
869 //                 ::= 4  # static local
870 
871 VariableSymbolNode *Demangler::demangleVariableEncoding(StringView &MangledName,
872                                                         StorageClass SC) {
873   VariableSymbolNode *VSN = Arena.alloc<VariableSymbolNode>();
874 
875   VSN->Type = demangleType(MangledName, QualifierMangleMode::Drop);
876   VSN->SC = SC;
877 
878   if (Error)
879     return nullptr;
880 
881   // <variable-type> ::= <type> <cvr-qualifiers>
882   //                 ::= <type> <pointee-cvr-qualifiers> # pointers, references
883   switch (VSN->Type->kind()) {
884   case NodeKind::PointerType: {
885     PointerTypeNode *PTN = static_cast<PointerTypeNode *>(VSN->Type);
886 
887     Qualifiers ExtraChildQuals = Q_None;
888     PTN->Quals = Qualifiers(VSN->Type->Quals |
889                             demanglePointerExtQualifiers(MangledName));
890 
891     bool IsMember = false;
892     std::tie(ExtraChildQuals, IsMember) = demangleQualifiers(MangledName);
893 
894     if (PTN->ClassParent) {
895       QualifiedNameNode *BackRefName =
896           demangleFullyQualifiedTypeName(MangledName);
897       (void)BackRefName;
898     }
899     PTN->Pointee->Quals = Qualifiers(PTN->Pointee->Quals | ExtraChildQuals);
900 
901     break;
902   }
903   default:
904     VSN->Type->Quals = demangleQualifiers(MangledName).first;
905     break;
906   }
907 
908   return VSN;
909 }
910 
911 // Sometimes numbers are encoded in mangled symbols. For example,
912 // "int (*x)[20]" is a valid C type (x is a pointer to an array of
913 // length 20), so we need some way to embed numbers as part of symbols.
914 // This function parses it.
915 //
916 // <number>               ::= [?] <non-negative integer>
917 //
918 // <non-negative integer> ::= <decimal digit> # when 1 <= Number <= 10
919 //                        ::= <hex digit>+ @  # when Number == 0 or >= 10
920 //
921 // <hex-digit>            ::= [A-P]           # A = 0, B = 1, ...
922 std::pair<uint64_t, bool> Demangler::demangleNumber(StringView &MangledName) {
923   bool IsNegative = consumeFront(MangledName, '?');
924 
925   if (startsWithDigit(MangledName)) {
926     uint64_t Ret = MangledName[0] - '0' + 1;
927     MangledName.remove_prefix(1);
928     return {Ret, IsNegative};
929   }
930 
931   uint64_t Ret = 0;
932   for (size_t i = 0; i < MangledName.size(); ++i) {
933     char C = MangledName[i];
934     if (C == '@') {
935       MangledName.remove_prefix(i + 1);
936       return {Ret, IsNegative};
937     }
938     if ('A' <= C && C <= 'P') {
939       Ret = (Ret << 4) + (C - 'A');
940       continue;
941     }
942     break;
943   }
944 
945   Error = true;
946   return {0ULL, false};
947 }
948 
949 uint64_t Demangler::demangleUnsigned(StringView &MangledName) {
950   bool IsNegative = false;
951   uint64_t Number = 0;
952   std::tie(Number, IsNegative) = demangleNumber(MangledName);
953   if (IsNegative)
954     Error = true;
955   return Number;
956 }
957 
958 int64_t Demangler::demangleSigned(StringView &MangledName) {
959   bool IsNegative = false;
960   uint64_t Number = 0;
961   std::tie(Number, IsNegative) = demangleNumber(MangledName);
962   if (Number > INT64_MAX)
963     Error = true;
964   int64_t I = static_cast<int64_t>(Number);
965   return IsNegative ? -I : I;
966 }
967 
968 // First 10 strings can be referenced by special BackReferences ?0, ?1, ..., ?9.
969 // Memorize it.
970 void Demangler::memorizeString(StringView S) {
971   if (Backrefs.NamesCount >= BackrefContext::Max)
972     return;
973   for (size_t i = 0; i < Backrefs.NamesCount; ++i)
974     if (S == Backrefs.Names[i]->Name)
975       return;
976   NamedIdentifierNode *N = Arena.alloc<NamedIdentifierNode>();
977   N->Name = S;
978   Backrefs.Names[Backrefs.NamesCount++] = N;
979 }
980 
981 NamedIdentifierNode *Demangler::demangleBackRefName(StringView &MangledName) {
982   assert(startsWithDigit(MangledName));
983 
984   size_t I = MangledName[0] - '0';
985   if (I >= Backrefs.NamesCount) {
986     Error = true;
987     return nullptr;
988   }
989 
990   MangledName.remove_prefix(1);
991   return Backrefs.Names[I];
992 }
993 
994 void Demangler::memorizeIdentifier(IdentifierNode *Identifier) {
995   // Render this class template name into a string buffer so that we can
996   // memorize it for the purpose of back-referencing.
997   OutputBuffer OB;
998   Identifier->output(OB, OF_Default);
999   StringView Owned = copyString(OB);
1000   memorizeString(Owned);
1001   std::free(OB.getBuffer());
1002 }
1003 
1004 IdentifierNode *
1005 Demangler::demangleTemplateInstantiationName(StringView &MangledName,
1006                                              NameBackrefBehavior NBB) {
1007   assert(MangledName.startsWith("?$"));
1008   consumeFront(MangledName, "?$");
1009 
1010   BackrefContext OuterContext;
1011   std::swap(OuterContext, Backrefs);
1012 
1013   IdentifierNode *Identifier =
1014       demangleUnqualifiedSymbolName(MangledName, NBB_Simple);
1015   if (!Error)
1016     Identifier->TemplateParams = demangleTemplateParameterList(MangledName);
1017 
1018   std::swap(OuterContext, Backrefs);
1019   if (Error)
1020     return nullptr;
1021 
1022   if (NBB & NBB_Template) {
1023     // NBB_Template is only set for types and non-leaf names ("a::" in "a::b").
1024     // Structors and conversion operators only makes sense in a leaf name, so
1025     // reject them in NBB_Template contexts.
1026     if (Identifier->kind() == NodeKind::ConversionOperatorIdentifier ||
1027         Identifier->kind() == NodeKind::StructorIdentifier) {
1028       Error = true;
1029       return nullptr;
1030     }
1031 
1032     memorizeIdentifier(Identifier);
1033   }
1034 
1035   return Identifier;
1036 }
1037 
1038 NamedIdentifierNode *Demangler::demangleSimpleName(StringView &MangledName,
1039                                                    bool Memorize) {
1040   StringView S = demangleSimpleString(MangledName, Memorize);
1041   if (Error)
1042     return nullptr;
1043 
1044   NamedIdentifierNode *Name = Arena.alloc<NamedIdentifierNode>();
1045   Name->Name = S;
1046   return Name;
1047 }
1048 
// True when C is one of the sixteen "rebased" hex digits 'A' through 'P'.
static bool isRebasedHexDigit(char C) { return !(C < 'A' || C > 'P'); }
1050 
1051 static uint8_t rebasedHexDigitToNumber(char C) {
1052   assert(isRebasedHexDigit(C));
1053   return (C <= 'J') ? (C - 'A') : (10 + C - 'K');
1054 }
1055 
1056 uint8_t Demangler::demangleCharLiteral(StringView &MangledName) {
1057   assert(!MangledName.empty());
1058   if (!MangledName.startsWith('?')) {
1059     const uint8_t F = MangledName.front();
1060     MangledName.remove_prefix(1);
1061     return F;
1062   }
1063 
1064   MangledName.remove_prefix(1);
1065   if (MangledName.empty())
1066     goto CharLiteralError;
1067 
1068   if (consumeFront(MangledName, '$')) {
1069     // Two hex digits
1070     if (MangledName.size() < 2)
1071       goto CharLiteralError;
1072     StringView Nibbles = MangledName.substr(0, 2);
1073     if (!isRebasedHexDigit(Nibbles[0]) || !isRebasedHexDigit(Nibbles[1]))
1074       goto CharLiteralError;
1075     // Don't append the null terminator.
1076     uint8_t C1 = rebasedHexDigitToNumber(Nibbles[0]);
1077     uint8_t C2 = rebasedHexDigitToNumber(Nibbles[1]);
1078     MangledName.remove_prefix(2);
1079     return (C1 << 4) | C2;
1080   }
1081 
1082   if (startsWithDigit(MangledName)) {
1083     const char *Lookup = ",/\\:. \n\t'-";
1084     char C = Lookup[MangledName[0] - '0'];
1085     MangledName.remove_prefix(1);
1086     return C;
1087   }
1088 
1089   if (MangledName[0] >= 'a' && MangledName[0] <= 'z') {
1090     char Lookup[26] = {'\xE1', '\xE2', '\xE3', '\xE4', '\xE5', '\xE6', '\xE7',
1091                        '\xE8', '\xE9', '\xEA', '\xEB', '\xEC', '\xED', '\xEE',
1092                        '\xEF', '\xF0', '\xF1', '\xF2', '\xF3', '\xF4', '\xF5',
1093                        '\xF6', '\xF7', '\xF8', '\xF9', '\xFA'};
1094     char C = Lookup[MangledName[0] - 'a'];
1095     MangledName.remove_prefix(1);
1096     return C;
1097   }
1098 
1099   if (MangledName[0] >= 'A' && MangledName[0] <= 'Z') {
1100     char Lookup[26] = {'\xC1', '\xC2', '\xC3', '\xC4', '\xC5', '\xC6', '\xC7',
1101                        '\xC8', '\xC9', '\xCA', '\xCB', '\xCC', '\xCD', '\xCE',
1102                        '\xCF', '\xD0', '\xD1', '\xD2', '\xD3', '\xD4', '\xD5',
1103                        '\xD6', '\xD7', '\xD8', '\xD9', '\xDA'};
1104     char C = Lookup[MangledName[0] - 'A'];
1105     MangledName.remove_prefix(1);
1106     return C;
1107   }
1108 
1109 CharLiteralError:
1110   Error = true;
1111   return '\0';
1112 }
1113 
1114 wchar_t Demangler::demangleWcharLiteral(StringView &MangledName) {
1115   uint8_t C1, C2;
1116 
1117   C1 = demangleCharLiteral(MangledName);
1118   if (Error || MangledName.empty())
1119     goto WCharLiteralError;
1120   C2 = demangleCharLiteral(MangledName);
1121   if (Error)
1122     goto WCharLiteralError;
1123 
1124   return ((wchar_t)C1 << 8) | (wchar_t)C2;
1125 
1126 WCharLiteralError:
1127   Error = true;
1128   return L'\0';
1129 }
1130 
// Stores the upper-case hexadecimal character for Digit (0..15) at *Buffer.
static void writeHexDigit(char *Buffer, uint8_t Digit) {
  assert(Digit <= 15);
  // 0..9 are offsets from '0'; 10..15 are offsets from 'A' less ten.
  const char Base = (Digit < 10) ? '0' : ('A' - 10);
  *Buffer = Base + Digit;
}
1135 
1136 static void outputHex(OutputBuffer &OB, unsigned C) {
1137   assert (C != 0);
1138 
1139   // It's easier to do the math if we can work from right to left, but we need
1140   // to print the numbers from left to right.  So render this into a temporary
1141   // buffer first, then output the temporary buffer.  Each byte is of the form
1142   // \xAB, which means that each byte needs 4 characters.  Since there are at
1143   // most 4 bytes, we need a 4*4+1 = 17 character temporary buffer.
1144   char TempBuffer[17];
1145 
1146   ::memset(TempBuffer, 0, sizeof(TempBuffer));
1147   constexpr int MaxPos = sizeof(TempBuffer) - 1;
1148 
1149   int Pos = MaxPos - 1; // TempBuffer[MaxPos] is the terminating \0.
1150   while (C != 0) {
1151     for (int I = 0; I < 2; ++I) {
1152       writeHexDigit(&TempBuffer[Pos--], C % 16);
1153       C /= 16;
1154     }
1155   }
1156   TempBuffer[Pos--] = 'x';
1157   assert(Pos >= 0);
1158   TempBuffer[Pos--] = '\\';
1159   OB << StringView(&TempBuffer[Pos + 1]);
1160 }
1161 
1162 static void outputEscapedChar(OutputBuffer &OB, unsigned C) {
1163   switch (C) {
1164   case '\0': // nul
1165     OB << "\\0";
1166     return;
1167   case '\'': // single quote
1168     OB << "\\\'";
1169     return;
1170   case '\"': // double quote
1171     OB << "\\\"";
1172     return;
1173   case '\\': // backslash
1174     OB << "\\\\";
1175     return;
1176   case '\a': // bell
1177     OB << "\\a";
1178     return;
1179   case '\b': // backspace
1180     OB << "\\b";
1181     return;
1182   case '\f': // form feed
1183     OB << "\\f";
1184     return;
1185   case '\n': // new line
1186     OB << "\\n";
1187     return;
1188   case '\r': // carriage return
1189     OB << "\\r";
1190     return;
1191   case '\t': // tab
1192     OB << "\\t";
1193     return;
1194   case '\v': // vertical tab
1195     OB << "\\v";
1196     return;
1197   default:
1198     break;
1199   }
1200 
1201   if (C > 0x1F && C < 0x7F) {
1202     // Standard ascii char.
1203     OB << (char)C;
1204     return;
1205   }
1206 
1207   outputHex(OB, C);
1208 }
1209 
// Returns how many consecutive zero bytes end the first Length bytes of
// StringBytes. A Length of zero or less yields 0.
static unsigned countTrailingNullBytes(const uint8_t *StringBytes, int Length) {
  unsigned Count = 0;
  // Walk backwards by index. The previous pointer-based loop formed
  // StringBytes - 1 when Length was 0 or when every byte was zero, which is
  // out-of-bounds pointer arithmetic even though it was never dereferenced.
  for (int I = Length; I > 0 && StringBytes[I - 1] == 0; --I)
    ++Count;
  return Count;
}
1220 
// Returns the number of zero bytes among the first Length bytes of
// StringBytes.
static unsigned countEmbeddedNulls(const uint8_t *StringBytes,
                                   unsigned Length) {
  unsigned Nulls = 0;
  for (unsigned Index = 0; Index != Length; ++Index)
    Nulls += (StringBytes[Index] == 0) ? 1u : 0u;
  return Nulls;
}
1230 
1231 // A mangled (non-wide) string literal stores the total length of the string it
1232 // refers to (passed in NumBytes), and it contains up to 32 bytes of actual text
1233 // (passed in StringBytes, NumChars).
1234 static unsigned guessCharByteSize(const uint8_t *StringBytes, unsigned NumChars,
1235                                   uint64_t NumBytes) {
1236   assert(NumBytes > 0);
1237 
1238   // If the number of bytes is odd, this is guaranteed to be a char string.
1239   if (NumBytes % 2 == 1)
1240     return 1;
1241 
1242   // All strings can encode at most 32 bytes of data.  If it's less than that,
1243   // then we encoded the entire string.  In this case we check for a 1-byte,
1244   // 2-byte, or 4-byte null terminator.
1245   if (NumBytes < 32) {
1246     unsigned TrailingNulls = countTrailingNullBytes(StringBytes, NumChars);
1247     if (TrailingNulls >= 4 && NumBytes % 4 == 0)
1248       return 4;
1249     if (TrailingNulls >= 2)
1250       return 2;
1251     return 1;
1252   }
1253 
1254   // The whole string was not able to be encoded.  Try to look at embedded null
1255   // terminators to guess.  The heuristic is that we count all embedded null
1256   // terminators.  If more than 2/3 are null, it's a char32.  If more than 1/3
1257   // are null, it's a char16.  Otherwise it's a char8.  This obviously isn't
1258   // perfect and is biased towards languages that have ascii alphabets, but this
1259   // was always going to be best effort since the encoding is lossy.
1260   unsigned Nulls = countEmbeddedNulls(StringBytes, NumChars);
1261   if (Nulls >= 2 * NumChars / 3 && NumBytes % 4 == 0)
1262     return 4;
1263   if (Nulls >= NumChars / 3)
1264     return 2;
1265   return 1;
1266 }
1267 
// Reads the CharIndex-th character of width CharBytes (1, 2 or 4) from
// StringBytes. Bytes are combined in little-endian order: the first byte is
// the least significant.
static unsigned decodeMultiByteChar(const uint8_t *StringBytes,
                                    unsigned CharIndex, unsigned CharBytes) {
  assert(CharBytes == 1 || CharBytes == 2 || CharBytes == 4);
  const uint8_t *Unit = StringBytes + CharIndex * CharBytes;

  // Fold from the most significant (last) byte down to the first.
  unsigned Value = 0;
  for (unsigned I = CharBytes; I-- > 0;)
    Value = (Value << 8) | static_cast<unsigned>(Unit[I]);
  return Value;
}
1280 
1281 FunctionSymbolNode *Demangler::demangleVcallThunkNode(StringView &MangledName) {
1282   FunctionSymbolNode *FSN = Arena.alloc<FunctionSymbolNode>();
1283   VcallThunkIdentifierNode *VTIN = Arena.alloc<VcallThunkIdentifierNode>();
1284   FSN->Signature = Arena.alloc<ThunkSignatureNode>();
1285   FSN->Signature->FunctionClass = FC_NoParameterList;
1286 
1287   FSN->Name = demangleNameScopeChain(MangledName, VTIN);
1288   if (!Error)
1289     Error = !consumeFront(MangledName, "$B");
1290   if (!Error)
1291     VTIN->OffsetInVTable = demangleUnsigned(MangledName);
1292   if (!Error)
1293     Error = !consumeFront(MangledName, 'A');
1294   if (!Error)
1295     FSN->Signature->CallConvention = demangleCallingConvention(MangledName);
1296   return (Error) ? nullptr : FSN;
1297 }
1298 
1299 EncodedStringLiteralNode *
1300 Demangler::demangleStringLiteral(StringView &MangledName) {
1301   // This function uses goto, so declare all variables up front.
1302   OutputBuffer OB;
1303   StringView CRC;
1304   uint64_t StringByteSize;
1305   bool IsWcharT = false;
1306   bool IsNegative = false;
1307   size_t CrcEndPos = 0;
1308   char F;
1309 
1310   EncodedStringLiteralNode *Result = Arena.alloc<EncodedStringLiteralNode>();
1311 
1312   // Prefix indicating the beginning of a string literal
1313   if (!consumeFront(MangledName, "@_"))
1314     goto StringLiteralError;
1315   if (MangledName.empty())
1316     goto StringLiteralError;
1317 
1318   // Char Type (regular or wchar_t)
1319   F = MangledName.front();
1320   MangledName.remove_prefix(1);
1321   switch (F) {
1322   case '1':
1323     IsWcharT = true;
1324     DEMANGLE_FALLTHROUGH;
1325   case '0':
1326     break;
1327   default:
1328     goto StringLiteralError;
1329   }
1330 
1331   // Encoded Length
1332   std::tie(StringByteSize, IsNegative) = demangleNumber(MangledName);
1333   if (Error || IsNegative || StringByteSize < (IsWcharT ? 2 : 1))
1334     goto StringLiteralError;
1335 
1336   // CRC 32 (always 8 characters plus a terminator)
1337   CrcEndPos = MangledName.find('@');
1338   if (CrcEndPos == StringView::npos)
1339     goto StringLiteralError;
1340   CRC = MangledName.substr(0, CrcEndPos);
1341   MangledName.remove_prefix(CrcEndPos + 1);
1342   if (MangledName.empty())
1343     goto StringLiteralError;
1344 
1345   if (IsWcharT) {
1346     Result->Char = CharKind::Wchar;
1347     if (StringByteSize > 64)
1348       Result->IsTruncated = true;
1349 
1350     while (!consumeFront(MangledName, '@')) {
1351       if (MangledName.size() < 2)
1352         goto StringLiteralError;
1353       wchar_t W = demangleWcharLiteral(MangledName);
1354       if (StringByteSize != 2 || Result->IsTruncated)
1355         outputEscapedChar(OB, W);
1356       StringByteSize -= 2;
1357       if (Error)
1358         goto StringLiteralError;
1359     }
1360   } else {
1361     // The max byte length is actually 32, but some compilers mangled strings
1362     // incorrectly, so we have to assume it can go higher.
1363     constexpr unsigned MaxStringByteLength = 32 * 4;
1364     uint8_t StringBytes[MaxStringByteLength];
1365 
1366     unsigned BytesDecoded = 0;
1367     while (!consumeFront(MangledName, '@')) {
1368       if (MangledName.size() < 1 || BytesDecoded >= MaxStringByteLength)
1369         goto StringLiteralError;
1370       StringBytes[BytesDecoded++] = demangleCharLiteral(MangledName);
1371     }
1372 
1373     if (StringByteSize > BytesDecoded)
1374       Result->IsTruncated = true;
1375 
1376     unsigned CharBytes =
1377         guessCharByteSize(StringBytes, BytesDecoded, StringByteSize);
1378     assert(StringByteSize % CharBytes == 0);
1379     switch (CharBytes) {
1380     case 1:
1381       Result->Char = CharKind::Char;
1382       break;
1383     case 2:
1384       Result->Char = CharKind::Char16;
1385       break;
1386     case 4:
1387       Result->Char = CharKind::Char32;
1388       break;
1389     default:
1390       DEMANGLE_UNREACHABLE;
1391     }
1392     const unsigned NumChars = BytesDecoded / CharBytes;
1393     for (unsigned CharIndex = 0; CharIndex < NumChars; ++CharIndex) {
1394       unsigned NextChar =
1395           decodeMultiByteChar(StringBytes, CharIndex, CharBytes);
1396       if (CharIndex + 1 < NumChars || Result->IsTruncated)
1397         outputEscapedChar(OB, NextChar);
1398     }
1399   }
1400 
1401   Result->DecodedString = copyString(OB);
1402   std::free(OB.getBuffer());
1403   return Result;
1404 
1405 StringLiteralError:
1406   Error = true;
1407   std::free(OB.getBuffer());
1408   return nullptr;
1409 }
1410 
1411 // Returns MangledName's prefix before the first '@', or an error if
1412 // MangledName contains no '@' or the prefix has length 0.
1413 StringView Demangler::demangleSimpleString(StringView &MangledName,
1414                                            bool Memorize) {
1415   StringView S;
1416   for (size_t i = 0; i < MangledName.size(); ++i) {
1417     if (MangledName[i] != '@')
1418       continue;
1419     if (i == 0)
1420       break;
1421     S = MangledName.substr(0, i);
1422     MangledName.remove_prefix(i + 1);
1423 
1424     if (Memorize)
1425       memorizeString(S);
1426     return S;
1427   }
1428 
1429   Error = true;
1430   return {};
1431 }
1432 
1433 NamedIdentifierNode *
1434 Demangler::demangleAnonymousNamespaceName(StringView &MangledName) {
1435   assert(MangledName.startsWith("?A"));
1436   consumeFront(MangledName, "?A");
1437 
1438   NamedIdentifierNode *Node = Arena.alloc<NamedIdentifierNode>();
1439   Node->Name = "`anonymous namespace'";
1440   size_t EndPos = MangledName.find('@');
1441   if (EndPos == StringView::npos) {
1442     Error = true;
1443     return nullptr;
1444   }
1445   StringView NamespaceKey = MangledName.substr(0, EndPos);
1446   memorizeString(NamespaceKey);
1447   MangledName = MangledName.substr(EndPos + 1);
1448   return Node;
1449 }
1450 
1451 NamedIdentifierNode *
1452 Demangler::demangleLocallyScopedNamePiece(StringView &MangledName) {
1453   assert(startsWithLocalScopePattern(MangledName));
1454 
1455   NamedIdentifierNode *Identifier = Arena.alloc<NamedIdentifierNode>();
1456   consumeFront(MangledName, '?');
1457   uint64_t Number = 0;
1458   bool IsNegative = false;
1459   std::tie(Number, IsNegative) = demangleNumber(MangledName);
1460   assert(!IsNegative);
1461 
1462   // One ? to terminate the number
1463   consumeFront(MangledName, '?');
1464 
1465   assert(!Error);
1466   Node *Scope = parse(MangledName);
1467   if (Error)
1468     return nullptr;
1469 
1470   // Render the parent symbol's name into a buffer.
1471   OutputBuffer OB;
1472   OB << '`';
1473   Scope->output(OB, OF_Default);
1474   OB << '\'';
1475   OB << "::`" << Number << "'";
1476 
1477   Identifier->Name = copyString(OB);
1478   std::free(OB.getBuffer());
1479   return Identifier;
1480 }
1481 
1482 // Parses a type name in the form of A@B@C@@ which represents C::B::A.
1483 QualifiedNameNode *
1484 Demangler::demangleFullyQualifiedTypeName(StringView &MangledName) {
1485   IdentifierNode *Identifier =
1486       demangleUnqualifiedTypeName(MangledName, /*Memorize=*/true);
1487   if (Error)
1488     return nullptr;
1489   assert(Identifier);
1490 
1491   QualifiedNameNode *QN = demangleNameScopeChain(MangledName, Identifier);
1492   if (Error)
1493     return nullptr;
1494   assert(QN);
1495   return QN;
1496 }
1497 
1498 // Parses a symbol name in the form of A@B@C@@ which represents C::B::A.
1499 // Symbol names have slightly different rules regarding what can appear
1500 // so we separate out the implementations for flexibility.
1501 QualifiedNameNode *
1502 Demangler::demangleFullyQualifiedSymbolName(StringView &MangledName) {
1503   // This is the final component of a symbol name (i.e. the leftmost component
1504   // of a mangled name.  Since the only possible template instantiation that
1505   // can appear in this context is a function template, and since those are
1506   // not saved for the purposes of name backreferences, only backref simple
1507   // names.
1508   IdentifierNode *Identifier =
1509       demangleUnqualifiedSymbolName(MangledName, NBB_Simple);
1510   if (Error)
1511     return nullptr;
1512 
1513   QualifiedNameNode *QN = demangleNameScopeChain(MangledName, Identifier);
1514   if (Error)
1515     return nullptr;
1516 
1517   if (Identifier->kind() == NodeKind::StructorIdentifier) {
1518     if (QN->Components->Count < 2) {
1519       Error = true;
1520       return nullptr;
1521     }
1522     StructorIdentifierNode *SIN =
1523         static_cast<StructorIdentifierNode *>(Identifier);
1524     Node *ClassNode = QN->Components->Nodes[QN->Components->Count - 2];
1525     SIN->Class = static_cast<IdentifierNode *>(ClassNode);
1526   }
1527   assert(QN);
1528   return QN;
1529 }
1530 
1531 IdentifierNode *Demangler::demangleUnqualifiedTypeName(StringView &MangledName,
1532                                                        bool Memorize) {
1533   // An inner-most name can be a back-reference, because a fully-qualified name
1534   // (e.g. Scope + Inner) can contain other fully qualified names inside of
1535   // them (for example template parameters), and these nested parameters can
1536   // refer to previously mangled types.
1537   if (startsWithDigit(MangledName))
1538     return demangleBackRefName(MangledName);
1539 
1540   if (MangledName.startsWith("?$"))
1541     return demangleTemplateInstantiationName(MangledName, NBB_Template);
1542 
1543   return demangleSimpleName(MangledName, Memorize);
1544 }
1545 
1546 IdentifierNode *
1547 Demangler::demangleUnqualifiedSymbolName(StringView &MangledName,
1548                                          NameBackrefBehavior NBB) {
1549   if (startsWithDigit(MangledName))
1550     return demangleBackRefName(MangledName);
1551   if (MangledName.startsWith("?$"))
1552     return demangleTemplateInstantiationName(MangledName, NBB);
1553   if (MangledName.startsWith('?'))
1554     return demangleFunctionIdentifierCode(MangledName);
1555   return demangleSimpleName(MangledName, /*Memorize=*/(NBB & NBB_Simple) != 0);
1556 }
1557 
1558 IdentifierNode *Demangler::demangleNameScopePiece(StringView &MangledName) {
1559   if (startsWithDigit(MangledName))
1560     return demangleBackRefName(MangledName);
1561 
1562   if (MangledName.startsWith("?$"))
1563     return demangleTemplateInstantiationName(MangledName, NBB_Template);
1564 
1565   if (MangledName.startsWith("?A"))
1566     return demangleAnonymousNamespaceName(MangledName);
1567 
1568   if (startsWithLocalScopePattern(MangledName))
1569     return demangleLocallyScopedNamePiece(MangledName);
1570 
1571   return demangleSimpleName(MangledName, /*Memorize=*/true);
1572 }
1573 
1574 static NodeArrayNode *nodeListToNodeArray(ArenaAllocator &Arena, NodeList *Head,
1575                                           size_t Count) {
1576   NodeArrayNode *N = Arena.alloc<NodeArrayNode>();
1577   N->Count = Count;
1578   N->Nodes = Arena.allocArray<Node *>(Count);
1579   for (size_t I = 0; I < Count; ++I) {
1580     N->Nodes[I] = Head->N;
1581     Head = Head->Next;
1582   }
1583   return N;
1584 }
1585 
1586 QualifiedNameNode *
1587 Demangler::demangleNameScopeChain(StringView &MangledName,
1588                                   IdentifierNode *UnqualifiedName) {
1589   NodeList *Head = Arena.alloc<NodeList>();
1590 
1591   Head->N = UnqualifiedName;
1592 
1593   size_t Count = 1;
1594   while (!consumeFront(MangledName, "@")) {
1595     ++Count;
1596     NodeList *NewHead = Arena.alloc<NodeList>();
1597     NewHead->Next = Head;
1598     Head = NewHead;
1599 
1600     if (MangledName.empty()) {
1601       Error = true;
1602       return nullptr;
1603     }
1604 
1605     assert(!Error);
1606     IdentifierNode *Elem = demangleNameScopePiece(MangledName);
1607     if (Error)
1608       return nullptr;
1609 
1610     Head->N = Elem;
1611   }
1612 
1613   QualifiedNameNode *QN = Arena.alloc<QualifiedNameNode>();
1614   QN->Components = nodeListToNodeArray(Arena, Head, Count);
1615   return QN;
1616 }
1617 
1618 FuncClass Demangler::demangleFunctionClass(StringView &MangledName) {
1619   const char F = MangledName.front();
1620   MangledName.remove_prefix(1);
1621   switch (F) {
1622   case '9':
1623     return FuncClass(FC_ExternC | FC_NoParameterList);
1624   case 'A':
1625     return FC_Private;
1626   case 'B':
1627     return FuncClass(FC_Private | FC_Far);
1628   case 'C':
1629     return FuncClass(FC_Private | FC_Static);
1630   case 'D':
1631     return FuncClass(FC_Private | FC_Static | FC_Far);
1632   case 'E':
1633     return FuncClass(FC_Private | FC_Virtual);
1634   case 'F':
1635     return FuncClass(FC_Private | FC_Virtual | FC_Far);
1636   case 'G':
1637     return FuncClass(FC_Private | FC_StaticThisAdjust);
1638   case 'H':
1639     return FuncClass(FC_Private | FC_StaticThisAdjust | FC_Far);
1640   case 'I':
1641     return FuncClass(FC_Protected);
1642   case 'J':
1643     return FuncClass(FC_Protected | FC_Far);
1644   case 'K':
1645     return FuncClass(FC_Protected | FC_Static);
1646   case 'L':
1647     return FuncClass(FC_Protected | FC_Static | FC_Far);
1648   case 'M':
1649     return FuncClass(FC_Protected | FC_Virtual);
1650   case 'N':
1651     return FuncClass(FC_Protected | FC_Virtual | FC_Far);
1652   case 'O':
1653     return FuncClass(FC_Protected | FC_Virtual | FC_StaticThisAdjust);
1654   case 'P':
1655     return FuncClass(FC_Protected | FC_Virtual | FC_StaticThisAdjust | FC_Far);
1656   case 'Q':
1657     return FuncClass(FC_Public);
1658   case 'R':
1659     return FuncClass(FC_Public | FC_Far);
1660   case 'S':
1661     return FuncClass(FC_Public | FC_Static);
1662   case 'T':
1663     return FuncClass(FC_Public | FC_Static | FC_Far);
1664   case 'U':
1665     return FuncClass(FC_Public | FC_Virtual);
1666   case 'V':
1667     return FuncClass(FC_Public | FC_Virtual | FC_Far);
1668   case 'W':
1669     return FuncClass(FC_Public | FC_Virtual | FC_StaticThisAdjust);
1670   case 'X':
1671     return FuncClass(FC_Public | FC_Virtual | FC_StaticThisAdjust | FC_Far);
1672   case 'Y':
1673     return FuncClass(FC_Global);
1674   case 'Z':
1675     return FuncClass(FC_Global | FC_Far);
1676   case '$': {
1677     FuncClass VFlag = FC_VirtualThisAdjust;
1678     if (consumeFront(MangledName, 'R'))
1679       VFlag = FuncClass(VFlag | FC_VirtualThisAdjustEx);
1680     if (MangledName.empty())
1681       break;
1682     const char F = MangledName.front();
1683     MangledName.remove_prefix(1);
1684     switch (F) {
1685     case '0':
1686       return FuncClass(FC_Private | FC_Virtual | VFlag);
1687     case '1':
1688       return FuncClass(FC_Private | FC_Virtual | VFlag | FC_Far);
1689     case '2':
1690       return FuncClass(FC_Protected | FC_Virtual | VFlag);
1691     case '3':
1692       return FuncClass(FC_Protected | FC_Virtual | VFlag | FC_Far);
1693     case '4':
1694       return FuncClass(FC_Public | FC_Virtual | VFlag);
1695     case '5':
1696       return FuncClass(FC_Public | FC_Virtual | VFlag | FC_Far);
1697     }
1698   }
1699   }
1700 
1701   Error = true;
1702   return FC_Public;
1703 }
1704 
1705 CallingConv Demangler::demangleCallingConvention(StringView &MangledName) {
1706   if (MangledName.empty()) {
1707     Error = true;
1708     return CallingConv::None;
1709   }
1710 
1711   const char F = MangledName.front();
1712   MangledName.remove_prefix(1);
1713   switch (F) {
1714   case 'A':
1715   case 'B':
1716     return CallingConv::Cdecl;
1717   case 'C':
1718   case 'D':
1719     return CallingConv::Pascal;
1720   case 'E':
1721   case 'F':
1722     return CallingConv::Thiscall;
1723   case 'G':
1724   case 'H':
1725     return CallingConv::Stdcall;
1726   case 'I':
1727   case 'J':
1728     return CallingConv::Fastcall;
1729   case 'M':
1730   case 'N':
1731     return CallingConv::Clrcall;
1732   case 'O':
1733   case 'P':
1734     return CallingConv::Eabi;
1735   case 'Q':
1736     return CallingConv::Vectorcall;
1737   case 'S':
1738     return CallingConv::Swift;
1739   case 'W':
1740     return CallingConv::SwiftAsync;
1741   }
1742 
1743   return CallingConv::None;
1744 }
1745 
1746 StorageClass Demangler::demangleVariableStorageClass(StringView &MangledName) {
1747   assert(MangledName.front() >= '0' && MangledName.front() <= '4');
1748 
1749   const char F = MangledName.front();
1750   MangledName.remove_prefix(1);
1751   switch (F) {
1752   case '0':
1753     return StorageClass::PrivateStatic;
1754   case '1':
1755     return StorageClass::ProtectedStatic;
1756   case '2':
1757     return StorageClass::PublicStatic;
1758   case '3':
1759     return StorageClass::Global;
1760   case '4':
1761     return StorageClass::FunctionLocalStatic;
1762   }
1763   DEMANGLE_UNREACHABLE;
1764 }
1765 
1766 std::pair<Qualifiers, bool>
1767 Demangler::demangleQualifiers(StringView &MangledName) {
1768   if (MangledName.empty()) {
1769     Error = true;
1770     return std::make_pair(Q_None, false);
1771   }
1772 
1773   const char F = MangledName.front();
1774   MangledName.remove_prefix(1);
1775   switch (F) {
1776   // Member qualifiers
1777   case 'Q':
1778     return std::make_pair(Q_None, true);
1779   case 'R':
1780     return std::make_pair(Q_Const, true);
1781   case 'S':
1782     return std::make_pair(Q_Volatile, true);
1783   case 'T':
1784     return std::make_pair(Qualifiers(Q_Const | Q_Volatile), true);
1785   // Non-Member qualifiers
1786   case 'A':
1787     return std::make_pair(Q_None, false);
1788   case 'B':
1789     return std::make_pair(Q_Const, false);
1790   case 'C':
1791     return std::make_pair(Q_Volatile, false);
1792   case 'D':
1793     return std::make_pair(Qualifiers(Q_Const | Q_Volatile), false);
1794   }
1795   Error = true;
1796   return std::make_pair(Q_None, false);
1797 }
1798 
1799 // <variable-type> ::= <type> <cvr-qualifiers>
1800 //                 ::= <type> <pointee-cvr-qualifiers> # pointers, references
1801 TypeNode *Demangler::demangleType(StringView &MangledName,
1802                                   QualifierMangleMode QMM) {
1803   Qualifiers Quals = Q_None;
1804   bool IsMember = false;
1805   if (QMM == QualifierMangleMode::Mangle) {
1806     std::tie(Quals, IsMember) = demangleQualifiers(MangledName);
1807   } else if (QMM == QualifierMangleMode::Result) {
1808     if (consumeFront(MangledName, '?'))
1809       std::tie(Quals, IsMember) = demangleQualifiers(MangledName);
1810   }
1811 
1812   if (MangledName.empty()) {
1813     Error = true;
1814     return nullptr;
1815   }
1816 
1817   TypeNode *Ty = nullptr;
1818   if (isTagType(MangledName))
1819     Ty = demangleClassType(MangledName);
1820   else if (isPointerType(MangledName)) {
1821     if (isMemberPointer(MangledName, Error))
1822       Ty = demangleMemberPointerType(MangledName);
1823     else if (!Error)
1824       Ty = demanglePointerType(MangledName);
1825     else
1826       return nullptr;
1827   } else if (isArrayType(MangledName))
1828     Ty = demangleArrayType(MangledName);
1829   else if (isFunctionType(MangledName)) {
1830     if (consumeFront(MangledName, "$$A8@@"))
1831       Ty = demangleFunctionType(MangledName, true);
1832     else {
1833       assert(MangledName.startsWith("$$A6"));
1834       consumeFront(MangledName, "$$A6");
1835       Ty = demangleFunctionType(MangledName, false);
1836     }
1837   } else if (isCustomType(MangledName)) {
1838     Ty = demangleCustomType(MangledName);
1839   } else {
1840     Ty = demanglePrimitiveType(MangledName);
1841   }
1842 
1843   if (!Ty || Error)
1844     return Ty;
1845   Ty->Quals = Qualifiers(Ty->Quals | Quals);
1846   return Ty;
1847 }
1848 
1849 bool Demangler::demangleThrowSpecification(StringView &MangledName) {
1850   if (consumeFront(MangledName, "_E"))
1851     return true;
1852   if (consumeFront(MangledName, 'Z'))
1853     return false;
1854 
1855   Error = true;
1856   return false;
1857 }
1858 
1859 FunctionSignatureNode *Demangler::demangleFunctionType(StringView &MangledName,
1860                                                        bool HasThisQuals) {
1861   FunctionSignatureNode *FTy = Arena.alloc<FunctionSignatureNode>();
1862 
1863   if (HasThisQuals) {
1864     FTy->Quals = demanglePointerExtQualifiers(MangledName);
1865     FTy->RefQualifier = demangleFunctionRefQualifier(MangledName);
1866     FTy->Quals = Qualifiers(FTy->Quals | demangleQualifiers(MangledName).first);
1867   }
1868 
1869   // Fields that appear on both member and non-member functions.
1870   FTy->CallConvention = demangleCallingConvention(MangledName);
1871 
1872   // <return-type> ::= <type>
1873   //               ::= @ # structors (they have no declared return type)
1874   bool IsStructor = consumeFront(MangledName, '@');
1875   if (!IsStructor)
1876     FTy->ReturnType = demangleType(MangledName, QualifierMangleMode::Result);
1877 
1878   FTy->Params = demangleFunctionParameterList(MangledName, FTy->IsVariadic);
1879 
1880   FTy->IsNoexcept = demangleThrowSpecification(MangledName);
1881 
1882   return FTy;
1883 }
1884 
1885 FunctionSymbolNode *
1886 Demangler::demangleFunctionEncoding(StringView &MangledName) {
1887   FuncClass ExtraFlags = FC_None;
1888   if (consumeFront(MangledName, "$$J0"))
1889     ExtraFlags = FC_ExternC;
1890 
1891   if (MangledName.empty()) {
1892     Error = true;
1893     return nullptr;
1894   }
1895 
1896   FuncClass FC = demangleFunctionClass(MangledName);
1897   FC = FuncClass(ExtraFlags | FC);
1898 
1899   FunctionSignatureNode *FSN = nullptr;
1900   ThunkSignatureNode *TTN = nullptr;
1901   if (FC & FC_StaticThisAdjust) {
1902     TTN = Arena.alloc<ThunkSignatureNode>();
1903     TTN->ThisAdjust.StaticOffset = demangleSigned(MangledName);
1904   } else if (FC & FC_VirtualThisAdjust) {
1905     TTN = Arena.alloc<ThunkSignatureNode>();
1906     if (FC & FC_VirtualThisAdjustEx) {
1907       TTN->ThisAdjust.VBPtrOffset = demangleSigned(MangledName);
1908       TTN->ThisAdjust.VBOffsetOffset = demangleSigned(MangledName);
1909     }
1910     TTN->ThisAdjust.VtordispOffset = demangleSigned(MangledName);
1911     TTN->ThisAdjust.StaticOffset = demangleSigned(MangledName);
1912   }
1913 
1914   if (FC & FC_NoParameterList) {
1915     // This is an extern "C" function whose full signature hasn't been mangled.
1916     // This happens when we need to mangle a local symbol inside of an extern
1917     // "C" function.
1918     FSN = Arena.alloc<FunctionSignatureNode>();
1919   } else {
1920     bool HasThisQuals = !(FC & (FC_Global | FC_Static));
1921     FSN = demangleFunctionType(MangledName, HasThisQuals);
1922   }
1923 
1924   if (Error)
1925     return nullptr;
1926 
1927   if (TTN) {
1928     *static_cast<FunctionSignatureNode *>(TTN) = *FSN;
1929     FSN = TTN;
1930   }
1931   FSN->FunctionClass = FC;
1932 
1933   FunctionSymbolNode *Symbol = Arena.alloc<FunctionSymbolNode>();
1934   Symbol->Signature = FSN;
1935   return Symbol;
1936 }
1937 
1938 CustomTypeNode *Demangler::demangleCustomType(StringView &MangledName) {
1939   assert(MangledName.startsWith('?'));
1940   MangledName.remove_prefix(1);
1941 
1942   CustomTypeNode *CTN = Arena.alloc<CustomTypeNode>();
1943   CTN->Identifier = demangleUnqualifiedTypeName(MangledName, /*Memorize=*/true);
1944   if (!consumeFront(MangledName, '@'))
1945     Error = true;
1946   if (Error)
1947     return nullptr;
1948   return CTN;
1949 }
1950 
1951 // Reads a primitive type.
1952 PrimitiveTypeNode *Demangler::demanglePrimitiveType(StringView &MangledName) {
1953   if (consumeFront(MangledName, "$$T"))
1954     return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Nullptr);
1955 
1956   const char F = MangledName.front();
1957   MangledName.remove_prefix(1);
1958   switch (F) {
1959   case 'X':
1960     return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Void);
1961   case 'D':
1962     return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Char);
1963   case 'C':
1964     return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Schar);
1965   case 'E':
1966     return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Uchar);
1967   case 'F':
1968     return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Short);
1969   case 'G':
1970     return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Ushort);
1971   case 'H':
1972     return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Int);
1973   case 'I':
1974     return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Uint);
1975   case 'J':
1976     return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Long);
1977   case 'K':
1978     return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Ulong);
1979   case 'M':
1980     return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Float);
1981   case 'N':
1982     return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Double);
1983   case 'O':
1984     return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Ldouble);
1985   case '_': {
1986     if (MangledName.empty()) {
1987       Error = true;
1988       return nullptr;
1989     }
1990     const char F = MangledName.front();
1991     MangledName.remove_prefix(1);
1992     switch (F) {
1993     case 'N':
1994       return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Bool);
1995     case 'J':
1996       return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Int64);
1997     case 'K':
1998       return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Uint64);
1999     case 'W':
2000       return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Wchar);
2001     case 'Q':
2002       return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Char8);
2003     case 'S':
2004       return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Char16);
2005     case 'U':
2006       return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Char32);
2007     }
2008     break;
2009   }
2010   }
2011   Error = true;
2012   return nullptr;
2013 }
2014 
2015 TagTypeNode *Demangler::demangleClassType(StringView &MangledName) {
2016   TagTypeNode *TT = nullptr;
2017 
2018   const char F = MangledName.front();
2019   MangledName.remove_prefix(1);
2020   switch (F) {
2021   case 'T':
2022     TT = Arena.alloc<TagTypeNode>(TagKind::Union);
2023     break;
2024   case 'U':
2025     TT = Arena.alloc<TagTypeNode>(TagKind::Struct);
2026     break;
2027   case 'V':
2028     TT = Arena.alloc<TagTypeNode>(TagKind::Class);
2029     break;
2030   case 'W':
2031     if (!consumeFront(MangledName, '4')) {
2032       Error = true;
2033       return nullptr;
2034     }
2035     TT = Arena.alloc<TagTypeNode>(TagKind::Enum);
2036     break;
2037   default:
2038     assert(false);
2039   }
2040 
2041   TT->QualifiedName = demangleFullyQualifiedTypeName(MangledName);
2042   return TT;
2043 }
2044 
2045 // <pointer-type> ::= E? <pointer-cvr-qualifiers> <ext-qualifiers> <type>
2046 //                       # the E is required for 64-bit non-static pointers
2047 PointerTypeNode *Demangler::demanglePointerType(StringView &MangledName) {
2048   PointerTypeNode *Pointer = Arena.alloc<PointerTypeNode>();
2049 
2050   std::tie(Pointer->Quals, Pointer->Affinity) =
2051       demanglePointerCVQualifiers(MangledName);
2052 
2053   if (consumeFront(MangledName, "6")) {
2054     Pointer->Pointee = demangleFunctionType(MangledName, false);
2055     return Pointer;
2056   }
2057 
2058   Qualifiers ExtQuals = demanglePointerExtQualifiers(MangledName);
2059   Pointer->Quals = Qualifiers(Pointer->Quals | ExtQuals);
2060 
2061   Pointer->Pointee = demangleType(MangledName, QualifierMangleMode::Mangle);
2062   return Pointer;
2063 }
2064 
2065 PointerTypeNode *Demangler::demangleMemberPointerType(StringView &MangledName) {
2066   PointerTypeNode *Pointer = Arena.alloc<PointerTypeNode>();
2067 
2068   std::tie(Pointer->Quals, Pointer->Affinity) =
2069       demanglePointerCVQualifiers(MangledName);
2070   assert(Pointer->Affinity == PointerAffinity::Pointer);
2071 
2072   Qualifiers ExtQuals = demanglePointerExtQualifiers(MangledName);
2073   Pointer->Quals = Qualifiers(Pointer->Quals | ExtQuals);
2074 
2075   // isMemberPointer() only returns true if there is at least one character
2076   // after the qualifiers.
2077   if (consumeFront(MangledName, "8")) {
2078     Pointer->ClassParent = demangleFullyQualifiedTypeName(MangledName);
2079     Pointer->Pointee = demangleFunctionType(MangledName, true);
2080   } else {
2081     Qualifiers PointeeQuals = Q_None;
2082     bool IsMember = false;
2083     std::tie(PointeeQuals, IsMember) = demangleQualifiers(MangledName);
2084     assert(IsMember || Error);
2085     Pointer->ClassParent = demangleFullyQualifiedTypeName(MangledName);
2086 
2087     Pointer->Pointee = demangleType(MangledName, QualifierMangleMode::Drop);
2088     if (Pointer->Pointee)
2089       Pointer->Pointee->Quals = PointeeQuals;
2090   }
2091 
2092   return Pointer;
2093 }
2094 
2095 Qualifiers Demangler::demanglePointerExtQualifiers(StringView &MangledName) {
2096   Qualifiers Quals = Q_None;
2097   if (consumeFront(MangledName, 'E'))
2098     Quals = Qualifiers(Quals | Q_Pointer64);
2099   if (consumeFront(MangledName, 'I'))
2100     Quals = Qualifiers(Quals | Q_Restrict);
2101   if (consumeFront(MangledName, 'F'))
2102     Quals = Qualifiers(Quals | Q_Unaligned);
2103 
2104   return Quals;
2105 }
2106 
2107 ArrayTypeNode *Demangler::demangleArrayType(StringView &MangledName) {
2108   assert(MangledName.front() == 'Y');
2109   MangledName.remove_prefix(1);
2110 
2111   uint64_t Rank = 0;
2112   bool IsNegative = false;
2113   std::tie(Rank, IsNegative) = demangleNumber(MangledName);
2114   if (IsNegative || Rank == 0) {
2115     Error = true;
2116     return nullptr;
2117   }
2118 
2119   ArrayTypeNode *ATy = Arena.alloc<ArrayTypeNode>();
2120   NodeList *Head = Arena.alloc<NodeList>();
2121   NodeList *Tail = Head;
2122 
2123   for (uint64_t I = 0; I < Rank; ++I) {
2124     uint64_t D = 0;
2125     std::tie(D, IsNegative) = demangleNumber(MangledName);
2126     if (Error || IsNegative) {
2127       Error = true;
2128       return nullptr;
2129     }
2130     Tail->N = Arena.alloc<IntegerLiteralNode>(D, IsNegative);
2131     if (I + 1 < Rank) {
2132       Tail->Next = Arena.alloc<NodeList>();
2133       Tail = Tail->Next;
2134     }
2135   }
2136   ATy->Dimensions = nodeListToNodeArray(Arena, Head, Rank);
2137 
2138   if (consumeFront(MangledName, "$$C")) {
2139     bool IsMember = false;
2140     std::tie(ATy->Quals, IsMember) = demangleQualifiers(MangledName);
2141     if (IsMember) {
2142       Error = true;
2143       return nullptr;
2144     }
2145   }
2146 
2147   ATy->ElementType = demangleType(MangledName, QualifierMangleMode::Drop);
2148   return ATy;
2149 }
2150 
2151 // Reads a function's parameters.
2152 NodeArrayNode *Demangler::demangleFunctionParameterList(StringView &MangledName,
2153                                                         bool &IsVariadic) {
2154   // Empty parameter list.
2155   if (consumeFront(MangledName, 'X'))
2156     return nullptr;
2157 
2158   NodeList *Head = Arena.alloc<NodeList>();
2159   NodeList **Current = &Head;
2160   size_t Count = 0;
2161   while (!Error && !MangledName.startsWith('@') &&
2162          !MangledName.startsWith('Z')) {
2163     ++Count;
2164 
2165     if (startsWithDigit(MangledName)) {
2166       size_t N = MangledName[0] - '0';
2167       if (N >= Backrefs.FunctionParamCount) {
2168         Error = true;
2169         return nullptr;
2170       }
2171       MangledName.remove_prefix(1);
2172 
2173       *Current = Arena.alloc<NodeList>();
2174       (*Current)->N = Backrefs.FunctionParams[N];
2175       Current = &(*Current)->Next;
2176       continue;
2177     }
2178 
2179     size_t OldSize = MangledName.size();
2180 
2181     *Current = Arena.alloc<NodeList>();
2182     TypeNode *TN = demangleType(MangledName, QualifierMangleMode::Drop);
2183     if (!TN || Error)
2184       return nullptr;
2185 
2186     (*Current)->N = TN;
2187 
2188     size_t CharsConsumed = OldSize - MangledName.size();
2189     assert(CharsConsumed != 0);
2190 
2191     // Single-letter types are ignored for backreferences because memorizing
2192     // them doesn't save anything.
2193     if (Backrefs.FunctionParamCount <= 9 && CharsConsumed > 1)
2194       Backrefs.FunctionParams[Backrefs.FunctionParamCount++] = TN;
2195 
2196     Current = &(*Current)->Next;
2197   }
2198 
2199   if (Error)
2200     return nullptr;
2201 
2202   NodeArrayNode *NA = nodeListToNodeArray(Arena, Head, Count);
2203   // A non-empty parameter list is terminated by either 'Z' (variadic) parameter
2204   // list or '@' (non variadic).  Careful not to consume "@Z", as in that case
2205   // the following Z could be a throw specifier.
2206   if (consumeFront(MangledName, '@'))
2207     return NA;
2208 
2209   if (consumeFront(MangledName, 'Z')) {
2210     IsVariadic = true;
2211     return NA;
2212   }
2213 
2214   DEMANGLE_UNREACHABLE;
2215 }
2216 
// Reads a template parameter list up to and including its terminating '@' and
// returns the parameters as a node array. Returns nullptr with Error set if
// any parameter fails to parse.
NodeArrayNode *
Demangler::demangleTemplateParameterList(StringView &MangledName) {
  NodeList *Head = nullptr;
  NodeList **Current = &Head;
  size_t Count = 0;

  while (!MangledName.startsWith('@')) {
    if (consumeFront(MangledName, "$S") || consumeFront(MangledName, "$$V") ||
        consumeFront(MangledName, "$$$V") || consumeFront(MangledName, "$$Z")) {
      // parameter pack separator
      continue;
    }

    ++Count;

    // Template parameter lists don't participate in back-referencing.
    *Current = Arena.alloc<NodeList>();

    NodeList &TP = **Current;

    // Dispatch on the parameter's prefix; each branch fills in TP.N.
    TemplateParameterReferenceNode *TPRN = nullptr;
    if (consumeFront(MangledName, "$$Y")) {
      // Template alias
      TP.N = demangleFullyQualifiedTypeName(MangledName);
    } else if (consumeFront(MangledName, "$$B")) {
      // Array
      TP.N = demangleType(MangledName, QualifierMangleMode::Drop);
    } else if (consumeFront(MangledName, "$$C")) {
      // Type has qualifiers.
      TP.N = demangleType(MangledName, QualifierMangleMode::Mangle);
    } else if (MangledName.startsWith("$1") || MangledName.startsWith("$H") ||
               MangledName.startsWith("$I") || MangledName.startsWith("$J")) {
      // Pointer to member
      TP.N = TPRN = Arena.alloc<TemplateParameterReferenceNode>();
      TPRN->IsMemberPointer = true;

      // Skip the '$'; the startsWith checks above guarantee that the
      // inheritance specifier character follows.
      MangledName.remove_prefix(1);
      // 1 - single inheritance       <name>
      // H - multiple inheritance     <name> <number>
      // I - virtual inheritance      <name> <number> <number>
      // J - unspecified inheritance  <name> <number> <number> <number>
      char InheritanceSpecifier = MangledName.front();
      MangledName.remove_prefix(1);
      SymbolNode *S = nullptr;
      if (MangledName.startsWith('?')) {
        // A nested mangled symbol names the member; its unqualified name is
        // memorized for later back-references.
        S = parse(MangledName);
        if (Error || !S->Name) {
          Error = true;
          return nullptr;
        }
        memorizeIdentifier(S->Name->getUnqualifiedIdentifier());
      }

      // The cases fall through on purpose so that J reads three offsets,
      // I two, H one and 1 none.
      switch (InheritanceSpecifier) {
      case 'J':
        TPRN->ThunkOffsets[TPRN->ThunkOffsetCount++] =
            demangleSigned(MangledName);
        DEMANGLE_FALLTHROUGH;
      case 'I':
        TPRN->ThunkOffsets[TPRN->ThunkOffsetCount++] =
            demangleSigned(MangledName);
        DEMANGLE_FALLTHROUGH;
      case 'H':
        TPRN->ThunkOffsets[TPRN->ThunkOffsetCount++] =
            demangleSigned(MangledName);
        DEMANGLE_FALLTHROUGH;
      case '1':
        break;
      default:
        DEMANGLE_UNREACHABLE;
      }
      TPRN->Affinity = PointerAffinity::Pointer;
      TPRN->Symbol = S;
    } else if (MangledName.startsWith("$E?")) {
      // Consume only "$E"; the '?' belongs to the nested symbol parsed below.
      consumeFront(MangledName, "$E");
      // Reference to symbol
      TP.N = TPRN = Arena.alloc<TemplateParameterReferenceNode>();
      TPRN->Symbol = parse(MangledName);
      TPRN->Affinity = PointerAffinity::Reference;
    } else if (MangledName.startsWith("$F") || MangledName.startsWith("$G")) {
      TP.N = TPRN = Arena.alloc<TemplateParameterReferenceNode>();

      // Data member pointer.
      MangledName.remove_prefix(1);
      char InheritanceSpecifier = MangledName.front();
      MangledName.remove_prefix(1);

      // 'G' reads three offsets (one, then falls through), 'F' reads two.
      switch (InheritanceSpecifier) {
      case 'G':
        TPRN->ThunkOffsets[TPRN->ThunkOffsetCount++] =
            demangleSigned(MangledName);
        DEMANGLE_FALLTHROUGH;
      case 'F':
        TPRN->ThunkOffsets[TPRN->ThunkOffsetCount++] =
            demangleSigned(MangledName);
        TPRN->ThunkOffsets[TPRN->ThunkOffsetCount++] =
            demangleSigned(MangledName);
        break;
      default:
        DEMANGLE_UNREACHABLE;
      }
      TPRN->IsMemberPointer = true;

    } else if (consumeFront(MangledName, "$0")) {
      // Integral non-type template parameter
      bool IsNegative = false;
      uint64_t Value = 0;
      std::tie(Value, IsNegative) = demangleNumber(MangledName);

      TP.N = Arena.alloc<IntegerLiteralNode>(Value, IsNegative);
    } else {
      // Anything else is parsed as a plain type.
      TP.N = demangleType(MangledName, QualifierMangleMode::Drop);
    }
    // Any failure in the branches above aborts the whole list.
    if (Error)
      return nullptr;

    Current = &TP.Next;
  }

  // The loop above returns nullptr on Error.
  assert(!Error);

  // Template parameter lists cannot be variadic, so it can only be terminated
  // by @ (as opposed to 'Z' in the function parameter case).
  assert(MangledName.startsWith('@')); // The above loop exits only on '@'.
  consumeFront(MangledName, '@');
  return nodeListToNodeArray(Arena, Head, Count);
}
2345 
2346 void Demangler::dumpBackReferences() {
2347   std::printf("%d function parameter backreferences\n",
2348               (int)Backrefs.FunctionParamCount);
2349 
2350   // Create an output stream so we can render each type.
2351   OutputBuffer OB;
2352   for (size_t I = 0; I < Backrefs.FunctionParamCount; ++I) {
2353     OB.setCurrentPosition(0);
2354 
2355     TypeNode *T = Backrefs.FunctionParams[I];
2356     T->output(OB, OF_Default);
2357 
2358     StringView B = OB;
2359     std::printf("  [%d] - %.*s\n", (int)I, (int)B.size(), B.begin());
2360   }
2361   std::free(OB.getBuffer());
2362 
2363   if (Backrefs.FunctionParamCount > 0)
2364     std::printf("\n");
2365   std::printf("%d name backreferences\n", (int)Backrefs.NamesCount);
2366   for (size_t I = 0; I < Backrefs.NamesCount; ++I) {
2367     std::printf("  [%d] - %.*s\n", (int)I, (int)Backrefs.Names[I]->Name.size(),
2368                 Backrefs.Names[I]->Name.begin());
2369   }
2370   if (Backrefs.NamesCount > 0)
2371     std::printf("\n");
2372 }
2373 
2374 char *llvm::microsoftDemangle(const char *MangledName, size_t *NMangled,
2375                               char *Buf, size_t *N,
2376                               int *Status, MSDemangleFlags Flags) {
2377   Demangler D;
2378 
2379   StringView Name{MangledName};
2380   SymbolNode *AST = D.parse(Name);
2381   if (!D.Error && NMangled)
2382     *NMangled = Name.begin() - MangledName;
2383 
2384   if (Flags & MSDF_DumpBackrefs)
2385     D.dumpBackReferences();
2386 
2387   OutputFlags OF = OF_Default;
2388   if (Flags & MSDF_NoCallingConvention)
2389     OF = OutputFlags(OF | OF_NoCallingConvention);
2390   if (Flags & MSDF_NoAccessSpecifier)
2391     OF = OutputFlags(OF | OF_NoAccessSpecifier);
2392   if (Flags & MSDF_NoReturnType)
2393     OF = OutputFlags(OF | OF_NoReturnType);
2394   if (Flags & MSDF_NoMemberType)
2395     OF = OutputFlags(OF | OF_NoMemberType);
2396   if (Flags & MSDF_NoVariableType)
2397     OF = OutputFlags(OF | OF_NoVariableType);
2398 
2399   int InternalStatus = demangle_success;
2400   if (D.Error)
2401     InternalStatus = demangle_invalid_mangled_name;
2402   else {
2403     OutputBuffer OB(Buf, N);
2404     AST->output(OB, OF);
2405     OB += '\0';
2406     if (N != nullptr)
2407       *N = OB.getCurrentPosition();
2408     Buf = OB.getBuffer();
2409   }
2410 
2411   if (Status)
2412     *Status = InternalStatus;
2413   return InternalStatus == demangle_success ? Buf : nullptr;
2414 }
2415