xref: /llvm-project/llvm/lib/Demangle/RustDemangle.cpp (revision 7c59e8001a3b66be545a890a26ad8a24a6c9fe4b)
1 //===--- RustDemangle.cpp ---------------------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file defines a demangler for Rust v0 mangled symbols as specified in
10 // https://rust-lang.github.io/rfcs/2603-rust-symbol-name-mangling-v0.html
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "llvm/Demangle/Demangle.h"
15 #include "llvm/Demangle/StringViewExtras.h"
16 #include "llvm/Demangle/Utility.h"
17 
18 #include <algorithm>
19 #include <cassert>
20 #include <cstdint>
21 #include <cstring>
22 #include <limits>
23 
24 using namespace llvm;
25 
26 using llvm::itanium_demangle::OutputBuffer;
27 using llvm::itanium_demangle::ScopedOverride;
28 
29 namespace {
30 
31 struct Identifier {
32   std::string_view Name;
33   bool Punycode;
34 
35   bool empty() const { return Name.empty(); }
36 };
37 
38 enum class BasicType {
39   Bool,
40   Char,
41   I8,
42   I16,
43   I32,
44   I64,
45   I128,
46   ISize,
47   U8,
48   U16,
49   U32,
50   U64,
51   U128,
52   USize,
53   F32,
54   F64,
55   Str,
56   Placeholder,
57   Unit,
58   Variadic,
59   Never,
60 };
61 
62 enum class IsInType {
63   No,
64   Yes,
65 };
66 
67 enum class LeaveGenericsOpen {
68   No,
69   Yes,
70 };
71 
72 class Demangler {
73   // Maximum recursion level. Used to avoid stack overflow.
74   size_t MaxRecursionLevel;
75   // Current recursion level.
76   size_t RecursionLevel;
77   size_t BoundLifetimes;
78   // Input string that is being demangled with "_R" prefix removed.
79   std::string_view Input;
80   // Position in the input string.
81   size_t Position;
82   // When true, print methods append the output to the stream.
83   // When false, the output is suppressed.
84   bool Print;
85   // True if an error occurred.
86   bool Error;
87 
88 public:
89   // Demangled output.
90   OutputBuffer Output;
91 
92   Demangler(size_t MaxRecursionLevel = 500);
93 
94   bool demangle(std::string_view MangledName);
95 
96 private:
97   bool demanglePath(IsInType Type,
98                     LeaveGenericsOpen LeaveOpen = LeaveGenericsOpen::No);
99   void demangleImplPath(IsInType InType);
100   void demangleGenericArg();
101   void demangleType();
102   void demangleFnSig();
103   void demangleDynBounds();
104   void demangleDynTrait();
105   void demangleOptionalBinder();
106   void demangleConst();
107   void demangleConstInt();
108   void demangleConstBool();
109   void demangleConstChar();
110 
111   template <typename Callable> void demangleBackref(Callable Demangler) {
112     uint64_t Backref = parseBase62Number();
113     if (Error || Backref >= Position) {
114       Error = true;
115       return;
116     }
117 
118     if (!Print)
119       return;
120 
121     ScopedOverride<size_t> SavePosition(Position, Position);
122     Position = Backref;
123     Demangler();
124   }
125 
126   Identifier parseIdentifier();
127   uint64_t parseOptionalBase62Number(char Tag);
128   uint64_t parseBase62Number();
129   uint64_t parseDecimalNumber();
130   uint64_t parseHexNumber(std::string_view &HexDigits);
131 
132   void print(char C);
133   void print(std::string_view S);
134   void printDecimalNumber(uint64_t N);
135   void printBasicType(BasicType);
136   void printLifetime(uint64_t Index);
137   void printIdentifier(Identifier Ident);
138 
139   char look() const;
140   char consume();
141   bool consumeIf(char Prefix);
142 
143   bool addAssign(uint64_t &A, uint64_t B);
144   bool mulAssign(uint64_t &A, uint64_t B);
145 };
146 
147 } // namespace
148 
149 char *llvm::rustDemangle(const char *MangledName) {
150   if (MangledName == nullptr)
151     return nullptr;
152 
153   // Return early if mangled name doesn't look like a Rust symbol.
154   std::string_view Mangled(MangledName);
155   if (!llvm::itanium_demangle::starts_with(Mangled, "_R"))
156     return nullptr;
157 
158   Demangler D;
159   if (!D.demangle(Mangled)) {
160     std::free(D.Output.getBuffer());
161     return nullptr;
162   }
163 
164   D.Output += '\0';
165 
166   return D.Output.getBuffer();
167 }
168 
169 Demangler::Demangler(size_t MaxRecursionLevel)
170     : MaxRecursionLevel(MaxRecursionLevel) {}
171 
172 static inline bool isDigit(const char C) { return '0' <= C && C <= '9'; }
173 
174 static inline bool isHexDigit(const char C) {
175   return ('0' <= C && C <= '9') || ('a' <= C && C <= 'f');
176 }
177 
178 static inline bool isLower(const char C) { return 'a' <= C && C <= 'z'; }
179 
180 static inline bool isUpper(const char C) { return 'A' <= C && C <= 'Z'; }
181 
182 /// Returns true if C is a valid mangled character: <0-9a-zA-Z_>.
183 static inline bool isValid(const char C) {
184   return isDigit(C) || isLower(C) || isUpper(C) || C == '_';
185 }
186 
187 // Demangles Rust v0 mangled symbol. Returns true when successful, and false
188 // otherwise. The demangled symbol is stored in Output field. It is
189 // responsibility of the caller to free the memory behind the output stream.
190 //
191 // <symbol-name> = "_R" <path> [<instantiating-crate>]
192 bool Demangler::demangle(std::string_view Mangled) {
193   Position = 0;
194   Error = false;
195   Print = true;
196   RecursionLevel = 0;
197   BoundLifetimes = 0;
198 
199   if (!llvm::itanium_demangle::starts_with(Mangled, "_R")) {
200     Error = true;
201     return false;
202   }
203   Mangled.remove_prefix(2);
204   size_t Dot = Mangled.find('.');
205   Input = Dot == std::string_view::npos ? Mangled : Mangled.substr(0, Dot);
206 
207   demanglePath(IsInType::No);
208 
209   if (Position != Input.size()) {
210     ScopedOverride<bool> SavePrint(Print, false);
211     demanglePath(IsInType::No);
212   }
213 
214   if (Position != Input.size())
215     Error = true;
216 
217   if (Dot != std::string_view::npos) {
218     print(" (");
219     print(Mangled.substr(Dot));
220     print(")");
221   }
222 
223   return !Error;
224 }
225 
226 // Demangles a path. InType indicates whether a path is inside a type. When
227 // LeaveOpen is true, a closing `>` after generic arguments is omitted from the
228 // output. Return value indicates whether generics arguments have been left
229 // open.
230 //
231 // <path> = "C" <identifier>               // crate root
232 //        | "M" <impl-path> <type>         // <T> (inherent impl)
233 //        | "X" <impl-path> <type> <path>  // <T as Trait> (trait impl)
234 //        | "Y" <type> <path>              // <T as Trait> (trait definition)
235 //        | "N" <ns> <path> <identifier>   // ...::ident (nested path)
236 //        | "I" <path> {<generic-arg>} "E" // ...<T, U> (generic args)
237 //        | <backref>
238 // <identifier> = [<disambiguator>] <undisambiguated-identifier>
239 // <ns> = "C"      // closure
240 //      | "S"      // shim
241 //      | <A-Z>    // other special namespaces
242 //      | <a-z>    // internal namespaces
243 bool Demangler::demanglePath(IsInType InType, LeaveGenericsOpen LeaveOpen) {
244   if (Error || RecursionLevel >= MaxRecursionLevel) {
245     Error = true;
246     return false;
247   }
248   ScopedOverride<size_t> SaveRecursionLevel(RecursionLevel, RecursionLevel + 1);
249 
250   switch (consume()) {
251   case 'C': {
252     parseOptionalBase62Number('s');
253     printIdentifier(parseIdentifier());
254     break;
255   }
256   case 'M': {
257     demangleImplPath(InType);
258     print("<");
259     demangleType();
260     print(">");
261     break;
262   }
263   case 'X': {
264     demangleImplPath(InType);
265     print("<");
266     demangleType();
267     print(" as ");
268     demanglePath(IsInType::Yes);
269     print(">");
270     break;
271   }
272   case 'Y': {
273     print("<");
274     demangleType();
275     print(" as ");
276     demanglePath(IsInType::Yes);
277     print(">");
278     break;
279   }
280   case 'N': {
281     char NS = consume();
282     if (!isLower(NS) && !isUpper(NS)) {
283       Error = true;
284       break;
285     }
286     demanglePath(InType);
287 
288     uint64_t Disambiguator = parseOptionalBase62Number('s');
289     Identifier Ident = parseIdentifier();
290 
291     if (isUpper(NS)) {
292       // Special namespaces
293       print("::{");
294       if (NS == 'C')
295         print("closure");
296       else if (NS == 'S')
297         print("shim");
298       else
299         print(NS);
300       if (!Ident.empty()) {
301         print(":");
302         printIdentifier(Ident);
303       }
304       print('#');
305       printDecimalNumber(Disambiguator);
306       print('}');
307     } else {
308       // Implementation internal namespaces.
309       if (!Ident.empty()) {
310         print("::");
311         printIdentifier(Ident);
312       }
313     }
314     break;
315   }
316   case 'I': {
317     demanglePath(InType);
318     // Omit "::" when in a type, where it is optional.
319     if (InType == IsInType::No)
320       print("::");
321     print("<");
322     for (size_t I = 0; !Error && !consumeIf('E'); ++I) {
323       if (I > 0)
324         print(", ");
325       demangleGenericArg();
326     }
327     if (LeaveOpen == LeaveGenericsOpen::Yes)
328       return true;
329     else
330       print(">");
331     break;
332   }
333   case 'B': {
334     bool IsOpen = false;
335     demangleBackref([&] { IsOpen = demanglePath(InType, LeaveOpen); });
336     return IsOpen;
337   }
338   default:
339     Error = true;
340     break;
341   }
342 
343   return false;
344 }
345 
346 // <impl-path> = [<disambiguator>] <path>
347 // <disambiguator> = "s" <base-62-number>
348 void Demangler::demangleImplPath(IsInType InType) {
349   ScopedOverride<bool> SavePrint(Print, false);
350   parseOptionalBase62Number('s');
351   demanglePath(InType);
352 }
353 
354 // <generic-arg> = <lifetime>
355 //               | <type>
356 //               | "K" <const>
357 // <lifetime> = "L" <base-62-number>
358 void Demangler::demangleGenericArg() {
359   if (consumeIf('L'))
360     printLifetime(parseBase62Number());
361   else if (consumeIf('K'))
362     demangleConst();
363   else
364     demangleType();
365 }
366 
367 // <basic-type> = "a"      // i8
368 //              | "b"      // bool
369 //              | "c"      // char
370 //              | "d"      // f64
371 //              | "e"      // str
372 //              | "f"      // f32
373 //              | "h"      // u8
374 //              | "i"      // isize
375 //              | "j"      // usize
376 //              | "l"      // i32
377 //              | "m"      // u32
378 //              | "n"      // i128
379 //              | "o"      // u128
380 //              | "s"      // i16
381 //              | "t"      // u16
382 //              | "u"      // ()
383 //              | "v"      // ...
384 //              | "x"      // i64
385 //              | "y"      // u64
386 //              | "z"      // !
387 //              | "p"      // placeholder (e.g. for generic params), shown as _
388 static bool parseBasicType(char C, BasicType &Type) {
389   switch (C) {
390   case 'a':
391     Type = BasicType::I8;
392     return true;
393   case 'b':
394     Type = BasicType::Bool;
395     return true;
396   case 'c':
397     Type = BasicType::Char;
398     return true;
399   case 'd':
400     Type = BasicType::F64;
401     return true;
402   case 'e':
403     Type = BasicType::Str;
404     return true;
405   case 'f':
406     Type = BasicType::F32;
407     return true;
408   case 'h':
409     Type = BasicType::U8;
410     return true;
411   case 'i':
412     Type = BasicType::ISize;
413     return true;
414   case 'j':
415     Type = BasicType::USize;
416     return true;
417   case 'l':
418     Type = BasicType::I32;
419     return true;
420   case 'm':
421     Type = BasicType::U32;
422     return true;
423   case 'n':
424     Type = BasicType::I128;
425     return true;
426   case 'o':
427     Type = BasicType::U128;
428     return true;
429   case 'p':
430     Type = BasicType::Placeholder;
431     return true;
432   case 's':
433     Type = BasicType::I16;
434     return true;
435   case 't':
436     Type = BasicType::U16;
437     return true;
438   case 'u':
439     Type = BasicType::Unit;
440     return true;
441   case 'v':
442     Type = BasicType::Variadic;
443     return true;
444   case 'x':
445     Type = BasicType::I64;
446     return true;
447   case 'y':
448     Type = BasicType::U64;
449     return true;
450   case 'z':
451     Type = BasicType::Never;
452     return true;
453   default:
454     return false;
455   }
456 }
457 
458 void Demangler::printBasicType(BasicType Type) {
459   switch (Type) {
460   case BasicType::Bool:
461     print("bool");
462     break;
463   case BasicType::Char:
464     print("char");
465     break;
466   case BasicType::I8:
467     print("i8");
468     break;
469   case BasicType::I16:
470     print("i16");
471     break;
472   case BasicType::I32:
473     print("i32");
474     break;
475   case BasicType::I64:
476     print("i64");
477     break;
478   case BasicType::I128:
479     print("i128");
480     break;
481   case BasicType::ISize:
482     print("isize");
483     break;
484   case BasicType::U8:
485     print("u8");
486     break;
487   case BasicType::U16:
488     print("u16");
489     break;
490   case BasicType::U32:
491     print("u32");
492     break;
493   case BasicType::U64:
494     print("u64");
495     break;
496   case BasicType::U128:
497     print("u128");
498     break;
499   case BasicType::USize:
500     print("usize");
501     break;
502   case BasicType::F32:
503     print("f32");
504     break;
505   case BasicType::F64:
506     print("f64");
507     break;
508   case BasicType::Str:
509     print("str");
510     break;
511   case BasicType::Placeholder:
512     print("_");
513     break;
514   case BasicType::Unit:
515     print("()");
516     break;
517   case BasicType::Variadic:
518     print("...");
519     break;
520   case BasicType::Never:
521     print("!");
522     break;
523   }
524 }
525 
526 // <type> = | <basic-type>
527 //          | <path>                      // named type
528 //          | "A" <type> <const>          // [T; N]
529 //          | "S" <type>                  // [T]
530 //          | "T" {<type>} "E"            // (T1, T2, T3, ...)
531 //          | "R" [<lifetime>] <type>     // &T
532 //          | "Q" [<lifetime>] <type>     // &mut T
533 //          | "P" <type>                  // *const T
534 //          | "O" <type>                  // *mut T
535 //          | "F" <fn-sig>                // fn(...) -> ...
536 //          | "D" <dyn-bounds> <lifetime> // dyn Trait<Assoc = X> + Send + 'a
537 //          | <backref>                   // backref
538 void Demangler::demangleType() {
539   if (Error || RecursionLevel >= MaxRecursionLevel) {
540     Error = true;
541     return;
542   }
543   ScopedOverride<size_t> SaveRecursionLevel(RecursionLevel, RecursionLevel + 1);
544 
545   size_t Start = Position;
546   char C = consume();
547   BasicType Type;
548   if (parseBasicType(C, Type))
549     return printBasicType(Type);
550 
551   switch (C) {
552   case 'A':
553     print("[");
554     demangleType();
555     print("; ");
556     demangleConst();
557     print("]");
558     break;
559   case 'S':
560     print("[");
561     demangleType();
562     print("]");
563     break;
564   case 'T': {
565     print("(");
566     size_t I = 0;
567     for (; !Error && !consumeIf('E'); ++I) {
568       if (I > 0)
569         print(", ");
570       demangleType();
571     }
572     if (I == 1)
573       print(",");
574     print(")");
575     break;
576   }
577   case 'R':
578   case 'Q':
579     print('&');
580     if (consumeIf('L')) {
581       if (auto Lifetime = parseBase62Number()) {
582         printLifetime(Lifetime);
583         print(' ');
584       }
585     }
586     if (C == 'Q')
587       print("mut ");
588     demangleType();
589     break;
590   case 'P':
591     print("*const ");
592     demangleType();
593     break;
594   case 'O':
595     print("*mut ");
596     demangleType();
597     break;
598   case 'F':
599     demangleFnSig();
600     break;
601   case 'D':
602     demangleDynBounds();
603     if (consumeIf('L')) {
604       if (auto Lifetime = parseBase62Number()) {
605         print(" + ");
606         printLifetime(Lifetime);
607       }
608     } else {
609       Error = true;
610     }
611     break;
612   case 'B':
613     demangleBackref([&] { demangleType(); });
614     break;
615   default:
616     Position = Start;
617     demanglePath(IsInType::Yes);
618     break;
619   }
620 }
621 
622 // <fn-sig> := [<binder>] ["U"] ["K" <abi>] {<type>} "E" <type>
623 // <abi> = "C"
624 //       | <undisambiguated-identifier>
625 void Demangler::demangleFnSig() {
626   ScopedOverride<size_t> SaveBoundLifetimes(BoundLifetimes, BoundLifetimes);
627   demangleOptionalBinder();
628 
629   if (consumeIf('U'))
630     print("unsafe ");
631 
632   if (consumeIf('K')) {
633     print("extern \"");
634     if (consumeIf('C')) {
635       print("C");
636     } else {
637       Identifier Ident = parseIdentifier();
638       if (Ident.Punycode)
639         Error = true;
640       for (char C : Ident.Name) {
641         // When mangling ABI string, the "-" is replaced with "_".
642         if (C == '_')
643           C = '-';
644         print(C);
645       }
646     }
647     print("\" ");
648   }
649 
650   print("fn(");
651   for (size_t I = 0; !Error && !consumeIf('E'); ++I) {
652     if (I > 0)
653       print(", ");
654     demangleType();
655   }
656   print(")");
657 
658   if (consumeIf('u')) {
659     // Skip the unit type from the output.
660   } else {
661     print(" -> ");
662     demangleType();
663   }
664 }
665 
666 // <dyn-bounds> = [<binder>] {<dyn-trait>} "E"
667 void Demangler::demangleDynBounds() {
668   ScopedOverride<size_t> SaveBoundLifetimes(BoundLifetimes, BoundLifetimes);
669   print("dyn ");
670   demangleOptionalBinder();
671   for (size_t I = 0; !Error && !consumeIf('E'); ++I) {
672     if (I > 0)
673       print(" + ");
674     demangleDynTrait();
675   }
676 }
677 
678 // <dyn-trait> = <path> {<dyn-trait-assoc-binding>}
679 // <dyn-trait-assoc-binding> = "p" <undisambiguated-identifier> <type>
680 void Demangler::demangleDynTrait() {
681   bool IsOpen = demanglePath(IsInType::Yes, LeaveGenericsOpen::Yes);
682   while (!Error && consumeIf('p')) {
683     if (!IsOpen) {
684       IsOpen = true;
685       print('<');
686     } else {
687       print(", ");
688     }
689     print(parseIdentifier().Name);
690     print(" = ");
691     demangleType();
692   }
693   if (IsOpen)
694     print(">");
695 }
696 
697 // Demangles optional binder and updates the number of bound lifetimes.
698 //
699 // <binder> = "G" <base-62-number>
700 void Demangler::demangleOptionalBinder() {
701   uint64_t Binder = parseOptionalBase62Number('G');
702   if (Error || Binder == 0)
703     return;
704 
705   // In valid inputs each bound lifetime is referenced later. Referencing a
706   // lifetime requires at least one byte of input. Reject inputs that are too
707   // short to reference all bound lifetimes. Otherwise demangling of invalid
708   // binders could generate excessive amounts of output.
709   if (Binder >= Input.size() - BoundLifetimes) {
710     Error = true;
711     return;
712   }
713 
714   print("for<");
715   for (size_t I = 0; I != Binder; ++I) {
716     BoundLifetimes += 1;
717     if (I > 0)
718       print(", ");
719     printLifetime(1);
720   }
721   print("> ");
722 }
723 
724 // <const> = <basic-type> <const-data>
725 //         | "p"                          // placeholder
726 //         | <backref>
727 void Demangler::demangleConst() {
728   if (Error || RecursionLevel >= MaxRecursionLevel) {
729     Error = true;
730     return;
731   }
732   ScopedOverride<size_t> SaveRecursionLevel(RecursionLevel, RecursionLevel + 1);
733 
734   char C = consume();
735   BasicType Type;
736   if (parseBasicType(C, Type)) {
737     switch (Type) {
738     case BasicType::I8:
739     case BasicType::I16:
740     case BasicType::I32:
741     case BasicType::I64:
742     case BasicType::I128:
743     case BasicType::ISize:
744     case BasicType::U8:
745     case BasicType::U16:
746     case BasicType::U32:
747     case BasicType::U64:
748     case BasicType::U128:
749     case BasicType::USize:
750       demangleConstInt();
751       break;
752     case BasicType::Bool:
753       demangleConstBool();
754       break;
755     case BasicType::Char:
756       demangleConstChar();
757       break;
758     case BasicType::Placeholder:
759       print('_');
760       break;
761     default:
762       Error = true;
763       break;
764     }
765   } else if (C == 'B') {
766     demangleBackref([&] { demangleConst(); });
767   } else {
768     Error = true;
769   }
770 }
771 
772 // <const-data> = ["n"] <hex-number>
773 void Demangler::demangleConstInt() {
774   if (consumeIf('n'))
775     print('-');
776 
777   std::string_view HexDigits;
778   uint64_t Value = parseHexNumber(HexDigits);
779   if (HexDigits.size() <= 16) {
780     printDecimalNumber(Value);
781   } else {
782     print("0x");
783     print(HexDigits);
784   }
785 }
786 
787 // <const-data> = "0_" // false
788 //              | "1_" // true
789 void Demangler::demangleConstBool() {
790   std::string_view HexDigits;
791   parseHexNumber(HexDigits);
792   if (HexDigits == "0")
793     print("false");
794   else if (HexDigits == "1")
795     print("true");
796   else
797     Error = true;
798 }
799 
800 /// Returns true if CodePoint represents a printable ASCII character.
801 static bool isAsciiPrintable(uint64_t CodePoint) {
802   return 0x20 <= CodePoint && CodePoint <= 0x7e;
803 }
804 
805 // <const-data> = <hex-number>
806 void Demangler::demangleConstChar() {
807   std::string_view HexDigits;
808   uint64_t CodePoint = parseHexNumber(HexDigits);
809   if (Error || HexDigits.size() > 6) {
810     Error = true;
811     return;
812   }
813 
814   print("'");
815   switch (CodePoint) {
816   case '\t':
817     print(R"(\t)");
818     break;
819   case '\r':
820     print(R"(\r)");
821     break;
822   case '\n':
823     print(R"(\n)");
824     break;
825   case '\\':
826     print(R"(\\)");
827     break;
828   case '"':
829     print(R"(")");
830     break;
831   case '\'':
832     print(R"(\')");
833     break;
834   default:
835     if (isAsciiPrintable(CodePoint)) {
836       char C = CodePoint;
837       print(C);
838     } else {
839       print(R"(\u{)");
840       print(HexDigits);
841       print('}');
842     }
843     break;
844   }
845   print('\'');
846 }
847 
848 // <undisambiguated-identifier> = ["u"] <decimal-number> ["_"] <bytes>
849 Identifier Demangler::parseIdentifier() {
850   bool Punycode = consumeIf('u');
851   uint64_t Bytes = parseDecimalNumber();
852 
853   // Underscore resolves the ambiguity when identifier starts with a decimal
854   // digit or another underscore.
855   consumeIf('_');
856 
857   if (Error || Bytes > Input.size() - Position) {
858     Error = true;
859     return {};
860   }
861   std::string_view S = Input.substr(Position, Bytes);
862   Position += Bytes;
863 
864   if (!std::all_of(S.begin(), S.end(), isValid)) {
865     Error = true;
866     return {};
867   }
868 
869   return {S, Punycode};
870 }
871 
872 // Parses optional base 62 number. The presence of a number is determined using
873 // Tag. Returns 0 when tag is absent and parsed value + 1 otherwise
874 //
875 // This function is indended for parsing disambiguators and binders which when
876 // not present have their value interpreted as 0, and otherwise as decoded
877 // value + 1. For example for binders, value for "G_" is 1, for "G0_" value is
878 // 2. When "G" is absent value is 0.
879 uint64_t Demangler::parseOptionalBase62Number(char Tag) {
880   if (!consumeIf(Tag))
881     return 0;
882 
883   uint64_t N = parseBase62Number();
884   if (Error || !addAssign(N, 1))
885     return 0;
886 
887   return N;
888 }
889 
890 // Parses base 62 number with <0-9a-zA-Z> as digits. Number is terminated by
891 // "_". All values are offset by 1, so that "_" encodes 0, "0_" encodes 1,
892 // "1_" encodes 2, etc.
893 //
894 // <base-62-number> = {<0-9a-zA-Z>} "_"
895 uint64_t Demangler::parseBase62Number() {
896   if (consumeIf('_'))
897     return 0;
898 
899   uint64_t Value = 0;
900 
901   while (true) {
902     uint64_t Digit;
903     char C = consume();
904 
905     if (C == '_') {
906       break;
907     } else if (isDigit(C)) {
908       Digit = C - '0';
909     } else if (isLower(C)) {
910       Digit = 10 + (C - 'a');
911     } else if (isUpper(C)) {
912       Digit = 10 + 26 + (C - 'A');
913     } else {
914       Error = true;
915       return 0;
916     }
917 
918     if (!mulAssign(Value, 62))
919       return 0;
920 
921     if (!addAssign(Value, Digit))
922       return 0;
923   }
924 
925   if (!addAssign(Value, 1))
926     return 0;
927 
928   return Value;
929 }
930 
931 // Parses a decimal number that had been encoded without any leading zeros.
932 //
933 // <decimal-number> = "0"
934 //                  | <1-9> {<0-9>}
935 uint64_t Demangler::parseDecimalNumber() {
936   char C = look();
937   if (!isDigit(C)) {
938     Error = true;
939     return 0;
940   }
941 
942   if (C == '0') {
943     consume();
944     return 0;
945   }
946 
947   uint64_t Value = 0;
948 
949   while (isDigit(look())) {
950     if (!mulAssign(Value, 10)) {
951       Error = true;
952       return 0;
953     }
954 
955     uint64_t D = consume() - '0';
956     if (!addAssign(Value, D))
957       return 0;
958   }
959 
960   return Value;
961 }
962 
963 // Parses a hexadecimal number with <0-9a-f> as a digits. Returns the parsed
964 // value and stores hex digits in HexDigits. The return value is unspecified if
965 // HexDigits.size() > 16.
966 //
967 // <hex-number> = "0_"
968 //              | <1-9a-f> {<0-9a-f>} "_"
969 uint64_t Demangler::parseHexNumber(std::string_view &HexDigits) {
970   size_t Start = Position;
971   uint64_t Value = 0;
972 
973   if (!isHexDigit(look()))
974     Error = true;
975 
976   if (consumeIf('0')) {
977     if (!consumeIf('_'))
978       Error = true;
979   } else {
980     while (!Error && !consumeIf('_')) {
981       char C = consume();
982       Value *= 16;
983       if (isDigit(C))
984         Value += C - '0';
985       else if ('a' <= C && C <= 'f')
986         Value += 10 + (C - 'a');
987       else
988         Error = true;
989     }
990   }
991 
992   if (Error) {
993     HexDigits = std::string_view();
994     return 0;
995   }
996 
997   size_t End = Position - 1;
998   assert(Start < End);
999   HexDigits = Input.substr(Start, End - Start);
1000   return Value;
1001 }
1002 
1003 void Demangler::print(char C) {
1004   if (Error || !Print)
1005     return;
1006 
1007   Output += C;
1008 }
1009 
1010 void Demangler::print(std::string_view S) {
1011   if (Error || !Print)
1012     return;
1013 
1014   Output += S;
1015 }
1016 
1017 void Demangler::printDecimalNumber(uint64_t N) {
1018   if (Error || !Print)
1019     return;
1020 
1021   Output << N;
1022 }
1023 
1024 // Prints a lifetime. An index 0 always represents an erased lifetime. Indices
1025 // starting from 1, are De Bruijn indices, referring to higher-ranked lifetimes
1026 // bound by one of the enclosing binders.
1027 void Demangler::printLifetime(uint64_t Index) {
1028   if (Index == 0) {
1029     print("'_");
1030     return;
1031   }
1032 
1033   if (Index - 1 >= BoundLifetimes) {
1034     Error = true;
1035     return;
1036   }
1037 
1038   uint64_t Depth = BoundLifetimes - Index;
1039   print('\'');
1040   if (Depth < 26) {
1041     char C = 'a' + Depth;
1042     print(C);
1043   } else {
1044     print('z');
1045     printDecimalNumber(Depth - 26 + 1);
1046   }
1047 }
1048 
1049 static inline bool decodePunycodeDigit(char C, size_t &Value) {
1050   if (isLower(C)) {
1051     Value = C - 'a';
1052     return true;
1053   }
1054 
1055   if (isDigit(C)) {
1056     Value = 26 + (C - '0');
1057     return true;
1058   }
1059 
1060   return false;
1061 }
1062 
1063 static void removeNullBytes(OutputBuffer &Output, size_t StartIdx) {
1064   char *Buffer = Output.getBuffer();
1065   char *Start = Buffer + StartIdx;
1066   char *End = Buffer + Output.getCurrentPosition();
1067   Output.setCurrentPosition(std::remove(Start, End, '\0') - Buffer);
1068 }
1069 
1070 // Encodes code point as UTF-8 and stores results in Output. Returns false if
1071 // CodePoint is not a valid unicode scalar value.
1072 static inline bool encodeUTF8(size_t CodePoint, char *Output) {
1073   if (0xD800 <= CodePoint && CodePoint <= 0xDFFF)
1074     return false;
1075 
1076   if (CodePoint <= 0x7F) {
1077     Output[0] = CodePoint;
1078     return true;
1079   }
1080 
1081   if (CodePoint <= 0x7FF) {
1082     Output[0] = 0xC0 | ((CodePoint >> 6) & 0x3F);
1083     Output[1] = 0x80 | (CodePoint & 0x3F);
1084     return true;
1085   }
1086 
1087   if (CodePoint <= 0xFFFF) {
1088     Output[0] = 0xE0 | (CodePoint >> 12);
1089     Output[1] = 0x80 | ((CodePoint >> 6) & 0x3F);
1090     Output[2] = 0x80 | (CodePoint & 0x3F);
1091     return true;
1092   }
1093 
1094   if (CodePoint <= 0x10FFFF) {
1095     Output[0] = 0xF0 | (CodePoint >> 18);
1096     Output[1] = 0x80 | ((CodePoint >> 12) & 0x3F);
1097     Output[2] = 0x80 | ((CodePoint >> 6) & 0x3F);
1098     Output[3] = 0x80 | (CodePoint & 0x3F);
1099     return true;
1100   }
1101 
1102   return false;
1103 }
1104 
1105 // Decodes string encoded using punycode and appends results to Output.
1106 // Returns true if decoding was successful.
1107 static bool decodePunycode(std::string_view Input, OutputBuffer &Output) {
1108   size_t OutputSize = Output.getCurrentPosition();
1109   size_t InputIdx = 0;
1110 
1111   // Rust uses an underscore as a delimiter.
1112   size_t DelimiterPos = std::string_view::npos;
1113   for (size_t I = 0; I != Input.size(); ++I)
1114     if (Input[I] == '_')
1115       DelimiterPos = I;
1116 
1117   if (DelimiterPos != std::string_view::npos) {
1118     // Copy basic code points before the last delimiter to the output.
1119     for (; InputIdx != DelimiterPos; ++InputIdx) {
1120       char C = Input[InputIdx];
1121       if (!isValid(C))
1122         return false;
1123       // Code points are padded with zeros while decoding is in progress.
1124       char UTF8[4] = {C};
1125       Output += std::string_view(UTF8, 4);
1126     }
1127     // Skip over the delimiter.
1128     ++InputIdx;
1129   }
1130 
1131   size_t Base = 36;
1132   size_t Skew = 38;
1133   size_t Bias = 72;
1134   size_t N = 0x80;
1135   size_t TMin = 1;
1136   size_t TMax = 26;
1137   size_t Damp = 700;
1138 
1139   auto Adapt = [&](size_t Delta, size_t NumPoints) {
1140     Delta /= Damp;
1141     Delta += Delta / NumPoints;
1142     Damp = 2;
1143 
1144     size_t K = 0;
1145     while (Delta > (Base - TMin) * TMax / 2) {
1146       Delta /= Base - TMin;
1147       K += Base;
1148     }
1149     return K + (((Base - TMin + 1) * Delta) / (Delta + Skew));
1150   };
1151 
1152   // Main decoding loop.
1153   for (size_t I = 0; InputIdx != Input.size(); I += 1) {
1154     size_t OldI = I;
1155     size_t W = 1;
1156     size_t Max = std::numeric_limits<size_t>::max();
1157     for (size_t K = Base; true; K += Base) {
1158       if (InputIdx == Input.size())
1159         return false;
1160       char C = Input[InputIdx++];
1161       size_t Digit = 0;
1162       if (!decodePunycodeDigit(C, Digit))
1163         return false;
1164 
1165       if (Digit > (Max - I) / W)
1166         return false;
1167       I += Digit * W;
1168 
1169       size_t T;
1170       if (K <= Bias)
1171         T = TMin;
1172       else if (K >= Bias + TMax)
1173         T = TMax;
1174       else
1175         T = K - Bias;
1176 
1177       if (Digit < T)
1178         break;
1179 
1180       if (W > Max / (Base - T))
1181         return false;
1182       W *= (Base - T);
1183     }
1184     size_t NumPoints = (Output.getCurrentPosition() - OutputSize) / 4 + 1;
1185     Bias = Adapt(I - OldI, NumPoints);
1186 
1187     if (I / NumPoints > Max - N)
1188       return false;
1189     N += I / NumPoints;
1190     I = I % NumPoints;
1191 
1192     // Insert N at position I in the output.
1193     char UTF8[4] = {};
1194     if (!encodeUTF8(N, UTF8))
1195       return false;
1196     Output.insert(OutputSize + I * 4, UTF8, 4);
1197   }
1198 
1199   removeNullBytes(Output, OutputSize);
1200   return true;
1201 }
1202 
1203 void Demangler::printIdentifier(Identifier Ident) {
1204   if (Error || !Print)
1205     return;
1206 
1207   if (Ident.Punycode) {
1208     if (!decodePunycode(Ident.Name, Output))
1209       Error = true;
1210   } else {
1211     print(Ident.Name);
1212   }
1213 }
1214 
1215 char Demangler::look() const {
1216   if (Error || Position >= Input.size())
1217     return 0;
1218 
1219   return Input[Position];
1220 }
1221 
1222 char Demangler::consume() {
1223   if (Error || Position >= Input.size()) {
1224     Error = true;
1225     return 0;
1226   }
1227 
1228   return Input[Position++];
1229 }
1230 
1231 bool Demangler::consumeIf(char Prefix) {
1232   if (Error || Position >= Input.size() || Input[Position] != Prefix)
1233     return false;
1234 
1235   Position += 1;
1236   return true;
1237 }
1238 
1239 /// Computes A + B. When computation wraps around sets the error and returns
1240 /// false. Otherwise assigns the result to A and returns true.
1241 bool Demangler::addAssign(uint64_t &A, uint64_t B) {
1242   if (A > std::numeric_limits<uint64_t>::max() - B) {
1243     Error = true;
1244     return false;
1245   }
1246 
1247   A += B;
1248   return true;
1249 }
1250 
1251 /// Computes A * B. When computation wraps around sets the error and returns
1252 /// false. Otherwise assigns the result to A and returns true.
1253 bool Demangler::mulAssign(uint64_t &A, uint64_t B) {
1254   if (B != 0 && A > std::numeric_limits<uint64_t>::max() / B) {
1255     Error = true;
1256     return false;
1257   }
1258 
1259   A *= B;
1260   return true;
1261 }
1262