xref: /llvm-project/llvm/lib/Demangle/RustDemangle.cpp (revision 89615a5e925e10f388130e80fba4bf28bc3c3c0a)
1 //===--- RustDemangle.cpp ---------------------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file defines a demangler for Rust v0 mangled symbols as specified in
10 // https://rust-lang.github.io/rfcs/2603-rust-symbol-name-mangling-v0.html
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "llvm/Demangle/RustDemangle.h"
15 #include "llvm/Demangle/Demangle.h"
16 
17 #include <algorithm>
18 #include <cassert>
19 #include <cstring>
20 #include <limits>
21 
22 using namespace llvm;
23 using namespace rust_demangle;
24 
25 char *llvm::rustDemangle(const char *MangledName, char *Buf, size_t *N,
26                          int *Status) {
27   if (MangledName == nullptr || (Buf != nullptr && N == nullptr)) {
28     if (Status != nullptr)
29       *Status = demangle_invalid_args;
30     return nullptr;
31   }
32 
33   // Return early if mangled name doesn't look like a Rust symbol.
34   StringView Mangled(MangledName);
35   if (!Mangled.startsWith("_R")) {
36     if (Status != nullptr)
37       *Status = demangle_invalid_mangled_name;
38     return nullptr;
39   }
40 
41   Demangler D;
42   if (!initializeOutputStream(nullptr, nullptr, D.Output, 1024)) {
43     if (Status != nullptr)
44       *Status = demangle_memory_alloc_failure;
45     return nullptr;
46   }
47 
48   if (!D.demangle(Mangled)) {
49     if (Status != nullptr)
50       *Status = demangle_invalid_mangled_name;
51     std::free(D.Output.getBuffer());
52     return nullptr;
53   }
54 
55   D.Output += '\0';
56   char *Demangled = D.Output.getBuffer();
57   size_t DemangledLen = D.Output.getCurrentPosition();
58 
59   if (Buf != nullptr) {
60     if (DemangledLen <= *N) {
61       std::memcpy(Buf, Demangled, DemangledLen);
62       std::free(Demangled);
63       Demangled = Buf;
64     } else {
65       std::free(Buf);
66     }
67   }
68 
69   if (N != nullptr)
70     *N = DemangledLen;
71 
72   if (Status != nullptr)
73     *Status = demangle_success;
74 
75   return Demangled;
76 }
77 
78 Demangler::Demangler(size_t MaxRecursionLevel)
79     : MaxRecursionLevel(MaxRecursionLevel) {}
80 
81 static inline bool isDigit(const char C) { return '0' <= C && C <= '9'; }
82 
83 static inline bool isHexDigit(const char C) {
84   return ('0' <= C && C <= '9') || ('a' <= C && C <= 'f');
85 }
86 
87 static inline bool isLower(const char C) { return 'a' <= C && C <= 'z'; }
88 
89 static inline bool isUpper(const char C) { return 'A' <= C && C <= 'Z'; }
90 
91 /// Returns true if C is a valid mangled character: <0-9a-zA-Z_>.
92 static inline bool isValid(const char C) {
93   return isDigit(C) || isLower(C) || isUpper(C) || C == '_';
94 }
95 
96 // Demangles Rust v0 mangled symbol. Returns true when successful, and false
97 // otherwise. The demangled symbol is stored in Output field. It is
98 // responsibility of the caller to free the memory behind the output stream.
99 //
100 // <symbol-name> = "_R" <path> [<instantiating-crate>]
101 bool Demangler::demangle(StringView Mangled) {
102   Position = 0;
103   Error = false;
104   Print = true;
105   RecursionLevel = 0;
106   BoundLifetimes = 0;
107 
108   if (!Mangled.consumeFront("_R")) {
109     Error = true;
110     return false;
111   }
112   Input = Mangled;
113 
114   demanglePath(rust_demangle::InType::No);
115 
116   // FIXME parse optional <instantiating-crate>.
117 
118   if (Position != Input.size())
119     Error = true;
120 
121   return !Error;
122 }
123 
124 // Demangles a path. InType indicates whether a path is inside a type.
125 //
126 // <path> = "C" <identifier>               // crate root
127 //        | "M" <impl-path> <type>         // <T> (inherent impl)
128 //        | "X" <impl-path> <type> <path>  // <T as Trait> (trait impl)
129 //        | "Y" <type> <path>              // <T as Trait> (trait definition)
130 //        | "N" <ns> <path> <identifier>   // ...::ident (nested path)
131 //        | "I" <path> {<generic-arg>} "E" // ...<T, U> (generic args)
132 //        | <backref>
133 // <identifier> = [<disambiguator>] <undisambiguated-identifier>
134 // <ns> = "C"      // closure
135 //      | "S"      // shim
136 //      | <A-Z>    // other special namespaces
137 //      | <a-z>    // internal namespaces
138 void Demangler::demanglePath(InType InType) {
139   if (Error || RecursionLevel >= MaxRecursionLevel) {
140     Error = true;
141     return;
142   }
143   SwapAndRestore<size_t> SaveRecursionLevel(RecursionLevel, RecursionLevel + 1);
144 
145   switch (consume()) {
146   case 'C': {
147     parseOptionalBase62Number('s');
148     Identifier Ident = parseIdentifier();
149     print(Ident.Name);
150     break;
151   }
152   case 'M': {
153     demangleImplPath(InType);
154     print("<");
155     demangleType();
156     print(">");
157     break;
158   }
159   case 'X': {
160     demangleImplPath(InType);
161     print("<");
162     demangleType();
163     print(" as ");
164     demanglePath(rust_demangle::InType::Yes);
165     print(">");
166     break;
167   }
168   case 'Y': {
169     print("<");
170     demangleType();
171     print(" as ");
172     demanglePath(rust_demangle::InType::Yes);
173     print(">");
174     break;
175   }
176   case 'N': {
177     char NS = consume();
178     if (!isLower(NS) && !isUpper(NS)) {
179       Error = true;
180       break;
181     }
182     demanglePath(InType);
183 
184     uint64_t Disambiguator = parseOptionalBase62Number('s');
185     Identifier Ident = parseIdentifier();
186 
187     if (isUpper(NS)) {
188       // Special namespaces
189       print("::{");
190       if (NS == 'C')
191         print("closure");
192       else if (NS == 'S')
193         print("shim");
194       else
195         print(NS);
196       if (!Ident.empty()) {
197         print(":");
198         print(Ident.Name);
199       }
200       print('#');
201       printDecimalNumber(Disambiguator);
202       print('}');
203     } else {
204       // Implementation internal namespaces.
205       if (!Ident.empty()) {
206         print("::");
207         print(Ident.Name);
208       }
209     }
210     break;
211   }
212   case 'I': {
213     demanglePath(InType);
214     // Omit "::" when in a type, where it is optional.
215     if (InType == rust_demangle::InType::No)
216       print("::");
217     print("<");
218     for (size_t I = 0; !Error && !consumeIf('E'); ++I) {
219       if (I > 0)
220         print(", ");
221       demangleGenericArg();
222     }
223     print(">");
224     break;
225   }
226   default:
227     // FIXME parse remaining productions.
228     Error = true;
229     break;
230   }
231 }
232 
233 // <impl-path> = [<disambiguator>] <path>
234 // <disambiguator> = "s" <base-62-number>
235 void Demangler::demangleImplPath(InType InType) {
236   SwapAndRestore<bool> SavePrint(Print, false);
237   parseOptionalBase62Number('s');
238   demanglePath(InType);
239 }
240 
241 // <generic-arg> = <lifetime>
242 //               | <type>
243 //               | "K" <const>
244 // <lifetime> = "L" <base-62-number>
245 void Demangler::demangleGenericArg() {
246   if (consumeIf('L'))
247     printLifetime(parseBase62Number());
248   else if (consumeIf('K'))
249     demangleConst();
250   else
251     demangleType();
252 }
253 
254 // <basic-type> = "a"      // i8
255 //              | "b"      // bool
256 //              | "c"      // char
257 //              | "d"      // f64
258 //              | "e"      // str
259 //              | "f"      // f32
260 //              | "h"      // u8
261 //              | "i"      // isize
262 //              | "j"      // usize
263 //              | "l"      // i32
264 //              | "m"      // u32
265 //              | "n"      // i128
266 //              | "o"      // u128
267 //              | "s"      // i16
268 //              | "t"      // u16
269 //              | "u"      // ()
270 //              | "v"      // ...
271 //              | "x"      // i64
272 //              | "y"      // u64
273 //              | "z"      // !
274 //              | "p"      // placeholder (e.g. for generic params), shown as _
275 static bool parseBasicType(char C, BasicType &Type) {
276   switch (C) {
277   case 'a':
278     Type = BasicType::I8;
279     return true;
280   case 'b':
281     Type = BasicType::Bool;
282     return true;
283   case 'c':
284     Type = BasicType::Char;
285     return true;
286   case 'd':
287     Type = BasicType::F64;
288     return true;
289   case 'e':
290     Type = BasicType::Str;
291     return true;
292   case 'f':
293     Type = BasicType::F32;
294     return true;
295   case 'h':
296     Type = BasicType::U8;
297     return true;
298   case 'i':
299     Type = BasicType::ISize;
300     return true;
301   case 'j':
302     Type = BasicType::USize;
303     return true;
304   case 'l':
305     Type = BasicType::I32;
306     return true;
307   case 'm':
308     Type = BasicType::U32;
309     return true;
310   case 'n':
311     Type = BasicType::I128;
312     return true;
313   case 'o':
314     Type = BasicType::U128;
315     return true;
316   case 'p':
317     Type = BasicType::Placeholder;
318     return true;
319   case 's':
320     Type = BasicType::I16;
321     return true;
322   case 't':
323     Type = BasicType::U16;
324     return true;
325   case 'u':
326     Type = BasicType::Unit;
327     return true;
328   case 'v':
329     Type = BasicType::Variadic;
330     return true;
331   case 'x':
332     Type = BasicType::I64;
333     return true;
334   case 'y':
335     Type = BasicType::U64;
336     return true;
337   case 'z':
338     Type = BasicType::Never;
339     return true;
340   default:
341     return false;
342   }
343 }
344 
345 void Demangler::printBasicType(BasicType Type) {
346   switch (Type) {
347   case BasicType::Bool:
348     print("bool");
349     break;
350   case BasicType::Char:
351     print("char");
352     break;
353   case BasicType::I8:
354     print("i8");
355     break;
356   case BasicType::I16:
357     print("i16");
358     break;
359   case BasicType::I32:
360     print("i32");
361     break;
362   case BasicType::I64:
363     print("i64");
364     break;
365   case BasicType::I128:
366     print("i128");
367     break;
368   case BasicType::ISize:
369     print("isize");
370     break;
371   case BasicType::U8:
372     print("u8");
373     break;
374   case BasicType::U16:
375     print("u16");
376     break;
377   case BasicType::U32:
378     print("u32");
379     break;
380   case BasicType::U64:
381     print("u64");
382     break;
383   case BasicType::U128:
384     print("u128");
385     break;
386   case BasicType::USize:
387     print("usize");
388     break;
389   case BasicType::F32:
390     print("f32");
391     break;
392   case BasicType::F64:
393     print("f64");
394     break;
395   case BasicType::Str:
396     print("str");
397     break;
398   case BasicType::Placeholder:
399     print("_");
400     break;
401   case BasicType::Unit:
402     print("()");
403     break;
404   case BasicType::Variadic:
405     print("...");
406     break;
407   case BasicType::Never:
408     print("!");
409     break;
410   }
411 }
412 
413 // <type> = | <basic-type>
414 //          | <path>                      // named type
415 //          | "A" <type> <const>          // [T; N]
416 //          | "S" <type>                  // [T]
417 //          | "T" {<type>} "E"            // (T1, T2, T3, ...)
418 //          | "R" [<lifetime>] <type>     // &T
419 //          | "Q" [<lifetime>] <type>     // &mut T
420 //          | "P" <type>                  // *const T
421 //          | "O" <type>                  // *mut T
422 //          | "F" <fn-sig>                // fn(...) -> ...
423 //          | "D" <dyn-bounds> <lifetime> // dyn Trait<Assoc = X> + Send + 'a
424 //          | <backref>                   // backref
425 void Demangler::demangleType() {
426   size_t Start = Position;
427 
428   char C = consume();
429   BasicType Type;
430   if (parseBasicType(C, Type))
431     return printBasicType(Type);
432 
433   switch (C) {
434   case 'A':
435     print("[");
436     demangleType();
437     print("; ");
438     demangleConst();
439     print("]");
440     break;
441   case 'S':
442     print("[");
443     demangleType();
444     print("]");
445     break;
446   case 'T': {
447     print("(");
448     size_t I = 0;
449     for (; !Error && !consumeIf('E'); ++I) {
450       if (I > 0)
451         print(", ");
452       demangleType();
453     }
454     if (I == 1)
455       print(",");
456     print(")");
457     break;
458   }
459   case 'R':
460   case 'Q':
461     print('&');
462     if (consumeIf('L')) {
463       if (auto Lifetime = parseBase62Number()) {
464         printLifetime(Lifetime);
465         print(' ');
466       }
467     }
468     if (C == 'Q')
469       print("mut ");
470     demangleType();
471     break;
472   case 'P':
473     print("*const ");
474     demangleType();
475     break;
476   case 'O':
477     print("*mut ");
478     demangleType();
479     break;
480   case 'F':
481     demangleFnSig();
482     break;
483   case 'D':
484     demangleDynBounds();
485     if (consumeIf('L')) {
486       if (auto Lifetime = parseBase62Number()) {
487         print(" + ");
488         printLifetime(Lifetime);
489       }
490     } else {
491       Error = true;
492     }
493     break;
494   default:
495     Position = Start;
496     demanglePath(rust_demangle::InType::Yes);
497     break;
498   }
499 }
500 
501 // <fn-sig> := [<binder>] ["U"] ["K" <abi>] {<type>} "E" <type>
502 // <abi> = "C"
503 //       | <undisambiguated-identifier>
504 void Demangler::demangleFnSig() {
505   SwapAndRestore<size_t> SaveBoundLifetimes(BoundLifetimes, BoundLifetimes);
506   demangleOptionalBinder();
507 
508   if (consumeIf('U'))
509     print("unsafe ");
510 
511   if (consumeIf('K')) {
512     print("extern \"");
513     if (consumeIf('C')) {
514       print("C");
515     } else {
516       Identifier Ident = parseIdentifier();
517       for (char C : Ident.Name) {
518         // When mangling ABI string, the "-" is replaced with "_".
519         if (C == '_')
520           C = '-';
521         print(C);
522       }
523     }
524     print("\" ");
525   }
526 
527   print("fn(");
528   for (size_t I = 0; !Error && !consumeIf('E'); ++I) {
529     if (I > 0)
530       print(", ");
531     demangleType();
532   }
533   print(")");
534 
535   if (consumeIf('u')) {
536     // Skip the unit type from the output.
537   } else {
538     print(" -> ");
539     demangleType();
540   }
541 }
542 
543 // <dyn-bounds> = [<binder>] {<dyn-trait>} "E"
544 void Demangler::demangleDynBounds() {
545   SwapAndRestore<size_t> SaveBoundLifetimes(BoundLifetimes, BoundLifetimes);
546   print("dyn ");
547   demangleOptionalBinder();
548   // FIXME demangle {dyn-trait}
549   if (!consumeIf('E'))
550     Error = true;
551 }
552 
553 // Demangles optional binder and updates the number of bound lifetimes.
554 //
555 // <binder> = "G" <base-62-number>
556 void Demangler::demangleOptionalBinder() {
557   uint64_t Binder = parseOptionalBase62Number('G');
558   if (Error || Binder == 0)
559     return;
560 
561   // In valid inputs each bound lifetime is referenced later. Referencing a
562   // lifetime requires at least one byte of input. Reject inputs that are too
563   // short to reference all bound lifetimes. Otherwise demangling of invalid
564   // binders could generate excessive amounts of output.
565   if (Binder >= Input.size() - BoundLifetimes) {
566     Error = true;
567     return;
568   }
569 
570   print("for<");
571   for (size_t I = 0; I != Binder; ++I) {
572     BoundLifetimes += 1;
573     if (I > 0)
574       print(", ");
575     printLifetime(1);
576   }
577   print("> ");
578 }
579 
580 // <const> = <basic-type> <const-data>
581 //         | "p"                          // placeholder
582 //         | <backref>
583 void Demangler::demangleConst() {
584   BasicType Type;
585   if (parseBasicType(consume(), Type)) {
586     switch (Type) {
587     case BasicType::I8:
588     case BasicType::I16:
589     case BasicType::I32:
590     case BasicType::I64:
591     case BasicType::I128:
592     case BasicType::ISize:
593     case BasicType::U8:
594     case BasicType::U16:
595     case BasicType::U32:
596     case BasicType::U64:
597     case BasicType::U128:
598     case BasicType::USize:
599       demangleConstInt();
600       break;
601     case BasicType::Bool:
602       demangleConstBool();
603       break;
604     case BasicType::Char:
605       demangleConstChar();
606       break;
607     case BasicType::Placeholder:
608       print('_');
609       break;
610     default:
611       // FIXME demangle backreferences.
612       Error = true;
613       break;
614     }
615   } else {
616     Error = true;
617   }
618 }
619 
620 // <const-data> = ["n"] <hex-number>
621 void Demangler::demangleConstInt() {
622   if (consumeIf('n'))
623     print('-');
624 
625   StringView HexDigits;
626   uint64_t Value = parseHexNumber(HexDigits);
627   if (HexDigits.size() <= 16) {
628     printDecimalNumber(Value);
629   } else {
630     print("0x");
631     print(HexDigits);
632   }
633 }
634 
635 // <const-data> = "0_" // false
636 //              | "1_" // true
637 void Demangler::demangleConstBool() {
638   StringView HexDigits;
639   parseHexNumber(HexDigits);
640   if (HexDigits == "0")
641     print("false");
642   else if (HexDigits == "1")
643     print("true");
644   else
645     Error = true;
646 }
647 
648 /// Returns true if CodePoint represents a printable ASCII character.
649 static bool isAsciiPrintable(uint64_t CodePoint) {
650   return 0x20 <= CodePoint && CodePoint <= 0x7e;
651 }
652 
653 // <const-data> = <hex-number>
654 void Demangler::demangleConstChar() {
655   StringView HexDigits;
656   uint64_t CodePoint = parseHexNumber(HexDigits);
657   if (Error || HexDigits.size() > 6) {
658     Error = true;
659     return;
660   }
661 
662   print("'");
663   switch (CodePoint) {
664   case '\t':
665     print(R"(\t)");
666     break;
667   case '\r':
668     print(R"(\r)");
669     break;
670   case '\n':
671     print(R"(\n)");
672     break;
673   case '\\':
674     print(R"(\\)");
675     break;
676   case '"':
677     print(R"(")");
678     break;
679   case '\'':
680     print(R"(\')");
681     break;
682   default:
683     if (isAsciiPrintable(CodePoint)) {
684       char C = CodePoint;
685       print(C);
686     } else {
687       print(R"(\u{)");
688       print(HexDigits);
689       print('}');
690     }
691     break;
692   }
693   print('\'');
694 }
695 
696 // <undisambiguated-identifier> = ["u"] <decimal-number> ["_"] <bytes>
697 Identifier Demangler::parseIdentifier() {
698   bool Punycode = consumeIf('u');
699   uint64_t Bytes = parseDecimalNumber();
700 
701   // Underscore resolves the ambiguity when identifier starts with a decimal
702   // digit or another underscore.
703   consumeIf('_');
704 
705   if (Error || Bytes > Input.size() - Position) {
706     Error = true;
707     return {};
708   }
709   StringView S = Input.substr(Position, Bytes);
710   Position += Bytes;
711 
712   if (!std::all_of(S.begin(), S.end(), isValid)) {
713     Error = true;
714     return {};
715   }
716 
717   return {S, Punycode};
718 }
719 
720 // Parses optional base 62 number. The presence of a number is determined using
721 // Tag. Returns 0 when tag is absent and parsed value + 1 otherwise
722 //
723 // This function is indended for parsing disambiguators and binders which when
724 // not present have their value interpreted as 0, and otherwise as decoded
725 // value + 1. For example for binders, value for "G_" is 1, for "G0_" value is
726 // 2. When "G" is absent value is 0.
727 uint64_t Demangler::parseOptionalBase62Number(char Tag) {
728   if (!consumeIf(Tag))
729     return 0;
730 
731   uint64_t N = parseBase62Number();
732   if (Error || !addAssign(N, 1))
733     return 0;
734 
735   return N;
736 }
737 
738 // Parses base 62 number with <0-9a-zA-Z> as digits. Number is terminated by
739 // "_". All values are offset by 1, so that "_" encodes 0, "0_" encodes 1,
740 // "1_" encodes 2, etc.
741 //
742 // <base-62-number> = {<0-9a-zA-Z>} "_"
743 uint64_t Demangler::parseBase62Number() {
744   if (consumeIf('_'))
745     return 0;
746 
747   uint64_t Value = 0;
748 
749   while (true) {
750     uint64_t Digit;
751     char C = consume();
752 
753     if (C == '_') {
754       break;
755     } else if (isDigit(C)) {
756       Digit = C - '0';
757     } else if (isLower(C)) {
758       Digit = 10 + (C - 'a');
759     } else if (isUpper(C)) {
760       Digit = 10 + 26 + (C - 'A');
761     } else {
762       Error = true;
763       return 0;
764     }
765 
766     if (!mulAssign(Value, 62))
767       return 0;
768 
769     if (!addAssign(Value, Digit))
770       return 0;
771   }
772 
773   if (!addAssign(Value, 1))
774     return 0;
775 
776   return Value;
777 }
778 
779 // Parses a decimal number that had been encoded without any leading zeros.
780 //
781 // <decimal-number> = "0"
782 //                  | <1-9> {<0-9>}
783 uint64_t Demangler::parseDecimalNumber() {
784   char C = look();
785   if (!isDigit(C)) {
786     Error = true;
787     return 0;
788   }
789 
790   if (C == '0') {
791     consume();
792     return 0;
793   }
794 
795   uint64_t Value = 0;
796 
797   while (isDigit(look())) {
798     if (!mulAssign(Value, 10)) {
799       Error = true;
800       return 0;
801     }
802 
803     uint64_t D = consume() - '0';
804     if (!addAssign(Value, D))
805       return 0;
806   }
807 
808   return Value;
809 }
810 
811 // Parses a hexadecimal number with <0-9a-f> as a digits. Returns the parsed
812 // value and stores hex digits in HexDigits. The return value is unspecified if
813 // HexDigits.size() > 16.
814 //
815 // <hex-number> = "0_"
816 //              | <1-9a-f> {<0-9a-f>} "_"
817 uint64_t Demangler::parseHexNumber(StringView &HexDigits) {
818   size_t Start = Position;
819   uint64_t Value = 0;
820 
821   if (!isHexDigit(look()))
822     Error = true;
823 
824   if (consumeIf('0')) {
825     if (!consumeIf('_'))
826       Error = true;
827   } else {
828     while (!Error && !consumeIf('_')) {
829       char C = consume();
830       Value *= 16;
831       if (isDigit(C))
832         Value += C - '0';
833       else if ('a' <= C && C <= 'f')
834         Value += 10 + (C - 'a');
835       else
836         Error = true;
837     }
838   }
839 
840   if (Error) {
841     HexDigits = StringView();
842     return 0;
843   }
844 
845   size_t End = Position - 1;
846   assert(Start < End);
847   HexDigits = Input.substr(Start, End - Start);
848   return Value;
849 }
850 
851 // Prints a lifetime. An index 0 always represents an erased lifetime. Indices
852 // starting from 1, are De Bruijn indices, referring to higher-ranked lifetimes
853 // bound by one of the enclosing binders.
854 void Demangler::printLifetime(uint64_t Index) {
855   if (Index == 0) {
856     print("'_");
857     return;
858   }
859 
860   if (Index - 1 >= BoundLifetimes) {
861     Error = true;
862     return;
863   }
864 
865   uint64_t Depth = BoundLifetimes - Index;
866   print('\'');
867   if (Depth < 26) {
868     char C = 'a' + Depth;
869     print(C);
870   } else {
871     print('z');
872     printDecimalNumber(Depth - 26 + 1);
873   }
874 }
875