xref: /netbsd-src/external/apache2/llvm/dist/llvm/lib/Demangle/RustDemangle.cpp (revision 82d56013d7b633d116a93943de88e08335357a7c)
1 //===--- RustDemangle.cpp ---------------------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file defines a demangler for Rust v0 mangled symbols as specified in
10 // https://rust-lang.github.io/rfcs/2603-rust-symbol-name-mangling-v0.html
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "llvm/Demangle/RustDemangle.h"
15 #include "llvm/Demangle/Demangle.h"
16 
17 #include <algorithm>
18 #include <cassert>
19 #include <cstring>
20 #include <limits>
21 
22 using namespace llvm;
23 using namespace rust_demangle;
24 
rustDemangle(const char * MangledName,char * Buf,size_t * N,int * Status)25 char *llvm::rustDemangle(const char *MangledName, char *Buf, size_t *N,
26                          int *Status) {
27   if (MangledName == nullptr || (Buf != nullptr && N == nullptr)) {
28     if (Status != nullptr)
29       *Status = demangle_invalid_args;
30     return nullptr;
31   }
32 
33   // Return early if mangled name doesn't look like a Rust symbol.
34   StringView Mangled(MangledName);
35   if (!Mangled.startsWith("_R")) {
36     if (Status != nullptr)
37       *Status = demangle_invalid_mangled_name;
38     return nullptr;
39   }
40 
41   Demangler D;
42   if (!initializeOutputStream(nullptr, nullptr, D.Output, 1024)) {
43     if (Status != nullptr)
44       *Status = demangle_memory_alloc_failure;
45     return nullptr;
46   }
47 
48   if (!D.demangle(Mangled)) {
49     if (Status != nullptr)
50       *Status = demangle_invalid_mangled_name;
51     std::free(D.Output.getBuffer());
52     return nullptr;
53   }
54 
55   D.Output += '\0';
56   char *Demangled = D.Output.getBuffer();
57   size_t DemangledLen = D.Output.getCurrentPosition();
58 
59   if (Buf != nullptr) {
60     if (DemangledLen <= *N) {
61       std::memcpy(Buf, Demangled, DemangledLen);
62       std::free(Demangled);
63       Demangled = Buf;
64     } else {
65       std::free(Buf);
66     }
67   }
68 
69   if (N != nullptr)
70     *N = DemangledLen;
71 
72   if (Status != nullptr)
73     *Status = demangle_success;
74 
75   return Demangled;
76 }
77 
Demangler(size_t MaxRecursionLevel)78 Demangler::Demangler(size_t MaxRecursionLevel)
79     : MaxRecursionLevel(MaxRecursionLevel) {}
80 
isDigit(const char C)81 static inline bool isDigit(const char C) { return '0' <= C && C <= '9'; }
82 
isHexDigit(const char C)83 static inline bool isHexDigit(const char C) {
84   return ('0' <= C && C <= '9') || ('a' <= C && C <= 'f');
85 }
86 
isLower(const char C)87 static inline bool isLower(const char C) { return 'a' <= C && C <= 'z'; }
88 
isUpper(const char C)89 static inline bool isUpper(const char C) { return 'A' <= C && C <= 'Z'; }
90 
91 /// Returns true if C is a valid mangled character: <0-9a-zA-Z_>.
isValid(const char C)92 static inline bool isValid(const char C) {
93   return isDigit(C) || isLower(C) || isUpper(C) || C == '_';
94 }
95 
96 // Demangles Rust v0 mangled symbol. Returns true when successful, and false
97 // otherwise. The demangled symbol is stored in Output field. It is
98 // responsibility of the caller to free the memory behind the output stream.
99 //
100 // <symbol-name> = "_R" <path> [<instantiating-crate>]
demangle(StringView Mangled)101 bool Demangler::demangle(StringView Mangled) {
102   Position = 0;
103   Error = false;
104   Print = true;
105   RecursionLevel = 0;
106 
107   if (!Mangled.consumeFront("_R")) {
108     Error = true;
109     return false;
110   }
111   Input = Mangled;
112 
113   demanglePath(rust_demangle::InType::No);
114 
115   // FIXME parse optional <instantiating-crate>.
116 
117   if (Position != Input.size())
118     Error = true;
119 
120   return !Error;
121 }
122 
123 // Demangles a path. InType indicates whether a path is inside a type.
124 //
125 // <path> = "C" <identifier>               // crate root
126 //        | "M" <impl-path> <type>         // <T> (inherent impl)
127 //        | "X" <impl-path> <type> <path>  // <T as Trait> (trait impl)
128 //        | "Y" <type> <path>              // <T as Trait> (trait definition)
129 //        | "N" <ns> <path> <identifier>   // ...::ident (nested path)
130 //        | "I" <path> {<generic-arg>} "E" // ...<T, U> (generic args)
131 //        | <backref>
132 // <identifier> = [<disambiguator>] <undisambiguated-identifier>
133 // <ns> = "C"      // closure
134 //      | "S"      // shim
135 //      | <A-Z>    // other special namespaces
136 //      | <a-z>    // internal namespaces
demanglePath(InType InType)137 void Demangler::demanglePath(InType InType) {
138   if (Error || RecursionLevel >= MaxRecursionLevel) {
139     Error = true;
140     return;
141   }
142   SwapAndRestore<size_t> SaveRecursionLevel(RecursionLevel, RecursionLevel + 1);
143 
144   switch (consume()) {
145   case 'C': {
146     parseOptionalBase62Number('s');
147     Identifier Ident = parseIdentifier();
148     print(Ident.Name);
149     break;
150   }
151   case 'M': {
152     demangleImplPath(InType);
153     print("<");
154     demangleType();
155     print(">");
156     break;
157   }
158   case 'X': {
159     demangleImplPath(InType);
160     print("<");
161     demangleType();
162     print(" as ");
163     demanglePath(rust_demangle::InType::Yes);
164     print(">");
165     break;
166   }
167   case 'Y': {
168     print("<");
169     demangleType();
170     print(" as ");
171     demanglePath(rust_demangle::InType::Yes);
172     print(">");
173     break;
174   }
175   case 'N': {
176     char NS = consume();
177     if (!isLower(NS) && !isUpper(NS)) {
178       Error = true;
179       break;
180     }
181     demanglePath(InType);
182 
183     uint64_t Disambiguator = parseOptionalBase62Number('s');
184     Identifier Ident = parseIdentifier();
185 
186     if (isUpper(NS)) {
187       // Special namespaces
188       print("::{");
189       if (NS == 'C')
190         print("closure");
191       else if (NS == 'S')
192         print("shim");
193       else
194         print(NS);
195       if (!Ident.empty()) {
196         print(":");
197         print(Ident.Name);
198       }
199       print('#');
200       printDecimalNumber(Disambiguator);
201       print('}');
202     } else {
203       // Implementation internal namespaces.
204       if (!Ident.empty()) {
205         print("::");
206         print(Ident.Name);
207       }
208     }
209     break;
210   }
211   case 'I': {
212     demanglePath(InType);
213     // Omit "::" when in a type, where it is optional.
214     if (InType == rust_demangle::InType::No)
215       print("::");
216     print("<");
217     for (size_t I = 0; !Error && !consumeIf('E'); ++I) {
218       if (I > 0)
219         print(", ");
220       demangleGenericArg();
221     }
222     print(">");
223     break;
224   }
225   default:
226     // FIXME parse remaining productions.
227     Error = true;
228     break;
229   }
230 }
231 
232 // <impl-path> = [<disambiguator>] <path>
233 // <disambiguator> = "s" <base-62-number>
demangleImplPath(InType InType)234 void Demangler::demangleImplPath(InType InType) {
235   SwapAndRestore<bool> SavePrint(Print, false);
236   parseOptionalBase62Number('s');
237   demanglePath(InType);
238 }
239 
240 // <generic-arg> = <lifetime>
241 //               | <type>
242 //               | "K" <const>
243 // <lifetime> = "L" <base-62-number>
demangleGenericArg()244 void Demangler::demangleGenericArg() {
245   if (consumeIf('K'))
246     demangleConst();
247   else
248     demangleType();
249   // FIXME demangle lifetimes.
250 }
251 
252 // <basic-type> = "a"      // i8
253 //              | "b"      // bool
254 //              | "c"      // char
255 //              | "d"      // f64
256 //              | "e"      // str
257 //              | "f"      // f32
258 //              | "h"      // u8
259 //              | "i"      // isize
260 //              | "j"      // usize
261 //              | "l"      // i32
262 //              | "m"      // u32
263 //              | "n"      // i128
264 //              | "o"      // u128
265 //              | "s"      // i16
266 //              | "t"      // u16
267 //              | "u"      // ()
268 //              | "v"      // ...
269 //              | "x"      // i64
270 //              | "y"      // u64
271 //              | "z"      // !
272 //              | "p"      // placeholder (e.g. for generic params), shown as _
parseBasicType(char C,BasicType & Type)273 static bool parseBasicType(char C, BasicType &Type) {
274   switch (C) {
275   case 'a':
276     Type = BasicType::I8;
277     return true;
278   case 'b':
279     Type = BasicType::Bool;
280     return true;
281   case 'c':
282     Type = BasicType::Char;
283     return true;
284   case 'd':
285     Type = BasicType::F64;
286     return true;
287   case 'e':
288     Type = BasicType::Str;
289     return true;
290   case 'f':
291     Type = BasicType::F32;
292     return true;
293   case 'h':
294     Type = BasicType::U8;
295     return true;
296   case 'i':
297     Type = BasicType::ISize;
298     return true;
299   case 'j':
300     Type = BasicType::USize;
301     return true;
302   case 'l':
303     Type = BasicType::I32;
304     return true;
305   case 'm':
306     Type = BasicType::U32;
307     return true;
308   case 'n':
309     Type = BasicType::I128;
310     return true;
311   case 'o':
312     Type = BasicType::U128;
313     return true;
314   case 'p':
315     Type = BasicType::Placeholder;
316     return true;
317   case 's':
318     Type = BasicType::I16;
319     return true;
320   case 't':
321     Type = BasicType::U16;
322     return true;
323   case 'u':
324     Type = BasicType::Unit;
325     return true;
326   case 'v':
327     Type = BasicType::Variadic;
328     return true;
329   case 'x':
330     Type = BasicType::I64;
331     return true;
332   case 'y':
333     Type = BasicType::U64;
334     return true;
335   case 'z':
336     Type = BasicType::Never;
337     return true;
338   default:
339     return false;
340   }
341 }
342 
printBasicType(BasicType Type)343 void Demangler::printBasicType(BasicType Type) {
344   switch (Type) {
345   case BasicType::Bool:
346     print("bool");
347     break;
348   case BasicType::Char:
349     print("char");
350     break;
351   case BasicType::I8:
352     print("i8");
353     break;
354   case BasicType::I16:
355     print("i16");
356     break;
357   case BasicType::I32:
358     print("i32");
359     break;
360   case BasicType::I64:
361     print("i64");
362     break;
363   case BasicType::I128:
364     print("i128");
365     break;
366   case BasicType::ISize:
367     print("isize");
368     break;
369   case BasicType::U8:
370     print("u8");
371     break;
372   case BasicType::U16:
373     print("u16");
374     break;
375   case BasicType::U32:
376     print("u32");
377     break;
378   case BasicType::U64:
379     print("u64");
380     break;
381   case BasicType::U128:
382     print("u128");
383     break;
384   case BasicType::USize:
385     print("usize");
386     break;
387   case BasicType::F32:
388     print("f32");
389     break;
390   case BasicType::F64:
391     print("f64");
392     break;
393   case BasicType::Str:
394     print("str");
395     break;
396   case BasicType::Placeholder:
397     print("_");
398     break;
399   case BasicType::Unit:
400     print("()");
401     break;
402   case BasicType::Variadic:
403     print("...");
404     break;
405   case BasicType::Never:
406     print("!");
407     break;
408   }
409 }
410 
411 // <type> = | <basic-type>
412 //          | <path>                      // named type
413 //          | "A" <type> <const>          // [T; N]
414 //          | "S" <type>                  // [T]
415 //          | "T" {<type>} "E"            // (T1, T2, T3, ...)
416 //          | "R" [<lifetime>] <type>     // &T
417 //          | "Q" [<lifetime>] <type>     // &mut T
418 //          | "P" <type>                  // *const T
419 //          | "O" <type>                  // *mut T
420 //          | "F" <fn-sig>                // fn(...) -> ...
421 //          | "D" <dyn-bounds> <lifetime> // dyn Trait<Assoc = X> + Send + 'a
422 //          | <backref>                   // backref
demangleType()423 void Demangler::demangleType() {
424   size_t Start = Position;
425 
426   char C = consume();
427   BasicType Type;
428   if (parseBasicType(C, Type))
429     return printBasicType(Type);
430 
431   switch (C) {
432   case 'A':
433     print("[");
434     demangleType();
435     print("; ");
436     demangleConst();
437     print("]");
438     break;
439   case 'S':
440     print("[");
441     demangleType();
442     print("]");
443     break;
444   case 'T': {
445     print("(");
446     size_t I = 0;
447     for (; !Error && !consumeIf('E'); ++I) {
448       if (I > 0)
449         print(", ");
450       demangleType();
451     }
452     if (I == 1)
453       print(",");
454     print(")");
455     break;
456   }
457   case 'R':
458     print("&");
459     // FIXME demangle [<lifetime>].
460     demangleType();
461     break;
462   case 'Q':
463     print("&mut ");
464     // FIXME demangle [<lifetime>].
465     demangleType();
466     break;
467   case 'P':
468     print("*const ");
469     demangleType();
470     break;
471   case 'O':
472     print("*mut ");
473     demangleType();
474     break;
475   case 'F':
476     demangleFnSig();
477     break;
478   default:
479     Position = Start;
480     demanglePath(rust_demangle::InType::Yes);
481     break;
482   }
483 }
484 
485 // <fn-sig> := [<binder>] ["U"] ["K" <abi>] {<type>} "E" <type>
486 // <abi> = "C"
487 //       | <undisambiguated-identifier>
demangleFnSig()488 void Demangler::demangleFnSig() {
489   // FIXME demangle binder.
490 
491   if (consumeIf('U'))
492     print("unsafe ");
493 
494   if (consumeIf('K')) {
495     print("extern \"");
496     if (consumeIf('C')) {
497       print("C");
498     } else {
499       Identifier Ident = parseIdentifier();
500       for (char C : Ident.Name) {
501         // When mangling ABI string, the "-" is replaced with "_".
502         if (C == '_')
503           C = '-';
504         print(C);
505       }
506     }
507     print("\" ");
508   }
509 
510   print("fn(");
511   for (size_t I = 0; !Error && !consumeIf('E'); ++I) {
512     if (I > 0)
513       print(", ");
514     demangleType();
515   }
516   print(")");
517 
518   if (consumeIf('u')) {
519     // Skip the unit type from the output.
520   } else {
521     print(" -> ");
522     demangleType();
523   }
524 }
525 
526 // <const> = <basic-type> <const-data>
527 //         | "p"                          // placeholder
528 //         | <backref>
demangleConst()529 void Demangler::demangleConst() {
530   BasicType Type;
531   if (parseBasicType(consume(), Type)) {
532     switch (Type) {
533     case BasicType::I8:
534     case BasicType::I16:
535     case BasicType::I32:
536     case BasicType::I64:
537     case BasicType::I128:
538     case BasicType::ISize:
539     case BasicType::U8:
540     case BasicType::U16:
541     case BasicType::U32:
542     case BasicType::U64:
543     case BasicType::U128:
544     case BasicType::USize:
545       demangleConstInt();
546       break;
547     case BasicType::Bool:
548       demangleConstBool();
549       break;
550     case BasicType::Char:
551       demangleConstChar();
552       break;
553     case BasicType::Placeholder:
554       print('_');
555       break;
556     default:
557       // FIXME demangle backreferences.
558       Error = true;
559       break;
560     }
561   } else {
562     Error = true;
563   }
564 }
565 
566 // <const-data> = ["n"] <hex-number>
demangleConstInt()567 void Demangler::demangleConstInt() {
568   if (consumeIf('n'))
569     print('-');
570 
571   StringView HexDigits;
572   uint64_t Value = parseHexNumber(HexDigits);
573   if (HexDigits.size() <= 16) {
574     printDecimalNumber(Value);
575   } else {
576     print("0x");
577     print(HexDigits);
578   }
579 }
580 
581 // <const-data> = "0_" // false
582 //              | "1_" // true
demangleConstBool()583 void Demangler::demangleConstBool() {
584   StringView HexDigits;
585   parseHexNumber(HexDigits);
586   if (HexDigits == "0")
587     print("false");
588   else if (HexDigits == "1")
589     print("true");
590   else
591     Error = true;
592 }
593 
594 /// Returns true if CodePoint represents a printable ASCII character.
isAsciiPrintable(uint64_t CodePoint)595 static bool isAsciiPrintable(uint64_t CodePoint) {
596   return 0x20 <= CodePoint && CodePoint <= 0x7e;
597 }
598 
599 // <const-data> = <hex-number>
demangleConstChar()600 void Demangler::demangleConstChar() {
601   StringView HexDigits;
602   uint64_t CodePoint = parseHexNumber(HexDigits);
603   if (Error || HexDigits.size() > 6) {
604     Error = true;
605     return;
606   }
607 
608   print("'");
609   switch (CodePoint) {
610   case '\t':
611     print(R"(\t)");
612     break;
613   case '\r':
614     print(R"(\r)");
615     break;
616   case '\n':
617     print(R"(\n)");
618     break;
619   case '\\':
620     print(R"(\\)");
621     break;
622   case '"':
623     print(R"(")");
624     break;
625   case '\'':
626     print(R"(\')");
627     break;
628   default:
629     if (isAsciiPrintable(CodePoint)) {
630       char C = CodePoint;
631       print(C);
632     } else {
633       print(R"(\u{)");
634       print(HexDigits);
635       print('}');
636     }
637     break;
638   }
639   print('\'');
640 }
641 
642 // <undisambiguated-identifier> = ["u"] <decimal-number> ["_"] <bytes>
parseIdentifier()643 Identifier Demangler::parseIdentifier() {
644   bool Punycode = consumeIf('u');
645   uint64_t Bytes = parseDecimalNumber();
646 
647   // Underscore resolves the ambiguity when identifier starts with a decimal
648   // digit or another underscore.
649   consumeIf('_');
650 
651   if (Error || Bytes > Input.size() - Position) {
652     Error = true;
653     return {};
654   }
655   StringView S = Input.substr(Position, Bytes);
656   Position += Bytes;
657 
658   if (!std::all_of(S.begin(), S.end(), isValid)) {
659     Error = true;
660     return {};
661   }
662 
663   return {S, Punycode};
664 }
665 
666 // Parses optional base 62 number. The presence of a number is determined using
667 // Tag. Returns 0 when tag is absent and parsed value + 1 otherwise.
parseOptionalBase62Number(char Tag)668 uint64_t Demangler::parseOptionalBase62Number(char Tag) {
669   if (!consumeIf(Tag))
670     return 0;
671 
672   uint64_t N = parseBase62Number();
673   if (Error || !addAssign(N, 1))
674     return 0;
675 
676   return N;
677 }
678 
679 // Parses base 62 number with <0-9a-zA-Z> as digits. Number is terminated by
680 // "_". All values are offset by 1, so that "_" encodes 0, "0_" encodes 1,
681 // "1_" encodes 2, etc.
682 //
683 // <base-62-number> = {<0-9a-zA-Z>} "_"
parseBase62Number()684 uint64_t Demangler::parseBase62Number() {
685   if (consumeIf('_'))
686     return 0;
687 
688   uint64_t Value = 0;
689 
690   while (true) {
691     uint64_t Digit;
692     char C = consume();
693 
694     if (C == '_') {
695       break;
696     } else if (isDigit(C)) {
697       Digit = C - '0';
698     } else if (isLower(C)) {
699       Digit = 10 + (C - 'a');
700     } else if (isUpper(C)) {
701       Digit = 10 + 26 + (C - 'A');
702     } else {
703       Error = true;
704       return 0;
705     }
706 
707     if (!mulAssign(Value, 62))
708       return 0;
709 
710     if (!addAssign(Value, Digit))
711       return 0;
712   }
713 
714   if (!addAssign(Value, 1))
715     return 0;
716 
717   return Value;
718 }
719 
720 // Parses a decimal number that had been encoded without any leading zeros.
721 //
722 // <decimal-number> = "0"
723 //                  | <1-9> {<0-9>}
parseDecimalNumber()724 uint64_t Demangler::parseDecimalNumber() {
725   char C = look();
726   if (!isDigit(C)) {
727     Error = true;
728     return 0;
729   }
730 
731   if (C == '0') {
732     consume();
733     return 0;
734   }
735 
736   uint64_t Value = 0;
737 
738   while (isDigit(look())) {
739     if (!mulAssign(Value, 10)) {
740       Error = true;
741       return 0;
742     }
743 
744     uint64_t D = consume() - '0';
745     if (!addAssign(Value, D))
746       return 0;
747   }
748 
749   return Value;
750 }
751 
752 // Parses a hexadecimal number with <0-9a-f> as a digits. Returns the parsed
753 // value and stores hex digits in HexDigits. The return value is unspecified if
754 // HexDigits.size() > 16.
755 //
756 // <hex-number> = "0_"
757 //              | <1-9a-f> {<0-9a-f>} "_"
parseHexNumber(StringView & HexDigits)758 uint64_t Demangler::parseHexNumber(StringView &HexDigits) {
759   size_t Start = Position;
760   uint64_t Value = 0;
761 
762   if (!isHexDigit(look()))
763     Error = true;
764 
765   if (consumeIf('0')) {
766     if (!consumeIf('_'))
767       Error = true;
768   } else {
769     while (!Error && !consumeIf('_')) {
770       char C = consume();
771       Value *= 16;
772       if (isDigit(C))
773         Value += C - '0';
774       else if ('a' <= C && C <= 'f')
775         Value += 10 + (C - 'a');
776       else
777         Error = true;
778     }
779   }
780 
781   if (Error) {
782     HexDigits = StringView();
783     return 0;
784   }
785 
786   size_t End = Position - 1;
787   assert(Start < End);
788   HexDigits = Input.substr(Start, End - Start);
789   return Value;
790 }
791