xref: /llvm-project/llvm/lib/Demangle/RustDemangle.cpp (revision f933f7fbd047802456f9d614daf0f0dfb3c7c45f)
1 //===--- RustDemangle.cpp ---------------------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file defines a demangler for Rust v0 mangled symbols as specified in
10 // https://rust-lang.github.io/rfcs/2603-rust-symbol-name-mangling-v0.html
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "llvm/Demangle/RustDemangle.h"
15 #include "llvm/Demangle/Demangle.h"
16 
17 #include <algorithm>
18 #include <cassert>
19 #include <cstring>
20 #include <limits>
21 
22 using namespace llvm;
23 using namespace rust_demangle;
24 
25 char *llvm::rustDemangle(const char *MangledName, char *Buf, size_t *N,
26                          int *Status) {
27   if (MangledName == nullptr || (Buf != nullptr && N == nullptr)) {
28     if (Status != nullptr)
29       *Status = demangle_invalid_args;
30     return nullptr;
31   }
32 
33   // Return early if mangled name doesn't look like a Rust symbol.
34   StringView Mangled(MangledName);
35   if (!Mangled.startsWith("_R")) {
36     if (Status != nullptr)
37       *Status = demangle_invalid_mangled_name;
38     return nullptr;
39   }
40 
41   Demangler D;
42   if (!initializeOutputStream(nullptr, nullptr, D.Output, 1024)) {
43     if (Status != nullptr)
44       *Status = demangle_memory_alloc_failure;
45     return nullptr;
46   }
47 
48   if (!D.demangle(Mangled)) {
49     if (Status != nullptr)
50       *Status = demangle_invalid_mangled_name;
51     std::free(D.Output.getBuffer());
52     return nullptr;
53   }
54 
55   D.Output += '\0';
56   char *Demangled = D.Output.getBuffer();
57   size_t DemangledLen = D.Output.getCurrentPosition();
58 
59   if (Buf != nullptr) {
60     if (DemangledLen <= *N) {
61       std::memcpy(Buf, Demangled, DemangledLen);
62       std::free(Demangled);
63       Demangled = Buf;
64     } else {
65       std::free(Buf);
66     }
67   }
68 
69   if (N != nullptr)
70     *N = DemangledLen;
71 
72   if (Status != nullptr)
73     *Status = demangle_success;
74 
75   return Demangled;
76 }
77 
78 Demangler::Demangler(size_t MaxRecursionLevel)
79     : MaxRecursionLevel(MaxRecursionLevel) {}
80 
81 static inline bool isDigit(const char C) { return '0' <= C && C <= '9'; }
82 
83 static inline bool isHexDigit(const char C) {
84   return ('0' <= C && C <= '9') || ('a' <= C && C <= 'f');
85 }
86 
87 static inline bool isLower(const char C) { return 'a' <= C && C <= 'z'; }
88 
89 static inline bool isUpper(const char C) { return 'A' <= C && C <= 'Z'; }
90 
91 /// Returns true if C is a valid mangled character: <0-9a-zA-Z_>.
92 static inline bool isValid(const char C) {
93   return isDigit(C) || isLower(C) || isUpper(C) || C == '_';
94 }
95 
96 // Demangles Rust v0 mangled symbol. Returns true when successful, and false
97 // otherwise. The demangled symbol is stored in Output field. It is
98 // responsibility of the caller to free the memory behind the output stream.
99 //
100 // <symbol-name> = "_R" <path> [<instantiating-crate>]
101 bool Demangler::demangle(StringView Mangled) {
102   Position = 0;
103   Error = false;
104   Print = true;
105   RecursionLevel = 0;
106 
107   if (!Mangled.consumeFront("_R")) {
108     Error = true;
109     return false;
110   }
111   Input = Mangled;
112 
113   demanglePath();
114 
115   // FIXME parse optional <instantiating-crate>.
116 
117   if (Position != Input.size())
118     Error = true;
119 
120   return !Error;
121 }
122 
123 // <path> = "C" <identifier>               // crate root
124 //        | "M" <impl-path> <type>         // <T> (inherent impl)
125 //        | "X" <impl-path> <type> <path>  // <T as Trait> (trait impl)
126 //        | "Y" <type> <path>              // <T as Trait> (trait definition)
127 //        | "N" <ns> <path> <identifier>   // ...::ident (nested path)
128 //        | "I" <path> {<generic-arg>} "E" // ...<T, U> (generic args)
129 //        | <backref>
130 // <identifier> = [<disambiguator>] <undisambiguated-identifier>
131 // <ns> = "C"      // closure
132 //      | "S"      // shim
133 //      | <A-Z>    // other special namespaces
134 //      | <a-z>    // internal namespaces
135 void Demangler::demanglePath() {
136   if (Error || RecursionLevel >= MaxRecursionLevel) {
137     Error = true;
138     return;
139   }
140   SwapAndRestore<size_t> SaveRecursionLevel(RecursionLevel, RecursionLevel + 1);
141 
142   switch (consume()) {
143   case 'C': {
144     parseOptionalBase62Number('s');
145     Identifier Ident = parseIdentifier();
146     print(Ident.Name);
147     break;
148   }
149   case 'M': {
150     demangleImplPath();
151     print("<");
152     demangleType();
153     print(">");
154     break;
155   }
156   case 'X': {
157     demangleImplPath();
158     print("<");
159     demangleType();
160     print(" as ");
161     demanglePath();
162     print(">");
163     break;
164   }
165   case 'Y': {
166     print("<");
167     demangleType();
168     print(" as ");
169     demanglePath();
170     print(">");
171     break;
172   }
173   case 'N': {
174     char NS = consume();
175     if (!isLower(NS) && !isUpper(NS)) {
176       Error = true;
177       break;
178     }
179     demanglePath();
180 
181     uint64_t Disambiguator = parseOptionalBase62Number('s');
182     Identifier Ident = parseIdentifier();
183 
184     if (isUpper(NS)) {
185       // Special namespaces
186       print("::{");
187       if (NS == 'C')
188         print("closure");
189       else if (NS == 'S')
190         print("shim");
191       else
192         print(NS);
193       if (!Ident.empty()) {
194         print(":");
195         print(Ident.Name);
196       }
197       print('#');
198       printDecimalNumber(Disambiguator);
199       print('}');
200     } else {
201       // Implementation internal namespaces.
202       if (!Ident.empty()) {
203         print("::");
204         print(Ident.Name);
205       }
206     }
207     break;
208   }
209   case 'I': {
210     demanglePath();
211     print("::<");
212     for (size_t I = 0; !Error && !consumeIf('E'); ++I) {
213       if (I > 0)
214         print(", ");
215       demangleGenericArg();
216     }
217     print(">");
218     break;
219   }
220   default:
221     // FIXME parse remaining productions.
222     Error = true;
223     break;
224   }
225 }
226 
227 // <impl-path> = [<disambiguator>] <path>
228 // <disambiguator> = "s" <base-62-number>
229 void Demangler::demangleImplPath() {
230   SwapAndRestore<bool> SavePrint(Print, false);
231   parseOptionalBase62Number('s');
232   demanglePath();
233 }
234 
235 // <generic-arg> = <lifetime>
236 //               | <type>
237 //               | "K" <const>
238 // <lifetime> = "L" <base-62-number>
239 void Demangler::demangleGenericArg() {
240   if (consumeIf('K'))
241     demangleConst();
242   else
243     demangleType();
244   // FIXME demangle lifetimes.
245 }
246 
247 // <basic-type> = "a"      // i8
248 //              | "b"      // bool
249 //              | "c"      // char
250 //              | "d"      // f64
251 //              | "e"      // str
252 //              | "f"      // f32
253 //              | "h"      // u8
254 //              | "i"      // isize
255 //              | "j"      // usize
256 //              | "l"      // i32
257 //              | "m"      // u32
258 //              | "n"      // i128
259 //              | "o"      // u128
260 //              | "s"      // i16
261 //              | "t"      // u16
262 //              | "u"      // ()
263 //              | "v"      // ...
264 //              | "x"      // i64
265 //              | "y"      // u64
266 //              | "z"      // !
267 //              | "p"      // placeholder (e.g. for generic params), shown as _
268 static bool parseBasicType(char C, BasicType &Type) {
269   switch (C) {
270   case 'a':
271     Type = BasicType::I8;
272     return true;
273   case 'b':
274     Type = BasicType::Bool;
275     return true;
276   case 'c':
277     Type = BasicType::Char;
278     return true;
279   case 'd':
280     Type = BasicType::F64;
281     return true;
282   case 'e':
283     Type = BasicType::Str;
284     return true;
285   case 'f':
286     Type = BasicType::F32;
287     return true;
288   case 'h':
289     Type = BasicType::U8;
290     return true;
291   case 'i':
292     Type = BasicType::ISize;
293     return true;
294   case 'j':
295     Type = BasicType::USize;
296     return true;
297   case 'l':
298     Type = BasicType::I32;
299     return true;
300   case 'm':
301     Type = BasicType::U32;
302     return true;
303   case 'n':
304     Type = BasicType::I128;
305     return true;
306   case 'o':
307     Type = BasicType::U128;
308     return true;
309   case 'p':
310     Type = BasicType::Placeholder;
311     return true;
312   case 's':
313     Type = BasicType::I16;
314     return true;
315   case 't':
316     Type = BasicType::U16;
317     return true;
318   case 'u':
319     Type = BasicType::Unit;
320     return true;
321   case 'v':
322     Type = BasicType::Variadic;
323     return true;
324   case 'x':
325     Type = BasicType::I64;
326     return true;
327   case 'y':
328     Type = BasicType::U64;
329     return true;
330   case 'z':
331     Type = BasicType::Never;
332     return true;
333   default:
334     return false;
335   }
336 }
337 
338 void Demangler::printBasicType(BasicType Type) {
339   switch (Type) {
340   case BasicType::Bool:
341     print("bool");
342     break;
343   case BasicType::Char:
344     print("char");
345     break;
346   case BasicType::I8:
347     print("i8");
348     break;
349   case BasicType::I16:
350     print("i16");
351     break;
352   case BasicType::I32:
353     print("i32");
354     break;
355   case BasicType::I64:
356     print("i64");
357     break;
358   case BasicType::I128:
359     print("i128");
360     break;
361   case BasicType::ISize:
362     print("isize");
363     break;
364   case BasicType::U8:
365     print("u8");
366     break;
367   case BasicType::U16:
368     print("u16");
369     break;
370   case BasicType::U32:
371     print("u32");
372     break;
373   case BasicType::U64:
374     print("u64");
375     break;
376   case BasicType::U128:
377     print("u128");
378     break;
379   case BasicType::USize:
380     print("usize");
381     break;
382   case BasicType::F32:
383     print("f32");
384     break;
385   case BasicType::F64:
386     print("f64");
387     break;
388   case BasicType::Str:
389     print("str");
390     break;
391   case BasicType::Placeholder:
392     print("_");
393     break;
394   case BasicType::Unit:
395     print("()");
396     break;
397   case BasicType::Variadic:
398     print("...");
399     break;
400   case BasicType::Never:
401     print("!");
402     break;
403   }
404 }
405 
406 // <type> = | <basic-type>
407 //          | <path>                      // named type
408 //          | "A" <type> <const>          // [T; N]
409 //          | "S" <type>                  // [T]
410 //          | "T" {<type>} "E"            // (T1, T2, T3, ...)
411 //          | "R" [<lifetime>] <type>     // &T
412 //          | "Q" [<lifetime>] <type>     // &mut T
413 //          | "P" <type>                  // *const T
414 //          | "O" <type>                  // *mut T
415 //          | "F" <fn-sig>                // fn(...) -> ...
416 //          | "D" <dyn-bounds> <lifetime> // dyn Trait<Assoc = X> + Send + 'a
417 //          | <backref>                   // backref
418 void Demangler::demangleType() {
419   BasicType Type;
420   if (parseBasicType(consume(), Type))
421     printBasicType(Type);
422   else
423     Error = true; // FIXME parse remaining productions.
424 }
425 
426 // <const> = <basic-type> <const-data>
427 //         | "p"                          // placeholder
428 //         | <backref>
429 void Demangler::demangleConst() {
430   BasicType Type;
431   if (parseBasicType(consume(), Type)) {
432     switch (Type) {
433     case BasicType::I8:
434     case BasicType::I16:
435     case BasicType::I32:
436     case BasicType::I64:
437     case BasicType::I128:
438     case BasicType::ISize:
439     case BasicType::U8:
440     case BasicType::U16:
441     case BasicType::U32:
442     case BasicType::U64:
443     case BasicType::U128:
444     case BasicType::USize:
445       demangleConstInt();
446       break;
447     case BasicType::Bool:
448       demangleConstBool();
449       break;
450     case BasicType::Char:
451       demangleConstChar();
452       break;
453     case BasicType::Placeholder:
454       print('_');
455       break;
456     default:
457       // FIXME demangle backreferences.
458       Error = true;
459       break;
460     }
461   } else {
462     Error = true;
463   }
464 }
465 
466 // <const-data> = ["n"] <hex-number>
467 void Demangler::demangleConstInt() {
468   if (consumeIf('n'))
469     print('-');
470 
471   StringView HexDigits;
472   uint64_t Value = parseHexNumber(HexDigits);
473   if (HexDigits.size() <= 16) {
474     printDecimalNumber(Value);
475   } else {
476     print("0x");
477     print(HexDigits);
478   }
479 }
480 
481 // <const-data> = "0_" // false
482 //              | "1_" // true
483 void Demangler::demangleConstBool() {
484   StringView HexDigits;
485   parseHexNumber(HexDigits);
486   if (HexDigits == "0")
487     print("false");
488   else if (HexDigits == "1")
489     print("true");
490   else
491     Error = true;
492 }
493 
494 /// Returns true if CodePoint represents a printable ASCII character.
495 static bool isAsciiPrintable(uint64_t CodePoint) {
496   return 0x20 <= CodePoint && CodePoint <= 0x7e;
497 }
498 
499 // <const-data> = <hex-number>
500 void Demangler::demangleConstChar() {
501   StringView HexDigits;
502   uint64_t CodePoint = parseHexNumber(HexDigits);
503   if (Error || HexDigits.size() > 6) {
504     Error = true;
505     return;
506   }
507 
508   print("'");
509   switch (CodePoint) {
510   case '\t':
511     print(R"(\t)");
512     break;
513   case '\r':
514     print(R"(\r)");
515     break;
516   case '\n':
517     print(R"(\n)");
518     break;
519   case '\\':
520     print(R"(\\)");
521     break;
522   case '"':
523     print(R"(")");
524     break;
525   case '\'':
526     print(R"(\')");
527     break;
528   default:
529     if (isAsciiPrintable(CodePoint)) {
530       char C = CodePoint;
531       print(C);
532     } else {
533       print(R"(\u{)");
534       print(HexDigits);
535       print('}');
536     }
537     break;
538   }
539   print('\'');
540 }
541 
542 // <undisambiguated-identifier> = ["u"] <decimal-number> ["_"] <bytes>
543 Identifier Demangler::parseIdentifier() {
544   bool Punycode = consumeIf('u');
545   uint64_t Bytes = parseDecimalNumber();
546 
547   // Underscore resolves the ambiguity when identifier starts with a decimal
548   // digit or another underscore.
549   consumeIf('_');
550 
551   if (Error || Bytes > Input.size() - Position) {
552     Error = true;
553     return {};
554   }
555   StringView S = Input.substr(Position, Bytes);
556   Position += Bytes;
557 
558   if (!std::all_of(S.begin(), S.end(), isValid)) {
559     Error = true;
560     return {};
561   }
562 
563   return {S, Punycode};
564 }
565 
566 // Parses optional base 62 number. The presence of a number is determined using
567 // Tag. Returns 0 when tag is absent and parsed value + 1 otherwise.
568 uint64_t Demangler::parseOptionalBase62Number(char Tag) {
569   if (!consumeIf(Tag))
570     return 0;
571 
572   uint64_t N = parseBase62Number();
573   if (Error || !addAssign(N, 1))
574     return 0;
575 
576   return N;
577 }
578 
579 // Parses base 62 number with <0-9a-zA-Z> as digits. Number is terminated by
580 // "_". All values are offset by 1, so that "_" encodes 0, "0_" encodes 1,
581 // "1_" encodes 2, etc.
582 //
583 // <base-62-number> = {<0-9a-zA-Z>} "_"
584 uint64_t Demangler::parseBase62Number() {
585   if (consumeIf('_'))
586     return 0;
587 
588   uint64_t Value = 0;
589 
590   while (true) {
591     uint64_t Digit;
592     char C = consume();
593 
594     if (C == '_') {
595       break;
596     } else if (isDigit(C)) {
597       Digit = C - '0';
598     } else if (isLower(C)) {
599       Digit = 10 + (C - 'a');
600     } else if (isUpper(C)) {
601       Digit = 10 + 26 + (C - 'A');
602     } else {
603       Error = true;
604       return 0;
605     }
606 
607     if (!mulAssign(Value, 62))
608       return 0;
609 
610     if (!addAssign(Value, Digit))
611       return 0;
612   }
613 
614   if (!addAssign(Value, 1))
615     return 0;
616 
617   return Value;
618 }
619 
620 // Parses a decimal number that had been encoded without any leading zeros.
621 //
622 // <decimal-number> = "0"
623 //                  | <1-9> {<0-9>}
624 uint64_t Demangler::parseDecimalNumber() {
625   char C = look();
626   if (!isDigit(C)) {
627     Error = true;
628     return 0;
629   }
630 
631   if (C == '0') {
632     consume();
633     return 0;
634   }
635 
636   uint64_t Value = 0;
637 
638   while (isDigit(look())) {
639     if (!mulAssign(Value, 10)) {
640       Error = true;
641       return 0;
642     }
643 
644     uint64_t D = consume() - '0';
645     if (!addAssign(Value, D))
646       return 0;
647   }
648 
649   return Value;
650 }
651 
652 // Parses a hexadecimal number with <0-9a-f> as a digits. Returns the parsed
653 // value and stores hex digits in HexDigits. The return value is unspecified if
654 // HexDigits.size() > 16.
655 //
656 // <hex-number> = "0_"
657 //              | <1-9a-f> {<0-9a-f>} "_"
658 uint64_t Demangler::parseHexNumber(StringView &HexDigits) {
659   size_t Start = Position;
660   uint64_t Value = 0;
661 
662   if (!isHexDigit(look()))
663     Error = true;
664 
665   if (consumeIf('0')) {
666     if (!consumeIf('_'))
667       Error = true;
668   } else {
669     while (!Error && !consumeIf('_')) {
670       char C = consume();
671       Value *= 16;
672       if (isDigit(C))
673         Value += C - '0';
674       else if ('a' <= C && C <= 'f')
675         Value += 10 + (C - 'a');
676       else
677         Error = true;
678     }
679   }
680 
681   if (Error) {
682     HexDigits = StringView();
683     return 0;
684   }
685 
686   size_t End = Position - 1;
687   assert(Start < End);
688   HexDigits = Input.substr(Start, End - Start);
689   return Value;
690 }
691