xref: /llvm-project/llvm/lib/Demangle/RustDemangle.cpp (revision f0f2a8b21cad8f291908ea411c7b38b8c4c8ee42)
1 //===--- RustDemangle.cpp ---------------------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file defines a demangler for Rust v0 mangled symbols as specified in
10 // https://rust-lang.github.io/rfcs/2603-rust-symbol-name-mangling-v0.html
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "llvm/Demangle/RustDemangle.h"
15 #include "llvm/Demangle/Demangle.h"
16 
17 #include <algorithm>
18 #include <cassert>
19 #include <cstring>
20 #include <limits>
21 
22 using namespace llvm;
23 using namespace rust_demangle;
24 
25 char *llvm::rustDemangle(const char *MangledName, char *Buf, size_t *N,
26                          int *Status) {
27   if (MangledName == nullptr || (Buf != nullptr && N == nullptr)) {
28     if (Status != nullptr)
29       *Status = demangle_invalid_args;
30     return nullptr;
31   }
32 
33   // Return early if mangled name doesn't look like a Rust symbol.
34   StringView Mangled(MangledName);
35   if (!Mangled.startsWith("_R")) {
36     if (Status != nullptr)
37       *Status = demangle_invalid_mangled_name;
38     return nullptr;
39   }
40 
41   Demangler D;
42   if (!initializeOutputStream(nullptr, nullptr, D.Output, 1024)) {
43     if (Status != nullptr)
44       *Status = demangle_memory_alloc_failure;
45     return nullptr;
46   }
47 
48   if (!D.demangle(Mangled)) {
49     if (Status != nullptr)
50       *Status = demangle_invalid_mangled_name;
51     std::free(D.Output.getBuffer());
52     return nullptr;
53   }
54 
55   D.Output += '\0';
56   char *Demangled = D.Output.getBuffer();
57   size_t DemangledLen = D.Output.getCurrentPosition();
58 
59   if (Buf != nullptr) {
60     if (DemangledLen <= *N) {
61       std::memcpy(Buf, Demangled, DemangledLen);
62       std::free(Demangled);
63       Demangled = Buf;
64     } else {
65       std::free(Buf);
66     }
67   }
68 
69   if (N != nullptr)
70     *N = DemangledLen;
71 
72   if (Status != nullptr)
73     *Status = demangle_success;
74 
75   return Demangled;
76 }
77 
78 Demangler::Demangler(size_t MaxRecursionLevel)
79     : MaxRecursionLevel(MaxRecursionLevel) {}
80 
81 static inline bool isDigit(const char C) { return '0' <= C && C <= '9'; }
82 
83 static inline bool isHexDigit(const char C) {
84   return ('0' <= C && C <= '9') || ('a' <= C && C <= 'f');
85 }
86 
87 static inline bool isLower(const char C) { return 'a' <= C && C <= 'z'; }
88 
89 static inline bool isUpper(const char C) { return 'A' <= C && C <= 'Z'; }
90 
91 /// Returns true if C is a valid mangled character: <0-9a-zA-Z_>.
92 static inline bool isValid(const char C) {
93   return isDigit(C) || isLower(C) || isUpper(C) || C == '_';
94 }
95 
96 // Demangles Rust v0 mangled symbol. Returns true when successful, and false
97 // otherwise. The demangled symbol is stored in Output field. It is
98 // responsibility of the caller to free the memory behind the output stream.
99 //
100 // <symbol-name> = "_R" <path> [<instantiating-crate>]
101 bool Demangler::demangle(StringView Mangled) {
102   Position = 0;
103   Error = false;
104   Print = true;
105   RecursionLevel = 0;
106 
107   if (!Mangled.consumeFront("_R")) {
108     Error = true;
109     return false;
110   }
111   Input = Mangled;
112 
113   demanglePath();
114 
115   // FIXME parse optional <instantiating-crate>.
116 
117   if (Position != Input.size())
118     Error = true;
119 
120   return !Error;
121 }
122 
123 // <path> = "C" <identifier>               // crate root
124 //        | "M" <impl-path> <type>         // <T> (inherent impl)
125 //        | "X" <impl-path> <type> <path>  // <T as Trait> (trait impl)
126 //        | "Y" <type> <path>              // <T as Trait> (trait definition)
127 //        | "N" <ns> <path> <identifier>   // ...::ident (nested path)
128 //        | "I" <path> {<generic-arg>} "E" // ...<T, U> (generic args)
129 //        | <backref>
130 // <identifier> = [<disambiguator>] <undisambiguated-identifier>
131 // <ns> = "C"      // closure
132 //      | "S"      // shim
133 //      | <A-Z>    // other special namespaces
134 //      | <a-z>    // internal namespaces
135 void Demangler::demanglePath() {
136   if (Error || RecursionLevel >= MaxRecursionLevel) {
137     Error = true;
138     return;
139   }
140   SwapAndRestore<size_t> SaveRecursionLevel(RecursionLevel, RecursionLevel + 1);
141 
142   switch (consume()) {
143   case 'C': {
144     parseOptionalBase62Number('s');
145     Identifier Ident = parseIdentifier();
146     print(Ident.Name);
147     break;
148   }
149   case 'M': {
150     demangleImplPath();
151     print("<");
152     demangleType();
153     print(">");
154     break;
155   }
156   case 'N': {
157     char NS = consume();
158     if (!isLower(NS) && !isUpper(NS)) {
159       Error = true;
160       break;
161     }
162     demanglePath();
163 
164     uint64_t Disambiguator = parseOptionalBase62Number('s');
165     Identifier Ident = parseIdentifier();
166 
167     if (isUpper(NS)) {
168       // Special namespaces
169       print("::{");
170       if (NS == 'C')
171         print("closure");
172       else if (NS == 'S')
173         print("shim");
174       else
175         print(NS);
176       if (!Ident.empty()) {
177         print(":");
178         print(Ident.Name);
179       }
180       print('#');
181       printDecimalNumber(Disambiguator);
182       print('}');
183     } else {
184       // Implementation internal namespaces.
185       if (!Ident.empty()) {
186         print("::");
187         print(Ident.Name);
188       }
189     }
190     break;
191   }
192   case 'I': {
193     demanglePath();
194     print("::<");
195     for (size_t I = 0; !Error && !consumeIf('E'); ++I) {
196       if (I > 0)
197         print(", ");
198       demangleGenericArg();
199     }
200     print(">");
201     break;
202   }
203   default:
204     // FIXME parse remaining productions.
205     Error = true;
206     break;
207   }
208 }
209 
210 // <impl-path> = [<disambiguator>] <path>
211 // <disambiguator> = "s" <base-62-number>
212 void Demangler::demangleImplPath() {
213   SwapAndRestore<bool> SavePrint(Print, false);
214   parseOptionalBase62Number('s');
215   demanglePath();
216 }
217 
218 // <generic-arg> = <lifetime>
219 //               | <type>
220 //               | "K" <const>
221 // <lifetime> = "L" <base-62-number>
222 void Demangler::demangleGenericArg() {
223   if (consumeIf('K'))
224     demangleConst();
225   else
226     demangleType();
227   // FIXME demangle lifetimes.
228 }
229 
230 // <basic-type> = "a"      // i8
231 //              | "b"      // bool
232 //              | "c"      // char
233 //              | "d"      // f64
234 //              | "e"      // str
235 //              | "f"      // f32
236 //              | "h"      // u8
237 //              | "i"      // isize
238 //              | "j"      // usize
239 //              | "l"      // i32
240 //              | "m"      // u32
241 //              | "n"      // i128
242 //              | "o"      // u128
243 //              | "s"      // i16
244 //              | "t"      // u16
245 //              | "u"      // ()
246 //              | "v"      // ...
247 //              | "x"      // i64
248 //              | "y"      // u64
249 //              | "z"      // !
250 //              | "p"      // placeholder (e.g. for generic params), shown as _
251 static bool parseBasicType(char C, BasicType &Type) {
252   switch (C) {
253   case 'a':
254     Type = BasicType::I8;
255     return true;
256   case 'b':
257     Type = BasicType::Bool;
258     return true;
259   case 'c':
260     Type = BasicType::Char;
261     return true;
262   case 'd':
263     Type = BasicType::F64;
264     return true;
265   case 'e':
266     Type = BasicType::Str;
267     return true;
268   case 'f':
269     Type = BasicType::F32;
270     return true;
271   case 'h':
272     Type = BasicType::U8;
273     return true;
274   case 'i':
275     Type = BasicType::ISize;
276     return true;
277   case 'j':
278     Type = BasicType::USize;
279     return true;
280   case 'l':
281     Type = BasicType::I32;
282     return true;
283   case 'm':
284     Type = BasicType::U32;
285     return true;
286   case 'n':
287     Type = BasicType::I128;
288     return true;
289   case 'o':
290     Type = BasicType::U128;
291     return true;
292   case 'p':
293     Type = BasicType::Placeholder;
294     return true;
295   case 's':
296     Type = BasicType::I16;
297     return true;
298   case 't':
299     Type = BasicType::U16;
300     return true;
301   case 'u':
302     Type = BasicType::Unit;
303     return true;
304   case 'v':
305     Type = BasicType::Variadic;
306     return true;
307   case 'x':
308     Type = BasicType::I64;
309     return true;
310   case 'y':
311     Type = BasicType::U64;
312     return true;
313   case 'z':
314     Type = BasicType::Never;
315     return true;
316   default:
317     return false;
318   }
319 }
320 
321 void Demangler::printBasicType(BasicType Type) {
322   switch (Type) {
323   case BasicType::Bool:
324     print("bool");
325     break;
326   case BasicType::Char:
327     print("char");
328     break;
329   case BasicType::I8:
330     print("i8");
331     break;
332   case BasicType::I16:
333     print("i16");
334     break;
335   case BasicType::I32:
336     print("i32");
337     break;
338   case BasicType::I64:
339     print("i64");
340     break;
341   case BasicType::I128:
342     print("i128");
343     break;
344   case BasicType::ISize:
345     print("isize");
346     break;
347   case BasicType::U8:
348     print("u8");
349     break;
350   case BasicType::U16:
351     print("u16");
352     break;
353   case BasicType::U32:
354     print("u32");
355     break;
356   case BasicType::U64:
357     print("u64");
358     break;
359   case BasicType::U128:
360     print("u128");
361     break;
362   case BasicType::USize:
363     print("usize");
364     break;
365   case BasicType::F32:
366     print("f32");
367     break;
368   case BasicType::F64:
369     print("f64");
370     break;
371   case BasicType::Str:
372     print("str");
373     break;
374   case BasicType::Placeholder:
375     print("_");
376     break;
377   case BasicType::Unit:
378     print("()");
379     break;
380   case BasicType::Variadic:
381     print("...");
382     break;
383   case BasicType::Never:
384     print("!");
385     break;
386   }
387 }
388 
389 // <type> = | <basic-type>
390 //          | <path>                      // named type
391 //          | "A" <type> <const>          // [T; N]
392 //          | "S" <type>                  // [T]
393 //          | "T" {<type>} "E"            // (T1, T2, T3, ...)
394 //          | "R" [<lifetime>] <type>     // &T
395 //          | "Q" [<lifetime>] <type>     // &mut T
396 //          | "P" <type>                  // *const T
397 //          | "O" <type>                  // *mut T
398 //          | "F" <fn-sig>                // fn(...) -> ...
399 //          | "D" <dyn-bounds> <lifetime> // dyn Trait<Assoc = X> + Send + 'a
400 //          | <backref>                   // backref
401 void Demangler::demangleType() {
402   BasicType Type;
403   if (parseBasicType(consume(), Type))
404     printBasicType(Type);
405   else
406     Error = true; // FIXME parse remaining productions.
407 }
408 
409 // <const> = <basic-type> <const-data>
410 //         | "p"                          // placeholder
411 //         | <backref>
412 void Demangler::demangleConst() {
413   BasicType Type;
414   if (parseBasicType(consume(), Type)) {
415     switch (Type) {
416     case BasicType::I8:
417     case BasicType::I16:
418     case BasicType::I32:
419     case BasicType::I64:
420     case BasicType::I128:
421     case BasicType::ISize:
422     case BasicType::U8:
423     case BasicType::U16:
424     case BasicType::U32:
425     case BasicType::U64:
426     case BasicType::U128:
427     case BasicType::USize:
428       demangleConstInt();
429       break;
430     case BasicType::Bool:
431       demangleConstBool();
432       break;
433     case BasicType::Char:
434       demangleConstChar();
435       break;
436     case BasicType::Placeholder:
437       print('_');
438       break;
439     default:
440       // FIXME demangle backreferences.
441       Error = true;
442       break;
443     }
444   } else {
445     Error = true;
446   }
447 }
448 
449 // <const-data> = ["n"] <hex-number>
450 void Demangler::demangleConstInt() {
451   if (consumeIf('n'))
452     print('-');
453 
454   StringView HexDigits;
455   uint64_t Value = parseHexNumber(HexDigits);
456   if (HexDigits.size() <= 16) {
457     printDecimalNumber(Value);
458   } else {
459     print("0x");
460     print(HexDigits);
461   }
462 }
463 
464 // <const-data> = "0_" // false
465 //              | "1_" // true
466 void Demangler::demangleConstBool() {
467   StringView HexDigits;
468   parseHexNumber(HexDigits);
469   if (HexDigits == "0")
470     print("false");
471   else if (HexDigits == "1")
472     print("true");
473   else
474     Error = true;
475 }
476 
477 /// Returns true if CodePoint represents a printable ASCII character.
478 static bool isAsciiPrintable(uint64_t CodePoint) {
479   return 0x20 <= CodePoint && CodePoint <= 0x7e;
480 }
481 
482 // <const-data> = <hex-number>
483 void Demangler::demangleConstChar() {
484   StringView HexDigits;
485   uint64_t CodePoint = parseHexNumber(HexDigits);
486   if (Error || HexDigits.size() > 6) {
487     Error = true;
488     return;
489   }
490 
491   print("'");
492   switch (CodePoint) {
493   case '\t':
494     print(R"(\t)");
495     break;
496   case '\r':
497     print(R"(\r)");
498     break;
499   case '\n':
500     print(R"(\n)");
501     break;
502   case '\\':
503     print(R"(\\)");
504     break;
505   case '"':
506     print(R"(")");
507     break;
508   case '\'':
509     print(R"(\')");
510     break;
511   default:
512     if (isAsciiPrintable(CodePoint)) {
513       char C = CodePoint;
514       print(C);
515     } else {
516       print(R"(\u{)");
517       print(HexDigits);
518       print('}');
519     }
520     break;
521   }
522   print('\'');
523 }
524 
525 // <undisambiguated-identifier> = ["u"] <decimal-number> ["_"] <bytes>
526 Identifier Demangler::parseIdentifier() {
527   bool Punycode = consumeIf('u');
528   uint64_t Bytes = parseDecimalNumber();
529 
530   // Underscore resolves the ambiguity when identifier starts with a decimal
531   // digit or another underscore.
532   consumeIf('_');
533 
534   if (Error || Bytes > Input.size() - Position) {
535     Error = true;
536     return {};
537   }
538   StringView S = Input.substr(Position, Bytes);
539   Position += Bytes;
540 
541   if (!std::all_of(S.begin(), S.end(), isValid)) {
542     Error = true;
543     return {};
544   }
545 
546   return {S, Punycode};
547 }
548 
549 // Parses optional base 62 number. The presence of a number is determined using
550 // Tag. Returns 0 when tag is absent and parsed value + 1 otherwise.
551 uint64_t Demangler::parseOptionalBase62Number(char Tag) {
552   if (!consumeIf(Tag))
553     return 0;
554 
555   uint64_t N = parseBase62Number();
556   if (Error || !addAssign(N, 1))
557     return 0;
558 
559   return N;
560 }
561 
562 // Parses base 62 number with <0-9a-zA-Z> as digits. Number is terminated by
563 // "_". All values are offset by 1, so that "_" encodes 0, "0_" encodes 1,
564 // "1_" encodes 2, etc.
565 //
566 // <base-62-number> = {<0-9a-zA-Z>} "_"
567 uint64_t Demangler::parseBase62Number() {
568   if (consumeIf('_'))
569     return 0;
570 
571   uint64_t Value = 0;
572 
573   while (true) {
574     uint64_t Digit;
575     char C = consume();
576 
577     if (C == '_') {
578       break;
579     } else if (isDigit(C)) {
580       Digit = C - '0';
581     } else if (isLower(C)) {
582       Digit = 10 + (C - 'a');
583     } else if (isUpper(C)) {
584       Digit = 10 + 26 + (C - 'A');
585     } else {
586       Error = true;
587       return 0;
588     }
589 
590     if (!mulAssign(Value, 62))
591       return 0;
592 
593     if (!addAssign(Value, Digit))
594       return 0;
595   }
596 
597   if (!addAssign(Value, 1))
598     return 0;
599 
600   return Value;
601 }
602 
603 // Parses a decimal number that had been encoded without any leading zeros.
604 //
605 // <decimal-number> = "0"
606 //                  | <1-9> {<0-9>}
607 uint64_t Demangler::parseDecimalNumber() {
608   char C = look();
609   if (!isDigit(C)) {
610     Error = true;
611     return 0;
612   }
613 
614   if (C == '0') {
615     consume();
616     return 0;
617   }
618 
619   uint64_t Value = 0;
620 
621   while (isDigit(look())) {
622     if (!mulAssign(Value, 10)) {
623       Error = true;
624       return 0;
625     }
626 
627     uint64_t D = consume() - '0';
628     if (!addAssign(Value, D))
629       return 0;
630   }
631 
632   return Value;
633 }
634 
635 // Parses a hexadecimal number with <0-9a-f> as a digits. Returns the parsed
636 // value and stores hex digits in HexDigits. The return value is unspecified if
637 // HexDigits.size() > 16.
638 //
639 // <hex-number> = "0_"
640 //              | <1-9a-f> {<0-9a-f>} "_"
641 uint64_t Demangler::parseHexNumber(StringView &HexDigits) {
642   size_t Start = Position;
643   uint64_t Value = 0;
644 
645   if (!isHexDigit(look()))
646     Error = true;
647 
648   if (consumeIf('0')) {
649     if (!consumeIf('_'))
650       Error = true;
651   } else {
652     while (!Error && !consumeIf('_')) {
653       char C = consume();
654       Value *= 16;
655       if (isDigit(C))
656         Value += C - '0';
657       else if ('a' <= C && C <= 'f')
658         Value += 10 + (C - 'a');
659       else
660         Error = true;
661     }
662   }
663 
664   if (Error) {
665     HexDigits = StringView();
666     return 0;
667   }
668 
669   size_t End = Position - 1;
670   assert(Start < End);
671   HexDigits = Input.substr(Start, End - Start);
672   return Value;
673 }
674