xref: /llvm-project/llvm/lib/Demangle/RustDemangle.cpp (revision 2ba49f6ae611b1dfd1dd64fe0fc4c0143bedc271)
1 //===--- RustDemangle.cpp ---------------------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file defines a demangler for Rust v0 mangled symbols as specified in
10 // https://rust-lang.github.io/rfcs/2603-rust-symbol-name-mangling-v0.html
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "llvm/Demangle/RustDemangle.h"
15 #include "llvm/Demangle/Demangle.h"
16 
17 #include <algorithm>
18 #include <cassert>
19 #include <cstring>
20 #include <limits>
21 
22 using namespace llvm;
23 using namespace rust_demangle;
24 
25 char *llvm::rustDemangle(const char *MangledName, char *Buf, size_t *N,
26                          int *Status) {
27   if (MangledName == nullptr || (Buf != nullptr && N == nullptr)) {
28     if (Status != nullptr)
29       *Status = demangle_invalid_args;
30     return nullptr;
31   }
32 
33   // Return early if mangled name doesn't look like a Rust symbol.
34   StringView Mangled(MangledName);
35   if (!Mangled.startsWith("_R")) {
36     if (Status != nullptr)
37       *Status = demangle_invalid_mangled_name;
38     return nullptr;
39   }
40 
41   Demangler D;
42   if (!initializeOutputStream(nullptr, nullptr, D.Output, 1024)) {
43     if (Status != nullptr)
44       *Status = demangle_memory_alloc_failure;
45     return nullptr;
46   }
47 
48   if (!D.demangle(Mangled)) {
49     if (Status != nullptr)
50       *Status = demangle_invalid_mangled_name;
51     std::free(D.Output.getBuffer());
52     return nullptr;
53   }
54 
55   D.Output += '\0';
56   char *Demangled = D.Output.getBuffer();
57   size_t DemangledLen = D.Output.getCurrentPosition();
58 
59   if (Buf != nullptr) {
60     if (DemangledLen <= *N) {
61       std::memcpy(Buf, Demangled, DemangledLen);
62       std::free(Demangled);
63       Demangled = Buf;
64     } else {
65       std::free(Buf);
66     }
67   }
68 
69   if (N != nullptr)
70     *N = DemangledLen;
71 
72   if (Status != nullptr)
73     *Status = demangle_success;
74 
75   return Demangled;
76 }
77 
78 Demangler::Demangler(size_t MaxRecursionLevel)
79     : MaxRecursionLevel(MaxRecursionLevel) {}
80 
81 static inline bool isDigit(const char C) { return '0' <= C && C <= '9'; }
82 
83 static inline bool isHexDigit(const char C) {
84   return ('0' <= C && C <= '9') || ('a' <= C && C <= 'f');
85 }
86 
87 static inline bool isLower(const char C) { return 'a' <= C && C <= 'z'; }
88 
89 static inline bool isUpper(const char C) { return 'A' <= C && C <= 'Z'; }
90 
91 /// Returns true if C is a valid mangled character: <0-9a-zA-Z_>.
92 static inline bool isValid(const char C) {
93   return isDigit(C) || isLower(C) || isUpper(C) || C == '_';
94 }
95 
96 // Demangles Rust v0 mangled symbol. Returns true when successful, and false
97 // otherwise. The demangled symbol is stored in Output field. It is
98 // responsibility of the caller to free the memory behind the output stream.
99 //
100 // <symbol-name> = "_R" <path> [<instantiating-crate>]
101 bool Demangler::demangle(StringView Mangled) {
102   Position = 0;
103   Error = false;
104   RecursionLevel = 0;
105 
106   if (!Mangled.consumeFront("_R")) {
107     Error = true;
108     return false;
109   }
110   Input = Mangled;
111 
112   demanglePath();
113 
114   // FIXME parse optional <instantiating-crate>.
115 
116   if (Position != Input.size())
117     Error = true;
118 
119   return !Error;
120 }
121 
122 // <path> = "C" <identifier>               // crate root
123 //        | "M" <impl-path> <type>         // <T> (inherent impl)
124 //        | "X" <impl-path> <type> <path>  // <T as Trait> (trait impl)
125 //        | "Y" <type> <path>              // <T as Trait> (trait definition)
126 //        | "N" <ns> <path> <identifier>   // ...::ident (nested path)
127 //        | "I" <path> {<generic-arg>} "E" // ...<T, U> (generic args)
128 //        | <backref>
129 // <identifier> = [<disambiguator>] <undisambiguated-identifier>
130 // <ns> = "C"      // closure
131 //      | "S"      // shim
132 //      | <A-Z>    // other special namespaces
133 //      | <a-z>    // internal namespaces
134 void Demangler::demanglePath() {
135   if (Error || RecursionLevel >= MaxRecursionLevel) {
136     Error = true;
137     return;
138   }
139   SwapAndRestore<size_t> SaveRecursionLevel(RecursionLevel, RecursionLevel + 1);
140 
141   switch (consume()) {
142   case 'C': {
143     parseOptionalBase62Number('s');
144     Identifier Ident = parseIdentifier();
145     print(Ident.Name);
146     break;
147   }
148   case 'N': {
149     char NS = consume();
150     if (!isLower(NS) && !isUpper(NS)) {
151       Error = true;
152       break;
153     }
154     demanglePath();
155 
156     uint64_t Disambiguator = parseOptionalBase62Number('s');
157     Identifier Ident = parseIdentifier();
158 
159     if (isUpper(NS)) {
160       // Special namespaces
161       print("::{");
162       if (NS == 'C')
163         print("closure");
164       else if (NS == 'S')
165         print("shim");
166       else
167         print(NS);
168       if (!Ident.empty()) {
169         print(":");
170         print(Ident.Name);
171       }
172       print('#');
173       printDecimalNumber(Disambiguator);
174       print('}');
175     } else {
176       // Implementation internal namespaces.
177       if (!Ident.empty()) {
178         print("::");
179         print(Ident.Name);
180       }
181     }
182     break;
183   }
184   case 'I': {
185     demanglePath();
186     print("::<");
187     for (size_t I = 0; !Error && !consumeIf('E'); ++I) {
188       if (I > 0)
189         print(", ");
190       demangleGenericArg();
191     }
192     print(">");
193     break;
194   }
195   default:
196     // FIXME parse remaining productions.
197     Error = true;
198     break;
199   }
200 }
201 
202 // <generic-arg> = <lifetime>
203 //               | <type>
204 //               | "K" <const>
205 // <lifetime> = "L" <base-62-number>
206 void Demangler::demangleGenericArg() {
207   if (consumeIf('K'))
208     demangleConst();
209   else
210     demangleType();
211   // FIXME demangle lifetimes.
212 }
213 
214 // <basic-type> = "a"      // i8
215 //              | "b"      // bool
216 //              | "c"      // char
217 //              | "d"      // f64
218 //              | "e"      // str
219 //              | "f"      // f32
220 //              | "h"      // u8
221 //              | "i"      // isize
222 //              | "j"      // usize
223 //              | "l"      // i32
224 //              | "m"      // u32
225 //              | "n"      // i128
226 //              | "o"      // u128
227 //              | "s"      // i16
228 //              | "t"      // u16
229 //              | "u"      // ()
230 //              | "v"      // ...
231 //              | "x"      // i64
232 //              | "y"      // u64
233 //              | "z"      // !
234 //              | "p"      // placeholder (e.g. for generic params), shown as _
235 static bool parseBasicType(char C, BasicType &Type) {
236   switch (C) {
237   case 'a':
238     Type = BasicType::I8;
239     return true;
240   case 'b':
241     Type = BasicType::Bool;
242     return true;
243   case 'c':
244     Type = BasicType::Char;
245     return true;
246   case 'd':
247     Type = BasicType::F64;
248     return true;
249   case 'e':
250     Type = BasicType::Str;
251     return true;
252   case 'f':
253     Type = BasicType::F32;
254     return true;
255   case 'h':
256     Type = BasicType::U8;
257     return true;
258   case 'i':
259     Type = BasicType::ISize;
260     return true;
261   case 'j':
262     Type = BasicType::USize;
263     return true;
264   case 'l':
265     Type = BasicType::I32;
266     return true;
267   case 'm':
268     Type = BasicType::U32;
269     return true;
270   case 'n':
271     Type = BasicType::I128;
272     return true;
273   case 'o':
274     Type = BasicType::U128;
275     return true;
276   case 'p':
277     Type = BasicType::Placeholder;
278     return true;
279   case 's':
280     Type = BasicType::I16;
281     return true;
282   case 't':
283     Type = BasicType::U16;
284     return true;
285   case 'u':
286     Type = BasicType::Unit;
287     return true;
288   case 'v':
289     Type = BasicType::Variadic;
290     return true;
291   case 'x':
292     Type = BasicType::I64;
293     return true;
294   case 'y':
295     Type = BasicType::U64;
296     return true;
297   case 'z':
298     Type = BasicType::Never;
299     return true;
300   default:
301     return false;
302   }
303 }
304 
305 void Demangler::printBasicType(BasicType Type) {
306   switch (Type) {
307   case BasicType::Bool:
308     print("bool");
309     break;
310   case BasicType::Char:
311     print("char");
312     break;
313   case BasicType::I8:
314     print("i8");
315     break;
316   case BasicType::I16:
317     print("i16");
318     break;
319   case BasicType::I32:
320     print("i32");
321     break;
322   case BasicType::I64:
323     print("i64");
324     break;
325   case BasicType::I128:
326     print("i128");
327     break;
328   case BasicType::ISize:
329     print("isize");
330     break;
331   case BasicType::U8:
332     print("u8");
333     break;
334   case BasicType::U16:
335     print("u16");
336     break;
337   case BasicType::U32:
338     print("u32");
339     break;
340   case BasicType::U64:
341     print("u64");
342     break;
343   case BasicType::U128:
344     print("u128");
345     break;
346   case BasicType::USize:
347     print("usize");
348     break;
349   case BasicType::F32:
350     print("f32");
351     break;
352   case BasicType::F64:
353     print("f64");
354     break;
355   case BasicType::Str:
356     print("str");
357     break;
358   case BasicType::Placeholder:
359     print("_");
360     break;
361   case BasicType::Unit:
362     print("()");
363     break;
364   case BasicType::Variadic:
365     print("...");
366     break;
367   case BasicType::Never:
368     print("!");
369     break;
370   }
371 }
372 
373 // <type> = | <basic-type>
374 //          | <path>                      // named type
375 //          | "A" <type> <const>          // [T; N]
376 //          | "S" <type>                  // [T]
377 //          | "T" {<type>} "E"            // (T1, T2, T3, ...)
378 //          | "R" [<lifetime>] <type>     // &T
379 //          | "Q" [<lifetime>] <type>     // &mut T
380 //          | "P" <type>                  // *const T
381 //          | "O" <type>                  // *mut T
382 //          | "F" <fn-sig>                // fn(...) -> ...
383 //          | "D" <dyn-bounds> <lifetime> // dyn Trait<Assoc = X> + Send + 'a
384 //          | <backref>                   // backref
385 void Demangler::demangleType() {
386   BasicType Type;
387   if (parseBasicType(consume(), Type))
388     printBasicType(Type);
389   else
390     Error = true; // FIXME parse remaining productions.
391 }
392 
393 // <const> = <basic-type> <const-data>
394 //         | "p"                          // placeholder
395 //         | <backref>
396 void Demangler::demangleConst() {
397   BasicType Type;
398   if (parseBasicType(consume(), Type)) {
399     switch (Type) {
400     case BasicType::I8:
401     case BasicType::I16:
402     case BasicType::I32:
403     case BasicType::I64:
404     case BasicType::I128:
405     case BasicType::ISize:
406     case BasicType::U8:
407     case BasicType::U16:
408     case BasicType::U32:
409     case BasicType::U64:
410     case BasicType::U128:
411     case BasicType::USize:
412       demangleConstInt();
413       break;
414     case BasicType::Bool:
415       demangleConstBool();
416       break;
417     case BasicType::Char:
418       demangleConstChar();
419       break;
420     case BasicType::Placeholder:
421       print('_');
422       break;
423     default:
424       // FIXME demangle backreferences.
425       Error = true;
426       break;
427     }
428   } else {
429     Error = true;
430   }
431 }
432 
433 // <const-data> = ["n"] <hex-number>
434 void Demangler::demangleConstInt() {
435   if (consumeIf('n'))
436     print('-');
437 
438   StringView HexDigits;
439   uint64_t Value = parseHexNumber(HexDigits);
440   if (HexDigits.size() <= 16) {
441     printDecimalNumber(Value);
442   } else {
443     print("0x");
444     print(HexDigits);
445   }
446 }
447 
448 // <const-data> = "0_" // false
449 //              | "1_" // true
450 void Demangler::demangleConstBool() {
451   StringView HexDigits;
452   parseHexNumber(HexDigits);
453   if (HexDigits == "0")
454     print("false");
455   else if (HexDigits == "1")
456     print("true");
457   else
458     Error = true;
459 }
460 
461 /// Returns true if CodePoint represents a printable ASCII character.
462 static bool isAsciiPrintable(uint64_t CodePoint) {
463   return 0x20 <= CodePoint && CodePoint <= 0x7e;
464 }
465 
466 // <const-data> = <hex-number>
467 void Demangler::demangleConstChar() {
468   StringView HexDigits;
469   uint64_t CodePoint = parseHexNumber(HexDigits);
470   if (Error || HexDigits.size() > 6) {
471     Error = true;
472     return;
473   }
474 
475   print("'");
476   switch (CodePoint) {
477   case '\t':
478     print(R"(\t)");
479     break;
480   case '\r':
481     print(R"(\r)");
482     break;
483   case '\n':
484     print(R"(\n)");
485     break;
486   case '\\':
487     print(R"(\\)");
488     break;
489   case '"':
490     print(R"(")");
491     break;
492   case '\'':
493     print(R"(\')");
494     break;
495   default:
496     if (isAsciiPrintable(CodePoint)) {
497       char C = CodePoint;
498       print(C);
499     } else {
500       print(R"(\u{)");
501       print(HexDigits);
502       print('}');
503     }
504     break;
505   }
506   print('\'');
507 }
508 
509 // <undisambiguated-identifier> = ["u"] <decimal-number> ["_"] <bytes>
510 Identifier Demangler::parseIdentifier() {
511   bool Punycode = consumeIf('u');
512   uint64_t Bytes = parseDecimalNumber();
513 
514   // Underscore resolves the ambiguity when identifier starts with a decimal
515   // digit or another underscore.
516   consumeIf('_');
517 
518   if (Error || Bytes > Input.size() - Position) {
519     Error = true;
520     return {};
521   }
522   StringView S = Input.substr(Position, Bytes);
523   Position += Bytes;
524 
525   if (!std::all_of(S.begin(), S.end(), isValid)) {
526     Error = true;
527     return {};
528   }
529 
530   return {S, Punycode};
531 }
532 
533 // Parses optional base 62 number. The presence of a number is determined using
534 // Tag. Returns 0 when tag is absent and parsed value + 1 otherwise.
535 uint64_t Demangler::parseOptionalBase62Number(char Tag) {
536   if (!consumeIf(Tag))
537     return 0;
538 
539   uint64_t N = parseBase62Number();
540   if (Error || !addAssign(N, 1))
541     return 0;
542 
543   return N;
544 }
545 
546 // Parses base 62 number with <0-9a-zA-Z> as digits. Number is terminated by
547 // "_". All values are offset by 1, so that "_" encodes 0, "0_" encodes 1,
548 // "1_" encodes 2, etc.
549 //
550 // <base-62-number> = {<0-9a-zA-Z>} "_"
551 uint64_t Demangler::parseBase62Number() {
552   if (consumeIf('_'))
553     return 0;
554 
555   uint64_t Value = 0;
556 
557   while (true) {
558     uint64_t Digit;
559     char C = consume();
560 
561     if (C == '_') {
562       break;
563     } else if (isDigit(C)) {
564       Digit = C - '0';
565     } else if (isLower(C)) {
566       Digit = 10 + (C - 'a');
567     } else if (isUpper(C)) {
568       Digit = 10 + 26 + (C - 'A');
569     } else {
570       Error = true;
571       return 0;
572     }
573 
574     if (!mulAssign(Value, 62))
575       return 0;
576 
577     if (!addAssign(Value, Digit))
578       return 0;
579   }
580 
581   if (!addAssign(Value, 1))
582     return 0;
583 
584   return Value;
585 }
586 
587 // Parses a decimal number that had been encoded without any leading zeros.
588 //
589 // <decimal-number> = "0"
590 //                  | <1-9> {<0-9>}
591 uint64_t Demangler::parseDecimalNumber() {
592   char C = look();
593   if (!isDigit(C)) {
594     Error = true;
595     return 0;
596   }
597 
598   if (C == '0') {
599     consume();
600     return 0;
601   }
602 
603   uint64_t Value = 0;
604 
605   while (isDigit(look())) {
606     if (!mulAssign(Value, 10)) {
607       Error = true;
608       return 0;
609     }
610 
611     uint64_t D = consume() - '0';
612     if (!addAssign(Value, D))
613       return 0;
614   }
615 
616   return Value;
617 }
618 
619 // Parses a hexadecimal number with <0-9a-f> as a digits. Returns the parsed
620 // value and stores hex digits in HexDigits. The return value is unspecified if
621 // HexDigits.size() > 16.
622 //
623 // <hex-number> = "0_"
624 //              | <1-9a-f> {<0-9a-f>} "_"
625 uint64_t Demangler::parseHexNumber(StringView &HexDigits) {
626   size_t Start = Position;
627   uint64_t Value = 0;
628 
629   if (!isHexDigit(look()))
630     Error = true;
631 
632   if (consumeIf('0')) {
633     if (!consumeIf('_'))
634       Error = true;
635   } else {
636     while (!Error && !consumeIf('_')) {
637       char C = consume();
638       Value *= 16;
639       if (isDigit(C))
640         Value += C - '0';
641       else if ('a' <= C && C <= 'f')
642         Value += 10 + (C - 'a');
643       else
644         Error = true;
645     }
646   }
647 
648   if (Error) {
649     HexDigits = StringView();
650     return 0;
651   }
652 
653   size_t End = Position - 1;
654   assert(Start < End);
655   HexDigits = Input.substr(Start, End - Start);
656   return Value;
657 }
658