1 //===--- RustDemangle.cpp ---------------------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file defines a demangler for Rust v0 mangled symbols as specified in
10 // https://rust-lang.github.io/rfcs/2603-rust-symbol-name-mangling-v0.html
11 //
12 //===----------------------------------------------------------------------===//
13
14 #include "llvm/Demangle/RustDemangle.h"
15 #include "llvm/Demangle/Demangle.h"
16
17 #include <algorithm>
18 #include <cassert>
19 #include <cstring>
20 #include <limits>
21
22 using namespace llvm;
23 using namespace rust_demangle;
24
rustDemangle(const char * MangledName,char * Buf,size_t * N,int * Status)25 char *llvm::rustDemangle(const char *MangledName, char *Buf, size_t *N,
26 int *Status) {
27 if (MangledName == nullptr || (Buf != nullptr && N == nullptr)) {
28 if (Status != nullptr)
29 *Status = demangle_invalid_args;
30 return nullptr;
31 }
32
33 // Return early if mangled name doesn't look like a Rust symbol.
34 StringView Mangled(MangledName);
35 if (!Mangled.startsWith("_R")) {
36 if (Status != nullptr)
37 *Status = demangle_invalid_mangled_name;
38 return nullptr;
39 }
40
41 Demangler D;
42 if (!initializeOutputStream(nullptr, nullptr, D.Output, 1024)) {
43 if (Status != nullptr)
44 *Status = demangle_memory_alloc_failure;
45 return nullptr;
46 }
47
48 if (!D.demangle(Mangled)) {
49 if (Status != nullptr)
50 *Status = demangle_invalid_mangled_name;
51 std::free(D.Output.getBuffer());
52 return nullptr;
53 }
54
55 D.Output += '\0';
56 char *Demangled = D.Output.getBuffer();
57 size_t DemangledLen = D.Output.getCurrentPosition();
58
59 if (Buf != nullptr) {
60 if (DemangledLen <= *N) {
61 std::memcpy(Buf, Demangled, DemangledLen);
62 std::free(Demangled);
63 Demangled = Buf;
64 } else {
65 std::free(Buf);
66 }
67 }
68
69 if (N != nullptr)
70 *N = DemangledLen;
71
72 if (Status != nullptr)
73 *Status = demangle_success;
74
75 return Demangled;
76 }
77
Demangler(size_t MaxRecursionLevel)78 Demangler::Demangler(size_t MaxRecursionLevel)
79 : MaxRecursionLevel(MaxRecursionLevel) {}
80
isDigit(const char C)81 static inline bool isDigit(const char C) { return '0' <= C && C <= '9'; }
82
isHexDigit(const char C)83 static inline bool isHexDigit(const char C) {
84 return ('0' <= C && C <= '9') || ('a' <= C && C <= 'f');
85 }
86
isLower(const char C)87 static inline bool isLower(const char C) { return 'a' <= C && C <= 'z'; }
88
isUpper(const char C)89 static inline bool isUpper(const char C) { return 'A' <= C && C <= 'Z'; }
90
91 /// Returns true if C is a valid mangled character: <0-9a-zA-Z_>.
isValid(const char C)92 static inline bool isValid(const char C) {
93 return isDigit(C) || isLower(C) || isUpper(C) || C == '_';
94 }
95
96 // Demangles Rust v0 mangled symbol. Returns true when successful, and false
97 // otherwise. The demangled symbol is stored in Output field. It is
98 // responsibility of the caller to free the memory behind the output stream.
99 //
100 // <symbol-name> = "_R" <path> [<instantiating-crate>]
demangle(StringView Mangled)101 bool Demangler::demangle(StringView Mangled) {
102 Position = 0;
103 Error = false;
104 Print = true;
105 RecursionLevel = 0;
106
107 if (!Mangled.consumeFront("_R")) {
108 Error = true;
109 return false;
110 }
111 Input = Mangled;
112
113 demanglePath(rust_demangle::InType::No);
114
115 // FIXME parse optional <instantiating-crate>.
116
117 if (Position != Input.size())
118 Error = true;
119
120 return !Error;
121 }
122
123 // Demangles a path. InType indicates whether a path is inside a type.
124 //
125 // <path> = "C" <identifier> // crate root
126 // | "M" <impl-path> <type> // <T> (inherent impl)
127 // | "X" <impl-path> <type> <path> // <T as Trait> (trait impl)
128 // | "Y" <type> <path> // <T as Trait> (trait definition)
129 // | "N" <ns> <path> <identifier> // ...::ident (nested path)
130 // | "I" <path> {<generic-arg>} "E" // ...<T, U> (generic args)
131 // | <backref>
132 // <identifier> = [<disambiguator>] <undisambiguated-identifier>
133 // <ns> = "C" // closure
134 // | "S" // shim
135 // | <A-Z> // other special namespaces
136 // | <a-z> // internal namespaces
demanglePath(InType InType)137 void Demangler::demanglePath(InType InType) {
138 if (Error || RecursionLevel >= MaxRecursionLevel) {
139 Error = true;
140 return;
141 }
142 SwapAndRestore<size_t> SaveRecursionLevel(RecursionLevel, RecursionLevel + 1);
143
144 switch (consume()) {
145 case 'C': {
146 parseOptionalBase62Number('s');
147 Identifier Ident = parseIdentifier();
148 print(Ident.Name);
149 break;
150 }
151 case 'M': {
152 demangleImplPath(InType);
153 print("<");
154 demangleType();
155 print(">");
156 break;
157 }
158 case 'X': {
159 demangleImplPath(InType);
160 print("<");
161 demangleType();
162 print(" as ");
163 demanglePath(rust_demangle::InType::Yes);
164 print(">");
165 break;
166 }
167 case 'Y': {
168 print("<");
169 demangleType();
170 print(" as ");
171 demanglePath(rust_demangle::InType::Yes);
172 print(">");
173 break;
174 }
175 case 'N': {
176 char NS = consume();
177 if (!isLower(NS) && !isUpper(NS)) {
178 Error = true;
179 break;
180 }
181 demanglePath(InType);
182
183 uint64_t Disambiguator = parseOptionalBase62Number('s');
184 Identifier Ident = parseIdentifier();
185
186 if (isUpper(NS)) {
187 // Special namespaces
188 print("::{");
189 if (NS == 'C')
190 print("closure");
191 else if (NS == 'S')
192 print("shim");
193 else
194 print(NS);
195 if (!Ident.empty()) {
196 print(":");
197 print(Ident.Name);
198 }
199 print('#');
200 printDecimalNumber(Disambiguator);
201 print('}');
202 } else {
203 // Implementation internal namespaces.
204 if (!Ident.empty()) {
205 print("::");
206 print(Ident.Name);
207 }
208 }
209 break;
210 }
211 case 'I': {
212 demanglePath(InType);
213 // Omit "::" when in a type, where it is optional.
214 if (InType == rust_demangle::InType::No)
215 print("::");
216 print("<");
217 for (size_t I = 0; !Error && !consumeIf('E'); ++I) {
218 if (I > 0)
219 print(", ");
220 demangleGenericArg();
221 }
222 print(">");
223 break;
224 }
225 default:
226 // FIXME parse remaining productions.
227 Error = true;
228 break;
229 }
230 }
231
232 // <impl-path> = [<disambiguator>] <path>
233 // <disambiguator> = "s" <base-62-number>
demangleImplPath(InType InType)234 void Demangler::demangleImplPath(InType InType) {
235 SwapAndRestore<bool> SavePrint(Print, false);
236 parseOptionalBase62Number('s');
237 demanglePath(InType);
238 }
239
240 // <generic-arg> = <lifetime>
241 // | <type>
242 // | "K" <const>
243 // <lifetime> = "L" <base-62-number>
demangleGenericArg()244 void Demangler::demangleGenericArg() {
245 if (consumeIf('K'))
246 demangleConst();
247 else
248 demangleType();
249 // FIXME demangle lifetimes.
250 }
251
252 // <basic-type> = "a" // i8
253 // | "b" // bool
254 // | "c" // char
255 // | "d" // f64
256 // | "e" // str
257 // | "f" // f32
258 // | "h" // u8
259 // | "i" // isize
260 // | "j" // usize
261 // | "l" // i32
262 // | "m" // u32
263 // | "n" // i128
264 // | "o" // u128
265 // | "s" // i16
266 // | "t" // u16
267 // | "u" // ()
268 // | "v" // ...
269 // | "x" // i64
270 // | "y" // u64
271 // | "z" // !
272 // | "p" // placeholder (e.g. for generic params), shown as _
parseBasicType(char C,BasicType & Type)273 static bool parseBasicType(char C, BasicType &Type) {
274 switch (C) {
275 case 'a':
276 Type = BasicType::I8;
277 return true;
278 case 'b':
279 Type = BasicType::Bool;
280 return true;
281 case 'c':
282 Type = BasicType::Char;
283 return true;
284 case 'd':
285 Type = BasicType::F64;
286 return true;
287 case 'e':
288 Type = BasicType::Str;
289 return true;
290 case 'f':
291 Type = BasicType::F32;
292 return true;
293 case 'h':
294 Type = BasicType::U8;
295 return true;
296 case 'i':
297 Type = BasicType::ISize;
298 return true;
299 case 'j':
300 Type = BasicType::USize;
301 return true;
302 case 'l':
303 Type = BasicType::I32;
304 return true;
305 case 'm':
306 Type = BasicType::U32;
307 return true;
308 case 'n':
309 Type = BasicType::I128;
310 return true;
311 case 'o':
312 Type = BasicType::U128;
313 return true;
314 case 'p':
315 Type = BasicType::Placeholder;
316 return true;
317 case 's':
318 Type = BasicType::I16;
319 return true;
320 case 't':
321 Type = BasicType::U16;
322 return true;
323 case 'u':
324 Type = BasicType::Unit;
325 return true;
326 case 'v':
327 Type = BasicType::Variadic;
328 return true;
329 case 'x':
330 Type = BasicType::I64;
331 return true;
332 case 'y':
333 Type = BasicType::U64;
334 return true;
335 case 'z':
336 Type = BasicType::Never;
337 return true;
338 default:
339 return false;
340 }
341 }
342
printBasicType(BasicType Type)343 void Demangler::printBasicType(BasicType Type) {
344 switch (Type) {
345 case BasicType::Bool:
346 print("bool");
347 break;
348 case BasicType::Char:
349 print("char");
350 break;
351 case BasicType::I8:
352 print("i8");
353 break;
354 case BasicType::I16:
355 print("i16");
356 break;
357 case BasicType::I32:
358 print("i32");
359 break;
360 case BasicType::I64:
361 print("i64");
362 break;
363 case BasicType::I128:
364 print("i128");
365 break;
366 case BasicType::ISize:
367 print("isize");
368 break;
369 case BasicType::U8:
370 print("u8");
371 break;
372 case BasicType::U16:
373 print("u16");
374 break;
375 case BasicType::U32:
376 print("u32");
377 break;
378 case BasicType::U64:
379 print("u64");
380 break;
381 case BasicType::U128:
382 print("u128");
383 break;
384 case BasicType::USize:
385 print("usize");
386 break;
387 case BasicType::F32:
388 print("f32");
389 break;
390 case BasicType::F64:
391 print("f64");
392 break;
393 case BasicType::Str:
394 print("str");
395 break;
396 case BasicType::Placeholder:
397 print("_");
398 break;
399 case BasicType::Unit:
400 print("()");
401 break;
402 case BasicType::Variadic:
403 print("...");
404 break;
405 case BasicType::Never:
406 print("!");
407 break;
408 }
409 }
410
411 // <type> = | <basic-type>
412 // | <path> // named type
413 // | "A" <type> <const> // [T; N]
414 // | "S" <type> // [T]
415 // | "T" {<type>} "E" // (T1, T2, T3, ...)
416 // | "R" [<lifetime>] <type> // &T
417 // | "Q" [<lifetime>] <type> // &mut T
418 // | "P" <type> // *const T
419 // | "O" <type> // *mut T
420 // | "F" <fn-sig> // fn(...) -> ...
421 // | "D" <dyn-bounds> <lifetime> // dyn Trait<Assoc = X> + Send + 'a
422 // | <backref> // backref
demangleType()423 void Demangler::demangleType() {
424 size_t Start = Position;
425
426 char C = consume();
427 BasicType Type;
428 if (parseBasicType(C, Type))
429 return printBasicType(Type);
430
431 switch (C) {
432 case 'A':
433 print("[");
434 demangleType();
435 print("; ");
436 demangleConst();
437 print("]");
438 break;
439 case 'S':
440 print("[");
441 demangleType();
442 print("]");
443 break;
444 case 'T': {
445 print("(");
446 size_t I = 0;
447 for (; !Error && !consumeIf('E'); ++I) {
448 if (I > 0)
449 print(", ");
450 demangleType();
451 }
452 if (I == 1)
453 print(",");
454 print(")");
455 break;
456 }
457 case 'R':
458 print("&");
459 // FIXME demangle [<lifetime>].
460 demangleType();
461 break;
462 case 'Q':
463 print("&mut ");
464 // FIXME demangle [<lifetime>].
465 demangleType();
466 break;
467 case 'P':
468 print("*const ");
469 demangleType();
470 break;
471 case 'O':
472 print("*mut ");
473 demangleType();
474 break;
475 case 'F':
476 demangleFnSig();
477 break;
478 default:
479 Position = Start;
480 demanglePath(rust_demangle::InType::Yes);
481 break;
482 }
483 }
484
485 // <fn-sig> := [<binder>] ["U"] ["K" <abi>] {<type>} "E" <type>
486 // <abi> = "C"
487 // | <undisambiguated-identifier>
demangleFnSig()488 void Demangler::demangleFnSig() {
489 // FIXME demangle binder.
490
491 if (consumeIf('U'))
492 print("unsafe ");
493
494 if (consumeIf('K')) {
495 print("extern \"");
496 if (consumeIf('C')) {
497 print("C");
498 } else {
499 Identifier Ident = parseIdentifier();
500 for (char C : Ident.Name) {
501 // When mangling ABI string, the "-" is replaced with "_".
502 if (C == '_')
503 C = '-';
504 print(C);
505 }
506 }
507 print("\" ");
508 }
509
510 print("fn(");
511 for (size_t I = 0; !Error && !consumeIf('E'); ++I) {
512 if (I > 0)
513 print(", ");
514 demangleType();
515 }
516 print(")");
517
518 if (consumeIf('u')) {
519 // Skip the unit type from the output.
520 } else {
521 print(" -> ");
522 demangleType();
523 }
524 }
525
526 // <const> = <basic-type> <const-data>
527 // | "p" // placeholder
528 // | <backref>
demangleConst()529 void Demangler::demangleConst() {
530 BasicType Type;
531 if (parseBasicType(consume(), Type)) {
532 switch (Type) {
533 case BasicType::I8:
534 case BasicType::I16:
535 case BasicType::I32:
536 case BasicType::I64:
537 case BasicType::I128:
538 case BasicType::ISize:
539 case BasicType::U8:
540 case BasicType::U16:
541 case BasicType::U32:
542 case BasicType::U64:
543 case BasicType::U128:
544 case BasicType::USize:
545 demangleConstInt();
546 break;
547 case BasicType::Bool:
548 demangleConstBool();
549 break;
550 case BasicType::Char:
551 demangleConstChar();
552 break;
553 case BasicType::Placeholder:
554 print('_');
555 break;
556 default:
557 // FIXME demangle backreferences.
558 Error = true;
559 break;
560 }
561 } else {
562 Error = true;
563 }
564 }
565
566 // <const-data> = ["n"] <hex-number>
demangleConstInt()567 void Demangler::demangleConstInt() {
568 if (consumeIf('n'))
569 print('-');
570
571 StringView HexDigits;
572 uint64_t Value = parseHexNumber(HexDigits);
573 if (HexDigits.size() <= 16) {
574 printDecimalNumber(Value);
575 } else {
576 print("0x");
577 print(HexDigits);
578 }
579 }
580
581 // <const-data> = "0_" // false
582 // | "1_" // true
demangleConstBool()583 void Demangler::demangleConstBool() {
584 StringView HexDigits;
585 parseHexNumber(HexDigits);
586 if (HexDigits == "0")
587 print("false");
588 else if (HexDigits == "1")
589 print("true");
590 else
591 Error = true;
592 }
593
594 /// Returns true if CodePoint represents a printable ASCII character.
isAsciiPrintable(uint64_t CodePoint)595 static bool isAsciiPrintable(uint64_t CodePoint) {
596 return 0x20 <= CodePoint && CodePoint <= 0x7e;
597 }
598
599 // <const-data> = <hex-number>
demangleConstChar()600 void Demangler::demangleConstChar() {
601 StringView HexDigits;
602 uint64_t CodePoint = parseHexNumber(HexDigits);
603 if (Error || HexDigits.size() > 6) {
604 Error = true;
605 return;
606 }
607
608 print("'");
609 switch (CodePoint) {
610 case '\t':
611 print(R"(\t)");
612 break;
613 case '\r':
614 print(R"(\r)");
615 break;
616 case '\n':
617 print(R"(\n)");
618 break;
619 case '\\':
620 print(R"(\\)");
621 break;
622 case '"':
623 print(R"(")");
624 break;
625 case '\'':
626 print(R"(\')");
627 break;
628 default:
629 if (isAsciiPrintable(CodePoint)) {
630 char C = CodePoint;
631 print(C);
632 } else {
633 print(R"(\u{)");
634 print(HexDigits);
635 print('}');
636 }
637 break;
638 }
639 print('\'');
640 }
641
642 // <undisambiguated-identifier> = ["u"] <decimal-number> ["_"] <bytes>
parseIdentifier()643 Identifier Demangler::parseIdentifier() {
644 bool Punycode = consumeIf('u');
645 uint64_t Bytes = parseDecimalNumber();
646
647 // Underscore resolves the ambiguity when identifier starts with a decimal
648 // digit or another underscore.
649 consumeIf('_');
650
651 if (Error || Bytes > Input.size() - Position) {
652 Error = true;
653 return {};
654 }
655 StringView S = Input.substr(Position, Bytes);
656 Position += Bytes;
657
658 if (!std::all_of(S.begin(), S.end(), isValid)) {
659 Error = true;
660 return {};
661 }
662
663 return {S, Punycode};
664 }
665
666 // Parses optional base 62 number. The presence of a number is determined using
667 // Tag. Returns 0 when tag is absent and parsed value + 1 otherwise.
parseOptionalBase62Number(char Tag)668 uint64_t Demangler::parseOptionalBase62Number(char Tag) {
669 if (!consumeIf(Tag))
670 return 0;
671
672 uint64_t N = parseBase62Number();
673 if (Error || !addAssign(N, 1))
674 return 0;
675
676 return N;
677 }
678
679 // Parses base 62 number with <0-9a-zA-Z> as digits. Number is terminated by
680 // "_". All values are offset by 1, so that "_" encodes 0, "0_" encodes 1,
681 // "1_" encodes 2, etc.
682 //
683 // <base-62-number> = {<0-9a-zA-Z>} "_"
parseBase62Number()684 uint64_t Demangler::parseBase62Number() {
685 if (consumeIf('_'))
686 return 0;
687
688 uint64_t Value = 0;
689
690 while (true) {
691 uint64_t Digit;
692 char C = consume();
693
694 if (C == '_') {
695 break;
696 } else if (isDigit(C)) {
697 Digit = C - '0';
698 } else if (isLower(C)) {
699 Digit = 10 + (C - 'a');
700 } else if (isUpper(C)) {
701 Digit = 10 + 26 + (C - 'A');
702 } else {
703 Error = true;
704 return 0;
705 }
706
707 if (!mulAssign(Value, 62))
708 return 0;
709
710 if (!addAssign(Value, Digit))
711 return 0;
712 }
713
714 if (!addAssign(Value, 1))
715 return 0;
716
717 return Value;
718 }
719
720 // Parses a decimal number that had been encoded without any leading zeros.
721 //
722 // <decimal-number> = "0"
723 // | <1-9> {<0-9>}
parseDecimalNumber()724 uint64_t Demangler::parseDecimalNumber() {
725 char C = look();
726 if (!isDigit(C)) {
727 Error = true;
728 return 0;
729 }
730
731 if (C == '0') {
732 consume();
733 return 0;
734 }
735
736 uint64_t Value = 0;
737
738 while (isDigit(look())) {
739 if (!mulAssign(Value, 10)) {
740 Error = true;
741 return 0;
742 }
743
744 uint64_t D = consume() - '0';
745 if (!addAssign(Value, D))
746 return 0;
747 }
748
749 return Value;
750 }
751
752 // Parses a hexadecimal number with <0-9a-f> as a digits. Returns the parsed
753 // value and stores hex digits in HexDigits. The return value is unspecified if
754 // HexDigits.size() > 16.
755 //
756 // <hex-number> = "0_"
757 // | <1-9a-f> {<0-9a-f>} "_"
parseHexNumber(StringView & HexDigits)758 uint64_t Demangler::parseHexNumber(StringView &HexDigits) {
759 size_t Start = Position;
760 uint64_t Value = 0;
761
762 if (!isHexDigit(look()))
763 Error = true;
764
765 if (consumeIf('0')) {
766 if (!consumeIf('_'))
767 Error = true;
768 } else {
769 while (!Error && !consumeIf('_')) {
770 char C = consume();
771 Value *= 16;
772 if (isDigit(C))
773 Value += C - '0';
774 else if ('a' <= C && C <= 'f')
775 Value += 10 + (C - 'a');
776 else
777 Error = true;
778 }
779 }
780
781 if (Error) {
782 HexDigits = StringView();
783 return 0;
784 }
785
786 size_t End = Position - 1;
787 assert(Start < End);
788 HexDigits = Input.substr(Start, End - Start);
789 return Value;
790 }
791