1 //===--- RustDemangle.cpp ---------------------------------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file defines a demangler for Rust v0 mangled symbols as specified in 10 // https://rust-lang.github.io/rfcs/2603-rust-symbol-name-mangling-v0.html 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "llvm/Demangle/RustDemangle.h" 15 #include "llvm/Demangle/Demangle.h" 16 17 #include <algorithm> 18 #include <cassert> 19 #include <cstring> 20 #include <limits> 21 22 using namespace llvm; 23 using namespace rust_demangle; 24 25 char *llvm::rustDemangle(const char *MangledName, char *Buf, size_t *N, 26 int *Status) { 27 if (MangledName == nullptr || (Buf != nullptr && N == nullptr)) { 28 if (Status != nullptr) 29 *Status = demangle_invalid_args; 30 return nullptr; 31 } 32 33 // Return early if mangled name doesn't look like a Rust symbol. 34 StringView Mangled(MangledName); 35 if (!Mangled.startsWith("_R")) { 36 if (Status != nullptr) 37 *Status = demangle_invalid_mangled_name; 38 return nullptr; 39 } 40 41 Demangler D; 42 if (!initializeOutputStream(nullptr, nullptr, D.Output, 1024)) { 43 if (Status != nullptr) 44 *Status = demangle_memory_alloc_failure; 45 return nullptr; 46 } 47 48 if (!D.demangle(Mangled)) { 49 if (Status != nullptr) 50 *Status = demangle_invalid_mangled_name; 51 std::free(D.Output.getBuffer()); 52 return nullptr; 53 } 54 55 D.Output += '\0'; 56 char *Demangled = D.Output.getBuffer(); 57 size_t DemangledLen = D.Output.getCurrentPosition(); 58 59 if (Buf != nullptr) { 60 if (DemangledLen <= *N) { 61 std::memcpy(Buf, Demangled, DemangledLen); 62 std::free(Demangled); 63 Demangled = Buf; 64 } else { 65 std::free(Buf); 66 } 67 } 68 69 if (N != nullptr) 70 *N = DemangledLen; 71 72 if (Status != nullptr) 73 *Status = demangle_success; 74 75 return Demangled; 76 } 77 78 Demangler::Demangler(size_t MaxRecursionLevel) 79 : MaxRecursionLevel(MaxRecursionLevel) {} 80 81 static inline bool isDigit(const char C) { return '0' <= C && C <= '9'; } 82 83 static inline bool isHexDigit(const char C) { 84 return ('0' <= C && C <= '9') || ('a' <= C && C <= 'f'); 85 } 86 87 static inline bool isLower(const char C) { return 'a' <= C && C <= 'z'; } 88 89 static inline bool isUpper(const char C) { return 'A' <= C && C <= 'Z'; } 90 91 /// Returns true if C is a valid mangled character: <0-9a-zA-Z_>. 92 static inline bool isValid(const char C) { 93 return isDigit(C) || isLower(C) || isUpper(C) || C == '_'; 94 } 95 96 // Demangles Rust v0 mangled symbol. Returns true when successful, and false 97 // otherwise. The demangled symbol is stored in Output field. It is 98 // responsibility of the caller to free the memory behind the output stream. 99 // 100 // <symbol-name> = "_R" <path> [<instantiating-crate>] 101 bool Demangler::demangle(StringView Mangled) { 102 Position = 0; 103 Error = false; 104 Print = true; 105 RecursionLevel = 0; 106 BoundLifetimes = 0; 107 108 if (!Mangled.consumeFront("_R")) { 109 Error = true; 110 return false; 111 } 112 Input = Mangled; 113 114 demanglePath(rust_demangle::InType::No); 115 116 // FIXME parse optional <instantiating-crate>. 117 118 if (Position != Input.size()) 119 Error = true; 120 121 return !Error; 122 } 123 124 // Demangles a path. InType indicates whether a path is inside a type. 125 // 126 // <path> = "C" <identifier> // crate root 127 // | "M" <impl-path> <type> // <T> (inherent impl) 128 // | "X" <impl-path> <type> <path> // <T as Trait> (trait impl) 129 // | "Y" <type> <path> // <T as Trait> (trait definition) 130 // | "N" <ns> <path> <identifier> // ...::ident (nested path) 131 // | "I" <path> {<generic-arg>} "E" // ...<T, U> (generic args) 132 // | <backref> 133 // <identifier> = [<disambiguator>] <undisambiguated-identifier> 134 // <ns> = "C" // closure 135 // | "S" // shim 136 // | <A-Z> // other special namespaces 137 // | <a-z> // internal namespaces 138 void Demangler::demanglePath(InType InType) { 139 if (Error || RecursionLevel >= MaxRecursionLevel) { 140 Error = true; 141 return; 142 } 143 SwapAndRestore<size_t> SaveRecursionLevel(RecursionLevel, RecursionLevel + 1); 144 145 switch (consume()) { 146 case 'C': { 147 parseOptionalBase62Number('s'); 148 Identifier Ident = parseIdentifier(); 149 print(Ident.Name); 150 break; 151 } 152 case 'M': { 153 demangleImplPath(InType); 154 print("<"); 155 demangleType(); 156 print(">"); 157 break; 158 } 159 case 'X': { 160 demangleImplPath(InType); 161 print("<"); 162 demangleType(); 163 print(" as "); 164 demanglePath(rust_demangle::InType::Yes); 165 print(">"); 166 break; 167 } 168 case 'Y': { 169 print("<"); 170 demangleType(); 171 print(" as "); 172 demanglePath(rust_demangle::InType::Yes); 173 print(">"); 174 break; 175 } 176 case 'N': { 177 char NS = consume(); 178 if (!isLower(NS) && !isUpper(NS)) { 179 Error = true; 180 break; 181 } 182 demanglePath(InType); 183 184 uint64_t Disambiguator = parseOptionalBase62Number('s'); 185 Identifier Ident = parseIdentifier(); 186 187 if (isUpper(NS)) { 188 // Special namespaces 189 print("::{"); 190 if (NS == 'C') 191 print("closure"); 192 else if (NS == 'S') 193 print("shim"); 194 else 195 print(NS); 196 if (!Ident.empty()) { 197 print(":"); 198 print(Ident.Name); 199 } 200 print('#'); 201 printDecimalNumber(Disambiguator); 202 print('}'); 203 } else { 204 // Implementation internal namespaces. 205 if (!Ident.empty()) { 206 print("::"); 207 print(Ident.Name); 208 } 209 } 210 break; 211 } 212 case 'I': { 213 demanglePath(InType); 214 // Omit "::" when in a type, where it is optional. 215 if (InType == rust_demangle::InType::No) 216 print("::"); 217 print("<"); 218 for (size_t I = 0; !Error && !consumeIf('E'); ++I) { 219 if (I > 0) 220 print(", "); 221 demangleGenericArg(); 222 } 223 print(">"); 224 break; 225 } 226 default: 227 // FIXME parse remaining productions. 228 Error = true; 229 break; 230 } 231 } 232 233 // <impl-path> = [<disambiguator>] <path> 234 // <disambiguator> = "s" <base-62-number> 235 void Demangler::demangleImplPath(InType InType) { 236 SwapAndRestore<bool> SavePrint(Print, false); 237 parseOptionalBase62Number('s'); 238 demanglePath(InType); 239 } 240 241 // <generic-arg> = <lifetime> 242 // | <type> 243 // | "K" <const> 244 // <lifetime> = "L" <base-62-number> 245 void Demangler::demangleGenericArg() { 246 if (consumeIf('L')) 247 printLifetime(parseBase62Number()); 248 else if (consumeIf('K')) 249 demangleConst(); 250 else 251 demangleType(); 252 } 253 254 // <basic-type> = "a" // i8 255 // | "b" // bool 256 // | "c" // char 257 // | "d" // f64 258 // | "e" // str 259 // | "f" // f32 260 // | "h" // u8 261 // | "i" // isize 262 // | "j" // usize 263 // | "l" // i32 264 // | "m" // u32 265 // | "n" // i128 266 // | "o" // u128 267 // | "s" // i16 268 // | "t" // u16 269 // | "u" // () 270 // | "v" // ... 271 // | "x" // i64 272 // | "y" // u64 273 // | "z" // ! 274 // | "p" // placeholder (e.g. for generic params), shown as _ 275 static bool parseBasicType(char C, BasicType &Type) { 276 switch (C) { 277 case 'a': 278 Type = BasicType::I8; 279 return true; 280 case 'b': 281 Type = BasicType::Bool; 282 return true; 283 case 'c': 284 Type = BasicType::Char; 285 return true; 286 case 'd': 287 Type = BasicType::F64; 288 return true; 289 case 'e': 290 Type = BasicType::Str; 291 return true; 292 case 'f': 293 Type = BasicType::F32; 294 return true; 295 case 'h': 296 Type = BasicType::U8; 297 return true; 298 case 'i': 299 Type = BasicType::ISize; 300 return true; 301 case 'j': 302 Type = BasicType::USize; 303 return true; 304 case 'l': 305 Type = BasicType::I32; 306 return true; 307 case 'm': 308 Type = BasicType::U32; 309 return true; 310 case 'n': 311 Type = BasicType::I128; 312 return true; 313 case 'o': 314 Type = BasicType::U128; 315 return true; 316 case 'p': 317 Type = BasicType::Placeholder; 318 return true; 319 case 's': 320 Type = BasicType::I16; 321 return true; 322 case 't': 323 Type = BasicType::U16; 324 return true; 325 case 'u': 326 Type = BasicType::Unit; 327 return true; 328 case 'v': 329 Type = BasicType::Variadic; 330 return true; 331 case 'x': 332 Type = BasicType::I64; 333 return true; 334 case 'y': 335 Type = BasicType::U64; 336 return true; 337 case 'z': 338 Type = BasicType::Never; 339 return true; 340 default: 341 return false; 342 } 343 } 344 345 void Demangler::printBasicType(BasicType Type) { 346 switch (Type) { 347 case BasicType::Bool: 348 print("bool"); 349 break; 350 case BasicType::Char: 351 print("char"); 352 break; 353 case BasicType::I8: 354 print("i8"); 355 break; 356 case BasicType::I16: 357 print("i16"); 358 break; 359 case BasicType::I32: 360 print("i32"); 361 break; 362 case BasicType::I64: 363 print("i64"); 364 break; 365 case BasicType::I128: 366 print("i128"); 367 break; 368 case BasicType::ISize: 369 print("isize"); 370 break; 371 case BasicType::U8: 372 print("u8"); 373 break; 374 case BasicType::U16: 375 print("u16"); 376 break; 377 case BasicType::U32: 378 print("u32"); 379 break; 380 case BasicType::U64: 381 print("u64"); 382 break; 383 case BasicType::U128: 384 print("u128"); 385 break; 386 case BasicType::USize: 387 print("usize"); 388 break; 389 case BasicType::F32: 390 print("f32"); 391 break; 392 case BasicType::F64: 393 print("f64"); 394 break; 395 case BasicType::Str: 396 print("str"); 397 break; 398 case BasicType::Placeholder: 399 print("_"); 400 break; 401 case BasicType::Unit: 402 print("()"); 403 break; 404 case BasicType::Variadic: 405 print("..."); 406 break; 407 case BasicType::Never: 408 print("!"); 409 break; 410 } 411 } 412 413 // <type> = | <basic-type> 414 // | <path> // named type 415 // | "A" <type> <const> // [T; N] 416 // | "S" <type> // [T] 417 // | "T" {<type>} "E" // (T1, T2, T3, ...) 418 // | "R" [<lifetime>] <type> // &T 419 // | "Q" [<lifetime>] <type> // &mut T 420 // | "P" <type> // *const T 421 // | "O" <type> // *mut T 422 // | "F" <fn-sig> // fn(...) -> ... 423 // | "D" <dyn-bounds> <lifetime> // dyn Trait<Assoc = X> + Send + 'a 424 // | <backref> // backref 425 void Demangler::demangleType() { 426 size_t Start = Position; 427 428 char C = consume(); 429 BasicType Type; 430 if (parseBasicType(C, Type)) 431 return printBasicType(Type); 432 433 switch (C) { 434 case 'A': 435 print("["); 436 demangleType(); 437 print("; "); 438 demangleConst(); 439 print("]"); 440 break; 441 case 'S': 442 print("["); 443 demangleType(); 444 print("]"); 445 break; 446 case 'T': { 447 print("("); 448 size_t I = 0; 449 for (; !Error && !consumeIf('E'); ++I) { 450 if (I > 0) 451 print(", "); 452 demangleType(); 453 } 454 if (I == 1) 455 print(","); 456 print(")"); 457 break; 458 } 459 case 'R': 460 case 'Q': 461 print('&'); 462 if (consumeIf('L')) { 463 if (auto Lifetime = parseBase62Number()) { 464 printLifetime(Lifetime); 465 print(' '); 466 } 467 } 468 if (C == 'Q') 469 print("mut "); 470 demangleType(); 471 break; 472 case 'P': 473 print("*const "); 474 demangleType(); 475 break; 476 case 'O': 477 print("*mut "); 478 demangleType(); 479 break; 480 case 'F': 481 demangleFnSig(); 482 break; 483 case 'D': 484 demangleDynBounds(); 485 if (consumeIf('L')) { 486 if (auto Lifetime = parseBase62Number()) { 487 print(" + "); 488 printLifetime(Lifetime); 489 } 490 } else { 491 Error = true; 492 } 493 break; 494 default: 495 Position = Start; 496 demanglePath(rust_demangle::InType::Yes); 497 break; 498 } 499 } 500 501 // <fn-sig> := [<binder>] ["U"] ["K" <abi>] {<type>} "E" <type> 502 // <abi> = "C" 503 // | <undisambiguated-identifier> 504 void Demangler::demangleFnSig() { 505 SwapAndRestore<size_t> SaveBoundLifetimes(BoundLifetimes, BoundLifetimes); 506 demangleOptionalBinder(); 507 508 if (consumeIf('U')) 509 print("unsafe "); 510 511 if (consumeIf('K')) { 512 print("extern \""); 513 if (consumeIf('C')) { 514 print("C"); 515 } else { 516 Identifier Ident = parseIdentifier(); 517 for (char C : Ident.Name) { 518 // When mangling ABI string, the "-" is replaced with "_". 519 if (C == '_') 520 C = '-'; 521 print(C); 522 } 523 } 524 print("\" "); 525 } 526 527 print("fn("); 528 for (size_t I = 0; !Error && !consumeIf('E'); ++I) { 529 if (I > 0) 530 print(", "); 531 demangleType(); 532 } 533 print(")"); 534 535 if (consumeIf('u')) { 536 // Skip the unit type from the output. 537 } else { 538 print(" -> "); 539 demangleType(); 540 } 541 } 542 543 // <dyn-bounds> = [<binder>] {<dyn-trait>} "E" 544 void Demangler::demangleDynBounds() { 545 SwapAndRestore<size_t> SaveBoundLifetimes(BoundLifetimes, BoundLifetimes); 546 print("dyn "); 547 demangleOptionalBinder(); 548 // FIXME demangle {dyn-trait} 549 if (!consumeIf('E')) 550 Error = true; 551 } 552 553 // Demangles optional binder and updates the number of bound lifetimes. 554 // 555 // <binder> = "G" <base-62-number> 556 void Demangler::demangleOptionalBinder() { 557 uint64_t Binder = parseOptionalBase62Number('G'); 558 if (Error || Binder == 0) 559 return; 560 561 // In valid inputs each bound lifetime is referenced later. Referencing a 562 // lifetime requires at least one byte of input. Reject inputs that are too 563 // short to reference all bound lifetimes. Otherwise demangling of invalid 564 // binders could generate excessive amounts of output. 565 if (Binder >= Input.size() - BoundLifetimes) { 566 Error = true; 567 return; 568 } 569 570 print("for<"); 571 for (size_t I = 0; I != Binder; ++I) { 572 BoundLifetimes += 1; 573 if (I > 0) 574 print(", "); 575 printLifetime(1); 576 } 577 print("> "); 578 } 579 580 // <const> = <basic-type> <const-data> 581 // | "p" // placeholder 582 // | <backref> 583 void Demangler::demangleConst() { 584 BasicType Type; 585 if (parseBasicType(consume(), Type)) { 586 switch (Type) { 587 case BasicType::I8: 588 case BasicType::I16: 589 case BasicType::I32: 590 case BasicType::I64: 591 case BasicType::I128: 592 case BasicType::ISize: 593 case BasicType::U8: 594 case BasicType::U16: 595 case BasicType::U32: 596 case BasicType::U64: 597 case BasicType::U128: 598 case BasicType::USize: 599 demangleConstInt(); 600 break; 601 case BasicType::Bool: 602 demangleConstBool(); 603 break; 604 case BasicType::Char: 605 demangleConstChar(); 606 break; 607 case BasicType::Placeholder: 608 print('_'); 609 break; 610 default: 611 // FIXME demangle backreferences. 612 Error = true; 613 break; 614 } 615 } else { 616 Error = true; 617 } 618 } 619 620 // <const-data> = ["n"] <hex-number> 621 void Demangler::demangleConstInt() { 622 if (consumeIf('n')) 623 print('-'); 624 625 StringView HexDigits; 626 uint64_t Value = parseHexNumber(HexDigits); 627 if (HexDigits.size() <= 16) { 628 printDecimalNumber(Value); 629 } else { 630 print("0x"); 631 print(HexDigits); 632 } 633 } 634 635 // <const-data> = "0_" // false 636 // | "1_" // true 637 void Demangler::demangleConstBool() { 638 StringView HexDigits; 639 parseHexNumber(HexDigits); 640 if (HexDigits == "0") 641 print("false"); 642 else if (HexDigits == "1") 643 print("true"); 644 else 645 Error = true; 646 } 647 648 /// Returns true if CodePoint represents a printable ASCII character. 649 static bool isAsciiPrintable(uint64_t CodePoint) { 650 return 0x20 <= CodePoint && CodePoint <= 0x7e; 651 } 652 653 // <const-data> = <hex-number> 654 void Demangler::demangleConstChar() { 655 StringView HexDigits; 656 uint64_t CodePoint = parseHexNumber(HexDigits); 657 if (Error || HexDigits.size() > 6) { 658 Error = true; 659 return; 660 } 661 662 print("'"); 663 switch (CodePoint) { 664 case '\t': 665 print(R"(\t)"); 666 break; 667 case '\r': 668 print(R"(\r)"); 669 break; 670 case '\n': 671 print(R"(\n)"); 672 break; 673 case '\\': 674 print(R"(\\)"); 675 break; 676 case '"': 677 print(R"(")"); 678 break; 679 case '\'': 680 print(R"(\')"); 681 break; 682 default: 683 if (isAsciiPrintable(CodePoint)) { 684 char C = CodePoint; 685 print(C); 686 } else { 687 print(R"(\u{)"); 688 print(HexDigits); 689 print('}'); 690 } 691 break; 692 } 693 print('\''); 694 } 695 696 // <undisambiguated-identifier> = ["u"] <decimal-number> ["_"] <bytes> 697 Identifier Demangler::parseIdentifier() { 698 bool Punycode = consumeIf('u'); 699 uint64_t Bytes = parseDecimalNumber(); 700 701 // Underscore resolves the ambiguity when identifier starts with a decimal 702 // digit or another underscore. 703 consumeIf('_'); 704 705 if (Error || Bytes > Input.size() - Position) { 706 Error = true; 707 return {}; 708 } 709 StringView S = Input.substr(Position, Bytes); 710 Position += Bytes; 711 712 if (!std::all_of(S.begin(), S.end(), isValid)) { 713 Error = true; 714 return {}; 715 } 716 717 return {S, Punycode}; 718 } 719 720 // Parses optional base 62 number. The presence of a number is determined using 721 // Tag. Returns 0 when tag is absent and parsed value + 1 otherwise 722 // 723 // This function is indended for parsing disambiguators and binders which when 724 // not present have their value interpreted as 0, and otherwise as decoded 725 // value + 1. For example for binders, value for "G_" is 1, for "G0_" value is 726 // 2. When "G" is absent value is 0. 727 uint64_t Demangler::parseOptionalBase62Number(char Tag) { 728 if (!consumeIf(Tag)) 729 return 0; 730 731 uint64_t N = parseBase62Number(); 732 if (Error || !addAssign(N, 1)) 733 return 0; 734 735 return N; 736 } 737 738 // Parses base 62 number with <0-9a-zA-Z> as digits. Number is terminated by 739 // "_". All values are offset by 1, so that "_" encodes 0, "0_" encodes 1, 740 // "1_" encodes 2, etc. 741 // 742 // <base-62-number> = {<0-9a-zA-Z>} "_" 743 uint64_t Demangler::parseBase62Number() { 744 if (consumeIf('_')) 745 return 0; 746 747 uint64_t Value = 0; 748 749 while (true) { 750 uint64_t Digit; 751 char C = consume(); 752 753 if (C == '_') { 754 break; 755 } else if (isDigit(C)) { 756 Digit = C - '0'; 757 } else if (isLower(C)) { 758 Digit = 10 + (C - 'a'); 759 } else if (isUpper(C)) { 760 Digit = 10 + 26 + (C - 'A'); 761 } else { 762 Error = true; 763 return 0; 764 } 765 766 if (!mulAssign(Value, 62)) 767 return 0; 768 769 if (!addAssign(Value, Digit)) 770 return 0; 771 } 772 773 if (!addAssign(Value, 1)) 774 return 0; 775 776 return Value; 777 } 778 779 // Parses a decimal number that had been encoded without any leading zeros. 780 // 781 // <decimal-number> = "0" 782 // | <1-9> {<0-9>} 783 uint64_t Demangler::parseDecimalNumber() { 784 char C = look(); 785 if (!isDigit(C)) { 786 Error = true; 787 return 0; 788 } 789 790 if (C == '0') { 791 consume(); 792 return 0; 793 } 794 795 uint64_t Value = 0; 796 797 while (isDigit(look())) { 798 if (!mulAssign(Value, 10)) { 799 Error = true; 800 return 0; 801 } 802 803 uint64_t D = consume() - '0'; 804 if (!addAssign(Value, D)) 805 return 0; 806 } 807 808 return Value; 809 } 810 811 // Parses a hexadecimal number with <0-9a-f> as a digits. Returns the parsed 812 // value and stores hex digits in HexDigits. The return value is unspecified if 813 // HexDigits.size() > 16. 814 // 815 // <hex-number> = "0_" 816 // | <1-9a-f> {<0-9a-f>} "_" 817 uint64_t Demangler::parseHexNumber(StringView &HexDigits) { 818 size_t Start = Position; 819 uint64_t Value = 0; 820 821 if (!isHexDigit(look())) 822 Error = true; 823 824 if (consumeIf('0')) { 825 if (!consumeIf('_')) 826 Error = true; 827 } else { 828 while (!Error && !consumeIf('_')) { 829 char C = consume(); 830 Value *= 16; 831 if (isDigit(C)) 832 Value += C - '0'; 833 else if ('a' <= C && C <= 'f') 834 Value += 10 + (C - 'a'); 835 else 836 Error = true; 837 } 838 } 839 840 if (Error) { 841 HexDigits = StringView(); 842 return 0; 843 } 844 845 size_t End = Position - 1; 846 assert(Start < End); 847 HexDigits = Input.substr(Start, End - Start); 848 return Value; 849 } 850 851 // Prints a lifetime. An index 0 always represents an erased lifetime. Indices 852 // starting from 1, are De Bruijn indices, referring to higher-ranked lifetimes 853 // bound by one of the enclosing binders. 854 void Demangler::printLifetime(uint64_t Index) { 855 if (Index == 0) { 856 print("'_"); 857 return; 858 } 859 860 if (Index - 1 >= BoundLifetimes) { 861 Error = true; 862 return; 863 } 864 865 uint64_t Depth = BoundLifetimes - Index; 866 print('\''); 867 if (Depth < 26) { 868 char C = 'a' + Depth; 869 print(C); 870 } else { 871 print('z'); 872 printDecimalNumber(Depth - 26 + 1); 873 } 874 } 875