1 //===--- RustDemangle.cpp ---------------------------------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file defines a demangler for Rust v0 mangled symbols as specified in 10 // https://rust-lang.github.io/rfcs/2603-rust-symbol-name-mangling-v0.html 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "llvm/Demangle/RustDemangle.h" 15 #include "llvm/Demangle/Demangle.h" 16 17 #include <algorithm> 18 #include <cassert> 19 #include <cstring> 20 #include <limits> 21 22 using namespace llvm; 23 using namespace rust_demangle; 24 25 char *llvm::rustDemangle(const char *MangledName, char *Buf, size_t *N, 26 int *Status) { 27 if (MangledName == nullptr || (Buf != nullptr && N == nullptr)) { 28 if (Status != nullptr) 29 *Status = demangle_invalid_args; 30 return nullptr; 31 } 32 33 // Return early if mangled name doesn't look like a Rust symbol. 34 StringView Mangled(MangledName); 35 if (!Mangled.startsWith("_R")) { 36 if (Status != nullptr) 37 *Status = demangle_invalid_mangled_name; 38 return nullptr; 39 } 40 41 Demangler D; 42 if (!initializeOutputStream(nullptr, nullptr, D.Output, 1024)) { 43 if (Status != nullptr) 44 *Status = demangle_memory_alloc_failure; 45 return nullptr; 46 } 47 48 if (!D.demangle(Mangled)) { 49 if (Status != nullptr) 50 *Status = demangle_invalid_mangled_name; 51 std::free(D.Output.getBuffer()); 52 return nullptr; 53 } 54 55 D.Output += '\0'; 56 char *Demangled = D.Output.getBuffer(); 57 size_t DemangledLen = D.Output.getCurrentPosition(); 58 59 if (Buf != nullptr) { 60 if (DemangledLen <= *N) { 61 std::memcpy(Buf, Demangled, DemangledLen); 62 std::free(Demangled); 63 Demangled = Buf; 64 } else { 65 std::free(Buf); 66 } 67 } 68 69 if (N != nullptr) 70 *N = DemangledLen; 71 72 if (Status != nullptr) 73 *Status = demangle_success; 74 75 return Demangled; 76 } 77 78 Demangler::Demangler(size_t MaxRecursionLevel) 79 : MaxRecursionLevel(MaxRecursionLevel) {} 80 81 static inline bool isDigit(const char C) { return '0' <= C && C <= '9'; } 82 83 static inline bool isHexDigit(const char C) { 84 return ('0' <= C && C <= '9') || ('a' <= C && C <= 'f'); 85 } 86 87 static inline bool isLower(const char C) { return 'a' <= C && C <= 'z'; } 88 89 static inline bool isUpper(const char C) { return 'A' <= C && C <= 'Z'; } 90 91 /// Returns true if C is a valid mangled character: <0-9a-zA-Z_>. 92 static inline bool isValid(const char C) { 93 return isDigit(C) || isLower(C) || isUpper(C) || C == '_'; 94 } 95 96 // Demangles Rust v0 mangled symbol. Returns true when successful, and false 97 // otherwise. The demangled symbol is stored in Output field. It is 98 // responsibility of the caller to free the memory behind the output stream. 99 // 100 // <symbol-name> = "_R" <path> [<instantiating-crate>] 101 bool Demangler::demangle(StringView Mangled) { 102 Position = 0; 103 Error = false; 104 Print = true; 105 RecursionLevel = 0; 106 107 if (!Mangled.consumeFront("_R")) { 108 Error = true; 109 return false; 110 } 111 Input = Mangled; 112 113 demanglePath(rust_demangle::InType::No); 114 115 // FIXME parse optional <instantiating-crate>. 116 117 if (Position != Input.size()) 118 Error = true; 119 120 return !Error; 121 } 122 123 // Demangles a path. InType indicates whether a path is inside a type. 124 // 125 // <path> = "C" <identifier> // crate root 126 // | "M" <impl-path> <type> // <T> (inherent impl) 127 // | "X" <impl-path> <type> <path> // <T as Trait> (trait impl) 128 // | "Y" <type> <path> // <T as Trait> (trait definition) 129 // | "N" <ns> <path> <identifier> // ...::ident (nested path) 130 // | "I" <path> {<generic-arg>} "E" // ...<T, U> (generic args) 131 // | <backref> 132 // <identifier> = [<disambiguator>] <undisambiguated-identifier> 133 // <ns> = "C" // closure 134 // | "S" // shim 135 // | <A-Z> // other special namespaces 136 // | <a-z> // internal namespaces 137 void Demangler::demanglePath(InType InType) { 138 if (Error || RecursionLevel >= MaxRecursionLevel) { 139 Error = true; 140 return; 141 } 142 SwapAndRestore<size_t> SaveRecursionLevel(RecursionLevel, RecursionLevel + 1); 143 144 switch (consume()) { 145 case 'C': { 146 parseOptionalBase62Number('s'); 147 Identifier Ident = parseIdentifier(); 148 print(Ident.Name); 149 break; 150 } 151 case 'M': { 152 demangleImplPath(InType); 153 print("<"); 154 demangleType(); 155 print(">"); 156 break; 157 } 158 case 'X': { 159 demangleImplPath(InType); 160 print("<"); 161 demangleType(); 162 print(" as "); 163 demanglePath(rust_demangle::InType::Yes); 164 print(">"); 165 break; 166 } 167 case 'Y': { 168 print("<"); 169 demangleType(); 170 print(" as "); 171 demanglePath(rust_demangle::InType::Yes); 172 print(">"); 173 break; 174 } 175 case 'N': { 176 char NS = consume(); 177 if (!isLower(NS) && !isUpper(NS)) { 178 Error = true; 179 break; 180 } 181 demanglePath(InType); 182 183 uint64_t Disambiguator = parseOptionalBase62Number('s'); 184 Identifier Ident = parseIdentifier(); 185 186 if (isUpper(NS)) { 187 // Special namespaces 188 print("::{"); 189 if (NS == 'C') 190 print("closure"); 191 else if (NS == 'S') 192 print("shim"); 193 else 194 print(NS); 195 if (!Ident.empty()) { 196 print(":"); 197 print(Ident.Name); 198 } 199 print('#'); 200 printDecimalNumber(Disambiguator); 201 print('}'); 202 } else { 203 // Implementation internal namespaces. 204 if (!Ident.empty()) { 205 print("::"); 206 print(Ident.Name); 207 } 208 } 209 break; 210 } 211 case 'I': { 212 demanglePath(InType); 213 // Omit "::" when in a type, where it is optional. 214 if (InType == rust_demangle::InType::No) 215 print("::"); 216 print("<"); 217 for (size_t I = 0; !Error && !consumeIf('E'); ++I) { 218 if (I > 0) 219 print(", "); 220 demangleGenericArg(); 221 } 222 print(">"); 223 break; 224 } 225 default: 226 // FIXME parse remaining productions. 227 Error = true; 228 break; 229 } 230 } 231 232 // <impl-path> = [<disambiguator>] <path> 233 // <disambiguator> = "s" <base-62-number> 234 void Demangler::demangleImplPath(InType InType) { 235 SwapAndRestore<bool> SavePrint(Print, false); 236 parseOptionalBase62Number('s'); 237 demanglePath(InType); 238 } 239 240 // <generic-arg> = <lifetime> 241 // | <type> 242 // | "K" <const> 243 // <lifetime> = "L" <base-62-number> 244 void Demangler::demangleGenericArg() { 245 if (consumeIf('K')) 246 demangleConst(); 247 else 248 demangleType(); 249 // FIXME demangle lifetimes. 250 } 251 252 // <basic-type> = "a" // i8 253 // | "b" // bool 254 // | "c" // char 255 // | "d" // f64 256 // | "e" // str 257 // | "f" // f32 258 // | "h" // u8 259 // | "i" // isize 260 // | "j" // usize 261 // | "l" // i32 262 // | "m" // u32 263 // | "n" // i128 264 // | "o" // u128 265 // | "s" // i16 266 // | "t" // u16 267 // | "u" // () 268 // | "v" // ... 269 // | "x" // i64 270 // | "y" // u64 271 // | "z" // ! 272 // | "p" // placeholder (e.g. for generic params), shown as _ 273 static bool parseBasicType(char C, BasicType &Type) { 274 switch (C) { 275 case 'a': 276 Type = BasicType::I8; 277 return true; 278 case 'b': 279 Type = BasicType::Bool; 280 return true; 281 case 'c': 282 Type = BasicType::Char; 283 return true; 284 case 'd': 285 Type = BasicType::F64; 286 return true; 287 case 'e': 288 Type = BasicType::Str; 289 return true; 290 case 'f': 291 Type = BasicType::F32; 292 return true; 293 case 'h': 294 Type = BasicType::U8; 295 return true; 296 case 'i': 297 Type = BasicType::ISize; 298 return true; 299 case 'j': 300 Type = BasicType::USize; 301 return true; 302 case 'l': 303 Type = BasicType::I32; 304 return true; 305 case 'm': 306 Type = BasicType::U32; 307 return true; 308 case 'n': 309 Type = BasicType::I128; 310 return true; 311 case 'o': 312 Type = BasicType::U128; 313 return true; 314 case 'p': 315 Type = BasicType::Placeholder; 316 return true; 317 case 's': 318 Type = BasicType::I16; 319 return true; 320 case 't': 321 Type = BasicType::U16; 322 return true; 323 case 'u': 324 Type = BasicType::Unit; 325 return true; 326 case 'v': 327 Type = BasicType::Variadic; 328 return true; 329 case 'x': 330 Type = BasicType::I64; 331 return true; 332 case 'y': 333 Type = BasicType::U64; 334 return true; 335 case 'z': 336 Type = BasicType::Never; 337 return true; 338 default: 339 return false; 340 } 341 } 342 343 void Demangler::printBasicType(BasicType Type) { 344 switch (Type) { 345 case BasicType::Bool: 346 print("bool"); 347 break; 348 case BasicType::Char: 349 print("char"); 350 break; 351 case BasicType::I8: 352 print("i8"); 353 break; 354 case BasicType::I16: 355 print("i16"); 356 break; 357 case BasicType::I32: 358 print("i32"); 359 break; 360 case BasicType::I64: 361 print("i64"); 362 break; 363 case BasicType::I128: 364 print("i128"); 365 break; 366 case BasicType::ISize: 367 print("isize"); 368 break; 369 case BasicType::U8: 370 print("u8"); 371 break; 372 case BasicType::U16: 373 print("u16"); 374 break; 375 case BasicType::U32: 376 print("u32"); 377 break; 378 case BasicType::U64: 379 print("u64"); 380 break; 381 case BasicType::U128: 382 print("u128"); 383 break; 384 case BasicType::USize: 385 print("usize"); 386 break; 387 case BasicType::F32: 388 print("f32"); 389 break; 390 case BasicType::F64: 391 print("f64"); 392 break; 393 case BasicType::Str: 394 print("str"); 395 break; 396 case BasicType::Placeholder: 397 print("_"); 398 break; 399 case BasicType::Unit: 400 print("()"); 401 break; 402 case BasicType::Variadic: 403 print("..."); 404 break; 405 case BasicType::Never: 406 print("!"); 407 break; 408 } 409 } 410 411 // <type> = | <basic-type> 412 // | <path> // named type 413 // | "A" <type> <const> // [T; N] 414 // | "S" <type> // [T] 415 // | "T" {<type>} "E" // (T1, T2, T3, ...) 416 // | "R" [<lifetime>] <type> // &T 417 // | "Q" [<lifetime>] <type> // &mut T 418 // | "P" <type> // *const T 419 // | "O" <type> // *mut T 420 // | "F" <fn-sig> // fn(...) -> ... 421 // | "D" <dyn-bounds> <lifetime> // dyn Trait<Assoc = X> + Send + 'a 422 // | <backref> // backref 423 void Demangler::demangleType() { 424 size_t Start = Position; 425 426 char C = consume(); 427 BasicType Type; 428 if (parseBasicType(C, Type)) 429 return printBasicType(Type); 430 431 switch (C) { 432 case 'A': 433 print("["); 434 demangleType(); 435 print("; "); 436 demangleConst(); 437 print("]"); 438 break; 439 case 'S': 440 print("["); 441 demangleType(); 442 print("]"); 443 break; 444 case 'T': { 445 print("("); 446 size_t I = 0; 447 for (; !Error && !consumeIf('E'); ++I) { 448 if (I > 0) 449 print(", "); 450 demangleType(); 451 } 452 if (I == 1) 453 print(","); 454 print(")"); 455 break; 456 } 457 case 'R': 458 print("&"); 459 // FIXME demangle [<lifetime>]. 460 demangleType(); 461 break; 462 case 'Q': 463 print("&mut "); 464 // FIXME demangle [<lifetime>]. 465 demangleType(); 466 break; 467 case 'P': 468 print("*const "); 469 demangleType(); 470 break; 471 case 'O': 472 print("*mut "); 473 demangleType(); 474 break; 475 case 'F': 476 demangleFnSig(); 477 break; 478 default: 479 Position = Start; 480 demanglePath(rust_demangle::InType::Yes); 481 break; 482 } 483 } 484 485 // <fn-sig> := [<binder>] ["U"] ["K" <abi>] {<type>} "E" <type> 486 // <abi> = "C" 487 // | <undisambiguated-identifier> 488 void Demangler::demangleFnSig() { 489 // FIXME demangle binder. 490 491 if (consumeIf('U')) 492 print("unsafe "); 493 494 if (consumeIf('K')) { 495 print("extern \""); 496 if (consumeIf('C')) { 497 print("C"); 498 } else { 499 Identifier Ident = parseIdentifier(); 500 for (char C : Ident.Name) { 501 // When mangling ABI string, the "-" is replaced with "_". 502 if (C == '_') 503 C = '-'; 504 print(C); 505 } 506 } 507 print("\" "); 508 } 509 510 print("fn("); 511 for (size_t I = 0; !Error && !consumeIf('E'); ++I) { 512 if (I > 0) 513 print(", "); 514 demangleType(); 515 } 516 print(")"); 517 518 if (consumeIf('u')) { 519 // Skip the unit type from the output. 520 } else { 521 print(" -> "); 522 demangleType(); 523 } 524 } 525 526 // <const> = <basic-type> <const-data> 527 // | "p" // placeholder 528 // | <backref> 529 void Demangler::demangleConst() { 530 BasicType Type; 531 if (parseBasicType(consume(), Type)) { 532 switch (Type) { 533 case BasicType::I8: 534 case BasicType::I16: 535 case BasicType::I32: 536 case BasicType::I64: 537 case BasicType::I128: 538 case BasicType::ISize: 539 case BasicType::U8: 540 case BasicType::U16: 541 case BasicType::U32: 542 case BasicType::U64: 543 case BasicType::U128: 544 case BasicType::USize: 545 demangleConstInt(); 546 break; 547 case BasicType::Bool: 548 demangleConstBool(); 549 break; 550 case BasicType::Char: 551 demangleConstChar(); 552 break; 553 case BasicType::Placeholder: 554 print('_'); 555 break; 556 default: 557 // FIXME demangle backreferences. 558 Error = true; 559 break; 560 } 561 } else { 562 Error = true; 563 } 564 } 565 566 // <const-data> = ["n"] <hex-number> 567 void Demangler::demangleConstInt() { 568 if (consumeIf('n')) 569 print('-'); 570 571 StringView HexDigits; 572 uint64_t Value = parseHexNumber(HexDigits); 573 if (HexDigits.size() <= 16) { 574 printDecimalNumber(Value); 575 } else { 576 print("0x"); 577 print(HexDigits); 578 } 579 } 580 581 // <const-data> = "0_" // false 582 // | "1_" // true 583 void Demangler::demangleConstBool() { 584 StringView HexDigits; 585 parseHexNumber(HexDigits); 586 if (HexDigits == "0") 587 print("false"); 588 else if (HexDigits == "1") 589 print("true"); 590 else 591 Error = true; 592 } 593 594 /// Returns true if CodePoint represents a printable ASCII character. 595 static bool isAsciiPrintable(uint64_t CodePoint) { 596 return 0x20 <= CodePoint && CodePoint <= 0x7e; 597 } 598 599 // <const-data> = <hex-number> 600 void Demangler::demangleConstChar() { 601 StringView HexDigits; 602 uint64_t CodePoint = parseHexNumber(HexDigits); 603 if (Error || HexDigits.size() > 6) { 604 Error = true; 605 return; 606 } 607 608 print("'"); 609 switch (CodePoint) { 610 case '\t': 611 print(R"(\t)"); 612 break; 613 case '\r': 614 print(R"(\r)"); 615 break; 616 case '\n': 617 print(R"(\n)"); 618 break; 619 case '\\': 620 print(R"(\\)"); 621 break; 622 case '"': 623 print(R"(")"); 624 break; 625 case '\'': 626 print(R"(\')"); 627 break; 628 default: 629 if (isAsciiPrintable(CodePoint)) { 630 char C = CodePoint; 631 print(C); 632 } else { 633 print(R"(\u{)"); 634 print(HexDigits); 635 print('}'); 636 } 637 break; 638 } 639 print('\''); 640 } 641 642 // <undisambiguated-identifier> = ["u"] <decimal-number> ["_"] <bytes> 643 Identifier Demangler::parseIdentifier() { 644 bool Punycode = consumeIf('u'); 645 uint64_t Bytes = parseDecimalNumber(); 646 647 // Underscore resolves the ambiguity when identifier starts with a decimal 648 // digit or another underscore. 649 consumeIf('_'); 650 651 if (Error || Bytes > Input.size() - Position) { 652 Error = true; 653 return {}; 654 } 655 StringView S = Input.substr(Position, Bytes); 656 Position += Bytes; 657 658 if (!std::all_of(S.begin(), S.end(), isValid)) { 659 Error = true; 660 return {}; 661 } 662 663 return {S, Punycode}; 664 } 665 666 // Parses optional base 62 number. The presence of a number is determined using 667 // Tag. Returns 0 when tag is absent and parsed value + 1 otherwise. 668 uint64_t Demangler::parseOptionalBase62Number(char Tag) { 669 if (!consumeIf(Tag)) 670 return 0; 671 672 uint64_t N = parseBase62Number(); 673 if (Error || !addAssign(N, 1)) 674 return 0; 675 676 return N; 677 } 678 679 // Parses base 62 number with <0-9a-zA-Z> as digits. Number is terminated by 680 // "_". All values are offset by 1, so that "_" encodes 0, "0_" encodes 1, 681 // "1_" encodes 2, etc. 682 // 683 // <base-62-number> = {<0-9a-zA-Z>} "_" 684 uint64_t Demangler::parseBase62Number() { 685 if (consumeIf('_')) 686 return 0; 687 688 uint64_t Value = 0; 689 690 while (true) { 691 uint64_t Digit; 692 char C = consume(); 693 694 if (C == '_') { 695 break; 696 } else if (isDigit(C)) { 697 Digit = C - '0'; 698 } else if (isLower(C)) { 699 Digit = 10 + (C - 'a'); 700 } else if (isUpper(C)) { 701 Digit = 10 + 26 + (C - 'A'); 702 } else { 703 Error = true; 704 return 0; 705 } 706 707 if (!mulAssign(Value, 62)) 708 return 0; 709 710 if (!addAssign(Value, Digit)) 711 return 0; 712 } 713 714 if (!addAssign(Value, 1)) 715 return 0; 716 717 return Value; 718 } 719 720 // Parses a decimal number that had been encoded without any leading zeros. 721 // 722 // <decimal-number> = "0" 723 // | <1-9> {<0-9>} 724 uint64_t Demangler::parseDecimalNumber() { 725 char C = look(); 726 if (!isDigit(C)) { 727 Error = true; 728 return 0; 729 } 730 731 if (C == '0') { 732 consume(); 733 return 0; 734 } 735 736 uint64_t Value = 0; 737 738 while (isDigit(look())) { 739 if (!mulAssign(Value, 10)) { 740 Error = true; 741 return 0; 742 } 743 744 uint64_t D = consume() - '0'; 745 if (!addAssign(Value, D)) 746 return 0; 747 } 748 749 return Value; 750 } 751 752 // Parses a hexadecimal number with <0-9a-f> as a digits. Returns the parsed 753 // value and stores hex digits in HexDigits. The return value is unspecified if 754 // HexDigits.size() > 16. 755 // 756 // <hex-number> = "0_" 757 // | <1-9a-f> {<0-9a-f>} "_" 758 uint64_t Demangler::parseHexNumber(StringView &HexDigits) { 759 size_t Start = Position; 760 uint64_t Value = 0; 761 762 if (!isHexDigit(look())) 763 Error = true; 764 765 if (consumeIf('0')) { 766 if (!consumeIf('_')) 767 Error = true; 768 } else { 769 while (!Error && !consumeIf('_')) { 770 char C = consume(); 771 Value *= 16; 772 if (isDigit(C)) 773 Value += C - '0'; 774 else if ('a' <= C && C <= 'f') 775 Value += 10 + (C - 'a'); 776 else 777 Error = true; 778 } 779 } 780 781 if (Error) { 782 HexDigits = StringView(); 783 return 0; 784 } 785 786 size_t End = Position - 1; 787 assert(Start < End); 788 HexDigits = Input.substr(Start, End - Start); 789 return Value; 790 } 791