1 //===--- RustDemangle.cpp ---------------------------------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file defines a demangler for Rust v0 mangled symbols as specified in 10 // https://rust-lang.github.io/rfcs/2603-rust-symbol-name-mangling-v0.html 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "llvm/Demangle/Demangle.h" 15 #include "llvm/Demangle/StringViewExtras.h" 16 #include "llvm/Demangle/Utility.h" 17 18 #include <algorithm> 19 #include <cassert> 20 #include <cstdint> 21 #include <cstring> 22 #include <limits> 23 24 using namespace llvm; 25 26 using llvm::itanium_demangle::OutputBuffer; 27 using llvm::itanium_demangle::ScopedOverride; 28 29 namespace { 30 31 struct Identifier { 32 std::string_view Name; 33 bool Punycode; 34 35 bool empty() const { return Name.empty(); } 36 }; 37 38 enum class BasicType { 39 Bool, 40 Char, 41 I8, 42 I16, 43 I32, 44 I64, 45 I128, 46 ISize, 47 U8, 48 U16, 49 U32, 50 U64, 51 U128, 52 USize, 53 F32, 54 F64, 55 Str, 56 Placeholder, 57 Unit, 58 Variadic, 59 Never, 60 }; 61 62 enum class IsInType { 63 No, 64 Yes, 65 }; 66 67 enum class LeaveGenericsOpen { 68 No, 69 Yes, 70 }; 71 72 class Demangler { 73 // Maximum recursion level. Used to avoid stack overflow. 74 size_t MaxRecursionLevel; 75 // Current recursion level. 76 size_t RecursionLevel; 77 size_t BoundLifetimes; 78 // Input string that is being demangled with "_R" prefix removed. 79 std::string_view Input; 80 // Position in the input string. 81 size_t Position; 82 // When true, print methods append the output to the stream. 83 // When false, the output is suppressed. 84 bool Print; 85 // True if an error occurred. 86 bool Error; 87 88 public: 89 // Demangled output. 90 OutputBuffer Output; 91 92 Demangler(size_t MaxRecursionLevel = 500); 93 94 bool demangle(std::string_view MangledName); 95 96 private: 97 bool demanglePath(IsInType Type, 98 LeaveGenericsOpen LeaveOpen = LeaveGenericsOpen::No); 99 void demangleImplPath(IsInType InType); 100 void demangleGenericArg(); 101 void demangleType(); 102 void demangleFnSig(); 103 void demangleDynBounds(); 104 void demangleDynTrait(); 105 void demangleOptionalBinder(); 106 void demangleConst(); 107 void demangleConstInt(); 108 void demangleConstBool(); 109 void demangleConstChar(); 110 111 template <typename Callable> void demangleBackref(Callable Demangler) { 112 uint64_t Backref = parseBase62Number(); 113 if (Error || Backref >= Position) { 114 Error = true; 115 return; 116 } 117 118 if (!Print) 119 return; 120 121 ScopedOverride<size_t> SavePosition(Position, Position); 122 Position = Backref; 123 Demangler(); 124 } 125 126 Identifier parseIdentifier(); 127 uint64_t parseOptionalBase62Number(char Tag); 128 uint64_t parseBase62Number(); 129 uint64_t parseDecimalNumber(); 130 uint64_t parseHexNumber(std::string_view &HexDigits); 131 132 void print(char C); 133 void print(std::string_view S); 134 void printDecimalNumber(uint64_t N); 135 void printBasicType(BasicType); 136 void printLifetime(uint64_t Index); 137 void printIdentifier(Identifier Ident); 138 139 char look() const; 140 char consume(); 141 bool consumeIf(char Prefix); 142 143 bool addAssign(uint64_t &A, uint64_t B); 144 bool mulAssign(uint64_t &A, uint64_t B); 145 }; 146 147 } // namespace 148 149 char *llvm::rustDemangle(const char *MangledName) { 150 if (MangledName == nullptr) 151 return nullptr; 152 153 // Return early if mangled name doesn't look like a Rust symbol. 154 std::string_view Mangled(MangledName); 155 if (!llvm::itanium_demangle::starts_with(Mangled, "_R")) 156 return nullptr; 157 158 Demangler D; 159 if (!D.demangle(Mangled)) { 160 std::free(D.Output.getBuffer()); 161 return nullptr; 162 } 163 164 D.Output += '\0'; 165 166 return D.Output.getBuffer(); 167 } 168 169 Demangler::Demangler(size_t MaxRecursionLevel) 170 : MaxRecursionLevel(MaxRecursionLevel) {} 171 172 static inline bool isDigit(const char C) { return '0' <= C && C <= '9'; } 173 174 static inline bool isHexDigit(const char C) { 175 return ('0' <= C && C <= '9') || ('a' <= C && C <= 'f'); 176 } 177 178 static inline bool isLower(const char C) { return 'a' <= C && C <= 'z'; } 179 180 static inline bool isUpper(const char C) { return 'A' <= C && C <= 'Z'; } 181 182 /// Returns true if C is a valid mangled character: <0-9a-zA-Z_>. 183 static inline bool isValid(const char C) { 184 return isDigit(C) || isLower(C) || isUpper(C) || C == '_'; 185 } 186 187 // Demangles Rust v0 mangled symbol. Returns true when successful, and false 188 // otherwise. The demangled symbol is stored in Output field. It is 189 // responsibility of the caller to free the memory behind the output stream. 190 // 191 // <symbol-name> = "_R" <path> [<instantiating-crate>] 192 bool Demangler::demangle(std::string_view Mangled) { 193 Position = 0; 194 Error = false; 195 Print = true; 196 RecursionLevel = 0; 197 BoundLifetimes = 0; 198 199 if (!llvm::itanium_demangle::starts_with(Mangled, "_R")) { 200 Error = true; 201 return false; 202 } 203 Mangled.remove_prefix(2); 204 size_t Dot = Mangled.find('.'); 205 Input = Dot == std::string_view::npos ? Mangled : Mangled.substr(0, Dot); 206 207 demanglePath(IsInType::No); 208 209 if (Position != Input.size()) { 210 ScopedOverride<bool> SavePrint(Print, false); 211 demanglePath(IsInType::No); 212 } 213 214 if (Position != Input.size()) 215 Error = true; 216 217 if (Dot != std::string_view::npos) { 218 print(" ("); 219 print(Mangled.substr(Dot)); 220 print(")"); 221 } 222 223 return !Error; 224 } 225 226 // Demangles a path. InType indicates whether a path is inside a type. When 227 // LeaveOpen is true, a closing `>` after generic arguments is omitted from the 228 // output. Return value indicates whether generics arguments have been left 229 // open. 230 // 231 // <path> = "C" <identifier> // crate root 232 // | "M" <impl-path> <type> // <T> (inherent impl) 233 // | "X" <impl-path> <type> <path> // <T as Trait> (trait impl) 234 // | "Y" <type> <path> // <T as Trait> (trait definition) 235 // | "N" <ns> <path> <identifier> // ...::ident (nested path) 236 // | "I" <path> {<generic-arg>} "E" // ...<T, U> (generic args) 237 // | <backref> 238 // <identifier> = [<disambiguator>] <undisambiguated-identifier> 239 // <ns> = "C" // closure 240 // | "S" // shim 241 // | <A-Z> // other special namespaces 242 // | <a-z> // internal namespaces 243 bool Demangler::demanglePath(IsInType InType, LeaveGenericsOpen LeaveOpen) { 244 if (Error || RecursionLevel >= MaxRecursionLevel) { 245 Error = true; 246 return false; 247 } 248 ScopedOverride<size_t> SaveRecursionLevel(RecursionLevel, RecursionLevel + 1); 249 250 switch (consume()) { 251 case 'C': { 252 parseOptionalBase62Number('s'); 253 printIdentifier(parseIdentifier()); 254 break; 255 } 256 case 'M': { 257 demangleImplPath(InType); 258 print("<"); 259 demangleType(); 260 print(">"); 261 break; 262 } 263 case 'X': { 264 demangleImplPath(InType); 265 print("<"); 266 demangleType(); 267 print(" as "); 268 demanglePath(IsInType::Yes); 269 print(">"); 270 break; 271 } 272 case 'Y': { 273 print("<"); 274 demangleType(); 275 print(" as "); 276 demanglePath(IsInType::Yes); 277 print(">"); 278 break; 279 } 280 case 'N': { 281 char NS = consume(); 282 if (!isLower(NS) && !isUpper(NS)) { 283 Error = true; 284 break; 285 } 286 demanglePath(InType); 287 288 uint64_t Disambiguator = parseOptionalBase62Number('s'); 289 Identifier Ident = parseIdentifier(); 290 291 if (isUpper(NS)) { 292 // Special namespaces 293 print("::{"); 294 if (NS == 'C') 295 print("closure"); 296 else if (NS == 'S') 297 print("shim"); 298 else 299 print(NS); 300 if (!Ident.empty()) { 301 print(":"); 302 printIdentifier(Ident); 303 } 304 print('#'); 305 printDecimalNumber(Disambiguator); 306 print('}'); 307 } else { 308 // Implementation internal namespaces. 309 if (!Ident.empty()) { 310 print("::"); 311 printIdentifier(Ident); 312 } 313 } 314 break; 315 } 316 case 'I': { 317 demanglePath(InType); 318 // Omit "::" when in a type, where it is optional. 319 if (InType == IsInType::No) 320 print("::"); 321 print("<"); 322 for (size_t I = 0; !Error && !consumeIf('E'); ++I) { 323 if (I > 0) 324 print(", "); 325 demangleGenericArg(); 326 } 327 if (LeaveOpen == LeaveGenericsOpen::Yes) 328 return true; 329 else 330 print(">"); 331 break; 332 } 333 case 'B': { 334 bool IsOpen = false; 335 demangleBackref([&] { IsOpen = demanglePath(InType, LeaveOpen); }); 336 return IsOpen; 337 } 338 default: 339 Error = true; 340 break; 341 } 342 343 return false; 344 } 345 346 // <impl-path> = [<disambiguator>] <path> 347 // <disambiguator> = "s" <base-62-number> 348 void Demangler::demangleImplPath(IsInType InType) { 349 ScopedOverride<bool> SavePrint(Print, false); 350 parseOptionalBase62Number('s'); 351 demanglePath(InType); 352 } 353 354 // <generic-arg> = <lifetime> 355 // | <type> 356 // | "K" <const> 357 // <lifetime> = "L" <base-62-number> 358 void Demangler::demangleGenericArg() { 359 if (consumeIf('L')) 360 printLifetime(parseBase62Number()); 361 else if (consumeIf('K')) 362 demangleConst(); 363 else 364 demangleType(); 365 } 366 367 // <basic-type> = "a" // i8 368 // | "b" // bool 369 // | "c" // char 370 // | "d" // f64 371 // | "e" // str 372 // | "f" // f32 373 // | "h" // u8 374 // | "i" // isize 375 // | "j" // usize 376 // | "l" // i32 377 // | "m" // u32 378 // | "n" // i128 379 // | "o" // u128 380 // | "s" // i16 381 // | "t" // u16 382 // | "u" // () 383 // | "v" // ... 384 // | "x" // i64 385 // | "y" // u64 386 // | "z" // ! 387 // | "p" // placeholder (e.g. for generic params), shown as _ 388 static bool parseBasicType(char C, BasicType &Type) { 389 switch (C) { 390 case 'a': 391 Type = BasicType::I8; 392 return true; 393 case 'b': 394 Type = BasicType::Bool; 395 return true; 396 case 'c': 397 Type = BasicType::Char; 398 return true; 399 case 'd': 400 Type = BasicType::F64; 401 return true; 402 case 'e': 403 Type = BasicType::Str; 404 return true; 405 case 'f': 406 Type = BasicType::F32; 407 return true; 408 case 'h': 409 Type = BasicType::U8; 410 return true; 411 case 'i': 412 Type = BasicType::ISize; 413 return true; 414 case 'j': 415 Type = BasicType::USize; 416 return true; 417 case 'l': 418 Type = BasicType::I32; 419 return true; 420 case 'm': 421 Type = BasicType::U32; 422 return true; 423 case 'n': 424 Type = BasicType::I128; 425 return true; 426 case 'o': 427 Type = BasicType::U128; 428 return true; 429 case 'p': 430 Type = BasicType::Placeholder; 431 return true; 432 case 's': 433 Type = BasicType::I16; 434 return true; 435 case 't': 436 Type = BasicType::U16; 437 return true; 438 case 'u': 439 Type = BasicType::Unit; 440 return true; 441 case 'v': 442 Type = BasicType::Variadic; 443 return true; 444 case 'x': 445 Type = BasicType::I64; 446 return true; 447 case 'y': 448 Type = BasicType::U64; 449 return true; 450 case 'z': 451 Type = BasicType::Never; 452 return true; 453 default: 454 return false; 455 } 456 } 457 458 void Demangler::printBasicType(BasicType Type) { 459 switch (Type) { 460 case BasicType::Bool: 461 print("bool"); 462 break; 463 case BasicType::Char: 464 print("char"); 465 break; 466 case BasicType::I8: 467 print("i8"); 468 break; 469 case BasicType::I16: 470 print("i16"); 471 break; 472 case BasicType::I32: 473 print("i32"); 474 break; 475 case BasicType::I64: 476 print("i64"); 477 break; 478 case BasicType::I128: 479 print("i128"); 480 break; 481 case BasicType::ISize: 482 print("isize"); 483 break; 484 case BasicType::U8: 485 print("u8"); 486 break; 487 case BasicType::U16: 488 print("u16"); 489 break; 490 case BasicType::U32: 491 print("u32"); 492 break; 493 case BasicType::U64: 494 print("u64"); 495 break; 496 case BasicType::U128: 497 print("u128"); 498 break; 499 case BasicType::USize: 500 print("usize"); 501 break; 502 case BasicType::F32: 503 print("f32"); 504 break; 505 case BasicType::F64: 506 print("f64"); 507 break; 508 case BasicType::Str: 509 print("str"); 510 break; 511 case BasicType::Placeholder: 512 print("_"); 513 break; 514 case BasicType::Unit: 515 print("()"); 516 break; 517 case BasicType::Variadic: 518 print("..."); 519 break; 520 case BasicType::Never: 521 print("!"); 522 break; 523 } 524 } 525 526 // <type> = | <basic-type> 527 // | <path> // named type 528 // | "A" <type> <const> // [T; N] 529 // | "S" <type> // [T] 530 // | "T" {<type>} "E" // (T1, T2, T3, ...) 531 // | "R" [<lifetime>] <type> // &T 532 // | "Q" [<lifetime>] <type> // &mut T 533 // | "P" <type> // *const T 534 // | "O" <type> // *mut T 535 // | "F" <fn-sig> // fn(...) -> ... 536 // | "D" <dyn-bounds> <lifetime> // dyn Trait<Assoc = X> + Send + 'a 537 // | <backref> // backref 538 void Demangler::demangleType() { 539 if (Error || RecursionLevel >= MaxRecursionLevel) { 540 Error = true; 541 return; 542 } 543 ScopedOverride<size_t> SaveRecursionLevel(RecursionLevel, RecursionLevel + 1); 544 545 size_t Start = Position; 546 char C = consume(); 547 BasicType Type; 548 if (parseBasicType(C, Type)) 549 return printBasicType(Type); 550 551 switch (C) { 552 case 'A': 553 print("["); 554 demangleType(); 555 print("; "); 556 demangleConst(); 557 print("]"); 558 break; 559 case 'S': 560 print("["); 561 demangleType(); 562 print("]"); 563 break; 564 case 'T': { 565 print("("); 566 size_t I = 0; 567 for (; !Error && !consumeIf('E'); ++I) { 568 if (I > 0) 569 print(", "); 570 demangleType(); 571 } 572 if (I == 1) 573 print(","); 574 print(")"); 575 break; 576 } 577 case 'R': 578 case 'Q': 579 print('&'); 580 if (consumeIf('L')) { 581 if (auto Lifetime = parseBase62Number()) { 582 printLifetime(Lifetime); 583 print(' '); 584 } 585 } 586 if (C == 'Q') 587 print("mut "); 588 demangleType(); 589 break; 590 case 'P': 591 print("*const "); 592 demangleType(); 593 break; 594 case 'O': 595 print("*mut "); 596 demangleType(); 597 break; 598 case 'F': 599 demangleFnSig(); 600 break; 601 case 'D': 602 demangleDynBounds(); 603 if (consumeIf('L')) { 604 if (auto Lifetime = parseBase62Number()) { 605 print(" + "); 606 printLifetime(Lifetime); 607 } 608 } else { 609 Error = true; 610 } 611 break; 612 case 'B': 613 demangleBackref([&] { demangleType(); }); 614 break; 615 default: 616 Position = Start; 617 demanglePath(IsInType::Yes); 618 break; 619 } 620 } 621 622 // <fn-sig> := [<binder>] ["U"] ["K" <abi>] {<type>} "E" <type> 623 // <abi> = "C" 624 // | <undisambiguated-identifier> 625 void Demangler::demangleFnSig() { 626 ScopedOverride<size_t> SaveBoundLifetimes(BoundLifetimes, BoundLifetimes); 627 demangleOptionalBinder(); 628 629 if (consumeIf('U')) 630 print("unsafe "); 631 632 if (consumeIf('K')) { 633 print("extern \""); 634 if (consumeIf('C')) { 635 print("C"); 636 } else { 637 Identifier Ident = parseIdentifier(); 638 if (Ident.Punycode) 639 Error = true; 640 for (char C : Ident.Name) { 641 // When mangling ABI string, the "-" is replaced with "_". 642 if (C == '_') 643 C = '-'; 644 print(C); 645 } 646 } 647 print("\" "); 648 } 649 650 print("fn("); 651 for (size_t I = 0; !Error && !consumeIf('E'); ++I) { 652 if (I > 0) 653 print(", "); 654 demangleType(); 655 } 656 print(")"); 657 658 if (consumeIf('u')) { 659 // Skip the unit type from the output. 660 } else { 661 print(" -> "); 662 demangleType(); 663 } 664 } 665 666 // <dyn-bounds> = [<binder>] {<dyn-trait>} "E" 667 void Demangler::demangleDynBounds() { 668 ScopedOverride<size_t> SaveBoundLifetimes(BoundLifetimes, BoundLifetimes); 669 print("dyn "); 670 demangleOptionalBinder(); 671 for (size_t I = 0; !Error && !consumeIf('E'); ++I) { 672 if (I > 0) 673 print(" + "); 674 demangleDynTrait(); 675 } 676 } 677 678 // <dyn-trait> = <path> {<dyn-trait-assoc-binding>} 679 // <dyn-trait-assoc-binding> = "p" <undisambiguated-identifier> <type> 680 void Demangler::demangleDynTrait() { 681 bool IsOpen = demanglePath(IsInType::Yes, LeaveGenericsOpen::Yes); 682 while (!Error && consumeIf('p')) { 683 if (!IsOpen) { 684 IsOpen = true; 685 print('<'); 686 } else { 687 print(", "); 688 } 689 print(parseIdentifier().Name); 690 print(" = "); 691 demangleType(); 692 } 693 if (IsOpen) 694 print(">"); 695 } 696 697 // Demangles optional binder and updates the number of bound lifetimes. 698 // 699 // <binder> = "G" <base-62-number> 700 void Demangler::demangleOptionalBinder() { 701 uint64_t Binder = parseOptionalBase62Number('G'); 702 if (Error || Binder == 0) 703 return; 704 705 // In valid inputs each bound lifetime is referenced later. Referencing a 706 // lifetime requires at least one byte of input. Reject inputs that are too 707 // short to reference all bound lifetimes. Otherwise demangling of invalid 708 // binders could generate excessive amounts of output. 709 if (Binder >= Input.size() - BoundLifetimes) { 710 Error = true; 711 return; 712 } 713 714 print("for<"); 715 for (size_t I = 0; I != Binder; ++I) { 716 BoundLifetimes += 1; 717 if (I > 0) 718 print(", "); 719 printLifetime(1); 720 } 721 print("> "); 722 } 723 724 // <const> = <basic-type> <const-data> 725 // | "p" // placeholder 726 // | <backref> 727 void Demangler::demangleConst() { 728 if (Error || RecursionLevel >= MaxRecursionLevel) { 729 Error = true; 730 return; 731 } 732 ScopedOverride<size_t> SaveRecursionLevel(RecursionLevel, RecursionLevel + 1); 733 734 char C = consume(); 735 BasicType Type; 736 if (parseBasicType(C, Type)) { 737 switch (Type) { 738 case BasicType::I8: 739 case BasicType::I16: 740 case BasicType::I32: 741 case BasicType::I64: 742 case BasicType::I128: 743 case BasicType::ISize: 744 case BasicType::U8: 745 case BasicType::U16: 746 case BasicType::U32: 747 case BasicType::U64: 748 case BasicType::U128: 749 case BasicType::USize: 750 demangleConstInt(); 751 break; 752 case BasicType::Bool: 753 demangleConstBool(); 754 break; 755 case BasicType::Char: 756 demangleConstChar(); 757 break; 758 case BasicType::Placeholder: 759 print('_'); 760 break; 761 default: 762 Error = true; 763 break; 764 } 765 } else if (C == 'B') { 766 demangleBackref([&] { demangleConst(); }); 767 } else { 768 Error = true; 769 } 770 } 771 772 // <const-data> = ["n"] <hex-number> 773 void Demangler::demangleConstInt() { 774 if (consumeIf('n')) 775 print('-'); 776 777 std::string_view HexDigits; 778 uint64_t Value = parseHexNumber(HexDigits); 779 if (HexDigits.size() <= 16) { 780 printDecimalNumber(Value); 781 } else { 782 print("0x"); 783 print(HexDigits); 784 } 785 } 786 787 // <const-data> = "0_" // false 788 // | "1_" // true 789 void Demangler::demangleConstBool() { 790 std::string_view HexDigits; 791 parseHexNumber(HexDigits); 792 if (HexDigits == "0") 793 print("false"); 794 else if (HexDigits == "1") 795 print("true"); 796 else 797 Error = true; 798 } 799 800 /// Returns true if CodePoint represents a printable ASCII character. 801 static bool isAsciiPrintable(uint64_t CodePoint) { 802 return 0x20 <= CodePoint && CodePoint <= 0x7e; 803 } 804 805 // <const-data> = <hex-number> 806 void Demangler::demangleConstChar() { 807 std::string_view HexDigits; 808 uint64_t CodePoint = parseHexNumber(HexDigits); 809 if (Error || HexDigits.size() > 6) { 810 Error = true; 811 return; 812 } 813 814 print("'"); 815 switch (CodePoint) { 816 case '\t': 817 print(R"(\t)"); 818 break; 819 case '\r': 820 print(R"(\r)"); 821 break; 822 case '\n': 823 print(R"(\n)"); 824 break; 825 case '\\': 826 print(R"(\\)"); 827 break; 828 case '"': 829 print(R"(")"); 830 break; 831 case '\'': 832 print(R"(\')"); 833 break; 834 default: 835 if (isAsciiPrintable(CodePoint)) { 836 char C = CodePoint; 837 print(C); 838 } else { 839 print(R"(\u{)"); 840 print(HexDigits); 841 print('}'); 842 } 843 break; 844 } 845 print('\''); 846 } 847 848 // <undisambiguated-identifier> = ["u"] <decimal-number> ["_"] <bytes> 849 Identifier Demangler::parseIdentifier() { 850 bool Punycode = consumeIf('u'); 851 uint64_t Bytes = parseDecimalNumber(); 852 853 // Underscore resolves the ambiguity when identifier starts with a decimal 854 // digit or another underscore. 855 consumeIf('_'); 856 857 if (Error || Bytes > Input.size() - Position) { 858 Error = true; 859 return {}; 860 } 861 std::string_view S = Input.substr(Position, Bytes); 862 Position += Bytes; 863 864 if (!std::all_of(S.begin(), S.end(), isValid)) { 865 Error = true; 866 return {}; 867 } 868 869 return {S, Punycode}; 870 } 871 872 // Parses optional base 62 number. The presence of a number is determined using 873 // Tag. Returns 0 when tag is absent and parsed value + 1 otherwise 874 // 875 // This function is indended for parsing disambiguators and binders which when 876 // not present have their value interpreted as 0, and otherwise as decoded 877 // value + 1. For example for binders, value for "G_" is 1, for "G0_" value is 878 // 2. When "G" is absent value is 0. 879 uint64_t Demangler::parseOptionalBase62Number(char Tag) { 880 if (!consumeIf(Tag)) 881 return 0; 882 883 uint64_t N = parseBase62Number(); 884 if (Error || !addAssign(N, 1)) 885 return 0; 886 887 return N; 888 } 889 890 // Parses base 62 number with <0-9a-zA-Z> as digits. Number is terminated by 891 // "_". All values are offset by 1, so that "_" encodes 0, "0_" encodes 1, 892 // "1_" encodes 2, etc. 893 // 894 // <base-62-number> = {<0-9a-zA-Z>} "_" 895 uint64_t Demangler::parseBase62Number() { 896 if (consumeIf('_')) 897 return 0; 898 899 uint64_t Value = 0; 900 901 while (true) { 902 uint64_t Digit; 903 char C = consume(); 904 905 if (C == '_') { 906 break; 907 } else if (isDigit(C)) { 908 Digit = C - '0'; 909 } else if (isLower(C)) { 910 Digit = 10 + (C - 'a'); 911 } else if (isUpper(C)) { 912 Digit = 10 + 26 + (C - 'A'); 913 } else { 914 Error = true; 915 return 0; 916 } 917 918 if (!mulAssign(Value, 62)) 919 return 0; 920 921 if (!addAssign(Value, Digit)) 922 return 0; 923 } 924 925 if (!addAssign(Value, 1)) 926 return 0; 927 928 return Value; 929 } 930 931 // Parses a decimal number that had been encoded without any leading zeros. 932 // 933 // <decimal-number> = "0" 934 // | <1-9> {<0-9>} 935 uint64_t Demangler::parseDecimalNumber() { 936 char C = look(); 937 if (!isDigit(C)) { 938 Error = true; 939 return 0; 940 } 941 942 if (C == '0') { 943 consume(); 944 return 0; 945 } 946 947 uint64_t Value = 0; 948 949 while (isDigit(look())) { 950 if (!mulAssign(Value, 10)) { 951 Error = true; 952 return 0; 953 } 954 955 uint64_t D = consume() - '0'; 956 if (!addAssign(Value, D)) 957 return 0; 958 } 959 960 return Value; 961 } 962 963 // Parses a hexadecimal number with <0-9a-f> as a digits. Returns the parsed 964 // value and stores hex digits in HexDigits. The return value is unspecified if 965 // HexDigits.size() > 16. 966 // 967 // <hex-number> = "0_" 968 // | <1-9a-f> {<0-9a-f>} "_" 969 uint64_t Demangler::parseHexNumber(std::string_view &HexDigits) { 970 size_t Start = Position; 971 uint64_t Value = 0; 972 973 if (!isHexDigit(look())) 974 Error = true; 975 976 if (consumeIf('0')) { 977 if (!consumeIf('_')) 978 Error = true; 979 } else { 980 while (!Error && !consumeIf('_')) { 981 char C = consume(); 982 Value *= 16; 983 if (isDigit(C)) 984 Value += C - '0'; 985 else if ('a' <= C && C <= 'f') 986 Value += 10 + (C - 'a'); 987 else 988 Error = true; 989 } 990 } 991 992 if (Error) { 993 HexDigits = std::string_view(); 994 return 0; 995 } 996 997 size_t End = Position - 1; 998 assert(Start < End); 999 HexDigits = Input.substr(Start, End - Start); 1000 return Value; 1001 } 1002 1003 void Demangler::print(char C) { 1004 if (Error || !Print) 1005 return; 1006 1007 Output += C; 1008 } 1009 1010 void Demangler::print(std::string_view S) { 1011 if (Error || !Print) 1012 return; 1013 1014 Output += S; 1015 } 1016 1017 void Demangler::printDecimalNumber(uint64_t N) { 1018 if (Error || !Print) 1019 return; 1020 1021 Output << N; 1022 } 1023 1024 // Prints a lifetime. An index 0 always represents an erased lifetime. Indices 1025 // starting from 1, are De Bruijn indices, referring to higher-ranked lifetimes 1026 // bound by one of the enclosing binders. 1027 void Demangler::printLifetime(uint64_t Index) { 1028 if (Index == 0) { 1029 print("'_"); 1030 return; 1031 } 1032 1033 if (Index - 1 >= BoundLifetimes) { 1034 Error = true; 1035 return; 1036 } 1037 1038 uint64_t Depth = BoundLifetimes - Index; 1039 print('\''); 1040 if (Depth < 26) { 1041 char C = 'a' + Depth; 1042 print(C); 1043 } else { 1044 print('z'); 1045 printDecimalNumber(Depth - 26 + 1); 1046 } 1047 } 1048 1049 static inline bool decodePunycodeDigit(char C, size_t &Value) { 1050 if (isLower(C)) { 1051 Value = C - 'a'; 1052 return true; 1053 } 1054 1055 if (isDigit(C)) { 1056 Value = 26 + (C - '0'); 1057 return true; 1058 } 1059 1060 return false; 1061 } 1062 1063 static void removeNullBytes(OutputBuffer &Output, size_t StartIdx) { 1064 char *Buffer = Output.getBuffer(); 1065 char *Start = Buffer + StartIdx; 1066 char *End = Buffer + Output.getCurrentPosition(); 1067 Output.setCurrentPosition(std::remove(Start, End, '\0') - Buffer); 1068 } 1069 1070 // Encodes code point as UTF-8 and stores results in Output. Returns false if 1071 // CodePoint is not a valid unicode scalar value. 1072 static inline bool encodeUTF8(size_t CodePoint, char *Output) { 1073 if (0xD800 <= CodePoint && CodePoint <= 0xDFFF) 1074 return false; 1075 1076 if (CodePoint <= 0x7F) { 1077 Output[0] = CodePoint; 1078 return true; 1079 } 1080 1081 if (CodePoint <= 0x7FF) { 1082 Output[0] = 0xC0 | ((CodePoint >> 6) & 0x3F); 1083 Output[1] = 0x80 | (CodePoint & 0x3F); 1084 return true; 1085 } 1086 1087 if (CodePoint <= 0xFFFF) { 1088 Output[0] = 0xE0 | (CodePoint >> 12); 1089 Output[1] = 0x80 | ((CodePoint >> 6) & 0x3F); 1090 Output[2] = 0x80 | (CodePoint & 0x3F); 1091 return true; 1092 } 1093 1094 if (CodePoint <= 0x10FFFF) { 1095 Output[0] = 0xF0 | (CodePoint >> 18); 1096 Output[1] = 0x80 | ((CodePoint >> 12) & 0x3F); 1097 Output[2] = 0x80 | ((CodePoint >> 6) & 0x3F); 1098 Output[3] = 0x80 | (CodePoint & 0x3F); 1099 return true; 1100 } 1101 1102 return false; 1103 } 1104 1105 // Decodes string encoded using punycode and appends results to Output. 1106 // Returns true if decoding was successful. 1107 static bool decodePunycode(std::string_view Input, OutputBuffer &Output) { 1108 size_t OutputSize = Output.getCurrentPosition(); 1109 size_t InputIdx = 0; 1110 1111 // Rust uses an underscore as a delimiter. 1112 size_t DelimiterPos = std::string_view::npos; 1113 for (size_t I = 0; I != Input.size(); ++I) 1114 if (Input[I] == '_') 1115 DelimiterPos = I; 1116 1117 if (DelimiterPos != std::string_view::npos) { 1118 // Copy basic code points before the last delimiter to the output. 1119 for (; InputIdx != DelimiterPos; ++InputIdx) { 1120 char C = Input[InputIdx]; 1121 if (!isValid(C)) 1122 return false; 1123 // Code points are padded with zeros while decoding is in progress. 1124 char UTF8[4] = {C}; 1125 Output += std::string_view(UTF8, 4); 1126 } 1127 // Skip over the delimiter. 1128 ++InputIdx; 1129 } 1130 1131 size_t Base = 36; 1132 size_t Skew = 38; 1133 size_t Bias = 72; 1134 size_t N = 0x80; 1135 size_t TMin = 1; 1136 size_t TMax = 26; 1137 size_t Damp = 700; 1138 1139 auto Adapt = [&](size_t Delta, size_t NumPoints) { 1140 Delta /= Damp; 1141 Delta += Delta / NumPoints; 1142 Damp = 2; 1143 1144 size_t K = 0; 1145 while (Delta > (Base - TMin) * TMax / 2) { 1146 Delta /= Base - TMin; 1147 K += Base; 1148 } 1149 return K + (((Base - TMin + 1) * Delta) / (Delta + Skew)); 1150 }; 1151 1152 // Main decoding loop. 1153 for (size_t I = 0; InputIdx != Input.size(); I += 1) { 1154 size_t OldI = I; 1155 size_t W = 1; 1156 size_t Max = std::numeric_limits<size_t>::max(); 1157 for (size_t K = Base; true; K += Base) { 1158 if (InputIdx == Input.size()) 1159 return false; 1160 char C = Input[InputIdx++]; 1161 size_t Digit = 0; 1162 if (!decodePunycodeDigit(C, Digit)) 1163 return false; 1164 1165 if (Digit > (Max - I) / W) 1166 return false; 1167 I += Digit * W; 1168 1169 size_t T; 1170 if (K <= Bias) 1171 T = TMin; 1172 else if (K >= Bias + TMax) 1173 T = TMax; 1174 else 1175 T = K - Bias; 1176 1177 if (Digit < T) 1178 break; 1179 1180 if (W > Max / (Base - T)) 1181 return false; 1182 W *= (Base - T); 1183 } 1184 size_t NumPoints = (Output.getCurrentPosition() - OutputSize) / 4 + 1; 1185 Bias = Adapt(I - OldI, NumPoints); 1186 1187 if (I / NumPoints > Max - N) 1188 return false; 1189 N += I / NumPoints; 1190 I = I % NumPoints; 1191 1192 // Insert N at position I in the output. 1193 char UTF8[4] = {}; 1194 if (!encodeUTF8(N, UTF8)) 1195 return false; 1196 Output.insert(OutputSize + I * 4, UTF8, 4); 1197 } 1198 1199 removeNullBytes(Output, OutputSize); 1200 return true; 1201 } 1202 1203 void Demangler::printIdentifier(Identifier Ident) { 1204 if (Error || !Print) 1205 return; 1206 1207 if (Ident.Punycode) { 1208 if (!decodePunycode(Ident.Name, Output)) 1209 Error = true; 1210 } else { 1211 print(Ident.Name); 1212 } 1213 } 1214 1215 char Demangler::look() const { 1216 if (Error || Position >= Input.size()) 1217 return 0; 1218 1219 return Input[Position]; 1220 } 1221 1222 char Demangler::consume() { 1223 if (Error || Position >= Input.size()) { 1224 Error = true; 1225 return 0; 1226 } 1227 1228 return Input[Position++]; 1229 } 1230 1231 bool Demangler::consumeIf(char Prefix) { 1232 if (Error || Position >= Input.size() || Input[Position] != Prefix) 1233 return false; 1234 1235 Position += 1; 1236 return true; 1237 } 1238 1239 /// Computes A + B. When computation wraps around sets the error and returns 1240 /// false. Otherwise assigns the result to A and returns true. 1241 bool Demangler::addAssign(uint64_t &A, uint64_t B) { 1242 if (A > std::numeric_limits<uint64_t>::max() - B) { 1243 Error = true; 1244 return false; 1245 } 1246 1247 A += B; 1248 return true; 1249 } 1250 1251 /// Computes A * B. When computation wraps around sets the error and returns 1252 /// false. Otherwise assigns the result to A and returns true. 1253 bool Demangler::mulAssign(uint64_t &A, uint64_t B) { 1254 if (B != 0 && A > std::numeric_limits<uint64_t>::max() / B) { 1255 Error = true; 1256 return false; 1257 } 1258 1259 A *= B; 1260 return true; 1261 } 1262