1 //===--- RustDemangle.cpp ---------------------------------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file defines a demangler for Rust v0 mangled symbols as specified in 10 // https://rust-lang.github.io/rfcs/2603-rust-symbol-name-mangling-v0.html 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "llvm/Demangle/RustDemangle.h" 15 #include "llvm/Demangle/Demangle.h" 16 17 #include <algorithm> 18 #include <cassert> 19 #include <cstring> 20 #include <limits> 21 22 using namespace llvm; 23 using namespace rust_demangle; 24 25 char *llvm::rustDemangle(const char *MangledName, char *Buf, size_t *N, 26 int *Status) { 27 if (MangledName == nullptr || (Buf != nullptr && N == nullptr)) { 28 if (Status != nullptr) 29 *Status = demangle_invalid_args; 30 return nullptr; 31 } 32 33 // Return early if mangled name doesn't look like a Rust symbol. 34 StringView Mangled(MangledName); 35 if (!Mangled.startsWith("_R")) { 36 if (Status != nullptr) 37 *Status = demangle_invalid_mangled_name; 38 return nullptr; 39 } 40 41 Demangler D; 42 if (!initializeOutputStream(nullptr, nullptr, D.Output, 1024)) { 43 if (Status != nullptr) 44 *Status = demangle_memory_alloc_failure; 45 return nullptr; 46 } 47 48 if (!D.demangle(Mangled)) { 49 if (Status != nullptr) 50 *Status = demangle_invalid_mangled_name; 51 std::free(D.Output.getBuffer()); 52 return nullptr; 53 } 54 55 D.Output += '\0'; 56 char *Demangled = D.Output.getBuffer(); 57 size_t DemangledLen = D.Output.getCurrentPosition(); 58 59 if (Buf != nullptr) { 60 if (DemangledLen <= *N) { 61 std::memcpy(Buf, Demangled, DemangledLen); 62 std::free(Demangled); 63 Demangled = Buf; 64 } else { 65 std::free(Buf); 66 } 67 } 68 69 if (N != nullptr) 70 *N = DemangledLen; 71 72 if (Status != nullptr) 73 *Status = demangle_success; 74 75 return Demangled; 76 } 77 78 Demangler::Demangler(size_t MaxRecursionLevel) 79 : MaxRecursionLevel(MaxRecursionLevel) {} 80 81 static inline bool isDigit(const char C) { return '0' <= C && C <= '9'; } 82 83 static inline bool isHexDigit(const char C) { 84 return ('0' <= C && C <= '9') || ('a' <= C && C <= 'f'); 85 } 86 87 static inline bool isLower(const char C) { return 'a' <= C && C <= 'z'; } 88 89 static inline bool isUpper(const char C) { return 'A' <= C && C <= 'Z'; } 90 91 /// Returns true if C is a valid mangled character: <0-9a-zA-Z_>. 92 static inline bool isValid(const char C) { 93 return isDigit(C) || isLower(C) || isUpper(C) || C == '_'; 94 } 95 96 // Demangles Rust v0 mangled symbol. Returns true when successful, and false 97 // otherwise. The demangled symbol is stored in Output field. It is 98 // responsibility of the caller to free the memory behind the output stream. 99 // 100 // <symbol-name> = "_R" <path> [<instantiating-crate>] 101 bool Demangler::demangle(StringView Mangled) { 102 Position = 0; 103 Error = false; 104 Print = true; 105 RecursionLevel = 0; 106 107 if (!Mangled.consumeFront("_R")) { 108 Error = true; 109 return false; 110 } 111 Input = Mangled; 112 113 demanglePath(); 114 115 // FIXME parse optional <instantiating-crate>. 116 117 if (Position != Input.size()) 118 Error = true; 119 120 return !Error; 121 } 122 123 // <path> = "C" <identifier> // crate root 124 // | "M" <impl-path> <type> // <T> (inherent impl) 125 // | "X" <impl-path> <type> <path> // <T as Trait> (trait impl) 126 // | "Y" <type> <path> // <T as Trait> (trait definition) 127 // | "N" <ns> <path> <identifier> // ...::ident (nested path) 128 // | "I" <path> {<generic-arg>} "E" // ...<T, U> (generic args) 129 // | <backref> 130 // <identifier> = [<disambiguator>] <undisambiguated-identifier> 131 // <ns> = "C" // closure 132 // | "S" // shim 133 // | <A-Z> // other special namespaces 134 // | <a-z> // internal namespaces 135 void Demangler::demanglePath() { 136 if (Error || RecursionLevel >= MaxRecursionLevel) { 137 Error = true; 138 return; 139 } 140 SwapAndRestore<size_t> SaveRecursionLevel(RecursionLevel, RecursionLevel + 1); 141 142 switch (consume()) { 143 case 'C': { 144 parseOptionalBase62Number('s'); 145 Identifier Ident = parseIdentifier(); 146 print(Ident.Name); 147 break; 148 } 149 case 'M': { 150 demangleImplPath(); 151 print("<"); 152 demangleType(); 153 print(">"); 154 break; 155 } 156 case 'X': { 157 demangleImplPath(); 158 print("<"); 159 demangleType(); 160 print(" as "); 161 demanglePath(); 162 print(">"); 163 break; 164 } 165 case 'Y': { 166 print("<"); 167 demangleType(); 168 print(" as "); 169 demanglePath(); 170 print(">"); 171 break; 172 } 173 case 'N': { 174 char NS = consume(); 175 if (!isLower(NS) && !isUpper(NS)) { 176 Error = true; 177 break; 178 } 179 demanglePath(); 180 181 uint64_t Disambiguator = parseOptionalBase62Number('s'); 182 Identifier Ident = parseIdentifier(); 183 184 if (isUpper(NS)) { 185 // Special namespaces 186 print("::{"); 187 if (NS == 'C') 188 print("closure"); 189 else if (NS == 'S') 190 print("shim"); 191 else 192 print(NS); 193 if (!Ident.empty()) { 194 print(":"); 195 print(Ident.Name); 196 } 197 print('#'); 198 printDecimalNumber(Disambiguator); 199 print('}'); 200 } else { 201 // Implementation internal namespaces. 202 if (!Ident.empty()) { 203 print("::"); 204 print(Ident.Name); 205 } 206 } 207 break; 208 } 209 case 'I': { 210 demanglePath(); 211 print("::<"); 212 for (size_t I = 0; !Error && !consumeIf('E'); ++I) { 213 if (I > 0) 214 print(", "); 215 demangleGenericArg(); 216 } 217 print(">"); 218 break; 219 } 220 default: 221 // FIXME parse remaining productions. 222 Error = true; 223 break; 224 } 225 } 226 227 // <impl-path> = [<disambiguator>] <path> 228 // <disambiguator> = "s" <base-62-number> 229 void Demangler::demangleImplPath() { 230 SwapAndRestore<bool> SavePrint(Print, false); 231 parseOptionalBase62Number('s'); 232 demanglePath(); 233 } 234 235 // <generic-arg> = <lifetime> 236 // | <type> 237 // | "K" <const> 238 // <lifetime> = "L" <base-62-number> 239 void Demangler::demangleGenericArg() { 240 if (consumeIf('K')) 241 demangleConst(); 242 else 243 demangleType(); 244 // FIXME demangle lifetimes. 245 } 246 247 // <basic-type> = "a" // i8 248 // | "b" // bool 249 // | "c" // char 250 // | "d" // f64 251 // | "e" // str 252 // | "f" // f32 253 // | "h" // u8 254 // | "i" // isize 255 // | "j" // usize 256 // | "l" // i32 257 // | "m" // u32 258 // | "n" // i128 259 // | "o" // u128 260 // | "s" // i16 261 // | "t" // u16 262 // | "u" // () 263 // | "v" // ... 264 // | "x" // i64 265 // | "y" // u64 266 // | "z" // ! 267 // | "p" // placeholder (e.g. for generic params), shown as _ 268 static bool parseBasicType(char C, BasicType &Type) { 269 switch (C) { 270 case 'a': 271 Type = BasicType::I8; 272 return true; 273 case 'b': 274 Type = BasicType::Bool; 275 return true; 276 case 'c': 277 Type = BasicType::Char; 278 return true; 279 case 'd': 280 Type = BasicType::F64; 281 return true; 282 case 'e': 283 Type = BasicType::Str; 284 return true; 285 case 'f': 286 Type = BasicType::F32; 287 return true; 288 case 'h': 289 Type = BasicType::U8; 290 return true; 291 case 'i': 292 Type = BasicType::ISize; 293 return true; 294 case 'j': 295 Type = BasicType::USize; 296 return true; 297 case 'l': 298 Type = BasicType::I32; 299 return true; 300 case 'm': 301 Type = BasicType::U32; 302 return true; 303 case 'n': 304 Type = BasicType::I128; 305 return true; 306 case 'o': 307 Type = BasicType::U128; 308 return true; 309 case 'p': 310 Type = BasicType::Placeholder; 311 return true; 312 case 's': 313 Type = BasicType::I16; 314 return true; 315 case 't': 316 Type = BasicType::U16; 317 return true; 318 case 'u': 319 Type = BasicType::Unit; 320 return true; 321 case 'v': 322 Type = BasicType::Variadic; 323 return true; 324 case 'x': 325 Type = BasicType::I64; 326 return true; 327 case 'y': 328 Type = BasicType::U64; 329 return true; 330 case 'z': 331 Type = BasicType::Never; 332 return true; 333 default: 334 return false; 335 } 336 } 337 338 void Demangler::printBasicType(BasicType Type) { 339 switch (Type) { 340 case BasicType::Bool: 341 print("bool"); 342 break; 343 case BasicType::Char: 344 print("char"); 345 break; 346 case BasicType::I8: 347 print("i8"); 348 break; 349 case BasicType::I16: 350 print("i16"); 351 break; 352 case BasicType::I32: 353 print("i32"); 354 break; 355 case BasicType::I64: 356 print("i64"); 357 break; 358 case BasicType::I128: 359 print("i128"); 360 break; 361 case BasicType::ISize: 362 print("isize"); 363 break; 364 case BasicType::U8: 365 print("u8"); 366 break; 367 case BasicType::U16: 368 print("u16"); 369 break; 370 case BasicType::U32: 371 print("u32"); 372 break; 373 case BasicType::U64: 374 print("u64"); 375 break; 376 case BasicType::U128: 377 print("u128"); 378 break; 379 case BasicType::USize: 380 print("usize"); 381 break; 382 case BasicType::F32: 383 print("f32"); 384 break; 385 case BasicType::F64: 386 print("f64"); 387 break; 388 case BasicType::Str: 389 print("str"); 390 break; 391 case BasicType::Placeholder: 392 print("_"); 393 break; 394 case BasicType::Unit: 395 print("()"); 396 break; 397 case BasicType::Variadic: 398 print("..."); 399 break; 400 case BasicType::Never: 401 print("!"); 402 break; 403 } 404 } 405 406 // <type> = | <basic-type> 407 // | <path> // named type 408 // | "A" <type> <const> // [T; N] 409 // | "S" <type> // [T] 410 // | "T" {<type>} "E" // (T1, T2, T3, ...) 411 // | "R" [<lifetime>] <type> // &T 412 // | "Q" [<lifetime>] <type> // &mut T 413 // | "P" <type> // *const T 414 // | "O" <type> // *mut T 415 // | "F" <fn-sig> // fn(...) -> ... 416 // | "D" <dyn-bounds> <lifetime> // dyn Trait<Assoc = X> + Send + 'a 417 // | <backref> // backref 418 void Demangler::demangleType() { 419 BasicType Type; 420 if (parseBasicType(consume(), Type)) 421 printBasicType(Type); 422 else 423 Error = true; // FIXME parse remaining productions. 424 } 425 426 // <const> = <basic-type> <const-data> 427 // | "p" // placeholder 428 // | <backref> 429 void Demangler::demangleConst() { 430 BasicType Type; 431 if (parseBasicType(consume(), Type)) { 432 switch (Type) { 433 case BasicType::I8: 434 case BasicType::I16: 435 case BasicType::I32: 436 case BasicType::I64: 437 case BasicType::I128: 438 case BasicType::ISize: 439 case BasicType::U8: 440 case BasicType::U16: 441 case BasicType::U32: 442 case BasicType::U64: 443 case BasicType::U128: 444 case BasicType::USize: 445 demangleConstInt(); 446 break; 447 case BasicType::Bool: 448 demangleConstBool(); 449 break; 450 case BasicType::Char: 451 demangleConstChar(); 452 break; 453 case BasicType::Placeholder: 454 print('_'); 455 break; 456 default: 457 // FIXME demangle backreferences. 458 Error = true; 459 break; 460 } 461 } else { 462 Error = true; 463 } 464 } 465 466 // <const-data> = ["n"] <hex-number> 467 void Demangler::demangleConstInt() { 468 if (consumeIf('n')) 469 print('-'); 470 471 StringView HexDigits; 472 uint64_t Value = parseHexNumber(HexDigits); 473 if (HexDigits.size() <= 16) { 474 printDecimalNumber(Value); 475 } else { 476 print("0x"); 477 print(HexDigits); 478 } 479 } 480 481 // <const-data> = "0_" // false 482 // | "1_" // true 483 void Demangler::demangleConstBool() { 484 StringView HexDigits; 485 parseHexNumber(HexDigits); 486 if (HexDigits == "0") 487 print("false"); 488 else if (HexDigits == "1") 489 print("true"); 490 else 491 Error = true; 492 } 493 494 /// Returns true if CodePoint represents a printable ASCII character. 495 static bool isAsciiPrintable(uint64_t CodePoint) { 496 return 0x20 <= CodePoint && CodePoint <= 0x7e; 497 } 498 499 // <const-data> = <hex-number> 500 void Demangler::demangleConstChar() { 501 StringView HexDigits; 502 uint64_t CodePoint = parseHexNumber(HexDigits); 503 if (Error || HexDigits.size() > 6) { 504 Error = true; 505 return; 506 } 507 508 print("'"); 509 switch (CodePoint) { 510 case '\t': 511 print(R"(\t)"); 512 break; 513 case '\r': 514 print(R"(\r)"); 515 break; 516 case '\n': 517 print(R"(\n)"); 518 break; 519 case '\\': 520 print(R"(\\)"); 521 break; 522 case '"': 523 print(R"(")"); 524 break; 525 case '\'': 526 print(R"(\')"); 527 break; 528 default: 529 if (isAsciiPrintable(CodePoint)) { 530 char C = CodePoint; 531 print(C); 532 } else { 533 print(R"(\u{)"); 534 print(HexDigits); 535 print('}'); 536 } 537 break; 538 } 539 print('\''); 540 } 541 542 // <undisambiguated-identifier> = ["u"] <decimal-number> ["_"] <bytes> 543 Identifier Demangler::parseIdentifier() { 544 bool Punycode = consumeIf('u'); 545 uint64_t Bytes = parseDecimalNumber(); 546 547 // Underscore resolves the ambiguity when identifier starts with a decimal 548 // digit or another underscore. 549 consumeIf('_'); 550 551 if (Error || Bytes > Input.size() - Position) { 552 Error = true; 553 return {}; 554 } 555 StringView S = Input.substr(Position, Bytes); 556 Position += Bytes; 557 558 if (!std::all_of(S.begin(), S.end(), isValid)) { 559 Error = true; 560 return {}; 561 } 562 563 return {S, Punycode}; 564 } 565 566 // Parses optional base 62 number. The presence of a number is determined using 567 // Tag. Returns 0 when tag is absent and parsed value + 1 otherwise. 568 uint64_t Demangler::parseOptionalBase62Number(char Tag) { 569 if (!consumeIf(Tag)) 570 return 0; 571 572 uint64_t N = parseBase62Number(); 573 if (Error || !addAssign(N, 1)) 574 return 0; 575 576 return N; 577 } 578 579 // Parses base 62 number with <0-9a-zA-Z> as digits. Number is terminated by 580 // "_". All values are offset by 1, so that "_" encodes 0, "0_" encodes 1, 581 // "1_" encodes 2, etc. 582 // 583 // <base-62-number> = {<0-9a-zA-Z>} "_" 584 uint64_t Demangler::parseBase62Number() { 585 if (consumeIf('_')) 586 return 0; 587 588 uint64_t Value = 0; 589 590 while (true) { 591 uint64_t Digit; 592 char C = consume(); 593 594 if (C == '_') { 595 break; 596 } else if (isDigit(C)) { 597 Digit = C - '0'; 598 } else if (isLower(C)) { 599 Digit = 10 + (C - 'a'); 600 } else if (isUpper(C)) { 601 Digit = 10 + 26 + (C - 'A'); 602 } else { 603 Error = true; 604 return 0; 605 } 606 607 if (!mulAssign(Value, 62)) 608 return 0; 609 610 if (!addAssign(Value, Digit)) 611 return 0; 612 } 613 614 if (!addAssign(Value, 1)) 615 return 0; 616 617 return Value; 618 } 619 620 // Parses a decimal number that had been encoded without any leading zeros. 621 // 622 // <decimal-number> = "0" 623 // | <1-9> {<0-9>} 624 uint64_t Demangler::parseDecimalNumber() { 625 char C = look(); 626 if (!isDigit(C)) { 627 Error = true; 628 return 0; 629 } 630 631 if (C == '0') { 632 consume(); 633 return 0; 634 } 635 636 uint64_t Value = 0; 637 638 while (isDigit(look())) { 639 if (!mulAssign(Value, 10)) { 640 Error = true; 641 return 0; 642 } 643 644 uint64_t D = consume() - '0'; 645 if (!addAssign(Value, D)) 646 return 0; 647 } 648 649 return Value; 650 } 651 652 // Parses a hexadecimal number with <0-9a-f> as a digits. Returns the parsed 653 // value and stores hex digits in HexDigits. The return value is unspecified if 654 // HexDigits.size() > 16. 655 // 656 // <hex-number> = "0_" 657 // | <1-9a-f> {<0-9a-f>} "_" 658 uint64_t Demangler::parseHexNumber(StringView &HexDigits) { 659 size_t Start = Position; 660 uint64_t Value = 0; 661 662 if (!isHexDigit(look())) 663 Error = true; 664 665 if (consumeIf('0')) { 666 if (!consumeIf('_')) 667 Error = true; 668 } else { 669 while (!Error && !consumeIf('_')) { 670 char C = consume(); 671 Value *= 16; 672 if (isDigit(C)) 673 Value += C - '0'; 674 else if ('a' <= C && C <= 'f') 675 Value += 10 + (C - 'a'); 676 else 677 Error = true; 678 } 679 } 680 681 if (Error) { 682 HexDigits = StringView(); 683 return 0; 684 } 685 686 size_t End = Position - 1; 687 assert(Start < End); 688 HexDigits = Input.substr(Start, End - Start); 689 return Value; 690 } 691