1 //===--- RustDemangle.cpp ---------------------------------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file defines a demangler for Rust v0 mangled symbols as specified in 10 // https://rust-lang.github.io/rfcs/2603-rust-symbol-name-mangling-v0.html 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "llvm/Demangle/RustDemangle.h" 15 #include "llvm/Demangle/Demangle.h" 16 17 #include <algorithm> 18 #include <cassert> 19 #include <cstring> 20 #include <limits> 21 22 using namespace llvm; 23 using namespace rust_demangle; 24 25 char *llvm::rustDemangle(const char *MangledName, char *Buf, size_t *N, 26 int *Status) { 27 if (MangledName == nullptr || (Buf != nullptr && N == nullptr)) { 28 if (Status != nullptr) 29 *Status = demangle_invalid_args; 30 return nullptr; 31 } 32 33 // Return early if mangled name doesn't look like a Rust symbol. 34 StringView Mangled(MangledName); 35 if (!Mangled.startsWith("_R")) { 36 if (Status != nullptr) 37 *Status = demangle_invalid_mangled_name; 38 return nullptr; 39 } 40 41 Demangler D; 42 if (!initializeOutputStream(nullptr, nullptr, D.Output, 1024)) { 43 if (Status != nullptr) 44 *Status = demangle_memory_alloc_failure; 45 return nullptr; 46 } 47 48 if (!D.demangle(Mangled)) { 49 if (Status != nullptr) 50 *Status = demangle_invalid_mangled_name; 51 std::free(D.Output.getBuffer()); 52 return nullptr; 53 } 54 55 D.Output += '\0'; 56 char *Demangled = D.Output.getBuffer(); 57 size_t DemangledLen = D.Output.getCurrentPosition(); 58 59 if (Buf != nullptr) { 60 if (DemangledLen <= *N) { 61 std::memcpy(Buf, Demangled, DemangledLen); 62 std::free(Demangled); 63 Demangled = Buf; 64 } else { 65 std::free(Buf); 66 } 67 } 68 69 if (N != nullptr) 70 *N = DemangledLen; 71 72 if (Status != nullptr) 73 *Status = demangle_success; 74 75 return Demangled; 76 } 77 78 Demangler::Demangler(size_t MaxRecursionLevel) 79 : MaxRecursionLevel(MaxRecursionLevel) {} 80 81 static inline bool isDigit(const char C) { return '0' <= C && C <= '9'; } 82 83 static inline bool isHexDigit(const char C) { 84 return ('0' <= C && C <= '9') || ('a' <= C && C <= 'f'); 85 } 86 87 static inline bool isLower(const char C) { return 'a' <= C && C <= 'z'; } 88 89 static inline bool isUpper(const char C) { return 'A' <= C && C <= 'Z'; } 90 91 /// Returns true if C is a valid mangled character: <0-9a-zA-Z_>. 92 static inline bool isValid(const char C) { 93 return isDigit(C) || isLower(C) || isUpper(C) || C == '_'; 94 } 95 96 // Demangles Rust v0 mangled symbol. Returns true when successful, and false 97 // otherwise. The demangled symbol is stored in Output field. It is 98 // responsibility of the caller to free the memory behind the output stream. 99 // 100 // <symbol-name> = "_R" <path> [<instantiating-crate>] 101 bool Demangler::demangle(StringView Mangled) { 102 Position = 0; 103 Error = false; 104 Print = true; 105 RecursionLevel = 0; 106 107 if (!Mangled.consumeFront("_R")) { 108 Error = true; 109 return false; 110 } 111 Input = Mangled; 112 113 demanglePath(); 114 115 // FIXME parse optional <instantiating-crate>. 116 117 if (Position != Input.size()) 118 Error = true; 119 120 return !Error; 121 } 122 123 // <path> = "C" <identifier> // crate root 124 // | "M" <impl-path> <type> // <T> (inherent impl) 125 // | "X" <impl-path> <type> <path> // <T as Trait> (trait impl) 126 // | "Y" <type> <path> // <T as Trait> (trait definition) 127 // | "N" <ns> <path> <identifier> // ...::ident (nested path) 128 // | "I" <path> {<generic-arg>} "E" // ...<T, U> (generic args) 129 // | <backref> 130 // <identifier> = [<disambiguator>] <undisambiguated-identifier> 131 // <ns> = "C" // closure 132 // | "S" // shim 133 // | <A-Z> // other special namespaces 134 // | <a-z> // internal namespaces 135 void Demangler::demanglePath() { 136 if (Error || RecursionLevel >= MaxRecursionLevel) { 137 Error = true; 138 return; 139 } 140 SwapAndRestore<size_t> SaveRecursionLevel(RecursionLevel, RecursionLevel + 1); 141 142 switch (consume()) { 143 case 'C': { 144 parseOptionalBase62Number('s'); 145 Identifier Ident = parseIdentifier(); 146 print(Ident.Name); 147 break; 148 } 149 case 'M': { 150 demangleImplPath(); 151 print("<"); 152 demangleType(); 153 print(">"); 154 break; 155 } 156 case 'N': { 157 char NS = consume(); 158 if (!isLower(NS) && !isUpper(NS)) { 159 Error = true; 160 break; 161 } 162 demanglePath(); 163 164 uint64_t Disambiguator = parseOptionalBase62Number('s'); 165 Identifier Ident = parseIdentifier(); 166 167 if (isUpper(NS)) { 168 // Special namespaces 169 print("::{"); 170 if (NS == 'C') 171 print("closure"); 172 else if (NS == 'S') 173 print("shim"); 174 else 175 print(NS); 176 if (!Ident.empty()) { 177 print(":"); 178 print(Ident.Name); 179 } 180 print('#'); 181 printDecimalNumber(Disambiguator); 182 print('}'); 183 } else { 184 // Implementation internal namespaces. 185 if (!Ident.empty()) { 186 print("::"); 187 print(Ident.Name); 188 } 189 } 190 break; 191 } 192 case 'I': { 193 demanglePath(); 194 print("::<"); 195 for (size_t I = 0; !Error && !consumeIf('E'); ++I) { 196 if (I > 0) 197 print(", "); 198 demangleGenericArg(); 199 } 200 print(">"); 201 break; 202 } 203 default: 204 // FIXME parse remaining productions. 205 Error = true; 206 break; 207 } 208 } 209 210 // <impl-path> = [<disambiguator>] <path> 211 // <disambiguator> = "s" <base-62-number> 212 void Demangler::demangleImplPath() { 213 SwapAndRestore<bool> SavePrint(Print, false); 214 parseOptionalBase62Number('s'); 215 demanglePath(); 216 } 217 218 // <generic-arg> = <lifetime> 219 // | <type> 220 // | "K" <const> 221 // <lifetime> = "L" <base-62-number> 222 void Demangler::demangleGenericArg() { 223 if (consumeIf('K')) 224 demangleConst(); 225 else 226 demangleType(); 227 // FIXME demangle lifetimes. 228 } 229 230 // <basic-type> = "a" // i8 231 // | "b" // bool 232 // | "c" // char 233 // | "d" // f64 234 // | "e" // str 235 // | "f" // f32 236 // | "h" // u8 237 // | "i" // isize 238 // | "j" // usize 239 // | "l" // i32 240 // | "m" // u32 241 // | "n" // i128 242 // | "o" // u128 243 // | "s" // i16 244 // | "t" // u16 245 // | "u" // () 246 // | "v" // ... 247 // | "x" // i64 248 // | "y" // u64 249 // | "z" // ! 250 // | "p" // placeholder (e.g. for generic params), shown as _ 251 static bool parseBasicType(char C, BasicType &Type) { 252 switch (C) { 253 case 'a': 254 Type = BasicType::I8; 255 return true; 256 case 'b': 257 Type = BasicType::Bool; 258 return true; 259 case 'c': 260 Type = BasicType::Char; 261 return true; 262 case 'd': 263 Type = BasicType::F64; 264 return true; 265 case 'e': 266 Type = BasicType::Str; 267 return true; 268 case 'f': 269 Type = BasicType::F32; 270 return true; 271 case 'h': 272 Type = BasicType::U8; 273 return true; 274 case 'i': 275 Type = BasicType::ISize; 276 return true; 277 case 'j': 278 Type = BasicType::USize; 279 return true; 280 case 'l': 281 Type = BasicType::I32; 282 return true; 283 case 'm': 284 Type = BasicType::U32; 285 return true; 286 case 'n': 287 Type = BasicType::I128; 288 return true; 289 case 'o': 290 Type = BasicType::U128; 291 return true; 292 case 'p': 293 Type = BasicType::Placeholder; 294 return true; 295 case 's': 296 Type = BasicType::I16; 297 return true; 298 case 't': 299 Type = BasicType::U16; 300 return true; 301 case 'u': 302 Type = BasicType::Unit; 303 return true; 304 case 'v': 305 Type = BasicType::Variadic; 306 return true; 307 case 'x': 308 Type = BasicType::I64; 309 return true; 310 case 'y': 311 Type = BasicType::U64; 312 return true; 313 case 'z': 314 Type = BasicType::Never; 315 return true; 316 default: 317 return false; 318 } 319 } 320 321 void Demangler::printBasicType(BasicType Type) { 322 switch (Type) { 323 case BasicType::Bool: 324 print("bool"); 325 break; 326 case BasicType::Char: 327 print("char"); 328 break; 329 case BasicType::I8: 330 print("i8"); 331 break; 332 case BasicType::I16: 333 print("i16"); 334 break; 335 case BasicType::I32: 336 print("i32"); 337 break; 338 case BasicType::I64: 339 print("i64"); 340 break; 341 case BasicType::I128: 342 print("i128"); 343 break; 344 case BasicType::ISize: 345 print("isize"); 346 break; 347 case BasicType::U8: 348 print("u8"); 349 break; 350 case BasicType::U16: 351 print("u16"); 352 break; 353 case BasicType::U32: 354 print("u32"); 355 break; 356 case BasicType::U64: 357 print("u64"); 358 break; 359 case BasicType::U128: 360 print("u128"); 361 break; 362 case BasicType::USize: 363 print("usize"); 364 break; 365 case BasicType::F32: 366 print("f32"); 367 break; 368 case BasicType::F64: 369 print("f64"); 370 break; 371 case BasicType::Str: 372 print("str"); 373 break; 374 case BasicType::Placeholder: 375 print("_"); 376 break; 377 case BasicType::Unit: 378 print("()"); 379 break; 380 case BasicType::Variadic: 381 print("..."); 382 break; 383 case BasicType::Never: 384 print("!"); 385 break; 386 } 387 } 388 389 // <type> = | <basic-type> 390 // | <path> // named type 391 // | "A" <type> <const> // [T; N] 392 // | "S" <type> // [T] 393 // | "T" {<type>} "E" // (T1, T2, T3, ...) 394 // | "R" [<lifetime>] <type> // &T 395 // | "Q" [<lifetime>] <type> // &mut T 396 // | "P" <type> // *const T 397 // | "O" <type> // *mut T 398 // | "F" <fn-sig> // fn(...) -> ... 399 // | "D" <dyn-bounds> <lifetime> // dyn Trait<Assoc = X> + Send + 'a 400 // | <backref> // backref 401 void Demangler::demangleType() { 402 BasicType Type; 403 if (parseBasicType(consume(), Type)) 404 printBasicType(Type); 405 else 406 Error = true; // FIXME parse remaining productions. 407 } 408 409 // <const> = <basic-type> <const-data> 410 // | "p" // placeholder 411 // | <backref> 412 void Demangler::demangleConst() { 413 BasicType Type; 414 if (parseBasicType(consume(), Type)) { 415 switch (Type) { 416 case BasicType::I8: 417 case BasicType::I16: 418 case BasicType::I32: 419 case BasicType::I64: 420 case BasicType::I128: 421 case BasicType::ISize: 422 case BasicType::U8: 423 case BasicType::U16: 424 case BasicType::U32: 425 case BasicType::U64: 426 case BasicType::U128: 427 case BasicType::USize: 428 demangleConstInt(); 429 break; 430 case BasicType::Bool: 431 demangleConstBool(); 432 break; 433 case BasicType::Char: 434 demangleConstChar(); 435 break; 436 case BasicType::Placeholder: 437 print('_'); 438 break; 439 default: 440 // FIXME demangle backreferences. 441 Error = true; 442 break; 443 } 444 } else { 445 Error = true; 446 } 447 } 448 449 // <const-data> = ["n"] <hex-number> 450 void Demangler::demangleConstInt() { 451 if (consumeIf('n')) 452 print('-'); 453 454 StringView HexDigits; 455 uint64_t Value = parseHexNumber(HexDigits); 456 if (HexDigits.size() <= 16) { 457 printDecimalNumber(Value); 458 } else { 459 print("0x"); 460 print(HexDigits); 461 } 462 } 463 464 // <const-data> = "0_" // false 465 // | "1_" // true 466 void Demangler::demangleConstBool() { 467 StringView HexDigits; 468 parseHexNumber(HexDigits); 469 if (HexDigits == "0") 470 print("false"); 471 else if (HexDigits == "1") 472 print("true"); 473 else 474 Error = true; 475 } 476 477 /// Returns true if CodePoint represents a printable ASCII character. 478 static bool isAsciiPrintable(uint64_t CodePoint) { 479 return 0x20 <= CodePoint && CodePoint <= 0x7e; 480 } 481 482 // <const-data> = <hex-number> 483 void Demangler::demangleConstChar() { 484 StringView HexDigits; 485 uint64_t CodePoint = parseHexNumber(HexDigits); 486 if (Error || HexDigits.size() > 6) { 487 Error = true; 488 return; 489 } 490 491 print("'"); 492 switch (CodePoint) { 493 case '\t': 494 print(R"(\t)"); 495 break; 496 case '\r': 497 print(R"(\r)"); 498 break; 499 case '\n': 500 print(R"(\n)"); 501 break; 502 case '\\': 503 print(R"(\\)"); 504 break; 505 case '"': 506 print(R"(")"); 507 break; 508 case '\'': 509 print(R"(\')"); 510 break; 511 default: 512 if (isAsciiPrintable(CodePoint)) { 513 char C = CodePoint; 514 print(C); 515 } else { 516 print(R"(\u{)"); 517 print(HexDigits); 518 print('}'); 519 } 520 break; 521 } 522 print('\''); 523 } 524 525 // <undisambiguated-identifier> = ["u"] <decimal-number> ["_"] <bytes> 526 Identifier Demangler::parseIdentifier() { 527 bool Punycode = consumeIf('u'); 528 uint64_t Bytes = parseDecimalNumber(); 529 530 // Underscore resolves the ambiguity when identifier starts with a decimal 531 // digit or another underscore. 532 consumeIf('_'); 533 534 if (Error || Bytes > Input.size() - Position) { 535 Error = true; 536 return {}; 537 } 538 StringView S = Input.substr(Position, Bytes); 539 Position += Bytes; 540 541 if (!std::all_of(S.begin(), S.end(), isValid)) { 542 Error = true; 543 return {}; 544 } 545 546 return {S, Punycode}; 547 } 548 549 // Parses optional base 62 number. The presence of a number is determined using 550 // Tag. Returns 0 when tag is absent and parsed value + 1 otherwise. 551 uint64_t Demangler::parseOptionalBase62Number(char Tag) { 552 if (!consumeIf(Tag)) 553 return 0; 554 555 uint64_t N = parseBase62Number(); 556 if (Error || !addAssign(N, 1)) 557 return 0; 558 559 return N; 560 } 561 562 // Parses base 62 number with <0-9a-zA-Z> as digits. Number is terminated by 563 // "_". All values are offset by 1, so that "_" encodes 0, "0_" encodes 1, 564 // "1_" encodes 2, etc. 565 // 566 // <base-62-number> = {<0-9a-zA-Z>} "_" 567 uint64_t Demangler::parseBase62Number() { 568 if (consumeIf('_')) 569 return 0; 570 571 uint64_t Value = 0; 572 573 while (true) { 574 uint64_t Digit; 575 char C = consume(); 576 577 if (C == '_') { 578 break; 579 } else if (isDigit(C)) { 580 Digit = C - '0'; 581 } else if (isLower(C)) { 582 Digit = 10 + (C - 'a'); 583 } else if (isUpper(C)) { 584 Digit = 10 + 26 + (C - 'A'); 585 } else { 586 Error = true; 587 return 0; 588 } 589 590 if (!mulAssign(Value, 62)) 591 return 0; 592 593 if (!addAssign(Value, Digit)) 594 return 0; 595 } 596 597 if (!addAssign(Value, 1)) 598 return 0; 599 600 return Value; 601 } 602 603 // Parses a decimal number that had been encoded without any leading zeros. 604 // 605 // <decimal-number> = "0" 606 // | <1-9> {<0-9>} 607 uint64_t Demangler::parseDecimalNumber() { 608 char C = look(); 609 if (!isDigit(C)) { 610 Error = true; 611 return 0; 612 } 613 614 if (C == '0') { 615 consume(); 616 return 0; 617 } 618 619 uint64_t Value = 0; 620 621 while (isDigit(look())) { 622 if (!mulAssign(Value, 10)) { 623 Error = true; 624 return 0; 625 } 626 627 uint64_t D = consume() - '0'; 628 if (!addAssign(Value, D)) 629 return 0; 630 } 631 632 return Value; 633 } 634 635 // Parses a hexadecimal number with <0-9a-f> as a digits. Returns the parsed 636 // value and stores hex digits in HexDigits. The return value is unspecified if 637 // HexDigits.size() > 16. 638 // 639 // <hex-number> = "0_" 640 // | <1-9a-f> {<0-9a-f>} "_" 641 uint64_t Demangler::parseHexNumber(StringView &HexDigits) { 642 size_t Start = Position; 643 uint64_t Value = 0; 644 645 if (!isHexDigit(look())) 646 Error = true; 647 648 if (consumeIf('0')) { 649 if (!consumeIf('_')) 650 Error = true; 651 } else { 652 while (!Error && !consumeIf('_')) { 653 char C = consume(); 654 Value *= 16; 655 if (isDigit(C)) 656 Value += C - '0'; 657 else if ('a' <= C && C <= 'f') 658 Value += 10 + (C - 'a'); 659 else 660 Error = true; 661 } 662 } 663 664 if (Error) { 665 HexDigits = StringView(); 666 return 0; 667 } 668 669 size_t End = Position - 1; 670 assert(Start < End); 671 HexDigits = Input.substr(Start, End - Start); 672 return Value; 673 } 674