1 //===--- RustDemangle.cpp ---------------------------------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file defines a demangler for Rust v0 mangled symbols as specified in 10 // https://rust-lang.github.io/rfcs/2603-rust-symbol-name-mangling-v0.html 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "llvm/Demangle/RustDemangle.h" 15 #include "llvm/Demangle/Demangle.h" 16 17 #include <algorithm> 18 #include <cassert> 19 #include <cstring> 20 #include <limits> 21 22 using namespace llvm; 23 using namespace rust_demangle; 24 25 char *llvm::rustDemangle(const char *MangledName, char *Buf, size_t *N, 26 int *Status) { 27 if (MangledName == nullptr || (Buf != nullptr && N == nullptr)) { 28 if (Status != nullptr) 29 *Status = demangle_invalid_args; 30 return nullptr; 31 } 32 33 // Return early if mangled name doesn't look like a Rust symbol. 34 StringView Mangled(MangledName); 35 if (!Mangled.startsWith("_R")) { 36 if (Status != nullptr) 37 *Status = demangle_invalid_mangled_name; 38 return nullptr; 39 } 40 41 Demangler D; 42 if (!initializeOutputStream(nullptr, nullptr, D.Output, 1024)) { 43 if (Status != nullptr) 44 *Status = demangle_memory_alloc_failure; 45 return nullptr; 46 } 47 48 if (!D.demangle(Mangled)) { 49 if (Status != nullptr) 50 *Status = demangle_invalid_mangled_name; 51 std::free(D.Output.getBuffer()); 52 return nullptr; 53 } 54 55 D.Output += '\0'; 56 char *Demangled = D.Output.getBuffer(); 57 size_t DemangledLen = D.Output.getCurrentPosition(); 58 59 if (Buf != nullptr) { 60 if (DemangledLen <= *N) { 61 std::memcpy(Buf, Demangled, DemangledLen); 62 std::free(Demangled); 63 Demangled = Buf; 64 } else { 65 std::free(Buf); 66 } 67 } 68 69 if (N != nullptr) 70 *N = DemangledLen; 71 72 if (Status != nullptr) 73 *Status = demangle_success; 74 75 return Demangled; 76 } 77 78 Demangler::Demangler(size_t MaxRecursionLevel) 79 : MaxRecursionLevel(MaxRecursionLevel) {} 80 81 static inline bool isDigit(const char C) { return '0' <= C && C <= '9'; } 82 83 static inline bool isHexDigit(const char C) { 84 return ('0' <= C && C <= '9') || ('a' <= C && C <= 'f'); 85 } 86 87 static inline bool isLower(const char C) { return 'a' <= C && C <= 'z'; } 88 89 static inline bool isUpper(const char C) { return 'A' <= C && C <= 'Z'; } 90 91 /// Returns true if C is a valid mangled character: <0-9a-zA-Z_>. 92 static inline bool isValid(const char C) { 93 return isDigit(C) || isLower(C) || isUpper(C) || C == '_'; 94 } 95 96 // Demangles Rust v0 mangled symbol. Returns true when successful, and false 97 // otherwise. The demangled symbol is stored in Output field. It is 98 // responsibility of the caller to free the memory behind the output stream. 99 // 100 // <symbol-name> = "_R" <path> [<instantiating-crate>] 101 bool Demangler::demangle(StringView Mangled) { 102 Position = 0; 103 Error = false; 104 RecursionLevel = 0; 105 106 if (!Mangled.consumeFront("_R")) { 107 Error = true; 108 return false; 109 } 110 Input = Mangled; 111 112 demanglePath(); 113 114 // FIXME parse optional <instantiating-crate>. 115 116 if (Position != Input.size()) 117 Error = true; 118 119 return !Error; 120 } 121 122 // <path> = "C" <identifier> // crate root 123 // | "M" <impl-path> <type> // <T> (inherent impl) 124 // | "X" <impl-path> <type> <path> // <T as Trait> (trait impl) 125 // | "Y" <type> <path> // <T as Trait> (trait definition) 126 // | "N" <ns> <path> <identifier> // ...::ident (nested path) 127 // | "I" <path> {<generic-arg>} "E" // ...<T, U> (generic args) 128 // | <backref> 129 // <identifier> = [<disambiguator>] <undisambiguated-identifier> 130 // <ns> = "C" // closure 131 // | "S" // shim 132 // | <A-Z> // other special namespaces 133 // | <a-z> // internal namespaces 134 void Demangler::demanglePath() { 135 if (Error || RecursionLevel >= MaxRecursionLevel) { 136 Error = true; 137 return; 138 } 139 SwapAndRestore<size_t> SaveRecursionLevel(RecursionLevel, RecursionLevel + 1); 140 141 switch (consume()) { 142 case 'C': { 143 parseOptionalBase62Number('s'); 144 Identifier Ident = parseIdentifier(); 145 print(Ident.Name); 146 break; 147 } 148 case 'N': { 149 char NS = consume(); 150 if (!isLower(NS) && !isUpper(NS)) { 151 Error = true; 152 break; 153 } 154 demanglePath(); 155 156 uint64_t Disambiguator = parseOptionalBase62Number('s'); 157 Identifier Ident = parseIdentifier(); 158 159 if (isUpper(NS)) { 160 // Special namespaces 161 print("::{"); 162 if (NS == 'C') 163 print("closure"); 164 else if (NS == 'S') 165 print("shim"); 166 else 167 print(NS); 168 if (!Ident.empty()) { 169 print(":"); 170 print(Ident.Name); 171 } 172 print('#'); 173 printDecimalNumber(Disambiguator); 174 print('}'); 175 } else { 176 // Implementation internal namespaces. 177 if (!Ident.empty()) { 178 print("::"); 179 print(Ident.Name); 180 } 181 } 182 break; 183 } 184 case 'I': { 185 demanglePath(); 186 print("::<"); 187 for (size_t I = 0; !Error && !consumeIf('E'); ++I) { 188 if (I > 0) 189 print(", "); 190 demangleGenericArg(); 191 } 192 print(">"); 193 break; 194 } 195 default: 196 // FIXME parse remaining productions. 197 Error = true; 198 break; 199 } 200 } 201 202 // <generic-arg> = <lifetime> 203 // | <type> 204 // | "K" <const> 205 // <lifetime> = "L" <base-62-number> 206 void Demangler::demangleGenericArg() { 207 if (consumeIf('K')) 208 demangleConst(); 209 else 210 demangleType(); 211 // FIXME demangle lifetimes. 212 } 213 214 // <basic-type> = "a" // i8 215 // | "b" // bool 216 // | "c" // char 217 // | "d" // f64 218 // | "e" // str 219 // | "f" // f32 220 // | "h" // u8 221 // | "i" // isize 222 // | "j" // usize 223 // | "l" // i32 224 // | "m" // u32 225 // | "n" // i128 226 // | "o" // u128 227 // | "s" // i16 228 // | "t" // u16 229 // | "u" // () 230 // | "v" // ... 231 // | "x" // i64 232 // | "y" // u64 233 // | "z" // ! 234 // | "p" // placeholder (e.g. for generic params), shown as _ 235 static bool parseBasicType(char C, BasicType &Type) { 236 switch (C) { 237 case 'a': 238 Type = BasicType::I8; 239 return true; 240 case 'b': 241 Type = BasicType::Bool; 242 return true; 243 case 'c': 244 Type = BasicType::Char; 245 return true; 246 case 'd': 247 Type = BasicType::F64; 248 return true; 249 case 'e': 250 Type = BasicType::Str; 251 return true; 252 case 'f': 253 Type = BasicType::F32; 254 return true; 255 case 'h': 256 Type = BasicType::U8; 257 return true; 258 case 'i': 259 Type = BasicType::ISize; 260 return true; 261 case 'j': 262 Type = BasicType::USize; 263 return true; 264 case 'l': 265 Type = BasicType::I32; 266 return true; 267 case 'm': 268 Type = BasicType::U32; 269 return true; 270 case 'n': 271 Type = BasicType::I128; 272 return true; 273 case 'o': 274 Type = BasicType::U128; 275 return true; 276 case 'p': 277 Type = BasicType::Placeholder; 278 return true; 279 case 's': 280 Type = BasicType::I16; 281 return true; 282 case 't': 283 Type = BasicType::U16; 284 return true; 285 case 'u': 286 Type = BasicType::Unit; 287 return true; 288 case 'v': 289 Type = BasicType::Variadic; 290 return true; 291 case 'x': 292 Type = BasicType::I64; 293 return true; 294 case 'y': 295 Type = BasicType::U64; 296 return true; 297 case 'z': 298 Type = BasicType::Never; 299 return true; 300 default: 301 return false; 302 } 303 } 304 305 void Demangler::printBasicType(BasicType Type) { 306 switch (Type) { 307 case BasicType::Bool: 308 print("bool"); 309 break; 310 case BasicType::Char: 311 print("char"); 312 break; 313 case BasicType::I8: 314 print("i8"); 315 break; 316 case BasicType::I16: 317 print("i16"); 318 break; 319 case BasicType::I32: 320 print("i32"); 321 break; 322 case BasicType::I64: 323 print("i64"); 324 break; 325 case BasicType::I128: 326 print("i128"); 327 break; 328 case BasicType::ISize: 329 print("isize"); 330 break; 331 case BasicType::U8: 332 print("u8"); 333 break; 334 case BasicType::U16: 335 print("u16"); 336 break; 337 case BasicType::U32: 338 print("u32"); 339 break; 340 case BasicType::U64: 341 print("u64"); 342 break; 343 case BasicType::U128: 344 print("u128"); 345 break; 346 case BasicType::USize: 347 print("usize"); 348 break; 349 case BasicType::F32: 350 print("f32"); 351 break; 352 case BasicType::F64: 353 print("f64"); 354 break; 355 case BasicType::Str: 356 print("str"); 357 break; 358 case BasicType::Placeholder: 359 print("_"); 360 break; 361 case BasicType::Unit: 362 print("()"); 363 break; 364 case BasicType::Variadic: 365 print("..."); 366 break; 367 case BasicType::Never: 368 print("!"); 369 break; 370 } 371 } 372 373 // <type> = | <basic-type> 374 // | <path> // named type 375 // | "A" <type> <const> // [T; N] 376 // | "S" <type> // [T] 377 // | "T" {<type>} "E" // (T1, T2, T3, ...) 378 // | "R" [<lifetime>] <type> // &T 379 // | "Q" [<lifetime>] <type> // &mut T 380 // | "P" <type> // *const T 381 // | "O" <type> // *mut T 382 // | "F" <fn-sig> // fn(...) -> ... 383 // | "D" <dyn-bounds> <lifetime> // dyn Trait<Assoc = X> + Send + 'a 384 // | <backref> // backref 385 void Demangler::demangleType() { 386 BasicType Type; 387 if (parseBasicType(consume(), Type)) 388 printBasicType(Type); 389 else 390 Error = true; // FIXME parse remaining productions. 391 } 392 393 // <const> = <basic-type> <const-data> 394 // | "p" // placeholder 395 // | <backref> 396 void Demangler::demangleConst() { 397 BasicType Type; 398 if (parseBasicType(consume(), Type)) { 399 switch (Type) { 400 case BasicType::I8: 401 case BasicType::I16: 402 case BasicType::I32: 403 case BasicType::I64: 404 case BasicType::I128: 405 case BasicType::ISize: 406 case BasicType::U8: 407 case BasicType::U16: 408 case BasicType::U32: 409 case BasicType::U64: 410 case BasicType::U128: 411 case BasicType::USize: 412 demangleConstInt(); 413 break; 414 case BasicType::Bool: 415 demangleConstBool(); 416 break; 417 case BasicType::Char: 418 demangleConstChar(); 419 break; 420 case BasicType::Placeholder: 421 print('_'); 422 break; 423 default: 424 // FIXME demangle backreferences. 425 Error = true; 426 break; 427 } 428 } else { 429 Error = true; 430 } 431 } 432 433 // <const-data> = ["n"] <hex-number> 434 void Demangler::demangleConstInt() { 435 if (consumeIf('n')) 436 print('-'); 437 438 StringView HexDigits; 439 uint64_t Value = parseHexNumber(HexDigits); 440 if (HexDigits.size() <= 16) { 441 printDecimalNumber(Value); 442 } else { 443 print("0x"); 444 print(HexDigits); 445 } 446 } 447 448 // <const-data> = "0_" // false 449 // | "1_" // true 450 void Demangler::demangleConstBool() { 451 StringView HexDigits; 452 parseHexNumber(HexDigits); 453 if (HexDigits == "0") 454 print("false"); 455 else if (HexDigits == "1") 456 print("true"); 457 else 458 Error = true; 459 } 460 461 /// Returns true if CodePoint represents a printable ASCII character. 462 static bool isAsciiPrintable(uint64_t CodePoint) { 463 return 0x20 <= CodePoint && CodePoint <= 0x7e; 464 } 465 466 // <const-data> = <hex-number> 467 void Demangler::demangleConstChar() { 468 StringView HexDigits; 469 uint64_t CodePoint = parseHexNumber(HexDigits); 470 if (Error || HexDigits.size() > 6) { 471 Error = true; 472 return; 473 } 474 475 print("'"); 476 switch (CodePoint) { 477 case '\t': 478 print(R"(\t)"); 479 break; 480 case '\r': 481 print(R"(\r)"); 482 break; 483 case '\n': 484 print(R"(\n)"); 485 break; 486 case '\\': 487 print(R"(\\)"); 488 break; 489 case '"': 490 print(R"(")"); 491 break; 492 case '\'': 493 print(R"(\')"); 494 break; 495 default: 496 if (isAsciiPrintable(CodePoint)) { 497 char C = CodePoint; 498 print(C); 499 } else { 500 print(R"(\u{)"); 501 print(HexDigits); 502 print('}'); 503 } 504 break; 505 } 506 print('\''); 507 } 508 509 // <undisambiguated-identifier> = ["u"] <decimal-number> ["_"] <bytes> 510 Identifier Demangler::parseIdentifier() { 511 bool Punycode = consumeIf('u'); 512 uint64_t Bytes = parseDecimalNumber(); 513 514 // Underscore resolves the ambiguity when identifier starts with a decimal 515 // digit or another underscore. 516 consumeIf('_'); 517 518 if (Error || Bytes > Input.size() - Position) { 519 Error = true; 520 return {}; 521 } 522 StringView S = Input.substr(Position, Bytes); 523 Position += Bytes; 524 525 if (!std::all_of(S.begin(), S.end(), isValid)) { 526 Error = true; 527 return {}; 528 } 529 530 return {S, Punycode}; 531 } 532 533 // Parses optional base 62 number. The presence of a number is determined using 534 // Tag. Returns 0 when tag is absent and parsed value + 1 otherwise. 535 uint64_t Demangler::parseOptionalBase62Number(char Tag) { 536 if (!consumeIf(Tag)) 537 return 0; 538 539 uint64_t N = parseBase62Number(); 540 if (Error || !addAssign(N, 1)) 541 return 0; 542 543 return N; 544 } 545 546 // Parses base 62 number with <0-9a-zA-Z> as digits. Number is terminated by 547 // "_". All values are offset by 1, so that "_" encodes 0, "0_" encodes 1, 548 // "1_" encodes 2, etc. 549 // 550 // <base-62-number> = {<0-9a-zA-Z>} "_" 551 uint64_t Demangler::parseBase62Number() { 552 if (consumeIf('_')) 553 return 0; 554 555 uint64_t Value = 0; 556 557 while (true) { 558 uint64_t Digit; 559 char C = consume(); 560 561 if (C == '_') { 562 break; 563 } else if (isDigit(C)) { 564 Digit = C - '0'; 565 } else if (isLower(C)) { 566 Digit = 10 + (C - 'a'); 567 } else if (isUpper(C)) { 568 Digit = 10 + 26 + (C - 'A'); 569 } else { 570 Error = true; 571 return 0; 572 } 573 574 if (!mulAssign(Value, 62)) 575 return 0; 576 577 if (!addAssign(Value, Digit)) 578 return 0; 579 } 580 581 if (!addAssign(Value, 1)) 582 return 0; 583 584 return Value; 585 } 586 587 // Parses a decimal number that had been encoded without any leading zeros. 588 // 589 // <decimal-number> = "0" 590 // | <1-9> {<0-9>} 591 uint64_t Demangler::parseDecimalNumber() { 592 char C = look(); 593 if (!isDigit(C)) { 594 Error = true; 595 return 0; 596 } 597 598 if (C == '0') { 599 consume(); 600 return 0; 601 } 602 603 uint64_t Value = 0; 604 605 while (isDigit(look())) { 606 if (!mulAssign(Value, 10)) { 607 Error = true; 608 return 0; 609 } 610 611 uint64_t D = consume() - '0'; 612 if (!addAssign(Value, D)) 613 return 0; 614 } 615 616 return Value; 617 } 618 619 // Parses a hexadecimal number with <0-9a-f> as a digits. Returns the parsed 620 // value and stores hex digits in HexDigits. The return value is unspecified if 621 // HexDigits.size() > 16. 622 // 623 // <hex-number> = "0_" 624 // | <1-9a-f> {<0-9a-f>} "_" 625 uint64_t Demangler::parseHexNumber(StringView &HexDigits) { 626 size_t Start = Position; 627 uint64_t Value = 0; 628 629 if (!isHexDigit(look())) 630 Error = true; 631 632 if (consumeIf('0')) { 633 if (!consumeIf('_')) 634 Error = true; 635 } else { 636 while (!Error && !consumeIf('_')) { 637 char C = consume(); 638 Value *= 16; 639 if (isDigit(C)) 640 Value += C - '0'; 641 else if ('a' <= C && C <= 'f') 642 Value += 10 + (C - 'a'); 643 else 644 Error = true; 645 } 646 } 647 648 if (Error) { 649 HexDigits = StringView(); 650 return 0; 651 } 652 653 size_t End = Position - 1; 654 assert(Start < End); 655 HexDigits = Input.substr(Start, End - Start); 656 return Value; 657 } 658