//===-- CPlusPlusNameParser.cpp -------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "CPlusPlusNameParser.h"

#include "clang/Basic/IdentifierTable.h"
#include "clang/Basic/TokenKinds.h"
#include "llvm/ADT/StringMap.h"
#include "llvm/Support/Threading.h"

using namespace lldb;
using namespace lldb_private;
using llvm::Optional;
using llvm::None;
using ParsedFunction = lldb_private::CPlusPlusNameParser::ParsedFunction;
using ParsedName = lldb_private::CPlusPlusNameParser::ParsedName;
namespace tok = clang::tok;

// Parses the whole token stream as a function definition.
//
// The read position is reset to the first token, then three forms are tried
// in order: a function without a return type, a function returning a function
// pointer, and a function with an ordinary return type. Returns None when
// none of them matches.
Optional<ParsedFunction> CPlusPlusNameParser::ParseAsFunctionDefinition() {
  m_next_token_index = 0;
  Optional<ParsedFunction> result(None);

  // Try to parse the name as function without a return type specified e.g.
  // main(int, char*[])
  {
    // The bookmark lives only for this attempt. NOTE(review): Bookmark is
    // declared in the header; presumably it rewinds the read position when it
    // is destroyed without Remove() having been called - confirm there.
    Bookmark start_position = SetBookmark();
    result = ParseFunctionImpl(false);
    if (result && !HasMoreTokens())
      return result;
  }

  // Try to parse the name as function with function pointer return type e.g.
  // void (*get_func(const char*))()
  // NOTE(review): unlike the other two attempts, a successful result here is
  // returned without checking HasMoreTokens() - confirm this is intended.
  result = ParseFuncPtr(true);
  if (result)
    return result;

  // Finally try to parse the name as a function with non-function return type
  // e.g. int main(int, char*[])
  result = ParseFunctionImpl(true);
  if (HasMoreTokens())
    return None;
  return result;
}

// Parses the whole token stream as a (possibly qualified) name.
//
// The read position is reset to the first token. Returns None when
// ParseFullNameImpl() fails or when tokens are left over after the name;
// otherwise returns the text of the basename and of the context ranges.
Optional<ParsedName> CPlusPlusNameParser::ParseAsFullName() {
  m_next_token_index = 0;
  Optional<ParsedNameRanges> name_ranges = ParseFullNameImpl();
  if (!name_ranges)
    return None;
  if (HasMoreTokens())
    return None;
  ParsedName result;
  result.basename = GetTextForRange(name_ranges.value().basename_range);
  result.context = GetTextForRange(name_ranges.value().context_range);
  return result;
}

// Returns true while the read position is still inside m_tokens.
bool CPlusPlusNameParser::HasMoreTokens() {
  return m_next_token_index < m_tokens.size();
}

// Moves the read position one token forward. No bounds check is performed.
void CPlusPlusNameParser::Advance() { ++m_next_token_index; }

// Moves the read position one token back. No bounds check is performed, so
// callers must only use it after a matching Advance().
void CPlusPlusNameParser::TakeBack() { --m_next_token_index; }

// Consumes the next token if it is of the given kind.
// Returns true if a token was consumed; otherwise the position is unchanged.
bool CPlusPlusNameParser::ConsumeToken(tok::TokenKind kind) {
  if (!HasMoreTokens())
    return false;

  if (!Peek().is(kind))
    return false;

  Advance();
  return true;
}

// Consumes the next token if it matches any one of the given kinds.
// Returns true if a token was consumed; otherwise the position is unchanged.
template <typename... Ts> bool CPlusPlusNameParser::ConsumeToken(Ts... kinds) {
  if (!HasMoreTokens())
    return false;

  if (!Peek().isOneOf(kinds...))
    return false;

  Advance();
  return true;
}

// Creates a Bookmark constructed from the current read position.
CPlusPlusNameParser::Bookmark CPlusPlusNameParser::SetBookmark() {
  return Bookmark(m_next_token_index);
}

// Returns the index of the next token to be read.
size_t CPlusPlusNameParser::GetCurrentPosition() { return m_next_token_index; }

// Returns a reference to the next token without consuming it.
// Asserts that there is a token to look at.
clang::Token &CPlusPlusNameParser::Peek() {
  assert(HasMoreTokens());
  return m_tokens[m_next_token_index];
}

// Parses "[return-type] name(arguments) [qualifiers]" starting at the current
// position.
//
// expect_return_type: when true, a leading 'auto' or a type name must be
// present before the function name.
//
// On success returns the basename, context, argument list and trailing
// qualifiers as text. Returns None if any mandatory part is missing.
Optional<ParsedFunction>
CPlusPlusNameParser::ParseFunctionImpl(bool expect_return_type) {
  Bookmark start_position = SetBookmark();
  if (expect_return_type) {
    // Consume return type if it's expected.
    if (!ConsumeToken(tok::kw_auto) && !ConsumeTypename())
      return None;
  }

  auto maybe_name = ParseFullNameImpl();
  if (!maybe_name) {
    return None;
  }

  size_t argument_start = GetCurrentPosition();
  if (!ConsumeArguments()) {
    return None;
  }

  // Everything between the end of the argument list and the position reached
  // by SkipFunctionQualifiers() is reported as the qualifiers.
  size_t qualifiers_start = GetCurrentPosition();
  SkipFunctionQualifiers();
  size_t end_position = GetCurrentPosition();

  ParsedFunction result;
  result.name.basename = GetTextForRange(maybe_name.value().basename_range);
  result.name.context = GetTextForRange(maybe_name.value().context_range);
  result.arguments = GetTextForRange(Range(argument_start, qualifiers_start));
  result.qualifiers = GetTextForRange(Range(qualifiers_start, end_position));
  start_position.Remove();
  return result;
}

// Parses a function whose return type is a function pointer, e.g.
//   void (*get_func(const char*))()
//
// expect_return_type: when true, a type name must precede the opening '('.
// The recursive call passes false because the return type has already been
// consumed by the outer level.
//
// The result describes the innermost function (its name, arguments and
// qualifiers); the outer argument list and qualifiers are consumed but not
// reported. Returns None if the input does not have this shape.
Optional<ParsedFunction>
CPlusPlusNameParser::ParseFuncPtr(bool expect_return_type) {
  Bookmark start_position = SetBookmark();
  if (expect_return_type) {
    // Consume return type.
    if (!ConsumeTypename())
      return None;
  }

  if (!ConsumeToken(tok::l_paren))
    return None;
  if (!ConsumePtrsAndRefs())
    return None;

  // First alternative: the parenthesized part is an ordinary function.
  {
    Bookmark before_inner_function_pos = SetBookmark();
    auto maybe_inner_function_name = ParseFunctionImpl(false);
    if (maybe_inner_function_name)
      if (ConsumeToken(tok::r_paren))
        if (ConsumeArguments()) {
          SkipFunctionQualifiers();
          start_position.Remove();
          before_inner_function_pos.Remove();
          return maybe_inner_function_name;
        }
  }

  // Second alternative: the parenthesized part is itself a function returning
  // a function pointer.
  auto maybe_inner_function_ptr_name = ParseFuncPtr(false);
  if (maybe_inner_function_ptr_name)
    if (ConsumeToken(tok::r_paren))
      if (ConsumeArguments()) {
        SkipFunctionQualifiers();
        start_position.Remove();
        return maybe_inner_function_ptr_name;
      }
  return None;
}

// Consumes a balanced '(' ... ')' group starting at the current position.
bool CPlusPlusNameParser::ConsumeArguments() {
  return ConsumeBrackets(tok::l_paren, tok::r_paren);
}

// Consumes a template argument list '<' ... '>' starting at the current
// position. Returns true only if the brackets end up exactly balanced.
bool CPlusPlusNameParser::ConsumeTemplateArgs() {
  Bookmark start_position = SetBookmark();
  if (!HasMoreTokens() || Peek().getKind() != tok::less)
    return false;
  Advance();

  // Consuming template arguments is a bit trickier than consuming function
  // arguments, because '<' '>' brackets are not always trivially balanced. In
  // some rare cases tokens '<' and '>' can appear inside template arguments as
  // arithmetic or shift operators not as template brackets. Examples:
  //   std::enable_if<(10u)<(64), bool>
  //   f<A<operator<(X,Y)::Subclass>>
  // Good thing that compiler makes sure that really ambiguous cases of '>'
  // usage should be enclosed within '()' brackets.
  int template_counter = 1;
  // True only right after something that can be followed by a template
  // argument list (an identifier, an operator name or an ABI tag).
  bool can_open_template = false;
  while (HasMoreTokens() && template_counter > 0) {
    tok::TokenKind kind = Peek().getKind();
    switch (kind) {
    case tok::greatergreater:
      // '>>' closes two levels at once. If only one level was open the
      // counter becomes negative and the final check below rejects the input.
      template_counter -= 2;
      can_open_template = false;
      Advance();
      break;
    case tok::greater:
      --template_counter;
      can_open_template = false;
      Advance();
      break;
    case tok::less:
      // '<' is an attempt to open a subtemplate.
      // Check if parser is at the point where it's actually possible,
      // otherwise it's just a part of an expression like 'sizeof(T)<(10)'. No
      // need to do the same for '>' because compiler actually makes sure that
      // '>' always surrounded by brackets to avoid ambiguity.
      if (can_open_template)
        ++template_counter;
      can_open_template = false;
      Advance();
      break;
    case tok::kw_operator: // C++ operator overloading.
      if (!ConsumeOperator())
        return false;
      can_open_template = true;
      break;
    case tok::raw_identifier:
      can_open_template = true;
      Advance();
      break;
    case tok::l_square:
      // Handle templates tagged with an ABI tag.
      // An example demangled/prettified version is:
      //   func[abi:tag1][abi:tag2]<type[abi:tag3]>(int)
      if (ConsumeAbiTag())
        can_open_template = true;
      else if (ConsumeBrackets(tok::l_square, tok::r_square))
        can_open_template = false;
      else
        return false;
      break;
    case tok::l_paren:
      // Parenthesized groups are skipped as a whole, so any '<' or '>' inside
      // them never affects the counter.
      if (!ConsumeArguments())
        return false;
      can_open_template = false;
      break;
    default:
      can_open_template = false;
      Advance();
      break;
    }
  }

  if (template_counter != 0) {
    return false;
  }
  start_position.Remove();
  return true;
}

// Consumes an ABI tag of the form '[abi:<tag>]' starting at the current
// position. The tag body may consist of identifiers, commas, periods and
// numeric constants. Returns true if a complete tag was consumed.
bool CPlusPlusNameParser::ConsumeAbiTag() {
  Bookmark start_position = SetBookmark();
  if (!ConsumeToken(tok::l_square))
    return false;

  if (HasMoreTokens() && Peek().is(tok::raw_identifier) &&
      Peek().getRawIdentifier() == "abi")
    Advance();
  else
    return false;

  if (!ConsumeToken(tok::colon))
    return false;

  // Consume the actual tag string (and allow some special characters)
  while (ConsumeToken(tok::raw_identifier, tok::comma, tok::period,
                      tok::numeric_constant))
    ;

  if (!ConsumeToken(tok::r_square))
    return false;

  start_position.Remove();
  return true;
}

// Consumes the exact token sequence '(' 'anonymous' 'namespace' ')'.
// Returns true if the whole sequence was consumed.
bool CPlusPlusNameParser::ConsumeAnonymousNamespace() {
  Bookmark start_position = SetBookmark();
  if (!ConsumeToken(tok::l_paren)) {
    return false;
  }
  constexpr llvm::StringLiteral g_anonymous("anonymous");
  if (HasMoreTokens() && Peek().is(tok::raw_identifier) &&
      Peek().getRawIdentifier() == g_anonymous) {
    Advance();
  } else {
    return false;
  }

  if (!ConsumeToken(tok::kw_namespace)) {
    return false;
  }

  if (!ConsumeToken(tok::r_paren)) {
    return false;
  }
  start_position.Remove();
  return true;
}

// Consumes a balanced '{' ... '}' group whose first inner token is the
// identifier 'lambda'. Returns true if such a group was consumed.
bool CPlusPlusNameParser::ConsumeLambda() {
  Bookmark start_position = SetBookmark();
  if (!ConsumeToken(tok::l_brace)) {
    return false;
  }
  constexpr llvm::StringLiteral g_lambda("lambda");
  if (HasMoreTokens() && Peek().is(tok::raw_identifier) &&
      Peek().getRawIdentifier() == g_lambda) {
    // Put the matched brace back so we can use ConsumeBrackets
    TakeBack();
  } else {
    return false;
  }

  if (!ConsumeBrackets(tok::l_brace, tok::r_brace)) {
    return false;
  }

  start_position.Remove();
  return true;
}

// Consumes a balanced group delimited by the given bracket kinds, including
// any nested groups of the same kinds. The current token must be `left`.
// Returns true if the matching `right` was found before the tokens ran out.
bool CPlusPlusNameParser::ConsumeBrackets(tok::TokenKind left,
                                          tok::TokenKind right) {
  Bookmark start_position = SetBookmark();
  if (!HasMoreTokens() || Peek().getKind() != left)
    return false;
  Advance();

  // Nesting depth; the loop stops as soon as it drops back to zero.
  int counter = 1;
  while (HasMoreTokens() && counter > 0) {
    tok::TokenKind kind = Peek().getKind();
    if (kind == right)
      --counter;
    else if (kind == left)
      ++counter;
    Advance();
  }

  assert(counter >= 0);
  if (counter > 0) {
    return false;
  }
  start_position.Remove();
  return true;
}

// Consumes 'operator' followed by an operator name: new/delete (optionally
// with '[]'), any token listed in clang's OperatorKinds.def, '()', '[]', or a
// type name (conversion operator). Returns true if an operator was consumed.
bool CPlusPlusNameParser::ConsumeOperator() {
  Bookmark start_position = SetBookmark();
  if (!ConsumeToken(tok::kw_operator))
    return false;

  if (!HasMoreTokens()) {
    return false;
  }

  const auto &token = Peek();

  // When clang generates debug info it adds template parameters to names.
  // Since clang doesn't add a space between the name and the template parameter
  // in some cases we are not generating valid C++ names e.g.:
  //
  //   operator<<A::B>
  //
  // In some of these cases we will not parse them correctly. This fixes the
  // issue by detecting this case and inserting tok::less in place of
  // tok::lessless and returning successfully that we consumed the operator.
  if (token.getKind() == tok::lessless) {
    // Make sure we have more tokens before attempting to look ahead one more.
    if (m_next_token_index + 1 < m_tokens.size()) {
      // Look at the token that follows '<<'.
      clang::Token n_token = m_tokens[m_next_token_index + 1];
      // If we find ( or < then this is indeed operator<< no need for fix.
      if (n_token.getKind() != tok::l_paren && n_token.getKind() != tok::less) {
        // Build a one-character '<' token located at the second character of
        // the original '<<'.
        clang::Token tmp_tok;
        tmp_tok.startToken();
        tmp_tok.setLength(1);
        tmp_tok.setLocation(token.getLocation().getLocWithOffset(1));
        tmp_tok.setKind(tok::less);

        m_tokens[m_next_token_index] = tmp_tok;

        // The position is deliberately not advanced here: the replacement
        // '<' token is left in the stream as the next token to be read.
        start_position.Remove();
        return true;
      }
    }
  }

  switch (token.getKind()) {
  case tok::kw_new:
  case tok::kw_delete:
    // This is 'new' or 'delete' operators.
    Advance();
    // Check for array new/delete.
    if (HasMoreTokens() && Peek().is(tok::l_square)) {
      // Consume the '[' and ']'.
      if (!ConsumeBrackets(tok::l_square, tok::r_square))
        return false;
    }
    break;

#define OVERLOADED_OPERATOR(Name, Spelling, Token, Unary, Binary, MemberOnly)  \
  case tok::Token:                                                             \
    Advance();                                                                 \
    break;
#define OVERLOADED_OPERATOR_MULTI(Name, Spelling, Unary, Binary, MemberOnly)
#include "clang/Basic/OperatorKinds.def"
#undef OVERLOADED_OPERATOR
#undef OVERLOADED_OPERATOR_MULTI

  case tok::l_paren:
    // Call operator consume '(' ... ')'.
    if (ConsumeBrackets(tok::l_paren, tok::r_paren))
      break;
    return false;

  case tok::l_square:
    // This is a [] operator.
    // Consume the '[' and ']'.
    if (ConsumeBrackets(tok::l_square, tok::r_square))
      break;
    return false;

  default:
    // This might be a cast operator.
    if (ConsumeTypename())
      break;
    return false;
  }
  start_position.Remove();
  return true;
}

// Consumes any run of 'const' / 'volatile' tokens.
void CPlusPlusNameParser::SkipTypeQualifiers() {
  while (ConsumeToken(tok::kw_const, tok::kw_volatile))
    ;
}

// Consumes any run of 'const', 'volatile', '&' and '&&' tokens.
void CPlusPlusNameParser::SkipFunctionQualifiers() {
  while (ConsumeToken(tok::kw_const, tok::kw_volatile, tok::amp, tok::ampamp))
    ;
}

// Consumes a run of built-in type keywords.
// Returns true if at least one keyword was consumed.
bool CPlusPlusNameParser::ConsumeBuiltinType() {
  bool result = false;
  bool continue_parsing = true;
  // Built-in types can be made of a few keywords like 'unsigned long long
  // int'. This function consumes all built-in type keywords without checking
  // if they make sense like 'unsigned char void'.
  while (continue_parsing && HasMoreTokens()) {
    switch (Peek().getKind()) {
    case tok::kw_short:
    case tok::kw_long:
    case tok::kw___int64:
    case tok::kw___int128:
    case tok::kw_signed:
    case tok::kw_unsigned:
    case tok::kw_void:
    case tok::kw_char:
    case tok::kw_int:
    case tok::kw_half:
    case tok::kw_float:
    case tok::kw_double:
    case tok::kw___float128:
    case tok::kw_wchar_t:
    case tok::kw_bool:
    case tok::kw_char16_t:
    case tok::kw_char32_t:
      result = true;
      Advance();
      break;
    default:
      continue_parsing = false;
      break;
    }
  }
  return result;
}

// Same as ConsumePtrsAndRefs() but for callers that do not care whether
// anything was consumed.
void CPlusPlusNameParser::SkipPtrsAndRefs() {
  // Ignoring result.
  ConsumePtrsAndRefs();
}

// Consumes any mix of '*', '&', '&&', 'const' and 'volatile' tokens.
// Returns true if the loop below consumed at least one token. Leading
// 'const' / 'volatile' tokens are taken by the initial SkipTypeQualifiers()
// call and do not count towards the result.
bool CPlusPlusNameParser::ConsumePtrsAndRefs() {
  bool found = false;
  SkipTypeQualifiers();
  while (ConsumeToken(tok::star, tok::amp, tok::ampamp, tok::kw_const,
                      tok::kw_volatile)) {
    found = true;
    SkipTypeQualifiers();
  }
  return found;
}

// Consumes 'decltype' followed by a balanced '(' ... ')' group.
// Returns true if both parts were consumed.
bool CPlusPlusNameParser::ConsumeDecltype() {
  Bookmark start_position = SetBookmark();
  if (!ConsumeToken(tok::kw_decltype))
    return false;

  if (!ConsumeArguments())
    return false;

  start_position.Remove();
  return true;
}

// Consumes a type name: optional cv-qualifiers, then a built-in type, a
// decltype(...) expression or a full name, then optional pointer/reference
// tokens. Returns false only when none of the three type forms is found.
bool CPlusPlusNameParser::ConsumeTypename() {
  Bookmark start_position = SetBookmark();
  SkipTypeQualifiers();
  if (!ConsumeBuiltinType() && !ConsumeDecltype()) {
    if (!ParseFullNameImpl())
      return false;
  }
  SkipPtrsAndRefs();
  start_position.Remove();
  return true;
}

// Parses a (possibly qualified) name starting at the current position and
// returns the token ranges of its context and basename.
//
// Parsing stops at the first token that cannot continue the name in the
// current state. The parse succeeds only if it stops right after an
// identifier, an operator name or a template argument list; otherwise None is
// returned. When the name contains '::', the context range ends at the last
// '::' and the basename range starts right after it; otherwise only the
// basename range is set.
Optional<CPlusPlusNameParser::ParsedNameRanges>
CPlusPlusNameParser::ParseFullNameImpl() {
  // Name parsing state machine.
  enum class State {
    Beginning,       // start of the name
    AfterTwoColons,  // right after ::
    AfterIdentifier, // right after alphanumerical identifier ([a-z0-9_]+)
    AfterTemplate,   // right after template brackets (<something>)
    AfterOperator,   // right after name of C++ operator
  };

  Bookmark start_position = SetBookmark();
  State state = State::Beginning;
  bool continue_parsing = true;
  // Token index of the most recent '::' seen, if any.
  Optional<size_t> last_coloncolon_position;

  while (continue_parsing && HasMoreTokens()) {
    const auto &token = Peek();
    switch (token.getKind()) {
    case tok::raw_identifier: // Just a name.
      if (state != State::Beginning && state != State::AfterTwoColons) {
        continue_parsing = false;
        break;
      }
      Advance();
      state = State::AfterIdentifier;
      break;
    case tok::l_square: {
      // Handles types or functions that were tagged
      // with, e.g.,
      //   [[gnu::abi_tag("tag1","tag2")]] func()
      // and demangled/prettified into:
      //   func[abi:tag1][abi:tag2]()

      // ABI tags only appear after a method or type name
      const bool valid_state =
          state == State::AfterIdentifier || state == State::AfterOperator;
      if (!valid_state || !ConsumeAbiTag()) {
        continue_parsing = false;
      }

      // A consumed ABI tag leaves the state unchanged.
      break;
    }
    case tok::l_paren: {
      if (state == State::Beginning || state == State::AfterTwoColons) {
        // (anonymous namespace)
        if (ConsumeAnonymousNamespace()) {
          state = State::AfterIdentifier;
          break;
        }
      }

      // Type declared inside a function 'func()::Type'
      if (state != State::AfterIdentifier && state != State::AfterTemplate &&
          state != State::AfterOperator) {
        continue_parsing = false;
        break;
      }
      // NOTE(review): the early exits below leave this bookmark un-removed;
      // presumably its destruction rewinds to the '(' so the argument list is
      // not treated as part of the name - confirm against Bookmark's
      // definition in the header.
      Bookmark l_paren_position = SetBookmark();
      // Consume the '(' ... ') [const]'.
      if (!ConsumeArguments()) {
        continue_parsing = false;
        break;
      }
      SkipFunctionQualifiers();

      // Consume '::'
      size_t coloncolon_position = GetCurrentPosition();
      if (!ConsumeToken(tok::coloncolon)) {
        continue_parsing = false;
        break;
      }
      l_paren_position.Remove();
      last_coloncolon_position = coloncolon_position;
      state = State::AfterTwoColons;
      break;
    }
    case tok::l_brace:
      // '{lambda...}' is accepted wherever an identifier would be.
      if (state == State::Beginning || state == State::AfterTwoColons) {
        if (ConsumeLambda()) {
          state = State::AfterIdentifier;
          break;
        }
      }
      continue_parsing = false;
      break;
    case tok::coloncolon: // Type nesting delimiter.
      if (state != State::Beginning && state != State::AfterIdentifier &&
          state != State::AfterTemplate) {
        continue_parsing = false;
        break;
      }
      last_coloncolon_position = GetCurrentPosition();
      Advance();
      state = State::AfterTwoColons;
      break;
    case tok::less: // Template brackets.
      if (state != State::AfterIdentifier && state != State::AfterOperator) {
        continue_parsing = false;
        break;
      }
      if (!ConsumeTemplateArgs()) {
        continue_parsing = false;
        break;
      }
      state = State::AfterTemplate;
      break;
    case tok::kw_operator: // C++ operator overloading.
      if (state != State::Beginning && state != State::AfterTwoColons) {
        continue_parsing = false;
        break;
      }
      if (!ConsumeOperator()) {
        continue_parsing = false;
        break;
      }
      state = State::AfterOperator;
      break;
    case tok::tilde: // Destructor.
      if (state != State::Beginning && state != State::AfterTwoColons) {
        continue_parsing = false;
        break;
      }
      Advance();
      if (ConsumeToken(tok::raw_identifier)) {
        state = State::AfterIdentifier;
      } else {
        // '~' not followed by an identifier: give the '~' back and stop.
        TakeBack();
        continue_parsing = false;
      }
      break;
    default:
      continue_parsing = false;
      break;
    }
  }

  if (state == State::AfterIdentifier || state == State::AfterOperator ||
      state == State::AfterTemplate) {
    ParsedNameRanges result;
    if (last_coloncolon_position) {
      result.context_range = Range(start_position.GetSavedPosition(),
                                   last_coloncolon_position.value());
      result.basename_range =
          Range(last_coloncolon_position.value() + 1, GetCurrentPosition());
    } else {
      result.basename_range =
          Range(start_position.GetSavedPosition(), GetCurrentPosition());
    }
    start_position.Remove();
    return result;
  } else {
    return None;
  }
}

// Returns the slice of m_text covered by the tokens in `range`, from the
// start of its first token to the end of its last token. An empty range
// yields an empty StringRef.
llvm::StringRef CPlusPlusNameParser::GetTextForRange(const Range &range) {
  if (range.empty())
    return llvm::StringRef();
  assert(range.begin_index < range.end_index);
  assert(range.begin_index < m_tokens.size());
  assert(range.end_index <= m_tokens.size());
  clang::Token &first_token = m_tokens[range.begin_index];
  clang::Token &last_token = m_tokens[range.end_index - 1];
  clang::SourceLocation start_loc = first_token.getLocation();
  clang::SourceLocation end_loc = last_token.getLocation();
  // The raw encodings are used directly as character offsets into m_text.
  // NOTE(review): this presumably relies on ExtractTokens() lexing with a
  // default-constructed SourceLocation as the buffer start - confirm.
  unsigned start_pos = start_loc.getRawEncoding();
  unsigned end_pos = end_loc.getRawEncoding() + last_token.getLength();
  return m_text.take_front(end_pos).drop_front(start_pos);
}

// Returns the language options used for lexing names. The options object is
// shared and its flags are set once, on first use.
static const clang::LangOptions &GetLangOptions() {
  static clang::LangOptions g_options;
  static llvm::once_flag g_once_flag;
  llvm::call_once(g_once_flag, []() {
    g_options.LineComment = true;
    g_options.C99 = true;
    g_options.C11 = true;
    g_options.CPlusPlus = true;
    g_options.CPlusPlus11 = true;
    g_options.CPlusPlus14 = true;
    g_options.CPlusPlus17 = true;
  });
  return g_options;
}

// Returns a map from keyword spelling to its token kind, built from the
// KEYWORD entries of clang's TokenKinds.def.
static const llvm::StringMap<tok::TokenKind> &GetKeywordsMap() {
  static llvm::StringMap<tok::TokenKind> g_map{
#define KEYWORD(Name, Flags) {llvm::StringRef(#Name), tok::kw_##Name},
#include "clang/Basic/TokenKinds.def"
#undef KEYWORD
  };
  return g_map;
}

// Lexes m_text in raw mode and appends the resulting tokens to m_tokens.
// Raw identifiers whose spelling is found in the keyword map are re-tagged
// with the corresponding keyword kind. Does nothing for empty text.
void CPlusPlusNameParser::ExtractTokens() {
  if (m_text.empty())
    return;
  clang::Lexer lexer(clang::SourceLocation(), GetLangOptions(), m_text.data(),
                     m_text.data(), m_text.data() + m_text.size());
  const auto &kw_map = GetKeywordsMap();
  clang::Token token;
  for (lexer.LexFromRawLexer(token); !token.is(clang::tok::eof);
       lexer.LexFromRawLexer(token)) {
    if (token.is(clang::tok::raw_identifier)) {
      auto it = kw_map.find(token.getRawIdentifier());
      if (it != kw_map.end()) {
        token.setKind(it->getValue());
      }
    }

    m_tokens.push_back(token);
  }
}