//===-- CPlusPlusNameParser.cpp -------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "CPlusPlusNameParser.h"

#include "clang/Basic/IdentifierTable.h"
#include "clang/Basic/TokenKinds.h"
#include "llvm/ADT/StringMap.h"
#include "llvm/Support/Threading.h"

using namespace lldb;
using namespace lldb_private;
using llvm::Optional;
using llvm::None;
using ParsedFunction = lldb_private::CPlusPlusNameParser::ParsedFunction;
using ParsedName = lldb_private::CPlusPlusNameParser::ParsedName;
namespace tok = clang::tok;

// NOTE(review): Throughout this file a `Bookmark` is taken at the start of a
// speculative parse and `Remove()` is called only on the success path. The
// Bookmark class is declared in the header and is not visible here; presumably
// its destructor rewinds m_next_token_index to the saved position unless
// Remove() was called, which is what makes the early `return false` / `return
// None` paths below leave the parser where it started. Confirm in the header.

// Tries to interpret the whole token stream as a function signature. Three
// shapes are attempted in order: no return type, function-pointer return type,
// and ordinary return type. Returns None when none of them matches.
Optional<ParsedFunction> CPlusPlusNameParser::ParseAsFunctionDefinition() {
  m_next_token_index = 0;
  Optional<ParsedFunction> result(None);

  // Try to parse the name as function without a return type specified e.g.
  // main(int, char*[])
  {
    // This bookmark is never Remove()d; it is scoped so that a partial match
    // does not affect the attempts below (see the NOTE at the top of the file).
    Bookmark start_position = SetBookmark();
    result = ParseFunctionImpl(false);
    // Only accept the parse if it used up every token.
    if (result && !HasMoreTokens())
      return result;
  }

  // Try to parse the name as function with function pointer return type e.g.
  // void (*get_func(const char*))()
  // Unlike the other two attempts, a successful result is returned here
  // without checking for leftover tokens.
  result = ParseFuncPtr(true);
  if (result)
    return result;

  // Finally try to parse the name as a function with non-function return type
  // e.g. int main(int, char*[])
  result = ParseFunctionImpl(true);
  if (HasMoreTokens())
    return None;
  return result;
}

// Tries to interpret the whole token stream as a (possibly qualified) name.
// Returns the basename and context text, or None if the name does not parse or
// tokens are left over afterwards.
Optional<ParsedName> CPlusPlusNameParser::ParseAsFullName() {
  m_next_token_index = 0;
  Optional<ParsedNameRanges> name_ranges = ParseFullNameImpl();
  if (!name_ranges)
    return None;
  if (HasMoreTokens())
    return None;
  ParsedName result;
  result.basename = GetTextForRange(name_ranges.value().basename_range);
  result.context = GetTextForRange(name_ranges.value().context_range);
  return result;
}

// True while the cursor has not moved past the last token.
bool CPlusPlusNameParser::HasMoreTokens() {
  return m_next_token_index < m_tokens.size();
}

// Moves the cursor one token forward. No bounds check is performed.
void CPlusPlusNameParser::Advance() { ++m_next_token_index; }

// Moves the cursor one token back. No bounds check is performed, so callers
// must only use this after a matching Advance()/ConsumeToken().
void CPlusPlusNameParser::TakeBack() { --m_next_token_index; }

// Consumes the current token if it is of the given kind. Returns whether a
// token was consumed; the cursor is untouched on failure.
bool CPlusPlusNameParser::ConsumeToken(tok::TokenKind kind) {
  if (!HasMoreTokens())
    return false;

  if (!Peek().is(kind))
    return false;

  Advance();
  return true;
}

// Variadic form: consumes the current token if it matches any of the given
// kinds. Returns whether a token was consumed.
template <typename... Ts> bool CPlusPlusNameParser::ConsumeToken(Ts... kinds) {
  if (!HasMoreTokens())
    return false;

  if (!Peek().isOneOf(kinds...))
    return false;

  Advance();
  return true;
}

// Captures the current cursor position in a Bookmark.
CPlusPlusNameParser::Bookmark CPlusPlusNameParser::SetBookmark() {
  return Bookmark(m_next_token_index);
}

// Returns the index of the next token to be examined.
size_t CPlusPlusNameParser::GetCurrentPosition() { return m_next_token_index; }

// Returns the token under the cursor. Callers must check HasMoreTokens()
// first; this is only enforced by an assert.
clang::Token &CPlusPlusNameParser::Peek() {
  assert(HasMoreTokens());
  return m_tokens[m_next_token_index];
}

// Parses `[return-type] full-name ( arguments ) [qualifiers]` starting at the
// cursor. When expect_return_type is true a return type (either `auto` or
// anything ConsumeTypename() accepts) must come first. On success the name,
// argument list and trailing qualifiers are returned as text slices.
Optional<ParsedFunction>
CPlusPlusNameParser::ParseFunctionImpl(bool expect_return_type) {
  Bookmark start_position = SetBookmark();
  if (expect_return_type) {
    // Consume return type if it's expected.
    if (!ConsumeToken(tok::kw_auto) && !ConsumeTypename())
      return None;
  }

  auto maybe_name = ParseFullNameImpl();
  if (!maybe_name) {
    return None;
  }

  // Token positions are recorded around each piece so that the text of the
  // argument list and of the qualifiers can be sliced out separately.
  size_t argument_start = GetCurrentPosition();
  if (!ConsumeArguments()) {
    return None;
  }

  size_t qualifiers_start = GetCurrentPosition();
  SkipFunctionQualifiers();
  size_t end_position = GetCurrentPosition();

  ParsedFunction result;
  result.name.basename = GetTextForRange(maybe_name.value().basename_range);
  result.name.context = GetTextForRange(maybe_name.value().context_range);
  result.arguments = GetTextForRange(Range(argument_start, qualifiers_start));
  result.qualifiers = GetTextForRange(Range(qualifiers_start, end_position));
  start_position.Remove();
  return result;
}

// Parses a function whose return type is a function pointer, e.g.
//   void (*get_func(const char*))()
// The shape matched is: [return-type] '(' ptrs-and-refs inner ')' '(' ... ')'
// where `inner` is either a plain function (no return type) or, recursively,
// another function-pointer construct. The ParsedFunction returned describes
// the innermost function; the outer argument list and qualifiers are consumed
// but not recorded.
Optional<ParsedFunction>
CPlusPlusNameParser::ParseFuncPtr(bool expect_return_type) {
  Bookmark start_position = SetBookmark();
  if (expect_return_type) {
    // Consume return type.
    if (!ConsumeTypename())
      return None;
  }

  if (!ConsumeToken(tok::l_paren))
    return None;
  // At least one '*', '&' or '&&' is required after the opening parenthesis.
  if (!ConsumePtrsAndRefs())
    return None;

  {
    // First alternative: the inner construct is an ordinary function. The
    // bookmark is scoped to this block so that the second alternative below
    // starts from the same token (see the NOTE at the top of the file).
    Bookmark before_inner_function_pos = SetBookmark();
    auto maybe_inner_function_name = ParseFunctionImpl(false);
    if (maybe_inner_function_name)
      if (ConsumeToken(tok::r_paren))
        if (ConsumeArguments()) {
          SkipFunctionQualifiers();
          start_position.Remove();
          before_inner_function_pos.Remove();
          return maybe_inner_function_name;
        }
  }

  // Second alternative: the inner construct is itself a function pointer
  // (parsed recursively, without a return type of its own).
  auto maybe_inner_function_ptr_name = ParseFuncPtr(false);
  if (maybe_inner_function_ptr_name)
    if (ConsumeToken(tok::r_paren))
      if (ConsumeArguments()) {
        SkipFunctionQualifiers();
        start_position.Remove();
        return maybe_inner_function_ptr_name;
      }
  return None;
}

// Consumes a balanced '(' ... ')' group. Returns false if the cursor is not at
// '(' or the group is never closed.
bool CPlusPlusNameParser::ConsumeArguments() {
  return ConsumeBrackets(tok::l_paren, tok::r_paren);
}

// Consumes a template argument list '<' ... '>' starting at the cursor.
// Returns true only if the cursor was at '<' and the nesting counter reaches
// exactly zero.
bool CPlusPlusNameParser::ConsumeTemplateArgs() {
  Bookmark start_position = SetBookmark();
  if (!HasMoreTokens() || Peek().getKind() != tok::less)
    return false;
  Advance();

  // Consuming template arguments is a bit trickier than consuming function
  // arguments, because '<' '>' brackets are not always trivially balanced. In
  // some rare cases tokens '<' and '>' can appear inside template arguments as
  // arithmetic or shift operators not as template brackets. Examples:
  // std::enable_if<(10u)<(64), bool>
  // f<A<operator<(X,Y)::Subclass>>
  // Good thing that compiler makes sure that really ambiguous cases of '>'
  // usage should be enclosed within '()' brackets.
  int template_counter = 1;
  // True only immediately after an identifier or an operator name, i.e. the
  // only places where a following '<' is treated as opening a nested template.
  bool can_open_template = false;
  while (HasMoreTokens() && template_counter > 0) {
    tok::TokenKind kind = Peek().getKind();
    switch (kind) {
    case tok::greatergreater:
      // '>>' is lexed as one token but closes two template levels. If only one
      // level is open the counter becomes negative and the parse is rejected
      // by the check after the loop.
      template_counter -= 2;
      can_open_template = false;
      Advance();
      break;
    case tok::greater:
      --template_counter;
      can_open_template = false;
      Advance();
      break;
    case tok::less:
      // '<' is an attempt to open a subtemplate.
      // Check if the parser is at a point where that is actually possible,
      // otherwise it's just a part of an expression like 'sizeof(T)<(10)'. No
      // need to do the same for '>' because the compiler actually makes sure
      // that '>' is always surrounded by brackets to avoid ambiguity.
      if (can_open_template)
        ++template_counter;
      can_open_template = false;
      Advance();
      break;
    case tok::kw_operator: // C++ operator overloading.
      if (!ConsumeOperator())
        return false;
      can_open_template = true;
      break;
    case tok::raw_identifier:
      can_open_template = true;
      Advance();
      break;
    case tok::l_square:
      // Bracketed groups are skipped wholesale, so any '<' or '>' inside them
      // never touches the counter.
      if (!ConsumeBrackets(tok::l_square, tok::r_square))
        return false;
      can_open_template = false;
      break;
    case tok::l_paren:
      if (!ConsumeArguments())
        return false;
      can_open_template = false;
      break;
    default:
      can_open_template = false;
      Advance();
      break;
    }
  }

  if (template_counter != 0) {
    return false;
  }
  start_position.Remove();
  return true;
}

// Consumes exactly the four tokens '(' 'anonymous' 'namespace' ')'. Returns
// false if any of them is missing.
bool CPlusPlusNameParser::ConsumeAnonymousNamespace() {
  Bookmark start_position = SetBookmark();
  if (!ConsumeToken(tok::l_paren)) {
    return false;
  }
  constexpr llvm::StringLiteral g_anonymous("anonymous");
  if (HasMoreTokens() && Peek().is(tok::raw_identifier) &&
      Peek().getRawIdentifier() == g_anonymous) {
    Advance();
  } else {
    return false;
  }

  if (!ConsumeToken(tok::kw_namespace)) {
    return false;
  }

  if (!ConsumeToken(tok::r_paren)) {
    return false;
  }
  start_position.Remove();
  return true;
}

// Consumes a balanced '{' ... '}' group whose first inner token is the
// identifier 'lambda' (presumably the demangled spelling of a lambda such as
// '{lambda(int)#1}' - confirm against the demangler output).
bool CPlusPlusNameParser::ConsumeLambda() {
  Bookmark start_position = SetBookmark();
  if (!ConsumeToken(tok::l_brace)) {
    return false;
  }
  constexpr llvm::StringLiteral g_lambda("lambda");
  if (HasMoreTokens() && Peek().is(tok::raw_identifier) &&
      Peek().getRawIdentifier() == g_lambda) {
    // Put the matched brace back so we can use ConsumeBrackets
    TakeBack();
  } else {
    return false;
  }

  if (!ConsumeBrackets(tok::l_brace, tok::r_brace)) {
    return false;
  }

  start_position.Remove();
  return true;
}

// Consumes a balanced group delimited by the `left` and `right` token kinds,
// including any nested groups of the same kind. Only these two kinds are
// counted; other bracket kinds inside the group are ignored. Returns false if
// the cursor is not at `left` or the tokens run out before the group closes.
bool CPlusPlusNameParser::ConsumeBrackets(tok::TokenKind left,
                                          tok::TokenKind right) {
  Bookmark start_position = SetBookmark();
  if (!HasMoreTokens() || Peek().getKind() != left)
    return false;
  Advance();

  int counter = 1;
  while (HasMoreTokens() && counter > 0) {
    tok::TokenKind kind = Peek().getKind();
    if (kind == right)
      --counter;
    else if (kind == left)
      ++counter;
    Advance();
  }

  // The loop stops as soon as the counter reaches zero, so it cannot go
  // negative.
  assert(counter >= 0);
  if (counter > 0) {
    return false;
  }
  start_position.Remove();
  return true;
}

// Consumes the `operator` keyword followed by an operator name: new/delete
// (optionally with '[]'), any token listed in OperatorKinds.def, '()', '[]',
// or a type name (conversion operator). Returns false if nothing valid follows
// the keyword.
bool CPlusPlusNameParser::ConsumeOperator() {
  Bookmark start_position = SetBookmark();
  if (!ConsumeToken(tok::kw_operator))
    return false;

  if (!HasMoreTokens()) {
    return false;
  }

  // Reference into m_tokens at the current cursor position.
  const auto &token = Peek();

  // When clang generates debug info it adds template parameters to names.
  // Since clang doesn't add a space between the name and the template parameter
  // in some cases we are not generating valid C++ names e.g.:
  //
  //   operator<<A::B>
  //
  // In some of these cases we will not parse them correctly. This fixes the
  // issue by detecting this case and inserting tok::less in place of
  // tok::lessless and returning successfully that we consumed the operator.
  if (token.getKind() == tok::lessless) {
    // Make sure we have more tokens before attempting to look ahead one more.
    if (m_next_token_index + 1 < m_tokens.size()) {
      // Look ahead one token past the '<<'.
      clang::Token n_token = m_tokens[m_next_token_index + 1];
      // If we find ( or < then this is indeed operator<< no need for fix.
      if (n_token.getKind() != tok::l_paren && n_token.getKind() != tok::less) {
        // Build a one-character '<' token located at the second character of
        // the original '<<'.
        clang::Token tmp_tok;
        tmp_tok.startToken();
        tmp_tok.setLength(1);
        tmp_tok.setLocation(token.getLocation().getLocWithOffset(1));
        tmp_tok.setKind(tok::less);

        // This overwrites the token vector in place. The cursor is not
        // advanced, so the rewritten '<' is the next token the caller sees.
        m_tokens[m_next_token_index] = tmp_tok;

        start_position.Remove();
        return true;
      }
    }
  }

  switch (token.getKind()) {
  case tok::kw_new:
  case tok::kw_delete:
    // This is 'new' or 'delete' operators.
    Advance();
    // Check for array new/delete.
    if (HasMoreTokens() && Peek().is(tok::l_square)) {
      // Consume the '[' and ']'.
      if (!ConsumeBrackets(tok::l_square, tok::r_square))
        return false;
    }
    break;

// Generate one `case` per single-token overloadable operator. Multi-token
// operators are expanded to nothing here; new/delete, '()' and '[]' are
// handled by the explicit cases around this block.
#define OVERLOADED_OPERATOR(Name, Spelling, Token, Unary, Binary, MemberOnly)  \
  case tok::Token:                                                             \
    Advance();                                                                 \
    break;
#define OVERLOADED_OPERATOR_MULTI(Name, Spelling, Unary, Binary, MemberOnly)
#include "clang/Basic/OperatorKinds.def"
#undef OVERLOADED_OPERATOR
#undef OVERLOADED_OPERATOR_MULTI

  case tok::l_paren:
    // Call operator consume '(' ... ')'.
    if (ConsumeBrackets(tok::l_paren, tok::r_paren))
      break;
    return false;

  case tok::l_square:
    // This is a [] operator.
    // Consume the '[' and ']'.
    if (ConsumeBrackets(tok::l_square, tok::r_square))
      break;
    return false;

  default:
    // This might be a cast operator.
    if (ConsumeTypename())
      break;
    return false;
  }
  start_position.Remove();
  return true;
}

// Consumes any run of 'const' / 'volatile' tokens.
void CPlusPlusNameParser::SkipTypeQualifiers() {
  while (ConsumeToken(tok::kw_const, tok::kw_volatile))
    ;
}

// Consumes any run of 'const', 'volatile', '&' and '&&' tokens, i.e. the
// qualifiers that may follow a member function's argument list.
void CPlusPlusNameParser::SkipFunctionQualifiers() {
  while (ConsumeToken(tok::kw_const, tok::kw_volatile, tok::amp, tok::ampamp))
    ;
}

// Consumes a run of built-in type keywords. Returns true if at least one
// keyword was consumed.
bool CPlusPlusNameParser::ConsumeBuiltinType() {
  bool result = false;
  bool continue_parsing = true;
  // Built-in types can be made of a few keywords like 'unsigned long long
  // int'. This function consumes all built-in type keywords without checking
  // if they make sense like 'unsigned char void'.
  while (continue_parsing && HasMoreTokens()) {
    switch (Peek().getKind()) {
    case tok::kw_short:
    case tok::kw_long:
    case tok::kw___int64:
    case tok::kw___int128:
    case tok::kw_signed:
    case tok::kw_unsigned:
    case tok::kw_void:
    case tok::kw_char:
    case tok::kw_int:
    case tok::kw_half:
    case tok::kw_float:
    case tok::kw_double:
    case tok::kw___float128:
    case tok::kw_wchar_t:
    case tok::kw_bool:
    case tok::kw_char16_t:
    case tok::kw_char32_t:
      result = true;
      Advance();
      break;
    default:
      continue_parsing = false;
      break;
    }
  }
  return result;
}

// Same as ConsumePtrsAndRefs() for callers that do not care whether anything
// was consumed.
void CPlusPlusNameParser::SkipPtrsAndRefs() {
  // Ignoring result.
  ConsumePtrsAndRefs();
}

// Consumes a run of '*', '&', '&&' tokens interleaved with 'const' /
// 'volatile'. Returns true only if the loop below matched at least once;
// qualifiers eaten by the leading SkipTypeQualifiers() call do not count.
bool CPlusPlusNameParser::ConsumePtrsAndRefs() {
  bool found = false;
  SkipTypeQualifiers();
  // Because SkipTypeQualifiers() has already drained any 'const'/'volatile',
  // only '*', '&' or '&&' can actually match at this point.
  while (ConsumeToken(tok::star, tok::amp, tok::ampamp, tok::kw_const,
                      tok::kw_volatile)) {
    found = true;
    SkipTypeQualifiers();
  }
  return found;
}

// Consumes 'decltype' followed by a balanced '(' ... ')' group.
bool CPlusPlusNameParser::ConsumeDecltype() {
  Bookmark start_position = SetBookmark();
  if (!ConsumeToken(tok::kw_decltype))
    return false;

  if (!ConsumeArguments())
    return false;

  start_position.Remove();
  return true;
}

// Consumes a type: optional cv-qualifiers, then a built-in type, a decltype
// expression or a (possibly qualified) name, then any trailing pointer /
// reference / cv tokens. Returns false if none of the three forms matches.
bool CPlusPlusNameParser::ConsumeTypename() {
  Bookmark start_position = SetBookmark();
  SkipTypeQualifiers();
  if (!ConsumeBuiltinType() && !ConsumeDecltype()) {
    if (!ParseFullNameImpl())
      return false;
  }
  SkipPtrsAndRefs();
  start_position.Remove();
  return true;
}

// Parses a (possibly qualified) C++ name starting at the cursor and returns
// the token ranges of its context (everything before the last '::') and its
// basename (everything after it). Parsing stops at the first token that cannot
// extend the name; the result is valid only if the parser stopped right after
// an identifier, an operator name or a template argument list.
Optional<CPlusPlusNameParser::ParsedNameRanges>
CPlusPlusNameParser::ParseFullNameImpl() {
  // Name parsing state machine.
  enum class State {
    Beginning,       // start of the name
    AfterTwoColons,  // right after ::
    AfterIdentifier, // right after alphanumerical identifier ([a-z0-9_]+)
    AfterTemplate,   // right after template brackets (<something>)
    AfterOperator,   // right after name of C++ operator
  };

  Bookmark start_position = SetBookmark();
  State state = State::Beginning;
  bool continue_parsing = true;
  // Token index of the most recent '::' seen; splits context from basename.
  Optional<size_t> last_coloncolon_position;

  while (continue_parsing && HasMoreTokens()) {
    const auto &token = Peek();
    switch (token.getKind()) {
    case tok::raw_identifier: // Just a name.
      if (state != State::Beginning && state != State::AfterTwoColons) {
        continue_parsing = false;
        break;
      }
      Advance();
      state = State::AfterIdentifier;
      break;
    case tok::l_paren: {
      if (state == State::Beginning || state == State::AfterTwoColons) {
        // (anonymous namespace)
        if (ConsumeAnonymousNamespace()) {
          state = State::AfterIdentifier;
          break;
        }
      }

      // Type declared inside a function 'func()::Type'
      if (state != State::AfterIdentifier && state != State::AfterTemplate &&
          state != State::AfterOperator) {
        continue_parsing = false;
        break;
      }
      // The argument list only belongs to the name if it is followed by '::'.
      // On the failing paths below this bookmark is not Remove()d (see the
      // NOTE at the top of the file), so the '(' is left for the caller.
      Bookmark l_paren_position = SetBookmark();
      // Consume the '(' ... ') [const]'.
      if (!ConsumeArguments()) {
        continue_parsing = false;
        break;
      }
      SkipFunctionQualifiers();

      // Consume '::'
      size_t coloncolon_position = GetCurrentPosition();
      if (!ConsumeToken(tok::coloncolon)) {
        continue_parsing = false;
        break;
      }
      l_paren_position.Remove();
      last_coloncolon_position = coloncolon_position;
      state = State::AfterTwoColons;
      break;
    }
    case tok::l_brace:
      // A lambda is treated like an identifier component of the name.
      if (state == State::Beginning || state == State::AfterTwoColons) {
        if (ConsumeLambda()) {
          state = State::AfterIdentifier;
          break;
        }
      }
      continue_parsing = false;
      break;
    case tok::coloncolon: // Type nesting delimiter.
      // A leading '::' (state Beginning) is accepted as well.
      if (state != State::Beginning && state != State::AfterIdentifier &&
          state != State::AfterTemplate) {
        continue_parsing = false;
        break;
      }
      last_coloncolon_position = GetCurrentPosition();
      Advance();
      state = State::AfterTwoColons;
      break;
    case tok::less: // Template brackets.
      if (state != State::AfterIdentifier && state != State::AfterOperator) {
        continue_parsing = false;
        break;
      }
      if (!ConsumeTemplateArgs()) {
        continue_parsing = false;
        break;
      }
      state = State::AfterTemplate;
      break;
    case tok::kw_operator: // C++ operator overloading.
      if (state != State::Beginning && state != State::AfterTwoColons) {
        continue_parsing = false;
        break;
      }
      if (!ConsumeOperator()) {
        continue_parsing = false;
        break;
      }
      state = State::AfterOperator;
      break;
    case tok::tilde: // Destructor.
      if (state != State::Beginning && state != State::AfterTwoColons) {
        continue_parsing = false;
        break;
      }
      Advance();
      if (ConsumeToken(tok::raw_identifier)) {
        state = State::AfterIdentifier;
      } else {
        // '~' not followed by an identifier: un-consume it and stop.
        TakeBack();
        continue_parsing = false;
      }
      break;
    default:
      continue_parsing = false;
      break;
    }
  }

  if (state == State::AfterIdentifier || state == State::AfterOperator ||
      state == State::AfterTemplate) {
    ParsedNameRanges result;
    if (last_coloncolon_position) {
      // Context is [start, last '::'); basename starts one token after the
      // '::' itself.
      result.context_range = Range(start_position.GetSavedPosition(),
                                   last_coloncolon_position.value());
      result.basename_range =
          Range(last_coloncolon_position.value() + 1, GetCurrentPosition());
    } else {
      // Unqualified name: context_range is left as default-constructed.
      result.basename_range =
          Range(start_position.GetSavedPosition(), GetCurrentPosition());
    }
    start_position.Remove();
    return result;
  } else {
    return None;
  }
}

// Returns the slice of m_text covered by the tokens in [begin_index,
// end_index), or an empty StringRef for an empty range. The raw encoding of
// each token's source location is used directly as a byte offset into m_text.
// NOTE(review): this presumably relies on ExtractTokens() creating the lexer
// with a default-constructed SourceLocation as the file start, so that raw
// encodings equal offsets - confirm against clang::Lexer.
llvm::StringRef CPlusPlusNameParser::GetTextForRange(const Range &range) {
  if (range.empty())
    return llvm::StringRef();
  assert(range.begin_index < range.end_index);
  assert(range.begin_index < m_tokens.size());
  assert(range.end_index <= m_tokens.size());
  clang::Token &first_token = m_tokens[range.begin_index];
  clang::Token &last_token = m_tokens[range.end_index - 1];
  clang::SourceLocation start_loc = first_token.getLocation();
  clang::SourceLocation end_loc = last_token.getLocation();
  unsigned start_pos = start_loc.getRawEncoding();
  unsigned end_pos = end_loc.getRawEncoding() + last_token.getLength();
  // Yields the characters in [start_pos, end_pos).
  return m_text.take_front(end_pos).drop_front(start_pos);
}

// Returns the language options used for lexing. The options object is a
// function-local static whose flags are set exactly once via llvm::call_once.
static const clang::LangOptions &GetLangOptions() {
  static clang::LangOptions g_options;
  static llvm::once_flag g_once_flag;
  llvm::call_once(g_once_flag, []() {
    g_options.LineComment = true;
    g_options.C99 = true;
    g_options.C11 = true;
    g_options.CPlusPlus = true;
    g_options.CPlusPlus11 = true;
    g_options.CPlusPlus14 = true;
    g_options.CPlusPlus17 = true;
  });
  return g_options;
}

// Returns a map from keyword spelling to token kind, with one entry for every
// KEYWORD(...) line in clang's TokenKinds.def.
static const llvm::StringMap<tok::TokenKind> &GetKeywordsMap() {
  static llvm::StringMap<tok::TokenKind> g_map{
#define KEYWORD(Name, Flags) {llvm::StringRef(#Name), tok::kw_##Name},
#include "clang/Basic/TokenKinds.def"
#undef KEYWORD
  };
  return g_map;
}

// Lexes m_text into m_tokens using clang's raw lexer. Tokens that come back as
// raw identifiers are looked up in the keyword map and re-tagged with the
// keyword's token kind when they match. Does nothing for empty text.
// NOTE(review): the lexer is handed m_text.data() .. data() + size() directly;
// clang::Lexer presumably expects the buffer to be NUL-terminated at its end,
// so callers would need to pass text backed by a NUL-terminated string -
// confirm against clang::Lexer and the call sites.
void CPlusPlusNameParser::ExtractTokens() {
  if (m_text.empty())
    return;
  clang::Lexer lexer(clang::SourceLocation(), GetLangOptions(), m_text.data(),
                     m_text.data(), m_text.data() + m_text.size());
  const auto &kw_map = GetKeywordsMap();
  clang::Token token;
  for (lexer.LexFromRawLexer(token); !token.is(clang::tok::eof);
       lexer.LexFromRawLexer(token)) {
    if (token.is(clang::tok::raw_identifier)) {
      auto it = kw_map.find(token.getRawIdentifier());
      if (it != kw_map.end()) {
        token.setKind(it->getValue());
      }
    }

    m_tokens.push_back(token);
  }
}