1 //===-- CPlusPlusNameParser.cpp ---------------------------------*- C++ -*-===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 10 #include "CPlusPlusNameParser.h" 11 12 #include "clang/Basic/IdentifierTable.h" 13 #include "llvm/ADT/StringMap.h" 14 #include "llvm/Support/Threading.h" 15 16 using namespace lldb; 17 using namespace lldb_private; 18 using llvm::Optional; 19 using llvm::None; 20 using ParsedFunction = lldb_private::CPlusPlusNameParser::ParsedFunction; 21 using ParsedName = lldb_private::CPlusPlusNameParser::ParsedName; 22 namespace tok = clang::tok; 23 24 Optional<ParsedFunction> CPlusPlusNameParser::ParseAsFunctionDefinition() { 25 m_next_token_index = 0; 26 Optional<ParsedFunction> result(None); 27 28 // Try to parse the name as function without a return type specified 29 // e.g. main(int, char*[]) 30 { 31 Bookmark start_position = SetBookmark(); 32 result = ParseFunctionImpl(false); 33 if (result && !HasMoreTokens()) 34 return result; 35 } 36 37 // Try to parse the name as function with function pointer return type 38 // e.g. void (*get_func(const char*))() 39 result = ParseFuncPtr(true); 40 if (result) 41 return result; 42 43 // Finally try to parse the name as a function with non-function return type 44 // e.g. int main(int, char*[]) 45 result = ParseFunctionImpl(true); 46 if (HasMoreTokens()) 47 return None; 48 return result; 49 } 50 51 Optional<ParsedName> CPlusPlusNameParser::ParseAsFullName() { 52 m_next_token_index = 0; 53 Optional<ParsedNameRanges> name_ranges = ParseFullNameImpl(); 54 if (!name_ranges) 55 return None; 56 if (HasMoreTokens()) 57 return None; 58 ParsedName result; 59 result.basename = GetTextForRange(name_ranges.getValue().basename_range); 60 result.context = GetTextForRange(name_ranges.getValue().context_range); 61 return result; 62 } 63 64 bool CPlusPlusNameParser::HasMoreTokens() { 65 return m_next_token_index < m_tokens.size(); 66 } 67 68 void CPlusPlusNameParser::Advance() { ++m_next_token_index; } 69 70 void CPlusPlusNameParser::TakeBack() { --m_next_token_index; } 71 72 bool CPlusPlusNameParser::ConsumeToken(tok::TokenKind kind) { 73 if (!HasMoreTokens()) 74 return false; 75 76 if (!Peek().is(kind)) 77 return false; 78 79 Advance(); 80 return true; 81 } 82 83 template <typename... Ts> bool CPlusPlusNameParser::ConsumeToken(Ts... kinds) { 84 if (!HasMoreTokens()) 85 return false; 86 87 if (!Peek().isOneOf(kinds...)) 88 return false; 89 90 Advance(); 91 return true; 92 } 93 94 CPlusPlusNameParser::Bookmark CPlusPlusNameParser::SetBookmark() { 95 return Bookmark(m_next_token_index); 96 } 97 98 size_t CPlusPlusNameParser::GetCurrentPosition() { return m_next_token_index; } 99 100 clang::Token &CPlusPlusNameParser::Peek() { 101 assert(HasMoreTokens()); 102 return m_tokens[m_next_token_index]; 103 } 104 105 Optional<ParsedFunction> 106 CPlusPlusNameParser::ParseFunctionImpl(bool expect_return_type) { 107 Bookmark start_position = SetBookmark(); 108 if (expect_return_type) { 109 // Consume return type if it's expected. 110 if (!ConsumeTypename()) 111 return None; 112 } 113 114 auto maybe_name = ParseFullNameImpl(); 115 if (!maybe_name) { 116 return None; 117 } 118 119 size_t argument_start = GetCurrentPosition(); 120 if (!ConsumeArguments()) { 121 return None; 122 } 123 124 size_t qualifiers_start = GetCurrentPosition(); 125 SkipFunctionQualifiers(); 126 size_t end_position = GetCurrentPosition(); 127 128 ParsedFunction result; 129 result.name.basename = GetTextForRange(maybe_name.getValue().basename_range); 130 result.name.context = GetTextForRange(maybe_name.getValue().context_range); 131 result.arguments = GetTextForRange(Range(argument_start, qualifiers_start)); 132 result.qualifiers = GetTextForRange(Range(qualifiers_start, end_position)); 133 start_position.Remove(); 134 return result; 135 } 136 137 Optional<ParsedFunction> 138 CPlusPlusNameParser::ParseFuncPtr(bool expect_return_type) { 139 Bookmark start_position = SetBookmark(); 140 if (expect_return_type) { 141 // Consume return type. 142 if (!ConsumeTypename()) 143 return None; 144 } 145 146 if (!ConsumeToken(tok::l_paren)) 147 return None; 148 if (!ConsumePtrsAndRefs()) 149 return None; 150 151 { 152 Bookmark before_inner_function_pos = SetBookmark(); 153 auto maybe_inner_function_name = ParseFunctionImpl(false); 154 if (maybe_inner_function_name) 155 if (ConsumeToken(tok::r_paren)) 156 if (ConsumeArguments()) { 157 SkipFunctionQualifiers(); 158 start_position.Remove(); 159 before_inner_function_pos.Remove(); 160 return maybe_inner_function_name; 161 } 162 } 163 164 auto maybe_inner_function_ptr_name = ParseFuncPtr(false); 165 if (maybe_inner_function_ptr_name) 166 if (ConsumeToken(tok::r_paren)) 167 if (ConsumeArguments()) { 168 SkipFunctionQualifiers(); 169 start_position.Remove(); 170 return maybe_inner_function_ptr_name; 171 } 172 return None; 173 } 174 175 bool CPlusPlusNameParser::ConsumeArguments() { 176 return ConsumeBrackets(tok::l_paren, tok::r_paren); 177 } 178 179 bool CPlusPlusNameParser::ConsumeTemplateArgs() { 180 Bookmark start_position = SetBookmark(); 181 if (!HasMoreTokens() || Peek().getKind() != tok::less) 182 return false; 183 Advance(); 184 185 // Consuming template arguments is a bit trickier than consuming function 186 // arguments, because '<' '>' brackets are not always trivially balanced. 187 // In some rare cases tokens '<' and '>' can appear inside template arguments 188 // as arithmetic or shift operators not as template brackets. 189 // Examples: std::enable_if<(10u)<(64), bool> 190 // f<A<operator<(X,Y)::Subclass>> 191 // Good thing that compiler makes sure that really ambiguous cases of 192 // '>' usage should be enclosed within '()' brackets. 193 int template_counter = 1; 194 bool can_open_template = false; 195 while (HasMoreTokens() && template_counter > 0) { 196 tok::TokenKind kind = Peek().getKind(); 197 switch (kind) { 198 case tok::greatergreater: 199 template_counter -= 2; 200 can_open_template = false; 201 Advance(); 202 break; 203 case tok::greater: 204 --template_counter; 205 can_open_template = false; 206 Advance(); 207 break; 208 case tok::less: 209 // '<' is an attempt to open a subteamplte 210 // check if parser is at the point where it's actually possible, 211 // otherwise it's just a part of an expression like 'sizeof(T)<(10)'. 212 // No need to do the same for '>' because compiler actually makes sure 213 // that '>' always surrounded by brackets to avoid ambiguity. 214 if (can_open_template) 215 ++template_counter; 216 can_open_template = false; 217 Advance(); 218 break; 219 case tok::kw_operator: // C++ operator overloading. 220 if (!ConsumeOperator()) 221 return false; 222 can_open_template = true; 223 break; 224 case tok::raw_identifier: 225 can_open_template = true; 226 Advance(); 227 break; 228 case tok::l_square: 229 if (!ConsumeBrackets(tok::l_square, tok::r_square)) 230 return false; 231 can_open_template = false; 232 break; 233 case tok::l_paren: 234 if (!ConsumeArguments()) 235 return false; 236 can_open_template = false; 237 break; 238 default: 239 can_open_template = false; 240 Advance(); 241 break; 242 } 243 } 244 245 if (template_counter != 0) { 246 return false; 247 } 248 start_position.Remove(); 249 return true; 250 } 251 252 bool CPlusPlusNameParser::ConsumeAnonymousNamespace() { 253 Bookmark start_position = SetBookmark(); 254 if (!ConsumeToken(tok::l_paren)) { 255 return false; 256 } 257 constexpr llvm::StringLiteral g_anonymous("anonymous"); 258 if (HasMoreTokens() && Peek().is(tok::raw_identifier) && 259 Peek().getRawIdentifier() == g_anonymous) { 260 Advance(); 261 } else { 262 return false; 263 } 264 265 if (!ConsumeToken(tok::kw_namespace)) { 266 return false; 267 } 268 269 if (!ConsumeToken(tok::r_paren)) { 270 return false; 271 } 272 start_position.Remove(); 273 return true; 274 } 275 276 bool CPlusPlusNameParser::ConsumeLambda() { 277 Bookmark start_position = SetBookmark(); 278 if (!ConsumeToken(tok::l_brace)) { 279 return false; 280 } 281 constexpr llvm::StringLiteral g_lambda("lambda"); 282 if (HasMoreTokens() && Peek().is(tok::raw_identifier) && 283 Peek().getRawIdentifier() == g_lambda) { 284 // Put the matched brace back so we can use ConsumeBrackets 285 TakeBack(); 286 } else { 287 return false; 288 } 289 290 if (!ConsumeBrackets(tok::l_brace, tok::r_brace)) { 291 return false; 292 } 293 294 start_position.Remove(); 295 return true; 296 } 297 298 bool CPlusPlusNameParser::ConsumeBrackets(tok::TokenKind left, 299 tok::TokenKind right) { 300 Bookmark start_position = SetBookmark(); 301 if (!HasMoreTokens() || Peek().getKind() != left) 302 return false; 303 Advance(); 304 305 int counter = 1; 306 while (HasMoreTokens() && counter > 0) { 307 tok::TokenKind kind = Peek().getKind(); 308 if (kind == right) 309 --counter; 310 else if (kind == left) 311 ++counter; 312 Advance(); 313 } 314 315 assert(counter >= 0); 316 if (counter > 0) { 317 return false; 318 } 319 start_position.Remove(); 320 return true; 321 } 322 323 bool CPlusPlusNameParser::ConsumeOperator() { 324 Bookmark start_position = SetBookmark(); 325 if (!ConsumeToken(tok::kw_operator)) 326 return false; 327 328 if (!HasMoreTokens()) { 329 return false; 330 } 331 332 const auto &token = Peek(); 333 switch (token.getKind()) { 334 case tok::kw_new: 335 case tok::kw_delete: 336 // This is 'new' or 'delete' operators. 337 Advance(); 338 // Check for array new/delete. 339 if (HasMoreTokens() && Peek().is(tok::l_square)) { 340 // Consume the '[' and ']'. 341 if (!ConsumeBrackets(tok::l_square, tok::r_square)) 342 return false; 343 } 344 break; 345 346 #define OVERLOADED_OPERATOR(Name, Spelling, Token, Unary, Binary, MemberOnly) \ 347 case tok::Token: \ 348 Advance(); \ 349 break; 350 #define OVERLOADED_OPERATOR_MULTI(Name, Spelling, Unary, Binary, MemberOnly) 351 #include "clang/Basic/OperatorKinds.def" 352 #undef OVERLOADED_OPERATOR 353 #undef OVERLOADED_OPERATOR_MULTI 354 355 case tok::l_paren: 356 // Call operator consume '(' ... ')'. 357 if (ConsumeBrackets(tok::l_paren, tok::r_paren)) 358 break; 359 return false; 360 361 case tok::l_square: 362 // This is a [] operator. 363 // Consume the '[' and ']'. 364 if (ConsumeBrackets(tok::l_square, tok::r_square)) 365 break; 366 return false; 367 368 default: 369 // This might be a cast operator. 370 if (ConsumeTypename()) 371 break; 372 return false; 373 } 374 start_position.Remove(); 375 return true; 376 } 377 378 void CPlusPlusNameParser::SkipTypeQualifiers() { 379 while (ConsumeToken(tok::kw_const, tok::kw_volatile)) 380 ; 381 } 382 383 void CPlusPlusNameParser::SkipFunctionQualifiers() { 384 while (ConsumeToken(tok::kw_const, tok::kw_volatile, tok::amp, tok::ampamp)) 385 ; 386 } 387 388 bool CPlusPlusNameParser::ConsumeBuiltinType() { 389 bool result = false; 390 bool continue_parsing = true; 391 // Built-in types can be made of a few keywords 392 // like 'unsigned long long int'. This function 393 // consumes all built-in type keywords without 394 // checking if they make sense like 'unsigned char void'. 395 while (continue_parsing && HasMoreTokens()) { 396 switch (Peek().getKind()) { 397 case tok::kw_short: 398 case tok::kw_long: 399 case tok::kw___int64: 400 case tok::kw___int128: 401 case tok::kw_signed: 402 case tok::kw_unsigned: 403 case tok::kw_void: 404 case tok::kw_char: 405 case tok::kw_int: 406 case tok::kw_half: 407 case tok::kw_float: 408 case tok::kw_double: 409 case tok::kw___float128: 410 case tok::kw_wchar_t: 411 case tok::kw_bool: 412 case tok::kw_char16_t: 413 case tok::kw_char32_t: 414 result = true; 415 Advance(); 416 break; 417 default: 418 continue_parsing = false; 419 break; 420 } 421 } 422 return result; 423 } 424 425 void CPlusPlusNameParser::SkipPtrsAndRefs() { 426 // Ignoring result. 427 ConsumePtrsAndRefs(); 428 } 429 430 bool CPlusPlusNameParser::ConsumePtrsAndRefs() { 431 bool found = false; 432 SkipTypeQualifiers(); 433 while (ConsumeToken(tok::star, tok::amp, tok::ampamp, tok::kw_const, 434 tok::kw_volatile)) { 435 found = true; 436 SkipTypeQualifiers(); 437 } 438 return found; 439 } 440 441 bool CPlusPlusNameParser::ConsumeDecltype() { 442 Bookmark start_position = SetBookmark(); 443 if (!ConsumeToken(tok::kw_decltype)) 444 return false; 445 446 if (!ConsumeArguments()) 447 return false; 448 449 start_position.Remove(); 450 return true; 451 } 452 453 bool CPlusPlusNameParser::ConsumeTypename() { 454 Bookmark start_position = SetBookmark(); 455 SkipTypeQualifiers(); 456 if (!ConsumeBuiltinType() && !ConsumeDecltype()) { 457 if (!ParseFullNameImpl()) 458 return false; 459 } 460 SkipPtrsAndRefs(); 461 start_position.Remove(); 462 return true; 463 } 464 465 Optional<CPlusPlusNameParser::ParsedNameRanges> 466 CPlusPlusNameParser::ParseFullNameImpl() { 467 // Name parsing state machine. 468 enum class State { 469 Beginning, // start of the name 470 AfterTwoColons, // right after :: 471 AfterIdentifier, // right after alphanumerical identifier ([a-z0-9_]+) 472 AfterTemplate, // right after template brackets (<something>) 473 AfterOperator, // right after name of C++ operator 474 }; 475 476 Bookmark start_position = SetBookmark(); 477 State state = State::Beginning; 478 bool continue_parsing = true; 479 Optional<size_t> last_coloncolon_position = None; 480 481 while (continue_parsing && HasMoreTokens()) { 482 const auto &token = Peek(); 483 switch (token.getKind()) { 484 case tok::raw_identifier: // Just a name. 485 if (state != State::Beginning && state != State::AfterTwoColons) { 486 continue_parsing = false; 487 break; 488 } 489 Advance(); 490 state = State::AfterIdentifier; 491 break; 492 case tok::l_paren: { 493 if (state == State::Beginning || state == State::AfterTwoColons) { 494 // (anonymous namespace) 495 if (ConsumeAnonymousNamespace()) { 496 state = State::AfterIdentifier; 497 break; 498 } 499 } 500 501 // Type declared inside a function 'func()::Type' 502 if (state != State::AfterIdentifier && state != State::AfterTemplate && 503 state != State::AfterOperator) { 504 continue_parsing = false; 505 break; 506 } 507 Bookmark l_paren_position = SetBookmark(); 508 // Consume the '(' ... ') [const]'. 509 if (!ConsumeArguments()) { 510 continue_parsing = false; 511 break; 512 } 513 SkipFunctionQualifiers(); 514 515 // Consume '::' 516 size_t coloncolon_position = GetCurrentPosition(); 517 if (!ConsumeToken(tok::coloncolon)) { 518 continue_parsing = false; 519 break; 520 } 521 l_paren_position.Remove(); 522 last_coloncolon_position = coloncolon_position; 523 state = State::AfterTwoColons; 524 break; 525 } 526 case tok::l_brace: 527 if (state == State::Beginning || state == State::AfterTwoColons) { 528 if (ConsumeLambda()) { 529 state = State::AfterIdentifier; 530 break; 531 } 532 } 533 continue_parsing = false; 534 break; 535 case tok::coloncolon: // Type nesting delimiter. 536 if (state != State::Beginning && state != State::AfterIdentifier && 537 state != State::AfterTemplate) { 538 continue_parsing = false; 539 break; 540 } 541 last_coloncolon_position = GetCurrentPosition(); 542 Advance(); 543 state = State::AfterTwoColons; 544 break; 545 case tok::less: // Template brackets. 546 if (state != State::AfterIdentifier && state != State::AfterOperator) { 547 continue_parsing = false; 548 break; 549 } 550 if (!ConsumeTemplateArgs()) { 551 continue_parsing = false; 552 break; 553 } 554 state = State::AfterTemplate; 555 break; 556 case tok::kw_operator: // C++ operator overloading. 557 if (state != State::Beginning && state != State::AfterTwoColons) { 558 continue_parsing = false; 559 break; 560 } 561 if (!ConsumeOperator()) { 562 continue_parsing = false; 563 break; 564 } 565 state = State::AfterOperator; 566 break; 567 case tok::tilde: // Destructor. 568 if (state != State::Beginning && state != State::AfterTwoColons) { 569 continue_parsing = false; 570 break; 571 } 572 Advance(); 573 if (ConsumeToken(tok::raw_identifier)) { 574 state = State::AfterIdentifier; 575 } else { 576 TakeBack(); 577 continue_parsing = false; 578 } 579 break; 580 default: 581 continue_parsing = false; 582 break; 583 } 584 } 585 586 if (state == State::AfterIdentifier || state == State::AfterOperator || 587 state == State::AfterTemplate) { 588 ParsedNameRanges result; 589 if (last_coloncolon_position) { 590 result.context_range = Range(start_position.GetSavedPosition(), 591 last_coloncolon_position.getValue()); 592 result.basename_range = 593 Range(last_coloncolon_position.getValue() + 1, GetCurrentPosition()); 594 } else { 595 result.basename_range = 596 Range(start_position.GetSavedPosition(), GetCurrentPosition()); 597 } 598 start_position.Remove(); 599 return result; 600 } else { 601 return None; 602 } 603 } 604 605 llvm::StringRef CPlusPlusNameParser::GetTextForRange(const Range &range) { 606 if (range.empty()) 607 return llvm::StringRef(); 608 assert(range.begin_index < range.end_index); 609 assert(range.begin_index < m_tokens.size()); 610 assert(range.end_index <= m_tokens.size()); 611 clang::Token &first_token = m_tokens[range.begin_index]; 612 clang::Token &last_token = m_tokens[range.end_index - 1]; 613 clang::SourceLocation start_loc = first_token.getLocation(); 614 clang::SourceLocation end_loc = last_token.getLocation(); 615 unsigned start_pos = start_loc.getRawEncoding(); 616 unsigned end_pos = end_loc.getRawEncoding() + last_token.getLength(); 617 return m_text.take_front(end_pos).drop_front(start_pos); 618 } 619 620 static const clang::LangOptions &GetLangOptions() { 621 static clang::LangOptions g_options; 622 static llvm::once_flag g_once_flag; 623 llvm::call_once(g_once_flag, []() { 624 g_options.LineComment = true; 625 g_options.C99 = true; 626 g_options.C11 = true; 627 g_options.CPlusPlus = true; 628 g_options.CPlusPlus11 = true; 629 g_options.CPlusPlus14 = true; 630 g_options.CPlusPlus17 = true; 631 }); 632 return g_options; 633 } 634 635 static const llvm::StringMap<tok::TokenKind> &GetKeywordsMap() { 636 static llvm::StringMap<tok::TokenKind> g_map{ 637 #define KEYWORD(Name, Flags) {llvm::StringRef(#Name), tok::kw_##Name}, 638 #include "clang/Basic/TokenKinds.def" 639 #undef KEYWORD 640 }; 641 return g_map; 642 } 643 644 void CPlusPlusNameParser::ExtractTokens() { 645 clang::Lexer lexer(clang::SourceLocation(), GetLangOptions(), m_text.data(), 646 m_text.data(), m_text.data() + m_text.size()); 647 const auto &kw_map = GetKeywordsMap(); 648 clang::Token token; 649 for (lexer.LexFromRawLexer(token); !token.is(clang::tok::eof); 650 lexer.LexFromRawLexer(token)) { 651 if (token.is(clang::tok::raw_identifier)) { 652 auto it = kw_map.find(token.getRawIdentifier()); 653 if (it != kw_map.end()) { 654 token.setKind(it->getValue()); 655 } 656 } 657 658 m_tokens.push_back(token); 659 } 660 } 661