1 //===-- CPlusPlusNameParser.cpp ---------------------------------*- C++ -*-===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 10 #include "CPlusPlusNameParser.h" 11 12 #include "clang/Basic/IdentifierTable.h" 13 #include "llvm/ADT/StringMap.h" 14 #include "llvm/Support/Threading.h" 15 16 using namespace lldb; 17 using namespace lldb_private; 18 using llvm::Optional; 19 using llvm::None; 20 using ParsedFunction = lldb_private::CPlusPlusNameParser::ParsedFunction; 21 using ParsedName = lldb_private::CPlusPlusNameParser::ParsedName; 22 namespace tok = clang::tok; 23 24 Optional<ParsedFunction> CPlusPlusNameParser::ParseAsFunctionDefinition() { 25 m_next_token_index = 0; 26 Optional<ParsedFunction> result(None); 27 28 // Try to parse the name as function without a return type specified 29 // e.g. main(int, char*[]) 30 { 31 Bookmark start_position = SetBookmark(); 32 result = ParseFunctionImpl(false); 33 if (result && !HasMoreTokens()) 34 return result; 35 } 36 37 // Try to parse the name as function with function pointer return type 38 // e.g. void (*get_func(const char*))() 39 result = ParseFuncPtr(true); 40 if (result) 41 return result; 42 43 // Finally try to parse the name as a function with non-function return type 44 // e.g. int main(int, char*[]) 45 result = ParseFunctionImpl(true); 46 return result; 47 } 48 49 Optional<ParsedName> CPlusPlusNameParser::ParseAsFullName() { 50 m_next_token_index = 0; 51 Optional<ParsedNameRanges> name_ranges = ParseFullNameImpl(); 52 if (!name_ranges) 53 return None; 54 ParsedName result; 55 result.basename = GetTextForRange(name_ranges.getValue().basename_range); 56 result.context = GetTextForRange(name_ranges.getValue().context_range); 57 return result; 58 } 59 60 bool CPlusPlusNameParser::HasMoreTokens() { 61 return m_next_token_index < m_tokens.size(); 62 } 63 64 void CPlusPlusNameParser::Advance() { ++m_next_token_index; } 65 66 void CPlusPlusNameParser::TakeBack() { --m_next_token_index; } 67 68 bool CPlusPlusNameParser::ConsumeToken(tok::TokenKind kind) { 69 if (!HasMoreTokens()) 70 return false; 71 72 if (!Peek().is(kind)) 73 return false; 74 75 Advance(); 76 return true; 77 } 78 79 template <typename... Ts> bool CPlusPlusNameParser::ConsumeToken(Ts... kinds) { 80 if (!HasMoreTokens()) 81 return false; 82 83 if (!Peek().isOneOf(kinds...)) 84 return false; 85 86 Advance(); 87 return true; 88 } 89 90 CPlusPlusNameParser::Bookmark CPlusPlusNameParser::SetBookmark() { 91 return Bookmark(m_next_token_index); 92 } 93 94 size_t CPlusPlusNameParser::GetCurrentPosition() { return m_next_token_index; } 95 96 clang::Token &CPlusPlusNameParser::Peek() { 97 assert(HasMoreTokens()); 98 return m_tokens[m_next_token_index]; 99 } 100 101 Optional<ParsedFunction> 102 CPlusPlusNameParser::ParseFunctionImpl(bool expect_return_type) { 103 Bookmark start_position = SetBookmark(); 104 if (expect_return_type) { 105 // Consume return type if it's expected. 106 if (!ConsumeTypename()) 107 return None; 108 } 109 110 auto maybe_name = ParseFullNameImpl(); 111 if (!maybe_name) { 112 return None; 113 } 114 115 size_t argument_start = GetCurrentPosition(); 116 if (!ConsumeArguments()) { 117 return None; 118 } 119 120 size_t qualifiers_start = GetCurrentPosition(); 121 SkipFunctionQualifiers(); 122 size_t end_position = GetCurrentPosition(); 123 124 ParsedFunction result; 125 result.name.basename = GetTextForRange(maybe_name.getValue().basename_range); 126 result.name.context = GetTextForRange(maybe_name.getValue().context_range); 127 result.arguments = GetTextForRange(Range(argument_start, qualifiers_start)); 128 result.qualifiers = GetTextForRange(Range(qualifiers_start, end_position)); 129 start_position.Remove(); 130 return result; 131 } 132 133 Optional<ParsedFunction> 134 CPlusPlusNameParser::ParseFuncPtr(bool expect_return_type) { 135 Bookmark start_position = SetBookmark(); 136 if (expect_return_type) { 137 // Consume return type. 138 if (!ConsumeTypename()) 139 return None; 140 } 141 142 if (!ConsumeToken(tok::l_paren)) 143 return None; 144 if (!ConsumePtrsAndRefs()) 145 return None; 146 147 { 148 Bookmark before_inner_function_pos = SetBookmark(); 149 auto maybe_inner_function_name = ParseFunctionImpl(false); 150 if (maybe_inner_function_name) 151 if (ConsumeToken(tok::r_paren)) 152 if (ConsumeArguments()) { 153 SkipFunctionQualifiers(); 154 start_position.Remove(); 155 before_inner_function_pos.Remove(); 156 return maybe_inner_function_name; 157 } 158 } 159 160 auto maybe_inner_function_ptr_name = ParseFuncPtr(false); 161 if (maybe_inner_function_ptr_name) 162 if (ConsumeToken(tok::r_paren)) 163 if (ConsumeArguments()) { 164 SkipFunctionQualifiers(); 165 start_position.Remove(); 166 return maybe_inner_function_ptr_name; 167 } 168 return None; 169 } 170 171 bool CPlusPlusNameParser::ConsumeArguments() { 172 return ConsumeBrackets(tok::l_paren, tok::r_paren); 173 } 174 175 bool CPlusPlusNameParser::ConsumeTemplateArgs() { 176 Bookmark start_position = SetBookmark(); 177 if (!HasMoreTokens() || Peek().getKind() != tok::less) 178 return false; 179 Advance(); 180 181 // Consuming template arguments is a bit trickier than consuming function 182 // arguments, because '<' '>' brackets are not always trivially balanced. 183 // In some rare cases tokens '<' and '>' can appear inside template arguments 184 // as arithmetic or shift operators not as template brackets. 185 // Examples: std::enable_if<(10u)<(64), bool> 186 // f<A<operator<(X,Y)::Subclass>> 187 // Good thing that compiler makes sure that really ambiguous cases of 188 // '>' usage should be enclosed within '()' brackets. 189 int template_counter = 1; 190 bool can_open_template = false; 191 while (HasMoreTokens() && template_counter > 0) { 192 tok::TokenKind kind = Peek().getKind(); 193 switch (kind) { 194 case tok::greatergreater: 195 template_counter -= 2; 196 can_open_template = false; 197 Advance(); 198 break; 199 case tok::greater: 200 --template_counter; 201 can_open_template = false; 202 Advance(); 203 break; 204 case tok::less: 205 // '<' is an attempt to open a subteamplte 206 // check if parser is at the point where it's actually possible, 207 // otherwise it's just a part of an expression like 'sizeof(T)<(10)'. 208 // No need to do the same for '>' because compiler actually makes sure 209 // that '>' always surrounded by brackets to avoid ambiguity. 210 if (can_open_template) 211 ++template_counter; 212 can_open_template = false; 213 Advance(); 214 break; 215 case tok::kw_operator: // C++ operator overloading. 216 if (!ConsumeOperator()) 217 return false; 218 can_open_template = true; 219 break; 220 case tok::raw_identifier: 221 can_open_template = true; 222 Advance(); 223 break; 224 case tok::l_square: 225 if (!ConsumeBrackets(tok::l_square, tok::r_square)) 226 return false; 227 can_open_template = false; 228 break; 229 case tok::l_paren: 230 if (!ConsumeArguments()) 231 return false; 232 can_open_template = false; 233 break; 234 default: 235 can_open_template = false; 236 Advance(); 237 break; 238 } 239 } 240 241 assert(template_counter >= 0); 242 if (template_counter > 0) { 243 return false; 244 } 245 start_position.Remove(); 246 return true; 247 } 248 249 bool CPlusPlusNameParser::ConsumeAnonymousNamespace() { 250 Bookmark start_position = SetBookmark(); 251 if (!ConsumeToken(tok::l_paren)) { 252 return false; 253 } 254 constexpr llvm::StringLiteral g_anonymous("anonymous"); 255 if (HasMoreTokens() && Peek().is(tok::raw_identifier) && 256 Peek().getRawIdentifier() == g_anonymous) { 257 Advance(); 258 } else { 259 return false; 260 } 261 262 if (!ConsumeToken(tok::kw_namespace)) { 263 return false; 264 } 265 266 if (!ConsumeToken(tok::r_paren)) { 267 return false; 268 } 269 start_position.Remove(); 270 return true; 271 } 272 273 bool CPlusPlusNameParser::ConsumeBrackets(tok::TokenKind left, 274 tok::TokenKind right) { 275 Bookmark start_position = SetBookmark(); 276 if (!HasMoreTokens() || Peek().getKind() != left) 277 return false; 278 Advance(); 279 280 int counter = 1; 281 while (HasMoreTokens() && counter > 0) { 282 tok::TokenKind kind = Peek().getKind(); 283 if (kind == right) 284 --counter; 285 else if (kind == left) 286 ++counter; 287 Advance(); 288 } 289 290 assert(counter >= 0); 291 if (counter > 0) { 292 return false; 293 } 294 start_position.Remove(); 295 return true; 296 } 297 298 bool CPlusPlusNameParser::ConsumeOperator() { 299 Bookmark start_position = SetBookmark(); 300 if (!ConsumeToken(tok::kw_operator)) 301 return false; 302 303 if (!HasMoreTokens()) { 304 return false; 305 } 306 307 const auto &token = Peek(); 308 switch (token.getKind()) { 309 case tok::kw_new: 310 case tok::kw_delete: 311 // This is 'new' or 'delete' operators. 312 Advance(); 313 // Check for array new/delete. 314 if (HasMoreTokens() && Peek().is(tok::l_square)) { 315 // Consume the '[' and ']'. 316 if (!ConsumeBrackets(tok::l_square, tok::r_square)) 317 return false; 318 } 319 break; 320 321 #define OVERLOADED_OPERATOR(Name, Spelling, Token, Unary, Binary, MemberOnly) \ 322 case tok::Token: \ 323 Advance(); \ 324 break; 325 #define OVERLOADED_OPERATOR_MULTI(Name, Spelling, Unary, Binary, MemberOnly) 326 #include "clang/Basic/OperatorKinds.def" 327 #undef OVERLOADED_OPERATOR 328 #undef OVERLOADED_OPERATOR_MULTI 329 330 case tok::l_paren: 331 // Call operator consume '(' ... ')'. 332 if (ConsumeBrackets(tok::l_paren, tok::r_paren)) 333 break; 334 return false; 335 336 case tok::l_square: 337 // This is a [] operator. 338 // Consume the '[' and ']'. 339 if (ConsumeBrackets(tok::l_square, tok::r_square)) 340 break; 341 return false; 342 343 default: 344 // This might be a cast operator. 345 if (ConsumeTypename()) 346 break; 347 return false; 348 } 349 start_position.Remove(); 350 return true; 351 } 352 353 void CPlusPlusNameParser::SkipTypeQualifiers() { 354 while (ConsumeToken(tok::kw_const, tok::kw_volatile)) 355 ; 356 } 357 358 void CPlusPlusNameParser::SkipFunctionQualifiers() { 359 while (ConsumeToken(tok::kw_const, tok::kw_volatile, tok::amp, tok::ampamp)) 360 ; 361 } 362 363 bool CPlusPlusNameParser::ConsumeBuiltinType() { 364 bool result = false; 365 bool continue_parsing = true; 366 // Built-in types can be made of a few keywords 367 // like 'unsigned long long int'. This function 368 // consumes all built-in type keywords without 369 // checking if they make sense like 'unsigned char void'. 370 while (continue_parsing && HasMoreTokens()) { 371 switch (Peek().getKind()) { 372 case tok::kw_short: 373 case tok::kw_long: 374 case tok::kw___int64: 375 case tok::kw___int128: 376 case tok::kw_signed: 377 case tok::kw_unsigned: 378 case tok::kw_void: 379 case tok::kw_char: 380 case tok::kw_int: 381 case tok::kw_half: 382 case tok::kw_float: 383 case tok::kw_double: 384 case tok::kw___float128: 385 case tok::kw_wchar_t: 386 case tok::kw_bool: 387 case tok::kw_char16_t: 388 case tok::kw_char32_t: 389 result = true; 390 Advance(); 391 break; 392 default: 393 continue_parsing = false; 394 break; 395 } 396 } 397 return result; 398 } 399 400 void CPlusPlusNameParser::SkipPtrsAndRefs() { 401 // Ignoring result. 402 ConsumePtrsAndRefs(); 403 } 404 405 bool CPlusPlusNameParser::ConsumePtrsAndRefs() { 406 bool found = false; 407 SkipTypeQualifiers(); 408 while (ConsumeToken(tok::star, tok::amp, tok::ampamp, tok::kw_const, 409 tok::kw_volatile)) { 410 found = true; 411 SkipTypeQualifiers(); 412 } 413 return found; 414 } 415 416 bool CPlusPlusNameParser::ConsumeTypename() { 417 Bookmark start_position = SetBookmark(); 418 SkipTypeQualifiers(); 419 if (!ConsumeBuiltinType()) { 420 if (!ParseFullNameImpl()) 421 return false; 422 } 423 SkipPtrsAndRefs(); 424 start_position.Remove(); 425 return true; 426 } 427 428 Optional<CPlusPlusNameParser::ParsedNameRanges> 429 CPlusPlusNameParser::ParseFullNameImpl() { 430 // Name parsing state machine. 431 enum class State { 432 Beginning, // start of the name 433 AfterTwoColons, // right after :: 434 AfterIdentifier, // right after alphanumerical identifier ([a-z0-9_]+) 435 AfterTemplate, // right after template brackets (<something>) 436 AfterOperator, // right after name of C++ operator 437 }; 438 439 Bookmark start_position = SetBookmark(); 440 State state = State::Beginning; 441 bool continue_parsing = true; 442 Optional<size_t> last_coloncolon_position = None; 443 444 while (continue_parsing && HasMoreTokens()) { 445 const auto &token = Peek(); 446 switch (token.getKind()) { 447 case tok::raw_identifier: // Just a name. 448 if (state != State::Beginning && state != State::AfterTwoColons) { 449 continue_parsing = false; 450 break; 451 } 452 Advance(); 453 state = State::AfterIdentifier; 454 break; 455 case tok::l_paren: { 456 if (state == State::Beginning || state == State::AfterTwoColons) { 457 // (anonymous namespace) 458 if (ConsumeAnonymousNamespace()) { 459 state = State::AfterIdentifier; 460 break; 461 } 462 } 463 464 // Type declared inside a function 'func()::Type' 465 if (state != State::AfterIdentifier && state != State::AfterTemplate && 466 state != State::AfterOperator) { 467 continue_parsing = false; 468 break; 469 } 470 Bookmark l_paren_position = SetBookmark(); 471 // Consume the '(' ... ') [const]'. 472 if (!ConsumeArguments()) { 473 continue_parsing = false; 474 break; 475 } 476 SkipFunctionQualifiers(); 477 478 // Consume '::' 479 size_t coloncolon_position = GetCurrentPosition(); 480 if (!ConsumeToken(tok::coloncolon)) { 481 continue_parsing = false; 482 break; 483 } 484 l_paren_position.Remove(); 485 last_coloncolon_position = coloncolon_position; 486 state = State::AfterTwoColons; 487 break; 488 } 489 case tok::coloncolon: // Type nesting delimiter. 490 if (state != State::Beginning && state != State::AfterIdentifier && 491 state != State::AfterTemplate) { 492 continue_parsing = false; 493 break; 494 } 495 last_coloncolon_position = GetCurrentPosition(); 496 Advance(); 497 state = State::AfterTwoColons; 498 break; 499 case tok::less: // Template brackets. 500 if (state != State::AfterIdentifier && state != State::AfterOperator) { 501 continue_parsing = false; 502 break; 503 } 504 if (!ConsumeTemplateArgs()) { 505 continue_parsing = false; 506 break; 507 } 508 state = State::AfterTemplate; 509 break; 510 case tok::kw_operator: // C++ operator overloading. 511 if (state != State::Beginning && state != State::AfterTwoColons) { 512 continue_parsing = false; 513 break; 514 } 515 if (!ConsumeOperator()) { 516 continue_parsing = false; 517 break; 518 } 519 state = State::AfterOperator; 520 break; 521 case tok::tilde: // Destructor. 522 if (state != State::Beginning && state != State::AfterTwoColons) { 523 continue_parsing = false; 524 break; 525 } 526 Advance(); 527 if (ConsumeToken(tok::raw_identifier)) { 528 state = State::AfterIdentifier; 529 } else { 530 TakeBack(); 531 continue_parsing = false; 532 } 533 break; 534 default: 535 continue_parsing = false; 536 break; 537 } 538 } 539 540 if (state == State::AfterIdentifier || state == State::AfterOperator || 541 state == State::AfterTemplate) { 542 ParsedNameRanges result; 543 if (last_coloncolon_position) { 544 result.context_range = Range(start_position.GetSavedPosition(), 545 last_coloncolon_position.getValue()); 546 result.basename_range = 547 Range(last_coloncolon_position.getValue() + 1, GetCurrentPosition()); 548 } else { 549 result.basename_range = 550 Range(start_position.GetSavedPosition(), GetCurrentPosition()); 551 } 552 start_position.Remove(); 553 return result; 554 } else { 555 return None; 556 } 557 } 558 559 llvm::StringRef CPlusPlusNameParser::GetTextForRange(const Range &range) { 560 if (range.empty()) 561 return llvm::StringRef(); 562 assert(range.begin_index < range.end_index); 563 assert(range.begin_index < m_tokens.size()); 564 assert(range.end_index <= m_tokens.size()); 565 clang::Token &first_token = m_tokens[range.begin_index]; 566 clang::Token &last_token = m_tokens[range.end_index - 1]; 567 clang::SourceLocation start_loc = first_token.getLocation(); 568 clang::SourceLocation end_loc = last_token.getLocation(); 569 unsigned start_pos = start_loc.getRawEncoding(); 570 unsigned end_pos = end_loc.getRawEncoding() + last_token.getLength(); 571 return m_text.take_front(end_pos).drop_front(start_pos); 572 } 573 574 static const clang::LangOptions &GetLangOptions() { 575 static clang::LangOptions g_options; 576 static llvm::once_flag g_once_flag; 577 llvm::call_once(g_once_flag, []() { 578 g_options.LineComment = true; 579 g_options.C99 = true; 580 g_options.C11 = true; 581 g_options.CPlusPlus = true; 582 g_options.CPlusPlus11 = true; 583 g_options.CPlusPlus14 = true; 584 g_options.CPlusPlus1z = true; 585 }); 586 return g_options; 587 } 588 589 static const llvm::StringMap<tok::TokenKind> &GetKeywordsMap() { 590 static llvm::StringMap<tok::TokenKind> g_map{ 591 #define KEYWORD(Name, Flags) {llvm::StringRef(#Name), tok::kw_##Name}, 592 #include "clang/Basic/TokenKinds.def" 593 #undef KEYWORD 594 }; 595 return g_map; 596 } 597 598 void CPlusPlusNameParser::ExtractTokens() { 599 clang::Lexer lexer(clang::SourceLocation(), GetLangOptions(), m_text.data(), 600 m_text.data(), m_text.data() + m_text.size()); 601 const auto &kw_map = GetKeywordsMap(); 602 clang::Token token; 603 for (lexer.LexFromRawLexer(token); !token.is(clang::tok::eof); 604 lexer.LexFromRawLexer(token)) { 605 if (token.is(clang::tok::raw_identifier)) { 606 auto it = kw_map.find(token.getRawIdentifier()); 607 if (it != kw_map.end()) { 608 token.setKind(it->getValue()); 609 } 610 } 611 612 m_tokens.push_back(token); 613 } 614 } 615