1 //===- Parser.cpp - Matcher expression parser -----------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 /// 9 /// \file 10 /// Recursive parser implementation for the matcher expression grammar. 11 /// 12 //===----------------------------------------------------------------------===// 13 14 #include "clang/ASTMatchers/Dynamic/Parser.h" 15 #include "clang/ASTMatchers/ASTMatchersInternal.h" 16 #include "clang/ASTMatchers/Dynamic/Diagnostics.h" 17 #include "clang/ASTMatchers/Dynamic/Registry.h" 18 #include "clang/Basic/CharInfo.h" 19 #include "llvm/ADT/Optional.h" 20 #include "llvm/ADT/StringRef.h" 21 #include "llvm/Support/ErrorHandling.h" 22 #include "llvm/Support/ManagedStatic.h" 23 #include <algorithm> 24 #include <cassert> 25 #include <cerrno> 26 #include <cstddef> 27 #include <cstdlib> 28 #include <optional> 29 #include <string> 30 #include <utility> 31 #include <vector> 32 33 namespace clang { 34 namespace ast_matchers { 35 namespace dynamic { 36 37 /// Simple structure to hold information for one token from the parser. 38 struct Parser::TokenInfo { 39 /// Different possible tokens. 40 enum TokenKind { 41 TK_Eof, 42 TK_NewLine, 43 TK_OpenParen, 44 TK_CloseParen, 45 TK_Comma, 46 TK_Period, 47 TK_Literal, 48 TK_Ident, 49 TK_InvalidChar, 50 TK_Error, 51 TK_CodeCompletion 52 }; 53 54 /// Some known identifiers. 55 static const char* const ID_Bind; 56 static const char *const ID_With; 57 58 TokenInfo() = default; 59 60 StringRef Text; 61 TokenKind Kind = TK_Eof; 62 SourceRange Range; 63 VariantValue Value; 64 }; 65 66 const char* const Parser::TokenInfo::ID_Bind = "bind"; 67 const char *const Parser::TokenInfo::ID_With = "with"; 68 69 /// Simple tokenizer for the parser. 70 class Parser::CodeTokenizer { 71 public: 72 explicit CodeTokenizer(StringRef &MatcherCode, Diagnostics *Error) 73 : Code(MatcherCode), StartOfLine(MatcherCode), Error(Error) { 74 NextToken = getNextToken(); 75 } 76 77 CodeTokenizer(StringRef &MatcherCode, Diagnostics *Error, 78 unsigned CodeCompletionOffset) 79 : Code(MatcherCode), StartOfLine(MatcherCode), Error(Error), 80 CodeCompletionLocation(MatcherCode.data() + CodeCompletionOffset) { 81 NextToken = getNextToken(); 82 } 83 84 /// Returns but doesn't consume the next token. 85 const TokenInfo &peekNextToken() const { return NextToken; } 86 87 /// Consumes and returns the next token. 88 TokenInfo consumeNextToken() { 89 TokenInfo ThisToken = NextToken; 90 NextToken = getNextToken(); 91 return ThisToken; 92 } 93 94 TokenInfo SkipNewlines() { 95 while (NextToken.Kind == TokenInfo::TK_NewLine) 96 NextToken = getNextToken(); 97 return NextToken; 98 } 99 100 TokenInfo consumeNextTokenIgnoreNewlines() { 101 SkipNewlines(); 102 if (NextToken.Kind == TokenInfo::TK_Eof) 103 return NextToken; 104 return consumeNextToken(); 105 } 106 107 TokenInfo::TokenKind nextTokenKind() const { return NextToken.Kind; } 108 109 private: 110 TokenInfo getNextToken() { 111 consumeWhitespace(); 112 TokenInfo Result; 113 Result.Range.Start = currentLocation(); 114 115 if (CodeCompletionLocation && CodeCompletionLocation <= Code.data()) { 116 Result.Kind = TokenInfo::TK_CodeCompletion; 117 Result.Text = StringRef(CodeCompletionLocation, 0); 118 CodeCompletionLocation = nullptr; 119 return Result; 120 } 121 122 if (Code.empty()) { 123 Result.Kind = TokenInfo::TK_Eof; 124 Result.Text = ""; 125 return Result; 126 } 127 128 switch (Code[0]) { 129 case '#': 130 Code = Code.drop_until([](char c) { return c == '\n'; }); 131 return getNextToken(); 132 case ',': 133 Result.Kind = TokenInfo::TK_Comma; 134 Result.Text = Code.substr(0, 1); 135 Code = Code.drop_front(); 136 break; 137 case '.': 138 Result.Kind = TokenInfo::TK_Period; 139 Result.Text = Code.substr(0, 1); 140 Code = Code.drop_front(); 141 break; 142 case '\n': 143 ++Line; 144 StartOfLine = Code.drop_front(); 145 Result.Kind = TokenInfo::TK_NewLine; 146 Result.Text = Code.substr(0, 1); 147 Code = Code.drop_front(); 148 break; 149 case '(': 150 Result.Kind = TokenInfo::TK_OpenParen; 151 Result.Text = Code.substr(0, 1); 152 Code = Code.drop_front(); 153 break; 154 case ')': 155 Result.Kind = TokenInfo::TK_CloseParen; 156 Result.Text = Code.substr(0, 1); 157 Code = Code.drop_front(); 158 break; 159 160 case '"': 161 case '\'': 162 // Parse a string literal. 163 consumeStringLiteral(&Result); 164 break; 165 166 case '0': case '1': case '2': case '3': case '4': 167 case '5': case '6': case '7': case '8': case '9': 168 // Parse an unsigned and float literal. 169 consumeNumberLiteral(&Result); 170 break; 171 172 default: 173 if (isAlphanumeric(Code[0])) { 174 // Parse an identifier 175 size_t TokenLength = 1; 176 while (true) { 177 // A code completion location in/immediately after an identifier will 178 // cause the portion of the identifier before the code completion 179 // location to become a code completion token. 180 if (CodeCompletionLocation == Code.data() + TokenLength) { 181 CodeCompletionLocation = nullptr; 182 Result.Kind = TokenInfo::TK_CodeCompletion; 183 Result.Text = Code.substr(0, TokenLength); 184 Code = Code.drop_front(TokenLength); 185 return Result; 186 } 187 if (TokenLength == Code.size() || !isAlphanumeric(Code[TokenLength])) 188 break; 189 ++TokenLength; 190 } 191 if (TokenLength == 4 && Code.startswith("true")) { 192 Result.Kind = TokenInfo::TK_Literal; 193 Result.Value = true; 194 } else if (TokenLength == 5 && Code.startswith("false")) { 195 Result.Kind = TokenInfo::TK_Literal; 196 Result.Value = false; 197 } else { 198 Result.Kind = TokenInfo::TK_Ident; 199 Result.Text = Code.substr(0, TokenLength); 200 } 201 Code = Code.drop_front(TokenLength); 202 } else { 203 Result.Kind = TokenInfo::TK_InvalidChar; 204 Result.Text = Code.substr(0, 1); 205 Code = Code.drop_front(1); 206 } 207 break; 208 } 209 210 Result.Range.End = currentLocation(); 211 return Result; 212 } 213 214 /// Consume an unsigned and float literal. 215 void consumeNumberLiteral(TokenInfo *Result) { 216 bool isFloatingLiteral = false; 217 unsigned Length = 1; 218 if (Code.size() > 1) { 219 // Consume the 'x' or 'b' radix modifier, if present. 220 switch (toLowercase(Code[1])) { 221 case 'x': case 'b': Length = 2; 222 } 223 } 224 while (Length < Code.size() && isHexDigit(Code[Length])) 225 ++Length; 226 227 // Try to recognize a floating point literal. 228 while (Length < Code.size()) { 229 char c = Code[Length]; 230 if (c == '-' || c == '+' || c == '.' || isHexDigit(c)) { 231 isFloatingLiteral = true; 232 Length++; 233 } else { 234 break; 235 } 236 } 237 238 Result->Text = Code.substr(0, Length); 239 Code = Code.drop_front(Length); 240 241 if (isFloatingLiteral) { 242 char *end; 243 errno = 0; 244 std::string Text = Result->Text.str(); 245 double doubleValue = strtod(Text.c_str(), &end); 246 if (*end == 0 && errno == 0) { 247 Result->Kind = TokenInfo::TK_Literal; 248 Result->Value = doubleValue; 249 return; 250 } 251 } else { 252 unsigned Value; 253 if (!Result->Text.getAsInteger(0, Value)) { 254 Result->Kind = TokenInfo::TK_Literal; 255 Result->Value = Value; 256 return; 257 } 258 } 259 260 SourceRange Range; 261 Range.Start = Result->Range.Start; 262 Range.End = currentLocation(); 263 Error->addError(Range, Error->ET_ParserNumberError) << Result->Text; 264 Result->Kind = TokenInfo::TK_Error; 265 } 266 267 /// Consume a string literal. 268 /// 269 /// \c Code must be positioned at the start of the literal (the opening 270 /// quote). Consumed until it finds the same closing quote character. 271 void consumeStringLiteral(TokenInfo *Result) { 272 bool InEscape = false; 273 const char Marker = Code[0]; 274 for (size_t Length = 1, Size = Code.size(); Length != Size; ++Length) { 275 if (InEscape) { 276 InEscape = false; 277 continue; 278 } 279 if (Code[Length] == '\\') { 280 InEscape = true; 281 continue; 282 } 283 if (Code[Length] == Marker) { 284 Result->Kind = TokenInfo::TK_Literal; 285 Result->Text = Code.substr(0, Length + 1); 286 Result->Value = Code.substr(1, Length - 1); 287 Code = Code.drop_front(Length + 1); 288 return; 289 } 290 } 291 292 StringRef ErrorText = Code; 293 Code = Code.drop_front(Code.size()); 294 SourceRange Range; 295 Range.Start = Result->Range.Start; 296 Range.End = currentLocation(); 297 Error->addError(Range, Error->ET_ParserStringError) << ErrorText; 298 Result->Kind = TokenInfo::TK_Error; 299 } 300 301 /// Consume all leading whitespace from \c Code. 302 void consumeWhitespace() { 303 Code = Code.drop_while([](char c) { 304 // Don't trim newlines. 305 return StringRef(" \t\v\f\r").contains(c); 306 }); 307 } 308 309 SourceLocation currentLocation() { 310 SourceLocation Location; 311 Location.Line = Line; 312 Location.Column = Code.data() - StartOfLine.data() + 1; 313 return Location; 314 } 315 316 StringRef &Code; 317 StringRef StartOfLine; 318 unsigned Line = 1; 319 Diagnostics *Error; 320 TokenInfo NextToken; 321 const char *CodeCompletionLocation = nullptr; 322 }; 323 324 Parser::Sema::~Sema() = default; 325 326 std::vector<ArgKind> Parser::Sema::getAcceptedCompletionTypes( 327 llvm::ArrayRef<std::pair<MatcherCtor, unsigned>> Context) { 328 return {}; 329 } 330 331 std::vector<MatcherCompletion> 332 Parser::Sema::getMatcherCompletions(llvm::ArrayRef<ArgKind> AcceptedTypes) { 333 return {}; 334 } 335 336 struct Parser::ScopedContextEntry { 337 Parser *P; 338 339 ScopedContextEntry(Parser *P, MatcherCtor C) : P(P) { 340 P->ContextStack.push_back(std::make_pair(C, 0u)); 341 } 342 343 ~ScopedContextEntry() { 344 P->ContextStack.pop_back(); 345 } 346 347 void nextArg() { 348 ++P->ContextStack.back().second; 349 } 350 }; 351 352 /// Parse expressions that start with an identifier. 353 /// 354 /// This function can parse named values and matchers. 355 /// In case of failure it will try to determine the user's intent to give 356 /// an appropriate error message. 357 bool Parser::parseIdentifierPrefixImpl(VariantValue *Value) { 358 const TokenInfo NameToken = Tokenizer->consumeNextToken(); 359 360 if (Tokenizer->nextTokenKind() != TokenInfo::TK_OpenParen) { 361 // Parse as a named value. 362 if (const VariantValue NamedValue = 363 NamedValues ? NamedValues->lookup(NameToken.Text) 364 : VariantValue()) { 365 366 if (Tokenizer->nextTokenKind() != TokenInfo::TK_Period) { 367 *Value = NamedValue; 368 return true; 369 } 370 371 std::string BindID; 372 Tokenizer->consumeNextToken(); 373 TokenInfo ChainCallToken = Tokenizer->consumeNextToken(); 374 if (ChainCallToken.Kind == TokenInfo::TK_CodeCompletion) { 375 addCompletion(ChainCallToken, MatcherCompletion("bind(\"", "bind", 1)); 376 return false; 377 } 378 379 if (ChainCallToken.Kind != TokenInfo::TK_Ident || 380 (ChainCallToken.Text != TokenInfo::ID_Bind && 381 ChainCallToken.Text != TokenInfo::ID_With)) { 382 Error->addError(ChainCallToken.Range, 383 Error->ET_ParserMalformedChainedExpr); 384 return false; 385 } 386 if (ChainCallToken.Text == TokenInfo::ID_With) { 387 388 Diagnostics::Context Ctx(Diagnostics::Context::ConstructMatcher, Error, 389 NameToken.Text, NameToken.Range); 390 391 Error->addError(ChainCallToken.Range, 392 Error->ET_RegistryMatcherNoWithSupport); 393 return false; 394 } 395 if (!parseBindID(BindID)) 396 return false; 397 398 assert(NamedValue.isMatcher()); 399 llvm::Optional<DynTypedMatcher> Result = 400 NamedValue.getMatcher().getSingleMatcher(); 401 if (Result) { 402 llvm::Optional<DynTypedMatcher> Bound = Result->tryBind(BindID); 403 if (Bound) { 404 *Value = VariantMatcher::SingleMatcher(*Bound); 405 return true; 406 } 407 } 408 return false; 409 } 410 411 if (Tokenizer->nextTokenKind() == TokenInfo::TK_NewLine) { 412 Error->addError(Tokenizer->peekNextToken().Range, 413 Error->ET_ParserNoOpenParen) 414 << "NewLine"; 415 return false; 416 } 417 418 // If the syntax is correct and the name is not a matcher either, report 419 // unknown named value. 420 if ((Tokenizer->nextTokenKind() == TokenInfo::TK_Comma || 421 Tokenizer->nextTokenKind() == TokenInfo::TK_CloseParen || 422 Tokenizer->nextTokenKind() == TokenInfo::TK_NewLine || 423 Tokenizer->nextTokenKind() == TokenInfo::TK_Eof) && 424 !S->lookupMatcherCtor(NameToken.Text)) { 425 Error->addError(NameToken.Range, Error->ET_RegistryValueNotFound) 426 << NameToken.Text; 427 return false; 428 } 429 // Otherwise, fallback to the matcher parser. 430 } 431 432 Tokenizer->SkipNewlines(); 433 434 assert(NameToken.Kind == TokenInfo::TK_Ident); 435 TokenInfo OpenToken = Tokenizer->consumeNextToken(); 436 if (OpenToken.Kind != TokenInfo::TK_OpenParen) { 437 Error->addError(OpenToken.Range, Error->ET_ParserNoOpenParen) 438 << OpenToken.Text; 439 return false; 440 } 441 442 llvm::Optional<MatcherCtor> Ctor = S->lookupMatcherCtor(NameToken.Text); 443 444 // Parse as a matcher expression. 445 return parseMatcherExpressionImpl(NameToken, OpenToken, Ctor, Value); 446 } 447 448 bool Parser::parseBindID(std::string &BindID) { 449 // Parse the parenthesized argument to .bind("foo") 450 const TokenInfo OpenToken = Tokenizer->consumeNextToken(); 451 const TokenInfo IDToken = Tokenizer->consumeNextTokenIgnoreNewlines(); 452 const TokenInfo CloseToken = Tokenizer->consumeNextTokenIgnoreNewlines(); 453 454 // TODO: We could use different error codes for each/some to be more 455 // explicit about the syntax error. 456 if (OpenToken.Kind != TokenInfo::TK_OpenParen) { 457 Error->addError(OpenToken.Range, Error->ET_ParserMalformedBindExpr); 458 return false; 459 } 460 if (IDToken.Kind != TokenInfo::TK_Literal || !IDToken.Value.isString()) { 461 Error->addError(IDToken.Range, Error->ET_ParserMalformedBindExpr); 462 return false; 463 } 464 if (CloseToken.Kind != TokenInfo::TK_CloseParen) { 465 Error->addError(CloseToken.Range, Error->ET_ParserMalformedBindExpr); 466 return false; 467 } 468 BindID = IDToken.Value.getString(); 469 return true; 470 } 471 472 bool Parser::parseMatcherBuilder(MatcherCtor Ctor, const TokenInfo &NameToken, 473 const TokenInfo &OpenToken, 474 VariantValue *Value) { 475 std::vector<ParserValue> Args; 476 TokenInfo EndToken; 477 478 Tokenizer->SkipNewlines(); 479 480 { 481 ScopedContextEntry SCE(this, Ctor); 482 483 while (Tokenizer->nextTokenKind() != TokenInfo::TK_Eof) { 484 if (Tokenizer->nextTokenKind() == TokenInfo::TK_CloseParen) { 485 // End of args. 486 EndToken = Tokenizer->consumeNextToken(); 487 break; 488 } 489 if (!Args.empty()) { 490 // We must find a , token to continue. 491 TokenInfo CommaToken = Tokenizer->consumeNextToken(); 492 if (CommaToken.Kind != TokenInfo::TK_Comma) { 493 Error->addError(CommaToken.Range, Error->ET_ParserNoComma) 494 << CommaToken.Text; 495 return false; 496 } 497 } 498 499 Diagnostics::Context Ctx(Diagnostics::Context::MatcherArg, Error, 500 NameToken.Text, NameToken.Range, 501 Args.size() + 1); 502 ParserValue ArgValue; 503 Tokenizer->SkipNewlines(); 504 505 if (Tokenizer->peekNextToken().Kind == TokenInfo::TK_CodeCompletion) { 506 addExpressionCompletions(); 507 return false; 508 } 509 510 TokenInfo NodeMatcherToken = Tokenizer->consumeNextToken(); 511 512 if (NodeMatcherToken.Kind != TokenInfo::TK_Ident) { 513 Error->addError(NameToken.Range, Error->ET_ParserFailedToBuildMatcher) 514 << NameToken.Text; 515 return false; 516 } 517 518 ArgValue.Text = NodeMatcherToken.Text; 519 ArgValue.Range = NodeMatcherToken.Range; 520 521 llvm::Optional<MatcherCtor> MappedMatcher = 522 S->lookupMatcherCtor(ArgValue.Text); 523 524 if (!MappedMatcher) { 525 Error->addError(NodeMatcherToken.Range, 526 Error->ET_RegistryMatcherNotFound) 527 << NodeMatcherToken.Text; 528 return false; 529 } 530 531 ASTNodeKind NK = S->nodeMatcherType(*MappedMatcher); 532 533 if (NK.isNone()) { 534 Error->addError(NodeMatcherToken.Range, 535 Error->ET_RegistryNonNodeMatcher) 536 << NodeMatcherToken.Text; 537 return false; 538 } 539 540 ArgValue.Value = NK; 541 542 Tokenizer->SkipNewlines(); 543 Args.push_back(ArgValue); 544 545 SCE.nextArg(); 546 } 547 } 548 549 if (EndToken.Kind == TokenInfo::TK_Eof) { 550 Error->addError(OpenToken.Range, Error->ET_ParserNoCloseParen); 551 return false; 552 } 553 554 internal::MatcherDescriptorPtr BuiltCtor = 555 S->buildMatcherCtor(Ctor, NameToken.Range, Args, Error); 556 557 if (!BuiltCtor.get()) { 558 Error->addError(NameToken.Range, Error->ET_ParserFailedToBuildMatcher) 559 << NameToken.Text; 560 return false; 561 } 562 563 std::string BindID; 564 if (Tokenizer->peekNextToken().Kind == TokenInfo::TK_Period) { 565 Tokenizer->consumeNextToken(); 566 TokenInfo ChainCallToken = Tokenizer->consumeNextToken(); 567 if (ChainCallToken.Kind == TokenInfo::TK_CodeCompletion) { 568 addCompletion(ChainCallToken, MatcherCompletion("bind(\"", "bind", 1)); 569 addCompletion(ChainCallToken, MatcherCompletion("with(", "with", 1)); 570 return false; 571 } 572 if (ChainCallToken.Kind != TokenInfo::TK_Ident || 573 (ChainCallToken.Text != TokenInfo::ID_Bind && 574 ChainCallToken.Text != TokenInfo::ID_With)) { 575 Error->addError(ChainCallToken.Range, 576 Error->ET_ParserMalformedChainedExpr); 577 return false; 578 } 579 if (ChainCallToken.Text == TokenInfo::ID_Bind) { 580 if (!parseBindID(BindID)) 581 return false; 582 Diagnostics::Context Ctx(Diagnostics::Context::ConstructMatcher, Error, 583 NameToken.Text, NameToken.Range); 584 SourceRange MatcherRange = NameToken.Range; 585 MatcherRange.End = ChainCallToken.Range.End; 586 VariantMatcher Result = S->actOnMatcherExpression( 587 BuiltCtor.get(), MatcherRange, BindID, {}, Error); 588 if (Result.isNull()) 589 return false; 590 591 *Value = Result; 592 return true; 593 } else if (ChainCallToken.Text == TokenInfo::ID_With) { 594 Tokenizer->SkipNewlines(); 595 596 if (Tokenizer->nextTokenKind() != TokenInfo::TK_OpenParen) { 597 StringRef ErrTxt = Tokenizer->nextTokenKind() == TokenInfo::TK_Eof 598 ? StringRef("EOF") 599 : Tokenizer->peekNextToken().Text; 600 Error->addError(Tokenizer->peekNextToken().Range, 601 Error->ET_ParserNoOpenParen) 602 << ErrTxt; 603 return false; 604 } 605 606 TokenInfo WithOpenToken = Tokenizer->consumeNextToken(); 607 608 return parseMatcherExpressionImpl(NameToken, WithOpenToken, 609 BuiltCtor.get(), Value); 610 } 611 } 612 613 Diagnostics::Context Ctx(Diagnostics::Context::ConstructMatcher, Error, 614 NameToken.Text, NameToken.Range); 615 SourceRange MatcherRange = NameToken.Range; 616 MatcherRange.End = EndToken.Range.End; 617 VariantMatcher Result = S->actOnMatcherExpression( 618 BuiltCtor.get(), MatcherRange, BindID, {}, Error); 619 if (Result.isNull()) 620 return false; 621 622 *Value = Result; 623 return true; 624 } 625 626 /// Parse and validate a matcher expression. 627 /// \return \c true on success, in which case \c Value has the matcher parsed. 628 /// If the input is malformed, or some argument has an error, it 629 /// returns \c false. 630 bool Parser::parseMatcherExpressionImpl(const TokenInfo &NameToken, 631 const TokenInfo &OpenToken, 632 llvm::Optional<MatcherCtor> Ctor, 633 VariantValue *Value) { 634 if (!Ctor) { 635 Error->addError(NameToken.Range, Error->ET_RegistryMatcherNotFound) 636 << NameToken.Text; 637 // Do not return here. We need to continue to give completion suggestions. 638 } 639 640 if (Ctor && *Ctor && S->isBuilderMatcher(*Ctor)) 641 return parseMatcherBuilder(*Ctor, NameToken, OpenToken, Value); 642 643 std::vector<ParserValue> Args; 644 TokenInfo EndToken; 645 646 Tokenizer->SkipNewlines(); 647 648 { 649 ScopedContextEntry SCE(this, Ctor.value_or(nullptr)); 650 651 while (Tokenizer->nextTokenKind() != TokenInfo::TK_Eof) { 652 if (Tokenizer->nextTokenKind() == TokenInfo::TK_CloseParen) { 653 // End of args. 654 EndToken = Tokenizer->consumeNextToken(); 655 break; 656 } 657 if (!Args.empty()) { 658 // We must find a , token to continue. 659 const TokenInfo CommaToken = Tokenizer->consumeNextToken(); 660 if (CommaToken.Kind != TokenInfo::TK_Comma) { 661 Error->addError(CommaToken.Range, Error->ET_ParserNoComma) 662 << CommaToken.Text; 663 return false; 664 } 665 } 666 667 Diagnostics::Context Ctx(Diagnostics::Context::MatcherArg, Error, 668 NameToken.Text, NameToken.Range, 669 Args.size() + 1); 670 ParserValue ArgValue; 671 Tokenizer->SkipNewlines(); 672 ArgValue.Text = Tokenizer->peekNextToken().Text; 673 ArgValue.Range = Tokenizer->peekNextToken().Range; 674 if (!parseExpressionImpl(&ArgValue.Value)) { 675 return false; 676 } 677 678 Tokenizer->SkipNewlines(); 679 Args.push_back(ArgValue); 680 SCE.nextArg(); 681 } 682 } 683 684 if (EndToken.Kind == TokenInfo::TK_Eof) { 685 Error->addError(OpenToken.Range, Error->ET_ParserNoCloseParen); 686 return false; 687 } 688 689 std::string BindID; 690 if (Tokenizer->peekNextToken().Kind == TokenInfo::TK_Period) { 691 Tokenizer->consumeNextToken(); 692 TokenInfo ChainCallToken = Tokenizer->consumeNextToken(); 693 if (ChainCallToken.Kind == TokenInfo::TK_CodeCompletion) { 694 addCompletion(ChainCallToken, MatcherCompletion("bind(\"", "bind", 1)); 695 return false; 696 } 697 698 if (ChainCallToken.Kind != TokenInfo::TK_Ident) { 699 Error->addError(ChainCallToken.Range, 700 Error->ET_ParserMalformedChainedExpr); 701 return false; 702 } 703 if (ChainCallToken.Text == TokenInfo::ID_With) { 704 705 Diagnostics::Context Ctx(Diagnostics::Context::ConstructMatcher, Error, 706 NameToken.Text, NameToken.Range); 707 708 Error->addError(ChainCallToken.Range, 709 Error->ET_RegistryMatcherNoWithSupport); 710 return false; 711 } 712 if (ChainCallToken.Text != TokenInfo::ID_Bind) { 713 Error->addError(ChainCallToken.Range, 714 Error->ET_ParserMalformedChainedExpr); 715 return false; 716 } 717 if (!parseBindID(BindID)) 718 return false; 719 } 720 721 if (!Ctor) 722 return false; 723 724 // Merge the start and end infos. 725 Diagnostics::Context Ctx(Diagnostics::Context::ConstructMatcher, Error, 726 NameToken.Text, NameToken.Range); 727 SourceRange MatcherRange = NameToken.Range; 728 MatcherRange.End = EndToken.Range.End; 729 VariantMatcher Result = S->actOnMatcherExpression( 730 *Ctor, MatcherRange, BindID, Args, Error); 731 if (Result.isNull()) return false; 732 733 *Value = Result; 734 return true; 735 } 736 737 // If the prefix of this completion matches the completion token, add it to 738 // Completions minus the prefix. 739 void Parser::addCompletion(const TokenInfo &CompToken, 740 const MatcherCompletion& Completion) { 741 if (StringRef(Completion.TypedText).startswith(CompToken.Text) && 742 Completion.Specificity > 0) { 743 Completions.emplace_back(Completion.TypedText.substr(CompToken.Text.size()), 744 Completion.MatcherDecl, Completion.Specificity); 745 } 746 } 747 748 std::vector<MatcherCompletion> Parser::getNamedValueCompletions( 749 ArrayRef<ArgKind> AcceptedTypes) { 750 if (!NamedValues) return std::vector<MatcherCompletion>(); 751 std::vector<MatcherCompletion> Result; 752 for (const auto &Entry : *NamedValues) { 753 unsigned Specificity; 754 if (Entry.getValue().isConvertibleTo(AcceptedTypes, &Specificity)) { 755 std::string Decl = 756 (Entry.getValue().getTypeAsString() + " " + Entry.getKey()).str(); 757 Result.emplace_back(Entry.getKey(), Decl, Specificity); 758 } 759 } 760 return Result; 761 } 762 763 void Parser::addExpressionCompletions() { 764 const TokenInfo CompToken = Tokenizer->consumeNextTokenIgnoreNewlines(); 765 assert(CompToken.Kind == TokenInfo::TK_CodeCompletion); 766 767 // We cannot complete code if there is an invalid element on the context 768 // stack. 769 for (ContextStackTy::iterator I = ContextStack.begin(), 770 E = ContextStack.end(); 771 I != E; ++I) { 772 if (!I->first) 773 return; 774 } 775 776 auto AcceptedTypes = S->getAcceptedCompletionTypes(ContextStack); 777 for (const auto &Completion : S->getMatcherCompletions(AcceptedTypes)) { 778 addCompletion(CompToken, Completion); 779 } 780 781 for (const auto &Completion : getNamedValueCompletions(AcceptedTypes)) { 782 addCompletion(CompToken, Completion); 783 } 784 } 785 786 /// Parse an <Expression> 787 bool Parser::parseExpressionImpl(VariantValue *Value) { 788 switch (Tokenizer->nextTokenKind()) { 789 case TokenInfo::TK_Literal: 790 *Value = Tokenizer->consumeNextToken().Value; 791 return true; 792 793 case TokenInfo::TK_Ident: 794 return parseIdentifierPrefixImpl(Value); 795 796 case TokenInfo::TK_CodeCompletion: 797 addExpressionCompletions(); 798 return false; 799 800 case TokenInfo::TK_Eof: 801 Error->addError(Tokenizer->consumeNextToken().Range, 802 Error->ET_ParserNoCode); 803 return false; 804 805 case TokenInfo::TK_Error: 806 // This error was already reported by the tokenizer. 807 return false; 808 case TokenInfo::TK_NewLine: 809 case TokenInfo::TK_OpenParen: 810 case TokenInfo::TK_CloseParen: 811 case TokenInfo::TK_Comma: 812 case TokenInfo::TK_Period: 813 case TokenInfo::TK_InvalidChar: 814 const TokenInfo Token = Tokenizer->consumeNextToken(); 815 Error->addError(Token.Range, Error->ET_ParserInvalidToken) 816 << (Token.Kind == TokenInfo::TK_NewLine ? "NewLine" : Token.Text); 817 return false; 818 } 819 820 llvm_unreachable("Unknown token kind."); 821 } 822 823 static llvm::ManagedStatic<Parser::RegistrySema> DefaultRegistrySema; 824 825 Parser::Parser(CodeTokenizer *Tokenizer, Sema *S, 826 const NamedValueMap *NamedValues, Diagnostics *Error) 827 : Tokenizer(Tokenizer), S(S ? S : &*DefaultRegistrySema), 828 NamedValues(NamedValues), Error(Error) {} 829 830 Parser::RegistrySema::~RegistrySema() = default; 831 832 llvm::Optional<MatcherCtor> 833 Parser::RegistrySema::lookupMatcherCtor(StringRef MatcherName) { 834 return Registry::lookupMatcherCtor(MatcherName); 835 } 836 837 VariantMatcher Parser::RegistrySema::actOnMatcherExpression( 838 MatcherCtor Ctor, SourceRange NameRange, StringRef BindID, 839 ArrayRef<ParserValue> Args, Diagnostics *Error) { 840 if (BindID.empty()) { 841 return Registry::constructMatcher(Ctor, NameRange, Args, Error); 842 } else { 843 return Registry::constructBoundMatcher(Ctor, NameRange, BindID, Args, 844 Error); 845 } 846 } 847 848 std::vector<ArgKind> Parser::RegistrySema::getAcceptedCompletionTypes( 849 ArrayRef<std::pair<MatcherCtor, unsigned>> Context) { 850 return Registry::getAcceptedCompletionTypes(Context); 851 } 852 853 std::vector<MatcherCompletion> Parser::RegistrySema::getMatcherCompletions( 854 ArrayRef<ArgKind> AcceptedTypes) { 855 return Registry::getMatcherCompletions(AcceptedTypes); 856 } 857 858 bool Parser::RegistrySema::isBuilderMatcher(MatcherCtor Ctor) const { 859 return Registry::isBuilderMatcher(Ctor); 860 } 861 862 ASTNodeKind Parser::RegistrySema::nodeMatcherType(MatcherCtor Ctor) const { 863 return Registry::nodeMatcherType(Ctor); 864 } 865 866 internal::MatcherDescriptorPtr 867 Parser::RegistrySema::buildMatcherCtor(MatcherCtor Ctor, SourceRange NameRange, 868 ArrayRef<ParserValue> Args, 869 Diagnostics *Error) const { 870 return Registry::buildMatcherCtor(Ctor, NameRange, Args, Error); 871 } 872 873 bool Parser::parseExpression(StringRef &Code, Sema *S, 874 const NamedValueMap *NamedValues, 875 VariantValue *Value, Diagnostics *Error) { 876 CodeTokenizer Tokenizer(Code, Error); 877 if (!Parser(&Tokenizer, S, NamedValues, Error).parseExpressionImpl(Value)) 878 return false; 879 auto NT = Tokenizer.peekNextToken(); 880 if (NT.Kind != TokenInfo::TK_Eof && NT.Kind != TokenInfo::TK_NewLine) { 881 Error->addError(Tokenizer.peekNextToken().Range, 882 Error->ET_ParserTrailingCode); 883 return false; 884 } 885 return true; 886 } 887 888 std::vector<MatcherCompletion> 889 Parser::completeExpression(StringRef &Code, unsigned CompletionOffset, Sema *S, 890 const NamedValueMap *NamedValues) { 891 Diagnostics Error; 892 CodeTokenizer Tokenizer(Code, &Error, CompletionOffset); 893 Parser P(&Tokenizer, S, NamedValues, &Error); 894 VariantValue Dummy; 895 P.parseExpressionImpl(&Dummy); 896 897 // Sort by specificity, then by name. 898 llvm::sort(P.Completions, 899 [](const MatcherCompletion &A, const MatcherCompletion &B) { 900 if (A.Specificity != B.Specificity) 901 return A.Specificity > B.Specificity; 902 return A.TypedText < B.TypedText; 903 }); 904 905 return P.Completions; 906 } 907 908 llvm::Optional<DynTypedMatcher> 909 Parser::parseMatcherExpression(StringRef &Code, Sema *S, 910 const NamedValueMap *NamedValues, 911 Diagnostics *Error) { 912 VariantValue Value; 913 if (!parseExpression(Code, S, NamedValues, &Value, Error)) 914 return std::nullopt; 915 if (!Value.isMatcher()) { 916 Error->addError(SourceRange(), Error->ET_ParserNotAMatcher); 917 return std::nullopt; 918 } 919 llvm::Optional<DynTypedMatcher> Result = 920 Value.getMatcher().getSingleMatcher(); 921 if (!Result) { 922 Error->addError(SourceRange(), Error->ET_ParserOverloadedType) 923 << Value.getTypeAsString(); 924 } 925 return Result; 926 } 927 928 } // namespace dynamic 929 } // namespace ast_matchers 930 } // namespace clang 931