xref: /llvm-project/mlir/lib/Query/Matcher/Parser.cpp (revision 58b44c8102afb0e76d1cb70d4a5d089f70d2f657)
//===- Parser.cpp - Matcher expression parser -----------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// Recursive parser implementation for the matcher expression grammar.
//
//===----------------------------------------------------------------------===//
1202d9f4d1SDevajith 
1302d9f4d1SDevajith #include "Parser.h"
1402d9f4d1SDevajith 
1502d9f4d1SDevajith #include <vector>
1602d9f4d1SDevajith 
1702d9f4d1SDevajith namespace mlir::query::matcher::internal {
1802d9f4d1SDevajith 
1902d9f4d1SDevajith // Simple structure to hold information for one token from the parser.
2002d9f4d1SDevajith struct Parser::TokenInfo {
2102d9f4d1SDevajith   TokenInfo() = default;
2202d9f4d1SDevajith 
2302d9f4d1SDevajith   // Method to set the kind and text of the token
setmlir::query::matcher::internal::Parser::TokenInfo2402d9f4d1SDevajith   void set(TokenKind newKind, llvm::StringRef newText) {
2502d9f4d1SDevajith     kind = newKind;
2602d9f4d1SDevajith     text = newText;
2702d9f4d1SDevajith   }
2802d9f4d1SDevajith 
29*58b44c81SJacques Pienaar   // Known identifiers.
30*58b44c81SJacques Pienaar   static const char *const ID_Extract;
31*58b44c81SJacques Pienaar 
3202d9f4d1SDevajith   llvm::StringRef text;
3302d9f4d1SDevajith   TokenKind kind = TokenKind::Eof;
3402d9f4d1SDevajith   SourceRange range;
3502d9f4d1SDevajith   VariantValue value;
3602d9f4d1SDevajith };
3702d9f4d1SDevajith 
38*58b44c81SJacques Pienaar const char *const Parser::TokenInfo::ID_Extract = "extract";
39*58b44c81SJacques Pienaar 
4002d9f4d1SDevajith class Parser::CodeTokenizer {
4102d9f4d1SDevajith public:
4202d9f4d1SDevajith   // Constructor with matcherCode and error
CodeTokenizer(llvm::StringRef matcherCode,Diagnostics * error)4302d9f4d1SDevajith   explicit CodeTokenizer(llvm::StringRef matcherCode, Diagnostics *error)
4402d9f4d1SDevajith       : code(matcherCode), startOfLine(matcherCode), error(error) {
4502d9f4d1SDevajith     nextToken = getNextToken();
4602d9f4d1SDevajith   }
4702d9f4d1SDevajith 
4802d9f4d1SDevajith   // Constructor with matcherCode, error, and codeCompletionOffset
CodeTokenizer(llvm::StringRef matcherCode,Diagnostics * error,unsigned codeCompletionOffset)4902d9f4d1SDevajith   CodeTokenizer(llvm::StringRef matcherCode, Diagnostics *error,
5002d9f4d1SDevajith                 unsigned codeCompletionOffset)
5102d9f4d1SDevajith       : code(matcherCode), startOfLine(matcherCode), error(error),
5202d9f4d1SDevajith         codeCompletionLocation(matcherCode.data() + codeCompletionOffset) {
5302d9f4d1SDevajith     nextToken = getNextToken();
5402d9f4d1SDevajith   }
5502d9f4d1SDevajith 
5602d9f4d1SDevajith   // Peek at next token without consuming it
peekNextToken() const5702d9f4d1SDevajith   const TokenInfo &peekNextToken() const { return nextToken; }
5802d9f4d1SDevajith 
5902d9f4d1SDevajith   // Consume and return the next token
consumeNextToken()6002d9f4d1SDevajith   TokenInfo consumeNextToken() {
6102d9f4d1SDevajith     TokenInfo thisToken = nextToken;
6202d9f4d1SDevajith     nextToken = getNextToken();
6302d9f4d1SDevajith     return thisToken;
6402d9f4d1SDevajith   }
6502d9f4d1SDevajith 
6602d9f4d1SDevajith   // Skip any newline tokens
skipNewlines()6702d9f4d1SDevajith   TokenInfo skipNewlines() {
6802d9f4d1SDevajith     while (nextToken.kind == TokenKind::NewLine)
6902d9f4d1SDevajith       nextToken = getNextToken();
7002d9f4d1SDevajith     return nextToken;
7102d9f4d1SDevajith   }
7202d9f4d1SDevajith 
7302d9f4d1SDevajith   // Consume and return next token, ignoring newlines
consumeNextTokenIgnoreNewlines()7402d9f4d1SDevajith   TokenInfo consumeNextTokenIgnoreNewlines() {
7502d9f4d1SDevajith     skipNewlines();
7602d9f4d1SDevajith     return nextToken.kind == TokenKind::Eof ? nextToken : consumeNextToken();
7702d9f4d1SDevajith   }
7802d9f4d1SDevajith 
7902d9f4d1SDevajith   // Return kind of next token
nextTokenKind() const8002d9f4d1SDevajith   TokenKind nextTokenKind() const { return nextToken.kind; }
8102d9f4d1SDevajith 
8202d9f4d1SDevajith private:
8302d9f4d1SDevajith   // Helper function to get the first character as a new StringRef and drop it
8402d9f4d1SDevajith   // from the original string
firstCharacterAndDrop(llvm::StringRef & str)8502d9f4d1SDevajith   llvm::StringRef firstCharacterAndDrop(llvm::StringRef &str) {
8602d9f4d1SDevajith     assert(!str.empty());
8702d9f4d1SDevajith     llvm::StringRef firstChar = str.substr(0, 1);
8802d9f4d1SDevajith     str = str.drop_front();
8902d9f4d1SDevajith     return firstChar;
9002d9f4d1SDevajith   }
9102d9f4d1SDevajith 
9202d9f4d1SDevajith   // Get next token, consuming whitespaces and handling different token types
getNextToken()9302d9f4d1SDevajith   TokenInfo getNextToken() {
9402d9f4d1SDevajith     consumeWhitespace();
9502d9f4d1SDevajith     TokenInfo result;
9602d9f4d1SDevajith     result.range.start = currentLocation();
9702d9f4d1SDevajith 
9802d9f4d1SDevajith     // Code completion case
9902d9f4d1SDevajith     if (codeCompletionLocation && codeCompletionLocation <= code.data()) {
10002d9f4d1SDevajith       result.set(TokenKind::CodeCompletion,
10102d9f4d1SDevajith                  llvm::StringRef(codeCompletionLocation, 0));
10202d9f4d1SDevajith       codeCompletionLocation = nullptr;
10302d9f4d1SDevajith       return result;
10402d9f4d1SDevajith     }
10502d9f4d1SDevajith 
10602d9f4d1SDevajith     // End of file case
10702d9f4d1SDevajith     if (code.empty()) {
10802d9f4d1SDevajith       result.set(TokenKind::Eof, "");
10902d9f4d1SDevajith       return result;
11002d9f4d1SDevajith     }
11102d9f4d1SDevajith 
11202d9f4d1SDevajith     // Switch to handle specific characters
11302d9f4d1SDevajith     switch (code[0]) {
11402d9f4d1SDevajith     case '#':
11502d9f4d1SDevajith       code = code.drop_until([](char c) { return c == '\n'; });
11602d9f4d1SDevajith       return getNextToken();
11702d9f4d1SDevajith     case ',':
11802d9f4d1SDevajith       result.set(TokenKind::Comma, firstCharacterAndDrop(code));
11902d9f4d1SDevajith       break;
12002d9f4d1SDevajith     case '.':
12102d9f4d1SDevajith       result.set(TokenKind::Period, firstCharacterAndDrop(code));
12202d9f4d1SDevajith       break;
12302d9f4d1SDevajith     case '\n':
12402d9f4d1SDevajith       ++line;
12502d9f4d1SDevajith       startOfLine = code.drop_front();
12602d9f4d1SDevajith       result.set(TokenKind::NewLine, firstCharacterAndDrop(code));
12702d9f4d1SDevajith       break;
12802d9f4d1SDevajith     case '(':
12902d9f4d1SDevajith       result.set(TokenKind::OpenParen, firstCharacterAndDrop(code));
13002d9f4d1SDevajith       break;
13102d9f4d1SDevajith     case ')':
13202d9f4d1SDevajith       result.set(TokenKind::CloseParen, firstCharacterAndDrop(code));
13302d9f4d1SDevajith       break;
13402d9f4d1SDevajith     case '"':
13502d9f4d1SDevajith     case '\'':
13602d9f4d1SDevajith       consumeStringLiteral(&result);
13702d9f4d1SDevajith       break;
13802d9f4d1SDevajith     default:
13902d9f4d1SDevajith       parseIdentifierOrInvalid(&result);
14002d9f4d1SDevajith       break;
14102d9f4d1SDevajith     }
14202d9f4d1SDevajith 
14302d9f4d1SDevajith     result.range.end = currentLocation();
14402d9f4d1SDevajith     return result;
14502d9f4d1SDevajith   }
14602d9f4d1SDevajith 
14702d9f4d1SDevajith   // Consume a string literal, handle escape sequences and missing closing
14802d9f4d1SDevajith   // quote.
consumeStringLiteral(TokenInfo * result)14902d9f4d1SDevajith   void consumeStringLiteral(TokenInfo *result) {
15002d9f4d1SDevajith     bool inEscape = false;
15102d9f4d1SDevajith     const char marker = code[0];
15202d9f4d1SDevajith     for (size_t length = 1; length < code.size(); ++length) {
15302d9f4d1SDevajith       if (inEscape) {
15402d9f4d1SDevajith         inEscape = false;
15502d9f4d1SDevajith         continue;
15602d9f4d1SDevajith       }
15702d9f4d1SDevajith       if (code[length] == '\\') {
15802d9f4d1SDevajith         inEscape = true;
15902d9f4d1SDevajith         continue;
16002d9f4d1SDevajith       }
16102d9f4d1SDevajith       if (code[length] == marker) {
16202d9f4d1SDevajith         result->kind = TokenKind::Literal;
16302d9f4d1SDevajith         result->text = code.substr(0, length + 1);
16402d9f4d1SDevajith         result->value = code.substr(1, length - 1);
16502d9f4d1SDevajith         code = code.drop_front(length + 1);
16602d9f4d1SDevajith         return;
16702d9f4d1SDevajith       }
16802d9f4d1SDevajith     }
16902d9f4d1SDevajith     llvm::StringRef errorText = code;
17002d9f4d1SDevajith     code = code.drop_front(code.size());
17102d9f4d1SDevajith     SourceRange range;
17202d9f4d1SDevajith     range.start = result->range.start;
17302d9f4d1SDevajith     range.end = currentLocation();
17402d9f4d1SDevajith     error->addError(range, ErrorType::ParserStringError) << errorText;
17502d9f4d1SDevajith     result->kind = TokenKind::Error;
17602d9f4d1SDevajith   }
17702d9f4d1SDevajith 
parseIdentifierOrInvalid(TokenInfo * result)17802d9f4d1SDevajith   void parseIdentifierOrInvalid(TokenInfo *result) {
17902d9f4d1SDevajith     if (isalnum(code[0])) {
18002d9f4d1SDevajith       // Parse an identifier
18102d9f4d1SDevajith       size_t tokenLength = 1;
18202d9f4d1SDevajith 
18302d9f4d1SDevajith       while (true) {
18402d9f4d1SDevajith         // A code completion location in/immediately after an identifier will
18502d9f4d1SDevajith         // cause the portion of the identifier before the code completion
18602d9f4d1SDevajith         // location to become a code completion token.
18702d9f4d1SDevajith         if (codeCompletionLocation == code.data() + tokenLength) {
18802d9f4d1SDevajith           codeCompletionLocation = nullptr;
18902d9f4d1SDevajith           result->kind = TokenKind::CodeCompletion;
19002d9f4d1SDevajith           result->text = code.substr(0, tokenLength);
19102d9f4d1SDevajith           code = code.drop_front(tokenLength);
19202d9f4d1SDevajith           return;
19302d9f4d1SDevajith         }
19402d9f4d1SDevajith         if (tokenLength == code.size() || !(isalnum(code[tokenLength])))
19502d9f4d1SDevajith           break;
19602d9f4d1SDevajith         ++tokenLength;
19702d9f4d1SDevajith       }
19802d9f4d1SDevajith       result->kind = TokenKind::Ident;
19902d9f4d1SDevajith       result->text = code.substr(0, tokenLength);
20002d9f4d1SDevajith       code = code.drop_front(tokenLength);
20102d9f4d1SDevajith     } else {
20202d9f4d1SDevajith       result->kind = TokenKind::InvalidChar;
20302d9f4d1SDevajith       result->text = code.substr(0, 1);
20402d9f4d1SDevajith       code = code.drop_front(1);
20502d9f4d1SDevajith     }
20602d9f4d1SDevajith   }
20702d9f4d1SDevajith 
20802d9f4d1SDevajith   // Consume all leading whitespace from code, except newlines
consumeWhitespace()209abaa79b2SKazu Hirata   void consumeWhitespace() { code = code.ltrim(" \t\v\f\r"); }
21002d9f4d1SDevajith 
21102d9f4d1SDevajith   // Returns the current location in the source code
currentLocation()21202d9f4d1SDevajith   SourceLocation currentLocation() {
21302d9f4d1SDevajith     SourceLocation location;
21402d9f4d1SDevajith     location.line = line;
21502d9f4d1SDevajith     location.column = code.data() - startOfLine.data() + 1;
21602d9f4d1SDevajith     return location;
21702d9f4d1SDevajith   }
21802d9f4d1SDevajith 
21902d9f4d1SDevajith   llvm::StringRef code;
22002d9f4d1SDevajith   llvm::StringRef startOfLine;
22102d9f4d1SDevajith   unsigned line = 1;
22202d9f4d1SDevajith   Diagnostics *error;
22302d9f4d1SDevajith   TokenInfo nextToken;
22402d9f4d1SDevajith   const char *codeCompletionLocation = nullptr;
22502d9f4d1SDevajith };
22602d9f4d1SDevajith 
22702d9f4d1SDevajith Parser::Sema::~Sema() = default;
22802d9f4d1SDevajith 
getAcceptedCompletionTypes(llvm::ArrayRef<std::pair<MatcherCtor,unsigned>> context)22902d9f4d1SDevajith std::vector<ArgKind> Parser::Sema::getAcceptedCompletionTypes(
23002d9f4d1SDevajith     llvm::ArrayRef<std::pair<MatcherCtor, unsigned>> context) {
23102d9f4d1SDevajith   return {};
23202d9f4d1SDevajith }
23302d9f4d1SDevajith 
23402d9f4d1SDevajith std::vector<MatcherCompletion>
getMatcherCompletions(llvm::ArrayRef<ArgKind> acceptedTypes)23502d9f4d1SDevajith Parser::Sema::getMatcherCompletions(llvm::ArrayRef<ArgKind> acceptedTypes) {
23602d9f4d1SDevajith   return {};
23702d9f4d1SDevajith }
23802d9f4d1SDevajith 
23902d9f4d1SDevajith // Entry for the scope of a parser
24002d9f4d1SDevajith struct Parser::ScopedContextEntry {
24102d9f4d1SDevajith   Parser *parser;
24202d9f4d1SDevajith 
ScopedContextEntrymlir::query::matcher::internal::Parser::ScopedContextEntry24302d9f4d1SDevajith   ScopedContextEntry(Parser *parser, MatcherCtor c) : parser(parser) {
24402d9f4d1SDevajith     parser->contextStack.emplace_back(c, 0u);
24502d9f4d1SDevajith   }
24602d9f4d1SDevajith 
~ScopedContextEntrymlir::query::matcher::internal::Parser::ScopedContextEntry24702d9f4d1SDevajith   ~ScopedContextEntry() { parser->contextStack.pop_back(); }
24802d9f4d1SDevajith 
nextArgmlir::query::matcher::internal::Parser::ScopedContextEntry24902d9f4d1SDevajith   void nextArg() { ++parser->contextStack.back().second; }
25002d9f4d1SDevajith };
25102d9f4d1SDevajith 
25202d9f4d1SDevajith // Parse and validate expressions starting with an identifier.
25302d9f4d1SDevajith // This function can parse named values and matchers. In case of failure, it
25402d9f4d1SDevajith // will try to determine the user's intent to give an appropriate error message.
parseIdentifierPrefixImpl(VariantValue * value)25502d9f4d1SDevajith bool Parser::parseIdentifierPrefixImpl(VariantValue *value) {
25602d9f4d1SDevajith   const TokenInfo nameToken = tokenizer->consumeNextToken();
25702d9f4d1SDevajith 
25802d9f4d1SDevajith   if (tokenizer->nextTokenKind() != TokenKind::OpenParen) {
25902d9f4d1SDevajith     // Parse as a named value.
26002d9f4d1SDevajith     auto namedValue =
26102d9f4d1SDevajith         namedValues ? namedValues->lookup(nameToken.text) : VariantValue();
26202d9f4d1SDevajith 
26302d9f4d1SDevajith     if (!namedValue.isMatcher()) {
26402d9f4d1SDevajith       error->addError(tokenizer->peekNextToken().range,
26502d9f4d1SDevajith                       ErrorType::ParserNotAMatcher);
26602d9f4d1SDevajith       return false;
26702d9f4d1SDevajith     }
26802d9f4d1SDevajith 
26902d9f4d1SDevajith     if (tokenizer->nextTokenKind() == TokenKind::NewLine) {
27002d9f4d1SDevajith       error->addError(tokenizer->peekNextToken().range,
27102d9f4d1SDevajith                       ErrorType::ParserNoOpenParen)
27202d9f4d1SDevajith           << "NewLine";
27302d9f4d1SDevajith       return false;
27402d9f4d1SDevajith     }
27502d9f4d1SDevajith 
27602d9f4d1SDevajith     // If the syntax is correct and the name is not a matcher either, report
27702d9f4d1SDevajith     // an unknown named value.
27802d9f4d1SDevajith     if ((tokenizer->nextTokenKind() == TokenKind::Comma ||
27902d9f4d1SDevajith          tokenizer->nextTokenKind() == TokenKind::CloseParen ||
28002d9f4d1SDevajith          tokenizer->nextTokenKind() == TokenKind::NewLine ||
28102d9f4d1SDevajith          tokenizer->nextTokenKind() == TokenKind::Eof) &&
28202d9f4d1SDevajith         !sema->lookupMatcherCtor(nameToken.text)) {
28302d9f4d1SDevajith       error->addError(nameToken.range, ErrorType::RegistryValueNotFound)
28402d9f4d1SDevajith           << nameToken.text;
28502d9f4d1SDevajith       return false;
28602d9f4d1SDevajith     }
28702d9f4d1SDevajith     // Otherwise, fallback to the matcher parser.
28802d9f4d1SDevajith   }
28902d9f4d1SDevajith 
29002d9f4d1SDevajith   tokenizer->skipNewlines();
29102d9f4d1SDevajith 
29202d9f4d1SDevajith   assert(nameToken.kind == TokenKind::Ident);
29302d9f4d1SDevajith   TokenInfo openToken = tokenizer->consumeNextToken();
29402d9f4d1SDevajith   if (openToken.kind != TokenKind::OpenParen) {
29502d9f4d1SDevajith     error->addError(openToken.range, ErrorType::ParserNoOpenParen)
29602d9f4d1SDevajith         << openToken.text;
29702d9f4d1SDevajith     return false;
29802d9f4d1SDevajith   }
29902d9f4d1SDevajith 
30002d9f4d1SDevajith   std::optional<MatcherCtor> ctor = sema->lookupMatcherCtor(nameToken.text);
30102d9f4d1SDevajith 
30202d9f4d1SDevajith   // Parse as a matcher expression.
30302d9f4d1SDevajith   return parseMatcherExpressionImpl(nameToken, openToken, ctor, value);
30402d9f4d1SDevajith }
30502d9f4d1SDevajith 
parseChainedExpression(std::string & argument)306*58b44c81SJacques Pienaar bool Parser::parseChainedExpression(std::string &argument) {
307*58b44c81SJacques Pienaar   // Parse the parenthesized argument to .extract("foo")
308*58b44c81SJacques Pienaar   // Note: EOF is handled inside the consume functions and would fail below when
309*58b44c81SJacques Pienaar   // checking token kind.
310*58b44c81SJacques Pienaar   const TokenInfo openToken = tokenizer->consumeNextToken();
311*58b44c81SJacques Pienaar   const TokenInfo argumentToken = tokenizer->consumeNextTokenIgnoreNewlines();
312*58b44c81SJacques Pienaar   const TokenInfo closeToken = tokenizer->consumeNextTokenIgnoreNewlines();
313*58b44c81SJacques Pienaar 
314*58b44c81SJacques Pienaar   if (openToken.kind != TokenKind::OpenParen) {
315*58b44c81SJacques Pienaar     error->addError(openToken.range, ErrorType::ParserChainedExprNoOpenParen);
316*58b44c81SJacques Pienaar     return false;
317*58b44c81SJacques Pienaar   }
318*58b44c81SJacques Pienaar 
319*58b44c81SJacques Pienaar   if (argumentToken.kind != TokenKind::Literal ||
320*58b44c81SJacques Pienaar       !argumentToken.value.isString()) {
321*58b44c81SJacques Pienaar     error->addError(argumentToken.range,
322*58b44c81SJacques Pienaar                     ErrorType::ParserChainedExprInvalidArg);
323*58b44c81SJacques Pienaar     return false;
324*58b44c81SJacques Pienaar   }
325*58b44c81SJacques Pienaar 
326*58b44c81SJacques Pienaar   if (closeToken.kind != TokenKind::CloseParen) {
327*58b44c81SJacques Pienaar     error->addError(closeToken.range, ErrorType::ParserChainedExprNoCloseParen);
328*58b44c81SJacques Pienaar     return false;
329*58b44c81SJacques Pienaar   }
330*58b44c81SJacques Pienaar 
331*58b44c81SJacques Pienaar   // If all checks passed, extract the argument and return true.
332*58b44c81SJacques Pienaar   argument = argumentToken.value.getString();
333*58b44c81SJacques Pienaar   return true;
334*58b44c81SJacques Pienaar }
335*58b44c81SJacques Pienaar 
33602d9f4d1SDevajith // Parse the arguments of a matcher
parseMatcherArgs(std::vector<ParserValue> & args,MatcherCtor ctor,const TokenInfo & nameToken,TokenInfo & endToken)33702d9f4d1SDevajith bool Parser::parseMatcherArgs(std::vector<ParserValue> &args, MatcherCtor ctor,
33802d9f4d1SDevajith                               const TokenInfo &nameToken, TokenInfo &endToken) {
33902d9f4d1SDevajith   ScopedContextEntry sce(this, ctor);
34002d9f4d1SDevajith 
34102d9f4d1SDevajith   while (tokenizer->nextTokenKind() != TokenKind::Eof) {
34202d9f4d1SDevajith     if (tokenizer->nextTokenKind() == TokenKind::CloseParen) {
34302d9f4d1SDevajith       // end of args.
34402d9f4d1SDevajith       endToken = tokenizer->consumeNextToken();
34502d9f4d1SDevajith       break;
34602d9f4d1SDevajith     }
34702d9f4d1SDevajith 
34802d9f4d1SDevajith     if (!args.empty()) {
34902d9f4d1SDevajith       // We must find a , token to continue.
35002d9f4d1SDevajith       TokenInfo commaToken = tokenizer->consumeNextToken();
35102d9f4d1SDevajith       if (commaToken.kind != TokenKind::Comma) {
35202d9f4d1SDevajith         error->addError(commaToken.range, ErrorType::ParserNoComma)
35302d9f4d1SDevajith             << commaToken.text;
35402d9f4d1SDevajith         return false;
35502d9f4d1SDevajith       }
35602d9f4d1SDevajith     }
35702d9f4d1SDevajith 
35802d9f4d1SDevajith     ParserValue argValue;
35902d9f4d1SDevajith     tokenizer->skipNewlines();
36002d9f4d1SDevajith 
36102d9f4d1SDevajith     argValue.text = tokenizer->peekNextToken().text;
36202d9f4d1SDevajith     argValue.range = tokenizer->peekNextToken().range;
36302d9f4d1SDevajith     if (!parseExpressionImpl(&argValue.value)) {
36402d9f4d1SDevajith       return false;
36502d9f4d1SDevajith     }
36602d9f4d1SDevajith 
36702d9f4d1SDevajith     tokenizer->skipNewlines();
36802d9f4d1SDevajith     args.push_back(argValue);
36902d9f4d1SDevajith     sce.nextArg();
37002d9f4d1SDevajith   }
37102d9f4d1SDevajith 
37202d9f4d1SDevajith   return true;
37302d9f4d1SDevajith }
37402d9f4d1SDevajith 
37502d9f4d1SDevajith // Parse and validate a matcher expression.
parseMatcherExpressionImpl(const TokenInfo & nameToken,const TokenInfo & openToken,std::optional<MatcherCtor> ctor,VariantValue * value)37602d9f4d1SDevajith bool Parser::parseMatcherExpressionImpl(const TokenInfo &nameToken,
37702d9f4d1SDevajith                                         const TokenInfo &openToken,
37802d9f4d1SDevajith                                         std::optional<MatcherCtor> ctor,
37902d9f4d1SDevajith                                         VariantValue *value) {
38002d9f4d1SDevajith   if (!ctor) {
38102d9f4d1SDevajith     error->addError(nameToken.range, ErrorType::RegistryMatcherNotFound)
38202d9f4d1SDevajith         << nameToken.text;
38302d9f4d1SDevajith     // Do not return here. We need to continue to give completion suggestions.
38402d9f4d1SDevajith   }
38502d9f4d1SDevajith 
38602d9f4d1SDevajith   std::vector<ParserValue> args;
38702d9f4d1SDevajith   TokenInfo endToken;
38802d9f4d1SDevajith 
38902d9f4d1SDevajith   tokenizer->skipNewlines();
39002d9f4d1SDevajith 
39102d9f4d1SDevajith   if (!parseMatcherArgs(args, ctor.value_or(nullptr), nameToken, endToken)) {
39202d9f4d1SDevajith     return false;
39302d9f4d1SDevajith   }
39402d9f4d1SDevajith 
39502d9f4d1SDevajith   // Check for the missing closing parenthesis
39602d9f4d1SDevajith   if (endToken.kind != TokenKind::CloseParen) {
39702d9f4d1SDevajith     error->addError(openToken.range, ErrorType::ParserNoCloseParen)
39802d9f4d1SDevajith         << nameToken.text;
39902d9f4d1SDevajith     return false;
40002d9f4d1SDevajith   }
40102d9f4d1SDevajith 
402*58b44c81SJacques Pienaar   std::string functionName;
403*58b44c81SJacques Pienaar   if (tokenizer->peekNextToken().kind == TokenKind::Period) {
404*58b44c81SJacques Pienaar     tokenizer->consumeNextToken();
405*58b44c81SJacques Pienaar     TokenInfo chainCallToken = tokenizer->consumeNextToken();
406*58b44c81SJacques Pienaar     if (chainCallToken.kind == TokenKind::CodeCompletion) {
407*58b44c81SJacques Pienaar       addCompletion(chainCallToken, MatcherCompletion("extract(\"", "extract"));
408*58b44c81SJacques Pienaar       return false;
409*58b44c81SJacques Pienaar     }
410*58b44c81SJacques Pienaar 
411*58b44c81SJacques Pienaar     if (chainCallToken.kind != TokenKind::Ident ||
412*58b44c81SJacques Pienaar         chainCallToken.text != TokenInfo::ID_Extract) {
413*58b44c81SJacques Pienaar       error->addError(chainCallToken.range,
414*58b44c81SJacques Pienaar                       ErrorType::ParserMalformedChainedExpr);
415*58b44c81SJacques Pienaar       return false;
416*58b44c81SJacques Pienaar     }
417*58b44c81SJacques Pienaar 
418*58b44c81SJacques Pienaar     if (chainCallToken.text == TokenInfo::ID_Extract &&
419*58b44c81SJacques Pienaar         !parseChainedExpression(functionName))
420*58b44c81SJacques Pienaar       return false;
421*58b44c81SJacques Pienaar   }
422*58b44c81SJacques Pienaar 
42302d9f4d1SDevajith   if (!ctor)
42402d9f4d1SDevajith     return false;
42502d9f4d1SDevajith   // Merge the start and end infos.
42602d9f4d1SDevajith   SourceRange matcherRange = nameToken.range;
42702d9f4d1SDevajith   matcherRange.end = endToken.range.end;
428*58b44c81SJacques Pienaar   VariantMatcher result = sema->actOnMatcherExpression(
429*58b44c81SJacques Pienaar       *ctor, matcherRange, functionName, args, error);
43002d9f4d1SDevajith   if (result.isNull())
43102d9f4d1SDevajith     return false;
43202d9f4d1SDevajith   *value = result;
43302d9f4d1SDevajith   return true;
43402d9f4d1SDevajith }
43502d9f4d1SDevajith 
43602d9f4d1SDevajith // If the prefix of this completion matches the completion token, add it to
43702d9f4d1SDevajith // completions minus the prefix.
addCompletion(const TokenInfo & compToken,const MatcherCompletion & completion)43802d9f4d1SDevajith void Parser::addCompletion(const TokenInfo &compToken,
43902d9f4d1SDevajith                            const MatcherCompletion &completion) {
44088d319a2SKazu Hirata   if (llvm::StringRef(completion.typedText).starts_with(compToken.text)) {
44102d9f4d1SDevajith     completions.emplace_back(completion.typedText.substr(compToken.text.size()),
44202d9f4d1SDevajith                              completion.matcherDecl);
44302d9f4d1SDevajith   }
44402d9f4d1SDevajith }
44502d9f4d1SDevajith 
44602d9f4d1SDevajith std::vector<MatcherCompletion>
getNamedValueCompletions(llvm::ArrayRef<ArgKind> acceptedTypes)44702d9f4d1SDevajith Parser::getNamedValueCompletions(llvm::ArrayRef<ArgKind> acceptedTypes) {
44802d9f4d1SDevajith   if (!namedValues)
44902d9f4d1SDevajith     return {};
45002d9f4d1SDevajith 
45102d9f4d1SDevajith   std::vector<MatcherCompletion> result;
45202d9f4d1SDevajith   for (const auto &entry : *namedValues) {
45302d9f4d1SDevajith     std::string decl =
45402d9f4d1SDevajith         (entry.getValue().getTypeAsString() + " " + entry.getKey()).str();
45502d9f4d1SDevajith     result.emplace_back(entry.getKey(), decl);
45602d9f4d1SDevajith   }
45702d9f4d1SDevajith   return result;
45802d9f4d1SDevajith }
45902d9f4d1SDevajith 
addExpressionCompletions()46002d9f4d1SDevajith void Parser::addExpressionCompletions() {
46102d9f4d1SDevajith   const TokenInfo compToken = tokenizer->consumeNextTokenIgnoreNewlines();
46202d9f4d1SDevajith   assert(compToken.kind == TokenKind::CodeCompletion);
46302d9f4d1SDevajith 
46402d9f4d1SDevajith   // We cannot complete code if there is an invalid element on the context
46502d9f4d1SDevajith   // stack.
46602d9f4d1SDevajith   for (const auto &entry : contextStack) {
46702d9f4d1SDevajith     if (!entry.first)
46802d9f4d1SDevajith       return;
46902d9f4d1SDevajith   }
47002d9f4d1SDevajith 
47102d9f4d1SDevajith   auto acceptedTypes = sema->getAcceptedCompletionTypes(contextStack);
47202d9f4d1SDevajith   for (const auto &completion : sema->getMatcherCompletions(acceptedTypes)) {
47302d9f4d1SDevajith     addCompletion(compToken, completion);
47402d9f4d1SDevajith   }
47502d9f4d1SDevajith 
47602d9f4d1SDevajith   for (const auto &completion : getNamedValueCompletions(acceptedTypes)) {
47702d9f4d1SDevajith     addCompletion(compToken, completion);
47802d9f4d1SDevajith   }
47902d9f4d1SDevajith }
48002d9f4d1SDevajith 
48102d9f4d1SDevajith // Parse an <Expresssion>
parseExpressionImpl(VariantValue * value)48202d9f4d1SDevajith bool Parser::parseExpressionImpl(VariantValue *value) {
48302d9f4d1SDevajith   switch (tokenizer->nextTokenKind()) {
48402d9f4d1SDevajith   case TokenKind::Literal:
48502d9f4d1SDevajith     *value = tokenizer->consumeNextToken().value;
48602d9f4d1SDevajith     return true;
48702d9f4d1SDevajith   case TokenKind::Ident:
48802d9f4d1SDevajith     return parseIdentifierPrefixImpl(value);
48902d9f4d1SDevajith   case TokenKind::CodeCompletion:
49002d9f4d1SDevajith     addExpressionCompletions();
49102d9f4d1SDevajith     return false;
49202d9f4d1SDevajith   case TokenKind::Eof:
49302d9f4d1SDevajith     error->addError(tokenizer->consumeNextToken().range,
49402d9f4d1SDevajith                     ErrorType::ParserNoCode);
49502d9f4d1SDevajith     return false;
49602d9f4d1SDevajith 
49702d9f4d1SDevajith   case TokenKind::Error:
49802d9f4d1SDevajith     // This error was already reported by the tokenizer.
49902d9f4d1SDevajith     return false;
50002d9f4d1SDevajith   case TokenKind::NewLine:
50102d9f4d1SDevajith   case TokenKind::OpenParen:
50202d9f4d1SDevajith   case TokenKind::CloseParen:
50302d9f4d1SDevajith   case TokenKind::Comma:
50402d9f4d1SDevajith   case TokenKind::Period:
50502d9f4d1SDevajith   case TokenKind::InvalidChar:
50602d9f4d1SDevajith     const TokenInfo token = tokenizer->consumeNextToken();
50702d9f4d1SDevajith     error->addError(token.range, ErrorType::ParserInvalidToken)
50802d9f4d1SDevajith         << (token.kind == TokenKind::NewLine ? "NewLine" : token.text);
50902d9f4d1SDevajith     return false;
51002d9f4d1SDevajith   }
51102d9f4d1SDevajith 
51202d9f4d1SDevajith   llvm_unreachable("Unknown token kind.");
51302d9f4d1SDevajith }
51402d9f4d1SDevajith 
Parser(CodeTokenizer * tokenizer,const Registry & matcherRegistry,const NamedValueMap * namedValues,Diagnostics * error)51502d9f4d1SDevajith Parser::Parser(CodeTokenizer *tokenizer, const Registry &matcherRegistry,
51602d9f4d1SDevajith                const NamedValueMap *namedValues, Diagnostics *error)
51702d9f4d1SDevajith     : tokenizer(tokenizer),
51802d9f4d1SDevajith       sema(std::make_unique<RegistrySema>(matcherRegistry)),
51902d9f4d1SDevajith       namedValues(namedValues), error(error) {}
52002d9f4d1SDevajith 
52102d9f4d1SDevajith Parser::RegistrySema::~RegistrySema() = default;
52202d9f4d1SDevajith 
52302d9f4d1SDevajith std::optional<MatcherCtor>
lookupMatcherCtor(llvm::StringRef matcherName)52402d9f4d1SDevajith Parser::RegistrySema::lookupMatcherCtor(llvm::StringRef matcherName) {
52502d9f4d1SDevajith   return RegistryManager::lookupMatcherCtor(matcherName, matcherRegistry);
52602d9f4d1SDevajith }
52702d9f4d1SDevajith 
actOnMatcherExpression(MatcherCtor ctor,SourceRange nameRange,llvm::StringRef functionName,llvm::ArrayRef<ParserValue> args,Diagnostics * error)52802d9f4d1SDevajith VariantMatcher Parser::RegistrySema::actOnMatcherExpression(
529*58b44c81SJacques Pienaar     MatcherCtor ctor, SourceRange nameRange, llvm::StringRef functionName,
530*58b44c81SJacques Pienaar     llvm::ArrayRef<ParserValue> args, Diagnostics *error) {
531*58b44c81SJacques Pienaar   return RegistryManager::constructMatcher(ctor, nameRange, functionName, args,
532*58b44c81SJacques Pienaar                                            error);
53302d9f4d1SDevajith }
53402d9f4d1SDevajith 
getAcceptedCompletionTypes(llvm::ArrayRef<std::pair<MatcherCtor,unsigned>> context)53502d9f4d1SDevajith std::vector<ArgKind> Parser::RegistrySema::getAcceptedCompletionTypes(
53602d9f4d1SDevajith     llvm::ArrayRef<std::pair<MatcherCtor, unsigned>> context) {
53702d9f4d1SDevajith   return RegistryManager::getAcceptedCompletionTypes(context);
53802d9f4d1SDevajith }
53902d9f4d1SDevajith 
getMatcherCompletions(llvm::ArrayRef<ArgKind> acceptedTypes)54002d9f4d1SDevajith std::vector<MatcherCompletion> Parser::RegistrySema::getMatcherCompletions(
54102d9f4d1SDevajith     llvm::ArrayRef<ArgKind> acceptedTypes) {
54202d9f4d1SDevajith   return RegistryManager::getMatcherCompletions(acceptedTypes, matcherRegistry);
54302d9f4d1SDevajith }
54402d9f4d1SDevajith 
parseExpression(llvm::StringRef & code,const Registry & matcherRegistry,const NamedValueMap * namedValues,VariantValue * value,Diagnostics * error)54502d9f4d1SDevajith bool Parser::parseExpression(llvm::StringRef &code,
54602d9f4d1SDevajith                              const Registry &matcherRegistry,
54702d9f4d1SDevajith                              const NamedValueMap *namedValues,
54802d9f4d1SDevajith                              VariantValue *value, Diagnostics *error) {
54902d9f4d1SDevajith   CodeTokenizer tokenizer(code, error);
55002d9f4d1SDevajith   Parser parser(&tokenizer, matcherRegistry, namedValues, error);
55102d9f4d1SDevajith   if (!parser.parseExpressionImpl(value))
55202d9f4d1SDevajith     return false;
55302d9f4d1SDevajith   auto nextToken = tokenizer.peekNextToken();
55402d9f4d1SDevajith   if (nextToken.kind != TokenKind::Eof &&
55502d9f4d1SDevajith       nextToken.kind != TokenKind::NewLine) {
55602d9f4d1SDevajith     error->addError(tokenizer.peekNextToken().range,
55702d9f4d1SDevajith                     ErrorType::ParserTrailingCode);
55802d9f4d1SDevajith     return false;
55902d9f4d1SDevajith   }
56002d9f4d1SDevajith   return true;
56102d9f4d1SDevajith }
56202d9f4d1SDevajith 
56302d9f4d1SDevajith std::vector<MatcherCompletion>
completeExpression(llvm::StringRef & code,unsigned completionOffset,const Registry & matcherRegistry,const NamedValueMap * namedValues)56402d9f4d1SDevajith Parser::completeExpression(llvm::StringRef &code, unsigned completionOffset,
56502d9f4d1SDevajith                            const Registry &matcherRegistry,
56602d9f4d1SDevajith                            const NamedValueMap *namedValues) {
56702d9f4d1SDevajith   Diagnostics error;
56802d9f4d1SDevajith   CodeTokenizer tokenizer(code, &error, completionOffset);
56902d9f4d1SDevajith   Parser parser(&tokenizer, matcherRegistry, namedValues, &error);
57002d9f4d1SDevajith   VariantValue dummy;
57102d9f4d1SDevajith   parser.parseExpressionImpl(&dummy);
57202d9f4d1SDevajith 
57302d9f4d1SDevajith   return parser.completions;
57402d9f4d1SDevajith }
57502d9f4d1SDevajith 
parseMatcherExpression(llvm::StringRef & code,const Registry & matcherRegistry,const NamedValueMap * namedValues,Diagnostics * error)57602d9f4d1SDevajith std::optional<DynMatcher> Parser::parseMatcherExpression(
57702d9f4d1SDevajith     llvm::StringRef &code, const Registry &matcherRegistry,
57802d9f4d1SDevajith     const NamedValueMap *namedValues, Diagnostics *error) {
57902d9f4d1SDevajith   VariantValue value;
58002d9f4d1SDevajith   if (!parseExpression(code, matcherRegistry, namedValues, &value, error))
58102d9f4d1SDevajith     return std::nullopt;
58202d9f4d1SDevajith   if (!value.isMatcher()) {
58302d9f4d1SDevajith     error->addError(SourceRange(), ErrorType::ParserNotAMatcher);
58402d9f4d1SDevajith     return std::nullopt;
58502d9f4d1SDevajith   }
58602d9f4d1SDevajith   std::optional<DynMatcher> result = value.getMatcher().getDynMatcher();
58702d9f4d1SDevajith   if (!result) {
58802d9f4d1SDevajith     error->addError(SourceRange(), ErrorType::ParserOverloadedType)
58902d9f4d1SDevajith         << value.getTypeAsString();
59002d9f4d1SDevajith   }
59102d9f4d1SDevajith   return result;
59202d9f4d1SDevajith }
59302d9f4d1SDevajith 
59402d9f4d1SDevajith } // namespace mlir::query::matcher::internal
595