//===- Parser.cpp - Matcher expression parser -----------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// Recursive parser implementation for the matcher expression grammar.
//
//===----------------------------------------------------------------------===//
1202d9f4d1SDevajith
1302d9f4d1SDevajith #include "Parser.h"
1402d9f4d1SDevajith
1502d9f4d1SDevajith #include <vector>
1602d9f4d1SDevajith
1702d9f4d1SDevajith namespace mlir::query::matcher::internal {
1802d9f4d1SDevajith
1902d9f4d1SDevajith // Simple structure to hold information for one token from the parser.
2002d9f4d1SDevajith struct Parser::TokenInfo {
2102d9f4d1SDevajith TokenInfo() = default;
2202d9f4d1SDevajith
2302d9f4d1SDevajith // Method to set the kind and text of the token
setmlir::query::matcher::internal::Parser::TokenInfo2402d9f4d1SDevajith void set(TokenKind newKind, llvm::StringRef newText) {
2502d9f4d1SDevajith kind = newKind;
2602d9f4d1SDevajith text = newText;
2702d9f4d1SDevajith }
2802d9f4d1SDevajith
29*58b44c81SJacques Pienaar // Known identifiers.
30*58b44c81SJacques Pienaar static const char *const ID_Extract;
31*58b44c81SJacques Pienaar
3202d9f4d1SDevajith llvm::StringRef text;
3302d9f4d1SDevajith TokenKind kind = TokenKind::Eof;
3402d9f4d1SDevajith SourceRange range;
3502d9f4d1SDevajith VariantValue value;
3602d9f4d1SDevajith };
3702d9f4d1SDevajith
38*58b44c81SJacques Pienaar const char *const Parser::TokenInfo::ID_Extract = "extract";
39*58b44c81SJacques Pienaar
4002d9f4d1SDevajith class Parser::CodeTokenizer {
4102d9f4d1SDevajith public:
4202d9f4d1SDevajith // Constructor with matcherCode and error
CodeTokenizer(llvm::StringRef matcherCode,Diagnostics * error)4302d9f4d1SDevajith explicit CodeTokenizer(llvm::StringRef matcherCode, Diagnostics *error)
4402d9f4d1SDevajith : code(matcherCode), startOfLine(matcherCode), error(error) {
4502d9f4d1SDevajith nextToken = getNextToken();
4602d9f4d1SDevajith }
4702d9f4d1SDevajith
4802d9f4d1SDevajith // Constructor with matcherCode, error, and codeCompletionOffset
CodeTokenizer(llvm::StringRef matcherCode,Diagnostics * error,unsigned codeCompletionOffset)4902d9f4d1SDevajith CodeTokenizer(llvm::StringRef matcherCode, Diagnostics *error,
5002d9f4d1SDevajith unsigned codeCompletionOffset)
5102d9f4d1SDevajith : code(matcherCode), startOfLine(matcherCode), error(error),
5202d9f4d1SDevajith codeCompletionLocation(matcherCode.data() + codeCompletionOffset) {
5302d9f4d1SDevajith nextToken = getNextToken();
5402d9f4d1SDevajith }
5502d9f4d1SDevajith
5602d9f4d1SDevajith // Peek at next token without consuming it
peekNextToken() const5702d9f4d1SDevajith const TokenInfo &peekNextToken() const { return nextToken; }
5802d9f4d1SDevajith
5902d9f4d1SDevajith // Consume and return the next token
consumeNextToken()6002d9f4d1SDevajith TokenInfo consumeNextToken() {
6102d9f4d1SDevajith TokenInfo thisToken = nextToken;
6202d9f4d1SDevajith nextToken = getNextToken();
6302d9f4d1SDevajith return thisToken;
6402d9f4d1SDevajith }
6502d9f4d1SDevajith
6602d9f4d1SDevajith // Skip any newline tokens
skipNewlines()6702d9f4d1SDevajith TokenInfo skipNewlines() {
6802d9f4d1SDevajith while (nextToken.kind == TokenKind::NewLine)
6902d9f4d1SDevajith nextToken = getNextToken();
7002d9f4d1SDevajith return nextToken;
7102d9f4d1SDevajith }
7202d9f4d1SDevajith
7302d9f4d1SDevajith // Consume and return next token, ignoring newlines
consumeNextTokenIgnoreNewlines()7402d9f4d1SDevajith TokenInfo consumeNextTokenIgnoreNewlines() {
7502d9f4d1SDevajith skipNewlines();
7602d9f4d1SDevajith return nextToken.kind == TokenKind::Eof ? nextToken : consumeNextToken();
7702d9f4d1SDevajith }
7802d9f4d1SDevajith
7902d9f4d1SDevajith // Return kind of next token
nextTokenKind() const8002d9f4d1SDevajith TokenKind nextTokenKind() const { return nextToken.kind; }
8102d9f4d1SDevajith
8202d9f4d1SDevajith private:
8302d9f4d1SDevajith // Helper function to get the first character as a new StringRef and drop it
8402d9f4d1SDevajith // from the original string
firstCharacterAndDrop(llvm::StringRef & str)8502d9f4d1SDevajith llvm::StringRef firstCharacterAndDrop(llvm::StringRef &str) {
8602d9f4d1SDevajith assert(!str.empty());
8702d9f4d1SDevajith llvm::StringRef firstChar = str.substr(0, 1);
8802d9f4d1SDevajith str = str.drop_front();
8902d9f4d1SDevajith return firstChar;
9002d9f4d1SDevajith }
9102d9f4d1SDevajith
9202d9f4d1SDevajith // Get next token, consuming whitespaces and handling different token types
getNextToken()9302d9f4d1SDevajith TokenInfo getNextToken() {
9402d9f4d1SDevajith consumeWhitespace();
9502d9f4d1SDevajith TokenInfo result;
9602d9f4d1SDevajith result.range.start = currentLocation();
9702d9f4d1SDevajith
9802d9f4d1SDevajith // Code completion case
9902d9f4d1SDevajith if (codeCompletionLocation && codeCompletionLocation <= code.data()) {
10002d9f4d1SDevajith result.set(TokenKind::CodeCompletion,
10102d9f4d1SDevajith llvm::StringRef(codeCompletionLocation, 0));
10202d9f4d1SDevajith codeCompletionLocation = nullptr;
10302d9f4d1SDevajith return result;
10402d9f4d1SDevajith }
10502d9f4d1SDevajith
10602d9f4d1SDevajith // End of file case
10702d9f4d1SDevajith if (code.empty()) {
10802d9f4d1SDevajith result.set(TokenKind::Eof, "");
10902d9f4d1SDevajith return result;
11002d9f4d1SDevajith }
11102d9f4d1SDevajith
11202d9f4d1SDevajith // Switch to handle specific characters
11302d9f4d1SDevajith switch (code[0]) {
11402d9f4d1SDevajith case '#':
11502d9f4d1SDevajith code = code.drop_until([](char c) { return c == '\n'; });
11602d9f4d1SDevajith return getNextToken();
11702d9f4d1SDevajith case ',':
11802d9f4d1SDevajith result.set(TokenKind::Comma, firstCharacterAndDrop(code));
11902d9f4d1SDevajith break;
12002d9f4d1SDevajith case '.':
12102d9f4d1SDevajith result.set(TokenKind::Period, firstCharacterAndDrop(code));
12202d9f4d1SDevajith break;
12302d9f4d1SDevajith case '\n':
12402d9f4d1SDevajith ++line;
12502d9f4d1SDevajith startOfLine = code.drop_front();
12602d9f4d1SDevajith result.set(TokenKind::NewLine, firstCharacterAndDrop(code));
12702d9f4d1SDevajith break;
12802d9f4d1SDevajith case '(':
12902d9f4d1SDevajith result.set(TokenKind::OpenParen, firstCharacterAndDrop(code));
13002d9f4d1SDevajith break;
13102d9f4d1SDevajith case ')':
13202d9f4d1SDevajith result.set(TokenKind::CloseParen, firstCharacterAndDrop(code));
13302d9f4d1SDevajith break;
13402d9f4d1SDevajith case '"':
13502d9f4d1SDevajith case '\'':
13602d9f4d1SDevajith consumeStringLiteral(&result);
13702d9f4d1SDevajith break;
13802d9f4d1SDevajith default:
13902d9f4d1SDevajith parseIdentifierOrInvalid(&result);
14002d9f4d1SDevajith break;
14102d9f4d1SDevajith }
14202d9f4d1SDevajith
14302d9f4d1SDevajith result.range.end = currentLocation();
14402d9f4d1SDevajith return result;
14502d9f4d1SDevajith }
14602d9f4d1SDevajith
14702d9f4d1SDevajith // Consume a string literal, handle escape sequences and missing closing
14802d9f4d1SDevajith // quote.
consumeStringLiteral(TokenInfo * result)14902d9f4d1SDevajith void consumeStringLiteral(TokenInfo *result) {
15002d9f4d1SDevajith bool inEscape = false;
15102d9f4d1SDevajith const char marker = code[0];
15202d9f4d1SDevajith for (size_t length = 1; length < code.size(); ++length) {
15302d9f4d1SDevajith if (inEscape) {
15402d9f4d1SDevajith inEscape = false;
15502d9f4d1SDevajith continue;
15602d9f4d1SDevajith }
15702d9f4d1SDevajith if (code[length] == '\\') {
15802d9f4d1SDevajith inEscape = true;
15902d9f4d1SDevajith continue;
16002d9f4d1SDevajith }
16102d9f4d1SDevajith if (code[length] == marker) {
16202d9f4d1SDevajith result->kind = TokenKind::Literal;
16302d9f4d1SDevajith result->text = code.substr(0, length + 1);
16402d9f4d1SDevajith result->value = code.substr(1, length - 1);
16502d9f4d1SDevajith code = code.drop_front(length + 1);
16602d9f4d1SDevajith return;
16702d9f4d1SDevajith }
16802d9f4d1SDevajith }
16902d9f4d1SDevajith llvm::StringRef errorText = code;
17002d9f4d1SDevajith code = code.drop_front(code.size());
17102d9f4d1SDevajith SourceRange range;
17202d9f4d1SDevajith range.start = result->range.start;
17302d9f4d1SDevajith range.end = currentLocation();
17402d9f4d1SDevajith error->addError(range, ErrorType::ParserStringError) << errorText;
17502d9f4d1SDevajith result->kind = TokenKind::Error;
17602d9f4d1SDevajith }
17702d9f4d1SDevajith
parseIdentifierOrInvalid(TokenInfo * result)17802d9f4d1SDevajith void parseIdentifierOrInvalid(TokenInfo *result) {
17902d9f4d1SDevajith if (isalnum(code[0])) {
18002d9f4d1SDevajith // Parse an identifier
18102d9f4d1SDevajith size_t tokenLength = 1;
18202d9f4d1SDevajith
18302d9f4d1SDevajith while (true) {
18402d9f4d1SDevajith // A code completion location in/immediately after an identifier will
18502d9f4d1SDevajith // cause the portion of the identifier before the code completion
18602d9f4d1SDevajith // location to become a code completion token.
18702d9f4d1SDevajith if (codeCompletionLocation == code.data() + tokenLength) {
18802d9f4d1SDevajith codeCompletionLocation = nullptr;
18902d9f4d1SDevajith result->kind = TokenKind::CodeCompletion;
19002d9f4d1SDevajith result->text = code.substr(0, tokenLength);
19102d9f4d1SDevajith code = code.drop_front(tokenLength);
19202d9f4d1SDevajith return;
19302d9f4d1SDevajith }
19402d9f4d1SDevajith if (tokenLength == code.size() || !(isalnum(code[tokenLength])))
19502d9f4d1SDevajith break;
19602d9f4d1SDevajith ++tokenLength;
19702d9f4d1SDevajith }
19802d9f4d1SDevajith result->kind = TokenKind::Ident;
19902d9f4d1SDevajith result->text = code.substr(0, tokenLength);
20002d9f4d1SDevajith code = code.drop_front(tokenLength);
20102d9f4d1SDevajith } else {
20202d9f4d1SDevajith result->kind = TokenKind::InvalidChar;
20302d9f4d1SDevajith result->text = code.substr(0, 1);
20402d9f4d1SDevajith code = code.drop_front(1);
20502d9f4d1SDevajith }
20602d9f4d1SDevajith }
20702d9f4d1SDevajith
20802d9f4d1SDevajith // Consume all leading whitespace from code, except newlines
consumeWhitespace()209abaa79b2SKazu Hirata void consumeWhitespace() { code = code.ltrim(" \t\v\f\r"); }
21002d9f4d1SDevajith
21102d9f4d1SDevajith // Returns the current location in the source code
currentLocation()21202d9f4d1SDevajith SourceLocation currentLocation() {
21302d9f4d1SDevajith SourceLocation location;
21402d9f4d1SDevajith location.line = line;
21502d9f4d1SDevajith location.column = code.data() - startOfLine.data() + 1;
21602d9f4d1SDevajith return location;
21702d9f4d1SDevajith }
21802d9f4d1SDevajith
21902d9f4d1SDevajith llvm::StringRef code;
22002d9f4d1SDevajith llvm::StringRef startOfLine;
22102d9f4d1SDevajith unsigned line = 1;
22202d9f4d1SDevajith Diagnostics *error;
22302d9f4d1SDevajith TokenInfo nextToken;
22402d9f4d1SDevajith const char *codeCompletionLocation = nullptr;
22502d9f4d1SDevajith };
22602d9f4d1SDevajith
22702d9f4d1SDevajith Parser::Sema::~Sema() = default;
22802d9f4d1SDevajith
getAcceptedCompletionTypes(llvm::ArrayRef<std::pair<MatcherCtor,unsigned>> context)22902d9f4d1SDevajith std::vector<ArgKind> Parser::Sema::getAcceptedCompletionTypes(
23002d9f4d1SDevajith llvm::ArrayRef<std::pair<MatcherCtor, unsigned>> context) {
23102d9f4d1SDevajith return {};
23202d9f4d1SDevajith }
23302d9f4d1SDevajith
23402d9f4d1SDevajith std::vector<MatcherCompletion>
getMatcherCompletions(llvm::ArrayRef<ArgKind> acceptedTypes)23502d9f4d1SDevajith Parser::Sema::getMatcherCompletions(llvm::ArrayRef<ArgKind> acceptedTypes) {
23602d9f4d1SDevajith return {};
23702d9f4d1SDevajith }
23802d9f4d1SDevajith
23902d9f4d1SDevajith // Entry for the scope of a parser
24002d9f4d1SDevajith struct Parser::ScopedContextEntry {
24102d9f4d1SDevajith Parser *parser;
24202d9f4d1SDevajith
ScopedContextEntrymlir::query::matcher::internal::Parser::ScopedContextEntry24302d9f4d1SDevajith ScopedContextEntry(Parser *parser, MatcherCtor c) : parser(parser) {
24402d9f4d1SDevajith parser->contextStack.emplace_back(c, 0u);
24502d9f4d1SDevajith }
24602d9f4d1SDevajith
~ScopedContextEntrymlir::query::matcher::internal::Parser::ScopedContextEntry24702d9f4d1SDevajith ~ScopedContextEntry() { parser->contextStack.pop_back(); }
24802d9f4d1SDevajith
nextArgmlir::query::matcher::internal::Parser::ScopedContextEntry24902d9f4d1SDevajith void nextArg() { ++parser->contextStack.back().second; }
25002d9f4d1SDevajith };
25102d9f4d1SDevajith
25202d9f4d1SDevajith // Parse and validate expressions starting with an identifier.
25302d9f4d1SDevajith // This function can parse named values and matchers. In case of failure, it
25402d9f4d1SDevajith // will try to determine the user's intent to give an appropriate error message.
parseIdentifierPrefixImpl(VariantValue * value)25502d9f4d1SDevajith bool Parser::parseIdentifierPrefixImpl(VariantValue *value) {
25602d9f4d1SDevajith const TokenInfo nameToken = tokenizer->consumeNextToken();
25702d9f4d1SDevajith
25802d9f4d1SDevajith if (tokenizer->nextTokenKind() != TokenKind::OpenParen) {
25902d9f4d1SDevajith // Parse as a named value.
26002d9f4d1SDevajith auto namedValue =
26102d9f4d1SDevajith namedValues ? namedValues->lookup(nameToken.text) : VariantValue();
26202d9f4d1SDevajith
26302d9f4d1SDevajith if (!namedValue.isMatcher()) {
26402d9f4d1SDevajith error->addError(tokenizer->peekNextToken().range,
26502d9f4d1SDevajith ErrorType::ParserNotAMatcher);
26602d9f4d1SDevajith return false;
26702d9f4d1SDevajith }
26802d9f4d1SDevajith
26902d9f4d1SDevajith if (tokenizer->nextTokenKind() == TokenKind::NewLine) {
27002d9f4d1SDevajith error->addError(tokenizer->peekNextToken().range,
27102d9f4d1SDevajith ErrorType::ParserNoOpenParen)
27202d9f4d1SDevajith << "NewLine";
27302d9f4d1SDevajith return false;
27402d9f4d1SDevajith }
27502d9f4d1SDevajith
27602d9f4d1SDevajith // If the syntax is correct and the name is not a matcher either, report
27702d9f4d1SDevajith // an unknown named value.
27802d9f4d1SDevajith if ((tokenizer->nextTokenKind() == TokenKind::Comma ||
27902d9f4d1SDevajith tokenizer->nextTokenKind() == TokenKind::CloseParen ||
28002d9f4d1SDevajith tokenizer->nextTokenKind() == TokenKind::NewLine ||
28102d9f4d1SDevajith tokenizer->nextTokenKind() == TokenKind::Eof) &&
28202d9f4d1SDevajith !sema->lookupMatcherCtor(nameToken.text)) {
28302d9f4d1SDevajith error->addError(nameToken.range, ErrorType::RegistryValueNotFound)
28402d9f4d1SDevajith << nameToken.text;
28502d9f4d1SDevajith return false;
28602d9f4d1SDevajith }
28702d9f4d1SDevajith // Otherwise, fallback to the matcher parser.
28802d9f4d1SDevajith }
28902d9f4d1SDevajith
29002d9f4d1SDevajith tokenizer->skipNewlines();
29102d9f4d1SDevajith
29202d9f4d1SDevajith assert(nameToken.kind == TokenKind::Ident);
29302d9f4d1SDevajith TokenInfo openToken = tokenizer->consumeNextToken();
29402d9f4d1SDevajith if (openToken.kind != TokenKind::OpenParen) {
29502d9f4d1SDevajith error->addError(openToken.range, ErrorType::ParserNoOpenParen)
29602d9f4d1SDevajith << openToken.text;
29702d9f4d1SDevajith return false;
29802d9f4d1SDevajith }
29902d9f4d1SDevajith
30002d9f4d1SDevajith std::optional<MatcherCtor> ctor = sema->lookupMatcherCtor(nameToken.text);
30102d9f4d1SDevajith
30202d9f4d1SDevajith // Parse as a matcher expression.
30302d9f4d1SDevajith return parseMatcherExpressionImpl(nameToken, openToken, ctor, value);
30402d9f4d1SDevajith }
30502d9f4d1SDevajith
parseChainedExpression(std::string & argument)306*58b44c81SJacques Pienaar bool Parser::parseChainedExpression(std::string &argument) {
307*58b44c81SJacques Pienaar // Parse the parenthesized argument to .extract("foo")
308*58b44c81SJacques Pienaar // Note: EOF is handled inside the consume functions and would fail below when
309*58b44c81SJacques Pienaar // checking token kind.
310*58b44c81SJacques Pienaar const TokenInfo openToken = tokenizer->consumeNextToken();
311*58b44c81SJacques Pienaar const TokenInfo argumentToken = tokenizer->consumeNextTokenIgnoreNewlines();
312*58b44c81SJacques Pienaar const TokenInfo closeToken = tokenizer->consumeNextTokenIgnoreNewlines();
313*58b44c81SJacques Pienaar
314*58b44c81SJacques Pienaar if (openToken.kind != TokenKind::OpenParen) {
315*58b44c81SJacques Pienaar error->addError(openToken.range, ErrorType::ParserChainedExprNoOpenParen);
316*58b44c81SJacques Pienaar return false;
317*58b44c81SJacques Pienaar }
318*58b44c81SJacques Pienaar
319*58b44c81SJacques Pienaar if (argumentToken.kind != TokenKind::Literal ||
320*58b44c81SJacques Pienaar !argumentToken.value.isString()) {
321*58b44c81SJacques Pienaar error->addError(argumentToken.range,
322*58b44c81SJacques Pienaar ErrorType::ParserChainedExprInvalidArg);
323*58b44c81SJacques Pienaar return false;
324*58b44c81SJacques Pienaar }
325*58b44c81SJacques Pienaar
326*58b44c81SJacques Pienaar if (closeToken.kind != TokenKind::CloseParen) {
327*58b44c81SJacques Pienaar error->addError(closeToken.range, ErrorType::ParserChainedExprNoCloseParen);
328*58b44c81SJacques Pienaar return false;
329*58b44c81SJacques Pienaar }
330*58b44c81SJacques Pienaar
331*58b44c81SJacques Pienaar // If all checks passed, extract the argument and return true.
332*58b44c81SJacques Pienaar argument = argumentToken.value.getString();
333*58b44c81SJacques Pienaar return true;
334*58b44c81SJacques Pienaar }
335*58b44c81SJacques Pienaar
33602d9f4d1SDevajith // Parse the arguments of a matcher
parseMatcherArgs(std::vector<ParserValue> & args,MatcherCtor ctor,const TokenInfo & nameToken,TokenInfo & endToken)33702d9f4d1SDevajith bool Parser::parseMatcherArgs(std::vector<ParserValue> &args, MatcherCtor ctor,
33802d9f4d1SDevajith const TokenInfo &nameToken, TokenInfo &endToken) {
33902d9f4d1SDevajith ScopedContextEntry sce(this, ctor);
34002d9f4d1SDevajith
34102d9f4d1SDevajith while (tokenizer->nextTokenKind() != TokenKind::Eof) {
34202d9f4d1SDevajith if (tokenizer->nextTokenKind() == TokenKind::CloseParen) {
34302d9f4d1SDevajith // end of args.
34402d9f4d1SDevajith endToken = tokenizer->consumeNextToken();
34502d9f4d1SDevajith break;
34602d9f4d1SDevajith }
34702d9f4d1SDevajith
34802d9f4d1SDevajith if (!args.empty()) {
34902d9f4d1SDevajith // We must find a , token to continue.
35002d9f4d1SDevajith TokenInfo commaToken = tokenizer->consumeNextToken();
35102d9f4d1SDevajith if (commaToken.kind != TokenKind::Comma) {
35202d9f4d1SDevajith error->addError(commaToken.range, ErrorType::ParserNoComma)
35302d9f4d1SDevajith << commaToken.text;
35402d9f4d1SDevajith return false;
35502d9f4d1SDevajith }
35602d9f4d1SDevajith }
35702d9f4d1SDevajith
35802d9f4d1SDevajith ParserValue argValue;
35902d9f4d1SDevajith tokenizer->skipNewlines();
36002d9f4d1SDevajith
36102d9f4d1SDevajith argValue.text = tokenizer->peekNextToken().text;
36202d9f4d1SDevajith argValue.range = tokenizer->peekNextToken().range;
36302d9f4d1SDevajith if (!parseExpressionImpl(&argValue.value)) {
36402d9f4d1SDevajith return false;
36502d9f4d1SDevajith }
36602d9f4d1SDevajith
36702d9f4d1SDevajith tokenizer->skipNewlines();
36802d9f4d1SDevajith args.push_back(argValue);
36902d9f4d1SDevajith sce.nextArg();
37002d9f4d1SDevajith }
37102d9f4d1SDevajith
37202d9f4d1SDevajith return true;
37302d9f4d1SDevajith }
37402d9f4d1SDevajith
37502d9f4d1SDevajith // Parse and validate a matcher expression.
parseMatcherExpressionImpl(const TokenInfo & nameToken,const TokenInfo & openToken,std::optional<MatcherCtor> ctor,VariantValue * value)37602d9f4d1SDevajith bool Parser::parseMatcherExpressionImpl(const TokenInfo &nameToken,
37702d9f4d1SDevajith const TokenInfo &openToken,
37802d9f4d1SDevajith std::optional<MatcherCtor> ctor,
37902d9f4d1SDevajith VariantValue *value) {
38002d9f4d1SDevajith if (!ctor) {
38102d9f4d1SDevajith error->addError(nameToken.range, ErrorType::RegistryMatcherNotFound)
38202d9f4d1SDevajith << nameToken.text;
38302d9f4d1SDevajith // Do not return here. We need to continue to give completion suggestions.
38402d9f4d1SDevajith }
38502d9f4d1SDevajith
38602d9f4d1SDevajith std::vector<ParserValue> args;
38702d9f4d1SDevajith TokenInfo endToken;
38802d9f4d1SDevajith
38902d9f4d1SDevajith tokenizer->skipNewlines();
39002d9f4d1SDevajith
39102d9f4d1SDevajith if (!parseMatcherArgs(args, ctor.value_or(nullptr), nameToken, endToken)) {
39202d9f4d1SDevajith return false;
39302d9f4d1SDevajith }
39402d9f4d1SDevajith
39502d9f4d1SDevajith // Check for the missing closing parenthesis
39602d9f4d1SDevajith if (endToken.kind != TokenKind::CloseParen) {
39702d9f4d1SDevajith error->addError(openToken.range, ErrorType::ParserNoCloseParen)
39802d9f4d1SDevajith << nameToken.text;
39902d9f4d1SDevajith return false;
40002d9f4d1SDevajith }
40102d9f4d1SDevajith
402*58b44c81SJacques Pienaar std::string functionName;
403*58b44c81SJacques Pienaar if (tokenizer->peekNextToken().kind == TokenKind::Period) {
404*58b44c81SJacques Pienaar tokenizer->consumeNextToken();
405*58b44c81SJacques Pienaar TokenInfo chainCallToken = tokenizer->consumeNextToken();
406*58b44c81SJacques Pienaar if (chainCallToken.kind == TokenKind::CodeCompletion) {
407*58b44c81SJacques Pienaar addCompletion(chainCallToken, MatcherCompletion("extract(\"", "extract"));
408*58b44c81SJacques Pienaar return false;
409*58b44c81SJacques Pienaar }
410*58b44c81SJacques Pienaar
411*58b44c81SJacques Pienaar if (chainCallToken.kind != TokenKind::Ident ||
412*58b44c81SJacques Pienaar chainCallToken.text != TokenInfo::ID_Extract) {
413*58b44c81SJacques Pienaar error->addError(chainCallToken.range,
414*58b44c81SJacques Pienaar ErrorType::ParserMalformedChainedExpr);
415*58b44c81SJacques Pienaar return false;
416*58b44c81SJacques Pienaar }
417*58b44c81SJacques Pienaar
418*58b44c81SJacques Pienaar if (chainCallToken.text == TokenInfo::ID_Extract &&
419*58b44c81SJacques Pienaar !parseChainedExpression(functionName))
420*58b44c81SJacques Pienaar return false;
421*58b44c81SJacques Pienaar }
422*58b44c81SJacques Pienaar
42302d9f4d1SDevajith if (!ctor)
42402d9f4d1SDevajith return false;
42502d9f4d1SDevajith // Merge the start and end infos.
42602d9f4d1SDevajith SourceRange matcherRange = nameToken.range;
42702d9f4d1SDevajith matcherRange.end = endToken.range.end;
428*58b44c81SJacques Pienaar VariantMatcher result = sema->actOnMatcherExpression(
429*58b44c81SJacques Pienaar *ctor, matcherRange, functionName, args, error);
43002d9f4d1SDevajith if (result.isNull())
43102d9f4d1SDevajith return false;
43202d9f4d1SDevajith *value = result;
43302d9f4d1SDevajith return true;
43402d9f4d1SDevajith }
43502d9f4d1SDevajith
43602d9f4d1SDevajith // If the prefix of this completion matches the completion token, add it to
43702d9f4d1SDevajith // completions minus the prefix.
addCompletion(const TokenInfo & compToken,const MatcherCompletion & completion)43802d9f4d1SDevajith void Parser::addCompletion(const TokenInfo &compToken,
43902d9f4d1SDevajith const MatcherCompletion &completion) {
44088d319a2SKazu Hirata if (llvm::StringRef(completion.typedText).starts_with(compToken.text)) {
44102d9f4d1SDevajith completions.emplace_back(completion.typedText.substr(compToken.text.size()),
44202d9f4d1SDevajith completion.matcherDecl);
44302d9f4d1SDevajith }
44402d9f4d1SDevajith }
44502d9f4d1SDevajith
44602d9f4d1SDevajith std::vector<MatcherCompletion>
getNamedValueCompletions(llvm::ArrayRef<ArgKind> acceptedTypes)44702d9f4d1SDevajith Parser::getNamedValueCompletions(llvm::ArrayRef<ArgKind> acceptedTypes) {
44802d9f4d1SDevajith if (!namedValues)
44902d9f4d1SDevajith return {};
45002d9f4d1SDevajith
45102d9f4d1SDevajith std::vector<MatcherCompletion> result;
45202d9f4d1SDevajith for (const auto &entry : *namedValues) {
45302d9f4d1SDevajith std::string decl =
45402d9f4d1SDevajith (entry.getValue().getTypeAsString() + " " + entry.getKey()).str();
45502d9f4d1SDevajith result.emplace_back(entry.getKey(), decl);
45602d9f4d1SDevajith }
45702d9f4d1SDevajith return result;
45802d9f4d1SDevajith }
45902d9f4d1SDevajith
addExpressionCompletions()46002d9f4d1SDevajith void Parser::addExpressionCompletions() {
46102d9f4d1SDevajith const TokenInfo compToken = tokenizer->consumeNextTokenIgnoreNewlines();
46202d9f4d1SDevajith assert(compToken.kind == TokenKind::CodeCompletion);
46302d9f4d1SDevajith
46402d9f4d1SDevajith // We cannot complete code if there is an invalid element on the context
46502d9f4d1SDevajith // stack.
46602d9f4d1SDevajith for (const auto &entry : contextStack) {
46702d9f4d1SDevajith if (!entry.first)
46802d9f4d1SDevajith return;
46902d9f4d1SDevajith }
47002d9f4d1SDevajith
47102d9f4d1SDevajith auto acceptedTypes = sema->getAcceptedCompletionTypes(contextStack);
47202d9f4d1SDevajith for (const auto &completion : sema->getMatcherCompletions(acceptedTypes)) {
47302d9f4d1SDevajith addCompletion(compToken, completion);
47402d9f4d1SDevajith }
47502d9f4d1SDevajith
47602d9f4d1SDevajith for (const auto &completion : getNamedValueCompletions(acceptedTypes)) {
47702d9f4d1SDevajith addCompletion(compToken, completion);
47802d9f4d1SDevajith }
47902d9f4d1SDevajith }
48002d9f4d1SDevajith
48102d9f4d1SDevajith // Parse an <Expresssion>
parseExpressionImpl(VariantValue * value)48202d9f4d1SDevajith bool Parser::parseExpressionImpl(VariantValue *value) {
48302d9f4d1SDevajith switch (tokenizer->nextTokenKind()) {
48402d9f4d1SDevajith case TokenKind::Literal:
48502d9f4d1SDevajith *value = tokenizer->consumeNextToken().value;
48602d9f4d1SDevajith return true;
48702d9f4d1SDevajith case TokenKind::Ident:
48802d9f4d1SDevajith return parseIdentifierPrefixImpl(value);
48902d9f4d1SDevajith case TokenKind::CodeCompletion:
49002d9f4d1SDevajith addExpressionCompletions();
49102d9f4d1SDevajith return false;
49202d9f4d1SDevajith case TokenKind::Eof:
49302d9f4d1SDevajith error->addError(tokenizer->consumeNextToken().range,
49402d9f4d1SDevajith ErrorType::ParserNoCode);
49502d9f4d1SDevajith return false;
49602d9f4d1SDevajith
49702d9f4d1SDevajith case TokenKind::Error:
49802d9f4d1SDevajith // This error was already reported by the tokenizer.
49902d9f4d1SDevajith return false;
50002d9f4d1SDevajith case TokenKind::NewLine:
50102d9f4d1SDevajith case TokenKind::OpenParen:
50202d9f4d1SDevajith case TokenKind::CloseParen:
50302d9f4d1SDevajith case TokenKind::Comma:
50402d9f4d1SDevajith case TokenKind::Period:
50502d9f4d1SDevajith case TokenKind::InvalidChar:
50602d9f4d1SDevajith const TokenInfo token = tokenizer->consumeNextToken();
50702d9f4d1SDevajith error->addError(token.range, ErrorType::ParserInvalidToken)
50802d9f4d1SDevajith << (token.kind == TokenKind::NewLine ? "NewLine" : token.text);
50902d9f4d1SDevajith return false;
51002d9f4d1SDevajith }
51102d9f4d1SDevajith
51202d9f4d1SDevajith llvm_unreachable("Unknown token kind.");
51302d9f4d1SDevajith }
51402d9f4d1SDevajith
Parser(CodeTokenizer * tokenizer,const Registry & matcherRegistry,const NamedValueMap * namedValues,Diagnostics * error)51502d9f4d1SDevajith Parser::Parser(CodeTokenizer *tokenizer, const Registry &matcherRegistry,
51602d9f4d1SDevajith const NamedValueMap *namedValues, Diagnostics *error)
51702d9f4d1SDevajith : tokenizer(tokenizer),
51802d9f4d1SDevajith sema(std::make_unique<RegistrySema>(matcherRegistry)),
51902d9f4d1SDevajith namedValues(namedValues), error(error) {}
52002d9f4d1SDevajith
52102d9f4d1SDevajith Parser::RegistrySema::~RegistrySema() = default;
52202d9f4d1SDevajith
52302d9f4d1SDevajith std::optional<MatcherCtor>
lookupMatcherCtor(llvm::StringRef matcherName)52402d9f4d1SDevajith Parser::RegistrySema::lookupMatcherCtor(llvm::StringRef matcherName) {
52502d9f4d1SDevajith return RegistryManager::lookupMatcherCtor(matcherName, matcherRegistry);
52602d9f4d1SDevajith }
52702d9f4d1SDevajith
actOnMatcherExpression(MatcherCtor ctor,SourceRange nameRange,llvm::StringRef functionName,llvm::ArrayRef<ParserValue> args,Diagnostics * error)52802d9f4d1SDevajith VariantMatcher Parser::RegistrySema::actOnMatcherExpression(
529*58b44c81SJacques Pienaar MatcherCtor ctor, SourceRange nameRange, llvm::StringRef functionName,
530*58b44c81SJacques Pienaar llvm::ArrayRef<ParserValue> args, Diagnostics *error) {
531*58b44c81SJacques Pienaar return RegistryManager::constructMatcher(ctor, nameRange, functionName, args,
532*58b44c81SJacques Pienaar error);
53302d9f4d1SDevajith }
53402d9f4d1SDevajith
getAcceptedCompletionTypes(llvm::ArrayRef<std::pair<MatcherCtor,unsigned>> context)53502d9f4d1SDevajith std::vector<ArgKind> Parser::RegistrySema::getAcceptedCompletionTypes(
53602d9f4d1SDevajith llvm::ArrayRef<std::pair<MatcherCtor, unsigned>> context) {
53702d9f4d1SDevajith return RegistryManager::getAcceptedCompletionTypes(context);
53802d9f4d1SDevajith }
53902d9f4d1SDevajith
getMatcherCompletions(llvm::ArrayRef<ArgKind> acceptedTypes)54002d9f4d1SDevajith std::vector<MatcherCompletion> Parser::RegistrySema::getMatcherCompletions(
54102d9f4d1SDevajith llvm::ArrayRef<ArgKind> acceptedTypes) {
54202d9f4d1SDevajith return RegistryManager::getMatcherCompletions(acceptedTypes, matcherRegistry);
54302d9f4d1SDevajith }
54402d9f4d1SDevajith
parseExpression(llvm::StringRef & code,const Registry & matcherRegistry,const NamedValueMap * namedValues,VariantValue * value,Diagnostics * error)54502d9f4d1SDevajith bool Parser::parseExpression(llvm::StringRef &code,
54602d9f4d1SDevajith const Registry &matcherRegistry,
54702d9f4d1SDevajith const NamedValueMap *namedValues,
54802d9f4d1SDevajith VariantValue *value, Diagnostics *error) {
54902d9f4d1SDevajith CodeTokenizer tokenizer(code, error);
55002d9f4d1SDevajith Parser parser(&tokenizer, matcherRegistry, namedValues, error);
55102d9f4d1SDevajith if (!parser.parseExpressionImpl(value))
55202d9f4d1SDevajith return false;
55302d9f4d1SDevajith auto nextToken = tokenizer.peekNextToken();
55402d9f4d1SDevajith if (nextToken.kind != TokenKind::Eof &&
55502d9f4d1SDevajith nextToken.kind != TokenKind::NewLine) {
55602d9f4d1SDevajith error->addError(tokenizer.peekNextToken().range,
55702d9f4d1SDevajith ErrorType::ParserTrailingCode);
55802d9f4d1SDevajith return false;
55902d9f4d1SDevajith }
56002d9f4d1SDevajith return true;
56102d9f4d1SDevajith }
56202d9f4d1SDevajith
56302d9f4d1SDevajith std::vector<MatcherCompletion>
completeExpression(llvm::StringRef & code,unsigned completionOffset,const Registry & matcherRegistry,const NamedValueMap * namedValues)56402d9f4d1SDevajith Parser::completeExpression(llvm::StringRef &code, unsigned completionOffset,
56502d9f4d1SDevajith const Registry &matcherRegistry,
56602d9f4d1SDevajith const NamedValueMap *namedValues) {
56702d9f4d1SDevajith Diagnostics error;
56802d9f4d1SDevajith CodeTokenizer tokenizer(code, &error, completionOffset);
56902d9f4d1SDevajith Parser parser(&tokenizer, matcherRegistry, namedValues, &error);
57002d9f4d1SDevajith VariantValue dummy;
57102d9f4d1SDevajith parser.parseExpressionImpl(&dummy);
57202d9f4d1SDevajith
57302d9f4d1SDevajith return parser.completions;
57402d9f4d1SDevajith }
57502d9f4d1SDevajith
parseMatcherExpression(llvm::StringRef & code,const Registry & matcherRegistry,const NamedValueMap * namedValues,Diagnostics * error)57602d9f4d1SDevajith std::optional<DynMatcher> Parser::parseMatcherExpression(
57702d9f4d1SDevajith llvm::StringRef &code, const Registry &matcherRegistry,
57802d9f4d1SDevajith const NamedValueMap *namedValues, Diagnostics *error) {
57902d9f4d1SDevajith VariantValue value;
58002d9f4d1SDevajith if (!parseExpression(code, matcherRegistry, namedValues, &value, error))
58102d9f4d1SDevajith return std::nullopt;
58202d9f4d1SDevajith if (!value.isMatcher()) {
58302d9f4d1SDevajith error->addError(SourceRange(), ErrorType::ParserNotAMatcher);
58402d9f4d1SDevajith return std::nullopt;
58502d9f4d1SDevajith }
58602d9f4d1SDevajith std::optional<DynMatcher> result = value.getMatcher().getDynMatcher();
58702d9f4d1SDevajith if (!result) {
58802d9f4d1SDevajith error->addError(SourceRange(), ErrorType::ParserOverloadedType)
58902d9f4d1SDevajith << value.getTypeAsString();
59002d9f4d1SDevajith }
59102d9f4d1SDevajith return result;
59202d9f4d1SDevajith }
59302d9f4d1SDevajith
59402d9f4d1SDevajith } // namespace mlir::query::matcher::internal
595