xref: /llvm-project/mlir/lib/Tools/PDLL/Parser/Lexer.h (revision db791b278a414fb6df1acc1799adcf11d8fb9169)
1 //===- Lexer.h - MLIR PDLL Frontend Lexer -----------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #ifndef LIB_TOOLS_PDLL_PARSER_LEXER_H_
10 #define LIB_TOOLS_PDLL_PARSER_LEXER_H_
11 
12 #include "mlir/Support/LLVM.h"
13 #include "llvm/ADT/StringRef.h"
14 #include "llvm/Support/SMLoc.h"
15 
16 namespace llvm {
17 class SourceMgr;
18 } // namespace llvm
19 
20 namespace mlir {
21 namespace pdll {
22 class CodeCompleteContext;
23 
24 namespace ast {
25 class DiagnosticEngine;
26 } // namespace ast
27 
28 //===----------------------------------------------------------------------===//
29 // Token
30 //===----------------------------------------------------------------------===//
31 
32 class Token {
33 public:
34   enum Kind {
35     /// Markers.
36     eof,
37     error,
38     /// Token signifying a code completion location.
39     code_complete,
40     /// Token signifying a code completion location within a string.
41     code_complete_string,
42 
43     /// Keywords.
44     KW_BEGIN,
45     /// Dependent keywords, i.e. those that are treated as keywords depending on
46     /// the current parser context.
47     KW_DEPENDENT_BEGIN,
48     kw_attr,
49     kw_op,
50     kw_type,
51     KW_DEPENDENT_END,
52 
53     /// General keywords.
54     kw_Attr,
55     kw_erase,
56     kw_let,
57     kw_Constraint,
58     kw_not,
59     kw_Op,
60     kw_OpName,
61     kw_Pattern,
62     kw_replace,
63     kw_return,
64     kw_rewrite,
65     kw_Rewrite,
66     kw_Type,
67     kw_TypeRange,
68     kw_Value,
69     kw_ValueRange,
70     kw_with,
71     KW_END,
72 
73     /// Punctuation.
74     arrow,
75     colon,
76     comma,
77     dot,
78     equal,
79     equal_arrow,
80     semicolon,
81     /// Paired punctuation.
82     less,
83     greater,
84     l_brace,
85     r_brace,
86     l_paren,
87     r_paren,
88     l_square,
89     r_square,
90     underscore,
91 
92     /// Tokens.
93     directive,
94     identifier,
95     integer,
96     string_block,
97     string
98   };
Token(Kind kind,StringRef spelling)99   Token(Kind kind, StringRef spelling) : kind(kind), spelling(spelling) {}
100 
101   /// Given a token containing a string literal, return its value, including
102   /// removing the quote characters and unescaping the contents of the string.
103   std::string getStringValue() const;
104 
105   /// Returns true if the current token is a string literal.
isString()106   bool isString() const { return isAny(Token::string, Token::string_block); }
107 
108   /// Returns true if the current token is a keyword.
isKeyword()109   bool isKeyword() const {
110     return kind > Token::KW_BEGIN && kind < Token::KW_END;
111   }
112 
113   /// Returns true if the current token is a keyword in a dependent context, and
114   /// in any other situation (e.g. variable names) may be treated as an
115   /// identifier.
isDependentKeyword()116   bool isDependentKeyword() const {
117     return kind > Token::KW_DEPENDENT_BEGIN && kind < Token::KW_DEPENDENT_END;
118   }
119 
120   /// Return the bytes that make up this token.
getSpelling()121   StringRef getSpelling() const { return spelling; }
122 
123   /// Return the kind of this token.
getKind()124   Kind getKind() const { return kind; }
125 
126   /// Return true if this token is one of the specified kinds.
isAny(Kind k1,Kind k2)127   bool isAny(Kind k1, Kind k2) const { return is(k1) || is(k2); }
128   template <typename... T>
isAny(Kind k1,Kind k2,Kind k3,T...others)129   bool isAny(Kind k1, Kind k2, Kind k3, T... others) const {
130     return is(k1) || isAny(k2, k3, others...);
131   }
132 
133   /// Return if the token does not have the given kind.
isNot(Kind k)134   bool isNot(Kind k) const { return k != kind; }
135   template <typename... T>
isNot(Kind k1,Kind k2,T...others)136   bool isNot(Kind k1, Kind k2, T... others) const {
137     return !isAny(k1, k2, others...);
138   }
139 
140   /// Return if the token has the given kind.
is(Kind k)141   bool is(Kind k) const { return kind == k; }
142 
143   /// Return a location for the start of this token.
getStartLoc()144   SMLoc getStartLoc() const { return SMLoc::getFromPointer(spelling.data()); }
145   /// Return a location at the end of this token.
getEndLoc()146   SMLoc getEndLoc() const {
147     return SMLoc::getFromPointer(spelling.data() + spelling.size());
148   }
149   /// Return a location for the range of this token.
getLoc()150   SMRange getLoc() const { return SMRange(getStartLoc(), getEndLoc()); }
151 
152 private:
153   /// Discriminator that indicates the kind of token this is.
154   Kind kind;
155 
156   /// A reference to the entire token contents; this is always a pointer into
157   /// a memory buffer owned by the source manager.
158   StringRef spelling;
159 };
160 
161 //===----------------------------------------------------------------------===//
162 // Lexer
163 //===----------------------------------------------------------------------===//
164 
165 class Lexer {
166 public:
167   Lexer(llvm::SourceMgr &mgr, ast::DiagnosticEngine &diagEngine,
168         CodeCompleteContext *codeCompleteContext);
169   ~Lexer();
170 
171   /// Return a reference to the source manager used by the lexer.
getSourceMgr()172   llvm::SourceMgr &getSourceMgr() { return srcMgr; }
173 
174   /// Return a reference to the diagnostic engine used by the lexer.
getDiagEngine()175   ast::DiagnosticEngine &getDiagEngine() { return diagEngine; }
176 
177   /// Push an include of the given file. This will cause the lexer to start
178   /// processing the provided file. Returns failure if the file could not be
179   /// opened, success otherwise.
180   LogicalResult pushInclude(StringRef filename, SMRange includeLoc);
181 
182   /// Lex the next token and return it.
183   Token lexToken();
184 
185   /// Change the position of the lexer cursor. The next token we lex will start
186   /// at the designated point in the input.
resetPointer(const char * newPointer)187   void resetPointer(const char *newPointer) { curPtr = newPointer; }
188 
189   /// Emit an error to the lexer with the given location and message.
190   Token emitError(SMRange loc, const Twine &msg);
191   Token emitError(const char *loc, const Twine &msg);
192   Token emitErrorAndNote(SMRange loc, const Twine &msg, SMRange noteLoc,
193                          const Twine &note);
194 
195 private:
formToken(Token::Kind kind,const char * tokStart)196   Token formToken(Token::Kind kind, const char *tokStart) {
197     return Token(kind, StringRef(tokStart, curPtr - tokStart));
198   }
199 
200   /// Return the next character in the stream.
201   int getNextChar();
202 
203   /// Lex methods.
204   void lexComment();
205   Token lexDirective(const char *tokStart);
206   Token lexIdentifier(const char *tokStart);
207   Token lexNumber(const char *tokStart);
208   Token lexString(const char *tokStart, bool isStringBlock);
209 
210   llvm::SourceMgr &srcMgr;
211   int curBufferID;
212   StringRef curBuffer;
213   const char *curPtr;
214 
215   /// The engine used to emit diagnostics during lexing/parsing.
216   ast::DiagnosticEngine &diagEngine;
217 
218   /// A flag indicating if we added a default diagnostic handler to the provided
219   /// diagEngine.
220   bool addedHandlerToDiagEngine;
221 
222   /// The optional code completion point within the input file.
223   const char *codeCompletionLocation;
224 };
225 } // namespace pdll
226 } // namespace mlir
227 
228 #endif // LIB_TOOLS_PDLL_PARSER_LEXER_H_
229