1 //===- Lexer.h - MLIR PDLL Frontend Lexer -----------------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #ifndef LIB_TOOLS_PDLL_PARSER_LEXER_H_ 10 #define LIB_TOOLS_PDLL_PARSER_LEXER_H_ 11 12 #include "mlir/Support/LLVM.h" 13 #include "llvm/ADT/StringRef.h" 14 #include "llvm/Support/SMLoc.h" 15 16 namespace llvm { 17 class SourceMgr; 18 } // namespace llvm 19 20 namespace mlir { 21 namespace pdll { 22 class CodeCompleteContext; 23 24 namespace ast { 25 class DiagnosticEngine; 26 } // namespace ast 27 28 //===----------------------------------------------------------------------===// 29 // Token 30 //===----------------------------------------------------------------------===// 31 32 class Token { 33 public: 34 enum Kind { 35 /// Markers. 36 eof, 37 error, 38 /// Token signifying a code completion location. 39 code_complete, 40 /// Token signifying a code completion location within a string. 41 code_complete_string, 42 43 /// Keywords. 44 KW_BEGIN, 45 /// Dependent keywords, i.e. those that are treated as keywords depending on 46 /// the current parser context. 47 KW_DEPENDENT_BEGIN, 48 kw_attr, 49 kw_op, 50 kw_type, 51 KW_DEPENDENT_END, 52 53 /// General keywords. 54 kw_Attr, 55 kw_erase, 56 kw_let, 57 kw_Constraint, 58 kw_not, 59 kw_Op, 60 kw_OpName, 61 kw_Pattern, 62 kw_replace, 63 kw_return, 64 kw_rewrite, 65 kw_Rewrite, 66 kw_Type, 67 kw_TypeRange, 68 kw_Value, 69 kw_ValueRange, 70 kw_with, 71 KW_END, 72 73 /// Punctuation. 74 arrow, 75 colon, 76 comma, 77 dot, 78 equal, 79 equal_arrow, 80 semicolon, 81 /// Paired punctuation. 82 less, 83 greater, 84 l_brace, 85 r_brace, 86 l_paren, 87 r_paren, 88 l_square, 89 r_square, 90 underscore, 91 92 /// Tokens. 93 directive, 94 identifier, 95 integer, 96 string_block, 97 string 98 }; Token(Kind kind,StringRef spelling)99 Token(Kind kind, StringRef spelling) : kind(kind), spelling(spelling) {} 100 101 /// Given a token containing a string literal, return its value, including 102 /// removing the quote characters and unescaping the contents of the string. 103 std::string getStringValue() const; 104 105 /// Returns true if the current token is a string literal. isString()106 bool isString() const { return isAny(Token::string, Token::string_block); } 107 108 /// Returns true if the current token is a keyword. isKeyword()109 bool isKeyword() const { 110 return kind > Token::KW_BEGIN && kind < Token::KW_END; 111 } 112 113 /// Returns true if the current token is a keyword in a dependent context, and 114 /// in any other situation (e.g. variable names) may be treated as an 115 /// identifier. isDependentKeyword()116 bool isDependentKeyword() const { 117 return kind > Token::KW_DEPENDENT_BEGIN && kind < Token::KW_DEPENDENT_END; 118 } 119 120 /// Return the bytes that make up this token. getSpelling()121 StringRef getSpelling() const { return spelling; } 122 123 /// Return the kind of this token. getKind()124 Kind getKind() const { return kind; } 125 126 /// Return true if this token is one of the specified kinds. isAny(Kind k1,Kind k2)127 bool isAny(Kind k1, Kind k2) const { return is(k1) || is(k2); } 128 template <typename... T> isAny(Kind k1,Kind k2,Kind k3,T...others)129 bool isAny(Kind k1, Kind k2, Kind k3, T... others) const { 130 return is(k1) || isAny(k2, k3, others...); 131 } 132 133 /// Return if the token does not have the given kind. isNot(Kind k)134 bool isNot(Kind k) const { return k != kind; } 135 template <typename... T> isNot(Kind k1,Kind k2,T...others)136 bool isNot(Kind k1, Kind k2, T... others) const { 137 return !isAny(k1, k2, others...); 138 } 139 140 /// Return if the token has the given kind. is(Kind k)141 bool is(Kind k) const { return kind == k; } 142 143 /// Return a location for the start of this token. getStartLoc()144 SMLoc getStartLoc() const { return SMLoc::getFromPointer(spelling.data()); } 145 /// Return a location at the end of this token. getEndLoc()146 SMLoc getEndLoc() const { 147 return SMLoc::getFromPointer(spelling.data() + spelling.size()); 148 } 149 /// Return a location for the range of this token. getLoc()150 SMRange getLoc() const { return SMRange(getStartLoc(), getEndLoc()); } 151 152 private: 153 /// Discriminator that indicates the kind of token this is. 154 Kind kind; 155 156 /// A reference to the entire token contents; this is always a pointer into 157 /// a memory buffer owned by the source manager. 158 StringRef spelling; 159 }; 160 161 //===----------------------------------------------------------------------===// 162 // Lexer 163 //===----------------------------------------------------------------------===// 164 165 class Lexer { 166 public: 167 Lexer(llvm::SourceMgr &mgr, ast::DiagnosticEngine &diagEngine, 168 CodeCompleteContext *codeCompleteContext); 169 ~Lexer(); 170 171 /// Return a reference to the source manager used by the lexer. getSourceMgr()172 llvm::SourceMgr &getSourceMgr() { return srcMgr; } 173 174 /// Return a reference to the diagnostic engine used by the lexer. getDiagEngine()175 ast::DiagnosticEngine &getDiagEngine() { return diagEngine; } 176 177 /// Push an include of the given file. This will cause the lexer to start 178 /// processing the provided file. Returns failure if the file could not be 179 /// opened, success otherwise. 180 LogicalResult pushInclude(StringRef filename, SMRange includeLoc); 181 182 /// Lex the next token and return it. 183 Token lexToken(); 184 185 /// Change the position of the lexer cursor. The next token we lex will start 186 /// at the designated point in the input. resetPointer(const char * newPointer)187 void resetPointer(const char *newPointer) { curPtr = newPointer; } 188 189 /// Emit an error to the lexer with the given location and message. 190 Token emitError(SMRange loc, const Twine &msg); 191 Token emitError(const char *loc, const Twine &msg); 192 Token emitErrorAndNote(SMRange loc, const Twine &msg, SMRange noteLoc, 193 const Twine ¬e); 194 195 private: formToken(Token::Kind kind,const char * tokStart)196 Token formToken(Token::Kind kind, const char *tokStart) { 197 return Token(kind, StringRef(tokStart, curPtr - tokStart)); 198 } 199 200 /// Return the next character in the stream. 201 int getNextChar(); 202 203 /// Lex methods. 204 void lexComment(); 205 Token lexDirective(const char *tokStart); 206 Token lexIdentifier(const char *tokStart); 207 Token lexNumber(const char *tokStart); 208 Token lexString(const char *tokStart, bool isStringBlock); 209 210 llvm::SourceMgr &srcMgr; 211 int curBufferID; 212 StringRef curBuffer; 213 const char *curPtr; 214 215 /// The engine used to emit diagnostics during lexing/parsing. 216 ast::DiagnosticEngine &diagEngine; 217 218 /// A flag indicating if we added a default diagnostic handler to the provided 219 /// diagEngine. 220 bool addedHandlerToDiagEngine; 221 222 /// The optional code completion point within the input file. 223 const char *codeCompletionLocation; 224 }; 225 } // namespace pdll 226 } // namespace mlir 227 228 #endif // LIB_TOOLS_PDLL_PARSER_LEXER_H_ 229