xref: /llvm-project/mlir/lib/AsmParser/Lexer.cpp (revision 285a229f205ae67dca48c8eac8206a115320c677)
1c60b897dSRiver Riddle //===- Lexer.cpp - MLIR Lexer Implementation ------------------------------===//
2c60b897dSRiver Riddle //
3c60b897dSRiver Riddle // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4c60b897dSRiver Riddle // See https://llvm.org/LICENSE.txt for license information.
5c60b897dSRiver Riddle // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6c60b897dSRiver Riddle //
7c60b897dSRiver Riddle //===----------------------------------------------------------------------===//
8c60b897dSRiver Riddle //
9c60b897dSRiver Riddle // This file implements the lexer for the MLIR textual form.
10c60b897dSRiver Riddle //
11c60b897dSRiver Riddle //===----------------------------------------------------------------------===//
12c60b897dSRiver Riddle 
13c60b897dSRiver Riddle #include "Lexer.h"
14*285a229fSMehdi Amini #include "Token.h"
15c60b897dSRiver Riddle #include "mlir/AsmParser/CodeComplete.h"
16c60b897dSRiver Riddle #include "mlir/IR/Diagnostics.h"
17c60b897dSRiver Riddle #include "mlir/IR/Location.h"
18c60b897dSRiver Riddle #include "mlir/IR/MLIRContext.h"
19*285a229fSMehdi Amini #include "mlir/Support/LLVM.h"
20*285a229fSMehdi Amini #include "llvm/ADT/STLExtras.h"
21c60b897dSRiver Riddle #include "llvm/ADT/StringExtras.h"
22c60b897dSRiver Riddle #include "llvm/ADT/StringSwitch.h"
23*285a229fSMehdi Amini #include "llvm/Support/ErrorHandling.h"
24c60b897dSRiver Riddle #include "llvm/Support/SourceMgr.h"
25*285a229fSMehdi Amini #include <cassert>
26*285a229fSMehdi Amini #include <cctype>
27c60b897dSRiver Riddle 
28c60b897dSRiver Riddle using namespace mlir;
29c60b897dSRiver Riddle 
/// Returns true if `c` is one of the punctuation characters permitted inside
/// a prefixed identifier's suffix, i.e. one of `$`, `.`, `_` or `-`; returns
/// false for every other character.
static bool isPunct(char c) {
  switch (c) {
  case '$':
  case '.':
  case '_':
  case '-':
    return true;
  default:
    return false;
  }
}
35c60b897dSRiver Riddle 
Lexer(const llvm::SourceMgr & sourceMgr,MLIRContext * context,AsmParserCodeCompleteContext * codeCompleteContext)36c60b897dSRiver Riddle Lexer::Lexer(const llvm::SourceMgr &sourceMgr, MLIRContext *context,
37c60b897dSRiver Riddle              AsmParserCodeCompleteContext *codeCompleteContext)
38c60b897dSRiver Riddle     : sourceMgr(sourceMgr), context(context), codeCompleteLoc(nullptr) {
39c60b897dSRiver Riddle   auto bufferID = sourceMgr.getMainFileID();
40c60b897dSRiver Riddle   curBuffer = sourceMgr.getMemoryBuffer(bufferID)->getBuffer();
41c60b897dSRiver Riddle   curPtr = curBuffer.begin();
42c60b897dSRiver Riddle 
43c60b897dSRiver Riddle   // Set the code completion location if it was provided.
44c60b897dSRiver Riddle   if (codeCompleteContext)
45c60b897dSRiver Riddle     codeCompleteLoc = codeCompleteContext->getCodeCompleteLoc().getPointer();
46c60b897dSRiver Riddle }
47c60b897dSRiver Riddle 
48c60b897dSRiver Riddle /// Encode the specified source location information into an attribute for
49c60b897dSRiver Riddle /// attachment to the IR.
getEncodedSourceLocation(SMLoc loc)50c60b897dSRiver Riddle Location Lexer::getEncodedSourceLocation(SMLoc loc) {
51c60b897dSRiver Riddle   auto &sourceMgr = getSourceMgr();
52c60b897dSRiver Riddle   unsigned mainFileID = sourceMgr.getMainFileID();
53c60b897dSRiver Riddle 
54c60b897dSRiver Riddle   // TODO: Fix performance issues in SourceMgr::getLineAndColumn so that we can
55c60b897dSRiver Riddle   //       use it here.
56c60b897dSRiver Riddle   auto &bufferInfo = sourceMgr.getBufferInfo(mainFileID);
57c60b897dSRiver Riddle   unsigned lineNo = bufferInfo.getLineNumber(loc.getPointer());
58c60b897dSRiver Riddle   unsigned column =
59c60b897dSRiver Riddle       (loc.getPointer() - bufferInfo.getPointerForLineNumber(lineNo)) + 1;
60c60b897dSRiver Riddle   auto *buffer = sourceMgr.getMemoryBuffer(mainFileID);
61c60b897dSRiver Riddle 
62c60b897dSRiver Riddle   return FileLineColLoc::get(context, buffer->getBufferIdentifier(), lineNo,
63c60b897dSRiver Riddle                              column);
64c60b897dSRiver Riddle }
65c60b897dSRiver Riddle 
66c60b897dSRiver Riddle /// emitError - Emit an error message and return an Token::error token.
emitError(const char * loc,const Twine & message)67c60b897dSRiver Riddle Token Lexer::emitError(const char *loc, const Twine &message) {
68c60b897dSRiver Riddle   mlir::emitError(getEncodedSourceLocation(SMLoc::getFromPointer(loc)),
69c60b897dSRiver Riddle                   message);
70c60b897dSRiver Riddle   return formToken(Token::error, loc);
71c60b897dSRiver Riddle }
72c60b897dSRiver Riddle 
lexToken()73c60b897dSRiver Riddle Token Lexer::lexToken() {
74c60b897dSRiver Riddle   while (true) {
75c60b897dSRiver Riddle     const char *tokStart = curPtr;
76c60b897dSRiver Riddle 
77c60b897dSRiver Riddle     // Check to see if the current token is at the code completion location.
78c60b897dSRiver Riddle     if (tokStart == codeCompleteLoc)
79c60b897dSRiver Riddle       return formToken(Token::code_complete, tokStart);
80c60b897dSRiver Riddle 
81c60b897dSRiver Riddle     // Lex the next token.
82c60b897dSRiver Riddle     switch (*curPtr++) {
83c60b897dSRiver Riddle     default:
84c60b897dSRiver Riddle       // Handle bare identifiers.
85c60b897dSRiver Riddle       if (isalpha(curPtr[-1]))
86c60b897dSRiver Riddle         return lexBareIdentifierOrKeyword(tokStart);
87c60b897dSRiver Riddle 
88c60b897dSRiver Riddle       // Unknown character, emit an error.
89c60b897dSRiver Riddle       return emitError(tokStart, "unexpected character");
90c60b897dSRiver Riddle 
91c60b897dSRiver Riddle     case ' ':
92c60b897dSRiver Riddle     case '\t':
93c60b897dSRiver Riddle     case '\n':
94c60b897dSRiver Riddle     case '\r':
95c60b897dSRiver Riddle       // Handle whitespace.
96c60b897dSRiver Riddle       continue;
97c60b897dSRiver Riddle 
98c60b897dSRiver Riddle     case '_':
99c60b897dSRiver Riddle       // Handle bare identifiers.
100c60b897dSRiver Riddle       return lexBareIdentifierOrKeyword(tokStart);
101c60b897dSRiver Riddle 
102c60b897dSRiver Riddle     case 0:
103c60b897dSRiver Riddle       // This may either be a nul character in the source file or may be the EOF
104c60b897dSRiver Riddle       // marker that llvm::MemoryBuffer guarantees will be there.
105c60b897dSRiver Riddle       if (curPtr - 1 == curBuffer.end())
106c60b897dSRiver Riddle         return formToken(Token::eof, tokStart);
107c60b897dSRiver Riddle       continue;
108c60b897dSRiver Riddle 
109c60b897dSRiver Riddle     case ':':
110c60b897dSRiver Riddle       return formToken(Token::colon, tokStart);
111c60b897dSRiver Riddle     case ',':
112c60b897dSRiver Riddle       return formToken(Token::comma, tokStart);
113c60b897dSRiver Riddle     case '.':
114c60b897dSRiver Riddle       return lexEllipsis(tokStart);
115c60b897dSRiver Riddle     case '(':
116c60b897dSRiver Riddle       return formToken(Token::l_paren, tokStart);
117c60b897dSRiver Riddle     case ')':
118c60b897dSRiver Riddle       return formToken(Token::r_paren, tokStart);
119c60b897dSRiver Riddle     case '{':
120c60b897dSRiver Riddle       if (*curPtr == '-' && *(curPtr + 1) == '#') {
121c60b897dSRiver Riddle         curPtr += 2;
122c60b897dSRiver Riddle         return formToken(Token::file_metadata_begin, tokStart);
123c60b897dSRiver Riddle       }
124c60b897dSRiver Riddle       return formToken(Token::l_brace, tokStart);
125c60b897dSRiver Riddle     case '}':
126c60b897dSRiver Riddle       return formToken(Token::r_brace, tokStart);
127c60b897dSRiver Riddle     case '[':
128c60b897dSRiver Riddle       return formToken(Token::l_square, tokStart);
129c60b897dSRiver Riddle     case ']':
130c60b897dSRiver Riddle       return formToken(Token::r_square, tokStart);
131c60b897dSRiver Riddle     case '<':
132c60b897dSRiver Riddle       return formToken(Token::less, tokStart);
133c60b897dSRiver Riddle     case '>':
134c60b897dSRiver Riddle       return formToken(Token::greater, tokStart);
135c60b897dSRiver Riddle     case '=':
136c60b897dSRiver Riddle       return formToken(Token::equal, tokStart);
137c60b897dSRiver Riddle 
138c60b897dSRiver Riddle     case '+':
139c60b897dSRiver Riddle       return formToken(Token::plus, tokStart);
140c60b897dSRiver Riddle     case '*':
141c60b897dSRiver Riddle       return formToken(Token::star, tokStart);
142c60b897dSRiver Riddle     case '-':
143c60b897dSRiver Riddle       if (*curPtr == '>') {
144c60b897dSRiver Riddle         ++curPtr;
145c60b897dSRiver Riddle         return formToken(Token::arrow, tokStart);
146c60b897dSRiver Riddle       }
147c60b897dSRiver Riddle       return formToken(Token::minus, tokStart);
148c60b897dSRiver Riddle 
149c60b897dSRiver Riddle     case '?':
150c60b897dSRiver Riddle       return formToken(Token::question, tokStart);
151c60b897dSRiver Riddle 
152c60b897dSRiver Riddle     case '|':
153c60b897dSRiver Riddle       return formToken(Token::vertical_bar, tokStart);
154c60b897dSRiver Riddle 
155c60b897dSRiver Riddle     case '/':
156c60b897dSRiver Riddle       if (*curPtr == '/') {
157c60b897dSRiver Riddle         skipComment();
158c60b897dSRiver Riddle         continue;
159c60b897dSRiver Riddle       }
160c60b897dSRiver Riddle       return emitError(tokStart, "unexpected character");
161c60b897dSRiver Riddle 
162c60b897dSRiver Riddle     case '@':
163c60b897dSRiver Riddle       return lexAtIdentifier(tokStart);
164c60b897dSRiver Riddle 
165c60b897dSRiver Riddle     case '#':
166c60b897dSRiver Riddle       if (*curPtr == '-' && *(curPtr + 1) == '}') {
167c60b897dSRiver Riddle         curPtr += 2;
168c60b897dSRiver Riddle         return formToken(Token::file_metadata_end, tokStart);
169c60b897dSRiver Riddle       }
170fc63c054SFangrui Song       [[fallthrough]];
171c60b897dSRiver Riddle     case '!':
172c60b897dSRiver Riddle     case '^':
173c60b897dSRiver Riddle     case '%':
174c60b897dSRiver Riddle       return lexPrefixedIdentifier(tokStart);
175c60b897dSRiver Riddle     case '"':
176c60b897dSRiver Riddle       return lexString(tokStart);
177c60b897dSRiver Riddle 
178c60b897dSRiver Riddle     case '0':
179c60b897dSRiver Riddle     case '1':
180c60b897dSRiver Riddle     case '2':
181c60b897dSRiver Riddle     case '3':
182c60b897dSRiver Riddle     case '4':
183c60b897dSRiver Riddle     case '5':
184c60b897dSRiver Riddle     case '6':
185c60b897dSRiver Riddle     case '7':
186c60b897dSRiver Riddle     case '8':
187c60b897dSRiver Riddle     case '9':
188c60b897dSRiver Riddle       return lexNumber(tokStart);
189c60b897dSRiver Riddle     }
190c60b897dSRiver Riddle   }
191c60b897dSRiver Riddle }
192c60b897dSRiver Riddle 
193c60b897dSRiver Riddle /// Lex an '@foo' identifier.
194c60b897dSRiver Riddle ///
195c60b897dSRiver Riddle ///   symbol-ref-id ::= `@` (bare-id | string-literal)
196c60b897dSRiver Riddle ///
lexAtIdentifier(const char * tokStart)197c60b897dSRiver Riddle Token Lexer::lexAtIdentifier(const char *tokStart) {
198c60b897dSRiver Riddle   char cur = *curPtr++;
199c60b897dSRiver Riddle 
200c60b897dSRiver Riddle   // Try to parse a string literal, if present.
201c60b897dSRiver Riddle   if (cur == '"') {
202c60b897dSRiver Riddle     Token stringIdentifier = lexString(curPtr);
203c60b897dSRiver Riddle     if (stringIdentifier.is(Token::error))
204c60b897dSRiver Riddle       return stringIdentifier;
205c60b897dSRiver Riddle     return formToken(Token::at_identifier, tokStart);
206c60b897dSRiver Riddle   }
207c60b897dSRiver Riddle 
208c60b897dSRiver Riddle   // Otherwise, these always start with a letter or underscore.
209c60b897dSRiver Riddle   if (!isalpha(cur) && cur != '_')
210c60b897dSRiver Riddle     return emitError(curPtr - 1,
211c60b897dSRiver Riddle                      "@ identifier expected to start with letter or '_'");
212c60b897dSRiver Riddle 
213c60b897dSRiver Riddle   while (isalpha(*curPtr) || isdigit(*curPtr) || *curPtr == '_' ||
214c60b897dSRiver Riddle          *curPtr == '$' || *curPtr == '.')
215c60b897dSRiver Riddle     ++curPtr;
216c60b897dSRiver Riddle   return formToken(Token::at_identifier, tokStart);
217c60b897dSRiver Riddle }
218c60b897dSRiver Riddle 
219c60b897dSRiver Riddle /// Lex a bare identifier or keyword that starts with a letter.
220c60b897dSRiver Riddle ///
221c60b897dSRiver Riddle ///   bare-id ::= (letter|[_]) (letter|digit|[_$.])*
222c60b897dSRiver Riddle ///   integer-type ::= `[su]?i[1-9][0-9]*`
223c60b897dSRiver Riddle ///
lexBareIdentifierOrKeyword(const char * tokStart)224c60b897dSRiver Riddle Token Lexer::lexBareIdentifierOrKeyword(const char *tokStart) {
225c60b897dSRiver Riddle   // Match the rest of the identifier regex: [0-9a-zA-Z_.$]*
226c60b897dSRiver Riddle   while (isalpha(*curPtr) || isdigit(*curPtr) || *curPtr == '_' ||
227c60b897dSRiver Riddle          *curPtr == '$' || *curPtr == '.')
228c60b897dSRiver Riddle     ++curPtr;
229c60b897dSRiver Riddle 
230c60b897dSRiver Riddle   // Check to see if this identifier is a keyword.
231c60b897dSRiver Riddle   StringRef spelling(tokStart, curPtr - tokStart);
232c60b897dSRiver Riddle 
233c60b897dSRiver Riddle   auto isAllDigit = [](StringRef str) {
234c60b897dSRiver Riddle     return llvm::all_of(str, llvm::isDigit);
235c60b897dSRiver Riddle   };
236c60b897dSRiver Riddle 
237c60b897dSRiver Riddle   // Check for i123, si456, ui789.
238c60b897dSRiver Riddle   if ((spelling.size() > 1 && tokStart[0] == 'i' &&
239c60b897dSRiver Riddle        isAllDigit(spelling.drop_front())) ||
240c60b897dSRiver Riddle       ((spelling.size() > 2 && tokStart[1] == 'i' &&
241c60b897dSRiver Riddle         (tokStart[0] == 's' || tokStart[0] == 'u')) &&
242c60b897dSRiver Riddle        isAllDigit(spelling.drop_front(2))))
243c60b897dSRiver Riddle     return Token(Token::inttype, spelling);
244c60b897dSRiver Riddle 
245c60b897dSRiver Riddle   Token::Kind kind = StringSwitch<Token::Kind>(spelling)
246c60b897dSRiver Riddle #define TOK_KEYWORD(SPELLING) .Case(#SPELLING, Token::kw_##SPELLING)
247c60b897dSRiver Riddle #include "TokenKinds.def"
248c60b897dSRiver Riddle                          .Default(Token::bare_identifier);
249c60b897dSRiver Riddle 
250c60b897dSRiver Riddle   return Token(kind, spelling);
251c60b897dSRiver Riddle }
252c60b897dSRiver Riddle 
253c60b897dSRiver Riddle /// Skip a comment line, starting with a '//'.
254c60b897dSRiver Riddle ///
255c60b897dSRiver Riddle ///   TODO: add a regex for comments here and to the spec.
256c60b897dSRiver Riddle ///
skipComment()257c60b897dSRiver Riddle void Lexer::skipComment() {
258c60b897dSRiver Riddle   // Advance over the second '/' in a '//' comment.
259c60b897dSRiver Riddle   assert(*curPtr == '/');
260c60b897dSRiver Riddle   ++curPtr;
261c60b897dSRiver Riddle 
262c60b897dSRiver Riddle   while (true) {
263c60b897dSRiver Riddle     switch (*curPtr++) {
264c60b897dSRiver Riddle     case '\n':
265c60b897dSRiver Riddle     case '\r':
266c60b897dSRiver Riddle       // Newline is end of comment.
267c60b897dSRiver Riddle       return;
268c60b897dSRiver Riddle     case 0:
269c60b897dSRiver Riddle       // If this is the end of the buffer, end the comment.
270c60b897dSRiver Riddle       if (curPtr - 1 == curBuffer.end()) {
271c60b897dSRiver Riddle         --curPtr;
272c60b897dSRiver Riddle         return;
273c60b897dSRiver Riddle       }
274fc63c054SFangrui Song       [[fallthrough]];
275c60b897dSRiver Riddle     default:
276c60b897dSRiver Riddle       // Skip over other characters.
277c60b897dSRiver Riddle       break;
278c60b897dSRiver Riddle     }
279c60b897dSRiver Riddle   }
280c60b897dSRiver Riddle }
281c60b897dSRiver Riddle 
282c60b897dSRiver Riddle /// Lex an ellipsis.
283c60b897dSRiver Riddle ///
284c60b897dSRiver Riddle ///   ellipsis ::= '...'
285c60b897dSRiver Riddle ///
lexEllipsis(const char * tokStart)286c60b897dSRiver Riddle Token Lexer::lexEllipsis(const char *tokStart) {
287c60b897dSRiver Riddle   assert(curPtr[-1] == '.');
288c60b897dSRiver Riddle 
289c60b897dSRiver Riddle   if (curPtr == curBuffer.end() || *curPtr != '.' || *(curPtr + 1) != '.')
290c60b897dSRiver Riddle     return emitError(curPtr, "expected three consecutive dots for an ellipsis");
291c60b897dSRiver Riddle 
292c60b897dSRiver Riddle   curPtr += 2;
293c60b897dSRiver Riddle   return formToken(Token::ellipsis, tokStart);
294c60b897dSRiver Riddle }
295c60b897dSRiver Riddle 
296c60b897dSRiver Riddle /// Lex a number literal.
297c60b897dSRiver Riddle ///
298c60b897dSRiver Riddle ///   integer-literal ::= digit+ | `0x` hex_digit+
299c60b897dSRiver Riddle ///   float-literal ::= [-+]?[0-9]+[.][0-9]*([eE][-+]?[0-9]+)?
300c60b897dSRiver Riddle ///
lexNumber(const char * tokStart)301c60b897dSRiver Riddle Token Lexer::lexNumber(const char *tokStart) {
302c60b897dSRiver Riddle   assert(isdigit(curPtr[-1]));
303c60b897dSRiver Riddle 
304c60b897dSRiver Riddle   // Handle the hexadecimal case.
305c60b897dSRiver Riddle   if (curPtr[-1] == '0' && *curPtr == 'x') {
306c60b897dSRiver Riddle     // If we see stuff like 0xi32, this is a literal `0` followed by an
307c60b897dSRiver Riddle     // identifier `xi32`, stop after `0`.
308c60b897dSRiver Riddle     if (!isxdigit(curPtr[1]))
309c60b897dSRiver Riddle       return formToken(Token::integer, tokStart);
310c60b897dSRiver Riddle 
311c60b897dSRiver Riddle     curPtr += 2;
312c60b897dSRiver Riddle     while (isxdigit(*curPtr))
313c60b897dSRiver Riddle       ++curPtr;
314c60b897dSRiver Riddle 
315c60b897dSRiver Riddle     return formToken(Token::integer, tokStart);
316c60b897dSRiver Riddle   }
317c60b897dSRiver Riddle 
318c60b897dSRiver Riddle   // Handle the normal decimal case.
319c60b897dSRiver Riddle   while (isdigit(*curPtr))
320c60b897dSRiver Riddle     ++curPtr;
321c60b897dSRiver Riddle 
322c60b897dSRiver Riddle   if (*curPtr != '.')
323c60b897dSRiver Riddle     return formToken(Token::integer, tokStart);
324c60b897dSRiver Riddle   ++curPtr;
325c60b897dSRiver Riddle 
326c60b897dSRiver Riddle   // Skip over [0-9]*([eE][-+]?[0-9]+)?
327c60b897dSRiver Riddle   while (isdigit(*curPtr))
328c60b897dSRiver Riddle     ++curPtr;
329c60b897dSRiver Riddle 
330c60b897dSRiver Riddle   if (*curPtr == 'e' || *curPtr == 'E') {
331c60b897dSRiver Riddle     if (isdigit(static_cast<unsigned char>(curPtr[1])) ||
332c60b897dSRiver Riddle         ((curPtr[1] == '-' || curPtr[1] == '+') &&
333c60b897dSRiver Riddle          isdigit(static_cast<unsigned char>(curPtr[2])))) {
334c60b897dSRiver Riddle       curPtr += 2;
335c60b897dSRiver Riddle       while (isdigit(*curPtr))
336c60b897dSRiver Riddle         ++curPtr;
337c60b897dSRiver Riddle     }
338c60b897dSRiver Riddle   }
339c60b897dSRiver Riddle   return formToken(Token::floatliteral, tokStart);
340c60b897dSRiver Riddle }
341c60b897dSRiver Riddle 
342c60b897dSRiver Riddle /// Lex an identifier that starts with a prefix followed by suffix-id.
343c60b897dSRiver Riddle ///
344c60b897dSRiver Riddle ///   attribute-id  ::= `#` suffix-id
345c60b897dSRiver Riddle ///   ssa-id        ::= '%' suffix-id
346c60b897dSRiver Riddle ///   block-id      ::= '^' suffix-id
347c60b897dSRiver Riddle ///   type-id       ::= '!' suffix-id
348c60b897dSRiver Riddle ///   suffix-id     ::= digit+ | (letter|id-punct) (letter|id-punct|digit)*
349c60b897dSRiver Riddle ///   id-punct      ::= `$` | `.` | `_` | `-`
350c60b897dSRiver Riddle ///
lexPrefixedIdentifier(const char * tokStart)351c60b897dSRiver Riddle Token Lexer::lexPrefixedIdentifier(const char *tokStart) {
352c60b897dSRiver Riddle   Token::Kind kind;
353c60b897dSRiver Riddle   StringRef errorKind;
354c60b897dSRiver Riddle   switch (*tokStart) {
355c60b897dSRiver Riddle   case '#':
356c60b897dSRiver Riddle     kind = Token::hash_identifier;
357c60b897dSRiver Riddle     errorKind = "invalid attribute name";
358c60b897dSRiver Riddle     break;
359c60b897dSRiver Riddle   case '%':
360c60b897dSRiver Riddle     kind = Token::percent_identifier;
361c60b897dSRiver Riddle     errorKind = "invalid SSA name";
362c60b897dSRiver Riddle     break;
363c60b897dSRiver Riddle   case '^':
364c60b897dSRiver Riddle     kind = Token::caret_identifier;
365c60b897dSRiver Riddle     errorKind = "invalid block name";
366c60b897dSRiver Riddle     break;
367c60b897dSRiver Riddle   case '!':
368c60b897dSRiver Riddle     kind = Token::exclamation_identifier;
369c60b897dSRiver Riddle     errorKind = "invalid type identifier";
370c60b897dSRiver Riddle     break;
371c60b897dSRiver Riddle   default:
372c60b897dSRiver Riddle     llvm_unreachable("invalid caller");
373c60b897dSRiver Riddle   }
374c60b897dSRiver Riddle 
375c60b897dSRiver Riddle   // Parse suffix-id.
376c60b897dSRiver Riddle   if (isdigit(*curPtr)) {
377c60b897dSRiver Riddle     // If suffix-id starts with a digit, the rest must be digits.
378c60b897dSRiver Riddle     while (isdigit(*curPtr))
379c60b897dSRiver Riddle       ++curPtr;
380c60b897dSRiver Riddle   } else if (isalpha(*curPtr) || isPunct(*curPtr)) {
381c60b897dSRiver Riddle     do {
382c60b897dSRiver Riddle       ++curPtr;
383c60b897dSRiver Riddle     } while (isalpha(*curPtr) || isdigit(*curPtr) || isPunct(*curPtr));
384c60b897dSRiver Riddle   } else if (curPtr == codeCompleteLoc) {
385c60b897dSRiver Riddle     return formToken(Token::code_complete, tokStart);
386c60b897dSRiver Riddle   } else {
387c60b897dSRiver Riddle     return emitError(curPtr - 1, errorKind);
388c60b897dSRiver Riddle   }
389c60b897dSRiver Riddle 
390c60b897dSRiver Riddle   // Check for a code completion within the identifier.
391c60b897dSRiver Riddle   if (codeCompleteLoc && codeCompleteLoc >= tokStart &&
392c60b897dSRiver Riddle       codeCompleteLoc <= curPtr) {
393c60b897dSRiver Riddle     return Token(Token::code_complete,
394c60b897dSRiver Riddle                  StringRef(tokStart, codeCompleteLoc - tokStart));
395c60b897dSRiver Riddle   }
396c60b897dSRiver Riddle 
397c60b897dSRiver Riddle   return formToken(kind, tokStart);
398c60b897dSRiver Riddle }
399c60b897dSRiver Riddle 
400c60b897dSRiver Riddle /// Lex a string literal.
401c60b897dSRiver Riddle ///
402c60b897dSRiver Riddle ///   string-literal ::= '"' [^"\n\f\v\r]* '"'
403c60b897dSRiver Riddle ///
404c60b897dSRiver Riddle /// TODO: define escaping rules.
lexString(const char * tokStart)405c60b897dSRiver Riddle Token Lexer::lexString(const char *tokStart) {
406c60b897dSRiver Riddle   assert(curPtr[-1] == '"');
407c60b897dSRiver Riddle 
408c60b897dSRiver Riddle   while (true) {
409c60b897dSRiver Riddle     // Check to see if there is a code completion location within the string. In
410c60b897dSRiver Riddle     // these cases we generate a completion location and place the currently
411c60b897dSRiver Riddle     // lexed string within the token. This allows for the parser to use the
412c60b897dSRiver Riddle     // partially lexed string when computing the completion results.
413c60b897dSRiver Riddle     if (curPtr == codeCompleteLoc)
414c60b897dSRiver Riddle       return formToken(Token::code_complete, tokStart);
415c60b897dSRiver Riddle 
416c60b897dSRiver Riddle     switch (*curPtr++) {
417c60b897dSRiver Riddle     case '"':
418c60b897dSRiver Riddle       return formToken(Token::string, tokStart);
419c60b897dSRiver Riddle     case 0:
420c60b897dSRiver Riddle       // If this is a random nul character in the middle of a string, just
421c60b897dSRiver Riddle       // include it.  If it is the end of file, then it is an error.
422c60b897dSRiver Riddle       if (curPtr - 1 != curBuffer.end())
423c60b897dSRiver Riddle         continue;
424fc63c054SFangrui Song       [[fallthrough]];
425c60b897dSRiver Riddle     case '\n':
426c60b897dSRiver Riddle     case '\v':
427c60b897dSRiver Riddle     case '\f':
428c60b897dSRiver Riddle       return emitError(curPtr - 1, "expected '\"' in string literal");
429c60b897dSRiver Riddle     case '\\':
430c60b897dSRiver Riddle       // Handle explicitly a few escapes.
431c60b897dSRiver Riddle       if (*curPtr == '"' || *curPtr == '\\' || *curPtr == 'n' || *curPtr == 't')
432c60b897dSRiver Riddle         ++curPtr;
433c60b897dSRiver Riddle       else if (llvm::isHexDigit(*curPtr) && llvm::isHexDigit(curPtr[1]))
434c60b897dSRiver Riddle         // Support \xx for two hex digits.
435c60b897dSRiver Riddle         curPtr += 2;
436c60b897dSRiver Riddle       else
437c60b897dSRiver Riddle         return emitError(curPtr - 1, "unknown escape in string literal");
438c60b897dSRiver Riddle       continue;
439c60b897dSRiver Riddle 
440c60b897dSRiver Riddle     default:
441c60b897dSRiver Riddle       continue;
442c60b897dSRiver Riddle     }
443c60b897dSRiver Riddle   }
444c60b897dSRiver Riddle }
445