xref: /llvm-project/mlir/lib/Tools/PDLL/Parser/Lexer.cpp (revision db791b278a414fb6df1acc1799adcf11d8fb9169)
111d26bd1SRiver Riddle //===- Lexer.cpp ----------------------------------------------------------===//
211d26bd1SRiver Riddle //
311d26bd1SRiver Riddle // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
411d26bd1SRiver Riddle // See https://llvm.org/LICENSE.txt for license information.
511d26bd1SRiver Riddle // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
611d26bd1SRiver Riddle //
711d26bd1SRiver Riddle //===----------------------------------------------------------------------===//
811d26bd1SRiver Riddle 
911d26bd1SRiver Riddle #include "Lexer.h"
1011d26bd1SRiver Riddle #include "mlir/Tools/PDLL/AST/Diagnostic.h"
11008de486SRiver Riddle #include "mlir/Tools/PDLL/Parser/CodeComplete.h"
1211d26bd1SRiver Riddle #include "llvm/ADT/StringExtras.h"
1311d26bd1SRiver Riddle #include "llvm/ADT/StringSwitch.h"
1411d26bd1SRiver Riddle #include "llvm/Support/SourceMgr.h"
1511d26bd1SRiver Riddle 
1611d26bd1SRiver Riddle using namespace mlir;
1711d26bd1SRiver Riddle using namespace mlir::pdll;
1811d26bd1SRiver Riddle 
1911d26bd1SRiver Riddle //===----------------------------------------------------------------------===//
2011d26bd1SRiver Riddle // Token
2111d26bd1SRiver Riddle //===----------------------------------------------------------------------===//
2211d26bd1SRiver Riddle 
getStringValue() const2311d26bd1SRiver Riddle std::string Token::getStringValue() const {
2441d2c6dfSRiver Riddle   assert(getKind() == string || getKind() == string_block ||
2541d2c6dfSRiver Riddle          getKind() == code_complete_string);
2611d26bd1SRiver Riddle 
2711d26bd1SRiver Riddle   // Start by dropping the quotes.
2841d2c6dfSRiver Riddle   StringRef bytes = getSpelling();
2941d2c6dfSRiver Riddle   if (is(string))
3041d2c6dfSRiver Riddle     bytes = bytes.drop_front().drop_back();
3141d2c6dfSRiver Riddle   else if (is(string_block))
3241d2c6dfSRiver Riddle     bytes = bytes.drop_front(2).drop_back(2);
3311d26bd1SRiver Riddle 
3411d26bd1SRiver Riddle   std::string result;
3511d26bd1SRiver Riddle   result.reserve(bytes.size());
3611d26bd1SRiver Riddle   for (unsigned i = 0, e = bytes.size(); i != e;) {
3711d26bd1SRiver Riddle     auto c = bytes[i++];
3811d26bd1SRiver Riddle     if (c != '\\') {
3911d26bd1SRiver Riddle       result.push_back(c);
4011d26bd1SRiver Riddle       continue;
4111d26bd1SRiver Riddle     }
4211d26bd1SRiver Riddle 
4311d26bd1SRiver Riddle     assert(i + 1 <= e && "invalid string should be caught by lexer");
4411d26bd1SRiver Riddle     auto c1 = bytes[i++];
4511d26bd1SRiver Riddle     switch (c1) {
4611d26bd1SRiver Riddle     case '"':
4711d26bd1SRiver Riddle     case '\\':
4811d26bd1SRiver Riddle       result.push_back(c1);
4911d26bd1SRiver Riddle       continue;
5011d26bd1SRiver Riddle     case 'n':
5111d26bd1SRiver Riddle       result.push_back('\n');
5211d26bd1SRiver Riddle       continue;
5311d26bd1SRiver Riddle     case 't':
5411d26bd1SRiver Riddle       result.push_back('\t');
5511d26bd1SRiver Riddle       continue;
5611d26bd1SRiver Riddle     default:
5711d26bd1SRiver Riddle       break;
5811d26bd1SRiver Riddle     }
5911d26bd1SRiver Riddle 
6011d26bd1SRiver Riddle     assert(i + 1 <= e && "invalid string should be caught by lexer");
6111d26bd1SRiver Riddle     auto c2 = bytes[i++];
6211d26bd1SRiver Riddle 
6311d26bd1SRiver Riddle     assert(llvm::isHexDigit(c1) && llvm::isHexDigit(c2) && "invalid escape");
6411d26bd1SRiver Riddle     result.push_back((llvm::hexDigitValue(c1) << 4) | llvm::hexDigitValue(c2));
6511d26bd1SRiver Riddle   }
6611d26bd1SRiver Riddle 
6711d26bd1SRiver Riddle   return result;
6811d26bd1SRiver Riddle }
6911d26bd1SRiver Riddle 
7011d26bd1SRiver Riddle //===----------------------------------------------------------------------===//
7111d26bd1SRiver Riddle // Lexer
7211d26bd1SRiver Riddle //===----------------------------------------------------------------------===//
7311d26bd1SRiver Riddle 
Lexer(llvm::SourceMgr & mgr,ast::DiagnosticEngine & diagEngine,CodeCompleteContext * codeCompleteContext)74008de486SRiver Riddle Lexer::Lexer(llvm::SourceMgr &mgr, ast::DiagnosticEngine &diagEngine,
75008de486SRiver Riddle              CodeCompleteContext *codeCompleteContext)
76008de486SRiver Riddle     : srcMgr(mgr), diagEngine(diagEngine), addedHandlerToDiagEngine(false),
77008de486SRiver Riddle       codeCompletionLocation(nullptr) {
7811d26bd1SRiver Riddle   curBufferID = mgr.getMainFileID();
7911d26bd1SRiver Riddle   curBuffer = srcMgr.getMemoryBuffer(curBufferID)->getBuffer();
8011d26bd1SRiver Riddle   curPtr = curBuffer.begin();
8111d26bd1SRiver Riddle 
82008de486SRiver Riddle   // Set the code completion location if necessary.
83008de486SRiver Riddle   if (codeCompleteContext) {
84008de486SRiver Riddle     codeCompletionLocation =
85008de486SRiver Riddle         codeCompleteContext->getCodeCompleteLoc().getPointer();
86008de486SRiver Riddle   }
87008de486SRiver Riddle 
8811d26bd1SRiver Riddle   // If the diag engine has no handler, add a default that emits to the
8911d26bd1SRiver Riddle   // SourceMgr.
9011d26bd1SRiver Riddle   if (!diagEngine.getHandlerFn()) {
9111d26bd1SRiver Riddle     diagEngine.setHandlerFn([&](const ast::Diagnostic &diag) {
9211d26bd1SRiver Riddle       srcMgr.PrintMessage(diag.getLocation().Start, diag.getSeverity(),
9311d26bd1SRiver Riddle                           diag.getMessage());
9411d26bd1SRiver Riddle       for (const ast::Diagnostic &note : diag.getNotes())
9511d26bd1SRiver Riddle         srcMgr.PrintMessage(note.getLocation().Start, note.getSeverity(),
9611d26bd1SRiver Riddle                             note.getMessage());
9711d26bd1SRiver Riddle     });
9811d26bd1SRiver Riddle     addedHandlerToDiagEngine = true;
9911d26bd1SRiver Riddle   }
10011d26bd1SRiver Riddle }
10111d26bd1SRiver Riddle 
~Lexer()10211d26bd1SRiver Riddle Lexer::~Lexer() {
103b7f93c28SJeff Niu   if (addedHandlerToDiagEngine)
104b7f93c28SJeff Niu     diagEngine.setHandlerFn(nullptr);
10511d26bd1SRiver Riddle }
10611d26bd1SRiver Riddle 
pushInclude(StringRef filename,SMRange includeLoc)10709af7fefSRiver Riddle LogicalResult Lexer::pushInclude(StringRef filename, SMRange includeLoc) {
10811d26bd1SRiver Riddle   std::string includedFile;
10909af7fefSRiver Riddle   int bufferID =
11009af7fefSRiver Riddle       srcMgr.AddIncludeFile(filename.str(), includeLoc.End, includedFile);
11109af7fefSRiver Riddle   if (!bufferID)
11209af7fefSRiver Riddle     return failure();
11311d26bd1SRiver Riddle 
11411d26bd1SRiver Riddle   curBufferID = bufferID;
11511d26bd1SRiver Riddle   curBuffer = srcMgr.getMemoryBuffer(curBufferID)->getBuffer();
11611d26bd1SRiver Riddle   curPtr = curBuffer.begin();
11711d26bd1SRiver Riddle   return success();
11811d26bd1SRiver Riddle }
11911d26bd1SRiver Riddle 
emitError(SMRange loc,const Twine & msg)1206842ec42SRiver Riddle Token Lexer::emitError(SMRange loc, const Twine &msg) {
12111d26bd1SRiver Riddle   diagEngine.emitError(loc, msg);
12211d26bd1SRiver Riddle   return formToken(Token::error, loc.Start.getPointer());
12311d26bd1SRiver Riddle }
emitErrorAndNote(SMRange loc,const Twine & msg,SMRange noteLoc,const Twine & note)124b7f93c28SJeff Niu Token Lexer::emitErrorAndNote(SMRange loc, const Twine &msg, SMRange noteLoc,
125b7f93c28SJeff Niu                               const Twine &note) {
12611d26bd1SRiver Riddle   diagEngine.emitError(loc, msg)->attachNote(note, noteLoc);
12711d26bd1SRiver Riddle   return formToken(Token::error, loc.Start.getPointer());
12811d26bd1SRiver Riddle }
emitError(const char * loc,const Twine & msg)12911d26bd1SRiver Riddle Token Lexer::emitError(const char *loc, const Twine &msg) {
130b7f93c28SJeff Niu   return emitError(
131b7f93c28SJeff Niu       SMRange(SMLoc::getFromPointer(loc), SMLoc::getFromPointer(loc + 1)), msg);
13211d26bd1SRiver Riddle }
13311d26bd1SRiver Riddle 
getNextChar()13411d26bd1SRiver Riddle int Lexer::getNextChar() {
13511d26bd1SRiver Riddle   char curChar = *curPtr++;
13611d26bd1SRiver Riddle   switch (curChar) {
13711d26bd1SRiver Riddle   default:
13811d26bd1SRiver Riddle     return static_cast<unsigned char>(curChar);
13911d26bd1SRiver Riddle   case 0: {
14011d26bd1SRiver Riddle     // A nul character in the stream is either the end of the current buffer
14111d26bd1SRiver Riddle     // or a random nul in the file. Disambiguate that here.
142b7f93c28SJeff Niu     if (curPtr - 1 != curBuffer.end())
143b7f93c28SJeff Niu       return 0;
14411d26bd1SRiver Riddle 
14511d26bd1SRiver Riddle     // Otherwise, return end of file.
14611d26bd1SRiver Riddle     --curPtr;
14711d26bd1SRiver Riddle     return EOF;
14811d26bd1SRiver Riddle   }
14911d26bd1SRiver Riddle   case '\n':
15011d26bd1SRiver Riddle   case '\r':
15111d26bd1SRiver Riddle     // Handle the newline character by ignoring it and incrementing the line
15211d26bd1SRiver Riddle     // count. However, be careful about 'dos style' files with \n\r in them.
15311d26bd1SRiver Riddle     // Only treat a \n\r or \r\n as a single line.
15411d26bd1SRiver Riddle     if ((*curPtr == '\n' || (*curPtr == '\r')) && *curPtr != curChar)
15511d26bd1SRiver Riddle       ++curPtr;
15611d26bd1SRiver Riddle     return '\n';
15711d26bd1SRiver Riddle   }
15811d26bd1SRiver Riddle }
15911d26bd1SRiver Riddle 
lexToken()16011d26bd1SRiver Riddle Token Lexer::lexToken() {
16111d26bd1SRiver Riddle   while (true) {
16211d26bd1SRiver Riddle     const char *tokStart = curPtr;
16311d26bd1SRiver Riddle 
164008de486SRiver Riddle     // Check to see if this token is at the code completion location.
165008de486SRiver Riddle     if (tokStart == codeCompletionLocation)
166008de486SRiver Riddle       return formToken(Token::code_complete, tokStart);
167008de486SRiver Riddle 
16811d26bd1SRiver Riddle     // This always consumes at least one character.
16911d26bd1SRiver Riddle     int curChar = getNextChar();
17011d26bd1SRiver Riddle     switch (curChar) {
17111d26bd1SRiver Riddle     default:
17211d26bd1SRiver Riddle       // Handle identifiers: [a-zA-Z_]
173b7f93c28SJeff Niu       if (isalpha(curChar) || curChar == '_')
174b7f93c28SJeff Niu         return lexIdentifier(tokStart);
17511d26bd1SRiver Riddle 
17611d26bd1SRiver Riddle       // Unknown character, emit an error.
17711d26bd1SRiver Riddle       return emitError(tokStart, "unexpected character");
17811d26bd1SRiver Riddle     case EOF: {
17911d26bd1SRiver Riddle       // Return EOF denoting the end of lexing.
18011d26bd1SRiver Riddle       Token eof = formToken(Token::eof, tokStart);
18111d26bd1SRiver Riddle 
18211d26bd1SRiver Riddle       // Check to see if we are in an included file.
1836842ec42SRiver Riddle       SMLoc parentIncludeLoc = srcMgr.getParentIncludeLoc(curBufferID);
18411d26bd1SRiver Riddle       if (parentIncludeLoc.isValid()) {
18511d26bd1SRiver Riddle         curBufferID = srcMgr.FindBufferContainingLoc(parentIncludeLoc);
18611d26bd1SRiver Riddle         curBuffer = srcMgr.getMemoryBuffer(curBufferID)->getBuffer();
18711d26bd1SRiver Riddle         curPtr = parentIncludeLoc.getPointer();
18811d26bd1SRiver Riddle       }
18911d26bd1SRiver Riddle 
19011d26bd1SRiver Riddle       return eof;
19111d26bd1SRiver Riddle     }
19211d26bd1SRiver Riddle 
19311d26bd1SRiver Riddle     // Lex punctuation.
19411d26bd1SRiver Riddle     case '-':
19511d26bd1SRiver Riddle       if (*curPtr == '>') {
19611d26bd1SRiver Riddle         ++curPtr;
19711d26bd1SRiver Riddle         return formToken(Token::arrow, tokStart);
19811d26bd1SRiver Riddle       }
19911d26bd1SRiver Riddle       return emitError(tokStart, "unexpected character");
20011d26bd1SRiver Riddle     case ':':
20111d26bd1SRiver Riddle       return formToken(Token::colon, tokStart);
20211d26bd1SRiver Riddle     case ',':
20311d26bd1SRiver Riddle       return formToken(Token::comma, tokStart);
20411d26bd1SRiver Riddle     case '.':
20511d26bd1SRiver Riddle       return formToken(Token::dot, tokStart);
20611d26bd1SRiver Riddle     case '=':
20711d26bd1SRiver Riddle       if (*curPtr == '>') {
20811d26bd1SRiver Riddle         ++curPtr;
20911d26bd1SRiver Riddle         return formToken(Token::equal_arrow, tokStart);
21011d26bd1SRiver Riddle       }
21111d26bd1SRiver Riddle       return formToken(Token::equal, tokStart);
21211d26bd1SRiver Riddle     case ';':
21311d26bd1SRiver Riddle       return formToken(Token::semicolon, tokStart);
21411d26bd1SRiver Riddle     case '[':
21511d26bd1SRiver Riddle       if (*curPtr == '{') {
21611d26bd1SRiver Riddle         ++curPtr;
21711d26bd1SRiver Riddle         return lexString(tokStart, /*isStringBlock=*/true);
21811d26bd1SRiver Riddle       }
21911d26bd1SRiver Riddle       return formToken(Token::l_square, tokStart);
22011d26bd1SRiver Riddle     case ']':
22111d26bd1SRiver Riddle       return formToken(Token::r_square, tokStart);
22211d26bd1SRiver Riddle 
22311d26bd1SRiver Riddle     case '<':
22411d26bd1SRiver Riddle       return formToken(Token::less, tokStart);
22511d26bd1SRiver Riddle     case '>':
22611d26bd1SRiver Riddle       return formToken(Token::greater, tokStart);
22711d26bd1SRiver Riddle     case '{':
22811d26bd1SRiver Riddle       return formToken(Token::l_brace, tokStart);
22911d26bd1SRiver Riddle     case '}':
23011d26bd1SRiver Riddle       return formToken(Token::r_brace, tokStart);
23111d26bd1SRiver Riddle     case '(':
23211d26bd1SRiver Riddle       return formToken(Token::l_paren, tokStart);
23311d26bd1SRiver Riddle     case ')':
23411d26bd1SRiver Riddle       return formToken(Token::r_paren, tokStart);
23511d26bd1SRiver Riddle     case '/':
23611d26bd1SRiver Riddle       if (*curPtr == '/') {
23711d26bd1SRiver Riddle         lexComment();
23811d26bd1SRiver Riddle         continue;
23911d26bd1SRiver Riddle       }
24011d26bd1SRiver Riddle       return emitError(tokStart, "unexpected character");
24111d26bd1SRiver Riddle 
24211d26bd1SRiver Riddle     // Ignore whitespace characters.
24311d26bd1SRiver Riddle     case 0:
24411d26bd1SRiver Riddle     case ' ':
24511d26bd1SRiver Riddle     case '\t':
24611d26bd1SRiver Riddle     case '\n':
24711d26bd1SRiver Riddle       return lexToken();
24811d26bd1SRiver Riddle 
24911d26bd1SRiver Riddle     case '#':
25011d26bd1SRiver Riddle       return lexDirective(tokStart);
25111d26bd1SRiver Riddle     case '"':
25211d26bd1SRiver Riddle       return lexString(tokStart, /*isStringBlock=*/false);
25311d26bd1SRiver Riddle 
25411d26bd1SRiver Riddle     case '0':
25511d26bd1SRiver Riddle     case '1':
25611d26bd1SRiver Riddle     case '2':
25711d26bd1SRiver Riddle     case '3':
25811d26bd1SRiver Riddle     case '4':
25911d26bd1SRiver Riddle     case '5':
26011d26bd1SRiver Riddle     case '6':
26111d26bd1SRiver Riddle     case '7':
26211d26bd1SRiver Riddle     case '8':
26311d26bd1SRiver Riddle     case '9':
26411d26bd1SRiver Riddle       return lexNumber(tokStart);
26511d26bd1SRiver Riddle     }
26611d26bd1SRiver Riddle   }
26711d26bd1SRiver Riddle }
26811d26bd1SRiver Riddle 
26911d26bd1SRiver Riddle /// Skip a comment line, starting with a '//'.
lexComment()27011d26bd1SRiver Riddle void Lexer::lexComment() {
27111d26bd1SRiver Riddle   // Advance over the second '/' in a '//' comment.
27211d26bd1SRiver Riddle   assert(*curPtr == '/');
27311d26bd1SRiver Riddle   ++curPtr;
27411d26bd1SRiver Riddle 
27511d26bd1SRiver Riddle   while (true) {
27611d26bd1SRiver Riddle     switch (*curPtr++) {
27711d26bd1SRiver Riddle     case '\n':
27811d26bd1SRiver Riddle     case '\r':
27911d26bd1SRiver Riddle       // Newline is end of comment.
28011d26bd1SRiver Riddle       return;
28111d26bd1SRiver Riddle     case 0:
28211d26bd1SRiver Riddle       // If this is the end of the buffer, end the comment.
28311d26bd1SRiver Riddle       if (curPtr - 1 == curBuffer.end()) {
28411d26bd1SRiver Riddle         --curPtr;
28511d26bd1SRiver Riddle         return;
28611d26bd1SRiver Riddle       }
287fc63c054SFangrui Song       [[fallthrough]];
28811d26bd1SRiver Riddle     default:
28911d26bd1SRiver Riddle       // Skip over other characters.
29011d26bd1SRiver Riddle       break;
29111d26bd1SRiver Riddle     }
29211d26bd1SRiver Riddle   }
29311d26bd1SRiver Riddle }
29411d26bd1SRiver Riddle 
lexDirective(const char * tokStart)29511d26bd1SRiver Riddle Token Lexer::lexDirective(const char *tokStart) {
29611d26bd1SRiver Riddle   // Match the rest with an identifier regex: [0-9a-zA-Z_]*
297b7f93c28SJeff Niu   while (isalnum(*curPtr) || *curPtr == '_')
298b7f93c28SJeff Niu     ++curPtr;
29911d26bd1SRiver Riddle 
30011d26bd1SRiver Riddle   StringRef str(tokStart, curPtr - tokStart);
30111d26bd1SRiver Riddle   return Token(Token::directive, str);
30211d26bd1SRiver Riddle }
30311d26bd1SRiver Riddle 
lexIdentifier(const char * tokStart)30411d26bd1SRiver Riddle Token Lexer::lexIdentifier(const char *tokStart) {
30511d26bd1SRiver Riddle   // Match the rest of the identifier regex: [0-9a-zA-Z_]*
306b7f93c28SJeff Niu   while (isalnum(*curPtr) || *curPtr == '_')
307b7f93c28SJeff Niu     ++curPtr;
30811d26bd1SRiver Riddle 
30911d26bd1SRiver Riddle   // Check to see if this identifier is a keyword.
31011d26bd1SRiver Riddle   StringRef str(tokStart, curPtr - tokStart);
31111d26bd1SRiver Riddle   Token::Kind kind = StringSwitch<Token::Kind>(str)
31211d26bd1SRiver Riddle                          .Case("attr", Token::kw_attr)
31311d26bd1SRiver Riddle                          .Case("Attr", Token::kw_Attr)
31411d26bd1SRiver Riddle                          .Case("erase", Token::kw_erase)
31511d26bd1SRiver Riddle                          .Case("let", Token::kw_let)
31611d26bd1SRiver Riddle                          .Case("Constraint", Token::kw_Constraint)
317*930916c7SMogball                          .Case("not", Token::kw_not)
31811d26bd1SRiver Riddle                          .Case("op", Token::kw_op)
31911d26bd1SRiver Riddle                          .Case("Op", Token::kw_Op)
32011d26bd1SRiver Riddle                          .Case("OpName", Token::kw_OpName)
32111d26bd1SRiver Riddle                          .Case("Pattern", Token::kw_Pattern)
32211d26bd1SRiver Riddle                          .Case("replace", Token::kw_replace)
323faf42264SRiver Riddle                          .Case("return", Token::kw_return)
32411d26bd1SRiver Riddle                          .Case("rewrite", Token::kw_rewrite)
325faf42264SRiver Riddle                          .Case("Rewrite", Token::kw_Rewrite)
32611d26bd1SRiver Riddle                          .Case("type", Token::kw_type)
32711d26bd1SRiver Riddle                          .Case("Type", Token::kw_Type)
32811d26bd1SRiver Riddle                          .Case("TypeRange", Token::kw_TypeRange)
32911d26bd1SRiver Riddle                          .Case("Value", Token::kw_Value)
33011d26bd1SRiver Riddle                          .Case("ValueRange", Token::kw_ValueRange)
33111d26bd1SRiver Riddle                          .Case("with", Token::kw_with)
33211d26bd1SRiver Riddle                          .Case("_", Token::underscore)
33311d26bd1SRiver Riddle                          .Default(Token::identifier);
33411d26bd1SRiver Riddle   return Token(kind, str);
33511d26bd1SRiver Riddle }
33611d26bd1SRiver Riddle 
lexNumber(const char * tokStart)33711d26bd1SRiver Riddle Token Lexer::lexNumber(const char *tokStart) {
33811d26bd1SRiver Riddle   assert(isdigit(curPtr[-1]));
33911d26bd1SRiver Riddle 
34011d26bd1SRiver Riddle   // Handle the normal decimal case.
341b7f93c28SJeff Niu   while (isdigit(*curPtr))
342b7f93c28SJeff Niu     ++curPtr;
34311d26bd1SRiver Riddle 
34411d26bd1SRiver Riddle   return formToken(Token::integer, tokStart);
34511d26bd1SRiver Riddle }
34611d26bd1SRiver Riddle 
lexString(const char * tokStart,bool isStringBlock)34711d26bd1SRiver Riddle Token Lexer::lexString(const char *tokStart, bool isStringBlock) {
34811d26bd1SRiver Riddle   while (true) {
34941d2c6dfSRiver Riddle     // Check to see if there is a code completion location within the string. In
35041d2c6dfSRiver Riddle     // these cases we generate a completion location and place the currently
35141d2c6dfSRiver Riddle     // lexed string within the token (without the quotes). This allows for the
35241d2c6dfSRiver Riddle     // parser to use the partially lexed string when computing the completion
35341d2c6dfSRiver Riddle     // results.
35441d2c6dfSRiver Riddle     if (curPtr == codeCompletionLocation) {
35541d2c6dfSRiver Riddle       return formToken(Token::code_complete_string,
35641d2c6dfSRiver Riddle                        tokStart + (isStringBlock ? 2 : 1));
35741d2c6dfSRiver Riddle     }
35841d2c6dfSRiver Riddle 
35911d26bd1SRiver Riddle     switch (*curPtr++) {
36011d26bd1SRiver Riddle     case '"':
36111d26bd1SRiver Riddle       // If this is a string block, we only end the string when we encounter a
36211d26bd1SRiver Riddle       // `}]`.
363ebb1e900SRiver Riddle       if (!isStringBlock)
364ebb1e900SRiver Riddle         return formToken(Token::string, tokStart);
36511d26bd1SRiver Riddle       continue;
36611d26bd1SRiver Riddle     case '}':
36711d26bd1SRiver Riddle       // If this is a string block, we only end the string when we encounter a
36811d26bd1SRiver Riddle       // `}]`.
369ebb1e900SRiver Riddle       if (!isStringBlock || *curPtr != ']')
370ebb1e900SRiver Riddle         continue;
37111d26bd1SRiver Riddle       ++curPtr;
37211d26bd1SRiver Riddle       return formToken(Token::string_block, tokStart);
373ebb1e900SRiver Riddle     case 0: {
37411d26bd1SRiver Riddle       // If this is a random nul character in the middle of a string, just
37511d26bd1SRiver Riddle       // include it. If it is the end of file, then it is an error.
376ebb1e900SRiver Riddle       if (curPtr - 1 != curBuffer.end())
377ebb1e900SRiver Riddle         continue;
378ebb1e900SRiver Riddle       --curPtr;
379ebb1e900SRiver Riddle 
380ebb1e900SRiver Riddle       StringRef expectedEndStr = isStringBlock ? "}]" : "\"";
381ebb1e900SRiver Riddle       return emitError(curPtr - 1,
382ebb1e900SRiver Riddle                        "expected '" + expectedEndStr + "' in string literal");
383ebb1e900SRiver Riddle     }
384ebb1e900SRiver Riddle 
38511d26bd1SRiver Riddle     case '\n':
38611d26bd1SRiver Riddle     case '\v':
38711d26bd1SRiver Riddle     case '\f':
38811d26bd1SRiver Riddle       // String blocks allow multiple lines.
38911d26bd1SRiver Riddle       if (!isStringBlock)
39011d26bd1SRiver Riddle         return emitError(curPtr - 1, "expected '\"' in string literal");
39111d26bd1SRiver Riddle       continue;
39211d26bd1SRiver Riddle 
39311d26bd1SRiver Riddle     case '\\':
39411d26bd1SRiver Riddle       // Handle explicitly a few escapes.
39511d26bd1SRiver Riddle       if (*curPtr == '"' || *curPtr == '\\' || *curPtr == 'n' ||
39611d26bd1SRiver Riddle           *curPtr == 't') {
39711d26bd1SRiver Riddle         ++curPtr;
39811d26bd1SRiver Riddle       } else if (llvm::isHexDigit(*curPtr) && llvm::isHexDigit(curPtr[1])) {
39911d26bd1SRiver Riddle         // Support \xx for two hex digits.
40011d26bd1SRiver Riddle         curPtr += 2;
40111d26bd1SRiver Riddle       } else {
40211d26bd1SRiver Riddle         return emitError(curPtr - 1, "unknown escape in string literal");
40311d26bd1SRiver Riddle       }
40411d26bd1SRiver Riddle       continue;
40511d26bd1SRiver Riddle 
40611d26bd1SRiver Riddle     default:
40711d26bd1SRiver Riddle       continue;
40811d26bd1SRiver Riddle     }
40911d26bd1SRiver Riddle   }
41011d26bd1SRiver Riddle }
411