//===- Lexer.cpp ----------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
811d26bd1SRiver Riddle
911d26bd1SRiver Riddle #include "Lexer.h"
1011d26bd1SRiver Riddle #include "mlir/Tools/PDLL/AST/Diagnostic.h"
11008de486SRiver Riddle #include "mlir/Tools/PDLL/Parser/CodeComplete.h"
1211d26bd1SRiver Riddle #include "llvm/ADT/StringExtras.h"
1311d26bd1SRiver Riddle #include "llvm/ADT/StringSwitch.h"
1411d26bd1SRiver Riddle #include "llvm/Support/SourceMgr.h"
1511d26bd1SRiver Riddle
1611d26bd1SRiver Riddle using namespace mlir;
1711d26bd1SRiver Riddle using namespace mlir::pdll;
1811d26bd1SRiver Riddle
//===----------------------------------------------------------------------===//
// Token
//===----------------------------------------------------------------------===//
2211d26bd1SRiver Riddle
getStringValue() const2311d26bd1SRiver Riddle std::string Token::getStringValue() const {
2441d2c6dfSRiver Riddle assert(getKind() == string || getKind() == string_block ||
2541d2c6dfSRiver Riddle getKind() == code_complete_string);
2611d26bd1SRiver Riddle
2711d26bd1SRiver Riddle // Start by dropping the quotes.
2841d2c6dfSRiver Riddle StringRef bytes = getSpelling();
2941d2c6dfSRiver Riddle if (is(string))
3041d2c6dfSRiver Riddle bytes = bytes.drop_front().drop_back();
3141d2c6dfSRiver Riddle else if (is(string_block))
3241d2c6dfSRiver Riddle bytes = bytes.drop_front(2).drop_back(2);
3311d26bd1SRiver Riddle
3411d26bd1SRiver Riddle std::string result;
3511d26bd1SRiver Riddle result.reserve(bytes.size());
3611d26bd1SRiver Riddle for (unsigned i = 0, e = bytes.size(); i != e;) {
3711d26bd1SRiver Riddle auto c = bytes[i++];
3811d26bd1SRiver Riddle if (c != '\\') {
3911d26bd1SRiver Riddle result.push_back(c);
4011d26bd1SRiver Riddle continue;
4111d26bd1SRiver Riddle }
4211d26bd1SRiver Riddle
4311d26bd1SRiver Riddle assert(i + 1 <= e && "invalid string should be caught by lexer");
4411d26bd1SRiver Riddle auto c1 = bytes[i++];
4511d26bd1SRiver Riddle switch (c1) {
4611d26bd1SRiver Riddle case '"':
4711d26bd1SRiver Riddle case '\\':
4811d26bd1SRiver Riddle result.push_back(c1);
4911d26bd1SRiver Riddle continue;
5011d26bd1SRiver Riddle case 'n':
5111d26bd1SRiver Riddle result.push_back('\n');
5211d26bd1SRiver Riddle continue;
5311d26bd1SRiver Riddle case 't':
5411d26bd1SRiver Riddle result.push_back('\t');
5511d26bd1SRiver Riddle continue;
5611d26bd1SRiver Riddle default:
5711d26bd1SRiver Riddle break;
5811d26bd1SRiver Riddle }
5911d26bd1SRiver Riddle
6011d26bd1SRiver Riddle assert(i + 1 <= e && "invalid string should be caught by lexer");
6111d26bd1SRiver Riddle auto c2 = bytes[i++];
6211d26bd1SRiver Riddle
6311d26bd1SRiver Riddle assert(llvm::isHexDigit(c1) && llvm::isHexDigit(c2) && "invalid escape");
6411d26bd1SRiver Riddle result.push_back((llvm::hexDigitValue(c1) << 4) | llvm::hexDigitValue(c2));
6511d26bd1SRiver Riddle }
6611d26bd1SRiver Riddle
6711d26bd1SRiver Riddle return result;
6811d26bd1SRiver Riddle }
6911d26bd1SRiver Riddle
//===----------------------------------------------------------------------===//
// Lexer
//===----------------------------------------------------------------------===//
7311d26bd1SRiver Riddle
Lexer(llvm::SourceMgr & mgr,ast::DiagnosticEngine & diagEngine,CodeCompleteContext * codeCompleteContext)74008de486SRiver Riddle Lexer::Lexer(llvm::SourceMgr &mgr, ast::DiagnosticEngine &diagEngine,
75008de486SRiver Riddle CodeCompleteContext *codeCompleteContext)
76008de486SRiver Riddle : srcMgr(mgr), diagEngine(diagEngine), addedHandlerToDiagEngine(false),
77008de486SRiver Riddle codeCompletionLocation(nullptr) {
7811d26bd1SRiver Riddle curBufferID = mgr.getMainFileID();
7911d26bd1SRiver Riddle curBuffer = srcMgr.getMemoryBuffer(curBufferID)->getBuffer();
8011d26bd1SRiver Riddle curPtr = curBuffer.begin();
8111d26bd1SRiver Riddle
82008de486SRiver Riddle // Set the code completion location if necessary.
83008de486SRiver Riddle if (codeCompleteContext) {
84008de486SRiver Riddle codeCompletionLocation =
85008de486SRiver Riddle codeCompleteContext->getCodeCompleteLoc().getPointer();
86008de486SRiver Riddle }
87008de486SRiver Riddle
8811d26bd1SRiver Riddle // If the diag engine has no handler, add a default that emits to the
8911d26bd1SRiver Riddle // SourceMgr.
9011d26bd1SRiver Riddle if (!diagEngine.getHandlerFn()) {
9111d26bd1SRiver Riddle diagEngine.setHandlerFn([&](const ast::Diagnostic &diag) {
9211d26bd1SRiver Riddle srcMgr.PrintMessage(diag.getLocation().Start, diag.getSeverity(),
9311d26bd1SRiver Riddle diag.getMessage());
9411d26bd1SRiver Riddle for (const ast::Diagnostic ¬e : diag.getNotes())
9511d26bd1SRiver Riddle srcMgr.PrintMessage(note.getLocation().Start, note.getSeverity(),
9611d26bd1SRiver Riddle note.getMessage());
9711d26bd1SRiver Riddle });
9811d26bd1SRiver Riddle addedHandlerToDiagEngine = true;
9911d26bd1SRiver Riddle }
10011d26bd1SRiver Riddle }
10111d26bd1SRiver Riddle
~Lexer()10211d26bd1SRiver Riddle Lexer::~Lexer() {
103b7f93c28SJeff Niu if (addedHandlerToDiagEngine)
104b7f93c28SJeff Niu diagEngine.setHandlerFn(nullptr);
10511d26bd1SRiver Riddle }
10611d26bd1SRiver Riddle
pushInclude(StringRef filename,SMRange includeLoc)10709af7fefSRiver Riddle LogicalResult Lexer::pushInclude(StringRef filename, SMRange includeLoc) {
10811d26bd1SRiver Riddle std::string includedFile;
10909af7fefSRiver Riddle int bufferID =
11009af7fefSRiver Riddle srcMgr.AddIncludeFile(filename.str(), includeLoc.End, includedFile);
11109af7fefSRiver Riddle if (!bufferID)
11209af7fefSRiver Riddle return failure();
11311d26bd1SRiver Riddle
11411d26bd1SRiver Riddle curBufferID = bufferID;
11511d26bd1SRiver Riddle curBuffer = srcMgr.getMemoryBuffer(curBufferID)->getBuffer();
11611d26bd1SRiver Riddle curPtr = curBuffer.begin();
11711d26bd1SRiver Riddle return success();
11811d26bd1SRiver Riddle }
11911d26bd1SRiver Riddle
emitError(SMRange loc,const Twine & msg)1206842ec42SRiver Riddle Token Lexer::emitError(SMRange loc, const Twine &msg) {
12111d26bd1SRiver Riddle diagEngine.emitError(loc, msg);
12211d26bd1SRiver Riddle return formToken(Token::error, loc.Start.getPointer());
12311d26bd1SRiver Riddle }
emitErrorAndNote(SMRange loc,const Twine & msg,SMRange noteLoc,const Twine & note)124b7f93c28SJeff Niu Token Lexer::emitErrorAndNote(SMRange loc, const Twine &msg, SMRange noteLoc,
125b7f93c28SJeff Niu const Twine ¬e) {
12611d26bd1SRiver Riddle diagEngine.emitError(loc, msg)->attachNote(note, noteLoc);
12711d26bd1SRiver Riddle return formToken(Token::error, loc.Start.getPointer());
12811d26bd1SRiver Riddle }
emitError(const char * loc,const Twine & msg)12911d26bd1SRiver Riddle Token Lexer::emitError(const char *loc, const Twine &msg) {
130b7f93c28SJeff Niu return emitError(
131b7f93c28SJeff Niu SMRange(SMLoc::getFromPointer(loc), SMLoc::getFromPointer(loc + 1)), msg);
13211d26bd1SRiver Riddle }
13311d26bd1SRiver Riddle
getNextChar()13411d26bd1SRiver Riddle int Lexer::getNextChar() {
13511d26bd1SRiver Riddle char curChar = *curPtr++;
13611d26bd1SRiver Riddle switch (curChar) {
13711d26bd1SRiver Riddle default:
13811d26bd1SRiver Riddle return static_cast<unsigned char>(curChar);
13911d26bd1SRiver Riddle case 0: {
14011d26bd1SRiver Riddle // A nul character in the stream is either the end of the current buffer
14111d26bd1SRiver Riddle // or a random nul in the file. Disambiguate that here.
142b7f93c28SJeff Niu if (curPtr - 1 != curBuffer.end())
143b7f93c28SJeff Niu return 0;
14411d26bd1SRiver Riddle
14511d26bd1SRiver Riddle // Otherwise, return end of file.
14611d26bd1SRiver Riddle --curPtr;
14711d26bd1SRiver Riddle return EOF;
14811d26bd1SRiver Riddle }
14911d26bd1SRiver Riddle case '\n':
15011d26bd1SRiver Riddle case '\r':
15111d26bd1SRiver Riddle // Handle the newline character by ignoring it and incrementing the line
15211d26bd1SRiver Riddle // count. However, be careful about 'dos style' files with \n\r in them.
15311d26bd1SRiver Riddle // Only treat a \n\r or \r\n as a single line.
15411d26bd1SRiver Riddle if ((*curPtr == '\n' || (*curPtr == '\r')) && *curPtr != curChar)
15511d26bd1SRiver Riddle ++curPtr;
15611d26bd1SRiver Riddle return '\n';
15711d26bd1SRiver Riddle }
15811d26bd1SRiver Riddle }
15911d26bd1SRiver Riddle
lexToken()16011d26bd1SRiver Riddle Token Lexer::lexToken() {
16111d26bd1SRiver Riddle while (true) {
16211d26bd1SRiver Riddle const char *tokStart = curPtr;
16311d26bd1SRiver Riddle
164008de486SRiver Riddle // Check to see if this token is at the code completion location.
165008de486SRiver Riddle if (tokStart == codeCompletionLocation)
166008de486SRiver Riddle return formToken(Token::code_complete, tokStart);
167008de486SRiver Riddle
16811d26bd1SRiver Riddle // This always consumes at least one character.
16911d26bd1SRiver Riddle int curChar = getNextChar();
17011d26bd1SRiver Riddle switch (curChar) {
17111d26bd1SRiver Riddle default:
17211d26bd1SRiver Riddle // Handle identifiers: [a-zA-Z_]
173b7f93c28SJeff Niu if (isalpha(curChar) || curChar == '_')
174b7f93c28SJeff Niu return lexIdentifier(tokStart);
17511d26bd1SRiver Riddle
17611d26bd1SRiver Riddle // Unknown character, emit an error.
17711d26bd1SRiver Riddle return emitError(tokStart, "unexpected character");
17811d26bd1SRiver Riddle case EOF: {
17911d26bd1SRiver Riddle // Return EOF denoting the end of lexing.
18011d26bd1SRiver Riddle Token eof = formToken(Token::eof, tokStart);
18111d26bd1SRiver Riddle
18211d26bd1SRiver Riddle // Check to see if we are in an included file.
1836842ec42SRiver Riddle SMLoc parentIncludeLoc = srcMgr.getParentIncludeLoc(curBufferID);
18411d26bd1SRiver Riddle if (parentIncludeLoc.isValid()) {
18511d26bd1SRiver Riddle curBufferID = srcMgr.FindBufferContainingLoc(parentIncludeLoc);
18611d26bd1SRiver Riddle curBuffer = srcMgr.getMemoryBuffer(curBufferID)->getBuffer();
18711d26bd1SRiver Riddle curPtr = parentIncludeLoc.getPointer();
18811d26bd1SRiver Riddle }
18911d26bd1SRiver Riddle
19011d26bd1SRiver Riddle return eof;
19111d26bd1SRiver Riddle }
19211d26bd1SRiver Riddle
19311d26bd1SRiver Riddle // Lex punctuation.
19411d26bd1SRiver Riddle case '-':
19511d26bd1SRiver Riddle if (*curPtr == '>') {
19611d26bd1SRiver Riddle ++curPtr;
19711d26bd1SRiver Riddle return formToken(Token::arrow, tokStart);
19811d26bd1SRiver Riddle }
19911d26bd1SRiver Riddle return emitError(tokStart, "unexpected character");
20011d26bd1SRiver Riddle case ':':
20111d26bd1SRiver Riddle return formToken(Token::colon, tokStart);
20211d26bd1SRiver Riddle case ',':
20311d26bd1SRiver Riddle return formToken(Token::comma, tokStart);
20411d26bd1SRiver Riddle case '.':
20511d26bd1SRiver Riddle return formToken(Token::dot, tokStart);
20611d26bd1SRiver Riddle case '=':
20711d26bd1SRiver Riddle if (*curPtr == '>') {
20811d26bd1SRiver Riddle ++curPtr;
20911d26bd1SRiver Riddle return formToken(Token::equal_arrow, tokStart);
21011d26bd1SRiver Riddle }
21111d26bd1SRiver Riddle return formToken(Token::equal, tokStart);
21211d26bd1SRiver Riddle case ';':
21311d26bd1SRiver Riddle return formToken(Token::semicolon, tokStart);
21411d26bd1SRiver Riddle case '[':
21511d26bd1SRiver Riddle if (*curPtr == '{') {
21611d26bd1SRiver Riddle ++curPtr;
21711d26bd1SRiver Riddle return lexString(tokStart, /*isStringBlock=*/true);
21811d26bd1SRiver Riddle }
21911d26bd1SRiver Riddle return formToken(Token::l_square, tokStart);
22011d26bd1SRiver Riddle case ']':
22111d26bd1SRiver Riddle return formToken(Token::r_square, tokStart);
22211d26bd1SRiver Riddle
22311d26bd1SRiver Riddle case '<':
22411d26bd1SRiver Riddle return formToken(Token::less, tokStart);
22511d26bd1SRiver Riddle case '>':
22611d26bd1SRiver Riddle return formToken(Token::greater, tokStart);
22711d26bd1SRiver Riddle case '{':
22811d26bd1SRiver Riddle return formToken(Token::l_brace, tokStart);
22911d26bd1SRiver Riddle case '}':
23011d26bd1SRiver Riddle return formToken(Token::r_brace, tokStart);
23111d26bd1SRiver Riddle case '(':
23211d26bd1SRiver Riddle return formToken(Token::l_paren, tokStart);
23311d26bd1SRiver Riddle case ')':
23411d26bd1SRiver Riddle return formToken(Token::r_paren, tokStart);
23511d26bd1SRiver Riddle case '/':
23611d26bd1SRiver Riddle if (*curPtr == '/') {
23711d26bd1SRiver Riddle lexComment();
23811d26bd1SRiver Riddle continue;
23911d26bd1SRiver Riddle }
24011d26bd1SRiver Riddle return emitError(tokStart, "unexpected character");
24111d26bd1SRiver Riddle
24211d26bd1SRiver Riddle // Ignore whitespace characters.
24311d26bd1SRiver Riddle case 0:
24411d26bd1SRiver Riddle case ' ':
24511d26bd1SRiver Riddle case '\t':
24611d26bd1SRiver Riddle case '\n':
24711d26bd1SRiver Riddle return lexToken();
24811d26bd1SRiver Riddle
24911d26bd1SRiver Riddle case '#':
25011d26bd1SRiver Riddle return lexDirective(tokStart);
25111d26bd1SRiver Riddle case '"':
25211d26bd1SRiver Riddle return lexString(tokStart, /*isStringBlock=*/false);
25311d26bd1SRiver Riddle
25411d26bd1SRiver Riddle case '0':
25511d26bd1SRiver Riddle case '1':
25611d26bd1SRiver Riddle case '2':
25711d26bd1SRiver Riddle case '3':
25811d26bd1SRiver Riddle case '4':
25911d26bd1SRiver Riddle case '5':
26011d26bd1SRiver Riddle case '6':
26111d26bd1SRiver Riddle case '7':
26211d26bd1SRiver Riddle case '8':
26311d26bd1SRiver Riddle case '9':
26411d26bd1SRiver Riddle return lexNumber(tokStart);
26511d26bd1SRiver Riddle }
26611d26bd1SRiver Riddle }
26711d26bd1SRiver Riddle }
26811d26bd1SRiver Riddle
26911d26bd1SRiver Riddle /// Skip a comment line, starting with a '//'.
lexComment()27011d26bd1SRiver Riddle void Lexer::lexComment() {
27111d26bd1SRiver Riddle // Advance over the second '/' in a '//' comment.
27211d26bd1SRiver Riddle assert(*curPtr == '/');
27311d26bd1SRiver Riddle ++curPtr;
27411d26bd1SRiver Riddle
27511d26bd1SRiver Riddle while (true) {
27611d26bd1SRiver Riddle switch (*curPtr++) {
27711d26bd1SRiver Riddle case '\n':
27811d26bd1SRiver Riddle case '\r':
27911d26bd1SRiver Riddle // Newline is end of comment.
28011d26bd1SRiver Riddle return;
28111d26bd1SRiver Riddle case 0:
28211d26bd1SRiver Riddle // If this is the end of the buffer, end the comment.
28311d26bd1SRiver Riddle if (curPtr - 1 == curBuffer.end()) {
28411d26bd1SRiver Riddle --curPtr;
28511d26bd1SRiver Riddle return;
28611d26bd1SRiver Riddle }
287fc63c054SFangrui Song [[fallthrough]];
28811d26bd1SRiver Riddle default:
28911d26bd1SRiver Riddle // Skip over other characters.
29011d26bd1SRiver Riddle break;
29111d26bd1SRiver Riddle }
29211d26bd1SRiver Riddle }
29311d26bd1SRiver Riddle }
29411d26bd1SRiver Riddle
lexDirective(const char * tokStart)29511d26bd1SRiver Riddle Token Lexer::lexDirective(const char *tokStart) {
29611d26bd1SRiver Riddle // Match the rest with an identifier regex: [0-9a-zA-Z_]*
297b7f93c28SJeff Niu while (isalnum(*curPtr) || *curPtr == '_')
298b7f93c28SJeff Niu ++curPtr;
29911d26bd1SRiver Riddle
30011d26bd1SRiver Riddle StringRef str(tokStart, curPtr - tokStart);
30111d26bd1SRiver Riddle return Token(Token::directive, str);
30211d26bd1SRiver Riddle }
30311d26bd1SRiver Riddle
lexIdentifier(const char * tokStart)30411d26bd1SRiver Riddle Token Lexer::lexIdentifier(const char *tokStart) {
30511d26bd1SRiver Riddle // Match the rest of the identifier regex: [0-9a-zA-Z_]*
306b7f93c28SJeff Niu while (isalnum(*curPtr) || *curPtr == '_')
307b7f93c28SJeff Niu ++curPtr;
30811d26bd1SRiver Riddle
30911d26bd1SRiver Riddle // Check to see if this identifier is a keyword.
31011d26bd1SRiver Riddle StringRef str(tokStart, curPtr - tokStart);
31111d26bd1SRiver Riddle Token::Kind kind = StringSwitch<Token::Kind>(str)
31211d26bd1SRiver Riddle .Case("attr", Token::kw_attr)
31311d26bd1SRiver Riddle .Case("Attr", Token::kw_Attr)
31411d26bd1SRiver Riddle .Case("erase", Token::kw_erase)
31511d26bd1SRiver Riddle .Case("let", Token::kw_let)
31611d26bd1SRiver Riddle .Case("Constraint", Token::kw_Constraint)
317*930916c7SMogball .Case("not", Token::kw_not)
31811d26bd1SRiver Riddle .Case("op", Token::kw_op)
31911d26bd1SRiver Riddle .Case("Op", Token::kw_Op)
32011d26bd1SRiver Riddle .Case("OpName", Token::kw_OpName)
32111d26bd1SRiver Riddle .Case("Pattern", Token::kw_Pattern)
32211d26bd1SRiver Riddle .Case("replace", Token::kw_replace)
323faf42264SRiver Riddle .Case("return", Token::kw_return)
32411d26bd1SRiver Riddle .Case("rewrite", Token::kw_rewrite)
325faf42264SRiver Riddle .Case("Rewrite", Token::kw_Rewrite)
32611d26bd1SRiver Riddle .Case("type", Token::kw_type)
32711d26bd1SRiver Riddle .Case("Type", Token::kw_Type)
32811d26bd1SRiver Riddle .Case("TypeRange", Token::kw_TypeRange)
32911d26bd1SRiver Riddle .Case("Value", Token::kw_Value)
33011d26bd1SRiver Riddle .Case("ValueRange", Token::kw_ValueRange)
33111d26bd1SRiver Riddle .Case("with", Token::kw_with)
33211d26bd1SRiver Riddle .Case("_", Token::underscore)
33311d26bd1SRiver Riddle .Default(Token::identifier);
33411d26bd1SRiver Riddle return Token(kind, str);
33511d26bd1SRiver Riddle }
33611d26bd1SRiver Riddle
lexNumber(const char * tokStart)33711d26bd1SRiver Riddle Token Lexer::lexNumber(const char *tokStart) {
33811d26bd1SRiver Riddle assert(isdigit(curPtr[-1]));
33911d26bd1SRiver Riddle
34011d26bd1SRiver Riddle // Handle the normal decimal case.
341b7f93c28SJeff Niu while (isdigit(*curPtr))
342b7f93c28SJeff Niu ++curPtr;
34311d26bd1SRiver Riddle
34411d26bd1SRiver Riddle return formToken(Token::integer, tokStart);
34511d26bd1SRiver Riddle }
34611d26bd1SRiver Riddle
lexString(const char * tokStart,bool isStringBlock)34711d26bd1SRiver Riddle Token Lexer::lexString(const char *tokStart, bool isStringBlock) {
34811d26bd1SRiver Riddle while (true) {
34941d2c6dfSRiver Riddle // Check to see if there is a code completion location within the string. In
35041d2c6dfSRiver Riddle // these cases we generate a completion location and place the currently
35141d2c6dfSRiver Riddle // lexed string within the token (without the quotes). This allows for the
35241d2c6dfSRiver Riddle // parser to use the partially lexed string when computing the completion
35341d2c6dfSRiver Riddle // results.
35441d2c6dfSRiver Riddle if (curPtr == codeCompletionLocation) {
35541d2c6dfSRiver Riddle return formToken(Token::code_complete_string,
35641d2c6dfSRiver Riddle tokStart + (isStringBlock ? 2 : 1));
35741d2c6dfSRiver Riddle }
35841d2c6dfSRiver Riddle
35911d26bd1SRiver Riddle switch (*curPtr++) {
36011d26bd1SRiver Riddle case '"':
36111d26bd1SRiver Riddle // If this is a string block, we only end the string when we encounter a
36211d26bd1SRiver Riddle // `}]`.
363ebb1e900SRiver Riddle if (!isStringBlock)
364ebb1e900SRiver Riddle return formToken(Token::string, tokStart);
36511d26bd1SRiver Riddle continue;
36611d26bd1SRiver Riddle case '}':
36711d26bd1SRiver Riddle // If this is a string block, we only end the string when we encounter a
36811d26bd1SRiver Riddle // `}]`.
369ebb1e900SRiver Riddle if (!isStringBlock || *curPtr != ']')
370ebb1e900SRiver Riddle continue;
37111d26bd1SRiver Riddle ++curPtr;
37211d26bd1SRiver Riddle return formToken(Token::string_block, tokStart);
373ebb1e900SRiver Riddle case 0: {
37411d26bd1SRiver Riddle // If this is a random nul character in the middle of a string, just
37511d26bd1SRiver Riddle // include it. If it is the end of file, then it is an error.
376ebb1e900SRiver Riddle if (curPtr - 1 != curBuffer.end())
377ebb1e900SRiver Riddle continue;
378ebb1e900SRiver Riddle --curPtr;
379ebb1e900SRiver Riddle
380ebb1e900SRiver Riddle StringRef expectedEndStr = isStringBlock ? "}]" : "\"";
381ebb1e900SRiver Riddle return emitError(curPtr - 1,
382ebb1e900SRiver Riddle "expected '" + expectedEndStr + "' in string literal");
383ebb1e900SRiver Riddle }
384ebb1e900SRiver Riddle
38511d26bd1SRiver Riddle case '\n':
38611d26bd1SRiver Riddle case '\v':
38711d26bd1SRiver Riddle case '\f':
38811d26bd1SRiver Riddle // String blocks allow multiple lines.
38911d26bd1SRiver Riddle if (!isStringBlock)
39011d26bd1SRiver Riddle return emitError(curPtr - 1, "expected '\"' in string literal");
39111d26bd1SRiver Riddle continue;
39211d26bd1SRiver Riddle
39311d26bd1SRiver Riddle case '\\':
39411d26bd1SRiver Riddle // Handle explicitly a few escapes.
39511d26bd1SRiver Riddle if (*curPtr == '"' || *curPtr == '\\' || *curPtr == 'n' ||
39611d26bd1SRiver Riddle *curPtr == 't') {
39711d26bd1SRiver Riddle ++curPtr;
39811d26bd1SRiver Riddle } else if (llvm::isHexDigit(*curPtr) && llvm::isHexDigit(curPtr[1])) {
39911d26bd1SRiver Riddle // Support \xx for two hex digits.
40011d26bd1SRiver Riddle curPtr += 2;
40111d26bd1SRiver Riddle } else {
40211d26bd1SRiver Riddle return emitError(curPtr - 1, "unknown escape in string literal");
40311d26bd1SRiver Riddle }
40411d26bd1SRiver Riddle continue;
40511d26bd1SRiver Riddle
40611d26bd1SRiver Riddle default:
40711d26bd1SRiver Riddle continue;
40811d26bd1SRiver Riddle }
40911d26bd1SRiver Riddle }
41011d26bd1SRiver Riddle }
411