//===- AsmLexer.cpp - Lexer for Assembly Files ----------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This class implements the lexer for assembly files.
//
//===----------------------------------------------------------------------===//
1209467b48Spatrick
1309467b48Spatrick #include "llvm/MC/MCParser/AsmLexer.h"
1409467b48Spatrick #include "llvm/ADT/APInt.h"
1509467b48Spatrick #include "llvm/ADT/ArrayRef.h"
1609467b48Spatrick #include "llvm/ADT/StringExtras.h"
1709467b48Spatrick #include "llvm/ADT/StringRef.h"
1809467b48Spatrick #include "llvm/ADT/StringSwitch.h"
1909467b48Spatrick #include "llvm/MC/MCAsmInfo.h"
2009467b48Spatrick #include "llvm/MC/MCParser/MCAsmLexer.h"
2173471bf0Spatrick #include "llvm/Support/Compiler.h"
2209467b48Spatrick #include "llvm/Support/SMLoc.h"
2309467b48Spatrick #include "llvm/Support/SaveAndRestore.h"
2409467b48Spatrick #include <cassert>
2509467b48Spatrick #include <cctype>
2609467b48Spatrick #include <cstdio>
2709467b48Spatrick #include <cstring>
2809467b48Spatrick #include <string>
2909467b48Spatrick #include <tuple>
3009467b48Spatrick #include <utility>
3109467b48Spatrick
3209467b48Spatrick using namespace llvm;
3309467b48Spatrick
AsmLexer(const MCAsmInfo & MAI)3409467b48Spatrick AsmLexer::AsmLexer(const MCAsmInfo &MAI) : MAI(MAI) {
3509467b48Spatrick AllowAtInIdentifier = !StringRef(MAI.getCommentString()).startswith("@");
3673471bf0Spatrick LexMotorolaIntegers = MAI.shouldUseMotorolaIntegers();
3709467b48Spatrick }
3809467b48Spatrick
3909467b48Spatrick AsmLexer::~AsmLexer() = default;
4009467b48Spatrick
setBuffer(StringRef Buf,const char * ptr,bool EndStatementAtEOF)41097a140dSpatrick void AsmLexer::setBuffer(StringRef Buf, const char *ptr,
42097a140dSpatrick bool EndStatementAtEOF) {
4309467b48Spatrick CurBuf = Buf;
4409467b48Spatrick
4509467b48Spatrick if (ptr)
4609467b48Spatrick CurPtr = ptr;
4709467b48Spatrick else
4809467b48Spatrick CurPtr = CurBuf.begin();
4909467b48Spatrick
5009467b48Spatrick TokStart = nullptr;
51097a140dSpatrick this->EndStatementAtEOF = EndStatementAtEOF;
5209467b48Spatrick }
5309467b48Spatrick
5409467b48Spatrick /// ReturnError - Set the error to the specified string at the specified
5509467b48Spatrick /// location. This is defined to always return AsmToken::Error.
ReturnError(const char * Loc,const std::string & Msg)5609467b48Spatrick AsmToken AsmLexer::ReturnError(const char *Loc, const std::string &Msg) {
5709467b48Spatrick SetError(SMLoc::getFromPointer(Loc), Msg);
5809467b48Spatrick
5909467b48Spatrick return AsmToken(AsmToken::Error, StringRef(Loc, CurPtr - Loc));
6009467b48Spatrick }
6109467b48Spatrick
getNextChar()6209467b48Spatrick int AsmLexer::getNextChar() {
6309467b48Spatrick if (CurPtr == CurBuf.end())
6409467b48Spatrick return EOF;
6509467b48Spatrick return (unsigned char)*CurPtr++;
6609467b48Spatrick }
6709467b48Spatrick
peekNextChar()6873471bf0Spatrick int AsmLexer::peekNextChar() {
6973471bf0Spatrick if (CurPtr == CurBuf.end())
7073471bf0Spatrick return EOF;
7173471bf0Spatrick return (unsigned char)*CurPtr;
7273471bf0Spatrick }
7373471bf0Spatrick
7409467b48Spatrick /// The leading integral digit sequence and dot should have already been
7509467b48Spatrick /// consumed, some or all of the fractional digit sequence *can* have been
7609467b48Spatrick /// consumed.
LexFloatLiteral()7709467b48Spatrick AsmToken AsmLexer::LexFloatLiteral() {
7809467b48Spatrick // Skip the fractional digit sequence.
7909467b48Spatrick while (isDigit(*CurPtr))
8009467b48Spatrick ++CurPtr;
8109467b48Spatrick
8209467b48Spatrick if (*CurPtr == '-' || *CurPtr == '+')
8373471bf0Spatrick return ReturnError(CurPtr, "invalid sign in float literal");
8409467b48Spatrick
8509467b48Spatrick // Check for exponent
8609467b48Spatrick if ((*CurPtr == 'e' || *CurPtr == 'E')) {
8709467b48Spatrick ++CurPtr;
8809467b48Spatrick
8909467b48Spatrick if (*CurPtr == '-' || *CurPtr == '+')
9009467b48Spatrick ++CurPtr;
9109467b48Spatrick
9209467b48Spatrick while (isDigit(*CurPtr))
9309467b48Spatrick ++CurPtr;
9409467b48Spatrick }
9509467b48Spatrick
9609467b48Spatrick return AsmToken(AsmToken::Real,
9709467b48Spatrick StringRef(TokStart, CurPtr - TokStart));
9809467b48Spatrick }
9909467b48Spatrick
10009467b48Spatrick /// LexHexFloatLiteral matches essentially (.[0-9a-fA-F]*)?[pP][+-]?[0-9a-fA-F]+
10109467b48Spatrick /// while making sure there are enough actual digits around for the constant to
10209467b48Spatrick /// be valid.
10309467b48Spatrick ///
10409467b48Spatrick /// The leading "0x[0-9a-fA-F]*" (i.e. integer part) has already been consumed
10509467b48Spatrick /// before we get here.
LexHexFloatLiteral(bool NoIntDigits)10609467b48Spatrick AsmToken AsmLexer::LexHexFloatLiteral(bool NoIntDigits) {
10709467b48Spatrick assert((*CurPtr == 'p' || *CurPtr == 'P' || *CurPtr == '.') &&
10809467b48Spatrick "unexpected parse state in floating hex");
10909467b48Spatrick bool NoFracDigits = true;
11009467b48Spatrick
11109467b48Spatrick // Skip the fractional part if there is one
11209467b48Spatrick if (*CurPtr == '.') {
11309467b48Spatrick ++CurPtr;
11409467b48Spatrick
11509467b48Spatrick const char *FracStart = CurPtr;
11609467b48Spatrick while (isHexDigit(*CurPtr))
11709467b48Spatrick ++CurPtr;
11809467b48Spatrick
11909467b48Spatrick NoFracDigits = CurPtr == FracStart;
12009467b48Spatrick }
12109467b48Spatrick
12209467b48Spatrick if (NoIntDigits && NoFracDigits)
12309467b48Spatrick return ReturnError(TokStart, "invalid hexadecimal floating-point constant: "
12409467b48Spatrick "expected at least one significand digit");
12509467b48Spatrick
12609467b48Spatrick // Make sure we do have some kind of proper exponent part
12709467b48Spatrick if (*CurPtr != 'p' && *CurPtr != 'P')
12809467b48Spatrick return ReturnError(TokStart, "invalid hexadecimal floating-point constant: "
12909467b48Spatrick "expected exponent part 'p'");
13009467b48Spatrick ++CurPtr;
13109467b48Spatrick
13209467b48Spatrick if (*CurPtr == '+' || *CurPtr == '-')
13309467b48Spatrick ++CurPtr;
13409467b48Spatrick
13509467b48Spatrick // N.b. exponent digits are *not* hex
13609467b48Spatrick const char *ExpStart = CurPtr;
13709467b48Spatrick while (isDigit(*CurPtr))
13809467b48Spatrick ++CurPtr;
13909467b48Spatrick
14009467b48Spatrick if (CurPtr == ExpStart)
14109467b48Spatrick return ReturnError(TokStart, "invalid hexadecimal floating-point constant: "
14209467b48Spatrick "expected at least one exponent digit");
14309467b48Spatrick
14409467b48Spatrick return AsmToken(AsmToken::Real, StringRef(TokStart, CurPtr - TokStart));
14509467b48Spatrick }
14609467b48Spatrick
14773471bf0Spatrick /// LexIdentifier: [a-zA-Z_$.@?][a-zA-Z0-9_$.@#?]*
isIdentifierChar(char C,bool AllowAt,bool AllowHash)14873471bf0Spatrick static bool isIdentifierChar(char C, bool AllowAt, bool AllowHash) {
14973471bf0Spatrick return isAlnum(C) || C == '_' || C == '$' || C == '.' || C == '?' ||
15073471bf0Spatrick (AllowAt && C == '@') || (AllowHash && C == '#');
15109467b48Spatrick }
15209467b48Spatrick
LexIdentifier()15309467b48Spatrick AsmToken AsmLexer::LexIdentifier() {
15409467b48Spatrick // Check for floating point literals.
15509467b48Spatrick if (CurPtr[-1] == '.' && isDigit(*CurPtr)) {
15609467b48Spatrick // Disambiguate a .1243foo identifier from a floating literal.
15709467b48Spatrick while (isDigit(*CurPtr))
15809467b48Spatrick ++CurPtr;
15909467b48Spatrick
16073471bf0Spatrick if (!isIdentifierChar(*CurPtr, AllowAtInIdentifier,
16173471bf0Spatrick AllowHashInIdentifier) ||
16209467b48Spatrick *CurPtr == 'e' || *CurPtr == 'E')
16309467b48Spatrick return LexFloatLiteral();
16409467b48Spatrick }
16509467b48Spatrick
16673471bf0Spatrick while (isIdentifierChar(*CurPtr, AllowAtInIdentifier, AllowHashInIdentifier))
16709467b48Spatrick ++CurPtr;
16809467b48Spatrick
16909467b48Spatrick // Handle . as a special case.
17009467b48Spatrick if (CurPtr == TokStart+1 && TokStart[0] == '.')
17109467b48Spatrick return AsmToken(AsmToken::Dot, StringRef(TokStart, 1));
17209467b48Spatrick
17309467b48Spatrick return AsmToken(AsmToken::Identifier, StringRef(TokStart, CurPtr - TokStart));
17409467b48Spatrick }
17509467b48Spatrick
17609467b48Spatrick /// LexSlash: Slash: /
17709467b48Spatrick /// C-Style Comment: /* ... */
17873471bf0Spatrick /// C-style Comment: // ...
LexSlash()17909467b48Spatrick AsmToken AsmLexer::LexSlash() {
18073471bf0Spatrick if (!MAI.shouldAllowAdditionalComments()) {
18173471bf0Spatrick IsAtStartOfStatement = false;
18273471bf0Spatrick return AsmToken(AsmToken::Slash, StringRef(TokStart, 1));
18373471bf0Spatrick }
18473471bf0Spatrick
18509467b48Spatrick switch (*CurPtr) {
18609467b48Spatrick case '*':
18709467b48Spatrick IsAtStartOfStatement = false;
18809467b48Spatrick break; // C style comment.
18909467b48Spatrick case '/':
19009467b48Spatrick ++CurPtr;
19109467b48Spatrick return LexLineComment();
19209467b48Spatrick default:
19309467b48Spatrick IsAtStartOfStatement = false;
19409467b48Spatrick return AsmToken(AsmToken::Slash, StringRef(TokStart, 1));
19509467b48Spatrick }
19609467b48Spatrick
19709467b48Spatrick // C Style comment.
19809467b48Spatrick ++CurPtr; // skip the star.
19909467b48Spatrick const char *CommentTextStart = CurPtr;
20009467b48Spatrick while (CurPtr != CurBuf.end()) {
20109467b48Spatrick switch (*CurPtr++) {
20209467b48Spatrick case '*':
20309467b48Spatrick // End of the comment?
20409467b48Spatrick if (*CurPtr != '/')
20509467b48Spatrick break;
20609467b48Spatrick // If we have a CommentConsumer, notify it about the comment.
20709467b48Spatrick if (CommentConsumer) {
20809467b48Spatrick CommentConsumer->HandleComment(
20909467b48Spatrick SMLoc::getFromPointer(CommentTextStart),
21009467b48Spatrick StringRef(CommentTextStart, CurPtr - 1 - CommentTextStart));
21109467b48Spatrick }
21209467b48Spatrick ++CurPtr; // End the */.
21309467b48Spatrick return AsmToken(AsmToken::Comment,
21409467b48Spatrick StringRef(TokStart, CurPtr - TokStart));
21509467b48Spatrick }
21609467b48Spatrick }
21709467b48Spatrick return ReturnError(TokStart, "unterminated comment");
21809467b48Spatrick }
21909467b48Spatrick
22009467b48Spatrick /// LexLineComment: Comment: #[^\n]*
22109467b48Spatrick /// : //[^\n]*
LexLineComment()22209467b48Spatrick AsmToken AsmLexer::LexLineComment() {
22309467b48Spatrick // Mark This as an end of statement with a body of the
22409467b48Spatrick // comment. While it would be nicer to leave this two tokens,
22509467b48Spatrick // backwards compatability with TargetParsers makes keeping this in this form
22609467b48Spatrick // better.
22709467b48Spatrick const char *CommentTextStart = CurPtr;
22809467b48Spatrick int CurChar = getNextChar();
22909467b48Spatrick while (CurChar != '\n' && CurChar != '\r' && CurChar != EOF)
23009467b48Spatrick CurChar = getNextChar();
231*d415bd75Srobert const char *NewlinePtr = CurPtr;
23209467b48Spatrick if (CurChar == '\r' && CurPtr != CurBuf.end() && *CurPtr == '\n')
23309467b48Spatrick ++CurPtr;
23409467b48Spatrick
23509467b48Spatrick // If we have a CommentConsumer, notify it about the comment.
23609467b48Spatrick if (CommentConsumer) {
23709467b48Spatrick CommentConsumer->HandleComment(
23809467b48Spatrick SMLoc::getFromPointer(CommentTextStart),
239*d415bd75Srobert StringRef(CommentTextStart, NewlinePtr - 1 - CommentTextStart));
24009467b48Spatrick }
24109467b48Spatrick
24209467b48Spatrick IsAtStartOfLine = true;
24309467b48Spatrick // This is a whole line comment. leave newline
24409467b48Spatrick if (IsAtStartOfStatement)
24509467b48Spatrick return AsmToken(AsmToken::EndOfStatement,
24609467b48Spatrick StringRef(TokStart, CurPtr - TokStart));
24709467b48Spatrick IsAtStartOfStatement = true;
24809467b48Spatrick
24909467b48Spatrick return AsmToken(AsmToken::EndOfStatement,
25009467b48Spatrick StringRef(TokStart, CurPtr - 1 - TokStart));
25109467b48Spatrick }
25209467b48Spatrick
/// Advance \p CurPtr past an optional integer type suffix: an optional 'U'
/// followed by up to two 'L's, all case-insensitive (U, L, UL, LL, ULL).
static void SkipIgnoredIntegerSuffix(const char *&CurPtr) {
  const auto AtEither = [&CurPtr](char Upper, char Lower) {
    return *CurPtr == Upper || *CurPtr == Lower;
  };

  if (AtEither('U', 'u'))
    ++CurPtr;

  // At most two 'L's may follow.
  for (int Remaining = 2; Remaining > 0 && AtEither('L', 'l'); --Remaining)
    ++CurPtr;
}
26209467b48Spatrick
26309467b48Spatrick // Look ahead to search for first non-hex digit, if it's [hH], then we treat the
26409467b48Spatrick // integer as a hexadecimal, possibly with leading zeroes.
doHexLookAhead(const char * & CurPtr,unsigned DefaultRadix,bool LexHex)26509467b48Spatrick static unsigned doHexLookAhead(const char *&CurPtr, unsigned DefaultRadix,
26609467b48Spatrick bool LexHex) {
26709467b48Spatrick const char *FirstNonDec = nullptr;
26809467b48Spatrick const char *LookAhead = CurPtr;
26909467b48Spatrick while (true) {
27009467b48Spatrick if (isDigit(*LookAhead)) {
27109467b48Spatrick ++LookAhead;
27209467b48Spatrick } else {
27309467b48Spatrick if (!FirstNonDec)
27409467b48Spatrick FirstNonDec = LookAhead;
27509467b48Spatrick
27609467b48Spatrick // Keep going if we are looking for a 'h' suffix.
27709467b48Spatrick if (LexHex && isHexDigit(*LookAhead))
27809467b48Spatrick ++LookAhead;
27909467b48Spatrick else
28009467b48Spatrick break;
28109467b48Spatrick }
28209467b48Spatrick }
28309467b48Spatrick bool isHex = LexHex && (*LookAhead == 'h' || *LookAhead == 'H');
28409467b48Spatrick CurPtr = isHex || !FirstNonDec ? LookAhead : FirstNonDec;
28509467b48Spatrick if (isHex)
28609467b48Spatrick return 16;
28709467b48Spatrick return DefaultRadix;
28809467b48Spatrick }
28909467b48Spatrick
findLastDigit(const char * CurPtr,unsigned DefaultRadix)29073471bf0Spatrick static const char *findLastDigit(const char *CurPtr, unsigned DefaultRadix) {
29173471bf0Spatrick while (hexDigitValue(*CurPtr) < DefaultRadix) {
29273471bf0Spatrick ++CurPtr;
29373471bf0Spatrick }
29473471bf0Spatrick return CurPtr;
29573471bf0Spatrick }
29673471bf0Spatrick
intToken(StringRef Ref,APInt & Value)29773471bf0Spatrick static AsmToken intToken(StringRef Ref, APInt &Value) {
29809467b48Spatrick if (Value.isIntN(64))
29909467b48Spatrick return AsmToken(AsmToken::Integer, Ref, Value);
30009467b48Spatrick return AsmToken(AsmToken::BigNum, Ref, Value);
30109467b48Spatrick }
30209467b48Spatrick
/// Human-readable name of \p Radix for diagnostics: "binary", "octal",
/// "decimal" or "hexadecimal" for 2, 8, 10 and 16, and "base-N" otherwise.
static std::string radixName(unsigned Radix) {
  if (Radix == 2)
    return "binary";
  if (Radix == 8)
    return "octal";
  if (Radix == 10)
    return "decimal";
  if (Radix == 16)
    return "hexadecimal";
  return "base-" + std::to_string(Radix);
}
31773471bf0Spatrick
31809467b48Spatrick /// LexDigit: First character is [0-9].
31909467b48Spatrick /// Local Label: [0-9][:]
32009467b48Spatrick /// Forward/Backward Label: [0-9][fb]
32109467b48Spatrick /// Binary integer: 0b[01]+
32209467b48Spatrick /// Octal integer: 0[0-7]+
32309467b48Spatrick /// Hex integer: 0x[0-9a-fA-F]+ or [0x]?[0-9][0-9a-fA-F]*[hH]
32409467b48Spatrick /// Decimal integer: [1-9][0-9]*
LexDigit()32509467b48Spatrick AsmToken AsmLexer::LexDigit() {
32673471bf0Spatrick // MASM-flavor binary integer: [01]+[yY] (if DefaultRadix < 16, [bByY])
32773471bf0Spatrick // MASM-flavor octal integer: [0-7]+[oOqQ]
32873471bf0Spatrick // MASM-flavor decimal integer: [0-9]+[tT] (if DefaultRadix < 16, [dDtT])
32909467b48Spatrick // MASM-flavor hexadecimal integer: [0-9][0-9a-fA-F]*[hH]
33009467b48Spatrick if (LexMasmIntegers && isdigit(CurPtr[-1])) {
33173471bf0Spatrick const char *FirstNonBinary =
33273471bf0Spatrick (CurPtr[-1] != '0' && CurPtr[-1] != '1') ? CurPtr - 1 : nullptr;
33373471bf0Spatrick const char *FirstNonDecimal =
33473471bf0Spatrick (CurPtr[-1] < '0' || CurPtr[-1] > '9') ? CurPtr - 1 : nullptr;
33509467b48Spatrick const char *OldCurPtr = CurPtr;
33609467b48Spatrick while (isHexDigit(*CurPtr)) {
33773471bf0Spatrick switch (*CurPtr) {
33873471bf0Spatrick default:
33973471bf0Spatrick if (!FirstNonDecimal) {
34073471bf0Spatrick FirstNonDecimal = CurPtr;
34173471bf0Spatrick }
342*d415bd75Srobert [[fallthrough]];
34373471bf0Spatrick case '9':
34473471bf0Spatrick case '8':
34573471bf0Spatrick case '7':
34673471bf0Spatrick case '6':
34773471bf0Spatrick case '5':
34873471bf0Spatrick case '4':
34973471bf0Spatrick case '3':
35073471bf0Spatrick case '2':
35173471bf0Spatrick if (!FirstNonBinary) {
35209467b48Spatrick FirstNonBinary = CurPtr;
35373471bf0Spatrick }
35473471bf0Spatrick break;
35573471bf0Spatrick case '1':
35673471bf0Spatrick case '0':
35773471bf0Spatrick break;
35873471bf0Spatrick }
35909467b48Spatrick ++CurPtr;
36009467b48Spatrick }
36173471bf0Spatrick if (*CurPtr == '.') {
36273471bf0Spatrick // MASM float literals (other than hex floats) always contain a ".", and
36373471bf0Spatrick // are always written in decimal.
36473471bf0Spatrick ++CurPtr;
36573471bf0Spatrick return LexFloatLiteral();
36673471bf0Spatrick }
36773471bf0Spatrick
36873471bf0Spatrick if (LexMasmHexFloats && (*CurPtr == 'r' || *CurPtr == 'R')) {
36973471bf0Spatrick ++CurPtr;
37073471bf0Spatrick return AsmToken(AsmToken::Real, StringRef(TokStart, CurPtr - TokStart));
37173471bf0Spatrick }
37209467b48Spatrick
37309467b48Spatrick unsigned Radix = 0;
37409467b48Spatrick if (*CurPtr == 'h' || *CurPtr == 'H') {
37509467b48Spatrick // hexadecimal number
37609467b48Spatrick ++CurPtr;
37709467b48Spatrick Radix = 16;
37873471bf0Spatrick } else if (*CurPtr == 't' || *CurPtr == 'T') {
37973471bf0Spatrick // decimal number
38073471bf0Spatrick ++CurPtr;
38173471bf0Spatrick Radix = 10;
38273471bf0Spatrick } else if (*CurPtr == 'o' || *CurPtr == 'O' || *CurPtr == 'q' ||
38373471bf0Spatrick *CurPtr == 'Q') {
38473471bf0Spatrick // octal number
38573471bf0Spatrick ++CurPtr;
38673471bf0Spatrick Radix = 8;
38773471bf0Spatrick } else if (*CurPtr == 'y' || *CurPtr == 'Y') {
38873471bf0Spatrick // binary number
38973471bf0Spatrick ++CurPtr;
39009467b48Spatrick Radix = 2;
39173471bf0Spatrick } else if (FirstNonDecimal && FirstNonDecimal + 1 == CurPtr &&
39273471bf0Spatrick DefaultRadix < 14 &&
39373471bf0Spatrick (*FirstNonDecimal == 'd' || *FirstNonDecimal == 'D')) {
39473471bf0Spatrick Radix = 10;
39573471bf0Spatrick } else if (FirstNonBinary && FirstNonBinary + 1 == CurPtr &&
39673471bf0Spatrick DefaultRadix < 12 &&
39773471bf0Spatrick (*FirstNonBinary == 'b' || *FirstNonBinary == 'B')) {
39873471bf0Spatrick Radix = 2;
39973471bf0Spatrick }
40009467b48Spatrick
40173471bf0Spatrick if (Radix) {
40209467b48Spatrick StringRef Result(TokStart, CurPtr - TokStart);
40309467b48Spatrick APInt Value(128, 0, true);
40409467b48Spatrick
40509467b48Spatrick if (Result.drop_back().getAsInteger(Radix, Value))
40673471bf0Spatrick return ReturnError(TokStart, "invalid " + radixName(Radix) + " number");
40709467b48Spatrick
40809467b48Spatrick // MSVC accepts and ignores type suffices on integer literals.
40909467b48Spatrick SkipIgnoredIntegerSuffix(CurPtr);
41009467b48Spatrick
41109467b48Spatrick return intToken(Result, Value);
41209467b48Spatrick }
41309467b48Spatrick
41473471bf0Spatrick // default-radix integers, or floating point numbers, fall through
41509467b48Spatrick CurPtr = OldCurPtr;
41609467b48Spatrick }
41709467b48Spatrick
41873471bf0Spatrick // MASM default-radix integers: [0-9a-fA-F]+
41973471bf0Spatrick // (All other integer literals have a radix specifier.)
42073471bf0Spatrick if (LexMasmIntegers && UseMasmDefaultRadix) {
42173471bf0Spatrick CurPtr = findLastDigit(CurPtr, 16);
42273471bf0Spatrick StringRef Result(TokStart, CurPtr - TokStart);
42373471bf0Spatrick
42473471bf0Spatrick APInt Value(128, 0, true);
42573471bf0Spatrick if (Result.getAsInteger(DefaultRadix, Value)) {
42673471bf0Spatrick return ReturnError(TokStart,
42773471bf0Spatrick "invalid " + radixName(DefaultRadix) + " number");
42873471bf0Spatrick }
42973471bf0Spatrick
43073471bf0Spatrick return intToken(Result, Value);
43173471bf0Spatrick }
43273471bf0Spatrick
43373471bf0Spatrick // Motorola hex integers: $[0-9a-fA-F]+
43473471bf0Spatrick if (LexMotorolaIntegers && CurPtr[-1] == '$') {
43573471bf0Spatrick const char *NumStart = CurPtr;
43673471bf0Spatrick while (isHexDigit(CurPtr[0]))
43773471bf0Spatrick ++CurPtr;
43873471bf0Spatrick
43973471bf0Spatrick APInt Result(128, 0);
44073471bf0Spatrick if (StringRef(NumStart, CurPtr - NumStart).getAsInteger(16, Result))
44173471bf0Spatrick return ReturnError(TokStart, "invalid hexadecimal number");
44273471bf0Spatrick
44373471bf0Spatrick return intToken(StringRef(TokStart, CurPtr - TokStart), Result);
44473471bf0Spatrick }
44573471bf0Spatrick
44673471bf0Spatrick // Motorola binary integers: %[01]+
44773471bf0Spatrick if (LexMotorolaIntegers && CurPtr[-1] == '%') {
44873471bf0Spatrick const char *NumStart = CurPtr;
44973471bf0Spatrick while (*CurPtr == '0' || *CurPtr == '1')
45073471bf0Spatrick ++CurPtr;
45173471bf0Spatrick
45273471bf0Spatrick APInt Result(128, 0);
45373471bf0Spatrick if (StringRef(NumStart, CurPtr - NumStart).getAsInteger(2, Result))
45473471bf0Spatrick return ReturnError(TokStart, "invalid binary number");
45573471bf0Spatrick
45673471bf0Spatrick return intToken(StringRef(TokStart, CurPtr - TokStart), Result);
45773471bf0Spatrick }
45873471bf0Spatrick
45909467b48Spatrick // Decimal integer: [1-9][0-9]*
46073471bf0Spatrick // HLASM-flavour decimal integer: [0-9][0-9]*
46173471bf0Spatrick // FIXME: Later on, support for fb for HLASM has to be added in
46273471bf0Spatrick // as they probably would be needed for asm goto
46373471bf0Spatrick if (LexHLASMIntegers || CurPtr[-1] != '0' || CurPtr[0] == '.') {
46409467b48Spatrick unsigned Radix = doHexLookAhead(CurPtr, 10, LexMasmIntegers);
46573471bf0Spatrick
46673471bf0Spatrick if (!LexHLASMIntegers) {
46773471bf0Spatrick bool IsHex = Radix == 16;
46809467b48Spatrick // Check for floating point literals.
46973471bf0Spatrick if (!IsHex && (*CurPtr == '.' || *CurPtr == 'e' || *CurPtr == 'E')) {
47009467b48Spatrick if (*CurPtr == '.')
47109467b48Spatrick ++CurPtr;
47209467b48Spatrick return LexFloatLiteral();
47309467b48Spatrick }
47473471bf0Spatrick }
47509467b48Spatrick
47609467b48Spatrick StringRef Result(TokStart, CurPtr - TokStart);
47709467b48Spatrick
47809467b48Spatrick APInt Value(128, 0, true);
47909467b48Spatrick if (Result.getAsInteger(Radix, Value))
48073471bf0Spatrick return ReturnError(TokStart, "invalid " + radixName(Radix) + " number");
48109467b48Spatrick
48273471bf0Spatrick if (!LexHLASMIntegers)
48309467b48Spatrick // The darwin/x86 (and x86-64) assembler accepts and ignores type
48409467b48Spatrick // suffices on integer literals.
48509467b48Spatrick SkipIgnoredIntegerSuffix(CurPtr);
48609467b48Spatrick
48709467b48Spatrick return intToken(Result, Value);
48809467b48Spatrick }
48909467b48Spatrick
49009467b48Spatrick if (!LexMasmIntegers && ((*CurPtr == 'b') || (*CurPtr == 'B'))) {
49109467b48Spatrick ++CurPtr;
49209467b48Spatrick // See if we actually have "0b" as part of something like "jmp 0b\n"
49309467b48Spatrick if (!isDigit(CurPtr[0])) {
49409467b48Spatrick --CurPtr;
49509467b48Spatrick StringRef Result(TokStart, CurPtr - TokStart);
49609467b48Spatrick return AsmToken(AsmToken::Integer, Result, 0);
49709467b48Spatrick }
49809467b48Spatrick const char *NumStart = CurPtr;
49909467b48Spatrick while (CurPtr[0] == '0' || CurPtr[0] == '1')
50009467b48Spatrick ++CurPtr;
50109467b48Spatrick
50209467b48Spatrick // Requires at least one binary digit.
50309467b48Spatrick if (CurPtr == NumStart)
50409467b48Spatrick return ReturnError(TokStart, "invalid binary number");
50509467b48Spatrick
50609467b48Spatrick StringRef Result(TokStart, CurPtr - TokStart);
50709467b48Spatrick
50809467b48Spatrick APInt Value(128, 0, true);
50909467b48Spatrick if (Result.substr(2).getAsInteger(2, Value))
51009467b48Spatrick return ReturnError(TokStart, "invalid binary number");
51109467b48Spatrick
51209467b48Spatrick // The darwin/x86 (and x86-64) assembler accepts and ignores ULL and LL
51309467b48Spatrick // suffixes on integer literals.
51409467b48Spatrick SkipIgnoredIntegerSuffix(CurPtr);
51509467b48Spatrick
51609467b48Spatrick return intToken(Result, Value);
51709467b48Spatrick }
51809467b48Spatrick
51909467b48Spatrick if ((*CurPtr == 'x') || (*CurPtr == 'X')) {
52009467b48Spatrick ++CurPtr;
52109467b48Spatrick const char *NumStart = CurPtr;
52209467b48Spatrick while (isHexDigit(CurPtr[0]))
52309467b48Spatrick ++CurPtr;
52409467b48Spatrick
52509467b48Spatrick // "0x.0p0" is valid, and "0x0p0" (but not "0xp0" for example, which will be
52609467b48Spatrick // diagnosed by LexHexFloatLiteral).
52709467b48Spatrick if (CurPtr[0] == '.' || CurPtr[0] == 'p' || CurPtr[0] == 'P')
52809467b48Spatrick return LexHexFloatLiteral(NumStart == CurPtr);
52909467b48Spatrick
53009467b48Spatrick // Otherwise requires at least one hex digit.
53109467b48Spatrick if (CurPtr == NumStart)
53209467b48Spatrick return ReturnError(CurPtr-2, "invalid hexadecimal number");
53309467b48Spatrick
53409467b48Spatrick APInt Result(128, 0);
53509467b48Spatrick if (StringRef(TokStart, CurPtr - TokStart).getAsInteger(0, Result))
53609467b48Spatrick return ReturnError(TokStart, "invalid hexadecimal number");
53709467b48Spatrick
53809467b48Spatrick // Consume the optional [hH].
53909467b48Spatrick if (LexMasmIntegers && (*CurPtr == 'h' || *CurPtr == 'H'))
54009467b48Spatrick ++CurPtr;
54109467b48Spatrick
54209467b48Spatrick // The darwin/x86 (and x86-64) assembler accepts and ignores ULL and LL
54309467b48Spatrick // suffixes on integer literals.
54409467b48Spatrick SkipIgnoredIntegerSuffix(CurPtr);
54509467b48Spatrick
54609467b48Spatrick return intToken(StringRef(TokStart, CurPtr - TokStart), Result);
54709467b48Spatrick }
54809467b48Spatrick
54909467b48Spatrick // Either octal or hexadecimal.
55009467b48Spatrick APInt Value(128, 0, true);
55109467b48Spatrick unsigned Radix = doHexLookAhead(CurPtr, 8, LexMasmIntegers);
55209467b48Spatrick StringRef Result(TokStart, CurPtr - TokStart);
55309467b48Spatrick if (Result.getAsInteger(Radix, Value))
55473471bf0Spatrick return ReturnError(TokStart, "invalid " + radixName(Radix) + " number");
55509467b48Spatrick
55609467b48Spatrick // Consume the [hH].
55709467b48Spatrick if (Radix == 16)
55809467b48Spatrick ++CurPtr;
55909467b48Spatrick
56009467b48Spatrick // The darwin/x86 (and x86-64) assembler accepts and ignores ULL and LL
56109467b48Spatrick // suffixes on integer literals.
56209467b48Spatrick SkipIgnoredIntegerSuffix(CurPtr);
56309467b48Spatrick
56409467b48Spatrick return intToken(Result, Value);
56509467b48Spatrick }
56609467b48Spatrick
56709467b48Spatrick /// LexSingleQuote: Integer: 'b'
LexSingleQuote()56809467b48Spatrick AsmToken AsmLexer::LexSingleQuote() {
56909467b48Spatrick int CurChar = getNextChar();
57009467b48Spatrick
57173471bf0Spatrick if (LexHLASMStrings)
57273471bf0Spatrick return ReturnError(TokStart, "invalid usage of character literals");
57373471bf0Spatrick
57473471bf0Spatrick if (LexMasmStrings) {
57573471bf0Spatrick while (CurChar != EOF) {
57673471bf0Spatrick if (CurChar != '\'') {
57773471bf0Spatrick CurChar = getNextChar();
57873471bf0Spatrick } else if (peekNextChar() == '\'') {
57973471bf0Spatrick // In MASM single-quote strings, doubled single-quotes mean an escaped
58073471bf0Spatrick // single quote, so should be lexed in.
58173471bf0Spatrick getNextChar();
58273471bf0Spatrick CurChar = getNextChar();
58373471bf0Spatrick } else {
58473471bf0Spatrick break;
58573471bf0Spatrick }
58673471bf0Spatrick }
58773471bf0Spatrick if (CurChar == EOF)
58873471bf0Spatrick return ReturnError(TokStart, "unterminated string constant");
58973471bf0Spatrick return AsmToken(AsmToken::String, StringRef(TokStart, CurPtr - TokStart));
59073471bf0Spatrick }
59173471bf0Spatrick
59209467b48Spatrick if (CurChar == '\\')
59309467b48Spatrick CurChar = getNextChar();
59409467b48Spatrick
59509467b48Spatrick if (CurChar == EOF)
59609467b48Spatrick return ReturnError(TokStart, "unterminated single quote");
59709467b48Spatrick
59809467b48Spatrick CurChar = getNextChar();
59909467b48Spatrick
60009467b48Spatrick if (CurChar != '\'')
60109467b48Spatrick return ReturnError(TokStart, "single quote way too long");
60209467b48Spatrick
60309467b48Spatrick // The idea here being that 'c' is basically just an integral
60409467b48Spatrick // constant.
60509467b48Spatrick StringRef Res = StringRef(TokStart,CurPtr - TokStart);
60609467b48Spatrick long long Value;
60709467b48Spatrick
60809467b48Spatrick if (Res.startswith("\'\\")) {
60909467b48Spatrick char theChar = Res[2];
61009467b48Spatrick switch (theChar) {
61109467b48Spatrick default: Value = theChar; break;
61209467b48Spatrick case '\'': Value = '\''; break;
61309467b48Spatrick case 't': Value = '\t'; break;
61409467b48Spatrick case 'n': Value = '\n'; break;
61509467b48Spatrick case 'b': Value = '\b'; break;
61673471bf0Spatrick case 'f': Value = '\f'; break;
61773471bf0Spatrick case 'r': Value = '\r'; break;
61809467b48Spatrick }
61909467b48Spatrick } else
62009467b48Spatrick Value = TokStart[1];
62109467b48Spatrick
62209467b48Spatrick return AsmToken(AsmToken::Integer, Res, Value);
62309467b48Spatrick }
62409467b48Spatrick
62509467b48Spatrick /// LexQuote: String: "..."
LexQuote()62609467b48Spatrick AsmToken AsmLexer::LexQuote() {
62709467b48Spatrick int CurChar = getNextChar();
62873471bf0Spatrick if (LexHLASMStrings)
62973471bf0Spatrick return ReturnError(TokStart, "invalid usage of string literals");
63073471bf0Spatrick
63173471bf0Spatrick if (LexMasmStrings) {
63273471bf0Spatrick while (CurChar != EOF) {
63373471bf0Spatrick if (CurChar != '"') {
63473471bf0Spatrick CurChar = getNextChar();
63573471bf0Spatrick } else if (peekNextChar() == '"') {
63673471bf0Spatrick // In MASM double-quoted strings, doubled double-quotes mean an escaped
63773471bf0Spatrick // double quote, so should be lexed in.
63873471bf0Spatrick getNextChar();
63973471bf0Spatrick CurChar = getNextChar();
64073471bf0Spatrick } else {
64173471bf0Spatrick break;
64273471bf0Spatrick }
64373471bf0Spatrick }
64473471bf0Spatrick if (CurChar == EOF)
64573471bf0Spatrick return ReturnError(TokStart, "unterminated string constant");
64673471bf0Spatrick return AsmToken(AsmToken::String, StringRef(TokStart, CurPtr - TokStart));
64773471bf0Spatrick }
64873471bf0Spatrick
64909467b48Spatrick // TODO: does gas allow multiline string constants?
65009467b48Spatrick while (CurChar != '"') {
65109467b48Spatrick if (CurChar == '\\') {
65209467b48Spatrick // Allow \", etc.
65309467b48Spatrick CurChar = getNextChar();
65409467b48Spatrick }
65509467b48Spatrick
65609467b48Spatrick if (CurChar == EOF)
65709467b48Spatrick return ReturnError(TokStart, "unterminated string constant");
65809467b48Spatrick
65909467b48Spatrick CurChar = getNextChar();
66009467b48Spatrick }
66109467b48Spatrick
66209467b48Spatrick return AsmToken(AsmToken::String, StringRef(TokStart, CurPtr - TokStart));
66309467b48Spatrick }
66409467b48Spatrick
LexUntilEndOfStatement()66509467b48Spatrick StringRef AsmLexer::LexUntilEndOfStatement() {
66609467b48Spatrick TokStart = CurPtr;
66709467b48Spatrick
66809467b48Spatrick while (!isAtStartOfComment(CurPtr) && // Start of line comment.
66909467b48Spatrick !isAtStatementSeparator(CurPtr) && // End of statement marker.
67009467b48Spatrick *CurPtr != '\n' && *CurPtr != '\r' && CurPtr != CurBuf.end()) {
67109467b48Spatrick ++CurPtr;
67209467b48Spatrick }
67309467b48Spatrick return StringRef(TokStart, CurPtr-TokStart);
67409467b48Spatrick }
67509467b48Spatrick
LexUntilEndOfLine()67609467b48Spatrick StringRef AsmLexer::LexUntilEndOfLine() {
67709467b48Spatrick TokStart = CurPtr;
67809467b48Spatrick
67909467b48Spatrick while (*CurPtr != '\n' && *CurPtr != '\r' && CurPtr != CurBuf.end()) {
68009467b48Spatrick ++CurPtr;
68109467b48Spatrick }
68209467b48Spatrick return StringRef(TokStart, CurPtr-TokStart);
68309467b48Spatrick }
68409467b48Spatrick
peekTokens(MutableArrayRef<AsmToken> Buf,bool ShouldSkipSpace)68509467b48Spatrick size_t AsmLexer::peekTokens(MutableArrayRef<AsmToken> Buf,
68609467b48Spatrick bool ShouldSkipSpace) {
687*d415bd75Srobert SaveAndRestore SavedTokenStart(TokStart);
688*d415bd75Srobert SaveAndRestore SavedCurPtr(CurPtr);
689*d415bd75Srobert SaveAndRestore SavedAtStartOfLine(IsAtStartOfLine);
690*d415bd75Srobert SaveAndRestore SavedAtStartOfStatement(IsAtStartOfStatement);
691*d415bd75Srobert SaveAndRestore SavedSkipSpace(SkipSpace, ShouldSkipSpace);
692*d415bd75Srobert SaveAndRestore SavedIsPeeking(IsPeeking, true);
69309467b48Spatrick std::string SavedErr = getErr();
69409467b48Spatrick SMLoc SavedErrLoc = getErrLoc();
69509467b48Spatrick
69609467b48Spatrick size_t ReadCount;
69709467b48Spatrick for (ReadCount = 0; ReadCount < Buf.size(); ++ReadCount) {
69809467b48Spatrick AsmToken Token = LexToken();
69909467b48Spatrick
70009467b48Spatrick Buf[ReadCount] = Token;
70109467b48Spatrick
70209467b48Spatrick if (Token.is(AsmToken::Eof))
70309467b48Spatrick break;
70409467b48Spatrick }
70509467b48Spatrick
70609467b48Spatrick SetError(SavedErrLoc, SavedErr);
70709467b48Spatrick return ReadCount;
70809467b48Spatrick }
70909467b48Spatrick
isAtStartOfComment(const char * Ptr)71009467b48Spatrick bool AsmLexer::isAtStartOfComment(const char *Ptr) {
71173471bf0Spatrick if (MAI.getRestrictCommentStringToStartOfStatement() && !IsAtStartOfStatement)
71273471bf0Spatrick return false;
71373471bf0Spatrick
71409467b48Spatrick StringRef CommentString = MAI.getCommentString();
71509467b48Spatrick
71609467b48Spatrick if (CommentString.size() == 1)
71709467b48Spatrick return CommentString[0] == Ptr[0];
71809467b48Spatrick
719*d415bd75Srobert // Allow # preprocessor comments also be counted as comments for "##" cases
72009467b48Spatrick if (CommentString[1] == '#')
72109467b48Spatrick return CommentString[0] == Ptr[0];
72209467b48Spatrick
72309467b48Spatrick return strncmp(Ptr, CommentString.data(), CommentString.size()) == 0;
72409467b48Spatrick }
72509467b48Spatrick
isAtStatementSeparator(const char * Ptr)72609467b48Spatrick bool AsmLexer::isAtStatementSeparator(const char *Ptr) {
72709467b48Spatrick return strncmp(Ptr, MAI.getSeparatorString(),
72809467b48Spatrick strlen(MAI.getSeparatorString())) == 0;
72909467b48Spatrick }
73009467b48Spatrick
LexToken()73109467b48Spatrick AsmToken AsmLexer::LexToken() {
73209467b48Spatrick TokStart = CurPtr;
73309467b48Spatrick // This always consumes at least one character.
73409467b48Spatrick int CurChar = getNextChar();
73509467b48Spatrick
73609467b48Spatrick if (!IsPeeking && CurChar == '#' && IsAtStartOfStatement) {
73709467b48Spatrick // If this starts with a '#', this may be a cpp
73809467b48Spatrick // hash directive and otherwise a line comment.
73909467b48Spatrick AsmToken TokenBuf[2];
74009467b48Spatrick MutableArrayRef<AsmToken> Buf(TokenBuf, 2);
74109467b48Spatrick size_t num = peekTokens(Buf, true);
74209467b48Spatrick // There cannot be a space preceding this
74309467b48Spatrick if (IsAtStartOfLine && num == 2 && TokenBuf[0].is(AsmToken::Integer) &&
74409467b48Spatrick TokenBuf[1].is(AsmToken::String)) {
74509467b48Spatrick CurPtr = TokStart; // reset curPtr;
74609467b48Spatrick StringRef s = LexUntilEndOfLine();
74709467b48Spatrick UnLex(TokenBuf[1]);
74809467b48Spatrick UnLex(TokenBuf[0]);
74909467b48Spatrick return AsmToken(AsmToken::HashDirective, s);
75009467b48Spatrick }
75173471bf0Spatrick
75273471bf0Spatrick if (MAI.shouldAllowAdditionalComments())
75309467b48Spatrick return LexLineComment();
75409467b48Spatrick }
75509467b48Spatrick
75609467b48Spatrick if (isAtStartOfComment(TokStart))
75709467b48Spatrick return LexLineComment();
75809467b48Spatrick
75909467b48Spatrick if (isAtStatementSeparator(TokStart)) {
76009467b48Spatrick CurPtr += strlen(MAI.getSeparatorString()) - 1;
76109467b48Spatrick IsAtStartOfLine = true;
76209467b48Spatrick IsAtStartOfStatement = true;
76309467b48Spatrick return AsmToken(AsmToken::EndOfStatement,
76409467b48Spatrick StringRef(TokStart, strlen(MAI.getSeparatorString())));
76509467b48Spatrick }
76609467b48Spatrick
76709467b48Spatrick // If we're missing a newline at EOF, make sure we still get an
76809467b48Spatrick // EndOfStatement token before the Eof token.
769097a140dSpatrick if (CurChar == EOF && !IsAtStartOfStatement && EndStatementAtEOF) {
77009467b48Spatrick IsAtStartOfLine = true;
77109467b48Spatrick IsAtStartOfStatement = true;
77273471bf0Spatrick return AsmToken(AsmToken::EndOfStatement, StringRef(TokStart, 0));
77309467b48Spatrick }
77409467b48Spatrick IsAtStartOfLine = false;
77509467b48Spatrick bool OldIsAtStartOfStatement = IsAtStartOfStatement;
77609467b48Spatrick IsAtStartOfStatement = false;
77709467b48Spatrick switch (CurChar) {
77809467b48Spatrick default:
77973471bf0Spatrick // Handle identifier: [a-zA-Z_.?][a-zA-Z0-9_$.@#?]*
78073471bf0Spatrick if (isalpha(CurChar) || CurChar == '_' || CurChar == '.' ||
78173471bf0Spatrick (MAI.doesAllowQuestionAtStartOfIdentifier() && CurChar == '?'))
782097a140dSpatrick return LexIdentifier();
78309467b48Spatrick
78409467b48Spatrick // Unknown character, emit an error.
78509467b48Spatrick return ReturnError(TokStart, "invalid character in input");
78609467b48Spatrick case EOF:
787097a140dSpatrick if (EndStatementAtEOF) {
78809467b48Spatrick IsAtStartOfLine = true;
78909467b48Spatrick IsAtStartOfStatement = true;
790097a140dSpatrick }
79109467b48Spatrick return AsmToken(AsmToken::Eof, StringRef(TokStart, 0));
79209467b48Spatrick case 0:
79309467b48Spatrick case ' ':
79409467b48Spatrick case '\t':
79509467b48Spatrick IsAtStartOfStatement = OldIsAtStartOfStatement;
79609467b48Spatrick while (*CurPtr == ' ' || *CurPtr == '\t')
79709467b48Spatrick CurPtr++;
79809467b48Spatrick if (SkipSpace)
79909467b48Spatrick return LexToken(); // Ignore whitespace.
80009467b48Spatrick else
80109467b48Spatrick return AsmToken(AsmToken::Space, StringRef(TokStart, CurPtr - TokStart));
80209467b48Spatrick case '\r': {
80309467b48Spatrick IsAtStartOfLine = true;
80409467b48Spatrick IsAtStartOfStatement = true;
80509467b48Spatrick // If this is a CR followed by LF, treat that as one token.
80609467b48Spatrick if (CurPtr != CurBuf.end() && *CurPtr == '\n')
80709467b48Spatrick ++CurPtr;
80809467b48Spatrick return AsmToken(AsmToken::EndOfStatement,
80909467b48Spatrick StringRef(TokStart, CurPtr - TokStart));
81009467b48Spatrick }
81109467b48Spatrick case '\n':
81209467b48Spatrick IsAtStartOfLine = true;
81309467b48Spatrick IsAtStartOfStatement = true;
81409467b48Spatrick return AsmToken(AsmToken::EndOfStatement, StringRef(TokStart, 1));
81509467b48Spatrick case ':': return AsmToken(AsmToken::Colon, StringRef(TokStart, 1));
81609467b48Spatrick case '+': return AsmToken(AsmToken::Plus, StringRef(TokStart, 1));
81709467b48Spatrick case '~': return AsmToken(AsmToken::Tilde, StringRef(TokStart, 1));
81809467b48Spatrick case '(': return AsmToken(AsmToken::LParen, StringRef(TokStart, 1));
81909467b48Spatrick case ')': return AsmToken(AsmToken::RParen, StringRef(TokStart, 1));
82009467b48Spatrick case '[': return AsmToken(AsmToken::LBrac, StringRef(TokStart, 1));
82109467b48Spatrick case ']': return AsmToken(AsmToken::RBrac, StringRef(TokStart, 1));
82209467b48Spatrick case '{': return AsmToken(AsmToken::LCurly, StringRef(TokStart, 1));
82309467b48Spatrick case '}': return AsmToken(AsmToken::RCurly, StringRef(TokStart, 1));
82409467b48Spatrick case '*': return AsmToken(AsmToken::Star, StringRef(TokStart, 1));
82509467b48Spatrick case ',': return AsmToken(AsmToken::Comma, StringRef(TokStart, 1));
82673471bf0Spatrick case '$': {
82773471bf0Spatrick if (LexMotorolaIntegers && isHexDigit(*CurPtr))
82873471bf0Spatrick return LexDigit();
82973471bf0Spatrick if (MAI.doesAllowDollarAtStartOfIdentifier())
83073471bf0Spatrick return LexIdentifier();
83173471bf0Spatrick return AsmToken(AsmToken::Dollar, StringRef(TokStart, 1));
83273471bf0Spatrick }
83373471bf0Spatrick case '@': {
83473471bf0Spatrick if (MAI.doesAllowAtAtStartOfIdentifier())
83573471bf0Spatrick return LexIdentifier();
83673471bf0Spatrick return AsmToken(AsmToken::At, StringRef(TokStart, 1));
83773471bf0Spatrick }
83809467b48Spatrick case '\\': return AsmToken(AsmToken::BackSlash, StringRef(TokStart, 1));
83909467b48Spatrick case '=':
84009467b48Spatrick if (*CurPtr == '=') {
84109467b48Spatrick ++CurPtr;
84209467b48Spatrick return AsmToken(AsmToken::EqualEqual, StringRef(TokStart, 2));
84309467b48Spatrick }
84409467b48Spatrick return AsmToken(AsmToken::Equal, StringRef(TokStart, 1));
84509467b48Spatrick case '-':
84609467b48Spatrick if (*CurPtr == '>') {
84709467b48Spatrick ++CurPtr;
84809467b48Spatrick return AsmToken(AsmToken::MinusGreater, StringRef(TokStart, 2));
84909467b48Spatrick }
85009467b48Spatrick return AsmToken(AsmToken::Minus, StringRef(TokStart, 1));
85109467b48Spatrick case '|':
85209467b48Spatrick if (*CurPtr == '|') {
85309467b48Spatrick ++CurPtr;
85409467b48Spatrick return AsmToken(AsmToken::PipePipe, StringRef(TokStart, 2));
85509467b48Spatrick }
85609467b48Spatrick return AsmToken(AsmToken::Pipe, StringRef(TokStart, 1));
85709467b48Spatrick case '^': return AsmToken(AsmToken::Caret, StringRef(TokStart, 1));
85809467b48Spatrick case '&':
85909467b48Spatrick if (*CurPtr == '&') {
86009467b48Spatrick ++CurPtr;
86109467b48Spatrick return AsmToken(AsmToken::AmpAmp, StringRef(TokStart, 2));
86209467b48Spatrick }
86309467b48Spatrick return AsmToken(AsmToken::Amp, StringRef(TokStart, 1));
86409467b48Spatrick case '!':
86509467b48Spatrick if (*CurPtr == '=') {
86609467b48Spatrick ++CurPtr;
86709467b48Spatrick return AsmToken(AsmToken::ExclaimEqual, StringRef(TokStart, 2));
86809467b48Spatrick }
86909467b48Spatrick return AsmToken(AsmToken::Exclaim, StringRef(TokStart, 1));
87009467b48Spatrick case '%':
87173471bf0Spatrick if (LexMotorolaIntegers && (*CurPtr == '0' || *CurPtr == '1')) {
87273471bf0Spatrick return LexDigit();
87373471bf0Spatrick }
87473471bf0Spatrick
87509467b48Spatrick if (MAI.hasMipsExpressions()) {
87609467b48Spatrick AsmToken::TokenKind Operator;
87709467b48Spatrick unsigned OperatorLength;
87809467b48Spatrick
87909467b48Spatrick std::tie(Operator, OperatorLength) =
88009467b48Spatrick StringSwitch<std::pair<AsmToken::TokenKind, unsigned>>(
88109467b48Spatrick StringRef(CurPtr))
88209467b48Spatrick .StartsWith("call16", {AsmToken::PercentCall16, 7})
88309467b48Spatrick .StartsWith("call_hi", {AsmToken::PercentCall_Hi, 8})
88409467b48Spatrick .StartsWith("call_lo", {AsmToken::PercentCall_Lo, 8})
88509467b48Spatrick .StartsWith("dtprel_hi", {AsmToken::PercentDtprel_Hi, 10})
88609467b48Spatrick .StartsWith("dtprel_lo", {AsmToken::PercentDtprel_Lo, 10})
88709467b48Spatrick .StartsWith("got_disp", {AsmToken::PercentGot_Disp, 9})
88809467b48Spatrick .StartsWith("got_hi", {AsmToken::PercentGot_Hi, 7})
88909467b48Spatrick .StartsWith("got_lo", {AsmToken::PercentGot_Lo, 7})
89009467b48Spatrick .StartsWith("got_ofst", {AsmToken::PercentGot_Ofst, 9})
89109467b48Spatrick .StartsWith("got_page", {AsmToken::PercentGot_Page, 9})
89209467b48Spatrick .StartsWith("gottprel", {AsmToken::PercentGottprel, 9})
89309467b48Spatrick .StartsWith("got", {AsmToken::PercentGot, 4})
89409467b48Spatrick .StartsWith("gp_rel", {AsmToken::PercentGp_Rel, 7})
89509467b48Spatrick .StartsWith("higher", {AsmToken::PercentHigher, 7})
89609467b48Spatrick .StartsWith("highest", {AsmToken::PercentHighest, 8})
89709467b48Spatrick .StartsWith("hi", {AsmToken::PercentHi, 3})
89809467b48Spatrick .StartsWith("lo", {AsmToken::PercentLo, 3})
89909467b48Spatrick .StartsWith("neg", {AsmToken::PercentNeg, 4})
90009467b48Spatrick .StartsWith("pcrel_hi", {AsmToken::PercentPcrel_Hi, 9})
90109467b48Spatrick .StartsWith("pcrel_lo", {AsmToken::PercentPcrel_Lo, 9})
90209467b48Spatrick .StartsWith("tlsgd", {AsmToken::PercentTlsgd, 6})
90309467b48Spatrick .StartsWith("tlsldm", {AsmToken::PercentTlsldm, 7})
90409467b48Spatrick .StartsWith("tprel_hi", {AsmToken::PercentTprel_Hi, 9})
90509467b48Spatrick .StartsWith("tprel_lo", {AsmToken::PercentTprel_Lo, 9})
90609467b48Spatrick .Default({AsmToken::Percent, 1});
90709467b48Spatrick
90809467b48Spatrick if (Operator != AsmToken::Percent) {
90909467b48Spatrick CurPtr += OperatorLength - 1;
91009467b48Spatrick return AsmToken(Operator, StringRef(TokStart, OperatorLength));
91109467b48Spatrick }
91209467b48Spatrick }
91309467b48Spatrick return AsmToken(AsmToken::Percent, StringRef(TokStart, 1));
91409467b48Spatrick case '/':
91509467b48Spatrick IsAtStartOfStatement = OldIsAtStartOfStatement;
91609467b48Spatrick return LexSlash();
91773471bf0Spatrick case '#': {
91873471bf0Spatrick if (MAI.doesAllowHashAtStartOfIdentifier())
91973471bf0Spatrick return LexIdentifier();
92073471bf0Spatrick return AsmToken(AsmToken::Hash, StringRef(TokStart, 1));
92173471bf0Spatrick }
92209467b48Spatrick case '\'': return LexSingleQuote();
92309467b48Spatrick case '"': return LexQuote();
92409467b48Spatrick case '0': case '1': case '2': case '3': case '4':
92509467b48Spatrick case '5': case '6': case '7': case '8': case '9':
92609467b48Spatrick return LexDigit();
92709467b48Spatrick case '<':
92809467b48Spatrick switch (*CurPtr) {
92909467b48Spatrick case '<':
93009467b48Spatrick ++CurPtr;
93109467b48Spatrick return AsmToken(AsmToken::LessLess, StringRef(TokStart, 2));
93209467b48Spatrick case '=':
93309467b48Spatrick ++CurPtr;
93409467b48Spatrick return AsmToken(AsmToken::LessEqual, StringRef(TokStart, 2));
93509467b48Spatrick case '>':
93609467b48Spatrick ++CurPtr;
93709467b48Spatrick return AsmToken(AsmToken::LessGreater, StringRef(TokStart, 2));
93809467b48Spatrick default:
93909467b48Spatrick return AsmToken(AsmToken::Less, StringRef(TokStart, 1));
94009467b48Spatrick }
94109467b48Spatrick case '>':
94209467b48Spatrick switch (*CurPtr) {
94309467b48Spatrick case '>':
94409467b48Spatrick ++CurPtr;
94509467b48Spatrick return AsmToken(AsmToken::GreaterGreater, StringRef(TokStart, 2));
94609467b48Spatrick case '=':
94709467b48Spatrick ++CurPtr;
94809467b48Spatrick return AsmToken(AsmToken::GreaterEqual, StringRef(TokStart, 2));
94909467b48Spatrick default:
95009467b48Spatrick return AsmToken(AsmToken::Greater, StringRef(TokStart, 1));
95109467b48Spatrick }
95209467b48Spatrick
95309467b48Spatrick // TODO: Quoted identifiers (objc methods etc)
95409467b48Spatrick // local labels: [0-9][:]
95509467b48Spatrick // Forward/backward labels: [0-9][fb]
95609467b48Spatrick // Integers, fp constants, character constants.
95709467b48Spatrick }
95809467b48Spatrick }
959