xref: /openbsd-src/gnu/llvm/llvm/lib/MC/MCParser/AsmLexer.cpp (revision d415bd752c734aee168c4ee86ff32e8cc249eb16)
//===- AsmLexer.cpp - Lexer for Assembly Files ----------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This class implements the lexer for assembly files.
//
//===----------------------------------------------------------------------===//
1209467b48Spatrick 
1309467b48Spatrick #include "llvm/MC/MCParser/AsmLexer.h"
1409467b48Spatrick #include "llvm/ADT/APInt.h"
1509467b48Spatrick #include "llvm/ADT/ArrayRef.h"
1609467b48Spatrick #include "llvm/ADT/StringExtras.h"
1709467b48Spatrick #include "llvm/ADT/StringRef.h"
1809467b48Spatrick #include "llvm/ADT/StringSwitch.h"
1909467b48Spatrick #include "llvm/MC/MCAsmInfo.h"
2009467b48Spatrick #include "llvm/MC/MCParser/MCAsmLexer.h"
2173471bf0Spatrick #include "llvm/Support/Compiler.h"
2209467b48Spatrick #include "llvm/Support/SMLoc.h"
2309467b48Spatrick #include "llvm/Support/SaveAndRestore.h"
2409467b48Spatrick #include <cassert>
2509467b48Spatrick #include <cctype>
2609467b48Spatrick #include <cstdio>
2709467b48Spatrick #include <cstring>
2809467b48Spatrick #include <string>
2909467b48Spatrick #include <tuple>
3009467b48Spatrick #include <utility>
3109467b48Spatrick 
3209467b48Spatrick using namespace llvm;
3309467b48Spatrick 
AsmLexer(const MCAsmInfo & MAI)3409467b48Spatrick AsmLexer::AsmLexer(const MCAsmInfo &MAI) : MAI(MAI) {
3509467b48Spatrick   AllowAtInIdentifier = !StringRef(MAI.getCommentString()).startswith("@");
3673471bf0Spatrick   LexMotorolaIntegers = MAI.shouldUseMotorolaIntegers();
3709467b48Spatrick }
3809467b48Spatrick 
3909467b48Spatrick AsmLexer::~AsmLexer() = default;
4009467b48Spatrick 
setBuffer(StringRef Buf,const char * ptr,bool EndStatementAtEOF)41097a140dSpatrick void AsmLexer::setBuffer(StringRef Buf, const char *ptr,
42097a140dSpatrick                          bool EndStatementAtEOF) {
4309467b48Spatrick   CurBuf = Buf;
4409467b48Spatrick 
4509467b48Spatrick   if (ptr)
4609467b48Spatrick     CurPtr = ptr;
4709467b48Spatrick   else
4809467b48Spatrick     CurPtr = CurBuf.begin();
4909467b48Spatrick 
5009467b48Spatrick   TokStart = nullptr;
51097a140dSpatrick   this->EndStatementAtEOF = EndStatementAtEOF;
5209467b48Spatrick }
5309467b48Spatrick 
5409467b48Spatrick /// ReturnError - Set the error to the specified string at the specified
5509467b48Spatrick /// location.  This is defined to always return AsmToken::Error.
ReturnError(const char * Loc,const std::string & Msg)5609467b48Spatrick AsmToken AsmLexer::ReturnError(const char *Loc, const std::string &Msg) {
5709467b48Spatrick   SetError(SMLoc::getFromPointer(Loc), Msg);
5809467b48Spatrick 
5909467b48Spatrick   return AsmToken(AsmToken::Error, StringRef(Loc, CurPtr - Loc));
6009467b48Spatrick }
6109467b48Spatrick 
getNextChar()6209467b48Spatrick int AsmLexer::getNextChar() {
6309467b48Spatrick   if (CurPtr == CurBuf.end())
6409467b48Spatrick     return EOF;
6509467b48Spatrick   return (unsigned char)*CurPtr++;
6609467b48Spatrick }
6709467b48Spatrick 
peekNextChar()6873471bf0Spatrick int AsmLexer::peekNextChar() {
6973471bf0Spatrick   if (CurPtr == CurBuf.end())
7073471bf0Spatrick     return EOF;
7173471bf0Spatrick   return (unsigned char)*CurPtr;
7273471bf0Spatrick }
7373471bf0Spatrick 
7409467b48Spatrick /// The leading integral digit sequence and dot should have already been
7509467b48Spatrick /// consumed, some or all of the fractional digit sequence *can* have been
7609467b48Spatrick /// consumed.
LexFloatLiteral()7709467b48Spatrick AsmToken AsmLexer::LexFloatLiteral() {
7809467b48Spatrick   // Skip the fractional digit sequence.
7909467b48Spatrick   while (isDigit(*CurPtr))
8009467b48Spatrick     ++CurPtr;
8109467b48Spatrick 
8209467b48Spatrick   if (*CurPtr == '-' || *CurPtr == '+')
8373471bf0Spatrick     return ReturnError(CurPtr, "invalid sign in float literal");
8409467b48Spatrick 
8509467b48Spatrick   // Check for exponent
8609467b48Spatrick   if ((*CurPtr == 'e' || *CurPtr == 'E')) {
8709467b48Spatrick     ++CurPtr;
8809467b48Spatrick 
8909467b48Spatrick     if (*CurPtr == '-' || *CurPtr == '+')
9009467b48Spatrick       ++CurPtr;
9109467b48Spatrick 
9209467b48Spatrick     while (isDigit(*CurPtr))
9309467b48Spatrick       ++CurPtr;
9409467b48Spatrick   }
9509467b48Spatrick 
9609467b48Spatrick   return AsmToken(AsmToken::Real,
9709467b48Spatrick                   StringRef(TokStart, CurPtr - TokStart));
9809467b48Spatrick }
9909467b48Spatrick 
10009467b48Spatrick /// LexHexFloatLiteral matches essentially (.[0-9a-fA-F]*)?[pP][+-]?[0-9a-fA-F]+
10109467b48Spatrick /// while making sure there are enough actual digits around for the constant to
10209467b48Spatrick /// be valid.
10309467b48Spatrick ///
10409467b48Spatrick /// The leading "0x[0-9a-fA-F]*" (i.e. integer part) has already been consumed
10509467b48Spatrick /// before we get here.
LexHexFloatLiteral(bool NoIntDigits)10609467b48Spatrick AsmToken AsmLexer::LexHexFloatLiteral(bool NoIntDigits) {
10709467b48Spatrick   assert((*CurPtr == 'p' || *CurPtr == 'P' || *CurPtr == '.') &&
10809467b48Spatrick          "unexpected parse state in floating hex");
10909467b48Spatrick   bool NoFracDigits = true;
11009467b48Spatrick 
11109467b48Spatrick   // Skip the fractional part if there is one
11209467b48Spatrick   if (*CurPtr == '.') {
11309467b48Spatrick     ++CurPtr;
11409467b48Spatrick 
11509467b48Spatrick     const char *FracStart = CurPtr;
11609467b48Spatrick     while (isHexDigit(*CurPtr))
11709467b48Spatrick       ++CurPtr;
11809467b48Spatrick 
11909467b48Spatrick     NoFracDigits = CurPtr == FracStart;
12009467b48Spatrick   }
12109467b48Spatrick 
12209467b48Spatrick   if (NoIntDigits && NoFracDigits)
12309467b48Spatrick     return ReturnError(TokStart, "invalid hexadecimal floating-point constant: "
12409467b48Spatrick                                  "expected at least one significand digit");
12509467b48Spatrick 
12609467b48Spatrick   // Make sure we do have some kind of proper exponent part
12709467b48Spatrick   if (*CurPtr != 'p' && *CurPtr != 'P')
12809467b48Spatrick     return ReturnError(TokStart, "invalid hexadecimal floating-point constant: "
12909467b48Spatrick                                  "expected exponent part 'p'");
13009467b48Spatrick   ++CurPtr;
13109467b48Spatrick 
13209467b48Spatrick   if (*CurPtr == '+' || *CurPtr == '-')
13309467b48Spatrick     ++CurPtr;
13409467b48Spatrick 
13509467b48Spatrick   // N.b. exponent digits are *not* hex
13609467b48Spatrick   const char *ExpStart = CurPtr;
13709467b48Spatrick   while (isDigit(*CurPtr))
13809467b48Spatrick     ++CurPtr;
13909467b48Spatrick 
14009467b48Spatrick   if (CurPtr == ExpStart)
14109467b48Spatrick     return ReturnError(TokStart, "invalid hexadecimal floating-point constant: "
14209467b48Spatrick                                  "expected at least one exponent digit");
14309467b48Spatrick 
14409467b48Spatrick   return AsmToken(AsmToken::Real, StringRef(TokStart, CurPtr - TokStart));
14509467b48Spatrick }
14609467b48Spatrick 
14773471bf0Spatrick /// LexIdentifier: [a-zA-Z_$.@?][a-zA-Z0-9_$.@#?]*
isIdentifierChar(char C,bool AllowAt,bool AllowHash)14873471bf0Spatrick static bool isIdentifierChar(char C, bool AllowAt, bool AllowHash) {
14973471bf0Spatrick   return isAlnum(C) || C == '_' || C == '$' || C == '.' || C == '?' ||
15073471bf0Spatrick          (AllowAt && C == '@') || (AllowHash && C == '#');
15109467b48Spatrick }
15209467b48Spatrick 
LexIdentifier()15309467b48Spatrick AsmToken AsmLexer::LexIdentifier() {
15409467b48Spatrick   // Check for floating point literals.
15509467b48Spatrick   if (CurPtr[-1] == '.' && isDigit(*CurPtr)) {
15609467b48Spatrick     // Disambiguate a .1243foo identifier from a floating literal.
15709467b48Spatrick     while (isDigit(*CurPtr))
15809467b48Spatrick       ++CurPtr;
15909467b48Spatrick 
16073471bf0Spatrick     if (!isIdentifierChar(*CurPtr, AllowAtInIdentifier,
16173471bf0Spatrick                           AllowHashInIdentifier) ||
16209467b48Spatrick         *CurPtr == 'e' || *CurPtr == 'E')
16309467b48Spatrick       return LexFloatLiteral();
16409467b48Spatrick   }
16509467b48Spatrick 
16673471bf0Spatrick   while (isIdentifierChar(*CurPtr, AllowAtInIdentifier, AllowHashInIdentifier))
16709467b48Spatrick     ++CurPtr;
16809467b48Spatrick 
16909467b48Spatrick   // Handle . as a special case.
17009467b48Spatrick   if (CurPtr == TokStart+1 && TokStart[0] == '.')
17109467b48Spatrick     return AsmToken(AsmToken::Dot, StringRef(TokStart, 1));
17209467b48Spatrick 
17309467b48Spatrick   return AsmToken(AsmToken::Identifier, StringRef(TokStart, CurPtr - TokStart));
17409467b48Spatrick }
17509467b48Spatrick 
17609467b48Spatrick /// LexSlash: Slash: /
17709467b48Spatrick ///           C-Style Comment: /* ... */
17873471bf0Spatrick ///           C-style Comment: // ...
LexSlash()17909467b48Spatrick AsmToken AsmLexer::LexSlash() {
18073471bf0Spatrick   if (!MAI.shouldAllowAdditionalComments()) {
18173471bf0Spatrick     IsAtStartOfStatement = false;
18273471bf0Spatrick     return AsmToken(AsmToken::Slash, StringRef(TokStart, 1));
18373471bf0Spatrick   }
18473471bf0Spatrick 
18509467b48Spatrick   switch (*CurPtr) {
18609467b48Spatrick   case '*':
18709467b48Spatrick     IsAtStartOfStatement = false;
18809467b48Spatrick     break; // C style comment.
18909467b48Spatrick   case '/':
19009467b48Spatrick     ++CurPtr;
19109467b48Spatrick     return LexLineComment();
19209467b48Spatrick   default:
19309467b48Spatrick     IsAtStartOfStatement = false;
19409467b48Spatrick     return AsmToken(AsmToken::Slash, StringRef(TokStart, 1));
19509467b48Spatrick   }
19609467b48Spatrick 
19709467b48Spatrick   // C Style comment.
19809467b48Spatrick   ++CurPtr;  // skip the star.
19909467b48Spatrick   const char *CommentTextStart = CurPtr;
20009467b48Spatrick   while (CurPtr != CurBuf.end()) {
20109467b48Spatrick     switch (*CurPtr++) {
20209467b48Spatrick     case '*':
20309467b48Spatrick       // End of the comment?
20409467b48Spatrick       if (*CurPtr != '/')
20509467b48Spatrick         break;
20609467b48Spatrick       // If we have a CommentConsumer, notify it about the comment.
20709467b48Spatrick       if (CommentConsumer) {
20809467b48Spatrick         CommentConsumer->HandleComment(
20909467b48Spatrick             SMLoc::getFromPointer(CommentTextStart),
21009467b48Spatrick             StringRef(CommentTextStart, CurPtr - 1 - CommentTextStart));
21109467b48Spatrick       }
21209467b48Spatrick       ++CurPtr;   // End the */.
21309467b48Spatrick       return AsmToken(AsmToken::Comment,
21409467b48Spatrick                       StringRef(TokStart, CurPtr - TokStart));
21509467b48Spatrick     }
21609467b48Spatrick   }
21709467b48Spatrick   return ReturnError(TokStart, "unterminated comment");
21809467b48Spatrick }
21909467b48Spatrick 
22009467b48Spatrick /// LexLineComment: Comment: #[^\n]*
22109467b48Spatrick ///                        : //[^\n]*
LexLineComment()22209467b48Spatrick AsmToken AsmLexer::LexLineComment() {
22309467b48Spatrick   // Mark This as an end of statement with a body of the
22409467b48Spatrick   // comment. While it would be nicer to leave this two tokens,
22509467b48Spatrick   // backwards compatability with TargetParsers makes keeping this in this form
22609467b48Spatrick   // better.
22709467b48Spatrick   const char *CommentTextStart = CurPtr;
22809467b48Spatrick   int CurChar = getNextChar();
22909467b48Spatrick   while (CurChar != '\n' && CurChar != '\r' && CurChar != EOF)
23009467b48Spatrick     CurChar = getNextChar();
231*d415bd75Srobert   const char *NewlinePtr = CurPtr;
23209467b48Spatrick   if (CurChar == '\r' && CurPtr != CurBuf.end() && *CurPtr == '\n')
23309467b48Spatrick     ++CurPtr;
23409467b48Spatrick 
23509467b48Spatrick   // If we have a CommentConsumer, notify it about the comment.
23609467b48Spatrick   if (CommentConsumer) {
23709467b48Spatrick     CommentConsumer->HandleComment(
23809467b48Spatrick         SMLoc::getFromPointer(CommentTextStart),
239*d415bd75Srobert         StringRef(CommentTextStart, NewlinePtr - 1 - CommentTextStart));
24009467b48Spatrick   }
24109467b48Spatrick 
24209467b48Spatrick   IsAtStartOfLine = true;
24309467b48Spatrick   // This is a whole line comment. leave newline
24409467b48Spatrick   if (IsAtStartOfStatement)
24509467b48Spatrick     return AsmToken(AsmToken::EndOfStatement,
24609467b48Spatrick                     StringRef(TokStart, CurPtr - TokStart));
24709467b48Spatrick   IsAtStartOfStatement = true;
24809467b48Spatrick 
24909467b48Spatrick   return AsmToken(AsmToken::EndOfStatement,
25009467b48Spatrick                   StringRef(TokStart, CurPtr - 1 - TokStart));
25109467b48Spatrick }
25209467b48Spatrick 
/// Advance \p CurPtr past an optional integer type suffix.
///
/// Accepts, case-insensitively, at most one 'U' followed by at most two 'L's
/// (i.e. U, L, LL, UL, ULL). \p CurPtr is left untouched when no suffix
/// character is present.
static void SkipIgnoredIntegerSuffix(const char *&CurPtr) {
  // Skip case-insensitive ULL, UL, U, L and LL suffixes.
  if (CurPtr[0] == 'U' || CurPtr[0] == 'u')
    ++CurPtr;
  if (CurPtr[0] == 'L' || CurPtr[0] == 'l')
    ++CurPtr;
  if (CurPtr[0] == 'L' || CurPtr[0] == 'l')
    ++CurPtr;
}
26209467b48Spatrick 
26309467b48Spatrick // Look ahead to search for first non-hex digit, if it's [hH], then we treat the
26409467b48Spatrick // integer as a hexadecimal, possibly with leading zeroes.
doHexLookAhead(const char * & CurPtr,unsigned DefaultRadix,bool LexHex)26509467b48Spatrick static unsigned doHexLookAhead(const char *&CurPtr, unsigned DefaultRadix,
26609467b48Spatrick                                bool LexHex) {
26709467b48Spatrick   const char *FirstNonDec = nullptr;
26809467b48Spatrick   const char *LookAhead = CurPtr;
26909467b48Spatrick   while (true) {
27009467b48Spatrick     if (isDigit(*LookAhead)) {
27109467b48Spatrick       ++LookAhead;
27209467b48Spatrick     } else {
27309467b48Spatrick       if (!FirstNonDec)
27409467b48Spatrick         FirstNonDec = LookAhead;
27509467b48Spatrick 
27609467b48Spatrick       // Keep going if we are looking for a 'h' suffix.
27709467b48Spatrick       if (LexHex && isHexDigit(*LookAhead))
27809467b48Spatrick         ++LookAhead;
27909467b48Spatrick       else
28009467b48Spatrick         break;
28109467b48Spatrick     }
28209467b48Spatrick   }
28309467b48Spatrick   bool isHex = LexHex && (*LookAhead == 'h' || *LookAhead == 'H');
28409467b48Spatrick   CurPtr = isHex || !FirstNonDec ? LookAhead : FirstNonDec;
28509467b48Spatrick   if (isHex)
28609467b48Spatrick     return 16;
28709467b48Spatrick   return DefaultRadix;
28809467b48Spatrick }
28909467b48Spatrick 
findLastDigit(const char * CurPtr,unsigned DefaultRadix)29073471bf0Spatrick static const char *findLastDigit(const char *CurPtr, unsigned DefaultRadix) {
29173471bf0Spatrick   while (hexDigitValue(*CurPtr) < DefaultRadix) {
29273471bf0Spatrick     ++CurPtr;
29373471bf0Spatrick   }
29473471bf0Spatrick   return CurPtr;
29573471bf0Spatrick }
29673471bf0Spatrick 
intToken(StringRef Ref,APInt & Value)29773471bf0Spatrick static AsmToken intToken(StringRef Ref, APInt &Value) {
29809467b48Spatrick   if (Value.isIntN(64))
29909467b48Spatrick     return AsmToken(AsmToken::Integer, Ref, Value);
30009467b48Spatrick   return AsmToken(AsmToken::BigNum, Ref, Value);
30109467b48Spatrick }
30209467b48Spatrick 
/// Return a human-readable name for \p Radix for use in diagnostics:
/// "binary", "octal", "decimal" or "hexadecimal" for 2, 8, 10 and 16, and
/// "base-N" for any other value.
static std::string radixName(unsigned Radix) {
  switch (Radix) {
  case 2:
    return "binary";
  case 8:
    return "octal";
  case 10:
    return "decimal";
  case 16:
    return "hexadecimal";
  default:
    return "base-" + std::to_string(Radix);
  }
}
31773471bf0Spatrick 
31809467b48Spatrick /// LexDigit: First character is [0-9].
31909467b48Spatrick ///   Local Label: [0-9][:]
32009467b48Spatrick ///   Forward/Backward Label: [0-9][fb]
32109467b48Spatrick ///   Binary integer: 0b[01]+
32209467b48Spatrick ///   Octal integer: 0[0-7]+
32309467b48Spatrick ///   Hex integer: 0x[0-9a-fA-F]+ or [0x]?[0-9][0-9a-fA-F]*[hH]
32409467b48Spatrick ///   Decimal integer: [1-9][0-9]*
LexDigit()32509467b48Spatrick AsmToken AsmLexer::LexDigit() {
32673471bf0Spatrick   // MASM-flavor binary integer: [01]+[yY] (if DefaultRadix < 16, [bByY])
32773471bf0Spatrick   // MASM-flavor octal integer: [0-7]+[oOqQ]
32873471bf0Spatrick   // MASM-flavor decimal integer: [0-9]+[tT] (if DefaultRadix < 16, [dDtT])
32909467b48Spatrick   // MASM-flavor hexadecimal integer: [0-9][0-9a-fA-F]*[hH]
33009467b48Spatrick   if (LexMasmIntegers && isdigit(CurPtr[-1])) {
33173471bf0Spatrick     const char *FirstNonBinary =
33273471bf0Spatrick         (CurPtr[-1] != '0' && CurPtr[-1] != '1') ? CurPtr - 1 : nullptr;
33373471bf0Spatrick     const char *FirstNonDecimal =
33473471bf0Spatrick         (CurPtr[-1] < '0' || CurPtr[-1] > '9') ? CurPtr - 1 : nullptr;
33509467b48Spatrick     const char *OldCurPtr = CurPtr;
33609467b48Spatrick     while (isHexDigit(*CurPtr)) {
33773471bf0Spatrick       switch (*CurPtr) {
33873471bf0Spatrick       default:
33973471bf0Spatrick         if (!FirstNonDecimal) {
34073471bf0Spatrick           FirstNonDecimal = CurPtr;
34173471bf0Spatrick         }
342*d415bd75Srobert         [[fallthrough]];
34373471bf0Spatrick       case '9':
34473471bf0Spatrick       case '8':
34573471bf0Spatrick       case '7':
34673471bf0Spatrick       case '6':
34773471bf0Spatrick       case '5':
34873471bf0Spatrick       case '4':
34973471bf0Spatrick       case '3':
35073471bf0Spatrick       case '2':
35173471bf0Spatrick         if (!FirstNonBinary) {
35209467b48Spatrick           FirstNonBinary = CurPtr;
35373471bf0Spatrick         }
35473471bf0Spatrick         break;
35573471bf0Spatrick       case '1':
35673471bf0Spatrick       case '0':
35773471bf0Spatrick         break;
35873471bf0Spatrick       }
35909467b48Spatrick       ++CurPtr;
36009467b48Spatrick     }
36173471bf0Spatrick     if (*CurPtr == '.') {
36273471bf0Spatrick       // MASM float literals (other than hex floats) always contain a ".", and
36373471bf0Spatrick       // are always written in decimal.
36473471bf0Spatrick       ++CurPtr;
36573471bf0Spatrick       return LexFloatLiteral();
36673471bf0Spatrick     }
36773471bf0Spatrick 
36873471bf0Spatrick     if (LexMasmHexFloats && (*CurPtr == 'r' || *CurPtr == 'R')) {
36973471bf0Spatrick       ++CurPtr;
37073471bf0Spatrick       return AsmToken(AsmToken::Real, StringRef(TokStart, CurPtr - TokStart));
37173471bf0Spatrick     }
37209467b48Spatrick 
37309467b48Spatrick     unsigned Radix = 0;
37409467b48Spatrick     if (*CurPtr == 'h' || *CurPtr == 'H') {
37509467b48Spatrick       // hexadecimal number
37609467b48Spatrick       ++CurPtr;
37709467b48Spatrick       Radix = 16;
37873471bf0Spatrick     } else if (*CurPtr == 't' || *CurPtr == 'T') {
37973471bf0Spatrick       // decimal number
38073471bf0Spatrick       ++CurPtr;
38173471bf0Spatrick       Radix = 10;
38273471bf0Spatrick     } else if (*CurPtr == 'o' || *CurPtr == 'O' || *CurPtr == 'q' ||
38373471bf0Spatrick                *CurPtr == 'Q') {
38473471bf0Spatrick       // octal number
38573471bf0Spatrick       ++CurPtr;
38673471bf0Spatrick       Radix = 8;
38773471bf0Spatrick     } else if (*CurPtr == 'y' || *CurPtr == 'Y') {
38873471bf0Spatrick       // binary number
38973471bf0Spatrick       ++CurPtr;
39009467b48Spatrick       Radix = 2;
39173471bf0Spatrick     } else if (FirstNonDecimal && FirstNonDecimal + 1 == CurPtr &&
39273471bf0Spatrick                DefaultRadix < 14 &&
39373471bf0Spatrick                (*FirstNonDecimal == 'd' || *FirstNonDecimal == 'D')) {
39473471bf0Spatrick       Radix = 10;
39573471bf0Spatrick     } else if (FirstNonBinary && FirstNonBinary + 1 == CurPtr &&
39673471bf0Spatrick                DefaultRadix < 12 &&
39773471bf0Spatrick                (*FirstNonBinary == 'b' || *FirstNonBinary == 'B')) {
39873471bf0Spatrick       Radix = 2;
39973471bf0Spatrick     }
40009467b48Spatrick 
40173471bf0Spatrick     if (Radix) {
40209467b48Spatrick       StringRef Result(TokStart, CurPtr - TokStart);
40309467b48Spatrick       APInt Value(128, 0, true);
40409467b48Spatrick 
40509467b48Spatrick       if (Result.drop_back().getAsInteger(Radix, Value))
40673471bf0Spatrick         return ReturnError(TokStart, "invalid " + radixName(Radix) + " number");
40709467b48Spatrick 
40809467b48Spatrick       // MSVC accepts and ignores type suffices on integer literals.
40909467b48Spatrick       SkipIgnoredIntegerSuffix(CurPtr);
41009467b48Spatrick 
41109467b48Spatrick       return intToken(Result, Value);
41209467b48Spatrick     }
41309467b48Spatrick 
41473471bf0Spatrick     // default-radix integers, or floating point numbers, fall through
41509467b48Spatrick     CurPtr = OldCurPtr;
41609467b48Spatrick   }
41709467b48Spatrick 
41873471bf0Spatrick   // MASM default-radix integers: [0-9a-fA-F]+
41973471bf0Spatrick   // (All other integer literals have a radix specifier.)
42073471bf0Spatrick   if (LexMasmIntegers && UseMasmDefaultRadix) {
42173471bf0Spatrick     CurPtr = findLastDigit(CurPtr, 16);
42273471bf0Spatrick     StringRef Result(TokStart, CurPtr - TokStart);
42373471bf0Spatrick 
42473471bf0Spatrick     APInt Value(128, 0, true);
42573471bf0Spatrick     if (Result.getAsInteger(DefaultRadix, Value)) {
42673471bf0Spatrick       return ReturnError(TokStart,
42773471bf0Spatrick                          "invalid " + radixName(DefaultRadix) + " number");
42873471bf0Spatrick     }
42973471bf0Spatrick 
43073471bf0Spatrick     return intToken(Result, Value);
43173471bf0Spatrick   }
43273471bf0Spatrick 
43373471bf0Spatrick   // Motorola hex integers: $[0-9a-fA-F]+
43473471bf0Spatrick   if (LexMotorolaIntegers && CurPtr[-1] == '$') {
43573471bf0Spatrick     const char *NumStart = CurPtr;
43673471bf0Spatrick     while (isHexDigit(CurPtr[0]))
43773471bf0Spatrick       ++CurPtr;
43873471bf0Spatrick 
43973471bf0Spatrick     APInt Result(128, 0);
44073471bf0Spatrick     if (StringRef(NumStart, CurPtr - NumStart).getAsInteger(16, Result))
44173471bf0Spatrick       return ReturnError(TokStart, "invalid hexadecimal number");
44273471bf0Spatrick 
44373471bf0Spatrick     return intToken(StringRef(TokStart, CurPtr - TokStart), Result);
44473471bf0Spatrick   }
44573471bf0Spatrick 
44673471bf0Spatrick   // Motorola binary integers: %[01]+
44773471bf0Spatrick   if (LexMotorolaIntegers && CurPtr[-1] == '%') {
44873471bf0Spatrick     const char *NumStart = CurPtr;
44973471bf0Spatrick     while (*CurPtr == '0' || *CurPtr == '1')
45073471bf0Spatrick       ++CurPtr;
45173471bf0Spatrick 
45273471bf0Spatrick     APInt Result(128, 0);
45373471bf0Spatrick     if (StringRef(NumStart, CurPtr - NumStart).getAsInteger(2, Result))
45473471bf0Spatrick       return ReturnError(TokStart, "invalid binary number");
45573471bf0Spatrick 
45673471bf0Spatrick     return intToken(StringRef(TokStart, CurPtr - TokStart), Result);
45773471bf0Spatrick   }
45873471bf0Spatrick 
45909467b48Spatrick   // Decimal integer: [1-9][0-9]*
46073471bf0Spatrick   // HLASM-flavour decimal integer: [0-9][0-9]*
46173471bf0Spatrick   // FIXME: Later on, support for fb for HLASM has to be added in
46273471bf0Spatrick   // as they probably would be needed for asm goto
46373471bf0Spatrick   if (LexHLASMIntegers || CurPtr[-1] != '0' || CurPtr[0] == '.') {
46409467b48Spatrick     unsigned Radix = doHexLookAhead(CurPtr, 10, LexMasmIntegers);
46573471bf0Spatrick 
46673471bf0Spatrick     if (!LexHLASMIntegers) {
46773471bf0Spatrick       bool IsHex = Radix == 16;
46809467b48Spatrick       // Check for floating point literals.
46973471bf0Spatrick       if (!IsHex && (*CurPtr == '.' || *CurPtr == 'e' || *CurPtr == 'E')) {
47009467b48Spatrick         if (*CurPtr == '.')
47109467b48Spatrick           ++CurPtr;
47209467b48Spatrick         return LexFloatLiteral();
47309467b48Spatrick       }
47473471bf0Spatrick     }
47509467b48Spatrick 
47609467b48Spatrick     StringRef Result(TokStart, CurPtr - TokStart);
47709467b48Spatrick 
47809467b48Spatrick     APInt Value(128, 0, true);
47909467b48Spatrick     if (Result.getAsInteger(Radix, Value))
48073471bf0Spatrick       return ReturnError(TokStart, "invalid " + radixName(Radix) + " number");
48109467b48Spatrick 
48273471bf0Spatrick     if (!LexHLASMIntegers)
48309467b48Spatrick       // The darwin/x86 (and x86-64) assembler accepts and ignores type
48409467b48Spatrick       // suffices on integer literals.
48509467b48Spatrick       SkipIgnoredIntegerSuffix(CurPtr);
48609467b48Spatrick 
48709467b48Spatrick     return intToken(Result, Value);
48809467b48Spatrick   }
48909467b48Spatrick 
49009467b48Spatrick   if (!LexMasmIntegers && ((*CurPtr == 'b') || (*CurPtr == 'B'))) {
49109467b48Spatrick     ++CurPtr;
49209467b48Spatrick     // See if we actually have "0b" as part of something like "jmp 0b\n"
49309467b48Spatrick     if (!isDigit(CurPtr[0])) {
49409467b48Spatrick       --CurPtr;
49509467b48Spatrick       StringRef Result(TokStart, CurPtr - TokStart);
49609467b48Spatrick       return AsmToken(AsmToken::Integer, Result, 0);
49709467b48Spatrick     }
49809467b48Spatrick     const char *NumStart = CurPtr;
49909467b48Spatrick     while (CurPtr[0] == '0' || CurPtr[0] == '1')
50009467b48Spatrick       ++CurPtr;
50109467b48Spatrick 
50209467b48Spatrick     // Requires at least one binary digit.
50309467b48Spatrick     if (CurPtr == NumStart)
50409467b48Spatrick       return ReturnError(TokStart, "invalid binary number");
50509467b48Spatrick 
50609467b48Spatrick     StringRef Result(TokStart, CurPtr - TokStart);
50709467b48Spatrick 
50809467b48Spatrick     APInt Value(128, 0, true);
50909467b48Spatrick     if (Result.substr(2).getAsInteger(2, Value))
51009467b48Spatrick       return ReturnError(TokStart, "invalid binary number");
51109467b48Spatrick 
51209467b48Spatrick     // The darwin/x86 (and x86-64) assembler accepts and ignores ULL and LL
51309467b48Spatrick     // suffixes on integer literals.
51409467b48Spatrick     SkipIgnoredIntegerSuffix(CurPtr);
51509467b48Spatrick 
51609467b48Spatrick     return intToken(Result, Value);
51709467b48Spatrick   }
51809467b48Spatrick 
51909467b48Spatrick   if ((*CurPtr == 'x') || (*CurPtr == 'X')) {
52009467b48Spatrick     ++CurPtr;
52109467b48Spatrick     const char *NumStart = CurPtr;
52209467b48Spatrick     while (isHexDigit(CurPtr[0]))
52309467b48Spatrick       ++CurPtr;
52409467b48Spatrick 
52509467b48Spatrick     // "0x.0p0" is valid, and "0x0p0" (but not "0xp0" for example, which will be
52609467b48Spatrick     // diagnosed by LexHexFloatLiteral).
52709467b48Spatrick     if (CurPtr[0] == '.' || CurPtr[0] == 'p' || CurPtr[0] == 'P')
52809467b48Spatrick       return LexHexFloatLiteral(NumStart == CurPtr);
52909467b48Spatrick 
53009467b48Spatrick     // Otherwise requires at least one hex digit.
53109467b48Spatrick     if (CurPtr == NumStart)
53209467b48Spatrick       return ReturnError(CurPtr-2, "invalid hexadecimal number");
53309467b48Spatrick 
53409467b48Spatrick     APInt Result(128, 0);
53509467b48Spatrick     if (StringRef(TokStart, CurPtr - TokStart).getAsInteger(0, Result))
53609467b48Spatrick       return ReturnError(TokStart, "invalid hexadecimal number");
53709467b48Spatrick 
53809467b48Spatrick     // Consume the optional [hH].
53909467b48Spatrick     if (LexMasmIntegers && (*CurPtr == 'h' || *CurPtr == 'H'))
54009467b48Spatrick       ++CurPtr;
54109467b48Spatrick 
54209467b48Spatrick     // The darwin/x86 (and x86-64) assembler accepts and ignores ULL and LL
54309467b48Spatrick     // suffixes on integer literals.
54409467b48Spatrick     SkipIgnoredIntegerSuffix(CurPtr);
54509467b48Spatrick 
54609467b48Spatrick     return intToken(StringRef(TokStart, CurPtr - TokStart), Result);
54709467b48Spatrick   }
54809467b48Spatrick 
54909467b48Spatrick   // Either octal or hexadecimal.
55009467b48Spatrick   APInt Value(128, 0, true);
55109467b48Spatrick   unsigned Radix = doHexLookAhead(CurPtr, 8, LexMasmIntegers);
55209467b48Spatrick   StringRef Result(TokStart, CurPtr - TokStart);
55309467b48Spatrick   if (Result.getAsInteger(Radix, Value))
55473471bf0Spatrick     return ReturnError(TokStart, "invalid " + radixName(Radix) + " number");
55509467b48Spatrick 
55609467b48Spatrick   // Consume the [hH].
55709467b48Spatrick   if (Radix == 16)
55809467b48Spatrick     ++CurPtr;
55909467b48Spatrick 
56009467b48Spatrick   // The darwin/x86 (and x86-64) assembler accepts and ignores ULL and LL
56109467b48Spatrick   // suffixes on integer literals.
56209467b48Spatrick   SkipIgnoredIntegerSuffix(CurPtr);
56309467b48Spatrick 
56409467b48Spatrick   return intToken(Result, Value);
56509467b48Spatrick }
56609467b48Spatrick 
56709467b48Spatrick /// LexSingleQuote: Integer: 'b'
LexSingleQuote()56809467b48Spatrick AsmToken AsmLexer::LexSingleQuote() {
56909467b48Spatrick   int CurChar = getNextChar();
57009467b48Spatrick 
57173471bf0Spatrick   if (LexHLASMStrings)
57273471bf0Spatrick     return ReturnError(TokStart, "invalid usage of character literals");
57373471bf0Spatrick 
57473471bf0Spatrick   if (LexMasmStrings) {
57573471bf0Spatrick     while (CurChar != EOF) {
57673471bf0Spatrick       if (CurChar != '\'') {
57773471bf0Spatrick         CurChar = getNextChar();
57873471bf0Spatrick       } else if (peekNextChar() == '\'') {
57973471bf0Spatrick         // In MASM single-quote strings, doubled single-quotes mean an escaped
58073471bf0Spatrick         // single quote, so should be lexed in.
58173471bf0Spatrick         getNextChar();
58273471bf0Spatrick         CurChar = getNextChar();
58373471bf0Spatrick       } else {
58473471bf0Spatrick         break;
58573471bf0Spatrick       }
58673471bf0Spatrick     }
58773471bf0Spatrick     if (CurChar == EOF)
58873471bf0Spatrick       return ReturnError(TokStart, "unterminated string constant");
58973471bf0Spatrick     return AsmToken(AsmToken::String, StringRef(TokStart, CurPtr - TokStart));
59073471bf0Spatrick   }
59173471bf0Spatrick 
59209467b48Spatrick   if (CurChar == '\\')
59309467b48Spatrick     CurChar = getNextChar();
59409467b48Spatrick 
59509467b48Spatrick   if (CurChar == EOF)
59609467b48Spatrick     return ReturnError(TokStart, "unterminated single quote");
59709467b48Spatrick 
59809467b48Spatrick   CurChar = getNextChar();
59909467b48Spatrick 
60009467b48Spatrick   if (CurChar != '\'')
60109467b48Spatrick     return ReturnError(TokStart, "single quote way too long");
60209467b48Spatrick 
60309467b48Spatrick   // The idea here being that 'c' is basically just an integral
60409467b48Spatrick   // constant.
60509467b48Spatrick   StringRef Res = StringRef(TokStart,CurPtr - TokStart);
60609467b48Spatrick   long long Value;
60709467b48Spatrick 
60809467b48Spatrick   if (Res.startswith("\'\\")) {
60909467b48Spatrick     char theChar = Res[2];
61009467b48Spatrick     switch (theChar) {
61109467b48Spatrick       default: Value = theChar; break;
61209467b48Spatrick       case '\'': Value = '\''; break;
61309467b48Spatrick       case 't': Value = '\t'; break;
61409467b48Spatrick       case 'n': Value = '\n'; break;
61509467b48Spatrick       case 'b': Value = '\b'; break;
61673471bf0Spatrick       case 'f': Value = '\f'; break;
61773471bf0Spatrick       case 'r': Value = '\r'; break;
61809467b48Spatrick     }
61909467b48Spatrick   } else
62009467b48Spatrick     Value = TokStart[1];
62109467b48Spatrick 
62209467b48Spatrick   return AsmToken(AsmToken::Integer, Res, Value);
62309467b48Spatrick }
62409467b48Spatrick 
62509467b48Spatrick /// LexQuote: String: "..."
LexQuote()62609467b48Spatrick AsmToken AsmLexer::LexQuote() {
62709467b48Spatrick   int CurChar = getNextChar();
62873471bf0Spatrick   if (LexHLASMStrings)
62973471bf0Spatrick     return ReturnError(TokStart, "invalid usage of string literals");
63073471bf0Spatrick 
63173471bf0Spatrick   if (LexMasmStrings) {
63273471bf0Spatrick     while (CurChar != EOF) {
63373471bf0Spatrick       if (CurChar != '"') {
63473471bf0Spatrick         CurChar = getNextChar();
63573471bf0Spatrick       } else if (peekNextChar() == '"') {
63673471bf0Spatrick         // In MASM double-quoted strings, doubled double-quotes mean an escaped
63773471bf0Spatrick         // double quote, so should be lexed in.
63873471bf0Spatrick         getNextChar();
63973471bf0Spatrick         CurChar = getNextChar();
64073471bf0Spatrick       } else {
64173471bf0Spatrick         break;
64273471bf0Spatrick       }
64373471bf0Spatrick     }
64473471bf0Spatrick     if (CurChar == EOF)
64573471bf0Spatrick       return ReturnError(TokStart, "unterminated string constant");
64673471bf0Spatrick     return AsmToken(AsmToken::String, StringRef(TokStart, CurPtr - TokStart));
64773471bf0Spatrick   }
64873471bf0Spatrick 
64909467b48Spatrick   // TODO: does gas allow multiline string constants?
65009467b48Spatrick   while (CurChar != '"') {
65109467b48Spatrick     if (CurChar == '\\') {
65209467b48Spatrick       // Allow \", etc.
65309467b48Spatrick       CurChar = getNextChar();
65409467b48Spatrick     }
65509467b48Spatrick 
65609467b48Spatrick     if (CurChar == EOF)
65709467b48Spatrick       return ReturnError(TokStart, "unterminated string constant");
65809467b48Spatrick 
65909467b48Spatrick     CurChar = getNextChar();
66009467b48Spatrick   }
66109467b48Spatrick 
66209467b48Spatrick   return AsmToken(AsmToken::String, StringRef(TokStart, CurPtr - TokStart));
66309467b48Spatrick }
66409467b48Spatrick 
LexUntilEndOfStatement()66509467b48Spatrick StringRef AsmLexer::LexUntilEndOfStatement() {
66609467b48Spatrick   TokStart = CurPtr;
66709467b48Spatrick 
66809467b48Spatrick   while (!isAtStartOfComment(CurPtr) &&     // Start of line comment.
66909467b48Spatrick          !isAtStatementSeparator(CurPtr) && // End of statement marker.
67009467b48Spatrick          *CurPtr != '\n' && *CurPtr != '\r' && CurPtr != CurBuf.end()) {
67109467b48Spatrick     ++CurPtr;
67209467b48Spatrick   }
67309467b48Spatrick   return StringRef(TokStart, CurPtr-TokStart);
67409467b48Spatrick }
67509467b48Spatrick 
LexUntilEndOfLine()67609467b48Spatrick StringRef AsmLexer::LexUntilEndOfLine() {
67709467b48Spatrick   TokStart = CurPtr;
67809467b48Spatrick 
67909467b48Spatrick   while (*CurPtr != '\n' && *CurPtr != '\r' && CurPtr != CurBuf.end()) {
68009467b48Spatrick     ++CurPtr;
68109467b48Spatrick   }
68209467b48Spatrick   return StringRef(TokStart, CurPtr-TokStart);
68309467b48Spatrick }
68409467b48Spatrick 
peekTokens(MutableArrayRef<AsmToken> Buf,bool ShouldSkipSpace)68509467b48Spatrick size_t AsmLexer::peekTokens(MutableArrayRef<AsmToken> Buf,
68609467b48Spatrick                             bool ShouldSkipSpace) {
687*d415bd75Srobert   SaveAndRestore SavedTokenStart(TokStart);
688*d415bd75Srobert   SaveAndRestore SavedCurPtr(CurPtr);
689*d415bd75Srobert   SaveAndRestore SavedAtStartOfLine(IsAtStartOfLine);
690*d415bd75Srobert   SaveAndRestore SavedAtStartOfStatement(IsAtStartOfStatement);
691*d415bd75Srobert   SaveAndRestore SavedSkipSpace(SkipSpace, ShouldSkipSpace);
692*d415bd75Srobert   SaveAndRestore SavedIsPeeking(IsPeeking, true);
69309467b48Spatrick   std::string SavedErr = getErr();
69409467b48Spatrick   SMLoc SavedErrLoc = getErrLoc();
69509467b48Spatrick 
69609467b48Spatrick   size_t ReadCount;
69709467b48Spatrick   for (ReadCount = 0; ReadCount < Buf.size(); ++ReadCount) {
69809467b48Spatrick     AsmToken Token = LexToken();
69909467b48Spatrick 
70009467b48Spatrick     Buf[ReadCount] = Token;
70109467b48Spatrick 
70209467b48Spatrick     if (Token.is(AsmToken::Eof))
70309467b48Spatrick       break;
70409467b48Spatrick   }
70509467b48Spatrick 
70609467b48Spatrick   SetError(SavedErrLoc, SavedErr);
70709467b48Spatrick   return ReadCount;
70809467b48Spatrick }
70909467b48Spatrick 
isAtStartOfComment(const char * Ptr)71009467b48Spatrick bool AsmLexer::isAtStartOfComment(const char *Ptr) {
71173471bf0Spatrick   if (MAI.getRestrictCommentStringToStartOfStatement() && !IsAtStartOfStatement)
71273471bf0Spatrick     return false;
71373471bf0Spatrick 
71409467b48Spatrick   StringRef CommentString = MAI.getCommentString();
71509467b48Spatrick 
71609467b48Spatrick   if (CommentString.size() == 1)
71709467b48Spatrick     return CommentString[0] == Ptr[0];
71809467b48Spatrick 
719*d415bd75Srobert   // Allow # preprocessor comments also be counted as comments for "##" cases
72009467b48Spatrick   if (CommentString[1] == '#')
72109467b48Spatrick     return CommentString[0] == Ptr[0];
72209467b48Spatrick 
72309467b48Spatrick   return strncmp(Ptr, CommentString.data(), CommentString.size()) == 0;
72409467b48Spatrick }
72509467b48Spatrick 
isAtStatementSeparator(const char * Ptr)72609467b48Spatrick bool AsmLexer::isAtStatementSeparator(const char *Ptr) {
72709467b48Spatrick   return strncmp(Ptr, MAI.getSeparatorString(),
72809467b48Spatrick                  strlen(MAI.getSeparatorString())) == 0;
72909467b48Spatrick }
73009467b48Spatrick 
/// Lex and return the next token starting at CurPtr.
///
/// TokStart is set to the first character of the token. IsAtStartOfLine and
/// IsAtStartOfStatement are updated as a side effect: they are set after an
/// end-of-statement token, cleared for ordinary tokens, and left as they were
/// across whitespace and '/'.
LexToken()73109467b48Spatrick AsmToken AsmLexer::LexToken() {
73209467b48Spatrick   TokStart = CurPtr;
73309467b48Spatrick   // This always consumes at least one character.
73409467b48Spatrick   int CurChar = getNextChar();
73509467b48Spatrick 
  // The lookahead below is skipped while IsPeeking is set; peekTokens() sets
  // that flag and calls back into LexToken().
73609467b48Spatrick   if (!IsPeeking && CurChar == '#' && IsAtStartOfStatement) {
73709467b48Spatrick     // If this starts with a '#', this may be a cpp
73809467b48Spatrick     // hash directive and otherwise a line comment.
73909467b48Spatrick     AsmToken TokenBuf[2];
74009467b48Spatrick     MutableArrayRef<AsmToken> Buf(TokenBuf, 2);
74109467b48Spatrick     size_t num = peekTokens(Buf, true);
74209467b48Spatrick     // There cannot be a space preceding this
    // '#' at the very start of a line followed by an Integer and a String
    // token: return the whole line as a HashDirective token and push the two
    // peeked tokens back with UnLex.
    // NOTE(review): TokenBuf[1] is un-lexed before TokenBuf[0], presumably so
    // that TokenBuf[0] is the first one handed out again; confirm against
    // UnLex.
74309467b48Spatrick     if (IsAtStartOfLine && num == 2 && TokenBuf[0].is(AsmToken::Integer) &&
74409467b48Spatrick         TokenBuf[1].is(AsmToken::String)) {
74509467b48Spatrick       CurPtr = TokStart; // reset curPtr;
74609467b48Spatrick       StringRef s = LexUntilEndOfLine();
74709467b48Spatrick       UnLex(TokenBuf[1]);
74809467b48Spatrick       UnLex(TokenBuf[0]);
74909467b48Spatrick       return AsmToken(AsmToken::HashDirective, s);
75009467b48Spatrick     }
75173471bf0Spatrick 
    // Otherwise, when the target allows additional comments, a '#' at the
    // start of a statement is lexed as a line comment.
75273471bf0Spatrick     if (MAI.shouldAllowAdditionalComments())
75309467b48Spatrick       return LexLineComment();
75409467b48Spatrick   }
75509467b48Spatrick 
75609467b48Spatrick   if (isAtStartOfComment(TokStart))
75709467b48Spatrick     return LexLineComment();
75809467b48Spatrick 
75909467b48Spatrick   if (isAtStatementSeparator(TokStart)) {
    // One character of the separator was already consumed by getNextChar()
    // above, hence the "- 1".
76009467b48Spatrick     CurPtr += strlen(MAI.getSeparatorString()) - 1;
76109467b48Spatrick     IsAtStartOfLine = true;
76209467b48Spatrick     IsAtStartOfStatement = true;
76309467b48Spatrick     return AsmToken(AsmToken::EndOfStatement,
76409467b48Spatrick                     StringRef(TokStart, strlen(MAI.getSeparatorString())));
76509467b48Spatrick   }
76609467b48Spatrick 
76709467b48Spatrick   // If we're missing a newline at EOF, make sure we still get an
76809467b48Spatrick   // EndOfStatement token before the Eof token.
769097a140dSpatrick   if (CurChar == EOF && !IsAtStartOfStatement && EndStatementAtEOF) {
77009467b48Spatrick     IsAtStartOfLine = true;
77109467b48Spatrick     IsAtStartOfStatement = true;
77273471bf0Spatrick     return AsmToken(AsmToken::EndOfStatement, StringRef(TokStart, 0));
77309467b48Spatrick   }
  // Ordinary tokens clear both flags. The previous statement-start state is
  // kept so the whitespace and '/' cases below can restore it.
77409467b48Spatrick   IsAtStartOfLine = false;
77509467b48Spatrick   bool OldIsAtStartOfStatement = IsAtStartOfStatement;
77609467b48Spatrick   IsAtStartOfStatement = false;
77709467b48Spatrick   switch (CurChar) {
77809467b48Spatrick   default:
77973471bf0Spatrick     // Handle identifier: [a-zA-Z_.?][a-zA-Z0-9_$.@#?]*
78073471bf0Spatrick     if (isalpha(CurChar) || CurChar == '_' || CurChar == '.' ||
78173471bf0Spatrick         (MAI.doesAllowQuestionAtStartOfIdentifier() && CurChar == '?'))
782097a140dSpatrick       return LexIdentifier();
78309467b48Spatrick 
78409467b48Spatrick     // Unknown character, emit an error.
78509467b48Spatrick     return ReturnError(TokStart, "invalid character in input");
78609467b48Spatrick   case EOF:
    // The start-of-line/statement flags are only set again at EOF when
    // EndStatementAtEOF is enabled.
787097a140dSpatrick     if (EndStatementAtEOF) {
78809467b48Spatrick       IsAtStartOfLine = true;
78909467b48Spatrick       IsAtStartOfStatement = true;
790097a140dSpatrick     }
79109467b48Spatrick     return AsmToken(AsmToken::Eof, StringRef(TokStart, 0));
  // Whitespace; note that a NUL byte is handled the same way as a space or
  // tab here.
79209467b48Spatrick   case 0:
79309467b48Spatrick   case ' ':
79409467b48Spatrick   case '\t':
    // Whitespace does not end the "start of statement" state.
79509467b48Spatrick     IsAtStartOfStatement = OldIsAtStartOfStatement;
    // Fold the rest of the run of spaces and tabs into this token.
79609467b48Spatrick     while (*CurPtr == ' ' || *CurPtr == '\t')
79709467b48Spatrick       CurPtr++;
79809467b48Spatrick     if (SkipSpace)
79909467b48Spatrick       return LexToken(); // Ignore whitespace.
80009467b48Spatrick     else
80109467b48Spatrick       return AsmToken(AsmToken::Space, StringRef(TokStart, CurPtr - TokStart));
80209467b48Spatrick   case '\r': {
80309467b48Spatrick     IsAtStartOfLine = true;
80409467b48Spatrick     IsAtStartOfStatement = true;
80509467b48Spatrick     // If this is a CR followed by LF, treat that as one token.
80609467b48Spatrick     if (CurPtr != CurBuf.end() && *CurPtr == '\n')
80709467b48Spatrick       ++CurPtr;
80809467b48Spatrick     return AsmToken(AsmToken::EndOfStatement,
80909467b48Spatrick                     StringRef(TokStart, CurPtr - TokStart));
81009467b48Spatrick   }
81109467b48Spatrick   case '\n':
81209467b48Spatrick     IsAtStartOfLine = true;
81309467b48Spatrick     IsAtStartOfStatement = true;
81409467b48Spatrick     return AsmToken(AsmToken::EndOfStatement, StringRef(TokStart, 1));
  // Single-character punctuation tokens.
81509467b48Spatrick   case ':': return AsmToken(AsmToken::Colon, StringRef(TokStart, 1));
81609467b48Spatrick   case '+': return AsmToken(AsmToken::Plus, StringRef(TokStart, 1));
81709467b48Spatrick   case '~': return AsmToken(AsmToken::Tilde, StringRef(TokStart, 1));
81809467b48Spatrick   case '(': return AsmToken(AsmToken::LParen, StringRef(TokStart, 1));
81909467b48Spatrick   case ')': return AsmToken(AsmToken::RParen, StringRef(TokStart, 1));
82009467b48Spatrick   case '[': return AsmToken(AsmToken::LBrac, StringRef(TokStart, 1));
82109467b48Spatrick   case ']': return AsmToken(AsmToken::RBrac, StringRef(TokStart, 1));
82209467b48Spatrick   case '{': return AsmToken(AsmToken::LCurly, StringRef(TokStart, 1));
82309467b48Spatrick   case '}': return AsmToken(AsmToken::RCurly, StringRef(TokStart, 1));
82409467b48Spatrick   case '*': return AsmToken(AsmToken::Star, StringRef(TokStart, 1));
82509467b48Spatrick   case ',': return AsmToken(AsmToken::Comma, StringRef(TokStart, 1));
82673471bf0Spatrick   case '$': {
    // In order of precedence: with Motorola integers enabled and a hex digit
    // following, hand off to LexDigit; if the target allows '$' to start an
    // identifier, lex an identifier; otherwise a plain Dollar token.
82773471bf0Spatrick     if (LexMotorolaIntegers && isHexDigit(*CurPtr))
82873471bf0Spatrick       return LexDigit();
82973471bf0Spatrick     if (MAI.doesAllowDollarAtStartOfIdentifier())
83073471bf0Spatrick       return LexIdentifier();
83173471bf0Spatrick     return AsmToken(AsmToken::Dollar, StringRef(TokStart, 1));
83273471bf0Spatrick   }
83373471bf0Spatrick   case '@': {
83473471bf0Spatrick     if (MAI.doesAllowAtAtStartOfIdentifier())
83573471bf0Spatrick       return LexIdentifier();
83673471bf0Spatrick     return AsmToken(AsmToken::At, StringRef(TokStart, 1));
83773471bf0Spatrick   }
83809467b48Spatrick   case '\\': return AsmToken(AsmToken::BackSlash, StringRef(TokStart, 1));
  // Operators that may be one or two characters long: look at the next
  // character and consume it only when it completes a two-character token.
83909467b48Spatrick   case '=':
84009467b48Spatrick     if (*CurPtr == '=') {
84109467b48Spatrick       ++CurPtr;
84209467b48Spatrick       return AsmToken(AsmToken::EqualEqual, StringRef(TokStart, 2));
84309467b48Spatrick     }
84409467b48Spatrick     return AsmToken(AsmToken::Equal, StringRef(TokStart, 1));
84509467b48Spatrick   case '-':
84609467b48Spatrick     if (*CurPtr == '>') {
84709467b48Spatrick       ++CurPtr;
84809467b48Spatrick       return AsmToken(AsmToken::MinusGreater, StringRef(TokStart, 2));
84909467b48Spatrick     }
85009467b48Spatrick     return AsmToken(AsmToken::Minus, StringRef(TokStart, 1));
85109467b48Spatrick   case '|':
85209467b48Spatrick     if (*CurPtr == '|') {
85309467b48Spatrick       ++CurPtr;
85409467b48Spatrick       return AsmToken(AsmToken::PipePipe, StringRef(TokStart, 2));
85509467b48Spatrick     }
85609467b48Spatrick     return AsmToken(AsmToken::Pipe, StringRef(TokStart, 1));
85709467b48Spatrick   case '^': return AsmToken(AsmToken::Caret, StringRef(TokStart, 1));
85809467b48Spatrick   case '&':
85909467b48Spatrick     if (*CurPtr == '&') {
86009467b48Spatrick       ++CurPtr;
86109467b48Spatrick       return AsmToken(AsmToken::AmpAmp, StringRef(TokStart, 2));
86209467b48Spatrick     }
86309467b48Spatrick     return AsmToken(AsmToken::Amp, StringRef(TokStart, 1));
86409467b48Spatrick   case '!':
86509467b48Spatrick     if (*CurPtr == '=') {
86609467b48Spatrick       ++CurPtr;
86709467b48Spatrick       return AsmToken(AsmToken::ExclaimEqual, StringRef(TokStart, 2));
86809467b48Spatrick     }
86909467b48Spatrick     return AsmToken(AsmToken::Exclaim, StringRef(TokStart, 1));
87009467b48Spatrick   case '%':
    // With Motorola integers enabled, '%' followed by '0' or '1' is handed to
    // LexDigit.
87173471bf0Spatrick     if (LexMotorolaIntegers && (*CurPtr == '0' || *CurPtr == '1')) {
87273471bf0Spatrick       return LexDigit();
87373471bf0Spatrick     }
87473471bf0Spatrick 
87509467b48Spatrick     if (MAI.hasMipsExpressions()) {
87609467b48Spatrick       AsmToken::TokenKind Operator;
87709467b48Spatrick       unsigned OperatorLength;
87809467b48Spatrick 
      // Match the text after '%' against the known operator names. Each length
      // in the table includes the leading '%' (e.g. "call16" -> 7).
      // NOTE(review): longer names are listed ahead of their prefixes
      // ("gottprel"/"got_*" before "got", "higher"/"highest" before "hi"),
      // presumably because the first matching StartsWith wins; confirm
      // against StringSwitch before reordering.
87909467b48Spatrick       std::tie(Operator, OperatorLength) =
88009467b48Spatrick           StringSwitch<std::pair<AsmToken::TokenKind, unsigned>>(
88109467b48Spatrick               StringRef(CurPtr))
88209467b48Spatrick               .StartsWith("call16", {AsmToken::PercentCall16, 7})
88309467b48Spatrick               .StartsWith("call_hi", {AsmToken::PercentCall_Hi, 8})
88409467b48Spatrick               .StartsWith("call_lo", {AsmToken::PercentCall_Lo, 8})
88509467b48Spatrick               .StartsWith("dtprel_hi", {AsmToken::PercentDtprel_Hi, 10})
88609467b48Spatrick               .StartsWith("dtprel_lo", {AsmToken::PercentDtprel_Lo, 10})
88709467b48Spatrick               .StartsWith("got_disp", {AsmToken::PercentGot_Disp, 9})
88809467b48Spatrick               .StartsWith("got_hi", {AsmToken::PercentGot_Hi, 7})
88909467b48Spatrick               .StartsWith("got_lo", {AsmToken::PercentGot_Lo, 7})
89009467b48Spatrick               .StartsWith("got_ofst", {AsmToken::PercentGot_Ofst, 9})
89109467b48Spatrick               .StartsWith("got_page", {AsmToken::PercentGot_Page, 9})
89209467b48Spatrick               .StartsWith("gottprel", {AsmToken::PercentGottprel, 9})
89309467b48Spatrick               .StartsWith("got", {AsmToken::PercentGot, 4})
89409467b48Spatrick               .StartsWith("gp_rel", {AsmToken::PercentGp_Rel, 7})
89509467b48Spatrick               .StartsWith("higher", {AsmToken::PercentHigher, 7})
89609467b48Spatrick               .StartsWith("highest", {AsmToken::PercentHighest, 8})
89709467b48Spatrick               .StartsWith("hi", {AsmToken::PercentHi, 3})
89809467b48Spatrick               .StartsWith("lo", {AsmToken::PercentLo, 3})
89909467b48Spatrick               .StartsWith("neg", {AsmToken::PercentNeg, 4})
90009467b48Spatrick               .StartsWith("pcrel_hi", {AsmToken::PercentPcrel_Hi, 9})
90109467b48Spatrick               .StartsWith("pcrel_lo", {AsmToken::PercentPcrel_Lo, 9})
90209467b48Spatrick               .StartsWith("tlsgd", {AsmToken::PercentTlsgd, 6})
90309467b48Spatrick               .StartsWith("tlsldm", {AsmToken::PercentTlsldm, 7})
90409467b48Spatrick               .StartsWith("tprel_hi", {AsmToken::PercentTprel_Hi, 9})
90509467b48Spatrick               .StartsWith("tprel_lo", {AsmToken::PercentTprel_Lo, 9})
90609467b48Spatrick               .Default({AsmToken::Percent, 1});
90709467b48Spatrick 
90809467b48Spatrick       if (Operator != AsmToken::Percent) {
        // The '%' itself was already consumed, so skip only the name.
90909467b48Spatrick         CurPtr += OperatorLength - 1;
91009467b48Spatrick         return AsmToken(Operator, StringRef(TokStart, OperatorLength));
91109467b48Spatrick       }
91209467b48Spatrick     }
91309467b48Spatrick     return AsmToken(AsmToken::Percent, StringRef(TokStart, 1));
91409467b48Spatrick   case '/':
    // Restore the statement-start state before delegating to LexSlash.
91509467b48Spatrick     IsAtStartOfStatement = OldIsAtStartOfStatement;
91609467b48Spatrick     return LexSlash();
91773471bf0Spatrick   case '#': {
    // Reached for any '#' that did not already return from the
    // start-of-statement or comment checks at the top of this function.
91873471bf0Spatrick     if (MAI.doesAllowHashAtStartOfIdentifier())
91973471bf0Spatrick       return LexIdentifier();
92073471bf0Spatrick     return AsmToken(AsmToken::Hash, StringRef(TokStart, 1));
92173471bf0Spatrick   }
92209467b48Spatrick   case '\'': return LexSingleQuote();
92309467b48Spatrick   case '"': return LexQuote();
92409467b48Spatrick   case '0': case '1': case '2': case '3': case '4':
92509467b48Spatrick   case '5': case '6': case '7': case '8': case '9':
92609467b48Spatrick     return LexDigit();
  // '<' may begin "<<", "<=" or "<>"; '>' may begin ">>" or ">=".
92709467b48Spatrick   case '<':
92809467b48Spatrick     switch (*CurPtr) {
92909467b48Spatrick     case '<':
93009467b48Spatrick       ++CurPtr;
93109467b48Spatrick       return AsmToken(AsmToken::LessLess, StringRef(TokStart, 2));
93209467b48Spatrick     case '=':
93309467b48Spatrick       ++CurPtr;
93409467b48Spatrick       return AsmToken(AsmToken::LessEqual, StringRef(TokStart, 2));
93509467b48Spatrick     case '>':
93609467b48Spatrick       ++CurPtr;
93709467b48Spatrick       return AsmToken(AsmToken::LessGreater, StringRef(TokStart, 2));
93809467b48Spatrick     default:
93909467b48Spatrick       return AsmToken(AsmToken::Less, StringRef(TokStart, 1));
94009467b48Spatrick     }
94109467b48Spatrick   case '>':
94209467b48Spatrick     switch (*CurPtr) {
94309467b48Spatrick     case '>':
94409467b48Spatrick       ++CurPtr;
94509467b48Spatrick       return AsmToken(AsmToken::GreaterGreater, StringRef(TokStart, 2));
94609467b48Spatrick     case '=':
94709467b48Spatrick       ++CurPtr;
94809467b48Spatrick       return AsmToken(AsmToken::GreaterEqual, StringRef(TokStart, 2));
94909467b48Spatrick     default:
95009467b48Spatrick       return AsmToken(AsmToken::Greater, StringRef(TokStart, 1));
95109467b48Spatrick     }
95209467b48Spatrick 
95309467b48Spatrick   // TODO: Quoted identifiers (objc methods etc)
95409467b48Spatrick   // local labels: [0-9][:]
95509467b48Spatrick   // Forward/backward labels: [0-9][fb]
95609467b48Spatrick   // Integers, fp constants, character constants.
95709467b48Spatrick   }
95809467b48Spatrick }
959