xref: /freebsd-src/contrib/llvm-project/llvm/lib/CodeGen/MIRParser/MILexer.cpp (revision 0fca6ea1d4eea4c934cfff25ac9ee8ad6fe95583)
10b57cec5SDimitry Andric //===- MILexer.cpp - Machine instructions lexer implementation ------------===//
20b57cec5SDimitry Andric //
30b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
40b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
50b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
60b57cec5SDimitry Andric //
70b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
80b57cec5SDimitry Andric //
90b57cec5SDimitry Andric // This file implements the lexing of machine instructions.
100b57cec5SDimitry Andric //
110b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
120b57cec5SDimitry Andric 
130b57cec5SDimitry Andric #include "MILexer.h"
140b57cec5SDimitry Andric #include "llvm/ADT/StringExtras.h"
150b57cec5SDimitry Andric #include "llvm/ADT/StringSwitch.h"
160b57cec5SDimitry Andric #include "llvm/ADT/Twine.h"
170b57cec5SDimitry Andric #include <cassert>
180b57cec5SDimitry Andric #include <cctype>
190b57cec5SDimitry Andric #include <string>
200b57cec5SDimitry Andric 
210b57cec5SDimitry Andric using namespace llvm;
220b57cec5SDimitry Andric 
230b57cec5SDimitry Andric namespace {
240b57cec5SDimitry Andric 
250b57cec5SDimitry Andric using ErrorCallbackType =
260b57cec5SDimitry Andric     function_ref<void(StringRef::iterator Loc, const Twine &)>;
270b57cec5SDimitry Andric 
280b57cec5SDimitry Andric /// This class provides a way to iterate and get characters from the source
290b57cec5SDimitry Andric /// string.
300b57cec5SDimitry Andric class Cursor {
310b57cec5SDimitry Andric   const char *Ptr = nullptr;
320b57cec5SDimitry Andric   const char *End = nullptr;
330b57cec5SDimitry Andric 
340b57cec5SDimitry Andric public:
35bdd1243dSDimitry Andric   Cursor(std::nullopt_t) {}
360b57cec5SDimitry Andric 
370b57cec5SDimitry Andric   explicit Cursor(StringRef Str) {
380b57cec5SDimitry Andric     Ptr = Str.data();
390b57cec5SDimitry Andric     End = Ptr + Str.size();
400b57cec5SDimitry Andric   }
410b57cec5SDimitry Andric 
420b57cec5SDimitry Andric   bool isEOF() const { return Ptr == End; }
430b57cec5SDimitry Andric 
440b57cec5SDimitry Andric   char peek(int I = 0) const { return End - Ptr <= I ? 0 : Ptr[I]; }
450b57cec5SDimitry Andric 
460b57cec5SDimitry Andric   void advance(unsigned I = 1) { Ptr += I; }
470b57cec5SDimitry Andric 
480b57cec5SDimitry Andric   StringRef remaining() const { return StringRef(Ptr, End - Ptr); }
490b57cec5SDimitry Andric 
500b57cec5SDimitry Andric   StringRef upto(Cursor C) const {
510b57cec5SDimitry Andric     assert(C.Ptr >= Ptr && C.Ptr <= End);
520b57cec5SDimitry Andric     return StringRef(Ptr, C.Ptr - Ptr);
530b57cec5SDimitry Andric   }
540b57cec5SDimitry Andric 
550b57cec5SDimitry Andric   StringRef::iterator location() const { return Ptr; }
560b57cec5SDimitry Andric 
570b57cec5SDimitry Andric   operator bool() const { return Ptr != nullptr; }
580b57cec5SDimitry Andric };
590b57cec5SDimitry Andric 
600b57cec5SDimitry Andric } // end anonymous namespace
610b57cec5SDimitry Andric 
620b57cec5SDimitry Andric MIToken &MIToken::reset(TokenKind Kind, StringRef Range) {
630b57cec5SDimitry Andric   this->Kind = Kind;
640b57cec5SDimitry Andric   this->Range = Range;
650b57cec5SDimitry Andric   return *this;
660b57cec5SDimitry Andric }
670b57cec5SDimitry Andric 
680b57cec5SDimitry Andric MIToken &MIToken::setStringValue(StringRef StrVal) {
690b57cec5SDimitry Andric   StringValue = StrVal;
700b57cec5SDimitry Andric   return *this;
710b57cec5SDimitry Andric }
720b57cec5SDimitry Andric 
730b57cec5SDimitry Andric MIToken &MIToken::setOwnedStringValue(std::string StrVal) {
740b57cec5SDimitry Andric   StringValueStorage = std::move(StrVal);
750b57cec5SDimitry Andric   StringValue = StringValueStorage;
760b57cec5SDimitry Andric   return *this;
770b57cec5SDimitry Andric }
780b57cec5SDimitry Andric 
790b57cec5SDimitry Andric MIToken &MIToken::setIntegerValue(APSInt IntVal) {
800b57cec5SDimitry Andric   this->IntVal = std::move(IntVal);
810b57cec5SDimitry Andric   return *this;
820b57cec5SDimitry Andric }
830b57cec5SDimitry Andric 
840b57cec5SDimitry Andric /// Skip the leading whitespace characters and return the updated cursor.
850b57cec5SDimitry Andric static Cursor skipWhitespace(Cursor C) {
860b57cec5SDimitry Andric   while (isblank(C.peek()))
870b57cec5SDimitry Andric     C.advance();
880b57cec5SDimitry Andric   return C;
890b57cec5SDimitry Andric }
900b57cec5SDimitry Andric 
/// Returns true if \p C is a line feed or a carriage return.
static bool isNewlineChar(char C) {
  switch (C) {
  case '\n':
  case '\r':
    return true;
  default:
    return false;
  }
}
920b57cec5SDimitry Andric 
930b57cec5SDimitry Andric /// Skip a line comment and return the updated cursor.
940b57cec5SDimitry Andric static Cursor skipComment(Cursor C) {
950b57cec5SDimitry Andric   if (C.peek() != ';')
960b57cec5SDimitry Andric     return C;
970b57cec5SDimitry Andric   while (!isNewlineChar(C.peek()) && !C.isEOF())
980b57cec5SDimitry Andric     C.advance();
990b57cec5SDimitry Andric   return C;
1000b57cec5SDimitry Andric }
1010b57cec5SDimitry Andric 
1025ffd83dbSDimitry Andric /// Machine operands can have comments, enclosed between /* and */.
1035ffd83dbSDimitry Andric /// This eats up all tokens, including /* and */.
1045ffd83dbSDimitry Andric static Cursor skipMachineOperandComment(Cursor C) {
1055ffd83dbSDimitry Andric   if (C.peek() != '/' || C.peek(1) != '*')
1065ffd83dbSDimitry Andric     return C;
1075ffd83dbSDimitry Andric 
1085ffd83dbSDimitry Andric   while (C.peek() != '*' || C.peek(1) != '/')
1095ffd83dbSDimitry Andric     C.advance();
1105ffd83dbSDimitry Andric 
1115ffd83dbSDimitry Andric   C.advance();
1125ffd83dbSDimitry Andric   C.advance();
1135ffd83dbSDimitry Andric   return C;
1145ffd83dbSDimitry Andric }
1155ffd83dbSDimitry Andric 
/// Return true if the given character satisfies the following regular
/// expression: [-a-zA-Z$._0-9]
static bool isIdentifierChar(char C) {
  // <cctype> classifiers have undefined behavior for negative arguments other
  // than EOF; where char is signed, a byte >= 0x80 would be negative, so it is
  // widened through unsigned char before classification.
  const unsigned char UC = static_cast<unsigned char>(C);
  return isalpha(UC) || isdigit(UC) || C == '_' || C == '-' || C == '.' ||
         C == '$';
}
1220b57cec5SDimitry Andric 
1230b57cec5SDimitry Andric /// Unescapes the given string value.
1240b57cec5SDimitry Andric ///
1250b57cec5SDimitry Andric /// Expects the string value to be quoted.
1260b57cec5SDimitry Andric static std::string unescapeQuotedString(StringRef Value) {
1270b57cec5SDimitry Andric   assert(Value.front() == '"' && Value.back() == '"');
1280b57cec5SDimitry Andric   Cursor C = Cursor(Value.substr(1, Value.size() - 2));
1290b57cec5SDimitry Andric 
1300b57cec5SDimitry Andric   std::string Str;
1310b57cec5SDimitry Andric   Str.reserve(C.remaining().size());
1320b57cec5SDimitry Andric   while (!C.isEOF()) {
1330b57cec5SDimitry Andric     char Char = C.peek();
1340b57cec5SDimitry Andric     if (Char == '\\') {
1350b57cec5SDimitry Andric       if (C.peek(1) == '\\') {
1360b57cec5SDimitry Andric         // Two '\' become one
1370b57cec5SDimitry Andric         Str += '\\';
1380b57cec5SDimitry Andric         C.advance(2);
1390b57cec5SDimitry Andric         continue;
1400b57cec5SDimitry Andric       }
1410b57cec5SDimitry Andric       if (isxdigit(C.peek(1)) && isxdigit(C.peek(2))) {
1420b57cec5SDimitry Andric         Str += hexDigitValue(C.peek(1)) * 16 + hexDigitValue(C.peek(2));
1430b57cec5SDimitry Andric         C.advance(3);
1440b57cec5SDimitry Andric         continue;
1450b57cec5SDimitry Andric       }
1460b57cec5SDimitry Andric     }
1470b57cec5SDimitry Andric     Str += Char;
1480b57cec5SDimitry Andric     C.advance();
1490b57cec5SDimitry Andric   }
1500b57cec5SDimitry Andric   return Str;
1510b57cec5SDimitry Andric }
1520b57cec5SDimitry Andric 
1530b57cec5SDimitry Andric /// Lex a string constant using the following regular expression: \"[^\"]*\"
1540b57cec5SDimitry Andric static Cursor lexStringConstant(Cursor C, ErrorCallbackType ErrorCallback) {
1550b57cec5SDimitry Andric   assert(C.peek() == '"');
1560b57cec5SDimitry Andric   for (C.advance(); C.peek() != '"'; C.advance()) {
1570b57cec5SDimitry Andric     if (C.isEOF() || isNewlineChar(C.peek())) {
1580b57cec5SDimitry Andric       ErrorCallback(
1590b57cec5SDimitry Andric           C.location(),
1600b57cec5SDimitry Andric           "end of machine instruction reached before the closing '\"'");
161bdd1243dSDimitry Andric       return std::nullopt;
1620b57cec5SDimitry Andric     }
1630b57cec5SDimitry Andric   }
1640b57cec5SDimitry Andric   C.advance();
1650b57cec5SDimitry Andric   return C;
1660b57cec5SDimitry Andric }
1670b57cec5SDimitry Andric 
1680b57cec5SDimitry Andric static Cursor lexName(Cursor C, MIToken &Token, MIToken::TokenKind Type,
1690b57cec5SDimitry Andric                       unsigned PrefixLength, ErrorCallbackType ErrorCallback) {
1700b57cec5SDimitry Andric   auto Range = C;
1710b57cec5SDimitry Andric   C.advance(PrefixLength);
1720b57cec5SDimitry Andric   if (C.peek() == '"') {
1730b57cec5SDimitry Andric     if (Cursor R = lexStringConstant(C, ErrorCallback)) {
1740b57cec5SDimitry Andric       StringRef String = Range.upto(R);
1750b57cec5SDimitry Andric       Token.reset(Type, String)
1760b57cec5SDimitry Andric           .setOwnedStringValue(
1770b57cec5SDimitry Andric               unescapeQuotedString(String.drop_front(PrefixLength)));
1780b57cec5SDimitry Andric       return R;
1790b57cec5SDimitry Andric     }
1800b57cec5SDimitry Andric     Token.reset(MIToken::Error, Range.remaining());
1810b57cec5SDimitry Andric     return Range;
1820b57cec5SDimitry Andric   }
1830b57cec5SDimitry Andric   while (isIdentifierChar(C.peek()))
1840b57cec5SDimitry Andric     C.advance();
1850b57cec5SDimitry Andric   Token.reset(Type, Range.upto(C))
1860b57cec5SDimitry Andric       .setStringValue(Range.upto(C).drop_front(PrefixLength));
1870b57cec5SDimitry Andric   return C;
1880b57cec5SDimitry Andric }
1890b57cec5SDimitry Andric 
/// Classify an identifier spelling.
///
/// Returns the token kind listed below for \p Identifier (as matched by
/// StringSwitch::Case), or MIToken::Identifier when no entry matches. A lone
/// "_" is reported as MIToken::underscore.
static MIToken::TokenKind getIdentifierKind(StringRef Identifier) {
  return StringSwitch<MIToken::TokenKind>(Identifier)
      .Case("_", MIToken::underscore)
      .Case("implicit", MIToken::kw_implicit)
      .Case("implicit-def", MIToken::kw_implicit_define)
      .Case("def", MIToken::kw_def)
      .Case("dead", MIToken::kw_dead)
      .Case("killed", MIToken::kw_killed)
      .Case("undef", MIToken::kw_undef)
      .Case("internal", MIToken::kw_internal)
      .Case("early-clobber", MIToken::kw_early_clobber)
      .Case("debug-use", MIToken::kw_debug_use)
      .Case("renamable", MIToken::kw_renamable)
      .Case("tied-def", MIToken::kw_tied_def)
      .Case("frame-setup", MIToken::kw_frame_setup)
      .Case("frame-destroy", MIToken::kw_frame_destroy)
      .Case("nnan", MIToken::kw_nnan)
      .Case("ninf", MIToken::kw_ninf)
      .Case("nsz", MIToken::kw_nsz)
      .Case("arcp", MIToken::kw_arcp)
      .Case("contract", MIToken::kw_contract)
      .Case("afn", MIToken::kw_afn)
      .Case("reassoc", MIToken::kw_reassoc)
      .Case("nuw", MIToken::kw_nuw)
      .Case("nsw", MIToken::kw_nsw)
      .Case("nusw", MIToken::kw_nusw)
      .Case("exact", MIToken::kw_exact)
      .Case("nneg", MIToken::kw_nneg)
      .Case("disjoint", MIToken::kw_disjoint)
      .Case("nofpexcept", MIToken::kw_nofpexcept)
      .Case("unpredictable", MIToken::kw_unpredictable)
      .Case("debug-location", MIToken::kw_debug_location)
      .Case("debug-instr-number", MIToken::kw_debug_instr_number)
      .Case("dbg-instr-ref", MIToken::kw_dbg_instr_ref)
      // The entries below map to the kw_cfi_* token kinds.
      .Case("same_value", MIToken::kw_cfi_same_value)
      .Case("offset", MIToken::kw_cfi_offset)
      .Case("rel_offset", MIToken::kw_cfi_rel_offset)
      .Case("def_cfa_register", MIToken::kw_cfi_def_cfa_register)
      .Case("def_cfa_offset", MIToken::kw_cfi_def_cfa_offset)
      .Case("adjust_cfa_offset", MIToken::kw_cfi_adjust_cfa_offset)
      .Case("escape", MIToken::kw_cfi_escape)
      .Case("def_cfa", MIToken::kw_cfi_def_cfa)
      .Case("llvm_def_aspace_cfa", MIToken::kw_cfi_llvm_def_aspace_cfa)
      .Case("remember_state", MIToken::kw_cfi_remember_state)
      .Case("restore", MIToken::kw_cfi_restore)
      .Case("restore_state", MIToken::kw_cfi_restore_state)
      .Case("undefined", MIToken::kw_cfi_undefined)
      .Case("register", MIToken::kw_cfi_register)
      .Case("window_save", MIToken::kw_cfi_window_save)
      .Case("negate_ra_sign_state",
            MIToken::kw_cfi_aarch64_negate_ra_sign_state)
      // End of the kw_cfi_* entries.
      .Case("blockaddress", MIToken::kw_blockaddress)
      .Case("intrinsic", MIToken::kw_intrinsic)
      .Case("target-index", MIToken::kw_target_index)
      .Case("half", MIToken::kw_half)
      .Case("bfloat", MIToken::kw_bfloat)
      .Case("float", MIToken::kw_float)
      .Case("double", MIToken::kw_double)
      .Case("x86_fp80", MIToken::kw_x86_fp80)
      .Case("fp128", MIToken::kw_fp128)
      .Case("ppc_fp128", MIToken::kw_ppc_fp128)
      .Case("target-flags", MIToken::kw_target_flags)
      .Case("volatile", MIToken::kw_volatile)
      .Case("non-temporal", MIToken::kw_non_temporal)
      .Case("dereferenceable", MIToken::kw_dereferenceable)
      .Case("invariant", MIToken::kw_invariant)
      .Case("align", MIToken::kw_align)
      .Case("basealign", MIToken::kw_basealign)
      .Case("addrspace", MIToken::kw_addrspace)
      .Case("stack", MIToken::kw_stack)
      .Case("got", MIToken::kw_got)
      .Case("jump-table", MIToken::kw_jump_table)
      .Case("constant-pool", MIToken::kw_constant_pool)
      .Case("call-entry", MIToken::kw_call_entry)
      .Case("custom", MIToken::kw_custom)
      .Case("liveout", MIToken::kw_liveout)
      .Case("landing-pad", MIToken::kw_landing_pad)
      .Case("inlineasm-br-indirect-target",
            MIToken::kw_inlineasm_br_indirect_target)
      .Case("ehfunclet-entry", MIToken::kw_ehfunclet_entry)
      .Case("liveins", MIToken::kw_liveins)
      .Case("successors", MIToken::kw_successors)
      .Case("floatpred", MIToken::kw_floatpred)
      .Case("intpred", MIToken::kw_intpred)
      .Case("shufflemask", MIToken::kw_shufflemask)
      .Case("pre-instr-symbol", MIToken::kw_pre_instr_symbol)
      .Case("post-instr-symbol", MIToken::kw_post_instr_symbol)
      .Case("heap-alloc-marker", MIToken::kw_heap_alloc_marker)
      .Case("pcsections", MIToken::kw_pcsections)
      .Case("cfi-type", MIToken::kw_cfi_type)
      .Case("bbsections", MIToken::kw_bbsections)
      .Case("bb_id", MIToken::kw_bb_id)
      .Case("unknown-size", MIToken::kw_unknown_size)
      .Case("unknown-address", MIToken::kw_unknown_address)
      .Case("distinct", MIToken::kw_distinct)
      .Case("ir-block-address-taken", MIToken::kw_ir_block_address_taken)
      .Case("machine-block-address-taken",
            MIToken::kw_machine_block_address_taken)
      .Case("call-frame-size", MIToken::kw_call_frame_size)
      .Case("noconvergent", MIToken::kw_noconvergent)
      // Anything not listed above is an ordinary identifier.
      .Default(MIToken::Identifier);
}
2920b57cec5SDimitry Andric 
2930b57cec5SDimitry Andric static Cursor maybeLexIdentifier(Cursor C, MIToken &Token) {
2940b57cec5SDimitry Andric   if (!isalpha(C.peek()) && C.peek() != '_')
295bdd1243dSDimitry Andric     return std::nullopt;
2960b57cec5SDimitry Andric   auto Range = C;
2970b57cec5SDimitry Andric   while (isIdentifierChar(C.peek()))
2980b57cec5SDimitry Andric     C.advance();
2990b57cec5SDimitry Andric   auto Identifier = Range.upto(C);
3000b57cec5SDimitry Andric   Token.reset(getIdentifierKind(Identifier), Identifier)
3010b57cec5SDimitry Andric       .setStringValue(Identifier);
3020b57cec5SDimitry Andric   return C;
3030b57cec5SDimitry Andric }
3040b57cec5SDimitry Andric 
3050b57cec5SDimitry Andric static Cursor maybeLexMachineBasicBlock(Cursor C, MIToken &Token,
3060b57cec5SDimitry Andric                                         ErrorCallbackType ErrorCallback) {
3075f757f3fSDimitry Andric   bool IsReference = C.remaining().starts_with("%bb.");
3085f757f3fSDimitry Andric   if (!IsReference && !C.remaining().starts_with("bb."))
309bdd1243dSDimitry Andric     return std::nullopt;
3100b57cec5SDimitry Andric   auto Range = C;
3110b57cec5SDimitry Andric   unsigned PrefixLength = IsReference ? 4 : 3;
3120b57cec5SDimitry Andric   C.advance(PrefixLength); // Skip '%bb.' or 'bb.'
3130b57cec5SDimitry Andric   if (!isdigit(C.peek())) {
3140b57cec5SDimitry Andric     Token.reset(MIToken::Error, C.remaining());
3150b57cec5SDimitry Andric     ErrorCallback(C.location(), "expected a number after '%bb.'");
3160b57cec5SDimitry Andric     return C;
3170b57cec5SDimitry Andric   }
3180b57cec5SDimitry Andric   auto NumberRange = C;
3190b57cec5SDimitry Andric   while (isdigit(C.peek()))
3200b57cec5SDimitry Andric     C.advance();
3210b57cec5SDimitry Andric   StringRef Number = NumberRange.upto(C);
3220b57cec5SDimitry Andric   unsigned StringOffset = PrefixLength + Number.size(); // Drop '%bb.<id>'
3230b57cec5SDimitry Andric   // TODO: The format bb.<id>.<irname> is supported only when it's not a
3240b57cec5SDimitry Andric   // reference. Once we deprecate the format where the irname shows up, we
3250b57cec5SDimitry Andric   // should only lex forward if it is a reference.
3260b57cec5SDimitry Andric   if (C.peek() == '.') {
3270b57cec5SDimitry Andric     C.advance(); // Skip '.'
3280b57cec5SDimitry Andric     ++StringOffset;
3290b57cec5SDimitry Andric     while (isIdentifierChar(C.peek()))
3300b57cec5SDimitry Andric       C.advance();
3310b57cec5SDimitry Andric   }
3320b57cec5SDimitry Andric   Token.reset(IsReference ? MIToken::MachineBasicBlock
3330b57cec5SDimitry Andric                           : MIToken::MachineBasicBlockLabel,
3340b57cec5SDimitry Andric               Range.upto(C))
3350b57cec5SDimitry Andric       .setIntegerValue(APSInt(Number))
3360b57cec5SDimitry Andric       .setStringValue(Range.upto(C).drop_front(StringOffset));
3370b57cec5SDimitry Andric   return C;
3380b57cec5SDimitry Andric }
3390b57cec5SDimitry Andric 
3400b57cec5SDimitry Andric static Cursor maybeLexIndex(Cursor C, MIToken &Token, StringRef Rule,
3410b57cec5SDimitry Andric                             MIToken::TokenKind Kind) {
3425f757f3fSDimitry Andric   if (!C.remaining().starts_with(Rule) || !isdigit(C.peek(Rule.size())))
343bdd1243dSDimitry Andric     return std::nullopt;
3440b57cec5SDimitry Andric   auto Range = C;
3450b57cec5SDimitry Andric   C.advance(Rule.size());
3460b57cec5SDimitry Andric   auto NumberRange = C;
3470b57cec5SDimitry Andric   while (isdigit(C.peek()))
3480b57cec5SDimitry Andric     C.advance();
3490b57cec5SDimitry Andric   Token.reset(Kind, Range.upto(C)).setIntegerValue(APSInt(NumberRange.upto(C)));
3500b57cec5SDimitry Andric   return C;
3510b57cec5SDimitry Andric }
3520b57cec5SDimitry Andric 
3530b57cec5SDimitry Andric static Cursor maybeLexIndexAndName(Cursor C, MIToken &Token, StringRef Rule,
3540b57cec5SDimitry Andric                                    MIToken::TokenKind Kind) {
3555f757f3fSDimitry Andric   if (!C.remaining().starts_with(Rule) || !isdigit(C.peek(Rule.size())))
356bdd1243dSDimitry Andric     return std::nullopt;
3570b57cec5SDimitry Andric   auto Range = C;
3580b57cec5SDimitry Andric   C.advance(Rule.size());
3590b57cec5SDimitry Andric   auto NumberRange = C;
3600b57cec5SDimitry Andric   while (isdigit(C.peek()))
3610b57cec5SDimitry Andric     C.advance();
3620b57cec5SDimitry Andric   StringRef Number = NumberRange.upto(C);
3630b57cec5SDimitry Andric   unsigned StringOffset = Rule.size() + Number.size();
3640b57cec5SDimitry Andric   if (C.peek() == '.') {
3650b57cec5SDimitry Andric     C.advance();
3660b57cec5SDimitry Andric     ++StringOffset;
3670b57cec5SDimitry Andric     while (isIdentifierChar(C.peek()))
3680b57cec5SDimitry Andric       C.advance();
3690b57cec5SDimitry Andric   }
3700b57cec5SDimitry Andric   Token.reset(Kind, Range.upto(C))
3710b57cec5SDimitry Andric       .setIntegerValue(APSInt(Number))
3720b57cec5SDimitry Andric       .setStringValue(Range.upto(C).drop_front(StringOffset));
3730b57cec5SDimitry Andric   return C;
3740b57cec5SDimitry Andric }
3750b57cec5SDimitry Andric 
3760b57cec5SDimitry Andric static Cursor maybeLexJumpTableIndex(Cursor C, MIToken &Token) {
3770b57cec5SDimitry Andric   return maybeLexIndex(C, Token, "%jump-table.", MIToken::JumpTableIndex);
3780b57cec5SDimitry Andric }
3790b57cec5SDimitry Andric 
3800b57cec5SDimitry Andric static Cursor maybeLexStackObject(Cursor C, MIToken &Token) {
3810b57cec5SDimitry Andric   return maybeLexIndexAndName(C, Token, "%stack.", MIToken::StackObject);
3820b57cec5SDimitry Andric }
3830b57cec5SDimitry Andric 
3840b57cec5SDimitry Andric static Cursor maybeLexFixedStackObject(Cursor C, MIToken &Token) {
3850b57cec5SDimitry Andric   return maybeLexIndex(C, Token, "%fixed-stack.", MIToken::FixedStackObject);
3860b57cec5SDimitry Andric }
3870b57cec5SDimitry Andric 
3880b57cec5SDimitry Andric static Cursor maybeLexConstantPoolItem(Cursor C, MIToken &Token) {
3890b57cec5SDimitry Andric   return maybeLexIndex(C, Token, "%const.", MIToken::ConstantPoolItem);
3900b57cec5SDimitry Andric }
3910b57cec5SDimitry Andric 
3920b57cec5SDimitry Andric static Cursor maybeLexSubRegisterIndex(Cursor C, MIToken &Token,
3930b57cec5SDimitry Andric                                        ErrorCallbackType ErrorCallback) {
3940b57cec5SDimitry Andric   const StringRef Rule = "%subreg.";
3955f757f3fSDimitry Andric   if (!C.remaining().starts_with(Rule))
396bdd1243dSDimitry Andric     return std::nullopt;
3970b57cec5SDimitry Andric   return lexName(C, Token, MIToken::SubRegisterIndex, Rule.size(),
3980b57cec5SDimitry Andric                  ErrorCallback);
3990b57cec5SDimitry Andric }
4000b57cec5SDimitry Andric 
4010b57cec5SDimitry Andric static Cursor maybeLexIRBlock(Cursor C, MIToken &Token,
4020b57cec5SDimitry Andric                               ErrorCallbackType ErrorCallback) {
4030b57cec5SDimitry Andric   const StringRef Rule = "%ir-block.";
4045f757f3fSDimitry Andric   if (!C.remaining().starts_with(Rule))
405bdd1243dSDimitry Andric     return std::nullopt;
4060b57cec5SDimitry Andric   if (isdigit(C.peek(Rule.size())))
4070b57cec5SDimitry Andric     return maybeLexIndex(C, Token, Rule, MIToken::IRBlock);
4080b57cec5SDimitry Andric   return lexName(C, Token, MIToken::NamedIRBlock, Rule.size(), ErrorCallback);
4090b57cec5SDimitry Andric }
4100b57cec5SDimitry Andric 
4110b57cec5SDimitry Andric static Cursor maybeLexIRValue(Cursor C, MIToken &Token,
4120b57cec5SDimitry Andric                               ErrorCallbackType ErrorCallback) {
4130b57cec5SDimitry Andric   const StringRef Rule = "%ir.";
4145f757f3fSDimitry Andric   if (!C.remaining().starts_with(Rule))
415bdd1243dSDimitry Andric     return std::nullopt;
4160b57cec5SDimitry Andric   if (isdigit(C.peek(Rule.size())))
4170b57cec5SDimitry Andric     return maybeLexIndex(C, Token, Rule, MIToken::IRValue);
4180b57cec5SDimitry Andric   return lexName(C, Token, MIToken::NamedIRValue, Rule.size(), ErrorCallback);
4190b57cec5SDimitry Andric }
4200b57cec5SDimitry Andric 
4210b57cec5SDimitry Andric static Cursor maybeLexStringConstant(Cursor C, MIToken &Token,
4220b57cec5SDimitry Andric                                      ErrorCallbackType ErrorCallback) {
4230b57cec5SDimitry Andric   if (C.peek() != '"')
424bdd1243dSDimitry Andric     return std::nullopt;
4250b57cec5SDimitry Andric   return lexName(C, Token, MIToken::StringConstant, /*PrefixLength=*/0,
4260b57cec5SDimitry Andric                  ErrorCallback);
4270b57cec5SDimitry Andric }
4280b57cec5SDimitry Andric 
4290b57cec5SDimitry Andric static Cursor lexVirtualRegister(Cursor C, MIToken &Token) {
4300b57cec5SDimitry Andric   auto Range = C;
4310b57cec5SDimitry Andric   C.advance(); // Skip '%'
4320b57cec5SDimitry Andric   auto NumberRange = C;
4330b57cec5SDimitry Andric   while (isdigit(C.peek()))
4340b57cec5SDimitry Andric     C.advance();
4350b57cec5SDimitry Andric   Token.reset(MIToken::VirtualRegister, Range.upto(C))
4360b57cec5SDimitry Andric       .setIntegerValue(APSInt(NumberRange.upto(C)));
4370b57cec5SDimitry Andric   return C;
4380b57cec5SDimitry Andric }
4390b57cec5SDimitry Andric 
4400b57cec5SDimitry Andric /// Returns true for a character allowed in a register name.
4410b57cec5SDimitry Andric static bool isRegisterChar(char C) {
4420b57cec5SDimitry Andric   return isIdentifierChar(C) && C != '.';
4430b57cec5SDimitry Andric }
4440b57cec5SDimitry Andric 
4450b57cec5SDimitry Andric static Cursor lexNamedVirtualRegister(Cursor C, MIToken &Token) {
4460b57cec5SDimitry Andric   Cursor Range = C;
4470b57cec5SDimitry Andric   C.advance(); // Skip '%'
4480b57cec5SDimitry Andric   while (isRegisterChar(C.peek()))
4490b57cec5SDimitry Andric     C.advance();
4500b57cec5SDimitry Andric   Token.reset(MIToken::NamedVirtualRegister, Range.upto(C))
4510b57cec5SDimitry Andric       .setStringValue(Range.upto(C).drop_front(1)); // Drop the '%'
4520b57cec5SDimitry Andric   return C;
4530b57cec5SDimitry Andric }
4540b57cec5SDimitry Andric 
4550b57cec5SDimitry Andric static Cursor maybeLexRegister(Cursor C, MIToken &Token,
4560b57cec5SDimitry Andric                                ErrorCallbackType ErrorCallback) {
4570b57cec5SDimitry Andric   if (C.peek() != '%' && C.peek() != '$')
458bdd1243dSDimitry Andric     return std::nullopt;
4590b57cec5SDimitry Andric 
4600b57cec5SDimitry Andric   if (C.peek() == '%') {
4610b57cec5SDimitry Andric     if (isdigit(C.peek(1)))
4620b57cec5SDimitry Andric       return lexVirtualRegister(C, Token);
4630b57cec5SDimitry Andric 
4640b57cec5SDimitry Andric     if (isRegisterChar(C.peek(1)))
4650b57cec5SDimitry Andric       return lexNamedVirtualRegister(C, Token);
4660b57cec5SDimitry Andric 
467bdd1243dSDimitry Andric     return std::nullopt;
4680b57cec5SDimitry Andric   }
4690b57cec5SDimitry Andric 
4700b57cec5SDimitry Andric   assert(C.peek() == '$');
4710b57cec5SDimitry Andric   auto Range = C;
4720b57cec5SDimitry Andric   C.advance(); // Skip '$'
4730b57cec5SDimitry Andric   while (isRegisterChar(C.peek()))
4740b57cec5SDimitry Andric     C.advance();
4750b57cec5SDimitry Andric   Token.reset(MIToken::NamedRegister, Range.upto(C))
4760b57cec5SDimitry Andric       .setStringValue(Range.upto(C).drop_front(1)); // Drop the '$'
4770b57cec5SDimitry Andric   return C;
4780b57cec5SDimitry Andric }
4790b57cec5SDimitry Andric 
4800b57cec5SDimitry Andric static Cursor maybeLexGlobalValue(Cursor C, MIToken &Token,
4810b57cec5SDimitry Andric                                   ErrorCallbackType ErrorCallback) {
4820b57cec5SDimitry Andric   if (C.peek() != '@')
483bdd1243dSDimitry Andric     return std::nullopt;
4840b57cec5SDimitry Andric   if (!isdigit(C.peek(1)))
4850b57cec5SDimitry Andric     return lexName(C, Token, MIToken::NamedGlobalValue, /*PrefixLength=*/1,
4860b57cec5SDimitry Andric                    ErrorCallback);
4870b57cec5SDimitry Andric   auto Range = C;
4880b57cec5SDimitry Andric   C.advance(1); // Skip the '@'
4890b57cec5SDimitry Andric   auto NumberRange = C;
4900b57cec5SDimitry Andric   while (isdigit(C.peek()))
4910b57cec5SDimitry Andric     C.advance();
4920b57cec5SDimitry Andric   Token.reset(MIToken::GlobalValue, Range.upto(C))
4930b57cec5SDimitry Andric       .setIntegerValue(APSInt(NumberRange.upto(C)));
4940b57cec5SDimitry Andric   return C;
4950b57cec5SDimitry Andric }
4960b57cec5SDimitry Andric 
4970b57cec5SDimitry Andric static Cursor maybeLexExternalSymbol(Cursor C, MIToken &Token,
4980b57cec5SDimitry Andric                                      ErrorCallbackType ErrorCallback) {
4990b57cec5SDimitry Andric   if (C.peek() != '&')
500bdd1243dSDimitry Andric     return std::nullopt;
5010b57cec5SDimitry Andric   return lexName(C, Token, MIToken::ExternalSymbol, /*PrefixLength=*/1,
5020b57cec5SDimitry Andric                  ErrorCallback);
5030b57cec5SDimitry Andric }
5040b57cec5SDimitry Andric 
5050b57cec5SDimitry Andric static Cursor maybeLexMCSymbol(Cursor C, MIToken &Token,
5060b57cec5SDimitry Andric                                ErrorCallbackType ErrorCallback) {
5070b57cec5SDimitry Andric   const StringRef Rule = "<mcsymbol ";
5085f757f3fSDimitry Andric   if (!C.remaining().starts_with(Rule))
509bdd1243dSDimitry Andric     return std::nullopt;
5100b57cec5SDimitry Andric   auto Start = C;
5110b57cec5SDimitry Andric   C.advance(Rule.size());
5120b57cec5SDimitry Andric 
5130b57cec5SDimitry Andric   // Try a simple unquoted name.
5140b57cec5SDimitry Andric   if (C.peek() != '"') {
5150b57cec5SDimitry Andric     while (isIdentifierChar(C.peek()))
5160b57cec5SDimitry Andric       C.advance();
5170b57cec5SDimitry Andric     StringRef String = Start.upto(C).drop_front(Rule.size());
5180b57cec5SDimitry Andric     if (C.peek() != '>') {
5190b57cec5SDimitry Andric       ErrorCallback(C.location(),
5200b57cec5SDimitry Andric                     "expected the '<mcsymbol ...' to be closed by a '>'");
5210b57cec5SDimitry Andric       Token.reset(MIToken::Error, Start.remaining());
5220b57cec5SDimitry Andric       return Start;
5230b57cec5SDimitry Andric     }
5240b57cec5SDimitry Andric     C.advance();
5250b57cec5SDimitry Andric 
5260b57cec5SDimitry Andric     Token.reset(MIToken::MCSymbol, Start.upto(C)).setStringValue(String);
5270b57cec5SDimitry Andric     return C;
5280b57cec5SDimitry Andric   }
5290b57cec5SDimitry Andric 
5300b57cec5SDimitry Andric   // Otherwise lex out a quoted name.
5310b57cec5SDimitry Andric   Cursor R = lexStringConstant(C, ErrorCallback);
5320b57cec5SDimitry Andric   if (!R) {
5330b57cec5SDimitry Andric     ErrorCallback(C.location(),
5340b57cec5SDimitry Andric                   "unable to parse quoted string from opening quote");
5350b57cec5SDimitry Andric     Token.reset(MIToken::Error, Start.remaining());
5360b57cec5SDimitry Andric     return Start;
5370b57cec5SDimitry Andric   }
5380b57cec5SDimitry Andric   StringRef String = Start.upto(R).drop_front(Rule.size());
5390b57cec5SDimitry Andric   if (R.peek() != '>') {
5400b57cec5SDimitry Andric     ErrorCallback(R.location(),
5410b57cec5SDimitry Andric                   "expected the '<mcsymbol ...' to be closed by a '>'");
5420b57cec5SDimitry Andric     Token.reset(MIToken::Error, Start.remaining());
5430b57cec5SDimitry Andric     return Start;
5440b57cec5SDimitry Andric   }
5450b57cec5SDimitry Andric   R.advance();
5460b57cec5SDimitry Andric 
5470b57cec5SDimitry Andric   Token.reset(MIToken::MCSymbol, Start.upto(R))
5480b57cec5SDimitry Andric       .setOwnedStringValue(unescapeQuotedString(String));
5490b57cec5SDimitry Andric   return R;
5500b57cec5SDimitry Andric }
5510b57cec5SDimitry Andric 
/// Return true if \p C is one of the type letters ('H', 'K', 'L', 'M', 'R')
/// accepted after "0x" in a hexadecimal floating-point literal.
static bool isValidHexFloatingPointPrefix(char C) {
  switch (C) {
  case 'H':
  case 'K':
  case 'L':
  case 'M':
  case 'R':
    return true;
  default:
    return false;
  }
}
5550b57cec5SDimitry Andric 
5560b57cec5SDimitry Andric static Cursor lexFloatingPointLiteral(Cursor Range, Cursor C, MIToken &Token) {
5570b57cec5SDimitry Andric   C.advance();
5580b57cec5SDimitry Andric   // Skip over [0-9]*([eE][-+]?[0-9]+)?
5590b57cec5SDimitry Andric   while (isdigit(C.peek()))
5600b57cec5SDimitry Andric     C.advance();
5610b57cec5SDimitry Andric   if ((C.peek() == 'e' || C.peek() == 'E') &&
5620b57cec5SDimitry Andric       (isdigit(C.peek(1)) ||
5630b57cec5SDimitry Andric        ((C.peek(1) == '-' || C.peek(1) == '+') && isdigit(C.peek(2))))) {
5640b57cec5SDimitry Andric     C.advance(2);
5650b57cec5SDimitry Andric     while (isdigit(C.peek()))
5660b57cec5SDimitry Andric       C.advance();
5670b57cec5SDimitry Andric   }
5680b57cec5SDimitry Andric   Token.reset(MIToken::FloatingPointLiteral, Range.upto(C));
5690b57cec5SDimitry Andric   return C;
5700b57cec5SDimitry Andric }
5710b57cec5SDimitry Andric 
5720b57cec5SDimitry Andric static Cursor maybeLexHexadecimalLiteral(Cursor C, MIToken &Token) {
5730b57cec5SDimitry Andric   if (C.peek() != '0' || (C.peek(1) != 'x' && C.peek(1) != 'X'))
574bdd1243dSDimitry Andric     return std::nullopt;
5750b57cec5SDimitry Andric   Cursor Range = C;
5760b57cec5SDimitry Andric   C.advance(2);
5770b57cec5SDimitry Andric   unsigned PrefLen = 2;
5780b57cec5SDimitry Andric   if (isValidHexFloatingPointPrefix(C.peek())) {
5790b57cec5SDimitry Andric     C.advance();
5800b57cec5SDimitry Andric     PrefLen++;
5810b57cec5SDimitry Andric   }
5820b57cec5SDimitry Andric   while (isxdigit(C.peek()))
5830b57cec5SDimitry Andric     C.advance();
5840b57cec5SDimitry Andric   StringRef StrVal = Range.upto(C);
5850b57cec5SDimitry Andric   if (StrVal.size() <= PrefLen)
586bdd1243dSDimitry Andric     return std::nullopt;
5870b57cec5SDimitry Andric   if (PrefLen == 2)
5880b57cec5SDimitry Andric     Token.reset(MIToken::HexLiteral, Range.upto(C));
5890b57cec5SDimitry Andric   else // It must be 3, which means that there was a floating-point prefix.
5900b57cec5SDimitry Andric     Token.reset(MIToken::FloatingPointLiteral, Range.upto(C));
5910b57cec5SDimitry Andric   return C;
5920b57cec5SDimitry Andric }
5930b57cec5SDimitry Andric 
5940b57cec5SDimitry Andric static Cursor maybeLexNumericalLiteral(Cursor C, MIToken &Token) {
5950b57cec5SDimitry Andric   if (!isdigit(C.peek()) && (C.peek() != '-' || !isdigit(C.peek(1))))
596bdd1243dSDimitry Andric     return std::nullopt;
5970b57cec5SDimitry Andric   auto Range = C;
5980b57cec5SDimitry Andric   C.advance();
5990b57cec5SDimitry Andric   while (isdigit(C.peek()))
6000b57cec5SDimitry Andric     C.advance();
6010b57cec5SDimitry Andric   if (C.peek() == '.')
6020b57cec5SDimitry Andric     return lexFloatingPointLiteral(Range, C, Token);
6030b57cec5SDimitry Andric   StringRef StrVal = Range.upto(C);
6040b57cec5SDimitry Andric   Token.reset(MIToken::IntegerLiteral, StrVal).setIntegerValue(APSInt(StrVal));
6050b57cec5SDimitry Andric   return C;
6060b57cec5SDimitry Andric }
6070b57cec5SDimitry Andric 
6080b57cec5SDimitry Andric static MIToken::TokenKind getMetadataKeywordKind(StringRef Identifier) {
6090b57cec5SDimitry Andric   return StringSwitch<MIToken::TokenKind>(Identifier)
6100b57cec5SDimitry Andric       .Case("!tbaa", MIToken::md_tbaa)
6110b57cec5SDimitry Andric       .Case("!alias.scope", MIToken::md_alias_scope)
6120b57cec5SDimitry Andric       .Case("!noalias", MIToken::md_noalias)
6130b57cec5SDimitry Andric       .Case("!range", MIToken::md_range)
6140b57cec5SDimitry Andric       .Case("!DIExpression", MIToken::md_diexpr)
6150b57cec5SDimitry Andric       .Case("!DILocation", MIToken::md_dilocation)
6160b57cec5SDimitry Andric       .Default(MIToken::Error);
6170b57cec5SDimitry Andric }
6180b57cec5SDimitry Andric 
619480093f4SDimitry Andric static Cursor maybeLexExclaim(Cursor C, MIToken &Token,
6200b57cec5SDimitry Andric                               ErrorCallbackType ErrorCallback) {
6210b57cec5SDimitry Andric   if (C.peek() != '!')
622bdd1243dSDimitry Andric     return std::nullopt;
6230b57cec5SDimitry Andric   auto Range = C;
6240b57cec5SDimitry Andric   C.advance(1);
6250b57cec5SDimitry Andric   if (isdigit(C.peek()) || !isIdentifierChar(C.peek())) {
6260b57cec5SDimitry Andric     Token.reset(MIToken::exclaim, Range.upto(C));
6270b57cec5SDimitry Andric     return C;
6280b57cec5SDimitry Andric   }
6290b57cec5SDimitry Andric   while (isIdentifierChar(C.peek()))
6300b57cec5SDimitry Andric     C.advance();
6310b57cec5SDimitry Andric   StringRef StrVal = Range.upto(C);
6320b57cec5SDimitry Andric   Token.reset(getMetadataKeywordKind(StrVal), StrVal);
6330b57cec5SDimitry Andric   if (Token.isError())
6340b57cec5SDimitry Andric     ErrorCallback(Token.location(),
6350b57cec5SDimitry Andric                   "use of unknown metadata keyword '" + StrVal + "'");
6360b57cec5SDimitry Andric   return C;
6370b57cec5SDimitry Andric }
6380b57cec5SDimitry Andric 
6390b57cec5SDimitry Andric static MIToken::TokenKind symbolToken(char C) {
6400b57cec5SDimitry Andric   switch (C) {
6410b57cec5SDimitry Andric   case ',':
6420b57cec5SDimitry Andric     return MIToken::comma;
6430b57cec5SDimitry Andric   case '.':
6440b57cec5SDimitry Andric     return MIToken::dot;
6450b57cec5SDimitry Andric   case '=':
6460b57cec5SDimitry Andric     return MIToken::equal;
6470b57cec5SDimitry Andric   case ':':
6480b57cec5SDimitry Andric     return MIToken::colon;
6490b57cec5SDimitry Andric   case '(':
6500b57cec5SDimitry Andric     return MIToken::lparen;
6510b57cec5SDimitry Andric   case ')':
6520b57cec5SDimitry Andric     return MIToken::rparen;
6530b57cec5SDimitry Andric   case '{':
6540b57cec5SDimitry Andric     return MIToken::lbrace;
6550b57cec5SDimitry Andric   case '}':
6560b57cec5SDimitry Andric     return MIToken::rbrace;
6570b57cec5SDimitry Andric   case '+':
6580b57cec5SDimitry Andric     return MIToken::plus;
6590b57cec5SDimitry Andric   case '-':
6600b57cec5SDimitry Andric     return MIToken::minus;
6610b57cec5SDimitry Andric   case '<':
6620b57cec5SDimitry Andric     return MIToken::less;
6630b57cec5SDimitry Andric   case '>':
6640b57cec5SDimitry Andric     return MIToken::greater;
6650b57cec5SDimitry Andric   default:
6660b57cec5SDimitry Andric     return MIToken::Error;
6670b57cec5SDimitry Andric   }
6680b57cec5SDimitry Andric }
6690b57cec5SDimitry Andric 
6700b57cec5SDimitry Andric static Cursor maybeLexSymbol(Cursor C, MIToken &Token) {
6710b57cec5SDimitry Andric   MIToken::TokenKind Kind;
6720b57cec5SDimitry Andric   unsigned Length = 1;
6730b57cec5SDimitry Andric   if (C.peek() == ':' && C.peek(1) == ':') {
6740b57cec5SDimitry Andric     Kind = MIToken::coloncolon;
6750b57cec5SDimitry Andric     Length = 2;
6760b57cec5SDimitry Andric   } else
6770b57cec5SDimitry Andric     Kind = symbolToken(C.peek());
6780b57cec5SDimitry Andric   if (Kind == MIToken::Error)
679bdd1243dSDimitry Andric     return std::nullopt;
6800b57cec5SDimitry Andric   auto Range = C;
6810b57cec5SDimitry Andric   C.advance(Length);
6820b57cec5SDimitry Andric   Token.reset(Kind, Range.upto(C));
6830b57cec5SDimitry Andric   return C;
6840b57cec5SDimitry Andric }
6850b57cec5SDimitry Andric 
6860b57cec5SDimitry Andric static Cursor maybeLexNewline(Cursor C, MIToken &Token) {
6870b57cec5SDimitry Andric   if (!isNewlineChar(C.peek()))
688bdd1243dSDimitry Andric     return std::nullopt;
6890b57cec5SDimitry Andric   auto Range = C;
6900b57cec5SDimitry Andric   C.advance();
6910b57cec5SDimitry Andric   Token.reset(MIToken::Newline, Range.upto(C));
6920b57cec5SDimitry Andric   return C;
6930b57cec5SDimitry Andric }
6940b57cec5SDimitry Andric 
6950b57cec5SDimitry Andric static Cursor maybeLexEscapedIRValue(Cursor C, MIToken &Token,
6960b57cec5SDimitry Andric                                      ErrorCallbackType ErrorCallback) {
6970b57cec5SDimitry Andric   if (C.peek() != '`')
698bdd1243dSDimitry Andric     return std::nullopt;
6990b57cec5SDimitry Andric   auto Range = C;
7000b57cec5SDimitry Andric   C.advance();
7010b57cec5SDimitry Andric   auto StrRange = C;
7020b57cec5SDimitry Andric   while (C.peek() != '`') {
7030b57cec5SDimitry Andric     if (C.isEOF() || isNewlineChar(C.peek())) {
7040b57cec5SDimitry Andric       ErrorCallback(
7050b57cec5SDimitry Andric           C.location(),
7060b57cec5SDimitry Andric           "end of machine instruction reached before the closing '`'");
7070b57cec5SDimitry Andric       Token.reset(MIToken::Error, Range.remaining());
7080b57cec5SDimitry Andric       return C;
7090b57cec5SDimitry Andric     }
7100b57cec5SDimitry Andric     C.advance();
7110b57cec5SDimitry Andric   }
7120b57cec5SDimitry Andric   StringRef Value = StrRange.upto(C);
7130b57cec5SDimitry Andric   C.advance();
7140b57cec5SDimitry Andric   Token.reset(MIToken::QuotedIRValue, Range.upto(C)).setStringValue(Value);
7150b57cec5SDimitry Andric   return C;
7160b57cec5SDimitry Andric }
7170b57cec5SDimitry Andric 
7180b57cec5SDimitry Andric StringRef llvm::lexMIToken(StringRef Source, MIToken &Token,
7190b57cec5SDimitry Andric                            ErrorCallbackType ErrorCallback) {
7200b57cec5SDimitry Andric   auto C = skipComment(skipWhitespace(Cursor(Source)));
7210b57cec5SDimitry Andric   if (C.isEOF()) {
7220b57cec5SDimitry Andric     Token.reset(MIToken::Eof, C.remaining());
7230b57cec5SDimitry Andric     return C.remaining();
7240b57cec5SDimitry Andric   }
7250b57cec5SDimitry Andric 
7265ffd83dbSDimitry Andric   C = skipMachineOperandComment(C);
7275ffd83dbSDimitry Andric 
7280b57cec5SDimitry Andric   if (Cursor R = maybeLexMachineBasicBlock(C, Token, ErrorCallback))
7290b57cec5SDimitry Andric     return R.remaining();
7300b57cec5SDimitry Andric   if (Cursor R = maybeLexIdentifier(C, Token))
7310b57cec5SDimitry Andric     return R.remaining();
7320b57cec5SDimitry Andric   if (Cursor R = maybeLexJumpTableIndex(C, Token))
7330b57cec5SDimitry Andric     return R.remaining();
7340b57cec5SDimitry Andric   if (Cursor R = maybeLexStackObject(C, Token))
7350b57cec5SDimitry Andric     return R.remaining();
7360b57cec5SDimitry Andric   if (Cursor R = maybeLexFixedStackObject(C, Token))
7370b57cec5SDimitry Andric     return R.remaining();
7380b57cec5SDimitry Andric   if (Cursor R = maybeLexConstantPoolItem(C, Token))
7390b57cec5SDimitry Andric     return R.remaining();
7400b57cec5SDimitry Andric   if (Cursor R = maybeLexSubRegisterIndex(C, Token, ErrorCallback))
7410b57cec5SDimitry Andric     return R.remaining();
7420b57cec5SDimitry Andric   if (Cursor R = maybeLexIRBlock(C, Token, ErrorCallback))
7430b57cec5SDimitry Andric     return R.remaining();
7440b57cec5SDimitry Andric   if (Cursor R = maybeLexIRValue(C, Token, ErrorCallback))
7450b57cec5SDimitry Andric     return R.remaining();
7460b57cec5SDimitry Andric   if (Cursor R = maybeLexRegister(C, Token, ErrorCallback))
7470b57cec5SDimitry Andric     return R.remaining();
7480b57cec5SDimitry Andric   if (Cursor R = maybeLexGlobalValue(C, Token, ErrorCallback))
7490b57cec5SDimitry Andric     return R.remaining();
7500b57cec5SDimitry Andric   if (Cursor R = maybeLexExternalSymbol(C, Token, ErrorCallback))
7510b57cec5SDimitry Andric     return R.remaining();
7520b57cec5SDimitry Andric   if (Cursor R = maybeLexMCSymbol(C, Token, ErrorCallback))
7530b57cec5SDimitry Andric     return R.remaining();
7540b57cec5SDimitry Andric   if (Cursor R = maybeLexHexadecimalLiteral(C, Token))
7550b57cec5SDimitry Andric     return R.remaining();
7560b57cec5SDimitry Andric   if (Cursor R = maybeLexNumericalLiteral(C, Token))
7570b57cec5SDimitry Andric     return R.remaining();
758480093f4SDimitry Andric   if (Cursor R = maybeLexExclaim(C, Token, ErrorCallback))
7590b57cec5SDimitry Andric     return R.remaining();
7600b57cec5SDimitry Andric   if (Cursor R = maybeLexSymbol(C, Token))
7610b57cec5SDimitry Andric     return R.remaining();
7620b57cec5SDimitry Andric   if (Cursor R = maybeLexNewline(C, Token))
7630b57cec5SDimitry Andric     return R.remaining();
7640b57cec5SDimitry Andric   if (Cursor R = maybeLexEscapedIRValue(C, Token, ErrorCallback))
7650b57cec5SDimitry Andric     return R.remaining();
7660b57cec5SDimitry Andric   if (Cursor R = maybeLexStringConstant(C, Token, ErrorCallback))
7670b57cec5SDimitry Andric     return R.remaining();
7680b57cec5SDimitry Andric 
7690b57cec5SDimitry Andric   Token.reset(MIToken::Error, C.remaining());
7700b57cec5SDimitry Andric   ErrorCallback(C.location(),
7710b57cec5SDimitry Andric                 Twine("unexpected character '") + Twine(C.peek()) + "'");
7720b57cec5SDimitry Andric   return C.remaining();
7730b57cec5SDimitry Andric }
774