10b57cec5SDimitry Andric //===- MILexer.cpp - Machine instructions lexer implementation ------------===// 20b57cec5SDimitry Andric // 30b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 40b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information. 50b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 60b57cec5SDimitry Andric // 70b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 80b57cec5SDimitry Andric // 90b57cec5SDimitry Andric // This file implements the lexing of machine instructions. 100b57cec5SDimitry Andric // 110b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 120b57cec5SDimitry Andric 130b57cec5SDimitry Andric #include "MILexer.h" 140b57cec5SDimitry Andric #include "llvm/ADT/StringExtras.h" 150b57cec5SDimitry Andric #include "llvm/ADT/StringSwitch.h" 160b57cec5SDimitry Andric #include "llvm/ADT/Twine.h" 170b57cec5SDimitry Andric #include <cassert> 180b57cec5SDimitry Andric #include <cctype> 190b57cec5SDimitry Andric #include <string> 200b57cec5SDimitry Andric 210b57cec5SDimitry Andric using namespace llvm; 220b57cec5SDimitry Andric 230b57cec5SDimitry Andric namespace { 240b57cec5SDimitry Andric 250b57cec5SDimitry Andric using ErrorCallbackType = 260b57cec5SDimitry Andric function_ref<void(StringRef::iterator Loc, const Twine &)>; 270b57cec5SDimitry Andric 280b57cec5SDimitry Andric /// This class provides a way to iterate and get characters from the source 290b57cec5SDimitry Andric /// string. 300b57cec5SDimitry Andric class Cursor { 310b57cec5SDimitry Andric const char *Ptr = nullptr; 320b57cec5SDimitry Andric const char *End = nullptr; 330b57cec5SDimitry Andric 340b57cec5SDimitry Andric public: 35bdd1243dSDimitry Andric Cursor(std::nullopt_t) {} 360b57cec5SDimitry Andric 370b57cec5SDimitry Andric explicit Cursor(StringRef Str) { 380b57cec5SDimitry Andric Ptr = Str.data(); 390b57cec5SDimitry Andric End = Ptr + Str.size(); 400b57cec5SDimitry Andric } 410b57cec5SDimitry Andric 420b57cec5SDimitry Andric bool isEOF() const { return Ptr == End; } 430b57cec5SDimitry Andric 440b57cec5SDimitry Andric char peek(int I = 0) const { return End - Ptr <= I ? 0 : Ptr[I]; } 450b57cec5SDimitry Andric 460b57cec5SDimitry Andric void advance(unsigned I = 1) { Ptr += I; } 470b57cec5SDimitry Andric 480b57cec5SDimitry Andric StringRef remaining() const { return StringRef(Ptr, End - Ptr); } 490b57cec5SDimitry Andric 500b57cec5SDimitry Andric StringRef upto(Cursor C) const { 510b57cec5SDimitry Andric assert(C.Ptr >= Ptr && C.Ptr <= End); 520b57cec5SDimitry Andric return StringRef(Ptr, C.Ptr - Ptr); 530b57cec5SDimitry Andric } 540b57cec5SDimitry Andric 550b57cec5SDimitry Andric StringRef::iterator location() const { return Ptr; } 560b57cec5SDimitry Andric 570b57cec5SDimitry Andric operator bool() const { return Ptr != nullptr; } 580b57cec5SDimitry Andric }; 590b57cec5SDimitry Andric 600b57cec5SDimitry Andric } // end anonymous namespace 610b57cec5SDimitry Andric 620b57cec5SDimitry Andric MIToken &MIToken::reset(TokenKind Kind, StringRef Range) { 630b57cec5SDimitry Andric this->Kind = Kind; 640b57cec5SDimitry Andric this->Range = Range; 650b57cec5SDimitry Andric return *this; 660b57cec5SDimitry Andric } 670b57cec5SDimitry Andric 680b57cec5SDimitry Andric MIToken &MIToken::setStringValue(StringRef StrVal) { 690b57cec5SDimitry Andric StringValue = StrVal; 700b57cec5SDimitry Andric return *this; 710b57cec5SDimitry Andric } 720b57cec5SDimitry Andric 730b57cec5SDimitry Andric MIToken &MIToken::setOwnedStringValue(std::string StrVal) { 740b57cec5SDimitry Andric StringValueStorage = std::move(StrVal); 750b57cec5SDimitry Andric StringValue = StringValueStorage; 760b57cec5SDimitry Andric return *this; 770b57cec5SDimitry Andric } 780b57cec5SDimitry Andric 790b57cec5SDimitry Andric MIToken &MIToken::setIntegerValue(APSInt IntVal) { 800b57cec5SDimitry Andric this->IntVal = std::move(IntVal); 810b57cec5SDimitry Andric return *this; 820b57cec5SDimitry Andric } 830b57cec5SDimitry Andric 840b57cec5SDimitry Andric /// Skip the leading whitespace characters and return the updated cursor. 850b57cec5SDimitry Andric static Cursor skipWhitespace(Cursor C) { 860b57cec5SDimitry Andric while (isblank(C.peek())) 870b57cec5SDimitry Andric C.advance(); 880b57cec5SDimitry Andric return C; 890b57cec5SDimitry Andric } 900b57cec5SDimitry Andric 910b57cec5SDimitry Andric static bool isNewlineChar(char C) { return C == '\n' || C == '\r'; } 920b57cec5SDimitry Andric 930b57cec5SDimitry Andric /// Skip a line comment and return the updated cursor. 940b57cec5SDimitry Andric static Cursor skipComment(Cursor C) { 950b57cec5SDimitry Andric if (C.peek() != ';') 960b57cec5SDimitry Andric return C; 970b57cec5SDimitry Andric while (!isNewlineChar(C.peek()) && !C.isEOF()) 980b57cec5SDimitry Andric C.advance(); 990b57cec5SDimitry Andric return C; 1000b57cec5SDimitry Andric } 1010b57cec5SDimitry Andric 1025ffd83dbSDimitry Andric /// Machine operands can have comments, enclosed between /* and */. 1035ffd83dbSDimitry Andric /// This eats up all tokens, including /* and */. 1045ffd83dbSDimitry Andric static Cursor skipMachineOperandComment(Cursor C) { 1055ffd83dbSDimitry Andric if (C.peek() != '/' || C.peek(1) != '*') 1065ffd83dbSDimitry Andric return C; 1075ffd83dbSDimitry Andric 1085ffd83dbSDimitry Andric while (C.peek() != '*' || C.peek(1) != '/') 1095ffd83dbSDimitry Andric C.advance(); 1105ffd83dbSDimitry Andric 1115ffd83dbSDimitry Andric C.advance(); 1125ffd83dbSDimitry Andric C.advance(); 1135ffd83dbSDimitry Andric return C; 1145ffd83dbSDimitry Andric } 1155ffd83dbSDimitry Andric 1160b57cec5SDimitry Andric /// Return true if the given character satisfies the following regular 1170b57cec5SDimitry Andric /// expression: [-a-zA-Z$._0-9] 1180b57cec5SDimitry Andric static bool isIdentifierChar(char C) { 1190b57cec5SDimitry Andric return isalpha(C) || isdigit(C) || C == '_' || C == '-' || C == '.' || 1200b57cec5SDimitry Andric C == '$'; 1210b57cec5SDimitry Andric } 1220b57cec5SDimitry Andric 1230b57cec5SDimitry Andric /// Unescapes the given string value. 1240b57cec5SDimitry Andric /// 1250b57cec5SDimitry Andric /// Expects the string value to be quoted. 1260b57cec5SDimitry Andric static std::string unescapeQuotedString(StringRef Value) { 1270b57cec5SDimitry Andric assert(Value.front() == '"' && Value.back() == '"'); 1280b57cec5SDimitry Andric Cursor C = Cursor(Value.substr(1, Value.size() - 2)); 1290b57cec5SDimitry Andric 1300b57cec5SDimitry Andric std::string Str; 1310b57cec5SDimitry Andric Str.reserve(C.remaining().size()); 1320b57cec5SDimitry Andric while (!C.isEOF()) { 1330b57cec5SDimitry Andric char Char = C.peek(); 1340b57cec5SDimitry Andric if (Char == '\\') { 1350b57cec5SDimitry Andric if (C.peek(1) == '\\') { 1360b57cec5SDimitry Andric // Two '\' become one 1370b57cec5SDimitry Andric Str += '\\'; 1380b57cec5SDimitry Andric C.advance(2); 1390b57cec5SDimitry Andric continue; 1400b57cec5SDimitry Andric } 1410b57cec5SDimitry Andric if (isxdigit(C.peek(1)) && isxdigit(C.peek(2))) { 1420b57cec5SDimitry Andric Str += hexDigitValue(C.peek(1)) * 16 + hexDigitValue(C.peek(2)); 1430b57cec5SDimitry Andric C.advance(3); 1440b57cec5SDimitry Andric continue; 1450b57cec5SDimitry Andric } 1460b57cec5SDimitry Andric } 1470b57cec5SDimitry Andric Str += Char; 1480b57cec5SDimitry Andric C.advance(); 1490b57cec5SDimitry Andric } 1500b57cec5SDimitry Andric return Str; 1510b57cec5SDimitry Andric } 1520b57cec5SDimitry Andric 1530b57cec5SDimitry Andric /// Lex a string constant using the following regular expression: \"[^\"]*\" 1540b57cec5SDimitry Andric static Cursor lexStringConstant(Cursor C, ErrorCallbackType ErrorCallback) { 1550b57cec5SDimitry Andric assert(C.peek() == '"'); 1560b57cec5SDimitry Andric for (C.advance(); C.peek() != '"'; C.advance()) { 1570b57cec5SDimitry Andric if (C.isEOF() || isNewlineChar(C.peek())) { 1580b57cec5SDimitry Andric ErrorCallback( 1590b57cec5SDimitry Andric C.location(), 1600b57cec5SDimitry Andric "end of machine instruction reached before the closing '\"'"); 161bdd1243dSDimitry Andric return std::nullopt; 1620b57cec5SDimitry Andric } 1630b57cec5SDimitry Andric } 1640b57cec5SDimitry Andric C.advance(); 1650b57cec5SDimitry Andric return C; 1660b57cec5SDimitry Andric } 1670b57cec5SDimitry Andric 1680b57cec5SDimitry Andric static Cursor lexName(Cursor C, MIToken &Token, MIToken::TokenKind Type, 1690b57cec5SDimitry Andric unsigned PrefixLength, ErrorCallbackType ErrorCallback) { 1700b57cec5SDimitry Andric auto Range = C; 1710b57cec5SDimitry Andric C.advance(PrefixLength); 1720b57cec5SDimitry Andric if (C.peek() == '"') { 1730b57cec5SDimitry Andric if (Cursor R = lexStringConstant(C, ErrorCallback)) { 1740b57cec5SDimitry Andric StringRef String = Range.upto(R); 1750b57cec5SDimitry Andric Token.reset(Type, String) 1760b57cec5SDimitry Andric .setOwnedStringValue( 1770b57cec5SDimitry Andric unescapeQuotedString(String.drop_front(PrefixLength))); 1780b57cec5SDimitry Andric return R; 1790b57cec5SDimitry Andric } 1800b57cec5SDimitry Andric Token.reset(MIToken::Error, Range.remaining()); 1810b57cec5SDimitry Andric return Range; 1820b57cec5SDimitry Andric } 1830b57cec5SDimitry Andric while (isIdentifierChar(C.peek())) 1840b57cec5SDimitry Andric C.advance(); 1850b57cec5SDimitry Andric Token.reset(Type, Range.upto(C)) 1860b57cec5SDimitry Andric .setStringValue(Range.upto(C).drop_front(PrefixLength)); 1870b57cec5SDimitry Andric return C; 1880b57cec5SDimitry Andric } 1890b57cec5SDimitry Andric 1900b57cec5SDimitry Andric static MIToken::TokenKind getIdentifierKind(StringRef Identifier) { 1910b57cec5SDimitry Andric return StringSwitch<MIToken::TokenKind>(Identifier) 1920b57cec5SDimitry Andric .Case("_", MIToken::underscore) 1930b57cec5SDimitry Andric .Case("implicit", MIToken::kw_implicit) 1940b57cec5SDimitry Andric .Case("implicit-def", MIToken::kw_implicit_define) 1950b57cec5SDimitry Andric .Case("def", MIToken::kw_def) 1960b57cec5SDimitry Andric .Case("dead", MIToken::kw_dead) 1970b57cec5SDimitry Andric .Case("killed", MIToken::kw_killed) 1980b57cec5SDimitry Andric .Case("undef", MIToken::kw_undef) 1990b57cec5SDimitry Andric .Case("internal", MIToken::kw_internal) 2000b57cec5SDimitry Andric .Case("early-clobber", MIToken::kw_early_clobber) 2010b57cec5SDimitry Andric .Case("debug-use", MIToken::kw_debug_use) 2020b57cec5SDimitry Andric .Case("renamable", MIToken::kw_renamable) 2030b57cec5SDimitry Andric .Case("tied-def", MIToken::kw_tied_def) 2040b57cec5SDimitry Andric .Case("frame-setup", MIToken::kw_frame_setup) 2050b57cec5SDimitry Andric .Case("frame-destroy", MIToken::kw_frame_destroy) 2060b57cec5SDimitry Andric .Case("nnan", MIToken::kw_nnan) 2070b57cec5SDimitry Andric .Case("ninf", MIToken::kw_ninf) 2080b57cec5SDimitry Andric .Case("nsz", MIToken::kw_nsz) 2090b57cec5SDimitry Andric .Case("arcp", MIToken::kw_arcp) 2100b57cec5SDimitry Andric .Case("contract", MIToken::kw_contract) 2110b57cec5SDimitry Andric .Case("afn", MIToken::kw_afn) 2120b57cec5SDimitry Andric .Case("reassoc", MIToken::kw_reassoc) 2130b57cec5SDimitry Andric .Case("nuw", MIToken::kw_nuw) 2140b57cec5SDimitry Andric .Case("nsw", MIToken::kw_nsw) 215*0fca6ea1SDimitry Andric .Case("nusw", MIToken::kw_nusw) 2160b57cec5SDimitry Andric .Case("exact", MIToken::kw_exact) 217*0fca6ea1SDimitry Andric .Case("nneg", MIToken::kw_nneg) 218*0fca6ea1SDimitry Andric .Case("disjoint", MIToken::kw_disjoint) 219480093f4SDimitry Andric .Case("nofpexcept", MIToken::kw_nofpexcept) 22006c3fb27SDimitry Andric .Case("unpredictable", MIToken::kw_unpredictable) 2210b57cec5SDimitry Andric .Case("debug-location", MIToken::kw_debug_location) 222e8d8bef9SDimitry Andric .Case("debug-instr-number", MIToken::kw_debug_instr_number) 223bdd1243dSDimitry Andric .Case("dbg-instr-ref", MIToken::kw_dbg_instr_ref) 2240b57cec5SDimitry Andric .Case("same_value", MIToken::kw_cfi_same_value) 2250b57cec5SDimitry Andric .Case("offset", MIToken::kw_cfi_offset) 2260b57cec5SDimitry Andric .Case("rel_offset", MIToken::kw_cfi_rel_offset) 2270b57cec5SDimitry Andric .Case("def_cfa_register", MIToken::kw_cfi_def_cfa_register) 2280b57cec5SDimitry Andric .Case("def_cfa_offset", MIToken::kw_cfi_def_cfa_offset) 2290b57cec5SDimitry Andric .Case("adjust_cfa_offset", MIToken::kw_cfi_adjust_cfa_offset) 2300b57cec5SDimitry Andric .Case("escape", MIToken::kw_cfi_escape) 2310b57cec5SDimitry Andric .Case("def_cfa", MIToken::kw_cfi_def_cfa) 232fe6060f1SDimitry Andric .Case("llvm_def_aspace_cfa", MIToken::kw_cfi_llvm_def_aspace_cfa) 2330b57cec5SDimitry Andric .Case("remember_state", MIToken::kw_cfi_remember_state) 2340b57cec5SDimitry Andric .Case("restore", MIToken::kw_cfi_restore) 2350b57cec5SDimitry Andric .Case("restore_state", MIToken::kw_cfi_restore_state) 2360b57cec5SDimitry Andric .Case("undefined", MIToken::kw_cfi_undefined) 2370b57cec5SDimitry Andric .Case("register", MIToken::kw_cfi_register) 2380b57cec5SDimitry Andric .Case("window_save", MIToken::kw_cfi_window_save) 239e8d8bef9SDimitry Andric .Case("negate_ra_sign_state", 240e8d8bef9SDimitry Andric MIToken::kw_cfi_aarch64_negate_ra_sign_state) 2410b57cec5SDimitry Andric .Case("blockaddress", MIToken::kw_blockaddress) 2420b57cec5SDimitry Andric .Case("intrinsic", MIToken::kw_intrinsic) 2430b57cec5SDimitry Andric .Case("target-index", MIToken::kw_target_index) 2440b57cec5SDimitry Andric .Case("half", MIToken::kw_half) 245*0fca6ea1SDimitry Andric .Case("bfloat", MIToken::kw_bfloat) 2460b57cec5SDimitry Andric .Case("float", MIToken::kw_float) 2470b57cec5SDimitry Andric .Case("double", MIToken::kw_double) 2480b57cec5SDimitry Andric .Case("x86_fp80", MIToken::kw_x86_fp80) 2490b57cec5SDimitry Andric .Case("fp128", MIToken::kw_fp128) 2500b57cec5SDimitry Andric .Case("ppc_fp128", MIToken::kw_ppc_fp128) 2510b57cec5SDimitry Andric .Case("target-flags", MIToken::kw_target_flags) 2520b57cec5SDimitry Andric .Case("volatile", MIToken::kw_volatile) 2530b57cec5SDimitry Andric .Case("non-temporal", MIToken::kw_non_temporal) 2540b57cec5SDimitry Andric .Case("dereferenceable", MIToken::kw_dereferenceable) 2550b57cec5SDimitry Andric .Case("invariant", MIToken::kw_invariant) 2560b57cec5SDimitry Andric .Case("align", MIToken::kw_align) 25781ad6265SDimitry Andric .Case("basealign", MIToken::kw_basealign) 2580b57cec5SDimitry Andric .Case("addrspace", MIToken::kw_addrspace) 2590b57cec5SDimitry Andric .Case("stack", MIToken::kw_stack) 2600b57cec5SDimitry Andric .Case("got", MIToken::kw_got) 2610b57cec5SDimitry Andric .Case("jump-table", MIToken::kw_jump_table) 2620b57cec5SDimitry Andric .Case("constant-pool", MIToken::kw_constant_pool) 2630b57cec5SDimitry Andric .Case("call-entry", MIToken::kw_call_entry) 264480093f4SDimitry Andric .Case("custom", MIToken::kw_custom) 2650b57cec5SDimitry Andric .Case("liveout", MIToken::kw_liveout) 2660b57cec5SDimitry Andric .Case("landing-pad", MIToken::kw_landing_pad) 267349cc55cSDimitry Andric .Case("inlineasm-br-indirect-target", 268349cc55cSDimitry Andric MIToken::kw_inlineasm_br_indirect_target) 2695ffd83dbSDimitry Andric .Case("ehfunclet-entry", MIToken::kw_ehfunclet_entry) 2700b57cec5SDimitry Andric .Case("liveins", MIToken::kw_liveins) 2710b57cec5SDimitry Andric .Case("successors", MIToken::kw_successors) 2720b57cec5SDimitry Andric .Case("floatpred", MIToken::kw_floatpred) 2730b57cec5SDimitry Andric .Case("intpred", MIToken::kw_intpred) 2748bcb0991SDimitry Andric .Case("shufflemask", MIToken::kw_shufflemask) 2750b57cec5SDimitry Andric .Case("pre-instr-symbol", MIToken::kw_pre_instr_symbol) 2760b57cec5SDimitry Andric .Case("post-instr-symbol", MIToken::kw_post_instr_symbol) 277480093f4SDimitry Andric .Case("heap-alloc-marker", MIToken::kw_heap_alloc_marker) 278bdd1243dSDimitry Andric .Case("pcsections", MIToken::kw_pcsections) 279bdd1243dSDimitry Andric .Case("cfi-type", MIToken::kw_cfi_type) 2805ffd83dbSDimitry Andric .Case("bbsections", MIToken::kw_bbsections) 281bdd1243dSDimitry Andric .Case("bb_id", MIToken::kw_bb_id) 2820b57cec5SDimitry Andric .Case("unknown-size", MIToken::kw_unknown_size) 283fe6060f1SDimitry Andric .Case("unknown-address", MIToken::kw_unknown_address) 284fe6060f1SDimitry Andric .Case("distinct", MIToken::kw_distinct) 285bdd1243dSDimitry Andric .Case("ir-block-address-taken", MIToken::kw_ir_block_address_taken) 286bdd1243dSDimitry Andric .Case("machine-block-address-taken", 287bdd1243dSDimitry Andric MIToken::kw_machine_block_address_taken) 2885f757f3fSDimitry Andric .Case("call-frame-size", MIToken::kw_call_frame_size) 2895f757f3fSDimitry Andric .Case("noconvergent", MIToken::kw_noconvergent) 2900b57cec5SDimitry Andric .Default(MIToken::Identifier); 2910b57cec5SDimitry Andric } 2920b57cec5SDimitry Andric 2930b57cec5SDimitry Andric static Cursor maybeLexIdentifier(Cursor C, MIToken &Token) { 2940b57cec5SDimitry Andric if (!isalpha(C.peek()) && C.peek() != '_') 295bdd1243dSDimitry Andric return std::nullopt; 2960b57cec5SDimitry Andric auto Range = C; 2970b57cec5SDimitry Andric while (isIdentifierChar(C.peek())) 2980b57cec5SDimitry Andric C.advance(); 2990b57cec5SDimitry Andric auto Identifier = Range.upto(C); 3000b57cec5SDimitry Andric Token.reset(getIdentifierKind(Identifier), Identifier) 3010b57cec5SDimitry Andric .setStringValue(Identifier); 3020b57cec5SDimitry Andric return C; 3030b57cec5SDimitry Andric } 3040b57cec5SDimitry Andric 3050b57cec5SDimitry Andric static Cursor maybeLexMachineBasicBlock(Cursor C, MIToken &Token, 3060b57cec5SDimitry Andric ErrorCallbackType ErrorCallback) { 3075f757f3fSDimitry Andric bool IsReference = C.remaining().starts_with("%bb."); 3085f757f3fSDimitry Andric if (!IsReference && !C.remaining().starts_with("bb.")) 309bdd1243dSDimitry Andric return std::nullopt; 3100b57cec5SDimitry Andric auto Range = C; 3110b57cec5SDimitry Andric unsigned PrefixLength = IsReference ? 4 : 3; 3120b57cec5SDimitry Andric C.advance(PrefixLength); // Skip '%bb.' or 'bb.' 3130b57cec5SDimitry Andric if (!isdigit(C.peek())) { 3140b57cec5SDimitry Andric Token.reset(MIToken::Error, C.remaining()); 3150b57cec5SDimitry Andric ErrorCallback(C.location(), "expected a number after '%bb.'"); 3160b57cec5SDimitry Andric return C; 3170b57cec5SDimitry Andric } 3180b57cec5SDimitry Andric auto NumberRange = C; 3190b57cec5SDimitry Andric while (isdigit(C.peek())) 3200b57cec5SDimitry Andric C.advance(); 3210b57cec5SDimitry Andric StringRef Number = NumberRange.upto(C); 3220b57cec5SDimitry Andric unsigned StringOffset = PrefixLength + Number.size(); // Drop '%bb.<id>' 3230b57cec5SDimitry Andric // TODO: The format bb.<id>.<irname> is supported only when it's not a 3240b57cec5SDimitry Andric // reference. Once we deprecate the format where the irname shows up, we 3250b57cec5SDimitry Andric // should only lex forward if it is a reference. 3260b57cec5SDimitry Andric if (C.peek() == '.') { 3270b57cec5SDimitry Andric C.advance(); // Skip '.' 3280b57cec5SDimitry Andric ++StringOffset; 3290b57cec5SDimitry Andric while (isIdentifierChar(C.peek())) 3300b57cec5SDimitry Andric C.advance(); 3310b57cec5SDimitry Andric } 3320b57cec5SDimitry Andric Token.reset(IsReference ? MIToken::MachineBasicBlock 3330b57cec5SDimitry Andric : MIToken::MachineBasicBlockLabel, 3340b57cec5SDimitry Andric Range.upto(C)) 3350b57cec5SDimitry Andric .setIntegerValue(APSInt(Number)) 3360b57cec5SDimitry Andric .setStringValue(Range.upto(C).drop_front(StringOffset)); 3370b57cec5SDimitry Andric return C; 3380b57cec5SDimitry Andric } 3390b57cec5SDimitry Andric 3400b57cec5SDimitry Andric static Cursor maybeLexIndex(Cursor C, MIToken &Token, StringRef Rule, 3410b57cec5SDimitry Andric MIToken::TokenKind Kind) { 3425f757f3fSDimitry Andric if (!C.remaining().starts_with(Rule) || !isdigit(C.peek(Rule.size()))) 343bdd1243dSDimitry Andric return std::nullopt; 3440b57cec5SDimitry Andric auto Range = C; 3450b57cec5SDimitry Andric C.advance(Rule.size()); 3460b57cec5SDimitry Andric auto NumberRange = C; 3470b57cec5SDimitry Andric while (isdigit(C.peek())) 3480b57cec5SDimitry Andric C.advance(); 3490b57cec5SDimitry Andric Token.reset(Kind, Range.upto(C)).setIntegerValue(APSInt(NumberRange.upto(C))); 3500b57cec5SDimitry Andric return C; 3510b57cec5SDimitry Andric } 3520b57cec5SDimitry Andric 3530b57cec5SDimitry Andric static Cursor maybeLexIndexAndName(Cursor C, MIToken &Token, StringRef Rule, 3540b57cec5SDimitry Andric MIToken::TokenKind Kind) { 3555f757f3fSDimitry Andric if (!C.remaining().starts_with(Rule) || !isdigit(C.peek(Rule.size()))) 356bdd1243dSDimitry Andric return std::nullopt; 3570b57cec5SDimitry Andric auto Range = C; 3580b57cec5SDimitry Andric C.advance(Rule.size()); 3590b57cec5SDimitry Andric auto NumberRange = C; 3600b57cec5SDimitry Andric while (isdigit(C.peek())) 3610b57cec5SDimitry Andric C.advance(); 3620b57cec5SDimitry Andric StringRef Number = NumberRange.upto(C); 3630b57cec5SDimitry Andric unsigned StringOffset = Rule.size() + Number.size(); 3640b57cec5SDimitry Andric if (C.peek() == '.') { 3650b57cec5SDimitry Andric C.advance(); 3660b57cec5SDimitry Andric ++StringOffset; 3670b57cec5SDimitry Andric while (isIdentifierChar(C.peek())) 3680b57cec5SDimitry Andric C.advance(); 3690b57cec5SDimitry Andric } 3700b57cec5SDimitry Andric Token.reset(Kind, Range.upto(C)) 3710b57cec5SDimitry Andric .setIntegerValue(APSInt(Number)) 3720b57cec5SDimitry Andric .setStringValue(Range.upto(C).drop_front(StringOffset)); 3730b57cec5SDimitry Andric return C; 3740b57cec5SDimitry Andric } 3750b57cec5SDimitry Andric 3760b57cec5SDimitry Andric static Cursor maybeLexJumpTableIndex(Cursor C, MIToken &Token) { 3770b57cec5SDimitry Andric return maybeLexIndex(C, Token, "%jump-table.", MIToken::JumpTableIndex); 3780b57cec5SDimitry Andric } 3790b57cec5SDimitry Andric 3800b57cec5SDimitry Andric static Cursor maybeLexStackObject(Cursor C, MIToken &Token) { 3810b57cec5SDimitry Andric return maybeLexIndexAndName(C, Token, "%stack.", MIToken::StackObject); 3820b57cec5SDimitry Andric } 3830b57cec5SDimitry Andric 3840b57cec5SDimitry Andric static Cursor maybeLexFixedStackObject(Cursor C, MIToken &Token) { 3850b57cec5SDimitry Andric return maybeLexIndex(C, Token, "%fixed-stack.", MIToken::FixedStackObject); 3860b57cec5SDimitry Andric } 3870b57cec5SDimitry Andric 3880b57cec5SDimitry Andric static Cursor maybeLexConstantPoolItem(Cursor C, MIToken &Token) { 3890b57cec5SDimitry Andric return maybeLexIndex(C, Token, "%const.", MIToken::ConstantPoolItem); 3900b57cec5SDimitry Andric } 3910b57cec5SDimitry Andric 3920b57cec5SDimitry Andric static Cursor maybeLexSubRegisterIndex(Cursor C, MIToken &Token, 3930b57cec5SDimitry Andric ErrorCallbackType ErrorCallback) { 3940b57cec5SDimitry Andric const StringRef Rule = "%subreg."; 3955f757f3fSDimitry Andric if (!C.remaining().starts_with(Rule)) 396bdd1243dSDimitry Andric return std::nullopt; 3970b57cec5SDimitry Andric return lexName(C, Token, MIToken::SubRegisterIndex, Rule.size(), 3980b57cec5SDimitry Andric ErrorCallback); 3990b57cec5SDimitry Andric } 4000b57cec5SDimitry Andric 4010b57cec5SDimitry Andric static Cursor maybeLexIRBlock(Cursor C, MIToken &Token, 4020b57cec5SDimitry Andric ErrorCallbackType ErrorCallback) { 4030b57cec5SDimitry Andric const StringRef Rule = "%ir-block."; 4045f757f3fSDimitry Andric if (!C.remaining().starts_with(Rule)) 405bdd1243dSDimitry Andric return std::nullopt; 4060b57cec5SDimitry Andric if (isdigit(C.peek(Rule.size()))) 4070b57cec5SDimitry Andric return maybeLexIndex(C, Token, Rule, MIToken::IRBlock); 4080b57cec5SDimitry Andric return lexName(C, Token, MIToken::NamedIRBlock, Rule.size(), ErrorCallback); 4090b57cec5SDimitry Andric } 4100b57cec5SDimitry Andric 4110b57cec5SDimitry Andric static Cursor maybeLexIRValue(Cursor C, MIToken &Token, 4120b57cec5SDimitry Andric ErrorCallbackType ErrorCallback) { 4130b57cec5SDimitry Andric const StringRef Rule = "%ir."; 4145f757f3fSDimitry Andric if (!C.remaining().starts_with(Rule)) 415bdd1243dSDimitry Andric return std::nullopt; 4160b57cec5SDimitry Andric if (isdigit(C.peek(Rule.size()))) 4170b57cec5SDimitry Andric return maybeLexIndex(C, Token, Rule, MIToken::IRValue); 4180b57cec5SDimitry Andric return lexName(C, Token, MIToken::NamedIRValue, Rule.size(), ErrorCallback); 4190b57cec5SDimitry Andric } 4200b57cec5SDimitry Andric 4210b57cec5SDimitry Andric static Cursor maybeLexStringConstant(Cursor C, MIToken &Token, 4220b57cec5SDimitry Andric ErrorCallbackType ErrorCallback) { 4230b57cec5SDimitry Andric if (C.peek() != '"') 424bdd1243dSDimitry Andric return std::nullopt; 4250b57cec5SDimitry Andric return lexName(C, Token, MIToken::StringConstant, /*PrefixLength=*/0, 4260b57cec5SDimitry Andric ErrorCallback); 4270b57cec5SDimitry Andric } 4280b57cec5SDimitry Andric 4290b57cec5SDimitry Andric static Cursor lexVirtualRegister(Cursor C, MIToken &Token) { 4300b57cec5SDimitry Andric auto Range = C; 4310b57cec5SDimitry Andric C.advance(); // Skip '%' 4320b57cec5SDimitry Andric auto NumberRange = C; 4330b57cec5SDimitry Andric while (isdigit(C.peek())) 4340b57cec5SDimitry Andric C.advance(); 4350b57cec5SDimitry Andric Token.reset(MIToken::VirtualRegister, Range.upto(C)) 4360b57cec5SDimitry Andric .setIntegerValue(APSInt(NumberRange.upto(C))); 4370b57cec5SDimitry Andric return C; 4380b57cec5SDimitry Andric } 4390b57cec5SDimitry Andric 4400b57cec5SDimitry Andric /// Returns true for a character allowed in a register name. 4410b57cec5SDimitry Andric static bool isRegisterChar(char C) { 4420b57cec5SDimitry Andric return isIdentifierChar(C) && C != '.'; 4430b57cec5SDimitry Andric } 4440b57cec5SDimitry Andric 4450b57cec5SDimitry Andric static Cursor lexNamedVirtualRegister(Cursor C, MIToken &Token) { 4460b57cec5SDimitry Andric Cursor Range = C; 4470b57cec5SDimitry Andric C.advance(); // Skip '%' 4480b57cec5SDimitry Andric while (isRegisterChar(C.peek())) 4490b57cec5SDimitry Andric C.advance(); 4500b57cec5SDimitry Andric Token.reset(MIToken::NamedVirtualRegister, Range.upto(C)) 4510b57cec5SDimitry Andric .setStringValue(Range.upto(C).drop_front(1)); // Drop the '%' 4520b57cec5SDimitry Andric return C; 4530b57cec5SDimitry Andric } 4540b57cec5SDimitry Andric 4550b57cec5SDimitry Andric static Cursor maybeLexRegister(Cursor C, MIToken &Token, 4560b57cec5SDimitry Andric ErrorCallbackType ErrorCallback) { 4570b57cec5SDimitry Andric if (C.peek() != '%' && C.peek() != '$') 458bdd1243dSDimitry Andric return std::nullopt; 4590b57cec5SDimitry Andric 4600b57cec5SDimitry Andric if (C.peek() == '%') { 4610b57cec5SDimitry Andric if (isdigit(C.peek(1))) 4620b57cec5SDimitry Andric return lexVirtualRegister(C, Token); 4630b57cec5SDimitry Andric 4640b57cec5SDimitry Andric if (isRegisterChar(C.peek(1))) 4650b57cec5SDimitry Andric return lexNamedVirtualRegister(C, Token); 4660b57cec5SDimitry Andric 467bdd1243dSDimitry Andric return std::nullopt; 4680b57cec5SDimitry Andric } 4690b57cec5SDimitry Andric 4700b57cec5SDimitry Andric assert(C.peek() == '$'); 4710b57cec5SDimitry Andric auto Range = C; 4720b57cec5SDimitry Andric C.advance(); // Skip '$' 4730b57cec5SDimitry Andric while (isRegisterChar(C.peek())) 4740b57cec5SDimitry Andric C.advance(); 4750b57cec5SDimitry Andric Token.reset(MIToken::NamedRegister, Range.upto(C)) 4760b57cec5SDimitry Andric .setStringValue(Range.upto(C).drop_front(1)); // Drop the '$' 4770b57cec5SDimitry Andric return C; 4780b57cec5SDimitry Andric } 4790b57cec5SDimitry Andric 4800b57cec5SDimitry Andric static Cursor maybeLexGlobalValue(Cursor C, MIToken &Token, 4810b57cec5SDimitry Andric ErrorCallbackType ErrorCallback) { 4820b57cec5SDimitry Andric if (C.peek() != '@') 483bdd1243dSDimitry Andric return std::nullopt; 4840b57cec5SDimitry Andric if (!isdigit(C.peek(1))) 4850b57cec5SDimitry Andric return lexName(C, Token, MIToken::NamedGlobalValue, /*PrefixLength=*/1, 4860b57cec5SDimitry Andric ErrorCallback); 4870b57cec5SDimitry Andric auto Range = C; 4880b57cec5SDimitry Andric C.advance(1); // Skip the '@' 4890b57cec5SDimitry Andric auto NumberRange = C; 4900b57cec5SDimitry Andric while (isdigit(C.peek())) 4910b57cec5SDimitry Andric C.advance(); 4920b57cec5SDimitry Andric Token.reset(MIToken::GlobalValue, Range.upto(C)) 4930b57cec5SDimitry Andric .setIntegerValue(APSInt(NumberRange.upto(C))); 4940b57cec5SDimitry Andric return C; 4950b57cec5SDimitry Andric } 4960b57cec5SDimitry Andric 4970b57cec5SDimitry Andric static Cursor maybeLexExternalSymbol(Cursor C, MIToken &Token, 4980b57cec5SDimitry Andric ErrorCallbackType ErrorCallback) { 4990b57cec5SDimitry Andric if (C.peek() != '&') 500bdd1243dSDimitry Andric return std::nullopt; 5010b57cec5SDimitry Andric return lexName(C, Token, MIToken::ExternalSymbol, /*PrefixLength=*/1, 5020b57cec5SDimitry Andric ErrorCallback); 5030b57cec5SDimitry Andric } 5040b57cec5SDimitry Andric 5050b57cec5SDimitry Andric static Cursor maybeLexMCSymbol(Cursor C, MIToken &Token, 5060b57cec5SDimitry Andric ErrorCallbackType ErrorCallback) { 5070b57cec5SDimitry Andric const StringRef Rule = "<mcsymbol "; 5085f757f3fSDimitry Andric if (!C.remaining().starts_with(Rule)) 509bdd1243dSDimitry Andric return std::nullopt; 5100b57cec5SDimitry Andric auto Start = C; 5110b57cec5SDimitry Andric C.advance(Rule.size()); 5120b57cec5SDimitry Andric 5130b57cec5SDimitry Andric // Try a simple unquoted name. 5140b57cec5SDimitry Andric if (C.peek() != '"') { 5150b57cec5SDimitry Andric while (isIdentifierChar(C.peek())) 5160b57cec5SDimitry Andric C.advance(); 5170b57cec5SDimitry Andric StringRef String = Start.upto(C).drop_front(Rule.size()); 5180b57cec5SDimitry Andric if (C.peek() != '>') { 5190b57cec5SDimitry Andric ErrorCallback(C.location(), 5200b57cec5SDimitry Andric "expected the '<mcsymbol ...' to be closed by a '>'"); 5210b57cec5SDimitry Andric Token.reset(MIToken::Error, Start.remaining()); 5220b57cec5SDimitry Andric return Start; 5230b57cec5SDimitry Andric } 5240b57cec5SDimitry Andric C.advance(); 5250b57cec5SDimitry Andric 5260b57cec5SDimitry Andric Token.reset(MIToken::MCSymbol, Start.upto(C)).setStringValue(String); 5270b57cec5SDimitry Andric return C; 5280b57cec5SDimitry Andric } 5290b57cec5SDimitry Andric 5300b57cec5SDimitry Andric // Otherwise lex out a quoted name. 5310b57cec5SDimitry Andric Cursor R = lexStringConstant(C, ErrorCallback); 5320b57cec5SDimitry Andric if (!R) { 5330b57cec5SDimitry Andric ErrorCallback(C.location(), 5340b57cec5SDimitry Andric "unable to parse quoted string from opening quote"); 5350b57cec5SDimitry Andric Token.reset(MIToken::Error, Start.remaining()); 5360b57cec5SDimitry Andric return Start; 5370b57cec5SDimitry Andric } 5380b57cec5SDimitry Andric StringRef String = Start.upto(R).drop_front(Rule.size()); 5390b57cec5SDimitry Andric if (R.peek() != '>') { 5400b57cec5SDimitry Andric ErrorCallback(R.location(), 5410b57cec5SDimitry Andric "expected the '<mcsymbol ...' to be closed by a '>'"); 5420b57cec5SDimitry Andric Token.reset(MIToken::Error, Start.remaining()); 5430b57cec5SDimitry Andric return Start; 5440b57cec5SDimitry Andric } 5450b57cec5SDimitry Andric R.advance(); 5460b57cec5SDimitry Andric 5470b57cec5SDimitry Andric Token.reset(MIToken::MCSymbol, Start.upto(R)) 5480b57cec5SDimitry Andric .setOwnedStringValue(unescapeQuotedString(String)); 5490b57cec5SDimitry Andric return R; 5500b57cec5SDimitry Andric } 5510b57cec5SDimitry Andric 5520b57cec5SDimitry Andric static bool isValidHexFloatingPointPrefix(char C) { 5535ffd83dbSDimitry Andric return C == 'H' || C == 'K' || C == 'L' || C == 'M' || C == 'R'; 5540b57cec5SDimitry Andric } 5550b57cec5SDimitry Andric 5560b57cec5SDimitry Andric static Cursor lexFloatingPointLiteral(Cursor Range, Cursor C, MIToken &Token) { 5570b57cec5SDimitry Andric C.advance(); 5580b57cec5SDimitry Andric // Skip over [0-9]*([eE][-+]?[0-9]+)? 5590b57cec5SDimitry Andric while (isdigit(C.peek())) 5600b57cec5SDimitry Andric C.advance(); 5610b57cec5SDimitry Andric if ((C.peek() == 'e' || C.peek() == 'E') && 5620b57cec5SDimitry Andric (isdigit(C.peek(1)) || 5630b57cec5SDimitry Andric ((C.peek(1) == '-' || C.peek(1) == '+') && isdigit(C.peek(2))))) { 5640b57cec5SDimitry Andric C.advance(2); 5650b57cec5SDimitry Andric while (isdigit(C.peek())) 5660b57cec5SDimitry Andric C.advance(); 5670b57cec5SDimitry Andric } 5680b57cec5SDimitry Andric Token.reset(MIToken::FloatingPointLiteral, Range.upto(C)); 5690b57cec5SDimitry Andric return C; 5700b57cec5SDimitry Andric } 5710b57cec5SDimitry Andric 5720b57cec5SDimitry Andric static Cursor maybeLexHexadecimalLiteral(Cursor C, MIToken &Token) { 5730b57cec5SDimitry Andric if (C.peek() != '0' || (C.peek(1) != 'x' && C.peek(1) != 'X')) 574bdd1243dSDimitry Andric return std::nullopt; 5750b57cec5SDimitry Andric Cursor Range = C; 5760b57cec5SDimitry Andric C.advance(2); 5770b57cec5SDimitry Andric unsigned PrefLen = 2; 5780b57cec5SDimitry Andric if (isValidHexFloatingPointPrefix(C.peek())) { 5790b57cec5SDimitry Andric C.advance(); 5800b57cec5SDimitry Andric PrefLen++; 5810b57cec5SDimitry Andric } 5820b57cec5SDimitry Andric while (isxdigit(C.peek())) 5830b57cec5SDimitry Andric C.advance(); 5840b57cec5SDimitry Andric StringRef StrVal = Range.upto(C); 5850b57cec5SDimitry Andric if (StrVal.size() <= PrefLen) 586bdd1243dSDimitry Andric return std::nullopt; 5870b57cec5SDimitry Andric if (PrefLen == 2) 5880b57cec5SDimitry Andric Token.reset(MIToken::HexLiteral, Range.upto(C)); 5890b57cec5SDimitry Andric else // It must be 3, which means that there was a floating-point prefix. 5900b57cec5SDimitry Andric Token.reset(MIToken::FloatingPointLiteral, Range.upto(C)); 5910b57cec5SDimitry Andric return C; 5920b57cec5SDimitry Andric } 5930b57cec5SDimitry Andric 5940b57cec5SDimitry Andric static Cursor maybeLexNumericalLiteral(Cursor C, MIToken &Token) { 5950b57cec5SDimitry Andric if (!isdigit(C.peek()) && (C.peek() != '-' || !isdigit(C.peek(1)))) 596bdd1243dSDimitry Andric return std::nullopt; 5970b57cec5SDimitry Andric auto Range = C; 5980b57cec5SDimitry Andric C.advance(); 5990b57cec5SDimitry Andric while (isdigit(C.peek())) 6000b57cec5SDimitry Andric C.advance(); 6010b57cec5SDimitry Andric if (C.peek() == '.') 6020b57cec5SDimitry Andric return lexFloatingPointLiteral(Range, C, Token); 6030b57cec5SDimitry Andric StringRef StrVal = Range.upto(C); 6040b57cec5SDimitry Andric Token.reset(MIToken::IntegerLiteral, StrVal).setIntegerValue(APSInt(StrVal)); 6050b57cec5SDimitry Andric return C; 6060b57cec5SDimitry Andric } 6070b57cec5SDimitry Andric 6080b57cec5SDimitry Andric static MIToken::TokenKind getMetadataKeywordKind(StringRef Identifier) { 6090b57cec5SDimitry Andric return StringSwitch<MIToken::TokenKind>(Identifier) 6100b57cec5SDimitry Andric .Case("!tbaa", MIToken::md_tbaa) 6110b57cec5SDimitry Andric .Case("!alias.scope", MIToken::md_alias_scope) 6120b57cec5SDimitry Andric .Case("!noalias", MIToken::md_noalias) 6130b57cec5SDimitry Andric .Case("!range", MIToken::md_range) 6140b57cec5SDimitry Andric .Case("!DIExpression", MIToken::md_diexpr) 6150b57cec5SDimitry Andric .Case("!DILocation", MIToken::md_dilocation) 6160b57cec5SDimitry Andric .Default(MIToken::Error); 6170b57cec5SDimitry Andric } 6180b57cec5SDimitry Andric 619480093f4SDimitry Andric static Cursor maybeLexExclaim(Cursor C, MIToken &Token, 6200b57cec5SDimitry Andric ErrorCallbackType ErrorCallback) { 6210b57cec5SDimitry Andric if (C.peek() != '!') 622bdd1243dSDimitry Andric return std::nullopt; 6230b57cec5SDimitry Andric auto Range = C; 6240b57cec5SDimitry Andric C.advance(1); 6250b57cec5SDimitry Andric if (isdigit(C.peek()) || !isIdentifierChar(C.peek())) { 6260b57cec5SDimitry Andric Token.reset(MIToken::exclaim, Range.upto(C)); 6270b57cec5SDimitry Andric return C; 6280b57cec5SDimitry Andric } 6290b57cec5SDimitry Andric while (isIdentifierChar(C.peek())) 6300b57cec5SDimitry Andric C.advance(); 6310b57cec5SDimitry Andric StringRef StrVal = Range.upto(C); 6320b57cec5SDimitry Andric Token.reset(getMetadataKeywordKind(StrVal), StrVal); 6330b57cec5SDimitry Andric if (Token.isError()) 6340b57cec5SDimitry Andric ErrorCallback(Token.location(), 6350b57cec5SDimitry Andric "use of unknown metadata keyword '" + StrVal + "'"); 6360b57cec5SDimitry Andric return C; 6370b57cec5SDimitry Andric } 6380b57cec5SDimitry Andric 6390b57cec5SDimitry Andric static MIToken::TokenKind symbolToken(char C) { 6400b57cec5SDimitry Andric switch (C) { 6410b57cec5SDimitry Andric case ',': 6420b57cec5SDimitry Andric return MIToken::comma; 6430b57cec5SDimitry Andric case '.': 6440b57cec5SDimitry Andric return MIToken::dot; 6450b57cec5SDimitry Andric case '=': 6460b57cec5SDimitry Andric return MIToken::equal; 6470b57cec5SDimitry Andric case ':': 6480b57cec5SDimitry Andric return MIToken::colon; 6490b57cec5SDimitry Andric case '(': 6500b57cec5SDimitry Andric return MIToken::lparen; 6510b57cec5SDimitry Andric case ')': 6520b57cec5SDimitry Andric return MIToken::rparen; 6530b57cec5SDimitry Andric case '{': 6540b57cec5SDimitry Andric return MIToken::lbrace; 6550b57cec5SDimitry Andric case '}': 6560b57cec5SDimitry Andric return MIToken::rbrace; 6570b57cec5SDimitry Andric case '+': 6580b57cec5SDimitry Andric return MIToken::plus; 6590b57cec5SDimitry Andric case '-': 6600b57cec5SDimitry Andric return MIToken::minus; 6610b57cec5SDimitry Andric case '<': 6620b57cec5SDimitry Andric return MIToken::less; 6630b57cec5SDimitry Andric case '>': 6640b57cec5SDimitry Andric return MIToken::greater; 6650b57cec5SDimitry Andric default: 6660b57cec5SDimitry Andric return MIToken::Error; 6670b57cec5SDimitry Andric } 6680b57cec5SDimitry Andric } 6690b57cec5SDimitry Andric 6700b57cec5SDimitry Andric static Cursor maybeLexSymbol(Cursor C, MIToken &Token) { 6710b57cec5SDimitry Andric MIToken::TokenKind Kind; 6720b57cec5SDimitry Andric unsigned Length = 1; 6730b57cec5SDimitry Andric if (C.peek() == ':' && C.peek(1) == ':') { 6740b57cec5SDimitry Andric Kind = MIToken::coloncolon; 6750b57cec5SDimitry Andric Length = 2; 6760b57cec5SDimitry Andric } else 6770b57cec5SDimitry Andric Kind = symbolToken(C.peek()); 6780b57cec5SDimitry Andric if (Kind == MIToken::Error) 679bdd1243dSDimitry Andric return std::nullopt; 6800b57cec5SDimitry Andric auto Range = C; 6810b57cec5SDimitry Andric C.advance(Length); 6820b57cec5SDimitry Andric Token.reset(Kind, Range.upto(C)); 6830b57cec5SDimitry Andric return C; 6840b57cec5SDimitry Andric } 6850b57cec5SDimitry Andric 6860b57cec5SDimitry Andric static Cursor maybeLexNewline(Cursor C, MIToken &Token) { 6870b57cec5SDimitry Andric if (!isNewlineChar(C.peek())) 688bdd1243dSDimitry Andric return std::nullopt; 6890b57cec5SDimitry Andric auto Range = C; 6900b57cec5SDimitry Andric C.advance(); 6910b57cec5SDimitry Andric Token.reset(MIToken::Newline, Range.upto(C)); 6920b57cec5SDimitry Andric return C; 6930b57cec5SDimitry Andric } 6940b57cec5SDimitry Andric 6950b57cec5SDimitry Andric static Cursor maybeLexEscapedIRValue(Cursor C, MIToken &Token, 6960b57cec5SDimitry Andric ErrorCallbackType ErrorCallback) { 6970b57cec5SDimitry Andric if (C.peek() != '`') 698bdd1243dSDimitry Andric return std::nullopt; 6990b57cec5SDimitry Andric auto Range = C; 7000b57cec5SDimitry Andric C.advance(); 7010b57cec5SDimitry Andric auto StrRange = C; 7020b57cec5SDimitry Andric while (C.peek() != '`') { 7030b57cec5SDimitry Andric if (C.isEOF() || isNewlineChar(C.peek())) { 7040b57cec5SDimitry Andric ErrorCallback( 7050b57cec5SDimitry Andric C.location(), 7060b57cec5SDimitry Andric "end of machine instruction reached before the closing '`'"); 7070b57cec5SDimitry Andric Token.reset(MIToken::Error, Range.remaining()); 7080b57cec5SDimitry Andric return C; 7090b57cec5SDimitry Andric } 7100b57cec5SDimitry Andric C.advance(); 7110b57cec5SDimitry Andric } 7120b57cec5SDimitry Andric StringRef Value = StrRange.upto(C); 7130b57cec5SDimitry Andric C.advance(); 7140b57cec5SDimitry Andric Token.reset(MIToken::QuotedIRValue, Range.upto(C)).setStringValue(Value); 7150b57cec5SDimitry Andric return C; 7160b57cec5SDimitry Andric } 7170b57cec5SDimitry Andric 7180b57cec5SDimitry Andric StringRef llvm::lexMIToken(StringRef Source, MIToken &Token, 7190b57cec5SDimitry Andric ErrorCallbackType ErrorCallback) { 7200b57cec5SDimitry Andric auto C = skipComment(skipWhitespace(Cursor(Source))); 7210b57cec5SDimitry Andric if (C.isEOF()) { 7220b57cec5SDimitry Andric Token.reset(MIToken::Eof, C.remaining()); 7230b57cec5SDimitry Andric return C.remaining(); 7240b57cec5SDimitry Andric } 7250b57cec5SDimitry Andric 7265ffd83dbSDimitry Andric C = skipMachineOperandComment(C); 7275ffd83dbSDimitry Andric 7280b57cec5SDimitry Andric if (Cursor R = maybeLexMachineBasicBlock(C, Token, ErrorCallback)) 7290b57cec5SDimitry Andric return R.remaining(); 7300b57cec5SDimitry Andric if (Cursor R = maybeLexIdentifier(C, Token)) 7310b57cec5SDimitry Andric return R.remaining(); 7320b57cec5SDimitry Andric if (Cursor R = maybeLexJumpTableIndex(C, Token)) 7330b57cec5SDimitry Andric return R.remaining(); 7340b57cec5SDimitry Andric if (Cursor R = maybeLexStackObject(C, Token)) 7350b57cec5SDimitry Andric return R.remaining(); 7360b57cec5SDimitry Andric if (Cursor R = maybeLexFixedStackObject(C, Token)) 7370b57cec5SDimitry Andric return R.remaining(); 7380b57cec5SDimitry Andric if (Cursor R = maybeLexConstantPoolItem(C, Token)) 7390b57cec5SDimitry Andric return R.remaining(); 7400b57cec5SDimitry Andric if (Cursor R = maybeLexSubRegisterIndex(C, Token, ErrorCallback)) 7410b57cec5SDimitry Andric return R.remaining(); 7420b57cec5SDimitry Andric if (Cursor R = maybeLexIRBlock(C, Token, ErrorCallback)) 7430b57cec5SDimitry Andric return R.remaining(); 7440b57cec5SDimitry Andric if (Cursor R = maybeLexIRValue(C, Token, ErrorCallback)) 7450b57cec5SDimitry Andric return R.remaining(); 7460b57cec5SDimitry Andric if (Cursor R = maybeLexRegister(C, Token, ErrorCallback)) 7470b57cec5SDimitry Andric return R.remaining(); 7480b57cec5SDimitry Andric if (Cursor R = maybeLexGlobalValue(C, Token, ErrorCallback)) 7490b57cec5SDimitry Andric return R.remaining(); 7500b57cec5SDimitry Andric if (Cursor R = maybeLexExternalSymbol(C, Token, ErrorCallback)) 7510b57cec5SDimitry Andric return R.remaining(); 7520b57cec5SDimitry Andric if (Cursor R = maybeLexMCSymbol(C, Token, ErrorCallback)) 7530b57cec5SDimitry Andric return R.remaining(); 7540b57cec5SDimitry Andric if (Cursor R = maybeLexHexadecimalLiteral(C, Token)) 7550b57cec5SDimitry Andric return R.remaining(); 7560b57cec5SDimitry Andric if (Cursor R = maybeLexNumericalLiteral(C, Token)) 7570b57cec5SDimitry Andric return R.remaining(); 758480093f4SDimitry Andric if (Cursor R = maybeLexExclaim(C, Token, ErrorCallback)) 7590b57cec5SDimitry Andric return R.remaining(); 7600b57cec5SDimitry Andric if (Cursor R = maybeLexSymbol(C, Token)) 7610b57cec5SDimitry Andric return R.remaining(); 7620b57cec5SDimitry Andric if (Cursor R = maybeLexNewline(C, Token)) 7630b57cec5SDimitry Andric return R.remaining(); 7640b57cec5SDimitry Andric if (Cursor R = maybeLexEscapedIRValue(C, Token, ErrorCallback)) 7650b57cec5SDimitry Andric return R.remaining(); 7660b57cec5SDimitry Andric if (Cursor R = maybeLexStringConstant(C, Token, ErrorCallback)) 7670b57cec5SDimitry Andric return R.remaining(); 7680b57cec5SDimitry Andric 7690b57cec5SDimitry Andric Token.reset(MIToken::Error, C.remaining()); 7700b57cec5SDimitry Andric ErrorCallback(C.location(), 7710b57cec5SDimitry Andric Twine("unexpected character '") + Twine(C.peek()) + "'"); 7720b57cec5SDimitry Andric return C.remaining(); 7730b57cec5SDimitry Andric } 774