xref: /llvm-project/llvm/lib/CodeGen/MIRParser/MILexer.h (revision b3bb6f18bb5b2b8756b585b80d46d13ab3636a18)
1 //===- MILexer.h - Lexer for machine instructions ---------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file declares the function that lexes the machine instruction source
10 // string.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #ifndef LLVM_LIB_CODEGEN_MIRPARSER_MILEXER_H
15 #define LLVM_LIB_CODEGEN_MIRPARSER_MILEXER_H
16 
17 #include "llvm/ADT/APSInt.h"
18 #include "llvm/ADT/StringRef.h"
19 #include <string>
20 
21 namespace llvm {
22 
23 class Twine;
24 
25 /// A token produced by the machine instruction lexer.
26 struct MIToken {
27   enum TokenKind {
28     // Markers
29     Eof,
30     Error,
31     Newline,
32 
33     // Tokens with no info.
34     comma,
35     equal,
36     underscore,
37     colon,
38     coloncolon,
39     dot,
40     exclaim,
41     lparen,
42     rparen,
43     lbrace,
44     rbrace,
45     plus,
46     minus,
47     less,
48     greater,
49 
50     // Keywords
51     kw_implicit,
52     kw_implicit_define,
53     kw_def,
54     kw_dead,
55     kw_dereferenceable,
56     kw_killed,
57     kw_undef,
58     kw_internal,
59     kw_early_clobber,
60     kw_debug_use,
61     kw_renamable,
62     kw_tied_def,
63     kw_frame_setup,
64     kw_frame_destroy,
65     kw_nnan,
66     kw_ninf,
67     kw_nsz,
68     kw_arcp,
69     kw_contract,
70     kw_afn,
71     kw_reassoc,
72     kw_nusw,
73     kw_nuw,
74     kw_nsw,
75     kw_exact,
76     kw_nofpexcept,
77     kw_unpredictable,
78     kw_nneg,
79     kw_disjoint,
80     kw_samesign,
81     kw_debug_location,
82     kw_debug_instr_number,
83     kw_dbg_instr_ref,
84     kw_cfi_same_value,
85     kw_cfi_offset,
86     kw_cfi_rel_offset,
87     kw_cfi_def_cfa_register,
88     kw_cfi_def_cfa_offset,
89     kw_cfi_adjust_cfa_offset,
90     kw_cfi_escape,
91     kw_cfi_def_cfa,
92     kw_cfi_llvm_def_aspace_cfa,
93     kw_cfi_register,
94     kw_cfi_remember_state,
95     kw_cfi_restore,
96     kw_cfi_restore_state,
97     kw_cfi_undefined,
98     kw_cfi_window_save,
99     kw_cfi_aarch64_negate_ra_sign_state,
100     kw_cfi_aarch64_negate_ra_sign_state_with_pc,
101     kw_blockaddress,
102     kw_intrinsic,
103     kw_target_index,
104     kw_half,
105     kw_bfloat,
106     kw_float,
107     kw_double,
108     kw_x86_fp80,
109     kw_fp128,
110     kw_ppc_fp128,
111     kw_target_flags,
112     kw_volatile,
113     kw_non_temporal,
114     kw_invariant,
115     kw_align,
116     kw_basealign,
117     kw_addrspace,
118     kw_stack,
119     kw_got,
120     kw_jump_table,
121     kw_constant_pool,
122     kw_call_entry,
123     kw_custom,
124     kw_liveout,
125     kw_landing_pad,
126     kw_inlineasm_br_indirect_target,
127     kw_ehfunclet_entry,
128     kw_liveins,
129     kw_successors,
130     kw_floatpred,
131     kw_intpred,
132     kw_shufflemask,
133     kw_pre_instr_symbol,
134     kw_post_instr_symbol,
135     kw_heap_alloc_marker,
136     kw_pcsections,
137     kw_cfi_type,
138     kw_bbsections,
139     kw_bb_id,
140     kw_unknown_size,
141     kw_unknown_address,
142     kw_ir_block_address_taken,
143     kw_machine_block_address_taken,
144     kw_call_frame_size,
145     kw_noconvergent,
146 
147     // Metadata types.
148     kw_distinct,
149 
150     // Named metadata keywords
151     md_tbaa,
152     md_alias_scope,
153     md_noalias,
154     md_range,
155     md_diexpr,
156     md_dilocation,
157 
158     // Identifier tokens
159     Identifier,
160     NamedRegister,
161     NamedVirtualRegister,
162     MachineBasicBlockLabel,
163     MachineBasicBlock,
164     StackObject,
165     FixedStackObject,
166     NamedGlobalValue,
167     GlobalValue,
168     ExternalSymbol,
169     MCSymbol,
170 
171     // Other tokens
172     IntegerLiteral,
173     FloatingPointLiteral,
174     HexLiteral,
175     VectorLiteral,
176     VirtualRegister,
177     ConstantPoolItem,
178     JumpTableIndex,
179     NamedIRBlock,
180     IRBlock,
181     NamedIRValue,
182     IRValue,
183     QuotedIRValue, // `<constant value>`
184     SubRegisterIndex,
185     StringConstant
186   };
187 
188 private:
189   TokenKind Kind = Error;
190   StringRef Range;
191   StringRef StringValue;
192   std::string StringValueStorage;
193   APSInt IntVal;
194 
195 public:
196   MIToken() = default;
197 
198   MIToken &reset(TokenKind Kind, StringRef Range);
199 
200   MIToken &setStringValue(StringRef StrVal);
201   MIToken &setOwnedStringValue(std::string StrVal);
202   MIToken &setIntegerValue(APSInt IntVal);
203 
204   TokenKind kind() const { return Kind; }
205 
206   bool isError() const { return Kind == Error; }
207 
208   bool isNewlineOrEOF() const { return Kind == Newline || Kind == Eof; }
209 
210   bool isErrorOrEOF() const { return Kind == Error || Kind == Eof; }
211 
212   bool isRegister() const {
213     return Kind == NamedRegister || Kind == underscore ||
214            Kind == NamedVirtualRegister || Kind == VirtualRegister;
215   }
216 
217   bool isRegisterFlag() const {
218     return Kind == kw_implicit || Kind == kw_implicit_define ||
219            Kind == kw_def || Kind == kw_dead || Kind == kw_killed ||
220            Kind == kw_undef || Kind == kw_internal ||
221            Kind == kw_early_clobber || Kind == kw_debug_use ||
222            Kind == kw_renamable;
223   }
224 
225   bool isMemoryOperandFlag() const {
226     return Kind == kw_volatile || Kind == kw_non_temporal ||
227            Kind == kw_dereferenceable || Kind == kw_invariant ||
228            Kind == StringConstant;
229   }
230 
231   bool is(TokenKind K) const { return Kind == K; }
232 
233   bool isNot(TokenKind K) const { return Kind != K; }
234 
235   StringRef::iterator location() const { return Range.begin(); }
236 
237   StringRef range() const { return Range; }
238 
239   /// Return the token's string value.
240   StringRef stringValue() const { return StringValue; }
241 
242   const APSInt &integerValue() const { return IntVal; }
243 
244   bool hasIntegerValue() const {
245     return Kind == IntegerLiteral || Kind == MachineBasicBlock ||
246            Kind == MachineBasicBlockLabel || Kind == StackObject ||
247            Kind == FixedStackObject || Kind == GlobalValue ||
248            Kind == VirtualRegister || Kind == ConstantPoolItem ||
249            Kind == JumpTableIndex || Kind == IRBlock || Kind == IRValue;
250   }
251 };
252 
253 /// Consume a single machine instruction token in the given source and return
254 /// the remaining source string.
255 StringRef lexMIToken(
256     StringRef Source, MIToken &Token,
257     function_ref<void(StringRef::iterator, const Twine &)> ErrorCallback);
258 
259 } // end namespace llvm
260 
261 #endif // LLVM_LIB_CODEGEN_MIRPARSER_MILEXER_H
262