xref: /freebsd-src/contrib/llvm-project/llvm/include/llvm/MC/MCParser/MCTargetAsmParser.h (revision 0b57cec536236d46e3dba9bd041533462f33dbb7)
1 //===- llvm/MC/MCTargetAsmParser.h - Target Assembly Parser -----*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #ifndef LLVM_MC_MCPARSER_MCTARGETASMPARSER_H
10 #define LLVM_MC_MCPARSER_MCTARGETASMPARSER_H
11 
12 #include "llvm/ADT/StringRef.h"
13 #include "llvm/MC/MCExpr.h"
14 #include "llvm/MC/MCInstrInfo.h"
15 #include "llvm/MC/MCParser/MCAsmLexer.h"
16 #include "llvm/MC/MCParser/MCParsedAsmOperand.h"
17 #include "llvm/MC/MCParser/MCAsmParserExtension.h"
18 #include "llvm/MC/MCTargetOptions.h"
19 #include "llvm/MC/SubtargetFeature.h"
20 #include "llvm/Support/SMLoc.h"
21 #include <cstdint>
22 #include <memory>
23 
24 namespace llvm {
25 
// Forward declarations of types that the declarations below refer to.
template <typename T> class SmallVectorImpl;
class MCInst;
class MCParsedAsmOperand;
class MCStreamer;
class MCSubtargetInfo;

/// List of parsed operands. Each element is a std::unique_ptr, so the vector
/// owns the operands it holds.
using OperandVector = SmallVectorImpl<std::unique_ptr<MCParsedAsmOperand>>;
33 
/// Kinds of rewrite that an AsmRewrite record can describe.
///
/// The enumerator values are also the indices into AsmRewritePrecedence, so
/// the two definitions must list their entries in the same order.
enum AsmRewriteKind {
  AOK_Align,          // Rewrite align as .align.
  AOK_EVEN,           // Rewrite even as .even.
  AOK_Emit,           // Rewrite _emit as .byte.
  AOK_Input,          // Rewrite in terms of $N.
  AOK_Output,         // Rewrite in terms of $N.
  AOK_SizeDirective,  // Add a sizing directive (e.g., dword ptr).
  AOK_Label,          // Rewrite local labels.
  AOK_EndOfStatement, // Add EndOfStatement (e.g., "\n\t").
  AOK_Skip,           // Skip emission (e.g., offset/type operators).
  AOK_IntelExpr       // SizeDirective SymDisp [BaseReg + IndexReg * Scale + ImmDisp]
};

/// Precedence value for each AsmRewriteKind, indexed by the enumerator value.
const char AsmRewritePrecedence [] = {
  2, // AOK_Align
  2, // AOK_EVEN
  2, // AOK_Emit
  3, // AOK_Input
  3, // AOK_Output
  5, // AOK_SizeDirective
  1, // AOK_Label
  5, // AOK_EndOfStatement
  2, // AOK_Skip
  2  // AOK_IntelExpr
};

// A new AsmRewriteKind without a matching table entry would make
// AsmRewritePrecedence[Kind] read past the end of the array. Catch that at
// compile time instead. AOK_IntelExpr is the last enumerator; update this
// check if another kind is appended after it.
static_assert(sizeof(AsmRewritePrecedence) / sizeof(AsmRewritePrecedence[0]) ==
                  static_cast<unsigned>(AOK_IntelExpr) + 1,
              "AsmRewritePrecedence must have one entry per AsmRewriteKind");
59 
60 // Represnt the various parts which makes up an intel expression,
61 // used for emitting compound intel expressions
62 struct IntelExpr {
63   bool NeedBracs;
64   int64_t Imm;
65   StringRef BaseReg;
66   StringRef IndexReg;
67   unsigned Scale;
68 
69   IntelExpr(bool needBracs = false) : NeedBracs(needBracs), Imm(0),
70     BaseReg(StringRef()), IndexReg(StringRef()),
71     Scale(1) {}
72   // Compund immediate expression
73   IntelExpr(int64_t imm, bool needBracs) : IntelExpr(needBracs) {
74     Imm = imm;
75   }
76   // [Reg + ImmediateExpression]
77   // We don't bother to emit an immediate expression evaluated to zero
78   IntelExpr(StringRef reg, int64_t imm = 0, unsigned scale = 0,
79     bool needBracs = true) :
80     IntelExpr(imm, needBracs) {
81     IndexReg = reg;
82     if (scale)
83       Scale = scale;
84   }
85   // [BaseReg + IndexReg * ScaleExpression + ImmediateExpression]
86   IntelExpr(StringRef baseReg, StringRef indexReg, unsigned scale = 0,
87     int64_t imm = 0, bool needBracs = true) :
88     IntelExpr(indexReg, imm, scale, needBracs) {
89     BaseReg = baseReg;
90   }
91   bool hasBaseReg() const {
92     return BaseReg.size();
93   }
94   bool hasIndexReg() const {
95     return IndexReg.size();
96   }
97   bool hasRegs() const {
98     return hasBaseReg() || hasIndexReg();
99   }
100   bool isValid() const {
101     return (Scale == 1) ||
102            (hasIndexReg() && (Scale == 2 || Scale == 4 || Scale == 8));
103   }
104 };
105 
106 struct AsmRewrite {
107   AsmRewriteKind Kind;
108   SMLoc Loc;
109   unsigned Len;
110   int64_t Val;
111   StringRef Label;
112   IntelExpr IntelExp;
113 
114 public:
115   AsmRewrite(AsmRewriteKind kind, SMLoc loc, unsigned len = 0, int64_t val = 0)
116     : Kind(kind), Loc(loc), Len(len), Val(val) {}
117   AsmRewrite(AsmRewriteKind kind, SMLoc loc, unsigned len, StringRef label)
118     : AsmRewrite(kind, loc, len) { Label = label; }
119   AsmRewrite(SMLoc loc, unsigned len, IntelExpr exp)
120     : AsmRewrite(AOK_IntelExpr, loc, len) { IntelExp = exp; }
121 };
122 
123 struct ParseInstructionInfo {
124   SmallVectorImpl<AsmRewrite> *AsmRewrites = nullptr;
125 
126   ParseInstructionInfo() = default;
127   ParseInstructionInfo(SmallVectorImpl<AsmRewrite> *rewrites)
128     : AsmRewrites(rewrites) {}
129 };
130 
/// Outcome of trying to match a single operand.
enum OperandMatchResultTy {
  /// Operand matched successfully.
  MatchOperand_Success = 0,
  /// Operand did not match.
  MatchOperand_NoMatch = 1,
  /// Operand matched but had errors.
  MatchOperand_ParseFail = 2
};
136 
/// Three-way result carried by DiagnosticPredicate: a match, a near match,
/// or no match at all.
enum class DiagnosticPredicateTy {
  Match = 0,
  NearMatch = 1,
  NoMatch = 2,
};
142 
143 // When an operand is parsed, the assembler will try to iterate through a set of
144 // possible operand classes that the operand might match and call the
145 // corresponding PredicateMethod to determine that.
146 //
147 // If there are two AsmOperands that would give a specific diagnostic if there
148 // is no match, there is currently no mechanism to distinguish which operand is
149 // a closer match. The DiagnosticPredicate distinguishes between 'completely
150 // no match' and 'near match', so the assembler can decide whether to give a
151 // specific diagnostic, or use 'InvalidOperand' and continue to find a
152 // 'better matching' diagnostic.
153 //
154 // For example:
155 //    opcode opnd0, onpd1, opnd2
156 //
157 // where:
158 //    opnd2 could be an 'immediate of range [-8, 7]'
159 //    opnd2 could be a  'register + shift/extend'.
160 //
161 // If opnd2 is a valid register, but with a wrong shift/extend suffix, it makes
162 // little sense to give a diagnostic that the operand should be an immediate
163 // in range [-8, 7].
164 //
165 // This is a light-weight alternative to the 'NearMissInfo' approach
166 // below which collects *all* possible diagnostics. This alternative
167 // is optional and fully backward compatible with existing
168 // PredicateMethods that return a 'bool' (match or no match).
169 struct DiagnosticPredicate {
170   DiagnosticPredicateTy Type;
171 
172   explicit DiagnosticPredicate(bool Match)
173       : Type(Match ? DiagnosticPredicateTy::Match
174                    : DiagnosticPredicateTy::NearMatch) {}
175   DiagnosticPredicate(DiagnosticPredicateTy T) : Type(T) {}
176   DiagnosticPredicate(const DiagnosticPredicate &) = default;
177 
178   operator bool() const { return Type == DiagnosticPredicateTy::Match; }
179   bool isMatch() const { return Type == DiagnosticPredicateTy::Match; }
180   bool isNearMatch() const { return Type == DiagnosticPredicateTy::NearMatch; }
181   bool isNoMatch() const { return Type == DiagnosticPredicateTy::NoMatch; }
182 };
183 
184 // When matching of an assembly instruction fails, there may be multiple
185 // encodings that are close to being a match. It's often ambiguous which one
186 // the programmer intended to use, so we want to report an error which mentions
187 // each of these "near-miss" encodings. This struct contains information about
188 // one such encoding, and why it did not match the parsed instruction.
189 class NearMissInfo {
190 public:
191   enum NearMissKind {
192     NoNearMiss,
193     NearMissOperand,
194     NearMissFeature,
195     NearMissPredicate,
196     NearMissTooFewOperands,
197   };
198 
199   // The encoding is valid for the parsed assembly string. This is only used
200   // internally to the table-generated assembly matcher.
201   static NearMissInfo getSuccess() { return NearMissInfo(); }
202 
203   // The instruction encoding is not valid because it requires some target
204   // features that are not currently enabled. MissingFeatures has a bit set for
205   // each feature that the encoding needs but which is not enabled.
206   static NearMissInfo getMissedFeature(const FeatureBitset &MissingFeatures) {
207     NearMissInfo Result;
208     Result.Kind = NearMissFeature;
209     Result.Features = MissingFeatures;
210     return Result;
211   }
212 
213   // The instruction encoding is not valid because the target-specific
214   // predicate function returned an error code. FailureCode is the
215   // target-specific error code returned by the predicate.
216   static NearMissInfo getMissedPredicate(unsigned FailureCode) {
217     NearMissInfo Result;
218     Result.Kind = NearMissPredicate;
219     Result.PredicateError = FailureCode;
220     return Result;
221   }
222 
223   // The instruction encoding is not valid because one (and only one) parsed
224   // operand is not of the correct type. OperandError is the error code
225   // relating to the operand class expected by the encoding. OperandClass is
226   // the type of the expected operand. Opcode is the opcode of the encoding.
227   // OperandIndex is the index into the parsed operand list.
228   static NearMissInfo getMissedOperand(unsigned OperandError,
229                                        unsigned OperandClass, unsigned Opcode,
230                                        unsigned OperandIndex) {
231     NearMissInfo Result;
232     Result.Kind = NearMissOperand;
233     Result.MissedOperand.Error = OperandError;
234     Result.MissedOperand.Class = OperandClass;
235     Result.MissedOperand.Opcode = Opcode;
236     Result.MissedOperand.Index = OperandIndex;
237     return Result;
238   }
239 
240   // The instruction encoding is not valid because it expects more operands
241   // than were parsed. OperandClass is the class of the expected operand that
242   // was not provided. Opcode is the instruction encoding.
243   static NearMissInfo getTooFewOperands(unsigned OperandClass,
244                                         unsigned Opcode) {
245     NearMissInfo Result;
246     Result.Kind = NearMissTooFewOperands;
247     Result.TooFewOperands.Class = OperandClass;
248     Result.TooFewOperands.Opcode = Opcode;
249     return Result;
250   }
251 
252   operator bool() const { return Kind != NoNearMiss; }
253 
254   NearMissKind getKind() const { return Kind; }
255 
256   // Feature flags required by the instruction, that the current target does
257   // not have.
258   const FeatureBitset& getFeatures() const {
259     assert(Kind == NearMissFeature);
260     return Features;
261   }
262   // Error code returned by the target predicate when validating this
263   // instruction encoding.
264   unsigned getPredicateError() const {
265     assert(Kind == NearMissPredicate);
266     return PredicateError;
267   }
268   // MatchClassKind of the operand that we expected to see.
269   unsigned getOperandClass() const {
270     assert(Kind == NearMissOperand || Kind == NearMissTooFewOperands);
271     return MissedOperand.Class;
272   }
273   // Opcode of the encoding we were trying to match.
274   unsigned getOpcode() const {
275     assert(Kind == NearMissOperand || Kind == NearMissTooFewOperands);
276     return MissedOperand.Opcode;
277   }
278   // Error code returned when validating the operand.
279   unsigned getOperandError() const {
280     assert(Kind == NearMissOperand);
281     return MissedOperand.Error;
282   }
283   // Index of the actual operand we were trying to match in the list of parsed
284   // operands.
285   unsigned getOperandIndex() const {
286     assert(Kind == NearMissOperand);
287     return MissedOperand.Index;
288   }
289 
290 private:
291   NearMissKind Kind;
292 
293   // These two structs share a common prefix, so we can safely rely on the fact
294   // that they overlap in the union.
295   struct MissedOpInfo {
296     unsigned Class;
297     unsigned Opcode;
298     unsigned Error;
299     unsigned Index;
300   };
301 
302   struct TooFewOperandsInfo {
303     unsigned Class;
304     unsigned Opcode;
305   };
306 
307   union {
308     FeatureBitset Features;
309     unsigned PredicateError;
310     MissedOpInfo MissedOperand;
311     TooFewOperandsInfo TooFewOperands;
312   };
313 
314   NearMissInfo() : Kind(NoNearMiss) {}
315 };
316 
317 /// MCTargetAsmParser - Generic interface to target specific assembly parsers.
318 class MCTargetAsmParser : public MCAsmParserExtension {
319 public:
320   enum MatchResultTy {
321     Match_InvalidOperand,
322     Match_InvalidTiedOperand,
323     Match_MissingFeature,
324     Match_MnemonicFail,
325     Match_Success,
326     Match_NearMisses,
327     FIRST_TARGET_MATCH_RESULT_TY
328   };
329 
330 protected: // Can only create subclasses.
331   MCTargetAsmParser(MCTargetOptions const &, const MCSubtargetInfo &STI,
332                     const MCInstrInfo &MII);
333 
334   /// Create a copy of STI and return a non-const reference to it.
335   MCSubtargetInfo &copySTI();
336 
337   /// AvailableFeatures - The current set of available features.
338   FeatureBitset AvailableFeatures;
339 
340   /// ParsingInlineAsm - Are we parsing ms-style inline assembly?
341   bool ParsingInlineAsm = false;
342 
343   /// SemaCallback - The Sema callback implementation.  Must be set when parsing
344   /// ms-style inline assembly.
345   MCAsmParserSemaCallback *SemaCallback;
346 
347   /// Set of options which affects instrumentation of inline assembly.
348   MCTargetOptions MCOptions;
349 
350   /// Current STI.
351   const MCSubtargetInfo *STI;
352 
353   const MCInstrInfo &MII;
354 
355 public:
356   MCTargetAsmParser(const MCTargetAsmParser &) = delete;
357   MCTargetAsmParser &operator=(const MCTargetAsmParser &) = delete;
358 
359   ~MCTargetAsmParser() override;
360 
361   const MCSubtargetInfo &getSTI() const;
362 
363   const FeatureBitset& getAvailableFeatures() const {
364     return AvailableFeatures;
365   }
366   void setAvailableFeatures(const FeatureBitset& Value) {
367     AvailableFeatures = Value;
368   }
369 
370   bool isParsingInlineAsm () { return ParsingInlineAsm; }
371   void setParsingInlineAsm (bool Value) { ParsingInlineAsm = Value; }
372 
373   MCTargetOptions getTargetOptions() const { return MCOptions; }
374 
375   void setSemaCallback(MCAsmParserSemaCallback *Callback) {
376     SemaCallback = Callback;
377   }
378 
379   // Target-specific parsing of expression.
380   virtual bool parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) {
381     return getParser().parsePrimaryExpr(Res, EndLoc);
382   }
383 
384   virtual bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
385                              SMLoc &EndLoc) = 0;
386 
387   /// ParseInstruction - Parse one assembly instruction.
388   ///
389   /// The parser is positioned following the instruction name. The target
390   /// specific instruction parser should parse the entire instruction and
391   /// construct the appropriate MCInst, or emit an error. On success, the entire
392   /// line should be parsed up to and including the end-of-statement token. On
393   /// failure, the parser is not required to read to the end of the line.
394   //
395   /// \param Name - The instruction name.
396   /// \param NameLoc - The source location of the name.
397   /// \param Operands [out] - The list of parsed operands, this returns
398   ///        ownership of them to the caller.
399   /// \return True on failure.
400   virtual bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
401                                 SMLoc NameLoc, OperandVector &Operands) = 0;
402   virtual bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
403                                 AsmToken Token, OperandVector &Operands) {
404     return ParseInstruction(Info, Name, Token.getLoc(), Operands);
405   }
406 
407   /// ParseDirective - Parse a target specific assembler directive
408   ///
409   /// The parser is positioned following the directive name.  The target
410   /// specific directive parser should parse the entire directive doing or
411   /// recording any target specific work, or return true and do nothing if the
412   /// directive is not target specific. If the directive is specific for
413   /// the target, the entire line is parsed up to and including the
414   /// end-of-statement token and false is returned.
415   ///
416   /// \param DirectiveID - the identifier token of the directive.
417   virtual bool ParseDirective(AsmToken DirectiveID) = 0;
418 
419   /// MatchAndEmitInstruction - Recognize a series of operands of a parsed
420   /// instruction as an actual MCInst and emit it to the specified MCStreamer.
421   /// This returns false on success and returns true on failure to match.
422   ///
423   /// On failure, the target parser is responsible for emitting a diagnostic
424   /// explaining the match failure.
425   virtual bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
426                                        OperandVector &Operands, MCStreamer &Out,
427                                        uint64_t &ErrorInfo,
428                                        bool MatchingInlineAsm) = 0;
429 
430   /// Allows targets to let registers opt out of clobber lists.
431   virtual bool OmitRegisterFromClobberLists(unsigned RegNo) { return false; }
432 
433   /// Allow a target to add special case operand matching for things that
434   /// tblgen doesn't/can't handle effectively. For example, literal
435   /// immediates on ARM. TableGen expects a token operand, but the parser
436   /// will recognize them as immediates.
437   virtual unsigned validateTargetOperandClass(MCParsedAsmOperand &Op,
438                                               unsigned Kind) {
439     return Match_InvalidOperand;
440   }
441 
442   /// Validate the instruction match against any complex target predicates
443   /// before rendering any operands to it.
444   virtual unsigned
445   checkEarlyTargetMatchPredicate(MCInst &Inst, const OperandVector &Operands) {
446     return Match_Success;
447   }
448 
449   /// checkTargetMatchPredicate - Validate the instruction match against
450   /// any complex target predicates not expressible via match classes.
451   virtual unsigned checkTargetMatchPredicate(MCInst &Inst) {
452     return Match_Success;
453   }
454 
455   virtual void convertToMapAndConstraints(unsigned Kind,
456                                           const OperandVector &Operands) = 0;
457 
458   /// Returns whether two registers are equal and is used by the tied-operands
459   /// checks in the AsmMatcher. This method can be overridden allow e.g. a
460   /// sub- or super-register as the tied operand.
461   virtual bool regsEqual(const MCParsedAsmOperand &Op1,
462                          const MCParsedAsmOperand &Op2) const {
463     assert(Op1.isReg() && Op2.isReg() && "Operands not all regs");
464     return Op1.getReg() == Op2.getReg();
465   }
466 
467   // Return whether this parser uses assignment statements with equals tokens
468   virtual bool equalIsAsmAssignment() { return true; };
469   // Return whether this start of statement identifier is a label
470   virtual bool isLabel(AsmToken &Token) { return true; };
471   // Return whether this parser accept star as start of statement
472   virtual bool starIsStartOfStatement() { return false; };
473 
474   virtual const MCExpr *applyModifierToExpr(const MCExpr *E,
475                                             MCSymbolRefExpr::VariantKind,
476                                             MCContext &Ctx) {
477     return nullptr;
478   }
479 
480   // For actions that have to be performed before a label is emitted
481   virtual void doBeforeLabelEmit(MCSymbol *Symbol) {}
482 
483   virtual void onLabelParsed(MCSymbol *Symbol) {}
484 
485   /// Ensure that all previously parsed instructions have been emitted to the
486   /// output streamer, if the target does not emit them immediately.
487   virtual void flushPendingInstructions(MCStreamer &Out) {}
488 
489   virtual const MCExpr *createTargetUnaryExpr(const MCExpr *E,
490                                               AsmToken::TokenKind OperatorToken,
491                                               MCContext &Ctx) {
492     return nullptr;
493   }
494 
495   // For any checks or cleanups at the end of parsing.
496   virtual void onEndOfFile() {}
497 };
498 
499 } // end namespace llvm
500 
501 #endif // LLVM_MC_MCPARSER_MCTARGETASMPARSER_H
502