xref: /llvm-project/llvm/lib/Target/BPF/AsmParser/BPFAsmParser.cpp (revision ed8019d9fbed2e6a6b08f8f73e9fa54a24f3ed52)
1 //===-- BPFAsmParser.cpp - Parse BPF assembly to MCInst instructions --===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "MCTargetDesc/BPFMCTargetDesc.h"
10 #include "TargetInfo/BPFTargetInfo.h"
11 #include "llvm/ADT/StringSwitch.h"
12 #include "llvm/MC/MCContext.h"
13 #include "llvm/MC/MCExpr.h"
14 #include "llvm/MC/MCInst.h"
15 #include "llvm/MC/MCInstrInfo.h"
16 #include "llvm/MC/MCParser/MCAsmLexer.h"
17 #include "llvm/MC/MCParser/MCParsedAsmOperand.h"
18 #include "llvm/MC/MCParser/MCTargetAsmParser.h"
19 #include "llvm/MC/MCStreamer.h"
20 #include "llvm/MC/MCSubtargetInfo.h"
21 #include "llvm/MC/TargetRegistry.h"
22 #include "llvm/Support/Casting.h"
23 
24 using namespace llvm;
25 
26 namespace {
27 struct BPFOperand;
28 
29 class BPFAsmParser : public MCTargetAsmParser {
30 
31   SMLoc getLoc() const { return getParser().getTok().getLoc(); }
32 
33   bool PreMatchCheck(OperandVector &Operands);
34 
35   bool matchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
36                                OperandVector &Operands, MCStreamer &Out,
37                                uint64_t &ErrorInfo,
38                                bool MatchingInlineAsm) override;
39 
40   bool parseRegister(MCRegister &Reo, SMLoc &StartLoc, SMLoc &EndLoc) override;
41   ParseStatus tryParseRegister(MCRegister &Reg, SMLoc &StartLoc,
42                                SMLoc &EndLoc) override;
43 
44   bool parseInstruction(ParseInstructionInfo &Info, StringRef Name,
45                         SMLoc NameLoc, OperandVector &Operands) override;
46 
47   // "=" is used as assignment operator for assembly statment, so can't be used
48   // for symbol assignment.
49   bool equalIsAsmAssignment() override { return false; }
50   // "*" is used for dereferencing memory that it will be the start of
51   // statement.
52   bool starIsStartOfStatement() override { return true; }
53 
54 #define GET_ASSEMBLER_HEADER
55 #include "BPFGenAsmMatcher.inc"
56 
57   ParseStatus parseImmediate(OperandVector &Operands);
58   ParseStatus parseRegister(OperandVector &Operands);
59   ParseStatus parseOperandAsOperator(OperandVector &Operands);
60 
61 public:
62   enum BPFMatchResultTy {
63     Match_Dummy = FIRST_TARGET_MATCH_RESULT_TY,
64 #define GET_OPERAND_DIAGNOSTIC_TYPES
65 #include "BPFGenAsmMatcher.inc"
66 #undef GET_OPERAND_DIAGNOSTIC_TYPES
67   };
68 
69   BPFAsmParser(const MCSubtargetInfo &STI, MCAsmParser &Parser,
70                const MCInstrInfo &MII, const MCTargetOptions &Options)
71       : MCTargetAsmParser(Options, STI, MII) {
72     setAvailableFeatures(ComputeAvailableFeatures(STI.getFeatureBits()));
73   }
74 };
75 
76 /// BPFOperand - Instances of this class represent a parsed machine
77 /// instruction
78 struct BPFOperand : public MCParsedAsmOperand {
79 
80   enum KindTy {
81     Token,
82     Register,
83     Immediate,
84   } Kind;
85 
86   struct RegOp {
87     MCRegister RegNum;
88   };
89 
90   struct ImmOp {
91     const MCExpr *Val;
92   };
93 
94   SMLoc StartLoc, EndLoc;
95   union {
96     StringRef Tok;
97     RegOp Reg;
98     ImmOp Imm;
99   };
100 
101   BPFOperand(KindTy K) : Kind(K) {}
102 
103 public:
104   BPFOperand(const BPFOperand &o) : MCParsedAsmOperand() {
105     Kind = o.Kind;
106     StartLoc = o.StartLoc;
107     EndLoc = o.EndLoc;
108 
109     switch (Kind) {
110     case Register:
111       Reg = o.Reg;
112       break;
113     case Immediate:
114       Imm = o.Imm;
115       break;
116     case Token:
117       Tok = o.Tok;
118       break;
119     }
120   }
121 
122   bool isToken() const override { return Kind == Token; }
123   bool isReg() const override { return Kind == Register; }
124   bool isImm() const override { return Kind == Immediate; }
125   bool isMem() const override { return false; }
126 
127   bool isConstantImm() const {
128     return isImm() && isa<MCConstantExpr>(getImm());
129   }
130 
131   int64_t getConstantImm() const {
132     const MCExpr *Val = getImm();
133     return static_cast<const MCConstantExpr *>(Val)->getValue();
134   }
135 
136   bool isSImm16() const {
137     return (isConstantImm() && isInt<16>(getConstantImm()));
138   }
139 
140   bool isSymbolRef() const { return isImm() && isa<MCSymbolRefExpr>(getImm()); }
141 
142   bool isBrTarget() const { return isSymbolRef() || isSImm16(); }
143 
144   /// getStartLoc - Gets location of the first token of this operand
145   SMLoc getStartLoc() const override { return StartLoc; }
146   /// getEndLoc - Gets location of the last token of this operand
147   SMLoc getEndLoc() const override { return EndLoc; }
148 
149   MCRegister getReg() const override {
150     assert(Kind == Register && "Invalid type access!");
151     return Reg.RegNum;
152   }
153 
154   const MCExpr *getImm() const {
155     assert(Kind == Immediate && "Invalid type access!");
156     return Imm.Val;
157   }
158 
159   StringRef getToken() const {
160     assert(Kind == Token && "Invalid type access!");
161     return Tok;
162   }
163 
164   void print(raw_ostream &OS) const override {
165     switch (Kind) {
166     case Immediate:
167       OS << *getImm();
168       break;
169     case Register:
170       OS << "<register x";
171       OS << getReg() << ">";
172       break;
173     case Token:
174       OS << "'" << getToken() << "'";
175       break;
176     }
177   }
178 
179   void addExpr(MCInst &Inst, const MCExpr *Expr) const {
180     assert(Expr && "Expr shouldn't be null!");
181 
182     if (auto *CE = dyn_cast<MCConstantExpr>(Expr))
183       Inst.addOperand(MCOperand::createImm(CE->getValue()));
184     else
185       Inst.addOperand(MCOperand::createExpr(Expr));
186   }
187 
188   // Used by the TableGen Code
189   void addRegOperands(MCInst &Inst, unsigned N) const {
190     assert(N == 1 && "Invalid number of operands!");
191     Inst.addOperand(MCOperand::createReg(getReg()));
192   }
193 
194   void addImmOperands(MCInst &Inst, unsigned N) const {
195     assert(N == 1 && "Invalid number of operands!");
196     addExpr(Inst, getImm());
197   }
198 
199   static std::unique_ptr<BPFOperand> createToken(StringRef Str, SMLoc S) {
200     auto Op = std::make_unique<BPFOperand>(Token);
201     Op->Tok = Str;
202     Op->StartLoc = S;
203     Op->EndLoc = S;
204     return Op;
205   }
206 
207   static std::unique_ptr<BPFOperand> createReg(MCRegister Reg, SMLoc S,
208                                                SMLoc E) {
209     auto Op = std::make_unique<BPFOperand>(Register);
210     Op->Reg.RegNum = Reg;
211     Op->StartLoc = S;
212     Op->EndLoc = E;
213     return Op;
214   }
215 
216   static std::unique_ptr<BPFOperand> createImm(const MCExpr *Val, SMLoc S,
217                                                SMLoc E) {
218     auto Op = std::make_unique<BPFOperand>(Immediate);
219     Op->Imm.Val = Val;
220     Op->StartLoc = S;
221     Op->EndLoc = E;
222     return Op;
223   }
224 
225   // Identifiers that can be used at the start of a statment.
226   static bool isValidIdAtStart(StringRef Name) {
227     return StringSwitch<bool>(Name.lower())
228         .Case("if", true)
229         .Case("call", true)
230         .Case("callx", true)
231         .Case("goto", true)
232         .Case("gotol", true)
233         .Case("may_goto", true)
234         .Case("*", true)
235         .Case("exit", true)
236         .Case("lock", true)
237         .Case("ld_pseudo", true)
238         .Default(false);
239   }
240 
241   // Identifiers that can be used in the middle of a statment.
242   static bool isValidIdInMiddle(StringRef Name) {
243     return StringSwitch<bool>(Name.lower())
244         .Case("u64", true)
245         .Case("u32", true)
246         .Case("u16", true)
247         .Case("u8", true)
248         .Case("s32", true)
249         .Case("s16", true)
250         .Case("s8", true)
251         .Case("be64", true)
252         .Case("be32", true)
253         .Case("be16", true)
254         .Case("le64", true)
255         .Case("le32", true)
256         .Case("le16", true)
257         .Case("bswap16", true)
258         .Case("bswap32", true)
259         .Case("bswap64", true)
260         .Case("goto", true)
261         .Case("gotol", true)
262         .Case("ll", true)
263         .Case("skb", true)
264         .Case("s", true)
265         .Case("atomic_fetch_add", true)
266         .Case("atomic_fetch_and", true)
267         .Case("atomic_fetch_or", true)
268         .Case("atomic_fetch_xor", true)
269         .Case("xchg_64", true)
270         .Case("xchg32_32", true)
271         .Case("cmpxchg_64", true)
272         .Case("cmpxchg32_32", true)
273         .Case("addr_space_cast", true)
274         .Default(false);
275   }
276 };
277 } // end anonymous namespace.
278 
279 #define GET_REGISTER_MATCHER
280 #define GET_MATCHER_IMPLEMENTATION
281 #include "BPFGenAsmMatcher.inc"
282 
283 bool BPFAsmParser::PreMatchCheck(OperandVector &Operands) {
284 
285   if (Operands.size() == 4) {
286     // check "reg1 = -reg2" and "reg1 = be16/be32/be64/le16/le32/le64 reg2",
287     // reg1 must be the same as reg2
288     BPFOperand &Op0 = (BPFOperand &)*Operands[0];
289     BPFOperand &Op1 = (BPFOperand &)*Operands[1];
290     BPFOperand &Op2 = (BPFOperand &)*Operands[2];
291     BPFOperand &Op3 = (BPFOperand &)*Operands[3];
292     if (Op0.isReg() && Op1.isToken() && Op2.isToken() && Op3.isReg()
293         && Op1.getToken() == "="
294         && (Op2.getToken() == "-" || Op2.getToken() == "be16"
295             || Op2.getToken() == "be32" || Op2.getToken() == "be64"
296             || Op2.getToken() == "le16" || Op2.getToken() == "le32"
297             || Op2.getToken() == "le64")
298         && Op0.getReg() != Op3.getReg())
299       return true;
300   }
301 
302   return false;
303 }
304 
305 bool BPFAsmParser::matchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
306                                            OperandVector &Operands,
307                                            MCStreamer &Out, uint64_t &ErrorInfo,
308                                            bool MatchingInlineAsm) {
309   MCInst Inst;
310   SMLoc ErrorLoc;
311 
312   if (PreMatchCheck(Operands))
313     return Error(IDLoc, "additional inst constraint not met");
314 
315   switch (MatchInstructionImpl(Operands, Inst, ErrorInfo, MatchingInlineAsm)) {
316   default:
317     break;
318   case Match_Success:
319     Inst.setLoc(IDLoc);
320     Out.emitInstruction(Inst, getSTI());
321     return false;
322   case Match_MissingFeature:
323     return Error(IDLoc, "instruction use requires an option to be enabled");
324   case Match_MnemonicFail:
325     return Error(IDLoc, "unrecognized instruction mnemonic");
326   case Match_InvalidOperand:
327     ErrorLoc = IDLoc;
328 
329     if (ErrorInfo != ~0U) {
330       if (ErrorInfo >= Operands.size())
331         return Error(ErrorLoc, "too few operands for instruction");
332 
333       ErrorLoc = ((BPFOperand &)*Operands[ErrorInfo]).getStartLoc();
334 
335       if (ErrorLoc == SMLoc())
336         ErrorLoc = IDLoc;
337     }
338 
339     return Error(ErrorLoc, "invalid operand for instruction");
340   case Match_InvalidBrTarget:
341     return Error(Operands[ErrorInfo]->getStartLoc(),
342                  "operand is not an identifier or 16-bit signed integer");
343   case Match_InvalidSImm16:
344     return Error(Operands[ErrorInfo]->getStartLoc(),
345                  "operand is not a 16-bit signed integer");
346   }
347 
348   llvm_unreachable("Unknown match type detected!");
349 }
350 
351 bool BPFAsmParser::parseRegister(MCRegister &Reg, SMLoc &StartLoc,
352                                  SMLoc &EndLoc) {
353   if (!tryParseRegister(Reg, StartLoc, EndLoc).isSuccess())
354     return Error(StartLoc, "invalid register name");
355   return false;
356 }
357 
358 ParseStatus BPFAsmParser::tryParseRegister(MCRegister &Reg, SMLoc &StartLoc,
359                                            SMLoc &EndLoc) {
360   const AsmToken &Tok = getParser().getTok();
361   StartLoc = Tok.getLoc();
362   EndLoc = Tok.getEndLoc();
363   Reg = BPF::NoRegister;
364   StringRef Name = getLexer().getTok().getIdentifier();
365 
366   if (!MatchRegisterName(Name)) {
367     getParser().Lex(); // Eat identifier token.
368     return ParseStatus::Success;
369   }
370 
371   return ParseStatus::NoMatch;
372 }
373 
374 ParseStatus BPFAsmParser::parseOperandAsOperator(OperandVector &Operands) {
375   SMLoc S = getLoc();
376 
377   if (getLexer().getKind() == AsmToken::Identifier) {
378     StringRef Name = getLexer().getTok().getIdentifier();
379 
380     if (BPFOperand::isValidIdInMiddle(Name)) {
381       getLexer().Lex();
382       Operands.push_back(BPFOperand::createToken(Name, S));
383       return ParseStatus::Success;
384     }
385 
386     return ParseStatus::NoMatch;
387   }
388 
389   switch (getLexer().getKind()) {
390   case AsmToken::Minus:
391   case AsmToken::Plus: {
392     if (getLexer().peekTok().is(AsmToken::Integer))
393       return ParseStatus::NoMatch;
394     [[fallthrough]];
395   }
396 
397   case AsmToken::Equal:
398   case AsmToken::Greater:
399   case AsmToken::Less:
400   case AsmToken::Pipe:
401   case AsmToken::Star:
402   case AsmToken::LParen:
403   case AsmToken::RParen:
404   case AsmToken::LBrac:
405   case AsmToken::RBrac:
406   case AsmToken::Slash:
407   case AsmToken::Amp:
408   case AsmToken::Percent:
409   case AsmToken::Caret: {
410     StringRef Name = getLexer().getTok().getString();
411     getLexer().Lex();
412     Operands.push_back(BPFOperand::createToken(Name, S));
413 
414     return ParseStatus::Success;
415   }
416 
417   case AsmToken::EqualEqual:
418   case AsmToken::ExclaimEqual:
419   case AsmToken::GreaterEqual:
420   case AsmToken::GreaterGreater:
421   case AsmToken::LessEqual:
422   case AsmToken::LessLess: {
423     Operands.push_back(BPFOperand::createToken(
424         getLexer().getTok().getString().substr(0, 1), S));
425     Operands.push_back(BPFOperand::createToken(
426         getLexer().getTok().getString().substr(1, 1), S));
427     getLexer().Lex();
428 
429     return ParseStatus::Success;
430   }
431 
432   default:
433     break;
434   }
435 
436   return ParseStatus::NoMatch;
437 }
438 
439 ParseStatus BPFAsmParser::parseRegister(OperandVector &Operands) {
440   SMLoc S = getLoc();
441   SMLoc E = SMLoc::getFromPointer(S.getPointer() - 1);
442 
443   switch (getLexer().getKind()) {
444   default:
445     return ParseStatus::NoMatch;
446   case AsmToken::Identifier:
447     StringRef Name = getLexer().getTok().getIdentifier();
448     MCRegister Reg = MatchRegisterName(Name);
449 
450     if (!Reg)
451       return ParseStatus::NoMatch;
452 
453     getLexer().Lex();
454     Operands.push_back(BPFOperand::createReg(Reg, S, E));
455   }
456   return ParseStatus::Success;
457 }
458 
459 ParseStatus BPFAsmParser::parseImmediate(OperandVector &Operands) {
460   switch (getLexer().getKind()) {
461   default:
462     return ParseStatus::NoMatch;
463   case AsmToken::LParen:
464   case AsmToken::Minus:
465   case AsmToken::Plus:
466   case AsmToken::Integer:
467   case AsmToken::String:
468   case AsmToken::Identifier:
469     break;
470   }
471 
472   const MCExpr *IdVal;
473   SMLoc S = getLoc();
474 
475   if (getParser().parseExpression(IdVal))
476     return ParseStatus::Failure;
477 
478   SMLoc E = SMLoc::getFromPointer(S.getPointer() - 1);
479   Operands.push_back(BPFOperand::createImm(IdVal, S, E));
480 
481   return ParseStatus::Success;
482 }
483 
484 /// Parse an BPF instruction which is in BPF verifier format.
485 bool BPFAsmParser::parseInstruction(ParseInstructionInfo &Info, StringRef Name,
486                                     SMLoc NameLoc, OperandVector &Operands) {
487   // The first operand could be either register or actually an operator.
488   MCRegister Reg = MatchRegisterName(Name);
489 
490   if (Reg) {
491     SMLoc E = SMLoc::getFromPointer(NameLoc.getPointer() - 1);
492     Operands.push_back(BPFOperand::createReg(Reg, NameLoc, E));
493   } else if (BPFOperand::isValidIdAtStart(Name))
494     Operands.push_back(BPFOperand::createToken(Name, NameLoc));
495   else
496     return Error(NameLoc, "invalid register/token name");
497 
498   while (!getLexer().is(AsmToken::EndOfStatement)) {
499     // Attempt to parse token as operator
500     if (parseOperandAsOperator(Operands).isSuccess())
501       continue;
502 
503     // Attempt to parse token as register
504     if (parseRegister(Operands).isSuccess())
505       continue;
506 
507     if (getLexer().is(AsmToken::Comma)) {
508       getLexer().Lex();
509       continue;
510     }
511 
512     // Attempt to parse token as an immediate
513     if (!parseImmediate(Operands).isSuccess()) {
514       SMLoc Loc = getLexer().getLoc();
515       return Error(Loc, "unexpected token");
516     }
517   }
518 
519   if (getLexer().isNot(AsmToken::EndOfStatement)) {
520     SMLoc Loc = getLexer().getLoc();
521 
522     getParser().eatToEndOfStatement();
523 
524     return Error(Loc, "unexpected token");
525   }
526 
527   // Consume the EndOfStatement.
528   getParser().Lex();
529   return false;
530 }
531 
532 extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeBPFAsmParser() {
533   RegisterMCAsmParser<BPFAsmParser> X(getTheBPFTarget());
534   RegisterMCAsmParser<BPFAsmParser> Y(getTheBPFleTarget());
535   RegisterMCAsmParser<BPFAsmParser> Z(getTheBPFbeTarget());
536 }
537