xref: /llvm-project/llvm/lib/Target/WebAssembly/AsmParser/WebAssemblyAsmParser.cpp (revision f1615e32379ff1ea125a8b3ac8792c3e0b5e6f2c)
1 //==- WebAssemblyAsmParser.cpp - Assembler for WebAssembly -*- C++ -*-==//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 ///
9 /// \file
10 /// This file is part of the WebAssembly Assembler.
11 ///
12 /// It contains code to translate a parsed .s file into MCInsts.
13 ///
14 //===----------------------------------------------------------------------===//
15 
16 #include "AsmParser/WebAssemblyAsmTypeCheck.h"
17 #include "MCTargetDesc/WebAssemblyMCTargetDesc.h"
18 #include "MCTargetDesc/WebAssemblyMCTypeUtilities.h"
19 #include "MCTargetDesc/WebAssemblyTargetStreamer.h"
20 #include "TargetInfo/WebAssemblyTargetInfo.h"
21 #include "WebAssembly.h"
22 #include "llvm/MC/MCContext.h"
23 #include "llvm/MC/MCExpr.h"
24 #include "llvm/MC/MCInst.h"
25 #include "llvm/MC/MCInstrInfo.h"
26 #include "llvm/MC/MCParser/MCAsmLexer.h"
27 #include "llvm/MC/MCParser/MCParsedAsmOperand.h"
28 #include "llvm/MC/MCParser/MCTargetAsmParser.h"
29 #include "llvm/MC/MCSectionWasm.h"
30 #include "llvm/MC/MCStreamer.h"
31 #include "llvm/MC/MCSubtargetInfo.h"
32 #include "llvm/MC/MCSymbol.h"
33 #include "llvm/MC/MCSymbolWasm.h"
34 #include "llvm/MC/TargetRegistry.h"
35 #include "llvm/Support/SourceMgr.h"
36 
37 using namespace llvm;
38 
39 #define DEBUG_TYPE "wasm-asm-parser"
40 
41 static const char *getSubtargetFeatureName(uint64_t Val);
42 
43 namespace {
44 
45 /// WebAssemblyOperand - Instances of this class represent the operands in a
46 /// parsed Wasm machine instruction.
47 struct WebAssemblyOperand : public MCParsedAsmOperand {
48   enum KindTy { Token, Integer, Float, Symbol, BrList } Kind;
49 
50   SMLoc StartLoc, EndLoc;
51 
52   struct TokOp {
53     StringRef Tok;
54   };
55 
56   struct IntOp {
57     int64_t Val;
58   };
59 
60   struct FltOp {
61     double Val;
62   };
63 
64   struct SymOp {
65     const MCExpr *Exp;
66   };
67 
68   struct BrLOp {
69     std::vector<unsigned> List;
70   };
71 
72   union {
73     struct TokOp Tok;
74     struct IntOp Int;
75     struct FltOp Flt;
76     struct SymOp Sym;
77     struct BrLOp BrL;
78   };
79 
80   WebAssemblyOperand(SMLoc Start, SMLoc End, TokOp T)
81       : Kind(Token), StartLoc(Start), EndLoc(End), Tok(T) {}
82   WebAssemblyOperand(SMLoc Start, SMLoc End, IntOp I)
83       : Kind(Integer), StartLoc(Start), EndLoc(End), Int(I) {}
84   WebAssemblyOperand(SMLoc Start, SMLoc End, FltOp F)
85       : Kind(Float), StartLoc(Start), EndLoc(End), Flt(F) {}
86   WebAssemblyOperand(SMLoc Start, SMLoc End, SymOp S)
87       : Kind(Symbol), StartLoc(Start), EndLoc(End), Sym(S) {}
88   WebAssemblyOperand(SMLoc Start, SMLoc End)
89       : Kind(BrList), StartLoc(Start), EndLoc(End), BrL() {}
90 
91   ~WebAssemblyOperand() {
92     if (isBrList())
93       BrL.~BrLOp();
94   }
95 
96   bool isToken() const override { return Kind == Token; }
97   bool isImm() const override { return Kind == Integer || Kind == Symbol; }
98   bool isFPImm() const { return Kind == Float; }
99   bool isMem() const override { return false; }
100   bool isReg() const override { return false; }
101   bool isBrList() const { return Kind == BrList; }
102 
103   MCRegister getReg() const override {
104     llvm_unreachable("Assembly inspects a register operand");
105     return 0;
106   }
107 
108   StringRef getToken() const {
109     assert(isToken());
110     return Tok.Tok;
111   }
112 
113   SMLoc getStartLoc() const override { return StartLoc; }
114   SMLoc getEndLoc() const override { return EndLoc; }
115 
116   void addRegOperands(MCInst &, unsigned) const {
117     // Required by the assembly matcher.
118     llvm_unreachable("Assembly matcher creates register operands");
119   }
120 
121   void addImmOperands(MCInst &Inst, unsigned N) const {
122     assert(N == 1 && "Invalid number of operands!");
123     if (Kind == Integer)
124       Inst.addOperand(MCOperand::createImm(Int.Val));
125     else if (Kind == Symbol)
126       Inst.addOperand(MCOperand::createExpr(Sym.Exp));
127     else
128       llvm_unreachable("Should be integer immediate or symbol!");
129   }
130 
131   void addFPImmf32Operands(MCInst &Inst, unsigned N) const {
132     assert(N == 1 && "Invalid number of operands!");
133     if (Kind == Float)
134       Inst.addOperand(
135           MCOperand::createSFPImm(bit_cast<uint32_t>(float(Flt.Val))));
136     else
137       llvm_unreachable("Should be float immediate!");
138   }
139 
140   void addFPImmf64Operands(MCInst &Inst, unsigned N) const {
141     assert(N == 1 && "Invalid number of operands!");
142     if (Kind == Float)
143       Inst.addOperand(MCOperand::createDFPImm(bit_cast<uint64_t>(Flt.Val)));
144     else
145       llvm_unreachable("Should be float immediate!");
146   }
147 
148   void addBrListOperands(MCInst &Inst, unsigned N) const {
149     assert(N == 1 && isBrList() && "Invalid BrList!");
150     for (auto Br : BrL.List)
151       Inst.addOperand(MCOperand::createImm(Br));
152   }
153 
154   void print(raw_ostream &OS) const override {
155     switch (Kind) {
156     case Token:
157       OS << "Tok:" << Tok.Tok;
158       break;
159     case Integer:
160       OS << "Int:" << Int.Val;
161       break;
162     case Float:
163       OS << "Flt:" << Flt.Val;
164       break;
165     case Symbol:
166       OS << "Sym:" << Sym.Exp;
167       break;
168     case BrList:
169       OS << "BrList:" << BrL.List.size();
170       break;
171     }
172   }
173 };
174 
175 // Perhaps this should go somewhere common.
176 static wasm::WasmLimits DefaultLimits() {
177   return {wasm::WASM_LIMITS_FLAG_NONE, 0, 0};
178 }
179 
180 static MCSymbolWasm *GetOrCreateFunctionTableSymbol(MCContext &Ctx,
181                                                     const StringRef &Name,
182                                                     bool is64) {
183   MCSymbolWasm *Sym = cast_or_null<MCSymbolWasm>(Ctx.lookupSymbol(Name));
184   if (Sym) {
185     if (!Sym->isFunctionTable())
186       Ctx.reportError(SMLoc(), "symbol is not a wasm funcref table");
187   } else {
188     Sym = cast<MCSymbolWasm>(Ctx.getOrCreateSymbol(Name));
189     Sym->setFunctionTable(is64);
190     // The default function table is synthesized by the linker.
191     Sym->setUndefined();
192   }
193   return Sym;
194 }
195 
196 class WebAssemblyAsmParser final : public MCTargetAsmParser {
197   MCAsmParser &Parser;
198   MCAsmLexer &Lexer;
199 
200   // Order of labels, directives and instructions in a .s file have no
201   // syntactical enforcement. This class is a callback from the actual parser,
202   // and yet we have to be feeding data to the streamer in a very particular
203   // order to ensure a correct binary encoding that matches the regular backend
204   // (the streamer does not enforce this). This "state machine" enum helps
205   // guarantee that correct order.
206   enum ParserState {
207     FileStart,
208     FunctionLabel,
209     FunctionStart,
210     FunctionLocals,
211     Instructions,
212     EndFunction,
213     DataSection,
214   } CurrentState = FileStart;
215 
216   // For ensuring blocks are properly nested.
217   enum NestingType {
218     Function,
219     Block,
220     Loop,
221     Try,
222     CatchAll,
223     If,
224     Else,
225     Undefined,
226   };
227   struct Nested {
228     NestingType NT;
229     wasm::WasmSignature Sig;
230   };
231   std::vector<Nested> NestingStack;
232 
233   MCSymbolWasm *DefaultFunctionTable = nullptr;
234   MCSymbol *LastFunctionLabel = nullptr;
235 
236   bool is64;
237 
238   WebAssemblyAsmTypeCheck TC;
239   // Don't type check if -no-type-check was set.
240   bool SkipTypeCheck;
241 
242 public:
243   WebAssemblyAsmParser(const MCSubtargetInfo &STI, MCAsmParser &Parser,
244                        const MCInstrInfo &MII, const MCTargetOptions &Options)
245       : MCTargetAsmParser(Options, STI, MII), Parser(Parser),
246         Lexer(Parser.getLexer()), is64(STI.getTargetTriple().isArch64Bit()),
247         TC(Parser, MII, is64), SkipTypeCheck(Options.MCNoTypeCheck) {
248     setAvailableFeatures(ComputeAvailableFeatures(STI.getFeatureBits()));
249     // Don't type check if this is inline asm, since that is a naked sequence of
250     // instructions without a function/locals decl.
251     auto &SM = Parser.getSourceManager();
252     auto BufferName =
253         SM.getBufferInfo(SM.getMainFileID()).Buffer->getBufferIdentifier();
254     if (BufferName == "<inline asm>")
255       SkipTypeCheck = true;
256   }
257 
258   void Initialize(MCAsmParser &Parser) override {
259     MCAsmParserExtension::Initialize(Parser);
260 
261     DefaultFunctionTable = GetOrCreateFunctionTableSymbol(
262         getContext(), "__indirect_function_table", is64);
263     if (!STI->checkFeatures("+reference-types"))
264       DefaultFunctionTable->setOmitFromLinkingSection();
265   }
266 
267 #define GET_ASSEMBLER_HEADER
268 #include "WebAssemblyGenAsmMatcher.inc"
269 
270   // TODO: This is required to be implemented, but appears unused.
271   bool parseRegister(MCRegister &Reg, SMLoc &StartLoc, SMLoc &EndLoc) override {
272     llvm_unreachable("parseRegister is not implemented.");
273   }
274   ParseStatus tryParseRegister(MCRegister &Reg, SMLoc &StartLoc,
275                                SMLoc &EndLoc) override {
276     llvm_unreachable("tryParseRegister is not implemented.");
277   }
278 
279   bool error(const Twine &Msg, const AsmToken &Tok) {
280     return Parser.Error(Tok.getLoc(), Msg + Tok.getString());
281   }
282 
283   bool error(const Twine &Msg, SMLoc Loc = SMLoc()) {
284     return Parser.Error(Loc.isValid() ? Loc : Lexer.getTok().getLoc(), Msg);
285   }
286 
287   std::pair<StringRef, StringRef> nestingString(NestingType NT) {
288     switch (NT) {
289     case Function:
290       return {"function", "end_function"};
291     case Block:
292       return {"block", "end_block"};
293     case Loop:
294       return {"loop", "end_loop"};
295     case Try:
296       return {"try", "end_try/delegate"};
297     case CatchAll:
298       return {"catch_all", "end_try"};
299     case If:
300       return {"if", "end_if"};
301     case Else:
302       return {"else", "end_if"};
303     default:
304       llvm_unreachable("unknown NestingType");
305     }
306   }
307 
308   void push(NestingType NT, wasm::WasmSignature Sig = wasm::WasmSignature()) {
309     NestingStack.push_back({NT, Sig});
310   }
311 
312   bool pop(StringRef Ins, NestingType NT1, NestingType NT2 = Undefined) {
313     if (NestingStack.empty())
314       return error(Twine("End of block construct with no start: ") + Ins);
315     auto Top = NestingStack.back();
316     if (Top.NT != NT1 && Top.NT != NT2)
317       return error(Twine("Block construct type mismatch, expected: ") +
318                    nestingString(Top.NT).second + ", instead got: " + Ins);
319     TC.setLastSig(Top.Sig);
320     NestingStack.pop_back();
321     return false;
322   }
323 
324   // Pop a NestingType and push a new NestingType with the same signature. Used
325   // for if-else and try-catch(_all).
326   bool popAndPushWithSameSignature(StringRef Ins, NestingType PopNT,
327                                    NestingType PushNT) {
328     if (NestingStack.empty())
329       return error(Twine("End of block construct with no start: ") + Ins);
330     auto Sig = NestingStack.back().Sig;
331     if (pop(Ins, PopNT))
332       return true;
333     push(PushNT, Sig);
334     return false;
335   }
336 
337   bool ensureEmptyNestingStack(SMLoc Loc = SMLoc()) {
338     auto Err = !NestingStack.empty();
339     while (!NestingStack.empty()) {
340       error(Twine("Unmatched block construct(s) at function end: ") +
341                 nestingString(NestingStack.back().NT).first,
342             Loc);
343       NestingStack.pop_back();
344     }
345     return Err;
346   }
347 
348   bool isNext(AsmToken::TokenKind Kind) {
349     auto Ok = Lexer.is(Kind);
350     if (Ok)
351       Parser.Lex();
352     return Ok;
353   }
354 
355   bool expect(AsmToken::TokenKind Kind, const char *KindName) {
356     if (!isNext(Kind))
357       return error(std::string("Expected ") + KindName + ", instead got: ",
358                    Lexer.getTok());
359     return false;
360   }
361 
362   StringRef expectIdent() {
363     if (!Lexer.is(AsmToken::Identifier)) {
364       error("Expected identifier, got: ", Lexer.getTok());
365       return StringRef();
366     }
367     auto Name = Lexer.getTok().getString();
368     Parser.Lex();
369     return Name;
370   }
371 
372   bool parseRegTypeList(SmallVectorImpl<wasm::ValType> &Types) {
373     while (Lexer.is(AsmToken::Identifier)) {
374       auto Type = WebAssembly::parseType(Lexer.getTok().getString());
375       if (!Type)
376         return error("unknown type: ", Lexer.getTok());
377       Types.push_back(*Type);
378       Parser.Lex();
379       if (!isNext(AsmToken::Comma))
380         break;
381     }
382     return false;
383   }
384 
385   void parseSingleInteger(bool IsNegative, OperandVector &Operands) {
386     auto &Int = Lexer.getTok();
387     int64_t Val = Int.getIntVal();
388     if (IsNegative)
389       Val = -Val;
390     Operands.push_back(std::make_unique<WebAssemblyOperand>(
391         Int.getLoc(), Int.getEndLoc(), WebAssemblyOperand::IntOp{Val}));
392     Parser.Lex();
393   }
394 
395   bool parseSingleFloat(bool IsNegative, OperandVector &Operands) {
396     auto &Flt = Lexer.getTok();
397     double Val;
398     if (Flt.getString().getAsDouble(Val, false))
399       return error("Cannot parse real: ", Flt);
400     if (IsNegative)
401       Val = -Val;
402     Operands.push_back(std::make_unique<WebAssemblyOperand>(
403         Flt.getLoc(), Flt.getEndLoc(), WebAssemblyOperand::FltOp{Val}));
404     Parser.Lex();
405     return false;
406   }
407 
408   bool parseSpecialFloatMaybe(bool IsNegative, OperandVector &Operands) {
409     if (Lexer.isNot(AsmToken::Identifier))
410       return true;
411     auto &Flt = Lexer.getTok();
412     auto S = Flt.getString();
413     double Val;
414     if (S.compare_insensitive("infinity") == 0) {
415       Val = std::numeric_limits<double>::infinity();
416     } else if (S.compare_insensitive("nan") == 0) {
417       Val = std::numeric_limits<double>::quiet_NaN();
418     } else {
419       return true;
420     }
421     if (IsNegative)
422       Val = -Val;
423     Operands.push_back(std::make_unique<WebAssemblyOperand>(
424         Flt.getLoc(), Flt.getEndLoc(), WebAssemblyOperand::FltOp{Val}));
425     Parser.Lex();
426     return false;
427   }
428 
429   bool checkForP2AlignIfLoadStore(OperandVector &Operands, StringRef InstName) {
430     // FIXME: there is probably a cleaner way to do this.
431     auto IsLoadStore = InstName.contains(".load") ||
432                        InstName.contains(".store") ||
433                        InstName.contains("prefetch");
434     auto IsAtomic = InstName.contains("atomic.");
435     if (IsLoadStore || IsAtomic) {
436       // Parse load/store operands of the form: offset:p2align=align
437       if (IsLoadStore && isNext(AsmToken::Colon)) {
438         auto Id = expectIdent();
439         if (Id != "p2align")
440           return error("Expected p2align, instead got: " + Id);
441         if (expect(AsmToken::Equal, "="))
442           return true;
443         if (!Lexer.is(AsmToken::Integer))
444           return error("Expected integer constant");
445         parseSingleInteger(false, Operands);
446       } else {
447         // v128.{load,store}{8,16,32,64}_lane has both a memarg and a lane
448         // index. We need to avoid parsing an extra alignment operand for the
449         // lane index.
450         auto IsLoadStoreLane = InstName.contains("_lane");
451         if (IsLoadStoreLane && Operands.size() == 4)
452           return false;
453         // Alignment not specified (or atomics, must use default alignment).
454         // We can't just call WebAssembly::GetDefaultP2Align since we don't have
455         // an opcode until after the assembly matcher, so set a default to fix
456         // up later.
457         auto Tok = Lexer.getTok();
458         Operands.push_back(std::make_unique<WebAssemblyOperand>(
459             Tok.getLoc(), Tok.getEndLoc(), WebAssemblyOperand::IntOp{-1}));
460       }
461     }
462     return false;
463   }
464 
465   void addBlockTypeOperand(OperandVector &Operands, SMLoc NameLoc,
466                            WebAssembly::BlockType BT) {
467     if (BT != WebAssembly::BlockType::Void) {
468       wasm::WasmSignature Sig({static_cast<wasm::ValType>(BT)}, {});
469       TC.setLastSig(Sig);
470       NestingStack.back().Sig = Sig;
471     }
472     Operands.push_back(std::make_unique<WebAssemblyOperand>(
473         NameLoc, NameLoc, WebAssemblyOperand::IntOp{static_cast<int64_t>(BT)}));
474   }
475 
476   bool parseLimits(wasm::WasmLimits *Limits) {
477     auto Tok = Lexer.getTok();
478     if (!Tok.is(AsmToken::Integer))
479       return error("Expected integer constant, instead got: ", Tok);
480     int64_t Val = Tok.getIntVal();
481     assert(Val >= 0);
482     Limits->Minimum = Val;
483     Parser.Lex();
484 
485     if (isNext(AsmToken::Comma)) {
486       Limits->Flags |= wasm::WASM_LIMITS_FLAG_HAS_MAX;
487       auto Tok = Lexer.getTok();
488       if (!Tok.is(AsmToken::Integer))
489         return error("Expected integer constant, instead got: ", Tok);
490       int64_t Val = Tok.getIntVal();
491       assert(Val >= 0);
492       Limits->Maximum = Val;
493       Parser.Lex();
494     }
495     return false;
496   }
497 
498   bool parseFunctionTableOperand(std::unique_ptr<WebAssemblyOperand> *Op) {
499     if (STI->checkFeatures("+reference-types")) {
500       // If the reference-types feature is enabled, there is an explicit table
501       // operand.  To allow the same assembly to be compiled with or without
502       // reference types, we allow the operand to be omitted, in which case we
503       // default to __indirect_function_table.
504       auto &Tok = Lexer.getTok();
505       if (Tok.is(AsmToken::Identifier)) {
506         auto *Sym =
507             GetOrCreateFunctionTableSymbol(getContext(), Tok.getString(), is64);
508         const auto *Val = MCSymbolRefExpr::create(Sym, getContext());
509         *Op = std::make_unique<WebAssemblyOperand>(
510             Tok.getLoc(), Tok.getEndLoc(), WebAssemblyOperand::SymOp{Val});
511         Parser.Lex();
512         return expect(AsmToken::Comma, ",");
513       } else {
514         const auto *Val =
515             MCSymbolRefExpr::create(DefaultFunctionTable, getContext());
516         *Op = std::make_unique<WebAssemblyOperand>(
517             SMLoc(), SMLoc(), WebAssemblyOperand::SymOp{Val});
518         return false;
519       }
520     } else {
521       // For the MVP there is at most one table whose number is 0, but we can't
522       // write a table symbol or issue relocations.  Instead we just ensure the
523       // table is live and write a zero.
524       getStreamer().emitSymbolAttribute(DefaultFunctionTable, MCSA_NoDeadStrip);
525       *Op = std::make_unique<WebAssemblyOperand>(SMLoc(), SMLoc(),
526                                                  WebAssemblyOperand::IntOp{0});
527       return false;
528     }
529   }
530 
531   bool ParseInstruction(ParseInstructionInfo & /*Info*/, StringRef Name,
532                         SMLoc NameLoc, OperandVector &Operands) override {
533     // Note: Name does NOT point into the sourcecode, but to a local, so
534     // use NameLoc instead.
535     Name = StringRef(NameLoc.getPointer(), Name.size());
536 
537     // WebAssembly has instructions with / in them, which AsmLexer parses
538     // as separate tokens, so if we find such tokens immediately adjacent (no
539     // whitespace), expand the name to include them:
540     for (;;) {
541       auto &Sep = Lexer.getTok();
542       if (Sep.getLoc().getPointer() != Name.end() ||
543           Sep.getKind() != AsmToken::Slash)
544         break;
545       // Extend name with /
546       Name = StringRef(Name.begin(), Name.size() + Sep.getString().size());
547       Parser.Lex();
548       // We must now find another identifier, or error.
549       auto &Id = Lexer.getTok();
550       if (Id.getKind() != AsmToken::Identifier ||
551           Id.getLoc().getPointer() != Name.end())
552         return error("Incomplete instruction name: ", Id);
553       Name = StringRef(Name.begin(), Name.size() + Id.getString().size());
554       Parser.Lex();
555     }
556 
557     // Now construct the name as first operand.
558     Operands.push_back(std::make_unique<WebAssemblyOperand>(
559         NameLoc, SMLoc::getFromPointer(Name.end()),
560         WebAssemblyOperand::TokOp{Name}));
561 
562     // If this instruction is part of a control flow structure, ensure
563     // proper nesting.
564     bool ExpectBlockType = false;
565     bool ExpectFuncType = false;
566     std::unique_ptr<WebAssemblyOperand> FunctionTable;
567     if (Name == "block") {
568       push(Block);
569       ExpectBlockType = true;
570     } else if (Name == "loop") {
571       push(Loop);
572       ExpectBlockType = true;
573     } else if (Name == "try") {
574       push(Try);
575       ExpectBlockType = true;
576     } else if (Name == "if") {
577       push(If);
578       ExpectBlockType = true;
579     } else if (Name == "else") {
580       if (popAndPushWithSameSignature(Name, If, Else))
581         return true;
582     } else if (Name == "catch") {
583       if (popAndPushWithSameSignature(Name, Try, Try))
584         return true;
585     } else if (Name == "catch_all") {
586       if (popAndPushWithSameSignature(Name, Try, CatchAll))
587         return true;
588     } else if (Name == "end_if") {
589       if (pop(Name, If, Else))
590         return true;
591     } else if (Name == "end_try") {
592       if (pop(Name, Try, CatchAll))
593         return true;
594     } else if (Name == "delegate") {
595       if (pop(Name, Try))
596         return true;
597     } else if (Name == "end_loop") {
598       if (pop(Name, Loop))
599         return true;
600     } else if (Name == "end_block") {
601       if (pop(Name, Block))
602         return true;
603     } else if (Name == "end_function") {
604       ensureLocals(getStreamer());
605       CurrentState = EndFunction;
606       if (pop(Name, Function) || ensureEmptyNestingStack())
607         return true;
608     } else if (Name == "call_indirect" || Name == "return_call_indirect") {
609       // These instructions have differing operand orders in the text format vs
610       // the binary formats.  The MC instructions follow the binary format, so
611       // here we stash away the operand and append it later.
612       if (parseFunctionTableOperand(&FunctionTable))
613         return true;
614       ExpectFuncType = true;
615     }
616 
617     if (ExpectFuncType || (ExpectBlockType && Lexer.is(AsmToken::LParen))) {
618       // This has a special TYPEINDEX operand which in text we
619       // represent as a signature, such that we can re-build this signature,
620       // attach it to an anonymous symbol, which is what WasmObjectWriter
621       // expects to be able to recreate the actual unique-ified type indices.
622       auto &Ctx = getContext();
623       auto Loc = Parser.getTok();
624       auto Signature = Ctx.createWasmSignature();
625       if (parseSignature(Signature))
626         return true;
627       // Got signature as block type, don't need more
628       TC.setLastSig(*Signature);
629       if (ExpectBlockType)
630         NestingStack.back().Sig = *Signature;
631       ExpectBlockType = false;
632       // The "true" here will cause this to be a nameless symbol.
633       MCSymbol *Sym = Ctx.createTempSymbol("typeindex", true);
634       auto *WasmSym = cast<MCSymbolWasm>(Sym);
635       WasmSym->setSignature(Signature);
636       WasmSym->setType(wasm::WASM_SYMBOL_TYPE_FUNCTION);
637       const MCExpr *Expr = MCSymbolRefExpr::create(
638           WasmSym, MCSymbolRefExpr::VK_WASM_TYPEINDEX, Ctx);
639       Operands.push_back(std::make_unique<WebAssemblyOperand>(
640           Loc.getLoc(), Loc.getEndLoc(), WebAssemblyOperand::SymOp{Expr}));
641     }
642 
643     while (Lexer.isNot(AsmToken::EndOfStatement)) {
644       auto &Tok = Lexer.getTok();
645       switch (Tok.getKind()) {
646       case AsmToken::Identifier: {
647         if (!parseSpecialFloatMaybe(false, Operands))
648           break;
649         auto &Id = Lexer.getTok();
650         if (ExpectBlockType) {
651           // Assume this identifier is a block_type.
652           auto BT = WebAssembly::parseBlockType(Id.getString());
653           if (BT == WebAssembly::BlockType::Invalid)
654             return error("Unknown block type: ", Id);
655           addBlockTypeOperand(Operands, NameLoc, BT);
656           Parser.Lex();
657         } else {
658           // Assume this identifier is a label.
659           const MCExpr *Val;
660           SMLoc Start = Id.getLoc();
661           SMLoc End;
662           if (Parser.parseExpression(Val, End))
663             return error("Cannot parse symbol: ", Lexer.getTok());
664           Operands.push_back(std::make_unique<WebAssemblyOperand>(
665               Start, End, WebAssemblyOperand::SymOp{Val}));
666           if (checkForP2AlignIfLoadStore(Operands, Name))
667             return true;
668         }
669         break;
670       }
671       case AsmToken::Minus:
672         Parser.Lex();
673         if (Lexer.is(AsmToken::Integer)) {
674           parseSingleInteger(true, Operands);
675           if (checkForP2AlignIfLoadStore(Operands, Name))
676             return true;
677         } else if (Lexer.is(AsmToken::Real)) {
678           if (parseSingleFloat(true, Operands))
679             return true;
680         } else if (!parseSpecialFloatMaybe(true, Operands)) {
681         } else {
682           return error("Expected numeric constant instead got: ",
683                        Lexer.getTok());
684         }
685         break;
686       case AsmToken::Integer:
687         parseSingleInteger(false, Operands);
688         if (checkForP2AlignIfLoadStore(Operands, Name))
689           return true;
690         break;
691       case AsmToken::Real: {
692         if (parseSingleFloat(false, Operands))
693           return true;
694         break;
695       }
696       case AsmToken::LCurly: {
697         Parser.Lex();
698         auto Op =
699             std::make_unique<WebAssemblyOperand>(Tok.getLoc(), Tok.getEndLoc());
700         if (!Lexer.is(AsmToken::RCurly))
701           for (;;) {
702             Op->BrL.List.push_back(Lexer.getTok().getIntVal());
703             expect(AsmToken::Integer, "integer");
704             if (!isNext(AsmToken::Comma))
705               break;
706           }
707         expect(AsmToken::RCurly, "}");
708         Operands.push_back(std::move(Op));
709         break;
710       }
711       default:
712         return error("Unexpected token in operand: ", Tok);
713       }
714       if (Lexer.isNot(AsmToken::EndOfStatement)) {
715         if (expect(AsmToken::Comma, ","))
716           return true;
717       }
718     }
719     if (ExpectBlockType && Operands.size() == 1) {
720       // Support blocks with no operands as default to void.
721       addBlockTypeOperand(Operands, NameLoc, WebAssembly::BlockType::Void);
722     }
723     if (FunctionTable)
724       Operands.push_back(std::move(FunctionTable));
725     Parser.Lex();
726     return false;
727   }
728 
729   bool parseSignature(wasm::WasmSignature *Signature) {
730     if (expect(AsmToken::LParen, "("))
731       return true;
732     if (parseRegTypeList(Signature->Params))
733       return true;
734     if (expect(AsmToken::RParen, ")"))
735       return true;
736     if (expect(AsmToken::MinusGreater, "->"))
737       return true;
738     if (expect(AsmToken::LParen, "("))
739       return true;
740     if (parseRegTypeList(Signature->Returns))
741       return true;
742     if (expect(AsmToken::RParen, ")"))
743       return true;
744     return false;
745   }
746 
747   bool CheckDataSection() {
748     if (CurrentState != DataSection) {
749       auto WS = cast<MCSectionWasm>(getStreamer().getCurrentSectionOnly());
750       if (WS && WS->isText())
751         return error("data directive must occur in a data segment: ",
752                      Lexer.getTok());
753     }
754     CurrentState = DataSection;
755     return false;
756   }
757 
758   // This function processes wasm-specific directives streamed to
759   // WebAssemblyTargetStreamer, all others go to the generic parser
760   // (see WasmAsmParser).
761   ParseStatus parseDirective(AsmToken DirectiveID) override {
762     assert(DirectiveID.getKind() == AsmToken::Identifier);
763     auto &Out = getStreamer();
764     auto &TOut =
765         reinterpret_cast<WebAssemblyTargetStreamer &>(*Out.getTargetStreamer());
766     auto &Ctx = Out.getContext();
767 
768     if (DirectiveID.getString() == ".globaltype") {
769       auto SymName = expectIdent();
770       if (SymName.empty())
771         return ParseStatus::Failure;
772       if (expect(AsmToken::Comma, ","))
773         return ParseStatus::Failure;
774       auto TypeTok = Lexer.getTok();
775       auto TypeName = expectIdent();
776       if (TypeName.empty())
777         return ParseStatus::Failure;
778       auto Type = WebAssembly::parseType(TypeName);
779       if (!Type)
780         return error("Unknown type in .globaltype directive: ", TypeTok);
781       // Optional mutable modifier. Default to mutable for historical reasons.
782       // Ideally we would have gone with immutable as the default and used `mut`
783       // as the modifier to match the `.wat` format.
784       bool Mutable = true;
785       if (isNext(AsmToken::Comma)) {
786         TypeTok = Lexer.getTok();
787         auto Id = expectIdent();
788         if (Id.empty())
789           return ParseStatus::Failure;
790         if (Id == "immutable")
791           Mutable = false;
792         else
793           // Should we also allow `mutable` and `mut` here for clarity?
794           return error("Unknown type in .globaltype modifier: ", TypeTok);
795       }
796       // Now set this symbol with the correct type.
797       auto WasmSym = cast<MCSymbolWasm>(Ctx.getOrCreateSymbol(SymName));
798       WasmSym->setType(wasm::WASM_SYMBOL_TYPE_GLOBAL);
799       WasmSym->setGlobalType(wasm::WasmGlobalType{uint8_t(*Type), Mutable});
800       // And emit the directive again.
801       TOut.emitGlobalType(WasmSym);
802       return expect(AsmToken::EndOfStatement, "EOL");
803     }
804 
805     if (DirectiveID.getString() == ".tabletype") {
806       // .tabletype SYM, ELEMTYPE[, MINSIZE[, MAXSIZE]]
807       auto SymName = expectIdent();
808       if (SymName.empty())
809         return ParseStatus::Failure;
810       if (expect(AsmToken::Comma, ","))
811         return ParseStatus::Failure;
812 
813       auto ElemTypeTok = Lexer.getTok();
814       auto ElemTypeName = expectIdent();
815       if (ElemTypeName.empty())
816         return ParseStatus::Failure;
817       std::optional<wasm::ValType> ElemType =
818           WebAssembly::parseType(ElemTypeName);
819       if (!ElemType)
820         return error("Unknown type in .tabletype directive: ", ElemTypeTok);
821 
822       wasm::WasmLimits Limits = DefaultLimits();
823       if (isNext(AsmToken::Comma) && parseLimits(&Limits))
824         return ParseStatus::Failure;
825 
826       // Now that we have the name and table type, we can actually create the
827       // symbol
828       auto WasmSym = cast<MCSymbolWasm>(Ctx.getOrCreateSymbol(SymName));
829       WasmSym->setType(wasm::WASM_SYMBOL_TYPE_TABLE);
830       if (is64) {
831         Limits.Flags |= wasm::WASM_LIMITS_FLAG_IS_64;
832       }
833       wasm::WasmTableType Type = {*ElemType, Limits};
834       WasmSym->setTableType(Type);
835       TOut.emitTableType(WasmSym);
836       return expect(AsmToken::EndOfStatement, "EOL");
837     }
838 
839     if (DirectiveID.getString() == ".functype") {
840       // This code has to send things to the streamer similar to
841       // WebAssemblyAsmPrinter::EmitFunctionBodyStart.
842       // TODO: would be good to factor this into a common function, but the
843       // assembler and backend really don't share any common code, and this code
844       // parses the locals separately.
845       auto SymName = expectIdent();
846       if (SymName.empty())
847         return ParseStatus::Failure;
848       auto WasmSym = cast<MCSymbolWasm>(Ctx.getOrCreateSymbol(SymName));
849       if (WasmSym->isDefined()) {
850         // We push 'Function' either when a label is parsed or a .functype
851         // directive is parsed. The reason it is not easy to do this uniformly
852         // in a single place is,
853         // 1. We can't do this at label parsing time only because there are
854         //    cases we don't have .functype directive before a function label,
855         //    in which case we don't know if the label is a function at the time
856         //    of parsing.
857         // 2. We can't do this at .functype parsing time only because we want to
858         //    detect a function started with a label and not ended correctly
859         //    without encountering a .functype directive after the label.
860         if (CurrentState != FunctionLabel) {
861           // This .functype indicates a start of a function.
862           if (ensureEmptyNestingStack())
863             return ParseStatus::Failure;
864           push(Function);
865         }
866         CurrentState = FunctionStart;
867         LastFunctionLabel = WasmSym;
868       }
869       auto Signature = Ctx.createWasmSignature();
870       if (parseSignature(Signature))
871         return ParseStatus::Failure;
872       TC.funcDecl(*Signature);
873       WasmSym->setSignature(Signature);
874       WasmSym->setType(wasm::WASM_SYMBOL_TYPE_FUNCTION);
875       TOut.emitFunctionType(WasmSym);
876       // TODO: backend also calls TOut.emitIndIdx, but that is not implemented.
877       return expect(AsmToken::EndOfStatement, "EOL");
878     }
879 
880     if (DirectiveID.getString() == ".export_name") {
881       auto SymName = expectIdent();
882       if (SymName.empty())
883         return ParseStatus::Failure;
884       if (expect(AsmToken::Comma, ","))
885         return ParseStatus::Failure;
886       auto ExportName = expectIdent();
887       if (ExportName.empty())
888         return ParseStatus::Failure;
889       auto WasmSym = cast<MCSymbolWasm>(Ctx.getOrCreateSymbol(SymName));
890       WasmSym->setExportName(Ctx.allocateString(ExportName));
891       TOut.emitExportName(WasmSym, ExportName);
892       return expect(AsmToken::EndOfStatement, "EOL");
893     }
894 
895     if (DirectiveID.getString() == ".import_module") {
896       auto SymName = expectIdent();
897       if (SymName.empty())
898         return ParseStatus::Failure;
899       if (expect(AsmToken::Comma, ","))
900         return ParseStatus::Failure;
901       auto ImportModule = expectIdent();
902       if (ImportModule.empty())
903         return ParseStatus::Failure;
904       auto WasmSym = cast<MCSymbolWasm>(Ctx.getOrCreateSymbol(SymName));
905       WasmSym->setImportModule(Ctx.allocateString(ImportModule));
906       TOut.emitImportModule(WasmSym, ImportModule);
907       return expect(AsmToken::EndOfStatement, "EOL");
908     }
909 
910     if (DirectiveID.getString() == ".import_name") {
911       auto SymName = expectIdent();
912       if (SymName.empty())
913         return ParseStatus::Failure;
914       if (expect(AsmToken::Comma, ","))
915         return ParseStatus::Failure;
916       auto ImportName = expectIdent();
917       if (ImportName.empty())
918         return ParseStatus::Failure;
919       auto WasmSym = cast<MCSymbolWasm>(Ctx.getOrCreateSymbol(SymName));
920       WasmSym->setImportName(Ctx.allocateString(ImportName));
921       TOut.emitImportName(WasmSym, ImportName);
922       return expect(AsmToken::EndOfStatement, "EOL");
923     }
924 
925     if (DirectiveID.getString() == ".tagtype") {
926       auto SymName = expectIdent();
927       if (SymName.empty())
928         return ParseStatus::Failure;
929       auto WasmSym = cast<MCSymbolWasm>(Ctx.getOrCreateSymbol(SymName));
930       auto Signature = Ctx.createWasmSignature();
931       if (parseRegTypeList(Signature->Params))
932         return ParseStatus::Failure;
933       WasmSym->setSignature(Signature);
934       WasmSym->setType(wasm::WASM_SYMBOL_TYPE_TAG);
935       TOut.emitTagType(WasmSym);
936       // TODO: backend also calls TOut.emitIndIdx, but that is not implemented.
937       return expect(AsmToken::EndOfStatement, "EOL");
938     }
939 
940     if (DirectiveID.getString() == ".local") {
941       if (CurrentState != FunctionStart)
942         return error(".local directive should follow the start of a function: ",
943                      Lexer.getTok());
944       SmallVector<wasm::ValType, 4> Locals;
945       if (parseRegTypeList(Locals))
946         return ParseStatus::Failure;
947       TC.localDecl(Locals);
948       TOut.emitLocal(Locals);
949       CurrentState = FunctionLocals;
950       return expect(AsmToken::EndOfStatement, "EOL");
951     }
952 
953     if (DirectiveID.getString() == ".int8" ||
954         DirectiveID.getString() == ".int16" ||
955         DirectiveID.getString() == ".int32" ||
956         DirectiveID.getString() == ".int64") {
957       if (CheckDataSection())
958         return ParseStatus::Failure;
959       const MCExpr *Val;
960       SMLoc End;
961       if (Parser.parseExpression(Val, End))
962         return error("Cannot parse .int expression: ", Lexer.getTok());
963       size_t NumBits = 0;
964       DirectiveID.getString().drop_front(4).getAsInteger(10, NumBits);
965       Out.emitValue(Val, NumBits / 8, End);
966       return expect(AsmToken::EndOfStatement, "EOL");
967     }
968 
969     if (DirectiveID.getString() == ".asciz") {
970       if (CheckDataSection())
971         return ParseStatus::Failure;
972       std::string S;
973       if (Parser.parseEscapedString(S))
974         return error("Cannot parse string constant: ", Lexer.getTok());
975       Out.emitBytes(StringRef(S.c_str(), S.length() + 1));
976       return expect(AsmToken::EndOfStatement, "EOL");
977     }
978 
979     return ParseStatus::NoMatch; // We didn't process this directive.
980   }
981 
982   // Called either when the first instruction is parsed of the function ends.
983   void ensureLocals(MCStreamer &Out) {
984     if (CurrentState == FunctionStart) {
985       // We haven't seen a .local directive yet. The streamer requires locals to
986       // be encoded as a prelude to the instructions, so emit an empty list of
987       // locals here.
988       auto &TOut = reinterpret_cast<WebAssemblyTargetStreamer &>(
989           *Out.getTargetStreamer());
990       TOut.emitLocal(SmallVector<wasm::ValType, 0>());
991       CurrentState = FunctionLocals;
992     }
993   }
994 
995   bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned & /*Opcode*/,
996                                OperandVector &Operands, MCStreamer &Out,
997                                uint64_t &ErrorInfo,
998                                bool MatchingInlineAsm) override {
999     MCInst Inst;
1000     Inst.setLoc(IDLoc);
1001     FeatureBitset MissingFeatures;
1002     unsigned MatchResult = MatchInstructionImpl(
1003         Operands, Inst, ErrorInfo, MissingFeatures, MatchingInlineAsm);
1004     switch (MatchResult) {
1005     case Match_Success: {
1006       ensureLocals(Out);
1007       // Fix unknown p2align operands.
1008       auto Align = WebAssembly::GetDefaultP2AlignAny(Inst.getOpcode());
1009       if (Align != -1U) {
1010         auto &Op0 = Inst.getOperand(0);
1011         if (Op0.getImm() == -1)
1012           Op0.setImm(Align);
1013       }
1014       if (is64) {
1015         // Upgrade 32-bit loads/stores to 64-bit. These mostly differ by having
1016         // an offset64 arg instead of offset32, but to the assembler matcher
1017         // they're both immediates so don't get selected for.
1018         auto Opc64 = WebAssembly::getWasm64Opcode(
1019             static_cast<uint16_t>(Inst.getOpcode()));
1020         if (Opc64 >= 0) {
1021           Inst.setOpcode(Opc64);
1022         }
1023       }
1024       if (!SkipTypeCheck && TC.typeCheck(IDLoc, Inst, Operands))
1025         return true;
1026       Out.emitInstruction(Inst, getSTI());
1027       if (CurrentState == EndFunction) {
1028         onEndOfFunction(IDLoc);
1029       } else {
1030         CurrentState = Instructions;
1031       }
1032       return false;
1033     }
1034     case Match_MissingFeature: {
1035       assert(MissingFeatures.count() > 0 && "Expected missing features");
1036       SmallString<128> Message;
1037       raw_svector_ostream OS(Message);
1038       OS << "instruction requires:";
1039       for (unsigned i = 0, e = MissingFeatures.size(); i != e; ++i)
1040         if (MissingFeatures.test(i))
1041           OS << ' ' << getSubtargetFeatureName(i);
1042       return Parser.Error(IDLoc, Message);
1043     }
1044     case Match_MnemonicFail:
1045       return Parser.Error(IDLoc, "invalid instruction");
1046     case Match_NearMisses:
1047       return Parser.Error(IDLoc, "ambiguous instruction");
1048     case Match_InvalidTiedOperand:
1049     case Match_InvalidOperand: {
1050       SMLoc ErrorLoc = IDLoc;
1051       if (ErrorInfo != ~0ULL) {
1052         if (ErrorInfo >= Operands.size())
1053           return Parser.Error(IDLoc, "too few operands for instruction");
1054         ErrorLoc = Operands[ErrorInfo]->getStartLoc();
1055         if (ErrorLoc == SMLoc())
1056           ErrorLoc = IDLoc;
1057       }
1058       return Parser.Error(ErrorLoc, "invalid operand for instruction");
1059     }
1060     }
1061     llvm_unreachable("Implement any new match types added!");
1062   }
1063 
1064   void doBeforeLabelEmit(MCSymbol *Symbol, SMLoc IDLoc) override {
1065     // Code below only applies to labels in text sections.
1066     auto CWS = cast<MCSectionWasm>(getStreamer().getCurrentSectionOnly());
1067     if (!CWS->isText())
1068       return;
1069 
1070     auto WasmSym = cast<MCSymbolWasm>(Symbol);
1071     // Unlike other targets, we don't allow data in text sections (labels
1072     // declared with .type @object).
1073     if (WasmSym->getType() == wasm::WASM_SYMBOL_TYPE_DATA) {
1074       Parser.Error(IDLoc,
1075                    "Wasm doesn\'t support data symbols in text sections");
1076       return;
1077     }
1078 
1079     // Start a new section for the next function automatically, since our
1080     // object writer expects each function to have its own section. This way
1081     // The user can't forget this "convention".
1082     auto SymName = Symbol->getName();
1083     if (SymName.starts_with(".L"))
1084       return; // Local Symbol.
1085 
1086     // TODO: If the user explicitly creates a new function section, we ignore
1087     // its name when we create this one. It would be nice to honor their
1088     // choice, while still ensuring that we create one if they forget.
1089     // (that requires coordination with WasmAsmParser::parseSectionDirective)
1090     auto SecName = ".text." + SymName;
1091 
1092     auto *Group = CWS->getGroup();
1093     // If the current section is a COMDAT, also set the flag on the symbol.
1094     // TODO: Currently the only place that the symbols' comdat flag matters is
1095     // for importing comdat functions. But there's no way to specify that in
1096     // assembly currently.
1097     if (Group)
1098       WasmSym->setComdat(true);
1099     auto *WS = getContext().getWasmSection(SecName, SectionKind::getText(), 0,
1100                                            Group, MCContext::GenericSectionID);
1101     getStreamer().switchSection(WS);
1102     // Also generate DWARF for this section if requested.
1103     if (getContext().getGenDwarfForAssembly())
1104       getContext().addGenDwarfSection(WS);
1105 
1106     if (WasmSym->isFunction()) {
1107       // We give the location of the label (IDLoc) here, because otherwise the
1108       // lexer's next location will be used, which can be confusing. For
1109       // example:
1110       //
1111       // test0: ; This function does not end properly
1112       //   ...
1113       //
1114       // test1: ; We would like to point to this line for error
1115       //   ...  . Not this line, which can contain any instruction
1116       ensureEmptyNestingStack(IDLoc);
1117       CurrentState = FunctionLabel;
1118       LastFunctionLabel = Symbol;
1119       push(Function);
1120     }
1121   }
1122 
1123   void onEndOfFunction(SMLoc ErrorLoc) {
1124     if (!SkipTypeCheck)
1125       TC.endOfFunction(ErrorLoc);
1126     // Reset the type checker state.
1127     TC.Clear();
1128   }
1129 
1130   void onEndOfFile() override { ensureEmptyNestingStack(); }
1131 };
1132 } // end anonymous namespace
1133 
1134 // Force static initialization.
1135 extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeWebAssemblyAsmParser() {
1136   RegisterMCAsmParser<WebAssemblyAsmParser> X(getTheWebAssemblyTarget32());
1137   RegisterMCAsmParser<WebAssemblyAsmParser> Y(getTheWebAssemblyTarget64());
1138 }
1139 
1140 #define GET_REGISTER_MATCHER
1141 #define GET_SUBTARGET_FEATURE_NAME
1142 #define GET_MATCHER_IMPLEMENTATION
1143 #include "WebAssemblyGenAsmMatcher.inc"
1144 
1145 StringRef GetMnemonic(unsigned Opc) {
1146   // FIXME: linear search!
1147   for (auto &ME : MatchTable0) {
1148     if (ME.Opcode == Opc) {
1149       return ME.getMnemonic();
1150     }
1151   }
1152   assert(false && "mnemonic not found");
1153   return StringRef();
1154 }
1155