1 //==- WebAssemblyAsmParser.cpp - Assembler for WebAssembly -*- C++ -*-==//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 ///
9 /// \file
10 /// This file is part of the WebAssembly Assembler.
11 ///
12 /// It contains code to translate a parsed .s file into MCInsts.
13 ///
14 //===----------------------------------------------------------------------===//
15
16 #include "MCTargetDesc/WebAssemblyMCTargetDesc.h"
17 #include "MCTargetDesc/WebAssemblyTargetStreamer.h"
18 #include "TargetInfo/WebAssemblyTargetInfo.h"
19 #include "Utils/WebAssemblyTypeUtilities.h"
20 #include "Utils/WebAssemblyUtilities.h"
21 #include "WebAssembly.h"
22 #include "llvm/MC/MCContext.h"
23 #include "llvm/MC/MCExpr.h"
24 #include "llvm/MC/MCInst.h"
25 #include "llvm/MC/MCInstrInfo.h"
26 #include "llvm/MC/MCParser/MCParsedAsmOperand.h"
27 #include "llvm/MC/MCParser/MCTargetAsmParser.h"
28 #include "llvm/MC/MCSectionWasm.h"
29 #include "llvm/MC/MCStreamer.h"
30 #include "llvm/MC/MCSubtargetInfo.h"
31 #include "llvm/MC/MCSymbol.h"
32 #include "llvm/MC/MCSymbolWasm.h"
33 #include "llvm/Support/Endian.h"
34 #include "llvm/Support/TargetRegistry.h"
35
36 using namespace llvm;
37
38 #define DEBUG_TYPE "wasm-asm-parser"
39
40 static const char *getSubtargetFeatureName(uint64_t Val);
41
42 namespace {
43
44 /// WebAssemblyOperand - Instances of this class represent the operands in a
45 /// parsed Wasm machine instruction.
46 struct WebAssemblyOperand : public MCParsedAsmOperand {
47 enum KindTy { Token, Integer, Float, Symbol, BrList } Kind;
48
49 SMLoc StartLoc, EndLoc;
50
51 struct TokOp {
52 StringRef Tok;
53 };
54
55 struct IntOp {
56 int64_t Val;
57 };
58
59 struct FltOp {
60 double Val;
61 };
62
63 struct SymOp {
64 const MCExpr *Exp;
65 };
66
67 struct BrLOp {
68 std::vector<unsigned> List;
69 };
70
71 union {
72 struct TokOp Tok;
73 struct IntOp Int;
74 struct FltOp Flt;
75 struct SymOp Sym;
76 struct BrLOp BrL;
77 };
78
WebAssemblyOperand__anon101c24c70111::WebAssemblyOperand79 WebAssemblyOperand(KindTy K, SMLoc Start, SMLoc End, TokOp T)
80 : Kind(K), StartLoc(Start), EndLoc(End), Tok(T) {}
WebAssemblyOperand__anon101c24c70111::WebAssemblyOperand81 WebAssemblyOperand(KindTy K, SMLoc Start, SMLoc End, IntOp I)
82 : Kind(K), StartLoc(Start), EndLoc(End), Int(I) {}
WebAssemblyOperand__anon101c24c70111::WebAssemblyOperand83 WebAssemblyOperand(KindTy K, SMLoc Start, SMLoc End, FltOp F)
84 : Kind(K), StartLoc(Start), EndLoc(End), Flt(F) {}
WebAssemblyOperand__anon101c24c70111::WebAssemblyOperand85 WebAssemblyOperand(KindTy K, SMLoc Start, SMLoc End, SymOp S)
86 : Kind(K), StartLoc(Start), EndLoc(End), Sym(S) {}
WebAssemblyOperand__anon101c24c70111::WebAssemblyOperand87 WebAssemblyOperand(KindTy K, SMLoc Start, SMLoc End)
88 : Kind(K), StartLoc(Start), EndLoc(End), BrL() {}
89
~WebAssemblyOperand__anon101c24c70111::WebAssemblyOperand90 ~WebAssemblyOperand() {
91 if (isBrList())
92 BrL.~BrLOp();
93 }
94
isToken__anon101c24c70111::WebAssemblyOperand95 bool isToken() const override { return Kind == Token; }
isImm__anon101c24c70111::WebAssemblyOperand96 bool isImm() const override { return Kind == Integer || Kind == Symbol; }
isFPImm__anon101c24c70111::WebAssemblyOperand97 bool isFPImm() const { return Kind == Float; }
isMem__anon101c24c70111::WebAssemblyOperand98 bool isMem() const override { return false; }
isReg__anon101c24c70111::WebAssemblyOperand99 bool isReg() const override { return false; }
isBrList__anon101c24c70111::WebAssemblyOperand100 bool isBrList() const { return Kind == BrList; }
101
getReg__anon101c24c70111::WebAssemblyOperand102 unsigned getReg() const override {
103 llvm_unreachable("Assembly inspects a register operand");
104 return 0;
105 }
106
getToken__anon101c24c70111::WebAssemblyOperand107 StringRef getToken() const {
108 assert(isToken());
109 return Tok.Tok;
110 }
111
getStartLoc__anon101c24c70111::WebAssemblyOperand112 SMLoc getStartLoc() const override { return StartLoc; }
getEndLoc__anon101c24c70111::WebAssemblyOperand113 SMLoc getEndLoc() const override { return EndLoc; }
114
addRegOperands__anon101c24c70111::WebAssemblyOperand115 void addRegOperands(MCInst &, unsigned) const {
116 // Required by the assembly matcher.
117 llvm_unreachable("Assembly matcher creates register operands");
118 }
119
addImmOperands__anon101c24c70111::WebAssemblyOperand120 void addImmOperands(MCInst &Inst, unsigned N) const {
121 assert(N == 1 && "Invalid number of operands!");
122 if (Kind == Integer)
123 Inst.addOperand(MCOperand::createImm(Int.Val));
124 else if (Kind == Symbol)
125 Inst.addOperand(MCOperand::createExpr(Sym.Exp));
126 else
127 llvm_unreachable("Should be integer immediate or symbol!");
128 }
129
addFPImmf32Operands__anon101c24c70111::WebAssemblyOperand130 void addFPImmf32Operands(MCInst &Inst, unsigned N) const {
131 assert(N == 1 && "Invalid number of operands!");
132 if (Kind == Float)
133 Inst.addOperand(
134 MCOperand::createSFPImm(bit_cast<uint32_t>(float(Flt.Val))));
135 else
136 llvm_unreachable("Should be float immediate!");
137 }
138
addFPImmf64Operands__anon101c24c70111::WebAssemblyOperand139 void addFPImmf64Operands(MCInst &Inst, unsigned N) const {
140 assert(N == 1 && "Invalid number of operands!");
141 if (Kind == Float)
142 Inst.addOperand(MCOperand::createDFPImm(bit_cast<uint64_t>(Flt.Val)));
143 else
144 llvm_unreachable("Should be float immediate!");
145 }
146
addBrListOperands__anon101c24c70111::WebAssemblyOperand147 void addBrListOperands(MCInst &Inst, unsigned N) const {
148 assert(N == 1 && isBrList() && "Invalid BrList!");
149 for (auto Br : BrL.List)
150 Inst.addOperand(MCOperand::createImm(Br));
151 }
152
print__anon101c24c70111::WebAssemblyOperand153 void print(raw_ostream &OS) const override {
154 switch (Kind) {
155 case Token:
156 OS << "Tok:" << Tok.Tok;
157 break;
158 case Integer:
159 OS << "Int:" << Int.Val;
160 break;
161 case Float:
162 OS << "Flt:" << Flt.Val;
163 break;
164 case Symbol:
165 OS << "Sym:" << Sym.Exp;
166 break;
167 case BrList:
168 OS << "BrList:" << BrL.List.size();
169 break;
170 }
171 }
172 };
173
174 // Perhaps this should go somewhere common.
DefaultLimits()175 static wasm::WasmLimits DefaultLimits() {
176 return {wasm::WASM_LIMITS_FLAG_NONE, 0, 0};
177 }
178
GetOrCreateFunctionTableSymbol(MCContext & Ctx,const StringRef & Name)179 static MCSymbolWasm *GetOrCreateFunctionTableSymbol(MCContext &Ctx,
180 const StringRef &Name) {
181 MCSymbolWasm *Sym = cast_or_null<MCSymbolWasm>(Ctx.lookupSymbol(Name));
182 if (Sym) {
183 if (!Sym->isFunctionTable())
184 Ctx.reportError(SMLoc(), "symbol is not a wasm funcref table");
185 } else {
186 Sym = cast<MCSymbolWasm>(Ctx.getOrCreateSymbol(Name));
187 Sym->setFunctionTable();
188 // The default function table is synthesized by the linker.
189 Sym->setUndefined();
190 }
191 return Sym;
192 }
193
194 class WebAssemblyAsmParser final : public MCTargetAsmParser {
195 MCAsmParser &Parser;
196 MCAsmLexer &Lexer;
197
198 // Much like WebAssemblyAsmPrinter in the backend, we have to own these.
199 std::vector<std::unique_ptr<wasm::WasmSignature>> Signatures;
200 std::vector<std::unique_ptr<std::string>> Names;
201
// A .s file does not syntactically enforce any order between labels,
// directives and instructions, and this class is only a callback of the
// actual parser. The streamer nevertheless has to be fed in a very particular
// order to get a correct binary encoding that matches the regular backend
// (the streamer itself does not enforce this). This "state machine" enum
// helps guarantee that order.
enum ParserState {
  FileStart,
  FunctionStart,
  FunctionLocals,
  Instructions,
  EndFunction,
  DataSection,
};
// Parsing begins in the FileStart state.
ParserState CurrentState = FileStart;
216
// Kinds of open control-flow constructs, used to check that blocks are
// properly nested.
enum NestingType {
  Function,  // closed by "end_function"
  Block,     // opened by "block", closed by "end_block"
  Loop,      // opened by "loop", closed by "end_loop"
  Try,       // opened by "try" (and re-opened by "catch")
  CatchAll,  // opened by "catch_all", closed by "end_try"
  If,        // opened by "if"
  Else,      // opened by "else", closed by "end_if"
  Undefined, // placeholder meaning "no second alternative" in pop()
};
// Innermost open construct is at the back.
std::vector<NestingType> NestingStack;
229
230 MCSymbolWasm *DefaultFunctionTable = nullptr;
231 MCSymbol *LastFunctionLabel = nullptr;
232
233 public:
WebAssemblyAsmParser(const MCSubtargetInfo & STI,MCAsmParser & Parser,const MCInstrInfo & MII,const MCTargetOptions & Options)234 WebAssemblyAsmParser(const MCSubtargetInfo &STI, MCAsmParser &Parser,
235 const MCInstrInfo &MII, const MCTargetOptions &Options)
236 : MCTargetAsmParser(Options, STI, MII), Parser(Parser),
237 Lexer(Parser.getLexer()) {
238 setAvailableFeatures(ComputeAvailableFeatures(STI.getFeatureBits()));
239 }
240
Initialize(MCAsmParser & Parser)241 void Initialize(MCAsmParser &Parser) override {
242 MCAsmParserExtension::Initialize(Parser);
243
244 DefaultFunctionTable = GetOrCreateFunctionTableSymbol(
245 getContext(), "__indirect_function_table");
246 if (!STI->checkFeatures("+reference-types"))
247 DefaultFunctionTable->setOmitFromLinkingSection();
248 }
249
250 #define GET_ASSEMBLER_HEADER
251 #include "WebAssemblyGenAsmMatcher.inc"
252
253 // TODO: This is required to be implemented, but appears unused.
ParseRegister(unsigned &,SMLoc &,SMLoc &)254 bool ParseRegister(unsigned & /*RegNo*/, SMLoc & /*StartLoc*/,
255 SMLoc & /*EndLoc*/) override {
256 llvm_unreachable("ParseRegister is not implemented.");
257 }
tryParseRegister(unsigned &,SMLoc &,SMLoc &)258 OperandMatchResultTy tryParseRegister(unsigned & /*RegNo*/,
259 SMLoc & /*StartLoc*/,
260 SMLoc & /*EndLoc*/) override {
261 llvm_unreachable("tryParseRegister is not implemented.");
262 }
263
error(const Twine & Msg,const AsmToken & Tok)264 bool error(const Twine &Msg, const AsmToken &Tok) {
265 return Parser.Error(Tok.getLoc(), Msg + Tok.getString());
266 }
267
error(const Twine & Msg)268 bool error(const Twine &Msg) {
269 return Parser.Error(Lexer.getTok().getLoc(), Msg);
270 }
271
addSignature(std::unique_ptr<wasm::WasmSignature> && Sig)272 void addSignature(std::unique_ptr<wasm::WasmSignature> &&Sig) {
273 Signatures.push_back(std::move(Sig));
274 }
275
storeName(StringRef Name)276 StringRef storeName(StringRef Name) {
277 std::unique_ptr<std::string> N = std::make_unique<std::string>(Name);
278 Names.push_back(std::move(N));
279 return *Names.back();
280 }
281
nestingString(NestingType NT)282 std::pair<StringRef, StringRef> nestingString(NestingType NT) {
283 switch (NT) {
284 case Function:
285 return {"function", "end_function"};
286 case Block:
287 return {"block", "end_block"};
288 case Loop:
289 return {"loop", "end_loop"};
290 case Try:
291 return {"try", "end_try/delegate"};
292 case CatchAll:
293 return {"catch_all", "end_try"};
294 case If:
295 return {"if", "end_if"};
296 case Else:
297 return {"else", "end_if"};
298 default:
299 llvm_unreachable("unknown NestingType");
300 }
301 }
302
push(NestingType NT)303 void push(NestingType NT) { NestingStack.push_back(NT); }
304
pop(StringRef Ins,NestingType NT1,NestingType NT2=Undefined)305 bool pop(StringRef Ins, NestingType NT1, NestingType NT2 = Undefined) {
306 if (NestingStack.empty())
307 return error(Twine("End of block construct with no start: ") + Ins);
308 auto Top = NestingStack.back();
309 if (Top != NT1 && Top != NT2)
310 return error(Twine("Block construct type mismatch, expected: ") +
311 nestingString(Top).second + ", instead got: " + Ins);
312 NestingStack.pop_back();
313 return false;
314 }
315
ensureEmptyNestingStack()316 bool ensureEmptyNestingStack() {
317 auto Err = !NestingStack.empty();
318 while (!NestingStack.empty()) {
319 error(Twine("Unmatched block construct(s) at function end: ") +
320 nestingString(NestingStack.back()).first);
321 NestingStack.pop_back();
322 }
323 return Err;
324 }
325
isNext(AsmToken::TokenKind Kind)326 bool isNext(AsmToken::TokenKind Kind) {
327 auto Ok = Lexer.is(Kind);
328 if (Ok)
329 Parser.Lex();
330 return Ok;
331 }
332
expect(AsmToken::TokenKind Kind,const char * KindName)333 bool expect(AsmToken::TokenKind Kind, const char *KindName) {
334 if (!isNext(Kind))
335 return error(std::string("Expected ") + KindName + ", instead got: ",
336 Lexer.getTok());
337 return false;
338 }
339
expectIdent()340 StringRef expectIdent() {
341 if (!Lexer.is(AsmToken::Identifier)) {
342 error("Expected identifier, got: ", Lexer.getTok());
343 return StringRef();
344 }
345 auto Name = Lexer.getTok().getString();
346 Parser.Lex();
347 return Name;
348 }
349
parseRegTypeList(SmallVectorImpl<wasm::ValType> & Types)350 bool parseRegTypeList(SmallVectorImpl<wasm::ValType> &Types) {
351 while (Lexer.is(AsmToken::Identifier)) {
352 auto Type = WebAssembly::parseType(Lexer.getTok().getString());
353 if (!Type)
354 return error("unknown type: ", Lexer.getTok());
355 Types.push_back(Type.getValue());
356 Parser.Lex();
357 if (!isNext(AsmToken::Comma))
358 break;
359 }
360 return false;
361 }
362
parseSingleInteger(bool IsNegative,OperandVector & Operands)363 void parseSingleInteger(bool IsNegative, OperandVector &Operands) {
364 auto &Int = Lexer.getTok();
365 int64_t Val = Int.getIntVal();
366 if (IsNegative)
367 Val = -Val;
368 Operands.push_back(std::make_unique<WebAssemblyOperand>(
369 WebAssemblyOperand::Integer, Int.getLoc(), Int.getEndLoc(),
370 WebAssemblyOperand::IntOp{Val}));
371 Parser.Lex();
372 }
373
parseSingleFloat(bool IsNegative,OperandVector & Operands)374 bool parseSingleFloat(bool IsNegative, OperandVector &Operands) {
375 auto &Flt = Lexer.getTok();
376 double Val;
377 if (Flt.getString().getAsDouble(Val, false))
378 return error("Cannot parse real: ", Flt);
379 if (IsNegative)
380 Val = -Val;
381 Operands.push_back(std::make_unique<WebAssemblyOperand>(
382 WebAssemblyOperand::Float, Flt.getLoc(), Flt.getEndLoc(),
383 WebAssemblyOperand::FltOp{Val}));
384 Parser.Lex();
385 return false;
386 }
387
parseSpecialFloatMaybe(bool IsNegative,OperandVector & Operands)388 bool parseSpecialFloatMaybe(bool IsNegative, OperandVector &Operands) {
389 if (Lexer.isNot(AsmToken::Identifier))
390 return true;
391 auto &Flt = Lexer.getTok();
392 auto S = Flt.getString();
393 double Val;
394 if (S.compare_lower("infinity") == 0) {
395 Val = std::numeric_limits<double>::infinity();
396 } else if (S.compare_lower("nan") == 0) {
397 Val = std::numeric_limits<double>::quiet_NaN();
398 } else {
399 return true;
400 }
401 if (IsNegative)
402 Val = -Val;
403 Operands.push_back(std::make_unique<WebAssemblyOperand>(
404 WebAssemblyOperand::Float, Flt.getLoc(), Flt.getEndLoc(),
405 WebAssemblyOperand::FltOp{Val}));
406 Parser.Lex();
407 return false;
408 }
409
checkForP2AlignIfLoadStore(OperandVector & Operands,StringRef InstName)410 bool checkForP2AlignIfLoadStore(OperandVector &Operands, StringRef InstName) {
411 // FIXME: there is probably a cleaner way to do this.
412 auto IsLoadStore = InstName.find(".load") != StringRef::npos ||
413 InstName.find(".store") != StringRef::npos ||
414 InstName.find("prefetch") != StringRef::npos;
415 auto IsAtomic = InstName.find("atomic.") != StringRef::npos;
416 if (IsLoadStore || IsAtomic) {
417 // Parse load/store operands of the form: offset:p2align=align
418 if (IsLoadStore && isNext(AsmToken::Colon)) {
419 auto Id = expectIdent();
420 if (Id != "p2align")
421 return error("Expected p2align, instead got: " + Id);
422 if (expect(AsmToken::Equal, "="))
423 return true;
424 if (!Lexer.is(AsmToken::Integer))
425 return error("Expected integer constant");
426 parseSingleInteger(false, Operands);
427 } else {
428 // v128.{load,store}{8,16,32,64}_lane has both a memarg and a lane
429 // index. We need to avoid parsing an extra alignment operand for the
430 // lane index.
431 auto IsLoadStoreLane = InstName.find("_lane") != StringRef::npos;
432 if (IsLoadStoreLane && Operands.size() == 4)
433 return false;
434 // Alignment not specified (or atomics, must use default alignment).
435 // We can't just call WebAssembly::GetDefaultP2Align since we don't have
436 // an opcode until after the assembly matcher, so set a default to fix
437 // up later.
438 auto Tok = Lexer.getTok();
439 Operands.push_back(std::make_unique<WebAssemblyOperand>(
440 WebAssemblyOperand::Integer, Tok.getLoc(), Tok.getEndLoc(),
441 WebAssemblyOperand::IntOp{-1}));
442 }
443 }
444 return false;
445 }
446
addBlockTypeOperand(OperandVector & Operands,SMLoc NameLoc,WebAssembly::BlockType BT)447 void addBlockTypeOperand(OperandVector &Operands, SMLoc NameLoc,
448 WebAssembly::BlockType BT) {
449 Operands.push_back(std::make_unique<WebAssemblyOperand>(
450 WebAssemblyOperand::Integer, NameLoc, NameLoc,
451 WebAssemblyOperand::IntOp{static_cast<int64_t>(BT)}));
452 }
453
parseLimits(wasm::WasmLimits * Limits)454 bool parseLimits(wasm::WasmLimits *Limits) {
455 auto Tok = Lexer.getTok();
456 if (!Tok.is(AsmToken::Integer))
457 return error("Expected integer constant, instead got: ", Tok);
458 int64_t Val = Tok.getIntVal();
459 assert(Val >= 0);
460 Limits->Minimum = Val;
461 Parser.Lex();
462
463 if (isNext(AsmToken::Comma)) {
464 Limits->Flags |= wasm::WASM_LIMITS_FLAG_HAS_MAX;
465 auto Tok = Lexer.getTok();
466 if (!Tok.is(AsmToken::Integer))
467 return error("Expected integer constant, instead got: ", Tok);
468 int64_t Val = Tok.getIntVal();
469 assert(Val >= 0);
470 Limits->Maximum = Val;
471 Parser.Lex();
472 }
473 return false;
474 }
475
parseFunctionTableOperand(std::unique_ptr<WebAssemblyOperand> * Op)476 bool parseFunctionTableOperand(std::unique_ptr<WebAssemblyOperand> *Op) {
477 if (STI->checkFeatures("+reference-types")) {
478 // If the reference-types feature is enabled, there is an explicit table
479 // operand. To allow the same assembly to be compiled with or without
480 // reference types, we allow the operand to be omitted, in which case we
481 // default to __indirect_function_table.
482 auto &Tok = Lexer.getTok();
483 if (Tok.is(AsmToken::Identifier)) {
484 auto *Sym =
485 GetOrCreateFunctionTableSymbol(getContext(), Tok.getString());
486 const auto *Val = MCSymbolRefExpr::create(Sym, getContext());
487 *Op = std::make_unique<WebAssemblyOperand>(
488 WebAssemblyOperand::Symbol, Tok.getLoc(), Tok.getEndLoc(),
489 WebAssemblyOperand::SymOp{Val});
490 Parser.Lex();
491 return expect(AsmToken::Comma, ",");
492 } else {
493 const auto *Val =
494 MCSymbolRefExpr::create(DefaultFunctionTable, getContext());
495 *Op = std::make_unique<WebAssemblyOperand>(
496 WebAssemblyOperand::Symbol, SMLoc(), SMLoc(),
497 WebAssemblyOperand::SymOp{Val});
498 return false;
499 }
500 } else {
501 // For the MVP there is at most one table whose number is 0, but we can't
502 // write a table symbol or issue relocations. Instead we just ensure the
503 // table is live and write a zero.
504 getStreamer().emitSymbolAttribute(DefaultFunctionTable, MCSA_NoDeadStrip);
505 *Op = std::make_unique<WebAssemblyOperand>(WebAssemblyOperand::Integer,
506 SMLoc(), SMLoc(),
507 WebAssemblyOperand::IntOp{0});
508 return false;
509 }
510 }
511
ParseInstruction(ParseInstructionInfo &,StringRef Name,SMLoc NameLoc,OperandVector & Operands)512 bool ParseInstruction(ParseInstructionInfo & /*Info*/, StringRef Name,
513 SMLoc NameLoc, OperandVector &Operands) override {
514 // Note: Name does NOT point into the sourcecode, but to a local, so
515 // use NameLoc instead.
516 Name = StringRef(NameLoc.getPointer(), Name.size());
517
518 // WebAssembly has instructions with / in them, which AsmLexer parses
519 // as separate tokens, so if we find such tokens immediately adjacent (no
520 // whitespace), expand the name to include them:
521 for (;;) {
522 auto &Sep = Lexer.getTok();
523 if (Sep.getLoc().getPointer() != Name.end() ||
524 Sep.getKind() != AsmToken::Slash)
525 break;
526 // Extend name with /
527 Name = StringRef(Name.begin(), Name.size() + Sep.getString().size());
528 Parser.Lex();
529 // We must now find another identifier, or error.
530 auto &Id = Lexer.getTok();
531 if (Id.getKind() != AsmToken::Identifier ||
532 Id.getLoc().getPointer() != Name.end())
533 return error("Incomplete instruction name: ", Id);
534 Name = StringRef(Name.begin(), Name.size() + Id.getString().size());
535 Parser.Lex();
536 }
537
538 // Now construct the name as first operand.
539 Operands.push_back(std::make_unique<WebAssemblyOperand>(
540 WebAssemblyOperand::Token, NameLoc, SMLoc::getFromPointer(Name.end()),
541 WebAssemblyOperand::TokOp{Name}));
542
543 // If this instruction is part of a control flow structure, ensure
544 // proper nesting.
545 bool ExpectBlockType = false;
546 bool ExpectFuncType = false;
547 bool ExpectHeapType = false;
548 std::unique_ptr<WebAssemblyOperand> FunctionTable;
549 if (Name == "block") {
550 push(Block);
551 ExpectBlockType = true;
552 } else if (Name == "loop") {
553 push(Loop);
554 ExpectBlockType = true;
555 } else if (Name == "try") {
556 push(Try);
557 ExpectBlockType = true;
558 } else if (Name == "if") {
559 push(If);
560 ExpectBlockType = true;
561 } else if (Name == "else") {
562 if (pop(Name, If))
563 return true;
564 push(Else);
565 } else if (Name == "catch") {
566 if (pop(Name, Try))
567 return true;
568 push(Try);
569 } else if (Name == "catch_all") {
570 if (pop(Name, Try))
571 return true;
572 push(CatchAll);
573 } else if (Name == "end_if") {
574 if (pop(Name, If, Else))
575 return true;
576 } else if (Name == "end_try") {
577 if (pop(Name, Try, CatchAll))
578 return true;
579 } else if (Name == "delegate") {
580 if (pop(Name, Try))
581 return true;
582 } else if (Name == "end_loop") {
583 if (pop(Name, Loop))
584 return true;
585 } else if (Name == "end_block") {
586 if (pop(Name, Block))
587 return true;
588 } else if (Name == "end_function") {
589 ensureLocals(getStreamer());
590 CurrentState = EndFunction;
591 if (pop(Name, Function) || ensureEmptyNestingStack())
592 return true;
593 } else if (Name == "call_indirect" || Name == "return_call_indirect") {
594 // These instructions have differing operand orders in the text format vs
595 // the binary formats. The MC instructions follow the binary format, so
596 // here we stash away the operand and append it later.
597 if (parseFunctionTableOperand(&FunctionTable))
598 return true;
599 ExpectFuncType = true;
600 } else if (Name == "ref.null") {
601 ExpectHeapType = true;
602 }
603
604 if (ExpectFuncType || (ExpectBlockType && Lexer.is(AsmToken::LParen))) {
605 // This has a special TYPEINDEX operand which in text we
606 // represent as a signature, such that we can re-build this signature,
607 // attach it to an anonymous symbol, which is what WasmObjectWriter
608 // expects to be able to recreate the actual unique-ified type indices.
609 auto Loc = Parser.getTok();
610 auto Signature = std::make_unique<wasm::WasmSignature>();
611 if (parseSignature(Signature.get()))
612 return true;
613 // Got signature as block type, don't need more
614 ExpectBlockType = false;
615 auto &Ctx = getContext();
616 // The "true" here will cause this to be a nameless symbol.
617 MCSymbol *Sym = Ctx.createTempSymbol("typeindex", true);
618 auto *WasmSym = cast<MCSymbolWasm>(Sym);
619 WasmSym->setSignature(Signature.get());
620 addSignature(std::move(Signature));
621 WasmSym->setType(wasm::WASM_SYMBOL_TYPE_FUNCTION);
622 const MCExpr *Expr = MCSymbolRefExpr::create(
623 WasmSym, MCSymbolRefExpr::VK_WASM_TYPEINDEX, Ctx);
624 Operands.push_back(std::make_unique<WebAssemblyOperand>(
625 WebAssemblyOperand::Symbol, Loc.getLoc(), Loc.getEndLoc(),
626 WebAssemblyOperand::SymOp{Expr}));
627 }
628
629 while (Lexer.isNot(AsmToken::EndOfStatement)) {
630 auto &Tok = Lexer.getTok();
631 switch (Tok.getKind()) {
632 case AsmToken::Identifier: {
633 if (!parseSpecialFloatMaybe(false, Operands))
634 break;
635 auto &Id = Lexer.getTok();
636 if (ExpectBlockType) {
637 // Assume this identifier is a block_type.
638 auto BT = WebAssembly::parseBlockType(Id.getString());
639 if (BT == WebAssembly::BlockType::Invalid)
640 return error("Unknown block type: ", Id);
641 addBlockTypeOperand(Operands, NameLoc, BT);
642 Parser.Lex();
643 } else if (ExpectHeapType) {
644 auto HeapType = WebAssembly::parseHeapType(Id.getString());
645 if (HeapType == WebAssembly::HeapType::Invalid) {
646 return error("Expected a heap type: ", Id);
647 }
648 Operands.push_back(std::make_unique<WebAssemblyOperand>(
649 WebAssemblyOperand::Integer, Id.getLoc(), Id.getEndLoc(),
650 WebAssemblyOperand::IntOp{static_cast<int64_t>(HeapType)}));
651 Parser.Lex();
652 } else {
653 // Assume this identifier is a label.
654 const MCExpr *Val;
655 SMLoc End;
656 if (Parser.parseExpression(Val, End))
657 return error("Cannot parse symbol: ", Lexer.getTok());
658 Operands.push_back(std::make_unique<WebAssemblyOperand>(
659 WebAssemblyOperand::Symbol, Id.getLoc(), Id.getEndLoc(),
660 WebAssemblyOperand::SymOp{Val}));
661 if (checkForP2AlignIfLoadStore(Operands, Name))
662 return true;
663 }
664 break;
665 }
666 case AsmToken::Minus:
667 Parser.Lex();
668 if (Lexer.is(AsmToken::Integer)) {
669 parseSingleInteger(true, Operands);
670 if (checkForP2AlignIfLoadStore(Operands, Name))
671 return true;
672 } else if(Lexer.is(AsmToken::Real)) {
673 if (parseSingleFloat(true, Operands))
674 return true;
675 } else if (!parseSpecialFloatMaybe(true, Operands)) {
676 } else {
677 return error("Expected numeric constant instead got: ",
678 Lexer.getTok());
679 }
680 break;
681 case AsmToken::Integer:
682 parseSingleInteger(false, Operands);
683 if (checkForP2AlignIfLoadStore(Operands, Name))
684 return true;
685 break;
686 case AsmToken::Real: {
687 if (parseSingleFloat(false, Operands))
688 return true;
689 break;
690 }
691 case AsmToken::LCurly: {
692 Parser.Lex();
693 auto Op = std::make_unique<WebAssemblyOperand>(
694 WebAssemblyOperand::BrList, Tok.getLoc(), Tok.getEndLoc());
695 if (!Lexer.is(AsmToken::RCurly))
696 for (;;) {
697 Op->BrL.List.push_back(Lexer.getTok().getIntVal());
698 expect(AsmToken::Integer, "integer");
699 if (!isNext(AsmToken::Comma))
700 break;
701 }
702 expect(AsmToken::RCurly, "}");
703 Operands.push_back(std::move(Op));
704 break;
705 }
706 default:
707 return error("Unexpected token in operand: ", Tok);
708 }
709 if (Lexer.isNot(AsmToken::EndOfStatement)) {
710 if (expect(AsmToken::Comma, ","))
711 return true;
712 }
713 }
714 if (ExpectBlockType && Operands.size() == 1) {
715 // Support blocks with no operands as default to void.
716 addBlockTypeOperand(Operands, NameLoc, WebAssembly::BlockType::Void);
717 }
718 if (FunctionTable)
719 Operands.push_back(std::move(FunctionTable));
720 Parser.Lex();
721 return false;
722 }
723
parseSignature(wasm::WasmSignature * Signature)724 bool parseSignature(wasm::WasmSignature *Signature) {
725 if (expect(AsmToken::LParen, "("))
726 return true;
727 if (parseRegTypeList(Signature->Params))
728 return true;
729 if (expect(AsmToken::RParen, ")"))
730 return true;
731 if (expect(AsmToken::MinusGreater, "->"))
732 return true;
733 if (expect(AsmToken::LParen, "("))
734 return true;
735 if (parseRegTypeList(Signature->Returns))
736 return true;
737 if (expect(AsmToken::RParen, ")"))
738 return true;
739 return false;
740 }
741
CheckDataSection()742 bool CheckDataSection() {
743 if (CurrentState != DataSection) {
744 auto WS = cast<MCSectionWasm>(getStreamer().getCurrentSection().first);
745 if (WS && WS->getKind().isText())
746 return error("data directive must occur in a data segment: ",
747 Lexer.getTok());
748 }
749 CurrentState = DataSection;
750 return false;
751 }
752
753 // This function processes wasm-specific directives streamed to
754 // WebAssemblyTargetStreamer, all others go to the generic parser
755 // (see WasmAsmParser).
ParseDirective(AsmToken DirectiveID)756 bool ParseDirective(AsmToken DirectiveID) override {
757 // This function has a really weird return value behavior that is different
758 // from all the other parsing functions:
759 // - return true && no tokens consumed -> don't know this directive / let
760 // the generic parser handle it.
761 // - return true && tokens consumed -> a parsing error occurred.
762 // - return false -> processed this directive successfully.
763 assert(DirectiveID.getKind() == AsmToken::Identifier);
764 auto &Out = getStreamer();
765 auto &TOut =
766 reinterpret_cast<WebAssemblyTargetStreamer &>(*Out.getTargetStreamer());
767 auto &Ctx = Out.getContext();
768
769 // TODO: any time we return an error, at least one token must have been
770 // consumed, otherwise this will not signal an error to the caller.
771 if (DirectiveID.getString() == ".globaltype") {
772 auto SymName = expectIdent();
773 if (SymName.empty())
774 return true;
775 if (expect(AsmToken::Comma, ","))
776 return true;
777 auto TypeTok = Lexer.getTok();
778 auto TypeName = expectIdent();
779 if (TypeName.empty())
780 return true;
781 auto Type = WebAssembly::parseType(TypeName);
782 if (!Type)
783 return error("Unknown type in .globaltype directive: ", TypeTok);
784 // Optional mutable modifier. Default to mutable for historical reasons.
785 // Ideally we would have gone with immutable as the default and used `mut`
786 // as the modifier to match the `.wat` format.
787 bool Mutable = true;
788 if (isNext(AsmToken::Comma)) {
789 TypeTok = Lexer.getTok();
790 auto Id = expectIdent();
791 if (Id == "immutable")
792 Mutable = false;
793 else
794 // Should we also allow `mutable` and `mut` here for clarity?
795 return error("Unknown type in .globaltype modifier: ", TypeTok);
796 }
797 // Now set this symbol with the correct type.
798 auto WasmSym = cast<MCSymbolWasm>(Ctx.getOrCreateSymbol(SymName));
799 WasmSym->setType(wasm::WASM_SYMBOL_TYPE_GLOBAL);
800 WasmSym->setGlobalType(
801 wasm::WasmGlobalType{uint8_t(Type.getValue()), Mutable});
802 // And emit the directive again.
803 TOut.emitGlobalType(WasmSym);
804 return expect(AsmToken::EndOfStatement, "EOL");
805 }
806
807 if (DirectiveID.getString() == ".tabletype") {
808 // .tabletype SYM, ELEMTYPE[, MINSIZE[, MAXSIZE]]
809 auto SymName = expectIdent();
810 if (SymName.empty())
811 return true;
812 if (expect(AsmToken::Comma, ","))
813 return true;
814
815 auto ElemTypeTok = Lexer.getTok();
816 auto ElemTypeName = expectIdent();
817 if (ElemTypeName.empty())
818 return true;
819 Optional<wasm::ValType> ElemType = WebAssembly::parseType(ElemTypeName);
820 if (!ElemType)
821 return error("Unknown type in .tabletype directive: ", ElemTypeTok);
822
823 wasm::WasmLimits Limits = DefaultLimits();
824 if (isNext(AsmToken::Comma) && parseLimits(&Limits))
825 return true;
826
827 // Now that we have the name and table type, we can actually create the
828 // symbol
829 auto WasmSym = cast<MCSymbolWasm>(Ctx.getOrCreateSymbol(SymName));
830 WasmSym->setType(wasm::WASM_SYMBOL_TYPE_TABLE);
831 wasm::WasmTableType Type = {uint8_t(ElemType.getValue()), Limits};
832 WasmSym->setTableType(Type);
833 TOut.emitTableType(WasmSym);
834 return expect(AsmToken::EndOfStatement, "EOL");
835 }
836
837 if (DirectiveID.getString() == ".functype") {
838 // This code has to send things to the streamer similar to
839 // WebAssemblyAsmPrinter::EmitFunctionBodyStart.
840 // TODO: would be good to factor this into a common function, but the
841 // assembler and backend really don't share any common code, and this code
842 // parses the locals separately.
843 auto SymName = expectIdent();
844 if (SymName.empty())
845 return true;
846 auto WasmSym = cast<MCSymbolWasm>(Ctx.getOrCreateSymbol(SymName));
847 if (WasmSym->isDefined()) {
848 // This .functype indicates a start of a function.
849 if (ensureEmptyNestingStack())
850 return true;
851 CurrentState = FunctionStart;
852 LastFunctionLabel = WasmSym;
853 push(Function);
854 }
855 auto Signature = std::make_unique<wasm::WasmSignature>();
856 if (parseSignature(Signature.get()))
857 return true;
858 WasmSym->setSignature(Signature.get());
859 addSignature(std::move(Signature));
860 WasmSym->setType(wasm::WASM_SYMBOL_TYPE_FUNCTION);
861 TOut.emitFunctionType(WasmSym);
862 // TODO: backend also calls TOut.emitIndIdx, but that is not implemented.
863 return expect(AsmToken::EndOfStatement, "EOL");
864 }
865
866 if (DirectiveID.getString() == ".export_name") {
867 auto SymName = expectIdent();
868 if (SymName.empty())
869 return true;
870 if (expect(AsmToken::Comma, ","))
871 return true;
872 auto ExportName = expectIdent();
873 auto WasmSym = cast<MCSymbolWasm>(Ctx.getOrCreateSymbol(SymName));
874 WasmSym->setExportName(storeName(ExportName));
875 TOut.emitExportName(WasmSym, ExportName);
876 }
877
878 if (DirectiveID.getString() == ".import_module") {
879 auto SymName = expectIdent();
880 if (SymName.empty())
881 return true;
882 if (expect(AsmToken::Comma, ","))
883 return true;
884 auto ImportModule = expectIdent();
885 auto WasmSym = cast<MCSymbolWasm>(Ctx.getOrCreateSymbol(SymName));
886 WasmSym->setImportModule(storeName(ImportModule));
887 TOut.emitImportModule(WasmSym, ImportModule);
888 }
889
890 if (DirectiveID.getString() == ".import_name") {
891 auto SymName = expectIdent();
892 if (SymName.empty())
893 return true;
894 if (expect(AsmToken::Comma, ","))
895 return true;
896 auto ImportName = expectIdent();
897 auto WasmSym = cast<MCSymbolWasm>(Ctx.getOrCreateSymbol(SymName));
898 WasmSym->setImportName(storeName(ImportName));
899 TOut.emitImportName(WasmSym, ImportName);
900 }
901
902 if (DirectiveID.getString() == ".eventtype") {
903 auto SymName = expectIdent();
904 if (SymName.empty())
905 return true;
906 auto WasmSym = cast<MCSymbolWasm>(Ctx.getOrCreateSymbol(SymName));
907 auto Signature = std::make_unique<wasm::WasmSignature>();
908 if (parseRegTypeList(Signature->Params))
909 return true;
910 WasmSym->setSignature(Signature.get());
911 addSignature(std::move(Signature));
912 WasmSym->setType(wasm::WASM_SYMBOL_TYPE_EVENT);
913 TOut.emitEventType(WasmSym);
914 // TODO: backend also calls TOut.emitIndIdx, but that is not implemented.
915 return expect(AsmToken::EndOfStatement, "EOL");
916 }
917
918 if (DirectiveID.getString() == ".local") {
919 if (CurrentState != FunctionStart)
920 return error(".local directive should follow the start of a function: ",
921 Lexer.getTok());
922 SmallVector<wasm::ValType, 4> Locals;
923 if (parseRegTypeList(Locals))
924 return true;
925 TOut.emitLocal(Locals);
926 CurrentState = FunctionLocals;
927 return expect(AsmToken::EndOfStatement, "EOL");
928 }
929
930 if (DirectiveID.getString() == ".int8" ||
931 DirectiveID.getString() == ".int16" ||
932 DirectiveID.getString() == ".int32" ||
933 DirectiveID.getString() == ".int64") {
934 if (CheckDataSection()) return true;
935 const MCExpr *Val;
936 SMLoc End;
937 if (Parser.parseExpression(Val, End))
938 return error("Cannot parse .int expression: ", Lexer.getTok());
939 size_t NumBits = 0;
940 DirectiveID.getString().drop_front(4).getAsInteger(10, NumBits);
941 Out.emitValue(Val, NumBits / 8, End);
942 return expect(AsmToken::EndOfStatement, "EOL");
943 }
944
945 if (DirectiveID.getString() == ".asciz") {
946 if (CheckDataSection()) return true;
947 std::string S;
948 if (Parser.parseEscapedString(S))
949 return error("Cannot parse string constant: ", Lexer.getTok());
950 Out.emitBytes(StringRef(S.c_str(), S.length() + 1));
951 return expect(AsmToken::EndOfStatement, "EOL");
952 }
953
954 return true; // We didn't process this directive.
955 }
956
957 // Called either when the first instruction is parsed of the function ends.
ensureLocals(MCStreamer & Out)958 void ensureLocals(MCStreamer &Out) {
959 if (CurrentState == FunctionStart) {
960 // We haven't seen a .local directive yet. The streamer requires locals to
961 // be encoded as a prelude to the instructions, so emit an empty list of
962 // locals here.
963 auto &TOut = reinterpret_cast<WebAssemblyTargetStreamer &>(
964 *Out.getTargetStreamer());
965 TOut.emitLocal(SmallVector<wasm::ValType, 0>());
966 CurrentState = FunctionLocals;
967 }
968 }
969
MatchAndEmitInstruction(SMLoc IDLoc,unsigned &,OperandVector & Operands,MCStreamer & Out,uint64_t & ErrorInfo,bool MatchingInlineAsm)970 bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned & /*Opcode*/,
971 OperandVector &Operands, MCStreamer &Out,
972 uint64_t &ErrorInfo,
973 bool MatchingInlineAsm) override {
974 MCInst Inst;
975 Inst.setLoc(IDLoc);
976 FeatureBitset MissingFeatures;
977 unsigned MatchResult = MatchInstructionImpl(
978 Operands, Inst, ErrorInfo, MissingFeatures, MatchingInlineAsm);
979 switch (MatchResult) {
980 case Match_Success: {
981 ensureLocals(Out);
982 // Fix unknown p2align operands.
983 auto Align = WebAssembly::GetDefaultP2AlignAny(Inst.getOpcode());
984 if (Align != -1U) {
985 auto &Op0 = Inst.getOperand(0);
986 if (Op0.getImm() == -1)
987 Op0.setImm(Align);
988 }
989 if (getSTI().getTargetTriple().isArch64Bit()) {
990 // Upgrade 32-bit loads/stores to 64-bit. These mostly differ by having
991 // an offset64 arg instead of offset32, but to the assembler matcher
992 // they're both immediates so don't get selected for.
993 auto Opc64 = WebAssembly::getWasm64Opcode(
994 static_cast<uint16_t>(Inst.getOpcode()));
995 if (Opc64 >= 0) {
996 Inst.setOpcode(Opc64);
997 }
998 }
999 Out.emitInstruction(Inst, getSTI());
1000 if (CurrentState == EndFunction) {
1001 onEndOfFunction();
1002 } else {
1003 CurrentState = Instructions;
1004 }
1005 return false;
1006 }
1007 case Match_MissingFeature: {
1008 assert(MissingFeatures.count() > 0 && "Expected missing features");
1009 SmallString<128> Message;
1010 raw_svector_ostream OS(Message);
1011 OS << "instruction requires:";
1012 for (unsigned i = 0, e = MissingFeatures.size(); i != e; ++i)
1013 if (MissingFeatures.test(i))
1014 OS << ' ' << getSubtargetFeatureName(i);
1015 return Parser.Error(IDLoc, Message);
1016 }
1017 case Match_MnemonicFail:
1018 return Parser.Error(IDLoc, "invalid instruction");
1019 case Match_NearMisses:
1020 return Parser.Error(IDLoc, "ambiguous instruction");
1021 case Match_InvalidTiedOperand:
1022 case Match_InvalidOperand: {
1023 SMLoc ErrorLoc = IDLoc;
1024 if (ErrorInfo != ~0ULL) {
1025 if (ErrorInfo >= Operands.size())
1026 return Parser.Error(IDLoc, "too few operands for instruction");
1027 ErrorLoc = Operands[ErrorInfo]->getStartLoc();
1028 if (ErrorLoc == SMLoc())
1029 ErrorLoc = IDLoc;
1030 }
1031 return Parser.Error(ErrorLoc, "invalid operand for instruction");
1032 }
1033 }
1034 llvm_unreachable("Implement any new match types added!");
1035 }
1036
doBeforeLabelEmit(MCSymbol * Symbol)1037 void doBeforeLabelEmit(MCSymbol *Symbol) override {
1038 // Code below only applies to labels in text sections.
1039 auto CWS = cast<MCSectionWasm>(getStreamer().getCurrentSection().first);
1040 if (!CWS || !CWS->getKind().isText())
1041 return;
1042
1043 auto WasmSym = cast<MCSymbolWasm>(Symbol);
1044 // Unlike other targets, we don't allow data in text sections (labels
1045 // declared with .type @object).
1046 if (WasmSym->getType() == wasm::WASM_SYMBOL_TYPE_DATA) {
1047 Parser.Error(Parser.getTok().getLoc(),
1048 "Wasm doesn\'t support data symbols in text sections");
1049 return;
1050 }
1051
1052 // Start a new section for the next function automatically, since our
1053 // object writer expects each function to have its own section. This way
1054 // The user can't forget this "convention".
1055 auto SymName = Symbol->getName();
1056 if (SymName.startswith(".L"))
1057 return; // Local Symbol.
1058
1059 // TODO: If the user explicitly creates a new function section, we ignore
1060 // its name when we create this one. It would be nice to honor their
1061 // choice, while still ensuring that we create one if they forget.
1062 // (that requires coordination with WasmAsmParser::parseSectionDirective)
1063 auto SecName = ".text." + SymName;
1064
1065 auto *Group = CWS->getGroup();
1066 // If the current section is a COMDAT, also set the flag on the symbol.
1067 // TODO: Currently the only place that the symbols' comdat flag matters is
1068 // for importing comdat functions. But there's no way to specify that in
1069 // assembly currently.
1070 if (Group)
1071 WasmSym->setComdat(true);
1072 auto *WS =
1073 getContext().getWasmSection(SecName, SectionKind::getText(), 0, Group,
1074 MCContext::GenericSectionID, nullptr);
1075 getStreamer().SwitchSection(WS);
1076 // Also generate DWARF for this section if requested.
1077 if (getContext().getGenDwarfForAssembly())
1078 getContext().addGenDwarfSection(WS);
1079 }
1080
onEndOfFunction()1081 void onEndOfFunction() {
1082 // Automatically output a .size directive, so it becomes optional for the
1083 // user.
1084 if (!LastFunctionLabel) return;
1085 auto TempSym = getContext().createLinkerPrivateTempSymbol();
1086 getStreamer().emitLabel(TempSym);
1087 auto Start = MCSymbolRefExpr::create(LastFunctionLabel, getContext());
1088 auto End = MCSymbolRefExpr::create(TempSym, getContext());
1089 auto Expr =
1090 MCBinaryExpr::create(MCBinaryExpr::Sub, End, Start, getContext());
1091 getStreamer().emitELFSize(LastFunctionLabel, Expr);
1092 }
1093
onEndOfFile()1094 void onEndOfFile() override { ensureEmptyNestingStack(); }
1095 };
1096 } // end anonymous namespace
1097
1098 // Force static initialization.
LLVMInitializeWebAssemblyAsmParser()1099 extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeWebAssemblyAsmParser() {
1100 RegisterMCAsmParser<WebAssemblyAsmParser> X(getTheWebAssemblyTarget32());
1101 RegisterMCAsmParser<WebAssemblyAsmParser> Y(getTheWebAssemblyTarget64());
1102 }
1103
1104 #define GET_REGISTER_MATCHER
1105 #define GET_SUBTARGET_FEATURE_NAME
1106 #define GET_MATCHER_IMPLEMENTATION
1107 #include "WebAssemblyGenAsmMatcher.inc"
1108