10b57cec5SDimitry Andric //===- WasmAsmParser.cpp - Wasm Assembly Parser -----------------------------===// 20b57cec5SDimitry Andric // 30b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 40b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information. 50b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 60b57cec5SDimitry Andric // 70b57cec5SDimitry Andric // -- 80b57cec5SDimitry Andric // 90b57cec5SDimitry Andric // Note, this is for wasm, the binary format (analogous to ELF), not wasm, 100b57cec5SDimitry Andric // the instruction set (analogous to x86), for which parsing code lives in 110b57cec5SDimitry Andric // WebAssemblyAsmParser. 120b57cec5SDimitry Andric // 130b57cec5SDimitry Andric // This file contains processing for generic directives implemented using 140b57cec5SDimitry Andric // MCTargetStreamer, the ones that depend on WebAssemblyTargetStreamer are in 150b57cec5SDimitry Andric // WebAssemblyAsmParser. 160b57cec5SDimitry Andric // 170b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 180b57cec5SDimitry Andric 1906c3fb27SDimitry Andric #include "llvm/ADT/StringExtras.h" 200b57cec5SDimitry Andric #include "llvm/BinaryFormat/Wasm.h" 210b57cec5SDimitry Andric #include "llvm/MC/MCContext.h" 2206c3fb27SDimitry Andric #include "llvm/MC/MCObjectFileInfo.h" 230b57cec5SDimitry Andric #include "llvm/MC/MCParser/MCAsmLexer.h" 240b57cec5SDimitry Andric #include "llvm/MC/MCParser/MCAsmParser.h" 250b57cec5SDimitry Andric #include "llvm/MC/MCParser/MCAsmParserExtension.h" 260b57cec5SDimitry Andric #include "llvm/MC/MCSectionWasm.h" 270b57cec5SDimitry Andric #include "llvm/MC/MCStreamer.h" 280b57cec5SDimitry Andric #include "llvm/MC/MCSymbolWasm.h" 2981ad6265SDimitry Andric #include "llvm/Support/Casting.h" 30bdd1243dSDimitry Andric #include <optional> 310b57cec5SDimitry Andric 320b57cec5SDimitry Andric using namespace llvm; 330b57cec5SDimitry Andric 340b57cec5SDimitry Andric namespace { 350b57cec5SDimitry Andric 360b57cec5SDimitry Andric class WasmAsmParser : public MCAsmParserExtension { 370b57cec5SDimitry Andric MCAsmParser *Parser = nullptr; 380b57cec5SDimitry Andric MCAsmLexer *Lexer = nullptr; 390b57cec5SDimitry Andric 400b57cec5SDimitry Andric template<bool (WasmAsmParser::*HandlerMethod)(StringRef, SMLoc)> 410b57cec5SDimitry Andric void addDirectiveHandler(StringRef Directive) { 420b57cec5SDimitry Andric MCAsmParser::ExtensionDirectiveHandler Handler = std::make_pair( 430b57cec5SDimitry Andric this, HandleDirective<WasmAsmParser, HandlerMethod>); 440b57cec5SDimitry Andric 450b57cec5SDimitry Andric getParser().addDirectiveHandler(Directive, Handler); 460b57cec5SDimitry Andric } 470b57cec5SDimitry Andric 480b57cec5SDimitry Andric public: 490b57cec5SDimitry Andric WasmAsmParser() { BracketExpressionsSupported = true; } 500b57cec5SDimitry Andric 510b57cec5SDimitry Andric void Initialize(MCAsmParser &P) override { 520b57cec5SDimitry Andric Parser = &P; 530b57cec5SDimitry Andric Lexer = &Parser->getLexer(); 540b57cec5SDimitry Andric // Call the base implementation. 550b57cec5SDimitry Andric this->MCAsmParserExtension::Initialize(*Parser); 560b57cec5SDimitry Andric 570b57cec5SDimitry Andric addDirectiveHandler<&WasmAsmParser::parseSectionDirectiveText>(".text"); 5881ad6265SDimitry Andric addDirectiveHandler<&WasmAsmParser::parseSectionDirectiveData>(".data"); 590b57cec5SDimitry Andric addDirectiveHandler<&WasmAsmParser::parseSectionDirective>(".section"); 600b57cec5SDimitry Andric addDirectiveHandler<&WasmAsmParser::parseDirectiveSize>(".size"); 610b57cec5SDimitry Andric addDirectiveHandler<&WasmAsmParser::parseDirectiveType>(".type"); 620b57cec5SDimitry Andric addDirectiveHandler<&WasmAsmParser::ParseDirectiveIdent>(".ident"); 630b57cec5SDimitry Andric addDirectiveHandler< 640b57cec5SDimitry Andric &WasmAsmParser::ParseDirectiveSymbolAttribute>(".weak"); 650b57cec5SDimitry Andric addDirectiveHandler< 660b57cec5SDimitry Andric &WasmAsmParser::ParseDirectiveSymbolAttribute>(".local"); 670b57cec5SDimitry Andric addDirectiveHandler< 680b57cec5SDimitry Andric &WasmAsmParser::ParseDirectiveSymbolAttribute>(".internal"); 690b57cec5SDimitry Andric addDirectiveHandler< 700b57cec5SDimitry Andric &WasmAsmParser::ParseDirectiveSymbolAttribute>(".hidden"); 710b57cec5SDimitry Andric } 720b57cec5SDimitry Andric 730b57cec5SDimitry Andric bool error(const StringRef &Msg, const AsmToken &Tok) { 740b57cec5SDimitry Andric return Parser->Error(Tok.getLoc(), Msg + Tok.getString()); 750b57cec5SDimitry Andric } 760b57cec5SDimitry Andric 770b57cec5SDimitry Andric bool isNext(AsmToken::TokenKind Kind) { 780b57cec5SDimitry Andric auto Ok = Lexer->is(Kind); 790b57cec5SDimitry Andric if (Ok) 800b57cec5SDimitry Andric Lex(); 810b57cec5SDimitry Andric return Ok; 820b57cec5SDimitry Andric } 830b57cec5SDimitry Andric 840b57cec5SDimitry Andric bool expect(AsmToken::TokenKind Kind, const char *KindName) { 850b57cec5SDimitry Andric if (!isNext(Kind)) 860b57cec5SDimitry Andric return error(std::string("Expected ") + KindName + ", instead got: ", 870b57cec5SDimitry Andric Lexer->getTok()); 880b57cec5SDimitry Andric return false; 890b57cec5SDimitry Andric } 900b57cec5SDimitry Andric 910b57cec5SDimitry Andric bool parseSectionDirectiveText(StringRef, SMLoc) { 920b57cec5SDimitry Andric // FIXME: .text currently no-op. 930b57cec5SDimitry Andric return false; 940b57cec5SDimitry Andric } 950b57cec5SDimitry Andric 9681ad6265SDimitry Andric bool parseSectionDirectiveData(StringRef, SMLoc) { 9781ad6265SDimitry Andric auto *S = getContext().getObjectFileInfo()->getDataSection(); 9881ad6265SDimitry Andric getStreamer().switchSection(S); 9981ad6265SDimitry Andric return false; 10081ad6265SDimitry Andric } 10181ad6265SDimitry Andric 102fe6060f1SDimitry Andric uint32_t parseSectionFlags(StringRef FlagStr, bool &Passive, bool &Group) { 103fe6060f1SDimitry Andric uint32_t flags = 0; 104e8d8bef9SDimitry Andric for (char C : FlagStr) { 105e8d8bef9SDimitry Andric switch (C) { 106e8d8bef9SDimitry Andric case 'p': 1070b57cec5SDimitry Andric Passive = true; 108e8d8bef9SDimitry Andric break; 109e8d8bef9SDimitry Andric case 'G': 110e8d8bef9SDimitry Andric Group = true; 111e8d8bef9SDimitry Andric break; 112fe6060f1SDimitry Andric case 'T': 113fe6060f1SDimitry Andric flags |= wasm::WASM_SEG_FLAG_TLS; 114fe6060f1SDimitry Andric break; 115fe6060f1SDimitry Andric case 'S': 116fe6060f1SDimitry Andric flags |= wasm::WASM_SEG_FLAG_STRINGS; 117fe6060f1SDimitry Andric break; 118*0fca6ea1SDimitry Andric case 'R': 119*0fca6ea1SDimitry Andric flags |= wasm::WASM_SEG_FLAG_RETAIN; 120*0fca6ea1SDimitry Andric break; 121e8d8bef9SDimitry Andric default: 122fe6060f1SDimitry Andric return -1U; 123e8d8bef9SDimitry Andric } 124e8d8bef9SDimitry Andric } 125fe6060f1SDimitry Andric return flags; 126e8d8bef9SDimitry Andric } 127e8d8bef9SDimitry Andric 128e8d8bef9SDimitry Andric bool parseGroup(StringRef &GroupName) { 129e8d8bef9SDimitry Andric if (Lexer->isNot(AsmToken::Comma)) 130e8d8bef9SDimitry Andric return TokError("expected group name"); 131e8d8bef9SDimitry Andric Lex(); 132e8d8bef9SDimitry Andric if (Lexer->is(AsmToken::Integer)) { 133e8d8bef9SDimitry Andric GroupName = getTok().getString(); 134e8d8bef9SDimitry Andric Lex(); 135e8d8bef9SDimitry Andric } else if (Parser->parseIdentifier(GroupName)) { 136e8d8bef9SDimitry Andric return TokError("invalid group name"); 137e8d8bef9SDimitry Andric } 138e8d8bef9SDimitry Andric if (Lexer->is(AsmToken::Comma)) { 139e8d8bef9SDimitry Andric Lex(); 140e8d8bef9SDimitry Andric StringRef Linkage; 141e8d8bef9SDimitry Andric if (Parser->parseIdentifier(Linkage)) 142e8d8bef9SDimitry Andric return TokError("invalid linkage"); 143e8d8bef9SDimitry Andric if (Linkage != "comdat") 144e8d8bef9SDimitry Andric return TokError("Linkage must be 'comdat'"); 1450b57cec5SDimitry Andric } 1460b57cec5SDimitry Andric return false; 1470b57cec5SDimitry Andric } 1480b57cec5SDimitry Andric 149fe6060f1SDimitry Andric bool parseSectionDirective(StringRef, SMLoc loc) { 1500b57cec5SDimitry Andric StringRef Name; 1510b57cec5SDimitry Andric if (Parser->parseIdentifier(Name)) 1520b57cec5SDimitry Andric return TokError("expected identifier in directive"); 1530b57cec5SDimitry Andric 1540b57cec5SDimitry Andric if (expect(AsmToken::Comma, ",")) 1550b57cec5SDimitry Andric return true; 1560b57cec5SDimitry Andric 1570b57cec5SDimitry Andric if (Lexer->isNot(AsmToken::String)) 1580b57cec5SDimitry Andric return error("expected string in directive, instead got: ", Lexer->getTok()); 1590b57cec5SDimitry Andric 160bdd1243dSDimitry Andric auto Kind = StringSwitch<std::optional<SectionKind>>(Name) 1610b57cec5SDimitry Andric .StartsWith(".data", SectionKind::getData()) 162e8d8bef9SDimitry Andric .StartsWith(".tdata", SectionKind::getThreadData()) 163e8d8bef9SDimitry Andric .StartsWith(".tbss", SectionKind::getThreadBSS()) 1640b57cec5SDimitry Andric .StartsWith(".rodata", SectionKind::getReadOnly()) 1650b57cec5SDimitry Andric .StartsWith(".text", SectionKind::getText()) 1660b57cec5SDimitry Andric .StartsWith(".custom_section", SectionKind::getMetadata()) 1670b57cec5SDimitry Andric .StartsWith(".bss", SectionKind::getBSS()) 1680b57cec5SDimitry Andric // See use of .init_array in WasmObjectWriter and 1690b57cec5SDimitry Andric // TargetLoweringObjectFileWasm 1700b57cec5SDimitry Andric .StartsWith(".init_array", SectionKind::getData()) 1718bcb0991SDimitry Andric .StartsWith(".debug_", SectionKind::getMetadata()) 172fe6060f1SDimitry Andric .Default(SectionKind::getData()); 1730b57cec5SDimitry Andric 1740b57cec5SDimitry Andric // Update section flags if present in this .section directive 1750b57cec5SDimitry Andric bool Passive = false; 176e8d8bef9SDimitry Andric bool Group = false; 177fe6060f1SDimitry Andric uint32_t Flags = 178fe6060f1SDimitry Andric parseSectionFlags(getTok().getStringContents(), Passive, Group); 179fe6060f1SDimitry Andric if (Flags == -1U) 180fe6060f1SDimitry Andric return TokError("unknown flag"); 1810b57cec5SDimitry Andric 1820b57cec5SDimitry Andric Lex(); 1830b57cec5SDimitry Andric 184e8d8bef9SDimitry Andric if (expect(AsmToken::Comma, ",") || expect(AsmToken::At, "@")) 1850b57cec5SDimitry Andric return true; 1860b57cec5SDimitry Andric 187e8d8bef9SDimitry Andric StringRef GroupName; 188e8d8bef9SDimitry Andric if (Group && parseGroup(GroupName)) 189e8d8bef9SDimitry Andric return true; 190e8d8bef9SDimitry Andric 191e8d8bef9SDimitry Andric if (expect(AsmToken::EndOfStatement, "eol")) 192e8d8bef9SDimitry Andric return true; 193e8d8bef9SDimitry Andric 194e8d8bef9SDimitry Andric // TODO: Parse UniqueID 195e8d8bef9SDimitry Andric MCSectionWasm *WS = getContext().getWasmSection( 19681ad6265SDimitry Andric Name, *Kind, Flags, GroupName, MCContext::GenericSectionID); 197fe6060f1SDimitry Andric 198fe6060f1SDimitry Andric if (WS->getSegmentFlags() != Flags) 199fe6060f1SDimitry Andric Parser->Error(loc, "changed section flags for " + Name + 200fe6060f1SDimitry Andric ", expected: 0x" + 201fe6060f1SDimitry Andric utohexstr(WS->getSegmentFlags())); 202fe6060f1SDimitry Andric 203e8d8bef9SDimitry Andric if (Passive) { 204e8d8bef9SDimitry Andric if (!WS->isWasmData()) 205fe6060f1SDimitry Andric return Parser->Error(loc, "Only data sections can be passive"); 206e8d8bef9SDimitry Andric WS->setPassive(); 207e8d8bef9SDimitry Andric } 208fe6060f1SDimitry Andric 20981ad6265SDimitry Andric getStreamer().switchSection(WS); 2100b57cec5SDimitry Andric return false; 2110b57cec5SDimitry Andric } 2120b57cec5SDimitry Andric 2130b57cec5SDimitry Andric // TODO: This function is almost the same as ELFAsmParser::ParseDirectiveSize 2140b57cec5SDimitry Andric // so maybe could be shared somehow. 215bdd1243dSDimitry Andric bool parseDirectiveSize(StringRef, SMLoc Loc) { 2160b57cec5SDimitry Andric StringRef Name; 2170b57cec5SDimitry Andric if (Parser->parseIdentifier(Name)) 2180b57cec5SDimitry Andric return TokError("expected identifier in directive"); 2190b57cec5SDimitry Andric auto Sym = getContext().getOrCreateSymbol(Name); 2200b57cec5SDimitry Andric if (expect(AsmToken::Comma, ",")) 2210b57cec5SDimitry Andric return true; 2220b57cec5SDimitry Andric const MCExpr *Expr; 2230b57cec5SDimitry Andric if (Parser->parseExpression(Expr)) 2240b57cec5SDimitry Andric return true; 2250b57cec5SDimitry Andric if (expect(AsmToken::EndOfStatement, "eol")) 2260b57cec5SDimitry Andric return true; 227bdd1243dSDimitry Andric auto WasmSym = cast<MCSymbolWasm>(Sym); 228bdd1243dSDimitry Andric if (WasmSym->isFunction()) { 229bdd1243dSDimitry Andric // Ignore .size directives for function symbols. They get their size 230bdd1243dSDimitry Andric // set automatically based on their content. 231bdd1243dSDimitry Andric Warning(Loc, ".size directive ignored for function symbols"); 232bdd1243dSDimitry Andric } else { 2330b57cec5SDimitry Andric getStreamer().emitELFSize(Sym, Expr); 234bdd1243dSDimitry Andric } 2350b57cec5SDimitry Andric return false; 2360b57cec5SDimitry Andric } 2370b57cec5SDimitry Andric 2380b57cec5SDimitry Andric bool parseDirectiveType(StringRef, SMLoc) { 2390b57cec5SDimitry Andric // This could be the start of a function, check if followed by 2400b57cec5SDimitry Andric // "label,@function" 2410b57cec5SDimitry Andric if (!Lexer->is(AsmToken::Identifier)) 2420b57cec5SDimitry Andric return error("Expected label after .type directive, got: ", 2430b57cec5SDimitry Andric Lexer->getTok()); 2440b57cec5SDimitry Andric auto WasmSym = cast<MCSymbolWasm>( 2450b57cec5SDimitry Andric getStreamer().getContext().getOrCreateSymbol( 2460b57cec5SDimitry Andric Lexer->getTok().getString())); 2470b57cec5SDimitry Andric Lex(); 2480b57cec5SDimitry Andric if (!(isNext(AsmToken::Comma) && isNext(AsmToken::At) && 2490b57cec5SDimitry Andric Lexer->is(AsmToken::Identifier))) 2500b57cec5SDimitry Andric return error("Expected label,@type declaration, got: ", Lexer->getTok()); 2510b57cec5SDimitry Andric auto TypeName = Lexer->getTok().getString(); 252e8d8bef9SDimitry Andric if (TypeName == "function") { 2530b57cec5SDimitry Andric WasmSym->setType(wasm::WASM_SYMBOL_TYPE_FUNCTION); 254e8d8bef9SDimitry Andric auto *Current = 255*0fca6ea1SDimitry Andric cast<MCSectionWasm>(getStreamer().getCurrentSectionOnly()); 256e8d8bef9SDimitry Andric if (Current->getGroup()) 257e8d8bef9SDimitry Andric WasmSym->setComdat(true); 258e8d8bef9SDimitry Andric } else if (TypeName == "global") 2590b57cec5SDimitry Andric WasmSym->setType(wasm::WASM_SYMBOL_TYPE_GLOBAL); 2600b57cec5SDimitry Andric else if (TypeName == "object") 2610b57cec5SDimitry Andric WasmSym->setType(wasm::WASM_SYMBOL_TYPE_DATA); 2620b57cec5SDimitry Andric else 2630b57cec5SDimitry Andric return error("Unknown WASM symbol type: ", Lexer->getTok()); 2640b57cec5SDimitry Andric Lex(); 2650b57cec5SDimitry Andric return expect(AsmToken::EndOfStatement, "EOL"); 2660b57cec5SDimitry Andric } 2670b57cec5SDimitry Andric 2680b57cec5SDimitry Andric // FIXME: Shared with ELF. 2690b57cec5SDimitry Andric /// ParseDirectiveIdent 2700b57cec5SDimitry Andric /// ::= .ident string 2710b57cec5SDimitry Andric bool ParseDirectiveIdent(StringRef, SMLoc) { 2720b57cec5SDimitry Andric if (getLexer().isNot(AsmToken::String)) 2730b57cec5SDimitry Andric return TokError("unexpected token in '.ident' directive"); 2740b57cec5SDimitry Andric StringRef Data = getTok().getIdentifier(); 2750b57cec5SDimitry Andric Lex(); 2760b57cec5SDimitry Andric if (getLexer().isNot(AsmToken::EndOfStatement)) 2770b57cec5SDimitry Andric return TokError("unexpected token in '.ident' directive"); 2780b57cec5SDimitry Andric Lex(); 2795ffd83dbSDimitry Andric getStreamer().emitIdent(Data); 2800b57cec5SDimitry Andric return false; 2810b57cec5SDimitry Andric } 2820b57cec5SDimitry Andric 2830b57cec5SDimitry Andric // FIXME: Shared with ELF. 2840b57cec5SDimitry Andric /// ParseDirectiveSymbolAttribute 2850b57cec5SDimitry Andric /// ::= { ".local", ".weak", ... } [ identifier ( , identifier )* ] 2860b57cec5SDimitry Andric bool ParseDirectiveSymbolAttribute(StringRef Directive, SMLoc) { 2870b57cec5SDimitry Andric MCSymbolAttr Attr = StringSwitch<MCSymbolAttr>(Directive) 2880b57cec5SDimitry Andric .Case(".weak", MCSA_Weak) 2890b57cec5SDimitry Andric .Case(".local", MCSA_Local) 2900b57cec5SDimitry Andric .Case(".hidden", MCSA_Hidden) 2910b57cec5SDimitry Andric .Case(".internal", MCSA_Internal) 2920b57cec5SDimitry Andric .Case(".protected", MCSA_Protected) 2930b57cec5SDimitry Andric .Default(MCSA_Invalid); 2940b57cec5SDimitry Andric assert(Attr != MCSA_Invalid && "unexpected symbol attribute directive!"); 2950b57cec5SDimitry Andric if (getLexer().isNot(AsmToken::EndOfStatement)) { 2960b57cec5SDimitry Andric while (true) { 2970b57cec5SDimitry Andric StringRef Name; 2980b57cec5SDimitry Andric if (getParser().parseIdentifier(Name)) 2990b57cec5SDimitry Andric return TokError("expected identifier in directive"); 3000b57cec5SDimitry Andric MCSymbol *Sym = getContext().getOrCreateSymbol(Name); 3015ffd83dbSDimitry Andric getStreamer().emitSymbolAttribute(Sym, Attr); 3020b57cec5SDimitry Andric if (getLexer().is(AsmToken::EndOfStatement)) 3030b57cec5SDimitry Andric break; 3040b57cec5SDimitry Andric if (getLexer().isNot(AsmToken::Comma)) 3050b57cec5SDimitry Andric return TokError("unexpected token in directive"); 3060b57cec5SDimitry Andric Lex(); 3070b57cec5SDimitry Andric } 3080b57cec5SDimitry Andric } 3090b57cec5SDimitry Andric Lex(); 3100b57cec5SDimitry Andric return false; 3110b57cec5SDimitry Andric } 3120b57cec5SDimitry Andric }; 3130b57cec5SDimitry Andric 3140b57cec5SDimitry Andric } // end anonymous namespace 3150b57cec5SDimitry Andric 3160b57cec5SDimitry Andric namespace llvm { 3170b57cec5SDimitry Andric 3180b57cec5SDimitry Andric MCAsmParserExtension *createWasmAsmParser() { 3190b57cec5SDimitry Andric return new WasmAsmParser; 3200b57cec5SDimitry Andric } 3210b57cec5SDimitry Andric 3220b57cec5SDimitry Andric } // end namespace llvm 323