// xref: /freebsd-src/contrib/llvm-project/llvm/lib/MC/MCParser/WasmAsmParser.cpp (revision 0fca6ea1d4eea4c934cfff25ac9ee8ad6fe95583)
//===- WasmAsmParser.cpp - Wasm Assembly Parser -----------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
// --
//
// Note, this is for wasm, the binary format (analogous to ELF), not wasm,
// the instruction set (analogous to x86), for which parsing code lives in
// WebAssemblyAsmParser.
//
// This file contains processing for generic directives implemented using
// MCTargetStreamer, the ones that depend on WebAssemblyTargetStreamer are in
// WebAssemblyAsmParser.
//
//===----------------------------------------------------------------------===//

#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/BinaryFormat/Wasm.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCObjectFileInfo.h"
#include "llvm/MC/MCParser/MCAsmLexer.h"
#include "llvm/MC/MCParser/MCAsmParser.h"
#include "llvm/MC/MCParser/MCAsmParserExtension.h"
#include "llvm/MC/MCSectionWasm.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSymbolWasm.h"
#include "llvm/Support/Casting.h"
#include <cassert>
#include <cstdint>
#include <optional>
#include <string>
#include <utility>

using namespace llvm;

340b57cec5SDimitry Andric namespace {
350b57cec5SDimitry Andric 
360b57cec5SDimitry Andric class WasmAsmParser : public MCAsmParserExtension {
370b57cec5SDimitry Andric   MCAsmParser *Parser = nullptr;
380b57cec5SDimitry Andric   MCAsmLexer *Lexer = nullptr;
390b57cec5SDimitry Andric 
400b57cec5SDimitry Andric   template<bool (WasmAsmParser::*HandlerMethod)(StringRef, SMLoc)>
410b57cec5SDimitry Andric   void addDirectiveHandler(StringRef Directive) {
420b57cec5SDimitry Andric     MCAsmParser::ExtensionDirectiveHandler Handler = std::make_pair(
430b57cec5SDimitry Andric         this, HandleDirective<WasmAsmParser, HandlerMethod>);
440b57cec5SDimitry Andric 
450b57cec5SDimitry Andric     getParser().addDirectiveHandler(Directive, Handler);
460b57cec5SDimitry Andric   }
470b57cec5SDimitry Andric 
480b57cec5SDimitry Andric public:
490b57cec5SDimitry Andric   WasmAsmParser() { BracketExpressionsSupported = true; }
500b57cec5SDimitry Andric 
510b57cec5SDimitry Andric   void Initialize(MCAsmParser &P) override {
520b57cec5SDimitry Andric     Parser = &P;
530b57cec5SDimitry Andric     Lexer = &Parser->getLexer();
540b57cec5SDimitry Andric     // Call the base implementation.
550b57cec5SDimitry Andric     this->MCAsmParserExtension::Initialize(*Parser);
560b57cec5SDimitry Andric 
570b57cec5SDimitry Andric     addDirectiveHandler<&WasmAsmParser::parseSectionDirectiveText>(".text");
5881ad6265SDimitry Andric     addDirectiveHandler<&WasmAsmParser::parseSectionDirectiveData>(".data");
590b57cec5SDimitry Andric     addDirectiveHandler<&WasmAsmParser::parseSectionDirective>(".section");
600b57cec5SDimitry Andric     addDirectiveHandler<&WasmAsmParser::parseDirectiveSize>(".size");
610b57cec5SDimitry Andric     addDirectiveHandler<&WasmAsmParser::parseDirectiveType>(".type");
620b57cec5SDimitry Andric     addDirectiveHandler<&WasmAsmParser::ParseDirectiveIdent>(".ident");
630b57cec5SDimitry Andric     addDirectiveHandler<
640b57cec5SDimitry Andric       &WasmAsmParser::ParseDirectiveSymbolAttribute>(".weak");
650b57cec5SDimitry Andric     addDirectiveHandler<
660b57cec5SDimitry Andric       &WasmAsmParser::ParseDirectiveSymbolAttribute>(".local");
670b57cec5SDimitry Andric     addDirectiveHandler<
680b57cec5SDimitry Andric       &WasmAsmParser::ParseDirectiveSymbolAttribute>(".internal");
690b57cec5SDimitry Andric     addDirectiveHandler<
700b57cec5SDimitry Andric       &WasmAsmParser::ParseDirectiveSymbolAttribute>(".hidden");
710b57cec5SDimitry Andric   }
720b57cec5SDimitry Andric 
730b57cec5SDimitry Andric   bool error(const StringRef &Msg, const AsmToken &Tok) {
740b57cec5SDimitry Andric     return Parser->Error(Tok.getLoc(), Msg + Tok.getString());
750b57cec5SDimitry Andric   }
760b57cec5SDimitry Andric 
770b57cec5SDimitry Andric   bool isNext(AsmToken::TokenKind Kind) {
780b57cec5SDimitry Andric     auto Ok = Lexer->is(Kind);
790b57cec5SDimitry Andric     if (Ok)
800b57cec5SDimitry Andric       Lex();
810b57cec5SDimitry Andric     return Ok;
820b57cec5SDimitry Andric   }
830b57cec5SDimitry Andric 
840b57cec5SDimitry Andric   bool expect(AsmToken::TokenKind Kind, const char *KindName) {
850b57cec5SDimitry Andric     if (!isNext(Kind))
860b57cec5SDimitry Andric       return error(std::string("Expected ") + KindName + ", instead got: ",
870b57cec5SDimitry Andric                    Lexer->getTok());
880b57cec5SDimitry Andric     return false;
890b57cec5SDimitry Andric   }
900b57cec5SDimitry Andric 
910b57cec5SDimitry Andric   bool parseSectionDirectiveText(StringRef, SMLoc) {
920b57cec5SDimitry Andric     // FIXME: .text currently no-op.
930b57cec5SDimitry Andric     return false;
940b57cec5SDimitry Andric   }
950b57cec5SDimitry Andric 
9681ad6265SDimitry Andric   bool parseSectionDirectiveData(StringRef, SMLoc) {
9781ad6265SDimitry Andric     auto *S = getContext().getObjectFileInfo()->getDataSection();
9881ad6265SDimitry Andric     getStreamer().switchSection(S);
9981ad6265SDimitry Andric     return false;
10081ad6265SDimitry Andric   }
10181ad6265SDimitry Andric 
102fe6060f1SDimitry Andric   uint32_t parseSectionFlags(StringRef FlagStr, bool &Passive, bool &Group) {
103fe6060f1SDimitry Andric     uint32_t flags = 0;
104e8d8bef9SDimitry Andric     for (char C : FlagStr) {
105e8d8bef9SDimitry Andric       switch (C) {
106e8d8bef9SDimitry Andric       case 'p':
1070b57cec5SDimitry Andric         Passive = true;
108e8d8bef9SDimitry Andric         break;
109e8d8bef9SDimitry Andric       case 'G':
110e8d8bef9SDimitry Andric         Group = true;
111e8d8bef9SDimitry Andric         break;
112fe6060f1SDimitry Andric       case 'T':
113fe6060f1SDimitry Andric         flags |= wasm::WASM_SEG_FLAG_TLS;
114fe6060f1SDimitry Andric         break;
115fe6060f1SDimitry Andric       case 'S':
116fe6060f1SDimitry Andric         flags |= wasm::WASM_SEG_FLAG_STRINGS;
117fe6060f1SDimitry Andric         break;
118*0fca6ea1SDimitry Andric       case 'R':
119*0fca6ea1SDimitry Andric         flags |= wasm::WASM_SEG_FLAG_RETAIN;
120*0fca6ea1SDimitry Andric         break;
121e8d8bef9SDimitry Andric       default:
122fe6060f1SDimitry Andric         return -1U;
123e8d8bef9SDimitry Andric       }
124e8d8bef9SDimitry Andric     }
125fe6060f1SDimitry Andric     return flags;
126e8d8bef9SDimitry Andric   }
127e8d8bef9SDimitry Andric 
128e8d8bef9SDimitry Andric   bool parseGroup(StringRef &GroupName) {
129e8d8bef9SDimitry Andric     if (Lexer->isNot(AsmToken::Comma))
130e8d8bef9SDimitry Andric       return TokError("expected group name");
131e8d8bef9SDimitry Andric     Lex();
132e8d8bef9SDimitry Andric     if (Lexer->is(AsmToken::Integer)) {
133e8d8bef9SDimitry Andric       GroupName = getTok().getString();
134e8d8bef9SDimitry Andric       Lex();
135e8d8bef9SDimitry Andric     } else if (Parser->parseIdentifier(GroupName)) {
136e8d8bef9SDimitry Andric       return TokError("invalid group name");
137e8d8bef9SDimitry Andric     }
138e8d8bef9SDimitry Andric     if (Lexer->is(AsmToken::Comma)) {
139e8d8bef9SDimitry Andric       Lex();
140e8d8bef9SDimitry Andric       StringRef Linkage;
141e8d8bef9SDimitry Andric       if (Parser->parseIdentifier(Linkage))
142e8d8bef9SDimitry Andric         return TokError("invalid linkage");
143e8d8bef9SDimitry Andric       if (Linkage != "comdat")
144e8d8bef9SDimitry Andric         return TokError("Linkage must be 'comdat'");
1450b57cec5SDimitry Andric     }
1460b57cec5SDimitry Andric     return false;
1470b57cec5SDimitry Andric   }
1480b57cec5SDimitry Andric 
149fe6060f1SDimitry Andric   bool parseSectionDirective(StringRef, SMLoc loc) {
1500b57cec5SDimitry Andric     StringRef Name;
1510b57cec5SDimitry Andric     if (Parser->parseIdentifier(Name))
1520b57cec5SDimitry Andric       return TokError("expected identifier in directive");
1530b57cec5SDimitry Andric 
1540b57cec5SDimitry Andric     if (expect(AsmToken::Comma, ","))
1550b57cec5SDimitry Andric       return true;
1560b57cec5SDimitry Andric 
1570b57cec5SDimitry Andric     if (Lexer->isNot(AsmToken::String))
1580b57cec5SDimitry Andric       return error("expected string in directive, instead got: ", Lexer->getTok());
1590b57cec5SDimitry Andric 
160bdd1243dSDimitry Andric     auto Kind = StringSwitch<std::optional<SectionKind>>(Name)
1610b57cec5SDimitry Andric                     .StartsWith(".data", SectionKind::getData())
162e8d8bef9SDimitry Andric                     .StartsWith(".tdata", SectionKind::getThreadData())
163e8d8bef9SDimitry Andric                     .StartsWith(".tbss", SectionKind::getThreadBSS())
1640b57cec5SDimitry Andric                     .StartsWith(".rodata", SectionKind::getReadOnly())
1650b57cec5SDimitry Andric                     .StartsWith(".text", SectionKind::getText())
1660b57cec5SDimitry Andric                     .StartsWith(".custom_section", SectionKind::getMetadata())
1670b57cec5SDimitry Andric                     .StartsWith(".bss", SectionKind::getBSS())
1680b57cec5SDimitry Andric                     // See use of .init_array in WasmObjectWriter and
1690b57cec5SDimitry Andric                     // TargetLoweringObjectFileWasm
1700b57cec5SDimitry Andric                     .StartsWith(".init_array", SectionKind::getData())
1718bcb0991SDimitry Andric                     .StartsWith(".debug_", SectionKind::getMetadata())
172fe6060f1SDimitry Andric                     .Default(SectionKind::getData());
1730b57cec5SDimitry Andric 
1740b57cec5SDimitry Andric     // Update section flags if present in this .section directive
1750b57cec5SDimitry Andric     bool Passive = false;
176e8d8bef9SDimitry Andric     bool Group = false;
177fe6060f1SDimitry Andric     uint32_t Flags =
178fe6060f1SDimitry Andric         parseSectionFlags(getTok().getStringContents(), Passive, Group);
179fe6060f1SDimitry Andric     if (Flags == -1U)
180fe6060f1SDimitry Andric       return TokError("unknown flag");
1810b57cec5SDimitry Andric 
1820b57cec5SDimitry Andric     Lex();
1830b57cec5SDimitry Andric 
184e8d8bef9SDimitry Andric     if (expect(AsmToken::Comma, ",") || expect(AsmToken::At, "@"))
1850b57cec5SDimitry Andric       return true;
1860b57cec5SDimitry Andric 
187e8d8bef9SDimitry Andric     StringRef GroupName;
188e8d8bef9SDimitry Andric     if (Group && parseGroup(GroupName))
189e8d8bef9SDimitry Andric       return true;
190e8d8bef9SDimitry Andric 
191e8d8bef9SDimitry Andric     if (expect(AsmToken::EndOfStatement, "eol"))
192e8d8bef9SDimitry Andric       return true;
193e8d8bef9SDimitry Andric 
194e8d8bef9SDimitry Andric     // TODO: Parse UniqueID
195e8d8bef9SDimitry Andric     MCSectionWasm *WS = getContext().getWasmSection(
19681ad6265SDimitry Andric         Name, *Kind, Flags, GroupName, MCContext::GenericSectionID);
197fe6060f1SDimitry Andric 
198fe6060f1SDimitry Andric     if (WS->getSegmentFlags() != Flags)
199fe6060f1SDimitry Andric       Parser->Error(loc, "changed section flags for " + Name +
200fe6060f1SDimitry Andric                              ", expected: 0x" +
201fe6060f1SDimitry Andric                              utohexstr(WS->getSegmentFlags()));
202fe6060f1SDimitry Andric 
203e8d8bef9SDimitry Andric     if (Passive) {
204e8d8bef9SDimitry Andric       if (!WS->isWasmData())
205fe6060f1SDimitry Andric         return Parser->Error(loc, "Only data sections can be passive");
206e8d8bef9SDimitry Andric       WS->setPassive();
207e8d8bef9SDimitry Andric     }
208fe6060f1SDimitry Andric 
20981ad6265SDimitry Andric     getStreamer().switchSection(WS);
2100b57cec5SDimitry Andric     return false;
2110b57cec5SDimitry Andric   }
2120b57cec5SDimitry Andric 
2130b57cec5SDimitry Andric   // TODO: This function is almost the same as ELFAsmParser::ParseDirectiveSize
2140b57cec5SDimitry Andric   // so maybe could be shared somehow.
215bdd1243dSDimitry Andric   bool parseDirectiveSize(StringRef, SMLoc Loc) {
2160b57cec5SDimitry Andric     StringRef Name;
2170b57cec5SDimitry Andric     if (Parser->parseIdentifier(Name))
2180b57cec5SDimitry Andric       return TokError("expected identifier in directive");
2190b57cec5SDimitry Andric     auto Sym = getContext().getOrCreateSymbol(Name);
2200b57cec5SDimitry Andric     if (expect(AsmToken::Comma, ","))
2210b57cec5SDimitry Andric       return true;
2220b57cec5SDimitry Andric     const MCExpr *Expr;
2230b57cec5SDimitry Andric     if (Parser->parseExpression(Expr))
2240b57cec5SDimitry Andric       return true;
2250b57cec5SDimitry Andric     if (expect(AsmToken::EndOfStatement, "eol"))
2260b57cec5SDimitry Andric       return true;
227bdd1243dSDimitry Andric     auto WasmSym = cast<MCSymbolWasm>(Sym);
228bdd1243dSDimitry Andric     if (WasmSym->isFunction()) {
229bdd1243dSDimitry Andric       // Ignore .size directives for function symbols.  They get their size
230bdd1243dSDimitry Andric       // set automatically based on their content.
231bdd1243dSDimitry Andric       Warning(Loc, ".size directive ignored for function symbols");
232bdd1243dSDimitry Andric     } else {
2330b57cec5SDimitry Andric       getStreamer().emitELFSize(Sym, Expr);
234bdd1243dSDimitry Andric     }
2350b57cec5SDimitry Andric     return false;
2360b57cec5SDimitry Andric   }
2370b57cec5SDimitry Andric 
2380b57cec5SDimitry Andric   bool parseDirectiveType(StringRef, SMLoc) {
2390b57cec5SDimitry Andric     // This could be the start of a function, check if followed by
2400b57cec5SDimitry Andric     // "label,@function"
2410b57cec5SDimitry Andric     if (!Lexer->is(AsmToken::Identifier))
2420b57cec5SDimitry Andric       return error("Expected label after .type directive, got: ",
2430b57cec5SDimitry Andric                    Lexer->getTok());
2440b57cec5SDimitry Andric     auto WasmSym = cast<MCSymbolWasm>(
2450b57cec5SDimitry Andric                      getStreamer().getContext().getOrCreateSymbol(
2460b57cec5SDimitry Andric                        Lexer->getTok().getString()));
2470b57cec5SDimitry Andric     Lex();
2480b57cec5SDimitry Andric     if (!(isNext(AsmToken::Comma) && isNext(AsmToken::At) &&
2490b57cec5SDimitry Andric           Lexer->is(AsmToken::Identifier)))
2500b57cec5SDimitry Andric       return error("Expected label,@type declaration, got: ", Lexer->getTok());
2510b57cec5SDimitry Andric     auto TypeName = Lexer->getTok().getString();
252e8d8bef9SDimitry Andric     if (TypeName == "function") {
2530b57cec5SDimitry Andric       WasmSym->setType(wasm::WASM_SYMBOL_TYPE_FUNCTION);
254e8d8bef9SDimitry Andric       auto *Current =
255*0fca6ea1SDimitry Andric           cast<MCSectionWasm>(getStreamer().getCurrentSectionOnly());
256e8d8bef9SDimitry Andric       if (Current->getGroup())
257e8d8bef9SDimitry Andric         WasmSym->setComdat(true);
258e8d8bef9SDimitry Andric     } else if (TypeName == "global")
2590b57cec5SDimitry Andric       WasmSym->setType(wasm::WASM_SYMBOL_TYPE_GLOBAL);
2600b57cec5SDimitry Andric     else if (TypeName == "object")
2610b57cec5SDimitry Andric       WasmSym->setType(wasm::WASM_SYMBOL_TYPE_DATA);
2620b57cec5SDimitry Andric     else
2630b57cec5SDimitry Andric       return error("Unknown WASM symbol type: ", Lexer->getTok());
2640b57cec5SDimitry Andric     Lex();
2650b57cec5SDimitry Andric     return expect(AsmToken::EndOfStatement, "EOL");
2660b57cec5SDimitry Andric   }
2670b57cec5SDimitry Andric 
2680b57cec5SDimitry Andric   // FIXME: Shared with ELF.
2690b57cec5SDimitry Andric   /// ParseDirectiveIdent
2700b57cec5SDimitry Andric   ///  ::= .ident string
2710b57cec5SDimitry Andric   bool ParseDirectiveIdent(StringRef, SMLoc) {
2720b57cec5SDimitry Andric     if (getLexer().isNot(AsmToken::String))
2730b57cec5SDimitry Andric       return TokError("unexpected token in '.ident' directive");
2740b57cec5SDimitry Andric     StringRef Data = getTok().getIdentifier();
2750b57cec5SDimitry Andric     Lex();
2760b57cec5SDimitry Andric     if (getLexer().isNot(AsmToken::EndOfStatement))
2770b57cec5SDimitry Andric       return TokError("unexpected token in '.ident' directive");
2780b57cec5SDimitry Andric     Lex();
2795ffd83dbSDimitry Andric     getStreamer().emitIdent(Data);
2800b57cec5SDimitry Andric     return false;
2810b57cec5SDimitry Andric   }
2820b57cec5SDimitry Andric 
2830b57cec5SDimitry Andric   // FIXME: Shared with ELF.
2840b57cec5SDimitry Andric   /// ParseDirectiveSymbolAttribute
2850b57cec5SDimitry Andric   ///  ::= { ".local", ".weak", ... } [ identifier ( , identifier )* ]
2860b57cec5SDimitry Andric   bool ParseDirectiveSymbolAttribute(StringRef Directive, SMLoc) {
2870b57cec5SDimitry Andric     MCSymbolAttr Attr = StringSwitch<MCSymbolAttr>(Directive)
2880b57cec5SDimitry Andric       .Case(".weak", MCSA_Weak)
2890b57cec5SDimitry Andric       .Case(".local", MCSA_Local)
2900b57cec5SDimitry Andric       .Case(".hidden", MCSA_Hidden)
2910b57cec5SDimitry Andric       .Case(".internal", MCSA_Internal)
2920b57cec5SDimitry Andric       .Case(".protected", MCSA_Protected)
2930b57cec5SDimitry Andric       .Default(MCSA_Invalid);
2940b57cec5SDimitry Andric     assert(Attr != MCSA_Invalid && "unexpected symbol attribute directive!");
2950b57cec5SDimitry Andric     if (getLexer().isNot(AsmToken::EndOfStatement)) {
2960b57cec5SDimitry Andric       while (true) {
2970b57cec5SDimitry Andric         StringRef Name;
2980b57cec5SDimitry Andric         if (getParser().parseIdentifier(Name))
2990b57cec5SDimitry Andric           return TokError("expected identifier in directive");
3000b57cec5SDimitry Andric         MCSymbol *Sym = getContext().getOrCreateSymbol(Name);
3015ffd83dbSDimitry Andric         getStreamer().emitSymbolAttribute(Sym, Attr);
3020b57cec5SDimitry Andric         if (getLexer().is(AsmToken::EndOfStatement))
3030b57cec5SDimitry Andric           break;
3040b57cec5SDimitry Andric         if (getLexer().isNot(AsmToken::Comma))
3050b57cec5SDimitry Andric           return TokError("unexpected token in directive");
3060b57cec5SDimitry Andric         Lex();
3070b57cec5SDimitry Andric       }
3080b57cec5SDimitry Andric     }
3090b57cec5SDimitry Andric     Lex();
3100b57cec5SDimitry Andric     return false;
3110b57cec5SDimitry Andric   }
3120b57cec5SDimitry Andric };
3130b57cec5SDimitry Andric 
3140b57cec5SDimitry Andric } // end anonymous namespace
3150b57cec5SDimitry Andric 
3160b57cec5SDimitry Andric namespace llvm {
3170b57cec5SDimitry Andric 
3180b57cec5SDimitry Andric MCAsmParserExtension *createWasmAsmParser() {
3190b57cec5SDimitry Andric   return new WasmAsmParser;
3200b57cec5SDimitry Andric }
3210b57cec5SDimitry Andric 
3220b57cec5SDimitry Andric } // end namespace llvm
323