xref: /freebsd-src/contrib/llvm-project/llvm/lib/Target/WebAssembly/Disassembler/WebAssemblyDisassembler.cpp (revision 5ffd83dbcc34f10e07f6d3e968ae6365869615f4)
//==- WebAssemblyDisassembler.cpp - Disassembler for WebAssembly -*- C++ -*-==//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
/// \file
/// This file is part of the WebAssembly Disassembler.
///
/// It contains code to translate the data produced by the decoder into
/// MCInsts.
///
//===----------------------------------------------------------------------===//
160b57cec5SDimitry Andric 
170b57cec5SDimitry Andric #include "MCTargetDesc/WebAssemblyInstPrinter.h"
180b57cec5SDimitry Andric #include "MCTargetDesc/WebAssemblyMCTargetDesc.h"
190b57cec5SDimitry Andric #include "TargetInfo/WebAssemblyTargetInfo.h"
200b57cec5SDimitry Andric #include "llvm/MC/MCContext.h"
210b57cec5SDimitry Andric #include "llvm/MC/MCDisassembler/MCDisassembler.h"
220b57cec5SDimitry Andric #include "llvm/MC/MCFixedLenDisassembler.h"
230b57cec5SDimitry Andric #include "llvm/MC/MCInst.h"
240b57cec5SDimitry Andric #include "llvm/MC/MCInstrInfo.h"
250b57cec5SDimitry Andric #include "llvm/MC/MCSubtargetInfo.h"
260b57cec5SDimitry Andric #include "llvm/MC/MCSymbol.h"
278bcb0991SDimitry Andric #include "llvm/MC/MCSymbolWasm.h"
280b57cec5SDimitry Andric #include "llvm/Support/Endian.h"
290b57cec5SDimitry Andric #include "llvm/Support/LEB128.h"
300b57cec5SDimitry Andric #include "llvm/Support/TargetRegistry.h"
310b57cec5SDimitry Andric 
320b57cec5SDimitry Andric using namespace llvm;
330b57cec5SDimitry Andric 
340b57cec5SDimitry Andric #define DEBUG_TYPE "wasm-disassembler"
350b57cec5SDimitry Andric 
360b57cec5SDimitry Andric using DecodeStatus = MCDisassembler::DecodeStatus;
370b57cec5SDimitry Andric 
380b57cec5SDimitry Andric #include "WebAssemblyGenDisassemblerTables.inc"
390b57cec5SDimitry Andric 
400b57cec5SDimitry Andric namespace {
410b57cec5SDimitry Andric static constexpr int WebAssemblyInstructionTableSize = 256;
420b57cec5SDimitry Andric 
430b57cec5SDimitry Andric class WebAssemblyDisassembler final : public MCDisassembler {
440b57cec5SDimitry Andric   std::unique_ptr<const MCInstrInfo> MCII;
450b57cec5SDimitry Andric 
460b57cec5SDimitry Andric   DecodeStatus getInstruction(MCInst &Instr, uint64_t &Size,
470b57cec5SDimitry Andric                               ArrayRef<uint8_t> Bytes, uint64_t Address,
480b57cec5SDimitry Andric                               raw_ostream &CStream) const override;
49*5ffd83dbSDimitry Andric   Optional<DecodeStatus> onSymbolStart(SymbolInfoTy &Symbol, uint64_t &Size,
50*5ffd83dbSDimitry Andric                                        ArrayRef<uint8_t> Bytes,
51*5ffd83dbSDimitry Andric                                        uint64_t Address,
520b57cec5SDimitry Andric                                        raw_ostream &CStream) const override;
530b57cec5SDimitry Andric 
540b57cec5SDimitry Andric public:
550b57cec5SDimitry Andric   WebAssemblyDisassembler(const MCSubtargetInfo &STI, MCContext &Ctx,
560b57cec5SDimitry Andric                           std::unique_ptr<const MCInstrInfo> MCII)
570b57cec5SDimitry Andric       : MCDisassembler(STI, Ctx), MCII(std::move(MCII)) {}
580b57cec5SDimitry Andric };
590b57cec5SDimitry Andric } // end anonymous namespace
600b57cec5SDimitry Andric 
610b57cec5SDimitry Andric static MCDisassembler *createWebAssemblyDisassembler(const Target &T,
620b57cec5SDimitry Andric                                                      const MCSubtargetInfo &STI,
630b57cec5SDimitry Andric                                                      MCContext &Ctx) {
640b57cec5SDimitry Andric   std::unique_ptr<const MCInstrInfo> MCII(T.createMCInstrInfo());
650b57cec5SDimitry Andric   return new WebAssemblyDisassembler(STI, Ctx, std::move(MCII));
660b57cec5SDimitry Andric }
670b57cec5SDimitry Andric 
68480093f4SDimitry Andric extern "C" LLVM_EXTERNAL_VISIBILITY void
69480093f4SDimitry Andric LLVMInitializeWebAssemblyDisassembler() {
700b57cec5SDimitry Andric   // Register the disassembler for each target.
710b57cec5SDimitry Andric   TargetRegistry::RegisterMCDisassembler(getTheWebAssemblyTarget32(),
720b57cec5SDimitry Andric                                          createWebAssemblyDisassembler);
730b57cec5SDimitry Andric   TargetRegistry::RegisterMCDisassembler(getTheWebAssemblyTarget64(),
740b57cec5SDimitry Andric                                          createWebAssemblyDisassembler);
750b57cec5SDimitry Andric }
760b57cec5SDimitry Andric 
770b57cec5SDimitry Andric static int nextByte(ArrayRef<uint8_t> Bytes, uint64_t &Size) {
780b57cec5SDimitry Andric   if (Size >= Bytes.size())
790b57cec5SDimitry Andric     return -1;
800b57cec5SDimitry Andric   auto V = Bytes[Size];
810b57cec5SDimitry Andric   Size++;
820b57cec5SDimitry Andric   return V;
830b57cec5SDimitry Andric }
840b57cec5SDimitry Andric 
850b57cec5SDimitry Andric static bool nextLEB(int64_t &Val, ArrayRef<uint8_t> Bytes, uint64_t &Size,
860b57cec5SDimitry Andric                     bool Signed) {
870b57cec5SDimitry Andric   unsigned N = 0;
880b57cec5SDimitry Andric   const char *Error = nullptr;
890b57cec5SDimitry Andric   Val = Signed ? decodeSLEB128(Bytes.data() + Size, &N,
900b57cec5SDimitry Andric                                Bytes.data() + Bytes.size(), &Error)
910b57cec5SDimitry Andric                : static_cast<int64_t>(decodeULEB128(Bytes.data() + Size, &N,
920b57cec5SDimitry Andric                                                     Bytes.data() + Bytes.size(),
930b57cec5SDimitry Andric                                                     &Error));
940b57cec5SDimitry Andric   if (Error)
950b57cec5SDimitry Andric     return false;
960b57cec5SDimitry Andric   Size += N;
970b57cec5SDimitry Andric   return true;
980b57cec5SDimitry Andric }
990b57cec5SDimitry Andric 
1000b57cec5SDimitry Andric static bool parseLEBImmediate(MCInst &MI, uint64_t &Size,
1010b57cec5SDimitry Andric                               ArrayRef<uint8_t> Bytes, bool Signed) {
1020b57cec5SDimitry Andric   int64_t Val;
1030b57cec5SDimitry Andric   if (!nextLEB(Val, Bytes, Size, Signed))
1040b57cec5SDimitry Andric     return false;
1050b57cec5SDimitry Andric   MI.addOperand(MCOperand::createImm(Val));
1060b57cec5SDimitry Andric   return true;
1070b57cec5SDimitry Andric }
1080b57cec5SDimitry Andric 
1090b57cec5SDimitry Andric template <typename T>
1100b57cec5SDimitry Andric bool parseImmediate(MCInst &MI, uint64_t &Size, ArrayRef<uint8_t> Bytes) {
1110b57cec5SDimitry Andric   if (Size + sizeof(T) > Bytes.size())
1120b57cec5SDimitry Andric     return false;
1130b57cec5SDimitry Andric   T Val = support::endian::read<T, support::endianness::little, 1>(
1140b57cec5SDimitry Andric       Bytes.data() + Size);
1150b57cec5SDimitry Andric   Size += sizeof(T);
1160b57cec5SDimitry Andric   if (std::is_floating_point<T>::value) {
1170b57cec5SDimitry Andric     MI.addOperand(MCOperand::createFPImm(static_cast<double>(Val)));
1180b57cec5SDimitry Andric   } else {
1190b57cec5SDimitry Andric     MI.addOperand(MCOperand::createImm(static_cast<int64_t>(Val)));
1200b57cec5SDimitry Andric   }
1210b57cec5SDimitry Andric   return true;
1220b57cec5SDimitry Andric }
1230b57cec5SDimitry Andric 
124*5ffd83dbSDimitry Andric Optional<MCDisassembler::DecodeStatus> WebAssemblyDisassembler::onSymbolStart(
125*5ffd83dbSDimitry Andric     SymbolInfoTy &Symbol, uint64_t &Size, ArrayRef<uint8_t> Bytes,
126*5ffd83dbSDimitry Andric     uint64_t Address, raw_ostream &CStream) const {
1270b57cec5SDimitry Andric   Size = 0;
1280b57cec5SDimitry Andric   if (Address == 0) {
1290b57cec5SDimitry Andric     // Start of a code section: we're parsing only the function count.
1300b57cec5SDimitry Andric     int64_t FunctionCount;
1310b57cec5SDimitry Andric     if (!nextLEB(FunctionCount, Bytes, Size, false))
132*5ffd83dbSDimitry Andric       return None;
1330b57cec5SDimitry Andric     outs() << "        # " << FunctionCount << " functions in section.";
1340b57cec5SDimitry Andric   } else {
1350b57cec5SDimitry Andric     // Parse the start of a single function.
1360b57cec5SDimitry Andric     int64_t BodySize, LocalEntryCount;
1370b57cec5SDimitry Andric     if (!nextLEB(BodySize, Bytes, Size, false) ||
1380b57cec5SDimitry Andric         !nextLEB(LocalEntryCount, Bytes, Size, false))
139*5ffd83dbSDimitry Andric       return None;
1400b57cec5SDimitry Andric     if (LocalEntryCount) {
1410b57cec5SDimitry Andric       outs() << "        .local ";
1420b57cec5SDimitry Andric       for (int64_t I = 0; I < LocalEntryCount; I++) {
1430b57cec5SDimitry Andric         int64_t Count, Type;
1440b57cec5SDimitry Andric         if (!nextLEB(Count, Bytes, Size, false) ||
1450b57cec5SDimitry Andric             !nextLEB(Type, Bytes, Size, false))
146*5ffd83dbSDimitry Andric           return None;
1470b57cec5SDimitry Andric         for (int64_t J = 0; J < Count; J++) {
1480b57cec5SDimitry Andric           if (I || J)
1490b57cec5SDimitry Andric             outs() << ", ";
1500b57cec5SDimitry Andric           outs() << WebAssembly::anyTypeToString(Type);
1510b57cec5SDimitry Andric         }
1520b57cec5SDimitry Andric       }
1530b57cec5SDimitry Andric     }
1540b57cec5SDimitry Andric   }
1550b57cec5SDimitry Andric   outs() << "\n";
1560b57cec5SDimitry Andric   return MCDisassembler::Success;
1570b57cec5SDimitry Andric }
1580b57cec5SDimitry Andric 
1590b57cec5SDimitry Andric MCDisassembler::DecodeStatus WebAssemblyDisassembler::getInstruction(
1600b57cec5SDimitry Andric     MCInst &MI, uint64_t &Size, ArrayRef<uint8_t> Bytes, uint64_t /*Address*/,
161480093f4SDimitry Andric     raw_ostream &CS) const {
1620b57cec5SDimitry Andric   CommentStream = &CS;
1630b57cec5SDimitry Andric   Size = 0;
1640b57cec5SDimitry Andric   int Opc = nextByte(Bytes, Size);
1650b57cec5SDimitry Andric   if (Opc < 0)
1660b57cec5SDimitry Andric     return MCDisassembler::Fail;
1670b57cec5SDimitry Andric   const auto *WasmInst = &InstructionTable0[Opc];
1680b57cec5SDimitry Andric   // If this is a prefix byte, indirect to another table.
1690b57cec5SDimitry Andric   if (WasmInst->ET == ET_Prefix) {
1700b57cec5SDimitry Andric     WasmInst = nullptr;
1710b57cec5SDimitry Andric     // Linear search, so far only 2 entries.
1720b57cec5SDimitry Andric     for (auto PT = PrefixTable; PT->Table; PT++) {
1730b57cec5SDimitry Andric       if (PT->Prefix == Opc) {
1740b57cec5SDimitry Andric         WasmInst = PT->Table;
1750b57cec5SDimitry Andric         break;
1760b57cec5SDimitry Andric       }
1770b57cec5SDimitry Andric     }
1780b57cec5SDimitry Andric     if (!WasmInst)
1790b57cec5SDimitry Andric       return MCDisassembler::Fail;
1800b57cec5SDimitry Andric     int64_t PrefixedOpc;
1810b57cec5SDimitry Andric     if (!nextLEB(PrefixedOpc, Bytes, Size, false))
1820b57cec5SDimitry Andric       return MCDisassembler::Fail;
1830b57cec5SDimitry Andric     if (PrefixedOpc < 0 || PrefixedOpc >= WebAssemblyInstructionTableSize)
1840b57cec5SDimitry Andric       return MCDisassembler::Fail;
1850b57cec5SDimitry Andric     WasmInst += PrefixedOpc;
1860b57cec5SDimitry Andric   }
1870b57cec5SDimitry Andric   if (WasmInst->ET == ET_Unused)
1880b57cec5SDimitry Andric     return MCDisassembler::Fail;
1890b57cec5SDimitry Andric   // At this point we must have a valid instruction to decode.
1900b57cec5SDimitry Andric   assert(WasmInst->ET == ET_Instruction);
1910b57cec5SDimitry Andric   MI.setOpcode(WasmInst->Opcode);
1920b57cec5SDimitry Andric   // Parse any operands.
1930b57cec5SDimitry Andric   for (uint8_t OPI = 0; OPI < WasmInst->NumOperands; OPI++) {
1940b57cec5SDimitry Andric     auto OT = OperandTable[WasmInst->OperandStart + OPI];
1950b57cec5SDimitry Andric     switch (OT) {
1960b57cec5SDimitry Andric     // ULEB operands:
1970b57cec5SDimitry Andric     case WebAssembly::OPERAND_BASIC_BLOCK:
1980b57cec5SDimitry Andric     case WebAssembly::OPERAND_LOCAL:
1990b57cec5SDimitry Andric     case WebAssembly::OPERAND_GLOBAL:
2000b57cec5SDimitry Andric     case WebAssembly::OPERAND_FUNCTION32:
2010b57cec5SDimitry Andric     case WebAssembly::OPERAND_OFFSET32:
202*5ffd83dbSDimitry Andric     case WebAssembly::OPERAND_OFFSET64:
2030b57cec5SDimitry Andric     case WebAssembly::OPERAND_P2ALIGN:
2040b57cec5SDimitry Andric     case WebAssembly::OPERAND_TYPEINDEX:
2050b57cec5SDimitry Andric     case WebAssembly::OPERAND_EVENT:
2060b57cec5SDimitry Andric     case MCOI::OPERAND_IMMEDIATE: {
2070b57cec5SDimitry Andric       if (!parseLEBImmediate(MI, Size, Bytes, false))
2080b57cec5SDimitry Andric         return MCDisassembler::Fail;
2090b57cec5SDimitry Andric       break;
2100b57cec5SDimitry Andric     }
2110b57cec5SDimitry Andric     // SLEB operands:
2120b57cec5SDimitry Andric     case WebAssembly::OPERAND_I32IMM:
2130b57cec5SDimitry Andric     case WebAssembly::OPERAND_I64IMM: {
2140b57cec5SDimitry Andric       if (!parseLEBImmediate(MI, Size, Bytes, true))
2150b57cec5SDimitry Andric         return MCDisassembler::Fail;
2160b57cec5SDimitry Andric       break;
2170b57cec5SDimitry Andric     }
2188bcb0991SDimitry Andric     // block_type operands:
2190b57cec5SDimitry Andric     case WebAssembly::OPERAND_SIGNATURE: {
2208bcb0991SDimitry Andric       int64_t Val;
2218bcb0991SDimitry Andric       uint64_t PrevSize = Size;
2228bcb0991SDimitry Andric       if (!nextLEB(Val, Bytes, Size, true))
2230b57cec5SDimitry Andric         return MCDisassembler::Fail;
2248bcb0991SDimitry Andric       if (Val < 0) {
2258bcb0991SDimitry Andric         // Negative values are single septet value types or empty types
2268bcb0991SDimitry Andric         if (Size != PrevSize + 1) {
2278bcb0991SDimitry Andric           MI.addOperand(
2288bcb0991SDimitry Andric               MCOperand::createImm(int64_t(WebAssembly::BlockType::Invalid)));
2298bcb0991SDimitry Andric         } else {
2308bcb0991SDimitry Andric           MI.addOperand(MCOperand::createImm(Val & 0x7f));
2318bcb0991SDimitry Andric         }
2328bcb0991SDimitry Andric       } else {
2338bcb0991SDimitry Andric         // We don't have access to the signature, so create a symbol without one
2348bcb0991SDimitry Andric         MCSymbol *Sym = getContext().createTempSymbol("typeindex", true);
2358bcb0991SDimitry Andric         auto *WasmSym = cast<MCSymbolWasm>(Sym);
2368bcb0991SDimitry Andric         WasmSym->setType(wasm::WASM_SYMBOL_TYPE_FUNCTION);
2378bcb0991SDimitry Andric         const MCExpr *Expr = MCSymbolRefExpr::create(
2388bcb0991SDimitry Andric             WasmSym, MCSymbolRefExpr::VK_WASM_TYPEINDEX, getContext());
2398bcb0991SDimitry Andric         MI.addOperand(MCOperand::createExpr(Expr));
2408bcb0991SDimitry Andric       }
2410b57cec5SDimitry Andric       break;
2420b57cec5SDimitry Andric     }
2430b57cec5SDimitry Andric     // FP operands.
2440b57cec5SDimitry Andric     case WebAssembly::OPERAND_F32IMM: {
2450b57cec5SDimitry Andric       if (!parseImmediate<float>(MI, Size, Bytes))
2460b57cec5SDimitry Andric         return MCDisassembler::Fail;
2470b57cec5SDimitry Andric       break;
2480b57cec5SDimitry Andric     }
2490b57cec5SDimitry Andric     case WebAssembly::OPERAND_F64IMM: {
2500b57cec5SDimitry Andric       if (!parseImmediate<double>(MI, Size, Bytes))
2510b57cec5SDimitry Andric         return MCDisassembler::Fail;
2520b57cec5SDimitry Andric       break;
2530b57cec5SDimitry Andric     }
2540b57cec5SDimitry Andric     // Vector lane operands (not LEB encoded).
2550b57cec5SDimitry Andric     case WebAssembly::OPERAND_VEC_I8IMM: {
2560b57cec5SDimitry Andric       if (!parseImmediate<uint8_t>(MI, Size, Bytes))
2570b57cec5SDimitry Andric         return MCDisassembler::Fail;
2580b57cec5SDimitry Andric       break;
2590b57cec5SDimitry Andric     }
2600b57cec5SDimitry Andric     case WebAssembly::OPERAND_VEC_I16IMM: {
2610b57cec5SDimitry Andric       if (!parseImmediate<uint16_t>(MI, Size, Bytes))
2620b57cec5SDimitry Andric         return MCDisassembler::Fail;
2630b57cec5SDimitry Andric       break;
2640b57cec5SDimitry Andric     }
2650b57cec5SDimitry Andric     case WebAssembly::OPERAND_VEC_I32IMM: {
2660b57cec5SDimitry Andric       if (!parseImmediate<uint32_t>(MI, Size, Bytes))
2670b57cec5SDimitry Andric         return MCDisassembler::Fail;
2680b57cec5SDimitry Andric       break;
2690b57cec5SDimitry Andric     }
2700b57cec5SDimitry Andric     case WebAssembly::OPERAND_VEC_I64IMM: {
2710b57cec5SDimitry Andric       if (!parseImmediate<uint64_t>(MI, Size, Bytes))
2720b57cec5SDimitry Andric         return MCDisassembler::Fail;
2730b57cec5SDimitry Andric       break;
2740b57cec5SDimitry Andric     }
2750b57cec5SDimitry Andric     case WebAssembly::OPERAND_BRLIST: {
2760b57cec5SDimitry Andric       int64_t TargetTableLen;
2770b57cec5SDimitry Andric       if (!nextLEB(TargetTableLen, Bytes, Size, false))
2780b57cec5SDimitry Andric         return MCDisassembler::Fail;
2790b57cec5SDimitry Andric       for (int64_t I = 0; I < TargetTableLen; I++) {
2800b57cec5SDimitry Andric         if (!parseLEBImmediate(MI, Size, Bytes, false))
2810b57cec5SDimitry Andric           return MCDisassembler::Fail;
2820b57cec5SDimitry Andric       }
2830b57cec5SDimitry Andric       // Default case.
2840b57cec5SDimitry Andric       if (!parseLEBImmediate(MI, Size, Bytes, false))
2850b57cec5SDimitry Andric         return MCDisassembler::Fail;
2860b57cec5SDimitry Andric       break;
2870b57cec5SDimitry Andric     }
2880b57cec5SDimitry Andric     case MCOI::OPERAND_REGISTER:
2890b57cec5SDimitry Andric       // The tablegen header currently does not have any register operands since
2900b57cec5SDimitry Andric       // we use only the stack (_S) instructions.
2910b57cec5SDimitry Andric       // If you hit this that probably means a bad instruction definition in
2920b57cec5SDimitry Andric       // tablegen.
2930b57cec5SDimitry Andric       llvm_unreachable("Register operand in WebAssemblyDisassembler");
2940b57cec5SDimitry Andric     default:
2950b57cec5SDimitry Andric       llvm_unreachable("Unknown operand type in WebAssemblyDisassembler");
2960b57cec5SDimitry Andric     }
2970b57cec5SDimitry Andric   }
2980b57cec5SDimitry Andric   return MCDisassembler::Success;
2990b57cec5SDimitry Andric }
300