xref: /freebsd-src/contrib/llvm-project/llvm/lib/Target/WebAssembly/Disassembler/WebAssemblyDisassembler.cpp (revision 0fca6ea1d4eea4c934cfff25ac9ee8ad6fe95583)
10b57cec5SDimitry Andric //==- WebAssemblyDisassembler.cpp - Disassembler for WebAssembly -*- C++ -*-==//
20b57cec5SDimitry Andric //
30b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
40b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
50b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
60b57cec5SDimitry Andric //
70b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
80b57cec5SDimitry Andric ///
90b57cec5SDimitry Andric /// \file
100b57cec5SDimitry Andric /// This file is part of the WebAssembly Disassembler.
110b57cec5SDimitry Andric ///
120b57cec5SDimitry Andric /// It contains code to translate the data produced by the decoder into
130b57cec5SDimitry Andric /// MCInsts.
140b57cec5SDimitry Andric ///
150b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
160b57cec5SDimitry Andric 
1706c3fb27SDimitry Andric #include "MCTargetDesc/WebAssemblyMCTypeUtilities.h"
180b57cec5SDimitry Andric #include "TargetInfo/WebAssemblyTargetInfo.h"
19*0fca6ea1SDimitry Andric #include "llvm/BinaryFormat/Wasm.h"
200b57cec5SDimitry Andric #include "llvm/MC/MCContext.h"
2181ad6265SDimitry Andric #include "llvm/MC/MCDecoderOps.h"
220b57cec5SDimitry Andric #include "llvm/MC/MCDisassembler/MCDisassembler.h"
230b57cec5SDimitry Andric #include "llvm/MC/MCInst.h"
240b57cec5SDimitry Andric #include "llvm/MC/MCInstrInfo.h"
250b57cec5SDimitry Andric #include "llvm/MC/MCSubtargetInfo.h"
260b57cec5SDimitry Andric #include "llvm/MC/MCSymbol.h"
278bcb0991SDimitry Andric #include "llvm/MC/MCSymbolWasm.h"
28349cc55cSDimitry Andric #include "llvm/MC/TargetRegistry.h"
295f757f3fSDimitry Andric #include "llvm/Support/Casting.h"
300b57cec5SDimitry Andric #include "llvm/Support/Endian.h"
310b57cec5SDimitry Andric #include "llvm/Support/LEB128.h"
320b57cec5SDimitry Andric 
330b57cec5SDimitry Andric using namespace llvm;
340b57cec5SDimitry Andric 
350b57cec5SDimitry Andric #define DEBUG_TYPE "wasm-disassembler"
360b57cec5SDimitry Andric 
370b57cec5SDimitry Andric using DecodeStatus = MCDisassembler::DecodeStatus;
380b57cec5SDimitry Andric 
390b57cec5SDimitry Andric #include "WebAssemblyGenDisassemblerTables.inc"
400b57cec5SDimitry Andric 
410b57cec5SDimitry Andric namespace {
420b57cec5SDimitry Andric static constexpr int WebAssemblyInstructionTableSize = 256;
430b57cec5SDimitry Andric 
440b57cec5SDimitry Andric class WebAssemblyDisassembler final : public MCDisassembler {
450b57cec5SDimitry Andric   std::unique_ptr<const MCInstrInfo> MCII;
460b57cec5SDimitry Andric 
470b57cec5SDimitry Andric   DecodeStatus getInstruction(MCInst &Instr, uint64_t &Size,
480b57cec5SDimitry Andric                               ArrayRef<uint8_t> Bytes, uint64_t Address,
490b57cec5SDimitry Andric                               raw_ostream &CStream) const override;
50*0fca6ea1SDimitry Andric 
51*0fca6ea1SDimitry Andric   Expected<bool> onSymbolStart(SymbolInfoTy &Symbol, uint64_t &Size,
52*0fca6ea1SDimitry Andric                                ArrayRef<uint8_t> Bytes,
53*0fca6ea1SDimitry Andric                                uint64_t Address) const override;
540b57cec5SDimitry Andric 
550b57cec5SDimitry Andric public:
560b57cec5SDimitry Andric   WebAssemblyDisassembler(const MCSubtargetInfo &STI, MCContext &Ctx,
570b57cec5SDimitry Andric                           std::unique_ptr<const MCInstrInfo> MCII)
580b57cec5SDimitry Andric       : MCDisassembler(STI, Ctx), MCII(std::move(MCII)) {}
590b57cec5SDimitry Andric };
600b57cec5SDimitry Andric } // end anonymous namespace
610b57cec5SDimitry Andric 
620b57cec5SDimitry Andric static MCDisassembler *createWebAssemblyDisassembler(const Target &T,
630b57cec5SDimitry Andric                                                      const MCSubtargetInfo &STI,
640b57cec5SDimitry Andric                                                      MCContext &Ctx) {
650b57cec5SDimitry Andric   std::unique_ptr<const MCInstrInfo> MCII(T.createMCInstrInfo());
660b57cec5SDimitry Andric   return new WebAssemblyDisassembler(STI, Ctx, std::move(MCII));
670b57cec5SDimitry Andric }
680b57cec5SDimitry Andric 
69480093f4SDimitry Andric extern "C" LLVM_EXTERNAL_VISIBILITY void
70480093f4SDimitry Andric LLVMInitializeWebAssemblyDisassembler() {
710b57cec5SDimitry Andric   // Register the disassembler for each target.
720b57cec5SDimitry Andric   TargetRegistry::RegisterMCDisassembler(getTheWebAssemblyTarget32(),
730b57cec5SDimitry Andric                                          createWebAssemblyDisassembler);
740b57cec5SDimitry Andric   TargetRegistry::RegisterMCDisassembler(getTheWebAssemblyTarget64(),
750b57cec5SDimitry Andric                                          createWebAssemblyDisassembler);
760b57cec5SDimitry Andric }
770b57cec5SDimitry Andric 
780b57cec5SDimitry Andric static int nextByte(ArrayRef<uint8_t> Bytes, uint64_t &Size) {
790b57cec5SDimitry Andric   if (Size >= Bytes.size())
800b57cec5SDimitry Andric     return -1;
810b57cec5SDimitry Andric   auto V = Bytes[Size];
820b57cec5SDimitry Andric   Size++;
830b57cec5SDimitry Andric   return V;
840b57cec5SDimitry Andric }
850b57cec5SDimitry Andric 
860b57cec5SDimitry Andric static bool nextLEB(int64_t &Val, ArrayRef<uint8_t> Bytes, uint64_t &Size,
870b57cec5SDimitry Andric                     bool Signed) {
880b57cec5SDimitry Andric   unsigned N = 0;
890b57cec5SDimitry Andric   const char *Error = nullptr;
900b57cec5SDimitry Andric   Val = Signed ? decodeSLEB128(Bytes.data() + Size, &N,
910b57cec5SDimitry Andric                                Bytes.data() + Bytes.size(), &Error)
920b57cec5SDimitry Andric                : static_cast<int64_t>(decodeULEB128(Bytes.data() + Size, &N,
930b57cec5SDimitry Andric                                                     Bytes.data() + Bytes.size(),
940b57cec5SDimitry Andric                                                     &Error));
950b57cec5SDimitry Andric   if (Error)
960b57cec5SDimitry Andric     return false;
970b57cec5SDimitry Andric   Size += N;
980b57cec5SDimitry Andric   return true;
990b57cec5SDimitry Andric }
1000b57cec5SDimitry Andric 
1010b57cec5SDimitry Andric static bool parseLEBImmediate(MCInst &MI, uint64_t &Size,
1020b57cec5SDimitry Andric                               ArrayRef<uint8_t> Bytes, bool Signed) {
1030b57cec5SDimitry Andric   int64_t Val;
1040b57cec5SDimitry Andric   if (!nextLEB(Val, Bytes, Size, Signed))
1050b57cec5SDimitry Andric     return false;
1060b57cec5SDimitry Andric   MI.addOperand(MCOperand::createImm(Val));
1070b57cec5SDimitry Andric   return true;
1080b57cec5SDimitry Andric }
1090b57cec5SDimitry Andric 
1100b57cec5SDimitry Andric template <typename T>
1110b57cec5SDimitry Andric bool parseImmediate(MCInst &MI, uint64_t &Size, ArrayRef<uint8_t> Bytes) {
1120b57cec5SDimitry Andric   if (Size + sizeof(T) > Bytes.size())
1130b57cec5SDimitry Andric     return false;
1145f757f3fSDimitry Andric   T Val =
1155f757f3fSDimitry Andric       support::endian::read<T, llvm::endianness::little>(Bytes.data() + Size);
1160b57cec5SDimitry Andric   Size += sizeof(T);
1170b57cec5SDimitry Andric   if (std::is_floating_point<T>::value) {
118fe6060f1SDimitry Andric     MI.addOperand(
119fe6060f1SDimitry Andric         MCOperand::createDFPImm(bit_cast<uint64_t>(static_cast<double>(Val))));
1200b57cec5SDimitry Andric   } else {
1210b57cec5SDimitry Andric     MI.addOperand(MCOperand::createImm(static_cast<int64_t>(Val)));
1220b57cec5SDimitry Andric   }
1230b57cec5SDimitry Andric   return true;
1240b57cec5SDimitry Andric }
1250b57cec5SDimitry Andric 
126*0fca6ea1SDimitry Andric Expected<bool> WebAssemblyDisassembler::onSymbolStart(SymbolInfoTy &Symbol,
127*0fca6ea1SDimitry Andric                                                       uint64_t &Size,
128bdd1243dSDimitry Andric                                                       ArrayRef<uint8_t> Bytes,
129*0fca6ea1SDimitry Andric                                                       uint64_t Address) const {
1300b57cec5SDimitry Andric   Size = 0;
131*0fca6ea1SDimitry Andric   if (Symbol.Type == wasm::WASM_SYMBOL_TYPE_SECTION) {
1320b57cec5SDimitry Andric     // Start of a code section: we're parsing only the function count.
1330b57cec5SDimitry Andric     int64_t FunctionCount;
1340b57cec5SDimitry Andric     if (!nextLEB(FunctionCount, Bytes, Size, false))
135*0fca6ea1SDimitry Andric       return false;
1360b57cec5SDimitry Andric     outs() << "        # " << FunctionCount << " functions in section.";
1370b57cec5SDimitry Andric   } else {
1380b57cec5SDimitry Andric     // Parse the start of a single function.
1390b57cec5SDimitry Andric     int64_t BodySize, LocalEntryCount;
1400b57cec5SDimitry Andric     if (!nextLEB(BodySize, Bytes, Size, false) ||
1410b57cec5SDimitry Andric         !nextLEB(LocalEntryCount, Bytes, Size, false))
142*0fca6ea1SDimitry Andric       return false;
1430b57cec5SDimitry Andric     if (LocalEntryCount) {
1440b57cec5SDimitry Andric       outs() << "        .local ";
1450b57cec5SDimitry Andric       for (int64_t I = 0; I < LocalEntryCount; I++) {
1460b57cec5SDimitry Andric         int64_t Count, Type;
1470b57cec5SDimitry Andric         if (!nextLEB(Count, Bytes, Size, false) ||
1480b57cec5SDimitry Andric             !nextLEB(Type, Bytes, Size, false))
149*0fca6ea1SDimitry Andric           return false;
1500b57cec5SDimitry Andric         for (int64_t J = 0; J < Count; J++) {
1510b57cec5SDimitry Andric           if (I || J)
1520b57cec5SDimitry Andric             outs() << ", ";
1530b57cec5SDimitry Andric           outs() << WebAssembly::anyTypeToString(Type);
1540b57cec5SDimitry Andric         }
1550b57cec5SDimitry Andric       }
1560b57cec5SDimitry Andric     }
1570b57cec5SDimitry Andric   }
1580b57cec5SDimitry Andric   outs() << "\n";
159*0fca6ea1SDimitry Andric   return true;
1600b57cec5SDimitry Andric }
1610b57cec5SDimitry Andric 
1620b57cec5SDimitry Andric MCDisassembler::DecodeStatus WebAssemblyDisassembler::getInstruction(
1630b57cec5SDimitry Andric     MCInst &MI, uint64_t &Size, ArrayRef<uint8_t> Bytes, uint64_t /*Address*/,
164480093f4SDimitry Andric     raw_ostream &CS) const {
1650b57cec5SDimitry Andric   CommentStream = &CS;
1660b57cec5SDimitry Andric   Size = 0;
1670b57cec5SDimitry Andric   int Opc = nextByte(Bytes, Size);
1680b57cec5SDimitry Andric   if (Opc < 0)
1690b57cec5SDimitry Andric     return MCDisassembler::Fail;
1700b57cec5SDimitry Andric   const auto *WasmInst = &InstructionTable0[Opc];
1710b57cec5SDimitry Andric   // If this is a prefix byte, indirect to another table.
1720b57cec5SDimitry Andric   if (WasmInst->ET == ET_Prefix) {
1730b57cec5SDimitry Andric     WasmInst = nullptr;
1740b57cec5SDimitry Andric     // Linear search, so far only 2 entries.
1750b57cec5SDimitry Andric     for (auto PT = PrefixTable; PT->Table; PT++) {
1760b57cec5SDimitry Andric       if (PT->Prefix == Opc) {
1770b57cec5SDimitry Andric         WasmInst = PT->Table;
1780b57cec5SDimitry Andric         break;
1790b57cec5SDimitry Andric       }
1800b57cec5SDimitry Andric     }
1810b57cec5SDimitry Andric     if (!WasmInst)
1820b57cec5SDimitry Andric       return MCDisassembler::Fail;
1830b57cec5SDimitry Andric     int64_t PrefixedOpc;
1840b57cec5SDimitry Andric     if (!nextLEB(PrefixedOpc, Bytes, Size, false))
1850b57cec5SDimitry Andric       return MCDisassembler::Fail;
1860b57cec5SDimitry Andric     if (PrefixedOpc < 0 || PrefixedOpc >= WebAssemblyInstructionTableSize)
1870b57cec5SDimitry Andric       return MCDisassembler::Fail;
1880b57cec5SDimitry Andric     WasmInst += PrefixedOpc;
1890b57cec5SDimitry Andric   }
1900b57cec5SDimitry Andric   if (WasmInst->ET == ET_Unused)
1910b57cec5SDimitry Andric     return MCDisassembler::Fail;
1920b57cec5SDimitry Andric   // At this point we must have a valid instruction to decode.
1930b57cec5SDimitry Andric   assert(WasmInst->ET == ET_Instruction);
1940b57cec5SDimitry Andric   MI.setOpcode(WasmInst->Opcode);
1950b57cec5SDimitry Andric   // Parse any operands.
1960b57cec5SDimitry Andric   for (uint8_t OPI = 0; OPI < WasmInst->NumOperands; OPI++) {
1970b57cec5SDimitry Andric     auto OT = OperandTable[WasmInst->OperandStart + OPI];
1980b57cec5SDimitry Andric     switch (OT) {
1990b57cec5SDimitry Andric     // ULEB operands:
2000b57cec5SDimitry Andric     case WebAssembly::OPERAND_BASIC_BLOCK:
2010b57cec5SDimitry Andric     case WebAssembly::OPERAND_LOCAL:
2020b57cec5SDimitry Andric     case WebAssembly::OPERAND_GLOBAL:
2030b57cec5SDimitry Andric     case WebAssembly::OPERAND_FUNCTION32:
204e8d8bef9SDimitry Andric     case WebAssembly::OPERAND_TABLE:
2050b57cec5SDimitry Andric     case WebAssembly::OPERAND_OFFSET32:
2065ffd83dbSDimitry Andric     case WebAssembly::OPERAND_OFFSET64:
2070b57cec5SDimitry Andric     case WebAssembly::OPERAND_P2ALIGN:
2080b57cec5SDimitry Andric     case WebAssembly::OPERAND_TYPEINDEX:
209fe6060f1SDimitry Andric     case WebAssembly::OPERAND_TAG:
2100b57cec5SDimitry Andric     case MCOI::OPERAND_IMMEDIATE: {
2110b57cec5SDimitry Andric       if (!parseLEBImmediate(MI, Size, Bytes, false))
2120b57cec5SDimitry Andric         return MCDisassembler::Fail;
2130b57cec5SDimitry Andric       break;
2140b57cec5SDimitry Andric     }
2150b57cec5SDimitry Andric     // SLEB operands:
2160b57cec5SDimitry Andric     case WebAssembly::OPERAND_I32IMM:
2170b57cec5SDimitry Andric     case WebAssembly::OPERAND_I64IMM: {
2180b57cec5SDimitry Andric       if (!parseLEBImmediate(MI, Size, Bytes, true))
2190b57cec5SDimitry Andric         return MCDisassembler::Fail;
2200b57cec5SDimitry Andric       break;
2210b57cec5SDimitry Andric     }
2228bcb0991SDimitry Andric     // block_type operands:
2230b57cec5SDimitry Andric     case WebAssembly::OPERAND_SIGNATURE: {
2248bcb0991SDimitry Andric       int64_t Val;
2258bcb0991SDimitry Andric       uint64_t PrevSize = Size;
2268bcb0991SDimitry Andric       if (!nextLEB(Val, Bytes, Size, true))
2270b57cec5SDimitry Andric         return MCDisassembler::Fail;
2288bcb0991SDimitry Andric       if (Val < 0) {
2298bcb0991SDimitry Andric         // Negative values are single septet value types or empty types
2308bcb0991SDimitry Andric         if (Size != PrevSize + 1) {
2318bcb0991SDimitry Andric           MI.addOperand(
2328bcb0991SDimitry Andric               MCOperand::createImm(int64_t(WebAssembly::BlockType::Invalid)));
2338bcb0991SDimitry Andric         } else {
2348bcb0991SDimitry Andric           MI.addOperand(MCOperand::createImm(Val & 0x7f));
2358bcb0991SDimitry Andric         }
2368bcb0991SDimitry Andric       } else {
2378bcb0991SDimitry Andric         // We don't have access to the signature, so create a symbol without one
2388bcb0991SDimitry Andric         MCSymbol *Sym = getContext().createTempSymbol("typeindex", true);
2398bcb0991SDimitry Andric         auto *WasmSym = cast<MCSymbolWasm>(Sym);
2408bcb0991SDimitry Andric         WasmSym->setType(wasm::WASM_SYMBOL_TYPE_FUNCTION);
2418bcb0991SDimitry Andric         const MCExpr *Expr = MCSymbolRefExpr::create(
2428bcb0991SDimitry Andric             WasmSym, MCSymbolRefExpr::VK_WASM_TYPEINDEX, getContext());
2438bcb0991SDimitry Andric         MI.addOperand(MCOperand::createExpr(Expr));
2448bcb0991SDimitry Andric       }
2450b57cec5SDimitry Andric       break;
2460b57cec5SDimitry Andric     }
2470b57cec5SDimitry Andric     // FP operands.
2480b57cec5SDimitry Andric     case WebAssembly::OPERAND_F32IMM: {
2490b57cec5SDimitry Andric       if (!parseImmediate<float>(MI, Size, Bytes))
2500b57cec5SDimitry Andric         return MCDisassembler::Fail;
2510b57cec5SDimitry Andric       break;
2520b57cec5SDimitry Andric     }
2530b57cec5SDimitry Andric     case WebAssembly::OPERAND_F64IMM: {
2540b57cec5SDimitry Andric       if (!parseImmediate<double>(MI, Size, Bytes))
2550b57cec5SDimitry Andric         return MCDisassembler::Fail;
2560b57cec5SDimitry Andric       break;
2570b57cec5SDimitry Andric     }
2580b57cec5SDimitry Andric     // Vector lane operands (not LEB encoded).
2590b57cec5SDimitry Andric     case WebAssembly::OPERAND_VEC_I8IMM: {
2600b57cec5SDimitry Andric       if (!parseImmediate<uint8_t>(MI, Size, Bytes))
2610b57cec5SDimitry Andric         return MCDisassembler::Fail;
2620b57cec5SDimitry Andric       break;
2630b57cec5SDimitry Andric     }
2640b57cec5SDimitry Andric     case WebAssembly::OPERAND_VEC_I16IMM: {
2650b57cec5SDimitry Andric       if (!parseImmediate<uint16_t>(MI, Size, Bytes))
2660b57cec5SDimitry Andric         return MCDisassembler::Fail;
2670b57cec5SDimitry Andric       break;
2680b57cec5SDimitry Andric     }
2690b57cec5SDimitry Andric     case WebAssembly::OPERAND_VEC_I32IMM: {
2700b57cec5SDimitry Andric       if (!parseImmediate<uint32_t>(MI, Size, Bytes))
2710b57cec5SDimitry Andric         return MCDisassembler::Fail;
2720b57cec5SDimitry Andric       break;
2730b57cec5SDimitry Andric     }
2740b57cec5SDimitry Andric     case WebAssembly::OPERAND_VEC_I64IMM: {
2750b57cec5SDimitry Andric       if (!parseImmediate<uint64_t>(MI, Size, Bytes))
2760b57cec5SDimitry Andric         return MCDisassembler::Fail;
2770b57cec5SDimitry Andric       break;
2780b57cec5SDimitry Andric     }
2790b57cec5SDimitry Andric     case WebAssembly::OPERAND_BRLIST: {
2800b57cec5SDimitry Andric       int64_t TargetTableLen;
2810b57cec5SDimitry Andric       if (!nextLEB(TargetTableLen, Bytes, Size, false))
2820b57cec5SDimitry Andric         return MCDisassembler::Fail;
2830b57cec5SDimitry Andric       for (int64_t I = 0; I < TargetTableLen; I++) {
2840b57cec5SDimitry Andric         if (!parseLEBImmediate(MI, Size, Bytes, false))
2850b57cec5SDimitry Andric           return MCDisassembler::Fail;
2860b57cec5SDimitry Andric       }
2870b57cec5SDimitry Andric       // Default case.
2880b57cec5SDimitry Andric       if (!parseLEBImmediate(MI, Size, Bytes, false))
2890b57cec5SDimitry Andric         return MCDisassembler::Fail;
2900b57cec5SDimitry Andric       break;
2910b57cec5SDimitry Andric     }
2920b57cec5SDimitry Andric     case MCOI::OPERAND_REGISTER:
2930b57cec5SDimitry Andric       // The tablegen header currently does not have any register operands since
2940b57cec5SDimitry Andric       // we use only the stack (_S) instructions.
2950b57cec5SDimitry Andric       // If you hit this that probably means a bad instruction definition in
2960b57cec5SDimitry Andric       // tablegen.
2970b57cec5SDimitry Andric       llvm_unreachable("Register operand in WebAssemblyDisassembler");
2980b57cec5SDimitry Andric     default:
2990b57cec5SDimitry Andric       llvm_unreachable("Unknown operand type in WebAssemblyDisassembler");
3000b57cec5SDimitry Andric     }
3010b57cec5SDimitry Andric   }
3020b57cec5SDimitry Andric   return MCDisassembler::Success;
3030b57cec5SDimitry Andric }
304