10b57cec5SDimitry Andric //==- WebAssemblyDisassembler.cpp - Disassembler for WebAssembly -*- C++ -*-==// 20b57cec5SDimitry Andric // 30b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 40b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information. 50b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 60b57cec5SDimitry Andric // 70b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 80b57cec5SDimitry Andric /// 90b57cec5SDimitry Andric /// \file 100b57cec5SDimitry Andric /// This file is part of the WebAssembly Disassembler. 110b57cec5SDimitry Andric /// 120b57cec5SDimitry Andric /// It contains code to translate the data produced by the decoder into 130b57cec5SDimitry Andric /// MCInsts. 140b57cec5SDimitry Andric /// 150b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 160b57cec5SDimitry Andric 170b57cec5SDimitry Andric #include "MCTargetDesc/WebAssemblyInstPrinter.h" 180b57cec5SDimitry Andric #include "MCTargetDesc/WebAssemblyMCTargetDesc.h" 190b57cec5SDimitry Andric #include "TargetInfo/WebAssemblyTargetInfo.h" 200b57cec5SDimitry Andric #include "llvm/MC/MCContext.h" 210b57cec5SDimitry Andric #include "llvm/MC/MCDisassembler/MCDisassembler.h" 220b57cec5SDimitry Andric #include "llvm/MC/MCFixedLenDisassembler.h" 230b57cec5SDimitry Andric #include "llvm/MC/MCInst.h" 240b57cec5SDimitry Andric #include "llvm/MC/MCInstrInfo.h" 250b57cec5SDimitry Andric #include "llvm/MC/MCSubtargetInfo.h" 260b57cec5SDimitry Andric #include "llvm/MC/MCSymbol.h" 278bcb0991SDimitry Andric #include "llvm/MC/MCSymbolWasm.h" 280b57cec5SDimitry Andric #include "llvm/Support/Endian.h" 290b57cec5SDimitry Andric #include "llvm/Support/LEB128.h" 300b57cec5SDimitry Andric #include "llvm/Support/TargetRegistry.h" 310b57cec5SDimitry Andric 320b57cec5SDimitry Andric using namespace llvm; 330b57cec5SDimitry Andric 340b57cec5SDimitry Andric #define DEBUG_TYPE "wasm-disassembler" 350b57cec5SDimitry Andric 360b57cec5SDimitry Andric using DecodeStatus = MCDisassembler::DecodeStatus; 370b57cec5SDimitry Andric 380b57cec5SDimitry Andric #include "WebAssemblyGenDisassemblerTables.inc" 390b57cec5SDimitry Andric 400b57cec5SDimitry Andric namespace { 410b57cec5SDimitry Andric static constexpr int WebAssemblyInstructionTableSize = 256; 420b57cec5SDimitry Andric 430b57cec5SDimitry Andric class WebAssemblyDisassembler final : public MCDisassembler { 440b57cec5SDimitry Andric std::unique_ptr<const MCInstrInfo> MCII; 450b57cec5SDimitry Andric 460b57cec5SDimitry Andric DecodeStatus getInstruction(MCInst &Instr, uint64_t &Size, 470b57cec5SDimitry Andric ArrayRef<uint8_t> Bytes, uint64_t Address, 480b57cec5SDimitry Andric raw_ostream &CStream) const override; 495ffd83dbSDimitry Andric Optional<DecodeStatus> onSymbolStart(SymbolInfoTy &Symbol, uint64_t &Size, 505ffd83dbSDimitry Andric ArrayRef<uint8_t> Bytes, 515ffd83dbSDimitry Andric uint64_t Address, 520b57cec5SDimitry Andric raw_ostream &CStream) const override; 530b57cec5SDimitry Andric 540b57cec5SDimitry Andric public: 550b57cec5SDimitry Andric WebAssemblyDisassembler(const MCSubtargetInfo &STI, MCContext &Ctx, 560b57cec5SDimitry Andric std::unique_ptr<const MCInstrInfo> MCII) 570b57cec5SDimitry Andric : MCDisassembler(STI, Ctx), MCII(std::move(MCII)) {} 580b57cec5SDimitry Andric }; 590b57cec5SDimitry Andric } // end anonymous namespace 600b57cec5SDimitry Andric 610b57cec5SDimitry Andric static MCDisassembler *createWebAssemblyDisassembler(const Target &T, 620b57cec5SDimitry Andric const MCSubtargetInfo &STI, 630b57cec5SDimitry Andric MCContext &Ctx) { 640b57cec5SDimitry Andric std::unique_ptr<const MCInstrInfo> MCII(T.createMCInstrInfo()); 650b57cec5SDimitry Andric return new WebAssemblyDisassembler(STI, Ctx, std::move(MCII)); 660b57cec5SDimitry Andric } 670b57cec5SDimitry Andric 68480093f4SDimitry Andric extern "C" LLVM_EXTERNAL_VISIBILITY void 69480093f4SDimitry Andric LLVMInitializeWebAssemblyDisassembler() { 700b57cec5SDimitry Andric // Register the disassembler for each target. 710b57cec5SDimitry Andric TargetRegistry::RegisterMCDisassembler(getTheWebAssemblyTarget32(), 720b57cec5SDimitry Andric createWebAssemblyDisassembler); 730b57cec5SDimitry Andric TargetRegistry::RegisterMCDisassembler(getTheWebAssemblyTarget64(), 740b57cec5SDimitry Andric createWebAssemblyDisassembler); 750b57cec5SDimitry Andric } 760b57cec5SDimitry Andric 770b57cec5SDimitry Andric static int nextByte(ArrayRef<uint8_t> Bytes, uint64_t &Size) { 780b57cec5SDimitry Andric if (Size >= Bytes.size()) 790b57cec5SDimitry Andric return -1; 800b57cec5SDimitry Andric auto V = Bytes[Size]; 810b57cec5SDimitry Andric Size++; 820b57cec5SDimitry Andric return V; 830b57cec5SDimitry Andric } 840b57cec5SDimitry Andric 850b57cec5SDimitry Andric static bool nextLEB(int64_t &Val, ArrayRef<uint8_t> Bytes, uint64_t &Size, 860b57cec5SDimitry Andric bool Signed) { 870b57cec5SDimitry Andric unsigned N = 0; 880b57cec5SDimitry Andric const char *Error = nullptr; 890b57cec5SDimitry Andric Val = Signed ? decodeSLEB128(Bytes.data() + Size, &N, 900b57cec5SDimitry Andric Bytes.data() + Bytes.size(), &Error) 910b57cec5SDimitry Andric : static_cast<int64_t>(decodeULEB128(Bytes.data() + Size, &N, 920b57cec5SDimitry Andric Bytes.data() + Bytes.size(), 930b57cec5SDimitry Andric &Error)); 940b57cec5SDimitry Andric if (Error) 950b57cec5SDimitry Andric return false; 960b57cec5SDimitry Andric Size += N; 970b57cec5SDimitry Andric return true; 980b57cec5SDimitry Andric } 990b57cec5SDimitry Andric 1000b57cec5SDimitry Andric static bool parseLEBImmediate(MCInst &MI, uint64_t &Size, 1010b57cec5SDimitry Andric ArrayRef<uint8_t> Bytes, bool Signed) { 1020b57cec5SDimitry Andric int64_t Val; 1030b57cec5SDimitry Andric if (!nextLEB(Val, Bytes, Size, Signed)) 1040b57cec5SDimitry Andric return false; 1050b57cec5SDimitry Andric MI.addOperand(MCOperand::createImm(Val)); 1060b57cec5SDimitry Andric return true; 1070b57cec5SDimitry Andric } 1080b57cec5SDimitry Andric 1090b57cec5SDimitry Andric template <typename T> 1100b57cec5SDimitry Andric bool parseImmediate(MCInst &MI, uint64_t &Size, ArrayRef<uint8_t> Bytes) { 1110b57cec5SDimitry Andric if (Size + sizeof(T) > Bytes.size()) 1120b57cec5SDimitry Andric return false; 1130b57cec5SDimitry Andric T Val = support::endian::read<T, support::endianness::little, 1>( 1140b57cec5SDimitry Andric Bytes.data() + Size); 1150b57cec5SDimitry Andric Size += sizeof(T); 1160b57cec5SDimitry Andric if (std::is_floating_point<T>::value) { 1170b57cec5SDimitry Andric MI.addOperand(MCOperand::createFPImm(static_cast<double>(Val))); 1180b57cec5SDimitry Andric } else { 1190b57cec5SDimitry Andric MI.addOperand(MCOperand::createImm(static_cast<int64_t>(Val))); 1200b57cec5SDimitry Andric } 1210b57cec5SDimitry Andric return true; 1220b57cec5SDimitry Andric } 1230b57cec5SDimitry Andric 1245ffd83dbSDimitry Andric Optional<MCDisassembler::DecodeStatus> WebAssemblyDisassembler::onSymbolStart( 1255ffd83dbSDimitry Andric SymbolInfoTy &Symbol, uint64_t &Size, ArrayRef<uint8_t> Bytes, 1265ffd83dbSDimitry Andric uint64_t Address, raw_ostream &CStream) const { 1270b57cec5SDimitry Andric Size = 0; 1280b57cec5SDimitry Andric if (Address == 0) { 1290b57cec5SDimitry Andric // Start of a code section: we're parsing only the function count. 1300b57cec5SDimitry Andric int64_t FunctionCount; 1310b57cec5SDimitry Andric if (!nextLEB(FunctionCount, Bytes, Size, false)) 1325ffd83dbSDimitry Andric return None; 1330b57cec5SDimitry Andric outs() << " # " << FunctionCount << " functions in section."; 1340b57cec5SDimitry Andric } else { 1350b57cec5SDimitry Andric // Parse the start of a single function. 1360b57cec5SDimitry Andric int64_t BodySize, LocalEntryCount; 1370b57cec5SDimitry Andric if (!nextLEB(BodySize, Bytes, Size, false) || 1380b57cec5SDimitry Andric !nextLEB(LocalEntryCount, Bytes, Size, false)) 1395ffd83dbSDimitry Andric return None; 1400b57cec5SDimitry Andric if (LocalEntryCount) { 1410b57cec5SDimitry Andric outs() << " .local "; 1420b57cec5SDimitry Andric for (int64_t I = 0; I < LocalEntryCount; I++) { 1430b57cec5SDimitry Andric int64_t Count, Type; 1440b57cec5SDimitry Andric if (!nextLEB(Count, Bytes, Size, false) || 1450b57cec5SDimitry Andric !nextLEB(Type, Bytes, Size, false)) 1465ffd83dbSDimitry Andric return None; 1470b57cec5SDimitry Andric for (int64_t J = 0; J < Count; J++) { 1480b57cec5SDimitry Andric if (I || J) 1490b57cec5SDimitry Andric outs() << ", "; 1500b57cec5SDimitry Andric outs() << WebAssembly::anyTypeToString(Type); 1510b57cec5SDimitry Andric } 1520b57cec5SDimitry Andric } 1530b57cec5SDimitry Andric } 1540b57cec5SDimitry Andric } 1550b57cec5SDimitry Andric outs() << "\n"; 1560b57cec5SDimitry Andric return MCDisassembler::Success; 1570b57cec5SDimitry Andric } 1580b57cec5SDimitry Andric 1590b57cec5SDimitry Andric MCDisassembler::DecodeStatus WebAssemblyDisassembler::getInstruction( 1600b57cec5SDimitry Andric MCInst &MI, uint64_t &Size, ArrayRef<uint8_t> Bytes, uint64_t /*Address*/, 161480093f4SDimitry Andric raw_ostream &CS) const { 1620b57cec5SDimitry Andric CommentStream = &CS; 1630b57cec5SDimitry Andric Size = 0; 1640b57cec5SDimitry Andric int Opc = nextByte(Bytes, Size); 1650b57cec5SDimitry Andric if (Opc < 0) 1660b57cec5SDimitry Andric return MCDisassembler::Fail; 1670b57cec5SDimitry Andric const auto *WasmInst = &InstructionTable0[Opc]; 1680b57cec5SDimitry Andric // If this is a prefix byte, indirect to another table. 1690b57cec5SDimitry Andric if (WasmInst->ET == ET_Prefix) { 1700b57cec5SDimitry Andric WasmInst = nullptr; 1710b57cec5SDimitry Andric // Linear search, so far only 2 entries. 1720b57cec5SDimitry Andric for (auto PT = PrefixTable; PT->Table; PT++) { 1730b57cec5SDimitry Andric if (PT->Prefix == Opc) { 1740b57cec5SDimitry Andric WasmInst = PT->Table; 1750b57cec5SDimitry Andric break; 1760b57cec5SDimitry Andric } 1770b57cec5SDimitry Andric } 1780b57cec5SDimitry Andric if (!WasmInst) 1790b57cec5SDimitry Andric return MCDisassembler::Fail; 1800b57cec5SDimitry Andric int64_t PrefixedOpc; 1810b57cec5SDimitry Andric if (!nextLEB(PrefixedOpc, Bytes, Size, false)) 1820b57cec5SDimitry Andric return MCDisassembler::Fail; 1830b57cec5SDimitry Andric if (PrefixedOpc < 0 || PrefixedOpc >= WebAssemblyInstructionTableSize) 1840b57cec5SDimitry Andric return MCDisassembler::Fail; 1850b57cec5SDimitry Andric WasmInst += PrefixedOpc; 1860b57cec5SDimitry Andric } 1870b57cec5SDimitry Andric if (WasmInst->ET == ET_Unused) 1880b57cec5SDimitry Andric return MCDisassembler::Fail; 1890b57cec5SDimitry Andric // At this point we must have a valid instruction to decode. 1900b57cec5SDimitry Andric assert(WasmInst->ET == ET_Instruction); 1910b57cec5SDimitry Andric MI.setOpcode(WasmInst->Opcode); 1920b57cec5SDimitry Andric // Parse any operands. 1930b57cec5SDimitry Andric for (uint8_t OPI = 0; OPI < WasmInst->NumOperands; OPI++) { 1940b57cec5SDimitry Andric auto OT = OperandTable[WasmInst->OperandStart + OPI]; 1950b57cec5SDimitry Andric switch (OT) { 1960b57cec5SDimitry Andric // ULEB operands: 1970b57cec5SDimitry Andric case WebAssembly::OPERAND_BASIC_BLOCK: 1980b57cec5SDimitry Andric case WebAssembly::OPERAND_LOCAL: 1990b57cec5SDimitry Andric case WebAssembly::OPERAND_GLOBAL: 2000b57cec5SDimitry Andric case WebAssembly::OPERAND_FUNCTION32: 201*e8d8bef9SDimitry Andric case WebAssembly::OPERAND_TABLE: 2020b57cec5SDimitry Andric case WebAssembly::OPERAND_OFFSET32: 2035ffd83dbSDimitry Andric case WebAssembly::OPERAND_OFFSET64: 2040b57cec5SDimitry Andric case WebAssembly::OPERAND_P2ALIGN: 2050b57cec5SDimitry Andric case WebAssembly::OPERAND_TYPEINDEX: 2060b57cec5SDimitry Andric case WebAssembly::OPERAND_EVENT: 2070b57cec5SDimitry Andric case MCOI::OPERAND_IMMEDIATE: { 2080b57cec5SDimitry Andric if (!parseLEBImmediate(MI, Size, Bytes, false)) 2090b57cec5SDimitry Andric return MCDisassembler::Fail; 2100b57cec5SDimitry Andric break; 2110b57cec5SDimitry Andric } 2120b57cec5SDimitry Andric // SLEB operands: 2130b57cec5SDimitry Andric case WebAssembly::OPERAND_I32IMM: 2140b57cec5SDimitry Andric case WebAssembly::OPERAND_I64IMM: { 2150b57cec5SDimitry Andric if (!parseLEBImmediate(MI, Size, Bytes, true)) 2160b57cec5SDimitry Andric return MCDisassembler::Fail; 2170b57cec5SDimitry Andric break; 2180b57cec5SDimitry Andric } 2198bcb0991SDimitry Andric // block_type operands: 2200b57cec5SDimitry Andric case WebAssembly::OPERAND_SIGNATURE: { 2218bcb0991SDimitry Andric int64_t Val; 2228bcb0991SDimitry Andric uint64_t PrevSize = Size; 2238bcb0991SDimitry Andric if (!nextLEB(Val, Bytes, Size, true)) 2240b57cec5SDimitry Andric return MCDisassembler::Fail; 2258bcb0991SDimitry Andric if (Val < 0) { 2268bcb0991SDimitry Andric // Negative values are single septet value types or empty types 2278bcb0991SDimitry Andric if (Size != PrevSize + 1) { 2288bcb0991SDimitry Andric MI.addOperand( 2298bcb0991SDimitry Andric MCOperand::createImm(int64_t(WebAssembly::BlockType::Invalid))); 2308bcb0991SDimitry Andric } else { 2318bcb0991SDimitry Andric MI.addOperand(MCOperand::createImm(Val & 0x7f)); 2328bcb0991SDimitry Andric } 2338bcb0991SDimitry Andric } else { 2348bcb0991SDimitry Andric // We don't have access to the signature, so create a symbol without one 2358bcb0991SDimitry Andric MCSymbol *Sym = getContext().createTempSymbol("typeindex", true); 2368bcb0991SDimitry Andric auto *WasmSym = cast<MCSymbolWasm>(Sym); 2378bcb0991SDimitry Andric WasmSym->setType(wasm::WASM_SYMBOL_TYPE_FUNCTION); 2388bcb0991SDimitry Andric const MCExpr *Expr = MCSymbolRefExpr::create( 2398bcb0991SDimitry Andric WasmSym, MCSymbolRefExpr::VK_WASM_TYPEINDEX, getContext()); 2408bcb0991SDimitry Andric MI.addOperand(MCOperand::createExpr(Expr)); 2418bcb0991SDimitry Andric } 2420b57cec5SDimitry Andric break; 2430b57cec5SDimitry Andric } 244*e8d8bef9SDimitry Andric // heap_type operands, for e.g. ref.null: 245*e8d8bef9SDimitry Andric case WebAssembly::OPERAND_HEAPTYPE: { 246*e8d8bef9SDimitry Andric int64_t Val; 247*e8d8bef9SDimitry Andric uint64_t PrevSize = Size; 248*e8d8bef9SDimitry Andric if (!nextLEB(Val, Bytes, Size, true)) 249*e8d8bef9SDimitry Andric return MCDisassembler::Fail; 250*e8d8bef9SDimitry Andric if (Val < 0 && Size == PrevSize + 1) { 251*e8d8bef9SDimitry Andric // The HeapType encoding is like BlockType, in that encodings that 252*e8d8bef9SDimitry Andric // decode as negative values indicate ValTypes. In practice we expect 253*e8d8bef9SDimitry Andric // either wasm::ValType::EXTERNREF or wasm::ValType::FUNCREF here. 254*e8d8bef9SDimitry Andric // 255*e8d8bef9SDimitry Andric // The positive SLEB values are reserved for future expansion and are 256*e8d8bef9SDimitry Andric // expected to be type indices in the typed function references 257*e8d8bef9SDimitry Andric // proposal, and should disassemble as MCSymbolRefExpr as in BlockType 258*e8d8bef9SDimitry Andric // above. 259*e8d8bef9SDimitry Andric MI.addOperand(MCOperand::createImm(Val & 0x7f)); 260*e8d8bef9SDimitry Andric } else { 261*e8d8bef9SDimitry Andric MI.addOperand( 262*e8d8bef9SDimitry Andric MCOperand::createImm(int64_t(WebAssembly::HeapType::Invalid))); 263*e8d8bef9SDimitry Andric } 264*e8d8bef9SDimitry Andric break; 265*e8d8bef9SDimitry Andric } 2660b57cec5SDimitry Andric // FP operands. 2670b57cec5SDimitry Andric case WebAssembly::OPERAND_F32IMM: { 2680b57cec5SDimitry Andric if (!parseImmediate<float>(MI, Size, Bytes)) 2690b57cec5SDimitry Andric return MCDisassembler::Fail; 2700b57cec5SDimitry Andric break; 2710b57cec5SDimitry Andric } 2720b57cec5SDimitry Andric case WebAssembly::OPERAND_F64IMM: { 2730b57cec5SDimitry Andric if (!parseImmediate<double>(MI, Size, Bytes)) 2740b57cec5SDimitry Andric return MCDisassembler::Fail; 2750b57cec5SDimitry Andric break; 2760b57cec5SDimitry Andric } 2770b57cec5SDimitry Andric // Vector lane operands (not LEB encoded). 2780b57cec5SDimitry Andric case WebAssembly::OPERAND_VEC_I8IMM: { 2790b57cec5SDimitry Andric if (!parseImmediate<uint8_t>(MI, Size, Bytes)) 2800b57cec5SDimitry Andric return MCDisassembler::Fail; 2810b57cec5SDimitry Andric break; 2820b57cec5SDimitry Andric } 2830b57cec5SDimitry Andric case WebAssembly::OPERAND_VEC_I16IMM: { 2840b57cec5SDimitry Andric if (!parseImmediate<uint16_t>(MI, Size, Bytes)) 2850b57cec5SDimitry Andric return MCDisassembler::Fail; 2860b57cec5SDimitry Andric break; 2870b57cec5SDimitry Andric } 2880b57cec5SDimitry Andric case WebAssembly::OPERAND_VEC_I32IMM: { 2890b57cec5SDimitry Andric if (!parseImmediate<uint32_t>(MI, Size, Bytes)) 2900b57cec5SDimitry Andric return MCDisassembler::Fail; 2910b57cec5SDimitry Andric break; 2920b57cec5SDimitry Andric } 2930b57cec5SDimitry Andric case WebAssembly::OPERAND_VEC_I64IMM: { 2940b57cec5SDimitry Andric if (!parseImmediate<uint64_t>(MI, Size, Bytes)) 2950b57cec5SDimitry Andric return MCDisassembler::Fail; 2960b57cec5SDimitry Andric break; 2970b57cec5SDimitry Andric } 2980b57cec5SDimitry Andric case WebAssembly::OPERAND_BRLIST: { 2990b57cec5SDimitry Andric int64_t TargetTableLen; 3000b57cec5SDimitry Andric if (!nextLEB(TargetTableLen, Bytes, Size, false)) 3010b57cec5SDimitry Andric return MCDisassembler::Fail; 3020b57cec5SDimitry Andric for (int64_t I = 0; I < TargetTableLen; I++) { 3030b57cec5SDimitry Andric if (!parseLEBImmediate(MI, Size, Bytes, false)) 3040b57cec5SDimitry Andric return MCDisassembler::Fail; 3050b57cec5SDimitry Andric } 3060b57cec5SDimitry Andric // Default case. 3070b57cec5SDimitry Andric if (!parseLEBImmediate(MI, Size, Bytes, false)) 3080b57cec5SDimitry Andric return MCDisassembler::Fail; 3090b57cec5SDimitry Andric break; 3100b57cec5SDimitry Andric } 3110b57cec5SDimitry Andric case MCOI::OPERAND_REGISTER: 3120b57cec5SDimitry Andric // The tablegen header currently does not have any register operands since 3130b57cec5SDimitry Andric // we use only the stack (_S) instructions. 3140b57cec5SDimitry Andric // If you hit this that probably means a bad instruction definition in 3150b57cec5SDimitry Andric // tablegen. 3160b57cec5SDimitry Andric llvm_unreachable("Register operand in WebAssemblyDisassembler"); 3170b57cec5SDimitry Andric default: 3180b57cec5SDimitry Andric llvm_unreachable("Unknown operand type in WebAssemblyDisassembler"); 3190b57cec5SDimitry Andric } 3200b57cec5SDimitry Andric } 3210b57cec5SDimitry Andric return MCDisassembler::Success; 3220b57cec5SDimitry Andric } 323