//==- WebAssemblyDisassembler.cpp - Disassembler for WebAssembly -*- C++ -*-==//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
/// \file
/// This file is part of the WebAssembly Disassembler.
///
/// It contains code to translate the data produced by the decoder into
/// MCInsts.
///
//===----------------------------------------------------------------------===//

#include "MCTargetDesc/WebAssemblyInstPrinter.h"
#include "MCTargetDesc/WebAssemblyMCTargetDesc.h"
#include "TargetInfo/WebAssemblyTargetInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCDisassembler/MCDisassembler.h"
#include "llvm/MC/MCFixedLenDisassembler.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/MC/MCSymbolWasm.h"
#include "llvm/Support/Endian.h"
#include "llvm/Support/LEB128.h"
#include "llvm/Support/TargetRegistry.h"

using namespace llvm;
330b57cec5SDimitry Andric 340b57cec5SDimitry Andric #define DEBUG_TYPE "wasm-disassembler" 350b57cec5SDimitry Andric 360b57cec5SDimitry Andric using DecodeStatus = MCDisassembler::DecodeStatus; 370b57cec5SDimitry Andric 380b57cec5SDimitry Andric #include "WebAssemblyGenDisassemblerTables.inc" 390b57cec5SDimitry Andric 400b57cec5SDimitry Andric namespace { 410b57cec5SDimitry Andric static constexpr int WebAssemblyInstructionTableSize = 256; 420b57cec5SDimitry Andric 430b57cec5SDimitry Andric class WebAssemblyDisassembler final : public MCDisassembler { 440b57cec5SDimitry Andric std::unique_ptr<const MCInstrInfo> MCII; 450b57cec5SDimitry Andric 460b57cec5SDimitry Andric DecodeStatus getInstruction(MCInst &Instr, uint64_t &Size, 470b57cec5SDimitry Andric ArrayRef<uint8_t> Bytes, uint64_t Address, 480b57cec5SDimitry Andric raw_ostream &CStream) const override; 49*5ffd83dbSDimitry Andric Optional<DecodeStatus> onSymbolStart(SymbolInfoTy &Symbol, uint64_t &Size, 50*5ffd83dbSDimitry Andric ArrayRef<uint8_t> Bytes, 51*5ffd83dbSDimitry Andric uint64_t Address, 520b57cec5SDimitry Andric raw_ostream &CStream) const override; 530b57cec5SDimitry Andric 540b57cec5SDimitry Andric public: 550b57cec5SDimitry Andric WebAssemblyDisassembler(const MCSubtargetInfo &STI, MCContext &Ctx, 560b57cec5SDimitry Andric std::unique_ptr<const MCInstrInfo> MCII) 570b57cec5SDimitry Andric : MCDisassembler(STI, Ctx), MCII(std::move(MCII)) {} 580b57cec5SDimitry Andric }; 590b57cec5SDimitry Andric } // end anonymous namespace 600b57cec5SDimitry Andric 610b57cec5SDimitry Andric static MCDisassembler *createWebAssemblyDisassembler(const Target &T, 620b57cec5SDimitry Andric const MCSubtargetInfo &STI, 630b57cec5SDimitry Andric MCContext &Ctx) { 640b57cec5SDimitry Andric std::unique_ptr<const MCInstrInfo> MCII(T.createMCInstrInfo()); 650b57cec5SDimitry Andric return new WebAssemblyDisassembler(STI, Ctx, std::move(MCII)); 660b57cec5SDimitry Andric } 670b57cec5SDimitry Andric 68480093f4SDimitry Andric 
extern "C" LLVM_EXTERNAL_VISIBILITY void 69480093f4SDimitry Andric LLVMInitializeWebAssemblyDisassembler() { 700b57cec5SDimitry Andric // Register the disassembler for each target. 710b57cec5SDimitry Andric TargetRegistry::RegisterMCDisassembler(getTheWebAssemblyTarget32(), 720b57cec5SDimitry Andric createWebAssemblyDisassembler); 730b57cec5SDimitry Andric TargetRegistry::RegisterMCDisassembler(getTheWebAssemblyTarget64(), 740b57cec5SDimitry Andric createWebAssemblyDisassembler); 750b57cec5SDimitry Andric } 760b57cec5SDimitry Andric 770b57cec5SDimitry Andric static int nextByte(ArrayRef<uint8_t> Bytes, uint64_t &Size) { 780b57cec5SDimitry Andric if (Size >= Bytes.size()) 790b57cec5SDimitry Andric return -1; 800b57cec5SDimitry Andric auto V = Bytes[Size]; 810b57cec5SDimitry Andric Size++; 820b57cec5SDimitry Andric return V; 830b57cec5SDimitry Andric } 840b57cec5SDimitry Andric 850b57cec5SDimitry Andric static bool nextLEB(int64_t &Val, ArrayRef<uint8_t> Bytes, uint64_t &Size, 860b57cec5SDimitry Andric bool Signed) { 870b57cec5SDimitry Andric unsigned N = 0; 880b57cec5SDimitry Andric const char *Error = nullptr; 890b57cec5SDimitry Andric Val = Signed ? 
decodeSLEB128(Bytes.data() + Size, &N, 900b57cec5SDimitry Andric Bytes.data() + Bytes.size(), &Error) 910b57cec5SDimitry Andric : static_cast<int64_t>(decodeULEB128(Bytes.data() + Size, &N, 920b57cec5SDimitry Andric Bytes.data() + Bytes.size(), 930b57cec5SDimitry Andric &Error)); 940b57cec5SDimitry Andric if (Error) 950b57cec5SDimitry Andric return false; 960b57cec5SDimitry Andric Size += N; 970b57cec5SDimitry Andric return true; 980b57cec5SDimitry Andric } 990b57cec5SDimitry Andric 1000b57cec5SDimitry Andric static bool parseLEBImmediate(MCInst &MI, uint64_t &Size, 1010b57cec5SDimitry Andric ArrayRef<uint8_t> Bytes, bool Signed) { 1020b57cec5SDimitry Andric int64_t Val; 1030b57cec5SDimitry Andric if (!nextLEB(Val, Bytes, Size, Signed)) 1040b57cec5SDimitry Andric return false; 1050b57cec5SDimitry Andric MI.addOperand(MCOperand::createImm(Val)); 1060b57cec5SDimitry Andric return true; 1070b57cec5SDimitry Andric } 1080b57cec5SDimitry Andric 1090b57cec5SDimitry Andric template <typename T> 1100b57cec5SDimitry Andric bool parseImmediate(MCInst &MI, uint64_t &Size, ArrayRef<uint8_t> Bytes) { 1110b57cec5SDimitry Andric if (Size + sizeof(T) > Bytes.size()) 1120b57cec5SDimitry Andric return false; 1130b57cec5SDimitry Andric T Val = support::endian::read<T, support::endianness::little, 1>( 1140b57cec5SDimitry Andric Bytes.data() + Size); 1150b57cec5SDimitry Andric Size += sizeof(T); 1160b57cec5SDimitry Andric if (std::is_floating_point<T>::value) { 1170b57cec5SDimitry Andric MI.addOperand(MCOperand::createFPImm(static_cast<double>(Val))); 1180b57cec5SDimitry Andric } else { 1190b57cec5SDimitry Andric MI.addOperand(MCOperand::createImm(static_cast<int64_t>(Val))); 1200b57cec5SDimitry Andric } 1210b57cec5SDimitry Andric return true; 1220b57cec5SDimitry Andric } 1230b57cec5SDimitry Andric 124*5ffd83dbSDimitry Andric Optional<MCDisassembler::DecodeStatus> WebAssemblyDisassembler::onSymbolStart( 125*5ffd83dbSDimitry Andric SymbolInfoTy &Symbol, uint64_t &Size, ArrayRef<uint8_t> 
Bytes, 126*5ffd83dbSDimitry Andric uint64_t Address, raw_ostream &CStream) const { 1270b57cec5SDimitry Andric Size = 0; 1280b57cec5SDimitry Andric if (Address == 0) { 1290b57cec5SDimitry Andric // Start of a code section: we're parsing only the function count. 1300b57cec5SDimitry Andric int64_t FunctionCount; 1310b57cec5SDimitry Andric if (!nextLEB(FunctionCount, Bytes, Size, false)) 132*5ffd83dbSDimitry Andric return None; 1330b57cec5SDimitry Andric outs() << " # " << FunctionCount << " functions in section."; 1340b57cec5SDimitry Andric } else { 1350b57cec5SDimitry Andric // Parse the start of a single function. 1360b57cec5SDimitry Andric int64_t BodySize, LocalEntryCount; 1370b57cec5SDimitry Andric if (!nextLEB(BodySize, Bytes, Size, false) || 1380b57cec5SDimitry Andric !nextLEB(LocalEntryCount, Bytes, Size, false)) 139*5ffd83dbSDimitry Andric return None; 1400b57cec5SDimitry Andric if (LocalEntryCount) { 1410b57cec5SDimitry Andric outs() << " .local "; 1420b57cec5SDimitry Andric for (int64_t I = 0; I < LocalEntryCount; I++) { 1430b57cec5SDimitry Andric int64_t Count, Type; 1440b57cec5SDimitry Andric if (!nextLEB(Count, Bytes, Size, false) || 1450b57cec5SDimitry Andric !nextLEB(Type, Bytes, Size, false)) 146*5ffd83dbSDimitry Andric return None; 1470b57cec5SDimitry Andric for (int64_t J = 0; J < Count; J++) { 1480b57cec5SDimitry Andric if (I || J) 1490b57cec5SDimitry Andric outs() << ", "; 1500b57cec5SDimitry Andric outs() << WebAssembly::anyTypeToString(Type); 1510b57cec5SDimitry Andric } 1520b57cec5SDimitry Andric } 1530b57cec5SDimitry Andric } 1540b57cec5SDimitry Andric } 1550b57cec5SDimitry Andric outs() << "\n"; 1560b57cec5SDimitry Andric return MCDisassembler::Success; 1570b57cec5SDimitry Andric } 1580b57cec5SDimitry Andric 1590b57cec5SDimitry Andric MCDisassembler::DecodeStatus WebAssemblyDisassembler::getInstruction( 1600b57cec5SDimitry Andric MCInst &MI, uint64_t &Size, ArrayRef<uint8_t> Bytes, uint64_t /*Address*/, 161480093f4SDimitry Andric raw_ostream 
&CS) const { 1620b57cec5SDimitry Andric CommentStream = &CS; 1630b57cec5SDimitry Andric Size = 0; 1640b57cec5SDimitry Andric int Opc = nextByte(Bytes, Size); 1650b57cec5SDimitry Andric if (Opc < 0) 1660b57cec5SDimitry Andric return MCDisassembler::Fail; 1670b57cec5SDimitry Andric const auto *WasmInst = &InstructionTable0[Opc]; 1680b57cec5SDimitry Andric // If this is a prefix byte, indirect to another table. 1690b57cec5SDimitry Andric if (WasmInst->ET == ET_Prefix) { 1700b57cec5SDimitry Andric WasmInst = nullptr; 1710b57cec5SDimitry Andric // Linear search, so far only 2 entries. 1720b57cec5SDimitry Andric for (auto PT = PrefixTable; PT->Table; PT++) { 1730b57cec5SDimitry Andric if (PT->Prefix == Opc) { 1740b57cec5SDimitry Andric WasmInst = PT->Table; 1750b57cec5SDimitry Andric break; 1760b57cec5SDimitry Andric } 1770b57cec5SDimitry Andric } 1780b57cec5SDimitry Andric if (!WasmInst) 1790b57cec5SDimitry Andric return MCDisassembler::Fail; 1800b57cec5SDimitry Andric int64_t PrefixedOpc; 1810b57cec5SDimitry Andric if (!nextLEB(PrefixedOpc, Bytes, Size, false)) 1820b57cec5SDimitry Andric return MCDisassembler::Fail; 1830b57cec5SDimitry Andric if (PrefixedOpc < 0 || PrefixedOpc >= WebAssemblyInstructionTableSize) 1840b57cec5SDimitry Andric return MCDisassembler::Fail; 1850b57cec5SDimitry Andric WasmInst += PrefixedOpc; 1860b57cec5SDimitry Andric } 1870b57cec5SDimitry Andric if (WasmInst->ET == ET_Unused) 1880b57cec5SDimitry Andric return MCDisassembler::Fail; 1890b57cec5SDimitry Andric // At this point we must have a valid instruction to decode. 1900b57cec5SDimitry Andric assert(WasmInst->ET == ET_Instruction); 1910b57cec5SDimitry Andric MI.setOpcode(WasmInst->Opcode); 1920b57cec5SDimitry Andric // Parse any operands. 
1930b57cec5SDimitry Andric for (uint8_t OPI = 0; OPI < WasmInst->NumOperands; OPI++) { 1940b57cec5SDimitry Andric auto OT = OperandTable[WasmInst->OperandStart + OPI]; 1950b57cec5SDimitry Andric switch (OT) { 1960b57cec5SDimitry Andric // ULEB operands: 1970b57cec5SDimitry Andric case WebAssembly::OPERAND_BASIC_BLOCK: 1980b57cec5SDimitry Andric case WebAssembly::OPERAND_LOCAL: 1990b57cec5SDimitry Andric case WebAssembly::OPERAND_GLOBAL: 2000b57cec5SDimitry Andric case WebAssembly::OPERAND_FUNCTION32: 2010b57cec5SDimitry Andric case WebAssembly::OPERAND_OFFSET32: 202*5ffd83dbSDimitry Andric case WebAssembly::OPERAND_OFFSET64: 2030b57cec5SDimitry Andric case WebAssembly::OPERAND_P2ALIGN: 2040b57cec5SDimitry Andric case WebAssembly::OPERAND_TYPEINDEX: 2050b57cec5SDimitry Andric case WebAssembly::OPERAND_EVENT: 2060b57cec5SDimitry Andric case MCOI::OPERAND_IMMEDIATE: { 2070b57cec5SDimitry Andric if (!parseLEBImmediate(MI, Size, Bytes, false)) 2080b57cec5SDimitry Andric return MCDisassembler::Fail; 2090b57cec5SDimitry Andric break; 2100b57cec5SDimitry Andric } 2110b57cec5SDimitry Andric // SLEB operands: 2120b57cec5SDimitry Andric case WebAssembly::OPERAND_I32IMM: 2130b57cec5SDimitry Andric case WebAssembly::OPERAND_I64IMM: { 2140b57cec5SDimitry Andric if (!parseLEBImmediate(MI, Size, Bytes, true)) 2150b57cec5SDimitry Andric return MCDisassembler::Fail; 2160b57cec5SDimitry Andric break; 2170b57cec5SDimitry Andric } 2188bcb0991SDimitry Andric // block_type operands: 2190b57cec5SDimitry Andric case WebAssembly::OPERAND_SIGNATURE: { 2208bcb0991SDimitry Andric int64_t Val; 2218bcb0991SDimitry Andric uint64_t PrevSize = Size; 2228bcb0991SDimitry Andric if (!nextLEB(Val, Bytes, Size, true)) 2230b57cec5SDimitry Andric return MCDisassembler::Fail; 2248bcb0991SDimitry Andric if (Val < 0) { 2258bcb0991SDimitry Andric // Negative values are single septet value types or empty types 2268bcb0991SDimitry Andric if (Size != PrevSize + 1) { 2278bcb0991SDimitry Andric MI.addOperand( 
2288bcb0991SDimitry Andric MCOperand::createImm(int64_t(WebAssembly::BlockType::Invalid))); 2298bcb0991SDimitry Andric } else { 2308bcb0991SDimitry Andric MI.addOperand(MCOperand::createImm(Val & 0x7f)); 2318bcb0991SDimitry Andric } 2328bcb0991SDimitry Andric } else { 2338bcb0991SDimitry Andric // We don't have access to the signature, so create a symbol without one 2348bcb0991SDimitry Andric MCSymbol *Sym = getContext().createTempSymbol("typeindex", true); 2358bcb0991SDimitry Andric auto *WasmSym = cast<MCSymbolWasm>(Sym); 2368bcb0991SDimitry Andric WasmSym->setType(wasm::WASM_SYMBOL_TYPE_FUNCTION); 2378bcb0991SDimitry Andric const MCExpr *Expr = MCSymbolRefExpr::create( 2388bcb0991SDimitry Andric WasmSym, MCSymbolRefExpr::VK_WASM_TYPEINDEX, getContext()); 2398bcb0991SDimitry Andric MI.addOperand(MCOperand::createExpr(Expr)); 2408bcb0991SDimitry Andric } 2410b57cec5SDimitry Andric break; 2420b57cec5SDimitry Andric } 2430b57cec5SDimitry Andric // FP operands. 2440b57cec5SDimitry Andric case WebAssembly::OPERAND_F32IMM: { 2450b57cec5SDimitry Andric if (!parseImmediate<float>(MI, Size, Bytes)) 2460b57cec5SDimitry Andric return MCDisassembler::Fail; 2470b57cec5SDimitry Andric break; 2480b57cec5SDimitry Andric } 2490b57cec5SDimitry Andric case WebAssembly::OPERAND_F64IMM: { 2500b57cec5SDimitry Andric if (!parseImmediate<double>(MI, Size, Bytes)) 2510b57cec5SDimitry Andric return MCDisassembler::Fail; 2520b57cec5SDimitry Andric break; 2530b57cec5SDimitry Andric } 2540b57cec5SDimitry Andric // Vector lane operands (not LEB encoded). 
2550b57cec5SDimitry Andric case WebAssembly::OPERAND_VEC_I8IMM: { 2560b57cec5SDimitry Andric if (!parseImmediate<uint8_t>(MI, Size, Bytes)) 2570b57cec5SDimitry Andric return MCDisassembler::Fail; 2580b57cec5SDimitry Andric break; 2590b57cec5SDimitry Andric } 2600b57cec5SDimitry Andric case WebAssembly::OPERAND_VEC_I16IMM: { 2610b57cec5SDimitry Andric if (!parseImmediate<uint16_t>(MI, Size, Bytes)) 2620b57cec5SDimitry Andric return MCDisassembler::Fail; 2630b57cec5SDimitry Andric break; 2640b57cec5SDimitry Andric } 2650b57cec5SDimitry Andric case WebAssembly::OPERAND_VEC_I32IMM: { 2660b57cec5SDimitry Andric if (!parseImmediate<uint32_t>(MI, Size, Bytes)) 2670b57cec5SDimitry Andric return MCDisassembler::Fail; 2680b57cec5SDimitry Andric break; 2690b57cec5SDimitry Andric } 2700b57cec5SDimitry Andric case WebAssembly::OPERAND_VEC_I64IMM: { 2710b57cec5SDimitry Andric if (!parseImmediate<uint64_t>(MI, Size, Bytes)) 2720b57cec5SDimitry Andric return MCDisassembler::Fail; 2730b57cec5SDimitry Andric break; 2740b57cec5SDimitry Andric } 2750b57cec5SDimitry Andric case WebAssembly::OPERAND_BRLIST: { 2760b57cec5SDimitry Andric int64_t TargetTableLen; 2770b57cec5SDimitry Andric if (!nextLEB(TargetTableLen, Bytes, Size, false)) 2780b57cec5SDimitry Andric return MCDisassembler::Fail; 2790b57cec5SDimitry Andric for (int64_t I = 0; I < TargetTableLen; I++) { 2800b57cec5SDimitry Andric if (!parseLEBImmediate(MI, Size, Bytes, false)) 2810b57cec5SDimitry Andric return MCDisassembler::Fail; 2820b57cec5SDimitry Andric } 2830b57cec5SDimitry Andric // Default case. 2840b57cec5SDimitry Andric if (!parseLEBImmediate(MI, Size, Bytes, false)) 2850b57cec5SDimitry Andric return MCDisassembler::Fail; 2860b57cec5SDimitry Andric break; 2870b57cec5SDimitry Andric } 2880b57cec5SDimitry Andric case MCOI::OPERAND_REGISTER: 2890b57cec5SDimitry Andric // The tablegen header currently does not have any register operands since 2900b57cec5SDimitry Andric // we use only the stack (_S) instructions. 
2910b57cec5SDimitry Andric // If you hit this that probably means a bad instruction definition in 2920b57cec5SDimitry Andric // tablegen. 2930b57cec5SDimitry Andric llvm_unreachable("Register operand in WebAssemblyDisassembler"); 2940b57cec5SDimitry Andric default: 2950b57cec5SDimitry Andric llvm_unreachable("Unknown operand type in WebAssemblyDisassembler"); 2960b57cec5SDimitry Andric } 2970b57cec5SDimitry Andric } 2980b57cec5SDimitry Andric return MCDisassembler::Success; 2990b57cec5SDimitry Andric } 300