//==- WebAssemblyDisassembler.cpp - Disassembler for WebAssembly -*- C++ -*-==//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
/// \file
/// This file is part of the WebAssembly Disassembler.
///
/// It contains code to translate the data produced by the decoder into
/// MCInsts.
///
//===----------------------------------------------------------------------===//

#include "MCTargetDesc/WebAssemblyMCTypeUtilities.h"
#include "TargetInfo/WebAssemblyTargetInfo.h"
#include "llvm/BinaryFormat/Wasm.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCDecoderOps.h"
#include "llvm/MC/MCDisassembler/MCDisassembler.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/MC/MCSymbolWasm.h"
#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/Endian.h"
#include "llvm/Support/LEB128.h"
#include <type_traits>

330b57cec5SDimitry Andric using namespace llvm; 340b57cec5SDimitry Andric 350b57cec5SDimitry Andric #define DEBUG_TYPE "wasm-disassembler" 360b57cec5SDimitry Andric 370b57cec5SDimitry Andric using DecodeStatus = MCDisassembler::DecodeStatus; 380b57cec5SDimitry Andric 390b57cec5SDimitry Andric #include "WebAssemblyGenDisassemblerTables.inc" 400b57cec5SDimitry Andric 410b57cec5SDimitry Andric namespace { 420b57cec5SDimitry Andric static constexpr int WebAssemblyInstructionTableSize = 256; 430b57cec5SDimitry Andric 440b57cec5SDimitry Andric class WebAssemblyDisassembler final : public MCDisassembler { 450b57cec5SDimitry Andric std::unique_ptr<const MCInstrInfo> MCII; 460b57cec5SDimitry Andric 470b57cec5SDimitry Andric DecodeStatus getInstruction(MCInst &Instr, uint64_t &Size, 480b57cec5SDimitry Andric ArrayRef<uint8_t> Bytes, uint64_t Address, 490b57cec5SDimitry Andric raw_ostream &CStream) const override; 50*0fca6ea1SDimitry Andric 51*0fca6ea1SDimitry Andric Expected<bool> onSymbolStart(SymbolInfoTy &Symbol, uint64_t &Size, 52*0fca6ea1SDimitry Andric ArrayRef<uint8_t> Bytes, 53*0fca6ea1SDimitry Andric uint64_t Address) const override; 540b57cec5SDimitry Andric 550b57cec5SDimitry Andric public: 560b57cec5SDimitry Andric WebAssemblyDisassembler(const MCSubtargetInfo &STI, MCContext &Ctx, 570b57cec5SDimitry Andric std::unique_ptr<const MCInstrInfo> MCII) 580b57cec5SDimitry Andric : MCDisassembler(STI, Ctx), MCII(std::move(MCII)) {} 590b57cec5SDimitry Andric }; 600b57cec5SDimitry Andric } // end anonymous namespace 610b57cec5SDimitry Andric 620b57cec5SDimitry Andric static MCDisassembler *createWebAssemblyDisassembler(const Target &T, 630b57cec5SDimitry Andric const MCSubtargetInfo &STI, 640b57cec5SDimitry Andric MCContext &Ctx) { 650b57cec5SDimitry Andric std::unique_ptr<const MCInstrInfo> MCII(T.createMCInstrInfo()); 660b57cec5SDimitry Andric return new WebAssemblyDisassembler(STI, Ctx, std::move(MCII)); 670b57cec5SDimitry Andric } 680b57cec5SDimitry Andric 
69480093f4SDimitry Andric extern "C" LLVM_EXTERNAL_VISIBILITY void 70480093f4SDimitry Andric LLVMInitializeWebAssemblyDisassembler() { 710b57cec5SDimitry Andric // Register the disassembler for each target. 720b57cec5SDimitry Andric TargetRegistry::RegisterMCDisassembler(getTheWebAssemblyTarget32(), 730b57cec5SDimitry Andric createWebAssemblyDisassembler); 740b57cec5SDimitry Andric TargetRegistry::RegisterMCDisassembler(getTheWebAssemblyTarget64(), 750b57cec5SDimitry Andric createWebAssemblyDisassembler); 760b57cec5SDimitry Andric } 770b57cec5SDimitry Andric 780b57cec5SDimitry Andric static int nextByte(ArrayRef<uint8_t> Bytes, uint64_t &Size) { 790b57cec5SDimitry Andric if (Size >= Bytes.size()) 800b57cec5SDimitry Andric return -1; 810b57cec5SDimitry Andric auto V = Bytes[Size]; 820b57cec5SDimitry Andric Size++; 830b57cec5SDimitry Andric return V; 840b57cec5SDimitry Andric } 850b57cec5SDimitry Andric 860b57cec5SDimitry Andric static bool nextLEB(int64_t &Val, ArrayRef<uint8_t> Bytes, uint64_t &Size, 870b57cec5SDimitry Andric bool Signed) { 880b57cec5SDimitry Andric unsigned N = 0; 890b57cec5SDimitry Andric const char *Error = nullptr; 900b57cec5SDimitry Andric Val = Signed ? 
decodeSLEB128(Bytes.data() + Size, &N, 910b57cec5SDimitry Andric Bytes.data() + Bytes.size(), &Error) 920b57cec5SDimitry Andric : static_cast<int64_t>(decodeULEB128(Bytes.data() + Size, &N, 930b57cec5SDimitry Andric Bytes.data() + Bytes.size(), 940b57cec5SDimitry Andric &Error)); 950b57cec5SDimitry Andric if (Error) 960b57cec5SDimitry Andric return false; 970b57cec5SDimitry Andric Size += N; 980b57cec5SDimitry Andric return true; 990b57cec5SDimitry Andric } 1000b57cec5SDimitry Andric 1010b57cec5SDimitry Andric static bool parseLEBImmediate(MCInst &MI, uint64_t &Size, 1020b57cec5SDimitry Andric ArrayRef<uint8_t> Bytes, bool Signed) { 1030b57cec5SDimitry Andric int64_t Val; 1040b57cec5SDimitry Andric if (!nextLEB(Val, Bytes, Size, Signed)) 1050b57cec5SDimitry Andric return false; 1060b57cec5SDimitry Andric MI.addOperand(MCOperand::createImm(Val)); 1070b57cec5SDimitry Andric return true; 1080b57cec5SDimitry Andric } 1090b57cec5SDimitry Andric 1100b57cec5SDimitry Andric template <typename T> 1110b57cec5SDimitry Andric bool parseImmediate(MCInst &MI, uint64_t &Size, ArrayRef<uint8_t> Bytes) { 1120b57cec5SDimitry Andric if (Size + sizeof(T) > Bytes.size()) 1130b57cec5SDimitry Andric return false; 1145f757f3fSDimitry Andric T Val = 1155f757f3fSDimitry Andric support::endian::read<T, llvm::endianness::little>(Bytes.data() + Size); 1160b57cec5SDimitry Andric Size += sizeof(T); 1170b57cec5SDimitry Andric if (std::is_floating_point<T>::value) { 118fe6060f1SDimitry Andric MI.addOperand( 119fe6060f1SDimitry Andric MCOperand::createDFPImm(bit_cast<uint64_t>(static_cast<double>(Val)))); 1200b57cec5SDimitry Andric } else { 1210b57cec5SDimitry Andric MI.addOperand(MCOperand::createImm(static_cast<int64_t>(Val))); 1220b57cec5SDimitry Andric } 1230b57cec5SDimitry Andric return true; 1240b57cec5SDimitry Andric } 1250b57cec5SDimitry Andric 126*0fca6ea1SDimitry Andric Expected<bool> WebAssemblyDisassembler::onSymbolStart(SymbolInfoTy &Symbol, 127*0fca6ea1SDimitry Andric uint64_t &Size, 
128bdd1243dSDimitry Andric ArrayRef<uint8_t> Bytes, 129*0fca6ea1SDimitry Andric uint64_t Address) const { 1300b57cec5SDimitry Andric Size = 0; 131*0fca6ea1SDimitry Andric if (Symbol.Type == wasm::WASM_SYMBOL_TYPE_SECTION) { 1320b57cec5SDimitry Andric // Start of a code section: we're parsing only the function count. 1330b57cec5SDimitry Andric int64_t FunctionCount; 1340b57cec5SDimitry Andric if (!nextLEB(FunctionCount, Bytes, Size, false)) 135*0fca6ea1SDimitry Andric return false; 1360b57cec5SDimitry Andric outs() << " # " << FunctionCount << " functions in section."; 1370b57cec5SDimitry Andric } else { 1380b57cec5SDimitry Andric // Parse the start of a single function. 1390b57cec5SDimitry Andric int64_t BodySize, LocalEntryCount; 1400b57cec5SDimitry Andric if (!nextLEB(BodySize, Bytes, Size, false) || 1410b57cec5SDimitry Andric !nextLEB(LocalEntryCount, Bytes, Size, false)) 142*0fca6ea1SDimitry Andric return false; 1430b57cec5SDimitry Andric if (LocalEntryCount) { 1440b57cec5SDimitry Andric outs() << " .local "; 1450b57cec5SDimitry Andric for (int64_t I = 0; I < LocalEntryCount; I++) { 1460b57cec5SDimitry Andric int64_t Count, Type; 1470b57cec5SDimitry Andric if (!nextLEB(Count, Bytes, Size, false) || 1480b57cec5SDimitry Andric !nextLEB(Type, Bytes, Size, false)) 149*0fca6ea1SDimitry Andric return false; 1500b57cec5SDimitry Andric for (int64_t J = 0; J < Count; J++) { 1510b57cec5SDimitry Andric if (I || J) 1520b57cec5SDimitry Andric outs() << ", "; 1530b57cec5SDimitry Andric outs() << WebAssembly::anyTypeToString(Type); 1540b57cec5SDimitry Andric } 1550b57cec5SDimitry Andric } 1560b57cec5SDimitry Andric } 1570b57cec5SDimitry Andric } 1580b57cec5SDimitry Andric outs() << "\n"; 159*0fca6ea1SDimitry Andric return true; 1600b57cec5SDimitry Andric } 1610b57cec5SDimitry Andric 1620b57cec5SDimitry Andric MCDisassembler::DecodeStatus WebAssemblyDisassembler::getInstruction( 1630b57cec5SDimitry Andric MCInst &MI, uint64_t &Size, ArrayRef<uint8_t> Bytes, uint64_t 
/*Address*/, 164480093f4SDimitry Andric raw_ostream &CS) const { 1650b57cec5SDimitry Andric CommentStream = &CS; 1660b57cec5SDimitry Andric Size = 0; 1670b57cec5SDimitry Andric int Opc = nextByte(Bytes, Size); 1680b57cec5SDimitry Andric if (Opc < 0) 1690b57cec5SDimitry Andric return MCDisassembler::Fail; 1700b57cec5SDimitry Andric const auto *WasmInst = &InstructionTable0[Opc]; 1710b57cec5SDimitry Andric // If this is a prefix byte, indirect to another table. 1720b57cec5SDimitry Andric if (WasmInst->ET == ET_Prefix) { 1730b57cec5SDimitry Andric WasmInst = nullptr; 1740b57cec5SDimitry Andric // Linear search, so far only 2 entries. 1750b57cec5SDimitry Andric for (auto PT = PrefixTable; PT->Table; PT++) { 1760b57cec5SDimitry Andric if (PT->Prefix == Opc) { 1770b57cec5SDimitry Andric WasmInst = PT->Table; 1780b57cec5SDimitry Andric break; 1790b57cec5SDimitry Andric } 1800b57cec5SDimitry Andric } 1810b57cec5SDimitry Andric if (!WasmInst) 1820b57cec5SDimitry Andric return MCDisassembler::Fail; 1830b57cec5SDimitry Andric int64_t PrefixedOpc; 1840b57cec5SDimitry Andric if (!nextLEB(PrefixedOpc, Bytes, Size, false)) 1850b57cec5SDimitry Andric return MCDisassembler::Fail; 1860b57cec5SDimitry Andric if (PrefixedOpc < 0 || PrefixedOpc >= WebAssemblyInstructionTableSize) 1870b57cec5SDimitry Andric return MCDisassembler::Fail; 1880b57cec5SDimitry Andric WasmInst += PrefixedOpc; 1890b57cec5SDimitry Andric } 1900b57cec5SDimitry Andric if (WasmInst->ET == ET_Unused) 1910b57cec5SDimitry Andric return MCDisassembler::Fail; 1920b57cec5SDimitry Andric // At this point we must have a valid instruction to decode. 1930b57cec5SDimitry Andric assert(WasmInst->ET == ET_Instruction); 1940b57cec5SDimitry Andric MI.setOpcode(WasmInst->Opcode); 1950b57cec5SDimitry Andric // Parse any operands. 
1960b57cec5SDimitry Andric for (uint8_t OPI = 0; OPI < WasmInst->NumOperands; OPI++) { 1970b57cec5SDimitry Andric auto OT = OperandTable[WasmInst->OperandStart + OPI]; 1980b57cec5SDimitry Andric switch (OT) { 1990b57cec5SDimitry Andric // ULEB operands: 2000b57cec5SDimitry Andric case WebAssembly::OPERAND_BASIC_BLOCK: 2010b57cec5SDimitry Andric case WebAssembly::OPERAND_LOCAL: 2020b57cec5SDimitry Andric case WebAssembly::OPERAND_GLOBAL: 2030b57cec5SDimitry Andric case WebAssembly::OPERAND_FUNCTION32: 204e8d8bef9SDimitry Andric case WebAssembly::OPERAND_TABLE: 2050b57cec5SDimitry Andric case WebAssembly::OPERAND_OFFSET32: 2065ffd83dbSDimitry Andric case WebAssembly::OPERAND_OFFSET64: 2070b57cec5SDimitry Andric case WebAssembly::OPERAND_P2ALIGN: 2080b57cec5SDimitry Andric case WebAssembly::OPERAND_TYPEINDEX: 209fe6060f1SDimitry Andric case WebAssembly::OPERAND_TAG: 2100b57cec5SDimitry Andric case MCOI::OPERAND_IMMEDIATE: { 2110b57cec5SDimitry Andric if (!parseLEBImmediate(MI, Size, Bytes, false)) 2120b57cec5SDimitry Andric return MCDisassembler::Fail; 2130b57cec5SDimitry Andric break; 2140b57cec5SDimitry Andric } 2150b57cec5SDimitry Andric // SLEB operands: 2160b57cec5SDimitry Andric case WebAssembly::OPERAND_I32IMM: 2170b57cec5SDimitry Andric case WebAssembly::OPERAND_I64IMM: { 2180b57cec5SDimitry Andric if (!parseLEBImmediate(MI, Size, Bytes, true)) 2190b57cec5SDimitry Andric return MCDisassembler::Fail; 2200b57cec5SDimitry Andric break; 2210b57cec5SDimitry Andric } 2228bcb0991SDimitry Andric // block_type operands: 2230b57cec5SDimitry Andric case WebAssembly::OPERAND_SIGNATURE: { 2248bcb0991SDimitry Andric int64_t Val; 2258bcb0991SDimitry Andric uint64_t PrevSize = Size; 2268bcb0991SDimitry Andric if (!nextLEB(Val, Bytes, Size, true)) 2270b57cec5SDimitry Andric return MCDisassembler::Fail; 2288bcb0991SDimitry Andric if (Val < 0) { 2298bcb0991SDimitry Andric // Negative values are single septet value types or empty types 2308bcb0991SDimitry Andric if (Size != 
PrevSize + 1) { 2318bcb0991SDimitry Andric MI.addOperand( 2328bcb0991SDimitry Andric MCOperand::createImm(int64_t(WebAssembly::BlockType::Invalid))); 2338bcb0991SDimitry Andric } else { 2348bcb0991SDimitry Andric MI.addOperand(MCOperand::createImm(Val & 0x7f)); 2358bcb0991SDimitry Andric } 2368bcb0991SDimitry Andric } else { 2378bcb0991SDimitry Andric // We don't have access to the signature, so create a symbol without one 2388bcb0991SDimitry Andric MCSymbol *Sym = getContext().createTempSymbol("typeindex", true); 2398bcb0991SDimitry Andric auto *WasmSym = cast<MCSymbolWasm>(Sym); 2408bcb0991SDimitry Andric WasmSym->setType(wasm::WASM_SYMBOL_TYPE_FUNCTION); 2418bcb0991SDimitry Andric const MCExpr *Expr = MCSymbolRefExpr::create( 2428bcb0991SDimitry Andric WasmSym, MCSymbolRefExpr::VK_WASM_TYPEINDEX, getContext()); 2438bcb0991SDimitry Andric MI.addOperand(MCOperand::createExpr(Expr)); 2448bcb0991SDimitry Andric } 2450b57cec5SDimitry Andric break; 2460b57cec5SDimitry Andric } 2470b57cec5SDimitry Andric // FP operands. 2480b57cec5SDimitry Andric case WebAssembly::OPERAND_F32IMM: { 2490b57cec5SDimitry Andric if (!parseImmediate<float>(MI, Size, Bytes)) 2500b57cec5SDimitry Andric return MCDisassembler::Fail; 2510b57cec5SDimitry Andric break; 2520b57cec5SDimitry Andric } 2530b57cec5SDimitry Andric case WebAssembly::OPERAND_F64IMM: { 2540b57cec5SDimitry Andric if (!parseImmediate<double>(MI, Size, Bytes)) 2550b57cec5SDimitry Andric return MCDisassembler::Fail; 2560b57cec5SDimitry Andric break; 2570b57cec5SDimitry Andric } 2580b57cec5SDimitry Andric // Vector lane operands (not LEB encoded). 
2590b57cec5SDimitry Andric case WebAssembly::OPERAND_VEC_I8IMM: { 2600b57cec5SDimitry Andric if (!parseImmediate<uint8_t>(MI, Size, Bytes)) 2610b57cec5SDimitry Andric return MCDisassembler::Fail; 2620b57cec5SDimitry Andric break; 2630b57cec5SDimitry Andric } 2640b57cec5SDimitry Andric case WebAssembly::OPERAND_VEC_I16IMM: { 2650b57cec5SDimitry Andric if (!parseImmediate<uint16_t>(MI, Size, Bytes)) 2660b57cec5SDimitry Andric return MCDisassembler::Fail; 2670b57cec5SDimitry Andric break; 2680b57cec5SDimitry Andric } 2690b57cec5SDimitry Andric case WebAssembly::OPERAND_VEC_I32IMM: { 2700b57cec5SDimitry Andric if (!parseImmediate<uint32_t>(MI, Size, Bytes)) 2710b57cec5SDimitry Andric return MCDisassembler::Fail; 2720b57cec5SDimitry Andric break; 2730b57cec5SDimitry Andric } 2740b57cec5SDimitry Andric case WebAssembly::OPERAND_VEC_I64IMM: { 2750b57cec5SDimitry Andric if (!parseImmediate<uint64_t>(MI, Size, Bytes)) 2760b57cec5SDimitry Andric return MCDisassembler::Fail; 2770b57cec5SDimitry Andric break; 2780b57cec5SDimitry Andric } 2790b57cec5SDimitry Andric case WebAssembly::OPERAND_BRLIST: { 2800b57cec5SDimitry Andric int64_t TargetTableLen; 2810b57cec5SDimitry Andric if (!nextLEB(TargetTableLen, Bytes, Size, false)) 2820b57cec5SDimitry Andric return MCDisassembler::Fail; 2830b57cec5SDimitry Andric for (int64_t I = 0; I < TargetTableLen; I++) { 2840b57cec5SDimitry Andric if (!parseLEBImmediate(MI, Size, Bytes, false)) 2850b57cec5SDimitry Andric return MCDisassembler::Fail; 2860b57cec5SDimitry Andric } 2870b57cec5SDimitry Andric // Default case. 2880b57cec5SDimitry Andric if (!parseLEBImmediate(MI, Size, Bytes, false)) 2890b57cec5SDimitry Andric return MCDisassembler::Fail; 2900b57cec5SDimitry Andric break; 2910b57cec5SDimitry Andric } 2920b57cec5SDimitry Andric case MCOI::OPERAND_REGISTER: 2930b57cec5SDimitry Andric // The tablegen header currently does not have any register operands since 2940b57cec5SDimitry Andric // we use only the stack (_S) instructions. 
2950b57cec5SDimitry Andric // If you hit this that probably means a bad instruction definition in 2960b57cec5SDimitry Andric // tablegen. 2970b57cec5SDimitry Andric llvm_unreachable("Register operand in WebAssemblyDisassembler"); 2980b57cec5SDimitry Andric default: 2990b57cec5SDimitry Andric llvm_unreachable("Unknown operand type in WebAssemblyDisassembler"); 3000b57cec5SDimitry Andric } 3010b57cec5SDimitry Andric } 3020b57cec5SDimitry Andric return MCDisassembler::Success; 3030b57cec5SDimitry Andric } 304