1 //==- WebAssemblyDisassembler.cpp - Disassembler for WebAssembly -*- C++ -*-==// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 /// 9 /// \file 10 /// This file is part of the WebAssembly Disassembler. 11 /// 12 /// It contains code to translate the data produced by the decoder into 13 /// MCInsts. 14 /// 15 //===----------------------------------------------------------------------===// 16 17 #include "InstPrinter/WebAssemblyInstPrinter.h" 18 #include "MCTargetDesc/WebAssemblyMCTargetDesc.h" 19 #include "llvm/MC/MCContext.h" 20 #include "llvm/MC/MCDisassembler/MCDisassembler.h" 21 #include "llvm/MC/MCFixedLenDisassembler.h" 22 #include "llvm/MC/MCInst.h" 23 #include "llvm/MC/MCInstrInfo.h" 24 #include "llvm/MC/MCSubtargetInfo.h" 25 #include "llvm/MC/MCSymbol.h" 26 #include "llvm/Support/Endian.h" 27 #include "llvm/Support/LEB128.h" 28 #include "llvm/Support/TargetRegistry.h" 29 30 using namespace llvm; 31 32 #define DEBUG_TYPE "wasm-disassembler" 33 34 using DecodeStatus = MCDisassembler::DecodeStatus; 35 36 #include "WebAssemblyGenDisassemblerTables.inc" 37 38 namespace { 39 static constexpr int WebAssemblyInstructionTableSize = 256; 40 41 class WebAssemblyDisassembler final : public MCDisassembler { 42 std::unique_ptr<const MCInstrInfo> MCII; 43 44 DecodeStatus getInstruction(MCInst &Instr, uint64_t &Size, 45 ArrayRef<uint8_t> Bytes, uint64_t Address, 46 raw_ostream &VStream, 47 raw_ostream &CStream) const override; 48 DecodeStatus onSymbolStart(StringRef Name, uint64_t &Size, 49 ArrayRef<uint8_t> Bytes, uint64_t Address, 50 raw_ostream &VStream, 51 raw_ostream &CStream) const override; 52 53 public: 54 WebAssemblyDisassembler(const MCSubtargetInfo &STI, MCContext &Ctx, 55 std::unique_ptr<const MCInstrInfo> MCII) 56 : MCDisassembler(STI, Ctx), MCII(std::move(MCII)) {} 57 }; 58 } // end anonymous namespace 59 60 static MCDisassembler *createWebAssemblyDisassembler(const Target &T, 61 const MCSubtargetInfo &STI, 62 MCContext &Ctx) { 63 std::unique_ptr<const MCInstrInfo> MCII(T.createMCInstrInfo()); 64 return new WebAssemblyDisassembler(STI, Ctx, std::move(MCII)); 65 } 66 67 extern "C" void LLVMInitializeWebAssemblyDisassembler() { 68 // Register the disassembler for each target. 69 TargetRegistry::RegisterMCDisassembler(getTheWebAssemblyTarget32(), 70 createWebAssemblyDisassembler); 71 TargetRegistry::RegisterMCDisassembler(getTheWebAssemblyTarget64(), 72 createWebAssemblyDisassembler); 73 } 74 75 static int nextByte(ArrayRef<uint8_t> Bytes, uint64_t &Size) { 76 if (Size >= Bytes.size()) 77 return -1; 78 auto V = Bytes[Size]; 79 Size++; 80 return V; 81 } 82 83 static bool nextLEB(int64_t &Val, ArrayRef<uint8_t> Bytes, uint64_t &Size, 84 bool Signed) { 85 unsigned N = 0; 86 const char *Error = nullptr; 87 Val = Signed ? decodeSLEB128(Bytes.data() + Size, &N, 88 Bytes.data() + Bytes.size(), &Error) 89 : static_cast<int64_t>(decodeULEB128(Bytes.data() + Size, &N, 90 Bytes.data() + Bytes.size(), 91 &Error)); 92 if (Error) 93 return false; 94 Size += N; 95 return true; 96 } 97 98 static bool parseLEBImmediate(MCInst &MI, uint64_t &Size, 99 ArrayRef<uint8_t> Bytes, bool Signed) { 100 int64_t Val; 101 if (!nextLEB(Val, Bytes, Size, Signed)) 102 return false; 103 MI.addOperand(MCOperand::createImm(Val)); 104 return true; 105 } 106 107 template <typename T> 108 bool parseImmediate(MCInst &MI, uint64_t &Size, ArrayRef<uint8_t> Bytes) { 109 if (Size + sizeof(T) > Bytes.size()) 110 return false; 111 T Val = support::endian::read<T, support::endianness::little, 1>( 112 Bytes.data() + Size); 113 Size += sizeof(T); 114 if (std::is_floating_point<T>::value) { 115 MI.addOperand(MCOperand::createFPImm(static_cast<double>(Val))); 116 } else { 117 MI.addOperand(MCOperand::createImm(static_cast<int64_t>(Val))); 118 } 119 return true; 120 } 121 122 MCDisassembler::DecodeStatus WebAssemblyDisassembler::onSymbolStart( 123 StringRef Name, uint64_t &Size, ArrayRef<uint8_t> Bytes, uint64_t Address, 124 raw_ostream &VStream, raw_ostream &CStream) const { 125 Size = 0; 126 if (Address == 0) { 127 // Start of a code section: we're parsing only the function count. 128 int64_t FunctionCount; 129 if (!nextLEB(FunctionCount, Bytes, Size, false)) 130 return MCDisassembler::Fail; 131 outs() << " # " << FunctionCount << " functions in section."; 132 } else { 133 // Parse the start of a single function. 134 int64_t BodySize, LocalEntryCount; 135 if (!nextLEB(BodySize, Bytes, Size, false) || 136 !nextLEB(LocalEntryCount, Bytes, Size, false)) 137 return MCDisassembler::Fail; 138 if (LocalEntryCount) { 139 outs() << " .local "; 140 for (int64_t I = 0; I < LocalEntryCount; I++) { 141 int64_t Count, Type; 142 if (!nextLEB(Count, Bytes, Size, false) || 143 !nextLEB(Type, Bytes, Size, false)) 144 return MCDisassembler::Fail; 145 for (int64_t J = 0; J < Count; J++) { 146 if (I || J) 147 outs() << ", "; 148 outs() << WebAssembly::anyTypeToString(Type); 149 } 150 } 151 } 152 } 153 outs() << "\n"; 154 return MCDisassembler::Success; 155 } 156 157 MCDisassembler::DecodeStatus WebAssemblyDisassembler::getInstruction( 158 MCInst &MI, uint64_t &Size, ArrayRef<uint8_t> Bytes, uint64_t /*Address*/, 159 raw_ostream & /*OS*/, raw_ostream &CS) const { 160 CommentStream = &CS; 161 Size = 0; 162 int Opc = nextByte(Bytes, Size); 163 if (Opc < 0) 164 return MCDisassembler::Fail; 165 const auto *WasmInst = &InstructionTable0[Opc]; 166 // If this is a prefix byte, indirect to another table. 167 if (WasmInst->ET == ET_Prefix) { 168 WasmInst = nullptr; 169 // Linear search, so far only 2 entries. 170 for (auto PT = PrefixTable; PT->Table; PT++) { 171 if (PT->Prefix == Opc) { 172 WasmInst = PT->Table; 173 break; 174 } 175 } 176 if (!WasmInst) 177 return MCDisassembler::Fail; 178 int64_t PrefixedOpc; 179 if (!nextLEB(PrefixedOpc, Bytes, Size, false)) 180 return MCDisassembler::Fail; 181 if (PrefixedOpc < 0 || PrefixedOpc >= WebAssemblyInstructionTableSize) 182 return MCDisassembler::Fail; 183 WasmInst += PrefixedOpc; 184 } 185 if (WasmInst->ET == ET_Unused) 186 return MCDisassembler::Fail; 187 // At this point we must have a valid instruction to decode. 188 assert(WasmInst->ET == ET_Instruction); 189 MI.setOpcode(WasmInst->Opcode); 190 // Parse any operands. 191 for (uint8_t OPI = 0; OPI < WasmInst->NumOperands; OPI++) { 192 auto OT = OperandTable[WasmInst->OperandStart + OPI]; 193 switch (OT) { 194 // ULEB operands: 195 case WebAssembly::OPERAND_BASIC_BLOCK: 196 case WebAssembly::OPERAND_LOCAL: 197 case WebAssembly::OPERAND_GLOBAL: 198 case WebAssembly::OPERAND_FUNCTION32: 199 case WebAssembly::OPERAND_OFFSET32: 200 case WebAssembly::OPERAND_P2ALIGN: 201 case WebAssembly::OPERAND_TYPEINDEX: 202 case WebAssembly::OPERAND_EVENT: 203 case MCOI::OPERAND_IMMEDIATE: { 204 if (!parseLEBImmediate(MI, Size, Bytes, false)) 205 return MCDisassembler::Fail; 206 break; 207 } 208 // SLEB operands: 209 case WebAssembly::OPERAND_I32IMM: 210 case WebAssembly::OPERAND_I64IMM: { 211 if (!parseLEBImmediate(MI, Size, Bytes, true)) 212 return MCDisassembler::Fail; 213 break; 214 } 215 // block_type operands (uint8_t). 216 case WebAssembly::OPERAND_SIGNATURE: { 217 if (!parseImmediate<uint8_t>(MI, Size, Bytes)) 218 return MCDisassembler::Fail; 219 break; 220 } 221 // FP operands. 222 case WebAssembly::OPERAND_F32IMM: { 223 if (!parseImmediate<float>(MI, Size, Bytes)) 224 return MCDisassembler::Fail; 225 break; 226 } 227 case WebAssembly::OPERAND_F64IMM: { 228 if (!parseImmediate<double>(MI, Size, Bytes)) 229 return MCDisassembler::Fail; 230 break; 231 } 232 // Vector lane operands (not LEB encoded). 233 case WebAssembly::OPERAND_VEC_I8IMM: { 234 if (!parseImmediate<uint8_t>(MI, Size, Bytes)) 235 return MCDisassembler::Fail; 236 break; 237 } 238 case WebAssembly::OPERAND_VEC_I16IMM: { 239 if (!parseImmediate<uint16_t>(MI, Size, Bytes)) 240 return MCDisassembler::Fail; 241 break; 242 } 243 case WebAssembly::OPERAND_VEC_I32IMM: { 244 if (!parseImmediate<uint32_t>(MI, Size, Bytes)) 245 return MCDisassembler::Fail; 246 break; 247 } 248 case WebAssembly::OPERAND_VEC_I64IMM: { 249 if (!parseImmediate<uint64_t>(MI, Size, Bytes)) 250 return MCDisassembler::Fail; 251 break; 252 } 253 case WebAssembly::OPERAND_BRLIST: { 254 int64_t TargetTableLen; 255 if (!nextLEB(TargetTableLen, Bytes, Size, false)) 256 return MCDisassembler::Fail; 257 for (int64_t I = 0; I < TargetTableLen; I++) { 258 if (!parseLEBImmediate(MI, Size, Bytes, false)) 259 return MCDisassembler::Fail; 260 } 261 // Default case. 262 if (!parseLEBImmediate(MI, Size, Bytes, false)) 263 return MCDisassembler::Fail; 264 break; 265 } 266 case MCOI::OPERAND_REGISTER: 267 // The tablegen header currently does not have any register operands since 268 // we use only the stack (_S) instructions. 269 // If you hit this that probably means a bad instruction definition in 270 // tablegen. 271 llvm_unreachable("Register operand in WebAssemblyDisassembler"); 272 default: 273 llvm_unreachable("Unknown operand type in WebAssemblyDisassembler"); 274 } 275 } 276 return MCDisassembler::Success; 277 } 278