1 //==- WebAssemblyDisassembler.cpp - Disassembler for WebAssembly -*- C++ -*-==//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 ///
9 /// \file
10 /// This file is part of the WebAssembly Disassembler.
11 ///
12 /// It contains code to translate the data produced by the decoder into
13 /// MCInsts.
14 ///
15 //===----------------------------------------------------------------------===//
16
17 #include "TargetInfo/WebAssemblyTargetInfo.h"
18 #include "Utils/WebAssemblyTypeUtilities.h"
19 #include "llvm/MC/MCContext.h"
20 #include "llvm/MC/MCDecoderOps.h"
21 #include "llvm/MC/MCDisassembler/MCDisassembler.h"
22 #include "llvm/MC/MCInst.h"
23 #include "llvm/MC/MCInstrInfo.h"
24 #include "llvm/MC/MCSubtargetInfo.h"
25 #include "llvm/MC/MCSymbol.h"
26 #include "llvm/MC/MCSymbolWasm.h"
27 #include "llvm/MC/TargetRegistry.h"
28 #include "llvm/Support/Endian.h"
29 #include "llvm/Support/LEB128.h"
30
31 using namespace llvm;
32
33 #define DEBUG_TYPE "wasm-disassembler"
34
35 using DecodeStatus = MCDisassembler::DecodeStatus;
36
37 #include "WebAssemblyGenDisassemblerTables.inc"
38
39 namespace {
40 static constexpr int WebAssemblyInstructionTableSize = 256;
41
42 class WebAssemblyDisassembler final : public MCDisassembler {
43 std::unique_ptr<const MCInstrInfo> MCII;
44
45 DecodeStatus getInstruction(MCInst &Instr, uint64_t &Size,
46 ArrayRef<uint8_t> Bytes, uint64_t Address,
47 raw_ostream &CStream) const override;
48 std::optional<DecodeStatus>
49 onSymbolStart(SymbolInfoTy &Symbol, uint64_t &Size, ArrayRef<uint8_t> Bytes,
50 uint64_t Address, raw_ostream &CStream) const override;
51
52 public:
WebAssemblyDisassembler(const MCSubtargetInfo & STI,MCContext & Ctx,std::unique_ptr<const MCInstrInfo> MCII)53 WebAssemblyDisassembler(const MCSubtargetInfo &STI, MCContext &Ctx,
54 std::unique_ptr<const MCInstrInfo> MCII)
55 : MCDisassembler(STI, Ctx), MCII(std::move(MCII)) {}
56 };
57 } // end anonymous namespace
58
createWebAssemblyDisassembler(const Target & T,const MCSubtargetInfo & STI,MCContext & Ctx)59 static MCDisassembler *createWebAssemblyDisassembler(const Target &T,
60 const MCSubtargetInfo &STI,
61 MCContext &Ctx) {
62 std::unique_ptr<const MCInstrInfo> MCII(T.createMCInstrInfo());
63 return new WebAssemblyDisassembler(STI, Ctx, std::move(MCII));
64 }
65
66 extern "C" LLVM_EXTERNAL_VISIBILITY void
LLVMInitializeWebAssemblyDisassembler()67 LLVMInitializeWebAssemblyDisassembler() {
68 // Register the disassembler for each target.
69 TargetRegistry::RegisterMCDisassembler(getTheWebAssemblyTarget32(),
70 createWebAssemblyDisassembler);
71 TargetRegistry::RegisterMCDisassembler(getTheWebAssemblyTarget64(),
72 createWebAssemblyDisassembler);
73 }
74
nextByte(ArrayRef<uint8_t> Bytes,uint64_t & Size)75 static int nextByte(ArrayRef<uint8_t> Bytes, uint64_t &Size) {
76 if (Size >= Bytes.size())
77 return -1;
78 auto V = Bytes[Size];
79 Size++;
80 return V;
81 }
82
nextLEB(int64_t & Val,ArrayRef<uint8_t> Bytes,uint64_t & Size,bool Signed)83 static bool nextLEB(int64_t &Val, ArrayRef<uint8_t> Bytes, uint64_t &Size,
84 bool Signed) {
85 unsigned N = 0;
86 const char *Error = nullptr;
87 Val = Signed ? decodeSLEB128(Bytes.data() + Size, &N,
88 Bytes.data() + Bytes.size(), &Error)
89 : static_cast<int64_t>(decodeULEB128(Bytes.data() + Size, &N,
90 Bytes.data() + Bytes.size(),
91 &Error));
92 if (Error)
93 return false;
94 Size += N;
95 return true;
96 }
97
parseLEBImmediate(MCInst & MI,uint64_t & Size,ArrayRef<uint8_t> Bytes,bool Signed)98 static bool parseLEBImmediate(MCInst &MI, uint64_t &Size,
99 ArrayRef<uint8_t> Bytes, bool Signed) {
100 int64_t Val;
101 if (!nextLEB(Val, Bytes, Size, Signed))
102 return false;
103 MI.addOperand(MCOperand::createImm(Val));
104 return true;
105 }
106
107 template <typename T>
parseImmediate(MCInst & MI,uint64_t & Size,ArrayRef<uint8_t> Bytes)108 bool parseImmediate(MCInst &MI, uint64_t &Size, ArrayRef<uint8_t> Bytes) {
109 if (Size + sizeof(T) > Bytes.size())
110 return false;
111 T Val = support::endian::read<T, support::endianness::little, 1>(
112 Bytes.data() + Size);
113 Size += sizeof(T);
114 if (std::is_floating_point<T>::value) {
115 MI.addOperand(
116 MCOperand::createDFPImm(bit_cast<uint64_t>(static_cast<double>(Val))));
117 } else {
118 MI.addOperand(MCOperand::createImm(static_cast<int64_t>(Val)));
119 }
120 return true;
121 }
122
123 std::optional<MCDisassembler::DecodeStatus>
onSymbolStart(SymbolInfoTy & Symbol,uint64_t & Size,ArrayRef<uint8_t> Bytes,uint64_t Address,raw_ostream & CStream) const124 WebAssemblyDisassembler::onSymbolStart(SymbolInfoTy &Symbol, uint64_t &Size,
125 ArrayRef<uint8_t> Bytes,
126 uint64_t Address,
127 raw_ostream &CStream) const {
128 Size = 0;
129 if (Address == 0) {
130 // Start of a code section: we're parsing only the function count.
131 int64_t FunctionCount;
132 if (!nextLEB(FunctionCount, Bytes, Size, false))
133 return std::nullopt;
134 outs() << " # " << FunctionCount << " functions in section.";
135 } else {
136 // Parse the start of a single function.
137 int64_t BodySize, LocalEntryCount;
138 if (!nextLEB(BodySize, Bytes, Size, false) ||
139 !nextLEB(LocalEntryCount, Bytes, Size, false))
140 return std::nullopt;
141 if (LocalEntryCount) {
142 outs() << " .local ";
143 for (int64_t I = 0; I < LocalEntryCount; I++) {
144 int64_t Count, Type;
145 if (!nextLEB(Count, Bytes, Size, false) ||
146 !nextLEB(Type, Bytes, Size, false))
147 return std::nullopt;
148 for (int64_t J = 0; J < Count; J++) {
149 if (I || J)
150 outs() << ", ";
151 outs() << WebAssembly::anyTypeToString(Type);
152 }
153 }
154 }
155 }
156 outs() << "\n";
157 return MCDisassembler::Success;
158 }
159
getInstruction(MCInst & MI,uint64_t & Size,ArrayRef<uint8_t> Bytes,uint64_t,raw_ostream & CS) const160 MCDisassembler::DecodeStatus WebAssemblyDisassembler::getInstruction(
161 MCInst &MI, uint64_t &Size, ArrayRef<uint8_t> Bytes, uint64_t /*Address*/,
162 raw_ostream &CS) const {
163 CommentStream = &CS;
164 Size = 0;
165 int Opc = nextByte(Bytes, Size);
166 if (Opc < 0)
167 return MCDisassembler::Fail;
168 const auto *WasmInst = &InstructionTable0[Opc];
169 // If this is a prefix byte, indirect to another table.
170 if (WasmInst->ET == ET_Prefix) {
171 WasmInst = nullptr;
172 // Linear search, so far only 2 entries.
173 for (auto PT = PrefixTable; PT->Table; PT++) {
174 if (PT->Prefix == Opc) {
175 WasmInst = PT->Table;
176 break;
177 }
178 }
179 if (!WasmInst)
180 return MCDisassembler::Fail;
181 int64_t PrefixedOpc;
182 if (!nextLEB(PrefixedOpc, Bytes, Size, false))
183 return MCDisassembler::Fail;
184 if (PrefixedOpc < 0 || PrefixedOpc >= WebAssemblyInstructionTableSize)
185 return MCDisassembler::Fail;
186 WasmInst += PrefixedOpc;
187 }
188 if (WasmInst->ET == ET_Unused)
189 return MCDisassembler::Fail;
190 // At this point we must have a valid instruction to decode.
191 assert(WasmInst->ET == ET_Instruction);
192 MI.setOpcode(WasmInst->Opcode);
193 // Parse any operands.
194 for (uint8_t OPI = 0; OPI < WasmInst->NumOperands; OPI++) {
195 auto OT = OperandTable[WasmInst->OperandStart + OPI];
196 switch (OT) {
197 // ULEB operands:
198 case WebAssembly::OPERAND_BASIC_BLOCK:
199 case WebAssembly::OPERAND_LOCAL:
200 case WebAssembly::OPERAND_GLOBAL:
201 case WebAssembly::OPERAND_FUNCTION32:
202 case WebAssembly::OPERAND_TABLE:
203 case WebAssembly::OPERAND_OFFSET32:
204 case WebAssembly::OPERAND_OFFSET64:
205 case WebAssembly::OPERAND_P2ALIGN:
206 case WebAssembly::OPERAND_TYPEINDEX:
207 case WebAssembly::OPERAND_TAG:
208 case MCOI::OPERAND_IMMEDIATE: {
209 if (!parseLEBImmediate(MI, Size, Bytes, false))
210 return MCDisassembler::Fail;
211 break;
212 }
213 // SLEB operands:
214 case WebAssembly::OPERAND_I32IMM:
215 case WebAssembly::OPERAND_I64IMM: {
216 if (!parseLEBImmediate(MI, Size, Bytes, true))
217 return MCDisassembler::Fail;
218 break;
219 }
220 // block_type operands:
221 case WebAssembly::OPERAND_SIGNATURE: {
222 int64_t Val;
223 uint64_t PrevSize = Size;
224 if (!nextLEB(Val, Bytes, Size, true))
225 return MCDisassembler::Fail;
226 if (Val < 0) {
227 // Negative values are single septet value types or empty types
228 if (Size != PrevSize + 1) {
229 MI.addOperand(
230 MCOperand::createImm(int64_t(WebAssembly::BlockType::Invalid)));
231 } else {
232 MI.addOperand(MCOperand::createImm(Val & 0x7f));
233 }
234 } else {
235 // We don't have access to the signature, so create a symbol without one
236 MCSymbol *Sym = getContext().createTempSymbol("typeindex", true);
237 auto *WasmSym = cast<MCSymbolWasm>(Sym);
238 WasmSym->setType(wasm::WASM_SYMBOL_TYPE_FUNCTION);
239 const MCExpr *Expr = MCSymbolRefExpr::create(
240 WasmSym, MCSymbolRefExpr::VK_WASM_TYPEINDEX, getContext());
241 MI.addOperand(MCOperand::createExpr(Expr));
242 }
243 break;
244 }
245 // FP operands.
246 case WebAssembly::OPERAND_F32IMM: {
247 if (!parseImmediate<float>(MI, Size, Bytes))
248 return MCDisassembler::Fail;
249 break;
250 }
251 case WebAssembly::OPERAND_F64IMM: {
252 if (!parseImmediate<double>(MI, Size, Bytes))
253 return MCDisassembler::Fail;
254 break;
255 }
256 // Vector lane operands (not LEB encoded).
257 case WebAssembly::OPERAND_VEC_I8IMM: {
258 if (!parseImmediate<uint8_t>(MI, Size, Bytes))
259 return MCDisassembler::Fail;
260 break;
261 }
262 case WebAssembly::OPERAND_VEC_I16IMM: {
263 if (!parseImmediate<uint16_t>(MI, Size, Bytes))
264 return MCDisassembler::Fail;
265 break;
266 }
267 case WebAssembly::OPERAND_VEC_I32IMM: {
268 if (!parseImmediate<uint32_t>(MI, Size, Bytes))
269 return MCDisassembler::Fail;
270 break;
271 }
272 case WebAssembly::OPERAND_VEC_I64IMM: {
273 if (!parseImmediate<uint64_t>(MI, Size, Bytes))
274 return MCDisassembler::Fail;
275 break;
276 }
277 case WebAssembly::OPERAND_BRLIST: {
278 int64_t TargetTableLen;
279 if (!nextLEB(TargetTableLen, Bytes, Size, false))
280 return MCDisassembler::Fail;
281 for (int64_t I = 0; I < TargetTableLen; I++) {
282 if (!parseLEBImmediate(MI, Size, Bytes, false))
283 return MCDisassembler::Fail;
284 }
285 // Default case.
286 if (!parseLEBImmediate(MI, Size, Bytes, false))
287 return MCDisassembler::Fail;
288 break;
289 }
290 case MCOI::OPERAND_REGISTER:
291 // The tablegen header currently does not have any register operands since
292 // we use only the stack (_S) instructions.
293 // If you hit this that probably means a bad instruction definition in
294 // tablegen.
295 llvm_unreachable("Register operand in WebAssemblyDisassembler");
296 default:
297 llvm_unreachable("Unknown operand type in WebAssemblyDisassembler");
298 }
299 }
300 return MCDisassembler::Success;
301 }
302