1dda28197Spatrick //===-- DisassemblerLLVMC.cpp ---------------------------------------------===//
2dda28197Spatrick //
3dda28197Spatrick // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4dda28197Spatrick // See https://llvm.org/LICENSE.txt for license information.
5dda28197Spatrick // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6dda28197Spatrick //
7dda28197Spatrick //===----------------------------------------------------------------------===//
8dda28197Spatrick
9dda28197Spatrick #include "DisassemblerLLVMC.h"
10dda28197Spatrick
11dda28197Spatrick #include "llvm-c/Disassembler.h"
12dda28197Spatrick #include "llvm/ADT/SmallString.h"
13*f6aab3d8Srobert #include "llvm/ADT/StringExtras.h"
14dda28197Spatrick #include "llvm/MC/MCAsmInfo.h"
15dda28197Spatrick #include "llvm/MC/MCContext.h"
16dda28197Spatrick #include "llvm/MC/MCDisassembler/MCDisassembler.h"
17dda28197Spatrick #include "llvm/MC/MCDisassembler/MCExternalSymbolizer.h"
18dda28197Spatrick #include "llvm/MC/MCDisassembler/MCRelocationInfo.h"
19dda28197Spatrick #include "llvm/MC/MCInst.h"
20dda28197Spatrick #include "llvm/MC/MCInstPrinter.h"
21dda28197Spatrick #include "llvm/MC/MCInstrInfo.h"
22dda28197Spatrick #include "llvm/MC/MCRegisterInfo.h"
23dda28197Spatrick #include "llvm/MC/MCSubtargetInfo.h"
24dda28197Spatrick #include "llvm/MC/MCTargetOptions.h"
25*f6aab3d8Srobert #include "llvm/MC/TargetRegistry.h"
26*f6aab3d8Srobert #include "llvm/Support/AArch64TargetParser.h"
27dda28197Spatrick #include "llvm/Support/ErrorHandling.h"
28dda28197Spatrick #include "llvm/Support/ScopedPrinter.h"
29dda28197Spatrick #include "llvm/Support/TargetSelect.h"
30dda28197Spatrick
31dda28197Spatrick #include "lldb/Core/Address.h"
32dda28197Spatrick #include "lldb/Core/Module.h"
33dda28197Spatrick #include "lldb/Symbol/SymbolContext.h"
34dda28197Spatrick #include "lldb/Target/ExecutionContext.h"
35dda28197Spatrick #include "lldb/Target/Process.h"
36dda28197Spatrick #include "lldb/Target/RegisterContext.h"
37dda28197Spatrick #include "lldb/Target/SectionLoadList.h"
38dda28197Spatrick #include "lldb/Target/StackFrame.h"
39dda28197Spatrick #include "lldb/Target/Target.h"
40dda28197Spatrick #include "lldb/Utility/DataExtractor.h"
41*f6aab3d8Srobert #include "lldb/Utility/LLDBLog.h"
42dda28197Spatrick #include "lldb/Utility/Log.h"
43dda28197Spatrick #include "lldb/Utility/RegularExpression.h"
44dda28197Spatrick #include "lldb/Utility/Stream.h"
45*f6aab3d8Srobert #include <optional>
46dda28197Spatrick
47dda28197Spatrick using namespace lldb;
48dda28197Spatrick using namespace lldb_private;
49dda28197Spatrick
50dda28197Spatrick LLDB_PLUGIN_DEFINE(DisassemblerLLVMC)
51dda28197Spatrick
52dda28197Spatrick class DisassemblerLLVMC::MCDisasmInstance {
53dda28197Spatrick public:
54dda28197Spatrick static std::unique_ptr<MCDisasmInstance>
55dda28197Spatrick Create(const char *triple, const char *cpu, const char *features_str,
56dda28197Spatrick unsigned flavor, DisassemblerLLVMC &owner);
57dda28197Spatrick
58dda28197Spatrick ~MCDisasmInstance() = default;
59dda28197Spatrick
60dda28197Spatrick uint64_t GetMCInst(const uint8_t *opcode_data, size_t opcode_data_len,
61dda28197Spatrick lldb::addr_t pc, llvm::MCInst &mc_inst) const;
62dda28197Spatrick void PrintMCInst(llvm::MCInst &mc_inst, std::string &inst_string,
63dda28197Spatrick std::string &comments_string);
64dda28197Spatrick void SetStyle(bool use_hex_immed, HexImmediateStyle hex_style);
65dda28197Spatrick bool CanBranch(llvm::MCInst &mc_inst) const;
66dda28197Spatrick bool HasDelaySlot(llvm::MCInst &mc_inst) const;
67dda28197Spatrick bool IsCall(llvm::MCInst &mc_inst) const;
68*f6aab3d8Srobert bool IsLoad(llvm::MCInst &mc_inst) const;
69*f6aab3d8Srobert bool IsAuthenticated(llvm::MCInst &mc_inst) const;
70dda28197Spatrick
71dda28197Spatrick private:
72dda28197Spatrick MCDisasmInstance(std::unique_ptr<llvm::MCInstrInfo> &&instr_info_up,
73dda28197Spatrick std::unique_ptr<llvm::MCRegisterInfo> &®_info_up,
74dda28197Spatrick std::unique_ptr<llvm::MCSubtargetInfo> &&subtarget_info_up,
75dda28197Spatrick std::unique_ptr<llvm::MCAsmInfo> &&asm_info_up,
76dda28197Spatrick std::unique_ptr<llvm::MCContext> &&context_up,
77dda28197Spatrick std::unique_ptr<llvm::MCDisassembler> &&disasm_up,
78dda28197Spatrick std::unique_ptr<llvm::MCInstPrinter> &&instr_printer_up);
79dda28197Spatrick
80dda28197Spatrick std::unique_ptr<llvm::MCInstrInfo> m_instr_info_up;
81dda28197Spatrick std::unique_ptr<llvm::MCRegisterInfo> m_reg_info_up;
82dda28197Spatrick std::unique_ptr<llvm::MCSubtargetInfo> m_subtarget_info_up;
83dda28197Spatrick std::unique_ptr<llvm::MCAsmInfo> m_asm_info_up;
84dda28197Spatrick std::unique_ptr<llvm::MCContext> m_context_up;
85dda28197Spatrick std::unique_ptr<llvm::MCDisassembler> m_disasm_up;
86dda28197Spatrick std::unique_ptr<llvm::MCInstPrinter> m_instr_printer_up;
87dda28197Spatrick };
88dda28197Spatrick
89*f6aab3d8Srobert namespace x86 {
90*f6aab3d8Srobert
/// These are the three values deciding instruction control flow kind.
/// InstructionLengthDecode decodes an instruction into this struct.
///
/// All members default to zero so that a default-constructed value never
/// carries indeterminate bytes; aggregate initialization such as
/// `{opcode, len, modrm}` keeps working.
struct InstructionOpcodeAndModrm {
  /// Primary opcode of the instruction.
  /// For one-byte opcode instructions, it's the first byte after the prefix.
  /// For two- and three-byte opcodes, it's the second byte.
  uint8_t primary_opcode = 0;

  /// The length of the opcode in bytes. Valid opcode lengths are 1, 2, or 3.
  uint8_t opcode_len = 0;

  /// ModR/M byte of the instruction.
  /// Bits[7:6] indicate MOD. Bits[5:3] specify a register and R/M bits[2:0]
  /// may contain a register or specify an addressing mode, depending on MOD.
  uint8_t modrm = 0;
};
111*f6aab3d8Srobert
112*f6aab3d8Srobert /// Determine the InstructionControlFlowKind based on opcode and modrm bytes.
113*f6aab3d8Srobert /// Refer to http://ref.x86asm.net/coder.html for the full list of opcode and
114*f6aab3d8Srobert /// instruction set.
115*f6aab3d8Srobert ///
116*f6aab3d8Srobert /// \param[in] opcode_and_modrm
117*f6aab3d8Srobert /// Contains primary_opcode byte, its length, and ModR/M byte.
118*f6aab3d8Srobert /// Refer to the struct InstructionOpcodeAndModrm for details.
119*f6aab3d8Srobert ///
120*f6aab3d8Srobert /// \return
121*f6aab3d8Srobert /// The control flow kind of the instruction or
122*f6aab3d8Srobert /// eInstructionControlFlowKindOther if the instruction doesn't affect
123*f6aab3d8Srobert /// the control flow of the program.
124*f6aab3d8Srobert lldb::InstructionControlFlowKind
MapOpcodeIntoControlFlowKind(InstructionOpcodeAndModrm opcode_and_modrm)125*f6aab3d8Srobert MapOpcodeIntoControlFlowKind(InstructionOpcodeAndModrm opcode_and_modrm) {
126*f6aab3d8Srobert uint8_t opcode = opcode_and_modrm.primary_opcode;
127*f6aab3d8Srobert uint8_t opcode_len = opcode_and_modrm.opcode_len;
128*f6aab3d8Srobert uint8_t modrm = opcode_and_modrm.modrm;
129*f6aab3d8Srobert
130*f6aab3d8Srobert if (opcode_len > 2)
131*f6aab3d8Srobert return lldb::eInstructionControlFlowKindOther;
132*f6aab3d8Srobert
133*f6aab3d8Srobert if (opcode >= 0x70 && opcode <= 0x7F) {
134*f6aab3d8Srobert if (opcode_len == 1)
135*f6aab3d8Srobert return lldb::eInstructionControlFlowKindCondJump;
136*f6aab3d8Srobert else
137*f6aab3d8Srobert return lldb::eInstructionControlFlowKindOther;
138*f6aab3d8Srobert }
139*f6aab3d8Srobert
140*f6aab3d8Srobert if (opcode >= 0x80 && opcode <= 0x8F) {
141*f6aab3d8Srobert if (opcode_len == 2)
142*f6aab3d8Srobert return lldb::eInstructionControlFlowKindCondJump;
143*f6aab3d8Srobert else
144*f6aab3d8Srobert return lldb::eInstructionControlFlowKindOther;
145*f6aab3d8Srobert }
146*f6aab3d8Srobert
147*f6aab3d8Srobert switch (opcode) {
148*f6aab3d8Srobert case 0x9A:
149*f6aab3d8Srobert if (opcode_len == 1)
150*f6aab3d8Srobert return lldb::eInstructionControlFlowKindFarCall;
151*f6aab3d8Srobert break;
152*f6aab3d8Srobert case 0xFF:
153*f6aab3d8Srobert if (opcode_len == 1) {
154*f6aab3d8Srobert uint8_t modrm_reg = (modrm >> 3) & 7;
155*f6aab3d8Srobert if (modrm_reg == 2)
156*f6aab3d8Srobert return lldb::eInstructionControlFlowKindCall;
157*f6aab3d8Srobert else if (modrm_reg == 3)
158*f6aab3d8Srobert return lldb::eInstructionControlFlowKindFarCall;
159*f6aab3d8Srobert else if (modrm_reg == 4)
160*f6aab3d8Srobert return lldb::eInstructionControlFlowKindJump;
161*f6aab3d8Srobert else if (modrm_reg == 5)
162*f6aab3d8Srobert return lldb::eInstructionControlFlowKindFarJump;
163*f6aab3d8Srobert }
164*f6aab3d8Srobert break;
165*f6aab3d8Srobert case 0xE8:
166*f6aab3d8Srobert if (opcode_len == 1)
167*f6aab3d8Srobert return lldb::eInstructionControlFlowKindCall;
168*f6aab3d8Srobert break;
169*f6aab3d8Srobert case 0xCD:
170*f6aab3d8Srobert case 0xCC:
171*f6aab3d8Srobert case 0xCE:
172*f6aab3d8Srobert case 0xF1:
173*f6aab3d8Srobert if (opcode_len == 1)
174*f6aab3d8Srobert return lldb::eInstructionControlFlowKindFarCall;
175*f6aab3d8Srobert break;
176*f6aab3d8Srobert case 0xCF:
177*f6aab3d8Srobert if (opcode_len == 1)
178*f6aab3d8Srobert return lldb::eInstructionControlFlowKindFarReturn;
179*f6aab3d8Srobert break;
180*f6aab3d8Srobert case 0xE9:
181*f6aab3d8Srobert case 0xEB:
182*f6aab3d8Srobert if (opcode_len == 1)
183*f6aab3d8Srobert return lldb::eInstructionControlFlowKindJump;
184*f6aab3d8Srobert break;
185*f6aab3d8Srobert case 0xEA:
186*f6aab3d8Srobert if (opcode_len == 1)
187*f6aab3d8Srobert return lldb::eInstructionControlFlowKindFarJump;
188*f6aab3d8Srobert break;
189*f6aab3d8Srobert case 0xE3:
190*f6aab3d8Srobert case 0xE0:
191*f6aab3d8Srobert case 0xE1:
192*f6aab3d8Srobert case 0xE2:
193*f6aab3d8Srobert if (opcode_len == 1)
194*f6aab3d8Srobert return lldb::eInstructionControlFlowKindCondJump;
195*f6aab3d8Srobert break;
196*f6aab3d8Srobert case 0xC3:
197*f6aab3d8Srobert case 0xC2:
198*f6aab3d8Srobert if (opcode_len == 1)
199*f6aab3d8Srobert return lldb::eInstructionControlFlowKindReturn;
200*f6aab3d8Srobert break;
201*f6aab3d8Srobert case 0xCB:
202*f6aab3d8Srobert case 0xCA:
203*f6aab3d8Srobert if (opcode_len == 1)
204*f6aab3d8Srobert return lldb::eInstructionControlFlowKindFarReturn;
205*f6aab3d8Srobert break;
206*f6aab3d8Srobert case 0x05:
207*f6aab3d8Srobert case 0x34:
208*f6aab3d8Srobert if (opcode_len == 2)
209*f6aab3d8Srobert return lldb::eInstructionControlFlowKindFarCall;
210*f6aab3d8Srobert break;
211*f6aab3d8Srobert case 0x35:
212*f6aab3d8Srobert case 0x07:
213*f6aab3d8Srobert if (opcode_len == 2)
214*f6aab3d8Srobert return lldb::eInstructionControlFlowKindFarReturn;
215*f6aab3d8Srobert break;
216*f6aab3d8Srobert case 0x01:
217*f6aab3d8Srobert if (opcode_len == 2) {
218*f6aab3d8Srobert switch (modrm) {
219*f6aab3d8Srobert case 0xc1:
220*f6aab3d8Srobert return lldb::eInstructionControlFlowKindFarCall;
221*f6aab3d8Srobert case 0xc2:
222*f6aab3d8Srobert case 0xc3:
223*f6aab3d8Srobert return lldb::eInstructionControlFlowKindFarReturn;
224*f6aab3d8Srobert default:
225*f6aab3d8Srobert break;
226*f6aab3d8Srobert }
227*f6aab3d8Srobert }
228*f6aab3d8Srobert break;
229*f6aab3d8Srobert default:
230*f6aab3d8Srobert break;
231*f6aab3d8Srobert }
232*f6aab3d8Srobert
233*f6aab3d8Srobert return lldb::eInstructionControlFlowKindOther;
234*f6aab3d8Srobert }
235*f6aab3d8Srobert
236*f6aab3d8Srobert /// Decode an instruction into opcode, modrm and opcode_len.
237*f6aab3d8Srobert /// Refer to http://ref.x86asm.net/coder.html for the instruction bytes layout.
238*f6aab3d8Srobert /// Opcodes in x86 are generally the first byte of instruction, though two-byte
239*f6aab3d8Srobert /// instructions and prefixes exist. ModR/M is the byte following the opcode
240*f6aab3d8Srobert /// and adds additional information for how the instruction is executed.
241*f6aab3d8Srobert ///
242*f6aab3d8Srobert /// \param[in] inst_bytes
243*f6aab3d8Srobert /// Raw bytes of the instruction
244*f6aab3d8Srobert ///
245*f6aab3d8Srobert ///
246*f6aab3d8Srobert /// \param[in] bytes_len
247*f6aab3d8Srobert /// The length of the inst_bytes array.
248*f6aab3d8Srobert ///
249*f6aab3d8Srobert /// \param[in] is_exec_mode_64b
250*f6aab3d8Srobert /// If true, the execution mode is 64 bit.
251*f6aab3d8Srobert ///
252*f6aab3d8Srobert /// \return
253*f6aab3d8Srobert /// Returns decoded instruction as struct InstructionOpcodeAndModrm, holding
254*f6aab3d8Srobert /// primary_opcode, opcode_len and modrm byte. Refer to the struct definition
255*f6aab3d8Srobert /// for more details.
256*f6aab3d8Srobert /// Otherwise if the given instruction is invalid, returns std::nullopt.
257*f6aab3d8Srobert std::optional<InstructionOpcodeAndModrm>
InstructionLengthDecode(const uint8_t * inst_bytes,int bytes_len,bool is_exec_mode_64b)258*f6aab3d8Srobert InstructionLengthDecode(const uint8_t *inst_bytes, int bytes_len,
259*f6aab3d8Srobert bool is_exec_mode_64b) {
260*f6aab3d8Srobert int op_idx = 0;
261*f6aab3d8Srobert bool prefix_done = false;
262*f6aab3d8Srobert InstructionOpcodeAndModrm ret = {0, 0, 0};
263*f6aab3d8Srobert
264*f6aab3d8Srobert // In most cases, the primary_opcode is the first byte of the instruction
265*f6aab3d8Srobert // but some instructions have a prefix to be skipped for these calculations.
266*f6aab3d8Srobert // The following mapping is inspired from libipt's instruction decoding logic
267*f6aab3d8Srobert // in `src/pt_ild.c`
268*f6aab3d8Srobert while (!prefix_done) {
269*f6aab3d8Srobert if (op_idx >= bytes_len)
270*f6aab3d8Srobert return std::nullopt;
271*f6aab3d8Srobert
272*f6aab3d8Srobert ret.primary_opcode = inst_bytes[op_idx];
273*f6aab3d8Srobert switch (ret.primary_opcode) {
274*f6aab3d8Srobert // prefix_ignore
275*f6aab3d8Srobert case 0x26:
276*f6aab3d8Srobert case 0x2e:
277*f6aab3d8Srobert case 0x36:
278*f6aab3d8Srobert case 0x3e:
279*f6aab3d8Srobert case 0x64:
280*f6aab3d8Srobert case 0x65:
281*f6aab3d8Srobert // prefix_osz, prefix_asz
282*f6aab3d8Srobert case 0x66:
283*f6aab3d8Srobert case 0x67:
284*f6aab3d8Srobert // prefix_lock, prefix_f2, prefix_f3
285*f6aab3d8Srobert case 0xf0:
286*f6aab3d8Srobert case 0xf2:
287*f6aab3d8Srobert case 0xf3:
288*f6aab3d8Srobert op_idx++;
289*f6aab3d8Srobert break;
290*f6aab3d8Srobert
291*f6aab3d8Srobert // prefix_rex
292*f6aab3d8Srobert case 0x40:
293*f6aab3d8Srobert case 0x41:
294*f6aab3d8Srobert case 0x42:
295*f6aab3d8Srobert case 0x43:
296*f6aab3d8Srobert case 0x44:
297*f6aab3d8Srobert case 0x45:
298*f6aab3d8Srobert case 0x46:
299*f6aab3d8Srobert case 0x47:
300*f6aab3d8Srobert case 0x48:
301*f6aab3d8Srobert case 0x49:
302*f6aab3d8Srobert case 0x4a:
303*f6aab3d8Srobert case 0x4b:
304*f6aab3d8Srobert case 0x4c:
305*f6aab3d8Srobert case 0x4d:
306*f6aab3d8Srobert case 0x4e:
307*f6aab3d8Srobert case 0x4f:
308*f6aab3d8Srobert if (is_exec_mode_64b)
309*f6aab3d8Srobert op_idx++;
310*f6aab3d8Srobert else
311*f6aab3d8Srobert prefix_done = true;
312*f6aab3d8Srobert break;
313*f6aab3d8Srobert
314*f6aab3d8Srobert // prefix_vex_c4, c5
315*f6aab3d8Srobert case 0xc5:
316*f6aab3d8Srobert if (!is_exec_mode_64b && (inst_bytes[op_idx + 1] & 0xc0) != 0xc0) {
317*f6aab3d8Srobert prefix_done = true;
318*f6aab3d8Srobert break;
319*f6aab3d8Srobert }
320*f6aab3d8Srobert
321*f6aab3d8Srobert ret.opcode_len = 2;
322*f6aab3d8Srobert ret.primary_opcode = inst_bytes[op_idx + 2];
323*f6aab3d8Srobert ret.modrm = inst_bytes[op_idx + 3];
324*f6aab3d8Srobert return ret;
325*f6aab3d8Srobert
326*f6aab3d8Srobert case 0xc4:
327*f6aab3d8Srobert if (!is_exec_mode_64b && (inst_bytes[op_idx + 1] & 0xc0) != 0xc0) {
328*f6aab3d8Srobert prefix_done = true;
329*f6aab3d8Srobert break;
330*f6aab3d8Srobert }
331*f6aab3d8Srobert ret.opcode_len = inst_bytes[op_idx + 1] & 0x1f;
332*f6aab3d8Srobert ret.primary_opcode = inst_bytes[op_idx + 3];
333*f6aab3d8Srobert ret.modrm = inst_bytes[op_idx + 4];
334*f6aab3d8Srobert return ret;
335*f6aab3d8Srobert
336*f6aab3d8Srobert // prefix_evex
337*f6aab3d8Srobert case 0x62:
338*f6aab3d8Srobert if (!is_exec_mode_64b && (inst_bytes[op_idx + 1] & 0xc0) != 0xc0) {
339*f6aab3d8Srobert prefix_done = true;
340*f6aab3d8Srobert break;
341*f6aab3d8Srobert }
342*f6aab3d8Srobert ret.opcode_len = inst_bytes[op_idx + 1] & 0x03;
343*f6aab3d8Srobert ret.primary_opcode = inst_bytes[op_idx + 4];
344*f6aab3d8Srobert ret.modrm = inst_bytes[op_idx + 5];
345*f6aab3d8Srobert return ret;
346*f6aab3d8Srobert
347*f6aab3d8Srobert default:
348*f6aab3d8Srobert prefix_done = true;
349*f6aab3d8Srobert break;
350*f6aab3d8Srobert }
351*f6aab3d8Srobert } // prefix done
352*f6aab3d8Srobert
353*f6aab3d8Srobert ret.primary_opcode = inst_bytes[op_idx];
354*f6aab3d8Srobert ret.modrm = inst_bytes[op_idx + 1];
355*f6aab3d8Srobert ret.opcode_len = 1;
356*f6aab3d8Srobert
357*f6aab3d8Srobert // If the first opcode is 0F, it's two- or three- byte opcodes.
358*f6aab3d8Srobert if (ret.primary_opcode == 0x0F) {
359*f6aab3d8Srobert ret.primary_opcode = inst_bytes[++op_idx]; // get the next byte
360*f6aab3d8Srobert
361*f6aab3d8Srobert if (ret.primary_opcode == 0x38) {
362*f6aab3d8Srobert ret.opcode_len = 3;
363*f6aab3d8Srobert ret.primary_opcode = inst_bytes[++op_idx]; // get the next byte
364*f6aab3d8Srobert ret.modrm = inst_bytes[op_idx + 1];
365*f6aab3d8Srobert } else if (ret.primary_opcode == 0x3A) {
366*f6aab3d8Srobert ret.opcode_len = 3;
367*f6aab3d8Srobert ret.primary_opcode = inst_bytes[++op_idx];
368*f6aab3d8Srobert ret.modrm = inst_bytes[op_idx + 1];
369*f6aab3d8Srobert } else if ((ret.primary_opcode & 0xf8) == 0x38) {
370*f6aab3d8Srobert ret.opcode_len = 0;
371*f6aab3d8Srobert ret.primary_opcode = inst_bytes[++op_idx];
372*f6aab3d8Srobert ret.modrm = inst_bytes[op_idx + 1];
373*f6aab3d8Srobert } else if (ret.primary_opcode == 0x0F) {
374*f6aab3d8Srobert ret.opcode_len = 3;
375*f6aab3d8Srobert // opcode is 0x0F, no needs to update
376*f6aab3d8Srobert ret.modrm = inst_bytes[op_idx + 1];
377*f6aab3d8Srobert } else {
378*f6aab3d8Srobert ret.opcode_len = 2;
379*f6aab3d8Srobert ret.modrm = inst_bytes[op_idx + 1];
380*f6aab3d8Srobert }
381*f6aab3d8Srobert }
382*f6aab3d8Srobert
383*f6aab3d8Srobert return ret;
384*f6aab3d8Srobert }
385*f6aab3d8Srobert
GetControlFlowKind(bool is_exec_mode_64b,Opcode m_opcode)386*f6aab3d8Srobert lldb::InstructionControlFlowKind GetControlFlowKind(bool is_exec_mode_64b,
387*f6aab3d8Srobert Opcode m_opcode) {
388*f6aab3d8Srobert std::optional<InstructionOpcodeAndModrm> ret;
389*f6aab3d8Srobert
390*f6aab3d8Srobert if (m_opcode.GetOpcodeBytes() == nullptr || m_opcode.GetByteSize() <= 0) {
391*f6aab3d8Srobert // x86_64 and i386 instructions are categorized as Opcode::Type::eTypeBytes
392*f6aab3d8Srobert return lldb::eInstructionControlFlowKindUnknown;
393*f6aab3d8Srobert }
394*f6aab3d8Srobert
395*f6aab3d8Srobert // Opcode bytes will be decoded into primary_opcode, modrm and opcode length.
396*f6aab3d8Srobert // These are the three values deciding instruction control flow kind.
397*f6aab3d8Srobert ret = InstructionLengthDecode((const uint8_t *)m_opcode.GetOpcodeBytes(),
398*f6aab3d8Srobert m_opcode.GetByteSize(), is_exec_mode_64b);
399*f6aab3d8Srobert if (!ret)
400*f6aab3d8Srobert return lldb::eInstructionControlFlowKindUnknown;
401*f6aab3d8Srobert else
402*f6aab3d8Srobert return MapOpcodeIntoControlFlowKind(*ret);
403*f6aab3d8Srobert }
404*f6aab3d8Srobert
405*f6aab3d8Srobert } // namespace x86
406*f6aab3d8Srobert
407dda28197Spatrick class InstructionLLVMC : public lldb_private::Instruction {
408dda28197Spatrick public:
InstructionLLVMC(DisassemblerLLVMC & disasm,const lldb_private::Address & address,AddressClass addr_class)409dda28197Spatrick InstructionLLVMC(DisassemblerLLVMC &disasm,
410dda28197Spatrick const lldb_private::Address &address,
411dda28197Spatrick AddressClass addr_class)
412dda28197Spatrick : Instruction(address, addr_class),
413dda28197Spatrick m_disasm_wp(std::static_pointer_cast<DisassemblerLLVMC>(
414*f6aab3d8Srobert disasm.shared_from_this())) {}
415dda28197Spatrick
416dda28197Spatrick ~InstructionLLVMC() override = default;
417dda28197Spatrick
DoesBranch()418dda28197Spatrick bool DoesBranch() override {
419dda28197Spatrick VisitInstruction();
420dda28197Spatrick return m_does_branch;
421dda28197Spatrick }
422dda28197Spatrick
HasDelaySlot()423dda28197Spatrick bool HasDelaySlot() override {
424dda28197Spatrick VisitInstruction();
425dda28197Spatrick return m_has_delay_slot;
426dda28197Spatrick }
427dda28197Spatrick
IsLoad()428*f6aab3d8Srobert bool IsLoad() override {
429*f6aab3d8Srobert VisitInstruction();
430*f6aab3d8Srobert return m_is_load;
431*f6aab3d8Srobert }
432*f6aab3d8Srobert
IsAuthenticated()433*f6aab3d8Srobert bool IsAuthenticated() override {
434*f6aab3d8Srobert VisitInstruction();
435*f6aab3d8Srobert return m_is_authenticated;
436*f6aab3d8Srobert }
437*f6aab3d8Srobert
GetDisasmToUse(bool & is_alternate_isa)438dda28197Spatrick DisassemblerLLVMC::MCDisasmInstance *GetDisasmToUse(bool &is_alternate_isa) {
439dda28197Spatrick DisassemblerScope disasm(*this);
440dda28197Spatrick return GetDisasmToUse(is_alternate_isa, disasm);
441dda28197Spatrick }
442dda28197Spatrick
Decode(const lldb_private::Disassembler & disassembler,const lldb_private::DataExtractor & data,lldb::offset_t data_offset)443dda28197Spatrick size_t Decode(const lldb_private::Disassembler &disassembler,
444dda28197Spatrick const lldb_private::DataExtractor &data,
445dda28197Spatrick lldb::offset_t data_offset) override {
446dda28197Spatrick // All we have to do is read the opcode which can be easy for some
447dda28197Spatrick // architectures
448dda28197Spatrick bool got_op = false;
449dda28197Spatrick DisassemblerScope disasm(*this);
450dda28197Spatrick if (disasm) {
451dda28197Spatrick const ArchSpec &arch = disasm->GetArchitecture();
452dda28197Spatrick const lldb::ByteOrder byte_order = data.GetByteOrder();
453dda28197Spatrick
454dda28197Spatrick const uint32_t min_op_byte_size = arch.GetMinimumOpcodeByteSize();
455dda28197Spatrick const uint32_t max_op_byte_size = arch.GetMaximumOpcodeByteSize();
456dda28197Spatrick if (min_op_byte_size == max_op_byte_size) {
457dda28197Spatrick // Fixed size instructions, just read that amount of data.
458dda28197Spatrick if (!data.ValidOffsetForDataOfSize(data_offset, min_op_byte_size))
459dda28197Spatrick return false;
460dda28197Spatrick
461dda28197Spatrick switch (min_op_byte_size) {
462dda28197Spatrick case 1:
463dda28197Spatrick m_opcode.SetOpcode8(data.GetU8(&data_offset), byte_order);
464dda28197Spatrick got_op = true;
465dda28197Spatrick break;
466dda28197Spatrick
467dda28197Spatrick case 2:
468dda28197Spatrick m_opcode.SetOpcode16(data.GetU16(&data_offset), byte_order);
469dda28197Spatrick got_op = true;
470dda28197Spatrick break;
471dda28197Spatrick
472dda28197Spatrick case 4:
473dda28197Spatrick m_opcode.SetOpcode32(data.GetU32(&data_offset), byte_order);
474dda28197Spatrick got_op = true;
475dda28197Spatrick break;
476dda28197Spatrick
477dda28197Spatrick case 8:
478dda28197Spatrick m_opcode.SetOpcode64(data.GetU64(&data_offset), byte_order);
479dda28197Spatrick got_op = true;
480dda28197Spatrick break;
481dda28197Spatrick
482dda28197Spatrick default:
483dda28197Spatrick m_opcode.SetOpcodeBytes(data.PeekData(data_offset, min_op_byte_size),
484dda28197Spatrick min_op_byte_size);
485dda28197Spatrick got_op = true;
486dda28197Spatrick break;
487dda28197Spatrick }
488dda28197Spatrick }
489dda28197Spatrick if (!got_op) {
490dda28197Spatrick bool is_alternate_isa = false;
491dda28197Spatrick DisassemblerLLVMC::MCDisasmInstance *mc_disasm_ptr =
492dda28197Spatrick GetDisasmToUse(is_alternate_isa, disasm);
493dda28197Spatrick
494dda28197Spatrick const llvm::Triple::ArchType machine = arch.GetMachine();
495dda28197Spatrick if (machine == llvm::Triple::arm || machine == llvm::Triple::thumb) {
496dda28197Spatrick if (machine == llvm::Triple::thumb || is_alternate_isa) {
497dda28197Spatrick uint32_t thumb_opcode = data.GetU16(&data_offset);
498dda28197Spatrick if ((thumb_opcode & 0xe000) != 0xe000 ||
499dda28197Spatrick ((thumb_opcode & 0x1800u) == 0)) {
500dda28197Spatrick m_opcode.SetOpcode16(thumb_opcode, byte_order);
501dda28197Spatrick m_is_valid = true;
502dda28197Spatrick } else {
503dda28197Spatrick thumb_opcode <<= 16;
504dda28197Spatrick thumb_opcode |= data.GetU16(&data_offset);
505dda28197Spatrick m_opcode.SetOpcode16_2(thumb_opcode, byte_order);
506dda28197Spatrick m_is_valid = true;
507dda28197Spatrick }
508dda28197Spatrick } else {
509dda28197Spatrick m_opcode.SetOpcode32(data.GetU32(&data_offset), byte_order);
510dda28197Spatrick m_is_valid = true;
511dda28197Spatrick }
512dda28197Spatrick } else {
513dda28197Spatrick // The opcode isn't evenly sized, so we need to actually use the llvm
514dda28197Spatrick // disassembler to parse it and get the size.
515dda28197Spatrick uint8_t *opcode_data =
516dda28197Spatrick const_cast<uint8_t *>(data.PeekData(data_offset, 1));
517dda28197Spatrick const size_t opcode_data_len = data.BytesLeft(data_offset);
518dda28197Spatrick const addr_t pc = m_address.GetFileAddress();
519dda28197Spatrick llvm::MCInst inst;
520dda28197Spatrick
521dda28197Spatrick const size_t inst_size =
522dda28197Spatrick mc_disasm_ptr->GetMCInst(opcode_data, opcode_data_len, pc, inst);
523dda28197Spatrick if (inst_size == 0)
524dda28197Spatrick m_opcode.Clear();
525dda28197Spatrick else {
526dda28197Spatrick m_opcode.SetOpcodeBytes(opcode_data, inst_size);
527dda28197Spatrick m_is_valid = true;
528dda28197Spatrick }
529dda28197Spatrick }
530dda28197Spatrick }
531dda28197Spatrick return m_opcode.GetByteSize();
532dda28197Spatrick }
533dda28197Spatrick return 0;
534dda28197Spatrick }
535dda28197Spatrick
AppendComment(std::string & description)536dda28197Spatrick void AppendComment(std::string &description) {
537dda28197Spatrick if (m_comment.empty())
538dda28197Spatrick m_comment.swap(description);
539dda28197Spatrick else {
540dda28197Spatrick m_comment.append(", ");
541dda28197Spatrick m_comment.append(description);
542dda28197Spatrick }
543dda28197Spatrick }
544dda28197Spatrick
545*f6aab3d8Srobert lldb::InstructionControlFlowKind
GetControlFlowKind(const lldb_private::ExecutionContext * exe_ctx)546*f6aab3d8Srobert GetControlFlowKind(const lldb_private::ExecutionContext *exe_ctx) override {
547*f6aab3d8Srobert DisassemblerScope disasm(*this, exe_ctx);
548*f6aab3d8Srobert if (disasm){
549*f6aab3d8Srobert if (disasm->GetArchitecture().GetMachine() == llvm::Triple::x86)
550*f6aab3d8Srobert return x86::GetControlFlowKind(/*is_64b=*/false, m_opcode);
551*f6aab3d8Srobert else if (disasm->GetArchitecture().GetMachine() == llvm::Triple::x86_64)
552*f6aab3d8Srobert return x86::GetControlFlowKind(/*is_64b=*/true, m_opcode);
553*f6aab3d8Srobert }
554*f6aab3d8Srobert
555*f6aab3d8Srobert return eInstructionControlFlowKindUnknown;
556*f6aab3d8Srobert }
557*f6aab3d8Srobert
CalculateMnemonicOperandsAndComment(const lldb_private::ExecutionContext * exe_ctx)558dda28197Spatrick void CalculateMnemonicOperandsAndComment(
559dda28197Spatrick const lldb_private::ExecutionContext *exe_ctx) override {
560dda28197Spatrick DataExtractor data;
561dda28197Spatrick const AddressClass address_class = GetAddressClass();
562dda28197Spatrick
563dda28197Spatrick if (m_opcode.GetData(data)) {
564dda28197Spatrick std::string out_string;
565dda28197Spatrick std::string comment_string;
566dda28197Spatrick
567dda28197Spatrick DisassemblerScope disasm(*this, exe_ctx);
568dda28197Spatrick if (disasm) {
569dda28197Spatrick DisassemblerLLVMC::MCDisasmInstance *mc_disasm_ptr;
570dda28197Spatrick
571dda28197Spatrick if (address_class == AddressClass::eCodeAlternateISA)
572dda28197Spatrick mc_disasm_ptr = disasm->m_alternate_disasm_up.get();
573dda28197Spatrick else
574dda28197Spatrick mc_disasm_ptr = disasm->m_disasm_up.get();
575dda28197Spatrick
576dda28197Spatrick lldb::addr_t pc = m_address.GetFileAddress();
577dda28197Spatrick m_using_file_addr = true;
578dda28197Spatrick
579dda28197Spatrick const bool data_from_file = disasm->m_data_from_file;
580dda28197Spatrick bool use_hex_immediates = true;
581dda28197Spatrick Disassembler::HexImmediateStyle hex_style = Disassembler::eHexStyleC;
582dda28197Spatrick
583dda28197Spatrick if (exe_ctx) {
584dda28197Spatrick Target *target = exe_ctx->GetTargetPtr();
585dda28197Spatrick if (target) {
586dda28197Spatrick use_hex_immediates = target->GetUseHexImmediates();
587dda28197Spatrick hex_style = target->GetHexImmediateStyle();
588dda28197Spatrick
589dda28197Spatrick if (!data_from_file) {
590dda28197Spatrick const lldb::addr_t load_addr = m_address.GetLoadAddress(target);
591dda28197Spatrick if (load_addr != LLDB_INVALID_ADDRESS) {
592dda28197Spatrick pc = load_addr;
593dda28197Spatrick m_using_file_addr = false;
594dda28197Spatrick }
595dda28197Spatrick }
596dda28197Spatrick }
597dda28197Spatrick }
598dda28197Spatrick
599dda28197Spatrick const uint8_t *opcode_data = data.GetDataStart();
600dda28197Spatrick const size_t opcode_data_len = data.GetByteSize();
601dda28197Spatrick llvm::MCInst inst;
602dda28197Spatrick size_t inst_size =
603dda28197Spatrick mc_disasm_ptr->GetMCInst(opcode_data, opcode_data_len, pc, inst);
604dda28197Spatrick
605dda28197Spatrick if (inst_size > 0) {
606dda28197Spatrick mc_disasm_ptr->SetStyle(use_hex_immediates, hex_style);
607dda28197Spatrick mc_disasm_ptr->PrintMCInst(inst, out_string, comment_string);
608dda28197Spatrick
609dda28197Spatrick if (!comment_string.empty()) {
610dda28197Spatrick AppendComment(comment_string);
611dda28197Spatrick }
612dda28197Spatrick }
613dda28197Spatrick
614dda28197Spatrick if (inst_size == 0) {
615dda28197Spatrick m_comment.assign("unknown opcode");
616dda28197Spatrick inst_size = m_opcode.GetByteSize();
617dda28197Spatrick StreamString mnemonic_strm;
618dda28197Spatrick lldb::offset_t offset = 0;
619dda28197Spatrick lldb::ByteOrder byte_order = data.GetByteOrder();
620dda28197Spatrick switch (inst_size) {
621dda28197Spatrick case 1: {
622dda28197Spatrick const uint8_t uval8 = data.GetU8(&offset);
623dda28197Spatrick m_opcode.SetOpcode8(uval8, byte_order);
624dda28197Spatrick m_opcode_name.assign(".byte");
625dda28197Spatrick mnemonic_strm.Printf("0x%2.2x", uval8);
626dda28197Spatrick } break;
627dda28197Spatrick case 2: {
628dda28197Spatrick const uint16_t uval16 = data.GetU16(&offset);
629dda28197Spatrick m_opcode.SetOpcode16(uval16, byte_order);
630dda28197Spatrick m_opcode_name.assign(".short");
631dda28197Spatrick mnemonic_strm.Printf("0x%4.4x", uval16);
632dda28197Spatrick } break;
633dda28197Spatrick case 4: {
634dda28197Spatrick const uint32_t uval32 = data.GetU32(&offset);
635dda28197Spatrick m_opcode.SetOpcode32(uval32, byte_order);
636dda28197Spatrick m_opcode_name.assign(".long");
637dda28197Spatrick mnemonic_strm.Printf("0x%8.8x", uval32);
638dda28197Spatrick } break;
639dda28197Spatrick case 8: {
640dda28197Spatrick const uint64_t uval64 = data.GetU64(&offset);
641dda28197Spatrick m_opcode.SetOpcode64(uval64, byte_order);
642dda28197Spatrick m_opcode_name.assign(".quad");
643dda28197Spatrick mnemonic_strm.Printf("0x%16.16" PRIx64, uval64);
644dda28197Spatrick } break;
645dda28197Spatrick default:
646dda28197Spatrick if (inst_size == 0)
647dda28197Spatrick return;
648dda28197Spatrick else {
649dda28197Spatrick const uint8_t *bytes = data.PeekData(offset, inst_size);
650dda28197Spatrick if (bytes == nullptr)
651dda28197Spatrick return;
652dda28197Spatrick m_opcode_name.assign(".byte");
653dda28197Spatrick m_opcode.SetOpcodeBytes(bytes, inst_size);
654dda28197Spatrick mnemonic_strm.Printf("0x%2.2x", bytes[0]);
655dda28197Spatrick for (uint32_t i = 1; i < inst_size; ++i)
656dda28197Spatrick mnemonic_strm.Printf(" 0x%2.2x", bytes[i]);
657dda28197Spatrick }
658dda28197Spatrick break;
659dda28197Spatrick }
660dda28197Spatrick m_mnemonics = std::string(mnemonic_strm.GetString());
661dda28197Spatrick return;
662dda28197Spatrick }
663dda28197Spatrick
664dda28197Spatrick static RegularExpression s_regex(
665dda28197Spatrick llvm::StringRef("[ \t]*([^ ^\t]+)[ \t]*([^ ^\t].*)?"));
666dda28197Spatrick
667dda28197Spatrick llvm::SmallVector<llvm::StringRef, 4> matches;
668dda28197Spatrick if (s_regex.Execute(out_string, &matches)) {
669dda28197Spatrick m_opcode_name = matches[1].str();
670dda28197Spatrick m_mnemonics = matches[2].str();
671dda28197Spatrick }
672dda28197Spatrick }
673dda28197Spatrick }
674dda28197Spatrick }
675dda28197Spatrick
IsValid() const676dda28197Spatrick bool IsValid() const { return m_is_valid; }
677dda28197Spatrick
UsingFileAddress() const678dda28197Spatrick bool UsingFileAddress() const { return m_using_file_addr; }
GetByteSize() const679dda28197Spatrick size_t GetByteSize() const { return m_opcode.GetByteSize(); }
680dda28197Spatrick
681dda28197Spatrick /// Grants exclusive access to the disassembler and initializes it with the
682dda28197Spatrick /// given InstructionLLVMC and an optional ExecutionContext.
683dda28197Spatrick class DisassemblerScope {
684dda28197Spatrick std::shared_ptr<DisassemblerLLVMC> m_disasm;
685dda28197Spatrick
686dda28197Spatrick public:
DisassemblerScope(InstructionLLVMC & i,const lldb_private::ExecutionContext * exe_ctx=nullptr)687dda28197Spatrick explicit DisassemblerScope(
688dda28197Spatrick InstructionLLVMC &i,
689dda28197Spatrick const lldb_private::ExecutionContext *exe_ctx = nullptr)
690dda28197Spatrick : m_disasm(i.m_disasm_wp.lock()) {
691dda28197Spatrick m_disasm->m_mutex.lock();
692dda28197Spatrick m_disasm->m_inst = &i;
693dda28197Spatrick m_disasm->m_exe_ctx = exe_ctx;
694dda28197Spatrick }
~DisassemblerScope()695dda28197Spatrick ~DisassemblerScope() { m_disasm->m_mutex.unlock(); }
696dda28197Spatrick
697dda28197Spatrick /// Evaluates to true if this scope contains a valid disassembler.
operator bool() const698dda28197Spatrick operator bool() const { return static_cast<bool>(m_disasm); }
699dda28197Spatrick
operator ->()700dda28197Spatrick std::shared_ptr<DisassemblerLLVMC> operator->() { return m_disasm; }
701dda28197Spatrick };
702dda28197Spatrick
703dda28197Spatrick static llvm::StringRef::const_iterator
ConsumeWhitespace(llvm::StringRef::const_iterator osi,llvm::StringRef::const_iterator ose)704dda28197Spatrick ConsumeWhitespace(llvm::StringRef::const_iterator osi,
705dda28197Spatrick llvm::StringRef::const_iterator ose) {
706dda28197Spatrick while (osi != ose) {
707dda28197Spatrick switch (*osi) {
708dda28197Spatrick default:
709dda28197Spatrick return osi;
710dda28197Spatrick case ' ':
711dda28197Spatrick case '\t':
712dda28197Spatrick break;
713dda28197Spatrick }
714dda28197Spatrick ++osi;
715dda28197Spatrick }
716dda28197Spatrick
717dda28197Spatrick return osi;
718dda28197Spatrick }
719dda28197Spatrick
720dda28197Spatrick static std::pair<bool, llvm::StringRef::const_iterator>
ConsumeChar(llvm::StringRef::const_iterator osi,const char c,llvm::StringRef::const_iterator ose)721dda28197Spatrick ConsumeChar(llvm::StringRef::const_iterator osi, const char c,
722dda28197Spatrick llvm::StringRef::const_iterator ose) {
723dda28197Spatrick bool found = false;
724dda28197Spatrick
725dda28197Spatrick osi = ConsumeWhitespace(osi, ose);
726dda28197Spatrick if (osi != ose && *osi == c) {
727dda28197Spatrick found = true;
728dda28197Spatrick ++osi;
729dda28197Spatrick }
730dda28197Spatrick
731dda28197Spatrick return std::make_pair(found, osi);
732dda28197Spatrick }
733dda28197Spatrick
734dda28197Spatrick static std::pair<Operand, llvm::StringRef::const_iterator>
ParseRegisterName(llvm::StringRef::const_iterator osi,llvm::StringRef::const_iterator ose)735dda28197Spatrick ParseRegisterName(llvm::StringRef::const_iterator osi,
736dda28197Spatrick llvm::StringRef::const_iterator ose) {
737dda28197Spatrick Operand ret;
738dda28197Spatrick ret.m_type = Operand::Type::Register;
739dda28197Spatrick std::string str;
740dda28197Spatrick
741dda28197Spatrick osi = ConsumeWhitespace(osi, ose);
742dda28197Spatrick
743dda28197Spatrick while (osi != ose) {
744dda28197Spatrick if (*osi >= '0' && *osi <= '9') {
745dda28197Spatrick if (str.empty()) {
746dda28197Spatrick return std::make_pair(Operand(), osi);
747dda28197Spatrick } else {
748dda28197Spatrick str.push_back(*osi);
749dda28197Spatrick }
750dda28197Spatrick } else if (*osi >= 'a' && *osi <= 'z') {
751dda28197Spatrick str.push_back(*osi);
752dda28197Spatrick } else {
753dda28197Spatrick switch (*osi) {
754dda28197Spatrick default:
755dda28197Spatrick if (str.empty()) {
756dda28197Spatrick return std::make_pair(Operand(), osi);
757dda28197Spatrick } else {
758dda28197Spatrick ret.m_register = ConstString(str);
759dda28197Spatrick return std::make_pair(ret, osi);
760dda28197Spatrick }
761dda28197Spatrick case '%':
762dda28197Spatrick if (!str.empty()) {
763dda28197Spatrick return std::make_pair(Operand(), osi);
764dda28197Spatrick }
765dda28197Spatrick break;
766dda28197Spatrick }
767dda28197Spatrick }
768dda28197Spatrick ++osi;
769dda28197Spatrick }
770dda28197Spatrick
771dda28197Spatrick ret.m_register = ConstString(str);
772dda28197Spatrick return std::make_pair(ret, osi);
773dda28197Spatrick }
774dda28197Spatrick
775dda28197Spatrick static std::pair<Operand, llvm::StringRef::const_iterator>
ParseImmediate(llvm::StringRef::const_iterator osi,llvm::StringRef::const_iterator ose)776dda28197Spatrick ParseImmediate(llvm::StringRef::const_iterator osi,
777dda28197Spatrick llvm::StringRef::const_iterator ose) {
778dda28197Spatrick Operand ret;
779dda28197Spatrick ret.m_type = Operand::Type::Immediate;
780dda28197Spatrick std::string str;
781dda28197Spatrick bool is_hex = false;
782dda28197Spatrick
783dda28197Spatrick osi = ConsumeWhitespace(osi, ose);
784dda28197Spatrick
785dda28197Spatrick while (osi != ose) {
786dda28197Spatrick if (*osi >= '0' && *osi <= '9') {
787dda28197Spatrick str.push_back(*osi);
788dda28197Spatrick } else if (*osi >= 'a' && *osi <= 'f') {
789dda28197Spatrick if (is_hex) {
790dda28197Spatrick str.push_back(*osi);
791dda28197Spatrick } else {
792dda28197Spatrick return std::make_pair(Operand(), osi);
793dda28197Spatrick }
794dda28197Spatrick } else {
795dda28197Spatrick switch (*osi) {
796dda28197Spatrick default:
797dda28197Spatrick if (str.empty()) {
798dda28197Spatrick return std::make_pair(Operand(), osi);
799dda28197Spatrick } else {
800dda28197Spatrick ret.m_immediate = strtoull(str.c_str(), nullptr, 0);
801dda28197Spatrick return std::make_pair(ret, osi);
802dda28197Spatrick }
803dda28197Spatrick case 'x':
804dda28197Spatrick if (!str.compare("0")) {
805dda28197Spatrick is_hex = true;
806dda28197Spatrick str.push_back(*osi);
807dda28197Spatrick } else {
808dda28197Spatrick return std::make_pair(Operand(), osi);
809dda28197Spatrick }
810dda28197Spatrick break;
811dda28197Spatrick case '#':
812dda28197Spatrick case '$':
813dda28197Spatrick if (!str.empty()) {
814dda28197Spatrick return std::make_pair(Operand(), osi);
815dda28197Spatrick }
816dda28197Spatrick break;
817dda28197Spatrick case '-':
818dda28197Spatrick if (str.empty()) {
819dda28197Spatrick ret.m_negative = true;
820dda28197Spatrick } else {
821dda28197Spatrick return std::make_pair(Operand(), osi);
822dda28197Spatrick }
823dda28197Spatrick }
824dda28197Spatrick }
825dda28197Spatrick ++osi;
826dda28197Spatrick }
827dda28197Spatrick
828dda28197Spatrick ret.m_immediate = strtoull(str.c_str(), nullptr, 0);
829dda28197Spatrick return std::make_pair(ret, osi);
830dda28197Spatrick }
831dda28197Spatrick
832dda28197Spatrick // -0x5(%rax,%rax,2)
833dda28197Spatrick static std::pair<Operand, llvm::StringRef::const_iterator>
ParseIntelIndexedAccess(llvm::StringRef::const_iterator osi,llvm::StringRef::const_iterator ose)834dda28197Spatrick ParseIntelIndexedAccess(llvm::StringRef::const_iterator osi,
835dda28197Spatrick llvm::StringRef::const_iterator ose) {
836dda28197Spatrick std::pair<Operand, llvm::StringRef::const_iterator> offset_and_iterator =
837dda28197Spatrick ParseImmediate(osi, ose);
838dda28197Spatrick if (offset_and_iterator.first.IsValid()) {
839dda28197Spatrick osi = offset_and_iterator.second;
840dda28197Spatrick }
841dda28197Spatrick
842dda28197Spatrick bool found = false;
843dda28197Spatrick std::tie(found, osi) = ConsumeChar(osi, '(', ose);
844dda28197Spatrick if (!found) {
845dda28197Spatrick return std::make_pair(Operand(), osi);
846dda28197Spatrick }
847dda28197Spatrick
848dda28197Spatrick std::pair<Operand, llvm::StringRef::const_iterator> base_and_iterator =
849dda28197Spatrick ParseRegisterName(osi, ose);
850dda28197Spatrick if (base_and_iterator.first.IsValid()) {
851dda28197Spatrick osi = base_and_iterator.second;
852dda28197Spatrick } else {
853dda28197Spatrick return std::make_pair(Operand(), osi);
854dda28197Spatrick }
855dda28197Spatrick
856dda28197Spatrick std::tie(found, osi) = ConsumeChar(osi, ',', ose);
857dda28197Spatrick if (!found) {
858dda28197Spatrick return std::make_pair(Operand(), osi);
859dda28197Spatrick }
860dda28197Spatrick
861dda28197Spatrick std::pair<Operand, llvm::StringRef::const_iterator> index_and_iterator =
862dda28197Spatrick ParseRegisterName(osi, ose);
863dda28197Spatrick if (index_and_iterator.first.IsValid()) {
864dda28197Spatrick osi = index_and_iterator.second;
865dda28197Spatrick } else {
866dda28197Spatrick return std::make_pair(Operand(), osi);
867dda28197Spatrick }
868dda28197Spatrick
869dda28197Spatrick std::tie(found, osi) = ConsumeChar(osi, ',', ose);
870dda28197Spatrick if (!found) {
871dda28197Spatrick return std::make_pair(Operand(), osi);
872dda28197Spatrick }
873dda28197Spatrick
874dda28197Spatrick std::pair<Operand, llvm::StringRef::const_iterator>
875dda28197Spatrick multiplier_and_iterator = ParseImmediate(osi, ose);
876dda28197Spatrick if (index_and_iterator.first.IsValid()) {
877dda28197Spatrick osi = index_and_iterator.second;
878dda28197Spatrick } else {
879dda28197Spatrick return std::make_pair(Operand(), osi);
880dda28197Spatrick }
881dda28197Spatrick
882dda28197Spatrick std::tie(found, osi) = ConsumeChar(osi, ')', ose);
883dda28197Spatrick if (!found) {
884dda28197Spatrick return std::make_pair(Operand(), osi);
885dda28197Spatrick }
886dda28197Spatrick
887dda28197Spatrick Operand product;
888dda28197Spatrick product.m_type = Operand::Type::Product;
889dda28197Spatrick product.m_children.push_back(index_and_iterator.first);
890dda28197Spatrick product.m_children.push_back(multiplier_and_iterator.first);
891dda28197Spatrick
892dda28197Spatrick Operand index;
893dda28197Spatrick index.m_type = Operand::Type::Sum;
894dda28197Spatrick index.m_children.push_back(base_and_iterator.first);
895dda28197Spatrick index.m_children.push_back(product);
896dda28197Spatrick
897dda28197Spatrick if (offset_and_iterator.first.IsValid()) {
898dda28197Spatrick Operand offset;
899dda28197Spatrick offset.m_type = Operand::Type::Sum;
900dda28197Spatrick offset.m_children.push_back(offset_and_iterator.first);
901dda28197Spatrick offset.m_children.push_back(index);
902dda28197Spatrick
903dda28197Spatrick Operand deref;
904dda28197Spatrick deref.m_type = Operand::Type::Dereference;
905dda28197Spatrick deref.m_children.push_back(offset);
906dda28197Spatrick return std::make_pair(deref, osi);
907dda28197Spatrick } else {
908dda28197Spatrick Operand deref;
909dda28197Spatrick deref.m_type = Operand::Type::Dereference;
910dda28197Spatrick deref.m_children.push_back(index);
911dda28197Spatrick return std::make_pair(deref, osi);
912dda28197Spatrick }
913dda28197Spatrick }
914dda28197Spatrick
915dda28197Spatrick // -0x10(%rbp)
916dda28197Spatrick static std::pair<Operand, llvm::StringRef::const_iterator>
ParseIntelDerefAccess(llvm::StringRef::const_iterator osi,llvm::StringRef::const_iterator ose)917dda28197Spatrick ParseIntelDerefAccess(llvm::StringRef::const_iterator osi,
918dda28197Spatrick llvm::StringRef::const_iterator ose) {
919dda28197Spatrick std::pair<Operand, llvm::StringRef::const_iterator> offset_and_iterator =
920dda28197Spatrick ParseImmediate(osi, ose);
921dda28197Spatrick if (offset_and_iterator.first.IsValid()) {
922dda28197Spatrick osi = offset_and_iterator.second;
923dda28197Spatrick }
924dda28197Spatrick
925dda28197Spatrick bool found = false;
926dda28197Spatrick std::tie(found, osi) = ConsumeChar(osi, '(', ose);
927dda28197Spatrick if (!found) {
928dda28197Spatrick return std::make_pair(Operand(), osi);
929dda28197Spatrick }
930dda28197Spatrick
931dda28197Spatrick std::pair<Operand, llvm::StringRef::const_iterator> base_and_iterator =
932dda28197Spatrick ParseRegisterName(osi, ose);
933dda28197Spatrick if (base_and_iterator.first.IsValid()) {
934dda28197Spatrick osi = base_and_iterator.second;
935dda28197Spatrick } else {
936dda28197Spatrick return std::make_pair(Operand(), osi);
937dda28197Spatrick }
938dda28197Spatrick
939dda28197Spatrick std::tie(found, osi) = ConsumeChar(osi, ')', ose);
940dda28197Spatrick if (!found) {
941dda28197Spatrick return std::make_pair(Operand(), osi);
942dda28197Spatrick }
943dda28197Spatrick
944dda28197Spatrick if (offset_and_iterator.first.IsValid()) {
945dda28197Spatrick Operand offset;
946dda28197Spatrick offset.m_type = Operand::Type::Sum;
947dda28197Spatrick offset.m_children.push_back(offset_and_iterator.first);
948dda28197Spatrick offset.m_children.push_back(base_and_iterator.first);
949dda28197Spatrick
950dda28197Spatrick Operand deref;
951dda28197Spatrick deref.m_type = Operand::Type::Dereference;
952dda28197Spatrick deref.m_children.push_back(offset);
953dda28197Spatrick return std::make_pair(deref, osi);
954dda28197Spatrick } else {
955dda28197Spatrick Operand deref;
956dda28197Spatrick deref.m_type = Operand::Type::Dereference;
957dda28197Spatrick deref.m_children.push_back(base_and_iterator.first);
958dda28197Spatrick return std::make_pair(deref, osi);
959dda28197Spatrick }
960dda28197Spatrick }
961dda28197Spatrick
962dda28197Spatrick // [sp, #8]!
963dda28197Spatrick static std::pair<Operand, llvm::StringRef::const_iterator>
ParseARMOffsetAccess(llvm::StringRef::const_iterator osi,llvm::StringRef::const_iterator ose)964dda28197Spatrick ParseARMOffsetAccess(llvm::StringRef::const_iterator osi,
965dda28197Spatrick llvm::StringRef::const_iterator ose) {
966dda28197Spatrick bool found = false;
967dda28197Spatrick std::tie(found, osi) = ConsumeChar(osi, '[', ose);
968dda28197Spatrick if (!found) {
969dda28197Spatrick return std::make_pair(Operand(), osi);
970dda28197Spatrick }
971dda28197Spatrick
972dda28197Spatrick std::pair<Operand, llvm::StringRef::const_iterator> base_and_iterator =
973dda28197Spatrick ParseRegisterName(osi, ose);
974dda28197Spatrick if (base_and_iterator.first.IsValid()) {
975dda28197Spatrick osi = base_and_iterator.second;
976dda28197Spatrick } else {
977dda28197Spatrick return std::make_pair(Operand(), osi);
978dda28197Spatrick }
979dda28197Spatrick
980dda28197Spatrick std::tie(found, osi) = ConsumeChar(osi, ',', ose);
981dda28197Spatrick if (!found) {
982dda28197Spatrick return std::make_pair(Operand(), osi);
983dda28197Spatrick }
984dda28197Spatrick
985dda28197Spatrick std::pair<Operand, llvm::StringRef::const_iterator> offset_and_iterator =
986dda28197Spatrick ParseImmediate(osi, ose);
987dda28197Spatrick if (offset_and_iterator.first.IsValid()) {
988dda28197Spatrick osi = offset_and_iterator.second;
989dda28197Spatrick }
990dda28197Spatrick
991dda28197Spatrick std::tie(found, osi) = ConsumeChar(osi, ']', ose);
992dda28197Spatrick if (!found) {
993dda28197Spatrick return std::make_pair(Operand(), osi);
994dda28197Spatrick }
995dda28197Spatrick
996dda28197Spatrick Operand offset;
997dda28197Spatrick offset.m_type = Operand::Type::Sum;
998dda28197Spatrick offset.m_children.push_back(offset_and_iterator.first);
999dda28197Spatrick offset.m_children.push_back(base_and_iterator.first);
1000dda28197Spatrick
1001dda28197Spatrick Operand deref;
1002dda28197Spatrick deref.m_type = Operand::Type::Dereference;
1003dda28197Spatrick deref.m_children.push_back(offset);
1004dda28197Spatrick return std::make_pair(deref, osi);
1005dda28197Spatrick }
1006dda28197Spatrick
1007dda28197Spatrick // [sp]
1008dda28197Spatrick static std::pair<Operand, llvm::StringRef::const_iterator>
ParseARMDerefAccess(llvm::StringRef::const_iterator osi,llvm::StringRef::const_iterator ose)1009dda28197Spatrick ParseARMDerefAccess(llvm::StringRef::const_iterator osi,
1010dda28197Spatrick llvm::StringRef::const_iterator ose) {
1011dda28197Spatrick bool found = false;
1012dda28197Spatrick std::tie(found, osi) = ConsumeChar(osi, '[', ose);
1013dda28197Spatrick if (!found) {
1014dda28197Spatrick return std::make_pair(Operand(), osi);
1015dda28197Spatrick }
1016dda28197Spatrick
1017dda28197Spatrick std::pair<Operand, llvm::StringRef::const_iterator> base_and_iterator =
1018dda28197Spatrick ParseRegisterName(osi, ose);
1019dda28197Spatrick if (base_and_iterator.first.IsValid()) {
1020dda28197Spatrick osi = base_and_iterator.second;
1021dda28197Spatrick } else {
1022dda28197Spatrick return std::make_pair(Operand(), osi);
1023dda28197Spatrick }
1024dda28197Spatrick
1025dda28197Spatrick std::tie(found, osi) = ConsumeChar(osi, ']', ose);
1026dda28197Spatrick if (!found) {
1027dda28197Spatrick return std::make_pair(Operand(), osi);
1028dda28197Spatrick }
1029dda28197Spatrick
1030dda28197Spatrick Operand deref;
1031dda28197Spatrick deref.m_type = Operand::Type::Dereference;
1032dda28197Spatrick deref.m_children.push_back(base_and_iterator.first);
1033dda28197Spatrick return std::make_pair(deref, osi);
1034dda28197Spatrick }
1035dda28197Spatrick
DumpOperand(const Operand & op,Stream & s)1036dda28197Spatrick static void DumpOperand(const Operand &op, Stream &s) {
1037dda28197Spatrick switch (op.m_type) {
1038dda28197Spatrick case Operand::Type::Dereference:
1039dda28197Spatrick s.PutCString("*");
1040dda28197Spatrick DumpOperand(op.m_children[0], s);
1041dda28197Spatrick break;
1042dda28197Spatrick case Operand::Type::Immediate:
1043dda28197Spatrick if (op.m_negative) {
1044dda28197Spatrick s.PutCString("-");
1045dda28197Spatrick }
1046dda28197Spatrick s.PutCString(llvm::to_string(op.m_immediate));
1047dda28197Spatrick break;
1048dda28197Spatrick case Operand::Type::Invalid:
1049dda28197Spatrick s.PutCString("Invalid");
1050dda28197Spatrick break;
1051dda28197Spatrick case Operand::Type::Product:
1052dda28197Spatrick s.PutCString("(");
1053dda28197Spatrick DumpOperand(op.m_children[0], s);
1054dda28197Spatrick s.PutCString("*");
1055dda28197Spatrick DumpOperand(op.m_children[1], s);
1056dda28197Spatrick s.PutCString(")");
1057dda28197Spatrick break;
1058dda28197Spatrick case Operand::Type::Register:
1059dda28197Spatrick s.PutCString(op.m_register.GetStringRef());
1060dda28197Spatrick break;
1061dda28197Spatrick case Operand::Type::Sum:
1062dda28197Spatrick s.PutCString("(");
1063dda28197Spatrick DumpOperand(op.m_children[0], s);
1064dda28197Spatrick s.PutCString("+");
1065dda28197Spatrick DumpOperand(op.m_children[1], s);
1066dda28197Spatrick s.PutCString(")");
1067dda28197Spatrick break;
1068dda28197Spatrick }
1069dda28197Spatrick }
1070dda28197Spatrick
ParseOperands(llvm::SmallVectorImpl<Instruction::Operand> & operands)1071dda28197Spatrick bool ParseOperands(
1072dda28197Spatrick llvm::SmallVectorImpl<Instruction::Operand> &operands) override {
1073dda28197Spatrick const char *operands_string = GetOperands(nullptr);
1074dda28197Spatrick
1075dda28197Spatrick if (!operands_string) {
1076dda28197Spatrick return false;
1077dda28197Spatrick }
1078dda28197Spatrick
1079dda28197Spatrick llvm::StringRef operands_ref(operands_string);
1080dda28197Spatrick
1081dda28197Spatrick llvm::StringRef::const_iterator osi = operands_ref.begin();
1082dda28197Spatrick llvm::StringRef::const_iterator ose = operands_ref.end();
1083dda28197Spatrick
1084dda28197Spatrick while (osi != ose) {
1085dda28197Spatrick Operand operand;
1086dda28197Spatrick llvm::StringRef::const_iterator iter;
1087dda28197Spatrick
1088dda28197Spatrick if ((std::tie(operand, iter) = ParseIntelIndexedAccess(osi, ose),
1089dda28197Spatrick operand.IsValid()) ||
1090dda28197Spatrick (std::tie(operand, iter) = ParseIntelDerefAccess(osi, ose),
1091dda28197Spatrick operand.IsValid()) ||
1092dda28197Spatrick (std::tie(operand, iter) = ParseARMOffsetAccess(osi, ose),
1093dda28197Spatrick operand.IsValid()) ||
1094dda28197Spatrick (std::tie(operand, iter) = ParseARMDerefAccess(osi, ose),
1095dda28197Spatrick operand.IsValid()) ||
1096dda28197Spatrick (std::tie(operand, iter) = ParseRegisterName(osi, ose),
1097dda28197Spatrick operand.IsValid()) ||
1098dda28197Spatrick (std::tie(operand, iter) = ParseImmediate(osi, ose),
1099dda28197Spatrick operand.IsValid())) {
1100dda28197Spatrick osi = iter;
1101dda28197Spatrick operands.push_back(operand);
1102dda28197Spatrick } else {
1103dda28197Spatrick return false;
1104dda28197Spatrick }
1105dda28197Spatrick
1106dda28197Spatrick std::pair<bool, llvm::StringRef::const_iterator> found_and_iter =
1107dda28197Spatrick ConsumeChar(osi, ',', ose);
1108dda28197Spatrick if (found_and_iter.first) {
1109dda28197Spatrick osi = found_and_iter.second;
1110dda28197Spatrick }
1111dda28197Spatrick
1112dda28197Spatrick osi = ConsumeWhitespace(osi, ose);
1113dda28197Spatrick }
1114dda28197Spatrick
1115dda28197Spatrick DisassemblerSP disasm_sp = m_disasm_wp.lock();
1116dda28197Spatrick
1117dda28197Spatrick if (disasm_sp && operands.size() > 1) {
1118dda28197Spatrick // TODO tie this into the MC Disassembler's notion of clobbers.
1119dda28197Spatrick switch (disasm_sp->GetArchitecture().GetMachine()) {
1120dda28197Spatrick default:
1121dda28197Spatrick break;
1122dda28197Spatrick case llvm::Triple::x86:
1123dda28197Spatrick case llvm::Triple::x86_64:
1124dda28197Spatrick operands[operands.size() - 1].m_clobbered = true;
1125dda28197Spatrick break;
1126dda28197Spatrick case llvm::Triple::arm:
1127dda28197Spatrick operands[0].m_clobbered = true;
1128dda28197Spatrick break;
1129dda28197Spatrick }
1130dda28197Spatrick }
1131dda28197Spatrick
1132*f6aab3d8Srobert if (Log *log = GetLog(LLDBLog::Process)) {
1133dda28197Spatrick StreamString ss;
1134dda28197Spatrick
1135dda28197Spatrick ss.Printf("[%s] expands to %zu operands:\n", operands_string,
1136dda28197Spatrick operands.size());
1137dda28197Spatrick for (const Operand &operand : operands) {
1138dda28197Spatrick ss.PutCString(" ");
1139dda28197Spatrick DumpOperand(operand, ss);
1140dda28197Spatrick ss.PutCString("\n");
1141dda28197Spatrick }
1142dda28197Spatrick
1143dda28197Spatrick log->PutString(ss.GetString());
1144dda28197Spatrick }
1145dda28197Spatrick
1146dda28197Spatrick return true;
1147dda28197Spatrick }
1148dda28197Spatrick
IsCall()1149dda28197Spatrick bool IsCall() override {
1150dda28197Spatrick VisitInstruction();
1151dda28197Spatrick return m_is_call;
1152dda28197Spatrick }
1153dda28197Spatrick
1154dda28197Spatrick protected:
1155dda28197Spatrick std::weak_ptr<DisassemblerLLVMC> m_disasm_wp;
1156dda28197Spatrick
1157dda28197Spatrick bool m_is_valid = false;
1158*f6aab3d8Srobert bool m_using_file_addr = false;
1159dda28197Spatrick bool m_has_visited_instruction = false;
1160dda28197Spatrick
1161dda28197Spatrick // Be conservative. If we didn't understand the instruction, say it:
1162dda28197Spatrick // - Might branch
1163dda28197Spatrick // - Does not have a delay slot
1164dda28197Spatrick // - Is not a call
1165*f6aab3d8Srobert // - Is not a load
1166*f6aab3d8Srobert // - Is not an authenticated instruction
1167dda28197Spatrick bool m_does_branch = true;
1168dda28197Spatrick bool m_has_delay_slot = false;
1169dda28197Spatrick bool m_is_call = false;
1170*f6aab3d8Srobert bool m_is_load = false;
1171*f6aab3d8Srobert bool m_is_authenticated = false;
1172dda28197Spatrick
VisitInstruction()1173dda28197Spatrick void VisitInstruction() {
1174dda28197Spatrick if (m_has_visited_instruction)
1175dda28197Spatrick return;
1176dda28197Spatrick
1177dda28197Spatrick DisassemblerScope disasm(*this);
1178dda28197Spatrick if (!disasm)
1179dda28197Spatrick return;
1180dda28197Spatrick
1181dda28197Spatrick DataExtractor data;
1182dda28197Spatrick if (!m_opcode.GetData(data))
1183dda28197Spatrick return;
1184dda28197Spatrick
1185dda28197Spatrick bool is_alternate_isa;
1186dda28197Spatrick lldb::addr_t pc = m_address.GetFileAddress();
1187dda28197Spatrick DisassemblerLLVMC::MCDisasmInstance *mc_disasm_ptr =
1188dda28197Spatrick GetDisasmToUse(is_alternate_isa, disasm);
1189dda28197Spatrick const uint8_t *opcode_data = data.GetDataStart();
1190dda28197Spatrick const size_t opcode_data_len = data.GetByteSize();
1191dda28197Spatrick llvm::MCInst inst;
1192dda28197Spatrick const size_t inst_size =
1193dda28197Spatrick mc_disasm_ptr->GetMCInst(opcode_data, opcode_data_len, pc, inst);
1194dda28197Spatrick if (inst_size == 0)
1195dda28197Spatrick return;
1196dda28197Spatrick
1197dda28197Spatrick m_has_visited_instruction = true;
1198dda28197Spatrick m_does_branch = mc_disasm_ptr->CanBranch(inst);
1199dda28197Spatrick m_has_delay_slot = mc_disasm_ptr->HasDelaySlot(inst);
1200dda28197Spatrick m_is_call = mc_disasm_ptr->IsCall(inst);
1201*f6aab3d8Srobert m_is_load = mc_disasm_ptr->IsLoad(inst);
1202*f6aab3d8Srobert m_is_authenticated = mc_disasm_ptr->IsAuthenticated(inst);
1203dda28197Spatrick }
1204dda28197Spatrick
1205dda28197Spatrick private:
1206dda28197Spatrick DisassemblerLLVMC::MCDisasmInstance *
GetDisasmToUse(bool & is_alternate_isa,DisassemblerScope & disasm)1207dda28197Spatrick GetDisasmToUse(bool &is_alternate_isa, DisassemblerScope &disasm) {
1208dda28197Spatrick is_alternate_isa = false;
1209dda28197Spatrick if (disasm) {
1210dda28197Spatrick if (disasm->m_alternate_disasm_up) {
1211dda28197Spatrick const AddressClass address_class = GetAddressClass();
1212dda28197Spatrick
1213dda28197Spatrick if (address_class == AddressClass::eCodeAlternateISA) {
1214dda28197Spatrick is_alternate_isa = true;
1215dda28197Spatrick return disasm->m_alternate_disasm_up.get();
1216dda28197Spatrick }
1217dda28197Spatrick }
1218dda28197Spatrick return disasm->m_disasm_up.get();
1219dda28197Spatrick }
1220dda28197Spatrick return nullptr;
1221dda28197Spatrick }
1222dda28197Spatrick };
1223dda28197Spatrick
1224dda28197Spatrick std::unique_ptr<DisassemblerLLVMC::MCDisasmInstance>
Create(const char * triple,const char * cpu,const char * features_str,unsigned flavor,DisassemblerLLVMC & owner)1225dda28197Spatrick DisassemblerLLVMC::MCDisasmInstance::Create(const char *triple, const char *cpu,
1226dda28197Spatrick const char *features_str,
1227dda28197Spatrick unsigned flavor,
1228dda28197Spatrick DisassemblerLLVMC &owner) {
1229dda28197Spatrick using Instance = std::unique_ptr<DisassemblerLLVMC::MCDisasmInstance>;
1230dda28197Spatrick
1231dda28197Spatrick std::string Status;
1232dda28197Spatrick const llvm::Target *curr_target =
1233dda28197Spatrick llvm::TargetRegistry::lookupTarget(triple, Status);
1234dda28197Spatrick if (!curr_target)
1235dda28197Spatrick return Instance();
1236dda28197Spatrick
1237dda28197Spatrick std::unique_ptr<llvm::MCInstrInfo> instr_info_up(
1238dda28197Spatrick curr_target->createMCInstrInfo());
1239dda28197Spatrick if (!instr_info_up)
1240dda28197Spatrick return Instance();
1241dda28197Spatrick
1242dda28197Spatrick std::unique_ptr<llvm::MCRegisterInfo> reg_info_up(
1243dda28197Spatrick curr_target->createMCRegInfo(triple));
1244dda28197Spatrick if (!reg_info_up)
1245dda28197Spatrick return Instance();
1246dda28197Spatrick
1247dda28197Spatrick std::unique_ptr<llvm::MCSubtargetInfo> subtarget_info_up(
1248dda28197Spatrick curr_target->createMCSubtargetInfo(triple, cpu, features_str));
1249dda28197Spatrick if (!subtarget_info_up)
1250dda28197Spatrick return Instance();
1251dda28197Spatrick
1252dda28197Spatrick llvm::MCTargetOptions MCOptions;
1253dda28197Spatrick std::unique_ptr<llvm::MCAsmInfo> asm_info_up(
1254dda28197Spatrick curr_target->createMCAsmInfo(*reg_info_up, triple, MCOptions));
1255dda28197Spatrick if (!asm_info_up)
1256dda28197Spatrick return Instance();
1257dda28197Spatrick
1258dda28197Spatrick std::unique_ptr<llvm::MCContext> context_up(
1259be691f3bSpatrick new llvm::MCContext(llvm::Triple(triple), asm_info_up.get(),
1260be691f3bSpatrick reg_info_up.get(), subtarget_info_up.get()));
1261dda28197Spatrick if (!context_up)
1262dda28197Spatrick return Instance();
1263dda28197Spatrick
1264dda28197Spatrick std::unique_ptr<llvm::MCDisassembler> disasm_up(
1265dda28197Spatrick curr_target->createMCDisassembler(*subtarget_info_up, *context_up));
1266dda28197Spatrick if (!disasm_up)
1267dda28197Spatrick return Instance();
1268dda28197Spatrick
1269dda28197Spatrick std::unique_ptr<llvm::MCRelocationInfo> rel_info_up(
1270dda28197Spatrick curr_target->createMCRelocationInfo(triple, *context_up));
1271dda28197Spatrick if (!rel_info_up)
1272dda28197Spatrick return Instance();
1273dda28197Spatrick
1274dda28197Spatrick std::unique_ptr<llvm::MCSymbolizer> symbolizer_up(
1275dda28197Spatrick curr_target->createMCSymbolizer(
1276dda28197Spatrick triple, nullptr, DisassemblerLLVMC::SymbolLookupCallback, &owner,
1277dda28197Spatrick context_up.get(), std::move(rel_info_up)));
1278dda28197Spatrick disasm_up->setSymbolizer(std::move(symbolizer_up));
1279dda28197Spatrick
1280dda28197Spatrick unsigned asm_printer_variant =
1281dda28197Spatrick flavor == ~0U ? asm_info_up->getAssemblerDialect() : flavor;
1282dda28197Spatrick
1283dda28197Spatrick std::unique_ptr<llvm::MCInstPrinter> instr_printer_up(
1284dda28197Spatrick curr_target->createMCInstPrinter(llvm::Triple{triple},
1285dda28197Spatrick asm_printer_variant, *asm_info_up,
1286dda28197Spatrick *instr_info_up, *reg_info_up));
1287dda28197Spatrick if (!instr_printer_up)
1288dda28197Spatrick return Instance();
1289dda28197Spatrick
1290dda28197Spatrick return Instance(
1291dda28197Spatrick new MCDisasmInstance(std::move(instr_info_up), std::move(reg_info_up),
1292dda28197Spatrick std::move(subtarget_info_up), std::move(asm_info_up),
1293dda28197Spatrick std::move(context_up), std::move(disasm_up),
1294dda28197Spatrick std::move(instr_printer_up)));
1295dda28197Spatrick }
1296dda28197Spatrick
MCDisasmInstance(std::unique_ptr<llvm::MCInstrInfo> && instr_info_up,std::unique_ptr<llvm::MCRegisterInfo> && reg_info_up,std::unique_ptr<llvm::MCSubtargetInfo> && subtarget_info_up,std::unique_ptr<llvm::MCAsmInfo> && asm_info_up,std::unique_ptr<llvm::MCContext> && context_up,std::unique_ptr<llvm::MCDisassembler> && disasm_up,std::unique_ptr<llvm::MCInstPrinter> && instr_printer_up)1297dda28197Spatrick DisassemblerLLVMC::MCDisasmInstance::MCDisasmInstance(
1298dda28197Spatrick std::unique_ptr<llvm::MCInstrInfo> &&instr_info_up,
1299dda28197Spatrick std::unique_ptr<llvm::MCRegisterInfo> &®_info_up,
1300dda28197Spatrick std::unique_ptr<llvm::MCSubtargetInfo> &&subtarget_info_up,
1301dda28197Spatrick std::unique_ptr<llvm::MCAsmInfo> &&asm_info_up,
1302dda28197Spatrick std::unique_ptr<llvm::MCContext> &&context_up,
1303dda28197Spatrick std::unique_ptr<llvm::MCDisassembler> &&disasm_up,
1304dda28197Spatrick std::unique_ptr<llvm::MCInstPrinter> &&instr_printer_up)
1305dda28197Spatrick : m_instr_info_up(std::move(instr_info_up)),
1306dda28197Spatrick m_reg_info_up(std::move(reg_info_up)),
1307dda28197Spatrick m_subtarget_info_up(std::move(subtarget_info_up)),
1308dda28197Spatrick m_asm_info_up(std::move(asm_info_up)),
1309dda28197Spatrick m_context_up(std::move(context_up)), m_disasm_up(std::move(disasm_up)),
1310dda28197Spatrick m_instr_printer_up(std::move(instr_printer_up)) {
1311dda28197Spatrick assert(m_instr_info_up && m_reg_info_up && m_subtarget_info_up &&
1312dda28197Spatrick m_asm_info_up && m_context_up && m_disasm_up && m_instr_printer_up);
1313dda28197Spatrick }
1314dda28197Spatrick
GetMCInst(const uint8_t * opcode_data,size_t opcode_data_len,lldb::addr_t pc,llvm::MCInst & mc_inst) const1315dda28197Spatrick uint64_t DisassemblerLLVMC::MCDisasmInstance::GetMCInst(
1316dda28197Spatrick const uint8_t *opcode_data, size_t opcode_data_len, lldb::addr_t pc,
1317dda28197Spatrick llvm::MCInst &mc_inst) const {
1318dda28197Spatrick llvm::ArrayRef<uint8_t> data(opcode_data, opcode_data_len);
1319dda28197Spatrick llvm::MCDisassembler::DecodeStatus status;
1320dda28197Spatrick
1321dda28197Spatrick uint64_t new_inst_size;
1322dda28197Spatrick status = m_disasm_up->getInstruction(mc_inst, new_inst_size, data, pc,
1323dda28197Spatrick llvm::nulls());
1324dda28197Spatrick if (status == llvm::MCDisassembler::Success)
1325dda28197Spatrick return new_inst_size;
1326dda28197Spatrick else
1327dda28197Spatrick return 0;
1328dda28197Spatrick }
1329dda28197Spatrick
PrintMCInst(llvm::MCInst & mc_inst,std::string & inst_string,std::string & comments_string)1330dda28197Spatrick void DisassemblerLLVMC::MCDisasmInstance::PrintMCInst(
1331dda28197Spatrick llvm::MCInst &mc_inst, std::string &inst_string,
1332dda28197Spatrick std::string &comments_string) {
1333dda28197Spatrick llvm::raw_string_ostream inst_stream(inst_string);
1334dda28197Spatrick llvm::raw_string_ostream comments_stream(comments_string);
1335dda28197Spatrick
1336dda28197Spatrick m_instr_printer_up->setCommentStream(comments_stream);
1337dda28197Spatrick m_instr_printer_up->printInst(&mc_inst, 0, llvm::StringRef(),
1338dda28197Spatrick *m_subtarget_info_up, inst_stream);
1339dda28197Spatrick m_instr_printer_up->setCommentStream(llvm::nulls());
1340dda28197Spatrick comments_stream.flush();
1341dda28197Spatrick
1342dda28197Spatrick static std::string g_newlines("\r\n");
1343dda28197Spatrick
1344dda28197Spatrick for (size_t newline_pos = 0;
1345dda28197Spatrick (newline_pos = comments_string.find_first_of(g_newlines, newline_pos)) !=
1346dda28197Spatrick comments_string.npos;
1347dda28197Spatrick /**/) {
1348dda28197Spatrick comments_string.replace(comments_string.begin() + newline_pos,
1349dda28197Spatrick comments_string.begin() + newline_pos + 1, 1, ' ');
1350dda28197Spatrick }
1351dda28197Spatrick }
1352dda28197Spatrick
SetStyle(bool use_hex_immed,HexImmediateStyle hex_style)1353dda28197Spatrick void DisassemblerLLVMC::MCDisasmInstance::SetStyle(
1354dda28197Spatrick bool use_hex_immed, HexImmediateStyle hex_style) {
1355dda28197Spatrick m_instr_printer_up->setPrintImmHex(use_hex_immed);
1356dda28197Spatrick switch (hex_style) {
1357dda28197Spatrick case eHexStyleC:
1358dda28197Spatrick m_instr_printer_up->setPrintHexStyle(llvm::HexStyle::C);
1359dda28197Spatrick break;
1360dda28197Spatrick case eHexStyleAsm:
1361dda28197Spatrick m_instr_printer_up->setPrintHexStyle(llvm::HexStyle::Asm);
1362dda28197Spatrick break;
1363dda28197Spatrick }
1364dda28197Spatrick }
1365dda28197Spatrick
CanBranch(llvm::MCInst & mc_inst) const1366dda28197Spatrick bool DisassemblerLLVMC::MCDisasmInstance::CanBranch(
1367dda28197Spatrick llvm::MCInst &mc_inst) const {
1368dda28197Spatrick return m_instr_info_up->get(mc_inst.getOpcode())
1369dda28197Spatrick .mayAffectControlFlow(mc_inst, *m_reg_info_up);
1370dda28197Spatrick }
1371dda28197Spatrick
HasDelaySlot(llvm::MCInst & mc_inst) const1372dda28197Spatrick bool DisassemblerLLVMC::MCDisasmInstance::HasDelaySlot(
1373dda28197Spatrick llvm::MCInst &mc_inst) const {
1374dda28197Spatrick return m_instr_info_up->get(mc_inst.getOpcode()).hasDelaySlot();
1375dda28197Spatrick }
1376dda28197Spatrick
IsCall(llvm::MCInst & mc_inst) const1377dda28197Spatrick bool DisassemblerLLVMC::MCDisasmInstance::IsCall(llvm::MCInst &mc_inst) const {
1378dda28197Spatrick return m_instr_info_up->get(mc_inst.getOpcode()).isCall();
1379dda28197Spatrick }
1380dda28197Spatrick
IsLoad(llvm::MCInst & mc_inst) const1381*f6aab3d8Srobert bool DisassemblerLLVMC::MCDisasmInstance::IsLoad(llvm::MCInst &mc_inst) const {
1382*f6aab3d8Srobert return m_instr_info_up->get(mc_inst.getOpcode()).mayLoad();
1383*f6aab3d8Srobert }
1384*f6aab3d8Srobert
IsAuthenticated(llvm::MCInst & mc_inst) const1385*f6aab3d8Srobert bool DisassemblerLLVMC::MCDisasmInstance::IsAuthenticated(
1386*f6aab3d8Srobert llvm::MCInst &mc_inst) const {
1387*f6aab3d8Srobert const auto &InstrDesc = m_instr_info_up->get(mc_inst.getOpcode());
1388*f6aab3d8Srobert
1389*f6aab3d8Srobert // Treat software auth traps (brk 0xc470 + aut key, where 0x70 == 'p', 0xc4
1390*f6aab3d8Srobert // == 'a' + 'c') as authenticated instructions for reporting purposes, in
1391*f6aab3d8Srobert // addition to the standard authenticated instructions specified in ARMv8.3.
1392*f6aab3d8Srobert bool IsBrkC47x = false;
1393*f6aab3d8Srobert if (InstrDesc.isTrap() && mc_inst.getNumOperands() == 1) {
1394*f6aab3d8Srobert const llvm::MCOperand &Op0 = mc_inst.getOperand(0);
1395*f6aab3d8Srobert if (Op0.isImm() && Op0.getImm() >= 0xc470 && Op0.getImm() <= 0xc474)
1396*f6aab3d8Srobert IsBrkC47x = true;
1397*f6aab3d8Srobert }
1398*f6aab3d8Srobert
1399*f6aab3d8Srobert return InstrDesc.isAuthenticated() || IsBrkC47x;
1400*f6aab3d8Srobert }
1401*f6aab3d8Srobert
DisassemblerLLVMC(const ArchSpec & arch,const char * flavor_string)1402dda28197Spatrick DisassemblerLLVMC::DisassemblerLLVMC(const ArchSpec &arch,
1403dda28197Spatrick const char *flavor_string)
1404dda28197Spatrick : Disassembler(arch, flavor_string), m_exe_ctx(nullptr), m_inst(nullptr),
1405*f6aab3d8Srobert m_data_from_file(false), m_adrp_address(LLDB_INVALID_ADDRESS),
1406*f6aab3d8Srobert m_adrp_insn() {
1407dda28197Spatrick if (!FlavorValidForArchSpec(arch, m_flavor.c_str())) {
1408dda28197Spatrick m_flavor.assign("default");
1409dda28197Spatrick }
1410dda28197Spatrick
1411dda28197Spatrick unsigned flavor = ~0U;
1412dda28197Spatrick llvm::Triple triple = arch.GetTriple();
1413dda28197Spatrick
1414dda28197Spatrick // So far the only supported flavor is "intel" on x86. The base class will
1415dda28197Spatrick // set this correctly coming in.
1416dda28197Spatrick if (triple.getArch() == llvm::Triple::x86 ||
1417dda28197Spatrick triple.getArch() == llvm::Triple::x86_64) {
1418dda28197Spatrick if (m_flavor == "intel") {
1419dda28197Spatrick flavor = 1;
1420dda28197Spatrick } else if (m_flavor == "att") {
1421dda28197Spatrick flavor = 0;
1422dda28197Spatrick }
1423dda28197Spatrick }
1424dda28197Spatrick
1425dda28197Spatrick ArchSpec thumb_arch(arch);
1426dda28197Spatrick if (triple.getArch() == llvm::Triple::arm) {
1427dda28197Spatrick std::string thumb_arch_name(thumb_arch.GetTriple().getArchName().str());
1428dda28197Spatrick // Replace "arm" with "thumb" so we get all thumb variants correct
1429dda28197Spatrick if (thumb_arch_name.size() > 3) {
1430dda28197Spatrick thumb_arch_name.erase(0, 3);
1431dda28197Spatrick thumb_arch_name.insert(0, "thumb");
1432dda28197Spatrick } else {
1433*f6aab3d8Srobert thumb_arch_name = "thumbv9.3a";
1434dda28197Spatrick }
1435dda28197Spatrick thumb_arch.GetTriple().setArchName(llvm::StringRef(thumb_arch_name));
1436dda28197Spatrick }
1437dda28197Spatrick
1438dda28197Spatrick // If no sub architecture specified then use the most recent arm architecture
1439*f6aab3d8Srobert // so the disassembler will return all instructions. Without it we will see a
1440*f6aab3d8Srobert // lot of unknown opcodes if the code uses instructions which are not
1441*f6aab3d8Srobert // available in the oldest arm version (which is used when no sub architecture
1442*f6aab3d8Srobert // is specified).
1443dda28197Spatrick if (triple.getArch() == llvm::Triple::arm &&
1444dda28197Spatrick triple.getSubArch() == llvm::Triple::NoSubArch)
1445*f6aab3d8Srobert triple.setArchName("armv9.3a");
1446dda28197Spatrick
1447*f6aab3d8Srobert std::string features_str;
1448dda28197Spatrick const char *triple_str = triple.getTriple().c_str();
1449dda28197Spatrick
1450dda28197Spatrick // ARM Cortex M0-M7 devices only execute thumb instructions
1451dda28197Spatrick if (arch.IsAlwaysThumbInstructions()) {
1452dda28197Spatrick triple_str = thumb_arch.GetTriple().getTriple().c_str();
1453dda28197Spatrick features_str += "+fp-armv8,";
1454dda28197Spatrick }
1455dda28197Spatrick
1456dda28197Spatrick const char *cpu = "";
1457dda28197Spatrick
1458dda28197Spatrick switch (arch.GetCore()) {
1459dda28197Spatrick case ArchSpec::eCore_mips32:
1460dda28197Spatrick case ArchSpec::eCore_mips32el:
1461dda28197Spatrick cpu = "mips32";
1462dda28197Spatrick break;
1463dda28197Spatrick case ArchSpec::eCore_mips32r2:
1464dda28197Spatrick case ArchSpec::eCore_mips32r2el:
1465dda28197Spatrick cpu = "mips32r2";
1466dda28197Spatrick break;
1467dda28197Spatrick case ArchSpec::eCore_mips32r3:
1468dda28197Spatrick case ArchSpec::eCore_mips32r3el:
1469dda28197Spatrick cpu = "mips32r3";
1470dda28197Spatrick break;
1471dda28197Spatrick case ArchSpec::eCore_mips32r5:
1472dda28197Spatrick case ArchSpec::eCore_mips32r5el:
1473dda28197Spatrick cpu = "mips32r5";
1474dda28197Spatrick break;
1475dda28197Spatrick case ArchSpec::eCore_mips32r6:
1476dda28197Spatrick case ArchSpec::eCore_mips32r6el:
1477dda28197Spatrick cpu = "mips32r6";
1478dda28197Spatrick break;
1479dda28197Spatrick case ArchSpec::eCore_mips64:
1480dda28197Spatrick case ArchSpec::eCore_mips64el:
1481dda28197Spatrick cpu = "mips64";
1482dda28197Spatrick break;
1483dda28197Spatrick case ArchSpec::eCore_mips64r2:
1484dda28197Spatrick case ArchSpec::eCore_mips64r2el:
1485dda28197Spatrick cpu = "mips64r2";
1486dda28197Spatrick break;
1487dda28197Spatrick case ArchSpec::eCore_mips64r3:
1488dda28197Spatrick case ArchSpec::eCore_mips64r3el:
1489dda28197Spatrick cpu = "mips64r3";
1490dda28197Spatrick break;
1491dda28197Spatrick case ArchSpec::eCore_mips64r5:
1492dda28197Spatrick case ArchSpec::eCore_mips64r5el:
1493dda28197Spatrick cpu = "mips64r5";
1494dda28197Spatrick break;
1495dda28197Spatrick case ArchSpec::eCore_mips64r6:
1496dda28197Spatrick case ArchSpec::eCore_mips64r6el:
1497dda28197Spatrick cpu = "mips64r6";
1498dda28197Spatrick break;
1499dda28197Spatrick default:
1500dda28197Spatrick cpu = "";
1501dda28197Spatrick break;
1502dda28197Spatrick }
1503dda28197Spatrick
1504dda28197Spatrick if (arch.IsMIPS()) {
1505dda28197Spatrick uint32_t arch_flags = arch.GetFlags();
1506dda28197Spatrick if (arch_flags & ArchSpec::eMIPSAse_msa)
1507dda28197Spatrick features_str += "+msa,";
1508dda28197Spatrick if (arch_flags & ArchSpec::eMIPSAse_dsp)
1509dda28197Spatrick features_str += "+dsp,";
1510dda28197Spatrick if (arch_flags & ArchSpec::eMIPSAse_dspr2)
1511dda28197Spatrick features_str += "+dspr2,";
1512dda28197Spatrick }
1513dda28197Spatrick
1514*f6aab3d8Srobert // If any AArch64 variant, enable latest ISA with all extensions.
1515be691f3bSpatrick if (triple.isAArch64()) {
1516*f6aab3d8Srobert features_str += "+all,";
1517dda28197Spatrick
1518be691f3bSpatrick if (triple.getVendor() == llvm::Triple::Apple)
1519dda28197Spatrick cpu = "apple-latest";
1520dda28197Spatrick }
1521dda28197Spatrick
1522*f6aab3d8Srobert if (triple.isRISCV()) {
1523*f6aab3d8Srobert uint32_t arch_flags = arch.GetFlags();
1524*f6aab3d8Srobert if (arch_flags & ArchSpec::eRISCV_rvc)
1525*f6aab3d8Srobert features_str += "+c,";
1526*f6aab3d8Srobert if (arch_flags & ArchSpec::eRISCV_rve)
1527*f6aab3d8Srobert features_str += "+e,";
1528*f6aab3d8Srobert if ((arch_flags & ArchSpec::eRISCV_float_abi_single) ==
1529*f6aab3d8Srobert ArchSpec::eRISCV_float_abi_single)
1530*f6aab3d8Srobert features_str += "+f,";
1531*f6aab3d8Srobert if ((arch_flags & ArchSpec::eRISCV_float_abi_double) ==
1532*f6aab3d8Srobert ArchSpec::eRISCV_float_abi_double)
1533*f6aab3d8Srobert features_str += "+f,+d,";
1534*f6aab3d8Srobert if ((arch_flags & ArchSpec::eRISCV_float_abi_quad) ==
1535*f6aab3d8Srobert ArchSpec::eRISCV_float_abi_quad)
1536*f6aab3d8Srobert features_str += "+f,+d,+q,";
1537*f6aab3d8Srobert // FIXME: how do we detect features such as `+a`, `+m`?
1538*f6aab3d8Srobert }
1539*f6aab3d8Srobert
1540dda28197Spatrick // We use m_disasm_up.get() to tell whether we are valid or not, so if this
1541dda28197Spatrick // isn't good for some reason, we won't be valid and FindPlugin will fail and
1542dda28197Spatrick // we won't get used.
1543dda28197Spatrick m_disasm_up = MCDisasmInstance::Create(triple_str, cpu, features_str.c_str(),
1544dda28197Spatrick flavor, *this);
1545dda28197Spatrick
1546dda28197Spatrick llvm::Triple::ArchType llvm_arch = triple.getArch();
1547dda28197Spatrick
1548dda28197Spatrick // For arm CPUs that can execute arm or thumb instructions, also create a
1549dda28197Spatrick // thumb instruction disassembler.
1550dda28197Spatrick if (llvm_arch == llvm::Triple::arm) {
1551dda28197Spatrick std::string thumb_triple(thumb_arch.GetTriple().getTriple());
1552dda28197Spatrick m_alternate_disasm_up =
1553dda28197Spatrick MCDisasmInstance::Create(thumb_triple.c_str(), "", features_str.c_str(),
1554dda28197Spatrick flavor, *this);
1555dda28197Spatrick if (!m_alternate_disasm_up)
1556dda28197Spatrick m_disasm_up.reset();
1557dda28197Spatrick
1558dda28197Spatrick } else if (arch.IsMIPS()) {
1559dda28197Spatrick /* Create alternate disassembler for MIPS16 and microMIPS */
1560dda28197Spatrick uint32_t arch_flags = arch.GetFlags();
1561dda28197Spatrick if (arch_flags & ArchSpec::eMIPSAse_mips16)
1562dda28197Spatrick features_str += "+mips16,";
1563dda28197Spatrick else if (arch_flags & ArchSpec::eMIPSAse_micromips)
1564dda28197Spatrick features_str += "+micromips,";
1565dda28197Spatrick
1566dda28197Spatrick m_alternate_disasm_up = MCDisasmInstance::Create(
1567dda28197Spatrick triple_str, cpu, features_str.c_str(), flavor, *this);
1568dda28197Spatrick if (!m_alternate_disasm_up)
1569dda28197Spatrick m_disasm_up.reset();
1570dda28197Spatrick }
1571dda28197Spatrick }
1572dda28197Spatrick
// Out-of-line defaulted destructor: no custom teardown is performed here,
// the members are destroyed by their own destructors.
DisassemblerLLVMC::~DisassemblerLLVMC() = default;
1574dda28197Spatrick
CreateInstance(const ArchSpec & arch,const char * flavor)1575dda28197Spatrick Disassembler *DisassemblerLLVMC::CreateInstance(const ArchSpec &arch,
1576dda28197Spatrick const char *flavor) {
1577dda28197Spatrick if (arch.GetTriple().getArch() != llvm::Triple::UnknownArch) {
1578dda28197Spatrick std::unique_ptr<DisassemblerLLVMC> disasm_up(
1579dda28197Spatrick new DisassemblerLLVMC(arch, flavor));
1580dda28197Spatrick
1581dda28197Spatrick if (disasm_up.get() && disasm_up->IsValid())
1582dda28197Spatrick return disasm_up.release();
1583dda28197Spatrick }
1584dda28197Spatrick return nullptr;
1585dda28197Spatrick }
1586dda28197Spatrick
DecodeInstructions(const Address & base_addr,const DataExtractor & data,lldb::offset_t data_offset,size_t num_instructions,bool append,bool data_from_file)1587dda28197Spatrick size_t DisassemblerLLVMC::DecodeInstructions(const Address &base_addr,
1588dda28197Spatrick const DataExtractor &data,
1589dda28197Spatrick lldb::offset_t data_offset,
1590dda28197Spatrick size_t num_instructions,
1591dda28197Spatrick bool append, bool data_from_file) {
1592dda28197Spatrick if (!append)
1593dda28197Spatrick m_instruction_list.Clear();
1594dda28197Spatrick
1595dda28197Spatrick if (!IsValid())
1596dda28197Spatrick return 0;
1597dda28197Spatrick
1598dda28197Spatrick m_data_from_file = data_from_file;
1599dda28197Spatrick uint32_t data_cursor = data_offset;
1600dda28197Spatrick const size_t data_byte_size = data.GetByteSize();
1601dda28197Spatrick uint32_t instructions_parsed = 0;
1602dda28197Spatrick Address inst_addr(base_addr);
1603dda28197Spatrick
1604dda28197Spatrick while (data_cursor < data_byte_size &&
1605dda28197Spatrick instructions_parsed < num_instructions) {
1606dda28197Spatrick
1607dda28197Spatrick AddressClass address_class = AddressClass::eCode;
1608dda28197Spatrick
1609dda28197Spatrick if (m_alternate_disasm_up)
1610dda28197Spatrick address_class = inst_addr.GetAddressClass();
1611dda28197Spatrick
1612dda28197Spatrick InstructionSP inst_sp(
1613dda28197Spatrick new InstructionLLVMC(*this, inst_addr, address_class));
1614dda28197Spatrick
1615dda28197Spatrick if (!inst_sp)
1616dda28197Spatrick break;
1617dda28197Spatrick
1618dda28197Spatrick uint32_t inst_size = inst_sp->Decode(*this, data, data_cursor);
1619dda28197Spatrick
1620dda28197Spatrick if (inst_size == 0)
1621dda28197Spatrick break;
1622dda28197Spatrick
1623dda28197Spatrick m_instruction_list.Append(inst_sp);
1624dda28197Spatrick data_cursor += inst_size;
1625dda28197Spatrick inst_addr.Slide(inst_size);
1626dda28197Spatrick instructions_parsed++;
1627dda28197Spatrick }
1628dda28197Spatrick
1629dda28197Spatrick return data_cursor - data_offset;
1630dda28197Spatrick }
1631dda28197Spatrick
Initialize()1632dda28197Spatrick void DisassemblerLLVMC::Initialize() {
1633dda28197Spatrick PluginManager::RegisterPlugin(GetPluginNameStatic(),
1634dda28197Spatrick "Disassembler that uses LLVM MC to disassemble "
1635dda28197Spatrick "i386, x86_64, ARM, and ARM64.",
1636dda28197Spatrick CreateInstance);
1637dda28197Spatrick
1638dda28197Spatrick llvm::InitializeAllTargetInfos();
1639dda28197Spatrick llvm::InitializeAllTargetMCs();
1640dda28197Spatrick llvm::InitializeAllAsmParsers();
1641dda28197Spatrick llvm::InitializeAllDisassemblers();
1642dda28197Spatrick }
1643dda28197Spatrick
// Unregisters this plugin from the PluginManager, identified by the same
// CreateInstance callback that Initialize() registered.
void DisassemblerLLVMC::Terminate() {
  PluginManager::UnregisterPlugin(CreateInstance);
}
1647dda28197Spatrick
OpInfoCallback(void * disassembler,uint64_t pc,uint64_t offset,uint64_t size,int tag_type,void * tag_bug)1648dda28197Spatrick int DisassemblerLLVMC::OpInfoCallback(void *disassembler, uint64_t pc,
1649dda28197Spatrick uint64_t offset, uint64_t size,
1650dda28197Spatrick int tag_type, void *tag_bug) {
1651dda28197Spatrick return static_cast<DisassemblerLLVMC *>(disassembler)
1652dda28197Spatrick ->OpInfo(pc, offset, size, tag_type, tag_bug);
1653dda28197Spatrick }
1654dda28197Spatrick
SymbolLookupCallback(void * disassembler,uint64_t value,uint64_t * type,uint64_t pc,const char ** name)1655dda28197Spatrick const char *DisassemblerLLVMC::SymbolLookupCallback(void *disassembler,
1656dda28197Spatrick uint64_t value,
1657dda28197Spatrick uint64_t *type, uint64_t pc,
1658dda28197Spatrick const char **name) {
1659dda28197Spatrick return static_cast<DisassemblerLLVMC *>(disassembler)
1660dda28197Spatrick ->SymbolLookup(value, type, pc, name);
1661dda28197Spatrick }
1662dda28197Spatrick
FlavorValidForArchSpec(const lldb_private::ArchSpec & arch,const char * flavor)1663dda28197Spatrick bool DisassemblerLLVMC::FlavorValidForArchSpec(
1664dda28197Spatrick const lldb_private::ArchSpec &arch, const char *flavor) {
1665dda28197Spatrick llvm::Triple triple = arch.GetTriple();
1666dda28197Spatrick if (flavor == nullptr || strcmp(flavor, "default") == 0)
1667dda28197Spatrick return true;
1668dda28197Spatrick
1669dda28197Spatrick if (triple.getArch() == llvm::Triple::x86 ||
1670dda28197Spatrick triple.getArch() == llvm::Triple::x86_64) {
1671dda28197Spatrick return strcmp(flavor, "intel") == 0 || strcmp(flavor, "att") == 0;
1672dda28197Spatrick } else
1673dda28197Spatrick return false;
1674dda28197Spatrick }
1675dda28197Spatrick
IsValid() const1676dda28197Spatrick bool DisassemblerLLVMC::IsValid() const { return m_disasm_up.operator bool(); }
1677dda28197Spatrick
OpInfo(uint64_t PC,uint64_t Offset,uint64_t Size,int tag_type,void * tag_bug)1678dda28197Spatrick int DisassemblerLLVMC::OpInfo(uint64_t PC, uint64_t Offset, uint64_t Size,
1679dda28197Spatrick int tag_type, void *tag_bug) {
1680dda28197Spatrick switch (tag_type) {
1681dda28197Spatrick default:
1682dda28197Spatrick break;
1683dda28197Spatrick case 1:
1684dda28197Spatrick memset(tag_bug, 0, sizeof(::LLVMOpInfo1));
1685dda28197Spatrick break;
1686dda28197Spatrick }
1687dda28197Spatrick return 0;
1688dda28197Spatrick }
1689dda28197Spatrick
SymbolLookup(uint64_t value,uint64_t * type_ptr,uint64_t pc,const char ** name)1690dda28197Spatrick const char *DisassemblerLLVMC::SymbolLookup(uint64_t value, uint64_t *type_ptr,
1691dda28197Spatrick uint64_t pc, const char **name) {
1692dda28197Spatrick if (*type_ptr) {
1693dda28197Spatrick if (m_exe_ctx && m_inst) {
1694dda28197Spatrick // std::string remove_this_prior_to_checkin;
1695dda28197Spatrick Target *target = m_exe_ctx ? m_exe_ctx->GetTargetPtr() : nullptr;
1696dda28197Spatrick Address value_so_addr;
1697dda28197Spatrick Address pc_so_addr;
1698*f6aab3d8Srobert if (target->GetArchitecture().GetMachine() == llvm::Triple::aarch64 ||
1699*f6aab3d8Srobert target->GetArchitecture().GetMachine() == llvm::Triple::aarch64_be ||
1700*f6aab3d8Srobert target->GetArchitecture().GetMachine() == llvm::Triple::aarch64_32) {
1701*f6aab3d8Srobert if (*type_ptr == LLVMDisassembler_ReferenceType_In_ARM64_ADRP) {
1702*f6aab3d8Srobert m_adrp_address = pc;
1703*f6aab3d8Srobert m_adrp_insn = value;
1704*f6aab3d8Srobert *name = nullptr;
1705*f6aab3d8Srobert *type_ptr = LLVMDisassembler_ReferenceType_InOut_None;
1706*f6aab3d8Srobert return nullptr;
1707*f6aab3d8Srobert }
1708*f6aab3d8Srobert // If this instruction is an ADD and
1709*f6aab3d8Srobert // the previous instruction was an ADRP and
1710*f6aab3d8Srobert // the ADRP's register and this ADD's register are the same,
1711*f6aab3d8Srobert // then this is a pc-relative address calculation.
1712*f6aab3d8Srobert if (*type_ptr == LLVMDisassembler_ReferenceType_In_ARM64_ADDXri &&
1713*f6aab3d8Srobert m_adrp_insn && m_adrp_address == pc - 4 &&
1714*f6aab3d8Srobert (*m_adrp_insn & 0x1f) == ((value >> 5) & 0x1f)) {
1715*f6aab3d8Srobert uint32_t addxri_inst;
1716*f6aab3d8Srobert uint64_t adrp_imm, addxri_imm;
1717*f6aab3d8Srobert // Get immlo and immhi bits, OR them together to get the ADRP imm
1718*f6aab3d8Srobert // value.
1719*f6aab3d8Srobert adrp_imm =
1720*f6aab3d8Srobert ((*m_adrp_insn & 0x00ffffe0) >> 3) | ((*m_adrp_insn >> 29) & 0x3);
1721*f6aab3d8Srobert // if high bit of immhi after right-shifting set, sign extend
1722*f6aab3d8Srobert if (adrp_imm & (1ULL << 20))
1723*f6aab3d8Srobert adrp_imm |= ~((1ULL << 21) - 1);
1724*f6aab3d8Srobert
1725*f6aab3d8Srobert addxri_inst = value;
1726*f6aab3d8Srobert addxri_imm = (addxri_inst >> 10) & 0xfff;
1727*f6aab3d8Srobert // check if 'sh' bit is set, shift imm value up if so
1728*f6aab3d8Srobert // (this would make no sense, ADRP already gave us this part)
1729*f6aab3d8Srobert if ((addxri_inst >> (12 + 5 + 5)) & 1)
1730*f6aab3d8Srobert addxri_imm <<= 12;
1731*f6aab3d8Srobert value = (m_adrp_address & 0xfffffffffffff000LL) + (adrp_imm << 12) +
1732*f6aab3d8Srobert addxri_imm;
1733*f6aab3d8Srobert }
1734*f6aab3d8Srobert m_adrp_address = LLDB_INVALID_ADDRESS;
1735*f6aab3d8Srobert m_adrp_insn.reset();
1736*f6aab3d8Srobert }
1737*f6aab3d8Srobert
1738dda28197Spatrick if (m_inst->UsingFileAddress()) {
1739dda28197Spatrick ModuleSP module_sp(m_inst->GetAddress().GetModule());
1740dda28197Spatrick if (module_sp) {
1741dda28197Spatrick module_sp->ResolveFileAddress(value, value_so_addr);
1742dda28197Spatrick module_sp->ResolveFileAddress(pc, pc_so_addr);
1743dda28197Spatrick }
1744dda28197Spatrick } else if (target && !target->GetSectionLoadList().IsEmpty()) {
1745dda28197Spatrick target->GetSectionLoadList().ResolveLoadAddress(value, value_so_addr);
1746dda28197Spatrick target->GetSectionLoadList().ResolveLoadAddress(pc, pc_so_addr);
1747dda28197Spatrick }
1748dda28197Spatrick
1749dda28197Spatrick SymbolContext sym_ctx;
1750dda28197Spatrick const SymbolContextItem resolve_scope =
1751dda28197Spatrick eSymbolContextFunction | eSymbolContextSymbol;
1752dda28197Spatrick if (pc_so_addr.IsValid() && pc_so_addr.GetModule()) {
1753dda28197Spatrick pc_so_addr.GetModule()->ResolveSymbolContextForAddress(
1754dda28197Spatrick pc_so_addr, resolve_scope, sym_ctx);
1755dda28197Spatrick }
1756dda28197Spatrick
1757dda28197Spatrick if (value_so_addr.IsValid() && value_so_addr.GetSection()) {
1758dda28197Spatrick StreamString ss;
1759dda28197Spatrick
1760dda28197Spatrick bool format_omitting_current_func_name = false;
1761dda28197Spatrick if (sym_ctx.symbol || sym_ctx.function) {
1762dda28197Spatrick AddressRange range;
1763dda28197Spatrick if (sym_ctx.GetAddressRange(resolve_scope, 0, false, range) &&
1764dda28197Spatrick range.GetBaseAddress().IsValid() &&
1765dda28197Spatrick range.ContainsLoadAddress(value_so_addr, target)) {
1766dda28197Spatrick format_omitting_current_func_name = true;
1767dda28197Spatrick }
1768dda28197Spatrick }
1769dda28197Spatrick
1770dda28197Spatrick // If the "value" address (the target address we're symbolicating) is
1771dda28197Spatrick // inside the same SymbolContext as the current instruction pc
1772dda28197Spatrick // (pc_so_addr), don't print the full function name - just print it
1773dda28197Spatrick // with DumpStyleNoFunctionName style, e.g. "<+36>".
1774dda28197Spatrick if (format_omitting_current_func_name) {
1775dda28197Spatrick value_so_addr.Dump(&ss, target, Address::DumpStyleNoFunctionName,
1776dda28197Spatrick Address::DumpStyleSectionNameOffset);
1777dda28197Spatrick } else {
1778dda28197Spatrick value_so_addr.Dump(
1779dda28197Spatrick &ss, target,
1780dda28197Spatrick Address::DumpStyleResolvedDescriptionNoFunctionArguments,
1781dda28197Spatrick Address::DumpStyleSectionNameOffset);
1782dda28197Spatrick }
1783dda28197Spatrick
1784dda28197Spatrick if (!ss.GetString().empty()) {
1785dda28197Spatrick // If Address::Dump returned a multi-line description, most commonly
1786dda28197Spatrick // seen when we have multiple levels of inlined functions at an
1787dda28197Spatrick // address, only show the first line.
1788dda28197Spatrick std::string str = std::string(ss.GetString());
1789dda28197Spatrick size_t first_eol_char = str.find_first_of("\r\n");
1790dda28197Spatrick if (first_eol_char != std::string::npos) {
1791dda28197Spatrick str.erase(first_eol_char);
1792dda28197Spatrick }
1793dda28197Spatrick m_inst->AppendComment(str);
1794dda28197Spatrick }
1795dda28197Spatrick }
1796dda28197Spatrick }
1797dda28197Spatrick }
1798dda28197Spatrick
1799*f6aab3d8Srobert // TODO: llvm-objdump sets the type_ptr to the
1800*f6aab3d8Srobert // LLVMDisassembler_ReferenceType_Out_* values
1801*f6aab3d8Srobert // based on where value_so_addr is pointing, with
1802*f6aab3d8Srobert // Mach-O specific augmentations in MachODump.cpp. e.g.
1803*f6aab3d8Srobert // see what AArch64ExternalSymbolizer::tryAddingSymbolicOperand
1804*f6aab3d8Srobert // handles.
1805dda28197Spatrick *type_ptr = LLVMDisassembler_ReferenceType_InOut_None;
1806dda28197Spatrick *name = nullptr;
1807dda28197Spatrick return nullptr;
1808dda28197Spatrick }
1809