xref: /freebsd-src/contrib/llvm-project/lldb/source/Core/Disassembler.cpp (revision 753f127f3ace09432b2baeffd71a308760641a62)
1 //===-- Disassembler.cpp --------------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "lldb/Core/Disassembler.h"
10 
11 #include "lldb/Core/AddressRange.h"
12 #include "lldb/Core/Debugger.h"
13 #include "lldb/Core/EmulateInstruction.h"
14 #include "lldb/Core/Mangled.h"
15 #include "lldb/Core/Module.h"
16 #include "lldb/Core/ModuleList.h"
17 #include "lldb/Core/PluginManager.h"
18 #include "lldb/Core/SourceManager.h"
19 #include "lldb/Host/FileSystem.h"
20 #include "lldb/Interpreter/OptionValue.h"
21 #include "lldb/Interpreter/OptionValueArray.h"
22 #include "lldb/Interpreter/OptionValueDictionary.h"
23 #include "lldb/Interpreter/OptionValueRegex.h"
24 #include "lldb/Interpreter/OptionValueString.h"
25 #include "lldb/Interpreter/OptionValueUInt64.h"
26 #include "lldb/Symbol/Function.h"
27 #include "lldb/Symbol/Symbol.h"
28 #include "lldb/Symbol/SymbolContext.h"
29 #include "lldb/Target/ExecutionContext.h"
30 #include "lldb/Target/SectionLoadList.h"
31 #include "lldb/Target/StackFrame.h"
32 #include "lldb/Target/Target.h"
33 #include "lldb/Target/Thread.h"
34 #include "lldb/Utility/DataBufferHeap.h"
35 #include "lldb/Utility/DataExtractor.h"
36 #include "lldb/Utility/RegularExpression.h"
37 #include "lldb/Utility/Status.h"
38 #include "lldb/Utility/Stream.h"
39 #include "lldb/Utility/StreamString.h"
40 #include "lldb/Utility/Timer.h"
41 #include "lldb/lldb-private-enumerations.h"
42 #include "lldb/lldb-private-interfaces.h"
43 #include "lldb/lldb-private-types.h"
44 #include "llvm/ADT/Triple.h"
45 #include "llvm/Support/Compiler.h"
46 
47 #include <cstdint>
48 #include <cstring>
49 #include <utility>
50 
51 #include <cassert>
52 
53 #define DEFAULT_DISASM_BYTE_SIZE 32
54 
55 using namespace lldb;
56 using namespace lldb_private;
57 
58 DisassemblerSP Disassembler::FindPlugin(const ArchSpec &arch,
59                                         const char *flavor,
60                                         const char *plugin_name) {
61   LLDB_SCOPED_TIMERF("Disassembler::FindPlugin (arch = %s, plugin_name = %s)",
62                      arch.GetArchitectureName(), plugin_name);
63 
64   DisassemblerCreateInstance create_callback = nullptr;
65 
66   if (plugin_name) {
67     create_callback =
68         PluginManager::GetDisassemblerCreateCallbackForPluginName(plugin_name);
69     if (create_callback) {
70       DisassemblerSP disassembler_sp(create_callback(arch, flavor));
71 
72       if (disassembler_sp)
73         return disassembler_sp;
74     }
75   } else {
76     for (uint32_t idx = 0;
77          (create_callback = PluginManager::GetDisassemblerCreateCallbackAtIndex(
78               idx)) != nullptr;
79          ++idx) {
80       DisassemblerSP disassembler_sp(create_callback(arch, flavor));
81 
82       if (disassembler_sp)
83         return disassembler_sp;
84     }
85   }
86   return DisassemblerSP();
87 }
88 
89 DisassemblerSP Disassembler::FindPluginForTarget(const Target &target,
90                                                  const ArchSpec &arch,
91                                                  const char *flavor,
92                                                  const char *plugin_name) {
93   if (flavor == nullptr) {
94     // FIXME - we don't have the mechanism in place to do per-architecture
95     // settings.  But since we know that for now we only support flavors on x86
96     // & x86_64,
97     if (arch.GetTriple().getArch() == llvm::Triple::x86 ||
98         arch.GetTriple().getArch() == llvm::Triple::x86_64)
99       flavor = target.GetDisassemblyFlavor();
100   }
101   return FindPlugin(arch, flavor, plugin_name);
102 }
103 
104 static Address ResolveAddress(Target &target, const Address &addr) {
105   if (!addr.IsSectionOffset()) {
106     Address resolved_addr;
107     // If we weren't passed in a section offset address range, try and resolve
108     // it to something
109     bool is_resolved = target.GetSectionLoadList().IsEmpty()
110                            ? target.GetImages().ResolveFileAddress(
111                                  addr.GetOffset(), resolved_addr)
112                            : target.GetSectionLoadList().ResolveLoadAddress(
113                                  addr.GetOffset(), resolved_addr);
114 
115     // We weren't able to resolve the address, just treat it as a raw address
116     if (is_resolved && resolved_addr.IsValid())
117       return resolved_addr;
118   }
119   return addr;
120 }
121 
122 lldb::DisassemblerSP Disassembler::DisassembleRange(
123     const ArchSpec &arch, const char *plugin_name, const char *flavor,
124     Target &target, const AddressRange &range, bool force_live_memory) {
125   if (range.GetByteSize() <= 0)
126     return {};
127 
128   if (!range.GetBaseAddress().IsValid())
129     return {};
130 
131   lldb::DisassemblerSP disasm_sp =
132       Disassembler::FindPluginForTarget(target, arch, flavor, plugin_name);
133 
134   if (!disasm_sp)
135     return {};
136 
137   const size_t bytes_disassembled = disasm_sp->ParseInstructions(
138       target, range.GetBaseAddress(), {Limit::Bytes, range.GetByteSize()},
139       nullptr, force_live_memory);
140   if (bytes_disassembled == 0)
141     return {};
142 
143   return disasm_sp;
144 }
145 
146 lldb::DisassemblerSP
147 Disassembler::DisassembleBytes(const ArchSpec &arch, const char *plugin_name,
148                                const char *flavor, const Address &start,
149                                const void *src, size_t src_len,
150                                uint32_t num_instructions, bool data_from_file) {
151   if (!src)
152     return {};
153 
154   lldb::DisassemblerSP disasm_sp =
155       Disassembler::FindPlugin(arch, flavor, plugin_name);
156 
157   if (!disasm_sp)
158     return {};
159 
160   DataExtractor data(src, src_len, arch.GetByteOrder(),
161                      arch.GetAddressByteSize());
162 
163   (void)disasm_sp->DecodeInstructions(start, data, 0, num_instructions, false,
164                                       data_from_file);
165   return disasm_sp;
166 }
167 
168 bool Disassembler::Disassemble(Debugger &debugger, const ArchSpec &arch,
169                                const char *plugin_name, const char *flavor,
170                                const ExecutionContext &exe_ctx,
171                                const Address &address, Limit limit,
172                                bool mixed_source_and_assembly,
173                                uint32_t num_mixed_context_lines,
174                                uint32_t options, Stream &strm) {
175   if (!exe_ctx.GetTargetPtr())
176     return false;
177 
178   lldb::DisassemblerSP disasm_sp(Disassembler::FindPluginForTarget(
179       exe_ctx.GetTargetRef(), arch, flavor, plugin_name));
180   if (!disasm_sp)
181     return false;
182 
183   const bool force_live_memory = true;
184   size_t bytes_disassembled = disasm_sp->ParseInstructions(
185       exe_ctx.GetTargetRef(), address, limit, &strm, force_live_memory);
186   if (bytes_disassembled == 0)
187     return false;
188 
189   disasm_sp->PrintInstructions(debugger, arch, exe_ctx,
190                                mixed_source_and_assembly,
191                                num_mixed_context_lines, options, strm);
192   return true;
193 }
194 
195 Disassembler::SourceLine
196 Disassembler::GetFunctionDeclLineEntry(const SymbolContext &sc) {
197   if (!sc.function)
198     return {};
199 
200   if (!sc.line_entry.IsValid())
201     return {};
202 
203   LineEntry prologue_end_line = sc.line_entry;
204   FileSpec func_decl_file;
205   uint32_t func_decl_line;
206   sc.function->GetStartLineSourceInfo(func_decl_file, func_decl_line);
207 
208   if (func_decl_file != prologue_end_line.file &&
209       func_decl_file != prologue_end_line.original_file)
210     return {};
211 
212   SourceLine decl_line;
213   decl_line.file = func_decl_file;
214   decl_line.line = func_decl_line;
215   // TODO: Do we care about column on these entries?  If so, we need to plumb
216   // that through GetStartLineSourceInfo.
217   decl_line.column = 0;
218   return decl_line;
219 }
220 
221 void Disassembler::AddLineToSourceLineTables(
222     SourceLine &line,
223     std::map<FileSpec, std::set<uint32_t>> &source_lines_seen) {
224   if (line.IsValid()) {
225     auto source_lines_seen_pos = source_lines_seen.find(line.file);
226     if (source_lines_seen_pos == source_lines_seen.end()) {
227       std::set<uint32_t> lines;
228       lines.insert(line.line);
229       source_lines_seen.emplace(line.file, lines);
230     } else {
231       source_lines_seen_pos->second.insert(line.line);
232     }
233   }
234 }
235 
236 bool Disassembler::ElideMixedSourceAndDisassemblyLine(
237     const ExecutionContext &exe_ctx, const SymbolContext &sc,
238     SourceLine &line) {
239 
240   // TODO: should we also check target.process.thread.step-avoid-libraries ?
241 
242   const RegularExpression *avoid_regex = nullptr;
243 
244   // Skip any line #0 entries - they are implementation details
245   if (line.line == 0)
246     return false;
247 
248   ThreadSP thread_sp = exe_ctx.GetThreadSP();
249   if (thread_sp) {
250     avoid_regex = thread_sp->GetSymbolsToAvoidRegexp();
251   } else {
252     TargetSP target_sp = exe_ctx.GetTargetSP();
253     if (target_sp) {
254       Status error;
255       OptionValueSP value_sp = target_sp->GetDebugger().GetPropertyValue(
256           &exe_ctx, "target.process.thread.step-avoid-regexp", false, error);
257       if (value_sp && value_sp->GetType() == OptionValue::eTypeRegex) {
258         OptionValueRegex *re = value_sp->GetAsRegex();
259         if (re) {
260           avoid_regex = re->GetCurrentValue();
261         }
262       }
263     }
264   }
265   if (avoid_regex && sc.symbol != nullptr) {
266     const char *function_name =
267         sc.GetFunctionName(Mangled::ePreferDemangledWithoutArguments)
268             .GetCString();
269     if (function_name && avoid_regex->Execute(function_name)) {
270       // skip this source line
271       return true;
272     }
273   }
274   // don't skip this source line
275   return false;
276 }
277 
278 void Disassembler::PrintInstructions(Debugger &debugger, const ArchSpec &arch,
279                                      const ExecutionContext &exe_ctx,
280                                      bool mixed_source_and_assembly,
281                                      uint32_t num_mixed_context_lines,
282                                      uint32_t options, Stream &strm) {
283   // We got some things disassembled...
284   size_t num_instructions_found = GetInstructionList().GetSize();
285 
286   const uint32_t max_opcode_byte_size =
287       GetInstructionList().GetMaxOpcocdeByteSize();
288   SymbolContext sc;
289   SymbolContext prev_sc;
290   AddressRange current_source_line_range;
291   const Address *pc_addr_ptr = nullptr;
292   StackFrame *frame = exe_ctx.GetFramePtr();
293 
294   TargetSP target_sp(exe_ctx.GetTargetSP());
295   SourceManager &source_manager =
296       target_sp ? target_sp->GetSourceManager() : debugger.GetSourceManager();
297 
298   if (frame) {
299     pc_addr_ptr = &frame->GetFrameCodeAddress();
300   }
301   const uint32_t scope =
302       eSymbolContextLineEntry | eSymbolContextFunction | eSymbolContextSymbol;
303   const bool use_inline_block_range = false;
304 
305   const FormatEntity::Entry *disassembly_format = nullptr;
306   FormatEntity::Entry format;
307   if (exe_ctx.HasTargetScope()) {
308     disassembly_format =
309         exe_ctx.GetTargetRef().GetDebugger().GetDisassemblyFormat();
310   } else {
311     FormatEntity::Parse("${addr}: ", format);
312     disassembly_format = &format;
313   }
314 
315   // First pass: step through the list of instructions, find how long the
316   // initial addresses strings are, insert padding in the second pass so the
317   // opcodes all line up nicely.
318 
319   // Also build up the source line mapping if this is mixed source & assembly
320   // mode. Calculate the source line for each assembly instruction (eliding
321   // inlined functions which the user wants to skip).
322 
323   std::map<FileSpec, std::set<uint32_t>> source_lines_seen;
324   Symbol *previous_symbol = nullptr;
325 
326   size_t address_text_size = 0;
327   for (size_t i = 0; i < num_instructions_found; ++i) {
328     Instruction *inst = GetInstructionList().GetInstructionAtIndex(i).get();
329     if (inst) {
330       const Address &addr = inst->GetAddress();
331       ModuleSP module_sp(addr.GetModule());
332       if (module_sp) {
333         const SymbolContextItem resolve_mask = eSymbolContextFunction |
334                                                eSymbolContextSymbol |
335                                                eSymbolContextLineEntry;
336         uint32_t resolved_mask =
337             module_sp->ResolveSymbolContextForAddress(addr, resolve_mask, sc);
338         if (resolved_mask) {
339           StreamString strmstr;
340           Debugger::FormatDisassemblerAddress(disassembly_format, &sc, nullptr,
341                                               &exe_ctx, &addr, strmstr);
342           size_t cur_line = strmstr.GetSizeOfLastLine();
343           if (cur_line > address_text_size)
344             address_text_size = cur_line;
345 
346           // Add entries to our "source_lines_seen" map+set which list which
347           // sources lines occur in this disassembly session.  We will print
348           // lines of context around a source line, but we don't want to print
349           // a source line that has a line table entry of its own - we'll leave
350           // that source line to be printed when it actually occurs in the
351           // disassembly.
352 
353           if (mixed_source_and_assembly && sc.line_entry.IsValid()) {
354             if (sc.symbol != previous_symbol) {
355               SourceLine decl_line = GetFunctionDeclLineEntry(sc);
356               if (!ElideMixedSourceAndDisassemblyLine(exe_ctx, sc, decl_line))
357                 AddLineToSourceLineTables(decl_line, source_lines_seen);
358             }
359             if (sc.line_entry.IsValid()) {
360               SourceLine this_line;
361               this_line.file = sc.line_entry.file;
362               this_line.line = sc.line_entry.line;
363               this_line.column = sc.line_entry.column;
364               if (!ElideMixedSourceAndDisassemblyLine(exe_ctx, sc, this_line))
365                 AddLineToSourceLineTables(this_line, source_lines_seen);
366             }
367           }
368         }
369         sc.Clear(false);
370       }
371     }
372   }
373 
374   previous_symbol = nullptr;
375   SourceLine previous_line;
376   for (size_t i = 0; i < num_instructions_found; ++i) {
377     Instruction *inst = GetInstructionList().GetInstructionAtIndex(i).get();
378 
379     if (inst) {
380       const Address &addr = inst->GetAddress();
381       const bool inst_is_at_pc = pc_addr_ptr && addr == *pc_addr_ptr;
382       SourceLinesToDisplay source_lines_to_display;
383 
384       prev_sc = sc;
385 
386       ModuleSP module_sp(addr.GetModule());
387       if (module_sp) {
388         uint32_t resolved_mask = module_sp->ResolveSymbolContextForAddress(
389             addr, eSymbolContextEverything, sc);
390         if (resolved_mask) {
391           if (mixed_source_and_assembly) {
392 
393             // If we've started a new function (non-inlined), print all of the
394             // source lines from the function declaration until the first line
395             // table entry - typically the opening curly brace of the function.
396             if (previous_symbol != sc.symbol) {
397               // The default disassembly format puts an extra blank line
398               // between functions - so when we're displaying the source
399               // context for a function, we don't want to add a blank line
400               // after the source context or we'll end up with two of them.
401               if (previous_symbol != nullptr)
402                 source_lines_to_display.print_source_context_end_eol = false;
403 
404               previous_symbol = sc.symbol;
405               if (sc.function && sc.line_entry.IsValid()) {
406                 LineEntry prologue_end_line = sc.line_entry;
407                 if (!ElideMixedSourceAndDisassemblyLine(exe_ctx, sc,
408                                                         prologue_end_line)) {
409                   FileSpec func_decl_file;
410                   uint32_t func_decl_line;
411                   sc.function->GetStartLineSourceInfo(func_decl_file,
412                                                       func_decl_line);
413                   if (func_decl_file == prologue_end_line.file ||
414                       func_decl_file == prologue_end_line.original_file) {
415                     // Add all the lines between the function declaration and
416                     // the first non-prologue source line to the list of lines
417                     // to print.
418                     for (uint32_t lineno = func_decl_line;
419                          lineno <= prologue_end_line.line; lineno++) {
420                       SourceLine this_line;
421                       this_line.file = func_decl_file;
422                       this_line.line = lineno;
423                       source_lines_to_display.lines.push_back(this_line);
424                     }
425                     // Mark the last line as the "current" one.  Usually this
426                     // is the open curly brace.
427                     if (source_lines_to_display.lines.size() > 0)
428                       source_lines_to_display.current_source_line =
429                           source_lines_to_display.lines.size() - 1;
430                   }
431                 }
432               }
433               sc.GetAddressRange(scope, 0, use_inline_block_range,
434                                  current_source_line_range);
435             }
436 
437             // If we've left a previous source line's address range, print a
438             // new source line
439             if (!current_source_line_range.ContainsFileAddress(addr)) {
440               sc.GetAddressRange(scope, 0, use_inline_block_range,
441                                  current_source_line_range);
442 
443               if (sc != prev_sc && sc.comp_unit && sc.line_entry.IsValid()) {
444                 SourceLine this_line;
445                 this_line.file = sc.line_entry.file;
446                 this_line.line = sc.line_entry.line;
447 
448                 if (!ElideMixedSourceAndDisassemblyLine(exe_ctx, sc,
449                                                         this_line)) {
450                   // Only print this source line if it is different from the
451                   // last source line we printed.  There may have been inlined
452                   // functions between these lines that we elided, resulting in
453                   // the same line being printed twice in a row for a
454                   // contiguous block of assembly instructions.
455                   if (this_line != previous_line) {
456 
457                     std::vector<uint32_t> previous_lines;
458                     for (uint32_t i = 0;
459                          i < num_mixed_context_lines &&
460                          (this_line.line - num_mixed_context_lines) > 0;
461                          i++) {
462                       uint32_t line =
463                           this_line.line - num_mixed_context_lines + i;
464                       auto pos = source_lines_seen.find(this_line.file);
465                       if (pos != source_lines_seen.end()) {
466                         if (pos->second.count(line) == 1) {
467                           previous_lines.clear();
468                         } else {
469                           previous_lines.push_back(line);
470                         }
471                       }
472                     }
473                     for (size_t i = 0; i < previous_lines.size(); i++) {
474                       SourceLine previous_line;
475                       previous_line.file = this_line.file;
476                       previous_line.line = previous_lines[i];
477                       auto pos = source_lines_seen.find(previous_line.file);
478                       if (pos != source_lines_seen.end()) {
479                         pos->second.insert(previous_line.line);
480                       }
481                       source_lines_to_display.lines.push_back(previous_line);
482                     }
483 
484                     source_lines_to_display.lines.push_back(this_line);
485                     source_lines_to_display.current_source_line =
486                         source_lines_to_display.lines.size() - 1;
487 
488                     for (uint32_t i = 0; i < num_mixed_context_lines; i++) {
489                       SourceLine next_line;
490                       next_line.file = this_line.file;
491                       next_line.line = this_line.line + i + 1;
492                       auto pos = source_lines_seen.find(next_line.file);
493                       if (pos != source_lines_seen.end()) {
494                         if (pos->second.count(next_line.line) == 1)
495                           break;
496                         pos->second.insert(next_line.line);
497                       }
498                       source_lines_to_display.lines.push_back(next_line);
499                     }
500                   }
501                   previous_line = this_line;
502                 }
503               }
504             }
505           }
506         } else {
507           sc.Clear(true);
508         }
509       }
510 
511       if (source_lines_to_display.lines.size() > 0) {
512         strm.EOL();
513         for (size_t idx = 0; idx < source_lines_to_display.lines.size();
514              idx++) {
515           SourceLine ln = source_lines_to_display.lines[idx];
516           const char *line_highlight = "";
517           if (inst_is_at_pc && (options & eOptionMarkPCSourceLine)) {
518             line_highlight = "->";
519           } else if (idx == source_lines_to_display.current_source_line) {
520             line_highlight = "**";
521           }
522           source_manager.DisplaySourceLinesWithLineNumbers(
523               ln.file, ln.line, ln.column, 0, 0, line_highlight, &strm);
524         }
525         if (source_lines_to_display.print_source_context_end_eol)
526           strm.EOL();
527       }
528 
529       const bool show_bytes = (options & eOptionShowBytes) != 0;
530       const bool show_control_flow_kind =
531           (options & eOptionShowControlFlowKind) != 0;
532       inst->Dump(&strm, max_opcode_byte_size, true, show_bytes,
533                  show_control_flow_kind, &exe_ctx, &sc, &prev_sc, nullptr,
534                  address_text_size);
535       strm.EOL();
536     } else {
537       break;
538     }
539   }
540 }
541 
542 bool Disassembler::Disassemble(Debugger &debugger, const ArchSpec &arch,
543                                StackFrame &frame, Stream &strm) {
544   AddressRange range;
545   SymbolContext sc(
546       frame.GetSymbolContext(eSymbolContextFunction | eSymbolContextSymbol));
547   if (sc.function) {
548     range = sc.function->GetAddressRange();
549   } else if (sc.symbol && sc.symbol->ValueIsAddress()) {
550     range.GetBaseAddress() = sc.symbol->GetAddressRef();
551     range.SetByteSize(sc.symbol->GetByteSize());
552   } else {
553     range.GetBaseAddress() = frame.GetFrameCodeAddress();
554   }
555 
556     if (range.GetBaseAddress().IsValid() && range.GetByteSize() == 0)
557       range.SetByteSize(DEFAULT_DISASM_BYTE_SIZE);
558 
559     Disassembler::Limit limit = {Disassembler::Limit::Bytes,
560                                  range.GetByteSize()};
561     if (limit.value == 0)
562       limit.value = DEFAULT_DISASM_BYTE_SIZE;
563 
564     return Disassemble(debugger, arch, nullptr, nullptr, frame,
565                        range.GetBaseAddress(), limit, false, 0, 0, strm);
566 }
567 
/// Construct an instruction located at \a address with address class
/// \a addr_class. The opcode member is value-initialized and
/// m_calculated_strings starts out false.
Instruction::Instruction(const Address &address, AddressClass addr_class)
    : m_address(address), m_address_class(addr_class), m_opcode(),
      m_calculated_strings(false) {}

/// Defaulted destructor, defined out of line in this file.
Instruction::~Instruction() = default;
573 
574 namespace x86 {
575 
/// The three values that decide an instruction's control flow kind.
/// InstructionLengthDecode() decodes an instruction into this struct and
/// MapOpcodeIntoControlFlowKind() consumes it.
///
/// primary_opcode
///    Primary opcode of the instruction.
///    For one-byte opcode instruction, it's the first byte after prefix.
///    For two- and three-byte opcodes, it's the second byte.
///
/// opcode_len
///    The length of opcode in bytes. Valid opcode lengths are 1, 2, or 3.
///
/// modrm
///    ModR/M byte of the instruction.
///    Bits[7:6] indicate MOD. Bits[5:3] specify a register and R/M bits[2:0]
///    may contain a register or specify an addressing mode, depending on MOD.
struct InstructionOpcodeAndModrm {
  uint8_t primary_opcode; // See "primary_opcode" above.
  uint8_t opcode_len;     // See "opcode_len" above.
  uint8_t modrm;          // See "modrm" above.
};
596 
597 /// Determine the InstructionControlFlowKind based on opcode and modrm bytes.
598 /// Refer to http://ref.x86asm.net/coder.html for the full list of opcode and
599 /// instruction set.
600 ///
601 /// \param[in] opcode_and_modrm
602 ///    Contains primary_opcode byte, its length, and ModR/M byte.
603 ///    Refer to the struct InstructionOpcodeAndModrm for details.
604 ///
605 /// \return
606 ///   The control flow kind of the instruction or
607 ///   eInstructionControlFlowKindOther if the instruction doesn't affect
608 ///   the control flow of the program.
609 lldb::InstructionControlFlowKind
610 MapOpcodeIntoControlFlowKind(InstructionOpcodeAndModrm opcode_and_modrm) {
611   uint8_t opcode = opcode_and_modrm.primary_opcode;
612   uint8_t opcode_len = opcode_and_modrm.opcode_len;
613   uint8_t modrm = opcode_and_modrm.modrm;
614 
615   if (opcode_len > 2)
616     return lldb::eInstructionControlFlowKindOther;
617 
618   if (opcode >= 0x70 && opcode <= 0x7F) {
619     if (opcode_len == 1)
620       return lldb::eInstructionControlFlowKindCondJump;
621     else
622       return lldb::eInstructionControlFlowKindOther;
623   }
624 
625   if (opcode >= 0x80 && opcode <= 0x8F) {
626     if (opcode_len == 2)
627       return lldb::eInstructionControlFlowKindCondJump;
628     else
629       return lldb::eInstructionControlFlowKindOther;
630   }
631 
632   switch (opcode) {
633   case 0x9A:
634     if (opcode_len == 1)
635       return lldb::eInstructionControlFlowKindFarCall;
636     break;
637   case 0xFF:
638     if (opcode_len == 1) {
639       uint8_t modrm_reg = (modrm >> 3) & 7;
640       if (modrm_reg == 2)
641         return lldb::eInstructionControlFlowKindCall;
642       else if (modrm_reg == 3)
643         return lldb::eInstructionControlFlowKindFarCall;
644       else if (modrm_reg == 4)
645         return lldb::eInstructionControlFlowKindJump;
646       else if (modrm_reg == 5)
647         return lldb::eInstructionControlFlowKindFarJump;
648     }
649     break;
650   case 0xE8:
651     if (opcode_len == 1)
652       return lldb::eInstructionControlFlowKindCall;
653     break;
654   case 0xCD:
655   case 0xCC:
656   case 0xCE:
657   case 0xF1:
658     if (opcode_len == 1)
659       return lldb::eInstructionControlFlowKindFarCall;
660     break;
661   case 0xCF:
662     if (opcode_len == 1)
663       return lldb::eInstructionControlFlowKindFarReturn;
664     break;
665   case 0xE9:
666   case 0xEB:
667     if (opcode_len == 1)
668       return lldb::eInstructionControlFlowKindJump;
669     break;
670   case 0xEA:
671     if (opcode_len == 1)
672       return lldb::eInstructionControlFlowKindFarJump;
673     break;
674   case 0xE3:
675   case 0xE0:
676   case 0xE1:
677   case 0xE2:
678     if (opcode_len == 1)
679       return lldb::eInstructionControlFlowKindCondJump;
680     break;
681   case 0xC3:
682   case 0xC2:
683     if (opcode_len == 1)
684       return lldb::eInstructionControlFlowKindReturn;
685     break;
686   case 0xCB:
687   case 0xCA:
688     if (opcode_len == 1)
689       return lldb::eInstructionControlFlowKindFarReturn;
690     break;
691   case 0x05:
692   case 0x34:
693     if (opcode_len == 2)
694       return lldb::eInstructionControlFlowKindFarCall;
695     break;
696   case 0x35:
697   case 0x07:
698     if (opcode_len == 2)
699       return lldb::eInstructionControlFlowKindFarReturn;
700     break;
701   case 0x01:
702     if (opcode_len == 2) {
703       switch (modrm) {
704       case 0xc1:
705         return lldb::eInstructionControlFlowKindFarCall;
706       case 0xc2:
707       case 0xc3:
708         return lldb::eInstructionControlFlowKindFarReturn;
709       default:
710         break;
711       }
712     }
713     break;
714   default:
715     break;
716   }
717 
718   return lldb::eInstructionControlFlowKindOther;
719 }
720 
721 /// Decode an instruction into opcode, modrm and opcode_len.
722 /// Refer to http://ref.x86asm.net/coder.html for the instruction bytes layout.
723 /// Opcodes in x86 are generally the first byte of instruction, though two-byte
724 /// instructions and prefixes exist. ModR/M is the byte following the opcode
725 /// and adds additional information for how the instruction is executed.
726 ///
727 /// \param[in] inst_bytes
728 ///    Raw bytes of the instruction
729 ///
730 ///
731 /// \param[in] bytes_len
732 ///    The length of the inst_bytes array.
733 ///
734 /// \param[in] is_exec_mode_64b
735 ///    If true, the execution mode is 64 bit.
736 ///
737 /// \return
738 ///    Returns decoded instruction as struct InstructionOpcodeAndModrm, holding
739 ///    primary_opcode, opcode_len and modrm byte. Refer to the struct definition
740 ///    for more details.
741 ///    Otherwise if the given instruction is invalid, returns None.
742 llvm::Optional<InstructionOpcodeAndModrm>
743 InstructionLengthDecode(const uint8_t *inst_bytes, int bytes_len,
744                         bool is_exec_mode_64b) {
745   int op_idx = 0;
746   bool prefix_done = false;
747   InstructionOpcodeAndModrm ret = {0, 0, 0};
748 
749   // In most cases, the primary_opcode is the first byte of the instruction
750   // but some instructions have a prefix to be skipped for these calculations.
751   // The following mapping is inspired from libipt's instruction decoding logic
752   // in `src/pt_ild.c`
753   while (!prefix_done) {
754     if (op_idx >= bytes_len)
755       return llvm::None;
756 
757     ret.primary_opcode = inst_bytes[op_idx];
758     switch (ret.primary_opcode) {
759     // prefix_ignore
760     case 0x26:
761     case 0x2e:
762     case 0x36:
763     case 0x3e:
764     case 0x64:
765     case 0x65:
766     // prefix_osz, prefix_asz
767     case 0x66:
768     case 0x67:
769     // prefix_lock, prefix_f2, prefix_f3
770     case 0xf0:
771     case 0xf2:
772     case 0xf3:
773       op_idx++;
774       break;
775 
776     // prefix_rex
777     case 0x40:
778     case 0x41:
779     case 0x42:
780     case 0x43:
781     case 0x44:
782     case 0x45:
783     case 0x46:
784     case 0x47:
785     case 0x48:
786     case 0x49:
787     case 0x4a:
788     case 0x4b:
789     case 0x4c:
790     case 0x4d:
791     case 0x4e:
792     case 0x4f:
793       if (is_exec_mode_64b)
794         op_idx++;
795       else
796         prefix_done = true;
797       break;
798 
799     // prefix_vex_c4, c5
800     case 0xc5:
801       if (!is_exec_mode_64b && (inst_bytes[op_idx + 1] & 0xc0) != 0xc0) {
802         prefix_done = true;
803         break;
804       }
805 
806       ret.opcode_len = 2;
807       ret.primary_opcode = inst_bytes[op_idx + 2];
808       ret.modrm = inst_bytes[op_idx + 3];
809       return ret;
810 
811     case 0xc4:
812       if (!is_exec_mode_64b && (inst_bytes[op_idx + 1] & 0xc0) != 0xc0) {
813         prefix_done = true;
814         break;
815       }
816       ret.opcode_len = inst_bytes[op_idx + 1] & 0x1f;
817       ret.primary_opcode = inst_bytes[op_idx + 3];
818       ret.modrm = inst_bytes[op_idx + 4];
819       return ret;
820 
821     // prefix_evex
822     case 0x62:
823       if (!is_exec_mode_64b && (inst_bytes[op_idx + 1] & 0xc0) != 0xc0) {
824         prefix_done = true;
825         break;
826       }
827       ret.opcode_len = inst_bytes[op_idx + 1] & 0x03;
828       ret.primary_opcode = inst_bytes[op_idx + 4];
829       ret.modrm = inst_bytes[op_idx + 5];
830       return ret;
831 
832     default:
833       prefix_done = true;
834       break;
835     }
836   } // prefix done
837 
838   ret.primary_opcode = inst_bytes[op_idx];
839   ret.modrm = inst_bytes[op_idx + 1];
840   ret.opcode_len = 1;
841 
842   // If the first opcode is 0F, it's two- or three- byte opcodes.
843   if (ret.primary_opcode == 0x0F) {
844     ret.primary_opcode = inst_bytes[++op_idx]; // get the next byte
845 
846     if (ret.primary_opcode == 0x38) {
847       ret.opcode_len = 3;
848       ret.primary_opcode = inst_bytes[++op_idx]; // get the next byte
849       ret.modrm = inst_bytes[op_idx + 1];
850     } else if (ret.primary_opcode == 0x3A) {
851       ret.opcode_len = 3;
852       ret.primary_opcode = inst_bytes[++op_idx];
853       ret.modrm = inst_bytes[op_idx + 1];
854     } else if ((ret.primary_opcode & 0xf8) == 0x38) {
855       ret.opcode_len = 0;
856       ret.primary_opcode = inst_bytes[++op_idx];
857       ret.modrm = inst_bytes[op_idx + 1];
858     } else if (ret.primary_opcode == 0x0F) {
859       ret.opcode_len = 3;
860       // opcode is 0x0F, no needs to update
861       ret.modrm = inst_bytes[op_idx + 1];
862     } else {
863       ret.opcode_len = 2;
864       ret.modrm = inst_bytes[op_idx + 1];
865     }
866   }
867 
868   return ret;
869 }
870 
871 lldb::InstructionControlFlowKind GetControlFlowKind(bool is_exec_mode_64b,
872                                                     Opcode m_opcode) {
873   llvm::Optional<InstructionOpcodeAndModrm> ret = llvm::None;
874 
875   if (m_opcode.GetOpcodeBytes() == nullptr || m_opcode.GetByteSize() <= 0) {
876     // x86_64 and i386 instructions are categorized as Opcode::Type::eTypeBytes
877     return lldb::eInstructionControlFlowKindUnknown;
878   }
879 
880   // Opcode bytes will be decoded into primary_opcode, modrm and opcode length.
881   // These are the three values deciding instruction control flow kind.
882   ret = InstructionLengthDecode((const uint8_t *)m_opcode.GetOpcodeBytes(),
883                                 m_opcode.GetByteSize(), is_exec_mode_64b);
884   if (!ret)
885     return lldb::eInstructionControlFlowKindUnknown;
886   else
887     return MapOpcodeIntoControlFlowKind(ret.value());
888 }
889 
890 } // namespace x86
891 
892 lldb::InstructionControlFlowKind
893 Instruction::GetControlFlowKind(const ArchSpec &arch) {
894   if (arch.GetTriple().getArch() == llvm::Triple::x86)
895     return x86::GetControlFlowKind(/*is_exec_mode_64b=*/false, m_opcode);
896   else if (arch.GetTriple().getArch() == llvm::Triple::x86_64)
897     return x86::GetControlFlowKind(/*is_exec_mode_64b=*/true, m_opcode);
898   else
899     return eInstructionControlFlowKindUnknown; // not implemented
900 }
901 
902 AddressClass Instruction::GetAddressClass() {
903   if (m_address_class == AddressClass::eInvalid)
904     m_address_class = m_address.GetAddressClass();
905   return m_address_class;
906 }
907 
908 void Instruction::Dump(lldb_private::Stream *s, uint32_t max_opcode_byte_size,
909                        bool show_address, bool show_bytes,
910                        bool show_control_flow_kind,
911                        const ExecutionContext *exe_ctx,
912                        const SymbolContext *sym_ctx,
913                        const SymbolContext *prev_sym_ctx,
914                        const FormatEntity::Entry *disassembly_addr_format,
915                        size_t max_address_text_size) {
916   size_t opcode_column_width = 7;
917   const size_t operand_column_width = 25;
918 
919   CalculateMnemonicOperandsAndCommentIfNeeded(exe_ctx);
920 
921   StreamString ss;
922 
923   if (show_address) {
924     Debugger::FormatDisassemblerAddress(disassembly_addr_format, sym_ctx,
925                                         prev_sym_ctx, exe_ctx, &m_address, ss);
926     ss.FillLastLineToColumn(max_address_text_size, ' ');
927   }
928 
929   if (show_bytes) {
930     if (m_opcode.GetType() == Opcode::eTypeBytes) {
931       // x86_64 and i386 are the only ones that use bytes right now so pad out
932       // the byte dump to be able to always show 15 bytes (3 chars each) plus a
933       // space
934       if (max_opcode_byte_size > 0)
935         m_opcode.Dump(&ss, max_opcode_byte_size * 3 + 1);
936       else
937         m_opcode.Dump(&ss, 15 * 3 + 1);
938     } else {
939       // Else, we have ARM or MIPS which can show up to a uint32_t 0x00000000
940       // (10 spaces) plus two for padding...
941       if (max_opcode_byte_size > 0)
942         m_opcode.Dump(&ss, max_opcode_byte_size * 3 + 1);
943       else
944         m_opcode.Dump(&ss, 12);
945     }
946   }
947 
948   if (show_control_flow_kind) {
949     switch (GetControlFlowKind(exe_ctx->GetTargetRef().GetArchitecture())) {
950     case eInstructionControlFlowKindUnknown:
951       ss.Printf("%-12s", "unknown");
952       break;
953     case eInstructionControlFlowKindOther:
954       ss.Printf("%-12s", "other");
955       break;
956     case eInstructionControlFlowKindCall:
957       ss.Printf("%-12s", "call");
958       break;
959     case eInstructionControlFlowKindReturn:
960       ss.Printf("%-12s", "return");
961       break;
962     case eInstructionControlFlowKindJump:
963       ss.Printf("%-12s", "jump");
964       break;
965     case eInstructionControlFlowKindCondJump:
966       ss.Printf("%-12s", "cond jump");
967       break;
968     case eInstructionControlFlowKindFarCall:
969       ss.Printf("%-12s", "far call");
970       break;
971     case eInstructionControlFlowKindFarReturn:
972       ss.Printf("%-12s", "far return");
973       break;
974     case eInstructionControlFlowKindFarJump:
975       ss.Printf("%-12s", "far jump");
976       break;
977     }
978   }
979 
980   const size_t opcode_pos = ss.GetSizeOfLastLine();
981 
982   // The default opcode size of 7 characters is plenty for most architectures
983   // but some like arm can pull out the occasional vqrshrun.s16.  We won't get
984   // consistent column spacing in these cases, unfortunately.
985   if (m_opcode_name.length() >= opcode_column_width) {
986     opcode_column_width = m_opcode_name.length() + 1;
987   }
988 
989   ss.PutCString(m_opcode_name);
990   ss.FillLastLineToColumn(opcode_pos + opcode_column_width, ' ');
991   ss.PutCString(m_mnemonics);
992 
993   if (!m_comment.empty()) {
994     ss.FillLastLineToColumn(
995         opcode_pos + opcode_column_width + operand_column_width, ' ');
996     ss.PutCString(" ; ");
997     ss.PutCString(m_comment);
998   }
999   s->PutCString(ss.GetString());
1000 }
1001 
1002 bool Instruction::DumpEmulation(const ArchSpec &arch) {
1003   std::unique_ptr<EmulateInstruction> insn_emulator_up(
1004       EmulateInstruction::FindPlugin(arch, eInstructionTypeAny, nullptr));
1005   if (insn_emulator_up) {
1006     insn_emulator_up->SetInstruction(GetOpcode(), GetAddress(), nullptr);
1007     return insn_emulator_up->EvaluateInstruction(0);
1008   }
1009 
1010   return false;
1011 }
1012 
1013 bool Instruction::CanSetBreakpoint () {
1014   return !HasDelaySlot();
1015 }
1016 
1017 bool Instruction::HasDelaySlot() {
1018   // Default is false.
1019   return false;
1020 }
1021 
1022 OptionValueSP Instruction::ReadArray(FILE *in_file, Stream *out_stream,
1023                                      OptionValue::Type data_type) {
1024   bool done = false;
1025   char buffer[1024];
1026 
1027   auto option_value_sp = std::make_shared<OptionValueArray>(1u << data_type);
1028 
1029   int idx = 0;
1030   while (!done) {
1031     if (!fgets(buffer, 1023, in_file)) {
1032       out_stream->Printf(
1033           "Instruction::ReadArray:  Error reading file (fgets).\n");
1034       option_value_sp.reset();
1035       return option_value_sp;
1036     }
1037 
1038     std::string line(buffer);
1039 
1040     size_t len = line.size();
1041     if (line[len - 1] == '\n') {
1042       line[len - 1] = '\0';
1043       line.resize(len - 1);
1044     }
1045 
1046     if ((line.size() == 1) && line[0] == ']') {
1047       done = true;
1048       line.clear();
1049     }
1050 
1051     if (!line.empty()) {
1052       std::string value;
1053       static RegularExpression g_reg_exp(
1054           llvm::StringRef("^[ \t]*([^ \t]+)[ \t]*$"));
1055       llvm::SmallVector<llvm::StringRef, 2> matches;
1056       if (g_reg_exp.Execute(line, &matches))
1057         value = matches[1].str();
1058       else
1059         value = line;
1060 
1061       OptionValueSP data_value_sp;
1062       switch (data_type) {
1063       case OptionValue::eTypeUInt64:
1064         data_value_sp = std::make_shared<OptionValueUInt64>(0, 0);
1065         data_value_sp->SetValueFromString(value);
1066         break;
1067       // Other types can be added later as needed.
1068       default:
1069         data_value_sp = std::make_shared<OptionValueString>(value.c_str(), "");
1070         break;
1071       }
1072 
1073       option_value_sp->GetAsArray()->InsertValue(idx, data_value_sp);
1074       ++idx;
1075     }
1076   }
1077 
1078   return option_value_sp;
1079 }
1080 
1081 OptionValueSP Instruction::ReadDictionary(FILE *in_file, Stream *out_stream) {
1082   bool done = false;
1083   char buffer[1024];
1084 
1085   auto option_value_sp = std::make_shared<OptionValueDictionary>();
1086   static ConstString encoding_key("data_encoding");
1087   OptionValue::Type data_type = OptionValue::eTypeInvalid;
1088 
1089   while (!done) {
1090     // Read the next line in the file
1091     if (!fgets(buffer, 1023, in_file)) {
1092       out_stream->Printf(
1093           "Instruction::ReadDictionary: Error reading file (fgets).\n");
1094       option_value_sp.reset();
1095       return option_value_sp;
1096     }
1097 
1098     // Check to see if the line contains the end-of-dictionary marker ("}")
1099     std::string line(buffer);
1100 
1101     size_t len = line.size();
1102     if (line[len - 1] == '\n') {
1103       line[len - 1] = '\0';
1104       line.resize(len - 1);
1105     }
1106 
1107     if ((line.size() == 1) && (line[0] == '}')) {
1108       done = true;
1109       line.clear();
1110     }
1111 
1112     // Try to find a key-value pair in the current line and add it to the
1113     // dictionary.
1114     if (!line.empty()) {
1115       static RegularExpression g_reg_exp(llvm::StringRef(
1116           "^[ \t]*([a-zA-Z_][a-zA-Z0-9_]*)[ \t]*=[ \t]*(.*)[ \t]*$"));
1117 
1118       llvm::SmallVector<llvm::StringRef, 3> matches;
1119 
1120       bool reg_exp_success = g_reg_exp.Execute(line, &matches);
1121       std::string key;
1122       std::string value;
1123       if (reg_exp_success) {
1124         key = matches[1].str();
1125         value = matches[2].str();
1126       } else {
1127         out_stream->Printf("Instruction::ReadDictionary: Failure executing "
1128                            "regular expression.\n");
1129         option_value_sp.reset();
1130         return option_value_sp;
1131       }
1132 
1133       ConstString const_key(key.c_str());
1134       // Check value to see if it's the start of an array or dictionary.
1135 
1136       lldb::OptionValueSP value_sp;
1137       assert(value.empty() == false);
1138       assert(key.empty() == false);
1139 
1140       if (value[0] == '{') {
1141         assert(value.size() == 1);
1142         // value is a dictionary
1143         value_sp = ReadDictionary(in_file, out_stream);
1144         if (!value_sp) {
1145           option_value_sp.reset();
1146           return option_value_sp;
1147         }
1148       } else if (value[0] == '[') {
1149         assert(value.size() == 1);
1150         // value is an array
1151         value_sp = ReadArray(in_file, out_stream, data_type);
1152         if (!value_sp) {
1153           option_value_sp.reset();
1154           return option_value_sp;
1155         }
1156         // We've used the data_type to read an array; re-set the type to
1157         // Invalid
1158         data_type = OptionValue::eTypeInvalid;
1159       } else if ((value[0] == '0') && (value[1] == 'x')) {
1160         value_sp = std::make_shared<OptionValueUInt64>(0, 0);
1161         value_sp->SetValueFromString(value);
1162       } else {
1163         size_t len = value.size();
1164         if ((value[0] == '"') && (value[len - 1] == '"'))
1165           value = value.substr(1, len - 2);
1166         value_sp = std::make_shared<OptionValueString>(value.c_str(), "");
1167       }
1168 
1169       if (const_key == encoding_key) {
1170         // A 'data_encoding=..." is NOT a normal key-value pair; it is meta-data
1171         // indicating the
1172         // data type of an upcoming array (usually the next bit of data to be
1173         // read in).
1174         if (strcmp(value.c_str(), "uint32_t") == 0)
1175           data_type = OptionValue::eTypeUInt64;
1176       } else
1177         option_value_sp->GetAsDictionary()->SetValueForKey(const_key, value_sp,
1178                                                            false);
1179     }
1180   }
1181 
1182   return option_value_sp;
1183 }
1184 
1185 bool Instruction::TestEmulation(Stream *out_stream, const char *file_name) {
1186   if (!out_stream)
1187     return false;
1188 
1189   if (!file_name) {
1190     out_stream->Printf("Instruction::TestEmulation:  Missing file_name.");
1191     return false;
1192   }
1193   FILE *test_file = FileSystem::Instance().Fopen(file_name, "r");
1194   if (!test_file) {
1195     out_stream->Printf(
1196         "Instruction::TestEmulation: Attempt to open test file failed.");
1197     return false;
1198   }
1199 
1200   char buffer[256];
1201   if (!fgets(buffer, 255, test_file)) {
1202     out_stream->Printf(
1203         "Instruction::TestEmulation: Error reading first line of test file.\n");
1204     fclose(test_file);
1205     return false;
1206   }
1207 
1208   if (strncmp(buffer, "InstructionEmulationState={", 27) != 0) {
1209     out_stream->Printf("Instructin::TestEmulation: Test file does not contain "
1210                        "emulation state dictionary\n");
1211     fclose(test_file);
1212     return false;
1213   }
1214 
1215   // Read all the test information from the test file into an
1216   // OptionValueDictionary.
1217 
1218   OptionValueSP data_dictionary_sp(ReadDictionary(test_file, out_stream));
1219   if (!data_dictionary_sp) {
1220     out_stream->Printf(
1221         "Instruction::TestEmulation:  Error reading Dictionary Object.\n");
1222     fclose(test_file);
1223     return false;
1224   }
1225 
1226   fclose(test_file);
1227 
1228   OptionValueDictionary *data_dictionary =
1229       data_dictionary_sp->GetAsDictionary();
1230   static ConstString description_key("assembly_string");
1231   static ConstString triple_key("triple");
1232 
1233   OptionValueSP value_sp = data_dictionary->GetValueForKey(description_key);
1234 
1235   if (!value_sp) {
1236     out_stream->Printf("Instruction::TestEmulation:  Test file does not "
1237                        "contain description string.\n");
1238     return false;
1239   }
1240 
1241   SetDescription(value_sp->GetStringValue());
1242 
1243   value_sp = data_dictionary->GetValueForKey(triple_key);
1244   if (!value_sp) {
1245     out_stream->Printf(
1246         "Instruction::TestEmulation: Test file does not contain triple.\n");
1247     return false;
1248   }
1249 
1250   ArchSpec arch;
1251   arch.SetTriple(llvm::Triple(value_sp->GetStringValue()));
1252 
1253   bool success = false;
1254   std::unique_ptr<EmulateInstruction> insn_emulator_up(
1255       EmulateInstruction::FindPlugin(arch, eInstructionTypeAny, nullptr));
1256   if (insn_emulator_up)
1257     success =
1258         insn_emulator_up->TestEmulation(out_stream, arch, data_dictionary);
1259 
1260   if (success)
1261     out_stream->Printf("Emulation test succeeded.");
1262   else
1263     out_stream->Printf("Emulation test failed.");
1264 
1265   return success;
1266 }
1267 
1268 bool Instruction::Emulate(
1269     const ArchSpec &arch, uint32_t evaluate_options, void *baton,
1270     EmulateInstruction::ReadMemoryCallback read_mem_callback,
1271     EmulateInstruction::WriteMemoryCallback write_mem_callback,
1272     EmulateInstruction::ReadRegisterCallback read_reg_callback,
1273     EmulateInstruction::WriteRegisterCallback write_reg_callback) {
1274   std::unique_ptr<EmulateInstruction> insn_emulator_up(
1275       EmulateInstruction::FindPlugin(arch, eInstructionTypeAny, nullptr));
1276   if (insn_emulator_up) {
1277     insn_emulator_up->SetBaton(baton);
1278     insn_emulator_up->SetCallbacks(read_mem_callback, write_mem_callback,
1279                                    read_reg_callback, write_reg_callback);
1280     insn_emulator_up->SetInstruction(GetOpcode(), GetAddress(), nullptr);
1281     return insn_emulator_up->EvaluateInstruction(evaluate_options);
1282   }
1283 
1284   return false;
1285 }
1286 
1287 uint32_t Instruction::GetData(DataExtractor &data) {
1288   return m_opcode.GetData(data);
1289 }
1290 
1291 InstructionList::InstructionList() : m_instructions() {}
1292 
1293 InstructionList::~InstructionList() = default;
1294 
1295 size_t InstructionList::GetSize() const { return m_instructions.size(); }
1296 
1297 uint32_t InstructionList::GetMaxOpcocdeByteSize() const {
1298   uint32_t max_inst_size = 0;
1299   collection::const_iterator pos, end;
1300   for (pos = m_instructions.begin(), end = m_instructions.end(); pos != end;
1301        ++pos) {
1302     uint32_t inst_size = (*pos)->GetOpcode().GetByteSize();
1303     if (max_inst_size < inst_size)
1304       max_inst_size = inst_size;
1305   }
1306   return max_inst_size;
1307 }
1308 
1309 InstructionSP InstructionList::GetInstructionAtIndex(size_t idx) const {
1310   InstructionSP inst_sp;
1311   if (idx < m_instructions.size())
1312     inst_sp = m_instructions[idx];
1313   return inst_sp;
1314 }
1315 
1316 InstructionSP InstructionList::GetInstructionAtAddress(const Address &address) {
1317   uint32_t index = GetIndexOfInstructionAtAddress(address);
1318   if (index != UINT32_MAX)
1319     return GetInstructionAtIndex(index);
1320   return nullptr;
1321 }
1322 
1323 void InstructionList::Dump(Stream *s, bool show_address, bool show_bytes,
1324                            bool show_control_flow_kind,
1325                            const ExecutionContext *exe_ctx) {
1326   const uint32_t max_opcode_byte_size = GetMaxOpcocdeByteSize();
1327   collection::const_iterator pos, begin, end;
1328 
1329   const FormatEntity::Entry *disassembly_format = nullptr;
1330   FormatEntity::Entry format;
1331   if (exe_ctx && exe_ctx->HasTargetScope()) {
1332     disassembly_format =
1333         exe_ctx->GetTargetRef().GetDebugger().GetDisassemblyFormat();
1334   } else {
1335     FormatEntity::Parse("${addr}: ", format);
1336     disassembly_format = &format;
1337   }
1338 
1339   for (begin = m_instructions.begin(), end = m_instructions.end(), pos = begin;
1340        pos != end; ++pos) {
1341     if (pos != begin)
1342       s->EOL();
1343     (*pos)->Dump(s, max_opcode_byte_size, show_address, show_bytes,
1344                  show_control_flow_kind, exe_ctx, nullptr, nullptr,
1345                  disassembly_format, 0);
1346   }
1347 }
1348 
1349 void InstructionList::Clear() { m_instructions.clear(); }
1350 
1351 void InstructionList::Append(lldb::InstructionSP &inst_sp) {
1352   if (inst_sp)
1353     m_instructions.push_back(inst_sp);
1354 }
1355 
1356 uint32_t
1357 InstructionList::GetIndexOfNextBranchInstruction(uint32_t start,
1358                                                  bool ignore_calls,
1359                                                  bool *found_calls) const {
1360   size_t num_instructions = m_instructions.size();
1361 
1362   uint32_t next_branch = UINT32_MAX;
1363 
1364   if (found_calls)
1365     *found_calls = false;
1366   for (size_t i = start; i < num_instructions; i++) {
1367     if (m_instructions[i]->DoesBranch()) {
1368       if (ignore_calls && m_instructions[i]->IsCall()) {
1369         if (found_calls)
1370           *found_calls = true;
1371         continue;
1372       }
1373       next_branch = i;
1374       break;
1375     }
1376   }
1377 
1378   return next_branch;
1379 }
1380 
1381 uint32_t
1382 InstructionList::GetIndexOfInstructionAtAddress(const Address &address) {
1383   size_t num_instructions = m_instructions.size();
1384   uint32_t index = UINT32_MAX;
1385   for (size_t i = 0; i < num_instructions; i++) {
1386     if (m_instructions[i]->GetAddress() == address) {
1387       index = i;
1388       break;
1389     }
1390   }
1391   return index;
1392 }
1393 
1394 uint32_t
1395 InstructionList::GetIndexOfInstructionAtLoadAddress(lldb::addr_t load_addr,
1396                                                     Target &target) {
1397   Address address;
1398   address.SetLoadAddress(load_addr, &target);
1399   return GetIndexOfInstructionAtAddress(address);
1400 }
1401 
1402 size_t Disassembler::ParseInstructions(Target &target, Address start,
1403                                        Limit limit, Stream *error_strm_ptr,
1404                                        bool force_live_memory) {
1405   m_instruction_list.Clear();
1406 
1407   if (!start.IsValid())
1408     return 0;
1409 
1410   start = ResolveAddress(target, start);
1411 
1412   addr_t byte_size = limit.value;
1413   if (limit.kind == Limit::Instructions)
1414     byte_size *= m_arch.GetMaximumOpcodeByteSize();
1415   auto data_sp = std::make_shared<DataBufferHeap>(byte_size, '\0');
1416 
1417   Status error;
1418   lldb::addr_t load_addr = LLDB_INVALID_ADDRESS;
1419   const size_t bytes_read =
1420       target.ReadMemory(start, data_sp->GetBytes(), data_sp->GetByteSize(),
1421                         error, force_live_memory, &load_addr);
1422   const bool data_from_file = load_addr == LLDB_INVALID_ADDRESS;
1423 
1424   if (bytes_read == 0) {
1425     if (error_strm_ptr) {
1426       if (const char *error_cstr = error.AsCString())
1427         error_strm_ptr->Printf("error: %s\n", error_cstr);
1428     }
1429     return 0;
1430   }
1431 
1432   if (bytes_read != data_sp->GetByteSize())
1433     data_sp->SetByteSize(bytes_read);
1434   DataExtractor data(data_sp, m_arch.GetByteOrder(),
1435                      m_arch.GetAddressByteSize());
1436   return DecodeInstructions(start, data, 0,
1437                             limit.kind == Limit::Instructions ? limit.value
1438                                                               : UINT32_MAX,
1439                             false, data_from_file);
1440 }
1441 
1442 // Disassembler copy constructor
1443 Disassembler::Disassembler(const ArchSpec &arch, const char *flavor)
1444     : m_arch(arch), m_instruction_list(), m_base_addr(LLDB_INVALID_ADDRESS),
1445       m_flavor() {
1446   if (flavor == nullptr)
1447     m_flavor.assign("default");
1448   else
1449     m_flavor.assign(flavor);
1450 
1451   // If this is an arm variant that can only include thumb (T16, T32)
1452   // instructions, force the arch triple to be "thumbv.." instead of "armv..."
1453   if (arch.IsAlwaysThumbInstructions()) {
1454     std::string thumb_arch_name(arch.GetTriple().getArchName().str());
1455     // Replace "arm" with "thumb" so we get all thumb variants correct
1456     if (thumb_arch_name.size() > 3) {
1457       thumb_arch_name.erase(0, 3);
1458       thumb_arch_name.insert(0, "thumb");
1459     }
1460     m_arch.SetTriple(thumb_arch_name.c_str());
1461   }
1462 }
1463 
1464 Disassembler::~Disassembler() = default;
1465 
1466 InstructionList &Disassembler::GetInstructionList() {
1467   return m_instruction_list;
1468 }
1469 
1470 const InstructionList &Disassembler::GetInstructionList() const {
1471   return m_instruction_list;
1472 }
1473 
1474 // Class PseudoInstruction
1475 
1476 PseudoInstruction::PseudoInstruction()
1477     : Instruction(Address(), AddressClass::eUnknown), m_description() {}
1478 
1479 PseudoInstruction::~PseudoInstruction() = default;
1480 
1481 bool PseudoInstruction::DoesBranch() {
1482   // This is NOT a valid question for a pseudo instruction.
1483   return false;
1484 }
1485 
1486 bool PseudoInstruction::HasDelaySlot() {
1487   // This is NOT a valid question for a pseudo instruction.
1488   return false;
1489 }
1490 
1491 bool PseudoInstruction::IsLoad() { return false; }
1492 
1493 bool PseudoInstruction::IsAuthenticated() { return false; }
1494 
1495 size_t PseudoInstruction::Decode(const lldb_private::Disassembler &disassembler,
1496                                  const lldb_private::DataExtractor &data,
1497                                  lldb::offset_t data_offset) {
1498   return m_opcode.GetByteSize();
1499 }
1500 
1501 void PseudoInstruction::SetOpcode(size_t opcode_size, void *opcode_data) {
1502   if (!opcode_data)
1503     return;
1504 
1505   switch (opcode_size) {
1506   case 8: {
1507     uint8_t value8 = *((uint8_t *)opcode_data);
1508     m_opcode.SetOpcode8(value8, eByteOrderInvalid);
1509     break;
1510   }
1511   case 16: {
1512     uint16_t value16 = *((uint16_t *)opcode_data);
1513     m_opcode.SetOpcode16(value16, eByteOrderInvalid);
1514     break;
1515   }
1516   case 32: {
1517     uint32_t value32 = *((uint32_t *)opcode_data);
1518     m_opcode.SetOpcode32(value32, eByteOrderInvalid);
1519     break;
1520   }
1521   case 64: {
1522     uint64_t value64 = *((uint64_t *)opcode_data);
1523     m_opcode.SetOpcode64(value64, eByteOrderInvalid);
1524     break;
1525   }
1526   default:
1527     break;
1528   }
1529 }
1530 
1531 void PseudoInstruction::SetDescription(llvm::StringRef description) {
1532   m_description = std::string(description);
1533 }
1534 
1535 Instruction::Operand Instruction::Operand::BuildRegister(ConstString &r) {
1536   Operand ret;
1537   ret.m_type = Type::Register;
1538   ret.m_register = r;
1539   return ret;
1540 }
1541 
1542 Instruction::Operand Instruction::Operand::BuildImmediate(lldb::addr_t imm,
1543                                                           bool neg) {
1544   Operand ret;
1545   ret.m_type = Type::Immediate;
1546   ret.m_immediate = imm;
1547   ret.m_negative = neg;
1548   return ret;
1549 }
1550 
1551 Instruction::Operand Instruction::Operand::BuildImmediate(int64_t imm) {
1552   Operand ret;
1553   ret.m_type = Type::Immediate;
1554   if (imm < 0) {
1555     ret.m_immediate = -imm;
1556     ret.m_negative = true;
1557   } else {
1558     ret.m_immediate = imm;
1559     ret.m_negative = false;
1560   }
1561   return ret;
1562 }
1563 
1564 Instruction::Operand
1565 Instruction::Operand::BuildDereference(const Operand &ref) {
1566   Operand ret;
1567   ret.m_type = Type::Dereference;
1568   ret.m_children = {ref};
1569   return ret;
1570 }
1571 
1572 Instruction::Operand Instruction::Operand::BuildSum(const Operand &lhs,
1573                                                     const Operand &rhs) {
1574   Operand ret;
1575   ret.m_type = Type::Sum;
1576   ret.m_children = {lhs, rhs};
1577   return ret;
1578 }
1579 
1580 Instruction::Operand Instruction::Operand::BuildProduct(const Operand &lhs,
1581                                                         const Operand &rhs) {
1582   Operand ret;
1583   ret.m_type = Type::Product;
1584   ret.m_children = {lhs, rhs};
1585   return ret;
1586 }
1587 
1588 std::function<bool(const Instruction::Operand &)>
1589 lldb_private::OperandMatchers::MatchBinaryOp(
1590     std::function<bool(const Instruction::Operand &)> base,
1591     std::function<bool(const Instruction::Operand &)> left,
1592     std::function<bool(const Instruction::Operand &)> right) {
1593   return [base, left, right](const Instruction::Operand &op) -> bool {
1594     return (base(op) && op.m_children.size() == 2 &&
1595             ((left(op.m_children[0]) && right(op.m_children[1])) ||
1596              (left(op.m_children[1]) && right(op.m_children[0]))));
1597   };
1598 }
1599 
1600 std::function<bool(const Instruction::Operand &)>
1601 lldb_private::OperandMatchers::MatchUnaryOp(
1602     std::function<bool(const Instruction::Operand &)> base,
1603     std::function<bool(const Instruction::Operand &)> child) {
1604   return [base, child](const Instruction::Operand &op) -> bool {
1605     return (base(op) && op.m_children.size() == 1 && child(op.m_children[0]));
1606   };
1607 }
1608 
1609 std::function<bool(const Instruction::Operand &)>
1610 lldb_private::OperandMatchers::MatchRegOp(const RegisterInfo &info) {
1611   return [&info](const Instruction::Operand &op) {
1612     return (op.m_type == Instruction::Operand::Type::Register &&
1613             (op.m_register == ConstString(info.name) ||
1614              op.m_register == ConstString(info.alt_name)));
1615   };
1616 }
1617 
1618 std::function<bool(const Instruction::Operand &)>
1619 lldb_private::OperandMatchers::FetchRegOp(ConstString &reg) {
1620   return [&reg](const Instruction::Operand &op) {
1621     if (op.m_type != Instruction::Operand::Type::Register) {
1622       return false;
1623     }
1624     reg = op.m_register;
1625     return true;
1626   };
1627 }
1628 
1629 std::function<bool(const Instruction::Operand &)>
1630 lldb_private::OperandMatchers::MatchImmOp(int64_t imm) {
1631   return [imm](const Instruction::Operand &op) {
1632     return (op.m_type == Instruction::Operand::Type::Immediate &&
1633             ((op.m_negative && op.m_immediate == (uint64_t)-imm) ||
1634              (!op.m_negative && op.m_immediate == (uint64_t)imm)));
1635   };
1636 }
1637 
1638 std::function<bool(const Instruction::Operand &)>
1639 lldb_private::OperandMatchers::FetchImmOp(int64_t &imm) {
1640   return [&imm](const Instruction::Operand &op) {
1641     if (op.m_type != Instruction::Operand::Type::Immediate) {
1642       return false;
1643     }
1644     if (op.m_negative) {
1645       imm = -((int64_t)op.m_immediate);
1646     } else {
1647       imm = ((int64_t)op.m_immediate);
1648     }
1649     return true;
1650   };
1651 }
1652 
1653 std::function<bool(const Instruction::Operand &)>
1654 lldb_private::OperandMatchers::MatchOpType(Instruction::Operand::Type type) {
1655   return [type](const Instruction::Operand &op) { return op.m_type == type; };
1656 }
1657