1 //===-- Disassembler.cpp --------------------------------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "lldb/Core/Disassembler.h" 10 11 #include "lldb/Core/AddressRange.h" 12 #include "lldb/Core/Debugger.h" 13 #include "lldb/Core/EmulateInstruction.h" 14 #include "lldb/Core/Mangled.h" 15 #include "lldb/Core/Module.h" 16 #include "lldb/Core/ModuleList.h" 17 #include "lldb/Core/PluginManager.h" 18 #include "lldb/Core/SourceManager.h" 19 #include "lldb/Host/FileSystem.h" 20 #include "lldb/Interpreter/OptionValue.h" 21 #include "lldb/Interpreter/OptionValueArray.h" 22 #include "lldb/Interpreter/OptionValueDictionary.h" 23 #include "lldb/Interpreter/OptionValueRegex.h" 24 #include "lldb/Interpreter/OptionValueString.h" 25 #include "lldb/Interpreter/OptionValueUInt64.h" 26 #include "lldb/Symbol/Function.h" 27 #include "lldb/Symbol/Symbol.h" 28 #include "lldb/Symbol/SymbolContext.h" 29 #include "lldb/Target/ExecutionContext.h" 30 #include "lldb/Target/SectionLoadList.h" 31 #include "lldb/Target/StackFrame.h" 32 #include "lldb/Target/Target.h" 33 #include "lldb/Target/Thread.h" 34 #include "lldb/Utility/DataBufferHeap.h" 35 #include "lldb/Utility/DataExtractor.h" 36 #include "lldb/Utility/RegularExpression.h" 37 #include "lldb/Utility/Status.h" 38 #include "lldb/Utility/Stream.h" 39 #include "lldb/Utility/StreamString.h" 40 #include "lldb/Utility/Timer.h" 41 #include "lldb/lldb-private-enumerations.h" 42 #include "lldb/lldb-private-interfaces.h" 43 #include "lldb/lldb-private-types.h" 44 #include "llvm/ADT/Triple.h" 45 #include "llvm/Support/Compiler.h" 46 47 #include <cstdint> 48 #include <cstring> 49 #include <utility> 50 51 #include <cassert> 52 53 #define DEFAULT_DISASM_BYTE_SIZE 32 54 55 using namespace lldb; 56 using namespace lldb_private; 57 58 DisassemblerSP Disassembler::FindPlugin(const ArchSpec &arch, 59 const char *flavor, 60 const char *plugin_name) { 61 LLDB_SCOPED_TIMERF("Disassembler::FindPlugin (arch = %s, plugin_name = %s)", 62 arch.GetArchitectureName(), plugin_name); 63 64 DisassemblerCreateInstance create_callback = nullptr; 65 66 if (plugin_name) { 67 create_callback = 68 PluginManager::GetDisassemblerCreateCallbackForPluginName(plugin_name); 69 if (create_callback) { 70 DisassemblerSP disassembler_sp(create_callback(arch, flavor)); 71 72 if (disassembler_sp) 73 return disassembler_sp; 74 } 75 } else { 76 for (uint32_t idx = 0; 77 (create_callback = PluginManager::GetDisassemblerCreateCallbackAtIndex( 78 idx)) != nullptr; 79 ++idx) { 80 DisassemblerSP disassembler_sp(create_callback(arch, flavor)); 81 82 if (disassembler_sp) 83 return disassembler_sp; 84 } 85 } 86 return DisassemblerSP(); 87 } 88 89 DisassemblerSP Disassembler::FindPluginForTarget(const Target &target, 90 const ArchSpec &arch, 91 const char *flavor, 92 const char *plugin_name) { 93 if (flavor == nullptr) { 94 // FIXME - we don't have the mechanism in place to do per-architecture 95 // settings. But since we know that for now we only support flavors on x86 96 // & x86_64, 97 if (arch.GetTriple().getArch() == llvm::Triple::x86 || 98 arch.GetTriple().getArch() == llvm::Triple::x86_64) 99 flavor = target.GetDisassemblyFlavor(); 100 } 101 return FindPlugin(arch, flavor, plugin_name); 102 } 103 104 static Address ResolveAddress(Target &target, const Address &addr) { 105 if (!addr.IsSectionOffset()) { 106 Address resolved_addr; 107 // If we weren't passed in a section offset address range, try and resolve 108 // it to something 109 bool is_resolved = target.GetSectionLoadList().IsEmpty() 110 ? target.GetImages().ResolveFileAddress( 111 addr.GetOffset(), resolved_addr) 112 : target.GetSectionLoadList().ResolveLoadAddress( 113 addr.GetOffset(), resolved_addr); 114 115 // We weren't able to resolve the address, just treat it as a raw address 116 if (is_resolved && resolved_addr.IsValid()) 117 return resolved_addr; 118 } 119 return addr; 120 } 121 122 lldb::DisassemblerSP Disassembler::DisassembleRange( 123 const ArchSpec &arch, const char *plugin_name, const char *flavor, 124 Target &target, const AddressRange &range, bool force_live_memory) { 125 if (range.GetByteSize() <= 0) 126 return {}; 127 128 if (!range.GetBaseAddress().IsValid()) 129 return {}; 130 131 lldb::DisassemblerSP disasm_sp = 132 Disassembler::FindPluginForTarget(target, arch, flavor, plugin_name); 133 134 if (!disasm_sp) 135 return {}; 136 137 const size_t bytes_disassembled = disasm_sp->ParseInstructions( 138 target, range.GetBaseAddress(), {Limit::Bytes, range.GetByteSize()}, 139 nullptr, force_live_memory); 140 if (bytes_disassembled == 0) 141 return {}; 142 143 return disasm_sp; 144 } 145 146 lldb::DisassemblerSP 147 Disassembler::DisassembleBytes(const ArchSpec &arch, const char *plugin_name, 148 const char *flavor, const Address &start, 149 const void *src, size_t src_len, 150 uint32_t num_instructions, bool data_from_file) { 151 if (!src) 152 return {}; 153 154 lldb::DisassemblerSP disasm_sp = 155 Disassembler::FindPlugin(arch, flavor, plugin_name); 156 157 if (!disasm_sp) 158 return {}; 159 160 DataExtractor data(src, src_len, arch.GetByteOrder(), 161 arch.GetAddressByteSize()); 162 163 (void)disasm_sp->DecodeInstructions(start, data, 0, num_instructions, false, 164 data_from_file); 165 return disasm_sp; 166 } 167 168 bool Disassembler::Disassemble(Debugger &debugger, const ArchSpec &arch, 169 const char *plugin_name, const char *flavor, 170 const ExecutionContext &exe_ctx, 171 const Address &address, Limit limit, 172 bool mixed_source_and_assembly, 173 uint32_t num_mixed_context_lines, 174 uint32_t options, Stream &strm) { 175 if (!exe_ctx.GetTargetPtr()) 176 return false; 177 178 lldb::DisassemblerSP disasm_sp(Disassembler::FindPluginForTarget( 179 exe_ctx.GetTargetRef(), arch, flavor, plugin_name)); 180 if (!disasm_sp) 181 return false; 182 183 const bool force_live_memory = true; 184 size_t bytes_disassembled = disasm_sp->ParseInstructions( 185 exe_ctx.GetTargetRef(), address, limit, &strm, force_live_memory); 186 if (bytes_disassembled == 0) 187 return false; 188 189 disasm_sp->PrintInstructions(debugger, arch, exe_ctx, 190 mixed_source_and_assembly, 191 num_mixed_context_lines, options, strm); 192 return true; 193 } 194 195 Disassembler::SourceLine 196 Disassembler::GetFunctionDeclLineEntry(const SymbolContext &sc) { 197 if (!sc.function) 198 return {}; 199 200 if (!sc.line_entry.IsValid()) 201 return {}; 202 203 LineEntry prologue_end_line = sc.line_entry; 204 FileSpec func_decl_file; 205 uint32_t func_decl_line; 206 sc.function->GetStartLineSourceInfo(func_decl_file, func_decl_line); 207 208 if (func_decl_file != prologue_end_line.file && 209 func_decl_file != prologue_end_line.original_file) 210 return {}; 211 212 SourceLine decl_line; 213 decl_line.file = func_decl_file; 214 decl_line.line = func_decl_line; 215 // TODO: Do we care about column on these entries? If so, we need to plumb 216 // that through GetStartLineSourceInfo. 217 decl_line.column = 0; 218 return decl_line; 219 } 220 221 void Disassembler::AddLineToSourceLineTables( 222 SourceLine &line, 223 std::map<FileSpec, std::set<uint32_t>> &source_lines_seen) { 224 if (line.IsValid()) { 225 auto source_lines_seen_pos = source_lines_seen.find(line.file); 226 if (source_lines_seen_pos == source_lines_seen.end()) { 227 std::set<uint32_t> lines; 228 lines.insert(line.line); 229 source_lines_seen.emplace(line.file, lines); 230 } else { 231 source_lines_seen_pos->second.insert(line.line); 232 } 233 } 234 } 235 236 bool Disassembler::ElideMixedSourceAndDisassemblyLine( 237 const ExecutionContext &exe_ctx, const SymbolContext &sc, 238 SourceLine &line) { 239 240 // TODO: should we also check target.process.thread.step-avoid-libraries ? 241 242 const RegularExpression *avoid_regex = nullptr; 243 244 // Skip any line #0 entries - they are implementation details 245 if (line.line == 0) 246 return false; 247 248 ThreadSP thread_sp = exe_ctx.GetThreadSP(); 249 if (thread_sp) { 250 avoid_regex = thread_sp->GetSymbolsToAvoidRegexp(); 251 } else { 252 TargetSP target_sp = exe_ctx.GetTargetSP(); 253 if (target_sp) { 254 Status error; 255 OptionValueSP value_sp = target_sp->GetDebugger().GetPropertyValue( 256 &exe_ctx, "target.process.thread.step-avoid-regexp", false, error); 257 if (value_sp && value_sp->GetType() == OptionValue::eTypeRegex) { 258 OptionValueRegex *re = value_sp->GetAsRegex(); 259 if (re) { 260 avoid_regex = re->GetCurrentValue(); 261 } 262 } 263 } 264 } 265 if (avoid_regex && sc.symbol != nullptr) { 266 const char *function_name = 267 sc.GetFunctionName(Mangled::ePreferDemangledWithoutArguments) 268 .GetCString(); 269 if (function_name && avoid_regex->Execute(function_name)) { 270 // skip this source line 271 return true; 272 } 273 } 274 // don't skip this source line 275 return false; 276 } 277 278 void Disassembler::PrintInstructions(Debugger &debugger, const ArchSpec &arch, 279 const ExecutionContext &exe_ctx, 280 bool mixed_source_and_assembly, 281 uint32_t num_mixed_context_lines, 282 uint32_t options, Stream &strm) { 283 // We got some things disassembled... 284 size_t num_instructions_found = GetInstructionList().GetSize(); 285 286 const uint32_t max_opcode_byte_size = 287 GetInstructionList().GetMaxOpcocdeByteSize(); 288 SymbolContext sc; 289 SymbolContext prev_sc; 290 AddressRange current_source_line_range; 291 const Address *pc_addr_ptr = nullptr; 292 StackFrame *frame = exe_ctx.GetFramePtr(); 293 294 TargetSP target_sp(exe_ctx.GetTargetSP()); 295 SourceManager &source_manager = 296 target_sp ? target_sp->GetSourceManager() : debugger.GetSourceManager(); 297 298 if (frame) { 299 pc_addr_ptr = &frame->GetFrameCodeAddress(); 300 } 301 const uint32_t scope = 302 eSymbolContextLineEntry | eSymbolContextFunction | eSymbolContextSymbol; 303 const bool use_inline_block_range = false; 304 305 const FormatEntity::Entry *disassembly_format = nullptr; 306 FormatEntity::Entry format; 307 if (exe_ctx.HasTargetScope()) { 308 disassembly_format = 309 exe_ctx.GetTargetRef().GetDebugger().GetDisassemblyFormat(); 310 } else { 311 FormatEntity::Parse("${addr}: ", format); 312 disassembly_format = &format; 313 } 314 315 // First pass: step through the list of instructions, find how long the 316 // initial addresses strings are, insert padding in the second pass so the 317 // opcodes all line up nicely. 318 319 // Also build up the source line mapping if this is mixed source & assembly 320 // mode. Calculate the source line for each assembly instruction (eliding 321 // inlined functions which the user wants to skip). 322 323 std::map<FileSpec, std::set<uint32_t>> source_lines_seen; 324 Symbol *previous_symbol = nullptr; 325 326 size_t address_text_size = 0; 327 for (size_t i = 0; i < num_instructions_found; ++i) { 328 Instruction *inst = GetInstructionList().GetInstructionAtIndex(i).get(); 329 if (inst) { 330 const Address &addr = inst->GetAddress(); 331 ModuleSP module_sp(addr.GetModule()); 332 if (module_sp) { 333 const SymbolContextItem resolve_mask = eSymbolContextFunction | 334 eSymbolContextSymbol | 335 eSymbolContextLineEntry; 336 uint32_t resolved_mask = 337 module_sp->ResolveSymbolContextForAddress(addr, resolve_mask, sc); 338 if (resolved_mask) { 339 StreamString strmstr; 340 Debugger::FormatDisassemblerAddress(disassembly_format, &sc, nullptr, 341 &exe_ctx, &addr, strmstr); 342 size_t cur_line = strmstr.GetSizeOfLastLine(); 343 if (cur_line > address_text_size) 344 address_text_size = cur_line; 345 346 // Add entries to our "source_lines_seen" map+set which list which 347 // sources lines occur in this disassembly session. We will print 348 // lines of context around a source line, but we don't want to print 349 // a source line that has a line table entry of its own - we'll leave 350 // that source line to be printed when it actually occurs in the 351 // disassembly. 352 353 if (mixed_source_and_assembly && sc.line_entry.IsValid()) { 354 if (sc.symbol != previous_symbol) { 355 SourceLine decl_line = GetFunctionDeclLineEntry(sc); 356 if (!ElideMixedSourceAndDisassemblyLine(exe_ctx, sc, decl_line)) 357 AddLineToSourceLineTables(decl_line, source_lines_seen); 358 } 359 if (sc.line_entry.IsValid()) { 360 SourceLine this_line; 361 this_line.file = sc.line_entry.file; 362 this_line.line = sc.line_entry.line; 363 this_line.column = sc.line_entry.column; 364 if (!ElideMixedSourceAndDisassemblyLine(exe_ctx, sc, this_line)) 365 AddLineToSourceLineTables(this_line, source_lines_seen); 366 } 367 } 368 } 369 sc.Clear(false); 370 } 371 } 372 } 373 374 previous_symbol = nullptr; 375 SourceLine previous_line; 376 for (size_t i = 0; i < num_instructions_found; ++i) { 377 Instruction *inst = GetInstructionList().GetInstructionAtIndex(i).get(); 378 379 if (inst) { 380 const Address &addr = inst->GetAddress(); 381 const bool inst_is_at_pc = pc_addr_ptr && addr == *pc_addr_ptr; 382 SourceLinesToDisplay source_lines_to_display; 383 384 prev_sc = sc; 385 386 ModuleSP module_sp(addr.GetModule()); 387 if (module_sp) { 388 uint32_t resolved_mask = module_sp->ResolveSymbolContextForAddress( 389 addr, eSymbolContextEverything, sc); 390 if (resolved_mask) { 391 if (mixed_source_and_assembly) { 392 393 // If we've started a new function (non-inlined), print all of the 394 // source lines from the function declaration until the first line 395 // table entry - typically the opening curly brace of the function. 396 if (previous_symbol != sc.symbol) { 397 // The default disassembly format puts an extra blank line 398 // between functions - so when we're displaying the source 399 // context for a function, we don't want to add a blank line 400 // after the source context or we'll end up with two of them. 401 if (previous_symbol != nullptr) 402 source_lines_to_display.print_source_context_end_eol = false; 403 404 previous_symbol = sc.symbol; 405 if (sc.function && sc.line_entry.IsValid()) { 406 LineEntry prologue_end_line = sc.line_entry; 407 if (!ElideMixedSourceAndDisassemblyLine(exe_ctx, sc, 408 prologue_end_line)) { 409 FileSpec func_decl_file; 410 uint32_t func_decl_line; 411 sc.function->GetStartLineSourceInfo(func_decl_file, 412 func_decl_line); 413 if (func_decl_file == prologue_end_line.file || 414 func_decl_file == prologue_end_line.original_file) { 415 // Add all the lines between the function declaration and 416 // the first non-prologue source line to the list of lines 417 // to print. 418 for (uint32_t lineno = func_decl_line; 419 lineno <= prologue_end_line.line; lineno++) { 420 SourceLine this_line; 421 this_line.file = func_decl_file; 422 this_line.line = lineno; 423 source_lines_to_display.lines.push_back(this_line); 424 } 425 // Mark the last line as the "current" one. Usually this 426 // is the open curly brace. 427 if (source_lines_to_display.lines.size() > 0) 428 source_lines_to_display.current_source_line = 429 source_lines_to_display.lines.size() - 1; 430 } 431 } 432 } 433 sc.GetAddressRange(scope, 0, use_inline_block_range, 434 current_source_line_range); 435 } 436 437 // If we've left a previous source line's address range, print a 438 // new source line 439 if (!current_source_line_range.ContainsFileAddress(addr)) { 440 sc.GetAddressRange(scope, 0, use_inline_block_range, 441 current_source_line_range); 442 443 if (sc != prev_sc && sc.comp_unit && sc.line_entry.IsValid()) { 444 SourceLine this_line; 445 this_line.file = sc.line_entry.file; 446 this_line.line = sc.line_entry.line; 447 448 if (!ElideMixedSourceAndDisassemblyLine(exe_ctx, sc, 449 this_line)) { 450 // Only print this source line if it is different from the 451 // last source line we printed. There may have been inlined 452 // functions between these lines that we elided, resulting in 453 // the same line being printed twice in a row for a 454 // contiguous block of assembly instructions. 455 if (this_line != previous_line) { 456 457 std::vector<uint32_t> previous_lines; 458 for (uint32_t i = 0; 459 i < num_mixed_context_lines && 460 (this_line.line - num_mixed_context_lines) > 0; 461 i++) { 462 uint32_t line = 463 this_line.line - num_mixed_context_lines + i; 464 auto pos = source_lines_seen.find(this_line.file); 465 if (pos != source_lines_seen.end()) { 466 if (pos->second.count(line) == 1) { 467 previous_lines.clear(); 468 } else { 469 previous_lines.push_back(line); 470 } 471 } 472 } 473 for (size_t i = 0; i < previous_lines.size(); i++) { 474 SourceLine previous_line; 475 previous_line.file = this_line.file; 476 previous_line.line = previous_lines[i]; 477 auto pos = source_lines_seen.find(previous_line.file); 478 if (pos != source_lines_seen.end()) { 479 pos->second.insert(previous_line.line); 480 } 481 source_lines_to_display.lines.push_back(previous_line); 482 } 483 484 source_lines_to_display.lines.push_back(this_line); 485 source_lines_to_display.current_source_line = 486 source_lines_to_display.lines.size() - 1; 487 488 for (uint32_t i = 0; i < num_mixed_context_lines; i++) { 489 SourceLine next_line; 490 next_line.file = this_line.file; 491 next_line.line = this_line.line + i + 1; 492 auto pos = source_lines_seen.find(next_line.file); 493 if (pos != source_lines_seen.end()) { 494 if (pos->second.count(next_line.line) == 1) 495 break; 496 pos->second.insert(next_line.line); 497 } 498 source_lines_to_display.lines.push_back(next_line); 499 } 500 } 501 previous_line = this_line; 502 } 503 } 504 } 505 } 506 } else { 507 sc.Clear(true); 508 } 509 } 510 511 if (source_lines_to_display.lines.size() > 0) { 512 strm.EOL(); 513 for (size_t idx = 0; idx < source_lines_to_display.lines.size(); 514 idx++) { 515 SourceLine ln = source_lines_to_display.lines[idx]; 516 const char *line_highlight = ""; 517 if (inst_is_at_pc && (options & eOptionMarkPCSourceLine)) { 518 line_highlight = "->"; 519 } else if (idx == source_lines_to_display.current_source_line) { 520 line_highlight = "**"; 521 } 522 source_manager.DisplaySourceLinesWithLineNumbers( 523 ln.file, ln.line, ln.column, 0, 0, line_highlight, &strm); 524 } 525 if (source_lines_to_display.print_source_context_end_eol) 526 strm.EOL(); 527 } 528 529 const bool show_bytes = (options & eOptionShowBytes) != 0; 530 const bool show_control_flow_kind = 531 (options & eOptionShowControlFlowKind) != 0; 532 inst->Dump(&strm, max_opcode_byte_size, true, show_bytes, 533 show_control_flow_kind, &exe_ctx, &sc, &prev_sc, nullptr, 534 address_text_size); 535 strm.EOL(); 536 } else { 537 break; 538 } 539 } 540 } 541 542 bool Disassembler::Disassemble(Debugger &debugger, const ArchSpec &arch, 543 StackFrame &frame, Stream &strm) { 544 AddressRange range; 545 SymbolContext sc( 546 frame.GetSymbolContext(eSymbolContextFunction | eSymbolContextSymbol)); 547 if (sc.function) { 548 range = sc.function->GetAddressRange(); 549 } else if (sc.symbol && sc.symbol->ValueIsAddress()) { 550 range.GetBaseAddress() = sc.symbol->GetAddressRef(); 551 range.SetByteSize(sc.symbol->GetByteSize()); 552 } else { 553 range.GetBaseAddress() = frame.GetFrameCodeAddress(); 554 } 555 556 if (range.GetBaseAddress().IsValid() && range.GetByteSize() == 0) 557 range.SetByteSize(DEFAULT_DISASM_BYTE_SIZE); 558 559 Disassembler::Limit limit = {Disassembler::Limit::Bytes, 560 range.GetByteSize()}; 561 if (limit.value == 0) 562 limit.value = DEFAULT_DISASM_BYTE_SIZE; 563 564 return Disassemble(debugger, arch, nullptr, nullptr, frame, 565 range.GetBaseAddress(), limit, false, 0, 0, strm); 566 } 567 568 Instruction::Instruction(const Address &address, AddressClass addr_class) 569 : m_address(address), m_address_class(addr_class), m_opcode(), 570 m_calculated_strings(false) {} 571 572 Instruction::~Instruction() = default; 573 574 namespace x86 { 575 576 /// These are the three values deciding instruction control flow kind. 577 /// InstructionLengthDecode function decodes an instruction and get this struct. 578 /// 579 /// primary_opcode 580 /// Primary opcode of the instruction. 581 /// For one-byte opcode instruction, it's the first byte after prefix. 582 /// For two- and three-byte opcodes, it's the second byte. 583 /// 584 /// opcode_len 585 /// The length of opcode in bytes. Valid opcode lengths are 1, 2, or 3. 586 /// 587 /// modrm 588 /// ModR/M byte of the instruction. 589 /// Bits[7:6] indicate MOD. Bits[5:3] specify a register and R/M bits[2:0] 590 /// may contain a register or specify an addressing mode, depending on MOD. 591 struct InstructionOpcodeAndModrm { 592 uint8_t primary_opcode; 593 uint8_t opcode_len; 594 uint8_t modrm; 595 }; 596 597 /// Determine the InstructionControlFlowKind based on opcode and modrm bytes. 598 /// Refer to http://ref.x86asm.net/coder.html for the full list of opcode and 599 /// instruction set. 600 /// 601 /// \param[in] opcode_and_modrm 602 /// Contains primary_opcode byte, its length, and ModR/M byte. 603 /// Refer to the struct InstructionOpcodeAndModrm for details. 604 /// 605 /// \return 606 /// The control flow kind of the instruction or 607 /// eInstructionControlFlowKindOther if the instruction doesn't affect 608 /// the control flow of the program. 609 lldb::InstructionControlFlowKind 610 MapOpcodeIntoControlFlowKind(InstructionOpcodeAndModrm opcode_and_modrm) { 611 uint8_t opcode = opcode_and_modrm.primary_opcode; 612 uint8_t opcode_len = opcode_and_modrm.opcode_len; 613 uint8_t modrm = opcode_and_modrm.modrm; 614 615 if (opcode_len > 2) 616 return lldb::eInstructionControlFlowKindOther; 617 618 if (opcode >= 0x70 && opcode <= 0x7F) { 619 if (opcode_len == 1) 620 return lldb::eInstructionControlFlowKindCondJump; 621 else 622 return lldb::eInstructionControlFlowKindOther; 623 } 624 625 if (opcode >= 0x80 && opcode <= 0x8F) { 626 if (opcode_len == 2) 627 return lldb::eInstructionControlFlowKindCondJump; 628 else 629 return lldb::eInstructionControlFlowKindOther; 630 } 631 632 switch (opcode) { 633 case 0x9A: 634 if (opcode_len == 1) 635 return lldb::eInstructionControlFlowKindFarCall; 636 break; 637 case 0xFF: 638 if (opcode_len == 1) { 639 uint8_t modrm_reg = (modrm >> 3) & 7; 640 if (modrm_reg == 2) 641 return lldb::eInstructionControlFlowKindCall; 642 else if (modrm_reg == 3) 643 return lldb::eInstructionControlFlowKindFarCall; 644 else if (modrm_reg == 4) 645 return lldb::eInstructionControlFlowKindJump; 646 else if (modrm_reg == 5) 647 return lldb::eInstructionControlFlowKindFarJump; 648 } 649 break; 650 case 0xE8: 651 if (opcode_len == 1) 652 return lldb::eInstructionControlFlowKindCall; 653 break; 654 case 0xCD: 655 case 0xCC: 656 case 0xCE: 657 case 0xF1: 658 if (opcode_len == 1) 659 return lldb::eInstructionControlFlowKindFarCall; 660 break; 661 case 0xCF: 662 if (opcode_len == 1) 663 return lldb::eInstructionControlFlowKindFarReturn; 664 break; 665 case 0xE9: 666 case 0xEB: 667 if (opcode_len == 1) 668 return lldb::eInstructionControlFlowKindJump; 669 break; 670 case 0xEA: 671 if (opcode_len == 1) 672 return lldb::eInstructionControlFlowKindFarJump; 673 break; 674 case 0xE3: 675 case 0xE0: 676 case 0xE1: 677 case 0xE2: 678 if (opcode_len == 1) 679 return lldb::eInstructionControlFlowKindCondJump; 680 break; 681 case 0xC3: 682 case 0xC2: 683 if (opcode_len == 1) 684 return lldb::eInstructionControlFlowKindReturn; 685 break; 686 case 0xCB: 687 case 0xCA: 688 if (opcode_len == 1) 689 return lldb::eInstructionControlFlowKindFarReturn; 690 break; 691 case 0x05: 692 case 0x34: 693 if (opcode_len == 2) 694 return lldb::eInstructionControlFlowKindFarCall; 695 break; 696 case 0x35: 697 case 0x07: 698 if (opcode_len == 2) 699 return lldb::eInstructionControlFlowKindFarReturn; 700 break; 701 case 0x01: 702 if (opcode_len == 2) { 703 switch (modrm) { 704 case 0xc1: 705 return lldb::eInstructionControlFlowKindFarCall; 706 case 0xc2: 707 case 0xc3: 708 return lldb::eInstructionControlFlowKindFarReturn; 709 default: 710 break; 711 } 712 } 713 break; 714 default: 715 break; 716 } 717 718 return lldb::eInstructionControlFlowKindOther; 719 } 720 721 /// Decode an instruction into opcode, modrm and opcode_len. 722 /// Refer to http://ref.x86asm.net/coder.html for the instruction bytes layout. 723 /// Opcodes in x86 are generally the first byte of instruction, though two-byte 724 /// instructions and prefixes exist. ModR/M is the byte following the opcode 725 /// and adds additional information for how the instruction is executed. 726 /// 727 /// \param[in] inst_bytes 728 /// Raw bytes of the instruction 729 /// 730 /// 731 /// \param[in] bytes_len 732 /// The length of the inst_bytes array. 733 /// 734 /// \param[in] is_exec_mode_64b 735 /// If true, the execution mode is 64 bit. 736 /// 737 /// \return 738 /// Returns decoded instruction as struct InstructionOpcodeAndModrm, holding 739 /// primary_opcode, opcode_len and modrm byte. Refer to the struct definition 740 /// for more details. 741 /// Otherwise if the given instruction is invalid, returns None. 742 llvm::Optional<InstructionOpcodeAndModrm> 743 InstructionLengthDecode(const uint8_t *inst_bytes, int bytes_len, 744 bool is_exec_mode_64b) { 745 int op_idx = 0; 746 bool prefix_done = false; 747 InstructionOpcodeAndModrm ret = {0, 0, 0}; 748 749 // In most cases, the primary_opcode is the first byte of the instruction 750 // but some instructions have a prefix to be skipped for these calculations. 751 // The following mapping is inspired from libipt's instruction decoding logic 752 // in `src/pt_ild.c` 753 while (!prefix_done) { 754 if (op_idx >= bytes_len) 755 return llvm::None; 756 757 ret.primary_opcode = inst_bytes[op_idx]; 758 switch (ret.primary_opcode) { 759 // prefix_ignore 760 case 0x26: 761 case 0x2e: 762 case 0x36: 763 case 0x3e: 764 case 0x64: 765 case 0x65: 766 // prefix_osz, prefix_asz 767 case 0x66: 768 case 0x67: 769 // prefix_lock, prefix_f2, prefix_f3 770 case 0xf0: 771 case 0xf2: 772 case 0xf3: 773 op_idx++; 774 break; 775 776 // prefix_rex 777 case 0x40: 778 case 0x41: 779 case 0x42: 780 case 0x43: 781 case 0x44: 782 case 0x45: 783 case 0x46: 784 case 0x47: 785 case 0x48: 786 case 0x49: 787 case 0x4a: 788 case 0x4b: 789 case 0x4c: 790 case 0x4d: 791 case 0x4e: 792 case 0x4f: 793 if (is_exec_mode_64b) 794 op_idx++; 795 else 796 prefix_done = true; 797 break; 798 799 // prefix_vex_c4, c5 800 case 0xc5: 801 if (!is_exec_mode_64b && (inst_bytes[op_idx + 1] & 0xc0) != 0xc0) { 802 prefix_done = true; 803 break; 804 } 805 806 ret.opcode_len = 2; 807 ret.primary_opcode = inst_bytes[op_idx + 2]; 808 ret.modrm = inst_bytes[op_idx + 3]; 809 return ret; 810 811 case 0xc4: 812 if (!is_exec_mode_64b && (inst_bytes[op_idx + 1] & 0xc0) != 0xc0) { 813 prefix_done = true; 814 break; 815 } 816 ret.opcode_len = inst_bytes[op_idx + 1] & 0x1f; 817 ret.primary_opcode = inst_bytes[op_idx + 3]; 818 ret.modrm = inst_bytes[op_idx + 4]; 819 return ret; 820 821 // prefix_evex 822 case 0x62: 823 if (!is_exec_mode_64b && (inst_bytes[op_idx + 1] & 0xc0) != 0xc0) { 824 prefix_done = true; 825 break; 826 } 827 ret.opcode_len = inst_bytes[op_idx + 1] & 0x03; 828 ret.primary_opcode = inst_bytes[op_idx + 4]; 829 ret.modrm = inst_bytes[op_idx + 5]; 830 return ret; 831 832 default: 833 prefix_done = true; 834 break; 835 } 836 } // prefix done 837 838 ret.primary_opcode = inst_bytes[op_idx]; 839 ret.modrm = inst_bytes[op_idx + 1]; 840 ret.opcode_len = 1; 841 842 // If the first opcode is 0F, it's two- or three- byte opcodes. 843 if (ret.primary_opcode == 0x0F) { 844 ret.primary_opcode = inst_bytes[++op_idx]; // get the next byte 845 846 if (ret.primary_opcode == 0x38) { 847 ret.opcode_len = 3; 848 ret.primary_opcode = inst_bytes[++op_idx]; // get the next byte 849 ret.modrm = inst_bytes[op_idx + 1]; 850 } else if (ret.primary_opcode == 0x3A) { 851 ret.opcode_len = 3; 852 ret.primary_opcode = inst_bytes[++op_idx]; 853 ret.modrm = inst_bytes[op_idx + 1]; 854 } else if ((ret.primary_opcode & 0xf8) == 0x38) { 855 ret.opcode_len = 0; 856 ret.primary_opcode = inst_bytes[++op_idx]; 857 ret.modrm = inst_bytes[op_idx + 1]; 858 } else if (ret.primary_opcode == 0x0F) { 859 ret.opcode_len = 3; 860 // opcode is 0x0F, no needs to update 861 ret.modrm = inst_bytes[op_idx + 1]; 862 } else { 863 ret.opcode_len = 2; 864 ret.modrm = inst_bytes[op_idx + 1]; 865 } 866 } 867 868 return ret; 869 } 870 871 lldb::InstructionControlFlowKind GetControlFlowKind(bool is_exec_mode_64b, 872 Opcode m_opcode) { 873 llvm::Optional<InstructionOpcodeAndModrm> ret = llvm::None; 874 875 if (m_opcode.GetOpcodeBytes() == nullptr || m_opcode.GetByteSize() <= 0) { 876 // x86_64 and i386 instructions are categorized as Opcode::Type::eTypeBytes 877 return lldb::eInstructionControlFlowKindUnknown; 878 } 879 880 // Opcode bytes will be decoded into primary_opcode, modrm and opcode length. 881 // These are the three values deciding instruction control flow kind. 882 ret = InstructionLengthDecode((const uint8_t *)m_opcode.GetOpcodeBytes(), 883 m_opcode.GetByteSize(), is_exec_mode_64b); 884 if (!ret) 885 return lldb::eInstructionControlFlowKindUnknown; 886 else 887 return MapOpcodeIntoControlFlowKind(ret.value()); 888 } 889 890 } // namespace x86 891 892 lldb::InstructionControlFlowKind 893 Instruction::GetControlFlowKind(const ArchSpec &arch) { 894 if (arch.GetTriple().getArch() == llvm::Triple::x86) 895 return x86::GetControlFlowKind(/*is_exec_mode_64b=*/false, m_opcode); 896 else if (arch.GetTriple().getArch() == llvm::Triple::x86_64) 897 return x86::GetControlFlowKind(/*is_exec_mode_64b=*/true, m_opcode); 898 else 899 return eInstructionControlFlowKindUnknown; // not implemented 900 } 901 902 AddressClass Instruction::GetAddressClass() { 903 if (m_address_class == AddressClass::eInvalid) 904 m_address_class = m_address.GetAddressClass(); 905 return m_address_class; 906 } 907 908 void Instruction::Dump(lldb_private::Stream *s, uint32_t max_opcode_byte_size, 909 bool show_address, bool show_bytes, 910 bool show_control_flow_kind, 911 const ExecutionContext *exe_ctx, 912 const SymbolContext *sym_ctx, 913 const SymbolContext *prev_sym_ctx, 914 const FormatEntity::Entry *disassembly_addr_format, 915 size_t max_address_text_size) { 916 size_t opcode_column_width = 7; 917 const size_t operand_column_width = 25; 918 919 CalculateMnemonicOperandsAndCommentIfNeeded(exe_ctx); 920 921 StreamString ss; 922 923 if (show_address) { 924 Debugger::FormatDisassemblerAddress(disassembly_addr_format, sym_ctx, 925 prev_sym_ctx, exe_ctx, &m_address, ss); 926 ss.FillLastLineToColumn(max_address_text_size, ' '); 927 } 928 929 if (show_bytes) { 930 if (m_opcode.GetType() == Opcode::eTypeBytes) { 931 // x86_64 and i386 are the only ones that use bytes right now so pad out 932 // the byte dump to be able to always show 15 bytes (3 chars each) plus a 933 // space 934 if (max_opcode_byte_size > 0) 935 m_opcode.Dump(&ss, max_opcode_byte_size * 3 + 1); 936 else 937 m_opcode.Dump(&ss, 15 * 3 + 1); 938 } else { 939 // Else, we have ARM or MIPS which can show up to a uint32_t 0x00000000 940 // (10 spaces) plus two for padding... 941 if (max_opcode_byte_size > 0) 942 m_opcode.Dump(&ss, max_opcode_byte_size * 3 + 1); 943 else 944 m_opcode.Dump(&ss, 12); 945 } 946 } 947 948 if (show_control_flow_kind) { 949 switch (GetControlFlowKind(exe_ctx->GetTargetRef().GetArchitecture())) { 950 case eInstructionControlFlowKindUnknown: 951 ss.Printf("%-12s", "unknown"); 952 break; 953 case eInstructionControlFlowKindOther: 954 ss.Printf("%-12s", "other"); 955 break; 956 case eInstructionControlFlowKindCall: 957 ss.Printf("%-12s", "call"); 958 break; 959 case eInstructionControlFlowKindReturn: 960 ss.Printf("%-12s", "return"); 961 break; 962 case eInstructionControlFlowKindJump: 963 ss.Printf("%-12s", "jump"); 964 break; 965 case eInstructionControlFlowKindCondJump: 966 ss.Printf("%-12s", "cond jump"); 967 break; 968 case eInstructionControlFlowKindFarCall: 969 ss.Printf("%-12s", "far call"); 970 break; 971 case eInstructionControlFlowKindFarReturn: 972 ss.Printf("%-12s", "far return"); 973 break; 974 case eInstructionControlFlowKindFarJump: 975 ss.Printf("%-12s", "far jump"); 976 break; 977 } 978 } 979 980 const size_t opcode_pos = ss.GetSizeOfLastLine(); 981 982 // The default opcode size of 7 characters is plenty for most architectures 983 // but some like arm can pull out the occasional vqrshrun.s16. We won't get 984 // consistent column spacing in these cases, unfortunately. 985 if (m_opcode_name.length() >= opcode_column_width) { 986 opcode_column_width = m_opcode_name.length() + 1; 987 } 988 989 ss.PutCString(m_opcode_name); 990 ss.FillLastLineToColumn(opcode_pos + opcode_column_width, ' '); 991 ss.PutCString(m_mnemonics); 992 993 if (!m_comment.empty()) { 994 ss.FillLastLineToColumn( 995 opcode_pos + opcode_column_width + operand_column_width, ' '); 996 ss.PutCString(" ; "); 997 ss.PutCString(m_comment); 998 } 999 s->PutCString(ss.GetString()); 1000 } 1001 1002 bool Instruction::DumpEmulation(const ArchSpec &arch) { 1003 std::unique_ptr<EmulateInstruction> insn_emulator_up( 1004 EmulateInstruction::FindPlugin(arch, eInstructionTypeAny, nullptr)); 1005 if (insn_emulator_up) { 1006 insn_emulator_up->SetInstruction(GetOpcode(), GetAddress(), nullptr); 1007 return insn_emulator_up->EvaluateInstruction(0); 1008 } 1009 1010 return false; 1011 } 1012 1013 bool Instruction::CanSetBreakpoint () { 1014 return !HasDelaySlot(); 1015 } 1016 1017 bool Instruction::HasDelaySlot() { 1018 // Default is false. 1019 return false; 1020 } 1021 1022 OptionValueSP Instruction::ReadArray(FILE *in_file, Stream *out_stream, 1023 OptionValue::Type data_type) { 1024 bool done = false; 1025 char buffer[1024]; 1026 1027 auto option_value_sp = std::make_shared<OptionValueArray>(1u << data_type); 1028 1029 int idx = 0; 1030 while (!done) { 1031 if (!fgets(buffer, 1023, in_file)) { 1032 out_stream->Printf( 1033 "Instruction::ReadArray: Error reading file (fgets).\n"); 1034 option_value_sp.reset(); 1035 return option_value_sp; 1036 } 1037 1038 std::string line(buffer); 1039 1040 size_t len = line.size(); 1041 if (line[len - 1] == '\n') { 1042 line[len - 1] = '\0'; 1043 line.resize(len - 1); 1044 } 1045 1046 if ((line.size() == 1) && line[0] == ']') { 1047 done = true; 1048 line.clear(); 1049 } 1050 1051 if (!line.empty()) { 1052 std::string value; 1053 static RegularExpression g_reg_exp( 1054 llvm::StringRef("^[ \t]*([^ \t]+)[ \t]*$")); 1055 llvm::SmallVector<llvm::StringRef, 2> matches; 1056 if (g_reg_exp.Execute(line, &matches)) 1057 value = matches[1].str(); 1058 else 1059 value = line; 1060 1061 OptionValueSP data_value_sp; 1062 switch (data_type) { 1063 case OptionValue::eTypeUInt64: 1064 data_value_sp = std::make_shared<OptionValueUInt64>(0, 0); 1065 data_value_sp->SetValueFromString(value); 1066 break; 1067 // Other types can be added later as needed. 1068 default: 1069 data_value_sp = std::make_shared<OptionValueString>(value.c_str(), ""); 1070 break; 1071 } 1072 1073 option_value_sp->GetAsArray()->InsertValue(idx, data_value_sp); 1074 ++idx; 1075 } 1076 } 1077 1078 return option_value_sp; 1079 } 1080 1081 OptionValueSP Instruction::ReadDictionary(FILE *in_file, Stream *out_stream) { 1082 bool done = false; 1083 char buffer[1024]; 1084 1085 auto option_value_sp = std::make_shared<OptionValueDictionary>(); 1086 static ConstString encoding_key("data_encoding"); 1087 OptionValue::Type data_type = OptionValue::eTypeInvalid; 1088 1089 while (!done) { 1090 // Read the next line in the file 1091 if (!fgets(buffer, 1023, in_file)) { 1092 out_stream->Printf( 1093 "Instruction::ReadDictionary: Error reading file (fgets).\n"); 1094 option_value_sp.reset(); 1095 return option_value_sp; 1096 } 1097 1098 // Check to see if the line contains the end-of-dictionary marker ("}") 1099 std::string line(buffer); 1100 1101 size_t len = line.size(); 1102 if (line[len - 1] == '\n') { 1103 line[len - 1] = '\0'; 1104 line.resize(len - 1); 1105 } 1106 1107 if ((line.size() == 1) && (line[0] == '}')) { 1108 done = true; 1109 line.clear(); 1110 } 1111 1112 // Try to find a key-value pair in the current line and add it to the 1113 // dictionary. 1114 if (!line.empty()) { 1115 static RegularExpression g_reg_exp(llvm::StringRef( 1116 "^[ \t]*([a-zA-Z_][a-zA-Z0-9_]*)[ \t]*=[ \t]*(.*)[ \t]*$")); 1117 1118 llvm::SmallVector<llvm::StringRef, 3> matches; 1119 1120 bool reg_exp_success = g_reg_exp.Execute(line, &matches); 1121 std::string key; 1122 std::string value; 1123 if (reg_exp_success) { 1124 key = matches[1].str(); 1125 value = matches[2].str(); 1126 } else { 1127 out_stream->Printf("Instruction::ReadDictionary: Failure executing " 1128 "regular expression.\n"); 1129 option_value_sp.reset(); 1130 return option_value_sp; 1131 } 1132 1133 ConstString const_key(key.c_str()); 1134 // Check value to see if it's the start of an array or dictionary. 1135 1136 lldb::OptionValueSP value_sp; 1137 assert(value.empty() == false); 1138 assert(key.empty() == false); 1139 1140 if (value[0] == '{') { 1141 assert(value.size() == 1); 1142 // value is a dictionary 1143 value_sp = ReadDictionary(in_file, out_stream); 1144 if (!value_sp) { 1145 option_value_sp.reset(); 1146 return option_value_sp; 1147 } 1148 } else if (value[0] == '[') { 1149 assert(value.size() == 1); 1150 // value is an array 1151 value_sp = ReadArray(in_file, out_stream, data_type); 1152 if (!value_sp) { 1153 option_value_sp.reset(); 1154 return option_value_sp; 1155 } 1156 // We've used the data_type to read an array; re-set the type to 1157 // Invalid 1158 data_type = OptionValue::eTypeInvalid; 1159 } else if ((value[0] == '0') && (value[1] == 'x')) { 1160 value_sp = std::make_shared<OptionValueUInt64>(0, 0); 1161 value_sp->SetValueFromString(value); 1162 } else { 1163 size_t len = value.size(); 1164 if ((value[0] == '"') && (value[len - 1] == '"')) 1165 value = value.substr(1, len - 2); 1166 value_sp = std::make_shared<OptionValueString>(value.c_str(), ""); 1167 } 1168 1169 if (const_key == encoding_key) { 1170 // A 'data_encoding=..." is NOT a normal key-value pair; it is meta-data 1171 // indicating the 1172 // data type of an upcoming array (usually the next bit of data to be 1173 // read in). 1174 if (strcmp(value.c_str(), "uint32_t") == 0) 1175 data_type = OptionValue::eTypeUInt64; 1176 } else 1177 option_value_sp->GetAsDictionary()->SetValueForKey(const_key, value_sp, 1178 false); 1179 } 1180 } 1181 1182 return option_value_sp; 1183 } 1184 1185 bool Instruction::TestEmulation(Stream *out_stream, const char *file_name) { 1186 if (!out_stream) 1187 return false; 1188 1189 if (!file_name) { 1190 out_stream->Printf("Instruction::TestEmulation: Missing file_name."); 1191 return false; 1192 } 1193 FILE *test_file = FileSystem::Instance().Fopen(file_name, "r"); 1194 if (!test_file) { 1195 out_stream->Printf( 1196 "Instruction::TestEmulation: Attempt to open test file failed."); 1197 return false; 1198 } 1199 1200 char buffer[256]; 1201 if (!fgets(buffer, 255, test_file)) { 1202 out_stream->Printf( 1203 "Instruction::TestEmulation: Error reading first line of test file.\n"); 1204 fclose(test_file); 1205 return false; 1206 } 1207 1208 if (strncmp(buffer, "InstructionEmulationState={", 27) != 0) { 1209 out_stream->Printf("Instructin::TestEmulation: Test file does not contain " 1210 "emulation state dictionary\n"); 1211 fclose(test_file); 1212 return false; 1213 } 1214 1215 // Read all the test information from the test file into an 1216 // OptionValueDictionary. 1217 1218 OptionValueSP data_dictionary_sp(ReadDictionary(test_file, out_stream)); 1219 if (!data_dictionary_sp) { 1220 out_stream->Printf( 1221 "Instruction::TestEmulation: Error reading Dictionary Object.\n"); 1222 fclose(test_file); 1223 return false; 1224 } 1225 1226 fclose(test_file); 1227 1228 OptionValueDictionary *data_dictionary = 1229 data_dictionary_sp->GetAsDictionary(); 1230 static ConstString description_key("assembly_string"); 1231 static ConstString triple_key("triple"); 1232 1233 OptionValueSP value_sp = data_dictionary->GetValueForKey(description_key); 1234 1235 if (!value_sp) { 1236 out_stream->Printf("Instruction::TestEmulation: Test file does not " 1237 "contain description string.\n"); 1238 return false; 1239 } 1240 1241 SetDescription(value_sp->GetStringValue()); 1242 1243 value_sp = data_dictionary->GetValueForKey(triple_key); 1244 if (!value_sp) { 1245 out_stream->Printf( 1246 "Instruction::TestEmulation: Test file does not contain triple.\n"); 1247 return false; 1248 } 1249 1250 ArchSpec arch; 1251 arch.SetTriple(llvm::Triple(value_sp->GetStringValue())); 1252 1253 bool success = false; 1254 std::unique_ptr<EmulateInstruction> insn_emulator_up( 1255 EmulateInstruction::FindPlugin(arch, eInstructionTypeAny, nullptr)); 1256 if (insn_emulator_up) 1257 success = 1258 insn_emulator_up->TestEmulation(out_stream, arch, data_dictionary); 1259 1260 if (success) 1261 out_stream->Printf("Emulation test succeeded."); 1262 else 1263 out_stream->Printf("Emulation test failed."); 1264 1265 return success; 1266 } 1267 1268 bool Instruction::Emulate( 1269 const ArchSpec &arch, uint32_t evaluate_options, void *baton, 1270 EmulateInstruction::ReadMemoryCallback read_mem_callback, 1271 EmulateInstruction::WriteMemoryCallback write_mem_callback, 1272 EmulateInstruction::ReadRegisterCallback read_reg_callback, 1273 EmulateInstruction::WriteRegisterCallback write_reg_callback) { 1274 std::unique_ptr<EmulateInstruction> insn_emulator_up( 1275 EmulateInstruction::FindPlugin(arch, eInstructionTypeAny, nullptr)); 1276 if (insn_emulator_up) { 1277 insn_emulator_up->SetBaton(baton); 1278 insn_emulator_up->SetCallbacks(read_mem_callback, write_mem_callback, 1279 read_reg_callback, write_reg_callback); 1280 insn_emulator_up->SetInstruction(GetOpcode(), GetAddress(), nullptr); 1281 return insn_emulator_up->EvaluateInstruction(evaluate_options); 1282 } 1283 1284 return false; 1285 } 1286 1287 uint32_t Instruction::GetData(DataExtractor &data) { 1288 return m_opcode.GetData(data); 1289 } 1290 1291 InstructionList::InstructionList() : m_instructions() {} 1292 1293 InstructionList::~InstructionList() = default; 1294 1295 size_t InstructionList::GetSize() const { return m_instructions.size(); } 1296 1297 uint32_t InstructionList::GetMaxOpcocdeByteSize() const { 1298 uint32_t max_inst_size = 0; 1299 collection::const_iterator pos, end; 1300 for (pos = m_instructions.begin(), end = m_instructions.end(); pos != end; 1301 ++pos) { 1302 uint32_t inst_size = (*pos)->GetOpcode().GetByteSize(); 1303 if (max_inst_size < inst_size) 1304 max_inst_size = inst_size; 1305 } 1306 return max_inst_size; 1307 } 1308 1309 InstructionSP InstructionList::GetInstructionAtIndex(size_t idx) const { 1310 InstructionSP inst_sp; 1311 if (idx < m_instructions.size()) 1312 inst_sp = m_instructions[idx]; 1313 return inst_sp; 1314 } 1315 1316 InstructionSP InstructionList::GetInstructionAtAddress(const Address &address) { 1317 uint32_t index = GetIndexOfInstructionAtAddress(address); 1318 if (index != UINT32_MAX) 1319 return GetInstructionAtIndex(index); 1320 return nullptr; 1321 } 1322 1323 void InstructionList::Dump(Stream *s, bool show_address, bool show_bytes, 1324 bool show_control_flow_kind, 1325 const ExecutionContext *exe_ctx) { 1326 const uint32_t max_opcode_byte_size = GetMaxOpcocdeByteSize(); 1327 collection::const_iterator pos, begin, end; 1328 1329 const FormatEntity::Entry *disassembly_format = nullptr; 1330 FormatEntity::Entry format; 1331 if (exe_ctx && exe_ctx->HasTargetScope()) { 1332 disassembly_format = 1333 exe_ctx->GetTargetRef().GetDebugger().GetDisassemblyFormat(); 1334 } else { 1335 FormatEntity::Parse("${addr}: ", format); 1336 disassembly_format = &format; 1337 } 1338 1339 for (begin = m_instructions.begin(), end = m_instructions.end(), pos = begin; 1340 pos != end; ++pos) { 1341 if (pos != begin) 1342 s->EOL(); 1343 (*pos)->Dump(s, max_opcode_byte_size, show_address, show_bytes, 1344 show_control_flow_kind, exe_ctx, nullptr, nullptr, 1345 disassembly_format, 0); 1346 } 1347 } 1348 1349 void InstructionList::Clear() { m_instructions.clear(); } 1350 1351 void InstructionList::Append(lldb::InstructionSP &inst_sp) { 1352 if (inst_sp) 1353 m_instructions.push_back(inst_sp); 1354 } 1355 1356 uint32_t 1357 InstructionList::GetIndexOfNextBranchInstruction(uint32_t start, 1358 bool ignore_calls, 1359 bool *found_calls) const { 1360 size_t num_instructions = m_instructions.size(); 1361 1362 uint32_t next_branch = UINT32_MAX; 1363 1364 if (found_calls) 1365 *found_calls = false; 1366 for (size_t i = start; i < num_instructions; i++) { 1367 if (m_instructions[i]->DoesBranch()) { 1368 if (ignore_calls && m_instructions[i]->IsCall()) { 1369 if (found_calls) 1370 *found_calls = true; 1371 continue; 1372 } 1373 next_branch = i; 1374 break; 1375 } 1376 } 1377 1378 return next_branch; 1379 } 1380 1381 uint32_t 1382 InstructionList::GetIndexOfInstructionAtAddress(const Address &address) { 1383 size_t num_instructions = m_instructions.size(); 1384 uint32_t index = UINT32_MAX; 1385 for (size_t i = 0; i < num_instructions; i++) { 1386 if (m_instructions[i]->GetAddress() == address) { 1387 index = i; 1388 break; 1389 } 1390 } 1391 return index; 1392 } 1393 1394 uint32_t 1395 InstructionList::GetIndexOfInstructionAtLoadAddress(lldb::addr_t load_addr, 1396 Target &target) { 1397 Address address; 1398 address.SetLoadAddress(load_addr, &target); 1399 return GetIndexOfInstructionAtAddress(address); 1400 } 1401 1402 size_t Disassembler::ParseInstructions(Target &target, Address start, 1403 Limit limit, Stream *error_strm_ptr, 1404 bool force_live_memory) { 1405 m_instruction_list.Clear(); 1406 1407 if (!start.IsValid()) 1408 return 0; 1409 1410 start = ResolveAddress(target, start); 1411 1412 addr_t byte_size = limit.value; 1413 if (limit.kind == Limit::Instructions) 1414 byte_size *= m_arch.GetMaximumOpcodeByteSize(); 1415 auto data_sp = std::make_shared<DataBufferHeap>(byte_size, '\0'); 1416 1417 Status error; 1418 lldb::addr_t load_addr = LLDB_INVALID_ADDRESS; 1419 const size_t bytes_read = 1420 target.ReadMemory(start, data_sp->GetBytes(), data_sp->GetByteSize(), 1421 error, force_live_memory, &load_addr); 1422 const bool data_from_file = load_addr == LLDB_INVALID_ADDRESS; 1423 1424 if (bytes_read == 0) { 1425 if (error_strm_ptr) { 1426 if (const char *error_cstr = error.AsCString()) 1427 error_strm_ptr->Printf("error: %s\n", error_cstr); 1428 } 1429 return 0; 1430 } 1431 1432 if (bytes_read != data_sp->GetByteSize()) 1433 data_sp->SetByteSize(bytes_read); 1434 DataExtractor data(data_sp, m_arch.GetByteOrder(), 1435 m_arch.GetAddressByteSize()); 1436 return DecodeInstructions(start, data, 0, 1437 limit.kind == Limit::Instructions ? limit.value 1438 : UINT32_MAX, 1439 false, data_from_file); 1440 } 1441 1442 // Disassembler copy constructor 1443 Disassembler::Disassembler(const ArchSpec &arch, const char *flavor) 1444 : m_arch(arch), m_instruction_list(), m_base_addr(LLDB_INVALID_ADDRESS), 1445 m_flavor() { 1446 if (flavor == nullptr) 1447 m_flavor.assign("default"); 1448 else 1449 m_flavor.assign(flavor); 1450 1451 // If this is an arm variant that can only include thumb (T16, T32) 1452 // instructions, force the arch triple to be "thumbv.." instead of "armv..." 1453 if (arch.IsAlwaysThumbInstructions()) { 1454 std::string thumb_arch_name(arch.GetTriple().getArchName().str()); 1455 // Replace "arm" with "thumb" so we get all thumb variants correct 1456 if (thumb_arch_name.size() > 3) { 1457 thumb_arch_name.erase(0, 3); 1458 thumb_arch_name.insert(0, "thumb"); 1459 } 1460 m_arch.SetTriple(thumb_arch_name.c_str()); 1461 } 1462 } 1463 1464 Disassembler::~Disassembler() = default; 1465 1466 InstructionList &Disassembler::GetInstructionList() { 1467 return m_instruction_list; 1468 } 1469 1470 const InstructionList &Disassembler::GetInstructionList() const { 1471 return m_instruction_list; 1472 } 1473 1474 // Class PseudoInstruction 1475 1476 PseudoInstruction::PseudoInstruction() 1477 : Instruction(Address(), AddressClass::eUnknown), m_description() {} 1478 1479 PseudoInstruction::~PseudoInstruction() = default; 1480 1481 bool PseudoInstruction::DoesBranch() { 1482 // This is NOT a valid question for a pseudo instruction. 1483 return false; 1484 } 1485 1486 bool PseudoInstruction::HasDelaySlot() { 1487 // This is NOT a valid question for a pseudo instruction. 1488 return false; 1489 } 1490 1491 bool PseudoInstruction::IsLoad() { return false; } 1492 1493 bool PseudoInstruction::IsAuthenticated() { return false; } 1494 1495 size_t PseudoInstruction::Decode(const lldb_private::Disassembler &disassembler, 1496 const lldb_private::DataExtractor &data, 1497 lldb::offset_t data_offset) { 1498 return m_opcode.GetByteSize(); 1499 } 1500 1501 void PseudoInstruction::SetOpcode(size_t opcode_size, void *opcode_data) { 1502 if (!opcode_data) 1503 return; 1504 1505 switch (opcode_size) { 1506 case 8: { 1507 uint8_t value8 = *((uint8_t *)opcode_data); 1508 m_opcode.SetOpcode8(value8, eByteOrderInvalid); 1509 break; 1510 } 1511 case 16: { 1512 uint16_t value16 = *((uint16_t *)opcode_data); 1513 m_opcode.SetOpcode16(value16, eByteOrderInvalid); 1514 break; 1515 } 1516 case 32: { 1517 uint32_t value32 = *((uint32_t *)opcode_data); 1518 m_opcode.SetOpcode32(value32, eByteOrderInvalid); 1519 break; 1520 } 1521 case 64: { 1522 uint64_t value64 = *((uint64_t *)opcode_data); 1523 m_opcode.SetOpcode64(value64, eByteOrderInvalid); 1524 break; 1525 } 1526 default: 1527 break; 1528 } 1529 } 1530 1531 void PseudoInstruction::SetDescription(llvm::StringRef description) { 1532 m_description = std::string(description); 1533 } 1534 1535 Instruction::Operand Instruction::Operand::BuildRegister(ConstString &r) { 1536 Operand ret; 1537 ret.m_type = Type::Register; 1538 ret.m_register = r; 1539 return ret; 1540 } 1541 1542 Instruction::Operand Instruction::Operand::BuildImmediate(lldb::addr_t imm, 1543 bool neg) { 1544 Operand ret; 1545 ret.m_type = Type::Immediate; 1546 ret.m_immediate = imm; 1547 ret.m_negative = neg; 1548 return ret; 1549 } 1550 1551 Instruction::Operand Instruction::Operand::BuildImmediate(int64_t imm) { 1552 Operand ret; 1553 ret.m_type = Type::Immediate; 1554 if (imm < 0) { 1555 ret.m_immediate = -imm; 1556 ret.m_negative = true; 1557 } else { 1558 ret.m_immediate = imm; 1559 ret.m_negative = false; 1560 } 1561 return ret; 1562 } 1563 1564 Instruction::Operand 1565 Instruction::Operand::BuildDereference(const Operand &ref) { 1566 Operand ret; 1567 ret.m_type = Type::Dereference; 1568 ret.m_children = {ref}; 1569 return ret; 1570 } 1571 1572 Instruction::Operand Instruction::Operand::BuildSum(const Operand &lhs, 1573 const Operand &rhs) { 1574 Operand ret; 1575 ret.m_type = Type::Sum; 1576 ret.m_children = {lhs, rhs}; 1577 return ret; 1578 } 1579 1580 Instruction::Operand Instruction::Operand::BuildProduct(const Operand &lhs, 1581 const Operand &rhs) { 1582 Operand ret; 1583 ret.m_type = Type::Product; 1584 ret.m_children = {lhs, rhs}; 1585 return ret; 1586 } 1587 1588 std::function<bool(const Instruction::Operand &)> 1589 lldb_private::OperandMatchers::MatchBinaryOp( 1590 std::function<bool(const Instruction::Operand &)> base, 1591 std::function<bool(const Instruction::Operand &)> left, 1592 std::function<bool(const Instruction::Operand &)> right) { 1593 return [base, left, right](const Instruction::Operand &op) -> bool { 1594 return (base(op) && op.m_children.size() == 2 && 1595 ((left(op.m_children[0]) && right(op.m_children[1])) || 1596 (left(op.m_children[1]) && right(op.m_children[0])))); 1597 }; 1598 } 1599 1600 std::function<bool(const Instruction::Operand &)> 1601 lldb_private::OperandMatchers::MatchUnaryOp( 1602 std::function<bool(const Instruction::Operand &)> base, 1603 std::function<bool(const Instruction::Operand &)> child) { 1604 return [base, child](const Instruction::Operand &op) -> bool { 1605 return (base(op) && op.m_children.size() == 1 && child(op.m_children[0])); 1606 }; 1607 } 1608 1609 std::function<bool(const Instruction::Operand &)> 1610 lldb_private::OperandMatchers::MatchRegOp(const RegisterInfo &info) { 1611 return [&info](const Instruction::Operand &op) { 1612 return (op.m_type == Instruction::Operand::Type::Register && 1613 (op.m_register == ConstString(info.name) || 1614 op.m_register == ConstString(info.alt_name))); 1615 }; 1616 } 1617 1618 std::function<bool(const Instruction::Operand &)> 1619 lldb_private::OperandMatchers::FetchRegOp(ConstString ®) { 1620 return [®](const Instruction::Operand &op) { 1621 if (op.m_type != Instruction::Operand::Type::Register) { 1622 return false; 1623 } 1624 reg = op.m_register; 1625 return true; 1626 }; 1627 } 1628 1629 std::function<bool(const Instruction::Operand &)> 1630 lldb_private::OperandMatchers::MatchImmOp(int64_t imm) { 1631 return [imm](const Instruction::Operand &op) { 1632 return (op.m_type == Instruction::Operand::Type::Immediate && 1633 ((op.m_negative && op.m_immediate == (uint64_t)-imm) || 1634 (!op.m_negative && op.m_immediate == (uint64_t)imm))); 1635 }; 1636 } 1637 1638 std::function<bool(const Instruction::Operand &)> 1639 lldb_private::OperandMatchers::FetchImmOp(int64_t &imm) { 1640 return [&imm](const Instruction::Operand &op) { 1641 if (op.m_type != Instruction::Operand::Type::Immediate) { 1642 return false; 1643 } 1644 if (op.m_negative) { 1645 imm = -((int64_t)op.m_immediate); 1646 } else { 1647 imm = ((int64_t)op.m_immediate); 1648 } 1649 return true; 1650 }; 1651 } 1652 1653 std::function<bool(const Instruction::Operand &)> 1654 lldb_private::OperandMatchers::MatchOpType(Instruction::Operand::Type type) { 1655 return [type](const Instruction::Operand &op) { return op.m_type == type; }; 1656 } 1657