/* Python interface to instruction disassembly.

   Copyright (C) 2021-2024 Free Software Foundation, Inc.

   This file is part of GDB.

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program.  If not, see <http://www.gnu.org/licenses/>.  */

#include "python-internal.h"
#include "language.h"
#include "dis-asm.h"
#include "arch-utils.h"
#include "charset.h"
#include "disasm.h"
#include "progspace.h"

/* Implement gdb.disassembler.DisassembleInfo type.  An object of this type
   represents a single disassembler request from GDB.  */

struct disasm_info_object
{
  PyObject_HEAD

  /* The architecture in which we are disassembling.  */
  struct gdbarch *gdbarch;

  /* The program_space in which we are disassembling.  */
  struct program_space *program_space;

  /* Address of the instruction to disassemble.  */
  bfd_vma address;

  /* The disassemble_info passed from core GDB, this contains the
     callbacks necessary to read the instruction from core GDB, and to
     print the disassembled instruction.  A nullptr value here is what
     disasm_info_object_is_valid treats as "no longer valid".  */
  disassemble_info *gdb_info;

  /* If copies of this object are created then they are chained together
     via this NEXT pointer, this allows all the copies to be invalidated at
     the same time as the parent object.  disasm_info_init takes a
     reference on the copy it links in, and disasm_info_dealloc drops the
     reference held on NEXT.  */
  struct disasm_info_object *next;
};

extern PyTypeObject disasm_info_object_type
    CPYCHECKER_TYPE_OBJECT_FOR_TYPEDEF ("disasm_info_object");

/* Implement gdb.disassembler.DisassemblerAddressPart type.  An object of
   this type represents a small part of a disassembled instruction; a part
   that is an address that should be printed using a call to GDB's
   internal print_address function.  */

struct disasm_addr_part_object
{
  PyObject_HEAD

  /* The address to be formatted.  */
  bfd_vma address;

  /* A gdbarch.  This is only needed in the case where the user asks for
     the DisassemblerAddressPart to be converted to a string.  When we
     return this part to GDB within a DisassemblerResult then GDB will use
     the gdbarch from the initial disassembly request.  */
  struct gdbarch *gdbarch;
};

extern PyTypeObject disasm_addr_part_object_type
    CPYCHECKER_TYPE_OBJECT_FOR_TYPEDEF ("disasm_addr_part_object");

/* Implement gdb.disassembler.DisassemblerTextPart type.  An object of
   this type represents a small part of a disassembled instruction; a part
   that is a piece of text along with an associated style.  */

struct disasm_text_part_object
{
  PyObject_HEAD

  /* The string that is this part.  Allocated with new by
     make_disasm_text_part.  */
  std::string *string;

  /* The style to use when displaying this part.  */
  enum disassembler_style style;
};

extern PyTypeObject disasm_text_part_object_type
    CPYCHECKER_TYPE_OBJECT_FOR_TYPEDEF ("disasm_text_part_object");

/* Type object for the DisassemblerPart base; note that it is annotated as
   a plain PyObject rather than with a dedicated struct.  */
extern PyTypeObject disasm_part_object_type
    CPYCHECKER_TYPE_OBJECT_FOR_TYPEDEF ("PyObject");

/* Implement gdb.disassembler.DisassemblerResult type, an object that holds
   the result of calling the disassembler.  This is mostly the length of
   the disassembled instruction (in bytes), and the string representing the
   disassembled instruction.
*/ 105 106 struct disasm_result_object 107 { 108 PyObject_HEAD 109 110 /* The length of the disassembled instruction in bytes. */ 111 int length; 112 113 /* A vector containing all the parts of the disassembled instruction. 114 Each part will be a DisassemblerPart sub-class. */ 115 std::vector<gdbpy_ref<>> *parts; 116 }; 117 118 extern PyTypeObject disasm_result_object_type 119 CPYCHECKER_TYPE_OBJECT_FOR_TYPEDEF ("disasm_result_object"); 120 121 /* When this is false we fast path out of gdbpy_print_insn, which should 122 keep the performance impact of the Python disassembler down. This is 123 set to true from Python by calling gdb.disassembler._set_enabled() when 124 the user registers a disassembler. */ 125 126 static bool python_print_insn_enabled = false; 127 128 /* A sub-class of gdb_disassembler that holds a pointer to a Python 129 DisassembleInfo object. A pointer to an instance of this class is 130 placed in the application_data field of the disassemble_info that is 131 used when we call gdbarch_print_insn. */ 132 133 struct gdbpy_disassembler : public gdb_disassemble_info 134 { 135 /* Constructor. */ 136 gdbpy_disassembler (disasm_info_object *obj, PyObject *memory_source); 137 138 /* Get the DisassembleInfo object pointer. */ 139 disasm_info_object * 140 py_disasm_info () const 141 { 142 return m_disasm_info_object; 143 } 144 145 /* Callbacks used by disassemble_info. */ 146 static void memory_error_func (int status, bfd_vma memaddr, 147 struct disassemble_info *info) noexcept; 148 static void print_address_func (bfd_vma addr, 149 struct disassemble_info *info) noexcept; 150 static int read_memory_func (bfd_vma memaddr, gdb_byte *buff, 151 unsigned int len, 152 struct disassemble_info *info) noexcept; 153 154 /* Callback used as the disassemble_info's fprintf_func callback. The 155 DIS_INFO pointer is a pointer to a gdbpy_disassembler object. */ 156 static int fprintf_func (void *dis_info, const char *format, ...) 
noexcept 157 ATTRIBUTE_PRINTF(2,3); 158 159 /* Callback used as the disassemble_info's fprintf_styled_func callback. 160 The DIS_INFO pointer is a pointer to a gdbpy_disassembler. */ 161 static int fprintf_styled_func (void *dis_info, 162 enum disassembler_style style, 163 const char *format, ...) noexcept 164 ATTRIBUTE_PRINTF(3,4); 165 166 /* Helper used by fprintf_func and fprintf_styled_func. This function 167 creates a new DisassemblerTextPart and adds it to the disassembler's 168 parts list. The actual disassembler is accessed through DIS_INFO, 169 which is a pointer to the gdbpy_disassembler object. */ 170 static int vfprintf_styled_func (void *dis_info, 171 enum disassembler_style style, 172 const char *format, va_list args) noexcept 173 ATTRIBUTE_PRINTF(3,0); 174 175 /* Return a reference to an optional that contains the address at which a 176 memory error occurred. The optional will only have a value if a 177 memory error actually occurred. */ 178 const std::optional<CORE_ADDR> &memory_error_address () const 179 { return m_memory_error_address; } 180 181 /* Return the content of the disassembler as a string. The contents are 182 moved out of the disassembler, so after this call the disassembler 183 contents have been reset back to empty. */ 184 std::vector<gdbpy_ref<>> release () 185 { 186 return std::move (m_parts); 187 } 188 189 /* If there is a Python exception stored in this disassembler then 190 restore it (i.e. set the PyErr_* state), clear the exception within 191 this disassembler, and return true. There must be no current 192 exception set (i.e. !PyErr_Occurred()) when this function is called, 193 as any such exception might get lost. 194 195 Otherwise, there is no exception stored in this disassembler, return 196 false. 
*/ 197 bool restore_exception () 198 { 199 gdb_assert (!PyErr_Occurred ()); 200 if (m_stored_exception.has_value ()) 201 { 202 gdbpy_err_fetch ex = std::move (*m_stored_exception); 203 m_stored_exception.reset (); 204 ex.restore (); 205 return true; 206 } 207 208 return false; 209 } 210 211 private: 212 213 /* The list of all the parts that make up this disassembled instruction. 214 This is populated as a result of the callbacks from libopcodes as the 215 instruction is disassembled. */ 216 std::vector<gdbpy_ref<>> m_parts; 217 218 /* The DisassembleInfo object we are disassembling for. */ 219 disasm_info_object *m_disasm_info_object; 220 221 /* When the user indicates that a memory error has occurred then the 222 address of the memory error is stored in here. */ 223 std::optional<CORE_ADDR> m_memory_error_address; 224 225 /* When the user calls the builtin_disassemble function, if they pass a 226 memory source object then a pointer to the object is placed in here, 227 otherwise, this field is nullptr. */ 228 PyObject *m_memory_source; 229 230 /* Move the exception EX into this disassembler object. */ 231 void store_exception (gdbpy_err_fetch &&ex) 232 { 233 /* The only calls to store_exception are from read_memory_func, which 234 will return early if there's already an exception stored. */ 235 gdb_assert (!m_stored_exception.has_value ()); 236 m_stored_exception.emplace (std::move (ex)); 237 } 238 239 /* Return true if there is an exception stored in this disassembler. */ 240 bool has_stored_exception () const 241 { 242 return m_stored_exception.has_value (); 243 } 244 245 /* Store a single exception. This is used to pass Python exceptions back 246 from ::memory_read to disasmpy_builtin_disassemble. */ 247 std::optional<gdbpy_err_fetch> m_stored_exception; 248 }; 249 250 /* Return true if OBJ is still valid, otherwise, return false. A valid OBJ 251 will have a non-nullptr gdb_info field. 
*/ 252 253 static bool 254 disasm_info_object_is_valid (disasm_info_object *obj) 255 { 256 return obj->gdb_info != nullptr; 257 } 258 259 /* Fill in OBJ with all the other arguments. */ 260 261 static void 262 disasm_info_fill (disasm_info_object *obj, struct gdbarch *gdbarch, 263 program_space *progspace, bfd_vma address, 264 disassemble_info *di, disasm_info_object *next) 265 { 266 obj->gdbarch = gdbarch; 267 obj->program_space = progspace; 268 obj->address = address; 269 obj->gdb_info = di; 270 obj->next = next; 271 } 272 273 /* Implement DisassembleInfo.__init__. Takes a single argument that must 274 be another DisassembleInfo object and copies the contents from the 275 argument into this new object. */ 276 277 static int 278 disasm_info_init (PyObject *self, PyObject *args, PyObject *kwargs) 279 { 280 static const char *keywords[] = { "info", NULL }; 281 PyObject *info_obj; 282 if (!gdb_PyArg_ParseTupleAndKeywords (args, kwargs, "O!", keywords, 283 &disasm_info_object_type, 284 &info_obj)) 285 return -1; 286 287 disasm_info_object *other = (disasm_info_object *) info_obj; 288 disasm_info_object *info = (disasm_info_object *) self; 289 disasm_info_fill (info, other->gdbarch, other->program_space, 290 other->address, other->gdb_info, other->next); 291 other->next = info; 292 293 /* As the OTHER object now holds a pointer to INFO we inc the ref count 294 on INFO. This stops INFO being deleted until OTHER has gone away. */ 295 Py_INCREF ((PyObject *) info); 296 return 0; 297 } 298 299 /* The tp_dealloc callback for the DisassembleInfo type. */ 300 301 static void 302 disasm_info_dealloc (PyObject *self) 303 { 304 disasm_info_object *obj = (disasm_info_object *) self; 305 306 /* We no longer care about the object our NEXT pointer points at, so we 307 can decrement its reference count. This macro handles the case when 308 NEXT is nullptr. */ 309 Py_XDECREF ((PyObject *) obj->next); 310 311 /* Now core deallocation behaviour. 
*/ 312 Py_TYPE (self)->tp_free (self); 313 } 314 315 /* Implement __repr__ for the DisassembleInfo type. */ 316 317 static PyObject * 318 disasmpy_info_repr (PyObject *self) 319 { 320 disasm_info_object *obj = (disasm_info_object *) self; 321 322 const char *arch_name 323 = (gdbarch_bfd_arch_info (obj->gdbarch))->printable_name; 324 return PyUnicode_FromFormat ("<%s address=%s architecture=%s>", 325 Py_TYPE (obj)->tp_name, 326 core_addr_to_string_nz (obj->address), 327 arch_name); 328 } 329 330 /* Implement DisassembleInfo.is_valid(), really just a wrapper around the 331 disasm_info_object_is_valid function above. */ 332 333 static PyObject * 334 disasmpy_info_is_valid (PyObject *self, PyObject *args) 335 { 336 disasm_info_object *disasm_obj = (disasm_info_object *) self; 337 338 if (disasm_info_object_is_valid (disasm_obj)) 339 Py_RETURN_TRUE; 340 341 Py_RETURN_FALSE; 342 } 343 344 /* Set the Python exception to be a gdb.MemoryError object, with ADDRESS 345 as its payload. */ 346 347 static void 348 disasmpy_set_memory_error_for_address (CORE_ADDR address) 349 { 350 PyObject *address_obj = gdb_py_object_from_longest (address).release (); 351 PyErr_SetObject (gdbpy_gdb_memory_error, address_obj); 352 } 353 354 /* Create a new DisassemblerTextPart and return a gdbpy_ref wrapper for 355 the new object. STR is the string content of the part and STYLE is the 356 style to be used when GDB displays this part. */ 357 358 static gdbpy_ref<> 359 make_disasm_text_part (std::string &&str, enum disassembler_style style) 360 { 361 PyTypeObject *type = &disasm_text_part_object_type; 362 disasm_text_part_object *text_part 363 = (disasm_text_part_object *) type->tp_alloc (type, 0); 364 text_part->string = new std::string (str); 365 text_part->style = style; 366 367 return gdbpy_ref<> ((PyObject *) text_part); 368 } 369 370 /* Create a new DisassemblerAddressPart and return a gdbpy_ref wrapper for 371 the new object. 
GDBARCH is the architecture used when formatting the 372 address, and ADDRESS is the numerical address to be displayed. */ 373 374 static gdbpy_ref<> 375 make_disasm_addr_part (struct gdbarch *gdbarch, CORE_ADDR address) 376 { 377 PyTypeObject *type = &disasm_addr_part_object_type; 378 disasm_addr_part_object *addr_part 379 = (disasm_addr_part_object *) type->tp_alloc (type, 0); 380 addr_part->address = address; 381 addr_part->gdbarch = gdbarch; 382 383 return gdbpy_ref<> ((PyObject *) addr_part); 384 } 385 386 /* Ensure that a gdb.disassembler.DisassembleInfo is valid. */ 387 388 #define DISASMPY_DISASM_INFO_REQUIRE_VALID(Info) \ 389 do { \ 390 if (!disasm_info_object_is_valid (Info)) \ 391 { \ 392 PyErr_SetString (PyExc_RuntimeError, \ 393 _("DisassembleInfo is no longer valid.")); \ 394 return nullptr; \ 395 } \ 396 } while (0) 397 398 /* Implement DisassembleInfo.text_part method. Creates and returns a new 399 DisassemblerTextPart object. */ 400 401 static PyObject * 402 disasmpy_info_make_text_part (PyObject *self, PyObject *args, 403 PyObject *kwargs) 404 { 405 disasm_info_object *obj = (disasm_info_object *) self; 406 DISASMPY_DISASM_INFO_REQUIRE_VALID (obj); 407 408 static const char *keywords[] = { "style", "string", NULL }; 409 int style_num; 410 const char *string; 411 if (!gdb_PyArg_ParseTupleAndKeywords (args, kwargs, "is", keywords, 412 &style_num, &string)) 413 return nullptr; 414 415 if (style_num < 0 || style_num > ((int) dis_style_comment_start)) 416 { 417 PyErr_SetString (PyExc_ValueError, 418 _("Invalid disassembler style.")); 419 return nullptr; 420 } 421 422 if (strlen (string) == 0) 423 { 424 PyErr_SetString (PyExc_ValueError, 425 _("String must not be empty.")); 426 return nullptr; 427 } 428 429 gdbpy_ref<> text_part 430 = make_disasm_text_part (std::string (string), 431 (enum disassembler_style) style_num); 432 return text_part.release (); 433 } 434 435 /* Implement DisassembleInfo.address_part method. 
Creates and returns a 436 new DisassemblerAddressPart object. */ 437 438 static PyObject * 439 disasmpy_info_make_address_part (PyObject *self, PyObject *args, 440 PyObject *kwargs) 441 { 442 disasm_info_object *obj = (disasm_info_object *) self; 443 DISASMPY_DISASM_INFO_REQUIRE_VALID (obj); 444 445 static const char *keywords[] = { "address", NULL }; 446 CORE_ADDR address; 447 PyObject *address_object; 448 if (!gdb_PyArg_ParseTupleAndKeywords (args, kwargs, "O", keywords, 449 &address_object)) 450 return nullptr; 451 452 if (get_addr_from_python (address_object, &address) < 0) 453 return nullptr; 454 455 return make_disasm_addr_part (obj->gdbarch, address).release (); 456 } 457 458 /* Return a string representation of TEXT_PART. The returned string does 459 not include any styling. */ 460 461 static std::string 462 disasmpy_part_to_string (const disasm_text_part_object *text_part) 463 { 464 gdb_assert (text_part->string != nullptr); 465 return *(text_part->string); 466 } 467 468 /* Return a string representation of ADDR_PART. The returned string does 469 not include any styling. */ 470 471 static std::string 472 disasmpy_part_to_string (const disasm_addr_part_object *addr_part) 473 { 474 string_file buf; 475 print_address (addr_part->gdbarch, addr_part->address, &buf); 476 return buf.release (); 477 } 478 479 /* PARTS is a vector of Python objects, each is a sub-class of 480 DisassemblerPart. Create a string by concatenating the string 481 representation of each part, and return this new string. 482 483 Converting an address part requires that we call back into GDB core, 484 which could throw an exception. As such, calls to this function should 485 be wrapped with a try/catch. 
*/ 486 487 static std::string 488 disasmpy_parts_list_to_string (const std::vector<gdbpy_ref<>> &parts) 489 { 490 std::string str; 491 for (auto p : parts) 492 { 493 if (Py_TYPE (p.get ()) == &disasm_text_part_object_type) 494 { 495 disasm_text_part_object *text_part 496 = (disasm_text_part_object *) p.get (); 497 str += disasmpy_part_to_string (text_part); 498 } 499 else 500 { 501 gdb_assert (Py_TYPE (p.get ()) == &disasm_addr_part_object_type); 502 503 disasm_addr_part_object *addr_part 504 = (disasm_addr_part_object *) p.get (); 505 str += disasmpy_part_to_string (addr_part); 506 } 507 } 508 509 return str; 510 } 511 512 /* Initialise OBJ, a DisassemblerResult object with LENGTH and PARTS. 513 OBJ might already have been initialised, in which case any existing 514 content should be discarded before the new PARTS are moved in. */ 515 516 static void 517 disasmpy_init_disassembler_result (disasm_result_object *obj, int length, 518 std::vector<gdbpy_ref<>> &&parts) 519 { 520 if (obj->parts == nullptr) 521 obj->parts = new std::vector<gdbpy_ref<>>; 522 else 523 obj->parts->clear (); 524 525 obj->length = length; 526 *(obj->parts) = std::move (parts); 527 } 528 529 /* Implement gdb.disassembler.builtin_disassemble(). Calls back into GDB's 530 builtin disassembler. The first argument is a DisassembleInfo object 531 describing what to disassemble. The second argument is optional and 532 provides a mechanism to modify the memory contents that the builtin 533 disassembler will actually disassemble. 534 535 Returns an instance of gdb.disassembler.DisassemblerResult, an object 536 that wraps a disassembled instruction, or it raises a 537 gdb.MemoryError. 
*/

static PyObject *
disasmpy_builtin_disassemble (PyObject *self, PyObject *args, PyObject *kw)
{
  PyObject *info_obj, *memory_source_obj = nullptr;
  static const char *keywords[] = { "info", "memory_source", nullptr };
  if (!gdb_PyArg_ParseTupleAndKeywords (args, kw, "O!|O", keywords,
                                        &disasm_info_object_type, &info_obj,
                                        &memory_source_obj))
    return nullptr;

  disasm_info_object *disasm_info = (disasm_info_object *) info_obj;
  DISASMPY_DISASM_INFO_REQUIRE_VALID (disasm_info);

  /* Where the result will be written.  */
  gdbpy_disassembler disassembler (disasm_info, memory_source_obj);

  /* Now actually perform the disassembly.  LENGTH is set to the length of
     the disassembled instruction, or -1 if there was a memory-error
     encountered while disassembling.  See below for more details on
     handling of -1 return value.  */
  int length = gdbarch_print_insn (disasm_info->gdbarch, disasm_info->address,
                                   disassembler.disasm_info ());

  /* It is possible that, while calling a user overridden memory read
     function, a Python exception was raised that couldn't be
     translated into a standard memory-error.  In this case the first such
     exception is stored in the disassembler and restored here.  */
  if (disassembler.restore_exception ())
    return nullptr;

  if (length == -1)
    {
      /* In an ideal world, every disassembler should always call the
         memory error function before returning a status of -1 as the only
         error a disassembler should encounter is a failure to read
         memory.  Unfortunately, there are some disassemblers who don't
         follow this rule, and will return -1 without calling the memory
         error function.

         When the memory error function was called we raise a
         gdb.MemoryError for the recorded address.  Otherwise we raise a
         gdb.GdbError whose message is whatever text the disassembler
         produced, or a generic message if it produced none.  */
      if (disassembler.memory_error_address ().has_value ())
        {
          CORE_ADDR addr = *disassembler.memory_error_address ();
          disasmpy_set_memory_error_for_address (addr);
        }
      else
        {
          auto content = disassembler.release ();
          std::string str;

          try
            {
              str = disasmpy_parts_list_to_string (content);
            }
          catch (const gdb_exception &except)
            {
              GDB_PY_HANDLE_EXCEPTION (except);
            }
          if (!str.empty ())
            PyErr_SetString (gdbpy_gdberror_exc, str.c_str ());
          else
            PyErr_SetString (gdbpy_gdberror_exc,
                             _("Unknown disassembly error."));
        }
      return nullptr;
    }

  /* Instructions are either non-zero in length, or we got an error,
     indicated by a length of -1, which we handled above.  */
  gdb_assert (length > 0);

  /* We should not have seen a memory error in this case.  */
  gdb_assert (!disassembler.memory_error_address ().has_value ());

  /* Create a DisassemblerResult containing the results.  */
  PyTypeObject *type = &disasm_result_object_type;
  gdbpy_ref<disasm_result_object> res
    ((disasm_result_object *) type->tp_alloc (type, 0));

  /* tp_alloc returns nullptr, with a Python exception set, if the
     allocation fails; pass that on rather than dereferencing nullptr.  */
  if (res == nullptr)
    return nullptr;

  auto content = disassembler.release ();
  disasmpy_init_disassembler_result (res.get (), length, std::move (content));
  return reinterpret_cast<PyObject *> (res.release ());
}

/* Implement gdb._set_enabled function.  Takes a boolean parameter, and
   sets whether GDB should enter the Python disassembler code or not.

   This is called from within the Python code when a new disassembler is
   registered.  When no disassemblers are registered the global C++ flag
   is set to false, and GDB never even enters the Python environment to
   check for a disassembler.

   When the user registers a new Python disassembler, the global C++ flag
   is set to true, and now GDB will enter the Python environment to check
   if there's a disassembler registered for the current architecture.
*/ 636 637 static PyObject * 638 disasmpy_set_enabled (PyObject *self, PyObject *args, PyObject *kw) 639 { 640 PyObject *newstate; 641 static const char *keywords[] = { "state", nullptr }; 642 if (!gdb_PyArg_ParseTupleAndKeywords (args, kw, "O", keywords, 643 &newstate)) 644 return nullptr; 645 646 if (!PyBool_Check (newstate)) 647 { 648 PyErr_SetString (PyExc_TypeError, 649 _("The value passed to `_set_enabled' must be a boolean.")); 650 return nullptr; 651 } 652 653 python_print_insn_enabled = PyObject_IsTrue (newstate); 654 Py_RETURN_NONE; 655 } 656 657 /* Implement DisassembleInfo.read_memory(LENGTH, OFFSET). Read LENGTH 658 bytes at OFFSET from the start of the instruction currently being 659 disassembled, and return a memory buffer containing the bytes. 660 661 OFFSET defaults to zero if it is not provided. LENGTH is required. If 662 the read fails then this will raise a gdb.MemoryError exception. */ 663 664 static PyObject * 665 disasmpy_info_read_memory (PyObject *self, PyObject *args, PyObject *kw) 666 { 667 disasm_info_object *obj = (disasm_info_object *) self; 668 DISASMPY_DISASM_INFO_REQUIRE_VALID (obj); 669 670 gdb_py_longest length, offset = 0; 671 gdb::unique_xmalloc_ptr<gdb_byte> buffer; 672 static const char *keywords[] = { "length", "offset", nullptr }; 673 674 if (!gdb_PyArg_ParseTupleAndKeywords (args, kw, 675 GDB_PY_LL_ARG "|" GDB_PY_LL_ARG, 676 keywords, &length, &offset)) 677 return nullptr; 678 679 /* The apparent address from which we are reading memory. Note that in 680 some cases GDB actually disassembles instructions from a buffer, so 681 we might not actually be reading this information directly from the 682 inferior memory. This is all hidden behind the read_memory_func API 683 within the disassemble_info structure. */ 684 CORE_ADDR address = obj->address + offset; 685 686 /* Setup a buffer to hold the result. */ 687 buffer.reset ((gdb_byte *) xmalloc (length)); 688 689 /* Read content into BUFFER. 
If the read fails then raise a memory 690 error, otherwise, convert BUFFER to a Python memory buffer, and return 691 it to the user. */ 692 disassemble_info *info = obj->gdb_info; 693 if (info->read_memory_func ((bfd_vma) address, buffer.get (), 694 (unsigned int) length, info) != 0) 695 { 696 disasmpy_set_memory_error_for_address (address); 697 return nullptr; 698 } 699 return gdbpy_buffer_to_membuf (std::move (buffer), address, length); 700 } 701 702 /* Implement DisassembleInfo.address attribute, return the address at which 703 GDB would like an instruction disassembled. */ 704 705 static PyObject * 706 disasmpy_info_address (PyObject *self, void *closure) 707 { 708 disasm_info_object *obj = (disasm_info_object *) self; 709 DISASMPY_DISASM_INFO_REQUIRE_VALID (obj); 710 return gdb_py_object_from_longest (obj->address).release (); 711 } 712 713 /* Implement DisassembleInfo.architecture attribute. Return the 714 gdb.Architecture in which we are disassembling. */ 715 716 static PyObject * 717 disasmpy_info_architecture (PyObject *self, void *closure) 718 { 719 disasm_info_object *obj = (disasm_info_object *) self; 720 DISASMPY_DISASM_INFO_REQUIRE_VALID (obj); 721 return gdbarch_to_arch_object (obj->gdbarch); 722 } 723 724 /* Implement DisassembleInfo.progspace attribute. Return the 725 gdb.Progspace in which we are disassembling. */ 726 727 static PyObject * 728 disasmpy_info_progspace (PyObject *self, void *closure) 729 { 730 disasm_info_object *obj = (disasm_info_object *) self; 731 DISASMPY_DISASM_INFO_REQUIRE_VALID (obj); 732 return pspace_to_pspace_object (obj->program_space).release (); 733 } 734 735 /* Helper function called when the libopcodes disassembler produces some 736 output. FORMAT and ARGS are used to create a string which GDB will 737 display using STYLE. The string is either added as a new 738 DisassemblerTextPart to the list of parts being built in the current 739 gdbpy_disassembler object (accessed through DIS_INFO). 
Or, if the last 740 part in the gdbpy_disassembler is a text part in the same STYLE, then 741 the new string is appended to the previous part. 742 743 The merging behaviour make the Python API a little more user friendly, 744 some disassemblers produce their output character at a time, there's no 745 particular reason for this, it's just how they are implemented. By 746 merging parts with the same style we make it easier for the user to 747 analyse the disassembler output. */ 748 749 int 750 gdbpy_disassembler::vfprintf_styled_func (void *dis_info, 751 enum disassembler_style style, 752 const char *format, 753 va_list args) noexcept 754 { 755 gdb_disassemble_info *di = (gdb_disassemble_info *) dis_info; 756 gdbpy_disassembler *dis 757 = gdb::checked_static_cast<gdbpy_disassembler *> (di); 758 759 if (!dis->m_parts.empty () 760 && Py_TYPE (dis->m_parts.back ().get ()) == &disasm_text_part_object_type 761 && (((disasm_text_part_object *) dis->m_parts.back ().get ())->style 762 == style)) 763 { 764 std::string *string 765 = ((disasm_text_part_object *) dis->m_parts.back ().get ())->string; 766 string_vappendf (*string, format, args); 767 } 768 else 769 { 770 std::string str = string_vprintf (format, args); 771 if (str.size () > 0) 772 { 773 gdbpy_ref<> text_part 774 = make_disasm_text_part (std::move (str), style); 775 dis->m_parts.emplace_back (std::move (text_part)); 776 } 777 } 778 779 /* Something non -ve. */ 780 return 0; 781 } 782 783 /* Disassembler callback for architectures where libopcodes doesn't 784 created styled output. In these cases we format all the output using 785 the (default) text style. */ 786 787 int 788 gdbpy_disassembler::fprintf_func (void *dis_info, 789 const char *format, ...) noexcept 790 { 791 va_list args; 792 va_start (args, format); 793 vfprintf_styled_func (dis_info, dis_style_text, format, args); 794 va_end (args); 795 796 /* Something non -ve. 
*/ 797 return 0; 798 } 799 800 /* Disassembler callback for architectures where libopcodes does create 801 styled output. Just creates a new text part with the given STYLE. */ 802 803 int 804 gdbpy_disassembler::fprintf_styled_func (void *dis_info, 805 enum disassembler_style style, 806 const char *format, ...) noexcept 807 { 808 va_list args; 809 va_start (args, format); 810 vfprintf_styled_func (dis_info, style, format, args); 811 va_end (args); 812 813 /* Something non -ve. */ 814 return 0; 815 } 816 817 /* This implements the disassemble_info read_memory_func callback and is 818 called from the libopcodes disassembler when the disassembler wants to 819 read memory. 820 821 From the INFO argument we can find the gdbpy_disassembler object for 822 which we are disassembling, and from that object we can find the 823 DisassembleInfo for the current disassembly call. 824 825 This function reads the instruction bytes by calling the read_memory 826 method on the DisassembleInfo object. This method might have been 827 overridden by user code. 828 829 Read LEN bytes from MEMADDR and place them into BUFF. Return 0 on 830 success (in which case BUFF has been filled), or -1 on error, in which 831 case the contents of BUFF are undefined. */ 832 833 int 834 gdbpy_disassembler::read_memory_func (bfd_vma memaddr, gdb_byte *buff, 835 unsigned int len, 836 struct disassemble_info *info) noexcept 837 { 838 gdbpy_disassembler *dis 839 = static_cast<gdbpy_disassembler *> (info->application_data); 840 disasm_info_object *obj = dis->py_disasm_info (); 841 842 /* If a previous read attempt resulted in an exception, then we don't 843 allow any further reads to succeed. We only do this check for the 844 read_memory_func as this is the only one the user can hook into, 845 thus, this check prevents us calling back into user code if a 846 previous call has already thrown an error. 
*/ 847 if (dis->has_stored_exception ()) 848 return -1; 849 850 /* The DisassembleInfo.read_memory method expects an offset from the 851 address stored within the DisassembleInfo object; calculate that 852 offset here. */ 853 gdb_py_longest offset 854 = (gdb_py_longest) memaddr - (gdb_py_longest) obj->address; 855 856 /* Now call the DisassembleInfo.read_memory method. This might have been 857 overridden by the user. */ 858 gdbpy_ref<> result_obj (PyObject_CallMethod ((PyObject *) obj, 859 "read_memory", 860 "I" GDB_PY_LL_ARG, len, offset)); 861 862 /* Handle any exceptions. */ 863 if (result_obj == nullptr) 864 { 865 /* If we got a gdb.MemoryError then we ignore this and just report 866 that the read failed to the caller. The caller is then 867 responsible for calling the memory_error_func if it wants to. 868 Remember, the disassembler might just be probing to see if these 869 bytes can be read, if we automatically call the memory error 870 function, we can end up registering an error prematurely. */ 871 if (PyErr_ExceptionMatches (gdbpy_gdb_memory_error)) 872 { 873 PyErr_Clear (); 874 return -1; 875 } 876 877 /* For any other exception type we capture the value of the Python 878 exception and throw it, this will then be caught in 879 disasmpy_builtin_disassemble, at which point the exception will be 880 restored. */ 881 dis->store_exception (gdbpy_err_fetch ()); 882 return -1; 883 } 884 885 /* Convert the result to a buffer. */ 886 Py_buffer py_buff; 887 if (!PyObject_CheckBuffer (result_obj.get ()) 888 || PyObject_GetBuffer (result_obj.get(), &py_buff, PyBUF_CONTIG_RO) < 0) 889 { 890 PyErr_Format (PyExc_TypeError, 891 _("Result from read_memory is not a buffer")); 892 dis->store_exception (gdbpy_err_fetch ()); 893 return -1; 894 } 895 896 /* Wrap PY_BUFF so that it is cleaned up correctly at the end of this 897 scope. */ 898 Py_buffer_up buffer_up (&py_buff); 899 900 /* Validate that the buffer is the correct length. 
*/ 901 if (py_buff.len != len) 902 { 903 PyErr_Format (PyExc_ValueError, 904 _("Buffer returned from read_memory is sized %d instead of the expected %d"), 905 py_buff.len, len); 906 dis->store_exception (gdbpy_err_fetch ()); 907 return -1; 908 } 909 910 /* Copy the data out of the Python buffer and return success. */ 911 const gdb_byte *buffer = (const gdb_byte *) py_buff.buf; 912 memcpy (buff, buffer, len); 913 return 0; 914 } 915 916 /* Implement __str__ for the DisassemblerResult type. */ 917 918 static PyObject * 919 disasmpy_result_str (PyObject *self) 920 { 921 disasm_result_object *obj = (disasm_result_object *) self; 922 923 /* These conditions are all enforced when the DisassemblerResult object 924 is created. */ 925 gdb_assert (obj->parts != nullptr); 926 gdb_assert (obj->parts->size () > 0); 927 gdb_assert (obj->length > 0); 928 929 std::string str; 930 931 try 932 { 933 str = disasmpy_parts_list_to_string (*obj->parts); 934 } 935 catch (const gdb_exception &except) 936 { 937 GDB_PY_HANDLE_EXCEPTION (except); 938 } 939 940 return PyUnicode_Decode (str.c_str (), str.size (), 941 host_charset (), nullptr); 942 } 943 944 /* Implement DisassemblerResult.length attribute, return the length of the 945 disassembled instruction. */ 946 947 static PyObject * 948 disasmpy_result_length (PyObject *self, void *closure) 949 { 950 disasm_result_object *obj = (disasm_result_object *) self; 951 return gdb_py_object_from_longest (obj->length).release (); 952 } 953 954 /* Implement DisassemblerResult.string attribute, return the content string 955 of the disassembled instruction. */ 956 957 static PyObject * 958 disasmpy_result_string (PyObject *self, void *closure) 959 { 960 return disasmpy_result_str (self); 961 } 962 963 /* Implement DisassemblerResult.parts method. Returns a list of all the 964 parts that make up this result. There should always be at least one 965 part, so the returned list should never be empty. 
*/

/* Getter behind DisassemblerResult.parts.  Builds a fresh Python list
   holding a new reference to every part stored in SELF.  Returns nullptr
   (with the Python error from PyList_New) if the list cannot be
   allocated.  CLOSURE is unused.  */

static PyObject *
disasmpy_result_parts (PyObject *self, void *closure)
{
  disasm_result_object *result = (disasm_result_object *) self;

  /* The DisassemblerResult constructor guarantees all of these.  */
  gdb_assert (result->parts != nullptr);
  gdb_assert (result->parts->size () > 0);
  gdb_assert (result->length > 0);

  gdbpy_ref<> list (PyList_New (result->parts->size ()));
  if (list == nullptr)
    return nullptr;

  Py_ssize_t slot = 0;
  for (auto part : *result->parts)
    {
      /* PyList_SET_ITEM steals a reference, so give it one of its own
         rather than the one owned by the parts vector.  */
      gdbpy_ref<> entry = gdbpy_ref<>::new_reference (part.get ());
      PyList_SET_ITEM (list.get (), slot, entry.release ());
      slot++;
    }

  /* A non-empty parts vector must have produced a non-empty list.  */
  gdb_assert (PyList_Size (list.get()) > 0);

  return list.release ();
}

/* Implement DisassemblerResult.__init__.  Takes two arguments, an
   integer, the length in bytes of the disassembled instruction, and a
   string, the disassembled content of the instruction.  
*/ 999 1000 static int 1001 disasmpy_result_init (PyObject *self, PyObject *args, PyObject *kwargs) 1002 { 1003 static const char *keywords[] = { "length", "string", "parts", NULL }; 1004 int length; 1005 const char *string = nullptr; 1006 PyObject *parts_list = nullptr; 1007 if (!gdb_PyArg_ParseTupleAndKeywords (args, kwargs, "i|zO", keywords, 1008 &length, &string, &parts_list)) 1009 return -1; 1010 1011 if (length <= 0) 1012 { 1013 PyErr_SetString (PyExc_ValueError, 1014 _("Length must be greater than 0.")); 1015 return -1; 1016 } 1017 1018 if (parts_list == Py_None) 1019 parts_list = nullptr; 1020 1021 if (string != nullptr && parts_list != nullptr) 1022 { 1023 PyErr_Format (PyExc_ValueError, 1024 _("Cannot use 'string' and 'parts' when creating %s."), 1025 Py_TYPE (self)->tp_name); 1026 return -1; 1027 } 1028 1029 if (string != nullptr) 1030 { 1031 if (strlen (string) == 0) 1032 { 1033 PyErr_SetString (PyExc_ValueError, 1034 _("String must not be empty.")); 1035 return -1; 1036 } 1037 1038 disasm_result_object *obj = (disasm_result_object *) self; 1039 std::vector<gdbpy_ref<>> content; 1040 gdbpy_ref<> text_part 1041 = make_disasm_text_part (std::string (string), dis_style_text); 1042 content.emplace_back (text_part.release ()); 1043 disasmpy_init_disassembler_result (obj, length, std::move (content)); 1044 } 1045 else 1046 { 1047 if (!PySequence_Check (parts_list)) 1048 { 1049 PyErr_SetString (PyExc_TypeError, 1050 _("'parts' argument is not a sequence")); 1051 return -1; 1052 } 1053 1054 Py_ssize_t parts_count = PySequence_Size (parts_list); 1055 if (parts_count <= 0) 1056 { 1057 PyErr_SetString (PyExc_ValueError, 1058 _("'parts' list must not be empty.")); 1059 return -1; 1060 } 1061 1062 disasm_result_object *obj = (disasm_result_object *) self; 1063 std::vector<gdbpy_ref<>> content (parts_count); 1064 1065 struct gdbarch *gdbarch = nullptr; 1066 for (Py_ssize_t i = 0; i < parts_count; ++i) 1067 { 1068 gdbpy_ref<> part (PySequence_GetItem (parts_list, i)); 
1069 1070 if (part == nullptr) 1071 return -1; 1072 1073 if (Py_TYPE (part.get ()) == &disasm_addr_part_object_type) 1074 { 1075 disasm_addr_part_object *addr_part 1076 = (disasm_addr_part_object *) part.get (); 1077 gdb_assert (addr_part->gdbarch != nullptr); 1078 if (gdbarch == nullptr) 1079 gdbarch = addr_part->gdbarch; 1080 else if (addr_part->gdbarch != gdbarch) 1081 { 1082 PyErr_SetString (PyExc_ValueError, 1083 _("Inconsistent gdb.Architectures used " 1084 "in 'parts' sequence.")); 1085 return -1; 1086 } 1087 } 1088 1089 content[i] = std::move (part); 1090 } 1091 1092 disasmpy_init_disassembler_result (obj, length, std::move (content)); 1093 } 1094 1095 return 0; 1096 1097 } 1098 1099 /* Implement __repr__ for the DisassemblerResult type. */ 1100 1101 static PyObject * 1102 disasmpy_result_repr (PyObject *self) 1103 { 1104 disasm_result_object *obj = (disasm_result_object *) self; 1105 1106 gdb_assert (obj->parts != nullptr); 1107 1108 return PyUnicode_FromFormat ("<%s length=%d string=\"%U\">", 1109 Py_TYPE (obj)->tp_name, 1110 obj->length, 1111 disasmpy_result_str (self)); 1112 } 1113 1114 /* Implement memory_error_func callback for disassemble_info. Extract the 1115 underlying DisassembleInfo Python object, and set a memory error on 1116 it. */ 1117 1118 void 1119 gdbpy_disassembler::memory_error_func (int status, bfd_vma memaddr, 1120 struct disassemble_info *info) noexcept 1121 { 1122 gdbpy_disassembler *dis 1123 = static_cast<gdbpy_disassembler *> (info->application_data); 1124 dis->m_memory_error_address.emplace (memaddr); 1125 } 1126 1127 /* Wrapper of print_address. 
*/ 1128 1129 void 1130 gdbpy_disassembler::print_address_func (bfd_vma addr, 1131 struct disassemble_info *info) noexcept 1132 { 1133 gdbpy_disassembler *dis 1134 = static_cast<gdbpy_disassembler *> (info->application_data); 1135 1136 gdbpy_ref<> addr_part 1137 = make_disasm_addr_part (dis->arch (), addr); 1138 dis->m_parts.emplace_back (std::move (addr_part)); 1139 } 1140 1141 /* constructor. */ 1142 1143 gdbpy_disassembler::gdbpy_disassembler (disasm_info_object *obj, 1144 PyObject *memory_source) 1145 : gdb_disassemble_info (obj->gdbarch, 1146 read_memory_func, 1147 memory_error_func, 1148 print_address_func, 1149 fprintf_func, 1150 fprintf_styled_func), 1151 m_disasm_info_object (obj), 1152 m_memory_source (memory_source) 1153 { /* Nothing. */ } 1154 1155 /* A wrapper around a reference to a Python DisassembleInfo object, which 1156 ensures that the object is marked as invalid when we leave the enclosing 1157 scope. 1158 1159 Each DisassembleInfo is created in gdbpy_print_insn, and is done with by 1160 the time that function returns. However, there's nothing to stop a user 1161 caching a reference to the DisassembleInfo, and thus keeping the object 1162 around. 1163 1164 We therefore have the notion of a DisassembleInfo becoming invalid, this 1165 happens when gdbpy_print_insn returns. This class is responsible for 1166 marking the DisassembleInfo as invalid in its destructor. */ 1167 1168 struct scoped_disasm_info_object 1169 { 1170 /* Constructor. */ 1171 scoped_disasm_info_object (struct gdbarch *gdbarch, CORE_ADDR memaddr, 1172 disassemble_info *info) 1173 : m_disasm_info (allocate_disasm_info_object ()) 1174 { 1175 disasm_info_fill (m_disasm_info.get (), gdbarch, current_program_space, 1176 memaddr, info, nullptr); 1177 } 1178 1179 /* Upon destruction mark m_disasm_info as invalid. */ 1180 ~scoped_disasm_info_object () 1181 { 1182 /* Invalidate the original DisassembleInfo object as well as any copies 1183 that the user might have made. 
*/ 1184 for (disasm_info_object *obj = m_disasm_info.get (); 1185 obj != nullptr; 1186 obj = obj->next) 1187 obj->gdb_info = nullptr; 1188 } 1189 1190 /* Return a pointer to the underlying disasm_info_object instance. */ 1191 disasm_info_object * 1192 get () const 1193 { 1194 return m_disasm_info.get (); 1195 } 1196 1197 private: 1198 1199 /* Wrapper around the call to PyObject_New, this wrapper function can be 1200 called from the constructor initialization list, while PyObject_New, a 1201 macro, can't. */ 1202 static disasm_info_object * 1203 allocate_disasm_info_object () 1204 { 1205 return (disasm_info_object *) PyObject_New (disasm_info_object, 1206 &disasm_info_object_type); 1207 } 1208 1209 /* A reference to a gdb.disassembler.DisassembleInfo object. When this 1210 containing instance goes out of scope this reference is released, 1211 however, the user might be holding other references to the 1212 DisassembleInfo object in Python code, so the underlying object might 1213 not be deleted. */ 1214 gdbpy_ref<disasm_info_object> m_disasm_info; 1215 }; 1216 1217 /* See python-internal.h. */ 1218 1219 std::optional<int> 1220 gdbpy_print_insn (struct gdbarch *gdbarch, CORE_ADDR memaddr, 1221 disassemble_info *info) 1222 { 1223 /* Early exit case. This must be done as early as possible, and 1224 definitely before we enter Python environment. The 1225 python_print_insn_enabled flag is set (from Python) only when the user 1226 has installed one (or more) Python disassemblers. So in the common 1227 case (no custom disassembler installed) this flag will be false, 1228 allowing for a quick return. */ 1229 if (!gdb_python_initialized || !python_print_insn_enabled) 1230 return {}; 1231 1232 gdbpy_enter enter_py (get_current_arch (), current_language); 1233 1234 /* Import the gdb.disassembler module. 
*/ 1235 gdbpy_ref<> gdb_python_disassembler_module 1236 (PyImport_ImportModule ("gdb.disassembler")); 1237 if (gdb_python_disassembler_module == nullptr) 1238 { 1239 gdbpy_print_stack (); 1240 return {}; 1241 } 1242 1243 /* Get the _print_insn attribute from the module, this should be the 1244 function we are going to call to actually perform the disassembly. */ 1245 gdbpy_ref<> hook 1246 (PyObject_GetAttrString (gdb_python_disassembler_module.get (), 1247 "_print_insn")); 1248 if (hook == nullptr) 1249 { 1250 gdbpy_print_stack (); 1251 return {}; 1252 } 1253 1254 /* Create the new DisassembleInfo object we will pass into Python. This 1255 object will be marked as invalid when we leave this scope. */ 1256 scoped_disasm_info_object scoped_disasm_info (gdbarch, memaddr, info); 1257 disasm_info_object *disasm_info = scoped_disasm_info.get (); 1258 1259 /* Call into the registered disassembler to (possibly) perform the 1260 disassembly. */ 1261 PyObject *insn_disas_obj = (PyObject *) disasm_info; 1262 gdbpy_ref<> result (PyObject_CallFunctionObjArgs (hook.get (), 1263 insn_disas_obj, 1264 nullptr)); 1265 1266 if (result == nullptr) 1267 { 1268 /* The call into Python code resulted in an exception. If this was a 1269 gdb.MemoryError, then we can figure out an address and call the 1270 disassemble_info::memory_error_func to report the error back to 1271 core GDB. Any other exception type we report back to core GDB as 1272 an unknown error (return -1 without first calling the 1273 memory_error_func callback). */ 1274 1275 if (PyErr_ExceptionMatches (gdbpy_gdb_memory_error)) 1276 { 1277 /* A gdb.MemoryError might have an address attribute which 1278 contains the address at which the memory error occurred. If 1279 this is the case then use this address, otherwise, fallback to 1280 just using the address of the instruction we were asked to 1281 disassemble. 
*/ 1282 gdbpy_err_fetch err; 1283 PyErr_Clear (); 1284 1285 CORE_ADDR addr; 1286 if (err.value () != nullptr 1287 && PyObject_HasAttrString (err.value ().get (), "address")) 1288 { 1289 PyObject *addr_obj 1290 = PyObject_GetAttrString (err.value ().get (), "address"); 1291 if (get_addr_from_python (addr_obj, &addr) < 0) 1292 addr = disasm_info->address; 1293 } 1294 else 1295 addr = disasm_info->address; 1296 1297 info->memory_error_func (-1, addr, info); 1298 return std::optional<int> (-1); 1299 } 1300 else if (PyErr_ExceptionMatches (gdbpy_gdberror_exc)) 1301 { 1302 gdbpy_err_fetch err; 1303 gdb::unique_xmalloc_ptr<char> msg = err.to_string (); 1304 1305 info->fprintf_func (info->stream, "%s", msg.get ()); 1306 return std::optional<int> (-1); 1307 } 1308 else 1309 { 1310 gdbpy_print_stack (); 1311 return std::optional<int> (-1); 1312 } 1313 1314 } 1315 else if (result == Py_None) 1316 { 1317 /* A return value of None indicates that the Python code could not, 1318 or doesn't want to, disassemble this instruction. Just return an 1319 empty result and core GDB will try to disassemble this for us. */ 1320 return {}; 1321 } 1322 1323 /* Check the result is a DisassemblerResult (or a sub-class). */ 1324 if (!PyObject_IsInstance (result.get (), 1325 (PyObject *) &disasm_result_object_type)) 1326 { 1327 PyErr_SetString (PyExc_TypeError, 1328 _("Result is not a DisassemblerResult.")); 1329 gdbpy_print_stack (); 1330 return std::optional<int> (-1); 1331 } 1332 1333 /* The result from the Python disassembler has the correct type. Convert 1334 this back to the underlying C++ object and read the state directly 1335 from this object. */ 1336 struct disasm_result_object *result_obj 1337 = (struct disasm_result_object *) result.get (); 1338 1339 /* Validate the length of the disassembled instruction. */ 1340 long length = result_obj->length; 1341 long max_insn_length = (gdbarch_max_insn_length_p (gdbarch) ? 
1342 gdbarch_max_insn_length (gdbarch) : INT_MAX); 1343 if (length <= 0) 1344 { 1345 PyErr_SetString 1346 (PyExc_ValueError, 1347 _("Invalid length attribute: length must be greater than 0.")); 1348 gdbpy_print_stack (); 1349 return std::optional<int> (-1); 1350 } 1351 if (length > max_insn_length) 1352 { 1353 PyErr_Format 1354 (PyExc_ValueError, 1355 _("Invalid length attribute: length %d greater than architecture maximum of %d"), 1356 length, max_insn_length); 1357 gdbpy_print_stack (); 1358 return std::optional<int> (-1); 1359 } 1360 1361 /* It is impossible to create a DisassemblerResult object with an empty 1362 parts list. We know that each part results in a non-empty string, so 1363 we know that the instruction disassembly will not be the empty 1364 string. */ 1365 gdb_assert (result_obj->parts->size () > 0); 1366 1367 /* Now print out the parts that make up this instruction. */ 1368 for (auto &p : *result_obj->parts) 1369 { 1370 if (Py_TYPE (p.get ()) == &disasm_text_part_object_type) 1371 { 1372 disasm_text_part_object *text_part 1373 = (disasm_text_part_object *) p.get (); 1374 gdb_assert (text_part->string != nullptr); 1375 info->fprintf_styled_func (info->stream, text_part->style, 1376 "%s", text_part->string->c_str ()); 1377 } 1378 else 1379 { 1380 gdb_assert (Py_TYPE (p.get ()) == &disasm_addr_part_object_type); 1381 disasm_addr_part_object *addr_part 1382 = (disasm_addr_part_object *) p.get (); 1383 /* A DisassemblerAddressPart can only be created by calling a 1384 method on DisassembleInfo, and the gdbarch is copied from the 1385 DisassembleInfo into the DisassemblerAddressPart. As the 1386 DisassembleInfo has its gdbarch initialised from GDBARCH in 1387 this scope, and this architecture can't be changed, then the 1388 following assert should hold. 
*/ 1389 gdb_assert (addr_part->gdbarch == gdbarch); 1390 info->print_address_func (addr_part->address, info); 1391 } 1392 } 1393 1394 return std::optional<int> (length); 1395 } 1396 1397 /* The tp_dealloc callback for the DisassemblerResult type. Takes care of 1398 deallocating the content buffer. */ 1399 1400 static void 1401 disasmpy_dealloc_result (PyObject *self) 1402 { 1403 disasm_result_object *obj = (disasm_result_object *) self; 1404 delete obj->parts; 1405 Py_TYPE (self)->tp_free (self); 1406 } 1407 1408 /* The tp_init callback for the DisassemblerPart type. This just raises an 1409 exception, which prevents the user from creating objects of this type. 1410 Instead the user should create instances of a sub-class. */ 1411 1412 static int 1413 disasmpy_part_init (PyObject *self, PyObject *args, PyObject *kwargs) 1414 { 1415 PyErr_SetString (PyExc_RuntimeError, 1416 _("Cannot create instances of DisassemblerPart.")); 1417 return -1; 1418 } 1419 1420 /* Return a string representing STYLE. The returned string is used as a 1421 constant defined in the gdb.disassembler module. */ 1422 1423 static const char * 1424 get_style_name (enum disassembler_style style) 1425 { 1426 switch (style) 1427 { 1428 case dis_style_text: return "STYLE_TEXT"; 1429 case dis_style_mnemonic: return "STYLE_MNEMONIC"; 1430 case dis_style_sub_mnemonic: return "STYLE_SUB_MNEMONIC"; 1431 case dis_style_assembler_directive: return "STYLE_ASSEMBLER_DIRECTIVE"; 1432 case dis_style_register: return "STYLE_REGISTER"; 1433 case dis_style_immediate: return "STYLE_IMMEDIATE"; 1434 case dis_style_address: return "STYLE_ADDRESS"; 1435 case dis_style_address_offset: return "STYLE_ADDRESS_OFFSET"; 1436 case dis_style_symbol: return "STYLE_SYMBOL"; 1437 case dis_style_comment_start: return "STYLE_COMMENT_START"; 1438 } 1439 1440 gdb_assert_not_reached ("unknown disassembler style"); 1441 } 1442 1443 /* Implement DisassemblerTextPart.__repr__ method. 
*/ 1444 1445 static PyObject * 1446 disasmpy_text_part_repr (PyObject *self) 1447 { 1448 disasm_text_part_object *obj = (disasm_text_part_object *) self; 1449 1450 gdb_assert (obj->string != nullptr); 1451 1452 return PyUnicode_FromFormat ("<%s string='%s', style='%s'>", 1453 Py_TYPE (obj)->tp_name, 1454 obj->string->c_str (), 1455 get_style_name (obj->style)); 1456 } 1457 1458 /* Implement DisassemblerTextPart.__str__ attribute. */ 1459 1460 static PyObject * 1461 disasmpy_text_part_str (PyObject *self) 1462 { 1463 disasm_text_part_object *obj = (disasm_text_part_object *) self; 1464 1465 return PyUnicode_Decode (obj->string->c_str (), obj->string->size (), 1466 host_charset (), nullptr); 1467 } 1468 1469 /* Implement DisassemblerTextPart.string attribute. */ 1470 1471 static PyObject * 1472 disasmpy_text_part_string (PyObject *self, void *closure) 1473 { 1474 return disasmpy_text_part_str (self); 1475 } 1476 1477 /* Implement DisassemblerTextPart.style attribute. */ 1478 1479 static PyObject * 1480 disasmpy_text_part_style (PyObject *self, void *closure) 1481 { 1482 disasm_text_part_object *obj = (disasm_text_part_object *) self; 1483 1484 LONGEST style_val = (LONGEST) obj->style; 1485 return gdb_py_object_from_longest (style_val).release (); 1486 } 1487 1488 /* Implement DisassemblerAddressPart.__repr__ method. */ 1489 1490 static PyObject * 1491 disasmpy_addr_part_repr (PyObject *self) 1492 { 1493 disasm_addr_part_object *obj = (disasm_addr_part_object *) self; 1494 1495 return PyUnicode_FromFormat ("<%s address='%s'>", 1496 Py_TYPE (obj)->tp_name, 1497 core_addr_to_string_nz (obj->address)); 1498 } 1499 1500 /* Implement DisassemblerAddressPart.__str__ attribute. 
*/ 1501 1502 static PyObject * 1503 disasmpy_addr_part_str (PyObject *self) 1504 { 1505 disasm_addr_part_object *obj = (disasm_addr_part_object *) self; 1506 1507 std::string str; 1508 try 1509 { 1510 string_file buf; 1511 print_address (obj->gdbarch, obj->address, &buf); 1512 str = buf.release (); 1513 } 1514 catch (const gdb_exception &except) 1515 { 1516 GDB_PY_HANDLE_EXCEPTION (except); 1517 } 1518 1519 return PyUnicode_Decode (str.c_str (), str.size (), 1520 host_charset (), nullptr); 1521 } 1522 1523 /* Implement DisassemblerAddressPart.string attribute. */ 1524 1525 static PyObject * 1526 disasmpy_addr_part_string (PyObject *self, void *closure) 1527 { 1528 return disasmpy_addr_part_str (self); 1529 } 1530 1531 /* Implement DisassemblerAddressPart.address attribute. */ 1532 1533 static PyObject * 1534 disasmpy_addr_part_address (PyObject *self, void *closure) 1535 { 1536 disasm_addr_part_object *obj = (disasm_addr_part_object *) self; 1537 1538 return gdb_py_object_from_longest (obj->address).release (); 1539 } 1540 1541 /* The get/set attributes of the gdb.disassembler.DisassembleInfo type. */ 1542 1543 static gdb_PyGetSetDef disasm_info_object_getset[] = { 1544 { "address", disasmpy_info_address, nullptr, 1545 "Start address of the instruction to disassemble.", nullptr }, 1546 { "architecture", disasmpy_info_architecture, nullptr, 1547 "Architecture to disassemble in", nullptr }, 1548 { "progspace", disasmpy_info_progspace, nullptr, 1549 "Program space to disassemble in", nullptr }, 1550 { nullptr } /* Sentinel */ 1551 }; 1552 1553 /* The methods of the gdb.disassembler.DisassembleInfo type. */ 1554 1555 static PyMethodDef disasm_info_object_methods[] = { 1556 { "read_memory", (PyCFunction) disasmpy_info_read_memory, 1557 METH_VARARGS | METH_KEYWORDS, 1558 "read_memory (LEN, OFFSET = 0) -> Octets[]\n\ 1559 Read LEN octets for the instruction to disassemble." 
}, 1560 { "is_valid", disasmpy_info_is_valid, METH_NOARGS, 1561 "is_valid () -> Boolean.\n\ 1562 Return true if this DisassembleInfo is valid, false if not." }, 1563 { "text_part", (PyCFunction) disasmpy_info_make_text_part, 1564 METH_VARARGS | METH_KEYWORDS, 1565 "text_part (STRING, STYLE) -> DisassemblerTextPart\n\ 1566 Create a new text part, with contents STRING styled with STYLE." }, 1567 { "address_part", (PyCFunction) disasmpy_info_make_address_part, 1568 METH_VARARGS | METH_KEYWORDS, 1569 "address_part (ADDRESS) -> DisassemblerAddressPart\n\ 1570 Create a new address part representing ADDRESS." }, 1571 {nullptr} /* Sentinel */ 1572 }; 1573 1574 /* The get/set attributes of the gdb.disassembler.DisassemblerResult type. */ 1575 1576 static gdb_PyGetSetDef disasm_result_object_getset[] = { 1577 { "length", disasmpy_result_length, nullptr, 1578 "Length of the disassembled instruction.", nullptr }, 1579 { "string", disasmpy_result_string, nullptr, 1580 "String representing the disassembled instruction.", nullptr }, 1581 { "parts", disasmpy_result_parts, nullptr, 1582 "List of all the separate disassembly parts", nullptr }, 1583 { nullptr } /* Sentinel */ 1584 }; 1585 1586 /* The get/set attributes of the gdb.disassembler.DisassemblerTextPart type. */ 1587 1588 static gdb_PyGetSetDef disasmpy_text_part_getset[] = { 1589 { "string", disasmpy_text_part_string, nullptr, 1590 "String representing a text part.", nullptr }, 1591 { "style", disasmpy_text_part_style, nullptr, 1592 "The style of this text part.", nullptr }, 1593 { nullptr } /* Sentinel */ 1594 }; 1595 1596 /* The get/set attributes of the gdb.disassembler.DisassemblerAddressPart type. 
*/ 1597 1598 static gdb_PyGetSetDef disasmpy_addr_part_getset[] = { 1599 { "string", disasmpy_addr_part_string, nullptr, 1600 "String representing an address part.", nullptr }, 1601 { "address", disasmpy_addr_part_address, nullptr, 1602 "The address of this address part.", nullptr }, 1603 { nullptr } /* Sentinel */ 1604 }; 1605 1606 /* These are the methods we add into the _gdb.disassembler module, which 1607 are then imported into the gdb.disassembler module. These are global 1608 functions that support performing disassembly. */ 1609 1610 PyMethodDef python_disassembler_methods[] = 1611 { 1612 { "builtin_disassemble", (PyCFunction) disasmpy_builtin_disassemble, 1613 METH_VARARGS | METH_KEYWORDS, 1614 "builtin_disassemble (INFO, MEMORY_SOURCE = None) -> None\n\ 1615 Disassemble using GDB's builtin disassembler. INFO is an instance of\n\ 1616 gdb.disassembler.DisassembleInfo. The MEMORY_SOURCE, if not None, should\n\ 1617 be an object with the read_memory method." }, 1618 { "_set_enabled", (PyCFunction) disasmpy_set_enabled, 1619 METH_VARARGS | METH_KEYWORDS, 1620 "_set_enabled (STATE) -> None\n\ 1621 Set whether GDB should call into the Python _print_insn code or not." }, 1622 {nullptr, nullptr, 0, nullptr} 1623 }; 1624 1625 /* Structure to define the _gdb.disassembler module. */ 1626 1627 static struct PyModuleDef python_disassembler_module_def = 1628 { 1629 PyModuleDef_HEAD_INIT, 1630 "_gdb.disassembler", 1631 nullptr, 1632 -1, 1633 python_disassembler_methods, 1634 nullptr, 1635 nullptr, 1636 nullptr, 1637 nullptr 1638 }; 1639 1640 /* Called to initialize the Python structures in this file. */ 1641 1642 static int CPYCHECKER_NEGATIVE_RESULT_SETS_EXCEPTION 1643 gdbpy_initialize_disasm () 1644 { 1645 /* Create the _gdb.disassembler module, and add it to the _gdb module. 
*/ 1646 1647 PyObject *gdb_disassembler_module; 1648 gdb_disassembler_module = PyModule_Create (&python_disassembler_module_def); 1649 if (gdb_disassembler_module == nullptr) 1650 return -1; 1651 if (gdb_pymodule_addobject (gdb_module, "disassembler", 1652 gdb_disassembler_module) < 0) 1653 return -1; 1654 1655 /* This is needed so that 'import _gdb.disassembler' will work. */ 1656 PyObject *dict = PyImport_GetModuleDict (); 1657 if (PyDict_SetItemString (dict, "_gdb.disassembler", 1658 gdb_disassembler_module) < 0) 1659 return -1; 1660 1661 for (int i = 0; i <= (int) dis_style_comment_start; ++i) 1662 { 1663 const char *style_name = get_style_name ((enum disassembler_style) i); 1664 if (PyModule_AddIntConstant (gdb_disassembler_module, style_name, i) < 0) 1665 return -1; 1666 } 1667 1668 disasm_info_object_type.tp_new = PyType_GenericNew; 1669 if (PyType_Ready (&disasm_info_object_type) < 0) 1670 return -1; 1671 1672 if (gdb_pymodule_addobject (gdb_disassembler_module, "DisassembleInfo", 1673 (PyObject *) &disasm_info_object_type) < 0) 1674 return -1; 1675 1676 disasm_result_object_type.tp_new = PyType_GenericNew; 1677 if (PyType_Ready (&disasm_result_object_type) < 0) 1678 return -1; 1679 1680 if (gdb_pymodule_addobject (gdb_disassembler_module, "DisassemblerResult", 1681 (PyObject *) &disasm_result_object_type) < 0) 1682 return -1; 1683 1684 disasm_part_object_type.tp_new = PyType_GenericNew; 1685 if (PyType_Ready (&disasm_part_object_type) < 0) 1686 return -1; 1687 1688 if (gdb_pymodule_addobject (gdb_disassembler_module, "DisassemblerPart", 1689 (PyObject *) &disasm_part_object_type) < 0) 1690 return -1; 1691 1692 disasm_addr_part_object_type.tp_new = PyType_GenericNew; 1693 if (PyType_Ready (&disasm_addr_part_object_type) < 0) 1694 return -1; 1695 1696 if (gdb_pymodule_addobject (gdb_disassembler_module, 1697 "DisassemblerAddressPart", 1698 (PyObject *) &disasm_addr_part_object_type) < 0) 1699 return -1; 1700 1701 disasm_text_part_object_type.tp_new = 
PyType_GenericNew; 1702 if (PyType_Ready (&disasm_text_part_object_type) < 0) 1703 return -1; 1704 1705 if (gdb_pymodule_addobject (gdb_disassembler_module, 1706 "DisassemblerTextPart", 1707 (PyObject *) &disasm_text_part_object_type) < 0) 1708 return -1; 1709 1710 return 0; 1711 } 1712 1713 GDBPY_INITIALIZE_FILE (gdbpy_initialize_disasm); 1714 1715 1716 1717 /* Describe the gdb.disassembler.DisassembleInfo type. */ 1718 1719 PyTypeObject disasm_info_object_type = { 1720 PyVarObject_HEAD_INIT (nullptr, 0) 1721 "gdb.disassembler.DisassembleInfo", /*tp_name*/ 1722 sizeof (disasm_info_object), /*tp_basicsize*/ 1723 0, /*tp_itemsize*/ 1724 disasm_info_dealloc, /*tp_dealloc*/ 1725 0, /*tp_print*/ 1726 0, /*tp_getattr*/ 1727 0, /*tp_setattr*/ 1728 0, /*tp_compare*/ 1729 disasmpy_info_repr, /*tp_repr*/ 1730 0, /*tp_as_number*/ 1731 0, /*tp_as_sequence*/ 1732 0, /*tp_as_mapping*/ 1733 0, /*tp_hash */ 1734 0, /*tp_call*/ 1735 0, /*tp_str*/ 1736 0, /*tp_getattro*/ 1737 0, /*tp_setattro*/ 1738 0, /*tp_as_buffer*/ 1739 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /*tp_flags*/ 1740 "GDB instruction disassembler object", /* tp_doc */ 1741 0, /* tp_traverse */ 1742 0, /* tp_clear */ 1743 0, /* tp_richcompare */ 1744 0, /* tp_weaklistoffset */ 1745 0, /* tp_iter */ 1746 0, /* tp_iternext */ 1747 disasm_info_object_methods, /* tp_methods */ 1748 0, /* tp_members */ 1749 disasm_info_object_getset, /* tp_getset */ 1750 0, /* tp_base */ 1751 0, /* tp_dict */ 1752 0, /* tp_descr_get */ 1753 0, /* tp_descr_set */ 1754 0, /* tp_dictoffset */ 1755 disasm_info_init, /* tp_init */ 1756 0, /* tp_alloc */ 1757 }; 1758 1759 /* Describe the gdb.disassembler.DisassemblerResult type. 
*/

/* Instances are released by disasmpy_dealloc_result, initialised by
   disasmpy_result_init, and expose their attributes through
   disasm_result_object_getset.  */

PyTypeObject disasm_result_object_type = {
  PyVarObject_HEAD_INIT (nullptr, 0)
  "gdb.disassembler.DisassemblerResult",        /* tp_name */
  sizeof (disasm_result_object),                /* tp_basicsize */
  0,                                            /* tp_itemsize */
  disasmpy_dealloc_result,                      /* tp_dealloc */
  0,                                            /* tp_print */
  0,                                            /* tp_getattr */
  0,                                            /* tp_setattr */
  0,                                            /* tp_compare */
  disasmpy_result_repr,                         /* tp_repr */
  0,                                            /* tp_as_number */
  0,                                            /* tp_as_sequence */
  0,                                            /* tp_as_mapping */
  0,                                            /* tp_hash */
  0,                                            /* tp_call */
  disasmpy_result_str,                          /* tp_str */
  0,                                            /* tp_getattro */
  0,                                            /* tp_setattro */
  0,                                            /* tp_as_buffer */
  Py_TPFLAGS_DEFAULT,                           /* tp_flags */
  "GDB object, representing a disassembler result",     /* tp_doc */
  0,                                            /* tp_traverse */
  0,                                            /* tp_clear */
  0,                                            /* tp_richcompare */
  0,                                            /* tp_weaklistoffset */
  0,                                            /* tp_iter */
  0,                                            /* tp_iternext */
  0,                                            /* tp_methods */
  0,                                            /* tp_members */
  disasm_result_object_getset,                  /* tp_getset */
  0,                                            /* tp_base */
  0,                                            /* tp_dict */
  0,                                            /* tp_descr_get */
  0,                                            /* tp_descr_set */
  0,                                            /* tp_dictoffset */
  disasmpy_result_init,                         /* tp_init */
  0,                                            /* tp_alloc */
};

/* Describe the gdb.disassembler.DisassemblerPart type.  This type exists
   only as an abstract base-class for the various part sub-types.  The
   init method for this type throws an error.  As such we don't bother to
   provide a tp_repr method for this parent class.  
*/

/* The type carries no data beyond the plain PyObject header, and its
   tp_init slot is disasmpy_part_init.  */

PyTypeObject disasm_part_object_type = {
  PyVarObject_HEAD_INIT (nullptr, 0)
  "gdb.disassembler.DisassemblerPart",          /* tp_name */
  sizeof (PyObject),                            /* tp_basicsize */
  0,                                            /* tp_itemsize */
  0,                                            /* tp_dealloc */
  0,                                            /* tp_print */
  0,                                            /* tp_getattr */
  0,                                            /* tp_setattr */
  0,                                            /* tp_compare */
  0,                                            /* tp_repr */
  0,                                            /* tp_as_number */
  0,                                            /* tp_as_sequence */
  0,                                            /* tp_as_mapping */
  0,                                            /* tp_hash */
  0,                                            /* tp_call */
  0,                                            /* tp_str */
  0,                                            /* tp_getattro */
  0,                                            /* tp_setattro */
  0,                                            /* tp_as_buffer */
  Py_TPFLAGS_DEFAULT,                           /* tp_flags */
  "GDB object, representing part of a disassembled instruction",  /* tp_doc */
  0,                                            /* tp_traverse */
  0,                                            /* tp_clear */
  0,                                            /* tp_richcompare */
  0,                                            /* tp_weaklistoffset */
  0,                                            /* tp_iter */
  0,                                            /* tp_iternext */
  0,                                            /* tp_methods */
  0,                                            /* tp_members */
  0,                                            /* tp_getset */
  0,                                            /* tp_base */
  0,                                            /* tp_dict */
  0,                                            /* tp_descr_get */
  0,                                            /* tp_descr_set */
  0,                                            /* tp_dictoffset */
  disasmpy_part_init,                           /* tp_init */
  0,                                            /* tp_alloc */
};

/* Describe the gdb.disassembler.DisassemblerTextPart type.  
*/

/* This type derives from DisassemblerPart (see tp_base below).

   tp_basicsize must be the size of one instance, that is
   disasm_text_part_object; it must not be the size of this PyTypeObject
   variable, which would make every instance far larger than needed.

   NOTE(review): no tp_dealloc is installed here even though instances
   hold a std::string pointer -- confirm where that string is released.  */

PyTypeObject disasm_text_part_object_type = {
  PyVarObject_HEAD_INIT (nullptr, 0)
  "gdb.disassembler.DisassemblerTextPart",      /*tp_name*/
  sizeof (disasm_text_part_object),             /*tp_basicsize*/
  0,                                            /*tp_itemsize*/
  0,                                            /*tp_dealloc*/
  0,                                            /*tp_print*/
  0,                                            /*tp_getattr*/
  0,                                            /*tp_setattr*/
  0,                                            /*tp_compare*/
  disasmpy_text_part_repr,                      /*tp_repr*/
  0,                                            /*tp_as_number*/
  0,                                            /*tp_as_sequence*/
  0,                                            /*tp_as_mapping*/
  0,                                            /*tp_hash */
  0,                                            /*tp_call*/
  disasmpy_text_part_str,                       /*tp_str*/
  0,                                            /*tp_getattro*/
  0,                                            /*tp_setattro*/
  0,                                            /*tp_as_buffer*/
  Py_TPFLAGS_DEFAULT,                           /*tp_flags*/
  "GDB object, representing a text part of an instruction",  /* tp_doc */
  0,                                            /* tp_traverse */
  0,                                            /* tp_clear */
  0,                                            /* tp_richcompare */
  0,                                            /* tp_weaklistoffset */
  0,                                            /* tp_iter */
  0,                                            /* tp_iternext */
  0,                                            /* tp_methods */
  0,                                            /* tp_members */
  disasmpy_text_part_getset,                    /* tp_getset */
  &disasm_part_object_type,                     /* tp_base */
  0,                                            /* tp_dict */
  0,                                            /* tp_descr_get */
  0,                                            /* tp_descr_set */
  0,                                            /* tp_dictoffset */
  0,                                            /* tp_init */
  0,                                            /* tp_alloc */
};

/* Describe the gdb.disassembler.DisassemblerAddressPart type.  
*/

/* This type derives from DisassemblerPart (see tp_base below) and
   exposes its attributes through disasmpy_addr_part_getset.  */

PyTypeObject disasm_addr_part_object_type = {
  PyVarObject_HEAD_INIT (nullptr, 0)
  "gdb.disassembler.DisassemblerAddressPart",   /* tp_name */
  sizeof (disasm_addr_part_object),             /* tp_basicsize */
  0,                                            /* tp_itemsize */
  0,                                            /* tp_dealloc */
  0,                                            /* tp_print */
  0,                                            /* tp_getattr */
  0,                                            /* tp_setattr */
  0,                                            /* tp_compare */
  disasmpy_addr_part_repr,                      /* tp_repr */
  0,                                            /* tp_as_number */
  0,                                            /* tp_as_sequence */
  0,                                            /* tp_as_mapping */
  0,                                            /* tp_hash */
  0,                                            /* tp_call */
  disasmpy_addr_part_str,                       /* tp_str */
  0,                                            /* tp_getattro */
  0,                                            /* tp_setattro */
  0,                                            /* tp_as_buffer */
  Py_TPFLAGS_DEFAULT,                           /* tp_flags */
  "GDB object, representing an address part of an instruction",  /* tp_doc */
  0,                                            /* tp_traverse */
  0,                                            /* tp_clear */
  0,                                            /* tp_richcompare */
  0,                                            /* tp_weaklistoffset */
  0,                                            /* tp_iter */
  0,                                            /* tp_iternext */
  0,                                            /* tp_methods */
  0,                                            /* tp_members */
  disasmpy_addr_part_getset,                    /* tp_getset */
  &disasm_part_object_type,                     /* tp_base */
  0,                                            /* tp_dict */
  0,                                            /* tp_descr_get */
  0,                                            /* tp_descr_set */
  0,                                            /* tp_dictoffset */
  0,                                            /* tp_init */
  0,                                            /* tp_alloc */
};