xref: /netbsd-src/external/gpl3/gdb.old/dist/gdb/python/py-disasm.c (revision 6881a4007f077b54e5f51159c52b9b25f57deb0d)
1 /* Python interface to instruction disassembly.
2 
3    Copyright (C) 2021-2023 Free Software Foundation, Inc.
4 
5    This file is part of GDB.
6 
7    This program is free software; you can redistribute it and/or modify
8    it under the terms of the GNU General Public License as published by
9    the Free Software Foundation; either version 3 of the License, or
10    (at your option) any later version.
11 
12    This program is distributed in the hope that it will be useful,
13    but WITHOUT ANY WARRANTY; without even the implied warranty of
14    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15    GNU General Public License for more details.
16 
17    You should have received a copy of the GNU General Public License
18    along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
19 
20 #include "defs.h"
21 #include "python-internal.h"
22 #include "dis-asm.h"
23 #include "arch-utils.h"
24 #include "charset.h"
25 #include "disasm.h"
26 #include "progspace.h"
27 
/* Implement gdb.disassembler.DisassembleInfo type.  An object of this type
   represents a single disassembler request from GDB.

   An object is considered valid only while its GDB_INFO field is
   non-nullptr (see disasm_info_object_is_valid).  */

struct disasm_info_object
{
  PyObject_HEAD

  /* The architecture in which we are disassembling.  */
  struct gdbarch *gdbarch;

  /* The program_space in which we are disassembling.  */
  struct program_space *program_space;

  /* Address of the instruction to disassemble.  */
  bfd_vma address;

  /* The disassemble_info passed from core GDB, this contains the
     callbacks necessary to read the instruction from core GDB, and to
     print the disassembled instruction.  This is reset to nullptr when
     the object is invalidated.  */
  disassemble_info *gdb_info;

  /* If copies of this object are created then they are chained together
     via this NEXT pointer, this allows all the copies to be invalidated at
     the same time as the parent object.  A reference is held on the
     object NEXT points at: disasm_info_init takes it when a copy is
     linked in, and disasm_info_dealloc drops it.  */
  struct disasm_info_object *next;
};
54 
/* Declaration of the Python type object used for disasm_info_object
   instances.  It is declared here because code below refers to it (for
   argument type checks and when allocating new objects).  */
extern PyTypeObject disasm_info_object_type
    CPYCHECKER_TYPE_OBJECT_FOR_TYPEDEF ("disasm_info_object");
57 
/* Implement gdb.disassembler.DisassemblerResult type, an object that holds
   the result of calling the disassembler.  This is mostly the length of
   the disassembled instruction (in bytes), and the string representing the
   disassembled instruction.  */

struct disasm_result_object
{
  PyObject_HEAD

  /* The length of the disassembled instruction in bytes.  */
  int length;

  /* A buffer which, when allocated, holds the disassembled content of an
     instruction.  This is nullptr until the object has been initialised
     by disasmpy_init_disassembler_result, which allocates the buffer with
     'new' on first use and reuses it afterwards.  */
  string_file *content;
};
74 
/* Declaration of the Python type object used for disasm_result_object
   instances.  It is declared here because code below refers to it (when
   allocating results and when checking the type of values returned from
   Python).  */
extern PyTypeObject disasm_result_object_type
    CPYCHECKER_TYPE_OBJECT_FOR_TYPEDEF ("disasm_result_object");
77 
/* When this is false we fast path out of gdbpy_print_insn, which should
   keep the performance impact of the Python disassembler down.  This is
   set to true from Python by calling gdb.disassembler._set_enabled() when
   the user registers a disassembler.

   The flag is written by disasmpy_set_enabled and read by
   gdbpy_print_insn before the Python environment is entered.  */

static bool python_print_insn_enabled = false;
84 
85 /* A sub-class of gdb_disassembler that holds a pointer to a Python
86    DisassembleInfo object.  A pointer to an instance of this class is
87    placed in the application_data field of the disassemble_info that is
88    used when we call gdbarch_print_insn.  */
89 
90 struct gdbpy_disassembler : public gdb_printing_disassembler
91 {
92   /* Constructor.  */
93   gdbpy_disassembler (disasm_info_object *obj, PyObject *memory_source);
94 
95   /* Get the DisassembleInfo object pointer.  */
96   disasm_info_object *
97   py_disasm_info () const
98   {
99     return m_disasm_info_object;
100   }
101 
102   /* Callbacks used by disassemble_info.  */
103   static void memory_error_func (int status, bfd_vma memaddr,
104 				 struct disassemble_info *info) noexcept;
105   static void print_address_func (bfd_vma addr,
106 				  struct disassemble_info *info) noexcept;
107   static int read_memory_func (bfd_vma memaddr, gdb_byte *buff,
108 			       unsigned int len,
109 			       struct disassemble_info *info) noexcept;
110 
111   /* Return a reference to an optional that contains the address at which a
112      memory error occurred.  The optional will only have a value if a
113      memory error actually occurred.  */
114   const gdb::optional<CORE_ADDR> &memory_error_address () const
115   { return m_memory_error_address; }
116 
117   /* Return the content of the disassembler as a string.  The contents are
118      moved out of the disassembler, so after this call the disassembler
119      contents have been reset back to empty.  */
120   std::string release ()
121   {
122     return m_string_file.release ();
123   }
124 
125   /* If there is a Python exception stored in this disassembler then
126      restore it (i.e. set the PyErr_* state), clear the exception within
127      this disassembler, and return true.  There must be no current
128      exception set (i.e. !PyErr_Occurred()) when this function is called,
129      as any such exception might get lost.
130 
131      Otherwise, there is no exception stored in this disassembler, return
132      false.  */
133   bool restore_exception ()
134   {
135     gdb_assert (!PyErr_Occurred ());
136     if (m_stored_exception.has_value ())
137       {
138 	gdbpy_err_fetch ex = std::move (*m_stored_exception);
139 	m_stored_exception.reset ();
140 	ex.restore ();
141 	return true;
142       }
143 
144     return false;
145   }
146 
147 private:
148 
149   /* Where the disassembler result is written.  */
150   string_file m_string_file;
151 
152   /* The DisassembleInfo object we are disassembling for.  */
153   disasm_info_object *m_disasm_info_object;
154 
155   /* When the user indicates that a memory error has occurred then the
156      address of the memory error is stored in here.  */
157   gdb::optional<CORE_ADDR> m_memory_error_address;
158 
159   /* When the user calls the builtin_disassemble function, if they pass a
160      memory source object then a pointer to the object is placed in here,
161      otherwise, this field is nullptr.  */
162   PyObject *m_memory_source;
163 
164   /* Move the exception EX into this disassembler object.  */
165   void store_exception (gdbpy_err_fetch &&ex)
166   {
167     /* The only calls to store_exception are from read_memory_func, which
168        will return early if there's already an exception stored.  */
169     gdb_assert (!m_stored_exception.has_value ());
170     m_stored_exception.emplace (std::move (ex));
171   }
172 
173   /* Return true if there is an exception stored in this disassembler.  */
174   bool has_stored_exception () const
175   {
176     return m_stored_exception.has_value ();
177   }
178 
179   /* Store a single exception.  This is used to pass Python exceptions back
180      from ::memory_read to disasmpy_builtin_disassemble.  */
181   gdb::optional<gdbpy_err_fetch> m_stored_exception;
182 };
183 
184 /* Return true if OBJ is still valid, otherwise, return false.  A valid OBJ
185    will have a non-nullptr gdb_info field.  */
186 
187 static bool
188 disasm_info_object_is_valid (disasm_info_object *obj)
189 {
190   return obj->gdb_info != nullptr;
191 }
192 
193 /* Fill in OBJ with all the other arguments.  */
194 
195 static void
196 disasm_info_fill (disasm_info_object *obj, struct gdbarch *gdbarch,
197 		  program_space *progspace, bfd_vma address,
198 		  disassemble_info *di, disasm_info_object *next)
199 {
200   obj->gdbarch = gdbarch;
201   obj->program_space = progspace;
202   obj->address = address;
203   obj->gdb_info = di;
204   obj->next = next;
205 }
206 
207 /* Implement DisassembleInfo.__init__.  Takes a single argument that must
208    be another DisassembleInfo object and copies the contents from the
209    argument into this new object.  */
210 
211 static int
212 disasm_info_init (PyObject *self, PyObject *args, PyObject *kwargs)
213 {
214   static const char *keywords[] = { "info", NULL };
215   PyObject *info_obj;
216   if (!gdb_PyArg_ParseTupleAndKeywords (args, kwargs, "O!", keywords,
217 					&disasm_info_object_type,
218 					&info_obj))
219     return -1;
220 
221   disasm_info_object *other = (disasm_info_object *) info_obj;
222   disasm_info_object *info = (disasm_info_object *) self;
223   disasm_info_fill (info, other->gdbarch, other->program_space,
224 		    other->address, other->gdb_info, other->next);
225   other->next = info;
226 
227   /* As the OTHER object now holds a pointer to INFO we inc the ref count
228      on INFO.  This stops INFO being deleted until OTHER has gone away.  */
229   Py_INCREF ((PyObject *) info);
230   return 0;
231 }
232 
233 /* The tp_dealloc callback for the DisassembleInfo type.  */
234 
235 static void
236 disasm_info_dealloc (PyObject *self)
237 {
238   disasm_info_object *obj = (disasm_info_object *) self;
239 
240   /* We no longer care about the object our NEXT pointer points at, so we
241      can decrement its reference count.  This macro handles the case when
242      NEXT is nullptr.  */
243   Py_XDECREF ((PyObject *) obj->next);
244 
245   /* Now core deallocation behaviour.  */
246   Py_TYPE (self)->tp_free (self);
247 }
248 
249 /* Implement DisassembleInfo.is_valid(), really just a wrapper around the
250    disasm_info_object_is_valid function above.  */
251 
252 static PyObject *
253 disasmpy_info_is_valid (PyObject *self, PyObject *args)
254 {
255   disasm_info_object *disasm_obj = (disasm_info_object *) self;
256 
257   if (disasm_info_object_is_valid (disasm_obj))
258     Py_RETURN_TRUE;
259 
260   Py_RETURN_FALSE;
261 }
262 
263 /* Set the Python exception to be a gdb.MemoryError object, with ADDRESS
264    as its payload.  */
265 
266 static void
267 disasmpy_set_memory_error_for_address (CORE_ADDR address)
268 {
269   PyObject *address_obj = gdb_py_object_from_longest (address).release ();
270   PyErr_SetObject (gdbpy_gdb_memory_error, address_obj);
271 }
272 
/* Ensure that a gdb.disassembler.DisassembleInfo is valid.

   INFO is a disasm_info_object pointer.  If INFO is not valid then a
   Python RuntimeError is set and the *enclosing function* returns
   nullptr, so this macro may only be used inside functions whose return
   type can be initialised from nullptr.  */

#define DISASMPY_DISASM_INFO_REQUIRE_VALID(Info)			\
  do {									\
    if (!disasm_info_object_is_valid (Info))				\
      {									\
	PyErr_SetString (PyExc_RuntimeError,				\
			 _("DisassembleInfo is no longer valid."));	\
	return nullptr;							\
      }									\
  } while (0)
284 
285 /* Initialise OBJ, a DisassemblerResult object with LENGTH and CONTENT.
286    OBJ might already have been initialised, in which case any existing
287    content should be discarded before the new CONTENT is moved in.  */
288 
289 static void
290 disasmpy_init_disassembler_result (disasm_result_object *obj, int length,
291 				   std::string content)
292 {
293   if (obj->content == nullptr)
294     obj->content = new string_file;
295   else
296     obj->content->clear ();
297 
298   obj->length = length;
299   *(obj->content) = std::move (content);
300 }
301 
302 /* Implement gdb.disassembler.builtin_disassemble().  Calls back into GDB's
303    builtin disassembler.  The first argument is a DisassembleInfo object
304    describing what to disassemble.  The second argument is optional and
305    provides a mechanism to modify the memory contents that the builtin
306    disassembler will actually disassemble.
307 
308    Returns an instance of gdb.disassembler.DisassemblerResult, an object
309    that wraps a disassembled instruction, or it raises a
310    gdb.MemoryError.  */
311 
312 static PyObject *
313 disasmpy_builtin_disassemble (PyObject *self, PyObject *args, PyObject *kw)
314 {
315   PyObject *info_obj, *memory_source_obj = nullptr;
316   static const char *keywords[] = { "info", "memory_source", nullptr };
317   if (!gdb_PyArg_ParseTupleAndKeywords (args, kw, "O!|O", keywords,
318 					&disasm_info_object_type, &info_obj,
319 					&memory_source_obj))
320     return nullptr;
321 
322   disasm_info_object *disasm_info = (disasm_info_object *) info_obj;
323   DISASMPY_DISASM_INFO_REQUIRE_VALID (disasm_info);
324 
325   /* Where the result will be written.  */
326   gdbpy_disassembler disassembler (disasm_info, memory_source_obj);
327 
328   /* Now actually perform the disassembly.  LENGTH is set to the length of
329      the disassembled instruction, or -1 if there was a memory-error
330      encountered while disassembling.  See below more more details on
331      handling of -1 return value.  */
332   int length = gdbarch_print_insn (disasm_info->gdbarch, disasm_info->address,
333 				   disassembler.disasm_info ());
334 
335   /* It is possible that, while calling a user overridden memory read
336      function, a Python exception was raised that couldn't be
337      translated into a standard memory-error.  In this case the first such
338      exception is stored in the disassembler and restored here.  */
339   if (disassembler.restore_exception ())
340     return nullptr;
341 
342   if (length == -1)
343     {
344 
345       /* In an ideal world, every disassembler should always call the
346 	 memory error function before returning a status of -1 as the only
347 	 error a disassembler should encounter is a failure to read
348 	 memory.  Unfortunately, there are some disassemblers who don't
349 	 follow this rule, and will return -1 without calling the memory
350 	 error function.
351 
352 	 To make the Python API simpler, we just classify everything as a
353 	 memory error, but the message has to be modified for the case
354 	 where the disassembler didn't call the memory error function.  */
355       if (disassembler.memory_error_address ().has_value ())
356 	{
357 	  CORE_ADDR addr = *disassembler.memory_error_address ();
358 	  disasmpy_set_memory_error_for_address (addr);
359 	}
360       else
361 	{
362 	  std::string content = disassembler.release ();
363 	  if (!content.empty ())
364 	    PyErr_SetString (gdbpy_gdberror_exc, content.c_str ());
365 	  else
366 	    PyErr_SetString (gdbpy_gdberror_exc,
367 			     _("Unknown disassembly error."));
368 	}
369       return nullptr;
370     }
371 
372   /* Instructions are either non-zero in length, or we got an error,
373      indicated by a length of -1, which we handled above.  */
374   gdb_assert (length > 0);
375 
376   /* We should not have seen a memory error in this case.  */
377   gdb_assert (!disassembler.memory_error_address ().has_value ());
378 
379   /* Create a DisassemblerResult containing the results.  */
380   std::string content = disassembler.release ();
381   PyTypeObject *type = &disasm_result_object_type;
382   gdbpy_ref<disasm_result_object> res
383     ((disasm_result_object *) type->tp_alloc (type, 0));
384   disasmpy_init_disassembler_result (res.get (), length, std::move (content));
385   return reinterpret_cast<PyObject *> (res.release ());
386 }
387 
388 /* Implement gdb._set_enabled function.  Takes a boolean parameter, and
389    sets whether GDB should enter the Python disassembler code or not.
390 
391    This is called from within the Python code when a new disassembler is
392    registered.  When no disassemblers are registered the global C++ flag
393    is set to false, and GDB never even enters the Python environment to
394    check for a disassembler.
395 
396    When the user registers a new Python disassembler, the global C++ flag
397    is set to true, and now GDB will enter the Python environment to check
398    if there's a disassembler registered for the current architecture.  */
399 
400 static PyObject *
401 disasmpy_set_enabled (PyObject *self, PyObject *args, PyObject *kw)
402 {
403   PyObject *newstate;
404   static const char *keywords[] = { "state", nullptr };
405   if (!gdb_PyArg_ParseTupleAndKeywords (args, kw, "O", keywords,
406 					&newstate))
407     return nullptr;
408 
409   if (!PyBool_Check (newstate))
410     {
411       PyErr_SetString (PyExc_TypeError,
412 		       _("The value passed to `_set_enabled' must be a boolean."));
413       return nullptr;
414     }
415 
416   python_print_insn_enabled = PyObject_IsTrue (newstate);
417   Py_RETURN_NONE;
418 }
419 
420 /* Implement DisassembleInfo.read_memory(LENGTH, OFFSET).  Read LENGTH
421    bytes at OFFSET from the start of the instruction currently being
422    disassembled, and return a memory buffer containing the bytes.
423 
424    OFFSET defaults to zero if it is not provided.  LENGTH is required.  If
425    the read fails then this will raise a gdb.MemoryError exception.  */
426 
427 static PyObject *
428 disasmpy_info_read_memory (PyObject *self, PyObject *args, PyObject *kw)
429 {
430   disasm_info_object *obj = (disasm_info_object *) self;
431   DISASMPY_DISASM_INFO_REQUIRE_VALID (obj);
432 
433   LONGEST length, offset = 0;
434   gdb::unique_xmalloc_ptr<gdb_byte> buffer;
435   static const char *keywords[] = { "length", "offset", nullptr };
436 
437   if (!gdb_PyArg_ParseTupleAndKeywords (args, kw, "L|L", keywords,
438 					&length, &offset))
439     return nullptr;
440 
441   /* The apparent address from which we are reading memory.  Note that in
442      some cases GDB actually disassembles instructions from a buffer, so
443      we might not actually be reading this information directly from the
444      inferior memory.  This is all hidden behind the read_memory_func API
445      within the disassemble_info structure.  */
446   CORE_ADDR address = obj->address + offset;
447 
448   /* Setup a buffer to hold the result.  */
449   buffer.reset ((gdb_byte *) xmalloc (length));
450 
451   /* Read content into BUFFER.  If the read fails then raise a memory
452      error, otherwise, convert BUFFER to a Python memory buffer, and return
453      it to the user.  */
454   disassemble_info *info = obj->gdb_info;
455   if (info->read_memory_func ((bfd_vma) address, buffer.get (),
456 			      (unsigned int) length, info) != 0)
457     {
458       disasmpy_set_memory_error_for_address (address);
459       return nullptr;
460     }
461   return gdbpy_buffer_to_membuf (std::move (buffer), address, length);
462 }
463 
464 /* Implement DisassembleInfo.address attribute, return the address at which
465    GDB would like an instruction disassembled.  */
466 
467 static PyObject *
468 disasmpy_info_address (PyObject *self, void *closure)
469 {
470   disasm_info_object *obj = (disasm_info_object *) self;
471   DISASMPY_DISASM_INFO_REQUIRE_VALID (obj);
472   return gdb_py_object_from_longest (obj->address).release ();
473 }
474 
475 /* Implement DisassembleInfo.architecture attribute.  Return the
476    gdb.Architecture in which we are disassembling.  */
477 
478 static PyObject *
479 disasmpy_info_architecture (PyObject *self, void *closure)
480 {
481   disasm_info_object *obj = (disasm_info_object *) self;
482   DISASMPY_DISASM_INFO_REQUIRE_VALID (obj);
483   return gdbarch_to_arch_object (obj->gdbarch);
484 }
485 
486 /* Implement DisassembleInfo.progspace attribute.  Return the
487    gdb.Progspace in which we are disassembling.  */
488 
489 static PyObject *
490 disasmpy_info_progspace (PyObject *self, void *closure)
491 {
492   disasm_info_object *obj = (disasm_info_object *) self;
493   DISASMPY_DISASM_INFO_REQUIRE_VALID (obj);
494   return pspace_to_pspace_object (obj->program_space).release ();
495 }
496 
497 /* This implements the disassemble_info read_memory_func callback and is
498    called from the libopcodes disassembler when the disassembler wants to
499    read memory.
500 
501    From the INFO argument we can find the gdbpy_disassembler object for
502    which we are disassembling, and from that object we can find the
503    DisassembleInfo for the current disassembly call.
504 
505    This function reads the instruction bytes by calling the read_memory
506    method on the DisassembleInfo object.  This method might have been
507    overridden by user code.
508 
509    Read LEN bytes from MEMADDR and place them into BUFF.  Return 0 on
510    success (in which case BUFF has been filled), or -1 on error, in which
511    case the contents of BUFF are undefined.  */
512 
513 int
514 gdbpy_disassembler::read_memory_func (bfd_vma memaddr, gdb_byte *buff,
515 				      unsigned int len,
516 				      struct disassemble_info *info) noexcept
517 {
518   gdbpy_disassembler *dis
519     = static_cast<gdbpy_disassembler *> (info->application_data);
520   disasm_info_object *obj = dis->py_disasm_info ();
521 
522   /* If a previous read attempt resulted in an exception, then we don't
523      allow any further reads to succeed.  We only do this check for the
524      read_memory_func as this is the only one the user can hook into,
525      thus, this check prevents us calling back into user code if a
526      previous call has already thrown an error.  */
527   if (dis->has_stored_exception ())
528     return -1;
529 
530   /* The DisassembleInfo.read_memory method expects an offset from the
531      address stored within the DisassembleInfo object; calculate that
532      offset here.  */
533   LONGEST offset = (LONGEST) memaddr - (LONGEST) obj->address;
534 
535   /* Now call the DisassembleInfo.read_memory method.  This might have been
536      overridden by the user.  */
537   gdbpy_ref<> result_obj (PyObject_CallMethod ((PyObject *) obj,
538 					       "read_memory",
539 					       "KL", len, offset));
540 
541   /* Handle any exceptions.  */
542   if (result_obj == nullptr)
543     {
544       /* If we got a gdb.MemoryError then we ignore this and just report
545 	 that the read failed to the caller.  The caller is then
546 	 responsible for calling the memory_error_func if it wants to.
547 	 Remember, the disassembler might just be probing to see if these
548 	 bytes can be read, if we automatically call the memory error
549 	 function, we can end up registering an error prematurely.  */
550       if (PyErr_ExceptionMatches (gdbpy_gdb_memory_error))
551 	{
552 	  PyErr_Clear ();
553 	  return -1;
554 	}
555 
556       /* For any other exception type we capture the value of the Python
557 	 exception and throw it, this will then be caught in
558 	 disasmpy_builtin_disassemble, at which point the exception will be
559 	 restored.  */
560       dis->store_exception (gdbpy_err_fetch ());
561       return -1;
562     }
563 
564   /* Convert the result to a buffer.  */
565   Py_buffer py_buff;
566   if (!PyObject_CheckBuffer (result_obj.get ())
567       || PyObject_GetBuffer (result_obj.get(), &py_buff, PyBUF_CONTIG_RO) < 0)
568     {
569       PyErr_Format (PyExc_TypeError,
570 		    _("Result from read_memory is not a buffer"));
571       dis->store_exception (gdbpy_err_fetch ());
572       return -1;
573     }
574 
575   /* Wrap PY_BUFF so that it is cleaned up correctly at the end of this
576      scope.  */
577   Py_buffer_up buffer_up (&py_buff);
578 
579   /* Validate that the buffer is the correct length.  */
580   if (py_buff.len != len)
581     {
582       PyErr_Format (PyExc_ValueError,
583 		    _("Buffer returned from read_memory is sized %d instead of the expected %d"),
584 		    py_buff.len, len);
585       dis->store_exception (gdbpy_err_fetch ());
586       return -1;
587     }
588 
589   /* Copy the data out of the Python buffer and return success.  */
590   const gdb_byte *buffer = (const gdb_byte *) py_buff.buf;
591   memcpy (buff, buffer, len);
592   return 0;
593 }
594 
595 /* Implement DisassemblerResult.length attribute, return the length of the
596    disassembled instruction.  */
597 
598 static PyObject *
599 disasmpy_result_length (PyObject *self, void *closure)
600 {
601   disasm_result_object *obj = (disasm_result_object *) self;
602   return gdb_py_object_from_longest (obj->length).release ();
603 }
604 
605 /* Implement DisassemblerResult.string attribute, return the content string
606    of the disassembled instruction.  */
607 
608 static PyObject *
609 disasmpy_result_string (PyObject *self, void *closure)
610 {
611   disasm_result_object *obj = (disasm_result_object *) self;
612 
613   gdb_assert (obj->content != nullptr);
614   gdb_assert (strlen (obj->content->c_str ()) > 0);
615   gdb_assert (obj->length > 0);
616   return PyUnicode_Decode (obj->content->c_str (),
617 			   obj->content->size (),
618 			   host_charset (), nullptr);
619 }
620 
621 /* Implement DisassemblerResult.__init__.  Takes two arguments, an
622    integer, the length in bytes of the disassembled instruction, and a
623    string, the disassembled content of the instruction.  */
624 
625 static int
626 disasmpy_result_init (PyObject *self, PyObject *args, PyObject *kwargs)
627 {
628   static const char *keywords[] = { "length", "string", NULL };
629   int length;
630   const char *string;
631   if (!gdb_PyArg_ParseTupleAndKeywords (args, kwargs, "is", keywords,
632 					&length, &string))
633     return -1;
634 
635   if (length <= 0)
636     {
637       PyErr_SetString (PyExc_ValueError,
638 		       _("Length must be greater than 0."));
639       return -1;
640     }
641 
642   if (strlen (string) == 0)
643     {
644       PyErr_SetString (PyExc_ValueError,
645 		       _("String must not be empty."));
646       return -1;
647     }
648 
649   disasm_result_object *obj = (disasm_result_object *) self;
650   disasmpy_init_disassembler_result (obj, length, std::string (string));
651 
652   return 0;
653 }
654 
655 /* Implement memory_error_func callback for disassemble_info.  Extract the
656    underlying DisassembleInfo Python object, and set a memory error on
657    it.  */
658 
659 void
660 gdbpy_disassembler::memory_error_func (int status, bfd_vma memaddr,
661 				       struct disassemble_info *info) noexcept
662 {
663   gdbpy_disassembler *dis
664     = static_cast<gdbpy_disassembler *> (info->application_data);
665   dis->m_memory_error_address.emplace (memaddr);
666 }
667 
668 /* Wrapper of print_address.  */
669 
670 void
671 gdbpy_disassembler::print_address_func (bfd_vma addr,
672 					struct disassemble_info *info) noexcept
673 {
674   gdbpy_disassembler *dis
675     = static_cast<gdbpy_disassembler *> (info->application_data);
676   print_address (dis->arch (), addr, dis->stream ());
677 }
678 
/* Constructor.  Build a disassembler for the request described by OBJ.

   The base class is given OBJ's architecture, the address of this
   object's own output buffer (m_string_file), and the three static
   callbacks declared in this class.  OBJ and MEMORY_SOURCE are recorded
   as given; no reference counts are adjusted here.  MEMORY_SOURCE may be
   nullptr.  */

gdbpy_disassembler::gdbpy_disassembler (disasm_info_object *obj,
					PyObject *memory_source)
  : gdb_printing_disassembler (obj->gdbarch, &m_string_file,
			       read_memory_func, memory_error_func,
			       print_address_func),
    m_disasm_info_object (obj),
    m_memory_source (memory_source)
{ /* Nothing.  */ }
689 
690 /* A wrapper around a reference to a Python DisassembleInfo object, which
691    ensures that the object is marked as invalid when we leave the enclosing
692    scope.
693 
694    Each DisassembleInfo is created in gdbpy_print_insn, and is done with by
695    the time that function returns.  However, there's nothing to stop a user
696    caching a reference to the DisassembleInfo, and thus keeping the object
697    around.
698 
699    We therefore have the notion of a DisassembleInfo becoming invalid, this
700    happens when gdbpy_print_insn returns.  This class is responsible for
701    marking the DisassembleInfo as invalid in its destructor.  */
702 
703 struct scoped_disasm_info_object
704 {
705   /* Constructor.  */
706   scoped_disasm_info_object (struct gdbarch *gdbarch, CORE_ADDR memaddr,
707 			     disassemble_info *info)
708     : m_disasm_info (allocate_disasm_info_object ())
709   {
710     disasm_info_fill (m_disasm_info.get (), gdbarch, current_program_space,
711 		      memaddr, info, nullptr);
712   }
713 
714   /* Upon destruction mark m_diasm_info as invalid.  */
715   ~scoped_disasm_info_object ()
716   {
717     /* Invalidate the original DisassembleInfo object as well as any copies
718        that the user might have made.  */
719     for (disasm_info_object *obj = m_disasm_info.get ();
720 	 obj != nullptr;
721 	 obj = obj->next)
722       obj->gdb_info = nullptr;
723   }
724 
725   /* Return a pointer to the underlying disasm_info_object instance.  */
726   disasm_info_object *
727   get () const
728   {
729     return m_disasm_info.get ();
730   }
731 
732 private:
733 
734   /* Wrapper around the call to PyObject_New, this wrapper function can be
735      called from the constructor initialization list, while PyObject_New, a
736      macro, can't.  */
737   static disasm_info_object *
738   allocate_disasm_info_object ()
739   {
740     return (disasm_info_object *) PyObject_New (disasm_info_object,
741 						&disasm_info_object_type);
742   }
743 
744   /* A reference to a gdb.disassembler.DisassembleInfo object.  When this
745      containing instance goes out of scope this reference is released,
746      however, the user might be holding other references to the
747      DisassembleInfo object in Python code, so the underlying object might
748      not be deleted.  */
749   gdbpy_ref<disasm_info_object> m_disasm_info;
750 };
751 
752 /* See python-internal.h.  */
753 
754 gdb::optional<int>
755 gdbpy_print_insn (struct gdbarch *gdbarch, CORE_ADDR memaddr,
756 		  disassemble_info *info)
757 {
758   /* Early exit case.  This must be done as early as possible, and
759      definitely before we enter Python environment.  The
760      python_print_insn_enabled flag is set (from Python) only when the user
761      has installed one (or more) Python disassemblers.  So in the common
762      case (no custom disassembler installed) this flag will be false,
763      allowing for a quick return.  */
764   if (!gdb_python_initialized || !python_print_insn_enabled)
765     return {};
766 
767   gdbpy_enter enter_py (get_current_arch (), current_language);
768 
769   /* Import the gdb.disassembler module.  */
770   gdbpy_ref<> gdb_python_disassembler_module
771     (PyImport_ImportModule ("gdb.disassembler"));
772   if (gdb_python_disassembler_module == nullptr)
773     {
774       gdbpy_print_stack ();
775       return {};
776     }
777 
778   /* Get the _print_insn attribute from the module, this should be the
779      function we are going to call to actually perform the disassembly.  */
780   gdbpy_ref<> hook
781     (PyObject_GetAttrString (gdb_python_disassembler_module.get (),
782 			     "_print_insn"));
783   if (hook == nullptr)
784     {
785       gdbpy_print_stack ();
786       return {};
787     }
788 
789   /* Create the new DisassembleInfo object we will pass into Python.  This
790      object will be marked as invalid when we leave this scope.  */
791   scoped_disasm_info_object scoped_disasm_info (gdbarch, memaddr, info);
792   disasm_info_object *disasm_info = scoped_disasm_info.get ();
793 
794   /* Call into the registered disassembler to (possibly) perform the
795      disassembly.  */
796   PyObject *insn_disas_obj = (PyObject *) disasm_info;
797   gdbpy_ref<> result (PyObject_CallFunctionObjArgs (hook.get (),
798 						    insn_disas_obj,
799 						    nullptr));
800 
801   if (result == nullptr)
802     {
803       /* The call into Python code resulted in an exception.  If this was a
804 	 gdb.MemoryError, then we can figure out an address and call the
805 	 disassemble_info::memory_error_func to report the error back to
806 	 core GDB.  Any other exception type we report back to core GDB as
807 	 an unknown error (return -1 without first calling the
808 	 memory_error_func callback).  */
809 
810       if (PyErr_ExceptionMatches (gdbpy_gdb_memory_error))
811 	{
812 	  /* A gdb.MemoryError might have an address attribute which
813 	     contains the address at which the memory error occurred.  If
814 	     this is the case then use this address, otherwise, fallback to
815 	     just using the address of the instruction we were asked to
816 	     disassemble.  */
817 	  gdbpy_err_fetch err;
818 	  PyErr_Clear ();
819 
820 	  CORE_ADDR addr;
821 	  if (err.value () != nullptr
822 	      && PyObject_HasAttrString (err.value ().get (), "address"))
823 	    {
824 	      PyObject *addr_obj
825 		= PyObject_GetAttrString (err.value ().get (), "address");
826 	      if (get_addr_from_python (addr_obj, &addr) < 0)
827 		addr = disasm_info->address;
828 	    }
829 	  else
830 	    addr = disasm_info->address;
831 
832 	  info->memory_error_func (-1, addr, info);
833 	  return gdb::optional<int> (-1);
834 	}
835       else if (PyErr_ExceptionMatches (gdbpy_gdberror_exc))
836 	{
837 	  gdbpy_err_fetch err;
838 	  gdb::unique_xmalloc_ptr<char> msg = err.to_string ();
839 
840 	  info->fprintf_func (info->stream, "%s", msg.get ());
841 	  return gdb::optional<int> (-1);
842 	}
843       else
844 	{
845 	  gdbpy_print_stack ();
846 	  return gdb::optional<int> (-1);
847 	}
848 
849     }
850   else if (result == Py_None)
851     {
852       /* A return value of None indicates that the Python code could not,
853 	 or doesn't want to, disassemble this instruction.  Just return an
854 	 empty result and core GDB will try to disassemble this for us.  */
855       return {};
856     }
857 
858   /* Check the result is a DisassemblerResult (or a sub-class).  */
859   if (!PyObject_IsInstance (result.get (),
860 			    (PyObject *) &disasm_result_object_type))
861     {
862       PyErr_SetString (PyExc_TypeError,
863 		       _("Result is not a DisassemblerResult."));
864       gdbpy_print_stack ();
865       return gdb::optional<int> (-1);
866     }
867 
868   /* The call into Python neither raised an exception, or returned None.
869      Check to see if the result looks valid.  */
870   gdbpy_ref<> length_obj (PyObject_GetAttrString (result.get (), "length"));
871   if (length_obj == nullptr)
872     {
873       gdbpy_print_stack ();
874       return gdb::optional<int> (-1);
875     }
876 
877   gdbpy_ref<> string_obj (PyObject_GetAttrString (result.get (), "string"));
878   if (string_obj == nullptr)
879     {
880       gdbpy_print_stack ();
881       return gdb::optional<int> (-1);
882     }
883   if (!gdbpy_is_string (string_obj.get ()))
884     {
885       PyErr_SetString (PyExc_TypeError, _("String attribute is not a string."));
886       gdbpy_print_stack ();
887       return gdb::optional<int> (-1);
888     }
889 
890   gdb::unique_xmalloc_ptr<char> string
891     = gdbpy_obj_to_string (string_obj.get ());
892   if (string == nullptr)
893     {
894       gdbpy_print_stack ();
895       return gdb::optional<int> (-1);
896     }
897 
898   long length;
899   if (!gdb_py_int_as_long (length_obj.get (), &length))
900     {
901       gdbpy_print_stack ();
902       return gdb::optional<int> (-1);
903     }
904 
905   long max_insn_length = (gdbarch_max_insn_length_p (gdbarch) ?
906 			  gdbarch_max_insn_length (gdbarch) : INT_MAX);
907   if (length <= 0)
908     {
909       PyErr_SetString
910 	(PyExc_ValueError,
911 	 _("Invalid length attribute: length must be greater than 0."));
912       gdbpy_print_stack ();
913       return gdb::optional<int> (-1);
914     }
915   if (length > max_insn_length)
916     {
917       PyErr_Format
918 	(PyExc_ValueError,
919 	 _("Invalid length attribute: length %d greater than architecture maximum of %d"),
920 	 length, max_insn_length);
921       gdbpy_print_stack ();
922       return gdb::optional<int> (-1);
923     }
924 
925   if (strlen (string.get ()) == 0)
926     {
927       PyErr_SetString (PyExc_ValueError,
928 		       _("String attribute must not be empty."));
929       gdbpy_print_stack ();
930       return gdb::optional<int> (-1);
931     }
932 
933   /* Print the disassembled instruction back to core GDB, and return the
934      length of the disassembled instruction.  */
935   info->fprintf_func (info->stream, "%s", string.get ());
936   return gdb::optional<int> (length);
937 }
938 
939 /* The tp_dealloc callback for the DisassemblerResult type.  Takes care of
940    deallocating the content buffer.  */
941 
942 static void
943 disasmpy_dealloc_result (PyObject *self)
944 {
945   disasm_result_object *obj = (disasm_result_object *) self;
946   delete obj->content;
947   Py_TYPE (self)->tp_free (self);
948 }
949 
950 /* The get/set attributes of the gdb.disassembler.DisassembleInfo type.  */
951 
952 static gdb_PyGetSetDef disasm_info_object_getset[] = {
953   { "address", disasmpy_info_address, nullptr,
954     "Start address of the instruction to disassemble.", nullptr },
955   { "architecture", disasmpy_info_architecture, nullptr,
956     "Architecture to disassemble in", nullptr },
957   { "progspace", disasmpy_info_progspace, nullptr,
958     "Program space to disassemble in", nullptr },
959   { nullptr }   /* Sentinel */
960 };
961 
962 /* The methods of the gdb.disassembler.DisassembleInfo type.  */
963 
964 static PyMethodDef disasm_info_object_methods[] = {
965   { "read_memory", (PyCFunction) disasmpy_info_read_memory,
966     METH_VARARGS | METH_KEYWORDS,
967     "read_memory (LEN, OFFSET = 0) -> Octets[]\n\
968 Read LEN octets for the instruction to disassemble." },
969   { "is_valid", disasmpy_info_is_valid, METH_NOARGS,
970     "is_valid () -> Boolean.\n\
971 Return true if this DisassembleInfo is valid, false if not." },
972   {nullptr}  /* Sentinel */
973 };
974 
975 /* The get/set attributes of the gdb.disassembler.DisassemblerResult type.  */
976 
977 static gdb_PyGetSetDef disasm_result_object_getset[] = {
978   { "length", disasmpy_result_length, nullptr,
979     "Length of the disassembled instruction.", nullptr },
980   { "string", disasmpy_result_string, nullptr,
981     "String representing the disassembled instruction.", nullptr },
982   { nullptr }   /* Sentinel */
983 };
984 
985 /* These are the methods we add into the _gdb.disassembler module, which
986    are then imported into the gdb.disassembler module.  These are global
987    functions that support performing disassembly.  */
988 
989 PyMethodDef python_disassembler_methods[] =
990 {
991   { "builtin_disassemble", (PyCFunction) disasmpy_builtin_disassemble,
992     METH_VARARGS | METH_KEYWORDS,
993     "builtin_disassemble (INFO, MEMORY_SOURCE = None) -> None\n\
994 Disassemble using GDB's builtin disassembler.  INFO is an instance of\n\
995 gdb.disassembler.DisassembleInfo.  The MEMORY_SOURCE, if not None, should\n\
996 be an object with the read_memory method." },
997   { "_set_enabled", (PyCFunction) disasmpy_set_enabled,
998     METH_VARARGS | METH_KEYWORDS,
999     "_set_enabled (STATE) -> None\n\
1000 Set whether GDB should call into the Python _print_insn code or not." },
1001   {nullptr, nullptr, 0, nullptr}
1002 };
1003 
1004 /* Structure to define the _gdb.disassembler module.  */
1005 
1006 static struct PyModuleDef python_disassembler_module_def =
1007 {
1008   PyModuleDef_HEAD_INIT,
1009   "_gdb.disassembler",
1010   nullptr,
1011   -1,
1012   python_disassembler_methods,
1013   nullptr,
1014   nullptr,
1015   nullptr,
1016   nullptr
1017 };
1018 
1019 /* Called to initialize the Python structures in this file.  */
1020 
1021 int
1022 gdbpy_initialize_disasm ()
1023 {
1024   /* Create the _gdb.disassembler module, and add it to the _gdb module.  */
1025 
1026   PyObject *gdb_disassembler_module;
1027   gdb_disassembler_module = PyModule_Create (&python_disassembler_module_def);
1028   if (gdb_disassembler_module == nullptr)
1029     return -1;
1030   PyModule_AddObject(gdb_module, "disassembler", gdb_disassembler_module);
1031 
1032   /* This is needed so that 'import _gdb.disassembler' will work.  */
1033   PyObject *dict = PyImport_GetModuleDict ();
1034   PyDict_SetItemString (dict, "_gdb.disassembler", gdb_disassembler_module);
1035 
1036   disasm_info_object_type.tp_new = PyType_GenericNew;
1037   if (PyType_Ready (&disasm_info_object_type) < 0)
1038     return -1;
1039 
1040   if (gdb_pymodule_addobject (gdb_disassembler_module, "DisassembleInfo",
1041 			      (PyObject *) &disasm_info_object_type) < 0)
1042     return -1;
1043 
1044   disasm_result_object_type.tp_new = PyType_GenericNew;
1045   if (PyType_Ready (&disasm_result_object_type) < 0)
1046     return -1;
1047 
1048   if (gdb_pymodule_addobject (gdb_disassembler_module, "DisassemblerResult",
1049 			      (PyObject *) &disasm_result_object_type) < 0)
1050     return -1;
1051 
1052   return 0;
1053 }
1054 
1055 /* Describe the gdb.disassembler.DisassembleInfo type.  */
1056 
1057 PyTypeObject disasm_info_object_type = {
1058   PyVarObject_HEAD_INIT (nullptr, 0)
1059   "gdb.disassembler.DisassembleInfo",		/*tp_name*/
1060   sizeof (disasm_info_object),			/*tp_basicsize*/
1061   0,						/*tp_itemsize*/
1062   disasm_info_dealloc,				/*tp_dealloc*/
1063   0,						/*tp_print*/
1064   0,						/*tp_getattr*/
1065   0,						/*tp_setattr*/
1066   0,						/*tp_compare*/
1067   0,						/*tp_repr*/
1068   0,						/*tp_as_number*/
1069   0,						/*tp_as_sequence*/
1070   0,						/*tp_as_mapping*/
1071   0,						/*tp_hash */
1072   0,						/*tp_call*/
1073   0,						/*tp_str*/
1074   0,						/*tp_getattro*/
1075   0,						/*tp_setattro*/
1076   0,						/*tp_as_buffer*/
1077   Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE,	/*tp_flags*/
1078   "GDB instruction disassembler object",	/* tp_doc */
1079   0,						/* tp_traverse */
1080   0,						/* tp_clear */
1081   0,						/* tp_richcompare */
1082   0,						/* tp_weaklistoffset */
1083   0,						/* tp_iter */
1084   0,						/* tp_iternext */
1085   disasm_info_object_methods,			/* tp_methods */
1086   0,						/* tp_members */
1087   disasm_info_object_getset,			/* tp_getset */
1088   0,						/* tp_base */
1089   0,						/* tp_dict */
1090   0,						/* tp_descr_get */
1091   0,						/* tp_descr_set */
1092   0,						/* tp_dictoffset */
1093   disasm_info_init,				/* tp_init */
1094   0,						/* tp_alloc */
1095 };
1096 
1097 /* Describe the gdb.disassembler.DisassemblerResult type.  */
1098 
1099 PyTypeObject disasm_result_object_type = {
1100   PyVarObject_HEAD_INIT (nullptr, 0)
1101   "gdb.disassembler.DisassemblerResult",	/*tp_name*/
1102   sizeof (disasm_result_object),		/*tp_basicsize*/
1103   0,						/*tp_itemsize*/
1104   disasmpy_dealloc_result,			/*tp_dealloc*/
1105   0,						/*tp_print*/
1106   0,						/*tp_getattr*/
1107   0,						/*tp_setattr*/
1108   0,						/*tp_compare*/
1109   0,						/*tp_repr*/
1110   0,						/*tp_as_number*/
1111   0,						/*tp_as_sequence*/
1112   0,						/*tp_as_mapping*/
1113   0,						/*tp_hash */
1114   0,						/*tp_call*/
1115   0,						/*tp_str*/
1116   0,						/*tp_getattro*/
1117   0,						/*tp_setattro*/
1118   0,						/*tp_as_buffer*/
1119   Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE,	/*tp_flags*/
1120   "GDB object, representing a disassembler result",	/* tp_doc */
1121   0,						/* tp_traverse */
1122   0,						/* tp_clear */
1123   0,						/* tp_richcompare */
1124   0,						/* tp_weaklistoffset */
1125   0,						/* tp_iter */
1126   0,						/* tp_iternext */
1127   0,						/* tp_methods */
1128   0,						/* tp_members */
1129   disasm_result_object_getset,			/* tp_getset */
1130   0,						/* tp_base */
1131   0,						/* tp_dict */
1132   0,						/* tp_descr_get */
1133   0,						/* tp_descr_set */
1134   0,						/* tp_dictoffset */
1135   disasmpy_result_init,				/* tp_init */
1136   0,						/* tp_alloc */
1137 };
1138