xref: /netbsd-src/external/gpl3/binutils.old/dist/include/dis-asm.h (revision e992f068c547fd6e84b3f104dc2340adcc955732)
1 /* Interface between the opcode library and its callers.
2 
3    Copyright (C) 1999-2022 Free Software Foundation, Inc.
4 
5    This program is free software; you can redistribute it and/or modify
6    it under the terms of the GNU General Public License as published by
7    the Free Software Foundation; either version 3, or (at your option)
8    any later version.
9 
10    This program is distributed in the hope that it will be useful,
11    but WITHOUT ANY WARRANTY; without even the implied warranty of
12    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13    GNU General Public License for more details.
14 
15    You should have received a copy of the GNU General Public License
16    along with this program; if not, write to the Free Software
17    Foundation, Inc., 51 Franklin Street - Fifth Floor,
18    Boston, MA 02110-1301, USA.
19 
20    Written by Cygnus Support, 1993.
21 
22    The opcode library (libopcodes.a) provides instruction decoders for
23    a large variety of instruction sets, callable with an identical
24    interface, for making instruction-processing programs more independent
25    of the instruction set being processed.  */
26 
27 #ifndef DIS_ASM_H
28 #define DIS_ASM_H
29 
30 #ifdef __cplusplus
31 extern "C" {
32 #endif
33 
34 #include <stdio.h>
35 #include <string.h>
36 #include "bfd.h"
37 
/* Classification of a decoded instruction.  This is the type of the
   insn_type member of struct disassemble_info, one of the per-instruction
   results that decoders may report back to their caller.  */
38 enum dis_insn_type
39 {
40   dis_noninsn,			/* Not a valid instruction.  */
41   dis_nonbranch,		/* Not a branch instruction.  */
42   dis_branch,			/* Unconditional branch.  */
43   dis_condbranch,		/* Conditional branch.  */
44   dis_jsr,			/* Jump to subroutine.  */
45   dis_condjsr,			/* Conditional jump to subroutine.  */
46   dis_dref,			/* Data reference instruction.  */
47   dis_dref2			/* Two data references in instruction.  */
48 };
49 
50 /* When printing styled disassembler output, this describes what style
51    should be used.  A value of this type is the second argument of every
   fprintf_styled_ftype callback, so each piece of text emitted through
   that callback carries exactly one style.  */
52 
53 enum disassembler_style
54 {
55   /* This is the default style, use this for any additional syntax
56      (e.g. commas between operands, brackets, etc), or just as a default if
57      no other style seems appropriate.  */
58   dis_style_text,
59 
60   /* Use this for all instruction mnemonics, or aliases for mnemonics.
61      These should be things that correspond to real machine
62      instructions.  */
63   dis_style_mnemonic,
64 
65   /* For things that aren't real machine instructions, but rather
66      assembler directives, e.g. .byte, etc.  */
67   dis_style_assembler_directive,
68 
69   /* Use this for any register names.  This may or may not include any
70      register prefix, e.g. '$', '%', at the discretion of the target,
71      though within each target the choice to include prefixes or not
72      should be kept consistent.  If the prefix is not printed with this
73      style, then dis_style_text should be used.  */
74   dis_style_register,
75 
76   /* Use this for any constant values used within instructions or
77      directives, unless the value is an absolute address, or an offset
78      that will be added to an address (no matter where the address comes
79      from) before use.  This style may or may not be used for any
80      prefix to the immediate value, e.g. '$', at the discretion of the
81      target, though within each target the choice to include these
82      prefixes should be kept consistent.  */
83   dis_style_immediate,
84 
85   /* The style for the numerical representation of an absolute address.
86      Anything that is an address offset should use the immediate style.
87      This style may or may not be used for any prefix to the immediate
88      value, e.g. '$', at the discretion of the target, though within
89      each target the choice to include these prefixes should be kept
90      consistent.  */
91   dis_style_address,
92 
93   /* The style for any constant value within an instruction or directive
94      that represents an offset that will be added to an address before
95      use.  This style may or may not be used for any prefix to the
96      immediate value, e.g. '$', at the discretion of the target, though
97      within each target the choice to include these prefixes should be
98      kept consistent.  */
99   dis_style_address_offset,
100 
101   /* The style for a symbol's name.  The numerical address of a symbol
102      should use the address style above, this style is reserved for the
103      name.  */
104   dis_style_symbol,
105 
106   /* The start of a comment that runs to the end of the line.  Anything
107      printed after a comment start might be styled differently,
108      e.g. everything might be styled as a comment, regardless of the
109      actual style used.  The disassembler itself should not try to adjust
110      the style emitted for comment content, e.g. an address emitted within
111      a comment should still be given dis_style_address, in this way it is
112      up to the user of the disassembler to decide how comments should be
113      styled.  */
114   dis_style_comment_start
115 };
116 
/* printf-like output callbacks used by the disassembler.  Both return
   int and take an opaque void * as their first argument (presumably the
   STREAM member of struct disassemble_info -- confirm against callers),
   followed by a format string and variadic arguments.  The styled
   variant takes the disassembler_style for the text as an extra second
   argument.  ATTRIBUTE_FPTR_PRINTF_2 / ATTRIBUTE_FPTR_PRINTF_3 are not
   defined in this header; judging by their names they mark argument 2 /
   argument 3 as the printf format string for compiler checking.  */
117 typedef int (*fprintf_ftype) (void *, const char*, ...) ATTRIBUTE_FPTR_PRINTF_2;
118 typedef int (*fprintf_styled_ftype) (void *, enum disassembler_style, const char*, ...) ATTRIBUTE_FPTR_PRINTF_3;
119 
120 /* This struct is passed into the instruction decoding routine,
121    and is passed back out into each callback.  The various fields are used
122    for conveying information from your main routine into your callbacks,
123    for passing information into the instruction decoders (such as the
124    addresses of the callback functions), or for passing information
125    back from the instruction decoders to their callers.
126 
127    It must be initialized before it is first passed; this can be done
128    by hand, or using one of the initialization macros below.  */
129 
130 typedef struct disassemble_info
131 {
  /* Output callbacks (plain and styled) and the opaque pointers that go
     with them.  These correspond to the STREAM, FPRINTF_FUNC and
     FPRINTF_STYLED_FUNC arguments of init_disassemble_info and
     disassemble_set_printf.  APPLICATION_DATA is an opaque pointer;
     NOTE(review): nothing in this header reads or writes it, presumably
     it is reserved for the calling application.  */
132   fprintf_ftype fprintf_func;
133   fprintf_styled_ftype fprintf_styled_func;
134   void *stream;
135   void *application_data;
136 
137   /* Target description.  We could replace this with a pointer to the bfd,
138      but that would require one.  There currently isn't any such requirement
139      so to avoid introducing one we record these explicitly.  */
140   /* The bfd_flavour.  This can be bfd_target_unknown_flavour.  */
141   enum bfd_flavour flavour;
142   /* The bfd_arch value.  */
143   enum bfd_architecture arch;
144   /* The bfd_mach value.  */
145   unsigned long mach;
146   /* Endianness (for bi-endian cpus).  Mono-endian cpus can ignore this.  */
147   enum bfd_endian endian;
148   /* Endianness of code, for mixed-endian situations such as ARM BE8.  */
149   enum bfd_endian endian_code;
150 
151   /* Some targets need information about the current section to accurately
152      display insns.  If this is NULL, the target disassembler function
153      will have to make its best guess.  */
154   asection *section;
155 
156   /* An array of pointers to symbols either at the location being disassembled
157      or at the start of the function being disassembled.  The array is sorted
158      so that the first symbol is intended to be the one used.  The others are
159      present for any misc. purposes.  This is not set reliably, but if it is
160      not NULL, it is correct.  */
161   asymbol **symbols;
162   /* Number of symbols in array.  */
163   int num_symbols;
164 
165   /* Symbol table provided for targets that want to look at it.  This is
166      used on Arm to find mapping symbols and determine Arm/Thumb code.  */
167   asymbol **symtab;
  /* NOTE(review): presumably the current position within SYMTAB and the
     number of entries in SYMTAB respectively -- confirm against users.  */
168   int symtab_pos;
169   int symtab_size;
170 
171   /* For use by the disassembler.
172      The top 16 bits are reserved for public use (and are documented here).
173      The bottom 16 bits are for the internal use of the disassembler.  */
174   unsigned long flags;
175   /* Set if the disassembler has determined that there are one or more
176      relocations associated with the instruction being disassembled.  */
177 #define INSN_HAS_RELOC	 (1u << 31)
178   /* Set if the user has requested the disassembly of data as well as code.  */
179 #define DISASSEMBLE_DATA (1u << 30)
180   /* Set if the user has specifically set the machine type encoded in the
181      mach field of this structure.  */
182 #define USER_SPECIFIED_MACHINE_TYPE (1u << 29)
183   /* Set if the user has requested wide output.  */
184 #define WIDE_OUTPUT (1u << 28)
185 
186   /* Dynamic relocations, if they have been loaded.  DYNRELCOUNT is
     presumably the number of entries in DYNRELBUF -- confirm.  */
187   arelent **dynrelbuf;
188   long dynrelcount;
189 
190   /* Use internally by the target specific disassembly code.  */
191   void *private_data;
192 
193   /* Function used to get bytes to disassemble.  MEMADDR is the
194      address of the stuff to be disassembled, MYADDR is the address to
195      put the bytes in, and LENGTH is the number of bytes to read.
196      DINFO is a pointer to this struct.
197      Returns an errno value or 0 for success.  */
198   int (*read_memory_func)
199     (bfd_vma memaddr, bfd_byte *myaddr, unsigned int length,
200      struct disassemble_info *dinfo);
201 
202   /* Function which should be called if we get an error that we can't
203      recover from.  STATUS is the errno value from read_memory_func and
204      MEMADDR is the address that we were trying to read.  DINFO is a
205      pointer to this struct.  */
206   void (*memory_error_func)
207     (int status, bfd_vma memaddr, struct disassemble_info *dinfo);
208 
209   /* Function called to print ADDR.  */
210   void (*print_address_func)
211     (bfd_vma addr, struct disassemble_info *dinfo);
212 
213   /* Function called to determine if there is a symbol at the given ADDR.
214      The return type is asymbol *: presumably the symbol found, or NULL
     if there is none (generic_symbol_at_address is documented as always
     returning NULL) -- confirm against callers.
215      This is used by ports which support an overlay manager where
216      the overlay number is held in the top part of an address.  In
217      some circumstances we want to include the overlay number in the
218      address, (normally because there is a symbol associated with
219      that address), but sometimes we want to mask out the overlay bits.  */
220   asymbol * (*symbol_at_address_func)
221     (bfd_vma addr, struct disassemble_info *dinfo);
222 
223   /* Function called to check if a SYMBOL can be displayed to the user.
224      This is used by some ports that want to hide special symbols when
225      displaying debugging output.  */
226   bool (*symbol_is_valid)
227     (asymbol *, struct disassemble_info *dinfo);
228 
229   /* These are for buffer_read_memory.  */
230   bfd_byte *buffer;
231   bfd_vma buffer_vma;
232   size_t buffer_length;
233 
234   /* This variable may be set by the instruction decoder.  It suggests
235       the number of bytes objdump should display on a single line.  If
236       the instruction decoder sets this, it should always set it to
237       the same value in order to get reasonable looking output.  */
238   int bytes_per_line;
239 
240   /* The next two variables control the way objdump displays the raw data.  */
241   /* For example, if bytes_per_line is 8 and bytes_per_chunk is 4, the */
242   /* output will look like this:
243      00:   00000000 00000000
244      with the chunks displayed according to "display_endian". */
245   int bytes_per_chunk;
246   enum bfd_endian display_endian;
247 
248   /* Number of octets per incremented target address
249      Normally one, but some DSPs have byte sizes of 16 or 32 bits.  */
250   unsigned int octets_per_byte;
251 
252   /* The number of zeroes we want to see at the end of a section before we
253      start skipping them.  */
254   unsigned int skip_zeroes;
255 
256   /* The number of zeroes to skip at the end of a section.  If the number
257      of zeroes at the end is between SKIP_ZEROES_AT_END and SKIP_ZEROES,
258      they will be disassembled.  If there are fewer than
259      SKIP_ZEROES_AT_END, they will be skipped.  This is a heuristic
260      attempt to avoid disassembling zeroes inserted by section
261      alignment.  */
262   unsigned int skip_zeroes_at_end;
263 
264   /* Whether the disassembler always needs the relocations.  */
265   bool disassembler_needs_relocs;
266 
267   /* Results from instruction decoders.  Not all decoders yet support
268      this information.  This info is set each time an instruction is
269      decoded, and is only valid for the last such instruction.
270 
271      To determine whether this decoder supports this information, set
272      insn_info_valid to 0, decode an instruction, then check it.  */
273 
274   char insn_info_valid;		/* Branch info has been set. */
275   char branch_delay_insns;	/* How many sequential insn's will run before
276 				   a branch takes effect.  (0 = normal) */
277   char data_size;		/* Size of data reference in insn, in bytes */
278   enum dis_insn_type insn_type;	/* Type of instruction */
279   bfd_vma target;		/* Target address of branch or dref, if known;
280 				   zero if unknown.  */
281   bfd_vma target2;		/* Second target address for dref2 */
282 
283   /* Command line options specific to the target disassembler.  */
284   const char *disassembler_options;
285 
286   /* If non-zero then try not to disassemble beyond this address, even if
287      there are values left in the buffer.  This address is the address
288      of the nearest symbol forwards from the start of the disassembly,
289      and it is assumed that it lies on the boundary between instructions.
290      If an instruction spans this address then this is an error in the
291      file being disassembled.  */
292   bfd_vma stop_vma;
293 
294   /* The end range of the current range being disassembled.  This is required
295      in order to notify the disassembler when it's currently handling a
296      different range than it was before.  This prevents unsafe optimizations when
297      disassembling such as the way mapping symbols are found on AArch64.  */
298   bfd_vma stop_offset;
299 
300   /* Set to true if the disassembler applied styling to the output,
301      otherwise, set to false.  */
302   bool created_styled_output;
303 } disassemble_info;
304 
305 /* This struct is used to pass information about valid disassembler
306    option arguments from the target to the generic GDB functions
307    that set and display them.  Each instance describes one kind of
308    argument: a display name plus the list of values it accepts.  */
308 
309 typedef struct
310 {
311   /* Option argument name to use in descriptions.  */
312   const char *name;
313 
314   /* Vector of acceptable option argument values, NULL-terminated.  */
315   const char **values;
316 } disasm_option_arg_t;
317 
318 /* This struct is used to pass information about valid disassembler
319    options, their descriptions and arguments from the target to the
320    generic GDB functions that set and display them.  Options are
321    defined by tuples of vector entries at each index, i.e. name[i],
   description[i] (when DESCRIPTION is non-NULL) and arg[i] (when ARG is
   non-NULL) together describe option number i.  */
322 
323 typedef struct
324 {
325   /* Vector of option names, NULL-terminated.  */
326   const char **name;
327 
328   /* Vector of option descriptions or NULL if none to be shown.  */
329   const char **description;
330 
331   /* Vector of option argument information pointers or NULL if no
332      option accepts an argument.  NULL entries denote individual
333      options that accept no argument.  */
334   const disasm_option_arg_t **arg;
335 } disasm_options_t;
336 
337 /* This struct is used to pass information about valid disassembler
338    options and arguments from the target to the generic GDB functions
339    that set and display them.  It is the type returned (as a pointer to
   const) by the disassembler_options_<target> functions declared in
   this header.  */
340 
341 typedef struct
342 {
343   /* Valid disassembler options.  Individual options that support
344      an argument will refer to entries in the ARGS vector.  */
345   disasm_options_t options;
346 
347   /* Vector of acceptable option arguments, NULL-terminated.  This
348      collects all possible option argument choices, some of which
349      may be shared by different options from the OPTIONS member.  */
350   disasm_option_arg_t *args;
351 } disasm_options_and_args_t;
352 
353 /* Standard disassemblers.  Disassemble one instruction at the given
354    target address.  Return number of octets processed.  The first
   argument is the target address; the second is the disassemble_info
   carrying the callbacks and target description.  This is the type
   returned by disassembler () and by the <target>_get_disassembler
   functions.  */
355 typedef int (*disassembler_ftype) (bfd_vma, disassemble_info *);
356 
357 /* Disassemblers used outside of opcodes library.  Each has the
   disassembler_ftype signature.  */
358 extern int print_insn_m32c		(bfd_vma, disassemble_info *);
359 extern int print_insn_mep		(bfd_vma, disassemble_info *);
360 extern int print_insn_s12z		(bfd_vma, disassemble_info *);
361 extern int print_insn_sh		(bfd_vma, disassemble_info *);
362 extern int print_insn_sparc		(bfd_vma, disassemble_info *);
363 extern int print_insn_rx		(bfd_vma, disassemble_info *);
364 extern int print_insn_rl78		(bfd_vma, disassemble_info *);
365 extern int print_insn_rl78_g10		(bfd_vma, disassemble_info *);
366 extern int print_insn_rl78_g13		(bfd_vma, disassemble_info *);
367 extern int print_insn_rl78_g14		(bfd_vma, disassemble_info *);
368 
/* Per-target functions that return a disassembler_ftype for the
   given bfd.  */
369 extern disassembler_ftype arc_get_disassembler (bfd *);
370 extern disassembler_ftype cris_get_disassembler (bfd *);
371 
/* Per-target routines taking the FILE to write to; judging by their
   names they print the target's disassembler option help (compare
   disassembler_usage) -- confirm in the target sources.  */
372 extern void print_aarch64_disassembler_options (FILE *);
373 extern void print_i386_disassembler_options (FILE *);
374 extern void print_mips_disassembler_options (FILE *);
375 extern void print_nfp_disassembler_options (FILE *);
376 extern void print_ppc_disassembler_options (FILE *);
377 extern void print_riscv_disassembler_options (FILE *);
378 extern void print_arm_disassembler_options (FILE *);
379 extern void print_arc_disassembler_options (FILE *);
380 extern void print_s390_disassembler_options (FILE *);
381 extern void print_wasm32_disassembler_options (FILE *);
382 extern void print_loongarch_disassembler_options (FILE *);
/* Per-target predicates with the same signature as the symbol_is_valid
   callback in struct disassemble_info.  */
383 extern bool aarch64_symbol_is_valid (asymbol *, struct disassemble_info *);
384 extern bool arm_symbol_is_valid (asymbol *, struct disassemble_info *);
385 extern bool csky_symbol_is_valid (asymbol *, struct disassemble_info *);
386 extern bool riscv_symbol_is_valid (asymbol *, struct disassemble_info *);
/* Per-target initialisation routines operating on the info struct.
   NOTE(review): presumably reached through disassemble_init_for_target;
   the dispatch is not visible in this header.  */
387 extern void disassemble_init_powerpc (struct disassemble_info *);
388 extern void disassemble_init_s390 (struct disassemble_info *);
389 extern void disassemble_init_wasm32 (struct disassemble_info *);
390 extern void disassemble_init_nds32 (struct disassemble_info *);
/* Per-target accessors returning a pointer to a const
   disasm_options_and_args_t describing that target's options.  */
391 extern const disasm_options_and_args_t *disassembler_options_arc (void);
392 extern const disasm_options_and_args_t *disassembler_options_arm (void);
393 extern const disasm_options_and_args_t *disassembler_options_mips (void);
394 extern const disasm_options_and_args_t *disassembler_options_powerpc (void);
395 extern const disasm_options_and_args_t *disassembler_options_riscv (void);
396 extern const disasm_options_and_args_t *disassembler_options_s390 (void);
397 
398 /* Fetch the disassembler for a given architecture ARC, endianness (big
399    endian if BIG is true), bfd_mach value MACH, and ABFD, if that support
400    is available.  ABFD may be NULL.  */
401 extern disassembler_ftype disassembler (enum bfd_architecture arc,
402 					bool big, unsigned long mach,
403 					bfd *abfd);
404 
405 /* Amend the disassemble_info structure as necessary for the target architecture.
406    Should only be called after initialising the info->arch field.  */
407 extern void disassemble_init_for_target (struct disassemble_info *);
408 
409 /* Tidy any memory allocated by targets, such as info->private_data.  */
410 extern void disassemble_free_target (struct disassemble_info *);
411 
412 /* Set the basic disassembler print functions.  The argument types match
   those of init_disassemble_info: the info struct, an opaque stream
   pointer, the plain printf-like callback and the styled one.  */
413 extern void disassemble_set_printf (struct disassemble_info *, void *,
414 				    fprintf_ftype, fprintf_styled_ftype);
415 
416 /* Document any target specific options available from the disassembler.
   Takes the FILE to write to.  */
417 extern void disassembler_usage (FILE *);
418 
419 /* Remove whitespace and consecutive commas.  NOTE(review): the argument
   is a non-const char * and a char * is returned; whether the input is
   edited in place is not visible from this declaration.  */
420 extern char *remove_whitespace_and_extra_commas (char *);
421 
422 /* Like STRCMP, but treat ',' the same as '\0' so that we match
423    strings like "foobar" against "foobar,xxyyzz,...".  */
424 extern int disassembler_options_cmp (const char *, const char *);
425 
/* A helper function for FOR_EACH_DISASSEMBLER_OPTION.

   OPTIONS points into a comma separated list of options.  Return a
   pointer to the character following the next ',' in OPTIONS, i.e. the
   start of the following option, or NULL if OPTIONS contains no further
   ','.  The returned pointer aliases OPTIONS; nothing is copied or
   modified, so the option it points at is terminated by the next ','
   or by the end of the string.

   A NULL OPTIONS is treated as an exhausted list and yields NULL rather
   than being handed to strchr, which would be undefined behaviour.  */
static inline const char *
next_disassembler_option (const char *options)
{
  const char *opt;

  if (options == NULL)
    return NULL;

  opt = strchr (options, ',');
  if (opt != NULL)
    opt++;		/* Step over the ',' to the start of the next option.  */
  return opt;
}
435 
436 /* A macro for iterating over each comma separated option in OPTIONS.
   OPT must be an assignable const char * expression: it is first set to
   OPTIONS and then, after each iteration, to the result of
   next_disassembler_option (OPT).  The loop ends when OPT becomes NULL,
   so a NULL OPTIONS runs the body zero times.  On each iteration OPT
   points at the start of an option inside the original string; the
   option is not NUL-terminated on its own, it runs up to the next ','
   or the end of the string.  OPT is evaluated several times, so it
   should be free of side effects.  */
437 #define FOR_EACH_DISASSEMBLER_OPTION(OPT, OPTIONS) \
438   for ((OPT) = (OPTIONS); \
439        (OPT) != NULL; \
440        (OPT) = next_disassembler_option (OPT))
441 
442 
443 /* This block of definitions is for particular callers who read instructions
444    into a buffer before calling the instruction decoder.  */
445 
446 /* Here is a function which callers may wish to use for read_memory_func.
447    It gets bytes from a buffer.  Its signature matches the
   read_memory_func member of struct disassemble_info; the BUFFER,
   BUFFER_VMA and BUFFER_LENGTH members of that struct are documented
   as being for this function.  */
448 extern int buffer_read_memory
449   (bfd_vma, bfd_byte *, unsigned int, struct disassemble_info *);
450 
451 /* This function goes with buffer_read_memory.
452    It prints a message using info->fprintf_func and info->stream.
   Its signature matches the memory_error_func member of struct
   disassemble_info.  */
453 extern void perror_memory (int, bfd_vma, struct disassemble_info *);
454 
455 
456 /* Just print the address in hex.  This is included for completeness even
457    though both GDB and objdump provide their own (to print symbolic
458    addresses).  Its signature matches the print_address_func member.  */
459 extern void generic_print_address
460   (bfd_vma, struct disassemble_info *);
461 
462 /* Always NULL.  Its signature matches the symbol_at_address_func
   member.  */
463 extern asymbol *generic_symbol_at_address
464   (bfd_vma, struct disassemble_info *);
465 
466 /* Always true.  Its signature matches the symbol_is_valid member.  */
467 extern bool generic_symbol_is_valid
468   (asymbol *, struct disassemble_info *);
469 
470 /* Method to initialize a disassemble_info struct.  This should be
471    called by all applications creating such a struct.  DINFO is the
   struct to initialize; STREAM, FPRINTF_FUNC and FPRINTF_STYLED_FUNC
   have the same types as the identically named members of the struct.  */
472 extern void init_disassemble_info (struct disassemble_info *dinfo, void *stream,
473 				   fprintf_ftype fprintf_func,
474 				   fprintf_styled_ftype fprintf_styled_func);
475 
476 /* For compatibility with existing code.  Expands to a call to
   init_disassemble_info.  INFO is the struct object itself, not a
   pointer: the macro takes its address.  FPRINTF_FUNC and
   FPRINTF_STYLED_FUNC are explicitly cast to fprintf_ftype and
   fprintf_styled_ftype, so a callback of a different function type is
   accepted here without a compiler diagnostic.  */
477 #define INIT_DISASSEMBLE_INFO(INFO, STREAM, FPRINTF_FUNC, FPRINTF_STYLED_FUNC)  \
478   init_disassemble_info (&(INFO), (STREAM), (fprintf_ftype) (FPRINTF_FUNC), \
479 			 (fprintf_styled_ftype) (FPRINTF_STYLED_FUNC))
480 
481 #ifdef __cplusplus
482 }
483 #endif
484 
485 #endif /* ! defined (DIS_ASM_H) */
486