1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22 /*
23 * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
24 * Use is subject to license terms.
25 */
26
27 #pragma ident "%Z%%M% %I% %E% SMI"
28
29 #include <ctype.h>
30 #include <getopt.h>
31 #include <stdio.h>
32 #include <stdlib.h>
33 #include <string.h>
34 #include <sys/sysmacros.h>
35 #include <sys/elf_SPARC.h>
36
37 #include <libdisasm.h>
38
39 #include "dis_target.h"
40 #include "dis_util.h"
41 #include "dis_list.h"
42
int g_demangle;		/* Demangle C++ names (set by -C) */
int g_quiet;		/* Quiet mode (set by -q); suppresses header lines */
int g_numeric;		/* Numeric mode (set by -n); print addresses only */
int g_flags;		/* libdisasm language flags; -o adds DIS_OCTAL */
int g_doall;		/* true if no functions or sections were given */

dis_namelist_t *g_funclist;	/* list of functions to disassemble, if any */
dis_namelist_t *g_seclist;	/* list of sections to disassemble, if any */
51
/*
 * Section options for -d, -D, and -t.  The value is stored alongside the
 * section name when the option is parsed and later handed to
 * dis_named_section() as its 'type' argument.
 */
#define	DIS_DATA_RELATIVE	1	/* -d: dump data, addresses from 0 */
#define	DIS_DATA_ABSOLUTE	2	/* -D: dump data at section address */
#define	DIS_TEXT		3	/* -t: disassemble section as text */
58
/*
 * libdisasm callback data.  Keeps track of current data (function or section)
 * and offset within that data.  dis_data() fills one of these in and
 * registers it with dis_set_data(); do_lookup() and do_read() receive it back
 * as their 'data' argument.
 */
typedef struct dis_buffer {
	dis_tgt_t	*db_tgt;	/* current dis target */
	void		*db_data;	/* function or section data */
	uint64_t	db_addr;	/* address of first byte of db_data */
	size_t		db_size;	/* size of data */
	uint64_t	db_nextaddr;	/* next address to be read */
} dis_buffer_t;
70
/*
 * Minimum width of symbol portion of line.  dis_data() widens the column
 * beyond this when the current symbol's longest possible name needs it.
 */
#define	MINSYMWIDTH	22
72
73 /*
74 * Given a symbol+offset as returned by dis_tgt_lookup(), print an appropriately
75 * formatted symbol, based on the offset and current setttings.
76 */
77 void
getsymname(uint64_t addr,const char * symbol,off_t offset,char * buf,size_t buflen)78 getsymname(uint64_t addr, const char *symbol, off_t offset, char *buf,
79 size_t buflen)
80 {
81 if (symbol == NULL || g_numeric) {
82 if (g_flags & DIS_OCTAL)
83 (void) snprintf(buf, buflen, "0%llo", addr);
84 else
85 (void) snprintf(buf, buflen, "0x%llx", addr);
86 } else {
87 if (g_demangle)
88 symbol = dis_demangle(symbol);
89
90 if (offset == 0)
91 (void) snprintf(buf, buflen, "%s", symbol);
92 else if (g_flags & DIS_OCTAL)
93 (void) snprintf(buf, buflen, "%s+0%o", symbol, offset);
94 else
95 (void) snprintf(buf, buflen, "%s+0x%x", symbol, offset);
96 }
97 }
98
99 /*
100 * The main disassembly routine. Given a fixed-sized buffer and starting
101 * address, disassemble the data using the supplied target and libdisasm handle.
102 */
103 void
dis_data(dis_tgt_t * tgt,dis_handle_t * dhp,uint64_t addr,void * data,size_t datalen)104 dis_data(dis_tgt_t *tgt, dis_handle_t *dhp, uint64_t addr, void *data,
105 size_t datalen)
106 {
107 dis_buffer_t db = { 0 };
108 char buf[BUFSIZE];
109 char symbuf[BUFSIZE];
110 const char *symbol;
111 off_t symoffset;
112 int i;
113 int bytesperline;
114 size_t symsize;
115 int isfunc;
116 size_t symwidth = 0;
117
118 db.db_tgt = tgt;
119 db.db_data = data;
120 db.db_addr = addr;
121 db.db_size = datalen;
122
123 dis_set_data(dhp, &db);
124
125 if ((bytesperline = dis_max_instrlen(dhp)) > 6)
126 bytesperline = 6;
127
128 while (addr < db.db_addr + db.db_size) {
129
130 if (dis_disassemble(dhp, addr, buf, BUFSIZE) != 0) {
131 /*
132 * If we encounter an invalid opcode, we just
133 * print "*** invalid opcode ***" at that first bad
134 * instruction and continue with printing the rest
135 * of the instruction stream as hex data,
136 * We then find the next valid symbol in the section,
137 * and disassemble from there.
138 */
139 off_t next;
140
141 (void) snprintf(buf, sizeof (buf),
142 "*** invalid opcode ***");
143
144 if ((next = dis_tgt_next_symbol(tgt, addr)) == 0) {
145 db.db_nextaddr = db.db_addr + db.db_size;
146 } else {
147 if (next > db.db_size)
148 db.db_nextaddr = db.db_addr +
149 db.db_size;
150 else
151 db.db_nextaddr = addr + next;
152 }
153 }
154
155 /*
156 * Print out the line as:
157 *
158 * address: bytes text
159 *
160 * If there are more than 6 bytes in any given instruction,
161 * spread the bytes across two lines. We try to get symbolic
162 * information for the address, but if that fails we print out
163 * the numeric address instead.
164 *
165 * We try to keep the address portion of the text aligned at
166 * MINSYMWIDTH characters. If we are disassembling a function
167 * with a long name, this can be annoying. So we pick a width
168 * based on the maximum width that the current symbol can be.
169 * This at least produces text aligned within each function.
170 */
171 symbol = dis_tgt_lookup(tgt, addr, &symoffset, 1, &symsize,
172 &isfunc);
173 /* Get the maximum length for this symbol */
174 getsymname(addr, symbol, symsize, symbuf, sizeof (symbuf));
175 symwidth = MAX(strlen(symbuf), MINSYMWIDTH);
176
177 getsymname(addr, symbol, symoffset, symbuf, sizeof (symbuf));
178
179 /*
180 * If we've crossed a new function boundary, print out the
181 * function name on a blank line.
182 */
183 if (!g_quiet && symoffset == 0 && symbol != NULL && isfunc)
184 (void) printf("%s()\n", symbol);
185
186 (void) printf(" %s:%*s ", symbuf,
187 symwidth - strlen(symbuf), "");
188
189 /* print bytes */
190 for (i = 0; i < MIN(bytesperline, (db.db_nextaddr - addr));
191 i++) {
192 int byte = *((uchar_t *)data + (addr - db.db_addr) + i);
193 if (g_flags & DIS_OCTAL)
194 (void) printf("%03o ", byte);
195 else
196 (void) printf("%02x ", byte);
197 }
198
199 /* trailing spaces for missing bytes */
200 for (; i < bytesperline; i++) {
201 if (g_flags & DIS_OCTAL)
202 (void) printf(" ");
203 else
204 (void) printf(" ");
205 }
206
207 /* contents of disassembly */
208 (void) printf(" %s", buf);
209
210 /* excess bytes that spill over onto subsequent lines */
211 for (; i < db.db_nextaddr - addr; i++) {
212 int byte = *((uchar_t *)data + (addr - db.db_addr) + i);
213 if (i % bytesperline == 0)
214 (void) printf("\n %*s ", symwidth, "");
215 if (g_flags & DIS_OCTAL)
216 (void) printf("%03o ", byte);
217 else
218 (void) printf("%02x ", byte);
219 }
220
221 (void) printf("\n");
222
223 addr = db.db_nextaddr;
224 }
225 }
226
227 /*
228 * libdisasm wrapper around symbol lookup. Invoke the target-specific lookup
229 * function, and convert the result using getsymname().
230 */
231 int
do_lookup(void * data,uint64_t addr,char * buf,size_t buflen,uint64_t * start,size_t * symlen)232 do_lookup(void *data, uint64_t addr, char *buf, size_t buflen, uint64_t *start,
233 size_t *symlen)
234 {
235 dis_buffer_t *db = data;
236 const char *symbol;
237 off_t offset;
238 size_t size;
239
240 /*
241 * If NULL symbol is returned, getsymname takes care of
242 * printing appropriate address in buf instead of symbol.
243 */
244 symbol = dis_tgt_lookup(db->db_tgt, addr, &offset, 0, &size, NULL);
245
246 if (buf != NULL)
247 getsymname(addr, symbol, offset, buf, buflen);
248
249 if (start != NULL)
250 *start = addr - offset;
251 if (symlen != NULL)
252 *symlen = size;
253
254 if (symbol == NULL)
255 return (-1);
256
257 return (0);
258 }
259
260 /*
261 * libdisasm wrapper around target reading. libdisasm will always read data
262 * in order, so update our current offset within the buffer appropriately.
263 * We only support reading from within the current object; libdisasm should
264 * never ask us to do otherwise.
265 */
266 int
do_read(void * data,uint64_t addr,void * buf,size_t len)267 do_read(void *data, uint64_t addr, void *buf, size_t len)
268 {
269 dis_buffer_t *db = data;
270 size_t offset;
271
272 if (addr < db->db_addr || addr >= db->db_addr + db->db_size)
273 return (-1);
274
275 offset = addr - db->db_addr;
276 len = MIN(len, db->db_size - offset);
277
278 (void) memcpy(buf, (char *)db->db_data + offset, len);
279
280 db->db_nextaddr = addr + len;
281
282 return (len);
283 }
284
/*
 * Routine to dump raw data in a human-readable format.  Used by the -d and -D
 * options.  We model our output after the xxd(1) program, which gives nicely
 * formatted output, along with an ASCII translation of the result.
 *
 *	addr	address to display for the first byte of 'data'
 *	data	bytes to dump
 *	datalen	number of bytes in 'data'
 *
 * Output is one line per 16-byte aligned row: the row address, the bytes in
 * hex grouped in pairs, then the same bytes as ASCII with '.' standing in for
 * anything unprintable.  Positions in a row that fall before 'addr' or after
 * the last byte are left blank so the columns stay aligned.
 */
void
dump_data(uint64_t addr, void *data, size_t datalen)
{
	const uint8_t *bytes = data;
	const uint64_t end = addr + datalen;
	/*
	 * Keep the running address 64 bits wide: a pointer-sized integer
	 * would truncate 64-bit addresses in a 32-bit build.
	 */
	uint64_t curaddr = addr & ~(uint64_t)0xf;
	int width;
	int i;

	/*
	 * Determine if the address given to us fits in 32-bit range, in which
	 * case use a 4-byte (8 hex digit) width.
	 */
	if ((end & 0xffffffff00000000ULL) == 0ULL)
		width = 8;
	else
		width = 16;

	while (curaddr < end) {
		/*
		 * Display leading address.  The value is 64 bits wide, so it
		 * needs the 'll' length modifier to print all of it.
		 */
		(void) printf("%0*llx: ", width, (unsigned long long)curaddr);

		/*
		 * Print out data in two-byte chunks.  If the current address
		 * is before the starting address or after the end of the
		 * section, print two spaces in place of the two hex digits.
		 */
		for (i = 0; i < 16; i++) {
			const uint64_t cur = curaddr + i;

			if (cur < addr || cur >= end)
				(void) printf("%2s", "");
			else
				(void) printf("%02x", bytes[cur - addr]);

			if (i & 1)
				(void) printf(" ");
		}

		(void) printf(" ");

		/*
		 * Print out the ASCII representation
		 */
		for (i = 0; i < 16; i++) {
			const uint64_t cur = curaddr + i;

			if (cur < addr || cur >= end) {
				(void) printf(" ");
			} else {
				uint8_t byte = bytes[cur - addr];

				if (isprint(byte))
					(void) printf("%c", byte);
				else
					(void) printf(".");
			}
		}

		(void) printf("\n");

		curaddr += 16;
	}
}
352
353 /*
354 * Disassemble a section implicitly specified as part of a file. This function
355 * is called for all sections when no other flags are specified. We ignore any
356 * data sections, and print out only those sections containing text.
357 */
358 void
dis_text_section(dis_tgt_t * tgt,dis_scn_t * scn,void * data)359 dis_text_section(dis_tgt_t *tgt, dis_scn_t *scn, void *data)
360 {
361 dis_handle_t *dhp = data;
362
363 /* ignore data sections */
364 if (!dis_section_istext(scn))
365 return;
366
367 if (!g_quiet)
368 (void) printf("\nsection %s\n", dis_section_name(scn));
369
370 dis_data(tgt, dhp, dis_section_addr(scn), dis_section_data(scn),
371 dis_section_size(scn));
372 }
373
/*
 * Structure passed to dis_named_{section,function} which keeps track of both
 * the target and the libdisasm handle.  dis_file() fills one in for each
 * target before iterating over the resolved sections and functions.
 */
typedef struct callback_arg {
	dis_tgt_t	*ca_tgt;	/* target being disassembled */
	dis_handle_t	*ca_handle;	/* libdisasm handle for that target */
} callback_arg_t;
382
383 /*
384 * Disassemble a section explicitly named with -s, -d, or -D. The 'type'
385 * argument contains the type of argument given. Pass the data onto the
386 * appropriate helper routine.
387 */
388 void
dis_named_section(dis_scn_t * scn,int type,void * data)389 dis_named_section(dis_scn_t *scn, int type, void *data)
390 {
391 callback_arg_t *ca = data;
392
393 if (!g_quiet)
394 (void) printf("\nsection %s\n", dis_section_name(scn));
395
396 switch (type) {
397 case DIS_DATA_RELATIVE:
398 dump_data(0, dis_section_data(scn), dis_section_size(scn));
399 break;
400 case DIS_DATA_ABSOLUTE:
401 dump_data(dis_section_addr(scn), dis_section_data(scn),
402 dis_section_size(scn));
403 break;
404 case DIS_TEXT:
405 dis_data(ca->ca_tgt, ca->ca_handle, dis_section_addr(scn),
406 dis_section_data(scn), dis_section_size(scn));
407 break;
408 }
409 }
410
411 /*
412 * Disassemble a function explicitly specified with '-F'. The 'type' argument
413 * is unused.
414 */
415 /* ARGSUSED */
416 void
dis_named_function(dis_func_t * func,int type,void * data)417 dis_named_function(dis_func_t *func, int type, void *data)
418 {
419 callback_arg_t *ca = data;
420
421 dis_data(ca->ca_tgt, ca->ca_handle, dis_function_addr(func),
422 dis_function_data(func), dis_function_size(func));
423 }
424
425 /*
426 * Disassemble a complete file. First, we determine the type of the file based
427 * on the ELF machine type, and instantiate a version of the disassembler
428 * appropriate for the file. We then resolve any named sections or functions
429 * against the file, and iterate over the results (or all sections if no flags
430 * were specified).
431 */
432 void
dis_file(const char * filename)433 dis_file(const char *filename)
434 {
435 dis_tgt_t *tgt, *current;
436 dis_scnlist_t *sections;
437 dis_funclist_t *functions;
438 dis_handle_t *dhp;
439 GElf_Ehdr ehdr;
440
441 /*
442 * First, initialize the target
443 */
444 if ((tgt = dis_tgt_create(filename)) == NULL)
445 return;
446
447 if (!g_quiet)
448 (void) printf("disassembly for %s\n\n", filename);
449
450 /*
451 * A given file may contain multiple targets (if it is an archive, for
452 * example). We iterate over all possible targets if this is the case.
453 */
454 for (current = tgt; current != NULL; current = dis_tgt_next(current)) {
455 dis_tgt_ehdr(current, &ehdr);
456
457 /*
458 * Eventually, this should probably live within libdisasm, and
459 * we should be able to disassemble targets from different
460 * architectures. For now, we only support objects as the
461 * native machine type.
462 */
463 switch (ehdr.e_machine) {
464 #ifdef __sparc
465 case EM_SPARC:
466 if (ehdr.e_ident[EI_CLASS] != ELFCLASS32 ||
467 ehdr.e_ident[EI_DATA] != ELFDATA2MSB) {
468 warn("invalid E_IDENT field for SPARC object");
469 return;
470 }
471 g_flags |= DIS_SPARC_V8;
472 break;
473
474 case EM_SPARC32PLUS:
475 if (ehdr.e_ident[EI_CLASS] != ELFCLASS32 ||
476 ehdr.e_ident[EI_DATA] != ELFDATA2MSB) {
477 warn("invalid E_IDENT field for SPARC object");
478 return;
479 }
480
481 switch (ehdr.e_flags & EF_SPARC_32PLUS_MASK) {
482 case (EF_SPARC_32PLUS | EF_SPARC_SUN_US1 |
483 EF_SPARC_SUN_US3):
484 case (EF_SPARC_32PLUS | EF_SPARC_SUN_US1):
485 g_flags |= DIS_SPARC_V9 | DIS_SPARC_V9_SGI;
486 default:
487 g_flags |= DIS_SPARC_V9;
488 }
489 break;
490
491 case EM_SPARCV9:
492 if (ehdr.e_ident[EI_CLASS] != ELFCLASS64 ||
493 ehdr.e_ident[EI_DATA] != ELFDATA2MSB) {
494 warn("invalid E_IDENT field for SPARC object");
495 return;
496 }
497
498 g_flags |= DIS_SPARC_V9 | DIS_SPARC_V9_SGI;
499 break;
500 #endif /* __sparc */
501
502 #if defined(__i386) || defined(__amd64)
503 case EM_386:
504 g_flags |= DIS_X86_SIZE32;
505 break;
506
507 case EM_AMD64:
508 g_flags |= DIS_X86_SIZE64;
509 break;
510 #endif /* __i386 || __amd64 */
511
512 default:
513 die("%s: unsupported ELF machine 0x%x", filename,
514 ehdr.e_machine);
515 }
516
517 /*
518 * If ET_REL (.o), printing immediate symbols is likely to
519 * result in garbage, as symbol lookups on unrelocated
520 * immediates find false and useless matches.
521 */
522
523 if (ehdr.e_type == ET_REL)
524 g_flags |= DIS_NOIMMSYM;
525
526 if (!g_quiet && dis_tgt_member(current) != NULL)
527 (void) printf("\narchive member %s\n",
528 dis_tgt_member(current));
529
530 /*
531 * Instantiate a libdisasm handle based on the file type.
532 */
533 if ((dhp = dis_handle_create(g_flags, current, do_lookup,
534 do_read)) == NULL)
535 die("%s: failed to initialize disassembler: %s",
536 filename, dis_strerror(dis_errno()));
537
538 if (g_doall) {
539 /*
540 * With no arguments, iterate over all sections and
541 * disassemble only those that contain text.
542 */
543 dis_tgt_section_iter(current, dis_text_section, dhp);
544 } else {
545 callback_arg_t ca;
546
547 ca.ca_tgt = current;
548 ca.ca_handle = dhp;
549
550 /*
551 * If sections or functions were explicitly specified,
552 * resolve those names against the object, and iterate
553 * over just the resulting data.
554 */
555 sections = dis_namelist_resolve_sections(g_seclist,
556 current);
557 functions = dis_namelist_resolve_functions(g_funclist,
558 current);
559
560 dis_scnlist_iter(sections, dis_named_section, &ca);
561 dis_funclist_iter(functions, dis_named_function, &ca);
562
563 dis_scnlist_destroy(sections);
564 dis_funclist_destroy(functions);
565 }
566
567 dis_handle_destroy(dhp);
568 }
569
570 dis_tgt_destroy(tgt);
571 }
572
/*
 * Write the command synopsis to stderr and exit with status 2.  Does not
 * return.
 */
void
usage(void)
{
	(void) fputs("usage: dis [-CVoqn] [-d sec] \n", stderr);
	(void) fputs("\t[-D sec] [-F function] [-t sec] file ..\n", stderr);
	exit(2);
}
580
/*
 * One archive requested with '-l'.  main() keeps these on a singly linked
 * list while parsing options and disassembles and frees each entry afterwards.
 */
typedef struct lib_node {
	char		*path;	/* allocated path of the archive */
	struct lib_node	*next;	/* next entry, or NULL at the end */
} lib_node_t;
585
586 int
main(int argc,char ** argv)587 main(int argc, char **argv)
588 {
589 int optchar;
590 int i;
591 lib_node_t *libs = NULL;
592
593 g_funclist = dis_namelist_create();
594 g_seclist = dis_namelist_create();
595
596 while ((optchar = getopt(argc, argv, "Cd:D:F:l:Lot:Vqn")) != -1) {
597 switch (optchar) {
598 case 'C':
599 g_demangle = 1;
600 break;
601 case 'd':
602 dis_namelist_add(g_seclist, optarg, DIS_DATA_RELATIVE);
603 break;
604 case 'D':
605 dis_namelist_add(g_seclist, optarg, DIS_DATA_ABSOLUTE);
606 break;
607 case 'F':
608 dis_namelist_add(g_funclist, optarg, 0);
609 break;
610 case 'l': {
611 /*
612 * The '-l foo' option historically would attempt to
613 * disassemble '$LIBDIR/libfoo.a'. The $LIBDIR
614 * environment variable has never been supported or
615 * documented for our linker. However, until this
616 * option is formally EOLed, we have to support it.
617 */
618 char *dir;
619 lib_node_t *node;
620 size_t len;
621
622 if ((dir = getenv("LIBDIR")) == NULL ||
623 dir[0] == '\0')
624 dir = "/usr/lib";
625 node = safe_malloc(sizeof (lib_node_t));
626 len = strlen(optarg) + strlen(dir) + sizeof ("/lib.a");
627 node->path = safe_malloc(len);
628
629 (void) snprintf(node->path, len, "%s/lib%s.a", dir,
630 optarg);
631 node->next = libs;
632 libs = node;
633 break;
634 }
635 case 'L':
636 /*
637 * The '-L' option historically would attempt to read
638 * the .debug section of the target to determine source
639 * line information in order to annotate the output.
640 * No compiler has emitted these sections in many years,
641 * and the option has never done what it purported to
642 * do. We silently consume the option for
643 * compatibility.
644 */
645 break;
646 case 'n':
647 g_numeric = 1;
648 break;
649 case 'o':
650 g_flags |= DIS_OCTAL;
651 break;
652 case 'q':
653 g_quiet = 1;
654 break;
655 case 't':
656 dis_namelist_add(g_seclist, optarg, DIS_TEXT);
657 break;
658 case 'V':
659 (void) printf("Solaris disassembler version 1.0\n");
660 return (0);
661 default:
662 usage();
663 break;
664 }
665 }
666
667 argc -= optind;
668 argv += optind;
669
670 if (argc == 0 && libs == NULL) {
671 warn("no objects specified");
672 usage();
673 }
674
675 if (dis_namelist_empty(g_funclist) && dis_namelist_empty(g_seclist))
676 g_doall = 1;
677
678 /*
679 * See comment for 'l' option, above.
680 */
681 while (libs != NULL) {
682 lib_node_t *node = libs->next;
683
684 dis_file(libs->path);
685 free(libs->path);
686 free(libs);
687 libs = node;
688 }
689
690 for (i = 0; i < argc; i++)
691 dis_file(argv[i]);
692
693 dis_namelist_destroy(g_funclist);
694 dis_namelist_destroy(g_seclist);
695
696 return (g_error);
697 }
698