xref: /onnv-gate/usr/src/cmd/dis/dis_target.c (revision 13093:48f2dbca79a2)
/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved.
 */

#include <assert.h>
#include <errno.h>
#include <fcntl.h>
#include <gelf.h>
#include <inttypes.h>
#include <libelf.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

#include <sys/fcntl.h>
#include <sys/stat.h>

#include "dis_target.h"
#include "dis_util.h"

/*
 * Standard ELF disassembler target.
 *
 * We only support disassembly of ELF files, though this target interface could
 * be extended in the future.  Each basic type (target, func, section) contains
 * enough information to uniquely identify the location within the file.  The
 * interfaces use libelf(3LIB) to do the actual processing of the file.
 */

501545Seschrock /*
511545Seschrock  * Symbol table entry type.  We maintain our own symbol table sorted by address,
521545Seschrock  * with the symbol name already resolved against the ELF symbol table.
531545Seschrock  */
541545Seschrock typedef struct sym_entry {
551545Seschrock 	GElf_Sym	se_sym;		/* value of symbol */
561545Seschrock 	char		*se_name;	/* name of symbol */
571545Seschrock 	int		se_shndx;	/* section where symbol is located */
581545Seschrock } sym_entry_t;
591545Seschrock 
601545Seschrock /*
611545Seschrock  * Target data structure.  This structure keeps track of the ELF file
621545Seschrock  * information, a few bits of pre-processed section index information, and
631545Seschrock  * sorted versions of the symbol table.  We also keep track of the last symbol
641545Seschrock  * looked up, as the majority of lookups remain within the same symbol.
651545Seschrock  */
661545Seschrock struct dis_tgt {
671545Seschrock 	Elf		*dt_elf;	/* libelf handle */
681545Seschrock 	Elf		*dt_elf_root;	/* main libelf handle (for archives) */
691545Seschrock 	const char	*dt_filename;	/* name of file */
701545Seschrock 	int		dt_fd;		/* underlying file descriptor */
711545Seschrock 	size_t		dt_shstrndx;	/* section index of .shstrtab */
721545Seschrock 	size_t		dt_symidx;	/* section index of symbol table */
731545Seschrock 	sym_entry_t	*dt_symcache;	/* last symbol looked up */
741545Seschrock 	sym_entry_t	*dt_symtab;	/* sorted symbol table */
751545Seschrock 	int		dt_symcount;	/* # of symbol table entries */
761545Seschrock 	struct dis_tgt	*dt_next;	/* next target (for archives) */
771545Seschrock 	Elf_Arhdr	*dt_arhdr;	/* archive header (for archives) */
781545Seschrock };
791545Seschrock 
801545Seschrock /*
811545Seschrock  * Function data structure.  We resolve the symbol and lookup the associated ELF
821545Seschrock  * data when building this structure.  The offset is calculated based on the
831545Seschrock  * section's starting address.
841545Seschrock  */
851545Seschrock struct dis_func {
861545Seschrock 	sym_entry_t	*df_sym;	/* symbol table reference */
871545Seschrock 	Elf_Data	*df_data;	/* associated ELF data */
881545Seschrock 	size_t		df_offset;	/* offset within data */
891545Seschrock };
901545Seschrock 
911545Seschrock /*
921545Seschrock  * Section data structure.  We store the entire section header so that we can
931545Seschrock  * determine some properties (such as whether or not it contains text) after
941545Seschrock  * building the structure.
951545Seschrock  */
961545Seschrock struct dis_scn {
971545Seschrock 	GElf_Shdr	ds_shdr;
981545Seschrock 	const char	*ds_name;
991545Seschrock 	Elf_Data	*ds_data;
1001545Seschrock };
1011545Seschrock 
/*
 * Bitmask of the symbol types we keep in our symbol table, one bit per
 * STT_* value.  Lifted from Psymtab.c, omitting STT_TLS.
 */
#define	DATA_TYPES	\
	((1 << STT_OBJECT) | (1 << STT_FUNC) | (1 << STT_COMMON))
#define	IS_DATA_TYPE(tp)	((DATA_TYPES & (1 << (tp))) != 0)

1071545Seschrock /*
1081545Seschrock  * Pick out the best symbol to used based on the sections available in the
1091545Seschrock  * target.  We prefer SHT_SYMTAB over SHT_DYNSYM.
1101545Seschrock  */
1111545Seschrock /* ARGSUSED */
1121545Seschrock static void
get_symtab(dis_tgt_t * tgt,dis_scn_t * scn,void * data)1131545Seschrock get_symtab(dis_tgt_t *tgt, dis_scn_t *scn, void *data)
1141545Seschrock {
1151545Seschrock 	int *index = data;
1161545Seschrock 
1171545Seschrock 	*index += 1;
1181545Seschrock 
1191545Seschrock 	/*
1201545Seschrock 	 * Prefer SHT_SYMTAB over SHT_DYNSYM
1211545Seschrock 	 */
1221545Seschrock 	if (scn->ds_shdr.sh_type == SHT_DYNSYM && tgt->dt_symidx == 0)
1231545Seschrock 		tgt->dt_symidx = *index;
1241545Seschrock 	else if (scn->ds_shdr.sh_type == SHT_SYMTAB)
1251545Seschrock 		tgt->dt_symidx = *index;
1261545Seschrock }
1271545Seschrock 
1281545Seschrock static int
sym_compare(const void * a,const void * b)1291545Seschrock sym_compare(const void *a, const void *b)
1301545Seschrock {
1311545Seschrock 	const sym_entry_t *syma = a;
1321545Seschrock 	const sym_entry_t *symb = b;
1331545Seschrock 	const char *aname = syma->se_name;
1341545Seschrock 	const char *bname = symb->se_name;
1351545Seschrock 
1361545Seschrock 	if (syma->se_sym.st_value < symb->se_sym.st_value)
1371545Seschrock 		return (-1);
1381545Seschrock 
1391545Seschrock 	if (syma->se_sym.st_value > symb->se_sym.st_value)
1401545Seschrock 		return (1);
1411545Seschrock 
1421545Seschrock 	/*
1431545Seschrock 	 * Prefer functions over non-functions
1441545Seschrock 	 */
1451545Seschrock 	if (GELF_ST_TYPE(syma->se_sym.st_info) !=
1461545Seschrock 	    GELF_ST_TYPE(symb->se_sym.st_info)) {
1471545Seschrock 		if (GELF_ST_TYPE(syma->se_sym.st_info) == STT_FUNC)
1481545Seschrock 			return (-1);
1491545Seschrock 		if (GELF_ST_TYPE(symb->se_sym.st_info) == STT_FUNC)
1501545Seschrock 			return (1);
1511545Seschrock 	}
1521545Seschrock 
1531545Seschrock 	/*
1541545Seschrock 	 * For symbols with the same address and type, we sort them according to
1551545Seschrock 	 * a hierarchy:
1561545Seschrock 	 *
1571545Seschrock 	 * 	1. weak symbols (common name)
1581545Seschrock 	 * 	2. global symbols (external name)
1591545Seschrock 	 * 	3. local symbols
1601545Seschrock 	 */
1611545Seschrock 	if (GELF_ST_BIND(syma->se_sym.st_info) !=
1621545Seschrock 	    GELF_ST_BIND(symb->se_sym.st_info)) {
1631545Seschrock 		if (GELF_ST_BIND(syma->se_sym.st_info) == STB_WEAK)
1641545Seschrock 			return (-1);
1651545Seschrock 		if (GELF_ST_BIND(symb->se_sym.st_info) == STB_WEAK)
1661545Seschrock 			return (1);
1671545Seschrock 
1681545Seschrock 		if (GELF_ST_BIND(syma->se_sym.st_info) == STB_GLOBAL)
1691545Seschrock 			return (-1);
1701545Seschrock 		if (GELF_ST_BIND(symb->se_sym.st_info) == STB_GLOBAL)
1711545Seschrock 			return (1);
1721545Seschrock 	}
1731545Seschrock 
1741545Seschrock 	/*
1751545Seschrock 	 * As a last resort, if we have multiple symbols of the same type at the
1761545Seschrock 	 * same address, prefer the version with the fewest leading underscores.
1771545Seschrock 	 */
1781545Seschrock 	if (aname == NULL)
1791545Seschrock 		return (-1);
1801545Seschrock 	if (bname == NULL)
1811545Seschrock 		return (1);
1821545Seschrock 
1831545Seschrock 	while (*aname == '_' && *bname == '_') {
1841545Seschrock 		aname++;
1851545Seschrock 		bname++;
1861545Seschrock 	}
1871545Seschrock 
1881545Seschrock 	if (*bname == '_')
1891545Seschrock 		return (-1);
1901545Seschrock 	if (*aname == '_')
1911545Seschrock 		return (1);
1921545Seschrock 
1931545Seschrock 	/*
1941545Seschrock 	 * Prefer the symbol with the smaller size.
1951545Seschrock 	 */
1961545Seschrock 	if (syma->se_sym.st_size < symb->se_sym.st_size)
1971545Seschrock 		return (-1);
1981545Seschrock 	if (syma->se_sym.st_size > symb->se_sym.st_size)
1991545Seschrock 		return (1);
2001545Seschrock 
2011545Seschrock 	/*
2021545Seschrock 	 * We really do have two identical symbols for some reason.  Just report
2031545Seschrock 	 * them as equal, and to the lucky one go the spoils.
2041545Seschrock 	 */
2051545Seschrock 	return (0);
2061545Seschrock }
2071545Seschrock 
2081545Seschrock /*
2091545Seschrock  * Construct an optimized symbol table sorted by starting address.
2101545Seschrock  */
2111545Seschrock static void
construct_symtab(dis_tgt_t * tgt)2121545Seschrock construct_symtab(dis_tgt_t *tgt)
2131545Seschrock {
2141545Seschrock 	Elf_Scn *scn;
2151545Seschrock 	GElf_Shdr shdr;
2161545Seschrock 	Elf_Data *symdata;
2171545Seschrock 	int i;
2181545Seschrock 	GElf_Word *symshndx = NULL;
2191545Seschrock 	int symshndx_size;
2201545Seschrock 	sym_entry_t *sym;
2211545Seschrock 	sym_entry_t *p_symtab = NULL;
2221545Seschrock 	int nsym = 0; /* count of symbols we're not interested in */
2231545Seschrock 
2241545Seschrock 	/*
2251545Seschrock 	 * Find the symshndx section, if any
2261545Seschrock 	 */
2271545Seschrock 	for (scn = elf_nextscn(tgt->dt_elf, NULL); scn != NULL;
2281545Seschrock 	    scn = elf_nextscn(tgt->dt_elf, scn)) {
2291545Seschrock 		if (gelf_getshdr(scn, &shdr) == NULL)
2301545Seschrock 			break;
2311545Seschrock 		if (shdr.sh_type == SHT_SYMTAB_SHNDX &&
2321545Seschrock 		    shdr.sh_link == tgt->dt_symidx) {
2331545Seschrock 			Elf_Data	*data;
2341545Seschrock 
2351545Seschrock 			if ((data = elf_getdata(scn, NULL)) != NULL) {
2361545Seschrock 				symshndx = (GElf_Word *)data->d_buf;
2371545Seschrock 				symshndx_size = data->d_size /
2381545Seschrock 				    sizeof (GElf_Word);
2391545Seschrock 				break;
2401545Seschrock 			}
2411545Seschrock 		}
2421545Seschrock 	}
2431545Seschrock 
2441545Seschrock 	if ((scn = elf_getscn(tgt->dt_elf, tgt->dt_symidx)) == NULL)
2451545Seschrock 		die("%s: failed to get section information", tgt->dt_filename);
2461545Seschrock 	if (gelf_getshdr(scn, &shdr) == NULL)
2471545Seschrock 		die("%s: failed to get section header", tgt->dt_filename);
2481545Seschrock 	if (shdr.sh_entsize == 0)
2491545Seschrock 		die("%s: symbol table has zero size", tgt->dt_filename);
2501545Seschrock 
2511545Seschrock 	if ((symdata = elf_getdata(scn, NULL)) == NULL)
2521545Seschrock 		die("%s: failed to get symbol table", tgt->dt_filename);
2531545Seschrock 
2541545Seschrock 	tgt->dt_symcount = symdata->d_size / gelf_fsize(tgt->dt_elf, ELF_T_SYM,
2556529Sjmcp 	    1, EV_CURRENT);
2561545Seschrock 
2571545Seschrock 	p_symtab = safe_malloc(tgt->dt_symcount * sizeof (sym_entry_t));
2581545Seschrock 
2591545Seschrock 	for (i = 0, sym = p_symtab; i < tgt->dt_symcount; i++) {
2601545Seschrock 		if (gelf_getsym(symdata, i, &(sym->se_sym)) == NULL) {
2611545Seschrock 			warn("%s: gelf_getsym returned NULL for %d",
2626529Sjmcp 			    tgt->dt_filename, i);
2631545Seschrock 			nsym++;
2641545Seschrock 			continue;
2651545Seschrock 		}
2661545Seschrock 
2671545Seschrock 		/*
2681545Seschrock 		 * We're only interested in data symbols.
2691545Seschrock 		 */
2701545Seschrock 		if (!IS_DATA_TYPE(GELF_ST_TYPE(sym->se_sym.st_info))) {
2711545Seschrock 			nsym++;
2721545Seschrock 			continue;
2731545Seschrock 		}
2741545Seschrock 
2751545Seschrock 		if (sym->se_sym.st_shndx == SHN_XINDEX && symshndx != NULL) {
2761545Seschrock 			if (i > symshndx_size) {
2771545Seschrock 				warn("%s: bad SHNX_XINDEX %d",
2786529Sjmcp 				    tgt->dt_filename, i);
2791545Seschrock 				sym->se_shndx = -1;
2801545Seschrock 			} else {
2811545Seschrock 				sym->se_shndx = symshndx[i];
2821545Seschrock 			}
2831545Seschrock 		} else {
2841545Seschrock 			sym->se_shndx = sym->se_sym.st_shndx;
2851545Seschrock 		}
2861545Seschrock 
2871545Seschrock 		if ((sym->se_name = elf_strptr(tgt->dt_elf, shdr.sh_link,
2881545Seschrock 		    (size_t)sym->se_sym.st_name)) == NULL) {
2891545Seschrock 			warn("%s: failed to lookup symbol %d name",
2906529Sjmcp 			    tgt->dt_filename, i);
2911545Seschrock 			nsym++;
2921545Seschrock 			continue;
2931545Seschrock 		}
2941545Seschrock 
2951545Seschrock 		sym++;
2961545Seschrock 	}
2971545Seschrock 
2981545Seschrock 	tgt->dt_symcount -= nsym;
2996529Sjmcp 	tgt->dt_symtab = realloc(p_symtab, tgt->dt_symcount *
3006529Sjmcp 	    sizeof (sym_entry_t));
3011545Seschrock 
3021545Seschrock 	qsort(tgt->dt_symtab, tgt->dt_symcount, sizeof (sym_entry_t),
3031545Seschrock 	    sym_compare);
3041545Seschrock }
3051545Seschrock 
3061545Seschrock /*
3071545Seschrock  * Create a target backed by an ELF file.
3081545Seschrock  */
3091545Seschrock dis_tgt_t *
dis_tgt_create(const char * file)3101545Seschrock dis_tgt_create(const char *file)
3111545Seschrock {
3121545Seschrock 	dis_tgt_t *tgt, *current;
3131545Seschrock 	int idx;
3141545Seschrock 	Elf *elf;
3151545Seschrock 	GElf_Ehdr ehdr;
3161545Seschrock 	Elf_Arhdr *arhdr = NULL;
3171545Seschrock 	int cmd;
3181545Seschrock 
3191545Seschrock 	if (elf_version(EV_CURRENT) == EV_NONE)
3201545Seschrock 		die("libelf(3ELF) out of date");
3211545Seschrock 
3221545Seschrock 	tgt = safe_malloc(sizeof (dis_tgt_t));
3231545Seschrock 
3241545Seschrock 	if ((tgt->dt_fd = open(file, O_RDONLY)) < 0) {
3251545Seschrock 		warn("%s: failed opening file, reason: %s", file,
3266529Sjmcp 		    strerror(errno));
3271545Seschrock 		free(tgt);
3281545Seschrock 		return (NULL);
3291545Seschrock 	}
3301545Seschrock 
3311545Seschrock 	if ((tgt->dt_elf_root =
3321545Seschrock 	    elf_begin(tgt->dt_fd, ELF_C_READ, NULL)) == NULL) {
3331545Seschrock 		warn("%s: invalid or corrupt ELF file", file);
3341545Seschrock 		dis_tgt_destroy(tgt);
3351545Seschrock 		return (NULL);
3361545Seschrock 	}
3371545Seschrock 
3381545Seschrock 	current = tgt;
3391545Seschrock 	cmd = ELF_C_READ;
3401545Seschrock 	while ((elf = elf_begin(tgt->dt_fd, cmd, tgt->dt_elf_root)) != NULL) {
3411545Seschrock 
3421545Seschrock 		if (elf_kind(tgt->dt_elf_root) == ELF_K_AR &&
3431545Seschrock 		    (arhdr = elf_getarhdr(elf)) == NULL) {
3441545Seschrock 			warn("%s: malformed archive", file);
3451545Seschrock 			dis_tgt_destroy(tgt);
3461545Seschrock 			return (NULL);
3471545Seschrock 		}
3481545Seschrock 
3491545Seschrock 		/*
3501545Seschrock 		 * Make sure that this Elf file is sane
3511545Seschrock 		 */
3521545Seschrock 		if (gelf_getehdr(elf, &ehdr) == NULL) {
3531545Seschrock 			if (arhdr != NULL) {
3541545Seschrock 				/*
3551545Seschrock 				 * For archives, we drive on in the face of bad
3561545Seschrock 				 * members.  The "/" and "//" members are
3571545Seschrock 				 * special, and should be silently ignored.
3581545Seschrock 				 */
3591545Seschrock 				if (strcmp(arhdr->ar_name, "/") != 0 &&
3601545Seschrock 				    strcmp(arhdr->ar_name, "//") != 0)
3611545Seschrock 					warn("%s[%s]: invalid file type",
3621545Seschrock 					    file, arhdr->ar_name);
3631545Seschrock 				cmd = elf_next(elf);
3641545Seschrock 				(void) elf_end(elf);
3651545Seschrock 				continue;
3661545Seschrock 			}
3671545Seschrock 
3681545Seschrock 			warn("%s: invalid file type", file);
3691545Seschrock 			dis_tgt_destroy(tgt);
3701545Seschrock 			return (NULL);
3711545Seschrock 		}
3721545Seschrock 
3731545Seschrock 		/*
3741545Seschrock 		 * If we're seeing a new Elf object, then we have an
3751545Seschrock 		 * archive. In this case, we create a new target, and chain it
3761545Seschrock 		 * off the master target.  We can later iterate over these
3771545Seschrock 		 * targets using dis_tgt_next().
3781545Seschrock 		 */
3791545Seschrock 		if (current->dt_elf != NULL) {
3801545Seschrock 			dis_tgt_t *next = safe_malloc(sizeof (dis_tgt_t));
3811545Seschrock 			next->dt_elf_root = tgt->dt_elf_root;
3821545Seschrock 			next->dt_fd = -1;
3831545Seschrock 			current->dt_next = next;
3841545Seschrock 			current = next;
3851545Seschrock 		}
3861545Seschrock 		current->dt_elf = elf;
3871545Seschrock 		current->dt_arhdr = arhdr;
3881545Seschrock 
3899900SAli.Bahrami@Sun.COM 		if (elf_getshdrstrndx(elf, &current->dt_shstrndx) == -1) {
3901545Seschrock 			warn("%s: failed to get section string table for "
3911545Seschrock 			    "file", file);
3921545Seschrock 			dis_tgt_destroy(tgt);
3931545Seschrock 			return (NULL);
3941545Seschrock 		}
3951545Seschrock 
3961545Seschrock 		idx = 0;
3971545Seschrock 		dis_tgt_section_iter(current, get_symtab, &idx);
3981545Seschrock 
3991545Seschrock 		if (current->dt_symidx != 0)
4001545Seschrock 			construct_symtab(current);
4011545Seschrock 
4021545Seschrock 		current->dt_filename = file;
4031545Seschrock 
4041545Seschrock 		cmd = elf_next(elf);
4051545Seschrock 	}
4061545Seschrock 
4071545Seschrock 	/*
4081545Seschrock 	 * Final sanity check.  If we had an archive with no members, then bail
4091545Seschrock 	 * out with a nice message.
4101545Seschrock 	 */
4111545Seschrock 	if (tgt->dt_elf == NULL) {
4121545Seschrock 		warn("%s: empty archive\n", file);
4131545Seschrock 		dis_tgt_destroy(tgt);
4141545Seschrock 		return (NULL);
4151545Seschrock 	}
4161545Seschrock 
4171545Seschrock 	return (tgt);
4181545Seschrock }
4191545Seschrock 
4201545Seschrock /*
4211545Seschrock  * Return the filename associated with the target.
4221545Seschrock  */
4231545Seschrock const char *
dis_tgt_name(dis_tgt_t * tgt)4241545Seschrock dis_tgt_name(dis_tgt_t *tgt)
4251545Seschrock {
4261545Seschrock 	return (tgt->dt_filename);
4271545Seschrock }
4281545Seschrock 
4291545Seschrock /*
4301545Seschrock  * Return the archive member name, if any.
4311545Seschrock  */
4321545Seschrock const char *
dis_tgt_member(dis_tgt_t * tgt)4331545Seschrock dis_tgt_member(dis_tgt_t *tgt)
4341545Seschrock {
4351545Seschrock 	if (tgt->dt_arhdr)
4361545Seschrock 		return (tgt->dt_arhdr->ar_name);
4371545Seschrock 	else
4381545Seschrock 		return (NULL);
4391545Seschrock }
4401545Seschrock 
4411545Seschrock /*
4421545Seschrock  * Return the Elf_Ehdr associated with this target.  Needed to determine which
4431545Seschrock  * disassembler to use.
4441545Seschrock  */
4451545Seschrock void
dis_tgt_ehdr(dis_tgt_t * tgt,GElf_Ehdr * ehdr)4461545Seschrock dis_tgt_ehdr(dis_tgt_t *tgt, GElf_Ehdr *ehdr)
4471545Seschrock {
4481545Seschrock 	(void) gelf_getehdr(tgt->dt_elf, ehdr);
4491545Seschrock }
4501545Seschrock 
4511545Seschrock /*
4521545Seschrock  * Return the next target in the list, if this is an archive.
4531545Seschrock  */
4541545Seschrock dis_tgt_t *
dis_tgt_next(dis_tgt_t * tgt)4551545Seschrock dis_tgt_next(dis_tgt_t *tgt)
4561545Seschrock {
4571545Seschrock 	return (tgt->dt_next);
4581545Seschrock }
4591545Seschrock 
4601545Seschrock /*
4611545Seschrock  * Destroy a target and free up any associated memory.
4621545Seschrock  */
4631545Seschrock void
dis_tgt_destroy(dis_tgt_t * tgt)4641545Seschrock dis_tgt_destroy(dis_tgt_t *tgt)
4651545Seschrock {
4661545Seschrock 	dis_tgt_t *current, *next;
4671545Seschrock 
4681545Seschrock 	current = tgt->dt_next;
4691545Seschrock 	while (current != NULL) {
4701545Seschrock 		next = current->dt_next;
4711545Seschrock 		if (current->dt_elf)
4721545Seschrock 			(void) elf_end(current->dt_elf);
4731545Seschrock 		if (current->dt_symtab)
4741545Seschrock 			free(current->dt_symtab);
4751545Seschrock 		free(current);
4761545Seschrock 		current = next;
4771545Seschrock 	}
4781545Seschrock 
4791545Seschrock 	if (tgt->dt_elf)
4801545Seschrock 		(void) elf_end(tgt->dt_elf);
4811545Seschrock 	if (tgt->dt_elf_root)
4821545Seschrock 		(void) elf_end(tgt->dt_elf_root);
4831545Seschrock 
4841545Seschrock 	if (tgt->dt_symtab)
4851545Seschrock 		free(tgt->dt_symtab);
4861545Seschrock 
4871545Seschrock 	free(tgt);
4881545Seschrock }
4891545Seschrock 
4901545Seschrock /*
4911545Seschrock  * Given an address, returns the name of the corresponding symbol, as well as
4921545Seschrock  * the offset within that symbol.  If no matching symbol is found, then NULL is
4931545Seschrock  * returned.
4941545Seschrock  *
4951545Seschrock  * If 'cache_result' is specified, then we keep track of the resulting symbol.
4961545Seschrock  * This cached result is consulted first on subsequent lookups in order to avoid
4971545Seschrock  * unecessary lookups.  This flag should be used for resolving the current PC,
4981545Seschrock  * as the majority of addresses stay within the current function.
4991545Seschrock  */
5001545Seschrock const char *
dis_tgt_lookup(dis_tgt_t * tgt,uint64_t addr,off_t * offset,int cache_result,size_t * size,int * isfunc)5011545Seschrock dis_tgt_lookup(dis_tgt_t *tgt, uint64_t addr, off_t *offset, int cache_result,
5021545Seschrock     size_t *size, int *isfunc)
5031545Seschrock {
5041545Seschrock 	int lo, hi, mid;
5051545Seschrock 	sym_entry_t *sym, *osym, *match;
5061545Seschrock 	int found;
5071545Seschrock 
5081545Seschrock 	if (tgt->dt_symcache != NULL &&
5091545Seschrock 	    addr >= tgt->dt_symcache->se_sym.st_value &&
5101545Seschrock 	    addr < tgt->dt_symcache->se_sym.st_value +
5111545Seschrock 	    tgt->dt_symcache->se_sym.st_size) {
5121545Seschrock 		*offset = addr - tgt->dt_symcache->se_sym.st_value;
5131545Seschrock 		*size = tgt->dt_symcache->se_sym.st_size;
5141545Seschrock 		return (tgt->dt_symcache->se_name);
5151545Seschrock 	}
5161545Seschrock 
5171545Seschrock 	lo = 0;
5181545Seschrock 	hi = (tgt->dt_symcount - 1);
5191545Seschrock 	found = 0;
5201545Seschrock 	match = osym = NULL;
5211545Seschrock 	while (lo <= hi) {
5221545Seschrock 		mid = (lo + hi) / 2;
5231545Seschrock 
5241545Seschrock 		sym = &tgt->dt_symtab[mid];
5251545Seschrock 
5261545Seschrock 		if (addr >= sym->se_sym.st_value &&
5271545Seschrock 		    addr < sym->se_sym.st_value + sym->se_sym.st_size &&
5281545Seschrock 		    (!found || sym->se_sym.st_value > osym->se_sym.st_value)) {
5291545Seschrock 			osym = sym;
5301545Seschrock 			found = 1;
5311545Seschrock 		} else if (addr == sym->se_sym.st_value) {
5321545Seschrock 			/*
5331545Seschrock 			 * Particularly for .plt objects, it's possible to have
5341545Seschrock 			 * a zero sized object.  We want to return this, but we
5351545Seschrock 			 * want it to be a last resort.
5361545Seschrock 			 */
5371545Seschrock 			match = sym;
5381545Seschrock 		}
5391545Seschrock 
5401545Seschrock 		if (addr < sym->se_sym.st_value)
5411545Seschrock 			hi = mid - 1;
5421545Seschrock 		else
5431545Seschrock 			lo = mid + 1;
5441545Seschrock 	}
5451545Seschrock 
5461545Seschrock 	if (!found) {
5471545Seschrock 		if (match)
5481545Seschrock 			osym = match;
5491545Seschrock 		else
5501545Seschrock 			return (NULL);
5511545Seschrock 	}
5521545Seschrock 
5531545Seschrock 	/*
5541545Seschrock 	 * Walk backwards to find the best match.
5551545Seschrock 	 */
5561545Seschrock 	do {
5571545Seschrock 		sym = osym;
5581545Seschrock 
5591545Seschrock 		if (osym == tgt->dt_symtab)
5601545Seschrock 			break;
5611545Seschrock 
5621545Seschrock 		osym = osym - 1;
5631545Seschrock 	} while ((sym->se_sym.st_value == osym->se_sym.st_value) &&
5646529Sjmcp 	    (addr >= osym->se_sym.st_value) &&
5656529Sjmcp 	    (addr < osym->se_sym.st_value + osym->se_sym.st_size));
5661545Seschrock 
5671545Seschrock 	if (cache_result)
5681545Seschrock 		tgt->dt_symcache = sym;
5691545Seschrock 
5701545Seschrock 	*offset = addr - sym->se_sym.st_value;
5711545Seschrock 	*size = sym->se_sym.st_size;
5721545Seschrock 	if (isfunc)
5731545Seschrock 		*isfunc = (GELF_ST_TYPE(sym->se_sym.st_info) == STT_FUNC);
5741545Seschrock 
5751545Seschrock 	return (sym->se_name);
5761545Seschrock }
5771545Seschrock 
5781545Seschrock /*
5791545Seschrock  * Given an address, return the starting offset of the next symbol in the file.
5801545Seschrock  * Relies on the fact that this is only used when we encounter a bad instruction
5811545Seschrock  * in the input stream, so we know that the last symbol looked up will be in the
5821545Seschrock  * cache.
5831545Seschrock  */
5841545Seschrock off_t
dis_tgt_next_symbol(dis_tgt_t * tgt,uint64_t addr)5851545Seschrock dis_tgt_next_symbol(dis_tgt_t *tgt, uint64_t addr)
5861545Seschrock {
5871545Seschrock 	sym_entry_t *sym = tgt->dt_symcache;
5881545Seschrock 	uint64_t start;
5891545Seschrock 
5901545Seschrock 	/* make sure the cached symbol and address are valid */
5911545Seschrock 	if (sym == NULL || addr < sym->se_sym.st_value ||
5921545Seschrock 	    addr >= sym->se_sym.st_value + sym->se_sym.st_size)
5931545Seschrock 		return (0);
5941545Seschrock 
5951545Seschrock 	start = sym->se_sym.st_value;
5961545Seschrock 
5971545Seschrock 	/* find the next symbol */
5981545Seschrock 	while (sym != tgt->dt_symtab + tgt->dt_symcount &&
5991545Seschrock 	    sym->se_sym.st_value == start)
6001545Seschrock 		sym++;
6011545Seschrock 
6021545Seschrock 	return (sym->se_sym.st_value - addr);
6031545Seschrock }
6041545Seschrock 
6051545Seschrock /*
6061545Seschrock  * Iterate over all sections in the target, executing the given callback for
6071545Seschrock  * each.
6081545Seschrock  */
6091545Seschrock void
dis_tgt_section_iter(dis_tgt_t * tgt,section_iter_f func,void * data)6101545Seschrock dis_tgt_section_iter(dis_tgt_t *tgt, section_iter_f func, void *data)
6111545Seschrock {
6121545Seschrock 	dis_scn_t sdata;
6131545Seschrock 	Elf_Scn *scn;
6141545Seschrock 	int idx;
6151545Seschrock 
6161545Seschrock 	for (scn = elf_nextscn(tgt->dt_elf, NULL), idx = 1; scn != NULL;
6171545Seschrock 	    scn = elf_nextscn(tgt->dt_elf, scn), idx++) {
6181545Seschrock 
6191545Seschrock 		if (gelf_getshdr(scn, &sdata.ds_shdr) == NULL) {
6201545Seschrock 			warn("%s: failed to get section %d header",
6211545Seschrock 			    tgt->dt_filename, idx);
6221545Seschrock 			continue;
6231545Seschrock 		}
6241545Seschrock 
6251545Seschrock 		if ((sdata.ds_name = elf_strptr(tgt->dt_elf, tgt->dt_shstrndx,
6261545Seschrock 		    sdata.ds_shdr.sh_name)) == NULL) {
6271545Seschrock 			warn("%s: failed to get section %d name",
6281545Seschrock 			    tgt->dt_filename, idx);
6291545Seschrock 			continue;
6301545Seschrock 		}
6311545Seschrock 
6321545Seschrock 		if ((sdata.ds_data = elf_getdata(scn, NULL)) == NULL) {
6331545Seschrock 			warn("%s: failed to get data for section '%s'",
6341545Seschrock 			    tgt->dt_filename, sdata.ds_name);
6351545Seschrock 			continue;
6361545Seschrock 		}
6371545Seschrock 
6381545Seschrock 		func(tgt, &sdata, data);
6391545Seschrock 	}
6401545Seschrock }
6411545Seschrock 
6421545Seschrock /*
6431545Seschrock  * Return 1 if the given section contains text, 0 otherwise.
6441545Seschrock  */
6451545Seschrock int
dis_section_istext(dis_scn_t * scn)6461545Seschrock dis_section_istext(dis_scn_t *scn)
6471545Seschrock {
6481545Seschrock 	return ((scn->ds_shdr.sh_type == SHT_PROGBITS) &&
6491545Seschrock 	    (scn->ds_shdr.sh_flags == (SHF_ALLOC | SHF_EXECINSTR)));
6501545Seschrock }
6511545Seschrock 
6521545Seschrock /*
6531545Seschrock  * Return a pointer to the section data.
6541545Seschrock  */
6551545Seschrock void *
dis_section_data(dis_scn_t * scn)6561545Seschrock dis_section_data(dis_scn_t *scn)
6571545Seschrock {
6581545Seschrock 	return (scn->ds_data->d_buf);
6591545Seschrock }
6601545Seschrock 
6611545Seschrock /*
6621545Seschrock  * Return the size of the section data.
6631545Seschrock  */
6641545Seschrock size_t
dis_section_size(dis_scn_t * scn)6651545Seschrock dis_section_size(dis_scn_t *scn)
6661545Seschrock {
6671545Seschrock 	return (scn->ds_data->d_size);
6681545Seschrock }
6691545Seschrock 
6701545Seschrock /*
6711545Seschrock  * Return the address for the given section.
6721545Seschrock  */
6731545Seschrock uint64_t
dis_section_addr(dis_scn_t * scn)6741545Seschrock dis_section_addr(dis_scn_t *scn)
6751545Seschrock {
6761545Seschrock 	return (scn->ds_shdr.sh_addr);
6771545Seschrock }
6781545Seschrock 
6791545Seschrock /*
6801545Seschrock  * Return the name of the current section.
6811545Seschrock  */
6821545Seschrock const char *
dis_section_name(dis_scn_t * scn)6831545Seschrock dis_section_name(dis_scn_t *scn)
6841545Seschrock {
6851545Seschrock 	return (scn->ds_name);
6861545Seschrock }
6871545Seschrock 
6881545Seschrock /*
6891545Seschrock  * Create an allocated copy of the given section
6901545Seschrock  */
6911545Seschrock dis_scn_t *
dis_section_copy(dis_scn_t * scn)6921545Seschrock dis_section_copy(dis_scn_t *scn)
6931545Seschrock {
6941545Seschrock 	dis_scn_t *new;
6951545Seschrock 
6961545Seschrock 	new = safe_malloc(sizeof (dis_scn_t));
6971545Seschrock 	(void) memcpy(new, scn, sizeof (dis_scn_t));
6981545Seschrock 
6991545Seschrock 	return (new);
7001545Seschrock }
7011545Seschrock 
7021545Seschrock /*
7031545Seschrock  * Free section memory
7041545Seschrock  */
7051545Seschrock void
dis_section_free(dis_scn_t * scn)7061545Seschrock dis_section_free(dis_scn_t *scn)
7071545Seschrock {
7081545Seschrock 	free(scn);
7091545Seschrock }
7101545Seschrock 
7111545Seschrock /*
7121545Seschrock  * Iterate over all functions in the target, executing the given callback for
7131545Seschrock  * each one.
7141545Seschrock  */
7151545Seschrock void
dis_tgt_function_iter(dis_tgt_t * tgt,function_iter_f func,void * data)7161545Seschrock dis_tgt_function_iter(dis_tgt_t *tgt, function_iter_f func, void *data)
7171545Seschrock {
7181545Seschrock 	int i;
7191545Seschrock 	sym_entry_t *sym;
7201545Seschrock 	dis_func_t df;
7211545Seschrock 	Elf_Scn *scn;
7221545Seschrock 	GElf_Shdr	shdr;
7231545Seschrock 
7241545Seschrock 	for (i = 0, sym = tgt->dt_symtab; i < tgt->dt_symcount; i++, sym++) {
7251545Seschrock 
7261545Seschrock 		/* ignore non-functions */
7271545Seschrock 		if ((GELF_ST_TYPE(sym->se_sym.st_info) != STT_FUNC) ||
7282355Srie 		    (sym->se_name == NULL) ||
7292355Srie 		    (sym->se_sym.st_size == 0) ||
7302355Srie 		    (sym->se_shndx >= SHN_LORESERVE))
7311545Seschrock 			continue;
7321545Seschrock 
7331545Seschrock 		/* get the ELF data associated with this function */
7341545Seschrock 		if ((scn = elf_getscn(tgt->dt_elf, sym->se_shndx)) == NULL ||
7351545Seschrock 		    gelf_getshdr(scn, &shdr) == NULL ||
7361545Seschrock 		    (df.df_data = elf_getdata(scn, NULL)) == NULL ||
7371545Seschrock 		    df.df_data->d_size == 0) {
7381545Seschrock 			warn("%s: failed to read section %d",
7391545Seschrock 			    tgt->dt_filename, sym->se_shndx);
7401545Seschrock 			continue;
7411545Seschrock 		}
7421545Seschrock 
7431545Seschrock 		/*
7441545Seschrock 		 * Verify that the address lies within the section that we think
7451545Seschrock 		 * it does.
7461545Seschrock 		 */
7471545Seschrock 		if (sym->se_sym.st_value < shdr.sh_addr ||
7481545Seschrock 		    (sym->se_sym.st_value + sym->se_sym.st_size) >
7491545Seschrock 		    (shdr.sh_addr + shdr.sh_size)) {
7501545Seschrock 			warn("%s: bad section %d for address %p",
7516529Sjmcp 			    tgt->dt_filename, sym->se_sym.st_shndx,
7526529Sjmcp 			    sym->se_sym.st_value);
7531545Seschrock 			continue;
7541545Seschrock 		}
7551545Seschrock 
7561545Seschrock 		df.df_sym = sym;
7571545Seschrock 		df.df_offset = sym->se_sym.st_value - shdr.sh_addr;
7581545Seschrock 
7591545Seschrock 		func(tgt, &df, data);
7601545Seschrock 	}
7611545Seschrock }
7621545Seschrock 
7631545Seschrock /*
7641545Seschrock  * Return the data associated with a given function.
7651545Seschrock  */
7661545Seschrock void *
dis_function_data(dis_func_t * func)7671545Seschrock dis_function_data(dis_func_t *func)
7681545Seschrock {
7691545Seschrock 	return ((char *)func->df_data->d_buf + func->df_offset);
7701545Seschrock }
7711545Seschrock 
7721545Seschrock /*
7731545Seschrock  * Return the size of a function.
7741545Seschrock  */
7751545Seschrock size_t
dis_function_size(dis_func_t * func)7761545Seschrock dis_function_size(dis_func_t *func)
7771545Seschrock {
7781545Seschrock 	return (func->df_sym->se_sym.st_size);
7791545Seschrock }
7801545Seschrock 
7811545Seschrock /*
7821545Seschrock  * Return the address of a function.
7831545Seschrock  */
7841545Seschrock uint64_t
dis_function_addr(dis_func_t * func)7851545Seschrock dis_function_addr(dis_func_t *func)
7861545Seschrock {
7871545Seschrock 	return (func->df_sym->se_sym.st_value);
7881545Seschrock }
7891545Seschrock 
7901545Seschrock /*
7911545Seschrock  * Return the name of the function
7921545Seschrock  */
7931545Seschrock const char *
dis_function_name(dis_func_t * func)7941545Seschrock dis_function_name(dis_func_t *func)
7951545Seschrock {
7961545Seschrock 	return (func->df_sym->se_name);
7971545Seschrock }
7981545Seschrock 
7991545Seschrock /*
8001545Seschrock  * Return a copy of a function.
8011545Seschrock  */
8021545Seschrock dis_func_t *
dis_function_copy(dis_func_t * func)8031545Seschrock dis_function_copy(dis_func_t *func)
8041545Seschrock {
8051545Seschrock 	dis_func_t *new;
8061545Seschrock 
8071545Seschrock 	new = safe_malloc(sizeof (dis_func_t));
8081545Seschrock 	(void) memcpy(new, func, sizeof (dis_func_t));
8091545Seschrock 
8101545Seschrock 	return (new);
8111545Seschrock }
8121545Seschrock 
8131545Seschrock /*
8141545Seschrock  * Free function memory
8151545Seschrock  */
8161545Seschrock void
dis_function_free(dis_func_t * func)8171545Seschrock dis_function_free(dis_func_t *func)
8181545Seschrock {
8191545Seschrock 	free(func);
8201545Seschrock }
821