xref: /onnv-gate/usr/src/cmd/dis/dis_target.c (revision 1545:8f6fb1eeee38)
1*1545Seschrock /*
2*1545Seschrock  * CDDL HEADER START
3*1545Seschrock  *
4*1545Seschrock  * The contents of this file are subject to the terms of the
5*1545Seschrock  * Common Development and Distribution License (the "License").
6*1545Seschrock  * You may not use this file except in compliance with the License.
7*1545Seschrock  *
8*1545Seschrock  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9*1545Seschrock  * or http://www.opensolaris.org/os/licensing.
10*1545Seschrock  * See the License for the specific language governing permissions
11*1545Seschrock  * and limitations under the License.
12*1545Seschrock  *
13*1545Seschrock  * When distributing Covered Code, include this CDDL HEADER in each
14*1545Seschrock  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15*1545Seschrock  * If applicable, add the following below this CDDL HEADER, with the
16*1545Seschrock  * fields enclosed by brackets "[]" replaced with your own identifying
17*1545Seschrock  * information: Portions Copyright [yyyy] [name of copyright owner]
18*1545Seschrock  *
19*1545Seschrock  * CDDL HEADER END
20*1545Seschrock  */
21*1545Seschrock 
22*1545Seschrock /*
23*1545Seschrock  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
24*1545Seschrock  * Use is subject to license terms.
25*1545Seschrock  */
26*1545Seschrock 
27*1545Seschrock #pragma ident	"%Z%%M%	%I%	%E% SMI"
28*1545Seschrock 
29*1545Seschrock #include <assert.h>
30*1545Seschrock #include <errno.h>
31*1545Seschrock #include <fcntl.h>
32*1545Seschrock #include <gelf.h>
33*1545Seschrock #include <libelf.h>
34*1545Seschrock #include <stdlib.h>
35*1545Seschrock #include <string.h>
36*1545Seschrock #include <unistd.h>
37*1545Seschrock 
38*1545Seschrock #include <sys/fcntl.h>
39*1545Seschrock #include <sys/stat.h>
40*1545Seschrock 
41*1545Seschrock #include "dis_target.h"
42*1545Seschrock #include "dis_util.h"
43*1545Seschrock 
44*1545Seschrock /*
45*1545Seschrock  * Standard ELF disassembler target.
46*1545Seschrock  *
47*1545Seschrock  * We only support disassembly of ELF files, though this target interface could
48*1545Seschrock  * be extended in the future.  Each basic type (target, func, section) contains
49*1545Seschrock  * enough information to uniquely identify the location within the file.  The
50*1545Seschrock  * interfaces use libelf(3LIB) to do the actual processing of the file.
51*1545Seschrock  */
52*1545Seschrock 
/*
 * Symbol table entry type.  We maintain our own symbol table sorted by address,
 * with the symbol name already resolved against the ELF symbol table.
 *
 * Entries are filled in by construct_symtab() and ordered by sym_compare().
 * se_shndx normally mirrors se_sym.st_shndx; for SHN_XINDEX symbols it holds
 * the value taken from the SHT_SYMTAB_SHNDX section, or -1 if that lookup was
 * out of range.
 */
typedef struct sym_entry {
	GElf_Sym	se_sym;		/* value of symbol */
	char		*se_name;	/* name of symbol */
	int		se_shndx;	/* section where symbol is located */
} sym_entry_t;
62*1545Seschrock 
/*
 * Target data structure.  This structure keeps track of the ELF file
 * information, a few bits of pre-processed section index information, and
 * sorted versions of the symbol table.  We also keep track of the last symbol
 * looked up, as the majority of lookups remain within the same symbol.
 *
 * For archives, dis_tgt_create() builds one of these per member and links them
 * through dt_next.  Every target in the chain shares the head's dt_elf_root;
 * only the head carries the open file descriptor (chained targets have
 * dt_fd set to -1).
 */
struct dis_tgt {
	Elf		*dt_elf;	/* libelf handle */
	Elf		*dt_elf_root;	/* main libelf handle (for archives) */
	const char	*dt_filename;	/* name of file */
	int		dt_fd;		/* underlying file descriptor */
	size_t		dt_shstrndx;	/* section index of .shstrtab */
	size_t		dt_symidx;	/* section index of symbol table */
	sym_entry_t	*dt_symcache;	/* last symbol looked up */
	sym_entry_t	*dt_symtab;	/* sorted symbol table */
	int		dt_symcount;	/* # of symbol table entries */
	struct dis_tgt	*dt_next;	/* next target (for archives) */
	Elf_Arhdr	*dt_arhdr;	/* archive header (for archives) */
};
82*1545Seschrock 
/*
 * Function data structure.  We resolve the symbol and lookup the associated ELF
 * data when building this structure.  The offset is calculated based on the
 * section's starting address.
 *
 * df_sym points into the owning target's dt_symtab array rather than at a
 * private copy, so it is only usable while that symbol table exists.
 */
struct dis_func {
	sym_entry_t	*df_sym;	/* symbol table reference */
	Elf_Data	*df_data;	/* associated ELF data */
	size_t		df_offset;	/* offset within data */
};
93*1545Seschrock 
/*
 * Section data structure.  We store the entire section header so that we can
 * determine some properties (such as whether or not it contains text) after
 * building the structure.
 *
 * ds_shdr is a copy of the section header, ds_name is the section name as
 * returned by elf_strptr(), and ds_data is the section's data descriptor as
 * returned by elf_getdata().
 */
struct dis_scn {
	GElf_Shdr	ds_shdr;
	const char	*ds_name;
	Elf_Data	*ds_data;
};
104*1545Seschrock 
/*
 * Lifted from Psymtab.c
 *
 * DATA_TYPES is a bitmask with one bit set for each ELF symbol type (STT_*)
 * that is kept in our symbol table.  IS_DATA_TYPE(tp) is non-zero when the
 * bit for symbol type 'tp' is present in that mask.
 */
#define	DATA_TYPES      \
	((1 << STT_OBJECT) | (1 << STT_FUNC) | \
	(1 << STT_COMMON) | (1 << STT_TLS))
#define	IS_DATA_TYPE(tp)	(((1 << (tp)) & DATA_TYPES) != 0)
110*1545Seschrock 
111*1545Seschrock /*
112*1545Seschrock  * Pick out the best symbol to used based on the sections available in the
113*1545Seschrock  * target.  We prefer SHT_SYMTAB over SHT_DYNSYM.
114*1545Seschrock  */
115*1545Seschrock /* ARGSUSED */
116*1545Seschrock static void
117*1545Seschrock get_symtab(dis_tgt_t *tgt, dis_scn_t *scn, void *data)
118*1545Seschrock {
119*1545Seschrock 	int *index = data;
120*1545Seschrock 
121*1545Seschrock 	*index += 1;
122*1545Seschrock 
123*1545Seschrock 	/*
124*1545Seschrock 	 * Prefer SHT_SYMTAB over SHT_DYNSYM
125*1545Seschrock 	 */
126*1545Seschrock 	if (scn->ds_shdr.sh_type == SHT_DYNSYM && tgt->dt_symidx == 0)
127*1545Seschrock 		tgt->dt_symidx = *index;
128*1545Seschrock 	else if (scn->ds_shdr.sh_type == SHT_SYMTAB)
129*1545Seschrock 		tgt->dt_symidx = *index;
130*1545Seschrock }
131*1545Seschrock 
132*1545Seschrock static int
133*1545Seschrock sym_compare(const void *a, const void *b)
134*1545Seschrock {
135*1545Seschrock 	const sym_entry_t *syma = a;
136*1545Seschrock 	const sym_entry_t *symb = b;
137*1545Seschrock 	const char *aname = syma->se_name;
138*1545Seschrock 	const char *bname = symb->se_name;
139*1545Seschrock 
140*1545Seschrock 	if (syma->se_sym.st_value < symb->se_sym.st_value)
141*1545Seschrock 		return (-1);
142*1545Seschrock 
143*1545Seschrock 	if (syma->se_sym.st_value > symb->se_sym.st_value)
144*1545Seschrock 		return (1);
145*1545Seschrock 
146*1545Seschrock 	/*
147*1545Seschrock 	 * Prefer functions over non-functions
148*1545Seschrock 	 */
149*1545Seschrock 	if (GELF_ST_TYPE(syma->se_sym.st_info) !=
150*1545Seschrock 	    GELF_ST_TYPE(symb->se_sym.st_info)) {
151*1545Seschrock 		if (GELF_ST_TYPE(syma->se_sym.st_info) == STT_FUNC)
152*1545Seschrock 			return (-1);
153*1545Seschrock 		if (GELF_ST_TYPE(symb->se_sym.st_info) == STT_FUNC)
154*1545Seschrock 			return (1);
155*1545Seschrock 	}
156*1545Seschrock 
157*1545Seschrock 	/*
158*1545Seschrock 	 * For symbols with the same address and type, we sort them according to
159*1545Seschrock 	 * a hierarchy:
160*1545Seschrock 	 *
161*1545Seschrock 	 * 	1. weak symbols (common name)
162*1545Seschrock 	 * 	2. global symbols (external name)
163*1545Seschrock 	 * 	3. local symbols
164*1545Seschrock 	 */
165*1545Seschrock 	if (GELF_ST_BIND(syma->se_sym.st_info) !=
166*1545Seschrock 	    GELF_ST_BIND(symb->se_sym.st_info)) {
167*1545Seschrock 		if (GELF_ST_BIND(syma->se_sym.st_info) == STB_WEAK)
168*1545Seschrock 			return (-1);
169*1545Seschrock 		if (GELF_ST_BIND(symb->se_sym.st_info) == STB_WEAK)
170*1545Seschrock 			return (1);
171*1545Seschrock 
172*1545Seschrock 		if (GELF_ST_BIND(syma->se_sym.st_info) == STB_GLOBAL)
173*1545Seschrock 			return (-1);
174*1545Seschrock 		if (GELF_ST_BIND(symb->se_sym.st_info) == STB_GLOBAL)
175*1545Seschrock 			return (1);
176*1545Seschrock 	}
177*1545Seschrock 
178*1545Seschrock 	/*
179*1545Seschrock 	 * As a last resort, if we have multiple symbols of the same type at the
180*1545Seschrock 	 * same address, prefer the version with the fewest leading underscores.
181*1545Seschrock 	 */
182*1545Seschrock 	if (aname == NULL)
183*1545Seschrock 		return (-1);
184*1545Seschrock 	if (bname == NULL)
185*1545Seschrock 		return (1);
186*1545Seschrock 
187*1545Seschrock 	while (*aname == '_' && *bname == '_') {
188*1545Seschrock 		aname++;
189*1545Seschrock 		bname++;
190*1545Seschrock 	}
191*1545Seschrock 
192*1545Seschrock 	if (*bname == '_')
193*1545Seschrock 		return (-1);
194*1545Seschrock 	if (*aname == '_')
195*1545Seschrock 		return (1);
196*1545Seschrock 
197*1545Seschrock 	/*
198*1545Seschrock 	 * Prefer the symbol with the smaller size.
199*1545Seschrock 	 */
200*1545Seschrock 	if (syma->se_sym.st_size < symb->se_sym.st_size)
201*1545Seschrock 		return (-1);
202*1545Seschrock 	if (syma->se_sym.st_size > symb->se_sym.st_size)
203*1545Seschrock 		return (1);
204*1545Seschrock 
205*1545Seschrock 	/*
206*1545Seschrock 	 * We really do have two identical symbols for some reason.  Just report
207*1545Seschrock 	 * them as equal, and to the lucky one go the spoils.
208*1545Seschrock 	 */
209*1545Seschrock 	return (0);
210*1545Seschrock }
211*1545Seschrock 
212*1545Seschrock /*
213*1545Seschrock  * Construct an optimized symbol table sorted by starting address.
214*1545Seschrock  */
215*1545Seschrock static void
216*1545Seschrock construct_symtab(dis_tgt_t *tgt)
217*1545Seschrock {
218*1545Seschrock 	Elf_Scn *scn;
219*1545Seschrock 	GElf_Shdr shdr;
220*1545Seschrock 	Elf_Data *symdata;
221*1545Seschrock 	int i;
222*1545Seschrock 	GElf_Word *symshndx = NULL;
223*1545Seschrock 	int symshndx_size;
224*1545Seschrock 	sym_entry_t *sym;
225*1545Seschrock 	sym_entry_t *p_symtab = NULL;
226*1545Seschrock 	int nsym = 0; /* count of symbols we're not interested in */
227*1545Seschrock 
228*1545Seschrock 	/*
229*1545Seschrock 	 * Find the symshndx section, if any
230*1545Seschrock 	 */
231*1545Seschrock 	for (scn = elf_nextscn(tgt->dt_elf, NULL); scn != NULL;
232*1545Seschrock 	    scn = elf_nextscn(tgt->dt_elf, scn)) {
233*1545Seschrock 		if (gelf_getshdr(scn, &shdr) == NULL)
234*1545Seschrock 			break;
235*1545Seschrock 		if (shdr.sh_type == SHT_SYMTAB_SHNDX &&
236*1545Seschrock 		    shdr.sh_link == tgt->dt_symidx) {
237*1545Seschrock 			Elf_Data	*data;
238*1545Seschrock 
239*1545Seschrock 			if ((data = elf_getdata(scn, NULL)) != NULL) {
240*1545Seschrock 				symshndx = (GElf_Word *)data->d_buf;
241*1545Seschrock 				symshndx_size = data->d_size /
242*1545Seschrock 				    sizeof (GElf_Word);
243*1545Seschrock 				break;
244*1545Seschrock 			}
245*1545Seschrock 		}
246*1545Seschrock 	}
247*1545Seschrock 
248*1545Seschrock 	if ((scn = elf_getscn(tgt->dt_elf, tgt->dt_symidx)) == NULL)
249*1545Seschrock 		die("%s: failed to get section information", tgt->dt_filename);
250*1545Seschrock 	if (gelf_getshdr(scn, &shdr) == NULL)
251*1545Seschrock 		die("%s: failed to get section header", tgt->dt_filename);
252*1545Seschrock 	if (shdr.sh_entsize == 0)
253*1545Seschrock 		die("%s: symbol table has zero size", tgt->dt_filename);
254*1545Seschrock 
255*1545Seschrock 	if ((symdata = elf_getdata(scn, NULL)) == NULL)
256*1545Seschrock 		die("%s: failed to get symbol table", tgt->dt_filename);
257*1545Seschrock 
258*1545Seschrock 	tgt->dt_symcount = symdata->d_size / gelf_fsize(tgt->dt_elf, ELF_T_SYM,
259*1545Seschrock 		1, EV_CURRENT);
260*1545Seschrock 
261*1545Seschrock 	p_symtab = safe_malloc(tgt->dt_symcount * sizeof (sym_entry_t));
262*1545Seschrock 
263*1545Seschrock 	for (i = 0, sym = p_symtab; i < tgt->dt_symcount; i++) {
264*1545Seschrock 		(void) memset(sym, sizeof (sym_entry_t), 0);
265*1545Seschrock 		if (gelf_getsym(symdata, i, &(sym->se_sym)) == NULL) {
266*1545Seschrock 			warn("%s: gelf_getsym returned NULL for %d",
267*1545Seschrock 				tgt->dt_filename, i);
268*1545Seschrock 			nsym++;
269*1545Seschrock 			continue;
270*1545Seschrock 		}
271*1545Seschrock 
272*1545Seschrock 		/*
273*1545Seschrock 		 * We're only interested in data symbols.
274*1545Seschrock 		 */
275*1545Seschrock 		if (!IS_DATA_TYPE(GELF_ST_TYPE(sym->se_sym.st_info))) {
276*1545Seschrock 			nsym++;
277*1545Seschrock 			continue;
278*1545Seschrock 		}
279*1545Seschrock 
280*1545Seschrock 		if (sym->se_sym.st_shndx == SHN_XINDEX && symshndx != NULL) {
281*1545Seschrock 			if (i > symshndx_size) {
282*1545Seschrock 				warn("%s: bad SHNX_XINDEX %d",
283*1545Seschrock 					tgt->dt_filename, i);
284*1545Seschrock 				sym->se_shndx = -1;
285*1545Seschrock 			} else {
286*1545Seschrock 				sym->se_shndx = symshndx[i];
287*1545Seschrock 			}
288*1545Seschrock 		} else {
289*1545Seschrock 			sym->se_shndx = sym->se_sym.st_shndx;
290*1545Seschrock 		}
291*1545Seschrock 
292*1545Seschrock 		if ((sym->se_name = elf_strptr(tgt->dt_elf, shdr.sh_link,
293*1545Seschrock 		    (size_t)sym->se_sym.st_name)) == NULL) {
294*1545Seschrock 			warn("%s: failed to lookup symbol %d name",
295*1545Seschrock 				tgt->dt_filename, i);
296*1545Seschrock 			nsym++;
297*1545Seschrock 			continue;
298*1545Seschrock 		}
299*1545Seschrock 
300*1545Seschrock 		sym++;
301*1545Seschrock 	}
302*1545Seschrock 
303*1545Seschrock 	tgt->dt_symcount -= nsym;
304*1545Seschrock 	tgt->dt_symtab = realloc(p_symtab,
305*1545Seschrock 				tgt->dt_symcount * sizeof (sym_entry_t));
306*1545Seschrock 
307*1545Seschrock 	qsort(tgt->dt_symtab, tgt->dt_symcount, sizeof (sym_entry_t),
308*1545Seschrock 	    sym_compare);
309*1545Seschrock }
310*1545Seschrock 
311*1545Seschrock /*
312*1545Seschrock  * Create a target backed by an ELF file.
313*1545Seschrock  */
314*1545Seschrock dis_tgt_t *
315*1545Seschrock dis_tgt_create(const char *file)
316*1545Seschrock {
317*1545Seschrock 	dis_tgt_t *tgt, *current;
318*1545Seschrock 	int idx;
319*1545Seschrock 	Elf *elf;
320*1545Seschrock 	GElf_Ehdr ehdr;
321*1545Seschrock 	Elf_Arhdr *arhdr = NULL;
322*1545Seschrock 	int cmd;
323*1545Seschrock 
324*1545Seschrock 	if (elf_version(EV_CURRENT) == EV_NONE)
325*1545Seschrock 		die("libelf(3ELF) out of date");
326*1545Seschrock 
327*1545Seschrock 	tgt = safe_malloc(sizeof (dis_tgt_t));
328*1545Seschrock 
329*1545Seschrock 	if ((tgt->dt_fd = open(file, O_RDONLY)) < 0) {
330*1545Seschrock 		warn("%s: failed opening file, reason: %s", file,
331*1545Seschrock 			strerror(errno));
332*1545Seschrock 		free(tgt);
333*1545Seschrock 		return (NULL);
334*1545Seschrock 	}
335*1545Seschrock 
336*1545Seschrock 	if ((tgt->dt_elf_root =
337*1545Seschrock 	    elf_begin(tgt->dt_fd, ELF_C_READ, NULL)) == NULL) {
338*1545Seschrock 		warn("%s: invalid or corrupt ELF file", file);
339*1545Seschrock 		dis_tgt_destroy(tgt);
340*1545Seschrock 		return (NULL);
341*1545Seschrock 	}
342*1545Seschrock 
343*1545Seschrock 	current = tgt;
344*1545Seschrock 	cmd = ELF_C_READ;
345*1545Seschrock 	while ((elf = elf_begin(tgt->dt_fd, cmd, tgt->dt_elf_root)) != NULL) {
346*1545Seschrock 
347*1545Seschrock 		if (elf_kind(tgt->dt_elf_root) == ELF_K_AR &&
348*1545Seschrock 		    (arhdr = elf_getarhdr(elf)) == NULL) {
349*1545Seschrock 			warn("%s: malformed archive", file);
350*1545Seschrock 			dis_tgt_destroy(tgt);
351*1545Seschrock 			return (NULL);
352*1545Seschrock 		}
353*1545Seschrock 
354*1545Seschrock 		/*
355*1545Seschrock 		 * Make sure that this Elf file is sane
356*1545Seschrock 		 */
357*1545Seschrock 		if (gelf_getehdr(elf, &ehdr) == NULL) {
358*1545Seschrock 			if (arhdr != NULL) {
359*1545Seschrock 				/*
360*1545Seschrock 				 * For archives, we drive on in the face of bad
361*1545Seschrock 				 * members.  The "/" and "//" members are
362*1545Seschrock 				 * special, and should be silently ignored.
363*1545Seschrock 				 */
364*1545Seschrock 				if (strcmp(arhdr->ar_name, "/") != 0 &&
365*1545Seschrock 				    strcmp(arhdr->ar_name, "//") != 0)
366*1545Seschrock 					warn("%s[%s]: invalid file type",
367*1545Seschrock 					    file, arhdr->ar_name);
368*1545Seschrock 				cmd = elf_next(elf);
369*1545Seschrock 				(void) elf_end(elf);
370*1545Seschrock 				continue;
371*1545Seschrock 			}
372*1545Seschrock 
373*1545Seschrock 			warn("%s: invalid file type", file);
374*1545Seschrock 			dis_tgt_destroy(tgt);
375*1545Seschrock 			return (NULL);
376*1545Seschrock 		}
377*1545Seschrock 
378*1545Seschrock 		/*
379*1545Seschrock 		 * If we're seeing a new Elf object, then we have an
380*1545Seschrock 		 * archive. In this case, we create a new target, and chain it
381*1545Seschrock 		 * off the master target.  We can later iterate over these
382*1545Seschrock 		 * targets using dis_tgt_next().
383*1545Seschrock 		 */
384*1545Seschrock 		if (current->dt_elf != NULL) {
385*1545Seschrock 			dis_tgt_t *next = safe_malloc(sizeof (dis_tgt_t));
386*1545Seschrock 			next->dt_elf_root = tgt->dt_elf_root;
387*1545Seschrock 			next->dt_fd = -1;
388*1545Seschrock 			current->dt_next = next;
389*1545Seschrock 			current = next;
390*1545Seschrock 		}
391*1545Seschrock 		current->dt_elf = elf;
392*1545Seschrock 		current->dt_arhdr = arhdr;
393*1545Seschrock 
394*1545Seschrock 		if (elf_getshstrndx(elf, &current->dt_shstrndx) == -1) {
395*1545Seschrock 			warn("%s: failed to get section string table for "
396*1545Seschrock 			    "file", file);
397*1545Seschrock 			dis_tgt_destroy(tgt);
398*1545Seschrock 			return (NULL);
399*1545Seschrock 		}
400*1545Seschrock 
401*1545Seschrock 		idx = 0;
402*1545Seschrock 		dis_tgt_section_iter(current, get_symtab, &idx);
403*1545Seschrock 
404*1545Seschrock 		if (current->dt_symidx != 0)
405*1545Seschrock 			construct_symtab(current);
406*1545Seschrock 
407*1545Seschrock 		current->dt_filename = file;
408*1545Seschrock 
409*1545Seschrock 		cmd = elf_next(elf);
410*1545Seschrock 	}
411*1545Seschrock 
412*1545Seschrock 	/*
413*1545Seschrock 	 * Final sanity check.  If we had an archive with no members, then bail
414*1545Seschrock 	 * out with a nice message.
415*1545Seschrock 	 */
416*1545Seschrock 	if (tgt->dt_elf == NULL) {
417*1545Seschrock 		warn("%s: empty archive\n", file);
418*1545Seschrock 		dis_tgt_destroy(tgt);
419*1545Seschrock 		return (NULL);
420*1545Seschrock 	}
421*1545Seschrock 
422*1545Seschrock 	return (tgt);
423*1545Seschrock }
424*1545Seschrock 
425*1545Seschrock /*
426*1545Seschrock  * Return the filename associated with the target.
427*1545Seschrock  */
428*1545Seschrock const char *
429*1545Seschrock dis_tgt_name(dis_tgt_t *tgt)
430*1545Seschrock {
431*1545Seschrock 	return (tgt->dt_filename);
432*1545Seschrock }
433*1545Seschrock 
434*1545Seschrock /*
435*1545Seschrock  * Return the archive member name, if any.
436*1545Seschrock  */
437*1545Seschrock const char *
438*1545Seschrock dis_tgt_member(dis_tgt_t *tgt)
439*1545Seschrock {
440*1545Seschrock 	if (tgt->dt_arhdr)
441*1545Seschrock 		return (tgt->dt_arhdr->ar_name);
442*1545Seschrock 	else
443*1545Seschrock 		return (NULL);
444*1545Seschrock }
445*1545Seschrock 
446*1545Seschrock /*
447*1545Seschrock  * Return the Elf_Ehdr associated with this target.  Needed to determine which
448*1545Seschrock  * disassembler to use.
449*1545Seschrock  */
450*1545Seschrock void
451*1545Seschrock dis_tgt_ehdr(dis_tgt_t *tgt, GElf_Ehdr *ehdr)
452*1545Seschrock {
453*1545Seschrock 	(void) gelf_getehdr(tgt->dt_elf, ehdr);
454*1545Seschrock }
455*1545Seschrock 
456*1545Seschrock /*
457*1545Seschrock  * Return the next target in the list, if this is an archive.
458*1545Seschrock  */
459*1545Seschrock dis_tgt_t *
460*1545Seschrock dis_tgt_next(dis_tgt_t *tgt)
461*1545Seschrock {
462*1545Seschrock 	return (tgt->dt_next);
463*1545Seschrock }
464*1545Seschrock 
465*1545Seschrock /*
466*1545Seschrock  * Destroy a target and free up any associated memory.
467*1545Seschrock  */
468*1545Seschrock void
469*1545Seschrock dis_tgt_destroy(dis_tgt_t *tgt)
470*1545Seschrock {
471*1545Seschrock 	dis_tgt_t *current, *next;
472*1545Seschrock 
473*1545Seschrock 	current = tgt->dt_next;
474*1545Seschrock 	while (current != NULL) {
475*1545Seschrock 		next = current->dt_next;
476*1545Seschrock 		if (current->dt_elf)
477*1545Seschrock 			(void) elf_end(current->dt_elf);
478*1545Seschrock 		if (current->dt_symtab)
479*1545Seschrock 			free(current->dt_symtab);
480*1545Seschrock 		free(current);
481*1545Seschrock 		current = next;
482*1545Seschrock 	}
483*1545Seschrock 
484*1545Seschrock 	if (tgt->dt_elf)
485*1545Seschrock 		(void) elf_end(tgt->dt_elf);
486*1545Seschrock 	if (tgt->dt_elf_root)
487*1545Seschrock 		(void) elf_end(tgt->dt_elf_root);
488*1545Seschrock 
489*1545Seschrock 	if (tgt->dt_symtab)
490*1545Seschrock 		free(tgt->dt_symtab);
491*1545Seschrock 
492*1545Seschrock 	free(tgt);
493*1545Seschrock }
494*1545Seschrock 
495*1545Seschrock /*
496*1545Seschrock  * Given an address, returns the name of the corresponding symbol, as well as
497*1545Seschrock  * the offset within that symbol.  If no matching symbol is found, then NULL is
498*1545Seschrock  * returned.
499*1545Seschrock  *
500*1545Seschrock  * If 'cache_result' is specified, then we keep track of the resulting symbol.
501*1545Seschrock  * This cached result is consulted first on subsequent lookups in order to avoid
502*1545Seschrock  * unecessary lookups.  This flag should be used for resolving the current PC,
503*1545Seschrock  * as the majority of addresses stay within the current function.
504*1545Seschrock  */
505*1545Seschrock const char *
506*1545Seschrock dis_tgt_lookup(dis_tgt_t *tgt, uint64_t addr, off_t *offset, int cache_result,
507*1545Seschrock     size_t *size, int *isfunc)
508*1545Seschrock {
509*1545Seschrock 	int lo, hi, mid;
510*1545Seschrock 	sym_entry_t *sym, *osym, *match;
511*1545Seschrock 	int found;
512*1545Seschrock 
513*1545Seschrock 	if (tgt->dt_symcache != NULL &&
514*1545Seschrock 	    addr >= tgt->dt_symcache->se_sym.st_value &&
515*1545Seschrock 	    addr < tgt->dt_symcache->se_sym.st_value +
516*1545Seschrock 	    tgt->dt_symcache->se_sym.st_size) {
517*1545Seschrock 		*offset = addr - tgt->dt_symcache->se_sym.st_value;
518*1545Seschrock 		*size = tgt->dt_symcache->se_sym.st_size;
519*1545Seschrock 		return (tgt->dt_symcache->se_name);
520*1545Seschrock 	}
521*1545Seschrock 
522*1545Seschrock 	lo = 0;
523*1545Seschrock 	hi = (tgt->dt_symcount - 1);
524*1545Seschrock 	found = 0;
525*1545Seschrock 	match = osym = NULL;
526*1545Seschrock 	while (lo <= hi) {
527*1545Seschrock 		mid = (lo + hi) / 2;
528*1545Seschrock 
529*1545Seschrock 		sym = &tgt->dt_symtab[mid];
530*1545Seschrock 
531*1545Seschrock 		if (addr >= sym->se_sym.st_value &&
532*1545Seschrock 		    addr < sym->se_sym.st_value + sym->se_sym.st_size &&
533*1545Seschrock 		    (!found || sym->se_sym.st_value > osym->se_sym.st_value)) {
534*1545Seschrock 			osym = sym;
535*1545Seschrock 			found = 1;
536*1545Seschrock 		} else if (addr == sym->se_sym.st_value) {
537*1545Seschrock 			/*
538*1545Seschrock 			 * Particularly for .plt objects, it's possible to have
539*1545Seschrock 			 * a zero sized object.  We want to return this, but we
540*1545Seschrock 			 * want it to be a last resort.
541*1545Seschrock 			 */
542*1545Seschrock 			match = sym;
543*1545Seschrock 		}
544*1545Seschrock 
545*1545Seschrock 		if (addr < sym->se_sym.st_value)
546*1545Seschrock 			hi = mid - 1;
547*1545Seschrock 		else
548*1545Seschrock 			lo = mid + 1;
549*1545Seschrock 	}
550*1545Seschrock 
551*1545Seschrock 	if (!found) {
552*1545Seschrock 		if (match)
553*1545Seschrock 			osym = match;
554*1545Seschrock 		else
555*1545Seschrock 			return (NULL);
556*1545Seschrock 	}
557*1545Seschrock 
558*1545Seschrock 	/*
559*1545Seschrock 	 * Walk backwards to find the best match.
560*1545Seschrock 	 */
561*1545Seschrock 	do {
562*1545Seschrock 		sym = osym;
563*1545Seschrock 
564*1545Seschrock 		if (osym == tgt->dt_symtab)
565*1545Seschrock 			break;
566*1545Seschrock 
567*1545Seschrock 		osym = osym - 1;
568*1545Seschrock 	} while ((sym->se_sym.st_value == osym->se_sym.st_value) &&
569*1545Seschrock 		(addr >= osym->se_sym.st_value) &&
570*1545Seschrock 		(addr < osym->se_sym.st_value + osym->se_sym.st_size));
571*1545Seschrock 
572*1545Seschrock 	if (cache_result)
573*1545Seschrock 		tgt->dt_symcache = sym;
574*1545Seschrock 
575*1545Seschrock 	*offset = addr - sym->se_sym.st_value;
576*1545Seschrock 	*size = sym->se_sym.st_size;
577*1545Seschrock 	if (isfunc)
578*1545Seschrock 		*isfunc = (GELF_ST_TYPE(sym->se_sym.st_info) == STT_FUNC);
579*1545Seschrock 
580*1545Seschrock 	return (sym->se_name);
581*1545Seschrock }
582*1545Seschrock 
583*1545Seschrock /*
584*1545Seschrock  * Given an address, return the starting offset of the next symbol in the file.
585*1545Seschrock  * Relies on the fact that this is only used when we encounter a bad instruction
586*1545Seschrock  * in the input stream, so we know that the last symbol looked up will be in the
587*1545Seschrock  * cache.
588*1545Seschrock  */
589*1545Seschrock off_t
590*1545Seschrock dis_tgt_next_symbol(dis_tgt_t *tgt, uint64_t addr)
591*1545Seschrock {
592*1545Seschrock 	sym_entry_t *sym = tgt->dt_symcache;
593*1545Seschrock 	uint64_t start;
594*1545Seschrock 
595*1545Seschrock 	/* make sure the cached symbol and address are valid */
596*1545Seschrock 	if (sym == NULL || addr < sym->se_sym.st_value ||
597*1545Seschrock 	    addr >= sym->se_sym.st_value + sym->se_sym.st_size)
598*1545Seschrock 		return (0);
599*1545Seschrock 
600*1545Seschrock 	start = sym->se_sym.st_value;
601*1545Seschrock 
602*1545Seschrock 	/* find the next symbol */
603*1545Seschrock 	while (sym != tgt->dt_symtab + tgt->dt_symcount &&
604*1545Seschrock 	    sym->se_sym.st_value == start)
605*1545Seschrock 		sym++;
606*1545Seschrock 
607*1545Seschrock 	return (sym->se_sym.st_value - addr);
608*1545Seschrock }
609*1545Seschrock 
610*1545Seschrock /*
611*1545Seschrock  * Iterate over all sections in the target, executing the given callback for
612*1545Seschrock  * each.
613*1545Seschrock  */
614*1545Seschrock void
615*1545Seschrock dis_tgt_section_iter(dis_tgt_t *tgt, section_iter_f func, void *data)
616*1545Seschrock {
617*1545Seschrock 	dis_scn_t sdata;
618*1545Seschrock 	Elf_Scn *scn;
619*1545Seschrock 	int idx;
620*1545Seschrock 
621*1545Seschrock 	for (scn = elf_nextscn(tgt->dt_elf, NULL), idx = 1; scn != NULL;
622*1545Seschrock 	    scn = elf_nextscn(tgt->dt_elf, scn), idx++) {
623*1545Seschrock 
624*1545Seschrock 		if (gelf_getshdr(scn, &sdata.ds_shdr) == NULL) {
625*1545Seschrock 			warn("%s: failed to get section %d header",
626*1545Seschrock 			    tgt->dt_filename, idx);
627*1545Seschrock 			continue;
628*1545Seschrock 		}
629*1545Seschrock 
630*1545Seschrock 		if ((sdata.ds_name = elf_strptr(tgt->dt_elf, tgt->dt_shstrndx,
631*1545Seschrock 		    sdata.ds_shdr.sh_name)) == NULL) {
632*1545Seschrock 			warn("%s: failed to get section %d name",
633*1545Seschrock 			    tgt->dt_filename, idx);
634*1545Seschrock 			continue;
635*1545Seschrock 		}
636*1545Seschrock 
637*1545Seschrock 		if ((sdata.ds_data = elf_getdata(scn, NULL)) == NULL) {
638*1545Seschrock 			warn("%s: failed to get data for section '%s'",
639*1545Seschrock 			    tgt->dt_filename, sdata.ds_name);
640*1545Seschrock 			continue;
641*1545Seschrock 		}
642*1545Seschrock 
643*1545Seschrock 		func(tgt, &sdata, data);
644*1545Seschrock 	}
645*1545Seschrock }
646*1545Seschrock 
647*1545Seschrock /*
648*1545Seschrock  * Return 1 if the given section contains text, 0 otherwise.
649*1545Seschrock  */
650*1545Seschrock int
651*1545Seschrock dis_section_istext(dis_scn_t *scn)
652*1545Seschrock {
653*1545Seschrock 	return ((scn->ds_shdr.sh_type == SHT_PROGBITS) &&
654*1545Seschrock 	    (scn->ds_shdr.sh_flags == (SHF_ALLOC | SHF_EXECINSTR)));
655*1545Seschrock }
656*1545Seschrock 
657*1545Seschrock /*
658*1545Seschrock  * Return a pointer to the section data.
659*1545Seschrock  */
660*1545Seschrock void *
661*1545Seschrock dis_section_data(dis_scn_t *scn)
662*1545Seschrock {
663*1545Seschrock 	return (scn->ds_data->d_buf);
664*1545Seschrock }
665*1545Seschrock 
666*1545Seschrock /*
667*1545Seschrock  * Return the size of the section data.
668*1545Seschrock  */
669*1545Seschrock size_t
670*1545Seschrock dis_section_size(dis_scn_t *scn)
671*1545Seschrock {
672*1545Seschrock 	return (scn->ds_data->d_size);
673*1545Seschrock }
674*1545Seschrock 
675*1545Seschrock /*
676*1545Seschrock  * Return the address for the given section.
677*1545Seschrock  */
678*1545Seschrock uint64_t
679*1545Seschrock dis_section_addr(dis_scn_t *scn)
680*1545Seschrock {
681*1545Seschrock 	return (scn->ds_shdr.sh_addr);
682*1545Seschrock }
683*1545Seschrock 
684*1545Seschrock /*
685*1545Seschrock  * Return the name of the current section.
686*1545Seschrock  */
687*1545Seschrock const char *
688*1545Seschrock dis_section_name(dis_scn_t *scn)
689*1545Seschrock {
690*1545Seschrock 	return (scn->ds_name);
691*1545Seschrock }
692*1545Seschrock 
693*1545Seschrock /*
694*1545Seschrock  * Create an allocated copy of the given section
695*1545Seschrock  */
696*1545Seschrock dis_scn_t *
697*1545Seschrock dis_section_copy(dis_scn_t *scn)
698*1545Seschrock {
699*1545Seschrock 	dis_scn_t *new;
700*1545Seschrock 
701*1545Seschrock 	new = safe_malloc(sizeof (dis_scn_t));
702*1545Seschrock 	(void) memcpy(new, scn, sizeof (dis_scn_t));
703*1545Seschrock 
704*1545Seschrock 	return (new);
705*1545Seschrock }
706*1545Seschrock 
707*1545Seschrock /*
708*1545Seschrock  * Free section memory
709*1545Seschrock  */
710*1545Seschrock void
711*1545Seschrock dis_section_free(dis_scn_t *scn)
712*1545Seschrock {
713*1545Seschrock 	free(scn);
714*1545Seschrock }
715*1545Seschrock 
716*1545Seschrock /*
717*1545Seschrock  * Iterate over all functions in the target, executing the given callback for
718*1545Seschrock  * each one.
719*1545Seschrock  */
720*1545Seschrock void
721*1545Seschrock dis_tgt_function_iter(dis_tgt_t *tgt, function_iter_f func, void *data)
722*1545Seschrock {
723*1545Seschrock 	int i;
724*1545Seschrock 	sym_entry_t *sym;
725*1545Seschrock 	dis_func_t df;
726*1545Seschrock 	Elf_Scn *scn;
727*1545Seschrock 	GElf_Shdr	shdr;
728*1545Seschrock 
729*1545Seschrock 	for (i = 0, sym = tgt->dt_symtab; i < tgt->dt_symcount; i++, sym++) {
730*1545Seschrock 
731*1545Seschrock 		/* ignore non-functions */
732*1545Seschrock 		if ((GELF_ST_TYPE(sym->se_sym.st_info) != STT_FUNC) ||
733*1545Seschrock 		    sym->se_name == NULL ||
734*1545Seschrock 		    sym->se_sym.st_size == 0 ||
735*1545Seschrock 		    sym->se_shndx == -1)
736*1545Seschrock 			continue;
737*1545Seschrock 
738*1545Seschrock 		/* get the ELF data associated with this function */
739*1545Seschrock 		if ((scn = elf_getscn(tgt->dt_elf, sym->se_shndx)) == NULL ||
740*1545Seschrock 		    gelf_getshdr(scn, &shdr) == NULL ||
741*1545Seschrock 		    (df.df_data = elf_getdata(scn, NULL)) == NULL ||
742*1545Seschrock 		    df.df_data->d_size == 0) {
743*1545Seschrock 			warn("%s: failed to read section %d",
744*1545Seschrock 			    tgt->dt_filename, sym->se_shndx);
745*1545Seschrock 			continue;
746*1545Seschrock 		}
747*1545Seschrock 
748*1545Seschrock 		/*
749*1545Seschrock 		 * Verify that the address lies within the section that we think
750*1545Seschrock 		 * it does.
751*1545Seschrock 		 */
752*1545Seschrock 		if (sym->se_sym.st_value < shdr.sh_addr ||
753*1545Seschrock 		    (sym->se_sym.st_value + sym->se_sym.st_size) >
754*1545Seschrock 		    (shdr.sh_addr + shdr.sh_size)) {
755*1545Seschrock 			warn("%s: bad section %d for address %p",
756*1545Seschrock 				tgt->dt_filename, sym->se_sym.st_shndx,
757*1545Seschrock 				sym->se_sym.st_value);
758*1545Seschrock 			continue;
759*1545Seschrock 		}
760*1545Seschrock 
761*1545Seschrock 		df.df_sym = sym;
762*1545Seschrock 		df.df_offset = sym->se_sym.st_value - shdr.sh_addr;
763*1545Seschrock 
764*1545Seschrock 		func(tgt, &df, data);
765*1545Seschrock 	}
766*1545Seschrock }
767*1545Seschrock 
768*1545Seschrock /*
769*1545Seschrock  * Return the data associated with a given function.
770*1545Seschrock  */
771*1545Seschrock void *
772*1545Seschrock dis_function_data(dis_func_t *func)
773*1545Seschrock {
774*1545Seschrock 	return ((char *)func->df_data->d_buf + func->df_offset);
775*1545Seschrock }
776*1545Seschrock 
777*1545Seschrock /*
778*1545Seschrock  * Return the size of a function.
779*1545Seschrock  */
780*1545Seschrock size_t
781*1545Seschrock dis_function_size(dis_func_t *func)
782*1545Seschrock {
783*1545Seschrock 	return (func->df_sym->se_sym.st_size);
784*1545Seschrock }
785*1545Seschrock 
786*1545Seschrock /*
787*1545Seschrock  * Return the address of a function.
788*1545Seschrock  */
789*1545Seschrock uint64_t
790*1545Seschrock dis_function_addr(dis_func_t *func)
791*1545Seschrock {
792*1545Seschrock 	return (func->df_sym->se_sym.st_value);
793*1545Seschrock }
794*1545Seschrock 
795*1545Seschrock /*
796*1545Seschrock  * Return the name of the function
797*1545Seschrock  */
798*1545Seschrock const char *
799*1545Seschrock dis_function_name(dis_func_t *func)
800*1545Seschrock {
801*1545Seschrock 	return (func->df_sym->se_name);
802*1545Seschrock }
803*1545Seschrock 
804*1545Seschrock /*
805*1545Seschrock  * Return a copy of a function.
806*1545Seschrock  */
807*1545Seschrock dis_func_t *
808*1545Seschrock dis_function_copy(dis_func_t *func)
809*1545Seschrock {
810*1545Seschrock 	dis_func_t *new;
811*1545Seschrock 
812*1545Seschrock 	new = safe_malloc(sizeof (dis_func_t));
813*1545Seschrock 	(void) memcpy(new, func, sizeof (dis_func_t));
814*1545Seschrock 
815*1545Seschrock 	return (new);
816*1545Seschrock }
817*1545Seschrock 
818*1545Seschrock /*
819*1545Seschrock  * Free function memory
820*1545Seschrock  */
821*1545Seschrock void
822*1545Seschrock dis_function_free(dis_func_t *func)
823*1545Seschrock {
824*1545Seschrock 	free(func);
825*1545Seschrock }
826