xref: /illumos-gate/usr/src/lib/libdisasm/common/libdisasm.c (revision 1f1540205fa6366266184180654434272c425ac2)
/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 * Copyright 2012 Joshua M. Clulow <josh@sysmgr.org>
 * Copyright 2015 Nexenta Systems, Inc.  All rights reserved.
 * Copyright 2018, Joyent, Inc.
 */
29dc0093f4Seschrock 
30dc0093f4Seschrock #include <libdisasm.h>
31dc0093f4Seschrock #include <stdlib.h>
32dc0093f4Seschrock #ifdef DIS_STANDALONE
33dc0093f4Seschrock #include <mdb/mdb_modapi.h>
34f7184619SJoshua M. Clulow #define	_MDB
35f7184619SJoshua M. Clulow #include <mdb/mdb_io.h>
36f7184619SJoshua M. Clulow #else
37f7184619SJoshua M. Clulow #include <stdio.h>
38dc0093f4Seschrock #endif
39dc0093f4Seschrock 
40f7184619SJoshua M. Clulow #include "libdisasm_impl.h"
41f7184619SJoshua M. Clulow 
/*
 * Most recent libdisasm error code.  Written by dis_seterrno() and read
 * back by dis_errno(); a single value shared by every handle.
 */
static int _dis_errno;
43dc0093f4Seschrock 
44dc0093f4Seschrock /*
45f7184619SJoshua M. Clulow  * If we're building the standalone library, then we only want to
46f7184619SJoshua M. Clulow  * include support for disassembly of the native architecture.
47f7184619SJoshua M. Clulow  * The regular shared library should include support for all
48f7184619SJoshua M. Clulow  * architectures.
49f7184619SJoshua M. Clulow  */
50f7184619SJoshua M. Clulow #if !defined(DIS_STANDALONE) || defined(__i386) || defined(__amd64)
51f7184619SJoshua M. Clulow extern dis_arch_t dis_arch_i386;
52f7184619SJoshua M. Clulow #endif
53f7184619SJoshua M. Clulow #if !defined(DIS_STANDALONE) || defined(__sparc)
54f7184619SJoshua M. Clulow extern dis_arch_t dis_arch_sparc;
55f7184619SJoshua M. Clulow #endif
560472e113SJosef 'Jeff' Sipek #if !defined(DIS_STANDALONE) || defined(__s390) || defined(__s390x)
570472e113SJosef 'Jeff' Sipek extern dis_arch_t dis_arch_s390;
580472e113SJosef 'Jeff' Sipek #endif
59*1f154020SRobert Mustacchi #if !defined(DIS_STANDALONE) || defined(__riscv)
60*1f154020SRobert Mustacchi extern dis_arch_t dis_arch_riscv;
61*1f154020SRobert Mustacchi #endif
62f7184619SJoshua M. Clulow 
63f7184619SJoshua M. Clulow static dis_arch_t *dis_archs[] = {
64f7184619SJoshua M. Clulow #if !defined(DIS_STANDALONE) || defined(__i386) || defined(__amd64)
65f7184619SJoshua M. Clulow 	&dis_arch_i386,
66f7184619SJoshua M. Clulow #endif
67f7184619SJoshua M. Clulow #if !defined(DIS_STANDALONE) || defined(__sparc)
68f7184619SJoshua M. Clulow 	&dis_arch_sparc,
69f7184619SJoshua M. Clulow #endif
700472e113SJosef 'Jeff' Sipek #if !defined(DIS_STANDALONE) || defined(__s390) || defined(__s390x)
710472e113SJosef 'Jeff' Sipek 	&dis_arch_s390,
720472e113SJosef 'Jeff' Sipek #endif
73*1f154020SRobert Mustacchi #if !defined(DIS_STANDALONE) || defined(__riscv)
74*1f154020SRobert Mustacchi 	&dis_arch_riscv,
75*1f154020SRobert Mustacchi #endif
76f7184619SJoshua M. Clulow 	NULL
77f7184619SJoshua M. Clulow };
78f7184619SJoshua M. Clulow 
/*
 * Memory allocation wrappers.  The standalone library is linked against
 * mdb's allocator; every other build uses the standard C allocator.
 */

/*
 * Allocate 'bytes' bytes of zero-filled memory.  In the non-standalone
 * build this returns NULL when calloc() fails.
 */
void *
dis_zalloc(size_t bytes)
{
#ifdef DIS_STANDALONE
	return (mdb_zalloc(bytes, UM_SLEEP));
#else
	return (calloc(1, bytes));
#endif
}

/*
 * Release memory obtained from dis_zalloc().  'bytes' is handed to
 * mdb_free() in the standalone build and is unused otherwise.
 */
/*ARGSUSED*/
void
dis_free(void *ptr, size_t bytes)
{
#ifdef DIS_STANDALONE
	mdb_free(ptr, bytes);
#else
	free(ptr);
#endif
}
109dc0093f4Seschrock 
110dc0093f4Seschrock int
dis_seterrno(int error)111dc0093f4Seschrock dis_seterrno(int error)
112dc0093f4Seschrock {
113dc0093f4Seschrock 	_dis_errno = error;
114dc0093f4Seschrock 	return (-1);
115dc0093f4Seschrock }
116dc0093f4Seschrock 
117dc0093f4Seschrock int
dis_errno(void)118dc0093f4Seschrock dis_errno(void)
119dc0093f4Seschrock {
120dc0093f4Seschrock 	return (_dis_errno);
121dc0093f4Seschrock }
122dc0093f4Seschrock 
123dc0093f4Seschrock const char *
dis_strerror(int error)124dc0093f4Seschrock dis_strerror(int error)
125dc0093f4Seschrock {
126dc0093f4Seschrock 	switch (error) {
127dc0093f4Seschrock 	case E_DIS_NOMEM:
128dc0093f4Seschrock 		return ("out of memory");
129dc0093f4Seschrock 	case E_DIS_INVALFLAG:
130dc0093f4Seschrock 		return ("invalid flags for this architecture");
131f7184619SJoshua M. Clulow 	case E_DIS_UNSUPARCH:
132f7184619SJoshua M. Clulow 		return ("unsupported machine architecture");
133dc0093f4Seschrock 	default:
134dc0093f4Seschrock 		return ("unknown error");
135dc0093f4Seschrock 	}
136dc0093f4Seschrock }
137f7184619SJoshua M. Clulow 
138f7184619SJoshua M. Clulow void
dis_set_data(dis_handle_t * dhp,void * data)139f7184619SJoshua M. Clulow dis_set_data(dis_handle_t *dhp, void *data)
140f7184619SJoshua M. Clulow {
141f7184619SJoshua M. Clulow 	dhp->dh_data = data;
142f7184619SJoshua M. Clulow }
143f7184619SJoshua M. Clulow 
144f7184619SJoshua M. Clulow void
dis_flags_set(dis_handle_t * dhp,int f)145f7184619SJoshua M. Clulow dis_flags_set(dis_handle_t *dhp, int f)
146f7184619SJoshua M. Clulow {
147f7184619SJoshua M. Clulow 	dhp->dh_flags |= f;
148f7184619SJoshua M. Clulow }
149f7184619SJoshua M. Clulow 
150f7184619SJoshua M. Clulow void
dis_flags_clear(dis_handle_t * dhp,int f)151f7184619SJoshua M. Clulow dis_flags_clear(dis_handle_t *dhp, int f)
152f7184619SJoshua M. Clulow {
153f7184619SJoshua M. Clulow 	dhp->dh_flags &= ~f;
154f7184619SJoshua M. Clulow }
155f7184619SJoshua M. Clulow 
156f7184619SJoshua M. Clulow void
dis_handle_destroy(dis_handle_t * dhp)157f7184619SJoshua M. Clulow dis_handle_destroy(dis_handle_t *dhp)
158f7184619SJoshua M. Clulow {
1590c923cf7SJosef 'Jeff' Sipek 	if (dhp->dh_arch->da_handle_detach != NULL)
160f7184619SJoshua M. Clulow 		dhp->dh_arch->da_handle_detach(dhp);
1610c923cf7SJosef 'Jeff' Sipek 
162f7184619SJoshua M. Clulow 	dis_free(dhp, sizeof (dis_handle_t));
163f7184619SJoshua M. Clulow }
164f7184619SJoshua M. Clulow 
165f7184619SJoshua M. Clulow dis_handle_t *
dis_handle_create(int flags,void * data,dis_lookup_f lookup_func,dis_read_f read_func)166f7184619SJoshua M. Clulow dis_handle_create(int flags, void *data, dis_lookup_f lookup_func,
167f7184619SJoshua M. Clulow     dis_read_f read_func)
168f7184619SJoshua M. Clulow {
169f7184619SJoshua M. Clulow 	dis_handle_t *dhp;
170f7184619SJoshua M. Clulow 	dis_arch_t *arch = NULL;
171f7184619SJoshua M. Clulow 	int i;
172f7184619SJoshua M. Clulow 
173f7184619SJoshua M. Clulow 	/* Select an architecture based on flags */
174f7184619SJoshua M. Clulow 	for (i = 0; dis_archs[i] != NULL; i++) {
175f7184619SJoshua M. Clulow 		if (dis_archs[i]->da_supports_flags(flags)) {
176f7184619SJoshua M. Clulow 			arch = dis_archs[i];
177f7184619SJoshua M. Clulow 			break;
178f7184619SJoshua M. Clulow 		}
179f7184619SJoshua M. Clulow 	}
180f7184619SJoshua M. Clulow 	if (arch == NULL) {
181f7184619SJoshua M. Clulow 		(void) dis_seterrno(E_DIS_UNSUPARCH);
182f7184619SJoshua M. Clulow 		return (NULL);
183f7184619SJoshua M. Clulow 	}
184f7184619SJoshua M. Clulow 
185f7184619SJoshua M. Clulow 	if ((dhp = dis_zalloc(sizeof (dis_handle_t))) == NULL) {
186f7184619SJoshua M. Clulow 		(void) dis_seterrno(E_DIS_NOMEM);
187f7184619SJoshua M. Clulow 		return (NULL);
188f7184619SJoshua M. Clulow 	}
189f7184619SJoshua M. Clulow 	dhp->dh_arch = arch;
190f7184619SJoshua M. Clulow 	dhp->dh_lookup = lookup_func;
191f7184619SJoshua M. Clulow 	dhp->dh_read = read_func;
192f7184619SJoshua M. Clulow 	dhp->dh_flags = flags;
193f7184619SJoshua M. Clulow 	dhp->dh_data = data;
194f7184619SJoshua M. Clulow 
195f7184619SJoshua M. Clulow 	/*
196f7184619SJoshua M. Clulow 	 * Allow the architecture-specific code to allocate
197f7184619SJoshua M. Clulow 	 * its private data.
198f7184619SJoshua M. Clulow 	 */
1990c923cf7SJosef 'Jeff' Sipek 	if (arch->da_handle_attach != NULL &&
2000c923cf7SJosef 'Jeff' Sipek 	    arch->da_handle_attach(dhp) != 0) {
201f7184619SJoshua M. Clulow 		dis_free(dhp, sizeof (dis_handle_t));
202f7184619SJoshua M. Clulow 		/* dis errno already set */
203f7184619SJoshua M. Clulow 		return (NULL);
204f7184619SJoshua M. Clulow 	}
205f7184619SJoshua M. Clulow 
206f7184619SJoshua M. Clulow 	return (dhp);
207f7184619SJoshua M. Clulow }
208f7184619SJoshua M. Clulow 
209f7184619SJoshua M. Clulow int
dis_disassemble(dis_handle_t * dhp,uint64_t addr,char * buf,size_t buflen)210f7184619SJoshua M. Clulow dis_disassemble(dis_handle_t *dhp, uint64_t addr, char *buf, size_t buflen)
211f7184619SJoshua M. Clulow {
212f7184619SJoshua M. Clulow 	return (dhp->dh_arch->da_disassemble(dhp, addr, buf, buflen));
213f7184619SJoshua M. Clulow }
214f7184619SJoshua M. Clulow 
215038db8fbSJosef 'Jeff' Sipek /*
216038db8fbSJosef 'Jeff' Sipek  * On some instruction sets (e.g., x86), we have no choice except to
217038db8fbSJosef 'Jeff' Sipek  * disassemble everything from the start of the symbol, and stop when we
218038db8fbSJosef 'Jeff' Sipek  * have reached our instruction address.  If we're not in the middle of a
219038db8fbSJosef 'Jeff' Sipek  * known symbol, then we return the same address to indicate failure.
220038db8fbSJosef 'Jeff' Sipek  */
221038db8fbSJosef 'Jeff' Sipek static uint64_t
dis_generic_previnstr(dis_handle_t * dhp,uint64_t pc,int n)222038db8fbSJosef 'Jeff' Sipek dis_generic_previnstr(dis_handle_t *dhp, uint64_t pc, int n)
223038db8fbSJosef 'Jeff' Sipek {
224038db8fbSJosef 'Jeff' Sipek 	uint64_t *hist, addr, start;
225038db8fbSJosef 'Jeff' Sipek 	int cur, nseen;
226038db8fbSJosef 'Jeff' Sipek 	uint64_t res = pc;
227038db8fbSJosef 'Jeff' Sipek 
228038db8fbSJosef 'Jeff' Sipek 	if (n <= 0)
229038db8fbSJosef 'Jeff' Sipek 		return (pc);
230038db8fbSJosef 'Jeff' Sipek 
231038db8fbSJosef 'Jeff' Sipek 	if (dhp->dh_lookup(dhp->dh_data, pc, NULL, 0, &start, NULL) != 0 ||
232038db8fbSJosef 'Jeff' Sipek 	    start == pc)
233038db8fbSJosef 'Jeff' Sipek 		return (res);
234038db8fbSJosef 'Jeff' Sipek 
235038db8fbSJosef 'Jeff' Sipek 	hist = dis_zalloc(sizeof (uint64_t) * n);
236038db8fbSJosef 'Jeff' Sipek 
237038db8fbSJosef 'Jeff' Sipek 	for (cur = 0, nseen = 0, addr = start; addr < pc; addr = dhp->dh_addr) {
238038db8fbSJosef 'Jeff' Sipek 		hist[cur] = addr;
239038db8fbSJosef 'Jeff' Sipek 		cur = (cur + 1) % n;
240038db8fbSJosef 'Jeff' Sipek 		nseen++;
241038db8fbSJosef 'Jeff' Sipek 
242038db8fbSJosef 'Jeff' Sipek 		/* if we cannot make forward progress, give up */
243038db8fbSJosef 'Jeff' Sipek 		if (dis_disassemble(dhp, addr, NULL, 0) != 0)
244038db8fbSJosef 'Jeff' Sipek 			goto done;
245038db8fbSJosef 'Jeff' Sipek 	}
246038db8fbSJosef 'Jeff' Sipek 
247038db8fbSJosef 'Jeff' Sipek 	if (addr != pc) {
248038db8fbSJosef 'Jeff' Sipek 		/*
249038db8fbSJosef 'Jeff' Sipek 		 * We scanned past %pc, but didn't find an instruction that
250038db8fbSJosef 'Jeff' Sipek 		 * started at %pc.  This means that either the caller specified
251038db8fbSJosef 'Jeff' Sipek 		 * an invalid address, or we ran into something other than code
252038db8fbSJosef 'Jeff' Sipek 		 * during our scan.  Virtually any combination of bytes can be
253038db8fbSJosef 'Jeff' Sipek 		 * construed as a valid Intel instruction, so any non-code bytes
254038db8fbSJosef 'Jeff' Sipek 		 * we encounter will have thrown off the scan.
255038db8fbSJosef 'Jeff' Sipek 		 */
256038db8fbSJosef 'Jeff' Sipek 		goto done;
257038db8fbSJosef 'Jeff' Sipek 	}
258038db8fbSJosef 'Jeff' Sipek 
259038db8fbSJosef 'Jeff' Sipek 	res = hist[(cur + n - MIN(n, nseen)) % n];
260038db8fbSJosef 'Jeff' Sipek 
261038db8fbSJosef 'Jeff' Sipek done:
262038db8fbSJosef 'Jeff' Sipek 	dis_free(hist, sizeof (uint64_t) * n);
263038db8fbSJosef 'Jeff' Sipek 	return (res);
264038db8fbSJosef 'Jeff' Sipek }
265038db8fbSJosef 'Jeff' Sipek 
266038db8fbSJosef 'Jeff' Sipek /*
267038db8fbSJosef 'Jeff' Sipek  * Return the nth previous instruction's address.  Return the same address
268038db8fbSJosef 'Jeff' Sipek  * to indicate failure.
269038db8fbSJosef 'Jeff' Sipek  */
270f7184619SJoshua M. Clulow uint64_t
dis_previnstr(dis_handle_t * dhp,uint64_t pc,int n)271f7184619SJoshua M. Clulow dis_previnstr(dis_handle_t *dhp, uint64_t pc, int n)
272f7184619SJoshua M. Clulow {
273038db8fbSJosef 'Jeff' Sipek 	if (dhp->dh_arch->da_previnstr == NULL)
274038db8fbSJosef 'Jeff' Sipek 		return (dis_generic_previnstr(dhp, pc, n));
275038db8fbSJosef 'Jeff' Sipek 
276f7184619SJoshua M. Clulow 	return (dhp->dh_arch->da_previnstr(dhp, pc, n));
277f7184619SJoshua M. Clulow }
278f7184619SJoshua M. Clulow 
279f7184619SJoshua M. Clulow int
dis_min_instrlen(dis_handle_t * dhp)280f7184619SJoshua M. Clulow dis_min_instrlen(dis_handle_t *dhp)
281f7184619SJoshua M. Clulow {
282f7184619SJoshua M. Clulow 	return (dhp->dh_arch->da_min_instrlen(dhp));
283f7184619SJoshua M. Clulow }
284f7184619SJoshua M. Clulow 
285f7184619SJoshua M. Clulow int
dis_max_instrlen(dis_handle_t * dhp)286f7184619SJoshua M. Clulow dis_max_instrlen(dis_handle_t *dhp)
287f7184619SJoshua M. Clulow {
288f7184619SJoshua M. Clulow 	return (dhp->dh_arch->da_max_instrlen(dhp));
289f7184619SJoshua M. Clulow }
290f7184619SJoshua M. Clulow 
291cfc41e72SJosef 'Jeff' Sipek static int
dis_generic_instrlen(dis_handle_t * dhp,uint64_t pc)292cfc41e72SJosef 'Jeff' Sipek dis_generic_instrlen(dis_handle_t *dhp, uint64_t pc)
293cfc41e72SJosef 'Jeff' Sipek {
294cfc41e72SJosef 'Jeff' Sipek 	if (dis_disassemble(dhp, pc, NULL, 0) != 0)
295cfc41e72SJosef 'Jeff' Sipek 		return (-1);
296cfc41e72SJosef 'Jeff' Sipek 
297cfc41e72SJosef 'Jeff' Sipek 	return (dhp->dh_addr - pc);
298cfc41e72SJosef 'Jeff' Sipek }
299cfc41e72SJosef 'Jeff' Sipek 
300f7184619SJoshua M. Clulow int
dis_instrlen(dis_handle_t * dhp,uint64_t pc)301f7184619SJoshua M. Clulow dis_instrlen(dis_handle_t *dhp, uint64_t pc)
302f7184619SJoshua M. Clulow {
303cfc41e72SJosef 'Jeff' Sipek 	if (dhp->dh_arch->da_instrlen == NULL)
304cfc41e72SJosef 'Jeff' Sipek 		return (dis_generic_instrlen(dhp, pc));
305cfc41e72SJosef 'Jeff' Sipek 
306f7184619SJoshua M. Clulow 	return (dhp->dh_arch->da_instrlen(dhp, pc));
307f7184619SJoshua M. Clulow }
308f7184619SJoshua M. Clulow 
/*
 * vsnprintf() front end.  The standalone build formats through mdb's
 * mdb_iob_vsnprintf(); every other build uses the C library's
 * vsnprintf().  The underlying routine's result is returned as an int.
 */
int
dis_vsnprintf(char *restrict s, size_t n, const char *restrict format,
    va_list args)
{
#ifdef DIS_STANDALONE
	return (mdb_iob_vsnprintf(s, n, format, args));
#else
	return (vsnprintf(s, n, format, args));
#endif
}

/*
 * snprintf() front end built on dis_vsnprintf().
 *
 * s		destination buffer
 * n		size of the destination buffer in bytes
 * format	printf-style format string, followed by its arguments
 *
 * Returns whatever dis_vsnprintf() returns.  The result is kept in its
 * own int rather than being stored into the size_t 'n', so that a
 * negative (error) result is never routed through an unsigned type on
 * its way back to the caller.
 */
int
dis_snprintf(char *restrict s, size_t n, const char *restrict format, ...)
{
	va_list args;
	int len;

	va_start(args, format);
	len = dis_vsnprintf(s, n, format, args);
	va_end(args);

	return (len);
}
331