xref: /openbsd-src/usr.bin/ctfconv/ctfconv.c (revision 0687c322e07315b8b2d5d0eb4cd12f6106989d1c)
1*0687c322Sjasper /*	$OpenBSD: ctfconv.c,v 1.2 2017/08/11 14:58:56 jasper Exp $ */
2*0687c322Sjasper 
3192095f7Smpi /*
4192095f7Smpi  * Copyright (c) 2016-2017 Martin Pieuchot
5192095f7Smpi  *
6192095f7Smpi  * Permission to use, copy, modify, and distribute this software for any
7192095f7Smpi  * purpose with or without fee is hereby granted, provided that the above
8192095f7Smpi  * copyright notice and this permission notice appear in all copies.
9192095f7Smpi  *
10192095f7Smpi  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11192095f7Smpi  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12192095f7Smpi  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13192095f7Smpi  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14192095f7Smpi  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15192095f7Smpi  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16192095f7Smpi  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17192095f7Smpi  */
18192095f7Smpi 
19192095f7Smpi #include <sys/param.h>
20192095f7Smpi #include <sys/types.h>
21192095f7Smpi #include <sys/stat.h>
22192095f7Smpi #include <sys/exec_elf.h>
23192095f7Smpi #include <sys/mman.h>
24192095f7Smpi #include <sys/queue.h>
25192095f7Smpi #include <sys/tree.h>
26192095f7Smpi #include <sys/ctf.h>
27192095f7Smpi 
28192095f7Smpi #include <assert.h>
29192095f7Smpi #include <err.h>
30192095f7Smpi #include <fcntl.h>
31192095f7Smpi #include <locale.h>
32192095f7Smpi #include <stdio.h>
33192095f7Smpi #include <stdint.h>
34192095f7Smpi #include <stdlib.h>
35192095f7Smpi #include <string.h>
36192095f7Smpi #include <unistd.h>
37192095f7Smpi 
38192095f7Smpi #include "itype.h"
39192095f7Smpi #include "xmalloc.h"
40192095f7Smpi 
41192095f7Smpi #ifndef nitems
42192095f7Smpi #define nitems(_a)	(sizeof((_a)) / sizeof((_a)[0]))
43192095f7Smpi #endif
44192095f7Smpi 
45192095f7Smpi #define DEBUG_ABBREV	".debug_abbrev"
46192095f7Smpi #define DEBUG_INFO	".debug_info"
47192095f7Smpi #define DEBUG_LINE	".debug_line"
48192095f7Smpi #define DEBUG_STR	".debug_str"
49192095f7Smpi 
50192095f7Smpi __dead void	 usage(void);
51192095f7Smpi int		 convert(const char *);
52192095f7Smpi int		 generate(const char *, const char *, int);
53192095f7Smpi int		 elf_convert(char *, size_t);
54192095f7Smpi void		 elf_sort(void);
55192095f7Smpi void		 dump_type(struct itype *);
56192095f7Smpi void		 dump_func(struct itype *, int *);
57192095f7Smpi void		 dump_obj(struct itype *, int *);
58192095f7Smpi 
59192095f7Smpi /* elf.c */
60192095f7Smpi int		 iself(const char *, size_t);
61192095f7Smpi int		 elf_getshstab(const char *, size_t, const char **, size_t *);
62192095f7Smpi ssize_t		 elf_getsymtab(const char *, const char *, size_t,
63192095f7Smpi 		     const Elf_Sym **, size_t *);
64192095f7Smpi ssize_t		 elf_getsection(char *, const char *, const char *,
65192095f7Smpi 		     size_t, const char **, size_t *);
66192095f7Smpi 
67192095f7Smpi /* parse.c */
68192095f7Smpi void		 dwarf_parse(const char *, size_t, const char *, size_t);
69192095f7Smpi 
70192095f7Smpi const char	*ctf_enc2name(unsigned short);
71192095f7Smpi 
/*
 * Queues filled by the conversion and walked by main() when dumping:
 * itypeq is iterated through the it_next linkage, while ifuncq and iobjq
 * are filled by elf_sort() through the it_symb linkage.
 */
72192095f7Smpi /* Lists of parsed types, functions and objects. */
73192095f7Smpi struct itype_queue itypeq = TAILQ_HEAD_INITIALIZER(itypeq);
74192095f7Smpi struct itype_queue ifuncq = TAILQ_HEAD_INITIALIZER(ifuncq);
75192095f7Smpi struct itype_queue iobjq = TAILQ_HEAD_INITIALIZER(iobjq);
76192095f7Smpi 
77192095f7Smpi __dead void
78192095f7Smpi usage(void)
79192095f7Smpi {
80192095f7Smpi 	fprintf(stderr, "usage: %s [-d] -l label -o outfile file\n",
81192095f7Smpi 	    getprogname());
82192095f7Smpi 	exit(1);
83192095f7Smpi }
84192095f7Smpi 
85192095f7Smpi int
86192095f7Smpi main(int argc, char *argv[])
87192095f7Smpi {
88192095f7Smpi 	const char *filename, *label = NULL, *outfile = NULL;
89192095f7Smpi 	int dump = 0;
90192095f7Smpi 	int ch, error = 0;
91192095f7Smpi 	struct itype *it;
92192095f7Smpi 
93192095f7Smpi 	setlocale(LC_ALL, "");
94192095f7Smpi 
95192095f7Smpi 	while ((ch = getopt(argc, argv, "dl:o:")) != -1) {
96192095f7Smpi 		switch (ch) {
97192095f7Smpi 		case 'd':
98192095f7Smpi 			dump = 1;	/* ctfdump(1) like SUNW_ctf sections */
99192095f7Smpi 			break;
100192095f7Smpi 		case 'l':
101192095f7Smpi 			if (label != NULL)
102192095f7Smpi 				usage();
103192095f7Smpi 			label = optarg;
104192095f7Smpi 			break;
105192095f7Smpi 		case 'o':
106192095f7Smpi 			if (outfile != NULL)
107192095f7Smpi 				usage();
108192095f7Smpi 			outfile = optarg;
109192095f7Smpi 			break;
110192095f7Smpi 		default:
111192095f7Smpi 			usage();
112192095f7Smpi 		}
113192095f7Smpi 	}
114192095f7Smpi 
115192095f7Smpi 	argc -= optind;
116192095f7Smpi 	argv += optind;
117192095f7Smpi 
118192095f7Smpi 	if (argc != 1)
119192095f7Smpi 		usage();
120192095f7Smpi 
121192095f7Smpi 	if (!dump && (outfile == NULL || label == NULL))
122192095f7Smpi 		usage();
123192095f7Smpi 
124192095f7Smpi 	filename = *argv;
125192095f7Smpi 	error = convert(filename);
126192095f7Smpi 	if (error != 0)
127192095f7Smpi 		return error;
128192095f7Smpi 
129192095f7Smpi 	if (dump) {
130192095f7Smpi 		int fidx = -1, oidx = -1;
131192095f7Smpi 
132192095f7Smpi 		TAILQ_FOREACH(it, &iobjq, it_symb)
133192095f7Smpi 			dump_obj(it, &oidx);
134192095f7Smpi 		printf("\n");
135192095f7Smpi 
136192095f7Smpi 		TAILQ_FOREACH(it, &ifuncq, it_symb)
137192095f7Smpi 			dump_func(it, &fidx);
138192095f7Smpi 		printf("\n");
139192095f7Smpi 
140192095f7Smpi 		TAILQ_FOREACH(it, &itypeq, it_next) {
141192095f7Smpi 			if (it->it_flags & (ITF_FUNC|ITF_OBJ))
142192095f7Smpi 				continue;
143192095f7Smpi 
144192095f7Smpi 			dump_type(it);
145192095f7Smpi 		}
146192095f7Smpi 	}
147192095f7Smpi 
148192095f7Smpi 	if (outfile != NULL) {
149192095f7Smpi 		error = generate(outfile, label, 1);
150192095f7Smpi 		if (error != 0)
151192095f7Smpi 			return error;
152192095f7Smpi 	}
153192095f7Smpi 
154192095f7Smpi 	return 0;
155192095f7Smpi }
156192095f7Smpi 
/*
 * Map the file at `path' into memory and, when iself() recognizes it,
 * hand the image to elf_convert().
 *
 * Returns the value of elf_convert() for an ELF file, and 1 when the file
 * cannot be opened or stat'ed, is too big to be mapped, or is not ELF.
 * A mmap(2) failure is fatal.
 */
int
convert(const char *path)
{
	struct stat		 st;
	int			 fd, error = 1;
	char			*p;

	fd = open(path, O_RDONLY);
	if (fd == -1) {
		warn("open %s", path);
		return 1;
	}

	/* From here on every exit path must release the descriptor. */
	if (fstat(fd, &st) == -1) {
		warn("fstat %s", path);
		goto out;
	}
	if ((uintmax_t)st.st_size > SIZE_MAX) {
		warnx("file too big to fit memory");
		goto out;
	}

	/*
	 * Private, writable mapping: changes never reach the file.
	 * NOTE(review): presumably writable because the ELF helpers take a
	 * non-const image pointer -- confirm against elf.c.
	 */
	p = mmap(NULL, st.st_size, PROT_READ|PROT_WRITE, MAP_PRIVATE, fd, 0);
	if (p == MAP_FAILED)
		err(1, "mmap");

	if (iself(p, st.st_size))
		error = elf_convert(p, st.st_size);

	munmap(p, st.st_size);
out:
	close(fd);

	return error;
}
190192095f7Smpi 
/*
 * Section locations and sizes filled in by elf_convert():
 * dstrbuf/dstrlen come from the DEBUG_STR lookup, strtab/strtabsz from
 * the ELF_STRTAB lookup and symtab/nsymb from elf_getsymtab().
 * elf_sort() reads symtab, nsymb and strtab.
 */
191192095f7Smpi const char		*dstrbuf;
192192095f7Smpi size_t			 dstrlen;
193192095f7Smpi const char		*strtab;
194192095f7Smpi const Elf_Sym		*symtab;
195192095f7Smpi size_t			 strtabsz, nsymb;
196192095f7Smpi 
197192095f7Smpi int
198192095f7Smpi elf_convert(char *p, size_t filesize)
199192095f7Smpi {
200192095f7Smpi 	const char		*shstab;
201192095f7Smpi 	const char		*infobuf, *abbuf;
202192095f7Smpi 	size_t			 infolen, ablen;
203192095f7Smpi 	size_t			 shstabsz;
204192095f7Smpi 
205192095f7Smpi 	/* Find section header string table location and size. */
206192095f7Smpi 	if (elf_getshstab(p, filesize, &shstab, &shstabsz))
207192095f7Smpi 		return 1;
208192095f7Smpi 
209192095f7Smpi 	/* Find symbol table location and number of symbols. */
210192095f7Smpi 	if (elf_getsymtab(p, shstab, shstabsz, &symtab, &nsymb) == -1)
211192095f7Smpi 		warnx("symbol table not found");
212192095f7Smpi 
213192095f7Smpi 	/* Find string table location and size. */
214192095f7Smpi 	if (elf_getsection(p, ELF_STRTAB, shstab, shstabsz, &strtab,
215192095f7Smpi 	    &strtabsz) == -1)
216192095f7Smpi 		warnx("string table not found");
217192095f7Smpi 
218192095f7Smpi 	/* Find abbreviation location and size. */
219192095f7Smpi 	if (elf_getsection(p, DEBUG_ABBREV, shstab, shstabsz, &abbuf,
220192095f7Smpi 	    &ablen) == -1) {
221192095f7Smpi 		warnx("%s section not found", DEBUG_ABBREV);
222192095f7Smpi 		return 1;
223192095f7Smpi 	}
224192095f7Smpi 
225192095f7Smpi 	if (elf_getsection(p, DEBUG_INFO, shstab, shstabsz, &infobuf,
226192095f7Smpi 	    &infolen) == -1) {
227192095f7Smpi 		warnx("%s section not found", DEBUG_INFO);
228192095f7Smpi 		return 1;
229192095f7Smpi 	}
230192095f7Smpi 
231192095f7Smpi 	/* Find string table location and size. */
232192095f7Smpi 	if (elf_getsection(p, DEBUG_STR, shstab, shstabsz, &dstrbuf,
233192095f7Smpi 	    &dstrlen) == -1)
234192095f7Smpi 		warnx("%s section not found", DEBUG_STR);
235192095f7Smpi 
236192095f7Smpi 	dwarf_parse(infobuf, infolen, abbuf, ablen);
237192095f7Smpi 
238192095f7Smpi 	/* Sort functions */
239192095f7Smpi 	elf_sort();
240192095f7Smpi 
241192095f7Smpi 	return 0;
242192095f7Smpi }
243192095f7Smpi 
244192095f7Smpi void
245192095f7Smpi elf_sort(void)
246192095f7Smpi {
247192095f7Smpi 	struct itype		*it, tmp;
248192095f7Smpi 	size_t			 i;
249192095f7Smpi 
250192095f7Smpi 	memset(&tmp, 0, sizeof(tmp));
251192095f7Smpi 	for (i = 0; i < nsymb; i++) {
252192095f7Smpi 		const Elf_Sym	*st = &symtab[i];
253192095f7Smpi 		char 		*sname;
254192095f7Smpi 
255192095f7Smpi 		if (st->st_shndx == SHN_UNDEF || st->st_shndx == SHN_COMMON)
256192095f7Smpi 			continue;
257192095f7Smpi 
258192095f7Smpi 		switch (ELF_ST_TYPE(st->st_info)) {
259192095f7Smpi 		case STT_FUNC:
260192095f7Smpi 			tmp.it_flags = ITF_FUNC;
261192095f7Smpi 			break;
262192095f7Smpi 		case STT_OBJECT:
263192095f7Smpi 			tmp.it_flags = ITF_OBJ;
264192095f7Smpi 			break;
265192095f7Smpi 		default:
266192095f7Smpi 			continue;
267192095f7Smpi 		}
268192095f7Smpi 
269192095f7Smpi 		/*
270192095f7Smpi 		 * Skip local suffix
271192095f7Smpi 		 *
272192095f7Smpi 		 * FIXME: only skip local copies.
273192095f7Smpi 		 */
274192095f7Smpi 		sname = xstrdup(strtab + st->st_name);
275192095f7Smpi 		strlcpy(tmp.it_name, strtok(sname, "."), ITNAME_MAX);
276192095f7Smpi 		it = RB_FIND(isymb_tree, &isymbt, &tmp);
277192095f7Smpi 		strlcpy(tmp.it_name, (strtab + st->st_name), ITNAME_MAX);
278192095f7Smpi 		free(sname);
279192095f7Smpi 
280192095f7Smpi 		if (it == NULL) {
281192095f7Smpi 			/* Insert 'unknown' entry to match symbol order. */
282192095f7Smpi 			it = it_dup(&tmp);
283192095f7Smpi 			it->it_refp = it;
284192095f7Smpi #ifdef DEBUG
285192095f7Smpi 			warnx("symbol not found: %s", it_name(it));
286192095f7Smpi #endif
287192095f7Smpi 		}
288192095f7Smpi 
289192095f7Smpi 		if (it->it_flags & ITF_INSERTED) {
290192095f7Smpi #ifdef DEBUG
291192095f7Smpi 			warnx("%s: already inserted", it_name(it));
292192095f7Smpi #endif
293192095f7Smpi 			it = it_dup(it);
294192095f7Smpi 		}
295192095f7Smpi 
296192095f7Smpi 		/* Save symbol index for dump. */
297192095f7Smpi 		it->it_ref = i;
298192095f7Smpi 
299192095f7Smpi 		it->it_flags |= ITF_INSERTED;
300192095f7Smpi 		if (it->it_flags & ITF_FUNC)
301192095f7Smpi 			TAILQ_INSERT_TAIL(&ifuncq, it, it_symb);
302192095f7Smpi 		else
303192095f7Smpi 			TAILQ_INSERT_TAIL(&iobjq, it, it_symb);
304192095f7Smpi 	}
305192095f7Smpi }
306192095f7Smpi 
/*
 * Return the name reported by it_name() for `it', or the string "(anon)"
 * when it_name() returns NULL.
 */
const char *
type_name(struct itype *it)
{
	const char *name = it_name(it);

	return (name != NULL) ? name : "(anon)";
}
318192095f7Smpi 
319192095f7Smpi /* Display parsed types a la ctfdump(1) */
320192095f7Smpi void
321192095f7Smpi dump_type(struct itype *it)
322192095f7Smpi {
323192095f7Smpi 	struct imember *im;
324192095f7Smpi 
325192095f7Smpi #ifdef DEBUG
326192095f7Smpi 	switch (it->it_type) {
327192095f7Smpi 	case CTF_K_POINTER:
328192095f7Smpi 	case CTF_K_TYPEDEF:
329192095f7Smpi 	case CTF_K_VOLATILE:
330192095f7Smpi 	case CTF_K_CONST:
331192095f7Smpi 	case CTF_K_RESTRICT:
332192095f7Smpi 	case CTF_K_ARRAY:
333192095f7Smpi 	case CTF_K_FUNCTION:
334192095f7Smpi 		if (it->it_refp == NULL) {
335192095f7Smpi 			printf("unresolved: %s type=%d\n", it_name(it),
336192095f7Smpi 			    it->it_type);
337192095f7Smpi 			return;
338192095f7Smpi 		}
339192095f7Smpi 	default:
340192095f7Smpi 		break;
341192095f7Smpi 	}
342192095f7Smpi #endif
343192095f7Smpi 
344192095f7Smpi 	switch (it->it_type) {
345192095f7Smpi 	case CTF_K_FLOAT:
346192095f7Smpi 	case CTF_K_INTEGER:
347192095f7Smpi 		printf("  [%u] %s %s encoding=%s offset=0 bits=%u\n",
348192095f7Smpi 		    it->it_idx,
349192095f7Smpi 		    (it->it_type == CTF_K_INTEGER) ? "INTEGER" : "FLOAT",
350192095f7Smpi 		    it_name(it), ctf_enc2name(it->it_enc), it->it_size);
351192095f7Smpi 		break;
352192095f7Smpi 	case CTF_K_POINTER:
353192095f7Smpi 		printf("  <%u> POINTER %s refers to %u\n", it->it_idx,
354192095f7Smpi 		    type_name(it), it->it_refp->it_idx);
355192095f7Smpi 		break;
356192095f7Smpi 	case CTF_K_TYPEDEF:
357192095f7Smpi 		printf("  <%u> TYPEDEF %s refers to %u\n",
358192095f7Smpi 		    it->it_idx, it_name(it), it->it_refp->it_idx);
359192095f7Smpi 		break;
360192095f7Smpi 	case CTF_K_VOLATILE:
361192095f7Smpi 		printf("  <%u> VOLATILE %s refers to %u\n", it->it_idx,
362192095f7Smpi 		    type_name(it), it->it_refp->it_idx);
363192095f7Smpi 		break;
364192095f7Smpi 	case CTF_K_CONST:
365192095f7Smpi 		printf("  <%u> CONST %s refers to %u\n", it->it_idx,
366192095f7Smpi 		    type_name(it), it->it_refp->it_idx);
367192095f7Smpi 		break;
368192095f7Smpi 	case CTF_K_RESTRICT:
369192095f7Smpi 		printf("  <%u> RESTRICT %s refers to %u\n", it->it_idx,
370192095f7Smpi 		    it_name(it), it->it_refp->it_idx);
371192095f7Smpi 		break;
372192095f7Smpi 	case CTF_K_ARRAY:
373192095f7Smpi 		printf("  [%u] ARRAY %s content: %u index: %u nelems: %u\n",
374192095f7Smpi 		    it->it_idx, type_name(it), it->it_refp->it_idx, long_tidx,
375192095f7Smpi 		    it->it_nelems);
376192095f7Smpi 		printf("\n");
377192095f7Smpi 		break;
378192095f7Smpi 	case CTF_K_STRUCT:
379192095f7Smpi 	case CTF_K_UNION:
380192095f7Smpi 		printf("  [%u] %s %s (%u bytes)\n", it->it_idx,
381192095f7Smpi 		    (it->it_type == CTF_K_STRUCT) ? "STRUCT" : "UNION",
382192095f7Smpi 		    type_name(it), it->it_size);
383192095f7Smpi 		TAILQ_FOREACH(im, &it->it_members, im_next) {
384192095f7Smpi 			printf("\t%s type=%u off=%zd\n",
385192095f7Smpi 			    (im->im_flags & ITM_ANON) ? "unknown" : im->im_name,
386192095f7Smpi 			    im->im_refp->it_idx, im->im_off);
387192095f7Smpi 		}
388192095f7Smpi 		printf("\n");
389192095f7Smpi 		break;
390192095f7Smpi 	case CTF_K_ENUM:
391192095f7Smpi 		printf("  [%u] ENUM %s\n\n", it->it_idx, type_name(it));
392192095f7Smpi 		break;
393192095f7Smpi 	case CTF_K_FUNCTION:
394192095f7Smpi 		printf("  [%u] FUNCTION (%s) returns: %u args: (",
395192095f7Smpi 		    it->it_idx, (it_name(it) != NULL) ? it_name(it) : "anon",
396192095f7Smpi 		    it->it_refp->it_idx);
397192095f7Smpi 		TAILQ_FOREACH(im, &it->it_members, im_next) {
398192095f7Smpi 			printf("%u%s", im->im_refp->it_idx,
399192095f7Smpi 			    TAILQ_NEXT(im, im_next) ? ", " : "");
400192095f7Smpi 		}
401192095f7Smpi 		printf(")\n");
402192095f7Smpi 		break;
403192095f7Smpi 	default:
404192095f7Smpi 		assert(0 == 1);
405192095f7Smpi 	}
406192095f7Smpi }
407192095f7Smpi 
408192095f7Smpi void
409192095f7Smpi dump_func(struct itype *it, int *idx)
410192095f7Smpi {
411192095f7Smpi 	struct imember *im;
412192095f7Smpi 
413192095f7Smpi 	(*idx)++;
414192095f7Smpi 
415192095f7Smpi 	if (it->it_type == CTF_K_UNKNOWN && it->it_nelems == 0)
416192095f7Smpi 		return;
417192095f7Smpi 
418192095f7Smpi 	printf("  [%u] FUNC (%s) returns: %u args: (", (*idx),
419192095f7Smpi 	    (it_name(it) != NULL) ? it_name(it) : "unknown",
420192095f7Smpi 	    it->it_refp->it_idx);
421192095f7Smpi 	TAILQ_FOREACH(im, &it->it_members, im_next) {
422192095f7Smpi 		printf("%u%s", im->im_refp->it_idx,
423192095f7Smpi 		    TAILQ_NEXT(im, im_next) ? ", " : "");
424192095f7Smpi 	}
425192095f7Smpi 	printf(")\n");
426192095f7Smpi }
427192095f7Smpi 
428192095f7Smpi void
429192095f7Smpi dump_obj(struct itype *it, int *idx)
430192095f7Smpi {
431192095f7Smpi 	int l;
432192095f7Smpi 
433192095f7Smpi 	(*idx)++;
434192095f7Smpi 
435192095f7Smpi 	l = printf("  [%u] %u", (*idx), it->it_refp->it_idx);
436192095f7Smpi 	printf("%*s %s (%llu)\n", 14 - l, "", it_name(it), it->it_ref);
437192095f7Smpi }
438192095f7Smpi 
/*
 * Translate the encoding `enc' into the label shown by the dump code.
 *
 * CTF_INT_VARARGS yields "VARARGS" and the values 1 to nitems(enc_name)
 * select an entry of the table below.  Any other value is formatted in
 * hexadecimal into a static buffer, which the next such call overwrites.
 */
const char *
ctf_enc2name(unsigned short enc)
{
	static const char *enc_name[] = { "SIGNED", "CHAR", "SIGNED CHAR",
	    "BOOL", "SIGNED BOOL" };
	static char invalid[7];	/* "0x", four hex digits and the NUL */

	if (enc == CTF_INT_VARARGS)
		return "VARARGS";

	/*
	 * The table starts at encoding 1, so the last valid encoding equals
	 * the number of entries ("<" would make "SIGNED BOOL" unreachable).
	 */
	if (enc > 0 && enc <= nitems(enc_name))
		return enc_name[enc - 1];

	snprintf(invalid, sizeof(invalid), "0x%x", enc);
	return invalid;
}
455