1 /* $OpenBSD: ctfconv.c,v 1.20 2022/10/02 11:56:43 mpi Exp $ */
2
3 /*
4 * Copyright (c) 2016-2017 Martin Pieuchot
5 *
6 * Permission to use, copy, modify, and distribute this software for any
7 * purpose with or without fee is hereby granted, provided that the above
8 * copyright notice and this permission notice appear in all copies.
9 *
10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17 */
18
19 #include <sys/types.h>
20 #include <sys/stat.h>
21 #include <sys/mman.h>
22 #include <sys/queue.h>
23 #include <sys/tree.h>
24 #include <sys/ctf.h>
25
26 #include <assert.h>
27 #include <elf.h>
28 #include <err.h>
29 #include <fcntl.h>
30 #include <limits.h>
31 #include <locale.h>
32 #include <stdio.h>
33 #include <stdint.h>
34 #include <stdlib.h>
35 #include <string.h>
36 #include <unistd.h>
37
38 #include "itype.h"
39 #include "xmalloc.h"
40
41 #ifndef nitems
42 #define nitems(_a) (sizeof((_a)) / sizeof((_a)[0]))
43 #endif
44
45 #define DEBUG_ABBREV ".debug_abbrev"
46 #define DEBUG_INFO ".debug_info"
47 #define DEBUG_STR ".debug_str"
48
49 __dead void usage(void);
50 int convert(const char *);
51 int generate(const char *, const char *, int);
52 int elf_convert(char *, size_t);
53 void elf_sort(void);
54 char *guess_static_local_name(char *);
55 struct itype *find_symb(struct itype *, size_t);
56 void dump_type(struct itype *);
57 void dump_func(struct itype *, int *);
58 void dump_obj(struct itype *, int *);
59
60 /* elf.c */
61 int iself(const char *, size_t);
62 int elf_getshstab(const char *, size_t, const char **, size_t *);
63 ssize_t elf_getsymtab(const char *, size_t, const char *, size_t,
64 const Elf_Sym **, size_t *, const char **, size_t *);
65 ssize_t elf_getsection(char *, size_t, const char *, const char *,
66 size_t, const char **, size_t *);
67
68 /* parse.c */
69 void dwarf_parse(const char *, size_t, const char *, size_t);
70
71 const char *ctf_enc2name(unsigned short);
72
73 /* lists of parsed types and functions */
74 struct itype_queue itypeq = TAILQ_HEAD_INITIALIZER(itypeq);
75 struct itype_queue ifuncq = TAILQ_HEAD_INITIALIZER(ifuncq);
76 struct itype_queue iobjq = TAILQ_HEAD_INITIALIZER(iobjq);
77
78 __dead void
usage(void)79 usage(void)
80 {
81 fprintf(stderr, "usage: %s [-d] -l label -o outfile file\n",
82 getprogname());
83 exit(1);
84 }
85
86 int
main(int argc,char * argv[])87 main(int argc, char *argv[])
88 {
89 const char *filename, *label = NULL, *outfile = NULL;
90 int dump = 0;
91 int ch, error = 0;
92 struct itype *it;
93
94 setlocale(LC_ALL, "");
95
96 while ((ch = getopt(argc, argv, "dl:o:")) != -1) {
97 switch (ch) {
98 case 'd':
99 dump = 1; /* ctfdump(1)-like SUNW_ctf sections */
100 break;
101 case 'l':
102 if (label != NULL)
103 usage();
104 label = optarg;
105 break;
106 case 'o':
107 if (outfile != NULL)
108 usage();
109 outfile = optarg;
110 break;
111 default:
112 usage();
113 }
114 }
115
116 argc -= optind;
117 argv += optind;
118
119 if (argc != 1)
120 usage();
121
122 /* Either dump the sections, or write it out. */
123 if ((dump && (outfile != NULL || label != NULL)) ||
124 (!dump && (outfile == NULL || label == NULL)))
125 usage();
126
127 filename = *argv;
128
129 if (unveil(filename, "r") == -1)
130 err(1, "unveil %s", filename);
131
132 if (outfile != NULL) {
133 if (unveil(outfile, "wc") == -1)
134 err(1, "unveil %s", outfile);
135 }
136
137 if (pledge("stdio rpath wpath cpath", NULL) == -1)
138 err(1, "pledge");
139
140 error = convert(filename);
141 if (error != 0)
142 return error;
143
144 if (outfile != NULL) {
145 if (pledge("stdio wpath cpath", NULL) == -1)
146 err(1, "pledge");
147
148 error = generate(outfile, label, 1);
149 if (error != 0)
150 return error;
151 }
152
153 if (dump) {
154 if (pledge("stdio", NULL) == -1)
155 err(1, "pledge");
156
157 int fidx = -1, oidx = -1;
158
159 TAILQ_FOREACH(it, &iobjq, it_symb)
160 dump_obj(it, &oidx);
161 printf("\n");
162
163 TAILQ_FOREACH(it, &ifuncq, it_symb)
164 dump_func(it, &fidx);
165 printf("\n");
166
167 TAILQ_FOREACH(it, &itypeq, it_next) {
168 if (it->it_flags & (ITF_FUNC|ITF_OBJ))
169 continue;
170
171 dump_type(it);
172 }
173
174 return 0;
175 }
176
177 return 0;
178 }
179
/*
 * Map the file at `path' into memory and, when iself() accepts it, hand
 * the mapping to elf_convert().
 *
 * The mapping is private and writable (PROT_READ|PROT_WRITE with
 * MAP_PRIVATE) and is released before returning.
 *
 * Returns the result of elf_convert(), or 1 when the file cannot be
 * opened, examined, or is not accepted by iself().  A failing mmap(2)
 * terminates the program through err(3).
 */
int
convert(const char *path)
{
	struct stat	 sb;
	char		*map;
	int		 fd, rv = 1;

	if ((fd = open(path, O_RDONLY)) == -1) {
		warn("open %s", path);
		return 1;
	}

	if (fstat(fd, &sb) == -1) {
		warn("fstat %s", path);
		goto done;
	}

	/* The whole file has to be addressable with a size_t. */
	if ((uintmax_t)sb.st_size > SIZE_MAX) {
		warnx("file too big to fit memory");
		goto done;
	}

	map = mmap(NULL, sb.st_size, PROT_READ|PROT_WRITE, MAP_PRIVATE, fd, 0);
	if (map == MAP_FAILED)
		err(1, "mmap");

	if (iself(map, sb.st_size))
		rv = elf_convert(map, sb.st_size);

	munmap(map, sb.st_size);
done:
	close(fd);

	return rv;
}
215
216 const char *dstrbuf;
217 size_t dstrlen;
218 const char *strtab;
219 const Elf_Sym *symtab;
220 size_t strtabsz, nsymb;
221
222 int
elf_convert(char * p,size_t filesize)223 elf_convert(char *p, size_t filesize)
224 {
225 const char *shstab;
226 const char *infobuf, *abbuf;
227 size_t infolen, ablen;
228 size_t shstabsz;
229
230 /* Find section header string table location and size. */
231 if (elf_getshstab(p, filesize, &shstab, &shstabsz))
232 return 1;
233
234 /* Find symbol table and associated string table. */
235 if (elf_getsymtab(p, filesize, shstab, shstabsz, &symtab, &nsymb,
236 &strtab, &strtabsz) == -1)
237 warnx("symbol table not found");
238
239 /* Find abbreviation location and size. */
240 if (elf_getsection(p, filesize, DEBUG_ABBREV, shstab, shstabsz, &abbuf,
241 &ablen) == -1) {
242 warnx("%s section not found", DEBUG_ABBREV);
243 return 1;
244 }
245
246 if (elf_getsection(p, filesize, DEBUG_INFO, shstab, shstabsz, &infobuf,
247 &infolen) == -1) {
248 warnx("%s section not found", DEBUG_INFO);
249 return 1;
250 }
251
252 /* Find string table location and size. */
253 if (elf_getsection(p, filesize, DEBUG_STR, shstab, shstabsz, &dstrbuf,
254 &dstrlen) == -1)
255 warnx("%s section not found", DEBUG_STR);
256
257 dwarf_parse(infobuf, infolen, abbuf, ablen);
258
259 /* Sort functions */
260 elf_sort();
261
262 return 0;
263 }
264
/*
 * Guess which part of a local symbol name correspond to the variable
 * name.
 *
 * gcc 4.2.1 emits:
 *
 *	varname.id
 *
 * clang 8 emits:
 *
 *	funcname.varname
 *
 * `sname' is modified in place by strtok(3); the returned pointer
 * refers into that buffer.  Returns NULL when the name is empty or
 * designates a compiler meta symbol that must be skipped.
 *
 * The part after the first dot is considered a gcc id when it is made
 * of decimal digits only.  This also covers an id of 0, and ids that
 * would not fit in an int.
 */
char *
guess_static_local_name(char *sname)
{
	char *first, *second;

	first = strtok(sname, ".");
	if (first == NULL)
		return NULL;

	/* Skip meta symbols - gcc style. */
	if (strncmp(first, "__func__", sizeof("__func__") - 1) == 0 ||
	    strncmp(first, "__FUNCTION__", sizeof("__FUNCTION__") - 1) == 0 ||
	    strncmp(first, "__warned", sizeof("__warned") - 1) == 0)
		return NULL;

	/*
	 * "\0" is an empty delimiter set: the whole remainder is returned,
	 * or NULL when nothing follows the first component.
	 */
	second = strtok(NULL, "\0");
	if (second == NULL)
		return first;

	/* Skip meta symbols - clang style. */
	if (strncmp(second, "__warned", sizeof("__warned") - 1) == 0)
		return NULL;

	/* A purely numeric suffix is a gcc-style id: the name comes first. */
	if (second[strspn(second, "0123456789")] == '\0')
		return first;

	/* Otherwise assume clang-style name. */
	return second;
}
308
/*
 * Look up the parsed item matching the symbol whose name is stored at
 * offset `stroff' of the symbol string table.
 *
 * `tmp' is the caller's search key; its it_name field is overwritten.
 * On success the matching entry of the isymbt tree is returned.  On
 * failure NULL is returned and tmp->it_name holds the unmodified symbol
 * name (or an empty string when no name could be read), so that a
 * placeholder duplicated from `tmp' is never labelled with the name
 * left over from a previous lookup.
 */
struct itype *
find_symb(struct itype *tmp, size_t stroff)
{
	struct itype	*it;
	const char	*fullname;
	char		*sname, *p;

	if (strtab == NULL || stroff >= strtabsz) {
		tmp->it_name[0] = '\0';
		return NULL;
	}

	fullname = strtab + stroff;

	/* guess_static_local_name() modifies its argument: work on a copy. */
	sname = xstrdup(fullname);
	p = guess_static_local_name(sname);
	if (p == NULL) {
		/* Skipped symbol: do not keep the previous key's name. */
		strlcpy(tmp->it_name, fullname, ITNAME_MAX);
		free(sname);
		return NULL;
	}

	strlcpy(tmp->it_name, p, ITNAME_MAX);
	free(sname);
	it = RB_FIND(isymb_tree, &isymbt, tmp);

	/* Restore original name */
	if (it == NULL)
		strlcpy(tmp->it_name, fullname, ITNAME_MAX);

	return it;
}
334
335 void
elf_sort(void)336 elf_sort(void)
337 {
338 struct itype *it, tmp;
339 size_t i;
340
341 memset(&tmp, 0, sizeof(tmp));
342 for (i = 0; i < nsymb; i++) {
343 const Elf_Sym *st = &symtab[i];
344
345 if (st->st_shndx == SHN_UNDEF || st->st_shndx == SHN_COMMON)
346 continue;
347
348 switch (ELF_ST_TYPE(st->st_info)) {
349 case STT_FUNC:
350 tmp.it_flags = ITF_FUNC;
351 break;
352 case STT_OBJECT:
353 tmp.it_flags = ITF_OBJ;
354 break;
355 default:
356 continue;
357 }
358
359 it = find_symb(&tmp, st->st_name);
360 if (it == NULL) {
361 /* Insert 'unknown' entry to match symbol order. */
362 it = it_dup(&tmp);
363 it->it_refp = it;
364 #ifdef DEBUG
365 warnx("symbol not found: %s", it_name(it));
366 #endif
367 }
368
369 if (it->it_flags & ITF_INSERTED) {
370 #ifdef DEBUG
371 warnx("%s: already inserted", it_name(it));
372 #endif
373 it = it_dup(it);
374 }
375
376 /* Save symbol index for dump. */
377 it->it_ref = i;
378
379 it->it_flags |= ITF_INSERTED;
380 if (it->it_flags & ITF_FUNC)
381 TAILQ_INSERT_TAIL(&ifuncq, it, it_symb);
382 else
383 TAILQ_INSERT_TAIL(&iobjq, it, it_symb);
384 }
385 }
386
/*
 * Return a printable name for `it': the result of it_name(), or the
 * string "(anon)" when it_name() returns NULL.
 */
const char *
type_name(struct itype *it)
{
	const char *n = it_name(it);

	return (n != NULL) ? n : "(anon)";
}
398
399 /* Display parsed types a la ctfdump(1) */
400 void
dump_type(struct itype * it)401 dump_type(struct itype *it)
402 {
403 struct imember *im;
404
405 #ifdef DEBUG
406 switch (it->it_type) {
407 case CTF_K_POINTER:
408 case CTF_K_TYPEDEF:
409 case CTF_K_VOLATILE:
410 case CTF_K_CONST:
411 case CTF_K_RESTRICT:
412 case CTF_K_ARRAY:
413 case CTF_K_FUNCTION:
414 if (it->it_refp == NULL) {
415 printf("unresolved: %s type=%d\n", it_name(it),
416 it->it_type);
417 return;
418 }
419 default:
420 break;
421 }
422 #endif
423
424 switch (it->it_type) {
425 case CTF_K_FLOAT:
426 case CTF_K_INTEGER:
427 printf(" [%u] %s %s encoding=%s offset=0 bits=%u\n",
428 it->it_idx,
429 (it->it_type == CTF_K_INTEGER) ? "INTEGER" : "FLOAT",
430 it_name(it), ctf_enc2name(it->it_enc), it->it_size);
431 break;
432 case CTF_K_POINTER:
433 printf(" <%u> POINTER %s refers to %u\n", it->it_idx,
434 type_name(it), it->it_refp->it_idx);
435 break;
436 case CTF_K_TYPEDEF:
437 printf(" <%u> TYPEDEF %s refers to %u\n",
438 it->it_idx, it_name(it), it->it_refp->it_idx);
439 break;
440 case CTF_K_VOLATILE:
441 printf(" <%u> VOLATILE %s refers to %u\n", it->it_idx,
442 type_name(it), it->it_refp->it_idx);
443 break;
444 case CTF_K_CONST:
445 printf(" <%u> CONST %s refers to %u\n", it->it_idx,
446 type_name(it), it->it_refp->it_idx);
447 break;
448 case CTF_K_RESTRICT:
449 printf(" <%u> RESTRICT %s refers to %u\n", it->it_idx,
450 it_name(it), it->it_refp->it_idx);
451 break;
452 case CTF_K_ARRAY:
453 printf(" [%u] ARRAY %s content: %u index: %u nelems: %u\n",
454 it->it_idx, type_name(it), it->it_refp->it_idx, long_tidx,
455 it->it_nelems);
456 printf("\n");
457 break;
458 case CTF_K_STRUCT:
459 case CTF_K_UNION:
460 printf(" [%u] %s %s (%u bytes)\n", it->it_idx,
461 (it->it_type == CTF_K_STRUCT) ? "STRUCT" : "UNION",
462 type_name(it), it->it_size);
463 TAILQ_FOREACH(im, &it->it_members, im_next) {
464 printf("\t%s type=%u off=%zu\n",
465 (im_name(im) == NULL) ? "unknown" : im_name(im),
466 im->im_refp ? im->im_refp->it_idx : 0, im->im_off);
467 }
468 printf("\n");
469 break;
470 case CTF_K_ENUM:
471 printf(" [%u] ENUM %s\n", it->it_idx, type_name(it));
472 TAILQ_FOREACH(im, &it->it_members, im_next) {
473 printf("\t%s = %zu\n", im_name(im), im->im_ref);
474 }
475 printf("\n");
476 break;
477 case CTF_K_FUNCTION:
478 printf(" [%u] FUNCTION (%s) returns: %u args: (",
479 it->it_idx, (it_name(it) != NULL) ? it_name(it) : "anon",
480 it->it_refp->it_idx);
481 TAILQ_FOREACH(im, &it->it_members, im_next) {
482 printf("%u%s", im->im_refp->it_idx,
483 TAILQ_NEXT(im, im_next) ? ", " : "");
484 }
485 printf(")\n");
486 break;
487 default:
488 assert(0 == 1);
489 }
490 }
491
492 void
dump_func(struct itype * it,int * idx)493 dump_func(struct itype *it, int *idx)
494 {
495 struct imember *im;
496
497 (*idx)++;
498
499 if (it->it_type == CTF_K_UNKNOWN && it->it_nelems == 0)
500 return;
501
502 printf(" [%u] FUNC (%s) returns: %u args: (", (*idx),
503 (it_name(it) != NULL) ? it_name(it) : "unknown",
504 it->it_refp->it_idx);
505 TAILQ_FOREACH(im, &it->it_members, im_next) {
506 printf("%u%s", im->im_refp->it_idx,
507 TAILQ_NEXT(im, im_next) ? ", " : "");
508 }
509 printf(")\n");
510 }
511
512 void
dump_obj(struct itype * it,int * idx)513 dump_obj(struct itype *it, int *idx)
514 {
515 int l;
516
517 (*idx)++;
518
519 l = printf(" [%u] %u", (*idx), it->it_refp->it_idx);
520 printf("%*s %s (%llu)\n", 14 - l, "", it_name(it), it->it_ref);
521 }
522
523 const char *
ctf_enc2name(unsigned short enc)524 ctf_enc2name(unsigned short enc)
525 {
526 static const char *enc_name[] = { "SIGNED", "CHAR", "SIGNED CHAR",
527 "BOOL", "SIGNED BOOL" };
528 static char invalid[7];
529
530 if (enc == CTF_INT_VARARGS)
531 return "VARARGS";
532
533 if (enc > 0 && enc < nitems(enc_name))
534 return enc_name[enc - 1];
535
536 snprintf(invalid, sizeof(invalid), "0x%x", enc);
537 return invalid;
538 }
539