1 /* @(#)apprentice.c 1.13 09/07/11 joerg */
2 #ifndef lint
3 static const char sccsid[] =
4 "@(#)apprentice.c 1.13 09/07/11 joerg";
5 #endif
6 /*
7 ** find file types by using a modified "magic" file
8 **
9 ** based on file v3.22 by Ian F. Darwin (see below)
10 **
11 ** Modified for mkhybrid James Pearson 19/5/98
12 */
13
14 /*
15 * apprentice - make one pass through /etc/magic, learning its secrets.
16 *
17 * Copyright (c) Ian F. Darwin, 1987.
18 * Written by Ian F. Darwin.
19 *
20 * This software is not subject to any export provision of the United States
21 * Department of Commerce, and may be exported to any country or planet.
22 *
23 * Redistribution and use in source and binary forms, with or without
24 * modification, are permitted provided that the following conditions
25 * are met:
26 * 1. Redistributions of source code must retain the above copyright
27 * notice immediately at the beginning of the file, without modification,
28 * this list of conditions, and the following disclaimer.
29 * 2. Redistributions in binary form must reproduce the above copyright
30 * notice, this list of conditions and the following disclaimer in the
31 * documentation and/or other materials provided with the distribution.
32 *
33 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
34 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
35 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
36 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR
37 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
38 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
39 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
40 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
41 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
42 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
43 * SUCH DAMAGE.
44 */
45
46 #include <stdio.h>
47 #include <stdlib.h>
48 #include <string.h>
49 #include <ctype.h>
50 #include "proto.h"
51 #include "file.h"
52
53 #ifndef lint
54 static const char moduleid[] =
55 "@(#)Id: apprentice.c,v 1.25 1997/01/15 17:23:24 christos Exp";
56 #endif /* lint */
57
/*
 * Index of the next free slot in __f_magic[]; parse() increments it once
 * for every magic line it accepts.
 */
int __f_nmagic = 0; /* number of valid magic[]s */
#if defined(IS_MACOS_X)
/*
 * The Mac OS X linker does not grok "common" variables.
 * Make __f_magic a "data" variable by giving it an explicit initializer.
 */
struct magic *__f_magic = 0; /* array of magic entries */
#else
struct magic *__f_magic; /* array of magic entries */
#endif
68
/*
 * EATAB advances the scan pointer past a run of ASCII whitespace.
 * It operates on a variable named `l` that must be in scope where the
 * macro is used; it takes no argument.
 */
#define EATAB {while (isascii((unsigned char) *l) && \
 isspace((unsigned char) *l)) ++l;}
/*
 * LOWCASE yields the lower-case form of an upper-case character and any
 * other character unchanged.  The argument is evaluated more than once,
 * so it must not have side effects.
 */
#define LOWCASE(l) (isupper((unsigned char) (l)) ? \
 tolower((unsigned char) (l)) : (l))
73
74
/* Forward declarations of the file-local helpers defined further down. */
static int getvalue (struct magic *, char **);
static int hextoint (int);
static char *apgetstr (char *, char *, int, int *);
static int parse (char *, int *, int);
static void eatsize (char **);

/* Allocated capacity of __f_magic[] in entries; set by init_magic(), grown by parse(). */
static int maxmagic = 0;

static int apprentice_1 (char *, int);
84
85 /*
86 * init_magic - read magic file and set up mapping
87 * based on the original apprentice()
88 */
89 int
init_magic(char * fn)90 init_magic(
91 char *fn /* list of magic files */
92 )
93 {
94 maxmagic = MAXMAGIS;
95 __f_magic = (struct magic *) calloc(sizeof(struct magic), maxmagic);
96 if (__f_magic == NULL)
97 return -1;
98
99 return(apprentice_1(fn, 0));
100 }
101
102 static int
apprentice_1(char * fn,int check)103 apprentice_1(
104 char *fn, /* name of magic file */
105 int check /* non-zero? checking-only run. */
106 )
107 {
108 static const char hdr[] =
109 "cont\toffset\ttype\topcode\tmask\tvalue\tdesc";
110 FILE *f;
111 char line[BUFSIZ+1];
112 int errs = 0;
113 int lineno;
114
115 f = fopen(fn, "r");
116 if (f==NULL) {
117 return -1;
118 }
119
120 /* parse it */
121 if (check) /* print silly verbose header for USG compat. */
122 (void) printf("%s\n", hdr);
123
124 for (lineno = 1;fgets(line, BUFSIZ, f) != NULL; lineno++) {
125 if (line[0]=='#') /* comment, do not parse */
126 continue;
127 if (strlen(line) <= (unsigned)1) /* null line, garbage, etc */
128 continue;
129 line[strlen(line)-1] = '\0'; /* delete newline */
130 if (parse(line, &__f_nmagic, check) != 0)
131 errs = 1;
132 }
133
134 (void) fclose(f);
135 return errs;
136 }
137
138 /*
139 * extend the sign bit if the comparison is to be signed
140 * XXX is uint32 really a good idea XXX JS
141 */
142 UInt32_t
signextend(struct magic * m,UInt32_t v)143 signextend(struct magic *m, UInt32_t v)
144 {
145 if (!(m->flag & UNSIGNED))
146 switch(m->type) {
147 /*
148 * Do not remove the casts below. They are
149 * vital. When later compared with the data,
150 * the sign extension must have happened.
151 */
152 case BYTE:
153 v = (char) v;
154 break;
155 case SHORT:
156 case BESHORT:
157 case LESHORT:
158 v = (short) v;
159 break;
160 case DATE:
161 case BEDATE:
162 case LEDATE:
163 case LONG:
164 case BELONG:
165 case LELONG:
166 v = (Int32_t) v;
167 break;
168 case STRING:
169 break;
170 default:
171 return -1;
172 }
173 return v;
174 }
175
176 /*
177 * parse one line from magic file, put into magic[index++] if valid
178 */
179 static int
parse(char * l,int * ndx,int check)180 parse(char *l, int *ndx, int check)
181 {
182 int i = 0, nd = *ndx;
183 struct magic *m;
184 char *t, *s;
185
186 #define ALLOC_INCR 20
187 if (nd+1 >= maxmagic){
188 maxmagic += ALLOC_INCR;
189 if ((__f_magic = (struct magic *) realloc(__f_magic,
190 sizeof(struct magic) *
191 maxmagic)) == NULL) {
192 #ifdef MAIN
193 (void) fprintf(stderr, "%s: Out of memory.\n", progname);
194 #else
195 (void) fprintf(stderr, "libfile: Out of memory.\n");
196 #endif
197 if (check)
198 return -1;
199 else
200 exit(1);
201 }
202 memset(&__f_magic[*ndx], 0, sizeof(struct magic) * ALLOC_INCR);
203 }
204 m = &__f_magic[*ndx];
205 m->flag = 0;
206 m->cont_level = 0;
207
208 while (*l == '>') {
209 ++l; /* step over */
210 m->cont_level++;
211 }
212
213 if (m->cont_level != 0 && *l == '(') {
214 ++l; /* step over */
215 m->flag |= INDIR;
216 }
217 if (m->cont_level != 0 && *l == '&') {
218 ++l; /* step over */
219 m->flag |= ADD;
220 }
221
222 /* get offset, then skip over it */
223 m->offset = (int) strtoul(l,&t,0);
224 /*
225 if (l == t)
226 magwarn("offset %s invalid", l);
227 */
228 l = t;
229
230 if (m->flag & INDIR) {
231 m->in.type = LONG;
232 m->in.offset = 0;
233 /*
234 * read [.lbs][+-]nnnnn)
235 */
236 if (*l == '.') {
237 l++;
238 switch (LOWCASE(*l)) {
239 case 'l':
240 m->in.type = LONG;
241 break;
242 case 'h':
243 case 's':
244 m->in.type = SHORT;
245 break;
246 case 'c':
247 case 'b':
248 m->in.type = BYTE;
249 break;
250 default:
251 break;
252 }
253 l++;
254 }
255 s = l;
256 if (*l == '+' || *l == '-') l++;
257 if (isdigit((unsigned char)*l)) {
258 m->in.offset = strtoul(l, &t, 0);
259 if (*s == '-') m->in.offset = - m->in.offset;
260 }
261 else
262 t = l;
263 /*
264 if (*t++ != ')')
265 magwarn("missing ')' in indirect offset");
266 */
267 l = t;
268 }
269
270
271 while (isascii((unsigned char)*l) && isdigit((unsigned char)*l))
272 ++l;
273 EATAB;
274
275 #define NBYTE 4
276 #define NSHORT 5
277 #define NLONG 4
278 #define NSTRING 6
279 #define NDATE 4
280 #define NBESHORT 7
281 #define NBELONG 6
282 #define NBEDATE 6
283 #define NLESHORT 7
284 #define NLELONG 6
285 #define NLEDATE 6
286
287 if (*l == 'u') {
288 ++l;
289 m->flag |= UNSIGNED;
290 }
291
292 /* get type, skip it */
293 if (strncmp(l, "byte", NBYTE)==0) {
294 m->type = BYTE;
295 l += NBYTE;
296 } else if (strncmp(l, "short", NSHORT)==0) {
297 m->type = SHORT;
298 l += NSHORT;
299 } else if (strncmp(l, "long", NLONG)==0) {
300 m->type = LONG;
301 l += NLONG;
302 } else if (strncmp(l, "string", NSTRING)==0) {
303 m->type = STRING;
304 l += NSTRING;
305 } else if (strncmp(l, "date", NDATE)==0) {
306 m->type = DATE;
307 l += NDATE;
308 } else if (strncmp(l, "beshort", NBESHORT)==0) {
309 m->type = BESHORT;
310 l += NBESHORT;
311 } else if (strncmp(l, "belong", NBELONG)==0) {
312 m->type = BELONG;
313 l += NBELONG;
314 } else if (strncmp(l, "bedate", NBEDATE)==0) {
315 m->type = BEDATE;
316 l += NBEDATE;
317 } else if (strncmp(l, "leshort", NLESHORT)==0) {
318 m->type = LESHORT;
319 l += NLESHORT;
320 } else if (strncmp(l, "lelong", NLELONG)==0) {
321 m->type = LELONG;
322 l += NLELONG;
323 } else if (strncmp(l, "ledate", NLEDATE)==0) {
324 m->type = LEDATE;
325 l += NLEDATE;
326 } else {
327 return -1;
328 }
329 /* New-style anding: "0 byte&0x80 =0x80 dynamically linked" */
330 if (*l == '&') {
331 ++l;
332 m->mask = signextend(m, (UInt32_t)strtoul(l, &l, 0)); /* XXX JS uint32 cat may be wrong */
333 eatsize(&l);
334 } else
335 m->mask = ~0L;
336 EATAB;
337
338 switch (*l) {
339 case '>':
340 case '<':
341 /* Old-style anding: "0 byte &0x80 dynamically linked" */
342 case '&':
343 case '^':
344 case '=':
345 m->reln = *l;
346 ++l;
347 break;
348 case '!':
349 if (m->type != STRING) {
350 m->reln = *l;
351 ++l;
352 break;
353 }
354 /* FALL THROUGH */
355 default:
356 if (*l == 'x' && isascii((unsigned char)l[1]) &&
357 isspace((unsigned char)l[1])) {
358 m->reln = *l;
359 ++l;
360 goto GetDesc; /* Bill The Cat */
361 }
362 m->reln = '=';
363 break;
364 }
365 EATAB;
366
367 if (getvalue(m, &l))
368 return -1;
369 /*
370 * TODO finish this macro and start using it!
371 * #define offsetcheck {if (offset > HOWMANY-1)
372 * magwarn("offset too big"); }
373 */
374
375 /*
376 * now get last part - the description
377 */
378 GetDesc:
379 EATAB;
380 if (l[0] == '\b') {
381 ++l;
382 m->nospflag = 1;
383 } else if ((l[0] == '\\') && (l[1] == 'b')) {
384 ++l;
385 ++l;
386 m->nospflag = 1;
387 } else
388 m->nospflag = 0;
389 while ((m->desc[i++] = *l++) != '\0' && i<MAXDESC)
390 /* NULLBODY */;
391
392 ++(*ndx); /* make room for next */
393 return 0;
394 }
395
396 /*
397 * Read a numeric value from a pointer, into the value union of a magic
398 * pointer, according to the magic type. Update the string pointer to point
399 * just after the number read. Return 0 for success, non-zero for failure.
400 */
401 static int
getvalue(struct magic * m,char ** p)402 getvalue(struct magic *m, char **p)
403 {
404 int slen;
405
406 if (m->type == STRING) {
407 *p = apgetstr(*p, m->value.s, sizeof(m->value.s), &slen);
408 m->vallen = slen;
409 } else
410 if (m->reln != 'x') {
411 m->value.l = signextend(m, (UInt32_t)strtoul(*p, p, 0)); /* XXX JS uint32 cat may be wrong */
412 eatsize(p);
413 }
414 return 0;
415 }
416
/*
 * hextoint - value of a single hexadecimal digit
 *
 * Returns 0..15 for '0'-'9', 'a'-'f' and 'A'-'F', and -1 for anything
 * else.  Only plain comparisons are used, so the result depends neither
 * on the locale nor on the non-ISO isascii().
 */
static int
hextoint(int c)
{
	if (c >= '0' && c <= '9')
		return c - '0';
	if (c >= 'a' && c <= 'f')
		return c + 10 - 'a';
	if (c >= 'A' && c <= 'F')
		return c + 10 - 'A';
	return -1;
}

/*
 * apgetstr - convert a string containing C character escapes
 *
 * s	source text; conversion stops at an unescaped white-space
 *	character or at the end of the string
 * p	destination buffer of plen bytes; always NUL-terminated
 * slen	receives the number of bytes stored, not counting the NUL
 *
 * Recognised escapes are \n \r \b \t \f \v, a backslash followed by one
 * to three octal digits, and \x followed by one or two hex digits ("\x"
 * without digits stands for the letter x).  Any other escaped character
 * is copied literally.  If the buffer fills up, "String too long" is
 * reported on stderr and conversion stops.
 *
 * Returns the updated scan pointer: just behind the delimiting
 * white-space character (or behind the character that no longer fitted),
 * and otherwise ON the terminating NUL - never beyond it, so the caller
 * can keep scanning safely.
 */
static char *
apgetstr(char *s, char *p, int plen, int *slen)
{
	char		*origs = s, *origp = p;
	char		*pmax = p + plen - 1;
	register int	c;
	register int	val;

	for (;;) {
		c = *s;
		if (c == '\0')
			break;		/* leave s on the terminator */
		s++;
		if (isspace((unsigned char) c))
			break;
		if (p >= pmax) {
			fprintf(stderr, "String too long: %s\n", origs);
			break;
		}
		if (c != '\\') {
			*p++ = (char)c;
			continue;
		}

		c = *s;
		if (c == '\0')
			break;		/* lone backslash at end: drop it */
		s++;
		switch (c) {

		default:
			*p++ = (char) c;
			break;

		case 'n':
			*p++ = '\n';
			break;

		case 'r':
			*p++ = '\r';
			break;

		case 'b':
			*p++ = '\b';
			break;

		case 't':
			*p++ = '\t';
			break;

		case 'f':
			*p++ = '\f';
			break;

		case 'v':
			*p++ = '\v';
			break;

		/* \ and up to 3 octal digits */
		case '0':
		case '1':
		case '2':
		case '3':
		case '4':
		case '5':
		case '6':
		case '7':
			val = c - '0';
			if (*s >= '0' && *s <= '7') {		/* try for 2 */
				val = (val<<3) | (*s++ - '0');
				if (*s >= '0' && *s <= '7')	/* try for 3 */
					val = (val<<3) | (*s++ - '0');
			}
			*p++ = (char)val;
			break;

		/* \x and up to 2 hex digits */
		case 'x':
			val = 'x';	/* Default if no digits */
			c = hextoint(*s);
			if (c >= 0) {
				s++;
				val = c;
				c = hextoint(*s);
				if (c >= 0) {
					s++;
					val = (val << 4) + c;
				}
			}
			*p++ = (char)val;
			break;
		}
	}
	*p = '\0';
	*slen = (int)(p - origp);
	return s;
}
532
533
/*
 * showstr - print a string, rendering unprintable bytes as C escapes
 *
 * fp	stream to write to
 * s	bytes to print
 * len	number of bytes to print, or -1 to print up to (not including)
 *	the terminating NUL
 *
 * Bytes in the range 040..0176 are written as they are.  Newline,
 * carriage return, backspace, tab, form feed and vertical tab become
 * \n \r \b \t \f \v; every other byte becomes a backslash followed by
 * three octal digits.
 *
 * The remaining count is tested before each byte is fetched, so exactly
 * len bytes of s are read and a counted buffer needs no terminator.
 */
void
showstr(FILE *fp, const char *s, int len)
{
	register char	c;

	for (;;) {
		if (len == -1) {
			c = *s++;
			if (c == '\0')
				break;
		} else {
			if (len-- == 0)
				break;
			c = *s++;
		}

		if (c >= 040 && c <= 0176) {	/* TODO isprint && !iscntrl */
			(void) fputc(c, fp);
			continue;
		}

		(void) fputc('\\', fp);
		switch (c) {

		case '\n':
			(void) fputc('n', fp);
			break;

		case '\r':
			(void) fputc('r', fp);
			break;

		case '\b':
			(void) fputc('b', fp);
			break;

		case '\t':
			(void) fputc('t', fp);
			break;

		case '\f':
			(void) fputc('f', fp);
			break;

		case '\v':
			(void) fputc('v', fp);
			break;

		default:
			/* mask to one byte so a negative char prints as 3 digits */
			(void) fprintf(fp, "%.3o", (unsigned int)(c & 0377));
			break;
		}
	}
}
589
/*
 * eatsize - skip the size suffix that may follow a number [eg. 10UL]
 *
 * An optional 'u' followed by an optional one of 'l', 's', 'h', 'b' or
 * 'c' is skipped, each in either case.  *p is updated to point behind
 * whatever was skipped and is left alone when no suffix is present.
 */
static void
eatsize(char **p)
{
	char	*cur = *p;
	int	ch;

	/* optional unsigned marker */
	if (LOWCASE(*cur) == 'u')
		cur++;

	/* optional width letter: long, short (s/h) or char/byte (b/c) */
	ch = LOWCASE(*cur);
	if (ch == 'l' || ch == 's' || ch == 'h' || ch == 'b' || ch == 'c')
		cur++;

	*p = cur;
}
615