xref: /netbsd-src/external/gpl2/mkhybrid/dist/libfile/apprentice.c (revision 7804f23efd64308ff6bfc74397b5a25728376f71)
1 /* @(#)apprentice.c	1.13 09/07/11 joerg */
2 #ifndef lint
3 static	const char sccsid[] =
4 	"@(#)apprentice.c	1.13 09/07/11 joerg";
5 #endif
6 /*
7 **	find file types by using a modified "magic" file
8 **
9 **	based on file v3.22 by Ian F. Darwin (see below)
10 **
11 **	Modified for mkhybrid James Pearson 19/5/98
12 */
13 
14 /*
15  * apprentice - make one pass through /etc/magic, learning its secrets.
16  *
17  * Copyright (c) Ian F. Darwin, 1987.
18  * Written by Ian F. Darwin.
19  *
20  * This software is not subject to any export provision of the United States
21  * Department of Commerce, and may be exported to any country or planet.
22  *
23  * Redistribution and use in source and binary forms, with or without
24  * modification, are permitted provided that the following conditions
25  * are met:
26  * 1. Redistributions of source code must retain the above copyright
27  *    notice immediately at the beginning of the file, without modification,
28  *    this list of conditions, and the following disclaimer.
29  * 2. Redistributions in binary form must reproduce the above copyright
30  *    notice, this list of conditions and the following disclaimer in the
31  *    documentation and/or other materials provided with the distribution.
32  *
33  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
34  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
35  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
36  * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR
37  * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
38  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
39  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
40  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
41  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
42  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
43  * SUCH DAMAGE.
44  */
45 
46 #include <stdio.h>
47 #include <stdlib.h>
48 #include <string.h>
49 #include <ctype.h>
50 #include "proto.h"
51 #include "file.h"
52 
53 #ifndef	lint
54 static const char moduleid[] =
55 	"@(#)Id: apprentice.c,v 1.25 1997/01/15 17:23:24 christos Exp";
56 #endif	/* lint */
57 
58 int	__f_nmagic = 0;		/* number of valid magic[]s 		*/
59 #if	defined(IS_MACOS_X)
60 /*
61  * The MAC OS X linker does not grok "common" variables.
62  * Make __f_magic a "data" variable.
63  */
64 struct  magic *__f_magic = 0;	/* array of magic entries		*/
65 #else
66 struct  magic *__f_magic;	/* array of magic entries		*/
67 #endif
68 
/*
 * EATAB - advance the local scan pointer "l" past any run of 7-bit ASCII
 * white space.  It is wrapped in do/while(0) so that "EATAB;" is a single
 * statement and may be used as the body of an if/else.  The explicit
 * "< 0x80" test is the ISO C spelling of the non-standard isascii().
 */
#define	EATAB do { \
		while ((unsigned char) *l < 0x80 && \
		       isspace((unsigned char) *l)) \
			++l; \
	} while (0)

/*
 * LOWCASE - lower-case version of the character l, or l itself when it is
 * not an upper-case letter.  The argument is evaluated more than once, so
 * it must not have side effects.
 */
#define LOWCASE(l) (isupper((unsigned char) (l)) ? \
			tolower((unsigned char) (l)) : (l))
73 
74 
/* number of entries currently allocated for __f_magic[] */
static int	maxmagic = 0;

/* file-local helpers, in the order they are used when loading a file */
static int	apprentice_1(char *fn, int check);
static int	parse(char *l, int *ndx, int check);
static int	getvalue(struct magic *m, char **p);
static char	*apgetstr(char *s, char *p, int plen, int *slen);
static int	hextoint(int c);
static void	eatsize(char **p);
84 
85 /*
86  * init_magic - read magic file and set up mapping
87  * based on the original apprentice()
88  */
89 int
init_magic(char * fn)90 init_magic(
91 char *fn			/* list of magic files */
92 )
93 {
94         maxmagic = MAXMAGIS;
95 	__f_magic = (struct magic *) calloc(sizeof(struct magic), maxmagic);
96 	if (__f_magic == NULL)
97 		return -1;
98 
99 	return(apprentice_1(fn, 0));
100 }
101 
102 static int
apprentice_1(char * fn,int check)103 apprentice_1(
104 char *fn,			/* name of magic file */
105 int check			/* non-zero? checking-only run. */
106 )
107 {
108 	static const char hdr[] =
109 		"cont\toffset\ttype\topcode\tmask\tvalue\tdesc";
110 	FILE *f;
111 	char line[BUFSIZ+1];
112 	int errs = 0;
113 	int lineno;
114 
115 	f = fopen(fn, "r");
116 	if (f==NULL) {
117 		return -1;
118 	}
119 
120 	/* parse it */
121 	if (check)	/* print silly verbose header for USG compat. */
122 		(void) printf("%s\n", hdr);
123 
124 	for (lineno = 1;fgets(line, BUFSIZ, f) != NULL; lineno++) {
125 		if (line[0]=='#')	/* comment, do not parse */
126 			continue;
127 		if (strlen(line) <= (unsigned)1) /* null line, garbage, etc */
128 			continue;
129 		line[strlen(line)-1] = '\0'; /* delete newline */
130 		if (parse(line, &__f_nmagic, check) != 0)
131 			errs = 1;
132 	}
133 
134 	(void) fclose(f);
135 	return errs;
136 }
137 
138 /*
139  * extend the sign bit if the comparison is to be signed
140  * XXX is uint32 really a good idea XXX JS
141  */
142 UInt32_t
signextend(struct magic * m,UInt32_t v)143 signextend(struct magic *m, UInt32_t v)
144 {
145 	if (!(m->flag & UNSIGNED))
146 		switch(m->type) {
147 		/*
148 		 * Do not remove the casts below.  They are
149 		 * vital.  When later compared with the data,
150 		 * the sign extension must have happened.
151 		 */
152 		case BYTE:
153 			v = (char) v;
154 			break;
155 		case SHORT:
156 		case BESHORT:
157 		case LESHORT:
158 			v = (short) v;
159 			break;
160 		case DATE:
161 		case BEDATE:
162 		case LEDATE:
163 		case LONG:
164 		case BELONG:
165 		case LELONG:
166 			v = (Int32_t) v;
167 			break;
168 		case STRING:
169 			break;
170 		default:
171 			return -1;
172 		}
173 	return v;
174 }
175 
176 /*
177  * parse one line from magic file, put into magic[index++] if valid
178  */
179 static int
parse(char * l,int * ndx,int check)180 parse(char *l, int *ndx, int check)
181 {
182 	int i = 0, nd = *ndx;
183 	struct magic *m;
184 	char *t, *s;
185 
186 #define ALLOC_INCR	20
187 	if (nd+1 >= maxmagic){
188 	    maxmagic += ALLOC_INCR;
189 	    if ((__f_magic = (struct magic *) realloc(__f_magic,
190 						  sizeof(struct magic) *
191 						  maxmagic)) == NULL) {
192 #ifdef	MAIN
193 		(void) fprintf(stderr, "%s: Out of memory.\n", progname);
194 #else
195 		(void) fprintf(stderr, "libfile: Out of memory.\n");
196 #endif
197 		if (check)
198 			return -1;
199 		else
200 			exit(1);
201 	    }
202 	    memset(&__f_magic[*ndx], 0, sizeof(struct magic) * ALLOC_INCR);
203 	}
204 	m = &__f_magic[*ndx];
205 	m->flag = 0;
206 	m->cont_level = 0;
207 
208 	while (*l == '>') {
209 		++l;		/* step over */
210 		m->cont_level++;
211 	}
212 
213 	if (m->cont_level != 0 && *l == '(') {
214 		++l;		/* step over */
215 		m->flag |= INDIR;
216 	}
217 	if (m->cont_level != 0 && *l == '&') {
218                 ++l;            /* step over */
219                 m->flag |= ADD;
220         }
221 
222 	/* get offset, then skip over it */
223 	m->offset = (int) strtoul(l,&t,0);
224 /*
225         if (l == t)
226 		magwarn("offset %s invalid", l);
227 */
228         l = t;
229 
230 	if (m->flag & INDIR) {
231 		m->in.type = LONG;
232 		m->in.offset = 0;
233 		/*
234 		 * read [.lbs][+-]nnnnn)
235 		 */
236 		if (*l == '.') {
237 			l++;
238 			switch (LOWCASE(*l)) {
239 			case 'l':
240 				m->in.type = LONG;
241 				break;
242 			case 'h':
243 			case 's':
244 				m->in.type = SHORT;
245 				break;
246 			case 'c':
247 			case 'b':
248 				m->in.type = BYTE;
249 				break;
250 			default:
251 				break;
252 			}
253 			l++;
254 		}
255 		s = l;
256 		if (*l == '+' || *l == '-') l++;
257 		if (isdigit((unsigned char)*l)) {
258 			m->in.offset = strtoul(l, &t, 0);
259 			if (*s == '-') m->in.offset = - m->in.offset;
260 		}
261 		else
262 			t = l;
263 /*
264 		if (*t++ != ')')
265 			magwarn("missing ')' in indirect offset");
266 */
267 		l = t;
268 	}
269 
270 
271 	while (isascii((unsigned char)*l) && isdigit((unsigned char)*l))
272 		++l;
273 	EATAB;
274 
275 #define NBYTE		4
276 #define NSHORT		5
277 #define NLONG		4
278 #define NSTRING 	6
279 #define NDATE		4
280 #define NBESHORT	7
281 #define NBELONG		6
282 #define NBEDATE		6
283 #define NLESHORT	7
284 #define NLELONG		6
285 #define NLEDATE		6
286 
287 	if (*l == 'u') {
288 		++l;
289 		m->flag |= UNSIGNED;
290 	}
291 
292 	/* get type, skip it */
293 	if (strncmp(l, "byte", NBYTE)==0) {
294 		m->type = BYTE;
295 		l += NBYTE;
296 	} else if (strncmp(l, "short", NSHORT)==0) {
297 		m->type = SHORT;
298 		l += NSHORT;
299 	} else if (strncmp(l, "long", NLONG)==0) {
300 		m->type = LONG;
301 		l += NLONG;
302 	} else if (strncmp(l, "string", NSTRING)==0) {
303 		m->type = STRING;
304 		l += NSTRING;
305 	} else if (strncmp(l, "date", NDATE)==0) {
306 		m->type = DATE;
307 		l += NDATE;
308 	} else if (strncmp(l, "beshort", NBESHORT)==0) {
309 		m->type = BESHORT;
310 		l += NBESHORT;
311 	} else if (strncmp(l, "belong", NBELONG)==0) {
312 		m->type = BELONG;
313 		l += NBELONG;
314 	} else if (strncmp(l, "bedate", NBEDATE)==0) {
315 		m->type = BEDATE;
316 		l += NBEDATE;
317 	} else if (strncmp(l, "leshort", NLESHORT)==0) {
318 		m->type = LESHORT;
319 		l += NLESHORT;
320 	} else if (strncmp(l, "lelong", NLELONG)==0) {
321 		m->type = LELONG;
322 		l += NLELONG;
323 	} else if (strncmp(l, "ledate", NLEDATE)==0) {
324 		m->type = LEDATE;
325 		l += NLEDATE;
326 	} else {
327 		return -1;
328 	}
329 	/* New-style anding: "0 byte&0x80 =0x80 dynamically linked" */
330 	if (*l == '&') {
331 		++l;
332 		m->mask = signextend(m, (UInt32_t)strtoul(l, &l, 0)); /* XXX JS uint32 cat may be wrong */
333 		eatsize(&l);
334 	} else
335 		m->mask = ~0L;
336 	EATAB;
337 
338 	switch (*l) {
339 	case '>':
340 	case '<':
341 	/* Old-style anding: "0 byte &0x80 dynamically linked" */
342 	case '&':
343 	case '^':
344 	case '=':
345   		m->reln = *l;
346   		++l;
347 		break;
348 	case '!':
349 		if (m->type != STRING) {
350 			m->reln = *l;
351 			++l;
352 			break;
353 		}
354 		/* FALL THROUGH */
355 	default:
356 		if (*l == 'x' && isascii((unsigned char)l[1]) &&
357 		    isspace((unsigned char)l[1])) {
358 			m->reln = *l;
359 			++l;
360 			goto GetDesc;	/* Bill The Cat */
361 		}
362   		m->reln = '=';
363 		break;
364 	}
365   	EATAB;
366 
367 	if (getvalue(m, &l))
368 		return -1;
369 	/*
370 	 * TODO finish this macro and start using it!
371 	 * #define offsetcheck {if (offset > HOWMANY-1)
372 	 *	magwarn("offset too big"); }
373 	 */
374 
375 	/*
376 	 * now get last part - the description
377 	 */
378 GetDesc:
379 	EATAB;
380 	if (l[0] == '\b') {
381 		++l;
382 		m->nospflag = 1;
383 	} else if ((l[0] == '\\') && (l[1] == 'b')) {
384 		++l;
385 		++l;
386 		m->nospflag = 1;
387 	} else
388 		m->nospflag = 0;
389 	while ((m->desc[i++] = *l++) != '\0' && i<MAXDESC)
390 		/* NULLBODY */;
391 
392 	++(*ndx);		/* make room for next */
393 	return 0;
394 }
395 
396 /*
397  * Read a numeric value from a pointer, into the value union of a magic
398  * pointer, according to the magic type.  Update the string pointer to point
399  * just after the number read.  Return 0 for success, non-zero for failure.
400  */
401 static int
getvalue(struct magic * m,char ** p)402 getvalue(struct magic *m, char **p)
403 {
404 	int slen;
405 
406 	if (m->type == STRING) {
407 		*p = apgetstr(*p, m->value.s, sizeof(m->value.s), &slen);
408 		m->vallen = slen;
409 	} else
410 		if (m->reln != 'x') {
411 			m->value.l = signextend(m, (UInt32_t)strtoul(*p, p, 0)); /* XXX JS uint32 cat may be wrong */
412 			eatsize(p);
413 		}
414 	return 0;
415 }
416 
/*
 * hextoint - value of a single hexadecimal digit character.
 *
 * Returns 0..15 for '0'-'9', 'a'-'f' and 'A'-'F', and -1 for anything
 * else, including values outside the range of a character.
 */
static int
hextoint(int c)
{
	if (c >= '0' && c <= '9')
		return c - '0';
	if (c >= 'a' && c <= 'f')
		return c - 'a' + 10;
	if (c >= 'A' && c <= 'F')
		return c - 'A' + 10;
	return -1;
}

/*
 * apgetstr - convert a string containing C character escapes.
 *
 * s	text to scan
 * p	output buffer, plen bytes long
 * slen	receives the number of bytes stored, not counting the NUL
 *
 * Scanning stops at an unescaped white-space character, at the end of
 * the input, or when the output buffer is full (a message is printed in
 * that case).  Recognised escapes are \n \r \b \t \f \v, a backslash
 * with up to three octal digits, and \x with up to two hex digits (a
 * bare \x yields 'x'); any other escaped character stands for itself.
 * The output is always NUL-terminated.
 *
 * Returns the updated scan pointer: just past the white-space character
 * or the character that did not fit, or pointing AT the terminating NUL
 * when the input ran out.  It never points beyond the terminator, so
 * the caller can keep scanning from it.
 */
static char *
apgetstr(char *s, char *p, int plen, int *slen)
{
	char	*origs = s, *origp = p;
	char	*pmax = p + plen - 1;
	int	c;
	int	val;

	for (;;) {
		c = *s;
		if (c == '\0')
			break;		/* stay on the terminator */
		s++;
		if (isspace((unsigned char) c))
			break;
		if (p >= pmax) {
			fprintf(stderr, "String too long: %s\n", origs);
			break;
		}
		if (c != '\\') {
			*p++ = (char) c;
			continue;
		}

		c = *s;
		if (c == '\0')
			break;		/* dangling backslash at end of input */
		s++;

		switch (c) {
		case 'n':
			*p++ = '\n';
			break;

		case 'r':
			*p++ = '\r';
			break;

		case 'b':
			*p++ = '\b';
			break;

		case 't':
			*p++ = '\t';
			break;

		case 'f':
			*p++ = '\f';
			break;

		case 'v':
			*p++ = '\v';
			break;

		/* \ and up to 3 octal digits */
		case '0':
		case '1':
		case '2':
		case '3':
		case '4':
		case '5':
		case '6':
		case '7':
			val = c - '0';
			if (*s >= '0' && *s <= '7') {
				val = (val << 3) | (*s++ - '0');
				if (*s >= '0' && *s <= '7')
					val = (val << 3) | (*s++ - '0');
			}
			*p++ = (char) val;
			break;

		/* \x and up to 2 hex digits */
		case 'x':
			val = 'x';	/* Default if no digits */
			c = hextoint(*s);
			if (c >= 0) {
				val = c;
				s++;
				c = hextoint(*s);
				if (c >= 0) {
					val = (val << 4) + c;
					s++;
				}
			}
			*p++ = (char) val;
			break;

		default:
			*p++ = (char) c;
			break;
		}
	}

	*p = '\0';
	*slen = (int) (p - origp);
	return s;
}
532 
533 
/*
 * showstr - print a string, writing non-printable bytes as C escapes.
 *
 * fp	stream to write to
 * s	bytes to print
 * len	number of bytes to print; a negative value (conventionally -1)
 *	means "s is NUL-terminated, print up to the NUL"
 *
 * Bytes in the range 040..0176 are written as they are.  Newline,
 * carriage return, backspace, tab, form feed and vertical tab are
 * written as \n \r \b \t \f \v, everything else as a backslash and
 * three octal digits.
 *
 * The remaining count is tested before each byte is fetched, so exactly
 * len bytes of s are read (none at all when len is 0).
 */
void
showstr(FILE *fp, const char *s, int len)
{
	int	counted = (len >= 0);
	char	c;

	for (;;) {
		if (counted) {
			if (len == 0)
				break;
			len--;
		}
		c = *s++;
		if (!counted && c == '\0')
			break;

		if (c >= 040 && c <= 0176) {	/* TODO isprint && !iscntrl */
			(void) fputc(c, fp);
			continue;
		}

		(void) fputc('\\', fp);
		switch (c) {
		case '\n':
			(void) fputc('n', fp);
			break;

		case '\r':
			(void) fputc('r', fp);
			break;

		case '\b':
			(void) fputc('b', fp);
			break;

		case '\t':
			(void) fputc('t', fp);
			break;

		case '\f':
			(void) fputc('f', fp);
			break;

		case '\v':
			(void) fputc('v', fp);
			break;

		default:
			(void) fprintf(fp, "%.3o", (unsigned) (c & 0377));
			break;
		}
	}
}
589 
/*
 * eatsize - skip the size suffix that may follow a number (e.g. 10UL).
 *
 * An optional 'u' and then at most one of 'l', 's', 'h', 'b', 'c' are
 * stepped over, in either case.  *p is updated to point just past them.
 */
static void
eatsize(char **p)
{
	char	*cur = *p;
	int	ch;

	ch = (unsigned char) *cur;
	if (isupper(ch))
		ch = tolower(ch);

	if (ch == 'u') {
		cur++;
		ch = (unsigned char) *cur;
		if (isupper(ch))
			ch = tolower(ch);
	}

	/* long, short (s or h), char/byte (b or c) */
	if (ch == 'l' || ch == 's' || ch == 'h' || ch == 'b' || ch == 'c')
		cur++;

	*p = cur;
}
615