xref: /netbsd-src/external/bsd/file/dist/src/apprentice.c (revision 9ddb6ab554e70fb9bbd90c3d96b812bc57755a14)
1 /*	$NetBSD: apprentice.c,v 1.6 2012/02/22 17:53:51 christos Exp $	*/
2 
3 /*
4  * Copyright (c) Ian F. Darwin 1986-1995.
5  * Software written by Ian F. Darwin and others;
6  * maintained 1995-present by Christos Zoulas and others.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice immediately at the beginning of the file, without modification,
13  *    this list of conditions, and the following disclaimer.
14  * 2. Redistributions in binary form must reproduce the above copyright
15  *    notice, this list of conditions and the following disclaimer in the
16  *    documentation and/or other materials provided with the distribution.
17  *
18  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
19  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21  * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR
22  * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
24  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
25  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
26  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
27  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
28  * SUCH DAMAGE.
29  */
30 /*
31  * apprentice - make one pass through /etc/magic, learning its secrets.
32  */
33 
34 #include "file.h"
35 
36 #ifndef	lint
37 #if 0
38 FILE_RCSID("@(#)$File: apprentice.c,v 1.173 2011/12/08 12:38:24 rrt Exp $")
39 #else
40 __RCSID("$NetBSD: apprentice.c,v 1.6 2012/02/22 17:53:51 christos Exp $");
41 #endif
42 #endif	/* lint */
43 
44 #include "magic.h"
45 #include <stdlib.h>
46 #ifdef HAVE_UNISTD_H
47 #include <unistd.h>
48 #endif
49 #include <string.h>
50 #include <assert.h>
51 #include <ctype.h>
52 #include <fcntl.h>
53 #ifdef QUICK
54 #include <sys/mman.h>
55 #endif
56 #include <dirent.h>
57 
/*
 * EATAB advances the local pointer `l' past any ASCII whitespace.
 * It relies on a variable named `l' being in scope at the point of use.
 */
#define	EATAB {while (isascii((unsigned char) *l) && \
		      isspace((unsigned char) *l))  ++l;}
/*
 * LOWCASE yields the lower-case form of an upper-case character and any
 * other value unchanged.  The argument is evaluated more than once, so it
 * must not have side effects.
 */
#define LOWCASE(l) (isupper((unsigned char) (l)) ? \
			tolower((unsigned char) (l)) : (l))
/*
 * Work around a bug in headers on Digital Unix.
 * At least confirmed for: OSF1 V4.0 878
 */
#if defined(__osf__) && defined(__DECC)
#ifdef MAP_FAILED
#undef MAP_FAILED
#endif
#endif

/*
 * Fallback definitions for systems whose headers do not provide them.
 * The MAP_FAILED expansion is fully parenthesized so that it behaves as a
 * single operand in any expression (e.g. `sizeof MAP_FAILED').
 */
#ifndef MAP_FAILED
#define MAP_FAILED ((void *) -1)
#endif

#ifndef MAP_FILE
#define MAP_FILE 0
#endif
79 
/*
 * One top-level magic entry while the source files are being parsed:
 * a growable array holding the entry itself followed by the lines that
 * were appended to it as continuations.
 */
struct magic_entry {
	struct magic *mp;	/* heap array of tests for this entry */
	uint32_t cont_count;	/* number of mp[] slots currently in use */
	uint32_t max_count;	/* number of mp[] slots allocated */
};
85 
86 int file_formats[FILE_NAMES_SIZE];
87 const size_t file_nformats = FILE_NAMES_SIZE;
88 const char *file_names[FILE_NAMES_SIZE];
89 const size_t file_nnames = FILE_NAMES_SIZE;
90 
91 private int getvalue(struct magic_set *ms, struct magic *, const char **, int);
92 private int hextoint(int);
93 private const char *getstr(struct magic_set *, struct magic *, const char *,
94     int);
95 private int parse(struct magic_set *, struct magic_entry **, uint32_t *,
96     const char *, size_t, int);
97 private void eatsize(const char **);
98 private int apprentice_1(struct magic_set *, const char *, int, struct mlist *);
99 private size_t apprentice_magic_strength(const struct magic *);
100 private int apprentice_sort(const void *, const void *);
101 private void apprentice_list(struct mlist *, int );
102 private int apprentice_load(struct magic_set *, struct magic **, uint32_t *,
103     const char *, int);
104 private void byteswap(struct magic *, uint32_t);
105 private void bs1(struct magic *);
106 private uint16_t swap2(uint16_t);
107 private uint32_t swap4(uint32_t);
108 private uint64_t swap8(uint64_t);
109 private char *mkdbname(struct magic_set *, const char *, int);
110 private int apprentice_map(struct magic_set *, struct magic **, uint32_t *,
111     const char *);
112 private int apprentice_compile(struct magic_set *, struct magic **, uint32_t *,
113     const char *);
114 private int check_format_type(const char *, int);
115 private int check_format(struct magic_set *, struct magic *);
116 private int get_op(char);
117 private int parse_mime(struct magic_set *, struct magic_entry *, const char *);
118 private int parse_strength(struct magic_set *, struct magic_entry *, const char *);
119 private int parse_apple(struct magic_set *, struct magic_entry *, const char *);
120 
121 
122 private size_t maxmagic = 0;
123 private size_t magicsize = sizeof(struct magic);
124 
125 private const char usg_hdr[] = "cont\toffset\ttype\topcode\tmask\tvalue\tdesc";
126 
127 private struct {
128 	const char *name;
129 	size_t len;
130 	int (*fun)(struct magic_set *, struct magic_entry *, const char *);
131 } bang[] = {
132 #define	DECLARE_FIELD(name) { # name, sizeof(# name) - 1, parse_ ## name }
133 	DECLARE_FIELD(mime),
134 	DECLARE_FIELD(apple),
135 	DECLARE_FIELD(strength),
136 #undef	DECLARE_FIELD
137 	{ NULL, 0, NULL }
138 };
139 
#ifdef COMPILE_ONLY

int main(int, char *[]);

/*
 * Stand-alone entry point, built only when COMPILE_ONLY is defined.
 * Takes exactly one argument and hands it to magic_compile().
 * Returns 0 on success and 1 on a usage error or any failure.
 */
int
main(int argc, char *argv[])
{
	struct magic_set *ms;
	char *slash, *progname;
	int failed;

	/* Report errors under the basename of argv[0]. */
	slash = strrchr(argv[0], '/');
	progname = (slash != NULL) ? slash + 1 : argv[0];

	if (argc != 2) {
		(void)fprintf(stderr, "Usage: %s file\n", progname);
		return 1;
	}

	ms = magic_open(MAGIC_CHECK);
	if (ms == NULL) {
		(void)fprintf(stderr, "%s: %s\n", progname, strerror(errno));
		return 1;
	}

	failed = (magic_compile(ms, argv[1]) == -1);
	if (failed)
		(void)fprintf(stderr, "%s: %s\n", progname, magic_error(ms));
	magic_close(ms);
	return failed ? 1 : 0;
}
#endif /* COMPILE_ONLY */
172 
173 static const struct type_tbl_s {
174 	const char name[16];
175 	const size_t len;
176 	const int type;
177 	const int format;
178 } type_tbl[] = {
179 # define XX(s)		s, (sizeof(s) - 1)
180 # define XX_NULL	"", 0
181 	{ XX("byte"),		FILE_BYTE,		FILE_FMT_NUM },
182 	{ XX("short"),		FILE_SHORT,		FILE_FMT_NUM },
183 	{ XX("default"),	FILE_DEFAULT,		FILE_FMT_STR },
184 	{ XX("long"),		FILE_LONG,		FILE_FMT_NUM },
185 	{ XX("string"),		FILE_STRING,		FILE_FMT_STR },
186 	{ XX("date"),		FILE_DATE,		FILE_FMT_STR },
187 	{ XX("beshort"),	FILE_BESHORT,		FILE_FMT_NUM },
188 	{ XX("belong"),		FILE_BELONG,		FILE_FMT_NUM },
189 	{ XX("bedate"),		FILE_BEDATE,		FILE_FMT_STR },
190 	{ XX("leshort"),	FILE_LESHORT,		FILE_FMT_NUM },
191 	{ XX("lelong"),		FILE_LELONG,		FILE_FMT_NUM },
192 	{ XX("ledate"),		FILE_LEDATE,		FILE_FMT_STR },
193 	{ XX("pstring"),	FILE_PSTRING,		FILE_FMT_STR },
194 	{ XX("ldate"),		FILE_LDATE,		FILE_FMT_STR },
195 	{ XX("beldate"),	FILE_BELDATE,		FILE_FMT_STR },
196 	{ XX("leldate"),	FILE_LELDATE,		FILE_FMT_STR },
197 	{ XX("regex"),		FILE_REGEX,		FILE_FMT_STR },
198 	{ XX("bestring16"),	FILE_BESTRING16,	FILE_FMT_STR },
199 	{ XX("lestring16"),	FILE_LESTRING16,	FILE_FMT_STR },
200 	{ XX("search"),		FILE_SEARCH,		FILE_FMT_STR },
201 	{ XX("medate"),		FILE_MEDATE,		FILE_FMT_STR },
202 	{ XX("meldate"),	FILE_MELDATE,		FILE_FMT_STR },
203 	{ XX("melong"),		FILE_MELONG,		FILE_FMT_NUM },
204 	{ XX("quad"),		FILE_QUAD,		FILE_FMT_QUAD },
205 	{ XX("lequad"),		FILE_LEQUAD,		FILE_FMT_QUAD },
206 	{ XX("bequad"),		FILE_BEQUAD,		FILE_FMT_QUAD },
207 	{ XX("qdate"),		FILE_QDATE,		FILE_FMT_STR },
208 	{ XX("leqdate"),	FILE_LEQDATE,		FILE_FMT_STR },
209 	{ XX("beqdate"),	FILE_BEQDATE,		FILE_FMT_STR },
210 	{ XX("qldate"),		FILE_QLDATE,		FILE_FMT_STR },
211 	{ XX("leqldate"),	FILE_LEQLDATE,		FILE_FMT_STR },
212 	{ XX("beqldate"),	FILE_BEQLDATE,		FILE_FMT_STR },
213 	{ XX("float"),		FILE_FLOAT,		FILE_FMT_FLOAT },
214 	{ XX("befloat"),	FILE_BEFLOAT,		FILE_FMT_FLOAT },
215 	{ XX("lefloat"),	FILE_LEFLOAT,		FILE_FMT_FLOAT },
216 	{ XX("double"),		FILE_DOUBLE,		FILE_FMT_DOUBLE },
217 	{ XX("bedouble"),	FILE_BEDOUBLE,		FILE_FMT_DOUBLE },
218 	{ XX("ledouble"),	FILE_LEDOUBLE,		FILE_FMT_DOUBLE },
219 	{ XX("leid3"),		FILE_LEID3,		FILE_FMT_NUM },
220 	{ XX("beid3"),		FILE_BEID3,		FILE_FMT_NUM },
221 	{ XX("indirect"),	FILE_INDIRECT,		FILE_FMT_NONE },
222 	{ XX_NULL,		FILE_INVALID,		FILE_FMT_NONE },
223 # undef XX
224 # undef XX_NULL
225 };
226 
227 private int
228 get_type(const char *l, const char **t)
229 {
230 	const struct type_tbl_s *p;
231 
232 	for (p = type_tbl; p->len; p++) {
233 		if (strncmp(l, p->name, p->len) == 0) {
234 			if (t)
235 				*t = l + p->len;
236 			break;
237 		}
238 	}
239 	return p->type;
240 }
241 
242 private void
243 init_file_tables(void)
244 {
245 	static int done = 0;
246 	const struct type_tbl_s *p;
247 
248 	if (done)
249 		return;
250 	done++;
251 
252 	for (p = type_tbl; p->len; p++) {
253 		assert(p->type < FILE_NAMES_SIZE);
254 		file_names[p->type] = p->name;
255 		file_formats[p->type] = p->format;
256 	}
257 }
258 
259 /*
260  * Handle one file or directory.
261  */
262 private int
263 apprentice_1(struct magic_set *ms, const char *fn, int action,
264     struct mlist *mlist)
265 {
266 	struct magic *magic = NULL;
267 	uint32_t nmagic = 0;
268 	struct mlist *ml;
269 	int rv = -1;
270 	int mapped;
271 
272 	if (magicsize != FILE_MAGICSIZE) {
273 		file_error(ms, 0, "magic element size %lu != %lu",
274 		    (unsigned long)sizeof(*magic),
275 		    (unsigned long)FILE_MAGICSIZE);
276 		return -1;
277 	}
278 
279 	if (action == FILE_COMPILE) {
280 		rv = apprentice_load(ms, &magic, &nmagic, fn, action);
281 		if (rv != 0)
282 			return -1;
283 		rv = apprentice_compile(ms, &magic, &nmagic, fn);
284 		free(magic);
285 		return rv;
286 	}
287 
288 #ifndef COMPILE_ONLY
289 	if ((rv = apprentice_map(ms, &magic, &nmagic, fn)) == -1) {
290 		if (ms->flags & MAGIC_CHECK)
291 			file_magwarn(ms, "using regular magic file `%s'", fn);
292 		rv = apprentice_load(ms, &magic, &nmagic, fn, action);
293 		if (rv != 0)
294 			return -1;
295 	}
296 
297 	mapped = rv;
298 
299 	if (magic == NULL) {
300 		file_delmagic(magic, mapped, nmagic);
301 		return -1;
302 	}
303 
304 	if ((ml = CAST(struct mlist *, malloc(sizeof(*ml)))) == NULL) {
305 		file_delmagic(magic, mapped, nmagic);
306 		file_oomem(ms, sizeof(*ml));
307 		return -1;
308 	}
309 
310 	ml->magic = magic;
311 	ml->nmagic = nmagic;
312 	ml->mapped = mapped;
313 
314 	mlist->prev->next = ml;
315 	ml->prev = mlist->prev;
316 	ml->next = mlist;
317 	mlist->prev = ml;
318 
319 	if (action == FILE_LIST) {
320 		printf("Binary patterns:\n");
321 		apprentice_list(mlist, BINTEST);
322 		printf("Text patterns:\n");
323 		apprentice_list(mlist, TEXTTEST);
324 	}
325 
326 	return 0;
327 #endif /* COMPILE_ONLY */
328 }
329 
330 protected void
331 file_delmagic(struct magic *p, int type, size_t entries)
332 {
333 	if (p == NULL)
334 		return;
335 	switch (type) {
336 	case 2:
337 #ifdef QUICK
338 		p--;
339 		(void)munmap((void *)p, sizeof(*p) * (entries + 1));
340 		break;
341 #else
342 		(void)&entries;
343 		abort();
344 		/*NOTREACHED*/
345 #endif
346 	case 1:
347 		p--;
348 		/*FALLTHROUGH*/
349 	case 0:
350 		free(p);
351 		break;
352 	default:
353 		abort();
354 	}
355 }
356 
357 /* const char *fn: list of magic files and directories */
358 protected struct mlist *
359 file_apprentice(struct magic_set *ms, const char *fn, int action)
360 {
361 	char *p, *mfn;
362 	int file_err, errs = -1;
363 	struct mlist *mlist;
364 
365 	if ((fn = magic_getpath(fn, action)) == NULL)
366 		return NULL;
367 
368 	init_file_tables();
369 
370 	if ((mfn = strdup(fn)) == NULL) {
371 		file_oomem(ms, strlen(fn));
372 		return NULL;
373 	}
374 	fn = mfn;
375 
376 	if ((mlist = CAST(struct mlist *, malloc(sizeof(*mlist)))) == NULL) {
377 		free(mfn);
378 		file_oomem(ms, sizeof(*mlist));
379 		return NULL;
380 	}
381 	mlist->next = mlist->prev = mlist;
382 
383 	while (fn) {
384 		p = strchr(fn, PATHSEP);
385 		if (p)
386 			*p++ = '\0';
387 		if (*fn == '\0')
388 			break;
389 		file_err = apprentice_1(ms, fn, action, mlist);
390 		errs = MAX(errs, file_err);
391 		fn = p;
392 	}
393 	if (errs == -1) {
394 		free(mfn);
395 		free(mlist);
396 		mlist = NULL;
397 		file_error(ms, 0, "could not find any magic files!");
398 		return NULL;
399 	}
400 	free(mfn);
401 	return mlist;
402 }
403 
404 /*
405  * Get weight of this magic entry, for sorting purposes.
406  */
407 private size_t
408 apprentice_magic_strength(const struct magic *m)
409 {
410 #define MULT 10
411 	size_t val = 2 * MULT;	/* baseline strength */
412 
413 	switch (m->type) {
414 	case FILE_DEFAULT:	/* make sure this sorts last */
415 		if (m->factor_op != FILE_FACTOR_OP_NONE)
416 			abort();
417 		return 0;
418 
419 	case FILE_BYTE:
420 		val += 1 * MULT;
421 		break;
422 
423 	case FILE_SHORT:
424 	case FILE_LESHORT:
425 	case FILE_BESHORT:
426 		val += 2 * MULT;
427 		break;
428 
429 	case FILE_LONG:
430 	case FILE_LELONG:
431 	case FILE_BELONG:
432 	case FILE_MELONG:
433 		val += 4 * MULT;
434 		break;
435 
436 	case FILE_PSTRING:
437 	case FILE_STRING:
438 		val += m->vallen * MULT;
439 		break;
440 
441 	case FILE_BESTRING16:
442 	case FILE_LESTRING16:
443 		val += m->vallen * MULT / 2;
444 		break;
445 
446 	case FILE_SEARCH:
447 	case FILE_REGEX:
448 		val += m->vallen * MAX(MULT / m->vallen, 1);
449 		break;
450 
451 	case FILE_DATE:
452 	case FILE_LEDATE:
453 	case FILE_BEDATE:
454 	case FILE_MEDATE:
455 	case FILE_LDATE:
456 	case FILE_LELDATE:
457 	case FILE_BELDATE:
458 	case FILE_MELDATE:
459 	case FILE_FLOAT:
460 	case FILE_BEFLOAT:
461 	case FILE_LEFLOAT:
462 		val += 4 * MULT;
463 		break;
464 
465 	case FILE_QUAD:
466 	case FILE_BEQUAD:
467 	case FILE_LEQUAD:
468 	case FILE_QDATE:
469 	case FILE_LEQDATE:
470 	case FILE_BEQDATE:
471 	case FILE_QLDATE:
472 	case FILE_LEQLDATE:
473 	case FILE_BEQLDATE:
474 	case FILE_DOUBLE:
475 	case FILE_BEDOUBLE:
476 	case FILE_LEDOUBLE:
477 		val += 8 * MULT;
478 		break;
479 
480 	default:
481 		val = 0;
482 		(void)fprintf(stderr, "Bad type %d\n", m->type);
483 		abort();
484 	}
485 
486 	switch (m->reln) {
487 	case 'x':	/* matches anything penalize */
488 	case '!':       /* matches almost anything penalize */
489 		val = 0;
490 		break;
491 
492 	case '=':	/* Exact match, prefer */
493 		val += MULT;
494 		break;
495 
496 	case '>':
497 	case '<':	/* comparison match reduce strength */
498 		val -= 2 * MULT;
499 		break;
500 
501 	case '^':
502 	case '&':	/* masking bits, we could count them too */
503 		val -= MULT;
504 		break;
505 
506 	default:
507 		(void)fprintf(stderr, "Bad relation %c\n", m->reln);
508 		abort();
509 	}
510 
511 	if (val == 0)	/* ensure we only return 0 for FILE_DEFAULT */
512 		val = 1;
513 
514 	switch (m->factor_op) {
515 	case FILE_FACTOR_OP_NONE:
516 		break;
517 	case FILE_FACTOR_OP_PLUS:
518 		val += m->factor;
519 		break;
520 	case FILE_FACTOR_OP_MINUS:
521 		val -= m->factor;
522 		break;
523 	case FILE_FACTOR_OP_TIMES:
524 		val *= m->factor;
525 		break;
526 	case FILE_FACTOR_OP_DIV:
527 		val /= m->factor;
528 		break;
529 	default:
530 		abort();
531 	}
532 
533 	/*
534 	 * Magic entries with no description get a bonus because they depend
535 	 * on subsequent magic entries to print something.
536 	 */
537 	if (m->desc[0] == '\0')
538 		val++;
539 	return val;
540 }
541 
542 /*
543  * Sort callback for sorting entries by "strength" (basically length)
544  */
545 private int
546 apprentice_sort(const void *a, const void *b)
547 {
548 	const struct magic_entry *ma = CAST(const struct magic_entry *, a);
549 	const struct magic_entry *mb = CAST(const struct magic_entry *, b);
550 	size_t sa = apprentice_magic_strength(ma->mp);
551 	size_t sb = apprentice_magic_strength(mb->mp);
552 	if (sa == sb)
553 		return 0;
554 	else if (sa > sb)
555 		return -1;
556 	else
557 		return 1;
558 }
559 
560 /*
561  * Shows sorted patterns list in the order which is used for the matching
562  */
563 private void
564 apprentice_list(struct mlist *mlist, int mode)
565 {
566 	uint32_t magindex = 0;
567 	struct mlist *ml;
568 	for (ml = mlist->next; ml != mlist; ml = ml->next) {
569 		for (magindex = 0; magindex < ml->nmagic; magindex++) {
570 			struct magic *m = &ml->magic[magindex];
571 			if ((m->flag & mode) != mode) {
572 				/* Skip sub-tests */
573 				while (magindex + 1 < ml->nmagic &&
574 				       ml->magic[magindex + 1].cont_level != 0)
575 					++magindex;
576 				continue; /* Skip to next top-level test*/
577 			}
578 
579 			/*
580 			 * Try to iterate over the tree until we find item with
581 			 * description/mimetype.
582 			 */
583 			while (magindex + 1 < ml->nmagic &&
584 			       ml->magic[magindex + 1].cont_level != 0 &&
585 			       *ml->magic[magindex].desc == '\0' &&
586 			       *ml->magic[magindex].mimetype == '\0')
587 				magindex++;
588 
589 			printf("Strength = %3" SIZE_T_FORMAT "u : %s [%s]\n",
590 			    apprentice_magic_strength(m),
591 			    ml->magic[magindex].desc,
592 			    ml->magic[magindex].mimetype);
593 		}
594 	}
595 }
596 
597 private void
598 set_test_type(struct magic *mstart, struct magic *m)
599 {
600 	switch (m->type) {
601 	case FILE_BYTE:
602 	case FILE_SHORT:
603 	case FILE_LONG:
604 	case FILE_DATE:
605 	case FILE_BESHORT:
606 	case FILE_BELONG:
607 	case FILE_BEDATE:
608 	case FILE_LESHORT:
609 	case FILE_LELONG:
610 	case FILE_LEDATE:
611 	case FILE_LDATE:
612 	case FILE_BELDATE:
613 	case FILE_LELDATE:
614 	case FILE_MEDATE:
615 	case FILE_MELDATE:
616 	case FILE_MELONG:
617 	case FILE_QUAD:
618 	case FILE_LEQUAD:
619 	case FILE_BEQUAD:
620 	case FILE_QDATE:
621 	case FILE_LEQDATE:
622 	case FILE_BEQDATE:
623 	case FILE_QLDATE:
624 	case FILE_LEQLDATE:
625 	case FILE_BEQLDATE:
626 	case FILE_FLOAT:
627 	case FILE_BEFLOAT:
628 	case FILE_LEFLOAT:
629 	case FILE_DOUBLE:
630 	case FILE_BEDOUBLE:
631 	case FILE_LEDOUBLE:
632 		mstart->flag |= BINTEST;
633 		break;
634 	case FILE_STRING:
635 	case FILE_PSTRING:
636 	case FILE_BESTRING16:
637 	case FILE_LESTRING16:
638 		/* Allow text overrides */
639 		if (mstart->str_flags & STRING_TEXTTEST)
640 			mstart->flag |= TEXTTEST;
641 		else
642 			mstart->flag |= BINTEST;
643 		break;
644 	case FILE_REGEX:
645 	case FILE_SEARCH:
646 		/* Check for override */
647 		if (mstart->str_flags & STRING_BINTEST)
648 			mstart->flag |= BINTEST;
649 		if (mstart->str_flags & STRING_TEXTTEST)
650 			mstart->flag |= TEXTTEST;
651 
652 		if (mstart->flag & (TEXTTEST|BINTEST))
653 			break;
654 
655 		/* binary test if pattern is not text */
656 		if (file_looks_utf8(m->value.us, (size_t)m->vallen, NULL,
657 		    NULL) <= 0)
658 			mstart->flag |= BINTEST;
659 		else
660 			mstart->flag |= TEXTTEST;
661 		break;
662 	case FILE_DEFAULT:
663 		/* can't deduce anything; we shouldn't see this at the
664 		   top level anyway */
665 		break;
666 	case FILE_INVALID:
667 	default:
668 		/* invalid search type, but no need to complain here */
669 		break;
670 	}
671 }
672 
673 /*
674  * Load and parse one file.
675  */
676 private void
677 load_1(struct magic_set *ms, int action, const char *fn, int *errs,
678    struct magic_entry **marray, uint32_t *marraycount)
679 {
680 	size_t lineno = 0, llen = 0;
681 	char *line = NULL;
682 	ssize_t len;
683 
684 	FILE *f = fopen(ms->file = fn, "r");
685 	if (f == NULL) {
686 		if (errno != ENOENT)
687 			file_error(ms, errno, "cannot read magic file `%s'",
688 				   fn);
689 		(*errs)++;
690 		return;
691 	}
692 
693 	/* read and parse this file */
694 	for (ms->line = 1; (len = getline(&line, &llen, f)) != -1;
695 	    ms->line++) {
696 		if (len == 0) /* null line, garbage, etc */
697 			continue;
698 		if (line[len - 1] == '\n') {
699 			lineno++;
700 			line[len - 1] = '\0'; /* delete newline */
701 		}
702 		switch (line[0]) {
703 		case '\0':	/* empty, do not parse */
704 		case '#':	/* comment, do not parse */
705 			continue;
706 		case '!':
707 			if (line[1] == ':') {
708 				size_t i;
709 
710 				for (i = 0; bang[i].name != NULL; i++) {
711 					if ((size_t)(len - 2) > bang[i].len &&
712 					    memcmp(bang[i].name, line + 2,
713 					    bang[i].len) == 0)
714 						break;
715 				}
716 				if (bang[i].name == NULL) {
717 					file_error(ms, 0,
718 					    "Unknown !: entry `%s'", line);
719 					(*errs)++;
720 					continue;
721 				}
722 				if (*marraycount == 0) {
723 					file_error(ms, 0,
724 					    "No current entry for :!%s type",
725 						bang[i].name);
726 					(*errs)++;
727 					continue;
728 				}
729 				if ((*bang[i].fun)(ms,
730 				    &(*marray)[*marraycount - 1],
731 				    line + bang[i].len + 2) != 0) {
732 					(*errs)++;
733 					continue;
734 				}
735 				continue;
736 			}
737 			/*FALLTHROUGH*/
738 		default:
739 			if (parse(ms, marray, marraycount, line, lineno,
740 			    action) != 0)
741 				(*errs)++;
742 			break;
743 		}
744 	}
745 	free(line);
746 	(void)fclose(f);
747 }
748 
749 /*
750  * parse a file or directory of files
751  * const char *fn: name of magic file or directory
752  */
753 private int
754 cmpstrp(const void *p1, const void *p2)
755 {
756         return strcmp(*(char *const *)p1, *(char *const *)p2);
757 }
758 
759 private int
760 apprentice_load(struct magic_set *ms, struct magic **magicp, uint32_t *nmagicp,
761     const char *fn, int action)
762 {
763 	int errs = 0;
764 	struct magic_entry *marray;
765 	uint32_t marraycount, i, mentrycount = 0, starttest;
766 	size_t slen, files = 0, maxfiles = 0;
767 	char **filearr = NULL, *mfn;
768 	struct stat st;
769 	DIR *dir;
770 	struct dirent *d;
771 
772 	ms->flags |= MAGIC_CHECK;	/* Enable checks for parsed files */
773 
774         maxmagic = MAXMAGIS;
775 	if ((marray = CAST(struct magic_entry *, calloc(maxmagic,
776 	    sizeof(*marray)))) == NULL) {
777 		file_oomem(ms, maxmagic * sizeof(*marray));
778 		return -1;
779 	}
780 	marraycount = 0;
781 
782 	/* print silly verbose header for USG compat. */
783 	if (action == FILE_CHECK)
784 		(void)fprintf(stderr, "%s\n", usg_hdr);
785 
786 	/* load directory or file */
787 	if (stat(fn, &st) == 0 && S_ISDIR(st.st_mode)) {
788 		dir = opendir(fn);
789 		if (!dir) {
790 			errs++;
791 			goto out;
792 		}
793 		while ((d = readdir(dir)) != NULL) {
794 			if (asprintf(&mfn, "%s/%s", fn, d->d_name) < 0) {
795 				file_oomem(ms,
796 				    strlen(fn) + strlen(d->d_name) + 2);
797 				errs++;
798 				closedir(dir);
799 				goto out;
800 			}
801 			if (stat(mfn, &st) == -1 || !S_ISREG(st.st_mode)) {
802 				free(mfn);
803 				continue;
804 			}
805 			if (files >= maxfiles) {
806 				size_t mlen;
807 				maxfiles = (maxfiles + 1) * 2;
808 				mlen = maxfiles * sizeof(*filearr);
809 				if ((filearr = CAST(char **,
810 				    realloc(filearr, mlen))) == NULL) {
811 					file_oomem(ms, mlen);
812 					free(mfn);
813 					closedir(dir);
814 					errs++;
815 					goto out;
816 				}
817 			}
818 			filearr[files++] = mfn;
819 		}
820 		closedir(dir);
821 		qsort(filearr, files, sizeof(*filearr), cmpstrp);
822 		for (i = 0; i < files; i++) {
823 			load_1(ms, action, filearr[i], &errs, &marray,
824 			    &marraycount);
825 			free(filearr[i]);
826 		}
827 		free(filearr);
828 	} else
829 		load_1(ms, action, fn, &errs, &marray, &marraycount);
830 	if (errs)
831 		goto out;
832 
833 	/* Set types of tests */
834 	for (i = 0; i < marraycount; ) {
835 		if (marray[i].mp->cont_level != 0) {
836 			i++;
837 			continue;
838 		}
839 
840 		starttest = i;
841 		do {
842 			static const char text[] = "text";
843 			static const char binary[] = "binary";
844 			static const size_t len = sizeof(text);
845 			set_test_type(marray[starttest].mp, marray[i].mp);
846 			if ((ms->flags & MAGIC_DEBUG) == 0)
847 				continue;
848 			(void)fprintf(stderr, "%s%s%s: %s\n",
849 			    marray[i].mp->mimetype,
850 			    marray[i].mp->mimetype[0] == '\0' ? "" : "; ",
851 			    marray[i].mp->desc[0] ? marray[i].mp->desc :
852 			    "(no description)",
853 			    marray[i].mp->flag & BINTEST ? binary : text);
854 			if (marray[i].mp->flag & BINTEST) {
855 				char *p = strstr(marray[i].mp->desc, text);
856 				if (p && (p == marray[i].mp->desc ||
857 				    isspace((unsigned char)p[-1])) &&
858 				    (p + len - marray[i].mp->desc ==
859 				    MAXstring || (p[len] == '\0' ||
860 				    isspace((unsigned char)p[len]))))
861 					(void)fprintf(stderr, "*** Possible "
862 					    "binary test for text type\n");
863 			}
864 		} while (++i < marraycount && marray[i].mp->cont_level != 0);
865 	}
866 
867 	qsort(marray, marraycount, sizeof(*marray), apprentice_sort);
868 
869 	/*
870 	 * Make sure that any level 0 "default" line is last (if one exists).
871 	 */
872 	for (i = 0; i < marraycount; i++) {
873 		if (marray[i].mp->cont_level == 0 &&
874 		    marray[i].mp->type == FILE_DEFAULT) {
875 			while (++i < marraycount)
876 				if (marray[i].mp->cont_level == 0)
877 					break;
878 			if (i != marraycount) {
879 				/* XXX - Ugh! */
880 				ms->line = marray[i].mp->lineno;
881 				file_magwarn(ms,
882 				    "level 0 \"default\" did not sort last");
883 			}
884 			break;
885 		}
886 	}
887 
888 	for (i = 0; i < marraycount; i++)
889 		mentrycount += marray[i].cont_count;
890 
891 	slen = sizeof(**magicp) * mentrycount;
892 	if ((*magicp = CAST(struct magic *, malloc(slen))) == NULL) {
893 		file_oomem(ms, slen);
894 		errs++;
895 		goto out;
896 	}
897 
898 	mentrycount = 0;
899 	for (i = 0; i < marraycount; i++) {
900 		(void)memcpy(*magicp + mentrycount, marray[i].mp,
901 		    marray[i].cont_count * sizeof(**magicp));
902 		mentrycount += marray[i].cont_count;
903 	}
904 out:
905 	for (i = 0; i < marraycount; i++)
906 		free(marray[i].mp);
907 	free(marray);
908 	if (errs) {
909 		*magicp = NULL;
910 		*nmagicp = 0;
911 		return errs;
912 	} else {
913 		*nmagicp = mentrycount;
914 		return 0;
915 	}
916 
917 }
918 
919 /*
920  * extend the sign bit if the comparison is to be signed
921  */
922 protected uint64_t
923 file_signextend(struct magic_set *ms, struct magic *m, uint64_t v)
924 {
925 	if (!(m->flag & UNSIGNED)) {
926 		switch(m->type) {
927 		/*
928 		 * Do not remove the casts below.  They are
929 		 * vital.  When later compared with the data,
930 		 * the sign extension must have happened.
931 		 */
932 		case FILE_BYTE:
933 			v = (char) v;
934 			break;
935 		case FILE_SHORT:
936 		case FILE_BESHORT:
937 		case FILE_LESHORT:
938 			v = (short) v;
939 			break;
940 		case FILE_DATE:
941 		case FILE_BEDATE:
942 		case FILE_LEDATE:
943 		case FILE_MEDATE:
944 		case FILE_LDATE:
945 		case FILE_BELDATE:
946 		case FILE_LELDATE:
947 		case FILE_MELDATE:
948 		case FILE_LONG:
949 		case FILE_BELONG:
950 		case FILE_LELONG:
951 		case FILE_MELONG:
952 		case FILE_FLOAT:
953 		case FILE_BEFLOAT:
954 		case FILE_LEFLOAT:
955 			v = (int32_t) v;
956 			break;
957 		case FILE_QUAD:
958 		case FILE_BEQUAD:
959 		case FILE_LEQUAD:
960 		case FILE_QDATE:
961 		case FILE_QLDATE:
962 		case FILE_BEQDATE:
963 		case FILE_BEQLDATE:
964 		case FILE_LEQDATE:
965 		case FILE_LEQLDATE:
966 		case FILE_DOUBLE:
967 		case FILE_BEDOUBLE:
968 		case FILE_LEDOUBLE:
969 			v = (int64_t) v;
970 			break;
971 		case FILE_STRING:
972 		case FILE_PSTRING:
973 		case FILE_BESTRING16:
974 		case FILE_LESTRING16:
975 		case FILE_REGEX:
976 		case FILE_SEARCH:
977 		case FILE_DEFAULT:
978 		case FILE_INDIRECT:
979 			break;
980 		default:
981 			if (ms->flags & MAGIC_CHECK)
982 			    file_magwarn(ms, "cannot happen: m->type=%d\n",
983 				    m->type);
984 			return ~0U;
985 		}
986 	}
987 	return v;
988 }
989 
990 private int
991 string_modifier_check(struct magic_set *ms, struct magic *m)
992 {
993 	if ((ms->flags & MAGIC_CHECK) == 0)
994 		return 0;
995 
996 	if (m->type != FILE_PSTRING && (m->str_flags & PSTRING_LEN) != 0) {
997 		file_magwarn(ms,
998 		    "'/BHhLl' modifiers are only allowed for pascal strings\n");
999 		return -1;
1000 	}
1001 	switch (m->type) {
1002 	case FILE_BESTRING16:
1003 	case FILE_LESTRING16:
1004 		if (m->str_flags != 0) {
1005 			file_magwarn(ms,
1006 			    "no modifiers allowed for 16-bit strings\n");
1007 			return -1;
1008 		}
1009 		break;
1010 	case FILE_STRING:
1011 	case FILE_PSTRING:
1012 		if ((m->str_flags & REGEX_OFFSET_START) != 0) {
1013 			file_magwarn(ms,
1014 			    "'/%c' only allowed on regex and search\n",
1015 			    CHAR_REGEX_OFFSET_START);
1016 			return -1;
1017 		}
1018 		break;
1019 	case FILE_SEARCH:
1020 		if (m->str_range == 0) {
1021 			file_magwarn(ms,
1022 			    "missing range; defaulting to %d\n",
1023                             STRING_DEFAULT_RANGE);
1024 			m->str_range = STRING_DEFAULT_RANGE;
1025 			return -1;
1026 		}
1027 		break;
1028 	case FILE_REGEX:
1029 		if ((m->str_flags & STRING_COMPACT_WHITESPACE) != 0) {
1030 			file_magwarn(ms, "'/%c' not allowed on regex\n",
1031 			    CHAR_COMPACT_WHITESPACE);
1032 			return -1;
1033 		}
1034 		if ((m->str_flags & STRING_COMPACT_OPTIONAL_WHITESPACE) != 0) {
1035 			file_magwarn(ms, "'/%c' not allowed on regex\n",
1036 			    CHAR_COMPACT_OPTIONAL_WHITESPACE);
1037 			return -1;
1038 		}
1039 		break;
1040 	default:
1041 		file_magwarn(ms, "coding error: m->type=%d\n",
1042 		    m->type);
1043 		return -1;
1044 	}
1045 	return 0;
1046 }
1047 
1048 private int
1049 get_op(char c)
1050 {
1051 	switch (c) {
1052 	case '&':
1053 		return FILE_OPAND;
1054 	case '|':
1055 		return FILE_OPOR;
1056 	case '^':
1057 		return FILE_OPXOR;
1058 	case '+':
1059 		return FILE_OPADD;
1060 	case '-':
1061 		return FILE_OPMINUS;
1062 	case '*':
1063 		return FILE_OPMULTIPLY;
1064 	case '/':
1065 		return FILE_OPDIVIDE;
1066 	case '%':
1067 		return FILE_OPMODULO;
1068 	default:
1069 		return -1;
1070 	}
1071 }
1072 
1073 #ifdef ENABLE_CONDITIONALS
1074 private int
1075 get_cond(const char *l, const char **t)
1076 {
1077 	static const struct cond_tbl_s {
1078 		char name[8];
1079 		size_t len;
1080 		int cond;
1081 	} cond_tbl[] = {
1082 		{ "if",		2,	COND_IF },
1083 		{ "elif",	4,	COND_ELIF },
1084 		{ "else",	4,	COND_ELSE },
1085 		{ "",		0,	COND_NONE },
1086 	};
1087 	const struct cond_tbl_s *p;
1088 
1089 	for (p = cond_tbl; p->len; p++) {
1090 		if (strncmp(l, p->name, p->len) == 0 &&
1091 		    isspace((unsigned char)l[p->len])) {
1092 			if (t)
1093 				*t = l + p->len;
1094 			break;
1095 		}
1096 	}
1097 	return p->cond;
1098 }
1099 
1100 private int
1101 check_cond(struct magic_set *ms, int cond, uint32_t cont_level)
1102 {
1103 	int last_cond;
1104 	last_cond = ms->c.li[cont_level].last_cond;
1105 
1106 	switch (cond) {
1107 	case COND_IF:
1108 		if (last_cond != COND_NONE && last_cond != COND_ELIF) {
1109 			if (ms->flags & MAGIC_CHECK)
1110 				file_magwarn(ms, "syntax error: `if'");
1111 			return -1;
1112 		}
1113 		last_cond = COND_IF;
1114 		break;
1115 
1116 	case COND_ELIF:
1117 		if (last_cond != COND_IF && last_cond != COND_ELIF) {
1118 			if (ms->flags & MAGIC_CHECK)
1119 				file_magwarn(ms, "syntax error: `elif'");
1120 			return -1;
1121 		}
1122 		last_cond = COND_ELIF;
1123 		break;
1124 
1125 	case COND_ELSE:
1126 		if (last_cond != COND_IF && last_cond != COND_ELIF) {
1127 			if (ms->flags & MAGIC_CHECK)
1128 				file_magwarn(ms, "syntax error: `else'");
1129 			return -1;
1130 		}
1131 		last_cond = COND_NONE;
1132 		break;
1133 
1134 	case COND_NONE:
1135 		last_cond = COND_NONE;
1136 		break;
1137 	}
1138 
1139 	ms->c.li[cont_level].last_cond = last_cond;
1140 	return 0;
1141 }
1142 #endif /* ENABLE_CONDITIONALS */
1143 
1144 /*
1145  * parse one line from magic file, put into magic[index++] if valid
1146  */
1147 private int
1148 parse(struct magic_set *ms, struct magic_entry **mentryp, uint32_t *nmentryp,
1149     const char *line, size_t lineno, int action)
1150 {
1151 #ifdef ENABLE_CONDITIONALS
1152 	static uint32_t last_cont_level = 0;
1153 #endif
1154 	size_t i;
1155 	struct magic_entry *me;
1156 	struct magic *m;
1157 	const char *l = line;
1158 	char *t;
1159 	int op;
1160 	uint32_t cont_level;
1161 
1162 	cont_level = 0;
1163 
1164 	while (*l == '>') {
1165 		++l;		/* step over */
1166 		cont_level++;
1167 	}
1168 #ifdef ENABLE_CONDITIONALS
1169 	if (cont_level == 0 || cont_level > last_cont_level)
1170 		if (file_check_mem(ms, cont_level) == -1)
1171 			return -1;
1172 	last_cont_level = cont_level;
1173 #endif
1174 
1175 #define ALLOC_CHUNK	(size_t)10
1176 #define ALLOC_INCR	(size_t)200
1177 
1178 	if (cont_level != 0) {
1179 		if (*nmentryp == 0) {
1180 			file_error(ms, 0, "No current entry for continuation");
1181 			return -1;
1182 		}
1183 		me = &(*mentryp)[*nmentryp - 1];
1184 		if (me->cont_count == me->max_count) {
1185 			struct magic *nm;
1186 			size_t cnt = me->max_count + ALLOC_CHUNK;
1187 			if ((nm = CAST(struct magic *, realloc(me->mp,
1188 			    sizeof(*nm) * cnt))) == NULL) {
1189 				file_oomem(ms, sizeof(*nm) * cnt);
1190 				return -1;
1191 			}
1192 			me->mp = m = nm;
1193 			me->max_count = CAST(uint32_t, cnt);
1194 		}
1195 		m = &me->mp[me->cont_count++];
1196 		(void)memset(m, 0, sizeof(*m));
1197 		m->cont_level = cont_level;
1198 	} else {
1199 		if (*nmentryp == maxmagic) {
1200 			struct magic_entry *mp;
1201 
1202 			maxmagic += ALLOC_INCR;
1203 			if ((mp = CAST(struct magic_entry *,
1204 			    realloc(*mentryp, sizeof(*mp) * maxmagic))) ==
1205 			    NULL) {
1206 				file_oomem(ms, sizeof(*mp) * maxmagic);
1207 				return -1;
1208 			}
1209 			(void)memset(&mp[*nmentryp], 0, sizeof(*mp) *
1210 			    ALLOC_INCR);
1211 			*mentryp = mp;
1212 		}
1213 		me = &(*mentryp)[*nmentryp];
1214 		if (me->mp == NULL) {
1215 			size_t len = sizeof(*m) * ALLOC_CHUNK;
1216 			if ((m = CAST(struct magic *, malloc(len))) == NULL) {
1217 				file_oomem(ms, len);
1218 				return -1;
1219 			}
1220 			me->mp = m;
1221 			me->max_count = ALLOC_CHUNK;
1222 		} else
1223 			m = me->mp;
1224 		(void)memset(m, 0, sizeof(*m));
1225 		m->factor_op = FILE_FACTOR_OP_NONE;
1226 		m->cont_level = 0;
1227 		me->cont_count = 1;
1228 	}
1229 	m->lineno = CAST(uint32_t, lineno);
1230 
1231 	if (*l == '&') {  /* m->cont_level == 0 checked below. */
1232                 ++l;            /* step over */
1233                 m->flag |= OFFADD;
1234         }
1235 	if (*l == '(') {
1236 		++l;		/* step over */
1237 		m->flag |= INDIR;
1238 		if (m->flag & OFFADD)
1239 			m->flag = (m->flag & ~OFFADD) | INDIROFFADD;
1240 
1241 		if (*l == '&') {  /* m->cont_level == 0 checked below */
1242 			++l;            /* step over */
1243 			m->flag |= OFFADD;
1244 		}
1245 	}
1246 	/* Indirect offsets are not valid at level 0. */
1247 	if (m->cont_level == 0 && (m->flag & (OFFADD | INDIROFFADD)))
1248 		if (ms->flags & MAGIC_CHECK)
1249 			file_magwarn(ms, "relative offset at level 0");
1250 
1251 	/* get offset, then skip over it */
1252 	m->offset = (uint32_t)strtoul(l, &t, 0);
1253         if (l == t)
1254 		if (ms->flags & MAGIC_CHECK)
1255 			file_magwarn(ms, "offset `%s' invalid", l);
1256         l = t;
1257 
1258 	if (m->flag & INDIR) {
1259 		m->in_type = FILE_LONG;
1260 		m->in_offset = 0;
1261 		/*
1262 		 * read [.lbs][+-]nnnnn)
1263 		 */
1264 		if (*l == '.') {
1265 			l++;
1266 			switch (*l) {
1267 			case 'l':
1268 				m->in_type = FILE_LELONG;
1269 				break;
1270 			case 'L':
1271 				m->in_type = FILE_BELONG;
1272 				break;
1273 			case 'm':
1274 				m->in_type = FILE_MELONG;
1275 				break;
1276 			case 'h':
1277 			case 's':
1278 				m->in_type = FILE_LESHORT;
1279 				break;
1280 			case 'H':
1281 			case 'S':
1282 				m->in_type = FILE_BESHORT;
1283 				break;
1284 			case 'c':
1285 			case 'b':
1286 			case 'C':
1287 			case 'B':
1288 				m->in_type = FILE_BYTE;
1289 				break;
1290 			case 'e':
1291 			case 'f':
1292 			case 'g':
1293 				m->in_type = FILE_LEDOUBLE;
1294 				break;
1295 			case 'E':
1296 			case 'F':
1297 			case 'G':
1298 				m->in_type = FILE_BEDOUBLE;
1299 				break;
1300 			case 'i':
1301 				m->in_type = FILE_LEID3;
1302 				break;
1303 			case 'I':
1304 				m->in_type = FILE_BEID3;
1305 				break;
1306 			default:
1307 				if (ms->flags & MAGIC_CHECK)
1308 					file_magwarn(ms,
1309 					    "indirect offset type `%c' invalid",
1310 					    *l);
1311 				break;
1312 			}
1313 			l++;
1314 		}
1315 
1316 		m->in_op = 0;
1317 		if (*l == '~') {
1318 			m->in_op |= FILE_OPINVERSE;
1319 			l++;
1320 		}
1321 		if ((op = get_op(*l)) != -1) {
1322 			m->in_op |= op;
1323 			l++;
1324 		}
1325 		if (*l == '(') {
1326 			m->in_op |= FILE_OPINDIRECT;
1327 			l++;
1328 		}
1329 		if (isdigit((unsigned char)*l) || *l == '-') {
1330 			m->in_offset = (int32_t)strtol(l, &t, 0);
1331 			if (l == t)
1332 				if (ms->flags & MAGIC_CHECK)
1333 					file_magwarn(ms,
1334 					    "in_offset `%s' invalid", l);
1335 			l = t;
1336 		}
1337 		if (*l++ != ')' ||
1338 		    ((m->in_op & FILE_OPINDIRECT) && *l++ != ')'))
1339 			if (ms->flags & MAGIC_CHECK)
1340 				file_magwarn(ms,
1341 				    "missing ')' in indirect offset");
1342 	}
1343 	EATAB;
1344 
1345 #ifdef ENABLE_CONDITIONALS
1346 	m->cond = get_cond(l, &l);
1347 	if (check_cond(ms, m->cond, cont_level) == -1)
1348 		return -1;
1349 
1350 	EATAB;
1351 #endif
1352 
1353 	if (*l == 'u') {
1354 		++l;
1355 		m->flag |= UNSIGNED;
1356 	}
1357 
1358 	m->type = get_type(l, &l);
1359 	if (m->type == FILE_INVALID) {
1360 		if (ms->flags & MAGIC_CHECK)
1361 			file_magwarn(ms, "type `%s' invalid", l);
1362 		return -1;
1363 	}
1364 
1365 	/* New-style anding: "0 byte&0x80 =0x80 dynamically linked" */
1366 	/* New and improved: ~ & | ^ + - * / % -- exciting, isn't it? */
1367 
1368 	m->mask_op = 0;
1369 	if (*l == '~') {
1370 		if (!IS_STRING(m->type))
1371 			m->mask_op |= FILE_OPINVERSE;
1372 		else if (ms->flags & MAGIC_CHECK)
1373 			file_magwarn(ms, "'~' invalid for string types");
1374 		++l;
1375 	}
1376 	m->str_range = 0;
1377 	m->str_flags = m->type == FILE_PSTRING ? PSTRING_1_LE : 0;
1378 	if ((op = get_op(*l)) != -1) {
1379 		if (!IS_STRING(m->type)) {
1380 			uint64_t val;
1381 			++l;
1382 			m->mask_op |= op;
1383 			val = (uint64_t)strtoull(l, &t, 0);
1384 			l = t;
1385 			m->num_mask = file_signextend(ms, m, val);
1386 			eatsize(&l);
1387 		}
1388 		else if (op == FILE_OPDIVIDE) {
1389 			int have_range = 0;
1390 			while (!isspace((unsigned char)*++l)) {
1391 				switch (*l) {
1392 				case '0':  case '1':  case '2':
1393 				case '3':  case '4':  case '5':
1394 				case '6':  case '7':  case '8':
1395 				case '9':
1396 					if (have_range &&
1397 					    (ms->flags & MAGIC_CHECK))
1398 						file_magwarn(ms,
1399 						    "multiple ranges");
1400 					have_range = 1;
1401 					m->str_range = CAST(uint32_t,
1402 					    strtoul(l, &t, 0));
1403 					if (m->str_range == 0)
1404 						file_magwarn(ms,
1405 						    "zero range");
1406 					l = t - 1;
1407 					break;
1408 				case CHAR_COMPACT_WHITESPACE:
1409 					m->str_flags |=
1410 					    STRING_COMPACT_WHITESPACE;
1411 					break;
1412 				case CHAR_COMPACT_OPTIONAL_WHITESPACE:
1413 					m->str_flags |=
1414 					    STRING_COMPACT_OPTIONAL_WHITESPACE;
1415 					break;
1416 				case CHAR_IGNORE_LOWERCASE:
1417 					m->str_flags |= STRING_IGNORE_LOWERCASE;
1418 					break;
1419 				case CHAR_IGNORE_UPPERCASE:
1420 					m->str_flags |= STRING_IGNORE_UPPERCASE;
1421 					break;
1422 				case CHAR_REGEX_OFFSET_START:
1423 					m->str_flags |= REGEX_OFFSET_START;
1424 					break;
1425 				case CHAR_BINTEST:
1426 					m->str_flags |= STRING_BINTEST;
1427 					break;
1428 				case CHAR_TEXTTEST:
1429 					m->str_flags |= STRING_TEXTTEST;
1430 					break;
1431 				case CHAR_PSTRING_1_LE:
1432 					if (m->type != FILE_PSTRING)
1433 						goto bad;
1434 					m->str_flags = (m->str_flags & ~PSTRING_LEN) | PSTRING_1_LE;
1435 					break;
1436 				case CHAR_PSTRING_2_BE:
1437 					if (m->type != FILE_PSTRING)
1438 						goto bad;
1439 					m->str_flags = (m->str_flags & ~PSTRING_LEN) | PSTRING_2_BE;
1440 					break;
1441 				case CHAR_PSTRING_2_LE:
1442 					if (m->type != FILE_PSTRING)
1443 						goto bad;
1444 					m->str_flags = (m->str_flags & ~PSTRING_LEN) | PSTRING_2_LE;
1445 					break;
1446 				case CHAR_PSTRING_4_BE:
1447 					if (m->type != FILE_PSTRING)
1448 						goto bad;
1449 					m->str_flags = (m->str_flags & ~PSTRING_LEN) | PSTRING_4_BE;
1450 					break;
1451 				case CHAR_PSTRING_4_LE:
1452 					if (m->type != FILE_PSTRING)
1453 						goto bad;
1454 					m->str_flags = (m->str_flags & ~PSTRING_LEN) | PSTRING_4_LE;
1455 					break;
1456 				case CHAR_PSTRING_LENGTH_INCLUDES_ITSELF:
1457 					if (m->type != FILE_PSTRING)
1458 						goto bad;
1459 					m->str_flags |= PSTRING_LENGTH_INCLUDES_ITSELF;
1460 					break;
1461 				default:
1462 				bad:
1463 					if (ms->flags & MAGIC_CHECK)
1464 						file_magwarn(ms,
1465 						    "string extension `%c' "
1466 						    "invalid", *l);
1467 					return -1;
1468 				}
1469 				/* allow multiple '/' for readability */
1470 				if (l[1] == '/' &&
1471 				    !isspace((unsigned char)l[2]))
1472 					l++;
1473 			}
1474 			if (string_modifier_check(ms, m) == -1)
1475 				return -1;
1476 		}
1477 		else {
1478 			if (ms->flags & MAGIC_CHECK)
1479 				file_magwarn(ms, "invalid string op: %c", *t);
1480 			return -1;
1481 		}
1482 	}
1483 	/*
1484 	 * We used to set mask to all 1's here, instead let's just not do
1485 	 * anything if mask = 0 (unless you have a better idea)
1486 	 */
1487 	EATAB;
1488 
1489 	switch (*l) {
1490 	case '>':
1491 	case '<':
1492   		m->reln = *l;
1493   		++l;
1494 		if (*l == '=') {
1495 			if (ms->flags & MAGIC_CHECK) {
1496 				file_magwarn(ms, "%c= not supported",
1497 				    m->reln);
1498 				return -1;
1499 			}
1500 		   ++l;
1501 		}
1502 		break;
1503 	/* Old-style anding: "0 byte &0x80 dynamically linked" */
1504 	case '&':
1505 	case '^':
1506 	case '=':
1507   		m->reln = *l;
1508   		++l;
1509 		if (*l == '=') {
1510 		   /* HP compat: ignore &= etc. */
1511 		   ++l;
1512 		}
1513 		break;
1514 	case '!':
1515 		m->reln = *l;
1516 		++l;
1517 		break;
1518 	default:
1519   		m->reln = '=';	/* the default relation */
1520 		if (*l == 'x' && ((isascii((unsigned char)l[1]) &&
1521 		    isspace((unsigned char)l[1])) || !l[1])) {
1522 			m->reln = *l;
1523 			++l;
1524 		}
1525 		break;
1526 	}
1527 	/*
1528 	 * Grab the value part, except for an 'x' reln.
1529 	 */
1530 	if (m->reln != 'x' && getvalue(ms, m, &l, action))
1531 		return -1;
1532 
1533 	/*
1534 	 * TODO finish this macro and start using it!
1535 	 * #define offsetcheck {if (offset > HOWMANY-1)
1536 	 *	magwarn("offset too big"); }
1537 	 */
1538 
1539 	/*
1540 	 * Now get last part - the description
1541 	 */
1542 	EATAB;
1543 	if (l[0] == '\b') {
1544 		++l;
1545 		m->flag |= NOSPACE;
1546 	} else if ((l[0] == '\\') && (l[1] == 'b')) {
1547 		++l;
1548 		++l;
1549 		m->flag |= NOSPACE;
1550 	}
1551 	for (i = 0; (m->desc[i++] = *l++) != '\0' && i < sizeof(m->desc); )
1552 		continue;
1553 	if (i == sizeof(m->desc)) {
1554 		m->desc[sizeof(m->desc) - 1] = '\0';
1555 		if (ms->flags & MAGIC_CHECK)
1556 			file_magwarn(ms, "description `%s' truncated", m->desc);
1557 	}
1558 
1559         /*
1560 	 * We only do this check while compiling, or if any of the magic
1561 	 * files were not compiled.
1562          */
1563         if (ms->flags & MAGIC_CHECK) {
1564 		if (check_format(ms, m) == -1)
1565 			return -1;
1566 	}
1567 #ifndef COMPILE_ONLY
1568 	if (action == FILE_CHECK) {
1569 		file_mdump(m);
1570 	}
1571 #endif
1572 	m->mimetype[0] = '\0';		/* initialise MIME type to none */
1573 	if (m->cont_level == 0)
1574 		++(*nmentryp);		/* make room for next */
1575 	return 0;
1576 }
1577 
1578 /*
1579  * parse a STRENGTH annotation line from magic file, put into magic[index - 1]
1580  * if valid
1581  */
1582 private int
1583 parse_strength(struct magic_set *ms, struct magic_entry *me, const char *line)
1584 {
1585 	const char *l = line;
1586 	char *el;
1587 	unsigned long factor;
1588 	struct magic *m = &me->mp[0];
1589 
1590 	if (m->factor_op != FILE_FACTOR_OP_NONE) {
1591 		file_magwarn(ms,
1592 		    "Current entry already has a strength type: %c %d",
1593 		    m->factor_op, m->factor);
1594 		return -1;
1595 	}
1596 	EATAB;
1597 	switch (*l) {
1598 	case FILE_FACTOR_OP_NONE:
1599 	case FILE_FACTOR_OP_PLUS:
1600 	case FILE_FACTOR_OP_MINUS:
1601 	case FILE_FACTOR_OP_TIMES:
1602 	case FILE_FACTOR_OP_DIV:
1603 		m->factor_op = *l++;
1604 		break;
1605 	default:
1606 		file_magwarn(ms, "Unknown factor op `%c'", *l);
1607 		return -1;
1608 	}
1609 	EATAB;
1610 	factor = strtoul(l, &el, 0);
1611 	if (factor > 255) {
1612 		file_magwarn(ms, "Too large factor `%lu'", factor);
1613 		goto out;
1614 	}
1615 	if (*el && !isspace((unsigned char)*el)) {
1616 		file_magwarn(ms, "Bad factor `%s'", l);
1617 		goto out;
1618 	}
1619 	m->factor = (uint8_t)factor;
1620 	if (m->factor == 0 && m->factor_op == FILE_FACTOR_OP_DIV) {
1621 		file_magwarn(ms, "Cannot have factor op `%c' and factor %u",
1622 		    m->factor_op, m->factor);
1623 		goto out;
1624 	}
1625 	return 0;
1626 out:
1627 	m->factor_op = FILE_FACTOR_OP_NONE;
1628 	m->factor = 0;
1629 	return -1;
1630 }
1631 
1632 /*
1633  * Parse an Apple CREATOR/TYPE annotation from magic file and put it into
1634  * magic[index - 1]
1635  */
1636 private int
1637 parse_apple(struct magic_set *ms, struct magic_entry *me, const char *line)
1638 {
1639 	size_t i;
1640 	const char *l = line;
1641 	struct magic *m = &me->mp[me->cont_count == 0 ? 0 : me->cont_count - 1];
1642 
1643 	if (m->apple[0] != '\0') {
1644 		file_magwarn(ms, "Current entry already has a APPLE type "
1645 		    "`%.8s', new type `%s'", m->mimetype, l);
1646 		return -1;
1647 	}
1648 
1649 	EATAB;
1650 	for (i = 0; *l && ((isascii((unsigned char)*l) &&
1651 	    isalnum((unsigned char)*l)) || strchr("-+/.", *l)) &&
1652 	    i < sizeof(m->apple); m->apple[i++] = *l++)
1653 		continue;
1654 	if (i == sizeof(m->apple) && *l) {
1655 		/* We don't need to NUL terminate here, printing handles it */
1656 		if (ms->flags & MAGIC_CHECK)
1657 			file_magwarn(ms, "APPLE type `%s' truncated %"
1658 			    SIZE_T_FORMAT "u", line, i);
1659 	}
1660 
1661 	if (i > 0)
1662 		return 0;
1663 	else
1664 		return -1;
1665 }
1666 
1667 /*
1668  * parse a MIME annotation line from magic file, put into magic[index - 1]
1669  * if valid
1670  */
1671 private int
1672 parse_mime(struct magic_set *ms, struct magic_entry *me, const char *line)
1673 {
1674 	size_t i;
1675 	const char *l = line;
1676 	struct magic *m = &me->mp[me->cont_count == 0 ? 0 : me->cont_count - 1];
1677 
1678 	if (m->mimetype[0] != '\0') {
1679 		file_magwarn(ms, "Current entry already has a MIME type `%s',"
1680 		    " new type `%s'", m->mimetype, l);
1681 		return -1;
1682 	}
1683 
1684 	EATAB;
1685 	for (i = 0; *l && ((isascii((unsigned char)*l) &&
1686 	    isalnum((unsigned char)*l)) || strchr("-+/.", *l)) &&
1687 	    i < sizeof(m->mimetype); m->mimetype[i++] = *l++)
1688 		continue;
1689 	if (i == sizeof(m->mimetype)) {
1690 		m->mimetype[sizeof(m->mimetype) - 1] = '\0';
1691 		if (ms->flags & MAGIC_CHECK)
1692 			file_magwarn(ms, "MIME type `%s' truncated %"
1693 			    SIZE_T_FORMAT "u", m->mimetype, i);
1694 	} else
1695 		m->mimetype[i] = '\0';
1696 
1697 	if (i > 0)
1698 		return 0;
1699 	else
1700 		return -1;
1701 }
1702 
1703 private int
1704 check_format_type(const char *ptr, int type)
1705 {
1706 	int quad = 0;
1707 	if (*ptr == '\0') {
1708 		/* Missing format string; bad */
1709 		return -1;
1710 	}
1711 
1712 	switch (type) {
1713 	case FILE_FMT_QUAD:
1714 		quad = 1;
1715 		/*FALLTHROUGH*/
1716 	case FILE_FMT_NUM:
1717 		if (*ptr == '-')
1718 			ptr++;
1719 		if (*ptr == '.')
1720 			ptr++;
1721 		while (isdigit((unsigned char)*ptr)) ptr++;
1722 		if (*ptr == '.')
1723 			ptr++;
1724 		while (isdigit((unsigned char)*ptr)) ptr++;
1725 		if (quad) {
1726 			if (*ptr++ != 'l')
1727 				return -1;
1728 			if (*ptr++ != 'l')
1729 				return -1;
1730 		}
1731 
1732 		switch (*ptr++) {
1733 		case 'l':
1734 			switch (*ptr++) {
1735 			case 'i':
1736 			case 'd':
1737 			case 'u':
1738 			case 'x':
1739 			case 'X':
1740 				return 0;
1741 			default:
1742 				return -1;
1743 			}
1744 
1745 		case 'h':
1746 			switch (*ptr++) {
1747 			case 'h':
1748 				switch (*ptr++) {
1749 				case 'i':
1750 				case 'd':
1751 				case 'u':
1752 				case 'x':
1753 				case 'X':
1754 					return 0;
1755 				default:
1756 					return -1;
1757 				}
1758 			case 'd':
1759 				return 0;
1760 			default:
1761 				return -1;
1762 			}
1763 
1764 		case 'i':
1765 		case 'c':
1766 		case 'd':
1767 		case 'u':
1768 		case 'x':
1769 		case 'X':
1770 			return 0;
1771 
1772 		default:
1773 			return -1;
1774 		}
1775 
1776 	case FILE_FMT_FLOAT:
1777 	case FILE_FMT_DOUBLE:
1778 		if (*ptr == '-')
1779 			ptr++;
1780 		if (*ptr == '.')
1781 			ptr++;
1782 		while (isdigit((unsigned char)*ptr)) ptr++;
1783 		if (*ptr == '.')
1784 			ptr++;
1785 		while (isdigit((unsigned char)*ptr)) ptr++;
1786 
1787 		switch (*ptr++) {
1788 		case 'e':
1789 		case 'E':
1790 		case 'f':
1791 		case 'F':
1792 		case 'g':
1793 		case 'G':
1794 			return 0;
1795 
1796 		default:
1797 			return -1;
1798 		}
1799 
1800 
1801 	case FILE_FMT_STR:
1802 		if (*ptr == '-')
1803 			ptr++;
1804 		while (isdigit((unsigned char )*ptr))
1805 			ptr++;
1806 		if (*ptr == '.') {
1807 			ptr++;
1808 			while (isdigit((unsigned char )*ptr))
1809 				ptr++;
1810 		}
1811 
1812 		switch (*ptr++) {
1813 		case 's':
1814 			return 0;
1815 		default:
1816 			return -1;
1817 		}
1818 
1819 	default:
1820 		/* internal error */
1821 		abort();
1822 	}
1823 	/*NOTREACHED*/
1824 	return -1;
1825 }
1826 
1827 /*
1828  * Check that the optional printf format in description matches
1829  * the type of the magic.
1830  */
1831 private int
1832 check_format(struct magic_set *ms, struct magic *m)
1833 {
1834 	char *ptr;
1835 
1836 	for (ptr = m->desc; *ptr; ptr++)
1837 		if (*ptr == '%')
1838 			break;
1839 	if (*ptr == '\0') {
1840 		/* No format string; ok */
1841 		return 1;
1842 	}
1843 
1844 	assert(file_nformats == file_nnames);
1845 
1846 	if (m->type >= file_nformats) {
1847 		file_magwarn(ms, "Internal error inconsistency between "
1848 		    "m->type and format strings");
1849 		return -1;
1850 	}
1851 	if (file_formats[m->type] == FILE_FMT_NONE) {
1852 		file_magwarn(ms, "No format string for `%s' with description "
1853 		    "`%s'", m->desc, file_names[m->type]);
1854 		return -1;
1855 	}
1856 
1857 	ptr++;
1858 	if (check_format_type(ptr, file_formats[m->type]) == -1) {
1859 		/*
1860 		 * TODO: this error message is unhelpful if the format
1861 		 * string is not one character long
1862 		 */
1863 		file_magwarn(ms, "Printf format `%c' is not valid for type "
1864 		    "`%s' in description `%s'", *ptr ? *ptr : '?',
1865 		    file_names[m->type], m->desc);
1866 		return -1;
1867 	}
1868 
1869 	for (; *ptr; ptr++) {
1870 		if (*ptr == '%') {
1871 			file_magwarn(ms,
1872 			    "Too many format strings (should have at most one) "
1873 			    "for `%s' with description `%s'",
1874 			    file_names[m->type], m->desc);
1875 			return -1;
1876 		}
1877 	}
1878 	return 0;
1879 }
1880 
1881 /*
1882  * Read a numeric value from a pointer, into the value union of a magic
1883  * pointer, according to the magic type.  Update the string pointer to point
1884  * just after the number read.  Return 0 for success, non-zero for failure.
1885  */
1886 private int
1887 getvalue(struct magic_set *ms, struct magic *m, const char **p, int action)
1888 {
1889 	switch (m->type) {
1890 	case FILE_BESTRING16:
1891 	case FILE_LESTRING16:
1892 	case FILE_STRING:
1893 	case FILE_PSTRING:
1894 	case FILE_REGEX:
1895 	case FILE_SEARCH:
1896 		*p = getstr(ms, m, *p, action == FILE_COMPILE);
1897 		if (*p == NULL) {
1898 			if (ms->flags & MAGIC_CHECK)
1899 				file_magwarn(ms, "cannot get string from `%s'",
1900 				    m->value.s);
1901 			return -1;
1902 		}
1903 		return 0;
1904 	case FILE_FLOAT:
1905 	case FILE_BEFLOAT:
1906 	case FILE_LEFLOAT:
1907 		if (m->reln != 'x') {
1908 			char *ep;
1909 #ifdef HAVE_STRTOF
1910 			m->value.f = strtof(*p, &ep);
1911 #else
1912 			m->value.f = (float)strtod(*p, &ep);
1913 #endif
1914 			*p = ep;
1915 		}
1916 		return 0;
1917 	case FILE_DOUBLE:
1918 	case FILE_BEDOUBLE:
1919 	case FILE_LEDOUBLE:
1920 		if (m->reln != 'x') {
1921 			char *ep;
1922 			m->value.d = strtod(*p, &ep);
1923 			*p = ep;
1924 		}
1925 		return 0;
1926 	default:
1927 		if (m->reln != 'x') {
1928 			char *ep;
1929 			m->value.q = file_signextend(ms, m,
1930 			    (uint64_t)strtoull(*p, &ep, 0));
1931 			*p = ep;
1932 			eatsize(p);
1933 		}
1934 		return 0;
1935 	}
1936 }
1937 
1938 /*
1939  * Convert a string containing C character escapes.  Stop at an unescaped
1940  * space or tab.
1941  * Copy the converted version to "m->value.s", and the length in m->vallen.
1942  * Return updated scan pointer as function result. Warn if set.
1943  */
1944 private const char *
1945 getstr(struct magic_set *ms, struct magic *m, const char *s, int warn)
1946 {
1947 	const char *origs = s;
1948 	char	*p = m->value.s;
1949 	size_t  plen = sizeof(m->value.s);
1950 	char 	*origp = p;
1951 	char	*pmax = p + plen - 1;
1952 	int	c;
1953 	int	val;
1954 
1955 	while ((c = *s++) != '\0') {
1956 		if (isspace((unsigned char) c))
1957 			break;
1958 		if (p >= pmax) {
1959 			file_error(ms, 0, "string too long: `%s'", origs);
1960 			return NULL;
1961 		}
1962 		if (c == '\\') {
1963 			switch(c = *s++) {
1964 
1965 			case '\0':
1966 				if (warn)
1967 					file_magwarn(ms, "incomplete escape");
1968 				goto out;
1969 
1970 			case '\t':
1971 				if (warn) {
1972 					file_magwarn(ms,
1973 					    "escaped tab found, use \\t instead");
1974 					warn = 0;	/* already did */
1975 				}
1976 				/*FALLTHROUGH*/
1977 			default:
1978 				if (warn) {
1979 					if (isprint((unsigned char)c)) {
1980 						/* Allow escaping of
1981 						 * ``relations'' */
1982 						if (strchr("<>&^=!", c) == NULL
1983 						    && (m->type != FILE_REGEX ||
1984 						    strchr("[]().*?^$|{}", c)
1985 						    == NULL)) {
1986 							file_magwarn(ms, "no "
1987 							    "need to escape "
1988 							    "`%c'", c);
1989 						}
1990 					} else {
1991 						file_magwarn(ms,
1992 						    "unknown escape sequence: "
1993 						    "\\%03o", c);
1994 					}
1995 				}
1996 				/*FALLTHROUGH*/
1997 			/* space, perhaps force people to use \040? */
1998 			case ' ':
1999 #if 0
2000 			/*
2001 			 * Other things people escape, but shouldn't need to,
2002 			 * so we disallow them
2003 			 */
2004 			case '\'':
2005 			case '"':
2006 			case '?':
2007 #endif
2008 			/* Relations */
2009 			case '>':
2010 			case '<':
2011 			case '&':
2012 			case '^':
2013 			case '=':
2014 			case '!':
2015 			/* and baskslash itself */
2016 			case '\\':
2017 				*p++ = (char) c;
2018 				break;
2019 
2020 			case 'a':
2021 				*p++ = '\a';
2022 				break;
2023 
2024 			case 'b':
2025 				*p++ = '\b';
2026 				break;
2027 
2028 			case 'f':
2029 				*p++ = '\f';
2030 				break;
2031 
2032 			case 'n':
2033 				*p++ = '\n';
2034 				break;
2035 
2036 			case 'r':
2037 				*p++ = '\r';
2038 				break;
2039 
2040 			case 't':
2041 				*p++ = '\t';
2042 				break;
2043 
2044 			case 'v':
2045 				*p++ = '\v';
2046 				break;
2047 
2048 			/* \ and up to 3 octal digits */
2049 			case '0':
2050 			case '1':
2051 			case '2':
2052 			case '3':
2053 			case '4':
2054 			case '5':
2055 			case '6':
2056 			case '7':
2057 				val = c - '0';
2058 				c = *s++;  /* try for 2 */
2059 				if (c >= '0' && c <= '7') {
2060 					val = (val << 3) | (c - '0');
2061 					c = *s++;  /* try for 3 */
2062 					if (c >= '0' && c <= '7')
2063 						val = (val << 3) | (c-'0');
2064 					else
2065 						--s;
2066 				}
2067 				else
2068 					--s;
2069 				*p++ = (char)val;
2070 				break;
2071 
2072 			/* \x and up to 2 hex digits */
2073 			case 'x':
2074 				val = 'x';	/* Default if no digits */
2075 				c = hextoint(*s++);	/* Get next char */
2076 				if (c >= 0) {
2077 					val = c;
2078 					c = hextoint(*s++);
2079 					if (c >= 0)
2080 						val = (val << 4) + c;
2081 					else
2082 						--s;
2083 				} else
2084 					--s;
2085 				*p++ = (char)val;
2086 				break;
2087 			}
2088 		} else
2089 			*p++ = (char)c;
2090 	}
2091 out:
2092 	*p = '\0';
2093 	m->vallen = CAST(unsigned char, (p - origp));
2094 	if (m->type == FILE_PSTRING)
2095 		m->vallen += (unsigned char)file_pstring_length_size(m);
2096 	return s;
2097 }
2098 
2099 
2100 /* Single hex char to int; -1 if not a hex char. */
2101 private int
2102 hextoint(int c)
2103 {
2104 	if (!isascii((unsigned char) c))
2105 		return -1;
2106 	if (isdigit((unsigned char) c))
2107 		return c - '0';
2108 	if ((c >= 'a') && (c <= 'f'))
2109 		return c + 10 - 'a';
2110 	if (( c>= 'A') && (c <= 'F'))
2111 		return c + 10 - 'A';
2112 	return -1;
2113 }
2114 
2115 
2116 /*
2117  * Print a string containing C character escapes.
2118  */
2119 protected void
2120 file_showstr(FILE *fp, const char *s, size_t len)
2121 {
2122 	char	c;
2123 
2124 	for (;;) {
2125 		if (len == ~0U) {
2126 			c = *s++;
2127 			if (c == '\0')
2128 				break;
2129 		}
2130 		else  {
2131 			if (len-- == 0)
2132 				break;
2133 			c = *s++;
2134 		}
2135 		if (c >= 040 && c <= 0176)	/* TODO isprint && !iscntrl */
2136 			(void) fputc(c, fp);
2137 		else {
2138 			(void) fputc('\\', fp);
2139 			switch (c) {
2140 			case '\a':
2141 				(void) fputc('a', fp);
2142 				break;
2143 
2144 			case '\b':
2145 				(void) fputc('b', fp);
2146 				break;
2147 
2148 			case '\f':
2149 				(void) fputc('f', fp);
2150 				break;
2151 
2152 			case '\n':
2153 				(void) fputc('n', fp);
2154 				break;
2155 
2156 			case '\r':
2157 				(void) fputc('r', fp);
2158 				break;
2159 
2160 			case '\t':
2161 				(void) fputc('t', fp);
2162 				break;
2163 
2164 			case '\v':
2165 				(void) fputc('v', fp);
2166 				break;
2167 
2168 			default:
2169 				(void) fprintf(fp, "%.3o", c & 0377);
2170 				break;
2171 			}
2172 		}
2173 	}
2174 }
2175 
2176 /*
2177  * eatsize(): Eat the size spec from a number [eg. 10UL]
2178  */
2179 private void
2180 eatsize(const char **p)
2181 {
2182 	const char *l = *p;
2183 
2184 	if (LOWCASE(*l) == 'u')
2185 		l++;
2186 
2187 	switch (LOWCASE(*l)) {
2188 	case 'l':    /* long */
2189 	case 's':    /* short */
2190 	case 'h':    /* short */
2191 	case 'b':    /* char/byte */
2192 	case 'c':    /* char/byte */
2193 		l++;
2194 		/*FALLTHROUGH*/
2195 	default:
2196 		break;
2197 	}
2198 
2199 	*p = l;
2200 }
2201 
2202 /*
2203  * handle a compiled file.
2204  */
2205 private int
2206 apprentice_map(struct magic_set *ms, struct magic **magicp, uint32_t *nmagicp,
2207     const char *fn)
2208 {
2209 	int fd;
2210 	struct stat st;
2211 	uint32_t *ptr;
2212 	uint32_t version;
2213 	int needsbyteswap;
2214 	char *dbname = NULL;
2215 	void *mm = NULL;
2216 
2217 	dbname = mkdbname(ms, fn, 0);
2218 	if (dbname == NULL)
2219 		goto error2;
2220 
2221 	if ((fd = open(dbname, O_RDONLY|O_BINARY)) == -1)
2222 		goto error2;
2223 
2224 	if (fstat(fd, &st) == -1) {
2225 		file_error(ms, errno, "cannot stat `%s'", dbname);
2226 		goto error1;
2227 	}
2228 	if (st.st_size < 8) {
2229 		file_error(ms, 0, "file `%s' is too small", dbname);
2230 		goto error1;
2231 	}
2232 
2233 #ifdef QUICK
2234 	if ((mm = mmap(0, (size_t)st.st_size, PROT_READ|PROT_WRITE,
2235 	    MAP_PRIVATE|MAP_FILE, fd, (off_t)0)) == MAP_FAILED) {
2236 		file_error(ms, errno, "cannot map `%s'", dbname);
2237 		goto error1;
2238 	}
2239 #define RET	2
2240 #else
2241 	if ((mm = CAST(void *, malloc((size_t)st.st_size))) == NULL) {
2242 		file_oomem(ms, (size_t)st.st_size);
2243 		goto error1;
2244 	}
2245 	if (read(fd, mm, (size_t)st.st_size) != (ssize_t)st.st_size) {
2246 		file_badread(ms);
2247 		goto error1;
2248 	}
2249 #define RET	1
2250 #endif
2251 	*magicp = CAST(struct magic *, mm);
2252 	(void)close(fd);
2253 	fd = -1;
2254 	ptr = (uint32_t *)(void *)*magicp;
2255 	if (*ptr != MAGICNO) {
2256 		if (swap4(*ptr) != MAGICNO) {
2257 			file_error(ms, 0, "bad magic in `%s'", dbname);
2258 			goto error1;
2259 		}
2260 		needsbyteswap = 1;
2261 	} else
2262 		needsbyteswap = 0;
2263 	if (needsbyteswap)
2264 		version = swap4(ptr[1]);
2265 	else
2266 		version = ptr[1];
2267 	if (version != VERSIONNO) {
2268 		file_error(ms, 0, "File %s supports only version %d magic "
2269 		    "files. `%s' is version %d", VERSION,
2270 		    VERSIONNO, dbname, version);
2271 		goto error1;
2272 	}
2273 	*nmagicp = (uint32_t)(st.st_size / sizeof(struct magic));
2274 	if (*nmagicp > 0)
2275 		(*nmagicp)--;
2276 	(*magicp)++;
2277 	if (needsbyteswap)
2278 		byteswap(*magicp, *nmagicp);
2279 	free(dbname);
2280 	return RET;
2281 
2282 error1:
2283 	if (fd != -1)
2284 		(void)close(fd);
2285 	if (mm) {
2286 #ifdef QUICK
2287 		(void)munmap((void *)mm, (size_t)st.st_size);
2288 #else
2289 		free(mm);
2290 #endif
2291 	} else {
2292 		*magicp = NULL;
2293 		*nmagicp = 0;
2294 	}
2295 error2:
2296 	free(dbname);
2297 	return -1;
2298 }
2299 
2300 private const uint32_t ar[] = {
2301     MAGICNO, VERSIONNO
2302 };
2303 /*
2304  * handle an mmaped file.
2305  */
2306 private int
2307 apprentice_compile(struct magic_set *ms, struct magic **magicp,
2308     uint32_t *nmagicp, const char *fn)
2309 {
2310 	int fd = -1;
2311 	char *dbname;
2312 	int rv = -1;
2313 
2314 	dbname = mkdbname(ms, fn, 1);
2315 
2316 	if (dbname == NULL)
2317 		goto out;
2318 
2319 	if ((fd = open(dbname, O_WRONLY|O_CREAT|O_TRUNC|O_BINARY, 0644)) == -1) {
2320 		file_error(ms, errno, "cannot open `%s'", dbname);
2321 		goto out;
2322 	}
2323 
2324 	if (write(fd, ar, sizeof(ar)) != (ssize_t)sizeof(ar)) {
2325 		file_error(ms, errno, "error writing `%s'", dbname);
2326 		goto out;
2327 	}
2328 
2329 	if (lseek(fd, (off_t)sizeof(struct magic), SEEK_SET)
2330 	    != sizeof(struct magic)) {
2331 		file_error(ms, errno, "error seeking `%s'", dbname);
2332 		goto out;
2333 	}
2334 
2335 	if (write(fd, *magicp, (sizeof(struct magic) * *nmagicp))
2336 	    != (ssize_t)(sizeof(struct magic) * *nmagicp)) {
2337 		file_error(ms, errno, "error writing `%s'", dbname);
2338 		goto out;
2339 	}
2340 
2341 	if (fd != -1)
2342 		(void)close(fd);
2343 	rv = 0;
2344 out:
2345 	free(dbname);
2346 	return rv;
2347 }
2348 
2349 private const char ext[] = ".mgc";
2350 /*
2351  * make a dbname
2352  */
2353 private char *
2354 mkdbname(struct magic_set *ms, const char *fn, int strip)
2355 {
2356 	const char *p, *q;
2357 	char *buf;
2358 
2359 	if (strip) {
2360 		if ((p = strrchr(fn, '/')) != NULL)
2361 			fn = ++p;
2362 	}
2363 
2364 	for (q = fn; *q; q++)
2365 		continue;
2366 	/* Look for .mgc */
2367 	for (p = ext + sizeof(ext) - 1; p >= ext && q >= fn; p--, q--)
2368 		if (*p != *q)
2369 			break;
2370 
2371 	/* Did not find .mgc, restore q */
2372 	if (p >= ext)
2373 		while (*q)
2374 			q++;
2375 
2376 	q++;
2377 	/* Compatibility with old code that looked in .mime */
2378 	if (ms->flags & MAGIC_MIME) {
2379 		asprintf(&buf, "%.*s.mime%s", (int)(q - fn), fn, ext);
2380 		if (access(buf, R_OK) != -1) {
2381 			ms->flags &= MAGIC_MIME_TYPE;
2382 			return buf;
2383 		}
2384 		free(buf);
2385 	}
2386 	asprintf(&buf, "%.*s%s", (int)(q - fn), fn, ext);
2387 
2388 	/* Compatibility with old code that looked in .mime */
2389 	if (strstr(p, ".mime") != NULL)
2390 		ms->flags &= MAGIC_MIME_TYPE;
2391 	return buf;
2392 }
2393 
2394 /*
2395  * Byteswap an mmap'ed file if needed
2396  */
2397 private void
2398 byteswap(struct magic *magic, uint32_t nmagic)
2399 {
2400 	uint32_t i;
2401 	for (i = 0; i < nmagic; i++)
2402 		bs1(&magic[i]);
2403 }
2404 
/*
 * Return sv with its two bytes exchanged.
 */
private uint16_t
swap2(uint16_t sv)
{
	const unsigned int lo = sv & 0xffU;
	const unsigned int hi = (sv >> 8) & 0xffU;

	return (uint16_t)((lo << 8) | hi);
}
2418 
/*
 * Return sv with the order of its four bytes reversed.
 */
private uint32_t
swap4(uint32_t sv)
{
	const uint32_t b0 = sv & 0xffU;
	const uint32_t b1 = (sv >> 8) & 0xffU;
	const uint32_t b2 = (sv >> 16) & 0xffU;
	const uint32_t b3 = (sv >> 24) & 0xffU;

	return (b0 << 24) | (b1 << 16) | (b2 << 8) | b3;
}
2434 
/*
 * Return sv with the order of all eight bytes reversed
 * (byte 0 trades places with byte 7, byte 1 with byte 6, and so on).
 */
private uint64_t
swap8(uint64_t sv)
{
	uint64_t rv = 0;
	size_t i;

	/* Peel bytes off the low end of sv and push them onto rv */
	for (i = 0; i < sizeof(sv); i++) {
		rv = (rv << 8) | (sv & 0xffU);
		sv >>= 8;
	}
	return rv;
}
2465 
2466 /*
2467  * byteswap a single magic entry
2468  */
2469 private void
2470 bs1(struct magic *m)
2471 {
2472 	m->cont_level = swap2(m->cont_level);
2473 	m->offset = swap4((uint32_t)m->offset);
2474 	m->in_offset = swap4((uint32_t)m->in_offset);
2475 	m->lineno = swap4((uint32_t)m->lineno);
2476 	if (IS_STRING(m->type)) {
2477 		m->str_range = swap4(m->str_range);
2478 		m->str_flags = swap4(m->str_flags);
2479 	}
2480 	else {
2481 		m->value.q = swap8(m->value.q);
2482 		m->num_mask = swap8(m->num_mask);
2483 	}
2484 }
2485 
2486 protected size_t
2487 file_pstring_length_size(const struct magic *m)
2488 {
2489 	switch (m->str_flags & PSTRING_LEN) {
2490 	case PSTRING_1_LE:
2491 		return 1;
2492 	case PSTRING_2_LE:
2493 	case PSTRING_2_BE:
2494 		return 2;
2495 	case PSTRING_4_LE:
2496 	case PSTRING_4_BE:
2497 		return 4;
2498 	default:
2499 		abort();	/* Impossible */
2500 		return 1;
2501 	}
2502 }
2503 protected size_t
2504 file_pstring_get_length(const struct magic *m, const char *s)
2505 {
2506 	size_t len = 0;
2507 
2508 	switch (m->str_flags & PSTRING_LEN) {
2509 	case PSTRING_1_LE:
2510 		len = *s;
2511 		break;
2512 	case PSTRING_2_LE:
2513 		len = (s[1] << 8) | s[0];
2514 		break;
2515 	case PSTRING_2_BE:
2516 		len = (s[0] << 8) | s[1];
2517 		break;
2518 	case PSTRING_4_LE:
2519 		len = (s[3] << 24) | (s[2] << 16) | (s[1] << 8) | s[0];
2520 		break;
2521 	case PSTRING_4_BE:
2522 		len = (s[0] << 24) | (s[1] << 16) | (s[2] << 8) | s[3];
2523 		break;
2524 	default:
2525 		abort();	/* Impossible */
2526 	}
2527 
2528 	if (m->str_flags & PSTRING_LENGTH_INCLUDES_ITSELF)
2529 		len -= file_pstring_length_size(m);
2530 
2531 	return len;
2532 }
2533