xref: /openbsd-src/usr.bin/file/magic-load.c (revision 0b7734b3d77bb9b21afec6f4621cae6c805dbd45)
1 /* $OpenBSD: magic-load.c,v 1.23 2016/05/01 14:57:15 nicm Exp $ */
2 
3 /*
4  * Copyright (c) 2015 Nicholas Marriott <nicm@openbsd.org>
5  *
6  * Permission to use, copy, modify, and distribute this software for any
7  * purpose with or without fee is hereby granted, provided that the above
8  * copyright notice and this permission notice appear in all copies.
9  *
10  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14  * WHATSOEVER RESULTING FROM LOSS OF MIND, USE, DATA OR PROFITS, WHETHER
15  * IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING
16  * OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17  */
18 
19 #include <sys/types.h>
20 
21 #include <ctype.h>
22 #include <errno.h>
23 #include <limits.h>
24 #include <regex.h>
25 #include <stdarg.h>
26 #include <stdio.h>
27 #include <stdlib.h>
28 #include <string.h>
29 
30 #include "magic.h"
31 #include "xmalloc.h"
32 
/*
 * Convert an octal digit character to its numeric value. Returns -1 when c
 * is not one of '0' to '7'.
 */
static int
magic_odigit(u_char c)
{
	return ((c < '0' || c > '7') ? -1 : c - '0');
}
40 
/*
 * Convert a hexadecimal digit character (either case) to its numeric value.
 * Returns -1 when c is not a hexadecimal digit.
 */
static int
magic_xdigit(u_char c)
{
	int	value = -1;

	if (c >= 'A' && c <= 'F')
		value = c - 'A' + 10;
	else if (c >= 'a' && c <= 'f')
		value = c - 'a' + 10;
	else if (c >= '0' && c <= '9')
		value = c - '0';
	return (value);
}
52 
53 static void
54 magic_mark_text(struct magic_line *ml, int text)
55 {
56 	do {
57 		ml->text = text;
58 		ml = ml->parent;
59 	} while (ml != NULL);
60 }
61 
/*
 * Compile the extended regular expression p into re. On failure a warning
 * naming the pattern is issued against ml and -1 is returned; otherwise 0.
 */
static int
magic_make_pattern(struct magic_line *ml, const char *name, regex_t *re,
    const char *p)
{
	char	msg[256];
	int	rc;

	if ((rc = regcomp(re, p, REG_EXTENDED|REG_NOSUB)) == 0)
		return (0);

	regerror(rc, re, msg, sizeof msg);
	magic_warn(ml, "bad %s pattern: %s", name, msg);
	return (-1);
}
77 
78 static int
79 magic_set_result(struct magic_line *ml, const char *s)
80 {
81 	const char	*fmt;
82 	const char	*endfmt;
83 	const char	*cp;
84 	regex_t		*re = NULL;
85 	regmatch_t	 pmatch;
86 	size_t		 fmtlen;
87 
88 	while (isspace((u_char)*s))
89 		s++;
90 	if (*s == '\0') {
91 		ml->result = NULL;
92 		return (0);
93 	}
94 	ml->result = xstrdup(s);
95 
96 	fmt = NULL;
97 	for (cp = s; *cp != '\0'; cp++) {
98 		if (cp[0] == '%' && cp[1] != '%') {
99 			if (fmt != NULL) {
100 				magic_warn(ml, "multiple formats");
101 				return (-1);
102 			}
103 			fmt = cp;
104 		}
105 	}
106 	if (fmt == NULL)
107 		return (0);
108 	fmt++;
109 
110 	for (endfmt = fmt; *endfmt != '\0'; endfmt++) {
111 		if (strchr("diouxXeEfFgGsc", *endfmt) != NULL)
112 			break;
113 	}
114 	if (*endfmt == '\0') {
115 		magic_warn(ml, "unterminated format");
116 		return (-1);
117 	}
118 	fmtlen = endfmt + 1 - fmt;
119 	if (fmtlen > 32) {
120 		magic_warn(ml, "format too long");
121 		return (-1);
122 	}
123 
124 	if (*endfmt == 's') {
125 		switch (ml->type) {
126 		case MAGIC_TYPE_DATE:
127 		case MAGIC_TYPE_LDATE:
128 		case MAGIC_TYPE_UDATE:
129 		case MAGIC_TYPE_ULDATE:
130 		case MAGIC_TYPE_BEDATE:
131 		case MAGIC_TYPE_BELDATE:
132 		case MAGIC_TYPE_UBEDATE:
133 		case MAGIC_TYPE_UBELDATE:
134 		case MAGIC_TYPE_QDATE:
135 		case MAGIC_TYPE_QLDATE:
136 		case MAGIC_TYPE_UQDATE:
137 		case MAGIC_TYPE_UQLDATE:
138 		case MAGIC_TYPE_BEQDATE:
139 		case MAGIC_TYPE_BEQLDATE:
140 		case MAGIC_TYPE_UBEQDATE:
141 		case MAGIC_TYPE_UBEQLDATE:
142 		case MAGIC_TYPE_LEQDATE:
143 		case MAGIC_TYPE_LEQLDATE:
144 		case MAGIC_TYPE_ULEQDATE:
145 		case MAGIC_TYPE_ULEQLDATE:
146 		case MAGIC_TYPE_LEDATE:
147 		case MAGIC_TYPE_LELDATE:
148 		case MAGIC_TYPE_ULEDATE:
149 		case MAGIC_TYPE_ULELDATE:
150 		case MAGIC_TYPE_MEDATE:
151 		case MAGIC_TYPE_MELDATE:
152 		case MAGIC_TYPE_STRING:
153 		case MAGIC_TYPE_PSTRING:
154 		case MAGIC_TYPE_BESTRING16:
155 		case MAGIC_TYPE_LESTRING16:
156 		case MAGIC_TYPE_REGEX:
157 		case MAGIC_TYPE_SEARCH:
158 			break;
159 		default:
160 			ml->stringify = 1;
161 			break;
162 		}
163 	}
164 
165 	if (!ml->root->compiled) {
166 		/*
167 		 * XXX %ld (and %lu and so on) is invalid on 64-bit platforms
168 		 * with byte, short, long. We get lucky because our first and
169 		 * only argument ends up in a register. Accept it for now.
170 		 */
171 		if (magic_make_pattern(ml, "short", &ml->root->format_short,
172 		    "^-?[0-9]*(\\.[0-9]*)?(c|(l|h|hh)?[iduxX])$") != 0)
173 			return (-1);
174 		if (magic_make_pattern(ml, "long", &ml->root->format_long,
175 		    "^-?[0-9]*(\\.[0-9]*)?(c|(l|h|hh)?[iduxX])$") != 0)
176 			return (-1);
177 		if (magic_make_pattern(ml, "quad", &ml->root->format_quad,
178 		    "^-?[0-9]*(\\.[0-9]*)?ll[iduxX]$") != 0)
179 			return (-1);
180 		if (magic_make_pattern(ml, "float", &ml->root->format_float,
181 		    "^-?[0-9]*(\\.[0-9]*)?[eEfFgG]$") != 0)
182 			return (-1);
183 		if (magic_make_pattern(ml, "string", &ml->root->format_string,
184 		    "^-?[0-9]*(\\.[0-9]*)?s$") != 0)
185 			return (-1);
186 		ml->root->compiled = 1;
187 	}
188 
189 	if (ml->stringify)
190 		re = &ml->root->format_string;
191 	else {
192 		switch (ml->type) {
193 		case MAGIC_TYPE_NONE:
194 		case MAGIC_TYPE_BESTRING16:
195 		case MAGIC_TYPE_LESTRING16:
196 		case MAGIC_TYPE_NAME:
197 		case MAGIC_TYPE_USE:
198 			return (0); /* don't use result */
199 		case MAGIC_TYPE_BYTE:
200 		case MAGIC_TYPE_UBYTE:
201 		case MAGIC_TYPE_SHORT:
202 		case MAGIC_TYPE_USHORT:
203 		case MAGIC_TYPE_BESHORT:
204 		case MAGIC_TYPE_UBESHORT:
205 		case MAGIC_TYPE_LESHORT:
206 		case MAGIC_TYPE_ULESHORT:
207 			re = &ml->root->format_short;
208 			break;
209 		case MAGIC_TYPE_LONG:
210 		case MAGIC_TYPE_ULONG:
211 		case MAGIC_TYPE_BELONG:
212 		case MAGIC_TYPE_UBELONG:
213 		case MAGIC_TYPE_LELONG:
214 		case MAGIC_TYPE_ULELONG:
215 		case MAGIC_TYPE_MELONG:
216 			re = &ml->root->format_long;
217 			break;
218 		case MAGIC_TYPE_QUAD:
219 		case MAGIC_TYPE_UQUAD:
220 		case MAGIC_TYPE_BEQUAD:
221 		case MAGIC_TYPE_UBEQUAD:
222 		case MAGIC_TYPE_LEQUAD:
223 		case MAGIC_TYPE_ULEQUAD:
224 			re = &ml->root->format_quad;
225 			break;
226 		case MAGIC_TYPE_FLOAT:
227 		case MAGIC_TYPE_BEFLOAT:
228 		case MAGIC_TYPE_LEFLOAT:
229 		case MAGIC_TYPE_DOUBLE:
230 		case MAGIC_TYPE_BEDOUBLE:
231 		case MAGIC_TYPE_LEDOUBLE:
232 			re = &ml->root->format_float;
233 			break;
234 		case MAGIC_TYPE_DATE:
235 		case MAGIC_TYPE_LDATE:
236 		case MAGIC_TYPE_UDATE:
237 		case MAGIC_TYPE_ULDATE:
238 		case MAGIC_TYPE_BEDATE:
239 		case MAGIC_TYPE_BELDATE:
240 		case MAGIC_TYPE_UBEDATE:
241 		case MAGIC_TYPE_UBELDATE:
242 		case MAGIC_TYPE_QDATE:
243 		case MAGIC_TYPE_QLDATE:
244 		case MAGIC_TYPE_UQDATE:
245 		case MAGIC_TYPE_UQLDATE:
246 		case MAGIC_TYPE_BEQDATE:
247 		case MAGIC_TYPE_BEQLDATE:
248 		case MAGIC_TYPE_UBEQDATE:
249 		case MAGIC_TYPE_UBEQLDATE:
250 		case MAGIC_TYPE_LEQDATE:
251 		case MAGIC_TYPE_LEQLDATE:
252 		case MAGIC_TYPE_ULEQDATE:
253 		case MAGIC_TYPE_ULEQLDATE:
254 		case MAGIC_TYPE_LEDATE:
255 		case MAGIC_TYPE_LELDATE:
256 		case MAGIC_TYPE_ULEDATE:
257 		case MAGIC_TYPE_ULELDATE:
258 		case MAGIC_TYPE_MEDATE:
259 		case MAGIC_TYPE_MELDATE:
260 		case MAGIC_TYPE_STRING:
261 		case MAGIC_TYPE_PSTRING:
262 		case MAGIC_TYPE_REGEX:
263 		case MAGIC_TYPE_SEARCH:
264 		case MAGIC_TYPE_DEFAULT:
265 		case MAGIC_TYPE_CLEAR:
266 			re = &ml->root->format_string;
267 			break;
268 		}
269 	}
270 
271 	pmatch.rm_so = 0;
272 	pmatch.rm_eo = fmtlen;
273 	if (regexec(re, fmt, 1, &pmatch, REG_STARTEND) != 0) {
274 		magic_warn(ml, "bad format for %s: %%%.*s", ml->type_string,
275 		    (int)fmtlen, fmt);
276 		return (-1);
277 	}
278 
279 	return (0);
280 }
281 
282 static u_int
283 magic_get_strength(struct magic_line *ml)
284 {
285 	int	n;
286 	size_t	size;
287 
288 	if (ml->type == MAGIC_TYPE_NONE)
289 		return (0);
290 
291 	if (ml->test_not || ml->test_operator == 'x') {
292 		n = 1;
293 		goto skip;
294 	}
295 
296 	n = 2 * MAGIC_STRENGTH_MULTIPLIER;
297 	switch (ml->type) {
298 	case MAGIC_TYPE_NONE:
299 	case MAGIC_TYPE_DEFAULT:
300 		return (0);
301 	case MAGIC_TYPE_CLEAR:
302 	case MAGIC_TYPE_NAME:
303 	case MAGIC_TYPE_USE:
304 		break;
305 	case MAGIC_TYPE_BYTE:
306 	case MAGIC_TYPE_UBYTE:
307 		n += 1 * MAGIC_STRENGTH_MULTIPLIER;
308 		break;
309 	case MAGIC_TYPE_SHORT:
310 	case MAGIC_TYPE_USHORT:
311 	case MAGIC_TYPE_BESHORT:
312 	case MAGIC_TYPE_UBESHORT:
313 	case MAGIC_TYPE_LESHORT:
314 	case MAGIC_TYPE_ULESHORT:
315 		n += 2 * MAGIC_STRENGTH_MULTIPLIER;
316 		break;
317 	case MAGIC_TYPE_LONG:
318 	case MAGIC_TYPE_ULONG:
319 	case MAGIC_TYPE_FLOAT:
320 	case MAGIC_TYPE_DATE:
321 	case MAGIC_TYPE_LDATE:
322 	case MAGIC_TYPE_UDATE:
323 	case MAGIC_TYPE_ULDATE:
324 	case MAGIC_TYPE_BELONG:
325 	case MAGIC_TYPE_UBELONG:
326 	case MAGIC_TYPE_BEFLOAT:
327 	case MAGIC_TYPE_BEDATE:
328 	case MAGIC_TYPE_BELDATE:
329 	case MAGIC_TYPE_UBEDATE:
330 	case MAGIC_TYPE_UBELDATE:
331 		n += 4 * MAGIC_STRENGTH_MULTIPLIER;
332 		break;
333 	case MAGIC_TYPE_QUAD:
334 	case MAGIC_TYPE_UQUAD:
335 	case MAGIC_TYPE_DOUBLE:
336 	case MAGIC_TYPE_QDATE:
337 	case MAGIC_TYPE_QLDATE:
338 	case MAGIC_TYPE_UQDATE:
339 	case MAGIC_TYPE_UQLDATE:
340 	case MAGIC_TYPE_BEQUAD:
341 	case MAGIC_TYPE_UBEQUAD:
342 	case MAGIC_TYPE_BEDOUBLE:
343 	case MAGIC_TYPE_BEQDATE:
344 	case MAGIC_TYPE_BEQLDATE:
345 	case MAGIC_TYPE_UBEQDATE:
346 	case MAGIC_TYPE_UBEQLDATE:
347 	case MAGIC_TYPE_LEQUAD:
348 	case MAGIC_TYPE_ULEQUAD:
349 	case MAGIC_TYPE_LEDOUBLE:
350 	case MAGIC_TYPE_LEQDATE:
351 	case MAGIC_TYPE_LEQLDATE:
352 	case MAGIC_TYPE_ULEQDATE:
353 	case MAGIC_TYPE_ULEQLDATE:
354 	case MAGIC_TYPE_LELONG:
355 	case MAGIC_TYPE_ULELONG:
356 	case MAGIC_TYPE_LEFLOAT:
357 	case MAGIC_TYPE_LEDATE:
358 	case MAGIC_TYPE_LELDATE:
359 	case MAGIC_TYPE_ULEDATE:
360 	case MAGIC_TYPE_ULELDATE:
361 	case MAGIC_TYPE_MELONG:
362 	case MAGIC_TYPE_MEDATE:
363 	case MAGIC_TYPE_MELDATE:
364 		n += 8 * MAGIC_STRENGTH_MULTIPLIER;
365 		break;
366 	case MAGIC_TYPE_STRING:
367 	case MAGIC_TYPE_PSTRING:
368 		n += ml->test_string_size * MAGIC_STRENGTH_MULTIPLIER;
369 		break;
370 	case MAGIC_TYPE_BESTRING16:
371 	case MAGIC_TYPE_LESTRING16:
372 		n += ml->test_string_size * MAGIC_STRENGTH_MULTIPLIER / 2;
373 		break;
374 	case MAGIC_TYPE_REGEX:
375 	case MAGIC_TYPE_SEARCH:
376 		size = MAGIC_STRENGTH_MULTIPLIER / ml->test_string_size;
377 		if (size < 1)
378 			size = 1;
379 		n += ml->test_string_size * size;
380 		break;
381 	}
382 	switch (ml->test_operator) {
383 	case '=':
384 		n += MAGIC_STRENGTH_MULTIPLIER;
385 		break;
386 	case '<':
387 	case '>':
388 	case '[':
389 	case ']':
390 		n -= 2 * MAGIC_STRENGTH_MULTIPLIER;
391 		break;
392 	case '^':
393 	case '&':
394 		n -= MAGIC_STRENGTH_MULTIPLIER;
395 		break;
396 	}
397 
398 skip:
399 	switch (ml->strength_operator) {
400 	case '+':
401 		n += ml->strength_value;
402 		break;
403 	case '-':
404 		n -= ml->strength_value;
405 		break;
406 	case '*':
407 		n *= ml->strength_value;
408 		break;
409 	case '/':
410 		n /= ml->strength_value;
411 		break;
412 	}
413 	return (n <= 0 ? 1 : n);
414 }
415 
416 static int
417 magic_get_string(char **line, char *out, size_t *outlen)
418 {
419 	char	*start, *cp, c;
420 	int	 d0, d1, d2;
421 
422 	start = out;
423 	for (cp = *line; *cp != '\0' && !isspace((u_char)*cp); cp++) {
424 		if (*cp != '\\') {
425 			*out++ = *cp;
426 			continue;
427 		}
428 
429 		switch (c = *++cp) {
430 		case '\0': /* end of line */
431 			return (-1);
432 		case ' ':
433 			*out++ = ' ';
434 			break;
435 		case '0':
436 		case '1':
437 		case '2':
438 		case '3':
439 		case '4':
440 		case '5':
441 		case '6':
442 		case '7':
443 			d0 = magic_odigit(cp[0]);
444 			if (cp[0] != '\0')
445 				d1 = magic_odigit(cp[1]);
446 			else
447 				d1 = -1;
448 			if (cp[0] != '\0' && cp[1] != '\0')
449 				d2 = magic_odigit(cp[2]);
450 			else
451 				d2 = -1;
452 
453 			if (d0 != -1 && d1 != -1 && d2 != -1) {
454 				*out = d2 | (d1 << 3) | (d0 << 6);
455 				cp += 2;
456 			} else if (d0 != -1 && d1 != -1) {
457 				*out = d1 | (d0 << 3);
458 				cp++;
459 			} else if (d0 != -1)
460 				*out = d0;
461 			else
462 				return (-1);
463 			out++;
464 			break;
465 		case 'x':
466 			d0 = magic_xdigit(cp[1]);
467 			if (cp[1] != '\0')
468 				d1 = magic_xdigit(cp[2]);
469 			else
470 				d1 = -1;
471 
472 			if (d0 != -1 && d1 != -1) {
473 				*out = d1 | (d0 << 4);
474 				cp += 2;
475 			} else if (d0 != -1) {
476 				*out = d0;
477 				cp++;
478 			} else
479 				return (-1);
480 			out++;
481 
482 			break;
483 		case 'a':
484 			*out++ = '\a';
485 			break;
486 		case 'b':
487 			*out++ = '\b';
488 			break;
489 		case 't':
490 			*out++ = '\t';
491 			break;
492 		case 'f':
493 			*out++ = '\f';
494 			break;
495 		case 'n':
496 			*out++ = '\n';
497 			break;
498 		case 'r':
499 			*out++ = '\r';
500 			break;
501 		case '\\':
502 			*out++ = '\\';
503 			break;
504 		case '\'':
505 			*out++ = '\'';
506 			break;
507 		case '\"':
508 			*out++ = '\"';
509 			break;
510 		default:
511 			*out++ = c;
512 			break;
513 		}
514 	}
515 	*out = '\0';
516 	*outlen = out - start;
517 
518 	*line = cp;
519 	return (0);
520 }
521 
522 static int
523 magic_parse_offset(struct magic_line *ml, char **line)
524 {
525 	char	*copy, *s, *cp, *endptr;
526 
527 	while (isspace((u_char)**line))
528 		(*line)++;
529 	copy = s = cp = xmalloc(strlen(*line) + 1);
530 	while (**line != '\0' && !isspace((u_char)**line))
531 		*cp++ = *(*line)++;
532 	*cp = '\0';
533 
534 	ml->offset = 0;
535 	ml->offset_relative = 0;
536 
537 	ml->indirect_type = ' ';
538 	ml->indirect_relative = 0;
539 	ml->indirect_offset = 0;
540 	ml->indirect_operator = ' ';
541 	ml->indirect_operand = 0;
542 
543 	if (*s == '&') {
544 		ml->offset_relative = 1;
545 		s++;
546 	}
547 
548 	if (*s != '(') {
549 		endptr = magic_strtoll(s, &ml->offset);
550 		if (endptr == NULL || *endptr != '\0') {
551 			magic_warn(ml, "missing closing bracket");
552 			goto fail;
553 		}
554 		if (ml->offset < 0 && !ml->offset_relative) {
555 			magic_warn(ml, "negative absolute offset");
556 			goto fail;
557 		}
558 		goto done;
559 	}
560 	s++;
561 
562 	if (*s == '&') {
563 		ml->indirect_relative = 1;
564 		s++;
565 	}
566 
567 	endptr = magic_strtoll(s, &ml->indirect_offset);
568 	if (endptr == NULL) {
569 		magic_warn(ml, "can't parse offset: %s", s);
570 		goto fail;
571 	}
572 	s = endptr;
573 	if (*s == ')')
574 		goto done;
575 
576 	if (*s == '.') {
577 		s++;
578 		if (*s == '\0' || strchr("bslBSL", *s) == NULL) {
579 			magic_warn(ml, "unknown offset type: %c", *s);
580 			goto fail;
581 		}
582 		ml->indirect_type = *s;
583 		s++;
584 		if (*s == ')')
585 			goto done;
586 	}
587 
588 	if (*s == '\0' || strchr("+-*", *s) == NULL) {
589 		magic_warn(ml, "unknown offset operator: %c", *s);
590 		goto fail;
591 	}
592 	ml->indirect_operator = *s;
593 	s++;
594 	if (*s == ')')
595 		goto done;
596 
597 	if (*s == '(') {
598 		s++;
599 		endptr = magic_strtoll(s, &ml->indirect_operand);
600 		if (endptr == NULL || *endptr != ')') {
601 			magic_warn(ml, "missing closing bracket");
602 			goto fail;
603 		}
604 		if (*++endptr != ')') {
605 			magic_warn(ml, "missing closing bracket");
606 			goto fail;
607 		}
608 	} else {
609 		endptr = magic_strtoll(s, &ml->indirect_operand);
610 		if (endptr == NULL || *endptr != ')') {
611 			magic_warn(ml, "missing closing bracket");
612 			goto fail;
613 		}
614 	}
615 
616 done:
617 	free(copy);
618 	return (0);
619 
620 fail:
621 	free(copy);
622 	return (-1);
623 }
624 
625 static int
626 magic_parse_type(struct magic_line *ml, char **line)
627 {
628 	char	*copy, *s, *cp, *endptr;
629 
630 	while (isspace((u_char)**line))
631 		(*line)++;
632 	copy = s = cp = xmalloc(strlen(*line) + 1);
633 	while (**line != '\0' && !isspace((u_char)**line))
634 		*cp++ = *(*line)++;
635 	*cp = '\0';
636 
637 	ml->type = MAGIC_TYPE_NONE;
638 	ml->type_operator = ' ';
639 	ml->type_operand = 0;
640 
641 	if (strcmp(s, "name") == 0) {
642 		ml->type = MAGIC_TYPE_NAME;
643 		ml->type_string = xstrdup(s);
644 		goto done;
645 	}
646 	if (strcmp(s, "use") == 0) {
647 		ml->type = MAGIC_TYPE_USE;
648 		ml->type_string = xstrdup(s);
649 		goto done;
650 	}
651 
652 	if (strncmp(s, "string", (sizeof "string") - 1) == 0 ||
653 	    strncmp(s, "ustring", (sizeof "ustring") - 1) == 0) {
654 		if (*s == 'u')
655 			ml->type_string = xstrdup(s + 1);
656 		else
657 			ml->type_string = xstrdup(s);
658 		ml->type = MAGIC_TYPE_STRING;
659 		magic_mark_text(ml, 0);
660 		goto done;
661 	}
662 	if (strncmp(s, "pstring", (sizeof "pstring") - 1) == 0 ||
663 	    strncmp(s, "upstring", (sizeof "upstring") - 1) == 0) {
664 		if (*s == 'u')
665 			ml->type_string = xstrdup(s + 1);
666 		else
667 			ml->type_string = xstrdup(s);
668 		ml->type = MAGIC_TYPE_PSTRING;
669 		magic_mark_text(ml, 0);
670 		goto done;
671 	}
672 	if (strncmp(s, "search", (sizeof "search") - 1) == 0 ||
673 	    strncmp(s, "usearch", (sizeof "usearch") - 1) == 0) {
674 		if (*s == 'u')
675 			ml->type_string = xstrdup(s + 1);
676 		else
677 			ml->type_string = xstrdup(s);
678 		ml->type = MAGIC_TYPE_SEARCH;
679 		goto done;
680 	}
681 	if (strncmp(s, "regex", (sizeof "regex") - 1) == 0 ||
682 	    strncmp(s, "uregex", (sizeof "uregex") - 1) == 0) {
683 		if (*s == 'u')
684 			ml->type_string = xstrdup(s + 1);
685 		else
686 			ml->type_string = xstrdup(s);
687 		ml->type = MAGIC_TYPE_REGEX;
688 		goto done;
689 	}
690 	ml->type_string = xstrdup(s);
691 
692 	cp = &s[strcspn(s, "+-&/%*")];
693 	if (*cp != '\0') {
694 		ml->type_operator = *cp;
695 		endptr = magic_strtoull(cp + 1, &ml->type_operand);
696 		if (endptr == NULL || *endptr != '\0') {
697 			magic_warn(ml, "can't parse operand: %s", cp + 1);
698 			goto fail;
699 		}
700 		*cp = '\0';
701 	}
702 
703 	if (strcmp(s, "byte") == 0)
704 		ml->type = MAGIC_TYPE_BYTE;
705 	else if (strcmp(s, "short") == 0)
706 		ml->type = MAGIC_TYPE_SHORT;
707 	else if (strcmp(s, "long") == 0)
708 		ml->type = MAGIC_TYPE_LONG;
709 	else if (strcmp(s, "quad") == 0)
710 		ml->type = MAGIC_TYPE_QUAD;
711 	else if (strcmp(s, "ubyte") == 0)
712 		ml->type = MAGIC_TYPE_UBYTE;
713 	else if (strcmp(s, "ushort") == 0)
714 		ml->type = MAGIC_TYPE_USHORT;
715 	else if (strcmp(s, "ulong") == 0)
716 		ml->type = MAGIC_TYPE_ULONG;
717 	else if (strcmp(s, "uquad") == 0)
718 		ml->type = MAGIC_TYPE_UQUAD;
719 	else if (strcmp(s, "float") == 0 || strcmp(s, "ufloat") == 0)
720 		ml->type = MAGIC_TYPE_FLOAT;
721 	else if (strcmp(s, "double") == 0 || strcmp(s, "udouble") == 0)
722 		ml->type = MAGIC_TYPE_DOUBLE;
723 	else if (strcmp(s, "date") == 0)
724 		ml->type = MAGIC_TYPE_DATE;
725 	else if (strcmp(s, "qdate") == 0)
726 		ml->type = MAGIC_TYPE_QDATE;
727 	else if (strcmp(s, "ldate") == 0)
728 		ml->type = MAGIC_TYPE_LDATE;
729 	else if (strcmp(s, "qldate") == 0)
730 		ml->type = MAGIC_TYPE_QLDATE;
731 	else if (strcmp(s, "udate") == 0)
732 		ml->type = MAGIC_TYPE_UDATE;
733 	else if (strcmp(s, "uqdate") == 0)
734 		ml->type = MAGIC_TYPE_UQDATE;
735 	else if (strcmp(s, "uldate") == 0)
736 		ml->type = MAGIC_TYPE_ULDATE;
737 	else if (strcmp(s, "uqldate") == 0)
738 		ml->type = MAGIC_TYPE_UQLDATE;
739 	else if (strcmp(s, "beshort") == 0)
740 		ml->type = MAGIC_TYPE_BESHORT;
741 	else if (strcmp(s, "belong") == 0)
742 		ml->type = MAGIC_TYPE_BELONG;
743 	else if (strcmp(s, "bequad") == 0)
744 		ml->type = MAGIC_TYPE_BEQUAD;
745 	else if (strcmp(s, "ubeshort") == 0)
746 		ml->type = MAGIC_TYPE_UBESHORT;
747 	else if (strcmp(s, "ubelong") == 0)
748 		ml->type = MAGIC_TYPE_UBELONG;
749 	else if (strcmp(s, "ubequad") == 0)
750 		ml->type = MAGIC_TYPE_UBEQUAD;
751 	else if (strcmp(s, "befloat") == 0 || strcmp(s, "ubefloat") == 0)
752 		ml->type = MAGIC_TYPE_BEFLOAT;
753 	else if (strcmp(s, "bedouble") == 0 || strcmp(s, "ubedouble") == 0)
754 		ml->type = MAGIC_TYPE_BEDOUBLE;
755 	else if (strcmp(s, "bedate") == 0)
756 		ml->type = MAGIC_TYPE_BEDATE;
757 	else if (strcmp(s, "beqdate") == 0)
758 		ml->type = MAGIC_TYPE_BEQDATE;
759 	else if (strcmp(s, "beldate") == 0)
760 		ml->type = MAGIC_TYPE_BELDATE;
761 	else if (strcmp(s, "beqldate") == 0)
762 		ml->type = MAGIC_TYPE_BEQLDATE;
763 	else if (strcmp(s, "ubedate") == 0)
764 		ml->type = MAGIC_TYPE_UBEDATE;
765 	else if (strcmp(s, "ubeqdate") == 0)
766 		ml->type = MAGIC_TYPE_UBEQDATE;
767 	else if (strcmp(s, "ubeldate") == 0)
768 		ml->type = MAGIC_TYPE_UBELDATE;
769 	else if (strcmp(s, "ubeqldate") == 0)
770 		ml->type = MAGIC_TYPE_UBEQLDATE;
771 	else if (strcmp(s, "bestring16") == 0 || strcmp(s, "ubestring16") == 0)
772 		ml->type = MAGIC_TYPE_BESTRING16;
773 	else if (strcmp(s, "leshort") == 0)
774 		ml->type = MAGIC_TYPE_LESHORT;
775 	else if (strcmp(s, "lelong") == 0)
776 		ml->type = MAGIC_TYPE_LELONG;
777 	else if (strcmp(s, "lequad") == 0)
778 		ml->type = MAGIC_TYPE_LEQUAD;
779 	else if (strcmp(s, "uleshort") == 0)
780 		ml->type = MAGIC_TYPE_ULESHORT;
781 	else if (strcmp(s, "ulelong") == 0)
782 		ml->type = MAGIC_TYPE_ULELONG;
783 	else if (strcmp(s, "ulequad") == 0)
784 		ml->type = MAGIC_TYPE_ULEQUAD;
785 	else if (strcmp(s, "lefloat") == 0 || strcmp(s, "ulefloat") == 0)
786 		ml->type = MAGIC_TYPE_LEFLOAT;
787 	else if (strcmp(s, "ledouble") == 0 || strcmp(s, "uledouble") == 0)
788 		ml->type = MAGIC_TYPE_LEDOUBLE;
789 	else if (strcmp(s, "ledate") == 0)
790 		ml->type = MAGIC_TYPE_LEDATE;
791 	else if (strcmp(s, "leqdate") == 0)
792 		ml->type = MAGIC_TYPE_LEQDATE;
793 	else if (strcmp(s, "leldate") == 0)
794 		ml->type = MAGIC_TYPE_LELDATE;
795 	else if (strcmp(s, "leqldate") == 0)
796 		ml->type = MAGIC_TYPE_LEQLDATE;
797 	else if (strcmp(s, "uledate") == 0)
798 		ml->type = MAGIC_TYPE_ULEDATE;
799 	else if (strcmp(s, "uleqdate") == 0)
800 		ml->type = MAGIC_TYPE_ULEQDATE;
801 	else if (strcmp(s, "uleldate") == 0)
802 		ml->type = MAGIC_TYPE_ULELDATE;
803 	else if (strcmp(s, "uleqldate") == 0)
804 		ml->type = MAGIC_TYPE_ULEQLDATE;
805 	else if (strcmp(s, "lestring16") == 0 || strcmp(s, "ulestring16") == 0)
806 		ml->type = MAGIC_TYPE_LESTRING16;
807 	else if (strcmp(s, "melong") == 0 || strcmp(s, "umelong") == 0)
808 		ml->type = MAGIC_TYPE_MELONG;
809 	else if (strcmp(s, "medate") == 0 || strcmp(s, "umedate") == 0)
810 		ml->type = MAGIC_TYPE_MEDATE;
811 	else if (strcmp(s, "meldate") == 0 || strcmp(s, "umeldate") == 0)
812 		ml->type = MAGIC_TYPE_MELDATE;
813 	else if (strcmp(s, "default") == 0 || strcmp(s, "udefault") == 0)
814 		ml->type = MAGIC_TYPE_DEFAULT;
815 	else if (strcmp(s, "clear") == 0 || strcmp(s, "uclear") == 0)
816 		ml->type = MAGIC_TYPE_CLEAR;
817 	else {
818 		magic_warn(ml, "unknown type: %s", s);
819 		goto fail;
820 	}
821 	magic_mark_text(ml, 0);
822 
823 done:
824 	free(copy);
825 	return (0);
826 
827 fail:
828 	free(copy);
829 	return (-1);
830 }
831 
832 static int
833 magic_parse_value(struct magic_line *ml, char **line)
834 {
835 	char	*copy, *s, *cp, *endptr;
836 	size_t	 slen;
837 	uint64_t u;
838 
839 	while (isspace((u_char)**line))
840 		(*line)++;
841 
842 	ml->test_operator = '=';
843 	ml->test_not = 0;
844 	ml->test_string = NULL;
845 	ml->test_string_size = 0;
846 	ml->test_unsigned = 0;
847 	ml->test_signed = 0;
848 
849 	if (**line == '\0')
850 		return (0);
851 
852 	s = *line;
853 	if (s[0] == 'x' && (s[1] == '\0' || isspace((u_char)s[1]))) {
854 		(*line)++;
855 		ml->test_operator = 'x';
856 		return (0);
857 	}
858 
859 	if (ml->type == MAGIC_TYPE_DEFAULT || ml->type == MAGIC_TYPE_CLEAR) {
860 		magic_warn(ml, "test specified for default or clear");
861 		ml->test_operator = 'x';
862 		return (0);
863 	}
864 
865 	if (**line == '!') {
866 		ml->test_not = 1;
867 		(*line)++;
868 	}
869 
870 	switch (ml->type) {
871 	case MAGIC_TYPE_NAME:
872 	case MAGIC_TYPE_USE:
873 		copy = s = xmalloc(strlen(*line) + 1);
874 		if (magic_get_string(line, s, &slen) != 0 || slen == 0) {
875 			magic_warn(ml, "can't parse string");
876 			goto fail;
877 		}
878 		if (slen == 0 || *s == '\0' || strcmp(s, "^") == 0) {
879 			magic_warn(ml, "invalid name");
880 			goto fail;
881 		}
882 		ml->name = s;
883 		return (0); /* do not free */
884 	case MAGIC_TYPE_STRING:
885 	case MAGIC_TYPE_PSTRING:
886 	case MAGIC_TYPE_SEARCH:
887 		if (**line == '>' || **line == '<' || **line == '=') {
888 			ml->test_operator = **line;
889 			(*line)++;
890 		}
891 		/* FALLTHROUGH */
892 	case MAGIC_TYPE_REGEX:
893 		if (**line == '=')
894 			(*line)++;
895 		copy = s = xmalloc(strlen(*line) + 1);
896 		if (magic_get_string(line, s, &slen) != 0) {
897 			magic_warn(ml, "can't parse string");
898 			goto fail;
899 		}
900 		ml->test_string_size = slen;
901 		ml->test_string = s;
902 		return (0); /* do not free */
903 	default:
904 		break;
905 	}
906 
907 	while (isspace((u_char)**line))
908 		(*line)++;
909 	if ((*line)[0] == '<' && (*line)[1] == '=') {
910 		ml->test_operator = '[';
911 		(*line) += 2;
912 	} else if ((*line)[0] == '>' && (*line)[1] == '=') {
913 		ml->test_operator = ']';
914 		(*line) += 2;
915 	} else if (**line != '\0' && strchr("=<>&^", **line) != NULL) {
916 		ml->test_operator = **line;
917 		(*line)++;
918 	}
919 
920 	while (isspace((u_char)**line))
921 		(*line)++;
922 	copy = cp = xmalloc(strlen(*line) + 1);
923 	while (**line != '\0' && !isspace((u_char)**line))
924 		*cp++ = *(*line)++;
925 	*cp = '\0';
926 
927 	switch (ml->type) {
928 	case MAGIC_TYPE_FLOAT:
929 	case MAGIC_TYPE_DOUBLE:
930 	case MAGIC_TYPE_BEFLOAT:
931 	case MAGIC_TYPE_BEDOUBLE:
932 	case MAGIC_TYPE_LEFLOAT:
933 	case MAGIC_TYPE_LEDOUBLE:
934 		errno = 0;
935 		ml->test_double = strtod(copy, &endptr);
936 		if (errno == ERANGE)
937 			endptr = NULL;
938 		break;
939 	default:
940 		if (*ml->type_string == 'u')
941 			endptr = magic_strtoull(copy, &ml->test_unsigned);
942 		else {
943 			endptr = magic_strtoll(copy, &ml->test_signed);
944 			if (endptr == NULL || *endptr != '\0') {
945 				/*
946 				 * If we can't parse this as a signed number,
947 				 * try as unsigned instead.
948 				 */
949 				endptr = magic_strtoull(copy, &u);
950 				if (endptr != NULL && *endptr == '\0')
951 					ml->test_signed = (int64_t)u;
952 			}
953 		}
954 		break;
955 	}
956 	if (endptr == NULL || *endptr != '\0') {
957 		magic_warn(ml, "can't parse number: %s", copy);
958 		goto fail;
959 	}
960 
961 	free(copy);
962 	return (0);
963 
964 fail:
965 	free(copy);
966 	return (-1);
967 }
968 
969 int
970 magic_compare(struct magic_line *ml1, struct magic_line *ml2)
971 {
972 	if (ml1->strength < ml2->strength)
973 		return (1);
974 	if (ml1->strength > ml2->strength)
975 		return (-1);
976 
977 	/*
978 	 * The original file depends on the (undefined!) qsort(3) behaviour
979 	 * when the strength is equal. This is impossible to reproduce with an
980 	 * RB tree so just use the line number and hope for the best.
981 	 */
982 	if (ml1->line < ml2->line)
983 		return (-1);
984 	if (ml1->line > ml2->line)
985 		return (1);
986 
987 	return (0);
988 }
989 RB_GENERATE(magic_tree, magic_line, node, magic_compare);
990 
991 int
992 magic_named_compare(struct magic_line *ml1, struct magic_line *ml2)
993 {
994 	return (strcmp(ml1->name, ml2->name));
995 }
996 RB_GENERATE(magic_named_tree, magic_line, node, magic_named_compare);
997 
998 static void
999 magic_adjust_strength(struct magic *m, u_int at, struct magic_line *ml,
1000     char *line)
1001 {
1002 	char	*cp, *s;
1003 	int64_t	 value;
1004 
1005 	cp = line + (sizeof "!:strength") - 1;
1006 	while (isspace((u_char)*cp))
1007 		cp++;
1008 	s = cp;
1009 
1010 	cp = strchr(s, '#');
1011 	if (cp != NULL)
1012 		*cp = '\0';
1013 	cp = s;
1014 
1015 	if (*s == '\0' || strchr("+-*/", *s) == NULL) {
1016 		magic_warnm(m, at, "invalid strength operator: %s", s);
1017 		return;
1018 	}
1019 	ml->strength_operator = *cp++;
1020 
1021 	while (isspace((u_char)*cp))
1022 		cp++;
1023 	cp = magic_strtoll(cp, &value);
1024 	while (cp != NULL && isspace((u_char)*cp))
1025 		cp++;
1026 	if (cp == NULL || *cp != '\0' || value < 0 || value > 255) {
1027 		magic_warnm(m, at, "invalid strength value: %s", s);
1028 		return;
1029 	}
1030 	ml->strength_value = value;
1031 }
1032 
1033 static void
1034 magic_set_mimetype(struct magic *m, u_int at, struct magic_line *ml, char *line)
1035 {
1036 	char	*mimetype, *cp;
1037 
1038 	mimetype = line + (sizeof "!:mime") - 1;
1039 	while (isspace((u_char)*mimetype))
1040 		mimetype++;
1041 
1042 	cp = strchr(mimetype, '#');
1043 	if (cp != NULL)
1044 		*cp = '\0';
1045 
1046 	if (*mimetype != '\0') {
1047 		cp = mimetype + strlen(mimetype) - 1;
1048 		while (cp != mimetype && isspace((u_char)*cp))
1049 			*cp-- = '\0';
1050 	}
1051 
1052 	cp = mimetype;
1053 	while (*cp != '\0') {
1054 		if (!isalnum((u_char)*cp) && strchr("/-.+", *cp) == NULL)
1055 			break;
1056 		cp++;
1057 	}
1058 	if (*mimetype == '\0' || *cp != '\0') {
1059 		magic_warnm(m, at, "invalid MIME type: %s", mimetype);
1060 		return;
1061 	}
1062 	if (ml == NULL) {
1063 		magic_warnm(m, at, "stray MIME type: %s", mimetype);
1064 		return;
1065 	}
1066 	ml->mimetype = xstrdup(mimetype);
1067 }
1068 
1069 struct magic *
1070 magic_load(FILE *f, const char *path, int warnings)
1071 {
1072 	struct magic		*m;
1073 	struct magic_line	*ml = NULL, *parent, *parent0;
1074 	char			*line, *tmp;
1075 	size_t			 size;
1076 	u_int			 at, level, n, i;
1077 
1078 	m = xcalloc(1, sizeof *m);
1079 	m->path = xstrdup(path);
1080 	m->warnings = warnings;
1081 	RB_INIT(&m->tree);
1082 
1083 	parent = NULL;
1084 	parent0 = NULL;
1085 	level = 0;
1086 
1087 	at = 0;
1088 	tmp = NULL;
1089 	while ((line = fgetln(f, &size))) {
1090 		if (line[size - 1] == '\n')
1091 			line[size - 1] = '\0';
1092 		else {
1093 			tmp = xmalloc(size + 1);
1094 			memcpy(tmp, line, size);
1095 			tmp[size] = '\0';
1096 			line = tmp;
1097 		}
1098 		at++;
1099 
1100 		while (isspace((u_char)*line))
1101 		    line++;
1102 		if (*line == '\0' || *line == '#')
1103 			continue;
1104 
1105 		if (strncmp (line, "!:mime", 6) == 0) {
1106 			magic_set_mimetype(m, at, ml, line);
1107 			continue;
1108 		}
1109 		if (strncmp (line, "!:strength", 10) == 0) {
1110 			magic_adjust_strength(m, at, ml, line);
1111 			continue;
1112 		}
1113 		if (strncmp (line, "!:", 2) == 0) {
1114 			for (i = 0; i < 64 && line[i] != '\0'; i++) {
1115 				if (isspace((u_char)line[i]))
1116 					break;
1117 			}
1118 			magic_warnm(m, at, "%.*s not supported", i, line);
1119 			continue;
1120 		}
1121 
1122 		n = 0;
1123 		for (; *line == '>'; line++)
1124 			n++;
1125 
1126 		ml = xcalloc(1, sizeof *ml);
1127 		ml->root = m;
1128 		ml->line = at;
1129 		ml->type = MAGIC_TYPE_NONE;
1130 		TAILQ_INIT(&ml->children);
1131 		ml->text = 1;
1132 
1133 		/*
1134 		 * At this point n is the level we want, level is the current
1135 		 * level. parent0 is the last line at the same level and parent
1136 		 * is the last line at the previous level.
1137 		 */
1138 		if (n == level + 1) {
1139 			parent = parent0;
1140 		} else if (n < level) {
1141 			for (i = n; i < level && parent != NULL; i++)
1142 				parent = parent->parent;
1143 		} else if (n != level) {
1144 			magic_warn(ml, "level skipped (%u->%u)", level, n);
1145 			free(ml);
1146 			continue;
1147 		}
1148 		ml->parent = parent;
1149 		level = n;
1150 
1151 		if (magic_parse_offset(ml, &line) != 0 ||
1152 		    magic_parse_type(ml, &line) != 0 ||
1153 		    magic_parse_value(ml, &line) != 0 ||
1154 		    magic_set_result(ml, line) != 0) {
1155 			/*
1156 			 * An invalid line still needs to appear in the tree in
1157 			 * case it has any children.
1158 			 */
1159 			ml->type = MAGIC_TYPE_NONE;
1160 		}
1161 
1162 		ml->strength = magic_get_strength(ml);
1163 		if (ml->parent == NULL) {
1164 			if (ml->name != NULL)
1165 				RB_INSERT(magic_named_tree, &m->named, ml);
1166 			else
1167 				RB_INSERT(magic_tree, &m->tree, ml);
1168 		} else
1169 			TAILQ_INSERT_TAIL(&ml->parent->children, ml, entry);
1170 		parent0 = ml;
1171 	}
1172 	free(tmp);
1173 
1174 	fclose(f);
1175 	return (m);
1176 }
1177