1 /* $OpenBSD: magic-load.c,v 1.23 2016/05/01 14:57:15 nicm Exp $ */ 2 3 /* 4 * Copyright (c) 2015 Nicholas Marriott <nicm@openbsd.org> 5 * 6 * Permission to use, copy, modify, and distribute this software for any 7 * purpose with or without fee is hereby granted, provided that the above 8 * copyright notice and this permission notice appear in all copies. 9 * 10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 14 * WHATSOEVER RESULTING FROM LOSS OF MIND, USE, DATA OR PROFITS, WHETHER 15 * IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING 16 * OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 17 */ 18 19 #include <sys/types.h> 20 21 #include <ctype.h> 22 #include <errno.h> 23 #include <limits.h> 24 #include <regex.h> 25 #include <stdarg.h> 26 #include <stdio.h> 27 #include <stdlib.h> 28 #include <string.h> 29 30 #include "magic.h" 31 #include "xmalloc.h" 32 33 static int 34 magic_odigit(u_char c) 35 { 36 if (c >= '0' && c <= '7') 37 return (c - '0'); 38 return (-1); 39 } 40 41 static int 42 magic_xdigit(u_char c) 43 { 44 if (c >= '0' && c <= '9') 45 return (c - '0'); 46 if (c >= 'a' && c <= 'f') 47 return (10 + c - 'a'); 48 if (c >= 'A' && c <= 'F') 49 return (10 + c - 'A'); 50 return (-1); 51 } 52 53 static void 54 magic_mark_text(struct magic_line *ml, int text) 55 { 56 do { 57 ml->text = text; 58 ml = ml->parent; 59 } while (ml != NULL); 60 } 61 62 static int 63 magic_make_pattern(struct magic_line *ml, const char *name, regex_t *re, 64 const char *p) 65 { 66 int error; 67 char errbuf[256]; 68 69 error = regcomp(re, p, REG_EXTENDED|REG_NOSUB); 70 if (error != 0) { 71 regerror(error, re, errbuf, sizeof errbuf); 72 magic_warn(ml, "bad %s pattern: %s", name, errbuf); 73 return (-1); 74 } 75 return (0); 76 } 77 78 static int 79 magic_set_result(struct magic_line *ml, const char *s) 80 { 81 const char *fmt; 82 const char *endfmt; 83 const char *cp; 84 regex_t *re = NULL; 85 regmatch_t pmatch; 86 size_t fmtlen; 87 88 while (isspace((u_char)*s)) 89 s++; 90 if (*s == '\0') { 91 ml->result = NULL; 92 return (0); 93 } 94 ml->result = xstrdup(s); 95 96 fmt = NULL; 97 for (cp = s; *cp != '\0'; cp++) { 98 if (cp[0] == '%' && cp[1] != '%') { 99 if (fmt != NULL) { 100 magic_warn(ml, "multiple formats"); 101 return (-1); 102 } 103 fmt = cp; 104 } 105 } 106 if (fmt == NULL) 107 return (0); 108 fmt++; 109 110 for (endfmt = fmt; *endfmt != '\0'; endfmt++) { 111 if (strchr("diouxXeEfFgGsc", *endfmt) != NULL) 112 break; 113 } 114 if (*endfmt == '\0') { 115 magic_warn(ml, "unterminated format"); 116 return (-1); 117 } 118 fmtlen = endfmt + 1 - fmt; 119 if (fmtlen > 32) { 120 magic_warn(ml, "format too long"); 121 return (-1); 122 } 123 124 if (*endfmt == 's') { 125 switch (ml->type) { 126 case MAGIC_TYPE_DATE: 127 case MAGIC_TYPE_LDATE: 128 case MAGIC_TYPE_UDATE: 129 case MAGIC_TYPE_ULDATE: 130 case MAGIC_TYPE_BEDATE: 131 case MAGIC_TYPE_BELDATE: 132 case MAGIC_TYPE_UBEDATE: 133 case MAGIC_TYPE_UBELDATE: 134 case MAGIC_TYPE_QDATE: 135 case MAGIC_TYPE_QLDATE: 136 case MAGIC_TYPE_UQDATE: 137 case MAGIC_TYPE_UQLDATE: 138 case MAGIC_TYPE_BEQDATE: 139 case MAGIC_TYPE_BEQLDATE: 140 case MAGIC_TYPE_UBEQDATE: 141 case MAGIC_TYPE_UBEQLDATE: 142 case MAGIC_TYPE_LEQDATE: 143 case MAGIC_TYPE_LEQLDATE: 144 case MAGIC_TYPE_ULEQDATE: 145 case MAGIC_TYPE_ULEQLDATE: 146 case MAGIC_TYPE_LEDATE: 147 case MAGIC_TYPE_LELDATE: 148 case MAGIC_TYPE_ULEDATE: 149 case MAGIC_TYPE_ULELDATE: 150 case MAGIC_TYPE_MEDATE: 151 case MAGIC_TYPE_MELDATE: 152 case MAGIC_TYPE_STRING: 153 case MAGIC_TYPE_PSTRING: 154 case MAGIC_TYPE_BESTRING16: 155 case MAGIC_TYPE_LESTRING16: 156 case MAGIC_TYPE_REGEX: 157 case MAGIC_TYPE_SEARCH: 158 break; 159 default: 160 ml->stringify = 1; 161 break; 162 } 163 } 164 165 if (!ml->root->compiled) { 166 /* 167 * XXX %ld (and %lu and so on) is invalid on 64-bit platforms 168 * with byte, short, long. We get lucky because our first and 169 * only argument ends up in a register. Accept it for now. 170 */ 171 if (magic_make_pattern(ml, "short", &ml->root->format_short, 172 "^-?[0-9]*(\\.[0-9]*)?(c|(l|h|hh)?[iduxX])$") != 0) 173 return (-1); 174 if (magic_make_pattern(ml, "long", &ml->root->format_long, 175 "^-?[0-9]*(\\.[0-9]*)?(c|(l|h|hh)?[iduxX])$") != 0) 176 return (-1); 177 if (magic_make_pattern(ml, "quad", &ml->root->format_quad, 178 "^-?[0-9]*(\\.[0-9]*)?ll[iduxX]$") != 0) 179 return (-1); 180 if (magic_make_pattern(ml, "float", &ml->root->format_float, 181 "^-?[0-9]*(\\.[0-9]*)?[eEfFgG]$") != 0) 182 return (-1); 183 if (magic_make_pattern(ml, "string", &ml->root->format_string, 184 "^-?[0-9]*(\\.[0-9]*)?s$") != 0) 185 return (-1); 186 ml->root->compiled = 1; 187 } 188 189 if (ml->stringify) 190 re = &ml->root->format_string; 191 else { 192 switch (ml->type) { 193 case MAGIC_TYPE_NONE: 194 case MAGIC_TYPE_BESTRING16: 195 case MAGIC_TYPE_LESTRING16: 196 case MAGIC_TYPE_NAME: 197 case MAGIC_TYPE_USE: 198 return (0); /* don't use result */ 199 case MAGIC_TYPE_BYTE: 200 case MAGIC_TYPE_UBYTE: 201 case MAGIC_TYPE_SHORT: 202 case MAGIC_TYPE_USHORT: 203 case MAGIC_TYPE_BESHORT: 204 case MAGIC_TYPE_UBESHORT: 205 case MAGIC_TYPE_LESHORT: 206 case MAGIC_TYPE_ULESHORT: 207 re = &ml->root->format_short; 208 break; 209 case MAGIC_TYPE_LONG: 210 case MAGIC_TYPE_ULONG: 211 case MAGIC_TYPE_BELONG: 212 case MAGIC_TYPE_UBELONG: 213 case MAGIC_TYPE_LELONG: 214 case MAGIC_TYPE_ULELONG: 215 case MAGIC_TYPE_MELONG: 216 re = &ml->root->format_long; 217 break; 218 case MAGIC_TYPE_QUAD: 219 case MAGIC_TYPE_UQUAD: 220 case MAGIC_TYPE_BEQUAD: 221 case MAGIC_TYPE_UBEQUAD: 222 case MAGIC_TYPE_LEQUAD: 223 case MAGIC_TYPE_ULEQUAD: 224 re = &ml->root->format_quad; 225 break; 226 case MAGIC_TYPE_FLOAT: 227 case MAGIC_TYPE_BEFLOAT: 228 case MAGIC_TYPE_LEFLOAT: 229 case MAGIC_TYPE_DOUBLE: 230 case MAGIC_TYPE_BEDOUBLE: 231 case MAGIC_TYPE_LEDOUBLE: 232 re = &ml->root->format_float; 233 break; 234 case MAGIC_TYPE_DATE: 235 case MAGIC_TYPE_LDATE: 236 case MAGIC_TYPE_UDATE: 237 case MAGIC_TYPE_ULDATE: 238 case MAGIC_TYPE_BEDATE: 239 case MAGIC_TYPE_BELDATE: 240 case MAGIC_TYPE_UBEDATE: 241 case MAGIC_TYPE_UBELDATE: 242 case MAGIC_TYPE_QDATE: 243 case MAGIC_TYPE_QLDATE: 244 case MAGIC_TYPE_UQDATE: 245 case MAGIC_TYPE_UQLDATE: 246 case MAGIC_TYPE_BEQDATE: 247 case MAGIC_TYPE_BEQLDATE: 248 case MAGIC_TYPE_UBEQDATE: 249 case MAGIC_TYPE_UBEQLDATE: 250 case MAGIC_TYPE_LEQDATE: 251 case MAGIC_TYPE_LEQLDATE: 252 case MAGIC_TYPE_ULEQDATE: 253 case MAGIC_TYPE_ULEQLDATE: 254 case MAGIC_TYPE_LEDATE: 255 case MAGIC_TYPE_LELDATE: 256 case MAGIC_TYPE_ULEDATE: 257 case MAGIC_TYPE_ULELDATE: 258 case MAGIC_TYPE_MEDATE: 259 case MAGIC_TYPE_MELDATE: 260 case MAGIC_TYPE_STRING: 261 case MAGIC_TYPE_PSTRING: 262 case MAGIC_TYPE_REGEX: 263 case MAGIC_TYPE_SEARCH: 264 case MAGIC_TYPE_DEFAULT: 265 case MAGIC_TYPE_CLEAR: 266 re = &ml->root->format_string; 267 break; 268 } 269 } 270 271 pmatch.rm_so = 0; 272 pmatch.rm_eo = fmtlen; 273 if (regexec(re, fmt, 1, &pmatch, REG_STARTEND) != 0) { 274 magic_warn(ml, "bad format for %s: %%%.*s", ml->type_string, 275 (int)fmtlen, fmt); 276 return (-1); 277 } 278 279 return (0); 280 } 281 282 static u_int 283 magic_get_strength(struct magic_line *ml) 284 { 285 int n; 286 size_t size; 287 288 if (ml->type == MAGIC_TYPE_NONE) 289 return (0); 290 291 if (ml->test_not || ml->test_operator == 'x') { 292 n = 1; 293 goto skip; 294 } 295 296 n = 2 * MAGIC_STRENGTH_MULTIPLIER; 297 switch (ml->type) { 298 case MAGIC_TYPE_NONE: 299 case MAGIC_TYPE_DEFAULT: 300 return (0); 301 case MAGIC_TYPE_CLEAR: 302 case MAGIC_TYPE_NAME: 303 case MAGIC_TYPE_USE: 304 break; 305 case MAGIC_TYPE_BYTE: 306 case MAGIC_TYPE_UBYTE: 307 n += 1 * MAGIC_STRENGTH_MULTIPLIER; 308 break; 309 case MAGIC_TYPE_SHORT: 310 case MAGIC_TYPE_USHORT: 311 case MAGIC_TYPE_BESHORT: 312 case MAGIC_TYPE_UBESHORT: 313 case MAGIC_TYPE_LESHORT: 314 case MAGIC_TYPE_ULESHORT: 315 n += 2 * MAGIC_STRENGTH_MULTIPLIER; 316 break; 317 case MAGIC_TYPE_LONG: 318 case MAGIC_TYPE_ULONG: 319 case MAGIC_TYPE_FLOAT: 320 case MAGIC_TYPE_DATE: 321 case MAGIC_TYPE_LDATE: 322 case MAGIC_TYPE_UDATE: 323 case MAGIC_TYPE_ULDATE: 324 case MAGIC_TYPE_BELONG: 325 case MAGIC_TYPE_UBELONG: 326 case MAGIC_TYPE_BEFLOAT: 327 case MAGIC_TYPE_BEDATE: 328 case MAGIC_TYPE_BELDATE: 329 case MAGIC_TYPE_UBEDATE: 330 case MAGIC_TYPE_UBELDATE: 331 n += 4 * MAGIC_STRENGTH_MULTIPLIER; 332 break; 333 case MAGIC_TYPE_QUAD: 334 case MAGIC_TYPE_UQUAD: 335 case MAGIC_TYPE_DOUBLE: 336 case MAGIC_TYPE_QDATE: 337 case MAGIC_TYPE_QLDATE: 338 case MAGIC_TYPE_UQDATE: 339 case MAGIC_TYPE_UQLDATE: 340 case MAGIC_TYPE_BEQUAD: 341 case MAGIC_TYPE_UBEQUAD: 342 case MAGIC_TYPE_BEDOUBLE: 343 case MAGIC_TYPE_BEQDATE: 344 case MAGIC_TYPE_BEQLDATE: 345 case MAGIC_TYPE_UBEQDATE: 346 case MAGIC_TYPE_UBEQLDATE: 347 case MAGIC_TYPE_LEQUAD: 348 case MAGIC_TYPE_ULEQUAD: 349 case MAGIC_TYPE_LEDOUBLE: 350 case MAGIC_TYPE_LEQDATE: 351 case MAGIC_TYPE_LEQLDATE: 352 case MAGIC_TYPE_ULEQDATE: 353 case MAGIC_TYPE_ULEQLDATE: 354 case MAGIC_TYPE_LELONG: 355 case MAGIC_TYPE_ULELONG: 356 case MAGIC_TYPE_LEFLOAT: 357 case MAGIC_TYPE_LEDATE: 358 case MAGIC_TYPE_LELDATE: 359 case MAGIC_TYPE_ULEDATE: 360 case MAGIC_TYPE_ULELDATE: 361 case MAGIC_TYPE_MELONG: 362 case MAGIC_TYPE_MEDATE: 363 case MAGIC_TYPE_MELDATE: 364 n += 8 * MAGIC_STRENGTH_MULTIPLIER; 365 break; 366 case MAGIC_TYPE_STRING: 367 case MAGIC_TYPE_PSTRING: 368 n += ml->test_string_size * MAGIC_STRENGTH_MULTIPLIER; 369 break; 370 case MAGIC_TYPE_BESTRING16: 371 case MAGIC_TYPE_LESTRING16: 372 n += ml->test_string_size * MAGIC_STRENGTH_MULTIPLIER / 2; 373 break; 374 case MAGIC_TYPE_REGEX: 375 case MAGIC_TYPE_SEARCH: 376 size = MAGIC_STRENGTH_MULTIPLIER / ml->test_string_size; 377 if (size < 1) 378 size = 1; 379 n += ml->test_string_size * size; 380 break; 381 } 382 switch (ml->test_operator) { 383 case '=': 384 n += MAGIC_STRENGTH_MULTIPLIER; 385 break; 386 case '<': 387 case '>': 388 case '[': 389 case ']': 390 n -= 2 * MAGIC_STRENGTH_MULTIPLIER; 391 break; 392 case '^': 393 case '&': 394 n -= MAGIC_STRENGTH_MULTIPLIER; 395 break; 396 } 397 398 skip: 399 switch (ml->strength_operator) { 400 case '+': 401 n += ml->strength_value; 402 break; 403 case '-': 404 n -= ml->strength_value; 405 break; 406 case '*': 407 n *= ml->strength_value; 408 break; 409 case '/': 410 n /= ml->strength_value; 411 break; 412 } 413 return (n <= 0 ? 1 : n); 414 } 415 416 static int 417 magic_get_string(char **line, char *out, size_t *outlen) 418 { 419 char *start, *cp, c; 420 int d0, d1, d2; 421 422 start = out; 423 for (cp = *line; *cp != '\0' && !isspace((u_char)*cp); cp++) { 424 if (*cp != '\\') { 425 *out++ = *cp; 426 continue; 427 } 428 429 switch (c = *++cp) { 430 case '\0': /* end of line */ 431 return (-1); 432 case ' ': 433 *out++ = ' '; 434 break; 435 case '0': 436 case '1': 437 case '2': 438 case '3': 439 case '4': 440 case '5': 441 case '6': 442 case '7': 443 d0 = magic_odigit(cp[0]); 444 if (cp[0] != '\0') 445 d1 = magic_odigit(cp[1]); 446 else 447 d1 = -1; 448 if (cp[0] != '\0' && cp[1] != '\0') 449 d2 = magic_odigit(cp[2]); 450 else 451 d2 = -1; 452 453 if (d0 != -1 && d1 != -1 && d2 != -1) { 454 *out = d2 | (d1 << 3) | (d0 << 6); 455 cp += 2; 456 } else if (d0 != -1 && d1 != -1) { 457 *out = d1 | (d0 << 3); 458 cp++; 459 } else if (d0 != -1) 460 *out = d0; 461 else 462 return (-1); 463 out++; 464 break; 465 case 'x': 466 d0 = magic_xdigit(cp[1]); 467 if (cp[1] != '\0') 468 d1 = magic_xdigit(cp[2]); 469 else 470 d1 = -1; 471 472 if (d0 != -1 && d1 != -1) { 473 *out = d1 | (d0 << 4); 474 cp += 2; 475 } else if (d0 != -1) { 476 *out = d0; 477 cp++; 478 } else 479 return (-1); 480 out++; 481 482 break; 483 case 'a': 484 *out++ = '\a'; 485 break; 486 case 'b': 487 *out++ = '\b'; 488 break; 489 case 't': 490 *out++ = '\t'; 491 break; 492 case 'f': 493 *out++ = '\f'; 494 break; 495 case 'n': 496 *out++ = '\n'; 497 break; 498 case 'r': 499 *out++ = '\r'; 500 break; 501 case '\\': 502 *out++ = '\\'; 503 break; 504 case '\'': 505 *out++ = '\''; 506 break; 507 case '\"': 508 *out++ = '\"'; 509 break; 510 default: 511 *out++ = c; 512 break; 513 } 514 } 515 *out = '\0'; 516 *outlen = out - start; 517 518 *line = cp; 519 return (0); 520 } 521 522 static int 523 magic_parse_offset(struct magic_line *ml, char **line) 524 { 525 char *copy, *s, *cp, *endptr; 526 527 while (isspace((u_char)**line)) 528 (*line)++; 529 copy = s = cp = xmalloc(strlen(*line) + 1); 530 while (**line != '\0' && !isspace((u_char)**line)) 531 *cp++ = *(*line)++; 532 *cp = '\0'; 533 534 ml->offset = 0; 535 ml->offset_relative = 0; 536 537 ml->indirect_type = ' '; 538 ml->indirect_relative = 0; 539 ml->indirect_offset = 0; 540 ml->indirect_operator = ' '; 541 ml->indirect_operand = 0; 542 543 if (*s == '&') { 544 ml->offset_relative = 1; 545 s++; 546 } 547 548 if (*s != '(') { 549 endptr = magic_strtoll(s, &ml->offset); 550 if (endptr == NULL || *endptr != '\0') { 551 magic_warn(ml, "missing closing bracket"); 552 goto fail; 553 } 554 if (ml->offset < 0 && !ml->offset_relative) { 555 magic_warn(ml, "negative absolute offset"); 556 goto fail; 557 } 558 goto done; 559 } 560 s++; 561 562 if (*s == '&') { 563 ml->indirect_relative = 1; 564 s++; 565 } 566 567 endptr = magic_strtoll(s, &ml->indirect_offset); 568 if (endptr == NULL) { 569 magic_warn(ml, "can't parse offset: %s", s); 570 goto fail; 571 } 572 s = endptr; 573 if (*s == ')') 574 goto done; 575 576 if (*s == '.') { 577 s++; 578 if (*s == '\0' || strchr("bslBSL", *s) == NULL) { 579 magic_warn(ml, "unknown offset type: %c", *s); 580 goto fail; 581 } 582 ml->indirect_type = *s; 583 s++; 584 if (*s == ')') 585 goto done; 586 } 587 588 if (*s == '\0' || strchr("+-*", *s) == NULL) { 589 magic_warn(ml, "unknown offset operator: %c", *s); 590 goto fail; 591 } 592 ml->indirect_operator = *s; 593 s++; 594 if (*s == ')') 595 goto done; 596 597 if (*s == '(') { 598 s++; 599 endptr = magic_strtoll(s, &ml->indirect_operand); 600 if (endptr == NULL || *endptr != ')') { 601 magic_warn(ml, "missing closing bracket"); 602 goto fail; 603 } 604 if (*++endptr != ')') { 605 magic_warn(ml, "missing closing bracket"); 606 goto fail; 607 } 608 } else { 609 endptr = magic_strtoll(s, &ml->indirect_operand); 610 if (endptr == NULL || *endptr != ')') { 611 magic_warn(ml, "missing closing bracket"); 612 goto fail; 613 } 614 } 615 616 done: 617 free(copy); 618 return (0); 619 620 fail: 621 free(copy); 622 return (-1); 623 } 624 625 static int 626 magic_parse_type(struct magic_line *ml, char **line) 627 { 628 char *copy, *s, *cp, *endptr; 629 630 while (isspace((u_char)**line)) 631 (*line)++; 632 copy = s = cp = xmalloc(strlen(*line) + 1); 633 while (**line != '\0' && !isspace((u_char)**line)) 634 *cp++ = *(*line)++; 635 *cp = '\0'; 636 637 ml->type = MAGIC_TYPE_NONE; 638 ml->type_operator = ' '; 639 ml->type_operand = 0; 640 641 if (strcmp(s, "name") == 0) { 642 ml->type = MAGIC_TYPE_NAME; 643 ml->type_string = xstrdup(s); 644 goto done; 645 } 646 if (strcmp(s, "use") == 0) { 647 ml->type = MAGIC_TYPE_USE; 648 ml->type_string = xstrdup(s); 649 goto done; 650 } 651 652 if (strncmp(s, "string", (sizeof "string") - 1) == 0 || 653 strncmp(s, "ustring", (sizeof "ustring") - 1) == 0) { 654 if (*s == 'u') 655 ml->type_string = xstrdup(s + 1); 656 else 657 ml->type_string = xstrdup(s); 658 ml->type = MAGIC_TYPE_STRING; 659 magic_mark_text(ml, 0); 660 goto done; 661 } 662 if (strncmp(s, "pstring", (sizeof "pstring") - 1) == 0 || 663 strncmp(s, "upstring", (sizeof "upstring") - 1) == 0) { 664 if (*s == 'u') 665 ml->type_string = xstrdup(s + 1); 666 else 667 ml->type_string = xstrdup(s); 668 ml->type = MAGIC_TYPE_PSTRING; 669 magic_mark_text(ml, 0); 670 goto done; 671 } 672 if (strncmp(s, "search", (sizeof "search") - 1) == 0 || 673 strncmp(s, "usearch", (sizeof "usearch") - 1) == 0) { 674 if (*s == 'u') 675 ml->type_string = xstrdup(s + 1); 676 else 677 ml->type_string = xstrdup(s); 678 ml->type = MAGIC_TYPE_SEARCH; 679 goto done; 680 } 681 if (strncmp(s, "regex", (sizeof "regex") - 1) == 0 || 682 strncmp(s, "uregex", (sizeof "uregex") - 1) == 0) { 683 if (*s == 'u') 684 ml->type_string = xstrdup(s + 1); 685 else 686 ml->type_string = xstrdup(s); 687 ml->type = MAGIC_TYPE_REGEX; 688 goto done; 689 } 690 ml->type_string = xstrdup(s); 691 692 cp = &s[strcspn(s, "+-&/%*")]; 693 if (*cp != '\0') { 694 ml->type_operator = *cp; 695 endptr = magic_strtoull(cp + 1, &ml->type_operand); 696 if (endptr == NULL || *endptr != '\0') { 697 magic_warn(ml, "can't parse operand: %s", cp + 1); 698 goto fail; 699 } 700 *cp = '\0'; 701 } 702 703 if (strcmp(s, "byte") == 0) 704 ml->type = MAGIC_TYPE_BYTE; 705 else if (strcmp(s, "short") == 0) 706 ml->type = MAGIC_TYPE_SHORT; 707 else if (strcmp(s, "long") == 0) 708 ml->type = MAGIC_TYPE_LONG; 709 else if (strcmp(s, "quad") == 0) 710 ml->type = MAGIC_TYPE_QUAD; 711 else if (strcmp(s, "ubyte") == 0) 712 ml->type = MAGIC_TYPE_UBYTE; 713 else if (strcmp(s, "ushort") == 0) 714 ml->type = MAGIC_TYPE_USHORT; 715 else if (strcmp(s, "ulong") == 0) 716 ml->type = MAGIC_TYPE_ULONG; 717 else if (strcmp(s, "uquad") == 0) 718 ml->type = MAGIC_TYPE_UQUAD; 719 else if (strcmp(s, "float") == 0 || strcmp(s, "ufloat") == 0) 720 ml->type = MAGIC_TYPE_FLOAT; 721 else if (strcmp(s, "double") == 0 || strcmp(s, "udouble") == 0) 722 ml->type = MAGIC_TYPE_DOUBLE; 723 else if (strcmp(s, "date") == 0) 724 ml->type = MAGIC_TYPE_DATE; 725 else if (strcmp(s, "qdate") == 0) 726 ml->type = MAGIC_TYPE_QDATE; 727 else if (strcmp(s, "ldate") == 0) 728 ml->type = MAGIC_TYPE_LDATE; 729 else if (strcmp(s, "qldate") == 0) 730 ml->type = MAGIC_TYPE_QLDATE; 731 else if (strcmp(s, "udate") == 0) 732 ml->type = MAGIC_TYPE_UDATE; 733 else if (strcmp(s, "uqdate") == 0) 734 ml->type = MAGIC_TYPE_UQDATE; 735 else if (strcmp(s, "uldate") == 0) 736 ml->type = MAGIC_TYPE_ULDATE; 737 else if (strcmp(s, "uqldate") == 0) 738 ml->type = MAGIC_TYPE_UQLDATE; 739 else if (strcmp(s, "beshort") == 0) 740 ml->type = MAGIC_TYPE_BESHORT; 741 else if (strcmp(s, "belong") == 0) 742 ml->type = MAGIC_TYPE_BELONG; 743 else if (strcmp(s, "bequad") == 0) 744 ml->type = MAGIC_TYPE_BEQUAD; 745 else if (strcmp(s, "ubeshort") == 0) 746 ml->type = MAGIC_TYPE_UBESHORT; 747 else if (strcmp(s, "ubelong") == 0) 748 ml->type = MAGIC_TYPE_UBELONG; 749 else if (strcmp(s, "ubequad") == 0) 750 ml->type = MAGIC_TYPE_UBEQUAD; 751 else if (strcmp(s, "befloat") == 0 || strcmp(s, "ubefloat") == 0) 752 ml->type = MAGIC_TYPE_BEFLOAT; 753 else if (strcmp(s, "bedouble") == 0 || strcmp(s, "ubedouble") == 0) 754 ml->type = MAGIC_TYPE_BEDOUBLE; 755 else if (strcmp(s, "bedate") == 0) 756 ml->type = MAGIC_TYPE_BEDATE; 757 else if (strcmp(s, "beqdate") == 0) 758 ml->type = MAGIC_TYPE_BEQDATE; 759 else if (strcmp(s, "beldate") == 0) 760 ml->type = MAGIC_TYPE_BELDATE; 761 else if (strcmp(s, "beqldate") == 0) 762 ml->type = MAGIC_TYPE_BEQLDATE; 763 else if (strcmp(s, "ubedate") == 0) 764 ml->type = MAGIC_TYPE_UBEDATE; 765 else if (strcmp(s, "ubeqdate") == 0) 766 ml->type = MAGIC_TYPE_UBEQDATE; 767 else if (strcmp(s, "ubeldate") == 0) 768 ml->type = MAGIC_TYPE_UBELDATE; 769 else if (strcmp(s, "ubeqldate") == 0) 770 ml->type = MAGIC_TYPE_UBEQLDATE; 771 else if (strcmp(s, "bestring16") == 0 || strcmp(s, "ubestring16") == 0) 772 ml->type = MAGIC_TYPE_BESTRING16; 773 else if (strcmp(s, "leshort") == 0) 774 ml->type = MAGIC_TYPE_LESHORT; 775 else if (strcmp(s, "lelong") == 0) 776 ml->type = MAGIC_TYPE_LELONG; 777 else if (strcmp(s, "lequad") == 0) 778 ml->type = MAGIC_TYPE_LEQUAD; 779 else if (strcmp(s, "uleshort") == 0) 780 ml->type = MAGIC_TYPE_ULESHORT; 781 else if (strcmp(s, "ulelong") == 0) 782 ml->type = MAGIC_TYPE_ULELONG; 783 else if (strcmp(s, "ulequad") == 0) 784 ml->type = MAGIC_TYPE_ULEQUAD; 785 else if (strcmp(s, "lefloat") == 0 || strcmp(s, "ulefloat") == 0) 786 ml->type = MAGIC_TYPE_LEFLOAT; 787 else if (strcmp(s, "ledouble") == 0 || strcmp(s, "uledouble") == 0) 788 ml->type = MAGIC_TYPE_LEDOUBLE; 789 else if (strcmp(s, "ledate") == 0) 790 ml->type = MAGIC_TYPE_LEDATE; 791 else if (strcmp(s, "leqdate") == 0) 792 ml->type = MAGIC_TYPE_LEQDATE; 793 else if (strcmp(s, "leldate") == 0) 794 ml->type = MAGIC_TYPE_LELDATE; 795 else if (strcmp(s, "leqldate") == 0) 796 ml->type = MAGIC_TYPE_LEQLDATE; 797 else if (strcmp(s, "uledate") == 0) 798 ml->type = MAGIC_TYPE_ULEDATE; 799 else if (strcmp(s, "uleqdate") == 0) 800 ml->type = MAGIC_TYPE_ULEQDATE; 801 else if (strcmp(s, "uleldate") == 0) 802 ml->type = MAGIC_TYPE_ULELDATE; 803 else if (strcmp(s, "uleqldate") == 0) 804 ml->type = MAGIC_TYPE_ULEQLDATE; 805 else if (strcmp(s, "lestring16") == 0 || strcmp(s, "ulestring16") == 0) 806 ml->type = MAGIC_TYPE_LESTRING16; 807 else if (strcmp(s, "melong") == 0 || strcmp(s, "umelong") == 0) 808 ml->type = MAGIC_TYPE_MELONG; 809 else if (strcmp(s, "medate") == 0 || strcmp(s, "umedate") == 0) 810 ml->type = MAGIC_TYPE_MEDATE; 811 else if (strcmp(s, "meldate") == 0 || strcmp(s, "umeldate") == 0) 812 ml->type = MAGIC_TYPE_MELDATE; 813 else if (strcmp(s, "default") == 0 || strcmp(s, "udefault") == 0) 814 ml->type = MAGIC_TYPE_DEFAULT; 815 else if (strcmp(s, "clear") == 0 || strcmp(s, "uclear") == 0) 816 ml->type = MAGIC_TYPE_CLEAR; 817 else { 818 magic_warn(ml, "unknown type: %s", s); 819 goto fail; 820 } 821 magic_mark_text(ml, 0); 822 823 done: 824 free(copy); 825 return (0); 826 827 fail: 828 free(copy); 829 return (-1); 830 } 831 832 static int 833 magic_parse_value(struct magic_line *ml, char **line) 834 { 835 char *copy, *s, *cp, *endptr; 836 size_t slen; 837 uint64_t u; 838 839 while (isspace((u_char)**line)) 840 (*line)++; 841 842 ml->test_operator = '='; 843 ml->test_not = 0; 844 ml->test_string = NULL; 845 ml->test_string_size = 0; 846 ml->test_unsigned = 0; 847 ml->test_signed = 0; 848 849 if (**line == '\0') 850 return (0); 851 852 s = *line; 853 if (s[0] == 'x' && (s[1] == '\0' || isspace((u_char)s[1]))) { 854 (*line)++; 855 ml->test_operator = 'x'; 856 return (0); 857 } 858 859 if (ml->type == MAGIC_TYPE_DEFAULT || ml->type == MAGIC_TYPE_CLEAR) { 860 magic_warn(ml, "test specified for default or clear"); 861 ml->test_operator = 'x'; 862 return (0); 863 } 864 865 if (**line == '!') { 866 ml->test_not = 1; 867 (*line)++; 868 } 869 870 switch (ml->type) { 871 case MAGIC_TYPE_NAME: 872 case MAGIC_TYPE_USE: 873 copy = s = xmalloc(strlen(*line) + 1); 874 if (magic_get_string(line, s, &slen) != 0 || slen == 0) { 875 magic_warn(ml, "can't parse string"); 876 goto fail; 877 } 878 if (slen == 0 || *s == '\0' || strcmp(s, "^") == 0) { 879 magic_warn(ml, "invalid name"); 880 goto fail; 881 } 882 ml->name = s; 883 return (0); /* do not free */ 884 case MAGIC_TYPE_STRING: 885 case MAGIC_TYPE_PSTRING: 886 case MAGIC_TYPE_SEARCH: 887 if (**line == '>' || **line == '<' || **line == '=') { 888 ml->test_operator = **line; 889 (*line)++; 890 } 891 /* FALLTHROUGH */ 892 case MAGIC_TYPE_REGEX: 893 if (**line == '=') 894 (*line)++; 895 copy = s = xmalloc(strlen(*line) + 1); 896 if (magic_get_string(line, s, &slen) != 0) { 897 magic_warn(ml, "can't parse string"); 898 goto fail; 899 } 900 ml->test_string_size = slen; 901 ml->test_string = s; 902 return (0); /* do not free */ 903 default: 904 break; 905 } 906 907 while (isspace((u_char)**line)) 908 (*line)++; 909 if ((*line)[0] == '<' && (*line)[1] == '=') { 910 ml->test_operator = '['; 911 (*line) += 2; 912 } else if ((*line)[0] == '>' && (*line)[1] == '=') { 913 ml->test_operator = ']'; 914 (*line) += 2; 915 } else if (**line != '\0' && strchr("=<>&^", **line) != NULL) { 916 ml->test_operator = **line; 917 (*line)++; 918 } 919 920 while (isspace((u_char)**line)) 921 (*line)++; 922 copy = cp = xmalloc(strlen(*line) + 1); 923 while (**line != '\0' && !isspace((u_char)**line)) 924 *cp++ = *(*line)++; 925 *cp = '\0'; 926 927 switch (ml->type) { 928 case MAGIC_TYPE_FLOAT: 929 case MAGIC_TYPE_DOUBLE: 930 case MAGIC_TYPE_BEFLOAT: 931 case MAGIC_TYPE_BEDOUBLE: 932 case MAGIC_TYPE_LEFLOAT: 933 case MAGIC_TYPE_LEDOUBLE: 934 errno = 0; 935 ml->test_double = strtod(copy, &endptr); 936 if (errno == ERANGE) 937 endptr = NULL; 938 break; 939 default: 940 if (*ml->type_string == 'u') 941 endptr = magic_strtoull(copy, &ml->test_unsigned); 942 else { 943 endptr = magic_strtoll(copy, &ml->test_signed); 944 if (endptr == NULL || *endptr != '\0') { 945 /* 946 * If we can't parse this as a signed number, 947 * try as unsigned instead. 948 */ 949 endptr = magic_strtoull(copy, &u); 950 if (endptr != NULL && *endptr == '\0') 951 ml->test_signed = (int64_t)u; 952 } 953 } 954 break; 955 } 956 if (endptr == NULL || *endptr != '\0') { 957 magic_warn(ml, "can't parse number: %s", copy); 958 goto fail; 959 } 960 961 free(copy); 962 return (0); 963 964 fail: 965 free(copy); 966 return (-1); 967 } 968 969 int 970 magic_compare(struct magic_line *ml1, struct magic_line *ml2) 971 { 972 if (ml1->strength < ml2->strength) 973 return (1); 974 if (ml1->strength > ml2->strength) 975 return (-1); 976 977 /* 978 * The original file depends on the (undefined!) qsort(3) behaviour 979 * when the strength is equal. This is impossible to reproduce with an 980 * RB tree so just use the line number and hope for the best. 981 */ 982 if (ml1->line < ml2->line) 983 return (-1); 984 if (ml1->line > ml2->line) 985 return (1); 986 987 return (0); 988 } 989 RB_GENERATE(magic_tree, magic_line, node, magic_compare); 990 991 int 992 magic_named_compare(struct magic_line *ml1, struct magic_line *ml2) 993 { 994 return (strcmp(ml1->name, ml2->name)); 995 } 996 RB_GENERATE(magic_named_tree, magic_line, node, magic_named_compare); 997 998 static void 999 magic_adjust_strength(struct magic *m, u_int at, struct magic_line *ml, 1000 char *line) 1001 { 1002 char *cp, *s; 1003 int64_t value; 1004 1005 cp = line + (sizeof "!:strength") - 1; 1006 while (isspace((u_char)*cp)) 1007 cp++; 1008 s = cp; 1009 1010 cp = strchr(s, '#'); 1011 if (cp != NULL) 1012 *cp = '\0'; 1013 cp = s; 1014 1015 if (*s == '\0' || strchr("+-*/", *s) == NULL) { 1016 magic_warnm(m, at, "invalid strength operator: %s", s); 1017 return; 1018 } 1019 ml->strength_operator = *cp++; 1020 1021 while (isspace((u_char)*cp)) 1022 cp++; 1023 cp = magic_strtoll(cp, &value); 1024 while (cp != NULL && isspace((u_char)*cp)) 1025 cp++; 1026 if (cp == NULL || *cp != '\0' || value < 0 || value > 255) { 1027 magic_warnm(m, at, "invalid strength value: %s", s); 1028 return; 1029 } 1030 ml->strength_value = value; 1031 } 1032 1033 static void 1034 magic_set_mimetype(struct magic *m, u_int at, struct magic_line *ml, char *line) 1035 { 1036 char *mimetype, *cp; 1037 1038 mimetype = line + (sizeof "!:mime") - 1; 1039 while (isspace((u_char)*mimetype)) 1040 mimetype++; 1041 1042 cp = strchr(mimetype, '#'); 1043 if (cp != NULL) 1044 *cp = '\0'; 1045 1046 if (*mimetype != '\0') { 1047 cp = mimetype + strlen(mimetype) - 1; 1048 while (cp != mimetype && isspace((u_char)*cp)) 1049 *cp-- = '\0'; 1050 } 1051 1052 cp = mimetype; 1053 while (*cp != '\0') { 1054 if (!isalnum((u_char)*cp) && strchr("/-.+", *cp) == NULL) 1055 break; 1056 cp++; 1057 } 1058 if (*mimetype == '\0' || *cp != '\0') { 1059 magic_warnm(m, at, "invalid MIME type: %s", mimetype); 1060 return; 1061 } 1062 if (ml == NULL) { 1063 magic_warnm(m, at, "stray MIME type: %s", mimetype); 1064 return; 1065 } 1066 ml->mimetype = xstrdup(mimetype); 1067 } 1068 1069 struct magic * 1070 magic_load(FILE *f, const char *path, int warnings) 1071 { 1072 struct magic *m; 1073 struct magic_line *ml = NULL, *parent, *parent0; 1074 char *line, *tmp; 1075 size_t size; 1076 u_int at, level, n, i; 1077 1078 m = xcalloc(1, sizeof *m); 1079 m->path = xstrdup(path); 1080 m->warnings = warnings; 1081 RB_INIT(&m->tree); 1082 1083 parent = NULL; 1084 parent0 = NULL; 1085 level = 0; 1086 1087 at = 0; 1088 tmp = NULL; 1089 while ((line = fgetln(f, &size))) { 1090 if (line[size - 1] == '\n') 1091 line[size - 1] = '\0'; 1092 else { 1093 tmp = xmalloc(size + 1); 1094 memcpy(tmp, line, size); 1095 tmp[size] = '\0'; 1096 line = tmp; 1097 } 1098 at++; 1099 1100 while (isspace((u_char)*line)) 1101 line++; 1102 if (*line == '\0' || *line == '#') 1103 continue; 1104 1105 if (strncmp (line, "!:mime", 6) == 0) { 1106 magic_set_mimetype(m, at, ml, line); 1107 continue; 1108 } 1109 if (strncmp (line, "!:strength", 10) == 0) { 1110 magic_adjust_strength(m, at, ml, line); 1111 continue; 1112 } 1113 if (strncmp (line, "!:", 2) == 0) { 1114 for (i = 0; i < 64 && line[i] != '\0'; i++) { 1115 if (isspace((u_char)line[i])) 1116 break; 1117 } 1118 magic_warnm(m, at, "%.*s not supported", i, line); 1119 continue; 1120 } 1121 1122 n = 0; 1123 for (; *line == '>'; line++) 1124 n++; 1125 1126 ml = xcalloc(1, sizeof *ml); 1127 ml->root = m; 1128 ml->line = at; 1129 ml->type = MAGIC_TYPE_NONE; 1130 TAILQ_INIT(&ml->children); 1131 ml->text = 1; 1132 1133 /* 1134 * At this point n is the level we want, level is the current 1135 * level. parent0 is the last line at the same level and parent 1136 * is the last line at the previous level. 1137 */ 1138 if (n == level + 1) { 1139 parent = parent0; 1140 } else if (n < level) { 1141 for (i = n; i < level && parent != NULL; i++) 1142 parent = parent->parent; 1143 } else if (n != level) { 1144 magic_warn(ml, "level skipped (%u->%u)", level, n); 1145 free(ml); 1146 continue; 1147 } 1148 ml->parent = parent; 1149 level = n; 1150 1151 if (magic_parse_offset(ml, &line) != 0 || 1152 magic_parse_type(ml, &line) != 0 || 1153 magic_parse_value(ml, &line) != 0 || 1154 magic_set_result(ml, line) != 0) { 1155 /* 1156 * An invalid line still needs to appear in the tree in 1157 * case it has any children. 1158 */ 1159 ml->type = MAGIC_TYPE_NONE; 1160 } 1161 1162 ml->strength = magic_get_strength(ml); 1163 if (ml->parent == NULL) { 1164 if (ml->name != NULL) 1165 RB_INSERT(magic_named_tree, &m->named, ml); 1166 else 1167 RB_INSERT(magic_tree, &m->tree, ml); 1168 } else 1169 TAILQ_INSERT_TAIL(&ml->parent->children, ml, entry); 1170 parent0 = ml; 1171 } 1172 free(tmp); 1173 1174 fclose(f); 1175 return (m); 1176 } 1177