/*
 * Copyright (c) Ian F. Darwin 1986-1995.
 * Software written by Ian F. Darwin and others;
 * maintained 1995-present by Christos Zoulas and others.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice immediately at the beginning of the file, without modification,
 *    this list of conditions, and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR
 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
/*
 * apprentice - make one pass through /etc/magic, learning its secrets.
 */

#include "file.h"

#ifndef	lint
FILE_RCSID("@(#)$File: apprentice.c,v 1.202 2014/03/14 18:48:11 christos Exp $")
#endif	/* lint */

#include "magic.h"
#include <stdlib.h>
#ifdef HAVE_UNISTD_H
#include <unistd.h>
#endif
#ifdef HAVE_STDDEF_H
#include <stddef.h>
#endif
#include <string.h>
#include <assert.h>
#include <ctype.h>
#include <fcntl.h>
#ifdef QUICK
#include <sys/mman.h>
#endif
#include <dirent.h>
#if defined(HAVE_LIMITS_H)
#include <limits.h>
#endif

/* Fall back to 0x7fffffff when the platform does not define SSIZE_MAX. */
#ifndef SSIZE_MAX
#define MAXMAGIC_SIZE        ((ssize_t)0x7fffffff)
#else
#define MAXMAGIC_SIZE        SSIZE_MAX
#endif

/*
 * EATAB advances the pointer named `l' in the enclosing scope past any
 * ASCII whitespace.  It names `l' directly, so it can only be used where
 * such a variable is in scope.
 */
#define	EATAB {while (isascii((unsigned char) *l) && \
		      isspace((unsigned char) *l))  ++l;}
/*
 * LOWCASE yields the lower-case form of an upper-case character and any
 * other character unchanged.  The argument is evaluated more than once.
 */
#define LOWCASE(l) (isupper((unsigned char) (l)) ? \
			tolower((unsigned char) (l)) : (l))
/*
 * Work around a bug in headers on Digital Unix.
 * At least confirmed for: OSF1 V4.0 878
 */
#if defined(__osf__) && defined(__DECC)
#ifdef MAP_FAILED
#undef MAP_FAILED
#endif
#endif

#ifndef MAP_FAILED
#define MAP_FAILED (void *) -1
#endif

#ifndef MAP_FILE
#define MAP_FILE 0
#endif

/*
 * Allocation step sizes.  ALLOC_INCR is the number of slots by which
 * addentry() grows a magic_entry_set; ALLOC_CHUNK is not referenced in the
 * part of the file shown here (presumably used by the parser - confirm).
 */
#define ALLOC_CHUNK	(size_t)10
#define ALLOC_INCR	(size_t)200

/*
 * One magic entry while it is being built: `mp' is a free()able array of
 * struct magic and `cont_count' is the number of elements of it that
 * coalesce_entries() copies out.
 */
struct magic_entry {
	struct magic *mp;
	uint32_t cont_count;
	uint32_t max_count;	/* presumably the allocated size of mp - confirm */
};

/*
 * Growable array of magic entries: `count' slots of `me' are in use out of
 * `max' allocated (see addentry()).
 */
struct magic_entry_set {
	struct magic_entry *me;
	uint32_t count;
	uint32_t max;
};

/*
 * A loaded magic database: one array of struct magic, with its element
 * count, per magic set.  `p' and `len' describe the backing buffer that
 * apprentice_unmap() releases; apprentice_load() leaves both zero.
 */
struct magic_map {
	void *p;
	size_t len;
	struct magic *magic[MAGIC_SETS];
	uint32_t nmagic[MAGIC_SETS];
};

/*
 * Per-type lookup tables indexed by FILE_* type code; init_file_tables()
 * fills them from type_tbl.
 */
int file_formats[FILE_NAMES_SIZE];
const size_t file_nformats = FILE_NAMES_SIZE;
const char *file_names[FILE_NAMES_SIZE];
const size_t file_nnames = FILE_NAMES_SIZE;

private int getvalue(struct magic_set *ms, struct magic *, const char **, int);
private int hextoint(int);
private const char *getstr(struct magic_set *, struct magic *, const char *,
    int);
private int parse(struct magic_set *, struct magic_entry *, const char *,
    size_t, int);
private void eatsize(const char **);
private int apprentice_1(struct magic_set *, const char *, int);
private size_t apprentice_magic_strength(const struct magic *);
private int apprentice_sort(const void *, const void *);
private void apprentice_list(struct mlist *, int );
private struct magic_map *apprentice_load(struct magic_set *,
    const char *, int);
private struct mlist *mlist_alloc(void);
private void mlist_free(struct mlist *);
private void byteswap(struct magic *, uint32_t);
private void bs1(struct magic *);
private uint16_t swap2(uint16_t);
private uint32_t swap4(uint32_t);
private uint64_t swap8(uint64_t);
private char *mkdbname(struct magic_set *, const char *, int);
private struct magic_map *apprentice_map(struct magic_set *, const char *);
private void apprentice_unmap(struct magic_map *);
private int apprentice_compile(struct magic_set *, struct magic_map *,
    const char *);
private int check_format_type(const char *, int);
private int check_format(struct magic_set *, struct magic *);
private int get_op(char);
private int parse_mime(struct magic_set *, struct magic_entry *, const char *);
private int parse_strength(struct magic_set *, struct magic_entry *, const char *);
private int parse_apple(struct magic_set *, struct magic_entry *, const char *);


/*
 * Size of struct magic as this file was compiled; apprentice_1() refuses
 * to proceed when it differs from FILE_MAGICSIZE.
 */
private size_t magicsize = sizeof(struct magic);

/* Column header that apprentice_load() prints to stderr for FILE_CHECK. */
private const char usg_hdr[] = "cont\toffset\ttype\topcode\tmask\tvalue\tdesc";

/*
 * Handlers for "!:" lines in a magic file.  Each row holds the directive
 * name, the length of that name, and the parse_<name>() function that
 * load_1() calls with the text following the name.  DECLARE_FIELD(x)
 * expands to { "x", strlen("x"), parse_x }.  The table ends with a row
 * whose name is NULL.
 */
private struct {
	const char *name;
	size_t len;
	int (*fun)(struct magic_set *, struct magic_entry *, const char *);
} bang[] = {
#define	DECLARE_FIELD(name) { # name, sizeof(# name) - 1, parse_ ## name }
	DECLARE_FIELD(mime),
	DECLARE_FIELD(apple),
	DECLARE_FIELD(strength),
#undef	DECLARE_FIELD
	{ NULL, 0, NULL }
};

163 #ifdef COMPILE_ONLY 164 165 int main(int, char *[]); 166 167 int 168 main(int argc, char *argv[]) 169 { 170 int ret; 171 struct magic_set *ms; 172 char *progname; 173 174 if ((progname = strrchr(argv[0], '/')) != NULL) 175 progname++; 176 else 177 progname = argv[0]; 178 179 if (argc != 2) { 180 (void)fprintf(stderr, "Usage: %s file\n", progname); 181 return 1; 182 } 183 184 if ((ms = magic_open(MAGIC_CHECK)) == NULL) { 185 (void)fprintf(stderr, "%s: %s\n", progname, strerror(errno)); 186 return 1; 187 } 188 ret = magic_compile(ms, argv[1]) == -1 ? 1 : 0; 189 if (ret == 1) 190 (void)fprintf(stderr, "%s: %s\n", progname, magic_error(ms)); 191 magic_close(ms); 192 return ret; 193 } 194 #endif /* COMPILE_ONLY */ 195 196 struct type_tbl_s { 197 const char name[16]; 198 const size_t len; 199 const int type; 200 const int format; 201 }; 202 203 /* 204 * XXX - the actual Single UNIX Specification says that "long" means "long", 205 * as in the C data type, but we treat it as meaning "4-byte integer". 206 * Given that the OS X version of file 5.04 did the same, I guess that passes 207 * the actual test; having "long" be dependent on how big a "long" is on 208 * the machine running "file" is silly. 
209 */ 210 static const struct type_tbl_s type_tbl[] = { 211 # define XX(s) s, (sizeof(s) - 1) 212 # define XX_NULL "", 0 213 { XX("invalid"), FILE_INVALID, FILE_FMT_NONE }, 214 { XX("byte"), FILE_BYTE, FILE_FMT_NUM }, 215 { XX("short"), FILE_SHORT, FILE_FMT_NUM }, 216 { XX("default"), FILE_DEFAULT, FILE_FMT_NONE }, 217 { XX("long"), FILE_LONG, FILE_FMT_NUM }, 218 { XX("string"), FILE_STRING, FILE_FMT_STR }, 219 { XX("date"), FILE_DATE, FILE_FMT_STR }, 220 { XX("beshort"), FILE_BESHORT, FILE_FMT_NUM }, 221 { XX("belong"), FILE_BELONG, FILE_FMT_NUM }, 222 { XX("bedate"), FILE_BEDATE, FILE_FMT_STR }, 223 { XX("leshort"), FILE_LESHORT, FILE_FMT_NUM }, 224 { XX("lelong"), FILE_LELONG, FILE_FMT_NUM }, 225 { XX("ledate"), FILE_LEDATE, FILE_FMT_STR }, 226 { XX("pstring"), FILE_PSTRING, FILE_FMT_STR }, 227 { XX("ldate"), FILE_LDATE, FILE_FMT_STR }, 228 { XX("beldate"), FILE_BELDATE, FILE_FMT_STR }, 229 { XX("leldate"), FILE_LELDATE, FILE_FMT_STR }, 230 { XX("regex"), FILE_REGEX, FILE_FMT_STR }, 231 { XX("bestring16"), FILE_BESTRING16, FILE_FMT_STR }, 232 { XX("lestring16"), FILE_LESTRING16, FILE_FMT_STR }, 233 { XX("search"), FILE_SEARCH, FILE_FMT_STR }, 234 { XX("medate"), FILE_MEDATE, FILE_FMT_STR }, 235 { XX("meldate"), FILE_MELDATE, FILE_FMT_STR }, 236 { XX("melong"), FILE_MELONG, FILE_FMT_NUM }, 237 { XX("quad"), FILE_QUAD, FILE_FMT_QUAD }, 238 { XX("lequad"), FILE_LEQUAD, FILE_FMT_QUAD }, 239 { XX("bequad"), FILE_BEQUAD, FILE_FMT_QUAD }, 240 { XX("qdate"), FILE_QDATE, FILE_FMT_STR }, 241 { XX("leqdate"), FILE_LEQDATE, FILE_FMT_STR }, 242 { XX("beqdate"), FILE_BEQDATE, FILE_FMT_STR }, 243 { XX("qldate"), FILE_QLDATE, FILE_FMT_STR }, 244 { XX("leqldate"), FILE_LEQLDATE, FILE_FMT_STR }, 245 { XX("beqldate"), FILE_BEQLDATE, FILE_FMT_STR }, 246 { XX("float"), FILE_FLOAT, FILE_FMT_FLOAT }, 247 { XX("befloat"), FILE_BEFLOAT, FILE_FMT_FLOAT }, 248 { XX("lefloat"), FILE_LEFLOAT, FILE_FMT_FLOAT }, 249 { XX("double"), FILE_DOUBLE, FILE_FMT_DOUBLE }, 250 { XX("bedouble"), 
FILE_BEDOUBLE, FILE_FMT_DOUBLE }, 251 { XX("ledouble"), FILE_LEDOUBLE, FILE_FMT_DOUBLE }, 252 { XX("leid3"), FILE_LEID3, FILE_FMT_NUM }, 253 { XX("beid3"), FILE_BEID3, FILE_FMT_NUM }, 254 { XX("indirect"), FILE_INDIRECT, FILE_FMT_NUM }, 255 { XX("qwdate"), FILE_QWDATE, FILE_FMT_STR }, 256 { XX("leqwdate"), FILE_LEQWDATE, FILE_FMT_STR }, 257 { XX("beqwdate"), FILE_BEQWDATE, FILE_FMT_STR }, 258 { XX("name"), FILE_NAME, FILE_FMT_NONE }, 259 { XX("use"), FILE_USE, FILE_FMT_NONE }, 260 { XX("clear"), FILE_CLEAR, FILE_FMT_NONE }, 261 { XX_NULL, FILE_INVALID, FILE_FMT_NONE }, 262 }; 263 264 /* 265 * These are not types, and cannot be preceded by "u" to make them 266 * unsigned. 267 */ 268 static const struct type_tbl_s special_tbl[] = { 269 { XX("name"), FILE_NAME, FILE_FMT_STR }, 270 { XX("use"), FILE_USE, FILE_FMT_STR }, 271 { XX_NULL, FILE_INVALID, FILE_FMT_NONE }, 272 }; 273 # undef XX 274 # undef XX_NULL 275 276 private int 277 get_type(const struct type_tbl_s *tbl, const char *l, const char **t) 278 { 279 const struct type_tbl_s *p; 280 281 for (p = tbl; p->len; p++) { 282 if (strncmp(l, p->name, p->len) == 0) { 283 if (t) 284 *t = l + p->len; 285 break; 286 } 287 } 288 return p->type; 289 } 290 291 private int 292 get_standard_integer_type(const char *l, const char **t) 293 { 294 int type; 295 296 if (isalpha((unsigned char)l[1])) { 297 switch (l[1]) { 298 case 'C': 299 /* "dC" and "uC" */ 300 type = FILE_BYTE; 301 break; 302 case 'S': 303 /* "dS" and "uS" */ 304 type = FILE_SHORT; 305 break; 306 case 'I': 307 case 'L': 308 /* 309 * "dI", "dL", "uI", and "uL". 310 * 311 * XXX - the actual Single UNIX Specification says 312 * that "L" means "long", as in the C data type, 313 * but we treat it as meaning "4-byte integer". 314 * Given that the OS X version of file 5.04 did 315 * the same, I guess that passes the actual SUS 316 * validation suite; having "dL" be dependent on 317 * how big a "long" is on the machine running 318 * "file" is silly. 
319 */ 320 type = FILE_LONG; 321 break; 322 case 'Q': 323 /* "dQ" and "uQ" */ 324 type = FILE_QUAD; 325 break; 326 default: 327 /* "d{anything else}", "u{anything else}" */ 328 return FILE_INVALID; 329 } 330 l += 2; 331 } else if (isdigit((unsigned char)l[1])) { 332 /* 333 * "d{num}" and "u{num}"; we only support {num} values 334 * of 1, 2, 4, and 8 - the Single UNIX Specification 335 * doesn't say anything about whether arbitrary 336 * values should be supported, but both the Solaris 10 337 * and OS X Mountain Lion versions of file passed the 338 * Single UNIX Specification validation suite, and 339 * neither of them support values bigger than 8 or 340 * non-power-of-2 values. 341 */ 342 if (isdigit((unsigned char)l[2])) { 343 /* Multi-digit, so > 9 */ 344 return FILE_INVALID; 345 } 346 switch (l[1]) { 347 case '1': 348 type = FILE_BYTE; 349 break; 350 case '2': 351 type = FILE_SHORT; 352 break; 353 case '4': 354 type = FILE_LONG; 355 break; 356 case '8': 357 type = FILE_QUAD; 358 break; 359 default: 360 /* XXX - what about 3, 5, 6, or 7? */ 361 return FILE_INVALID; 362 } 363 l += 2; 364 } else { 365 /* 366 * "d" or "u" by itself. 367 */ 368 type = FILE_LONG; 369 ++l; 370 } 371 if (t) 372 *t = l; 373 return type; 374 } 375 376 private void 377 init_file_tables(void) 378 { 379 static int done = 0; 380 const struct type_tbl_s *p; 381 382 if (done) 383 return; 384 done++; 385 386 for (p = type_tbl; p->len; p++) { 387 assert(p->type < FILE_NAMES_SIZE); 388 file_names[p->type] = p->name; 389 file_formats[p->type] = p->format; 390 } 391 assert(p - type_tbl == FILE_NAMES_SIZE); 392 } 393 394 private int 395 add_mlist(struct mlist *mlp, struct magic_map *map, size_t idx) 396 { 397 struct mlist *ml; 398 399 if ((ml = CAST(struct mlist *, malloc(sizeof(*ml)))) == NULL) 400 return -1; 401 402 ml->map = idx == 0 ? 
map : NULL; 403 ml->magic = map->magic[idx]; 404 ml->nmagic = map->nmagic[idx]; 405 406 mlp->prev->next = ml; 407 ml->prev = mlp->prev; 408 ml->next = mlp; 409 mlp->prev = ml; 410 return 0; 411 } 412 413 /* 414 * Handle one file or directory. 415 */ 416 private int 417 apprentice_1(struct magic_set *ms, const char *fn, int action) 418 { 419 struct mlist *ml; 420 struct magic_map *map; 421 size_t i; 422 423 if (magicsize != FILE_MAGICSIZE) { 424 file_error(ms, 0, "magic element size %lu != %lu", 425 (unsigned long)sizeof(*map->magic[0]), 426 (unsigned long)FILE_MAGICSIZE); 427 return -1; 428 } 429 430 if (action == FILE_COMPILE) { 431 map = apprentice_load(ms, fn, action); 432 if (map == NULL) 433 return -1; 434 return apprentice_compile(ms, map, fn); 435 } 436 437 #ifndef COMPILE_ONLY 438 map = apprentice_map(ms, fn); 439 if (map == NULL) { 440 if (ms->flags & MAGIC_CHECK) 441 file_magwarn(ms, "using regular magic file `%s'", fn); 442 map = apprentice_load(ms, fn, action); 443 if (map == NULL) 444 return -1; 445 } 446 447 for (i = 0; i < MAGIC_SETS; i++) { 448 if (add_mlist(ms->mlist[i], map, i) == -1) { 449 file_oomem(ms, sizeof(*ml)); 450 apprentice_unmap(map); 451 return -1; 452 } 453 } 454 455 if (action == FILE_LIST) { 456 for (i = 0; i < MAGIC_SETS; i++) { 457 printf("Set %zu:\nBinary patterns:\n", i); 458 apprentice_list(ms->mlist[i], BINTEST); 459 printf("Text patterns:\n"); 460 apprentice_list(ms->mlist[i], TEXTTEST); 461 } 462 } 463 464 return 0; 465 #endif /* COMPILE_ONLY */ 466 } 467 468 protected void 469 file_ms_free(struct magic_set *ms) 470 { 471 size_t i; 472 if (ms == NULL) 473 return; 474 for (i = 0; i < MAGIC_SETS; i++) 475 mlist_free(ms->mlist[i]); 476 free(ms->o.pbuf); 477 free(ms->o.buf); 478 free(ms->c.li); 479 free(ms); 480 } 481 482 protected struct magic_set * 483 file_ms_alloc(int flags) 484 { 485 struct magic_set *ms; 486 size_t i, len; 487 488 if ((ms = CAST(struct magic_set *, calloc((size_t)1, 489 sizeof(struct magic_set)))) == NULL) 
490 return NULL; 491 492 if (magic_setflags(ms, flags) == -1) { 493 errno = EINVAL; 494 goto free; 495 } 496 497 ms->o.buf = ms->o.pbuf = NULL; 498 len = (ms->c.len = 10) * sizeof(*ms->c.li); 499 500 if ((ms->c.li = CAST(struct level_info *, malloc(len))) == NULL) 501 goto free; 502 503 ms->event_flags = 0; 504 ms->error = -1; 505 for (i = 0; i < MAGIC_SETS; i++) 506 ms->mlist[i] = NULL; 507 ms->file = "unknown"; 508 ms->line = 0; 509 return ms; 510 free: 511 free(ms); 512 return NULL; 513 } 514 515 private void 516 apprentice_unmap(struct magic_map *map) 517 { 518 if (map == NULL) 519 return; 520 if (map->p == NULL) 521 return; 522 #ifdef QUICK 523 if (map->len) 524 (void)munmap(map->p, map->len); 525 else 526 #endif 527 free(map->p); 528 free(map); 529 } 530 531 private struct mlist * 532 mlist_alloc(void) 533 { 534 struct mlist *mlist; 535 if ((mlist = CAST(struct mlist *, calloc(1, sizeof(*mlist)))) == NULL) { 536 return NULL; 537 } 538 mlist->next = mlist->prev = mlist; 539 return mlist; 540 } 541 542 private void 543 mlist_free(struct mlist *mlist) 544 { 545 struct mlist *ml; 546 547 if (mlist == NULL) 548 return; 549 550 for (ml = mlist->next; ml != mlist;) { 551 struct mlist *next = ml->next; 552 if (ml->map) 553 apprentice_unmap(ml->map); 554 free(ml); 555 ml = next; 556 } 557 free(ml); 558 } 559 560 /* const char *fn: list of magic files and directories */ 561 protected int 562 file_apprentice(struct magic_set *ms, const char *fn, int action) 563 { 564 char *p, *mfn; 565 int file_err, errs = -1; 566 size_t i; 567 568 if (ms->mlist[0] != NULL) 569 file_reset(ms); 570 571 if ((fn = magic_getpath(fn, action)) == NULL) 572 return -1; 573 574 init_file_tables(); 575 576 if ((mfn = strdup(fn)) == NULL) { 577 file_oomem(ms, strlen(fn)); 578 return -1; 579 } 580 581 for (i = 0; i < MAGIC_SETS; i++) { 582 mlist_free(ms->mlist[i]); 583 if ((ms->mlist[i] = mlist_alloc()) == NULL) { 584 file_oomem(ms, sizeof(*ms->mlist[i])); 585 if (i != 0) { 586 --i; 587 do 588 
mlist_free(ms->mlist[i]); 589 while (i != 0); 590 } 591 free(mfn); 592 return -1; 593 } 594 } 595 fn = mfn; 596 597 while (fn) { 598 p = strchr(fn, PATHSEP); 599 if (p) 600 *p++ = '\0'; 601 if (*fn == '\0') 602 break; 603 file_err = apprentice_1(ms, fn, action); 604 errs = MAX(errs, file_err); 605 fn = p; 606 } 607 608 free(mfn); 609 610 if (errs == -1) { 611 for (i = 0; i < MAGIC_SETS; i++) { 612 mlist_free(ms->mlist[i]); 613 ms->mlist[i] = NULL; 614 } 615 file_error(ms, 0, "could not find any valid magic files!"); 616 return -1; 617 } 618 619 #if 0 620 /* 621 * Always leave the database loaded 622 */ 623 if (action == FILE_LOAD) 624 return 0; 625 626 for (i = 0; i < MAGIC_SETS; i++) { 627 mlist_free(ms->mlist[i]); 628 ms->mlist[i] = NULL; 629 } 630 #endif 631 632 switch (action) { 633 case FILE_LOAD: 634 case FILE_COMPILE: 635 case FILE_CHECK: 636 case FILE_LIST: 637 return 0; 638 default: 639 file_error(ms, 0, "Invalid action %d", action); 640 return -1; 641 } 642 } 643 644 /* 645 * Compute the real length of a magic expression, for the purposes 646 * of determining how "strong" a magic expression is (approximating 647 * how specific its matches are): 648 * - magic characters count 0 unless escaped. 
649 * - [] expressions count 1 650 * - {} expressions count 0 651 * - regular characters or escaped magic characters count 1 652 * - 0 length expressions count as one 653 */ 654 private size_t 655 nonmagic(const char *str) 656 { 657 const char *p; 658 size_t rv = 0; 659 660 for (p = str; *p; p++) 661 switch (*p) { 662 case '\\': /* Escaped anything counts 1 */ 663 if (!*++p) 664 p--; 665 rv++; 666 continue; 667 case '?': /* Magic characters count 0 */ 668 case '*': 669 case '.': 670 case '+': 671 case '^': 672 case '$': 673 continue; 674 case '[': /* Bracketed expressions count 1 the ']' */ 675 while (*p && *p != ']') 676 p++; 677 p--; 678 continue; 679 case '{': /* Braced expressions count 0 */ 680 while (*p && *p != '}') 681 p++; 682 if (!*p) 683 p--; 684 continue; 685 default: /* Anything else counts 1 */ 686 rv++; 687 continue; 688 } 689 690 return rv == 0 ? 1 : rv; /* Return at least 1 */ 691 } 692 693 /* 694 * Get weight of this magic entry, for sorting purposes. 695 */ 696 private size_t 697 apprentice_magic_strength(const struct magic *m) 698 { 699 #define MULT 10 700 size_t v, val = 2 * MULT; /* baseline strength */ 701 702 switch (m->type) { 703 case FILE_DEFAULT: /* make sure this sorts last */ 704 if (m->factor_op != FILE_FACTOR_OP_NONE) 705 abort(); 706 return 0; 707 708 case FILE_BYTE: 709 val += 1 * MULT; 710 break; 711 712 case FILE_SHORT: 713 case FILE_LESHORT: 714 case FILE_BESHORT: 715 val += 2 * MULT; 716 break; 717 718 case FILE_LONG: 719 case FILE_LELONG: 720 case FILE_BELONG: 721 case FILE_MELONG: 722 val += 4 * MULT; 723 break; 724 725 case FILE_PSTRING: 726 case FILE_STRING: 727 val += m->vallen * MULT; 728 break; 729 730 case FILE_BESTRING16: 731 case FILE_LESTRING16: 732 val += m->vallen * MULT / 2; 733 break; 734 735 case FILE_SEARCH: 736 val += m->vallen * MAX(MULT / m->vallen, 1); 737 break; 738 739 case FILE_REGEX: 740 v = nonmagic(m->value.s); 741 val += v * MAX(MULT / v, 1); 742 break; 743 744 case FILE_DATE: 745 case FILE_LEDATE: 
746 case FILE_BEDATE: 747 case FILE_MEDATE: 748 case FILE_LDATE: 749 case FILE_LELDATE: 750 case FILE_BELDATE: 751 case FILE_MELDATE: 752 case FILE_FLOAT: 753 case FILE_BEFLOAT: 754 case FILE_LEFLOAT: 755 val += 4 * MULT; 756 break; 757 758 case FILE_QUAD: 759 case FILE_BEQUAD: 760 case FILE_LEQUAD: 761 case FILE_QDATE: 762 case FILE_LEQDATE: 763 case FILE_BEQDATE: 764 case FILE_QLDATE: 765 case FILE_LEQLDATE: 766 case FILE_BEQLDATE: 767 case FILE_QWDATE: 768 case FILE_LEQWDATE: 769 case FILE_BEQWDATE: 770 case FILE_DOUBLE: 771 case FILE_BEDOUBLE: 772 case FILE_LEDOUBLE: 773 val += 8 * MULT; 774 break; 775 776 case FILE_INDIRECT: 777 case FILE_NAME: 778 case FILE_USE: 779 break; 780 781 default: 782 val = 0; 783 (void)fprintf(stderr, "Bad type %d\n", m->type); 784 abort(); 785 } 786 787 switch (m->reln) { 788 case 'x': /* matches anything penalize */ 789 case '!': /* matches almost anything penalize */ 790 val = 0; 791 break; 792 793 case '=': /* Exact match, prefer */ 794 val += MULT; 795 break; 796 797 case '>': 798 case '<': /* comparison match reduce strength */ 799 val -= 2 * MULT; 800 break; 801 802 case '^': 803 case '&': /* masking bits, we could count them too */ 804 val -= MULT; 805 break; 806 807 default: 808 (void)fprintf(stderr, "Bad relation %c\n", m->reln); 809 abort(); 810 } 811 812 if (val == 0) /* ensure we only return 0 for FILE_DEFAULT */ 813 val = 1; 814 815 switch (m->factor_op) { 816 case FILE_FACTOR_OP_NONE: 817 break; 818 case FILE_FACTOR_OP_PLUS: 819 val += m->factor; 820 break; 821 case FILE_FACTOR_OP_MINUS: 822 val -= m->factor; 823 break; 824 case FILE_FACTOR_OP_TIMES: 825 val *= m->factor; 826 break; 827 case FILE_FACTOR_OP_DIV: 828 val /= m->factor; 829 break; 830 default: 831 abort(); 832 } 833 834 /* 835 * Magic entries with no description get a bonus because they depend 836 * on subsequent magic entries to print something. 
837 */ 838 if (m->desc[0] == '\0') 839 val++; 840 return val; 841 } 842 843 /* 844 * Sort callback for sorting entries by "strength" (basically length) 845 */ 846 private int 847 apprentice_sort(const void *a, const void *b) 848 { 849 const struct magic_entry *ma = CAST(const struct magic_entry *, a); 850 const struct magic_entry *mb = CAST(const struct magic_entry *, b); 851 size_t sa = apprentice_magic_strength(ma->mp); 852 size_t sb = apprentice_magic_strength(mb->mp); 853 if (sa == sb) 854 return 0; 855 else if (sa > sb) 856 return -1; 857 else 858 return 1; 859 } 860 861 /* 862 * Shows sorted patterns list in the order which is used for the matching 863 */ 864 private void 865 apprentice_list(struct mlist *mlist, int mode) 866 { 867 uint32_t magindex = 0; 868 struct mlist *ml; 869 for (ml = mlist->next; ml != mlist; ml = ml->next) { 870 for (magindex = 0; magindex < ml->nmagic; magindex++) { 871 struct magic *m = &ml->magic[magindex]; 872 if ((m->flag & mode) != mode) { 873 /* Skip sub-tests */ 874 while (magindex + 1 < ml->nmagic && 875 ml->magic[magindex + 1].cont_level != 0) 876 ++magindex; 877 continue; /* Skip to next top-level test*/ 878 } 879 880 /* 881 * Try to iterate over the tree until we find item with 882 * description/mimetype. 
883 */ 884 while (magindex + 1 < ml->nmagic && 885 ml->magic[magindex + 1].cont_level != 0 && 886 *ml->magic[magindex].desc == '\0' && 887 *ml->magic[magindex].mimetype == '\0') 888 magindex++; 889 890 printf("Strength = %3" SIZE_T_FORMAT "u : %s [%s]\n", 891 apprentice_magic_strength(m), 892 ml->magic[magindex].desc, 893 ml->magic[magindex].mimetype); 894 } 895 } 896 } 897 898 private void 899 set_test_type(struct magic *mstart, struct magic *m) 900 { 901 switch (m->type) { 902 case FILE_BYTE: 903 case FILE_SHORT: 904 case FILE_LONG: 905 case FILE_DATE: 906 case FILE_BESHORT: 907 case FILE_BELONG: 908 case FILE_BEDATE: 909 case FILE_LESHORT: 910 case FILE_LELONG: 911 case FILE_LEDATE: 912 case FILE_LDATE: 913 case FILE_BELDATE: 914 case FILE_LELDATE: 915 case FILE_MEDATE: 916 case FILE_MELDATE: 917 case FILE_MELONG: 918 case FILE_QUAD: 919 case FILE_LEQUAD: 920 case FILE_BEQUAD: 921 case FILE_QDATE: 922 case FILE_LEQDATE: 923 case FILE_BEQDATE: 924 case FILE_QLDATE: 925 case FILE_LEQLDATE: 926 case FILE_BEQLDATE: 927 case FILE_QWDATE: 928 case FILE_LEQWDATE: 929 case FILE_BEQWDATE: 930 case FILE_FLOAT: 931 case FILE_BEFLOAT: 932 case FILE_LEFLOAT: 933 case FILE_DOUBLE: 934 case FILE_BEDOUBLE: 935 case FILE_LEDOUBLE: 936 mstart->flag |= BINTEST; 937 break; 938 case FILE_STRING: 939 case FILE_PSTRING: 940 case FILE_BESTRING16: 941 case FILE_LESTRING16: 942 /* Allow text overrides */ 943 if (mstart->str_flags & STRING_TEXTTEST) 944 mstart->flag |= TEXTTEST; 945 else 946 mstart->flag |= BINTEST; 947 break; 948 case FILE_REGEX: 949 case FILE_SEARCH: 950 #ifndef COMPILE_ONLY 951 if (mstart->str_flags & STRING_BINTEST) 952 mstart->flag |= BINTEST; 953 if (mstart->str_flags & STRING_TEXTTEST) 954 mstart->flag |= TEXTTEST; 955 956 if (mstart->flag & (TEXTTEST|BINTEST)) 957 break; 958 959 /* binary test if pattern is not text */ 960 if (file_looks_utf8(m->value.us, (size_t)m->vallen, NULL, 961 NULL) <= 0) 962 mstart->flag |= BINTEST; 963 else 964 mstart->flag |= TEXTTEST; 
965 #endif 966 break; 967 case FILE_DEFAULT: 968 /* can't deduce anything; we shouldn't see this at the 969 top level anyway */ 970 break; 971 case FILE_INVALID: 972 default: 973 /* invalid search type, but no need to complain here */ 974 break; 975 } 976 } 977 978 private int 979 addentry(struct magic_set *ms, struct magic_entry *me, 980 struct magic_entry_set *mset) 981 { 982 size_t i = me->mp->type == FILE_NAME ? 1 : 0; 983 if (mset[i].count == mset[i].max) { 984 struct magic_entry *mp; 985 986 mset[i].max += ALLOC_INCR; 987 if ((mp = CAST(struct magic_entry *, 988 realloc(mset[i].me, sizeof(*mp) * mset[i].max))) == 989 NULL) { 990 file_oomem(ms, sizeof(*mp) * mset[i].max); 991 return -1; 992 } 993 (void)memset(&mp[mset[i].count], 0, sizeof(*mp) * 994 ALLOC_INCR); 995 mset[i].me = mp; 996 } 997 mset[i].me[mset[i].count++] = *me; 998 memset(me, 0, sizeof(*me)); 999 return 0; 1000 } 1001 1002 /* 1003 * Load and parse one file. 1004 */ 1005 private void 1006 load_1(struct magic_set *ms, int action, const char *fn, int *errs, 1007 struct magic_entry_set *mset) 1008 { 1009 size_t lineno = 0, llen = 0; 1010 char *line = NULL; 1011 ssize_t len; 1012 struct magic_entry me; 1013 1014 FILE *f = fopen(ms->file = fn, "r"); 1015 if (f == NULL) { 1016 if (errno != ENOENT) 1017 file_error(ms, errno, "cannot read magic file `%s'", 1018 fn); 1019 (*errs)++; 1020 return; 1021 } 1022 1023 memset(&me, 0, sizeof(me)); 1024 /* read and parse this file */ 1025 for (ms->line = 1; (len = getline(&line, &llen, f)) != -1; 1026 ms->line++) { 1027 if (len == 0) /* null line, garbage, etc */ 1028 continue; 1029 if (line[len - 1] == '\n') { 1030 lineno++; 1031 line[len - 1] = '\0'; /* delete newline */ 1032 } 1033 switch (line[0]) { 1034 case '\0': /* empty, do not parse */ 1035 case '#': /* comment, do not parse */ 1036 continue; 1037 case '!': 1038 if (line[1] == ':') { 1039 size_t i; 1040 1041 for (i = 0; bang[i].name != NULL; i++) { 1042 if ((size_t)(len - 2) > bang[i].len && 1043 
memcmp(bang[i].name, line + 2, 1044 bang[i].len) == 0) 1045 break; 1046 } 1047 if (bang[i].name == NULL) { 1048 file_error(ms, 0, 1049 "Unknown !: entry `%s'", line); 1050 (*errs)++; 1051 continue; 1052 } 1053 if (me.mp == NULL) { 1054 file_error(ms, 0, 1055 "No current entry for :!%s type", 1056 bang[i].name); 1057 (*errs)++; 1058 continue; 1059 } 1060 if ((*bang[i].fun)(ms, &me, 1061 line + bang[i].len + 2) != 0) { 1062 (*errs)++; 1063 continue; 1064 } 1065 continue; 1066 } 1067 /*FALLTHROUGH*/ 1068 default: 1069 again: 1070 switch (parse(ms, &me, line, lineno, action)) { 1071 case 0: 1072 continue; 1073 case 1: 1074 (void)addentry(ms, &me, mset); 1075 goto again; 1076 default: 1077 (*errs)++; 1078 break; 1079 } 1080 } 1081 } 1082 if (me.mp) 1083 (void)addentry(ms, &me, mset); 1084 free(line); 1085 (void)fclose(f); 1086 } 1087 1088 /* 1089 * parse a file or directory of files 1090 * const char *fn: name of magic file or directory 1091 */ 1092 private int 1093 cmpstrp(const void *p1, const void *p2) 1094 { 1095 return strcmp(*(char *const *)p1, *(char *const *)p2); 1096 } 1097 1098 1099 private uint32_t 1100 set_text_binary(struct magic_set *ms, struct magic_entry *me, uint32_t nme, 1101 uint32_t starttest) 1102 { 1103 static const char text[] = "text"; 1104 static const char binary[] = "binary"; 1105 static const size_t len = sizeof(text); 1106 1107 uint32_t i = starttest; 1108 1109 do { 1110 set_test_type(me[starttest].mp, me[i].mp); 1111 if ((ms->flags & MAGIC_DEBUG) == 0) 1112 continue; 1113 (void)fprintf(stderr, "%s%s%s: %s\n", 1114 me[i].mp->mimetype, 1115 me[i].mp->mimetype[0] == '\0' ? "" : "; ", 1116 me[i].mp->desc[0] ? me[i].mp->desc : "(no description)", 1117 me[i].mp->flag & BINTEST ? 
binary : text); 1118 if (me[i].mp->flag & BINTEST) { 1119 char *p = strstr(me[i].mp->desc, text); 1120 if (p && (p == me[i].mp->desc || 1121 isspace((unsigned char)p[-1])) && 1122 (p + len - me[i].mp->desc == MAXstring 1123 || (p[len] == '\0' || 1124 isspace((unsigned char)p[len])))) 1125 (void)fprintf(stderr, "*** Possible " 1126 "binary test for text type\n"); 1127 } 1128 } while (++i < nme && me[i].mp->cont_level != 0); 1129 return i; 1130 } 1131 1132 private void 1133 set_last_default(struct magic_set *ms, struct magic_entry *me, uint32_t nme) 1134 { 1135 uint32_t i; 1136 for (i = 0; i < nme; i++) { 1137 if (me[i].mp->cont_level == 0 && 1138 me[i].mp->type == FILE_DEFAULT) { 1139 while (++i < nme) 1140 if (me[i].mp->cont_level == 0) 1141 break; 1142 if (i != nme) { 1143 /* XXX - Ugh! */ 1144 ms->line = me[i].mp->lineno; 1145 file_magwarn(ms, 1146 "level 0 \"default\" did not sort last"); 1147 } 1148 return; 1149 } 1150 } 1151 } 1152 1153 private int 1154 coalesce_entries(struct magic_set *ms, struct magic_entry *me, uint32_t nme, 1155 struct magic **ma, uint32_t *nma) 1156 { 1157 uint32_t i, mentrycount = 0; 1158 size_t slen; 1159 1160 for (i = 0; i < nme; i++) 1161 mentrycount += me[i].cont_count; 1162 1163 slen = sizeof(**ma) * mentrycount; 1164 if ((*ma = CAST(struct magic *, malloc(slen))) == NULL) { 1165 file_oomem(ms, slen); 1166 return -1; 1167 } 1168 1169 mentrycount = 0; 1170 for (i = 0; i < nme; i++) { 1171 (void)memcpy(*ma + mentrycount, me[i].mp, 1172 me[i].cont_count * sizeof(**ma)); 1173 mentrycount += me[i].cont_count; 1174 } 1175 *nma = mentrycount; 1176 return 0; 1177 } 1178 1179 private void 1180 magic_entry_free(struct magic_entry *me, uint32_t nme) 1181 { 1182 uint32_t i; 1183 if (me == NULL) 1184 return; 1185 for (i = 0; i < nme; i++) 1186 free(me[i].mp); 1187 free(me); 1188 } 1189 1190 private struct magic_map * 1191 apprentice_load(struct magic_set *ms, const char *fn, int action) 1192 { 1193 int errs = 0; 1194 uint32_t i, j; 1195 size_t 
files = 0, maxfiles = 0; 1196 char **filearr = NULL, *mfn; 1197 struct stat st; 1198 struct magic_map *map; 1199 struct magic_entry_set mset[MAGIC_SETS]; 1200 DIR *dir; 1201 struct dirent *d; 1202 1203 memset(mset, 0, sizeof(mset)); 1204 ms->flags |= MAGIC_CHECK; /* Enable checks for parsed files */ 1205 1206 1207 if ((map = CAST(struct magic_map *, calloc(1, sizeof(*map)))) == NULL) 1208 { 1209 file_oomem(ms, sizeof(*map)); 1210 return NULL; 1211 } 1212 1213 /* print silly verbose header for USG compat. */ 1214 if (action == FILE_CHECK) 1215 (void)fprintf(stderr, "%s\n", usg_hdr); 1216 1217 /* load directory or file */ 1218 if (stat(fn, &st) == 0 && S_ISDIR(st.st_mode)) { 1219 dir = opendir(fn); 1220 if (!dir) { 1221 errs++; 1222 goto out; 1223 } 1224 while ((d = readdir(dir)) != NULL) { 1225 if (asprintf(&mfn, "%s/%s", fn, d->d_name) < 0) { 1226 file_oomem(ms, 1227 strlen(fn) + strlen(d->d_name) + 2); 1228 errs++; 1229 closedir(dir); 1230 goto out; 1231 } 1232 if (stat(mfn, &st) == -1 || !S_ISREG(st.st_mode)) { 1233 free(mfn); 1234 continue; 1235 } 1236 if (files >= maxfiles) { 1237 size_t mlen; 1238 maxfiles = (maxfiles + 1) * 2; 1239 mlen = maxfiles * sizeof(*filearr); 1240 if ((filearr = CAST(char **, 1241 realloc(filearr, mlen))) == NULL) { 1242 file_oomem(ms, mlen); 1243 free(mfn); 1244 closedir(dir); 1245 errs++; 1246 goto out; 1247 } 1248 } 1249 filearr[files++] = mfn; 1250 } 1251 closedir(dir); 1252 qsort(filearr, files, sizeof(*filearr), cmpstrp); 1253 for (i = 0; i < files; i++) { 1254 load_1(ms, action, filearr[i], &errs, mset); 1255 free(filearr[i]); 1256 } 1257 free(filearr); 1258 } else 1259 load_1(ms, action, fn, &errs, mset); 1260 if (errs) 1261 goto out; 1262 1263 for (j = 0; j < MAGIC_SETS; j++) { 1264 /* Set types of tests */ 1265 for (i = 0; i < mset[j].count; ) { 1266 if (mset[j].me[i].mp->cont_level != 0) { 1267 i++; 1268 continue; 1269 } 1270 i = set_text_binary(ms, mset[j].me, mset[j].count, i); 1271 } 1272 qsort(mset[j].me, mset[j].count, 
sizeof(*mset[j].me), 1273 apprentice_sort); 1274 1275 /* 1276 * Make sure that any level 0 "default" line is last 1277 * (if one exists). 1278 */ 1279 set_last_default(ms, mset[j].me, mset[j].count); 1280 1281 /* coalesce per file arrays into a single one */ 1282 if (coalesce_entries(ms, mset[j].me, mset[j].count, 1283 &map->magic[j], &map->nmagic[j]) == -1) { 1284 errs++; 1285 goto out; 1286 } 1287 } 1288 1289 out: 1290 for (j = 0; j < MAGIC_SETS; j++) 1291 magic_entry_free(mset[j].me, mset[j].count); 1292 1293 if (errs) { 1294 for (j = 0; j < MAGIC_SETS; j++) { 1295 if (map->magic[j]) 1296 free(map->magic[j]); 1297 } 1298 free(map); 1299 return NULL; 1300 } 1301 return map; 1302 } 1303 1304 /* 1305 * extend the sign bit if the comparison is to be signed 1306 */ 1307 protected uint64_t 1308 file_signextend(struct magic_set *ms, struct magic *m, uint64_t v) 1309 { 1310 if (!(m->flag & UNSIGNED)) { 1311 switch(m->type) { 1312 /* 1313 * Do not remove the casts below. They are 1314 * vital. When later compared with the data, 1315 * the sign extension must have happened. 
1316 */ 1317 case FILE_BYTE: 1318 v = (char) v; 1319 break; 1320 case FILE_SHORT: 1321 case FILE_BESHORT: 1322 case FILE_LESHORT: 1323 v = (short) v; 1324 break; 1325 case FILE_DATE: 1326 case FILE_BEDATE: 1327 case FILE_LEDATE: 1328 case FILE_MEDATE: 1329 case FILE_LDATE: 1330 case FILE_BELDATE: 1331 case FILE_LELDATE: 1332 case FILE_MELDATE: 1333 case FILE_LONG: 1334 case FILE_BELONG: 1335 case FILE_LELONG: 1336 case FILE_MELONG: 1337 case FILE_FLOAT: 1338 case FILE_BEFLOAT: 1339 case FILE_LEFLOAT: 1340 v = (int32_t) v; 1341 break; 1342 case FILE_QUAD: 1343 case FILE_BEQUAD: 1344 case FILE_LEQUAD: 1345 case FILE_QDATE: 1346 case FILE_QLDATE: 1347 case FILE_QWDATE: 1348 case FILE_BEQDATE: 1349 case FILE_BEQLDATE: 1350 case FILE_BEQWDATE: 1351 case FILE_LEQDATE: 1352 case FILE_LEQLDATE: 1353 case FILE_LEQWDATE: 1354 case FILE_DOUBLE: 1355 case FILE_BEDOUBLE: 1356 case FILE_LEDOUBLE: 1357 v = (int64_t) v; 1358 break; 1359 case FILE_STRING: 1360 case FILE_PSTRING: 1361 case FILE_BESTRING16: 1362 case FILE_LESTRING16: 1363 case FILE_REGEX: 1364 case FILE_SEARCH: 1365 case FILE_DEFAULT: 1366 case FILE_INDIRECT: 1367 case FILE_NAME: 1368 case FILE_USE: 1369 case FILE_CLEAR: 1370 break; 1371 default: 1372 if (ms->flags & MAGIC_CHECK) 1373 file_magwarn(ms, "cannot happen: m->type=%d\n", 1374 m->type); 1375 return ~0U; 1376 } 1377 } 1378 return v; 1379 } 1380 1381 private int 1382 string_modifier_check(struct magic_set *ms, struct magic *m) 1383 { 1384 if ((ms->flags & MAGIC_CHECK) == 0) 1385 return 0; 1386 1387 if (m->type != FILE_PSTRING && (m->str_flags & PSTRING_LEN) != 0) { 1388 file_magwarn(ms, 1389 "'/BHhLl' modifiers are only allowed for pascal strings\n"); 1390 return -1; 1391 } 1392 switch (m->type) { 1393 case FILE_BESTRING16: 1394 case FILE_LESTRING16: 1395 if (m->str_flags != 0) { 1396 file_magwarn(ms, 1397 "no modifiers allowed for 16-bit strings\n"); 1398 return -1; 1399 } 1400 break; 1401 case FILE_STRING: 1402 case FILE_PSTRING: 1403 if ((m->str_flags & 
REGEX_OFFSET_START) != 0) { 1404 file_magwarn(ms, 1405 "'/%c' only allowed on regex and search\n", 1406 CHAR_REGEX_OFFSET_START); 1407 return -1; 1408 } 1409 break; 1410 case FILE_SEARCH: 1411 if (m->str_range == 0) { 1412 file_magwarn(ms, 1413 "missing range; defaulting to %d\n", 1414 STRING_DEFAULT_RANGE); 1415 m->str_range = STRING_DEFAULT_RANGE; 1416 return -1; 1417 } 1418 break; 1419 case FILE_REGEX: 1420 if ((m->str_flags & STRING_COMPACT_WHITESPACE) != 0) { 1421 file_magwarn(ms, "'/%c' not allowed on regex\n", 1422 CHAR_COMPACT_WHITESPACE); 1423 return -1; 1424 } 1425 if ((m->str_flags & STRING_COMPACT_OPTIONAL_WHITESPACE) != 0) { 1426 file_magwarn(ms, "'/%c' not allowed on regex\n", 1427 CHAR_COMPACT_OPTIONAL_WHITESPACE); 1428 return -1; 1429 } 1430 break; 1431 default: 1432 file_magwarn(ms, "coding error: m->type=%d\n", 1433 m->type); 1434 return -1; 1435 } 1436 return 0; 1437 } 1438 1439 private int 1440 get_op(char c) 1441 { 1442 switch (c) { 1443 case '&': 1444 return FILE_OPAND; 1445 case '|': 1446 return FILE_OPOR; 1447 case '^': 1448 return FILE_OPXOR; 1449 case '+': 1450 return FILE_OPADD; 1451 case '-': 1452 return FILE_OPMINUS; 1453 case '*': 1454 return FILE_OPMULTIPLY; 1455 case '/': 1456 return FILE_OPDIVIDE; 1457 case '%': 1458 return FILE_OPMODULO; 1459 default: 1460 return -1; 1461 } 1462 } 1463 1464 #ifdef ENABLE_CONDITIONALS 1465 private int 1466 get_cond(const char *l, const char **t) 1467 { 1468 static const struct cond_tbl_s { 1469 char name[8]; 1470 size_t len; 1471 int cond; 1472 } cond_tbl[] = { 1473 { "if", 2, COND_IF }, 1474 { "elif", 4, COND_ELIF }, 1475 { "else", 4, COND_ELSE }, 1476 { "", 0, COND_NONE }, 1477 }; 1478 const struct cond_tbl_s *p; 1479 1480 for (p = cond_tbl; p->len; p++) { 1481 if (strncmp(l, p->name, p->len) == 0 && 1482 isspace((unsigned char)l[p->len])) { 1483 if (t) 1484 *t = l + p->len; 1485 break; 1486 } 1487 } 1488 return p->cond; 1489 } 1490 1491 private int 1492 check_cond(struct magic_set *ms, int cond, 
uint32_t cont_level) 1493 { 1494 int last_cond; 1495 last_cond = ms->c.li[cont_level].last_cond; 1496 1497 switch (cond) { 1498 case COND_IF: 1499 if (last_cond != COND_NONE && last_cond != COND_ELIF) { 1500 if (ms->flags & MAGIC_CHECK) 1501 file_magwarn(ms, "syntax error: `if'"); 1502 return -1; 1503 } 1504 last_cond = COND_IF; 1505 break; 1506 1507 case COND_ELIF: 1508 if (last_cond != COND_IF && last_cond != COND_ELIF) { 1509 if (ms->flags & MAGIC_CHECK) 1510 file_magwarn(ms, "syntax error: `elif'"); 1511 return -1; 1512 } 1513 last_cond = COND_ELIF; 1514 break; 1515 1516 case COND_ELSE: 1517 if (last_cond != COND_IF && last_cond != COND_ELIF) { 1518 if (ms->flags & MAGIC_CHECK) 1519 file_magwarn(ms, "syntax error: `else'"); 1520 return -1; 1521 } 1522 last_cond = COND_NONE; 1523 break; 1524 1525 case COND_NONE: 1526 last_cond = COND_NONE; 1527 break; 1528 } 1529 1530 ms->c.li[cont_level].last_cond = last_cond; 1531 return 0; 1532 } 1533 #endif /* ENABLE_CONDITIONALS */ 1534 1535 /* 1536 * parse one line from magic file, put into magic[index++] if valid 1537 */ 1538 private int 1539 parse(struct magic_set *ms, struct magic_entry *me, const char *line, 1540 size_t lineno, int action) 1541 { 1542 #ifdef ENABLE_CONDITIONALS 1543 static uint32_t last_cont_level = 0; 1544 #endif 1545 size_t i; 1546 struct magic *m; 1547 const char *l = line; 1548 char *t; 1549 int op; 1550 uint32_t cont_level; 1551 int32_t diff; 1552 1553 cont_level = 0; 1554 1555 /* 1556 * Parse the offset. 
1557 */ 1558 while (*l == '>') { 1559 ++l; /* step over */ 1560 cont_level++; 1561 } 1562 #ifdef ENABLE_CONDITIONALS 1563 if (cont_level == 0 || cont_level > last_cont_level) 1564 if (file_check_mem(ms, cont_level) == -1) 1565 return -1; 1566 last_cont_level = cont_level; 1567 #endif 1568 if (cont_level != 0) { 1569 if (me->mp == NULL) { 1570 file_magerror(ms, "No current entry for continuation"); 1571 return -1; 1572 } 1573 if (me->cont_count == 0) { 1574 file_magerror(ms, "Continuations present with 0 count"); 1575 return -1; 1576 } 1577 m = &me->mp[me->cont_count - 1]; 1578 diff = (int32_t)cont_level - (int32_t)m->cont_level; 1579 if (diff > 1) 1580 file_magwarn(ms, "New continuation level %u is more " 1581 "than one larger than current level %u", cont_level, 1582 m->cont_level); 1583 if (me->cont_count == me->max_count) { 1584 struct magic *nm; 1585 size_t cnt = me->max_count + ALLOC_CHUNK; 1586 if ((nm = CAST(struct magic *, realloc(me->mp, 1587 sizeof(*nm) * cnt))) == NULL) { 1588 file_oomem(ms, sizeof(*nm) * cnt); 1589 return -1; 1590 } 1591 me->mp = m = nm; 1592 me->max_count = CAST(uint32_t, cnt); 1593 } 1594 m = &me->mp[me->cont_count++]; 1595 (void)memset(m, 0, sizeof(*m)); 1596 m->cont_level = cont_level; 1597 } else { 1598 static const size_t len = sizeof(*m) * ALLOC_CHUNK; 1599 if (me->mp != NULL) 1600 return 1; 1601 if ((m = CAST(struct magic *, malloc(len))) == NULL) { 1602 file_oomem(ms, len); 1603 return -1; 1604 } 1605 me->mp = m; 1606 me->max_count = ALLOC_CHUNK; 1607 (void)memset(m, 0, sizeof(*m)); 1608 m->factor_op = FILE_FACTOR_OP_NONE; 1609 m->cont_level = 0; 1610 me->cont_count = 1; 1611 } 1612 m->lineno = CAST(uint32_t, lineno); 1613 1614 if (*l == '&') { /* m->cont_level == 0 checked below. 
*/ 1615 ++l; /* step over */ 1616 m->flag |= OFFADD; 1617 } 1618 if (*l == '(') { 1619 ++l; /* step over */ 1620 m->flag |= INDIR; 1621 if (m->flag & OFFADD) 1622 m->flag = (m->flag & ~OFFADD) | INDIROFFADD; 1623 1624 if (*l == '&') { /* m->cont_level == 0 checked below */ 1625 ++l; /* step over */ 1626 m->flag |= OFFADD; 1627 } 1628 } 1629 /* Indirect offsets are not valid at level 0. */ 1630 if (m->cont_level == 0 && (m->flag & (OFFADD | INDIROFFADD))) 1631 if (ms->flags & MAGIC_CHECK) 1632 file_magwarn(ms, "relative offset at level 0"); 1633 1634 /* get offset, then skip over it */ 1635 m->offset = (uint32_t)strtoul(l, &t, 0); 1636 if (l == t) 1637 if (ms->flags & MAGIC_CHECK) 1638 file_magwarn(ms, "offset `%s' invalid", l); 1639 l = t; 1640 1641 if (m->flag & INDIR) { 1642 m->in_type = FILE_LONG; 1643 m->in_offset = 0; 1644 /* 1645 * read [.lbs][+-]nnnnn) 1646 */ 1647 if (*l == '.') { 1648 l++; 1649 switch (*l) { 1650 case 'l': 1651 m->in_type = FILE_LELONG; 1652 break; 1653 case 'L': 1654 m->in_type = FILE_BELONG; 1655 break; 1656 case 'm': 1657 m->in_type = FILE_MELONG; 1658 break; 1659 case 'h': 1660 case 's': 1661 m->in_type = FILE_LESHORT; 1662 break; 1663 case 'H': 1664 case 'S': 1665 m->in_type = FILE_BESHORT; 1666 break; 1667 case 'c': 1668 case 'b': 1669 case 'C': 1670 case 'B': 1671 m->in_type = FILE_BYTE; 1672 break; 1673 case 'e': 1674 case 'f': 1675 case 'g': 1676 m->in_type = FILE_LEDOUBLE; 1677 break; 1678 case 'E': 1679 case 'F': 1680 case 'G': 1681 m->in_type = FILE_BEDOUBLE; 1682 break; 1683 case 'i': 1684 m->in_type = FILE_LEID3; 1685 break; 1686 case 'I': 1687 m->in_type = FILE_BEID3; 1688 break; 1689 default: 1690 if (ms->flags & MAGIC_CHECK) 1691 file_magwarn(ms, 1692 "indirect offset type `%c' invalid", 1693 *l); 1694 break; 1695 } 1696 l++; 1697 } 1698 1699 m->in_op = 0; 1700 if (*l == '~') { 1701 m->in_op |= FILE_OPINVERSE; 1702 l++; 1703 } 1704 if ((op = get_op(*l)) != -1) { 1705 m->in_op |= op; 1706 l++; 1707 } 1708 if (*l == '(') { 
1709 m->in_op |= FILE_OPINDIRECT; 1710 l++; 1711 } 1712 if (isdigit((unsigned char)*l) || *l == '-') { 1713 m->in_offset = (int32_t)strtol(l, &t, 0); 1714 if (l == t) 1715 if (ms->flags & MAGIC_CHECK) 1716 file_magwarn(ms, 1717 "in_offset `%s' invalid", l); 1718 l = t; 1719 } 1720 if (*l++ != ')' || 1721 ((m->in_op & FILE_OPINDIRECT) && *l++ != ')')) 1722 if (ms->flags & MAGIC_CHECK) 1723 file_magwarn(ms, 1724 "missing ')' in indirect offset"); 1725 } 1726 EATAB; 1727 1728 #ifdef ENABLE_CONDITIONALS 1729 m->cond = get_cond(l, &l); 1730 if (check_cond(ms, m->cond, cont_level) == -1) 1731 return -1; 1732 1733 EATAB; 1734 #endif 1735 1736 /* 1737 * Parse the type. 1738 */ 1739 if (*l == 'u') { 1740 /* 1741 * Try it as a keyword type prefixed by "u"; match what 1742 * follows the "u". If that fails, try it as an SUS 1743 * integer type. 1744 */ 1745 m->type = get_type(type_tbl, l + 1, &l); 1746 if (m->type == FILE_INVALID) { 1747 /* 1748 * Not a keyword type; parse it as an SUS type, 1749 * 'u' possibly followed by a number or C/S/L. 1750 */ 1751 m->type = get_standard_integer_type(l, &l); 1752 } 1753 // It's unsigned. 1754 if (m->type != FILE_INVALID) 1755 m->flag |= UNSIGNED; 1756 } else { 1757 /* 1758 * Try it as a keyword type. If that fails, try it as 1759 * an SUS integer type if it begins with "d" or as an 1760 * SUS string type if it begins with "s". In any case, 1761 * it's not unsigned. 1762 */ 1763 m->type = get_type(type_tbl, l, &l); 1764 if (m->type == FILE_INVALID) { 1765 /* 1766 * Not a keyword type; parse it as an SUS type, 1767 * either 'd' possibly followed by a number or 1768 * C/S/L, or just 's'. 1769 */ 1770 if (*l == 'd') 1771 m->type = get_standard_integer_type(l, &l); 1772 else if (*l == 's' && !isalpha((unsigned char)l[1])) { 1773 m->type = FILE_STRING; 1774 ++l; 1775 } 1776 } 1777 } 1778 1779 if (m->type == FILE_INVALID) { 1780 /* Not found - try it as a special keyword. 
*/ 1781 m->type = get_type(special_tbl, l, &l); 1782 } 1783 1784 if (m->type == FILE_INVALID) { 1785 if (ms->flags & MAGIC_CHECK) 1786 file_magwarn(ms, "type `%s' invalid", l); 1787 return -1; 1788 } 1789 1790 /* New-style anding: "0 byte&0x80 =0x80 dynamically linked" */ 1791 /* New and improved: ~ & | ^ + - * / % -- exciting, isn't it? */ 1792 1793 m->mask_op = 0; 1794 if (*l == '~') { 1795 if (!IS_STRING(m->type)) 1796 m->mask_op |= FILE_OPINVERSE; 1797 else if (ms->flags & MAGIC_CHECK) 1798 file_magwarn(ms, "'~' invalid for string types"); 1799 ++l; 1800 } 1801 m->str_range = 0; 1802 m->str_flags = m->type == FILE_PSTRING ? PSTRING_1_LE : 0; 1803 if ((op = get_op(*l)) != -1) { 1804 if (!IS_STRING(m->type)) { 1805 uint64_t val; 1806 ++l; 1807 m->mask_op |= op; 1808 val = (uint64_t)strtoull(l, &t, 0); 1809 l = t; 1810 m->num_mask = file_signextend(ms, m, val); 1811 eatsize(&l); 1812 } 1813 else if (op == FILE_OPDIVIDE) { 1814 int have_range = 0; 1815 while (!isspace((unsigned char)*++l)) { 1816 switch (*l) { 1817 case '0': case '1': case '2': 1818 case '3': case '4': case '5': 1819 case '6': case '7': case '8': 1820 case '9': 1821 if (have_range && 1822 (ms->flags & MAGIC_CHECK)) 1823 file_magwarn(ms, 1824 "multiple ranges"); 1825 have_range = 1; 1826 m->str_range = CAST(uint32_t, 1827 strtoul(l, &t, 0)); 1828 if (m->str_range == 0) 1829 file_magwarn(ms, 1830 "zero range"); 1831 l = t - 1; 1832 break; 1833 case CHAR_COMPACT_WHITESPACE: 1834 m->str_flags |= 1835 STRING_COMPACT_WHITESPACE; 1836 break; 1837 case CHAR_COMPACT_OPTIONAL_WHITESPACE: 1838 m->str_flags |= 1839 STRING_COMPACT_OPTIONAL_WHITESPACE; 1840 break; 1841 case CHAR_IGNORE_LOWERCASE: 1842 m->str_flags |= STRING_IGNORE_LOWERCASE; 1843 break; 1844 case CHAR_IGNORE_UPPERCASE: 1845 m->str_flags |= STRING_IGNORE_UPPERCASE; 1846 break; 1847 case CHAR_REGEX_OFFSET_START: 1848 m->str_flags |= REGEX_OFFSET_START; 1849 break; 1850 case CHAR_BINTEST: 1851 m->str_flags |= STRING_BINTEST; 1852 break; 1853 case 
CHAR_TEXTTEST: 1854 m->str_flags |= STRING_TEXTTEST; 1855 break; 1856 case CHAR_TRIM: 1857 m->str_flags |= STRING_TRIM; 1858 break; 1859 case CHAR_PSTRING_1_LE: 1860 if (m->type != FILE_PSTRING) 1861 goto bad; 1862 m->str_flags = (m->str_flags & ~PSTRING_LEN) | PSTRING_1_LE; 1863 break; 1864 case CHAR_PSTRING_2_BE: 1865 if (m->type != FILE_PSTRING) 1866 goto bad; 1867 m->str_flags = (m->str_flags & ~PSTRING_LEN) | PSTRING_2_BE; 1868 break; 1869 case CHAR_PSTRING_2_LE: 1870 if (m->type != FILE_PSTRING) 1871 goto bad; 1872 m->str_flags = (m->str_flags & ~PSTRING_LEN) | PSTRING_2_LE; 1873 break; 1874 case CHAR_PSTRING_4_BE: 1875 if (m->type != FILE_PSTRING) 1876 goto bad; 1877 m->str_flags = (m->str_flags & ~PSTRING_LEN) | PSTRING_4_BE; 1878 break; 1879 case CHAR_PSTRING_4_LE: 1880 if (m->type != FILE_PSTRING) 1881 goto bad; 1882 m->str_flags = (m->str_flags & ~PSTRING_LEN) | PSTRING_4_LE; 1883 break; 1884 case CHAR_PSTRING_LENGTH_INCLUDES_ITSELF: 1885 if (m->type != FILE_PSTRING) 1886 goto bad; 1887 m->str_flags |= PSTRING_LENGTH_INCLUDES_ITSELF; 1888 break; 1889 default: 1890 bad: 1891 if (ms->flags & MAGIC_CHECK) 1892 file_magwarn(ms, 1893 "string extension `%c' " 1894 "invalid", *l); 1895 return -1; 1896 } 1897 /* allow multiple '/' for readability */ 1898 if (l[1] == '/' && 1899 !isspace((unsigned char)l[2])) 1900 l++; 1901 } 1902 if (string_modifier_check(ms, m) == -1) 1903 return -1; 1904 } 1905 else { 1906 if (ms->flags & MAGIC_CHECK) 1907 file_magwarn(ms, "invalid string op: %c", *t); 1908 return -1; 1909 } 1910 } 1911 /* 1912 * We used to set mask to all 1's here, instead let's just not do 1913 * anything if mask = 0 (unless you have a better idea) 1914 */ 1915 EATAB; 1916 1917 switch (*l) { 1918 case '>': 1919 case '<': 1920 m->reln = *l; 1921 ++l; 1922 if (*l == '=') { 1923 if (ms->flags & MAGIC_CHECK) { 1924 file_magwarn(ms, "%c= not supported", 1925 m->reln); 1926 return -1; 1927 } 1928 ++l; 1929 } 1930 break; 1931 /* Old-style anding: "0 byte &0x80 
dynamically linked" */ 1932 case '&': 1933 case '^': 1934 case '=': 1935 m->reln = *l; 1936 ++l; 1937 if (*l == '=') { 1938 /* HP compat: ignore &= etc. */ 1939 ++l; 1940 } 1941 break; 1942 case '!': 1943 m->reln = *l; 1944 ++l; 1945 break; 1946 default: 1947 m->reln = '='; /* the default relation */ 1948 if (*l == 'x' && ((isascii((unsigned char)l[1]) && 1949 isspace((unsigned char)l[1])) || !l[1])) { 1950 m->reln = *l; 1951 ++l; 1952 } 1953 break; 1954 } 1955 /* 1956 * Grab the value part, except for an 'x' reln. 1957 */ 1958 if (m->reln != 'x' && getvalue(ms, m, &l, action)) 1959 return -1; 1960 1961 /* 1962 * TODO finish this macro and start using it! 1963 * #define offsetcheck {if (offset > HOWMANY-1) 1964 * magwarn("offset too big"); } 1965 */ 1966 1967 /* 1968 * Now get last part - the description 1969 */ 1970 EATAB; 1971 if (l[0] == '\b') { 1972 ++l; 1973 m->flag |= NOSPACE; 1974 } else if ((l[0] == '\\') && (l[1] == 'b')) { 1975 ++l; 1976 ++l; 1977 m->flag |= NOSPACE; 1978 } 1979 for (i = 0; (m->desc[i++] = *l++) != '\0' && i < sizeof(m->desc); ) 1980 continue; 1981 if (i == sizeof(m->desc)) { 1982 m->desc[sizeof(m->desc) - 1] = '\0'; 1983 if (ms->flags & MAGIC_CHECK) 1984 file_magwarn(ms, "description `%s' truncated", m->desc); 1985 } 1986 1987 /* 1988 * We only do this check while compiling, or if any of the magic 1989 * files were not compiled. 
1990 */ 1991 if (ms->flags & MAGIC_CHECK) { 1992 if (check_format(ms, m) == -1) 1993 return -1; 1994 } 1995 #ifndef COMPILE_ONLY 1996 if (action == FILE_CHECK) { 1997 file_mdump(m); 1998 } 1999 #endif 2000 m->mimetype[0] = '\0'; /* initialise MIME type to none */ 2001 return 0; 2002 } 2003 2004 /* 2005 * parse a STRENGTH annotation line from magic file, put into magic[index - 1] 2006 * if valid 2007 */ 2008 private int 2009 parse_strength(struct magic_set *ms, struct magic_entry *me, const char *line) 2010 { 2011 const char *l = line; 2012 char *el; 2013 unsigned long factor; 2014 struct magic *m = &me->mp[0]; 2015 2016 if (m->factor_op != FILE_FACTOR_OP_NONE) { 2017 file_magwarn(ms, 2018 "Current entry already has a strength type: %c %d", 2019 m->factor_op, m->factor); 2020 return -1; 2021 } 2022 if (m->type == FILE_NAME) { 2023 file_magwarn(ms, "%s: Strength setting is not supported in " 2024 "\"name\" magic entries", m->value.s); 2025 return -1; 2026 } 2027 EATAB; 2028 switch (*l) { 2029 case FILE_FACTOR_OP_NONE: 2030 case FILE_FACTOR_OP_PLUS: 2031 case FILE_FACTOR_OP_MINUS: 2032 case FILE_FACTOR_OP_TIMES: 2033 case FILE_FACTOR_OP_DIV: 2034 m->factor_op = *l++; 2035 break; 2036 default: 2037 file_magwarn(ms, "Unknown factor op `%c'", *l); 2038 return -1; 2039 } 2040 EATAB; 2041 factor = strtoul(l, &el, 0); 2042 if (factor > 255) { 2043 file_magwarn(ms, "Too large factor `%lu'", factor); 2044 goto out; 2045 } 2046 if (*el && !isspace((unsigned char)*el)) { 2047 file_magwarn(ms, "Bad factor `%s'", l); 2048 goto out; 2049 } 2050 m->factor = (uint8_t)factor; 2051 if (m->factor == 0 && m->factor_op == FILE_FACTOR_OP_DIV) { 2052 file_magwarn(ms, "Cannot have factor op `%c' and factor %u", 2053 m->factor_op, m->factor); 2054 goto out; 2055 } 2056 return 0; 2057 out: 2058 m->factor_op = FILE_FACTOR_OP_NONE; 2059 m->factor = 0; 2060 return -1; 2061 } 2062 2063 private int 2064 parse_extra(struct magic_set *ms, struct magic_entry *me, const char *line, 2065 off_t off, 
size_t len, const char *name, int nt) 2066 { 2067 size_t i; 2068 const char *l = line; 2069 struct magic *m = &me->mp[me->cont_count == 0 ? 0 : me->cont_count - 1]; 2070 char *buf = (char *)m + off; 2071 2072 if (buf[0] != '\0') { 2073 len = nt ? strlen(buf) : len; 2074 file_magwarn(ms, "Current entry already has a %s type " 2075 "`%.*s', new type `%s'", name, (int)len, buf, l); 2076 return -1; 2077 } 2078 2079 if (*m->desc == '\0') { 2080 file_magwarn(ms, "Current entry does not yet have a " 2081 "description for adding a %s type", name); 2082 return -1; 2083 } 2084 2085 EATAB; 2086 for (i = 0; *l && ((isascii((unsigned char)*l) && 2087 isalnum((unsigned char)*l)) || strchr("-+/.", *l)) && 2088 i < len; buf[i++] = *l++) 2089 continue; 2090 2091 if (i == len && *l) { 2092 if (nt) 2093 buf[len - 1] = '\0'; 2094 if (ms->flags & MAGIC_CHECK) 2095 file_magwarn(ms, "%s type `%s' truncated %" 2096 SIZE_T_FORMAT "u", name, line, i); 2097 } else { 2098 if (nt) 2099 buf[i] = '\0'; 2100 } 2101 2102 if (i > 0) 2103 return 0; 2104 else 2105 return -1; 2106 } 2107 2108 /* 2109 * Parse an Apple CREATOR/TYPE annotation from magic file and put it into 2110 * magic[index - 1] 2111 */ 2112 private int 2113 parse_apple(struct magic_set *ms, struct magic_entry *me, const char *line) 2114 { 2115 struct magic *m = &me->mp[0]; 2116 2117 return parse_extra(ms, me, line, offsetof(struct magic, apple), 2118 sizeof(m->apple), "APPLE", 0); 2119 } 2120 2121 /* 2122 * parse a MIME annotation line from magic file, put into magic[index - 1] 2123 * if valid 2124 */ 2125 private int 2126 parse_mime(struct magic_set *ms, struct magic_entry *me, const char *line) 2127 { 2128 struct magic *m = &me->mp[0]; 2129 2130 return parse_extra(ms, me, line, offsetof(struct magic, mimetype), 2131 sizeof(m->mimetype), "MIME", 1); 2132 } 2133 2134 private int 2135 check_format_type(const char *ptr, int type) 2136 { 2137 int quad = 0; 2138 if (*ptr == '\0') { 2139 /* Missing format string; bad */ 2140 return -1; 
2141 } 2142 2143 switch (type) { 2144 case FILE_FMT_QUAD: 2145 quad = 1; 2146 /*FALLTHROUGH*/ 2147 case FILE_FMT_NUM: 2148 if (*ptr == '-') 2149 ptr++; 2150 if (*ptr == '.') 2151 ptr++; 2152 while (isdigit((unsigned char)*ptr)) ptr++; 2153 if (*ptr == '.') 2154 ptr++; 2155 while (isdigit((unsigned char)*ptr)) ptr++; 2156 if (quad) { 2157 if (*ptr++ != 'l') 2158 return -1; 2159 if (*ptr++ != 'l') 2160 return -1; 2161 } 2162 2163 switch (*ptr++) { 2164 case 'l': 2165 switch (*ptr++) { 2166 case 'i': 2167 case 'd': 2168 case 'u': 2169 case 'o': 2170 case 'x': 2171 case 'X': 2172 return 0; 2173 default: 2174 return -1; 2175 } 2176 2177 case 'h': 2178 switch (*ptr++) { 2179 case 'h': 2180 switch (*ptr++) { 2181 case 'i': 2182 case 'd': 2183 case 'u': 2184 case 'o': 2185 case 'x': 2186 case 'X': 2187 return 0; 2188 default: 2189 return -1; 2190 } 2191 case 'd': 2192 return 0; 2193 default: 2194 return -1; 2195 } 2196 2197 case 'i': 2198 case 'c': 2199 case 'd': 2200 case 'u': 2201 case 'o': 2202 case 'x': 2203 case 'X': 2204 return 0; 2205 2206 default: 2207 return -1; 2208 } 2209 2210 case FILE_FMT_FLOAT: 2211 case FILE_FMT_DOUBLE: 2212 if (*ptr == '-') 2213 ptr++; 2214 if (*ptr == '.') 2215 ptr++; 2216 while (isdigit((unsigned char)*ptr)) ptr++; 2217 if (*ptr == '.') 2218 ptr++; 2219 while (isdigit((unsigned char)*ptr)) ptr++; 2220 2221 switch (*ptr++) { 2222 case 'e': 2223 case 'E': 2224 case 'f': 2225 case 'F': 2226 case 'g': 2227 case 'G': 2228 return 0; 2229 2230 default: 2231 return -1; 2232 } 2233 2234 2235 case FILE_FMT_STR: 2236 if (*ptr == '-') 2237 ptr++; 2238 while (isdigit((unsigned char )*ptr)) 2239 ptr++; 2240 if (*ptr == '.') { 2241 ptr++; 2242 while (isdigit((unsigned char )*ptr)) 2243 ptr++; 2244 } 2245 2246 switch (*ptr++) { 2247 case 's': 2248 return 0; 2249 default: 2250 return -1; 2251 } 2252 2253 default: 2254 /* internal error */ 2255 abort(); 2256 } 2257 /*NOTREACHED*/ 2258 return -1; 2259 } 2260 2261 /* 2262 * Check that the optional printf 
format in description matches 2263 * the type of the magic. 2264 */ 2265 private int 2266 check_format(struct magic_set *ms, struct magic *m) 2267 { 2268 char *ptr; 2269 2270 for (ptr = m->desc; *ptr; ptr++) 2271 if (*ptr == '%') 2272 break; 2273 if (*ptr == '\0') { 2274 /* No format string; ok */ 2275 return 1; 2276 } 2277 2278 assert(file_nformats == file_nnames); 2279 2280 if (m->type >= file_nformats) { 2281 file_magwarn(ms, "Internal error inconsistency between " 2282 "m->type and format strings"); 2283 return -1; 2284 } 2285 if (file_formats[m->type] == FILE_FMT_NONE) { 2286 file_magwarn(ms, "No format string for `%s' with description " 2287 "`%s'", m->desc, file_names[m->type]); 2288 return -1; 2289 } 2290 2291 ptr++; 2292 if (check_format_type(ptr, file_formats[m->type]) == -1) { 2293 /* 2294 * TODO: this error message is unhelpful if the format 2295 * string is not one character long 2296 */ 2297 file_magwarn(ms, "Printf format `%c' is not valid for type " 2298 "`%s' in description `%s'", *ptr ? *ptr : '?', 2299 file_names[m->type], m->desc); 2300 return -1; 2301 } 2302 2303 for (; *ptr; ptr++) { 2304 if (*ptr == '%') { 2305 file_magwarn(ms, 2306 "Too many format strings (should have at most one) " 2307 "for `%s' with description `%s'", 2308 file_names[m->type], m->desc); 2309 return -1; 2310 } 2311 } 2312 return 0; 2313 } 2314 2315 /* 2316 * Read a numeric value from a pointer, into the value union of a magic 2317 * pointer, according to the magic type. Update the string pointer to point 2318 * just after the number read. Return 0 for success, non-zero for failure. 
2319 */ 2320 private int 2321 getvalue(struct magic_set *ms, struct magic *m, const char **p, int action) 2322 { 2323 switch (m->type) { 2324 case FILE_BESTRING16: 2325 case FILE_LESTRING16: 2326 case FILE_STRING: 2327 case FILE_PSTRING: 2328 case FILE_REGEX: 2329 case FILE_SEARCH: 2330 case FILE_NAME: 2331 case FILE_USE: 2332 *p = getstr(ms, m, *p, action == FILE_COMPILE); 2333 if (*p == NULL) { 2334 if (ms->flags & MAGIC_CHECK) 2335 file_magwarn(ms, "cannot get string from `%s'", 2336 m->value.s); 2337 return -1; 2338 } 2339 return 0; 2340 case FILE_FLOAT: 2341 case FILE_BEFLOAT: 2342 case FILE_LEFLOAT: 2343 if (m->reln != 'x') { 2344 char *ep; 2345 #if defined(HAVE_STRTOF) && !defined(COMPILE_ONLY) 2346 m->value.f = strtof(*p, &ep); 2347 #else 2348 m->value.f = (float)strtod(*p, &ep); 2349 #endif 2350 *p = ep; 2351 } 2352 return 0; 2353 case FILE_DOUBLE: 2354 case FILE_BEDOUBLE: 2355 case FILE_LEDOUBLE: 2356 if (m->reln != 'x') { 2357 char *ep; 2358 m->value.d = strtod(*p, &ep); 2359 *p = ep; 2360 } 2361 return 0; 2362 default: 2363 if (m->reln != 'x') { 2364 char *ep; 2365 m->value.q = file_signextend(ms, m, 2366 (uint64_t)strtoull(*p, &ep, 0)); 2367 *p = ep; 2368 eatsize(p); 2369 } 2370 return 0; 2371 } 2372 } 2373 2374 /* 2375 * Convert a string containing C character escapes. Stop at an unescaped 2376 * space or tab. 2377 * Copy the converted version to "m->value.s", and the length in m->vallen. 2378 * Return updated scan pointer as function result. Warn if set. 
2379 */ 2380 private const char * 2381 getstr(struct magic_set *ms, struct magic *m, const char *s, int warn) 2382 { 2383 const char *origs = s; 2384 char *p = m->value.s; 2385 size_t plen = sizeof(m->value.s); 2386 char *origp = p; 2387 char *pmax = p + plen - 1; 2388 int c; 2389 int val; 2390 2391 while ((c = *s++) != '\0') { 2392 if (isspace((unsigned char) c)) 2393 break; 2394 if (p >= pmax) { 2395 file_error(ms, 0, "string too long: `%s'", origs); 2396 return NULL; 2397 } 2398 if (c == '\\') { 2399 switch(c = *s++) { 2400 2401 case '\0': 2402 if (warn) 2403 file_magwarn(ms, "incomplete escape"); 2404 goto out; 2405 2406 case '\t': 2407 if (warn) { 2408 file_magwarn(ms, 2409 "escaped tab found, use \\t instead"); 2410 warn = 0; /* already did */ 2411 } 2412 /*FALLTHROUGH*/ 2413 default: 2414 if (warn) { 2415 if (isprint((unsigned char)c)) { 2416 /* Allow escaping of 2417 * ``relations'' */ 2418 if (strchr("<>&^=!", c) == NULL 2419 && (m->type != FILE_REGEX || 2420 strchr("[]().*?^$|{}", c) 2421 == NULL)) { 2422 file_magwarn(ms, "no " 2423 "need to escape " 2424 "`%c'", c); 2425 } 2426 } else { 2427 file_magwarn(ms, 2428 "unknown escape sequence: " 2429 "\\%03o", c); 2430 } 2431 } 2432 /*FALLTHROUGH*/ 2433 /* space, perhaps force people to use \040? 
*/ 2434 case ' ': 2435 #if 0 2436 /* 2437 * Other things people escape, but shouldn't need to, 2438 * so we disallow them 2439 */ 2440 case '\'': 2441 case '"': 2442 case '?': 2443 #endif 2444 /* Relations */ 2445 case '>': 2446 case '<': 2447 case '&': 2448 case '^': 2449 case '=': 2450 case '!': 2451 /* and baskslash itself */ 2452 case '\\': 2453 *p++ = (char) c; 2454 break; 2455 2456 case 'a': 2457 *p++ = '\a'; 2458 break; 2459 2460 case 'b': 2461 *p++ = '\b'; 2462 break; 2463 2464 case 'f': 2465 *p++ = '\f'; 2466 break; 2467 2468 case 'n': 2469 *p++ = '\n'; 2470 break; 2471 2472 case 'r': 2473 *p++ = '\r'; 2474 break; 2475 2476 case 't': 2477 *p++ = '\t'; 2478 break; 2479 2480 case 'v': 2481 *p++ = '\v'; 2482 break; 2483 2484 /* \ and up to 3 octal digits */ 2485 case '0': 2486 case '1': 2487 case '2': 2488 case '3': 2489 case '4': 2490 case '5': 2491 case '6': 2492 case '7': 2493 val = c - '0'; 2494 c = *s++; /* try for 2 */ 2495 if (c >= '0' && c <= '7') { 2496 val = (val << 3) | (c - '0'); 2497 c = *s++; /* try for 3 */ 2498 if (c >= '0' && c <= '7') 2499 val = (val << 3) | (c-'0'); 2500 else 2501 --s; 2502 } 2503 else 2504 --s; 2505 *p++ = (char)val; 2506 break; 2507 2508 /* \x and up to 2 hex digits */ 2509 case 'x': 2510 val = 'x'; /* Default if no digits */ 2511 c = hextoint(*s++); /* Get next char */ 2512 if (c >= 0) { 2513 val = c; 2514 c = hextoint(*s++); 2515 if (c >= 0) 2516 val = (val << 4) + c; 2517 else 2518 --s; 2519 } else 2520 --s; 2521 *p++ = (char)val; 2522 break; 2523 } 2524 } else 2525 *p++ = (char)c; 2526 } 2527 out: 2528 *p = '\0'; 2529 m->vallen = CAST(unsigned char, (p - origp)); 2530 if (m->type == FILE_PSTRING) 2531 m->vallen += (unsigned char)file_pstring_length_size(m); 2532 return s; 2533 } 2534 2535 2536 /* Single hex char to int; -1 if not a hex char. 
*/ 2537 private int 2538 hextoint(int c) 2539 { 2540 if (!isascii((unsigned char) c)) 2541 return -1; 2542 if (isdigit((unsigned char) c)) 2543 return c - '0'; 2544 if ((c >= 'a') && (c <= 'f')) 2545 return c + 10 - 'a'; 2546 if (( c>= 'A') && (c <= 'F')) 2547 return c + 10 - 'A'; 2548 return -1; 2549 } 2550 2551 2552 /* 2553 * Print a string containing C character escapes. 2554 */ 2555 protected void 2556 file_showstr(FILE *fp, const char *s, size_t len) 2557 { 2558 char c; 2559 2560 for (;;) { 2561 if (len == ~0U) { 2562 c = *s++; 2563 if (c == '\0') 2564 break; 2565 } 2566 else { 2567 if (len-- == 0) 2568 break; 2569 c = *s++; 2570 } 2571 if (c >= 040 && c <= 0176) /* TODO isprint && !iscntrl */ 2572 (void) fputc(c, fp); 2573 else { 2574 (void) fputc('\\', fp); 2575 switch (c) { 2576 case '\a': 2577 (void) fputc('a', fp); 2578 break; 2579 2580 case '\b': 2581 (void) fputc('b', fp); 2582 break; 2583 2584 case '\f': 2585 (void) fputc('f', fp); 2586 break; 2587 2588 case '\n': 2589 (void) fputc('n', fp); 2590 break; 2591 2592 case '\r': 2593 (void) fputc('r', fp); 2594 break; 2595 2596 case '\t': 2597 (void) fputc('t', fp); 2598 break; 2599 2600 case '\v': 2601 (void) fputc('v', fp); 2602 break; 2603 2604 default: 2605 (void) fprintf(fp, "%.3o", c & 0377); 2606 break; 2607 } 2608 } 2609 } 2610 } 2611 2612 /* 2613 * eatsize(): Eat the size spec from a number [eg. 10UL] 2614 */ 2615 private void 2616 eatsize(const char **p) 2617 { 2618 const char *l = *p; 2619 2620 if (LOWCASE(*l) == 'u') 2621 l++; 2622 2623 switch (LOWCASE(*l)) { 2624 case 'l': /* long */ 2625 case 's': /* short */ 2626 case 'h': /* short */ 2627 case 'b': /* char/byte */ 2628 case 'c': /* char/byte */ 2629 l++; 2630 /*FALLTHROUGH*/ 2631 default: 2632 break; 2633 } 2634 2635 *p = l; 2636 } 2637 2638 /* 2639 * handle a compiled file. 
2640 */ 2641 2642 private struct magic_map * 2643 apprentice_map(struct magic_set *ms, const char *fn) 2644 { 2645 int fd; 2646 struct stat st; 2647 uint32_t *ptr; 2648 uint32_t version, entries, nentries; 2649 int needsbyteswap; 2650 char *dbname = NULL; 2651 struct magic_map *map; 2652 size_t i; 2653 2654 fd = -1; 2655 if ((map = CAST(struct magic_map *, calloc(1, sizeof(*map)))) == NULL) { 2656 file_oomem(ms, sizeof(*map)); 2657 goto error; 2658 } 2659 2660 dbname = mkdbname(ms, fn, 0); 2661 if (dbname == NULL) 2662 goto error; 2663 2664 if ((fd = open(dbname, O_RDONLY|O_BINARY)) == -1) 2665 goto error; 2666 2667 if (fstat(fd, &st) == -1) { 2668 file_error(ms, errno, "cannot stat `%s'", dbname); 2669 goto error; 2670 } 2671 if (st.st_size < 8 || st.st_size > MAXMAGIC_SIZE) { 2672 file_error(ms, 0, "file `%s' is too %s", dbname, 2673 st.st_size < 8 ? "small" : "large"); 2674 goto error; 2675 } 2676 2677 map->len = (size_t)st.st_size; 2678 #ifdef QUICK 2679 if ((map->p = mmap(0, (size_t)st.st_size, PROT_READ|PROT_WRITE, 2680 MAP_PRIVATE|MAP_FILE, fd, (off_t)0)) == MAP_FAILED) { 2681 file_error(ms, errno, "cannot map `%s'", dbname); 2682 goto error; 2683 } 2684 #else 2685 if ((map->p = CAST(void *, malloc(map->len))) == NULL) { 2686 file_oomem(ms, map->len); 2687 goto error; 2688 } 2689 if (read(fd, map->p, map->len) != (ssize_t)map->len) { 2690 file_badread(ms); 2691 goto error; 2692 } 2693 map->len = 0; 2694 #define RET 1 2695 #endif 2696 (void)close(fd); 2697 fd = -1; 2698 ptr = CAST(uint32_t *, map->p); 2699 if (*ptr != MAGICNO) { 2700 if (swap4(*ptr) != MAGICNO) { 2701 file_error(ms, 0, "bad magic in `%s'", dbname); 2702 goto error; 2703 } 2704 needsbyteswap = 1; 2705 } else 2706 needsbyteswap = 0; 2707 if (needsbyteswap) 2708 version = swap4(ptr[1]); 2709 else 2710 version = ptr[1]; 2711 if (version != VERSIONNO) { 2712 file_error(ms, 0, "File %s supports only version %d magic " 2713 "files. 
`%s' is version %d", VERSION, 2714 VERSIONNO, dbname, version); 2715 goto error; 2716 } 2717 entries = (uint32_t)(st.st_size / sizeof(struct magic)); 2718 if ((off_t)(entries * sizeof(struct magic)) != st.st_size) { 2719 file_error(ms, 0, "Size of `%s' %llu is not a multiple of %zu", 2720 dbname, (unsigned long long)st.st_size, 2721 sizeof(struct magic)); 2722 goto error; 2723 } 2724 map->magic[0] = CAST(struct magic *, map->p) + 1; 2725 nentries = 0; 2726 for (i = 0; i < MAGIC_SETS; i++) { 2727 if (needsbyteswap) 2728 map->nmagic[i] = swap4(ptr[i + 2]); 2729 else 2730 map->nmagic[i] = ptr[i + 2]; 2731 if (i != MAGIC_SETS - 1) 2732 map->magic[i + 1] = map->magic[i] + map->nmagic[i]; 2733 nentries += map->nmagic[i]; 2734 } 2735 if (entries != nentries + 1) { 2736 file_error(ms, 0, "Inconsistent entries in `%s' %u != %u", 2737 dbname, entries, nentries + 1); 2738 goto error; 2739 } 2740 if (needsbyteswap) 2741 for (i = 0; i < MAGIC_SETS; i++) 2742 byteswap(map->magic[i], map->nmagic[i]); 2743 free(dbname); 2744 return map; 2745 2746 error: 2747 if (fd != -1) 2748 (void)close(fd); 2749 apprentice_unmap(map); 2750 free(dbname); 2751 return NULL; 2752 } 2753 2754 private const uint32_t ar[] = { 2755 MAGICNO, VERSIONNO 2756 }; 2757 2758 /* 2759 * handle an mmaped file. 
2760 */ 2761 private int 2762 apprentice_compile(struct magic_set *ms, struct magic_map *map, const char *fn) 2763 { 2764 static const size_t nm = sizeof(*map->nmagic) * MAGIC_SETS; 2765 static const size_t m = sizeof(**map->magic); 2766 int fd = -1; 2767 size_t len; 2768 char *dbname; 2769 int rv = -1; 2770 uint32_t i; 2771 2772 dbname = mkdbname(ms, fn, 1); 2773 2774 if (dbname == NULL) 2775 goto out; 2776 2777 if ((fd = open(dbname, O_WRONLY|O_CREAT|O_TRUNC|O_BINARY, 0644)) == -1) 2778 { 2779 file_error(ms, errno, "cannot open `%s'", dbname); 2780 goto out; 2781 } 2782 2783 if (write(fd, ar, sizeof(ar)) != (ssize_t)sizeof(ar)) { 2784 file_error(ms, errno, "error writing `%s'", dbname); 2785 goto out; 2786 } 2787 2788 if (write(fd, map->nmagic, nm) != (ssize_t)nm) { 2789 file_error(ms, errno, "error writing `%s'", dbname); 2790 goto out; 2791 } 2792 2793 assert(nm + sizeof(ar) < m); 2794 2795 if (lseek(fd, (off_t)m, SEEK_SET) != (off_t)m) { 2796 file_error(ms, errno, "error seeking `%s'", dbname); 2797 goto out; 2798 } 2799 2800 for (i = 0; i < MAGIC_SETS; i++) { 2801 len = m * map->nmagic[i]; 2802 if (write(fd, map->magic[i], len) != (ssize_t)len) { 2803 file_error(ms, errno, "error writing `%s'", dbname); 2804 goto out; 2805 } 2806 } 2807 2808 if (fd != -1) 2809 (void)close(fd); 2810 rv = 0; 2811 out: 2812 free(dbname); 2813 return rv; 2814 } 2815 2816 private const char ext[] = ".mgc"; 2817 /* 2818 * make a dbname 2819 */ 2820 private char * 2821 mkdbname(struct magic_set *ms, const char *fn, int strip) 2822 { 2823 const char *p, *q; 2824 char *buf; 2825 2826 if (strip) { 2827 if ((p = strrchr(fn, '/')) != NULL) 2828 fn = ++p; 2829 } 2830 2831 for (q = fn; *q; q++) 2832 continue; 2833 /* Look for .mgc */ 2834 for (p = ext + sizeof(ext) - 1; p >= ext && q >= fn; p--, q--) 2835 if (*p != *q) 2836 break; 2837 2838 /* Did not find .mgc, restore q */ 2839 if (p >= ext) 2840 while (*q) 2841 q++; 2842 2843 q++; 2844 /* Compatibility with old code that looked in .mime 
*/ 2845 if (ms->flags & MAGIC_MIME) { 2846 if (asprintf(&buf, "%.*s.mime%s", (int)(q - fn), fn, ext) < 0) 2847 return NULL; 2848 if (access(buf, R_OK) != -1) { 2849 ms->flags &= MAGIC_MIME_TYPE; 2850 return buf; 2851 } 2852 free(buf); 2853 } 2854 if (asprintf(&buf, "%.*s%s", (int)(q - fn), fn, ext) < 0) 2855 return NULL; 2856 2857 /* Compatibility with old code that looked in .mime */ 2858 if (strstr(p, ".mime") != NULL) 2859 ms->flags &= MAGIC_MIME_TYPE; 2860 return buf; 2861 } 2862 2863 /* 2864 * Byteswap an mmap'ed file if needed 2865 */ 2866 private void 2867 byteswap(struct magic *magic, uint32_t nmagic) 2868 { 2869 uint32_t i; 2870 for (i = 0; i < nmagic; i++) 2871 bs1(&magic[i]); 2872 } 2873 2874 /* 2875 * swap a short 2876 */ 2877 private uint16_t 2878 swap2(uint16_t sv) 2879 { 2880 uint16_t rv; 2881 uint8_t *s = (uint8_t *)(void *)&sv; 2882 uint8_t *d = (uint8_t *)(void *)&rv; 2883 d[0] = s[1]; 2884 d[1] = s[0]; 2885 return rv; 2886 } 2887 2888 /* 2889 * swap an int 2890 */ 2891 private uint32_t 2892 swap4(uint32_t sv) 2893 { 2894 uint32_t rv; 2895 uint8_t *s = (uint8_t *)(void *)&sv; 2896 uint8_t *d = (uint8_t *)(void *)&rv; 2897 d[0] = s[3]; 2898 d[1] = s[2]; 2899 d[2] = s[1]; 2900 d[3] = s[0]; 2901 return rv; 2902 } 2903 2904 /* 2905 * swap a quad 2906 */ 2907 private uint64_t 2908 swap8(uint64_t sv) 2909 { 2910 uint64_t rv; 2911 uint8_t *s = (uint8_t *)(void *)&sv; 2912 uint8_t *d = (uint8_t *)(void *)&rv; 2913 #if 0 2914 d[0] = s[3]; 2915 d[1] = s[2]; 2916 d[2] = s[1]; 2917 d[3] = s[0]; 2918 d[4] = s[7]; 2919 d[5] = s[6]; 2920 d[6] = s[5]; 2921 d[7] = s[4]; 2922 #else 2923 d[0] = s[7]; 2924 d[1] = s[6]; 2925 d[2] = s[5]; 2926 d[3] = s[4]; 2927 d[4] = s[3]; 2928 d[5] = s[2]; 2929 d[6] = s[1]; 2930 d[7] = s[0]; 2931 #endif 2932 return rv; 2933 } 2934 2935 /* 2936 * byteswap a single magic entry 2937 */ 2938 private void 2939 bs1(struct magic *m) 2940 { 2941 m->cont_level = swap2(m->cont_level); 2942 m->offset = swap4((uint32_t)m->offset); 2943 
m->in_offset = swap4((uint32_t)m->in_offset); 2944 m->lineno = swap4((uint32_t)m->lineno); 2945 if (IS_STRING(m->type)) { 2946 m->str_range = swap4(m->str_range); 2947 m->str_flags = swap4(m->str_flags); 2948 } 2949 else { 2950 m->value.q = swap8(m->value.q); 2951 m->num_mask = swap8(m->num_mask); 2952 } 2953 } 2954 2955 protected size_t 2956 file_pstring_length_size(const struct magic *m) 2957 { 2958 switch (m->str_flags & PSTRING_LEN) { 2959 case PSTRING_1_LE: 2960 return 1; 2961 case PSTRING_2_LE: 2962 case PSTRING_2_BE: 2963 return 2; 2964 case PSTRING_4_LE: 2965 case PSTRING_4_BE: 2966 return 4; 2967 default: 2968 abort(); /* Impossible */ 2969 return 1; 2970 } 2971 } 2972 protected size_t 2973 file_pstring_get_length(const struct magic *m, const char *s) 2974 { 2975 size_t len = 0; 2976 2977 switch (m->str_flags & PSTRING_LEN) { 2978 case PSTRING_1_LE: 2979 len = *s; 2980 break; 2981 case PSTRING_2_LE: 2982 len = (s[1] << 8) | s[0]; 2983 break; 2984 case PSTRING_2_BE: 2985 len = (s[0] << 8) | s[1]; 2986 break; 2987 case PSTRING_4_LE: 2988 len = (s[3] << 24) | (s[2] << 16) | (s[1] << 8) | s[0]; 2989 break; 2990 case PSTRING_4_BE: 2991 len = (s[0] << 24) | (s[1] << 16) | (s[2] << 8) | s[3]; 2992 break; 2993 default: 2994 abort(); /* Impossible */ 2995 } 2996 2997 if (m->str_flags & PSTRING_LENGTH_INCLUDES_ITSELF) 2998 len -= file_pstring_length_size(m); 2999 3000 return len; 3001 } 3002 3003 protected int 3004 file_magicfind(struct magic_set *ms, const char *name, struct mlist *v) 3005 { 3006 uint32_t i, j; 3007 struct mlist *mlist, *ml; 3008 3009 mlist = ms->mlist[1]; 3010 3011 for (ml = mlist->next; ml != mlist; ml = ml->next) { 3012 struct magic *ma = ml->magic; 3013 uint32_t nma = ml->nmagic; 3014 for (i = 0; i < nma; i++) { 3015 if (ma[i].type != FILE_NAME) 3016 continue; 3017 if (strcmp(ma[i].value.s, name) == 0) { 3018 v->magic = &ma[i]; 3019 for (j = i + 1; j < nma; j++) 3020 if (ma[j].cont_level == 0) 3021 break; 3022 v->nmagic = j - i; 3023 return 0; 
3024 } 3025 } 3026 } 3027 return -1; 3028 } 3029