1 /* $NetBSD: apprentice.c,v 1.9 2013/03/23 16:15:58 christos Exp $ */ 2 3 /* 4 * Copyright (c) Ian F. Darwin 1986-1995. 5 * Software written by Ian F. Darwin and others; 6 * maintained 1995-present by Christos Zoulas and others. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice immediately at the beginning of the file, without modification, 13 * this list of conditions, and the following disclaimer. 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in the 16 * documentation and/or other materials provided with the distribution. 17 * 18 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 19 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 20 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 21 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR 22 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 23 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 24 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 25 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 26 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 27 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 28 * SUCH DAMAGE. 29 */ 30 /* 31 * apprentice - make one pass through /etc/magic, learning its secrets. 
32 */ 33 34 #include "file.h" 35 36 #ifndef lint 37 #if 0 38 FILE_RCSID("@(#)$File: apprentice.c,v 1.191 2013/02/26 21:02:48 christos Exp $") 39 #else 40 __RCSID("$NetBSD: apprentice.c,v 1.9 2013/03/23 16:15:58 christos Exp $"); 41 #endif 42 #endif /* lint */ 43 44 #include "magic.h" 45 #include <stdlib.h> 46 #ifdef HAVE_UNISTD_H 47 #include <unistd.h> 48 #endif 49 #include <string.h> 50 #include <assert.h> 51 #include <ctype.h> 52 #include <fcntl.h> 53 #ifdef QUICK 54 #include <sys/mman.h> 55 #endif 56 #include <dirent.h> 57 58 #define EATAB {while (isascii((unsigned char) *l) && \ 59 isspace((unsigned char) *l)) ++l;} 60 #define LOWCASE(l) (isupper((unsigned char) (l)) ? \ 61 tolower((unsigned char) (l)) : (l)) 62 /* 63 * Work around a bug in headers on Digital Unix. 64 * At least confirmed for: OSF1 V4.0 878 65 */ 66 #if defined(__osf__) && defined(__DECC) 67 #ifdef MAP_FAILED 68 #undef MAP_FAILED 69 #endif 70 #endif 71 72 #ifndef MAP_FAILED 73 #define MAP_FAILED (void *) -1 74 #endif 75 76 #ifndef MAP_FILE 77 #define MAP_FILE 0 78 #endif 79 80 #define ALLOC_CHUNK (size_t)10 81 #define ALLOC_INCR (size_t)200 82 83 struct magic_entry { 84 struct magic *mp; 85 uint32_t cont_count; 86 uint32_t max_count; 87 }; 88 89 struct magic_map { 90 void *p; 91 size_t len; 92 struct magic *magic[MAGIC_SETS]; 93 uint32_t nmagic[MAGIC_SETS]; 94 }; 95 96 int file_formats[FILE_NAMES_SIZE]; 97 const size_t file_nformats = FILE_NAMES_SIZE; 98 const char *file_names[FILE_NAMES_SIZE]; 99 const size_t file_nnames = FILE_NAMES_SIZE; 100 101 private int getvalue(struct magic_set *ms, struct magic *, const char **, int); 102 private int hextoint(int); 103 private const char *getstr(struct magic_set *, struct magic *, const char *, 104 int); 105 private int parse(struct magic_set *, struct magic_entry *, const char *, 106 size_t, int); 107 private void eatsize(const char **); 108 private int apprentice_1(struct magic_set *, const char *, int); 109 private size_t 
apprentice_magic_strength(const struct magic *); 110 private int apprentice_sort(const void *, const void *); 111 private void apprentice_list(struct mlist *, int ); 112 private struct magic_map *apprentice_load(struct magic_set *, 113 const char *, int); 114 private struct mlist *mlist_alloc(void); 115 private void mlist_free(struct mlist *); 116 private void byteswap(struct magic *, uint32_t); 117 private void bs1(struct magic *); 118 private uint16_t swap2(uint16_t); 119 private uint32_t swap4(uint32_t); 120 private uint64_t swap8(uint64_t); 121 private char *mkdbname(struct magic_set *, const char *, int); 122 private struct magic_map *apprentice_map(struct magic_set *, const char *); 123 private void apprentice_unmap(struct magic_map *); 124 private int apprentice_compile(struct magic_set *, struct magic_map *, 125 const char *); 126 private int check_format_type(const char *, int); 127 private int check_format(struct magic_set *, struct magic *); 128 private int get_op(char); 129 private int parse_mime(struct magic_set *, struct magic_entry *, const char *); 130 private int parse_strength(struct magic_set *, struct magic_entry *, const char *); 131 private int parse_apple(struct magic_set *, struct magic_entry *, const char *); 132 133 134 private size_t maxmagic[MAGIC_SETS] = { 0 }; 135 private size_t magicsize = sizeof(struct magic); 136 137 private const char usg_hdr[] = "cont\toffset\ttype\topcode\tmask\tvalue\tdesc"; 138 139 private struct { 140 const char *name; 141 size_t len; 142 int (*fun)(struct magic_set *, struct magic_entry *, const char *); 143 } bang[] = { 144 #define DECLARE_FIELD(name) { # name, sizeof(# name) - 1, parse_ ## name } 145 DECLARE_FIELD(mime), 146 DECLARE_FIELD(apple), 147 DECLARE_FIELD(strength), 148 #undef DECLARE_FIELD 149 { NULL, 0, NULL } 150 }; 151 152 #ifdef COMPILE_ONLY 153 154 int main(int, char *[]); 155 156 int 157 main(int argc, char *argv[]) 158 { 159 int ret; 160 struct magic_set *ms; 161 char *progname; 162 163 if 
((progname = strrchr(argv[0], '/')) != NULL) 164 progname++; 165 else 166 progname = argv[0]; 167 168 if (argc != 2) { 169 (void)fprintf(stderr, "Usage: %s file\n", progname); 170 return 1; 171 } 172 173 if ((ms = magic_open(MAGIC_CHECK)) == NULL) { 174 (void)fprintf(stderr, "%s: %s\n", progname, strerror(errno)); 175 return 1; 176 } 177 ret = magic_compile(ms, argv[1]) == -1 ? 1 : 0; 178 if (ret == 1) 179 (void)fprintf(stderr, "%s: %s\n", progname, magic_error(ms)); 180 magic_close(ms); 181 return ret; 182 } 183 #endif /* COMPILE_ONLY */ 184 185 struct type_tbl_s { 186 const char name[16]; 187 const size_t len; 188 const int type; 189 const int format; 190 }; 191 192 /* 193 * XXX - the actual Single UNIX Specification says that "long" means "long", 194 * as in the C data type, but we treat it as meaning "4-byte integer". 195 * Given that the OS X version of file 5.04 did the same, I guess that passes 196 * the actual test; having "long" be dependent on how big a "long" is on 197 * the machine running "file" is silly. 
198 */ 199 static const struct type_tbl_s type_tbl[] = { 200 # define XX(s) s, (sizeof(s) - 1) 201 # define XX_NULL "", 0 202 { XX("invalid"), FILE_INVALID, FILE_FMT_NONE }, 203 { XX("byte"), FILE_BYTE, FILE_FMT_NUM }, 204 { XX("short"), FILE_SHORT, FILE_FMT_NUM }, 205 { XX("default"), FILE_DEFAULT, FILE_FMT_STR }, 206 { XX("long"), FILE_LONG, FILE_FMT_NUM }, 207 { XX("string"), FILE_STRING, FILE_FMT_STR }, 208 { XX("date"), FILE_DATE, FILE_FMT_STR }, 209 { XX("beshort"), FILE_BESHORT, FILE_FMT_NUM }, 210 { XX("belong"), FILE_BELONG, FILE_FMT_NUM }, 211 { XX("bedate"), FILE_BEDATE, FILE_FMT_STR }, 212 { XX("leshort"), FILE_LESHORT, FILE_FMT_NUM }, 213 { XX("lelong"), FILE_LELONG, FILE_FMT_NUM }, 214 { XX("ledate"), FILE_LEDATE, FILE_FMT_STR }, 215 { XX("pstring"), FILE_PSTRING, FILE_FMT_STR }, 216 { XX("ldate"), FILE_LDATE, FILE_FMT_STR }, 217 { XX("beldate"), FILE_BELDATE, FILE_FMT_STR }, 218 { XX("leldate"), FILE_LELDATE, FILE_FMT_STR }, 219 { XX("regex"), FILE_REGEX, FILE_FMT_STR }, 220 { XX("bestring16"), FILE_BESTRING16, FILE_FMT_STR }, 221 { XX("lestring16"), FILE_LESTRING16, FILE_FMT_STR }, 222 { XX("search"), FILE_SEARCH, FILE_FMT_STR }, 223 { XX("medate"), FILE_MEDATE, FILE_FMT_STR }, 224 { XX("meldate"), FILE_MELDATE, FILE_FMT_STR }, 225 { XX("melong"), FILE_MELONG, FILE_FMT_NUM }, 226 { XX("quad"), FILE_QUAD, FILE_FMT_QUAD }, 227 { XX("lequad"), FILE_LEQUAD, FILE_FMT_QUAD }, 228 { XX("bequad"), FILE_BEQUAD, FILE_FMT_QUAD }, 229 { XX("qdate"), FILE_QDATE, FILE_FMT_STR }, 230 { XX("leqdate"), FILE_LEQDATE, FILE_FMT_STR }, 231 { XX("beqdate"), FILE_BEQDATE, FILE_FMT_STR }, 232 { XX("qldate"), FILE_QLDATE, FILE_FMT_STR }, 233 { XX("leqldate"), FILE_LEQLDATE, FILE_FMT_STR }, 234 { XX("beqldate"), FILE_BEQLDATE, FILE_FMT_STR }, 235 { XX("float"), FILE_FLOAT, FILE_FMT_FLOAT }, 236 { XX("befloat"), FILE_BEFLOAT, FILE_FMT_FLOAT }, 237 { XX("lefloat"), FILE_LEFLOAT, FILE_FMT_FLOAT }, 238 { XX("double"), FILE_DOUBLE, FILE_FMT_DOUBLE }, 239 { XX("bedouble"), 
FILE_BEDOUBLE, FILE_FMT_DOUBLE }, 240 { XX("ledouble"), FILE_LEDOUBLE, FILE_FMT_DOUBLE }, 241 { XX("leid3"), FILE_LEID3, FILE_FMT_NUM }, 242 { XX("beid3"), FILE_BEID3, FILE_FMT_NUM }, 243 { XX("indirect"), FILE_INDIRECT, FILE_FMT_NUM }, 244 { XX("qwdate"), FILE_QWDATE, FILE_FMT_STR }, 245 { XX("leqwdate"), FILE_LEQWDATE, FILE_FMT_STR }, 246 { XX("beqwdate"), FILE_BEQWDATE, FILE_FMT_STR }, 247 { XX("name"), FILE_NAME, FILE_FMT_NONE }, 248 { XX("use"), FILE_USE, FILE_FMT_NONE }, 249 { XX_NULL, FILE_INVALID, FILE_FMT_NONE }, 250 }; 251 252 /* 253 * These are not types, and cannot be preceded by "u" to make them 254 * unsigned. 255 */ 256 static const struct type_tbl_s special_tbl[] = { 257 { XX("name"), FILE_NAME, FILE_FMT_STR }, 258 { XX("use"), FILE_USE, FILE_FMT_STR }, 259 { XX_NULL, FILE_INVALID, FILE_FMT_NONE }, 260 }; 261 # undef XX 262 # undef XX_NULL 263 264 private int 265 get_type(const struct type_tbl_s *tbl, const char *l, const char **t) 266 { 267 const struct type_tbl_s *p; 268 269 for (p = tbl; p->len; p++) { 270 if (strncmp(l, p->name, p->len) == 0) { 271 if (t) 272 *t = l + p->len; 273 break; 274 } 275 } 276 return p->type; 277 } 278 279 private int 280 get_standard_integer_type(const char *l, const char **t) 281 { 282 int type; 283 284 if (isalpha((unsigned char)l[1])) { 285 switch (l[1]) { 286 case 'C': 287 /* "dC" and "uC" */ 288 type = FILE_BYTE; 289 break; 290 case 'S': 291 /* "dS" and "uS" */ 292 type = FILE_SHORT; 293 break; 294 case 'I': 295 case 'L': 296 /* 297 * "dI", "dL", "uI", and "uL". 298 * 299 * XXX - the actual Single UNIX Specification says 300 * that "L" means "long", as in the C data type, 301 * but we treat it as meaning "4-byte integer". 302 * Given that the OS X version of file 5.04 did 303 * the same, I guess that passes the actual SUS 304 * validation suite; having "dL" be dependent on 305 * how big a "long" is on the machine running 306 * "file" is silly. 
307 */ 308 type = FILE_LONG; 309 break; 310 case 'Q': 311 /* "dQ" and "uQ" */ 312 type = FILE_QUAD; 313 break; 314 default: 315 /* "d{anything else}", "u{anything else}" */ 316 return FILE_INVALID; 317 } 318 l += 2; 319 } else if (isdigit((unsigned char)l[1])) { 320 /* 321 * "d{num}" and "u{num}"; we only support {num} values 322 * of 1, 2, 4, and 8 - the Single UNIX Specification 323 * doesn't say anything about whether arbitrary 324 * values should be supported, but both the Solaris 10 325 * and OS X Mountain Lion versions of file passed the 326 * Single UNIX Specification validation suite, and 327 * neither of them support values bigger than 8 or 328 * non-power-of-2 values. 329 */ 330 if (isdigit((unsigned char)l[2])) { 331 /* Multi-digit, so > 9 */ 332 return FILE_INVALID; 333 } 334 switch (l[1]) { 335 case '1': 336 type = FILE_BYTE; 337 break; 338 case '2': 339 type = FILE_SHORT; 340 break; 341 case '4': 342 type = FILE_LONG; 343 break; 344 case '8': 345 type = FILE_QUAD; 346 break; 347 default: 348 /* XXX - what about 3, 5, 6, or 7? */ 349 return FILE_INVALID; 350 } 351 l += 2; 352 } else { 353 /* 354 * "d" or "u" by itself. 355 */ 356 type = FILE_LONG; 357 ++l; 358 } 359 if (t) 360 *t = l; 361 return type; 362 } 363 364 private void 365 init_file_tables(void) 366 { 367 static int done = 0; 368 const struct type_tbl_s *p; 369 370 if (done) 371 return; 372 done++; 373 374 for (p = type_tbl; p->len; p++) { 375 assert(p->type < FILE_NAMES_SIZE); 376 file_names[p->type] = p->name; 377 file_formats[p->type] = p->format; 378 } 379 assert(p - type_tbl == FILE_NAMES_SIZE); 380 } 381 382 private int 383 add_mlist(struct mlist *mlp, struct magic_map *map, size_t idx) 384 { 385 struct mlist *ml; 386 387 if ((ml = CAST(struct mlist *, malloc(sizeof(*ml)))) == NULL) 388 return -1; 389 390 ml->map = idx == 0 ? 
map : NULL; 391 ml->magic = map->magic[idx]; 392 ml->nmagic = map->nmagic[idx]; 393 394 mlp->prev->next = ml; 395 ml->prev = mlp->prev; 396 ml->next = mlp; 397 mlp->prev = ml; 398 return 0; 399 } 400 401 /* 402 * Handle one file or directory. 403 */ 404 private int 405 apprentice_1(struct magic_set *ms, const char *fn, int action) 406 { 407 struct mlist *ml; 408 struct magic_map *map; 409 size_t i; 410 411 if (magicsize != FILE_MAGICSIZE) { 412 file_error(ms, 0, "magic element size %lu != %lu", 413 (unsigned long)sizeof(*map->magic[0]), 414 (unsigned long)FILE_MAGICSIZE); 415 return -1; 416 } 417 418 if (action == FILE_COMPILE) { 419 map = apprentice_load(ms, fn, action); 420 if (map == NULL) 421 return -1; 422 return apprentice_compile(ms, map, fn); 423 } 424 425 #ifndef COMPILE_ONLY 426 map = apprentice_map(ms, fn); 427 if (map == NULL) { 428 if (ms->flags & MAGIC_CHECK) 429 file_magwarn(ms, "using regular magic file `%s'", fn); 430 map = apprentice_load(ms, fn, action); 431 if (map == NULL) 432 return -1; 433 } 434 435 for (i = 0; i < MAGIC_SETS; i++) { 436 if (add_mlist(ms->mlist[i], map, i) == -1) { 437 file_oomem(ms, sizeof(*ml)); 438 apprentice_unmap(map); 439 return -1; 440 } 441 } 442 443 if (action == FILE_LIST) { 444 for (i = 0; i < MAGIC_SETS; i++) { 445 printf("Set %zu:\nBinary patterns:\n", i); 446 apprentice_list(ms->mlist[i], BINTEST); 447 printf("Text patterns:\n"); 448 apprentice_list(ms->mlist[i], TEXTTEST); 449 } 450 } 451 452 return 0; 453 #endif /* COMPILE_ONLY */ 454 } 455 456 protected void 457 file_ms_free(struct magic_set *ms) 458 { 459 size_t i; 460 if (ms == NULL) 461 return; 462 for (i = 0; i < MAGIC_SETS; i++) 463 mlist_free(ms->mlist[i]); 464 free(ms->o.pbuf); 465 free(ms->o.buf); 466 free(ms->c.li); 467 free(ms); 468 } 469 470 protected struct magic_set * 471 file_ms_alloc(int flags) 472 { 473 struct magic_set *ms; 474 size_t i, len; 475 476 if ((ms = CAST(struct magic_set *, calloc((size_t)1, 477 sizeof(struct magic_set)))) == NULL) 
478 return NULL; 479 480 if (magic_setflags(ms, flags) == -1) { 481 errno = EINVAL; 482 goto free; 483 } 484 485 ms->o.buf = ms->o.pbuf = NULL; 486 len = (ms->c.len = 10) * sizeof(*ms->c.li); 487 488 if ((ms->c.li = CAST(struct level_info *, malloc(len))) == NULL) 489 goto free; 490 491 ms->event_flags = 0; 492 ms->error = -1; 493 for (i = 0; i < MAGIC_SETS; i++) 494 ms->mlist[i] = NULL; 495 ms->file = "unknown"; 496 ms->line = 0; 497 return ms; 498 free: 499 free(ms); 500 return NULL; 501 } 502 503 private void 504 apprentice_unmap(struct magic_map *map) 505 { 506 if (map == NULL) 507 return; 508 if (map->p == NULL) 509 return; 510 #ifdef QUICK 511 if (map->len) 512 (void)munmap(map->p, map->len); 513 else 514 #endif 515 free(map->p); 516 free(map); 517 } 518 519 private struct mlist * 520 mlist_alloc(void) 521 { 522 struct mlist *mlist; 523 if ((mlist = CAST(struct mlist *, calloc(1, sizeof(*mlist)))) == NULL) { 524 return NULL; 525 } 526 mlist->next = mlist->prev = mlist; 527 return mlist; 528 } 529 530 private void 531 mlist_free(struct mlist *mlist) 532 { 533 struct mlist *ml; 534 535 if (mlist == NULL) 536 return; 537 538 for (ml = mlist->next; ml != mlist;) { 539 struct mlist *next = ml->next; 540 if (ml->map) 541 apprentice_unmap(ml->map); 542 free(ml); 543 ml = next; 544 } 545 free(ml); 546 } 547 548 /* const char *fn: list of magic files and directories */ 549 protected int 550 file_apprentice(struct magic_set *ms, const char *fn, int action) 551 { 552 char *p, *mfn; 553 int file_err, errs = -1; 554 size_t i; 555 556 if ((fn = magic_getpath(fn, action)) == NULL) 557 return -1; 558 559 init_file_tables(); 560 561 if ((mfn = strdup(fn)) == NULL) { 562 file_oomem(ms, strlen(fn)); 563 return -1; 564 } 565 566 for (i = 0; i < MAGIC_SETS; i++) { 567 mlist_free(ms->mlist[i]); 568 if ((ms->mlist[i] = mlist_alloc()) == NULL) { 569 file_oomem(ms, sizeof(*ms->mlist[i])); 570 if (i != 0) { 571 --i; 572 do 573 mlist_free(ms->mlist[i]); 574 while (i != 0); 575 } 576 
free(mfn); 577 return -1; 578 } 579 } 580 fn = mfn; 581 582 while (fn) { 583 p = strchr(fn, PATHSEP); 584 if (p) 585 *p++ = '\0'; 586 if (*fn == '\0') 587 break; 588 file_err = apprentice_1(ms, fn, action); 589 errs = MAX(errs, file_err); 590 fn = p; 591 } 592 593 free(mfn); 594 595 if (errs == -1) { 596 for (i = 0; i < MAGIC_SETS; i++) { 597 mlist_free(ms->mlist[i]); 598 ms->mlist[i] = NULL; 599 } 600 file_error(ms, 0, "could not find any valid magic files!"); 601 return -1; 602 } 603 604 if (action == FILE_LOAD) 605 return 0; 606 607 for (i = 0; i < MAGIC_SETS; i++) { 608 mlist_free(ms->mlist[i]); 609 ms->mlist[i] = NULL; 610 } 611 612 switch (action) { 613 case FILE_COMPILE: 614 case FILE_CHECK: 615 case FILE_LIST: 616 return 0; 617 default: 618 file_error(ms, 0, "Invalid action %d", action); 619 return -1; 620 } 621 } 622 623 /* 624 * Get weight of this magic entry, for sorting purposes. 625 */ 626 private size_t 627 apprentice_magic_strength(const struct magic *m) 628 { 629 #define MULT 10 630 size_t val = 2 * MULT; /* baseline strength */ 631 632 switch (m->type) { 633 case FILE_DEFAULT: /* make sure this sorts last */ 634 if (m->factor_op != FILE_FACTOR_OP_NONE) 635 abort(); 636 return 0; 637 638 case FILE_BYTE: 639 val += 1 * MULT; 640 break; 641 642 case FILE_SHORT: 643 case FILE_LESHORT: 644 case FILE_BESHORT: 645 val += 2 * MULT; 646 break; 647 648 case FILE_LONG: 649 case FILE_LELONG: 650 case FILE_BELONG: 651 case FILE_MELONG: 652 val += 4 * MULT; 653 break; 654 655 case FILE_PSTRING: 656 case FILE_STRING: 657 val += m->vallen * MULT; 658 break; 659 660 case FILE_BESTRING16: 661 case FILE_LESTRING16: 662 val += m->vallen * MULT / 2; 663 break; 664 665 case FILE_SEARCH: 666 case FILE_REGEX: 667 val += m->vallen * MAX(MULT / m->vallen, 1); 668 break; 669 670 case FILE_DATE: 671 case FILE_LEDATE: 672 case FILE_BEDATE: 673 case FILE_MEDATE: 674 case FILE_LDATE: 675 case FILE_LELDATE: 676 case FILE_BELDATE: 677 case FILE_MELDATE: 678 case FILE_FLOAT: 679 
case FILE_BEFLOAT: 680 case FILE_LEFLOAT: 681 val += 4 * MULT; 682 break; 683 684 case FILE_QUAD: 685 case FILE_BEQUAD: 686 case FILE_LEQUAD: 687 case FILE_QDATE: 688 case FILE_LEQDATE: 689 case FILE_BEQDATE: 690 case FILE_QLDATE: 691 case FILE_LEQLDATE: 692 case FILE_BEQLDATE: 693 case FILE_QWDATE: 694 case FILE_LEQWDATE: 695 case FILE_BEQWDATE: 696 case FILE_DOUBLE: 697 case FILE_BEDOUBLE: 698 case FILE_LEDOUBLE: 699 val += 8 * MULT; 700 break; 701 702 case FILE_INDIRECT: 703 case FILE_NAME: 704 case FILE_USE: 705 break; 706 707 default: 708 val = 0; 709 (void)fprintf(stderr, "Bad type %d\n", m->type); 710 abort(); 711 } 712 713 switch (m->reln) { 714 case 'x': /* matches anything penalize */ 715 case '!': /* matches almost anything penalize */ 716 val = 0; 717 break; 718 719 case '=': /* Exact match, prefer */ 720 val += MULT; 721 break; 722 723 case '>': 724 case '<': /* comparison match reduce strength */ 725 val -= 2 * MULT; 726 break; 727 728 case '^': 729 case '&': /* masking bits, we could count them too */ 730 val -= MULT; 731 break; 732 733 default: 734 (void)fprintf(stderr, "Bad relation %c\n", m->reln); 735 abort(); 736 } 737 738 if (val == 0) /* ensure we only return 0 for FILE_DEFAULT */ 739 val = 1; 740 741 switch (m->factor_op) { 742 case FILE_FACTOR_OP_NONE: 743 break; 744 case FILE_FACTOR_OP_PLUS: 745 val += m->factor; 746 break; 747 case FILE_FACTOR_OP_MINUS: 748 val -= m->factor; 749 break; 750 case FILE_FACTOR_OP_TIMES: 751 val *= m->factor; 752 break; 753 case FILE_FACTOR_OP_DIV: 754 val /= m->factor; 755 break; 756 default: 757 abort(); 758 } 759 760 /* 761 * Magic entries with no description get a bonus because they depend 762 * on subsequent magic entries to print something. 
763 */ 764 if (m->desc[0] == '\0') 765 val++; 766 return val; 767 } 768 769 /* 770 * Sort callback for sorting entries by "strength" (basically length) 771 */ 772 private int 773 apprentice_sort(const void *a, const void *b) 774 { 775 const struct magic_entry *ma = CAST(const struct magic_entry *, a); 776 const struct magic_entry *mb = CAST(const struct magic_entry *, b); 777 size_t sa = apprentice_magic_strength(ma->mp); 778 size_t sb = apprentice_magic_strength(mb->mp); 779 if (sa == sb) 780 return 0; 781 else if (sa > sb) 782 return -1; 783 else 784 return 1; 785 } 786 787 /* 788 * Shows sorted patterns list in the order which is used for the matching 789 */ 790 private void 791 apprentice_list(struct mlist *mlist, int mode) 792 { 793 uint32_t magindex = 0; 794 struct mlist *ml; 795 for (ml = mlist->next; ml != mlist; ml = ml->next) { 796 for (magindex = 0; magindex < ml->nmagic; magindex++) { 797 struct magic *m = &ml->magic[magindex]; 798 if ((m->flag & mode) != mode) { 799 /* Skip sub-tests */ 800 while (magindex + 1 < ml->nmagic && 801 ml->magic[magindex + 1].cont_level != 0) 802 ++magindex; 803 continue; /* Skip to next top-level test*/ 804 } 805 806 /* 807 * Try to iterate over the tree until we find item with 808 * description/mimetype. 
809 */ 810 while (magindex + 1 < ml->nmagic && 811 ml->magic[magindex + 1].cont_level != 0 && 812 *ml->magic[magindex].desc == '\0' && 813 *ml->magic[magindex].mimetype == '\0') 814 magindex++; 815 816 printf("Strength = %3" SIZE_T_FORMAT "u : %s [%s]\n", 817 apprentice_magic_strength(m), 818 ml->magic[magindex].desc, 819 ml->magic[magindex].mimetype); 820 } 821 } 822 } 823 824 private void 825 set_test_type(struct magic *mstart, struct magic *m) 826 { 827 switch (m->type) { 828 case FILE_BYTE: 829 case FILE_SHORT: 830 case FILE_LONG: 831 case FILE_DATE: 832 case FILE_BESHORT: 833 case FILE_BELONG: 834 case FILE_BEDATE: 835 case FILE_LESHORT: 836 case FILE_LELONG: 837 case FILE_LEDATE: 838 case FILE_LDATE: 839 case FILE_BELDATE: 840 case FILE_LELDATE: 841 case FILE_MEDATE: 842 case FILE_MELDATE: 843 case FILE_MELONG: 844 case FILE_QUAD: 845 case FILE_LEQUAD: 846 case FILE_BEQUAD: 847 case FILE_QDATE: 848 case FILE_LEQDATE: 849 case FILE_BEQDATE: 850 case FILE_QLDATE: 851 case FILE_LEQLDATE: 852 case FILE_BEQLDATE: 853 case FILE_QWDATE: 854 case FILE_LEQWDATE: 855 case FILE_BEQWDATE: 856 case FILE_FLOAT: 857 case FILE_BEFLOAT: 858 case FILE_LEFLOAT: 859 case FILE_DOUBLE: 860 case FILE_BEDOUBLE: 861 case FILE_LEDOUBLE: 862 mstart->flag |= BINTEST; 863 break; 864 case FILE_STRING: 865 case FILE_PSTRING: 866 case FILE_BESTRING16: 867 case FILE_LESTRING16: 868 /* Allow text overrides */ 869 if (mstart->str_flags & STRING_TEXTTEST) 870 mstart->flag |= TEXTTEST; 871 else 872 mstart->flag |= BINTEST; 873 break; 874 case FILE_REGEX: 875 case FILE_SEARCH: 876 /* Check for override */ 877 if (mstart->str_flags & STRING_BINTEST) 878 mstart->flag |= BINTEST; 879 if (mstart->str_flags & STRING_TEXTTEST) 880 mstart->flag |= TEXTTEST; 881 882 if (mstart->flag & (TEXTTEST|BINTEST)) 883 break; 884 885 /* binary test if pattern is not text */ 886 if (file_looks_utf8(m->value.us, (size_t)m->vallen, NULL, 887 NULL) <= 0) 888 mstart->flag |= BINTEST; 889 else 890 mstart->flag |= 
TEXTTEST; 891 break; 892 case FILE_DEFAULT: 893 /* can't deduce anything; we shouldn't see this at the 894 top level anyway */ 895 break; 896 case FILE_INVALID: 897 default: 898 /* invalid search type, but no need to complain here */ 899 break; 900 } 901 } 902 903 private int 904 addentry(struct magic_set *ms, struct magic_entry *me, 905 struct magic_entry **mentry, uint32_t *mentrycount) 906 { 907 size_t i = me->mp->type == FILE_NAME ? 1 : 0; 908 if (mentrycount[i] == maxmagic[i]) { 909 struct magic_entry *mp; 910 911 maxmagic[i] += ALLOC_INCR; 912 if ((mp = CAST(struct magic_entry *, 913 realloc(mentry[i], sizeof(*mp) * maxmagic[i]))) == 914 NULL) { 915 file_oomem(ms, sizeof(*mp) * maxmagic[i]); 916 return -1; 917 } 918 (void)memset(&mp[mentrycount[i]], 0, sizeof(*mp) * 919 ALLOC_INCR); 920 mentry[i] = mp; 921 } 922 mentry[i][mentrycount[i]++] = *me; 923 memset(me, 0, sizeof(*me)); 924 return 0; 925 } 926 927 /* 928 * Load and parse one file. 929 */ 930 private void 931 load_1(struct magic_set *ms, int action, const char *fn, int *errs, 932 struct magic_entry **mentry, uint32_t *mentrycount) 933 { 934 size_t lineno = 0, llen = 0; 935 char *line = NULL; 936 ssize_t len; 937 struct magic_entry me; 938 939 FILE *f = fopen(ms->file = fn, "r"); 940 if (f == NULL) { 941 if (errno != ENOENT) 942 file_error(ms, errno, "cannot read magic file `%s'", 943 fn); 944 (*errs)++; 945 return; 946 } 947 948 memset(&me, 0, sizeof(me)); 949 /* read and parse this file */ 950 for (ms->line = 1; (len = getline(&line, &llen, f)) != -1; 951 ms->line++) { 952 if (len == 0) /* null line, garbage, etc */ 953 continue; 954 if (line[len - 1] == '\n') { 955 lineno++; 956 line[len - 1] = '\0'; /* delete newline */ 957 } 958 switch (line[0]) { 959 case '\0': /* empty, do not parse */ 960 case '#': /* comment, do not parse */ 961 continue; 962 case '!': 963 if (line[1] == ':') { 964 size_t i; 965 966 for (i = 0; bang[i].name != NULL; i++) { 967 if ((size_t)(len - 2) > bang[i].len && 968 
memcmp(bang[i].name, line + 2, 969 bang[i].len) == 0) 970 break; 971 } 972 if (bang[i].name == NULL) { 973 file_error(ms, 0, 974 "Unknown !: entry `%s'", line); 975 (*errs)++; 976 continue; 977 } 978 if (me.mp == NULL) { 979 file_error(ms, 0, 980 "No current entry for :!%s type", 981 bang[i].name); 982 (*errs)++; 983 continue; 984 } 985 if ((*bang[i].fun)(ms, &me, 986 line + bang[i].len + 2) != 0) { 987 (*errs)++; 988 continue; 989 } 990 continue; 991 } 992 /*FALLTHROUGH*/ 993 default: 994 again: 995 switch (parse(ms, &me, line, lineno, action)) { 996 case 0: 997 continue; 998 case 1: 999 (void)addentry(ms, &me, mentry, mentrycount); 1000 goto again; 1001 default: 1002 (*errs)++; 1003 break; 1004 } 1005 } 1006 } 1007 if (me.mp) 1008 (void)addentry(ms, &me, mentry, mentrycount); 1009 free(line); 1010 (void)fclose(f); 1011 } 1012 1013 /* 1014 * parse a file or directory of files 1015 * const char *fn: name of magic file or directory 1016 */ 1017 private int 1018 cmpstrp(const void *p1, const void *p2) 1019 { 1020 return strcmp(*(char *const *)p1, *(char *const *)p2); 1021 } 1022 1023 1024 private uint32_t 1025 set_text_binary(struct magic_set *ms, struct magic_entry *me, uint32_t nme, 1026 uint32_t starttest) 1027 { 1028 static const char text[] = "text"; 1029 static const char binary[] = "binary"; 1030 static const size_t len = sizeof(text); 1031 1032 uint32_t i = starttest; 1033 1034 do { 1035 set_test_type(me[starttest].mp, me[i].mp); 1036 if ((ms->flags & MAGIC_DEBUG) == 0) 1037 continue; 1038 (void)fprintf(stderr, "%s%s%s: %s\n", 1039 me[i].mp->mimetype, 1040 me[i].mp->mimetype[0] == '\0' ? "" : "; ", 1041 me[i].mp->desc[0] ? me[i].mp->desc : "(no description)", 1042 me[i].mp->flag & BINTEST ? 
binary : text); 1043 if (me[i].mp->flag & BINTEST) { 1044 char *p = strstr(me[i].mp->desc, text); 1045 if (p && (p == me[i].mp->desc || 1046 isspace((unsigned char)p[-1])) && 1047 (p + len - me[i].mp->desc == MAXstring 1048 || (p[len] == '\0' || 1049 isspace((unsigned char)p[len])))) 1050 (void)fprintf(stderr, "*** Possible " 1051 "binary test for text type\n"); 1052 } 1053 } while (++i < nme && me[i].mp->cont_level != 0); 1054 return i; 1055 } 1056 1057 private void 1058 set_last_default(struct magic_set *ms, struct magic_entry *me, uint32_t nme) 1059 { 1060 uint32_t i; 1061 for (i = 0; i < nme; i++) { 1062 if (me[i].mp->cont_level == 0 && 1063 me[i].mp->type == FILE_DEFAULT) { 1064 while (++i < nme) 1065 if (me[i].mp->cont_level == 0) 1066 break; 1067 if (i != nme) { 1068 /* XXX - Ugh! */ 1069 ms->line = me[i].mp->lineno; 1070 file_magwarn(ms, 1071 "level 0 \"default\" did not sort last"); 1072 } 1073 return; 1074 } 1075 } 1076 } 1077 1078 private int 1079 coalesce_entries(struct magic_set *ms, struct magic_entry *me, uint32_t nme, 1080 struct magic **ma, uint32_t *nma) 1081 { 1082 uint32_t i, mentrycount = 0; 1083 size_t slen; 1084 1085 for (i = 0; i < nme; i++) 1086 mentrycount += me[i].cont_count; 1087 1088 slen = sizeof(**ma) * mentrycount; 1089 if ((*ma = CAST(struct magic *, malloc(slen))) == NULL) { 1090 file_oomem(ms, slen); 1091 return -1; 1092 } 1093 1094 mentrycount = 0; 1095 for (i = 0; i < nme; i++) { 1096 (void)memcpy(*ma + mentrycount, me[i].mp, 1097 me[i].cont_count * sizeof(**ma)); 1098 mentrycount += me[i].cont_count; 1099 } 1100 *nma = mentrycount; 1101 return 0; 1102 } 1103 1104 private void 1105 magic_entry_free(struct magic_entry *me, uint32_t nme) 1106 { 1107 uint32_t i; 1108 if (me == NULL) 1109 return; 1110 for (i = 0; i < nme; i++) 1111 free(me[i].mp); 1112 free(me); 1113 } 1114 1115 private struct magic_map * 1116 apprentice_load(struct magic_set *ms, const char *fn, int action) 1117 { 1118 int errs = 0; 1119 struct magic_entry 
*mentry[MAGIC_SETS] = { NULL }; 1120 uint32_t mentrycount[MAGIC_SETS] = { 0 }; 1121 uint32_t i, j; 1122 size_t files = 0, maxfiles = 0; 1123 char **filearr = NULL, *mfn; 1124 struct stat st; 1125 struct magic_map *map; 1126 DIR *dir; 1127 struct dirent *d; 1128 1129 ms->flags |= MAGIC_CHECK; /* Enable checks for parsed files */ 1130 1131 if ((map = CAST(struct magic_map *, calloc(1, sizeof(*map)))) == NULL) { 1132 file_oomem(ms, sizeof(*map)); 1133 return NULL; 1134 } 1135 1136 /* print silly verbose header for USG compat. */ 1137 if (action == FILE_CHECK) 1138 (void)fprintf(stderr, "%s\n", usg_hdr); 1139 1140 /* load directory or file */ 1141 if (stat(fn, &st) == 0 && S_ISDIR(st.st_mode)) { 1142 dir = opendir(fn); 1143 if (!dir) { 1144 errs++; 1145 goto out; 1146 } 1147 while ((d = readdir(dir)) != NULL) { 1148 if (asprintf(&mfn, "%s/%s", fn, d->d_name) < 0) { 1149 file_oomem(ms, 1150 strlen(fn) + strlen(d->d_name) + 2); 1151 errs++; 1152 closedir(dir); 1153 goto out; 1154 } 1155 if (stat(mfn, &st) == -1 || !S_ISREG(st.st_mode)) { 1156 free(mfn); 1157 continue; 1158 } 1159 if (files >= maxfiles) { 1160 size_t mlen; 1161 maxfiles = (maxfiles + 1) * 2; 1162 mlen = maxfiles * sizeof(*filearr); 1163 if ((filearr = CAST(char **, 1164 realloc(filearr, mlen))) == NULL) { 1165 file_oomem(ms, mlen); 1166 free(mfn); 1167 closedir(dir); 1168 errs++; 1169 goto out; 1170 } 1171 } 1172 filearr[files++] = mfn; 1173 } 1174 closedir(dir); 1175 qsort(filearr, files, sizeof(*filearr), cmpstrp); 1176 for (i = 0; i < files; i++) { 1177 load_1(ms, action, filearr[i], &errs, mentry, 1178 mentrycount); 1179 free(filearr[i]); 1180 } 1181 free(filearr); 1182 } else 1183 load_1(ms, action, fn, &errs, mentry, mentrycount); 1184 if (errs) 1185 goto out; 1186 1187 for (j = 0; j < MAGIC_SETS; j++) { 1188 /* Set types of tests */ 1189 for (i = 0; i < mentrycount[j]; ) { 1190 if (mentry[j][i].mp->cont_level != 0) { 1191 i++; 1192 continue; 1193 } 1194 i = set_text_binary(ms, mentry[j], 
mentrycount[j], i); 1195 } 1196 qsort(mentry[j], mentrycount[j], sizeof(*mentry[j]), 1197 apprentice_sort); 1198 1199 /* 1200 * Make sure that any level 0 "default" line is last 1201 * (if one exists). 1202 */ 1203 set_last_default(ms, mentry[j], mentrycount[j]); 1204 1205 /* coalesce per file arrays into a single one */ 1206 if (coalesce_entries(ms, mentry[j], mentrycount[j], 1207 &map->magic[j], &map->nmagic[j]) == -1) { 1208 errs++; 1209 goto out; 1210 } 1211 } 1212 1213 out: 1214 for (j = 0; j < MAGIC_SETS; j++) 1215 magic_entry_free(mentry[j], mentrycount[j]); 1216 1217 if (errs) { 1218 for (j = 0; j < MAGIC_SETS; j++) { 1219 if (map->magic[j]) 1220 free(map->magic[j]); 1221 } 1222 free(map); 1223 return NULL; 1224 } 1225 return map; 1226 } 1227 1228 /* 1229 * extend the sign bit if the comparison is to be signed 1230 */ 1231 protected uint64_t 1232 file_signextend(struct magic_set *ms, struct magic *m, uint64_t v) 1233 { 1234 if (!(m->flag & UNSIGNED)) { 1235 switch(m->type) { 1236 /* 1237 * Do not remove the casts below. They are 1238 * vital. When later compared with the data, 1239 * the sign extension must have happened. 
1240 */ 1241 case FILE_BYTE: 1242 v = (char) v; 1243 break; 1244 case FILE_SHORT: 1245 case FILE_BESHORT: 1246 case FILE_LESHORT: 1247 v = (short) v; 1248 break; 1249 case FILE_DATE: 1250 case FILE_BEDATE: 1251 case FILE_LEDATE: 1252 case FILE_MEDATE: 1253 case FILE_LDATE: 1254 case FILE_BELDATE: 1255 case FILE_LELDATE: 1256 case FILE_MELDATE: 1257 case FILE_LONG: 1258 case FILE_BELONG: 1259 case FILE_LELONG: 1260 case FILE_MELONG: 1261 case FILE_FLOAT: 1262 case FILE_BEFLOAT: 1263 case FILE_LEFLOAT: 1264 v = (int32_t) v; 1265 break; 1266 case FILE_QUAD: 1267 case FILE_BEQUAD: 1268 case FILE_LEQUAD: 1269 case FILE_QDATE: 1270 case FILE_QLDATE: 1271 case FILE_QWDATE: 1272 case FILE_BEQDATE: 1273 case FILE_BEQLDATE: 1274 case FILE_BEQWDATE: 1275 case FILE_LEQDATE: 1276 case FILE_LEQLDATE: 1277 case FILE_LEQWDATE: 1278 case FILE_DOUBLE: 1279 case FILE_BEDOUBLE: 1280 case FILE_LEDOUBLE: 1281 v = (int64_t) v; 1282 break; 1283 case FILE_STRING: 1284 case FILE_PSTRING: 1285 case FILE_BESTRING16: 1286 case FILE_LESTRING16: 1287 case FILE_REGEX: 1288 case FILE_SEARCH: 1289 case FILE_DEFAULT: 1290 case FILE_INDIRECT: 1291 case FILE_NAME: 1292 case FILE_USE: 1293 break; 1294 default: 1295 if (ms->flags & MAGIC_CHECK) 1296 file_magwarn(ms, "cannot happen: m->type=%d\n", 1297 m->type); 1298 return ~0U; 1299 } 1300 } 1301 return v; 1302 } 1303 1304 private int 1305 string_modifier_check(struct magic_set *ms, struct magic *m) 1306 { 1307 if ((ms->flags & MAGIC_CHECK) == 0) 1308 return 0; 1309 1310 if (m->type != FILE_PSTRING && (m->str_flags & PSTRING_LEN) != 0) { 1311 file_magwarn(ms, 1312 "'/BHhLl' modifiers are only allowed for pascal strings\n"); 1313 return -1; 1314 } 1315 switch (m->type) { 1316 case FILE_BESTRING16: 1317 case FILE_LESTRING16: 1318 if (m->str_flags != 0) { 1319 file_magwarn(ms, 1320 "no modifiers allowed for 16-bit strings\n"); 1321 return -1; 1322 } 1323 break; 1324 case FILE_STRING: 1325 case FILE_PSTRING: 1326 if ((m->str_flags & REGEX_OFFSET_START) != 
0) { 1327 file_magwarn(ms, 1328 "'/%c' only allowed on regex and search\n", 1329 CHAR_REGEX_OFFSET_START); 1330 return -1; 1331 } 1332 break; 1333 case FILE_SEARCH: 1334 if (m->str_range == 0) { 1335 file_magwarn(ms, 1336 "missing range; defaulting to %d\n", 1337 STRING_DEFAULT_RANGE); 1338 m->str_range = STRING_DEFAULT_RANGE; 1339 return -1; 1340 } 1341 break; 1342 case FILE_REGEX: 1343 if ((m->str_flags & STRING_COMPACT_WHITESPACE) != 0) { 1344 file_magwarn(ms, "'/%c' not allowed on regex\n", 1345 CHAR_COMPACT_WHITESPACE); 1346 return -1; 1347 } 1348 if ((m->str_flags & STRING_COMPACT_OPTIONAL_WHITESPACE) != 0) { 1349 file_magwarn(ms, "'/%c' not allowed on regex\n", 1350 CHAR_COMPACT_OPTIONAL_WHITESPACE); 1351 return -1; 1352 } 1353 break; 1354 default: 1355 file_magwarn(ms, "coding error: m->type=%d\n", 1356 m->type); 1357 return -1; 1358 } 1359 return 0; 1360 } 1361 1362 private int 1363 get_op(char c) 1364 { 1365 switch (c) { 1366 case '&': 1367 return FILE_OPAND; 1368 case '|': 1369 return FILE_OPOR; 1370 case '^': 1371 return FILE_OPXOR; 1372 case '+': 1373 return FILE_OPADD; 1374 case '-': 1375 return FILE_OPMINUS; 1376 case '*': 1377 return FILE_OPMULTIPLY; 1378 case '/': 1379 return FILE_OPDIVIDE; 1380 case '%': 1381 return FILE_OPMODULO; 1382 default: 1383 return -1; 1384 } 1385 } 1386 1387 #ifdef ENABLE_CONDITIONALS 1388 private int 1389 get_cond(const char *l, const char **t) 1390 { 1391 static const struct cond_tbl_s { 1392 char name[8]; 1393 size_t len; 1394 int cond; 1395 } cond_tbl[] = { 1396 { "if", 2, COND_IF }, 1397 { "elif", 4, COND_ELIF }, 1398 { "else", 4, COND_ELSE }, 1399 { "", 0, COND_NONE }, 1400 }; 1401 const struct cond_tbl_s *p; 1402 1403 for (p = cond_tbl; p->len; p++) { 1404 if (strncmp(l, p->name, p->len) == 0 && 1405 isspace((unsigned char)l[p->len])) { 1406 if (t) 1407 *t = l + p->len; 1408 break; 1409 } 1410 } 1411 return p->cond; 1412 } 1413 1414 private int 1415 check_cond(struct magic_set *ms, int cond, uint32_t cont_level) 
1416 { 1417 int last_cond; 1418 last_cond = ms->c.li[cont_level].last_cond; 1419 1420 switch (cond) { 1421 case COND_IF: 1422 if (last_cond != COND_NONE && last_cond != COND_ELIF) { 1423 if (ms->flags & MAGIC_CHECK) 1424 file_magwarn(ms, "syntax error: `if'"); 1425 return -1; 1426 } 1427 last_cond = COND_IF; 1428 break; 1429 1430 case COND_ELIF: 1431 if (last_cond != COND_IF && last_cond != COND_ELIF) { 1432 if (ms->flags & MAGIC_CHECK) 1433 file_magwarn(ms, "syntax error: `elif'"); 1434 return -1; 1435 } 1436 last_cond = COND_ELIF; 1437 break; 1438 1439 case COND_ELSE: 1440 if (last_cond != COND_IF && last_cond != COND_ELIF) { 1441 if (ms->flags & MAGIC_CHECK) 1442 file_magwarn(ms, "syntax error: `else'"); 1443 return -1; 1444 } 1445 last_cond = COND_NONE; 1446 break; 1447 1448 case COND_NONE: 1449 last_cond = COND_NONE; 1450 break; 1451 } 1452 1453 ms->c.li[cont_level].last_cond = last_cond; 1454 return 0; 1455 } 1456 #endif /* ENABLE_CONDITIONALS */ 1457 1458 /* 1459 * parse one line from magic file, put into magic[index++] if valid 1460 */ 1461 private int 1462 parse(struct magic_set *ms, struct magic_entry *me, const char *line, 1463 size_t lineno, int action) 1464 { 1465 #ifdef ENABLE_CONDITIONALS 1466 static uint32_t last_cont_level = 0; 1467 #endif 1468 size_t i; 1469 struct magic *m; 1470 const char *l = line; 1471 char *t; 1472 int op; 1473 uint32_t cont_level; 1474 int32_t diff; 1475 1476 cont_level = 0; 1477 1478 /* 1479 * Parse the offset. 
1480 */ 1481 while (*l == '>') { 1482 ++l; /* step over */ 1483 cont_level++; 1484 } 1485 #ifdef ENABLE_CONDITIONALS 1486 if (cont_level == 0 || cont_level > last_cont_level) 1487 if (file_check_mem(ms, cont_level) == -1) 1488 return -1; 1489 last_cont_level = cont_level; 1490 #endif 1491 if (cont_level != 0) { 1492 if (me->mp == NULL) { 1493 file_magerror(ms, "No current entry for continuation"); 1494 return -1; 1495 } 1496 if (me->cont_count == 0) { 1497 file_magerror(ms, "Continuations present with 0 count"); 1498 return -1; 1499 } 1500 m = &me->mp[me->cont_count - 1]; 1501 diff = (int32_t)cont_level - (int32_t)m->cont_level; 1502 if (diff > 1) 1503 file_magwarn(ms, "New continuation level %u is more " 1504 "than one larger than current level %u", cont_level, 1505 m->cont_level); 1506 if (me->cont_count == me->max_count) { 1507 struct magic *nm; 1508 size_t cnt = me->max_count + ALLOC_CHUNK; 1509 if ((nm = CAST(struct magic *, realloc(me->mp, 1510 sizeof(*nm) * cnt))) == NULL) { 1511 file_oomem(ms, sizeof(*nm) * cnt); 1512 return -1; 1513 } 1514 me->mp = m = nm; 1515 me->max_count = CAST(uint32_t, cnt); 1516 } 1517 m = &me->mp[me->cont_count++]; 1518 (void)memset(m, 0, sizeof(*m)); 1519 m->cont_level = cont_level; 1520 } else { 1521 static const size_t len = sizeof(*m) * ALLOC_CHUNK; 1522 if (me->mp != NULL) 1523 return 1; 1524 if ((m = CAST(struct magic *, malloc(len))) == NULL) { 1525 file_oomem(ms, len); 1526 return -1; 1527 } 1528 me->mp = m; 1529 me->max_count = ALLOC_CHUNK; 1530 (void)memset(m, 0, sizeof(*m)); 1531 m->factor_op = FILE_FACTOR_OP_NONE; 1532 m->cont_level = 0; 1533 me->cont_count = 1; 1534 } 1535 m->lineno = CAST(uint32_t, lineno); 1536 1537 if (*l == '&') { /* m->cont_level == 0 checked below. 
*/ 1538 ++l; /* step over */ 1539 m->flag |= OFFADD; 1540 } 1541 if (*l == '(') { 1542 ++l; /* step over */ 1543 m->flag |= INDIR; 1544 if (m->flag & OFFADD) 1545 m->flag = (m->flag & ~OFFADD) | INDIROFFADD; 1546 1547 if (*l == '&') { /* m->cont_level == 0 checked below */ 1548 ++l; /* step over */ 1549 m->flag |= OFFADD; 1550 } 1551 } 1552 /* Indirect offsets are not valid at level 0. */ 1553 if (m->cont_level == 0 && (m->flag & (OFFADD | INDIROFFADD))) 1554 if (ms->flags & MAGIC_CHECK) 1555 file_magwarn(ms, "relative offset at level 0"); 1556 1557 /* get offset, then skip over it */ 1558 m->offset = (uint32_t)strtoul(l, &t, 0); 1559 if (l == t) 1560 if (ms->flags & MAGIC_CHECK) 1561 file_magwarn(ms, "offset `%s' invalid", l); 1562 l = t; 1563 1564 if (m->flag & INDIR) { 1565 m->in_type = FILE_LONG; 1566 m->in_offset = 0; 1567 /* 1568 * read [.lbs][+-]nnnnn) 1569 */ 1570 if (*l == '.') { 1571 l++; 1572 switch (*l) { 1573 case 'l': 1574 m->in_type = FILE_LELONG; 1575 break; 1576 case 'L': 1577 m->in_type = FILE_BELONG; 1578 break; 1579 case 'm': 1580 m->in_type = FILE_MELONG; 1581 break; 1582 case 'h': 1583 case 's': 1584 m->in_type = FILE_LESHORT; 1585 break; 1586 case 'H': 1587 case 'S': 1588 m->in_type = FILE_BESHORT; 1589 break; 1590 case 'c': 1591 case 'b': 1592 case 'C': 1593 case 'B': 1594 m->in_type = FILE_BYTE; 1595 break; 1596 case 'e': 1597 case 'f': 1598 case 'g': 1599 m->in_type = FILE_LEDOUBLE; 1600 break; 1601 case 'E': 1602 case 'F': 1603 case 'G': 1604 m->in_type = FILE_BEDOUBLE; 1605 break; 1606 case 'i': 1607 m->in_type = FILE_LEID3; 1608 break; 1609 case 'I': 1610 m->in_type = FILE_BEID3; 1611 break; 1612 default: 1613 if (ms->flags & MAGIC_CHECK) 1614 file_magwarn(ms, 1615 "indirect offset type `%c' invalid", 1616 *l); 1617 break; 1618 } 1619 l++; 1620 } 1621 1622 m->in_op = 0; 1623 if (*l == '~') { 1624 m->in_op |= FILE_OPINVERSE; 1625 l++; 1626 } 1627 if ((op = get_op(*l)) != -1) { 1628 m->in_op |= op; 1629 l++; 1630 } 1631 if (*l == '(') { 
1632 m->in_op |= FILE_OPINDIRECT; 1633 l++; 1634 } 1635 if (isdigit((unsigned char)*l) || *l == '-') { 1636 m->in_offset = (int32_t)strtol(l, &t, 0); 1637 if (l == t) 1638 if (ms->flags & MAGIC_CHECK) 1639 file_magwarn(ms, 1640 "in_offset `%s' invalid", l); 1641 l = t; 1642 } 1643 if (*l++ != ')' || 1644 ((m->in_op & FILE_OPINDIRECT) && *l++ != ')')) 1645 if (ms->flags & MAGIC_CHECK) 1646 file_magwarn(ms, 1647 "missing ')' in indirect offset"); 1648 } 1649 EATAB; 1650 1651 #ifdef ENABLE_CONDITIONALS 1652 m->cond = get_cond(l, &l); 1653 if (check_cond(ms, m->cond, cont_level) == -1) 1654 return -1; 1655 1656 EATAB; 1657 #endif 1658 1659 /* 1660 * Parse the type. 1661 */ 1662 if (*l == 'u') { 1663 /* 1664 * Try it as a keyword type prefixed by "u"; match what 1665 * follows the "u". If that fails, try it as an SUS 1666 * integer type. 1667 */ 1668 m->type = get_type(type_tbl, l + 1, &l); 1669 if (m->type == FILE_INVALID) { 1670 /* 1671 * Not a keyword type; parse it as an SUS type, 1672 * 'u' possibly followed by a number or C/S/L. 1673 */ 1674 m->type = get_standard_integer_type(l, &l); 1675 } 1676 // It's unsigned. 1677 if (m->type != FILE_INVALID) 1678 m->flag |= UNSIGNED; 1679 } else { 1680 /* 1681 * Try it as a keyword type. If that fails, try it as 1682 * an SUS integer type if it begins with "d" or as an 1683 * SUS string type if it begins with "s". In any case, 1684 * it's not unsigned. 1685 */ 1686 m->type = get_type(type_tbl, l, &l); 1687 if (m->type == FILE_INVALID) { 1688 /* 1689 * Not a keyword type; parse it as an SUS type, 1690 * either 'd' possibly followed by a number or 1691 * C/S/L, or just 's'. 1692 */ 1693 if (*l == 'd') 1694 m->type = get_standard_integer_type(l, &l); 1695 else if (*l == 's' && !isalpha((unsigned char)l[1])) { 1696 m->type = FILE_STRING; 1697 ++l; 1698 } 1699 } 1700 } 1701 1702 if (m->type == FILE_INVALID) { 1703 /* Not found - try it as a special keyword. 
*/ 1704 m->type = get_type(special_tbl, l, &l); 1705 } 1706 1707 if (m->type == FILE_INVALID) { 1708 if (ms->flags & MAGIC_CHECK) 1709 file_magwarn(ms, "type `%s' invalid", l); 1710 return -1; 1711 } 1712 1713 /* New-style anding: "0 byte&0x80 =0x80 dynamically linked" */ 1714 /* New and improved: ~ & | ^ + - * / % -- exciting, isn't it? */ 1715 1716 m->mask_op = 0; 1717 if (*l == '~') { 1718 if (!IS_STRING(m->type)) 1719 m->mask_op |= FILE_OPINVERSE; 1720 else if (ms->flags & MAGIC_CHECK) 1721 file_magwarn(ms, "'~' invalid for string types"); 1722 ++l; 1723 } 1724 m->str_range = 0; 1725 m->str_flags = m->type == FILE_PSTRING ? PSTRING_1_LE : 0; 1726 if ((op = get_op(*l)) != -1) { 1727 if (!IS_STRING(m->type)) { 1728 uint64_t val; 1729 ++l; 1730 m->mask_op |= op; 1731 val = (uint64_t)strtoull(l, &t, 0); 1732 l = t; 1733 m->num_mask = file_signextend(ms, m, val); 1734 eatsize(&l); 1735 } 1736 else if (op == FILE_OPDIVIDE) { 1737 int have_range = 0; 1738 while (!isspace((unsigned char)*++l)) { 1739 switch (*l) { 1740 case '0': case '1': case '2': 1741 case '3': case '4': case '5': 1742 case '6': case '7': case '8': 1743 case '9': 1744 if (have_range && 1745 (ms->flags & MAGIC_CHECK)) 1746 file_magwarn(ms, 1747 "multiple ranges"); 1748 have_range = 1; 1749 m->str_range = CAST(uint32_t, 1750 strtoul(l, &t, 0)); 1751 if (m->str_range == 0) 1752 file_magwarn(ms, 1753 "zero range"); 1754 l = t - 1; 1755 break; 1756 case CHAR_COMPACT_WHITESPACE: 1757 m->str_flags |= 1758 STRING_COMPACT_WHITESPACE; 1759 break; 1760 case CHAR_COMPACT_OPTIONAL_WHITESPACE: 1761 m->str_flags |= 1762 STRING_COMPACT_OPTIONAL_WHITESPACE; 1763 break; 1764 case CHAR_IGNORE_LOWERCASE: 1765 m->str_flags |= STRING_IGNORE_LOWERCASE; 1766 break; 1767 case CHAR_IGNORE_UPPERCASE: 1768 m->str_flags |= STRING_IGNORE_UPPERCASE; 1769 break; 1770 case CHAR_REGEX_OFFSET_START: 1771 m->str_flags |= REGEX_OFFSET_START; 1772 break; 1773 case CHAR_BINTEST: 1774 m->str_flags |= STRING_BINTEST; 1775 break; 1776 case 
CHAR_TEXTTEST: 1777 m->str_flags |= STRING_TEXTTEST; 1778 break; 1779 case CHAR_TRIM: 1780 m->str_flags |= STRING_TRIM; 1781 break; 1782 case CHAR_PSTRING_1_LE: 1783 if (m->type != FILE_PSTRING) 1784 goto bad; 1785 m->str_flags = (m->str_flags & ~PSTRING_LEN) | PSTRING_1_LE; 1786 break; 1787 case CHAR_PSTRING_2_BE: 1788 if (m->type != FILE_PSTRING) 1789 goto bad; 1790 m->str_flags = (m->str_flags & ~PSTRING_LEN) | PSTRING_2_BE; 1791 break; 1792 case CHAR_PSTRING_2_LE: 1793 if (m->type != FILE_PSTRING) 1794 goto bad; 1795 m->str_flags = (m->str_flags & ~PSTRING_LEN) | PSTRING_2_LE; 1796 break; 1797 case CHAR_PSTRING_4_BE: 1798 if (m->type != FILE_PSTRING) 1799 goto bad; 1800 m->str_flags = (m->str_flags & ~PSTRING_LEN) | PSTRING_4_BE; 1801 break; 1802 case CHAR_PSTRING_4_LE: 1803 if (m->type != FILE_PSTRING) 1804 goto bad; 1805 m->str_flags = (m->str_flags & ~PSTRING_LEN) | PSTRING_4_LE; 1806 break; 1807 case CHAR_PSTRING_LENGTH_INCLUDES_ITSELF: 1808 if (m->type != FILE_PSTRING) 1809 goto bad; 1810 m->str_flags |= PSTRING_LENGTH_INCLUDES_ITSELF; 1811 break; 1812 default: 1813 bad: 1814 if (ms->flags & MAGIC_CHECK) 1815 file_magwarn(ms, 1816 "string extension `%c' " 1817 "invalid", *l); 1818 return -1; 1819 } 1820 /* allow multiple '/' for readability */ 1821 if (l[1] == '/' && 1822 !isspace((unsigned char)l[2])) 1823 l++; 1824 } 1825 if (string_modifier_check(ms, m) == -1) 1826 return -1; 1827 } 1828 else { 1829 if (ms->flags & MAGIC_CHECK) 1830 file_magwarn(ms, "invalid string op: %c", *t); 1831 return -1; 1832 } 1833 } 1834 /* 1835 * We used to set mask to all 1's here, instead let's just not do 1836 * anything if mask = 0 (unless you have a better idea) 1837 */ 1838 EATAB; 1839 1840 switch (*l) { 1841 case '>': 1842 case '<': 1843 m->reln = *l; 1844 ++l; 1845 if (*l == '=') { 1846 if (ms->flags & MAGIC_CHECK) { 1847 file_magwarn(ms, "%c= not supported", 1848 m->reln); 1849 return -1; 1850 } 1851 ++l; 1852 } 1853 break; 1854 /* Old-style anding: "0 byte &0x80 
dynamically linked" */ 1855 case '&': 1856 case '^': 1857 case '=': 1858 m->reln = *l; 1859 ++l; 1860 if (*l == '=') { 1861 /* HP compat: ignore &= etc. */ 1862 ++l; 1863 } 1864 break; 1865 case '!': 1866 m->reln = *l; 1867 ++l; 1868 break; 1869 default: 1870 m->reln = '='; /* the default relation */ 1871 if (*l == 'x' && ((isascii((unsigned char)l[1]) && 1872 isspace((unsigned char)l[1])) || !l[1])) { 1873 m->reln = *l; 1874 ++l; 1875 } 1876 break; 1877 } 1878 /* 1879 * Grab the value part, except for an 'x' reln. 1880 */ 1881 if (m->reln != 'x' && getvalue(ms, m, &l, action)) 1882 return -1; 1883 1884 /* 1885 * TODO finish this macro and start using it! 1886 * #define offsetcheck {if (offset > HOWMANY-1) 1887 * magwarn("offset too big"); } 1888 */ 1889 1890 /* 1891 * Now get last part - the description 1892 */ 1893 EATAB; 1894 if (l[0] == '\b') { 1895 ++l; 1896 m->flag |= NOSPACE; 1897 } else if ((l[0] == '\\') && (l[1] == 'b')) { 1898 ++l; 1899 ++l; 1900 m->flag |= NOSPACE; 1901 } 1902 for (i = 0; (m->desc[i++] = *l++) != '\0' && i < sizeof(m->desc); ) 1903 continue; 1904 if (i == sizeof(m->desc)) { 1905 m->desc[sizeof(m->desc) - 1] = '\0'; 1906 if (ms->flags & MAGIC_CHECK) 1907 file_magwarn(ms, "description `%s' truncated", m->desc); 1908 } 1909 1910 /* 1911 * We only do this check while compiling, or if any of the magic 1912 * files were not compiled. 
1913 */ 1914 if (ms->flags & MAGIC_CHECK) { 1915 if (check_format(ms, m) == -1) 1916 return -1; 1917 } 1918 #ifndef COMPILE_ONLY 1919 if (action == FILE_CHECK) { 1920 file_mdump(m); 1921 } 1922 #endif 1923 m->mimetype[0] = '\0'; /* initialise MIME type to none */ 1924 return 0; 1925 } 1926 1927 /* 1928 * parse a STRENGTH annotation line from magic file, put into magic[index - 1] 1929 * if valid 1930 */ 1931 private int 1932 parse_strength(struct magic_set *ms, struct magic_entry *me, const char *line) 1933 { 1934 const char *l = line; 1935 char *el; 1936 unsigned long factor; 1937 struct magic *m = &me->mp[0]; 1938 1939 if (m->factor_op != FILE_FACTOR_OP_NONE) { 1940 file_magwarn(ms, 1941 "Current entry already has a strength type: %c %d", 1942 m->factor_op, m->factor); 1943 return -1; 1944 } 1945 EATAB; 1946 switch (*l) { 1947 case FILE_FACTOR_OP_NONE: 1948 case FILE_FACTOR_OP_PLUS: 1949 case FILE_FACTOR_OP_MINUS: 1950 case FILE_FACTOR_OP_TIMES: 1951 case FILE_FACTOR_OP_DIV: 1952 m->factor_op = *l++; 1953 break; 1954 default: 1955 file_magwarn(ms, "Unknown factor op `%c'", *l); 1956 return -1; 1957 } 1958 EATAB; 1959 factor = strtoul(l, &el, 0); 1960 if (factor > 255) { 1961 file_magwarn(ms, "Too large factor `%lu'", factor); 1962 goto out; 1963 } 1964 if (*el && !isspace((unsigned char)*el)) { 1965 file_magwarn(ms, "Bad factor `%s'", l); 1966 goto out; 1967 } 1968 m->factor = (uint8_t)factor; 1969 if (m->factor == 0 && m->factor_op == FILE_FACTOR_OP_DIV) { 1970 file_magwarn(ms, "Cannot have factor op `%c' and factor %u", 1971 m->factor_op, m->factor); 1972 goto out; 1973 } 1974 return 0; 1975 out: 1976 m->factor_op = FILE_FACTOR_OP_NONE; 1977 m->factor = 0; 1978 return -1; 1979 } 1980 1981 /* 1982 * Parse an Apple CREATOR/TYPE annotation from magic file and put it into 1983 * magic[index - 1] 1984 */ 1985 private int 1986 parse_apple(struct magic_set *ms, struct magic_entry *me, const char *line) 1987 { 1988 size_t i; 1989 const char *l = line; 1990 struct magic 
*m = &me->mp[me->cont_count == 0 ? 0 : me->cont_count - 1]; 1991 1992 if (m->apple[0] != '\0') { 1993 file_magwarn(ms, "Current entry already has a APPLE type " 1994 "`%.8s', new type `%s'", m->mimetype, l); 1995 return -1; 1996 } 1997 1998 EATAB; 1999 for (i = 0; *l && ((isascii((unsigned char)*l) && 2000 isalnum((unsigned char)*l)) || strchr("-+/.", *l)) && 2001 i < sizeof(m->apple); m->apple[i++] = *l++) 2002 continue; 2003 if (i == sizeof(m->apple) && *l) { 2004 /* We don't need to NUL terminate here, printing handles it */ 2005 if (ms->flags & MAGIC_CHECK) 2006 file_magwarn(ms, "APPLE type `%s' truncated %" 2007 SIZE_T_FORMAT "u", line, i); 2008 } 2009 2010 if (i > 0) 2011 return 0; 2012 else 2013 return -1; 2014 } 2015 2016 /* 2017 * parse a MIME annotation line from magic file, put into magic[index - 1] 2018 * if valid 2019 */ 2020 private int 2021 parse_mime(struct magic_set *ms, struct magic_entry *me, const char *line) 2022 { 2023 size_t i; 2024 const char *l = line; 2025 struct magic *m = &me->mp[me->cont_count == 0 ? 
0 : me->cont_count - 1]; 2026 2027 if (m->mimetype[0] != '\0') { 2028 file_magwarn(ms, "Current entry already has a MIME type `%s'," 2029 " new type `%s'", m->mimetype, l); 2030 return -1; 2031 } 2032 2033 EATAB; 2034 for (i = 0; *l && ((isascii((unsigned char)*l) && 2035 isalnum((unsigned char)*l)) || strchr("-+/.", *l)) && 2036 i < sizeof(m->mimetype); m->mimetype[i++] = *l++) 2037 continue; 2038 if (i == sizeof(m->mimetype)) { 2039 m->mimetype[sizeof(m->mimetype) - 1] = '\0'; 2040 if (ms->flags & MAGIC_CHECK) 2041 file_magwarn(ms, "MIME type `%s' truncated %" 2042 SIZE_T_FORMAT "u", m->mimetype, i); 2043 } else 2044 m->mimetype[i] = '\0'; 2045 2046 if (i > 0) 2047 return 0; 2048 else 2049 return -1; 2050 } 2051 2052 private int 2053 check_format_type(const char *ptr, int type) 2054 { 2055 int quad = 0; 2056 if (*ptr == '\0') { 2057 /* Missing format string; bad */ 2058 return -1; 2059 } 2060 2061 switch (type) { 2062 case FILE_FMT_QUAD: 2063 quad = 1; 2064 /*FALLTHROUGH*/ 2065 case FILE_FMT_NUM: 2066 if (*ptr == '-') 2067 ptr++; 2068 if (*ptr == '.') 2069 ptr++; 2070 while (isdigit((unsigned char)*ptr)) ptr++; 2071 if (*ptr == '.') 2072 ptr++; 2073 while (isdigit((unsigned char)*ptr)) ptr++; 2074 if (quad) { 2075 if (*ptr++ != 'l') 2076 return -1; 2077 if (*ptr++ != 'l') 2078 return -1; 2079 } 2080 2081 switch (*ptr++) { 2082 case 'l': 2083 switch (*ptr++) { 2084 case 'i': 2085 case 'd': 2086 case 'u': 2087 case 'o': 2088 case 'x': 2089 case 'X': 2090 return 0; 2091 default: 2092 return -1; 2093 } 2094 2095 case 'h': 2096 switch (*ptr++) { 2097 case 'h': 2098 switch (*ptr++) { 2099 case 'i': 2100 case 'd': 2101 case 'u': 2102 case 'o': 2103 case 'x': 2104 case 'X': 2105 return 0; 2106 default: 2107 return -1; 2108 } 2109 case 'd': 2110 return 0; 2111 default: 2112 return -1; 2113 } 2114 2115 case 'i': 2116 case 'c': 2117 case 'd': 2118 case 'u': 2119 case 'o': 2120 case 'x': 2121 case 'X': 2122 return 0; 2123 2124 default: 2125 return -1; 2126 } 2127 2128 case 
FILE_FMT_FLOAT: 2129 case FILE_FMT_DOUBLE: 2130 if (*ptr == '-') 2131 ptr++; 2132 if (*ptr == '.') 2133 ptr++; 2134 while (isdigit((unsigned char)*ptr)) ptr++; 2135 if (*ptr == '.') 2136 ptr++; 2137 while (isdigit((unsigned char)*ptr)) ptr++; 2138 2139 switch (*ptr++) { 2140 case 'e': 2141 case 'E': 2142 case 'f': 2143 case 'F': 2144 case 'g': 2145 case 'G': 2146 return 0; 2147 2148 default: 2149 return -1; 2150 } 2151 2152 2153 case FILE_FMT_STR: 2154 if (*ptr == '-') 2155 ptr++; 2156 while (isdigit((unsigned char )*ptr)) 2157 ptr++; 2158 if (*ptr == '.') { 2159 ptr++; 2160 while (isdigit((unsigned char )*ptr)) 2161 ptr++; 2162 } 2163 2164 switch (*ptr++) { 2165 case 's': 2166 return 0; 2167 default: 2168 return -1; 2169 } 2170 2171 default: 2172 /* internal error */ 2173 abort(); 2174 } 2175 /*NOTREACHED*/ 2176 return -1; 2177 } 2178 2179 /* 2180 * Check that the optional printf format in description matches 2181 * the type of the magic. 2182 */ 2183 private int 2184 check_format(struct magic_set *ms, struct magic *m) 2185 { 2186 char *ptr; 2187 2188 for (ptr = m->desc; *ptr; ptr++) 2189 if (*ptr == '%') 2190 break; 2191 if (*ptr == '\0') { 2192 /* No format string; ok */ 2193 return 1; 2194 } 2195 2196 assert(file_nformats == file_nnames); 2197 2198 if (m->type >= file_nformats) { 2199 file_magwarn(ms, "Internal error inconsistency between " 2200 "m->type and format strings"); 2201 return -1; 2202 } 2203 if (file_formats[m->type] == FILE_FMT_NONE) { 2204 file_magwarn(ms, "No format string for `%s' with description " 2205 "`%s'", m->desc, file_names[m->type]); 2206 return -1; 2207 } 2208 2209 ptr++; 2210 if (check_format_type(ptr, file_formats[m->type]) == -1) { 2211 /* 2212 * TODO: this error message is unhelpful if the format 2213 * string is not one character long 2214 */ 2215 file_magwarn(ms, "Printf format `%c' is not valid for type " 2216 "`%s' in description `%s'", *ptr ? 
*ptr : '?', 2217 file_names[m->type], m->desc); 2218 return -1; 2219 } 2220 2221 for (; *ptr; ptr++) { 2222 if (*ptr == '%') { 2223 file_magwarn(ms, 2224 "Too many format strings (should have at most one) " 2225 "for `%s' with description `%s'", 2226 file_names[m->type], m->desc); 2227 return -1; 2228 } 2229 } 2230 return 0; 2231 } 2232 2233 /* 2234 * Read a numeric value from a pointer, into the value union of a magic 2235 * pointer, according to the magic type. Update the string pointer to point 2236 * just after the number read. Return 0 for success, non-zero for failure. 2237 */ 2238 private int 2239 getvalue(struct magic_set *ms, struct magic *m, const char **p, int action) 2240 { 2241 switch (m->type) { 2242 case FILE_BESTRING16: 2243 case FILE_LESTRING16: 2244 case FILE_STRING: 2245 case FILE_PSTRING: 2246 case FILE_REGEX: 2247 case FILE_SEARCH: 2248 case FILE_NAME: 2249 case FILE_USE: 2250 *p = getstr(ms, m, *p, action == FILE_COMPILE); 2251 if (*p == NULL) { 2252 if (ms->flags & MAGIC_CHECK) 2253 file_magwarn(ms, "cannot get string from `%s'", 2254 m->value.s); 2255 return -1; 2256 } 2257 return 0; 2258 case FILE_FLOAT: 2259 case FILE_BEFLOAT: 2260 case FILE_LEFLOAT: 2261 if (m->reln != 'x') { 2262 char *ep; 2263 #ifdef HAVE_STRTOF 2264 m->value.f = strtof(*p, &ep); 2265 #else 2266 m->value.f = (float)strtod(*p, &ep); 2267 #endif 2268 *p = ep; 2269 } 2270 return 0; 2271 case FILE_DOUBLE: 2272 case FILE_BEDOUBLE: 2273 case FILE_LEDOUBLE: 2274 if (m->reln != 'x') { 2275 char *ep; 2276 m->value.d = strtod(*p, &ep); 2277 *p = ep; 2278 } 2279 return 0; 2280 default: 2281 if (m->reln != 'x') { 2282 char *ep; 2283 m->value.q = file_signextend(ms, m, 2284 (uint64_t)strtoull(*p, &ep, 0)); 2285 *p = ep; 2286 eatsize(p); 2287 } 2288 return 0; 2289 } 2290 } 2291 2292 /* 2293 * Convert a string containing C character escapes. Stop at an unescaped 2294 * space or tab. 2295 * Copy the converted version to "m->value.s", and the length in m->vallen. 
* Return updated scan pointer as function result.  Warnings about
 * questionable escapes are issued only if `warn' is set.
 * Returns NULL (after file_error) if the converted string would not fit
 * in m->value.s.
 */
private const char *
getstr(struct magic_set *ms, struct magic *m, const char *s, int warn)
{
	const char *origs = s;		/* kept for the error message */
	char	*p = m->value.s;	/* output cursor */
	size_t  plen = sizeof(m->value.s);
	char 	*origp = p;
	char	*pmax = p + plen - 1;	/* last slot, reserved for the NUL */
	int	c;
	int	val;

	/*
	 * NOTE(review): the loop condition post-increments s, so when the
	 * loop stops on the terminating NUL (or on whitespace) the returned
	 * pointer is one past that character.
	 */
	while ((c = *s++) != '\0') {
		if (isspace((unsigned char) c))
			break;
		if (p >= pmax) {
			file_error(ms, 0, "string too long: `%s'", origs);
			return NULL;
		}
		if (c == '\\') {
			switch(c = *s++) {

			case '\0':
				if (warn)
					file_magwarn(ms, "incomplete escape");
				goto out;

			case '\t':
				if (warn) {
					file_magwarn(ms,
					    "escaped tab found, use \\t instead");
					warn = 0;	/* already did */
				}
				/*FALLTHROUGH*/
			default:
				if (warn) {
					if (isprint((unsigned char)c)) {
						/* Allow escaping of
						 * ``relations'' */
						if (strchr("<>&^=!", c) == NULL
						    && (m->type != FILE_REGEX ||
						    strchr("[]().*?^$|{}", c)
						    == NULL)) {
							file_magwarn(ms, "no "
							    "need to escape "
							    "`%c'", c);
						}
					} else {
						file_magwarn(ms,
						    "unknown escape sequence: "
						    "\\%03o", c);
					}
				}
				/*FALLTHROUGH*/
			/* space, perhaps force people to use \040? */
			case ' ':
#if 0
			/*
			 * Other things people escape, but shouldn't need to,
			 * so we disallow them
			 */
			case '\'':
			case '"':
			case '?':
#endif
			/* Relations */
			case '>':
			case '<':
			case '&':
			case '^':
			case '=':
			case '!':
			/* and backslash itself */
			case '\\':
				/* the escaped character stands for itself */
				*p++ = (char) c;
				break;

			case 'a':
				*p++ = '\a';
				break;

			case 'b':
				*p++ = '\b';
				break;

			case 'f':
				*p++ = '\f';
				break;

			case 'n':
				*p++ = '\n';
				break;

			case 'r':
				*p++ = '\r';
				break;

			case 't':
				*p++ = '\t';
				break;

			case 'v':
				*p++ = '\v';
				break;

			/* \ and up to 3 octal digits */
			case '0':
			case '1':
			case '2':
			case '3':
			case '4':
			case '5':
			case '6':
			case '7':
				val = c - '0';
				c = *s++;  /* try for 2 */
				if (c >= '0' && c <= '7') {
					val = (val << 3) | (c - '0');
					c = *s++;  /* try for 3 */
					if (c >= '0' && c <= '7')
						val = (val << 3) | (c-'0');
					else
						--s;	/* not octal: put it back */
				}
				else
					--s;		/* not octal: put it back */
				*p++ = (char)val;
				break;

			/* \x and up to 2 hex digits */
			case 'x':
				val = 'x';      /* Default if no digits */
				c = hextoint(*s++);     /* Get next char */
				if (c >= 0) {
					val = c;
					c = hextoint(*s++);
					if (c >= 0)
						val = (val << 4) + c;
					else
						--s;	/* not hex: put it back */
				} else
					--s;		/* not hex: put it back */
				*p++ = (char)val;
				break;
			}
		} else
			*p++ = (char)c;
	}
out:
	*p = '\0';
	m->vallen = CAST(unsigned char, (p - origp));
	/* pascal strings also count their length prefix */
	if (m->type == FILE_PSTRING)
		m->vallen += (unsigned char)file_pstring_length_size(m);
	return s;
}


/* Single hex char to int; -1 if not a hex char.
*/ 2455 private int 2456 hextoint(int c) 2457 { 2458 if (!isascii((unsigned char) c)) 2459 return -1; 2460 if (isdigit((unsigned char) c)) 2461 return c - '0'; 2462 if ((c >= 'a') && (c <= 'f')) 2463 return c + 10 - 'a'; 2464 if (( c>= 'A') && (c <= 'F')) 2465 return c + 10 - 'A'; 2466 return -1; 2467 } 2468 2469 2470 /* 2471 * Print a string containing C character escapes. 2472 */ 2473 protected void 2474 file_showstr(FILE *fp, const char *s, size_t len) 2475 { 2476 char c; 2477 2478 for (;;) { 2479 if (len == ~0U) { 2480 c = *s++; 2481 if (c == '\0') 2482 break; 2483 } 2484 else { 2485 if (len-- == 0) 2486 break; 2487 c = *s++; 2488 } 2489 if (c >= 040 && c <= 0176) /* TODO isprint && !iscntrl */ 2490 (void) fputc(c, fp); 2491 else { 2492 (void) fputc('\\', fp); 2493 switch (c) { 2494 case '\a': 2495 (void) fputc('a', fp); 2496 break; 2497 2498 case '\b': 2499 (void) fputc('b', fp); 2500 break; 2501 2502 case '\f': 2503 (void) fputc('f', fp); 2504 break; 2505 2506 case '\n': 2507 (void) fputc('n', fp); 2508 break; 2509 2510 case '\r': 2511 (void) fputc('r', fp); 2512 break; 2513 2514 case '\t': 2515 (void) fputc('t', fp); 2516 break; 2517 2518 case '\v': 2519 (void) fputc('v', fp); 2520 break; 2521 2522 default: 2523 (void) fprintf(fp, "%.3o", c & 0377); 2524 break; 2525 } 2526 } 2527 } 2528 } 2529 2530 /* 2531 * eatsize(): Eat the size spec from a number [eg. 10UL] 2532 */ 2533 private void 2534 eatsize(const char **p) 2535 { 2536 const char *l = *p; 2537 2538 if (LOWCASE(*l) == 'u') 2539 l++; 2540 2541 switch (LOWCASE(*l)) { 2542 case 'l': /* long */ 2543 case 's': /* short */ 2544 case 'h': /* short */ 2545 case 'b': /* char/byte */ 2546 case 'c': /* char/byte */ 2547 l++; 2548 /*FALLTHROUGH*/ 2549 default: 2550 break; 2551 } 2552 2553 *p = l; 2554 } 2555 2556 /* 2557 * handle a compiled file. 
*/

/*
 * Load the compiled magic database derived from `fn' into a freshly
 * allocated magic_map.  With QUICK defined the file is mmap()ed,
 * otherwise it is read into malloc()ed memory.  The header (magic
 * number, version, per-set entry counts) is validated, and the entries
 * are byte-swapped when the magic number only matches after swap4().
 * Returns the map, or NULL on any failure.
 */
private struct magic_map *
apprentice_map(struct magic_set *ms, const char *fn)
{
	int fd;
	struct stat st;
	uint32_t *ptr;
	uint32_t version, entries, nentries;
	int needsbyteswap;
	char *dbname = NULL;
	struct magic_map *map;
	size_t i;

	fd = -1;
	if ((map = CAST(struct magic_map *, calloc(1, sizeof(*map)))) == NULL) {
		file_oomem(ms, sizeof(*map));
		goto error;
	}

	/* presumably yields the ".mgc" path for fn -- see mkdbname() */
	dbname = mkdbname(ms, fn, 0);
	if (dbname == NULL)
		goto error;

	/* a missing/unreadable database is not reported here */
	if ((fd = open(dbname, O_RDONLY|O_BINARY)) == -1)
		goto error;

	if (fstat(fd, &st) == -1) {
		file_error(ms, errno, "cannot stat `%s'", dbname);
		goto error;
	}
	/* need at least the magic number and version words */
	if (st.st_size < 8) {
		file_error(ms, 0, "file `%s' is too small", dbname);
		goto error;
	}

	map->len = (size_t)st.st_size;
#ifdef QUICK
	/* private writable mapping: byteswap below modifies it in place */
	if ((map->p = mmap(0, (size_t)st.st_size, PROT_READ|PROT_WRITE,
	    MAP_PRIVATE|MAP_FILE, fd, (off_t)0)) == MAP_FAILED) {
		file_error(ms, errno, "cannot map `%s'", dbname);
		goto error;
	}
#else
	if ((map->p = CAST(void *, malloc(map->len))) == NULL) {
		file_oomem(ms, map->len);
		goto error;
	}
	if (read(fd, map->p, map->len) != (ssize_t)map->len) {
		file_badread(ms);
		goto error;
	}
	/*
	 * NOTE(review): len is reset to 0 for the malloc()ed case;
	 * presumably apprentice_unmap() uses it to tell the two apart --
	 * confirm there.
	 */
	map->len = 0;
#define RET	1
#endif
	(void)close(fd);
	fd = -1;
	ptr = CAST(uint32_t *, map->p);
	/* detect a database written with the opposite byte order */
	if (*ptr != MAGICNO) {
		if (swap4(*ptr) != MAGICNO) {
			file_error(ms, 0, "bad magic in `%s'", dbname);
			goto error;
		}
		needsbyteswap = 1;
	} else
		needsbyteswap = 0;
	if (needsbyteswap)
		version = swap4(ptr[1]);
	else
		version = ptr[1];
	if (version != VERSIONNO) {
		file_error(ms, 0, "File %s supports only version %d magic "
		    "files. `%s' is version %d", VERSION,
		    VERSIONNO, dbname, version);
		goto error;
	}
	/* the file must be a whole number of struct magic records */
	entries = (uint32_t)(st.st_size / sizeof(struct magic));
	if ((off_t)(entries * sizeof(struct magic)) != st.st_size) {
		file_error(ms, 0, "Size of `%s' %llu is not a multiple of %zu",
		    dbname, (unsigned long long)st.st_size,
		    sizeof(struct magic));
		goto error;
	}
	/* the first record-sized slot holds the header, entries follow */
	map->magic[0] = CAST(struct magic *, map->p) + 1;
	nentries = 0;
	for (i = 0; i < MAGIC_SETS; i++) {
		/* per-set counts are stored after magic number and version */
		if (needsbyteswap)
			map->nmagic[i] = swap4(ptr[i + 2]);
		else
			map->nmagic[i] = ptr[i + 2];
		/* sets are laid out back to back */
		if (i != MAGIC_SETS - 1)
			map->magic[i + 1] = map->magic[i] + map->nmagic[i];
		nentries += map->nmagic[i];
	}
	/* +1 accounts for the header slot */
	if (entries != nentries + 1) {
		file_error(ms, 0, "Inconsistent entries in `%s' %u != %u",
		    dbname, entries, nentries + 1);
		goto error;
	}
	if (needsbyteswap)
		for (i = 0; i < MAGIC_SETS; i++)
			byteswap(map->magic[i], map->nmagic[i]);
	free(dbname);
	return map;

error:
	if (fd != -1)
		(void)close(fd);
	/* NOTE(review): map may be NULL here; assumes apprentice_unmap copes */
	apprentice_unmap(map);
	free(dbname);
	return NULL;
}

/* Leading words of a compiled magic file: magic number, then version. */
private const uint32_t ar[] = {
    MAGICNO, VERSIONNO
};

/*
 * handle an mmaped file.
2677 */ 2678 private int 2679 apprentice_compile(struct magic_set *ms, struct magic_map *map, const char *fn) 2680 { 2681 static const size_t nm = sizeof(*map->nmagic) * MAGIC_SETS; 2682 static const size_t m = sizeof(**map->magic); 2683 int fd = -1; 2684 size_t len; 2685 char *dbname; 2686 int rv = -1; 2687 uint32_t i; 2688 2689 dbname = mkdbname(ms, fn, 1); 2690 2691 if (dbname == NULL) 2692 goto out; 2693 2694 if ((fd = open(dbname, O_WRONLY|O_CREAT|O_TRUNC|O_BINARY, 0644)) == -1) 2695 { 2696 file_error(ms, errno, "cannot open `%s'", dbname); 2697 goto out; 2698 } 2699 2700 if (write(fd, ar, sizeof(ar)) != (ssize_t)sizeof(ar)) { 2701 file_error(ms, errno, "error writing `%s'", dbname); 2702 goto out; 2703 } 2704 2705 if (write(fd, map->nmagic, nm) != (ssize_t)nm) { 2706 file_error(ms, errno, "error writing `%s'", dbname); 2707 goto out; 2708 } 2709 2710 assert(nm + sizeof(ar) < m); 2711 2712 if (lseek(fd, (off_t)m, SEEK_SET) != (off_t)m) { 2713 file_error(ms, errno, "error seeking `%s'", dbname); 2714 goto out; 2715 } 2716 2717 for (i = 0; i < MAGIC_SETS; i++) { 2718 len = m * map->nmagic[i]; 2719 if (write(fd, map->magic[i], len) != (ssize_t)len) { 2720 file_error(ms, errno, "error writing `%s'", dbname); 2721 goto out; 2722 } 2723 } 2724 2725 if (fd != -1) 2726 (void)close(fd); 2727 rv = 0; 2728 out: 2729 free(dbname); 2730 return rv; 2731 } 2732 2733 private const char ext[] = ".mgc"; 2734 /* 2735 * make a dbname 2736 */ 2737 private char * 2738 mkdbname(struct magic_set *ms, const char *fn, int strip) 2739 { 2740 const char *p, *q; 2741 char *buf; 2742 2743 if (strip) { 2744 if ((p = strrchr(fn, '/')) != NULL) 2745 fn = ++p; 2746 } 2747 2748 for (q = fn; *q; q++) 2749 continue; 2750 /* Look for .mgc */ 2751 for (p = ext + sizeof(ext) - 1; p >= ext && q >= fn; p--, q--) 2752 if (*p != *q) 2753 break; 2754 2755 /* Did not find .mgc, restore q */ 2756 if (p >= ext) 2757 while (*q) 2758 q++; 2759 2760 q++; 2761 /* Compatibility with old code that looked in .mime 
*/ 2762 if (ms->flags & MAGIC_MIME) { 2763 if (asprintf(&buf, "%.*s.mime%s", (int)(q - fn), fn, ext) < 0) 2764 return NULL; 2765 if (access(buf, R_OK) != -1) { 2766 ms->flags &= MAGIC_MIME_TYPE; 2767 return buf; 2768 } 2769 free(buf); 2770 } 2771 if (asprintf(&buf, "%.*s%s", (int)(q - fn), fn, ext) < 0) 2772 return NULL; 2773 2774 /* Compatibility with old code that looked in .mime */ 2775 if (strstr(p, ".mime") != NULL) 2776 ms->flags &= MAGIC_MIME_TYPE; 2777 return buf; 2778 } 2779 2780 /* 2781 * Byteswap an mmap'ed file if needed 2782 */ 2783 private void 2784 byteswap(struct magic *magic, uint32_t nmagic) 2785 { 2786 uint32_t i; 2787 for (i = 0; i < nmagic; i++) 2788 bs1(&magic[i]); 2789 } 2790 2791 /* 2792 * swap a short 2793 */ 2794 private uint16_t 2795 swap2(uint16_t sv) 2796 { 2797 uint16_t rv; 2798 uint8_t *s = (uint8_t *)(void *)&sv; 2799 uint8_t *d = (uint8_t *)(void *)&rv; 2800 d[0] = s[1]; 2801 d[1] = s[0]; 2802 return rv; 2803 } 2804 2805 /* 2806 * swap an int 2807 */ 2808 private uint32_t 2809 swap4(uint32_t sv) 2810 { 2811 uint32_t rv; 2812 uint8_t *s = (uint8_t *)(void *)&sv; 2813 uint8_t *d = (uint8_t *)(void *)&rv; 2814 d[0] = s[3]; 2815 d[1] = s[2]; 2816 d[2] = s[1]; 2817 d[3] = s[0]; 2818 return rv; 2819 } 2820 2821 /* 2822 * swap a quad 2823 */ 2824 private uint64_t 2825 swap8(uint64_t sv) 2826 { 2827 uint64_t rv; 2828 uint8_t *s = (uint8_t *)(void *)&sv; 2829 uint8_t *d = (uint8_t *)(void *)&rv; 2830 #if 0 2831 d[0] = s[3]; 2832 d[1] = s[2]; 2833 d[2] = s[1]; 2834 d[3] = s[0]; 2835 d[4] = s[7]; 2836 d[5] = s[6]; 2837 d[6] = s[5]; 2838 d[7] = s[4]; 2839 #else 2840 d[0] = s[7]; 2841 d[1] = s[6]; 2842 d[2] = s[5]; 2843 d[3] = s[4]; 2844 d[4] = s[3]; 2845 d[5] = s[2]; 2846 d[6] = s[1]; 2847 d[7] = s[0]; 2848 #endif 2849 return rv; 2850 } 2851 2852 /* 2853 * byteswap a single magic entry 2854 */ 2855 private void 2856 bs1(struct magic *m) 2857 { 2858 m->cont_level = swap2(m->cont_level); 2859 m->offset = swap4((uint32_t)m->offset); 2860 
m->in_offset = swap4((uint32_t)m->in_offset); 2861 m->lineno = swap4((uint32_t)m->lineno); 2862 if (IS_STRING(m->type)) { 2863 m->str_range = swap4(m->str_range); 2864 m->str_flags = swap4(m->str_flags); 2865 } 2866 else { 2867 m->value.q = swap8(m->value.q); 2868 m->num_mask = swap8(m->num_mask); 2869 } 2870 } 2871 2872 protected size_t 2873 file_pstring_length_size(const struct magic *m) 2874 { 2875 switch (m->str_flags & PSTRING_LEN) { 2876 case PSTRING_1_LE: 2877 return 1; 2878 case PSTRING_2_LE: 2879 case PSTRING_2_BE: 2880 return 2; 2881 case PSTRING_4_LE: 2882 case PSTRING_4_BE: 2883 return 4; 2884 default: 2885 abort(); /* Impossible */ 2886 return 1; 2887 } 2888 } 2889 protected size_t 2890 file_pstring_get_length(const struct magic *m, const char *s) 2891 { 2892 size_t len = 0; 2893 2894 switch (m->str_flags & PSTRING_LEN) { 2895 case PSTRING_1_LE: 2896 len = *s; 2897 break; 2898 case PSTRING_2_LE: 2899 len = (s[1] << 8) | s[0]; 2900 break; 2901 case PSTRING_2_BE: 2902 len = (s[0] << 8) | s[1]; 2903 break; 2904 case PSTRING_4_LE: 2905 len = (s[3] << 24) | (s[2] << 16) | (s[1] << 8) | s[0]; 2906 break; 2907 case PSTRING_4_BE: 2908 len = (s[0] << 24) | (s[1] << 16) | (s[2] << 8) | s[3]; 2909 break; 2910 default: 2911 abort(); /* Impossible */ 2912 } 2913 2914 if (m->str_flags & PSTRING_LENGTH_INCLUDES_ITSELF) 2915 len -= file_pstring_length_size(m); 2916 2917 return len; 2918 } 2919 2920 protected int 2921 file_magicfind(struct magic_set *ms, const char *name, struct mlist *v) 2922 { 2923 uint32_t i, j; 2924 struct mlist *mlist, *ml; 2925 2926 mlist = ms->mlist[1]; 2927 2928 for (ml = mlist->next; ml != mlist; ml = ml->next) { 2929 struct magic *ma = ml->magic; 2930 uint32_t nma = ml->nmagic; 2931 for (i = 0; i < nma; i++) { 2932 if (ma[i].type != FILE_NAME) 2933 continue; 2934 if (strcmp(ma[i].value.s, name) == 0) { 2935 v->magic = &ma[i]; 2936 for (j = i + 1; j < nma; j++) 2937 if (ma[j].cont_level == 0) 2938 break; 2939 v->nmagic = j - i; 2940 return 0; 
2941 } 2942 } 2943 } 2944 return -1; 2945 } 2946