1 /* $NetBSD: apprentice.c,v 1.8 2013/01/03 23:05:38 christos Exp $ */ 2 3 /* 4 * Copyright (c) Ian F. Darwin 1986-1995. 5 * Software written by Ian F. Darwin and others; 6 * maintained 1995-present by Christos Zoulas and others. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice immediately at the beginning of the file, without modification, 13 * this list of conditions, and the following disclaimer. 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in the 16 * documentation and/or other materials provided with the distribution. 17 * 18 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 19 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 20 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 21 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR 22 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 23 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 24 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 25 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 26 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 27 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 28 * SUCH DAMAGE. 29 */ 30 /* 31 * apprentice - make one pass through /etc/magic, learning its secrets. 32 */ 33 34 #include "file.h" 35 36 #ifndef lint 37 #if 0 38 FILE_RCSID("@(#)$File: apprentice.c,v 1.180 2012/11/21 16:27:07 christos Exp $") 39 #else 40 __RCSID("$NetBSD: apprentice.c,v 1.8 2013/01/03 23:05:38 christos Exp $"); 41 #endif 42 #endif /* lint */ 43 44 #include "magic.h" 45 #include <stdlib.h> 46 #ifdef HAVE_UNISTD_H 47 #include <unistd.h> 48 #endif 49 #include <string.h> 50 #include <assert.h> 51 #include <ctype.h> 52 #include <fcntl.h> 53 #ifdef QUICK 54 #include <sys/mman.h> 55 #endif 56 #include <dirent.h> 57 58 #define EATAB {while (isascii((unsigned char) *l) && \ 59 isspace((unsigned char) *l)) ++l;} 60 #define LOWCASE(l) (isupper((unsigned char) (l)) ? \ 61 tolower((unsigned char) (l)) : (l)) 62 /* 63 * Work around a bug in headers on Digital Unix. 64 * At least confirmed for: OSF1 V4.0 878 65 */ 66 #if defined(__osf__) && defined(__DECC) 67 #ifdef MAP_FAILED 68 #undef MAP_FAILED 69 #endif 70 #endif 71 72 #ifndef MAP_FAILED 73 #define MAP_FAILED (void *) -1 74 #endif 75 76 #ifndef MAP_FILE 77 #define MAP_FILE 0 78 #endif 79 80 #define ALLOC_CHUNK (size_t)10 81 #define ALLOC_INCR (size_t)200 82 83 struct magic_entry { 84 struct magic *mp; 85 uint32_t cont_count; 86 uint32_t max_count; 87 }; 88 89 int file_formats[FILE_NAMES_SIZE]; 90 const size_t file_nformats = FILE_NAMES_SIZE; 91 const char *file_names[FILE_NAMES_SIZE]; 92 const size_t file_nnames = FILE_NAMES_SIZE; 93 94 private int getvalue(struct magic_set *ms, struct magic *, const char **, int); 95 private int hextoint(int); 96 private const char *getstr(struct magic_set *, struct magic *, const char *, 97 int); 98 private int parse(struct magic_set *, struct magic_entry *, const char *, 99 size_t, int); 100 private void eatsize(const char **); 101 private int apprentice_1(struct magic_set *, const char *, int); 102 private size_t apprentice_magic_strength(const struct magic *); 103 private int apprentice_sort(const void *, const void *); 104 private void apprentice_list(struct mlist *, int ); 105 private int apprentice_load(struct magic_set *, struct magic **, uint32_t *, 106 const char *, int); 107 private struct mlist *mlist_alloc(void); 108 private void mlist_free(struct mlist *); 109 private void byteswap(struct magic *, uint32_t); 110 private void bs1(struct magic *); 111 private uint16_t swap2(uint16_t); 112 private uint32_t swap4(uint32_t); 113 private uint64_t swap8(uint64_t); 114 private char *mkdbname(struct magic_set *, const char *, int); 115 private int apprentice_map(struct magic_set *, struct magic **, uint32_t *, 116 const char *); 117 private int apprentice_compile(struct magic_set *, struct magic **, uint32_t *, 118 const char *); 119 private int check_format_type(const char *, int); 120 private int check_format(struct magic_set *, struct magic *); 121 private int get_op(char); 122 private int parse_mime(struct magic_set *, struct magic_entry *, const char *); 123 private int parse_strength(struct magic_set *, struct magic_entry *, const char *); 124 private int parse_apple(struct magic_set *, struct magic_entry *, const char *); 125 126 127 private size_t maxmagic[MAGIC_SETS] = { 0 }; 128 private size_t magicsize = sizeof(struct magic); 129 130 private const char usg_hdr[] = "cont\toffset\ttype\topcode\tmask\tvalue\tdesc"; 131 132 private struct { 133 const char *name; 134 size_t len; 135 int (*fun)(struct magic_set *, struct magic_entry *, const char *); 136 } bang[] = { 137 #define DECLARE_FIELD(name) { # name, sizeof(# name) - 1, parse_ ## name } 138 DECLARE_FIELD(mime), 139 DECLARE_FIELD(apple), 140 DECLARE_FIELD(strength), 141 #undef DECLARE_FIELD 142 { NULL, 0, NULL } 143 }; 144 145 #ifdef COMPILE_ONLY 146 147 int main(int, char *[]); 148 149 int 150 main(int argc, char *argv[]) 151 { 152 int ret; 153 struct magic_set *ms; 154 char *progname; 155 156 if ((progname = strrchr(argv[0], '/')) != NULL) 157 progname++; 158 else 159 progname = argv[0]; 160 161 if (argc != 2) { 162 (void)fprintf(stderr, "Usage: %s file\n", progname); 163 return 1; 164 } 165 166 if ((ms = magic_open(MAGIC_CHECK)) == NULL) { 167 (void)fprintf(stderr, "%s: %s\n", progname, strerror(errno)); 168 return 1; 169 } 170 ret = magic_compile(ms, argv[1]) == -1 ? 1 : 0; 171 if (ret == 1) 172 (void)fprintf(stderr, "%s: %s\n", progname, magic_error(ms)); 173 magic_close(ms); 174 return ret; 175 } 176 #endif /* COMPILE_ONLY */ 177 178 static const struct type_tbl_s { 179 const char name[16]; 180 const size_t len; 181 const int type; 182 const int format; 183 } type_tbl[] = { 184 # define XX(s) s, (sizeof(s) - 1) 185 # define XX_NULL "", 0 186 { XX("byte"), FILE_BYTE, FILE_FMT_NUM }, 187 { XX("short"), FILE_SHORT, FILE_FMT_NUM }, 188 { XX("default"), FILE_DEFAULT, FILE_FMT_STR }, 189 { XX("long"), FILE_LONG, FILE_FMT_NUM }, 190 { XX("string"), FILE_STRING, FILE_FMT_STR }, 191 { XX("date"), FILE_DATE, FILE_FMT_STR }, 192 { XX("beshort"), FILE_BESHORT, FILE_FMT_NUM }, 193 { XX("belong"), FILE_BELONG, FILE_FMT_NUM }, 194 { XX("bedate"), FILE_BEDATE, FILE_FMT_STR }, 195 { XX("leshort"), FILE_LESHORT, FILE_FMT_NUM }, 196 { XX("lelong"), FILE_LELONG, FILE_FMT_NUM }, 197 { XX("ledate"), FILE_LEDATE, FILE_FMT_STR }, 198 { XX("pstring"), FILE_PSTRING, FILE_FMT_STR }, 199 { XX("ldate"), FILE_LDATE, FILE_FMT_STR }, 200 { XX("beldate"), FILE_BELDATE, FILE_FMT_STR }, 201 { XX("leldate"), FILE_LELDATE, FILE_FMT_STR }, 202 { XX("regex"), FILE_REGEX, FILE_FMT_STR }, 203 { XX("bestring16"), FILE_BESTRING16, FILE_FMT_STR }, 204 { XX("lestring16"), FILE_LESTRING16, FILE_FMT_STR }, 205 { XX("search"), FILE_SEARCH, FILE_FMT_STR }, 206 { XX("medate"), FILE_MEDATE, FILE_FMT_STR }, 207 { XX("meldate"), FILE_MELDATE, FILE_FMT_STR }, 208 { XX("melong"), FILE_MELONG, FILE_FMT_NUM }, 209 { XX("quad"), FILE_QUAD, FILE_FMT_QUAD }, 210 { XX("lequad"), FILE_LEQUAD, FILE_FMT_QUAD }, 211 { XX("bequad"), FILE_BEQUAD, FILE_FMT_QUAD }, 212 { XX("qdate"), FILE_QDATE, FILE_FMT_STR }, 213 { XX("leqdate"), FILE_LEQDATE, FILE_FMT_STR }, 214 { XX("beqdate"), FILE_BEQDATE, FILE_FMT_STR }, 215 { XX("qldate"), FILE_QLDATE, FILE_FMT_STR }, 216 { XX("leqldate"), FILE_LEQLDATE, FILE_FMT_STR }, 217 { XX("beqldate"), FILE_BEQLDATE, FILE_FMT_STR }, 218 { XX("float"), FILE_FLOAT, FILE_FMT_FLOAT }, 219 { XX("befloat"), FILE_BEFLOAT, FILE_FMT_FLOAT }, 220 { XX("lefloat"), FILE_LEFLOAT, FILE_FMT_FLOAT }, 221 { XX("double"), FILE_DOUBLE, FILE_FMT_DOUBLE }, 222 { XX("bedouble"), FILE_BEDOUBLE, FILE_FMT_DOUBLE }, 223 { XX("ledouble"), FILE_LEDOUBLE, FILE_FMT_DOUBLE }, 224 { XX("leid3"), FILE_LEID3, FILE_FMT_NUM }, 225 { XX("beid3"), FILE_BEID3, FILE_FMT_NUM }, 226 { XX("indirect"), FILE_INDIRECT, FILE_FMT_NUM }, 227 { XX("qwdate"), FILE_QWDATE, FILE_FMT_STR }, 228 { XX("leqwdate"), FILE_LEQWDATE, FILE_FMT_STR }, 229 { XX("beqwdate"), FILE_BEQWDATE, FILE_FMT_STR }, 230 { XX("name"), FILE_NAME, FILE_FMT_STR }, 231 { XX("use"), FILE_USE, FILE_FMT_STR }, 232 { XX_NULL, FILE_INVALID, FILE_FMT_NONE }, 233 # undef XX 234 # undef XX_NULL 235 }; 236 237 private int 238 get_type(const char *l, const char **t) 239 { 240 const struct type_tbl_s *p; 241 242 for (p = type_tbl; p->len; p++) { 243 if (strncmp(l, p->name, p->len) == 0) { 244 if (t) 245 *t = l + p->len; 246 break; 247 } 248 } 249 return p->type; 250 } 251 252 private void 253 init_file_tables(void) 254 { 255 static int done = 0; 256 const struct type_tbl_s *p; 257 258 if (done) 259 return; 260 done++; 261 262 for (p = type_tbl; p->len; p++) { 263 assert(p->type < FILE_NAMES_SIZE); 264 file_names[p->type] = p->name; 265 file_formats[p->type] = p->format; 266 } 267 } 268 269 private int 270 add_mlist(struct mlist *mlp, struct magic *magic, uint32_t nmagic, int mapped) 271 { 272 struct mlist *ml; 273 274 if ((ml = CAST(struct mlist *, malloc(sizeof(*ml)))) == NULL) 275 return -1; 276 277 ml->magic = magic; 278 ml->nmagic = nmagic; 279 ml->mapped = mapped; 280 281 mlp->prev->next = ml; 282 ml->prev = mlp->prev; 283 ml->next = mlp; 284 mlp->prev = ml; 285 return 0; 286 } 287 288 /* 289 * Handle one file or directory. 290 */ 291 private int 292 apprentice_1(struct magic_set *ms, const char *fn, int action) 293 { 294 struct magic *magic[MAGIC_SETS] = { NULL }; 295 uint32_t nmagic[MAGIC_SETS] = { 0 }; 296 struct mlist *ml; 297 int rv = -1; 298 int mapped; 299 size_t i; 300 301 if (magicsize != FILE_MAGICSIZE) { 302 file_error(ms, 0, "magic element size %lu != %lu", 303 (unsigned long)sizeof(*magic[0]), 304 (unsigned long)FILE_MAGICSIZE); 305 return -1; 306 } 307 308 if (action == FILE_COMPILE) { 309 rv = apprentice_load(ms, magic, nmagic, fn, action); 310 if (rv != 0) 311 return -1; 312 rv = apprentice_compile(ms, magic, nmagic, fn); 313 return rv; 314 } 315 316 #ifndef COMPILE_ONLY 317 if ((rv = apprentice_map(ms, magic, nmagic, fn)) < 0) { 318 if (rv == -2) 319 return -1; 320 if (ms->flags & MAGIC_CHECK) 321 file_magwarn(ms, "using regular magic file `%s'", fn); 322 rv = apprentice_load(ms, magic, nmagic, fn, action); 323 if (rv != 0) 324 return -1; 325 } 326 327 mapped = rv; 328 329 for (i = 0; i < MAGIC_SETS; i++) { 330 if (magic[i] == NULL) 331 continue; 332 if (add_mlist(ms->mlist[i], magic[i], nmagic[i], mapped) == -1) 333 { 334 i = i == 1 ? 0 : 1; 335 file_delmagic(magic[i], mapped, nmagic[i]); 336 file_oomem(ms, sizeof(*ml)); 337 return -1; 338 } 339 } 340 341 if (action == FILE_LIST) { 342 for (i = 0; i < MAGIC_SETS; i++) { 343 printf("Set %zu:\nBinary patterns:\n", i); 344 apprentice_list(ms->mlist[i], BINTEST); 345 printf("Text patterns:\n"); 346 apprentice_list(ms->mlist[i], TEXTTEST); 347 } 348 } 349 350 return 0; 351 #endif /* COMPILE_ONLY */ 352 } 353 354 protected void 355 file_ms_free(struct magic_set *ms) 356 { 357 size_t i; 358 if (ms == NULL) 359 return; 360 for (i = 0; i < MAGIC_SETS; i++) 361 mlist_free(ms->mlist[i]); 362 free(ms->o.pbuf); 363 free(ms->o.buf); 364 free(ms->c.li); 365 free(ms); 366 } 367 368 protected struct magic_set * 369 file_ms_alloc(int flags) 370 { 371 struct magic_set *ms; 372 size_t i, len; 373 374 if ((ms = CAST(struct magic_set *, calloc((size_t)1, 375 sizeof(struct magic_set)))) == NULL) 376 return NULL; 377 378 if (magic_setflags(ms, flags) == -1) { 379 errno = EINVAL; 380 goto free; 381 } 382 383 ms->o.buf = ms->o.pbuf = NULL; 384 len = (ms->c.len = 10) * sizeof(*ms->c.li); 385 386 if ((ms->c.li = CAST(struct level_info *, malloc(len))) == NULL) 387 goto free; 388 389 ms->event_flags = 0; 390 ms->error = -1; 391 for (i = 0; i < MAGIC_SETS; i++) 392 ms->mlist[i] = NULL; 393 ms->file = "unknown"; 394 ms->line = 0; 395 return ms; 396 free: 397 free(ms); 398 return NULL; 399 } 400 401 protected void 402 file_delmagic(struct magic *p, int type, size_t entries) 403 { 404 if (p == NULL) 405 return; 406 switch (type) { 407 case 2: 408 #ifdef QUICK 409 p--; 410 (void)munmap((void *)p, sizeof(*p) * (entries + 1)); 411 break; 412 #else 413 (void)&entries; 414 abort(); 415 /*NOTREACHED*/ 416 #endif 417 case 1: 418 p--; 419 /*FALLTHROUGH*/ 420 case 0: 421 free(p); 422 break; 423 default: 424 abort(); 425 } 426 } 427 428 private struct mlist * 429 mlist_alloc(void) 430 { 431 struct mlist *mlist; 432 if ((mlist = CAST(struct mlist *, malloc(sizeof(*mlist)))) == NULL) { 433 return NULL; 434 } 435 mlist->next = mlist->prev = mlist; 436 return mlist; 437 } 438 439 private void 440 mlist_free(struct mlist *mlist) 441 { 442 struct mlist *ml; 443 444 if (mlist == NULL) 445 return; 446 447 for (ml = mlist->next; ml != mlist;) { 448 struct mlist *next = ml->next; 449 struct magic *mg = ml->magic; 450 file_delmagic(mg, ml->mapped, ml->nmagic); 451 free(ml); 452 ml = next; 453 } 454 free(ml); 455 } 456 457 /* const char *fn: list of magic files and directories */ 458 protected int 459 file_apprentice(struct magic_set *ms, const char *fn, int action) 460 { 461 char *p, *mfn; 462 int file_err, errs = -1; 463 size_t i; 464 465 if ((fn = magic_getpath(fn, action)) == NULL) 466 return -1; 467 468 init_file_tables(); 469 470 if ((mfn = strdup(fn)) == NULL) { 471 file_oomem(ms, strlen(fn)); 472 return -1; 473 } 474 475 for (i = 0; i < MAGIC_SETS; i++) { 476 mlist_free(ms->mlist[i]); 477 if ((ms->mlist[i] = mlist_alloc()) == NULL) { 478 file_oomem(ms, sizeof(*ms->mlist[i])); 479 if (i != 0) { 480 --i; 481 do 482 mlist_free(ms->mlist[i]); 483 while (i != 0); 484 } 485 free(mfn); 486 return -1; 487 } 488 } 489 fn = mfn; 490 491 while (fn) { 492 p = strchr(fn, PATHSEP); 493 if (p) 494 *p++ = '\0'; 495 if (*fn == '\0') 496 break; 497 file_err = apprentice_1(ms, fn, action); 498 errs = MAX(errs, file_err); 499 fn = p; 500 } 501 502 free(mfn); 503 504 if (errs == -1) { 505 for (i = 0; i < MAGIC_SETS; i++) { 506 mlist_free(ms->mlist[i]); 507 ms->mlist[i] = NULL; 508 } 509 file_error(ms, 0, "could not find any magic files!"); 510 return -1; 511 } 512 513 if (action == FILE_LOAD) 514 return 0; 515 516 for (i = 0; i < MAGIC_SETS; i++) { 517 mlist_free(ms->mlist[i]); 518 ms->mlist[i] = NULL; 519 } 520 521 switch (action) { 522 case FILE_COMPILE: 523 case FILE_CHECK: 524 case FILE_LIST: 525 return 0; 526 default: 527 file_error(ms, 0, "Invalid action %d", action); 528 return -1; 529 } 530 } 531 532 /* 533 * Get weight of this magic entry, for sorting purposes. 534 */ 535 private size_t 536 apprentice_magic_strength(const struct magic *m) 537 { 538 #define MULT 10 539 size_t val = 2 * MULT; /* baseline strength */ 540 541 switch (m->type) { 542 case FILE_DEFAULT: /* make sure this sorts last */ 543 if (m->factor_op != FILE_FACTOR_OP_NONE) 544 abort(); 545 return 0; 546 547 case FILE_BYTE: 548 val += 1 * MULT; 549 break; 550 551 case FILE_SHORT: 552 case FILE_LESHORT: 553 case FILE_BESHORT: 554 val += 2 * MULT; 555 break; 556 557 case FILE_LONG: 558 case FILE_LELONG: 559 case FILE_BELONG: 560 case FILE_MELONG: 561 val += 4 * MULT; 562 break; 563 564 case FILE_PSTRING: 565 case FILE_STRING: 566 val += m->vallen * MULT; 567 break; 568 569 case FILE_BESTRING16: 570 case FILE_LESTRING16: 571 val += m->vallen * MULT / 2; 572 break; 573 574 case FILE_SEARCH: 575 case FILE_REGEX: 576 val += m->vallen * MAX(MULT / m->vallen, 1); 577 break; 578 579 case FILE_DATE: 580 case FILE_LEDATE: 581 case FILE_BEDATE: 582 case FILE_MEDATE: 583 case FILE_LDATE: 584 case FILE_LELDATE: 585 case FILE_BELDATE: 586 case FILE_MELDATE: 587 case FILE_FLOAT: 588 case FILE_BEFLOAT: 589 case FILE_LEFLOAT: 590 val += 4 * MULT; 591 break; 592 593 case FILE_QUAD: 594 case FILE_BEQUAD: 595 case FILE_LEQUAD: 596 case FILE_QDATE: 597 case FILE_LEQDATE: 598 case FILE_BEQDATE: 599 case FILE_QLDATE: 600 case FILE_LEQLDATE: 601 case FILE_BEQLDATE: 602 case FILE_QWDATE: 603 case FILE_LEQWDATE: 604 case FILE_BEQWDATE: 605 case FILE_DOUBLE: 606 case FILE_BEDOUBLE: 607 case FILE_LEDOUBLE: 608 val += 8 * MULT; 609 break; 610 611 case FILE_INDIRECT: 612 case FILE_NAME: 613 case FILE_USE: 614 break; 615 616 default: 617 val = 0; 618 (void)fprintf(stderr, "Bad type %d\n", m->type); 619 abort(); 620 } 621 622 switch (m->reln) { 623 case 'x': /* matches anything penalize */ 624 case '!': /* matches almost anything penalize */ 625 val = 0; 626 break; 627 628 case '=': /* Exact match, prefer */ 629 val += MULT; 630 break; 631 632 case '>': 633 case '<': /* comparison match reduce strength */ 634 val -= 2 * MULT; 635 break; 636 637 case '^': 638 case '&': /* masking bits, we could count them too */ 639 val -= MULT; 640 break; 641 642 default: 643 (void)fprintf(stderr, "Bad relation %c\n", m->reln); 644 abort(); 645 } 646 647 if (val == 0) /* ensure we only return 0 for FILE_DEFAULT */ 648 val = 1; 649 650 switch (m->factor_op) { 651 case FILE_FACTOR_OP_NONE: 652 break; 653 case FILE_FACTOR_OP_PLUS: 654 val += m->factor; 655 break; 656 case FILE_FACTOR_OP_MINUS: 657 val -= m->factor; 658 break; 659 case FILE_FACTOR_OP_TIMES: 660 val *= m->factor; 661 break; 662 case FILE_FACTOR_OP_DIV: 663 val /= m->factor; 664 break; 665 default: 666 abort(); 667 } 668 669 /* 670 * Magic entries with no description get a bonus because they depend 671 * on subsequent magic entries to print something. 672 */ 673 if (m->desc[0] == '\0') 674 val++; 675 return val; 676 } 677 678 /* 679 * Sort callback for sorting entries by "strength" (basically length) 680 */ 681 private int 682 apprentice_sort(const void *a, const void *b) 683 { 684 const struct magic_entry *ma = CAST(const struct magic_entry *, a); 685 const struct magic_entry *mb = CAST(const struct magic_entry *, b); 686 size_t sa = apprentice_magic_strength(ma->mp); 687 size_t sb = apprentice_magic_strength(mb->mp); 688 if (sa == sb) 689 return 0; 690 else if (sa > sb) 691 return -1; 692 else 693 return 1; 694 } 695 696 /* 697 * Shows sorted patterns list in the order which is used for the matching 698 */ 699 private void 700 apprentice_list(struct mlist *mlist, int mode) 701 { 702 uint32_t magindex = 0; 703 struct mlist *ml; 704 for (ml = mlist->next; ml != mlist; ml = ml->next) { 705 for (magindex = 0; magindex < ml->nmagic; magindex++) { 706 struct magic *m = &ml->magic[magindex]; 707 if ((m->flag & mode) != mode) { 708 /* Skip sub-tests */ 709 while (magindex + 1 < ml->nmagic && 710 ml->magic[magindex + 1].cont_level != 0) 711 ++magindex; 712 continue; /* Skip to next top-level test*/ 713 } 714 715 /* 716 * Try to iterate over the tree until we find item with 717 * description/mimetype. 718 */ 719 while (magindex + 1 < ml->nmagic && 720 ml->magic[magindex + 1].cont_level != 0 && 721 *ml->magic[magindex].desc == '\0' && 722 *ml->magic[magindex].mimetype == '\0') 723 magindex++; 724 725 printf("Strength = %3" SIZE_T_FORMAT "u : %s [%s]\n", 726 apprentice_magic_strength(m), 727 ml->magic[magindex].desc, 728 ml->magic[magindex].mimetype); 729 } 730 } 731 } 732 733 private void 734 set_test_type(struct magic *mstart, struct magic *m) 735 { 736 switch (m->type) { 737 case FILE_BYTE: 738 case FILE_SHORT: 739 case FILE_LONG: 740 case FILE_DATE: 741 case FILE_BESHORT: 742 case FILE_BELONG: 743 case FILE_BEDATE: 744 case FILE_LESHORT: 745 case FILE_LELONG: 746 case FILE_LEDATE: 747 case FILE_LDATE: 748 case FILE_BELDATE: 749 case FILE_LELDATE: 750 case FILE_MEDATE: 751 case FILE_MELDATE: 752 case FILE_MELONG: 753 case FILE_QUAD: 754 case FILE_LEQUAD: 755 case FILE_BEQUAD: 756 case FILE_QDATE: 757 case FILE_LEQDATE: 758 case FILE_BEQDATE: 759 case FILE_QLDATE: 760 case FILE_LEQLDATE: 761 case FILE_BEQLDATE: 762 case FILE_QWDATE: 763 case FILE_LEQWDATE: 764 case FILE_BEQWDATE: 765 case FILE_FLOAT: 766 case FILE_BEFLOAT: 767 case FILE_LEFLOAT: 768 case FILE_DOUBLE: 769 case FILE_BEDOUBLE: 770 case FILE_LEDOUBLE: 771 mstart->flag |= BINTEST; 772 break; 773 case FILE_STRING: 774 case FILE_PSTRING: 775 case FILE_BESTRING16: 776 case FILE_LESTRING16: 777 /* Allow text overrides */ 778 if (mstart->str_flags & STRING_TEXTTEST) 779 mstart->flag |= TEXTTEST; 780 else 781 mstart->flag |= BINTEST; 782 break; 783 case FILE_REGEX: 784 case FILE_SEARCH: 785 /* Check for override */ 786 if (mstart->str_flags & STRING_BINTEST) 787 mstart->flag |= BINTEST; 788 if (mstart->str_flags & STRING_TEXTTEST) 789 mstart->flag |= TEXTTEST; 790 791 if (mstart->flag & (TEXTTEST|BINTEST)) 792 break; 793 794 /* binary test if pattern is not text */ 795 if (file_looks_utf8(m->value.us, (size_t)m->vallen, NULL, 796 NULL) <= 0) 797 mstart->flag |= BINTEST; 798 else 799 mstart->flag |= TEXTTEST; 800 break; 801 case FILE_DEFAULT: 802 /* can't deduce anything; we shouldn't see this at the 803 top level anyway */ 804 break; 805 case FILE_INVALID: 806 default: 807 /* invalid search type, but no need to complain here */ 808 break; 809 } 810 } 811 812 private int 813 addentry(struct magic_set *ms, struct magic_entry *me, 814 struct magic_entry **mentry, uint32_t *mentrycount) 815 { 816 size_t i = me->mp->type == FILE_NAME ? 1 : 0; 817 if (mentrycount[i] == maxmagic[i]) { 818 struct magic_entry *mp; 819 820 maxmagic[i] += ALLOC_INCR; 821 if ((mp = CAST(struct magic_entry *, 822 realloc(mentry[i], sizeof(*mp) * maxmagic[i]))) == 823 NULL) { 824 file_oomem(ms, sizeof(*mp) * maxmagic[i]); 825 return -1; 826 } 827 (void)memset(&mp[mentrycount[i]], 0, sizeof(*mp) * 828 ALLOC_INCR); 829 mentry[i] = mp; 830 } 831 mentry[i][mentrycount[i]++] = *me; 832 memset(me, 0, sizeof(*me)); 833 return 0; 834 } 835 836 /* 837 * Load and parse one file. 838 */ 839 private void 840 load_1(struct magic_set *ms, int action, const char *fn, int *errs, 841 struct magic_entry **mentry, uint32_t *mentrycount) 842 { 843 size_t lineno = 0, llen = 0; 844 char *line = NULL; 845 ssize_t len; 846 struct magic_entry me; 847 848 FILE *f = fopen(ms->file = fn, "r"); 849 if (f == NULL) { 850 if (errno != ENOENT) 851 file_error(ms, errno, "cannot read magic file `%s'", 852 fn); 853 (*errs)++; 854 return; 855 } 856 857 memset(&me, 0, sizeof(me)); 858 /* read and parse this file */ 859 for (ms->line = 1; (len = getline(&line, &llen, f)) != -1; 860 ms->line++) { 861 if (len == 0) /* null line, garbage, etc */ 862 continue; 863 if (line[len - 1] == '\n') { 864 lineno++; 865 line[len - 1] = '\0'; /* delete newline */ 866 } 867 switch (line[0]) { 868 case '\0': /* empty, do not parse */ 869 case '#': /* comment, do not parse */ 870 continue; 871 case '!': 872 if (line[1] == ':') { 873 size_t i; 874 875 for (i = 0; bang[i].name != NULL; i++) { 876 if ((size_t)(len - 2) > bang[i].len && 877 memcmp(bang[i].name, line + 2, 878 bang[i].len) == 0) 879 break; 880 } 881 if (bang[i].name == NULL) { 882 file_error(ms, 0, 883 "Unknown !: entry `%s'", line); 884 (*errs)++; 885 continue; 886 } 887 if (me.mp == NULL) { 888 file_error(ms, 0, 889 "No current entry for :!%s type", 890 bang[i].name); 891 (*errs)++; 892 continue; 893 } 894 if ((*bang[i].fun)(ms, &me, 895 line + bang[i].len + 2) != 0) { 896 (*errs)++; 897 continue; 898 } 899 continue; 900 } 901 /*FALLTHROUGH*/ 902 default: 903 again: 904 switch (parse(ms, &me, line, lineno, action)) { 905 case 0: 906 continue; 907 case 1: 908 (void)addentry(ms, &me, mentry, mentrycount); 909 goto again; 910 default: 911 (*errs)++; 912 break; 913 } 914 } 915 } 916 if (me.mp) 917 (void)addentry(ms, &me, mentry, mentrycount); 918 free(line); 919 (void)fclose(f); 920 } 921 922 /* 923 * parse a file or directory of files 924 * const char *fn: name of magic file or directory 925 */ 926 private int 927 cmpstrp(const void *p1, const void *p2) 928 { 929 return strcmp(*(char *const *)p1, *(char *const *)p2); 930 } 931 932 933 private uint32_t 934 set_text_binary(struct magic_set *ms, struct magic_entry *me, uint32_t nme, 935 uint32_t starttest) 936 { 937 static const char text[] = "text"; 938 static const char binary[] = "binary"; 939 static const size_t len = sizeof(text); 940 941 uint32_t i = starttest; 942 943 do { 944 set_test_type(me[starttest].mp, me[i].mp); 945 if ((ms->flags & MAGIC_DEBUG) == 0) 946 continue; 947 (void)fprintf(stderr, "%s%s%s: %s\n", 948 me[i].mp->mimetype, 949 me[i].mp->mimetype[0] == '\0' ? "" : "; ", 950 me[i].mp->desc[0] ? me[i].mp->desc : "(no description)", 951 me[i].mp->flag & BINTEST ? binary : text); 952 if (me[i].mp->flag & BINTEST) { 953 char *p = strstr(me[i].mp->desc, text); 954 if (p && (p == me[i].mp->desc || 955 isspace((unsigned char)p[-1])) && 956 (p + len - me[i].mp->desc == MAXstring 957 || (p[len] == '\0' || 958 isspace((unsigned char)p[len])))) 959 (void)fprintf(stderr, "*** Possible " 960 "binary test for text type\n"); 961 } 962 } while (++i < nme && me[i].mp->cont_level != 0); 963 return i; 964 } 965 966 private void 967 set_last_default(struct magic_set *ms, struct magic_entry *me, uint32_t nme) 968 { 969 uint32_t i; 970 for (i = 0; i < nme; i++) { 971 if (me[i].mp->cont_level == 0 && 972 me[i].mp->type == FILE_DEFAULT) { 973 while (++i < nme) 974 if (me[i].mp->cont_level == 0) 975 break; 976 if (i != nme) { 977 /* XXX - Ugh! */ 978 ms->line = me[i].mp->lineno; 979 file_magwarn(ms, 980 "level 0 \"default\" did not sort last"); 981 } 982 return; 983 } 984 } 985 } 986 987 private int 988 coalesce_entries(struct magic_set *ms, struct magic_entry *me, uint32_t nme, 989 struct magic **ma, uint32_t *nma) 990 { 991 uint32_t i, mentrycount = 0; 992 size_t slen; 993 994 for (i = 0; i < nme; i++) 995 mentrycount += me[i].cont_count; 996 997 slen = sizeof(**ma) * mentrycount; 998 if ((*ma = CAST(struct magic *, malloc(slen))) == NULL) { 999 file_oomem(ms, slen); 1000 return -1; 1001 } 1002 1003 mentrycount = 0; 1004 for (i = 0; i < nme; i++) { 1005 (void)memcpy(*ma + mentrycount, me[i].mp, 1006 me[i].cont_count * sizeof(**ma)); 1007 mentrycount += me[i].cont_count; 1008 } 1009 *nma = mentrycount; 1010 return 0; 1011 } 1012 1013 private void 1014 magic_entry_free(struct magic_entry *me, uint32_t nme) 1015 { 1016 uint32_t i; 1017 if (me == NULL) 1018 return; 1019 for (i = 0; i < nme; i++) 1020 free(me[i].mp); 1021 free(me); 1022 } 1023 1024 private int 1025 apprentice_load(struct magic_set *ms, struct magic **magicp, uint32_t *nmagicp, 1026 const char *fn, int action) 1027 { 1028 int errs = 0; 1029 struct magic_entry *mentry[MAGIC_SETS] = { NULL }; 1030 uint32_t mentrycount[MAGIC_SETS] = { 0 }; 1031 uint32_t i, j; 1032 size_t files = 0, maxfiles = 0; 1033 char **filearr = NULL, *mfn; 1034 struct stat st; 1035 DIR *dir; 1036 struct dirent *d; 1037 1038 ms->flags |= MAGIC_CHECK; /* Enable checks for parsed files */ 1039 1040 /* print silly verbose header for USG compat. */ 1041 if (action == FILE_CHECK) 1042 (void)fprintf(stderr, "%s\n", usg_hdr); 1043 1044 /* load directory or file */ 1045 if (stat(fn, &st) == 0 && S_ISDIR(st.st_mode)) { 1046 dir = opendir(fn); 1047 if (!dir) { 1048 errs++; 1049 goto out; 1050 } 1051 while ((d = readdir(dir)) != NULL) { 1052 if (asprintf(&mfn, "%s/%s", fn, d->d_name) < 0) { 1053 file_oomem(ms, 1054 strlen(fn) + strlen(d->d_name) + 2); 1055 errs++; 1056 closedir(dir); 1057 goto out; 1058 } 1059 if (stat(mfn, &st) == -1 || !S_ISREG(st.st_mode)) { 1060 free(mfn); 1061 continue; 1062 } 1063 if (files >= maxfiles) { 1064 size_t mlen; 1065 maxfiles = (maxfiles + 1) * 2; 1066 mlen = maxfiles * sizeof(*filearr); 1067 if ((filearr = CAST(char **, 1068 realloc(filearr, mlen))) == NULL) { 1069 file_oomem(ms, mlen); 1070 free(mfn); 1071 closedir(dir); 1072 errs++; 1073 goto out; 1074 } 1075 } 1076 filearr[files++] = mfn; 1077 } 1078 closedir(dir); 1079 qsort(filearr, files, sizeof(*filearr), cmpstrp); 1080 for (i = 0; i < files; i++) { 1081 load_1(ms, action, filearr[i], &errs, mentry, 1082 mentrycount); 1083 free(filearr[i]); 1084 } 1085 free(filearr); 1086 } else 1087 load_1(ms, action, fn, &errs, mentry, mentrycount); 1088 if (errs) 1089 goto out; 1090 1091 for (j = 0; j < MAGIC_SETS; j++) { 1092 /* Set types of tests */ 1093 for (i = 0; i < mentrycount[j]; ) { 1094 if (mentry[j][i].mp->cont_level != 0) { 1095 i++; 1096 continue; 1097 } 1098 i = set_text_binary(ms, mentry[j], mentrycount[j], i); 1099 } 1100 qsort(mentry[j], mentrycount[j], sizeof(*mentry[j]), 1101 apprentice_sort); 1102 1103 /* 1104 * Make sure that any level 0 "default" line is last 1105 * (if one exists). 1106 */ 1107 set_last_default(ms, mentry[j], mentrycount[j]); 1108 1109 /* coalesce per file arrays into a single one */ 1110 if (coalesce_entries(ms, mentry[j], mentrycount[j], 1111 &magicp[j], &nmagicp[j]) == -1) { 1112 errs++; 1113 goto out; 1114 } 1115 } 1116 1117 out: 1118 for (j = 0; j < MAGIC_SETS; j++) 1119 magic_entry_free(mentry[j], mentrycount[j]); 1120 1121 if (errs) { 1122 for (j = 0; j < MAGIC_SETS; j++) { 1123 if (magicp[j]) { 1124 free(magicp[j]); 1125 magicp[j] = NULL; 1126 } 1127 nmagicp[j] = 0; 1128 } 1129 return errs; 1130 } 1131 return 0; 1132 } 1133 1134 /* 1135 * extend the sign bit if the comparison is to be signed 1136 */ 1137 protected uint64_t 1138 file_signextend(struct magic_set *ms, struct magic *m, uint64_t v) 1139 { 1140 if (!(m->flag & UNSIGNED)) { 1141 switch(m->type) { 1142 /* 1143 * Do not remove the casts below. They are 1144 * vital. When later compared with the data, 1145 * the sign extension must have happened. 1146 */ 1147 case FILE_BYTE: 1148 v = (char) v; 1149 break; 1150 case FILE_SHORT: 1151 case FILE_BESHORT: 1152 case FILE_LESHORT: 1153 v = (short) v; 1154 break; 1155 case FILE_DATE: 1156 case FILE_BEDATE: 1157 case FILE_LEDATE: 1158 case FILE_MEDATE: 1159 case FILE_LDATE: 1160 case FILE_BELDATE: 1161 case FILE_LELDATE: 1162 case FILE_MELDATE: 1163 case FILE_LONG: 1164 case FILE_BELONG: 1165 case FILE_LELONG: 1166 case FILE_MELONG: 1167 case FILE_FLOAT: 1168 case FILE_BEFLOAT: 1169 case FILE_LEFLOAT: 1170 v = (int32_t) v; 1171 break; 1172 case FILE_QUAD: 1173 case FILE_BEQUAD: 1174 case FILE_LEQUAD: 1175 case FILE_QDATE: 1176 case FILE_QLDATE: 1177 case FILE_QWDATE: 1178 case FILE_BEQDATE: 1179 case FILE_BEQLDATE: 1180 case FILE_BEQWDATE: 1181 case FILE_LEQDATE: 1182 case FILE_LEQLDATE: 1183 case FILE_LEQWDATE: 1184 case FILE_DOUBLE: 1185 case FILE_BEDOUBLE: 1186 case FILE_LEDOUBLE: 1187 v = (int64_t) v; 1188 break; 1189 case FILE_STRING: 1190 case FILE_PSTRING: 1191 case FILE_BESTRING16: 1192 case FILE_LESTRING16: 1193 case FILE_REGEX: 1194 case FILE_SEARCH: 1195 case FILE_DEFAULT: 1196 case FILE_INDIRECT: 1197 case FILE_NAME: 1198 case FILE_USE: 1199 break; 1200 default: 1201 if (ms->flags & MAGIC_CHECK) 1202 file_magwarn(ms, "cannot happen: m->type=%d\n", 1203 m->type); 1204 return ~0U; 1205 } 1206 } 1207 return v; 1208 } 1209 1210 private int 1211 string_modifier_check(struct magic_set *ms, struct magic *m) 1212 { 1213 if ((ms->flags & MAGIC_CHECK) == 0) 1214 return 0; 1215 1216 if (m->type != FILE_PSTRING && (m->str_flags & PSTRING_LEN) != 0) { 1217 file_magwarn(ms, 1218 "'/BHhLl' modifiers are only allowed for pascal strings\n"); 1219 return -1; 1220 } 1221 switch (m->type) { 1222 case FILE_BESTRING16: 1223 case FILE_LESTRING16: 1224 if (m->str_flags != 0) { 1225 file_magwarn(ms, 1226 "no modifiers allowed for 16-bit strings\n"); 1227 return -1; 1228 } 1229 break; 1230 case FILE_STRING: 1231 case FILE_PSTRING: 1232 if ((m->str_flags & REGEX_OFFSET_START) != 0) { 1233 file_magwarn(ms, 1234 "'/%c' only allowed on regex and search\n", 1235 CHAR_REGEX_OFFSET_START); 1236 return -1; 1237 } 1238 break; 1239 case FILE_SEARCH: 1240 if (m->str_range == 0) { 1241 file_magwarn(ms, 1242 "missing range; defaulting to %d\n", 1243 STRING_DEFAULT_RANGE); 1244 m->str_range = STRING_DEFAULT_RANGE; 1245 return -1; 1246 } 1247 break; 1248 case FILE_REGEX: 1249 if ((m->str_flags & STRING_COMPACT_WHITESPACE) != 0) { 1250 file_magwarn(ms, "'/%c' not allowed on regex\n", 1251 CHAR_COMPACT_WHITESPACE); 1252 return -1; 1253 } 1254 if ((m->str_flags & STRING_COMPACT_OPTIONAL_WHITESPACE) != 0) { 1255 file_magwarn(ms, "'/%c' not allowed on regex\n", 1256 CHAR_COMPACT_OPTIONAL_WHITESPACE); 1257 return -1; 1258 } 1259 break; 1260 default: 1261 file_magwarn(ms, "coding error: m->type=%d\n", 1262 m->type); 1263 return -1; 1264 } 1265 return 0; 1266 } 1267 1268 private int 1269 get_op(char c) 1270 { 1271 switch (c) { 1272 case '&': 1273 return FILE_OPAND; 1274 case '|': 1275 return FILE_OPOR; 1276 case '^': 1277 return FILE_OPXOR; 1278 case '+': 1279 return FILE_OPADD; 1280 case '-': 1281 return FILE_OPMINUS; 1282 case '*': 1283 return FILE_OPMULTIPLY; 1284 case '/': 1285 return FILE_OPDIVIDE; 1286 case '%': 1287 return FILE_OPMODULO; 1288 default: 1289 return -1; 1290 } 1291 } 1292 1293 #ifdef ENABLE_CONDITIONALS 1294 private int 1295 get_cond(const char *l, const char **t) 1296 { 1297 static const struct cond_tbl_s { 1298 char name[8]; 1299 size_t len; 1300 int cond; 1301 } cond_tbl[] = { 1302 { "if", 2, COND_IF }, 1303 { "elif", 4, COND_ELIF }, 1304 { "else", 4, COND_ELSE }, 1305 { "", 0, COND_NONE }, 1306 }; 1307 const struct cond_tbl_s *p; 1308 1309 for (p = cond_tbl; p->len; p++) { 1310 if (strncmp(l, p->name, p->len) == 0 && 1311 isspace((unsigned char)l[p->len])) { 1312 if (t) 1313 *t = l + p->len; 1314 break; 1315 } 1316 } 1317 return p->cond; 1318 } 1319 1320 private int 1321 check_cond(struct magic_set *ms, int cond, uint32_t cont_level) 1322 { 1323 int last_cond; 1324 last_cond = ms->c.li[cont_level].last_cond; 1325 1326 switch (cond) { 1327 case COND_IF: 1328 if (last_cond != COND_NONE && last_cond != COND_ELIF) { 1329 if (ms->flags & MAGIC_CHECK) 1330 file_magwarn(ms, "syntax error: `if'"); 1331 return -1; 1332 } 1333 last_cond = COND_IF; 1334 break; 1335 1336 case COND_ELIF: 1337 if (last_cond != COND_IF && last_cond != COND_ELIF) { 1338 if (ms->flags & MAGIC_CHECK) 1339 file_magwarn(ms, "syntax error: `elif'"); 1340 return -1; 1341 } 1342 last_cond = COND_ELIF; 1343 break; 1344 1345 case COND_ELSE: 1346 if (last_cond != COND_IF && last_cond != COND_ELIF) { 1347 if (ms->flags & MAGIC_CHECK) 1348 file_magwarn(ms, "syntax error: `else'"); 1349 return -1; 1350 } 1351 last_cond = COND_NONE; 1352 break; 1353 1354 case COND_NONE: 1355 last_cond = COND_NONE; 1356 break; 1357 } 1358 1359 ms->c.li[cont_level].last_cond = last_cond; 1360 return 0; 1361 } 1362 #endif /* ENABLE_CONDITIONALS */ 1363 1364 /* 1365 * parse one line from magic file, put into magic[index++] if valid 1366 */ 1367 private int 1368 parse(struct magic_set *ms, struct magic_entry *me, const char *line, 1369 size_t lineno, int action) 1370 { 1371 #ifdef ENABLE_CONDITIONALS 1372 static uint32_t last_cont_level = 0; 1373 #endif 1374 size_t i; 1375 struct magic *m; 1376 const char *l = line; 1377 char *t; 1378 int op; 1379 uint32_t cont_level; 1380 1381 cont_level = 0; 1382 1383 while (*l == '>') { 1384 ++l; /* step over */ 1385 cont_level++; 1386 } 1387 #ifdef ENABLE_CONDITIONALS 1388 if (cont_level == 0 || cont_level > last_cont_level) 1389 if (file_check_mem(ms, cont_level) == -1) 1390 return -1; 1391 last_cont_level = cont_level; 1392 #endif 1393 if (cont_level != 0) { 1394 if (me->mp == NULL) { 1395 file_error(ms, 0, "No current entry for continuation"); 1396 return -1; 1397 } 1398 if (me->cont_count == me->max_count) { 1399 struct magic *nm; 1400 size_t cnt = me->max_count + ALLOC_CHUNK; 1401 if ((nm = CAST(struct magic *, realloc(me->mp, 1402 sizeof(*nm) * cnt))) == NULL) { 1403 file_oomem(ms, sizeof(*nm) * cnt); 1404 return -1; 1405 } 1406 me->mp = m = nm; 1407 me->max_count = CAST(uint32_t, cnt); 1408 } 1409 m = &me->mp[me->cont_count++]; 1410 (void)memset(m, 0, sizeof(*m)); 1411 m->cont_level = cont_level; 1412 } else { 1413 size_t len = sizeof(*m) * ALLOC_CHUNK; 1414 if (me->mp != NULL) 1415 return 1; 1416 if ((m = CAST(struct magic *, malloc(len))) == NULL) { 1417 file_oomem(ms, len); 1418 return -1; 1419 } 1420 me->mp = m; 1421 me->max_count = ALLOC_CHUNK; 1422 (void)memset(m, 0, sizeof(*m)); 1423 m->factor_op = FILE_FACTOR_OP_NONE; 1424 m->cont_level = 0; 1425 me->cont_count = 1; 1426 } 1427 m->lineno = CAST(uint32_t, lineno); 1428 1429 if (*l == '&') { /* m->cont_level == 0 checked below. */ 1430 ++l; /* step over */ 1431 m->flag |= OFFADD; 1432 } 1433 if (*l == '(') { 1434 ++l; /* step over */ 1435 m->flag |= INDIR; 1436 if (m->flag & OFFADD) 1437 m->flag = (m->flag & ~OFFADD) | INDIROFFADD; 1438 1439 if (*l == '&') { /* m->cont_level == 0 checked below */ 1440 ++l; /* step over */ 1441 m->flag |= OFFADD; 1442 } 1443 } 1444 /* Indirect offsets are not valid at level 0. */ 1445 if (m->cont_level == 0 && (m->flag & (OFFADD | INDIROFFADD))) 1446 if (ms->flags & MAGIC_CHECK) 1447 file_magwarn(ms, "relative offset at level 0"); 1448 1449 /* get offset, then skip over it */ 1450 m->offset = (uint32_t)strtoul(l, &t, 0); 1451 if (l == t) 1452 if (ms->flags & MAGIC_CHECK) 1453 file_magwarn(ms, "offset `%s' invalid", l); 1454 l = t; 1455 1456 if (m->flag & INDIR) { 1457 m->in_type = FILE_LONG; 1458 m->in_offset = 0; 1459 /* 1460 * read [.lbs][+-]nnnnn) 1461 */ 1462 if (*l == '.') { 1463 l++; 1464 switch (*l) { 1465 case 'l': 1466 m->in_type = FILE_LELONG; 1467 break; 1468 case 'L': 1469 m->in_type = FILE_BELONG; 1470 break; 1471 case 'm': 1472 m->in_type = FILE_MELONG; 1473 break; 1474 case 'h': 1475 case 's': 1476 m->in_type = FILE_LESHORT; 1477 break; 1478 case 'H': 1479 case 'S': 1480 m->in_type = FILE_BESHORT; 1481 break; 1482 case 'c': 1483 case 'b': 1484 case 'C': 1485 case 'B': 1486 m->in_type = FILE_BYTE; 1487 break; 1488 case 'e': 1489 case 'f': 1490 case 'g': 1491 m->in_type = FILE_LEDOUBLE; 1492 break; 1493 case 'E': 1494 case 'F': 1495 case 'G': 1496 m->in_type = FILE_BEDOUBLE; 1497 break; 1498 case 'i': 1499 m->in_type = FILE_LEID3; 1500 break; 1501 case 'I': 1502 m->in_type = FILE_BEID3; 1503 break; 1504 default: 1505 if (ms->flags & MAGIC_CHECK) 1506 file_magwarn(ms, 1507 "indirect offset type `%c' invalid", 1508 *l); 1509 break; 1510 } 1511 l++; 1512 } 1513 1514 m->in_op = 0; 1515 if (*l == '~') { 1516 m->in_op |= FILE_OPINVERSE; 1517 l++; 1518 } 1519 if ((op = get_op(*l)) != -1) { 1520 m->in_op |= op; 1521 l++; 1522 } 1523 if (*l == '(') { 1524 m->in_op |= FILE_OPINDIRECT; 1525 l++; 1526 } 1527 if (isdigit((unsigned char)*l) || *l == '-') { 1528 m->in_offset = (int32_t)strtol(l, &t, 0); 1529 if (l == t) 1530 if (ms->flags & MAGIC_CHECK) 1531 file_magwarn(ms, 1532 "in_offset `%s' invalid", l); 1533 l = t; 1534 } 1535 if (*l++ != ')' || 1536 ((m->in_op & FILE_OPINDIRECT) && *l++ != ')')) 1537 if (ms->flags & MAGIC_CHECK) 1538 file_magwarn(ms, 1539 "missing ')' in indirect offset"); 1540 } 1541 EATAB; 1542 1543 #ifdef ENABLE_CONDITIONALS 1544 m->cond = get_cond(l, &l); 1545 if (check_cond(ms, m->cond, cont_level) == -1) 1546 return -1; 1547 1548 EATAB; 1549 #endif 1550 1551 if (*l == 'u' && (l[1] != 's' || l[2] != 'e')) { 1552 ++l; 1553 m->flag |= UNSIGNED; 1554 } 1555 1556 m->type = get_type(l, &l); 1557 if (m->type == FILE_INVALID) { 1558 if (ms->flags & MAGIC_CHECK) 1559 file_magwarn(ms, "type `%s' invalid", l); 1560 return -1; 1561 } 1562 1563 /* New-style anding: "0 byte&0x80 =0x80 dynamically linked" */ 1564 /* New and improved: ~ & | ^ + - * / % -- exciting, isn't it? */ 1565 1566 m->mask_op = 0; 1567 if (*l == '~') { 1568 if (!IS_STRING(m->type)) 1569 m->mask_op |= FILE_OPINVERSE; 1570 else if (ms->flags & MAGIC_CHECK) 1571 file_magwarn(ms, "'~' invalid for string types"); 1572 ++l; 1573 } 1574 m->str_range = 0; 1575 m->str_flags = m->type == FILE_PSTRING ? PSTRING_1_LE : 0; 1576 if ((op = get_op(*l)) != -1) { 1577 if (!IS_STRING(m->type)) { 1578 uint64_t val; 1579 ++l; 1580 m->mask_op |= op; 1581 val = (uint64_t)strtoull(l, &t, 0); 1582 l = t; 1583 m->num_mask = file_signextend(ms, m, val); 1584 eatsize(&l); 1585 } 1586 else if (op == FILE_OPDIVIDE) { 1587 int have_range = 0; 1588 while (!isspace((unsigned char)*++l)) { 1589 switch (*l) { 1590 case '0': case '1': case '2': 1591 case '3': case '4': case '5': 1592 case '6': case '7': case '8': 1593 case '9': 1594 if (have_range && 1595 (ms->flags & MAGIC_CHECK)) 1596 file_magwarn(ms, 1597 "multiple ranges"); 1598 have_range = 1; 1599 m->str_range = CAST(uint32_t, 1600 strtoul(l, &t, 0)); 1601 if (m->str_range == 0) 1602 file_magwarn(ms, 1603 "zero range"); 1604 l = t - 1; 1605 break; 1606 case CHAR_COMPACT_WHITESPACE: 1607 m->str_flags |= 1608 STRING_COMPACT_WHITESPACE; 1609 break; 1610 case CHAR_COMPACT_OPTIONAL_WHITESPACE: 1611 m->str_flags |= 1612 STRING_COMPACT_OPTIONAL_WHITESPACE; 1613 break; 1614 case CHAR_IGNORE_LOWERCASE: 1615 m->str_flags |= STRING_IGNORE_LOWERCASE; 1616 break; 1617 case CHAR_IGNORE_UPPERCASE: 1618 m->str_flags |= STRING_IGNORE_UPPERCASE; 1619 break; 1620 case CHAR_REGEX_OFFSET_START: 1621 m->str_flags |= REGEX_OFFSET_START; 1622 break; 1623 case CHAR_BINTEST: 1624 m->str_flags |= STRING_BINTEST; 1625 break; 1626 case CHAR_TEXTTEST: 1627 m->str_flags |= STRING_TEXTTEST; 1628 break; 1629 case CHAR_TRIM: 1630 m->str_flags |= STRING_TRIM; 1631 break; 1632 case CHAR_PSTRING_1_LE: 1633 if (m->type != FILE_PSTRING) 1634 goto bad; 1635 m->str_flags = (m->str_flags & ~PSTRING_LEN) | PSTRING_1_LE; 1636 break; 1637 case CHAR_PSTRING_2_BE: 1638 if (m->type != FILE_PSTRING) 1639 goto bad; 1640 m->str_flags = (m->str_flags & ~PSTRING_LEN) | PSTRING_2_BE; 1641 break; 1642 case CHAR_PSTRING_2_LE: 1643 if (m->type != FILE_PSTRING) 1644 goto bad; 1645 m->str_flags = (m->str_flags & ~PSTRING_LEN) | PSTRING_2_LE; 1646 break; 1647 case CHAR_PSTRING_4_BE: 1648 if (m->type != FILE_PSTRING) 1649 goto bad; 1650 m->str_flags = (m->str_flags & ~PSTRING_LEN) | PSTRING_4_BE; 1651 break; 1652 case CHAR_PSTRING_4_LE: 1653 if (m->type != FILE_PSTRING) 1654 goto bad; 1655 m->str_flags = (m->str_flags & ~PSTRING_LEN) | PSTRING_4_LE; 1656 break; 1657 case CHAR_PSTRING_LENGTH_INCLUDES_ITSELF: 1658 if (m->type != FILE_PSTRING) 1659 goto bad; 1660 m->str_flags |= PSTRING_LENGTH_INCLUDES_ITSELF; 1661 break; 1662 default: 1663 bad: 1664 if (ms->flags & MAGIC_CHECK) 1665 file_magwarn(ms, 1666 "string extension `%c' " 1667 "invalid", *l); 1668 return -1; 1669 } 1670 /* allow multiple '/' for readability */ 1671 if (l[1] == '/' && 1672 !isspace((unsigned char)l[2])) 1673 l++; 1674 } 1675 if (string_modifier_check(ms, m) == -1) 1676 return -1; 1677 } 1678 else { 1679 if (ms->flags & MAGIC_CHECK) 1680 file_magwarn(ms, "invalid string op: %c", *t); 1681 return -1; 1682 } 1683 } 1684 /* 1685 * We used to set mask to all 1's here, instead let's just not do 1686 * anything if mask = 0 (unless you have a better idea) 1687 */ 1688 EATAB; 1689 1690 switch (*l) { 1691 case '>': 1692 case '<': 1693 m->reln = *l; 1694 ++l; 1695 if (*l == '=') { 1696 if (ms->flags & MAGIC_CHECK) { 1697 file_magwarn(ms, "%c= not supported", 1698 m->reln); 1699 return -1; 1700 } 1701 ++l; 1702 } 1703 break; 1704 /* Old-style anding: "0 byte &0x80 dynamically linked" */ 1705 case '&': 1706 case '^': 1707 case '=': 1708 m->reln = *l; 1709 ++l; 1710 if (*l == '=') { 1711 /* HP compat: ignore &= etc. */ 1712 ++l; 1713 } 1714 break; 1715 case '!': 1716 m->reln = *l; 1717 ++l; 1718 break; 1719 default: 1720 m->reln = '='; /* the default relation */ 1721 if (*l == 'x' && ((isascii((unsigned char)l[1]) && 1722 isspace((unsigned char)l[1])) || !l[1])) { 1723 m->reln = *l; 1724 ++l; 1725 } 1726 break; 1727 } 1728 /* 1729 * Grab the value part, except for an 'x' reln. 1730 */ 1731 if (m->reln != 'x' && getvalue(ms, m, &l, action)) 1732 return -1; 1733 1734 /* 1735 * TODO finish this macro and start using it! 1736 * #define offsetcheck {if (offset > HOWMANY-1) 1737 * magwarn("offset too big"); } 1738 */ 1739 1740 /* 1741 * Now get last part - the description 1742 */ 1743 EATAB; 1744 if (l[0] == '\b') { 1745 ++l; 1746 m->flag |= NOSPACE; 1747 } else if ((l[0] == '\\') && (l[1] == 'b')) { 1748 ++l; 1749 ++l; 1750 m->flag |= NOSPACE; 1751 } 1752 for (i = 0; (m->desc[i++] = *l++) != '\0' && i < sizeof(m->desc); ) 1753 continue; 1754 if (i == sizeof(m->desc)) { 1755 m->desc[sizeof(m->desc) - 1] = '\0'; 1756 if (ms->flags & MAGIC_CHECK) 1757 file_magwarn(ms, "description `%s' truncated", m->desc); 1758 } 1759 1760 /* 1761 * We only do this check while compiling, or if any of the magic 1762 * files were not compiled. 1763 */ 1764 if (ms->flags & MAGIC_CHECK) { 1765 if (check_format(ms, m) == -1) 1766 return -1; 1767 } 1768 #ifndef COMPILE_ONLY 1769 if (action == FILE_CHECK) { 1770 file_mdump(m); 1771 } 1772 #endif 1773 m->mimetype[0] = '\0'; /* initialise MIME type to none */ 1774 return 0; 1775 } 1776 1777 /* 1778 * parse a STRENGTH annotation line from magic file, put into magic[index - 1] 1779 * if valid 1780 */ 1781 private int 1782 parse_strength(struct magic_set *ms, struct magic_entry *me, const char *line) 1783 { 1784 const char *l = line; 1785 char *el; 1786 unsigned long factor; 1787 struct magic *m = &me->mp[0]; 1788 1789 if (m->factor_op != FILE_FACTOR_OP_NONE) { 1790 file_magwarn(ms, 1791 "Current entry already has a strength type: %c %d", 1792 m->factor_op, m->factor); 1793 return -1; 1794 } 1795 EATAB; 1796 switch (*l) { 1797 case FILE_FACTOR_OP_NONE: 1798 case FILE_FACTOR_OP_PLUS: 1799 case FILE_FACTOR_OP_MINUS: 1800 case FILE_FACTOR_OP_TIMES: 1801 case FILE_FACTOR_OP_DIV: 1802 m->factor_op = *l++; 1803 break; 1804 default: 1805 file_magwarn(ms, "Unknown factor op `%c'", *l); 1806 return -1; 1807 } 1808 EATAB; 1809 factor = strtoul(l, &el, 0); 1810 if (factor > 255) { 1811 file_magwarn(ms, "Too large factor `%lu'", factor); 1812 goto out; 1813 } 1814 if (*el && !isspace((unsigned char)*el)) { 1815 file_magwarn(ms, "Bad factor `%s'", l); 1816 goto out; 1817 } 1818 m->factor = (uint8_t)factor; 1819 if (m->factor == 0 && m->factor_op == FILE_FACTOR_OP_DIV) { 1820 file_magwarn(ms, "Cannot have factor op `%c' and factor %u", 1821 m->factor_op, m->factor); 1822 goto out; 1823 } 1824 return 0; 1825 out: 1826 m->factor_op = FILE_FACTOR_OP_NONE; 1827 m->factor = 0; 1828 return -1; 1829 } 1830 1831 /* 1832 * Parse an Apple CREATOR/TYPE annotation from magic file and put it into 1833 * magic[index - 1] 1834 */ 1835 private int 1836 parse_apple(struct magic_set *ms, struct magic_entry *me, const char *line) 1837 { 1838 size_t i; 1839 const char *l = line; 1840 struct magic *m = &me->mp[me->cont_count == 0 ? 0 : me->cont_count - 1]; 1841 1842 if (m->apple[0] != '\0') { 1843 file_magwarn(ms, "Current entry already has a APPLE type " 1844 "`%.8s', new type `%s'", m->mimetype, l); 1845 return -1; 1846 } 1847 1848 EATAB; 1849 for (i = 0; *l && ((isascii((unsigned char)*l) && 1850 isalnum((unsigned char)*l)) || strchr("-+/.", *l)) && 1851 i < sizeof(m->apple); m->apple[i++] = *l++) 1852 continue; 1853 if (i == sizeof(m->apple) && *l) { 1854 /* We don't need to NUL terminate here, printing handles it */ 1855 if (ms->flags & MAGIC_CHECK) 1856 file_magwarn(ms, "APPLE type `%s' truncated %" 1857 SIZE_T_FORMAT "u", line, i); 1858 } 1859 1860 if (i > 0) 1861 return 0; 1862 else 1863 return -1; 1864 } 1865 1866 /* 1867 * parse a MIME annotation line from magic file, put into magic[index - 1] 1868 * if valid 1869 */ 1870 private int 1871 parse_mime(struct magic_set *ms, struct magic_entry *me, const char *line) 1872 { 1873 size_t i; 1874 const char *l = line; 1875 struct magic *m = &me->mp[me->cont_count == 0 ? 0 : me->cont_count - 1]; 1876 1877 if (m->mimetype[0] != '\0') { 1878 file_magwarn(ms, "Current entry already has a MIME type `%s'," 1879 " new type `%s'", m->mimetype, l); 1880 return -1; 1881 } 1882 1883 EATAB; 1884 for (i = 0; *l && ((isascii((unsigned char)*l) && 1885 isalnum((unsigned char)*l)) || strchr("-+/.", *l)) && 1886 i < sizeof(m->mimetype); m->mimetype[i++] = *l++) 1887 continue; 1888 if (i == sizeof(m->mimetype)) { 1889 m->mimetype[sizeof(m->mimetype) - 1] = '\0'; 1890 if (ms->flags & MAGIC_CHECK) 1891 file_magwarn(ms, "MIME type `%s' truncated %" 1892 SIZE_T_FORMAT "u", m->mimetype, i); 1893 } else 1894 m->mimetype[i] = '\0'; 1895 1896 if (i > 0) 1897 return 0; 1898 else 1899 return -1; 1900 } 1901 1902 private int 1903 check_format_type(const char *ptr, int type) 1904 { 1905 int quad = 0; 1906 if (*ptr == '\0') { 1907 /* Missing format string; bad */ 1908 return -1; 1909 } 1910 1911 switch (type) { 1912 case FILE_FMT_QUAD: 1913 quad = 1; 1914 /*FALLTHROUGH*/ 1915 case FILE_FMT_NUM: 1916 if (*ptr == '-') 1917 ptr++; 1918 if (*ptr == '.') 1919 ptr++; 1920 while (isdigit((unsigned char)*ptr)) ptr++; 1921 if (*ptr == '.') 1922 ptr++; 1923 while (isdigit((unsigned char)*ptr)) ptr++; 1924 if (quad) { 1925 if (*ptr++ != 'l') 1926 return -1; 1927 if (*ptr++ != 'l') 1928 return -1; 1929 } 1930 1931 switch (*ptr++) { 1932 case 'l': 1933 switch (*ptr++) { 1934 case 'i': 1935 case 'd': 1936 case 'u': 1937 case 'x': 1938 case 'X': 1939 return 0; 1940 default: 1941 return -1; 1942 } 1943 1944 case 'h': 1945 switch (*ptr++) { 1946 case 'h': 1947 switch (*ptr++) { 1948 case 'i': 1949 case 'd': 1950 case 'u': 1951 case 'x': 1952 case 'X': 1953 return 0; 1954 default: 1955 return -1; 1956 } 1957 case 'd': 1958 return 0; 1959 default: 1960 return -1; 1961 } 1962 1963 case 'i': 1964 case 'c': 1965 case 'd': 1966 case 'u': 1967 case 'x': 1968 case 'X': 1969 return 0; 1970 1971 default: 1972 return -1; 1973 } 1974 1975 case FILE_FMT_FLOAT: 1976 case FILE_FMT_DOUBLE: 1977 if (*ptr == '-') 1978 ptr++; 1979 if (*ptr == '.') 1980 ptr++; 1981 while (isdigit((unsigned char)*ptr)) ptr++; 1982 if (*ptr == '.') 1983 ptr++; 1984 while (isdigit((unsigned char)*ptr)) ptr++; 1985 1986 switch (*ptr++) { 1987 case 'e': 1988 case 'E': 1989 case 'f': 1990 case 'F': 1991 case 'g': 1992 case 'G': 1993 return 0; 1994 1995 default: 1996 return -1; 1997 } 1998 1999 2000 case FILE_FMT_STR: 2001 if (*ptr == '-') 2002 ptr++; 2003 while (isdigit((unsigned char )*ptr)) 2004 ptr++; 2005 if (*ptr == '.') { 2006 ptr++; 2007 while (isdigit((unsigned char )*ptr)) 2008 ptr++; 2009 } 2010 2011 switch (*ptr++) { 2012 case 's': 2013 return 0; 2014 default: 2015 return -1; 2016 } 2017 2018 default: 2019 /* internal error */ 2020 abort(); 2021 } 2022 /*NOTREACHED*/ 2023 return -1; 2024 } 2025 2026 /* 2027 * Check that the optional printf format in description matches 2028 * the type of the magic. 2029 */ 2030 private int 2031 check_format(struct magic_set *ms, struct magic *m) 2032 { 2033 char *ptr; 2034 2035 for (ptr = m->desc; *ptr; ptr++) 2036 if (*ptr == '%') 2037 break; 2038 if (*ptr == '\0') { 2039 /* No format string; ok */ 2040 return 1; 2041 } 2042 2043 assert(file_nformats == file_nnames); 2044 2045 if (m->type >= file_nformats) { 2046 file_magwarn(ms, "Internal error inconsistency between " 2047 "m->type and format strings"); 2048 return -1; 2049 } 2050 if (file_formats[m->type] == FILE_FMT_NONE) { 2051 file_magwarn(ms, "No format string for `%s' with description " 2052 "`%s'", m->desc, file_names[m->type]); 2053 return -1; 2054 } 2055 2056 ptr++; 2057 if (check_format_type(ptr, file_formats[m->type]) == -1) { 2058 /* 2059 * TODO: this error message is unhelpful if the format 2060 * string is not one character long 2061 */ 2062 file_magwarn(ms, "Printf format `%c' is not valid for type " 2063 "`%s' in description `%s'", *ptr ? *ptr : '?', 2064 file_names[m->type], m->desc); 2065 return -1; 2066 } 2067 2068 for (; *ptr; ptr++) { 2069 if (*ptr == '%') { 2070 file_magwarn(ms, 2071 "Too many format strings (should have at most one) " 2072 "for `%s' with description `%s'", 2073 file_names[m->type], m->desc); 2074 return -1; 2075 } 2076 } 2077 return 0; 2078 } 2079 2080 /* 2081 * Read a numeric value from a pointer, into the value union of a magic 2082 * pointer, according to the magic type. Update the string pointer to point 2083 * just after the number read. Return 0 for success, non-zero for failure. 2084 */ 2085 private int 2086 getvalue(struct magic_set *ms, struct magic *m, const char **p, int action) 2087 { 2088 switch (m->type) { 2089 case FILE_BESTRING16: 2090 case FILE_LESTRING16: 2091 case FILE_STRING: 2092 case FILE_PSTRING: 2093 case FILE_REGEX: 2094 case FILE_SEARCH: 2095 case FILE_NAME: 2096 case FILE_USE: 2097 *p = getstr(ms, m, *p, action == FILE_COMPILE); 2098 if (*p == NULL) { 2099 if (ms->flags & MAGIC_CHECK) 2100 file_magwarn(ms, "cannot get string from `%s'", 2101 m->value.s); 2102 return -1; 2103 } 2104 return 0; 2105 case FILE_FLOAT: 2106 case FILE_BEFLOAT: 2107 case FILE_LEFLOAT: 2108 if (m->reln != 'x') { 2109 char *ep; 2110 #ifdef HAVE_STRTOF 2111 m->value.f = strtof(*p, &ep); 2112 #else 2113 m->value.f = (float)strtod(*p, &ep); 2114 #endif 2115 *p = ep; 2116 } 2117 return 0; 2118 case FILE_DOUBLE: 2119 case FILE_BEDOUBLE: 2120 case FILE_LEDOUBLE: 2121 if (m->reln != 'x') { 2122 char *ep; 2123 m->value.d = strtod(*p, &ep); 2124 *p = ep; 2125 } 2126 return 0; 2127 default: 2128 if (m->reln != 'x') { 2129 char *ep; 2130 m->value.q = file_signextend(ms, m, 2131 (uint64_t)strtoull(*p, &ep, 0)); 2132 *p = ep; 2133 eatsize(p); 2134 } 2135 return 0; 2136 } 2137 } 2138 2139 /* 2140 * Convert a string containing C character escapes. Stop at an unescaped 2141 * space or tab. 2142 * Copy the converted version to "m->value.s", and the length in m->vallen. 2143 * Return updated scan pointer as function result. Warn if set. 2144 */ 2145 private const char * 2146 getstr(struct magic_set *ms, struct magic *m, const char *s, int warn) 2147 { 2148 const char *origs = s; 2149 char *p = m->value.s; 2150 size_t plen = sizeof(m->value.s); 2151 char *origp = p; 2152 char *pmax = p + plen - 1; 2153 int c; 2154 int val; 2155 2156 while ((c = *s++) != '\0') { 2157 if (isspace((unsigned char) c)) 2158 break; 2159 if (p >= pmax) { 2160 file_error(ms, 0, "string too long: `%s'", origs); 2161 return NULL; 2162 } 2163 if (c == '\\') { 2164 switch(c = *s++) { 2165 2166 case '\0': 2167 if (warn) 2168 file_magwarn(ms, "incomplete escape"); 2169 goto out; 2170 2171 case '\t': 2172 if (warn) { 2173 file_magwarn(ms, 2174 "escaped tab found, use \\t instead"); 2175 warn = 0; /* already did */ 2176 } 2177 /*FALLTHROUGH*/ 2178 default: 2179 if (warn) { 2180 if (isprint((unsigned char)c)) { 2181 /* Allow escaping of 2182 * ``relations'' */ 2183 if (strchr("<>&^=!", c) == NULL 2184 && (m->type != FILE_REGEX || 2185 strchr("[]().*?^$|{}", c) 2186 == NULL)) { 2187 file_magwarn(ms, "no " 2188 "need to escape " 2189 "`%c'", c); 2190 } 2191 } else { 2192 file_magwarn(ms, 2193 "unknown escape sequence: " 2194 "\\%03o", c); 2195 } 2196 } 2197 /*FALLTHROUGH*/ 2198 /* space, perhaps force people to use \040? */ 2199 case ' ': 2200 #if 0 2201 /* 2202 * Other things people escape, but shouldn't need to, 2203 * so we disallow them 2204 */ 2205 case '\'': 2206 case '"': 2207 case '?': 2208 #endif 2209 /* Relations */ 2210 case '>': 2211 case '<': 2212 case '&': 2213 case '^': 2214 case '=': 2215 case '!': 2216 /* and baskslash itself */ 2217 case '\\': 2218 *p++ = (char) c; 2219 break; 2220 2221 case 'a': 2222 *p++ = '\a'; 2223 break; 2224 2225 case 'b': 2226 *p++ = '\b'; 2227 break; 2228 2229 case 'f': 2230 *p++ = '\f'; 2231 break; 2232 2233 case 'n': 2234 *p++ = '\n'; 2235 break; 2236 2237 case 'r': 2238 *p++ = '\r'; 2239 break; 2240 2241 case 't': 2242 *p++ = '\t'; 2243 break; 2244 2245 case 'v': 2246 *p++ = '\v'; 2247 break; 2248 2249 /* \ and up to 3 octal digits */ 2250 case '0': 2251 case '1': 2252 case '2': 2253 case '3': 2254 case '4': 2255 case '5': 2256 case '6': 2257 case '7': 2258 val = c - '0'; 2259 c = *s++; /* try for 2 */ 2260 if (c >= '0' && c <= '7') { 2261 val = (val << 3) | (c - '0'); 2262 c = *s++; /* try for 3 */ 2263 if (c >= '0' && c <= '7') 2264 val = (val << 3) | (c-'0'); 2265 else 2266 --s; 2267 } 2268 else 2269 --s; 2270 *p++ = (char)val; 2271 break; 2272 2273 /* \x and up to 2 hex digits */ 2274 case 'x': 2275 val = 'x'; /* Default if no digits */ 2276 c = hextoint(*s++); /* Get next char */ 2277 if (c >= 0) { 2278 val = c; 2279 c = hextoint(*s++); 2280 if (c >= 0) 2281 val = (val << 4) + c; 2282 else 2283 --s; 2284 } else 2285 --s; 2286 *p++ = (char)val; 2287 break; 2288 } 2289 } else 2290 *p++ = (char)c; 2291 } 2292 out: 2293 *p = '\0'; 2294 m->vallen = CAST(unsigned char, (p - origp)); 2295 if (m->type == FILE_PSTRING) 2296 m->vallen += (unsigned char)file_pstring_length_size(m); 2297 return s; 2298 } 2299 2300 2301 /* Single hex char to int; -1 if not a hex char. */ 2302 private int 2303 hextoint(int c) 2304 { 2305 if (!isascii((unsigned char) c)) 2306 return -1; 2307 if (isdigit((unsigned char) c)) 2308 return c - '0'; 2309 if ((c >= 'a') && (c <= 'f')) 2310 return c + 10 - 'a'; 2311 if (( c>= 'A') && (c <= 'F')) 2312 return c + 10 - 'A'; 2313 return -1; 2314 } 2315 2316 2317 /* 2318 * Print a string containing C character escapes. 2319 */ 2320 protected void 2321 file_showstr(FILE *fp, const char *s, size_t len) 2322 { 2323 char c; 2324 2325 for (;;) { 2326 if (len == ~0U) { 2327 c = *s++; 2328 if (c == '\0') 2329 break; 2330 } 2331 else { 2332 if (len-- == 0) 2333 break; 2334 c = *s++; 2335 } 2336 if (c >= 040 && c <= 0176) /* TODO isprint && !iscntrl */ 2337 (void) fputc(c, fp); 2338 else { 2339 (void) fputc('\\', fp); 2340 switch (c) { 2341 case '\a': 2342 (void) fputc('a', fp); 2343 break; 2344 2345 case '\b': 2346 (void) fputc('b', fp); 2347 break; 2348 2349 case '\f': 2350 (void) fputc('f', fp); 2351 break; 2352 2353 case '\n': 2354 (void) fputc('n', fp); 2355 break; 2356 2357 case '\r': 2358 (void) fputc('r', fp); 2359 break; 2360 2361 case '\t': 2362 (void) fputc('t', fp); 2363 break; 2364 2365 case '\v': 2366 (void) fputc('v', fp); 2367 break; 2368 2369 default: 2370 (void) fprintf(fp, "%.3o", c & 0377); 2371 break; 2372 } 2373 } 2374 } 2375 } 2376 2377 /* 2378 * eatsize(): Eat the size spec from a number [eg. 10UL] 2379 */ 2380 private void 2381 eatsize(const char **p) 2382 { 2383 const char *l = *p; 2384 2385 if (LOWCASE(*l) == 'u') 2386 l++; 2387 2388 switch (LOWCASE(*l)) { 2389 case 'l': /* long */ 2390 case 's': /* short */ 2391 case 'h': /* short */ 2392 case 'b': /* char/byte */ 2393 case 'c': /* char/byte */ 2394 l++; 2395 /*FALLTHROUGH*/ 2396 default: 2397 break; 2398 } 2399 2400 *p = l; 2401 } 2402 2403 /* 2404 * handle a compiled file. 2405 */ 2406 private int 2407 apprentice_map(struct magic_set *ms, struct magic **magicp, uint32_t *nmagicp, 2408 const char *fn) 2409 { 2410 int fd, ret; 2411 struct stat st; 2412 uint32_t *ptr; 2413 uint32_t version, entries, nentries; 2414 int needsbyteswap; 2415 char *dbname = NULL; 2416 void *mm = NULL; 2417 size_t i; 2418 2419 ret = -1; 2420 dbname = mkdbname(ms, fn, 0); 2421 if (dbname == NULL) 2422 goto error2; 2423 2424 if ((fd = open(dbname, O_RDONLY|O_BINARY)) == -1) 2425 goto error2; 2426 2427 if (fstat(fd, &st) == -1) { 2428 file_error(ms, errno, "cannot stat `%s'", dbname); 2429 goto error1; 2430 } 2431 if (st.st_size < 8) { 2432 file_error(ms, 0, "file `%s' is too small", dbname); 2433 goto error1; 2434 } 2435 2436 #ifdef QUICK 2437 if ((mm = mmap(0, (size_t)st.st_size, PROT_READ|PROT_WRITE, 2438 MAP_PRIVATE|MAP_FILE, fd, (off_t)0)) == MAP_FAILED) { 2439 file_error(ms, errno, "cannot map `%s'", dbname); 2440 goto error1; 2441 } 2442 #define RET 2 2443 #else 2444 if ((mm = CAST(void *, malloc((size_t)st.st_size))) == NULL) { 2445 file_oomem(ms, (size_t)st.st_size); 2446 goto error1; 2447 } 2448 if (read(fd, mm, (size_t)st.st_size) != (ssize_t)st.st_size) { 2449 file_badread(ms); 2450 goto error1; 2451 } 2452 #define RET 1 2453 #endif 2454 (void)close(fd); 2455 fd = -1; 2456 ptr = CAST(uint32_t *, mm); 2457 if (*ptr != MAGICNO) { 2458 if (swap4(*ptr) != MAGICNO) { 2459 file_error(ms, 0, "bad magic in `%s'", dbname); 2460 goto error1; 2461 } 2462 needsbyteswap = 1; 2463 } else 2464 needsbyteswap = 0; 2465 if (needsbyteswap) 2466 version = swap4(ptr[1]); 2467 else 2468 version = ptr[1]; 2469 ret = -2; 2470 if (version != VERSIONNO) { 2471 file_error(ms, 0, "File %s supports only version %d magic " 2472 "files. `%s' is version %d", VERSION, 2473 VERSIONNO, dbname, version); 2474 goto error1; 2475 } 2476 entries = (uint32_t)(st.st_size / sizeof(struct magic)); 2477 if ((off_t)(entries * sizeof(struct magic)) != st.st_size) { 2478 file_error(ms, 0, "Size of `%s' %llu is not a multiple of %zu", 2479 dbname, (unsigned long long)st.st_size, 2480 sizeof(struct magic)); 2481 goto error1; 2482 } 2483 magicp[0] = CAST(struct magic *, mm) + 1; 2484 nentries = 0; 2485 for (i = 0; i < MAGIC_SETS; i++) { 2486 if (needsbyteswap) 2487 nmagicp[i] = swap4(ptr[i + 2]); 2488 else 2489 nmagicp[i] = ptr[i + 2]; 2490 if (i != MAGIC_SETS - 1) 2491 magicp[i + 1] = magicp[i] + nmagicp[i]; 2492 nentries += nmagicp[i]; 2493 } 2494 if (entries != nentries + 1) { 2495 file_error(ms, 0, "Inconsistent entries in `%s' %u != %u", 2496 dbname, entries, nentries + 1); 2497 goto error1; 2498 } 2499 if (needsbyteswap) 2500 for (i = 0; i < MAGIC_SETS; i++) 2501 byteswap(magicp[i], nmagicp[i]); 2502 free(dbname); 2503 return RET; 2504 2505 error1: 2506 if (fd != -1) 2507 (void)close(fd); 2508 if (mm) { 2509 #ifdef QUICK 2510 (void)munmap((void *)mm, (size_t)st.st_size); 2511 #else 2512 free(mm); 2513 #endif 2514 } else { 2515 for (i = 0; i < MAGIC_SETS; i++) { 2516 magicp[i] = NULL; 2517 nmagicp[i] = 0; 2518 } 2519 } 2520 error2: 2521 free(dbname); 2522 return ret; 2523 } 2524 2525 private const uint32_t ar[] = { 2526 MAGICNO, VERSIONNO 2527 }; 2528 /* 2529 * handle an mmaped file. 2530 */ 2531 private int 2532 apprentice_compile(struct magic_set *ms, struct magic **magicp, 2533 uint32_t *nmagicp, const char *fn) 2534 { 2535 static size_t nm = sizeof(*nmagicp) * MAGIC_SETS; 2536 int fd = -1; 2537 char *dbname; 2538 int rv = -1; 2539 uint32_t i; 2540 2541 dbname = mkdbname(ms, fn, 1); 2542 2543 if (dbname == NULL) 2544 goto out; 2545 2546 if ((fd = open(dbname, O_WRONLY|O_CREAT|O_TRUNC|O_BINARY, 0644)) == -1) 2547 { 2548 file_error(ms, errno, "cannot open `%s'", dbname); 2549 goto out; 2550 } 2551 2552 if (write(fd, ar, sizeof(ar)) != (ssize_t)sizeof(ar)) { 2553 file_error(ms, errno, "error writing `%s'", dbname); 2554 goto out; 2555 } 2556 2557 if (write(fd, nmagicp, nm) != (ssize_t)nm) { 2558 file_error(ms, errno, "error writing `%s'", dbname); 2559 goto out; 2560 } 2561 2562 assert(nm + sizeof(ar) < sizeof(struct magic)); 2563 2564 if (lseek(fd, (off_t)sizeof(struct magic), SEEK_SET) 2565 != sizeof(struct magic)) { 2566 file_error(ms, errno, "error seeking `%s'", dbname); 2567 goto out; 2568 } 2569 2570 for (i = 0; i < MAGIC_SETS; i++) { 2571 if (write(fd, magicp[i], (sizeof(struct magic) * nmagicp[i])) 2572 != (ssize_t)(sizeof(struct magic) * nmagicp[i])) { 2573 file_error(ms, errno, "error writing `%s'", dbname); 2574 goto out; 2575 } 2576 } 2577 2578 if (fd != -1) 2579 (void)close(fd); 2580 rv = 0; 2581 out: 2582 free(dbname); 2583 return rv; 2584 } 2585 2586 private const char ext[] = ".mgc"; 2587 /* 2588 * make a dbname 2589 */ 2590 private char * 2591 mkdbname(struct magic_set *ms, const char *fn, int strip) 2592 { 2593 const char *p, *q; 2594 char *buf; 2595 2596 if (strip) { 2597 if ((p = strrchr(fn, '/')) != NULL) 2598 fn = ++p; 2599 } 2600 2601 for (q = fn; *q; q++) 2602 continue; 2603 /* Look for .mgc */ 2604 for (p = ext + sizeof(ext) - 1; p >= ext && q >= fn; p--, q--) 2605 if (*p != *q) 2606 break; 2607 2608 /* Did not find .mgc, restore q */ 2609 if (p >= ext) 2610 while (*q) 2611 q++; 2612 2613 q++; 2614 /* Compatibility with old code that looked in .mime */ 2615 if (ms->flags & MAGIC_MIME) { 2616 asprintf(&buf, "%.*s.mime%s", (int)(q - fn), fn, ext); 2617 if (access(buf, R_OK) != -1) { 2618 ms->flags &= MAGIC_MIME_TYPE; 2619 return buf; 2620 } 2621 free(buf); 2622 } 2623 asprintf(&buf, "%.*s%s", (int)(q - fn), fn, ext); 2624 2625 /* Compatibility with old code that looked in .mime */ 2626 if (strstr(p, ".mime") != NULL) 2627 ms->flags &= MAGIC_MIME_TYPE; 2628 return buf; 2629 } 2630 2631 /* 2632 * Byteswap an mmap'ed file if needed 2633 */ 2634 private void 2635 byteswap(struct magic *magic, uint32_t nmagic) 2636 { 2637 uint32_t i; 2638 for (i = 0; i < nmagic; i++) 2639 bs1(&magic[i]); 2640 } 2641 2642 /* 2643 * swap a short 2644 */ 2645 private uint16_t 2646 swap2(uint16_t sv) 2647 { 2648 uint16_t rv; 2649 uint8_t *s = (uint8_t *)(void *)&sv; 2650 uint8_t *d = (uint8_t *)(void *)&rv; 2651 d[0] = s[1]; 2652 d[1] = s[0]; 2653 return rv; 2654 } 2655 2656 /* 2657 * swap an int 2658 */ 2659 private uint32_t 2660 swap4(uint32_t sv) 2661 { 2662 uint32_t rv; 2663 uint8_t *s = (uint8_t *)(void *)&sv; 2664 uint8_t *d = (uint8_t *)(void *)&rv; 2665 d[0] = s[3]; 2666 d[1] = s[2]; 2667 d[2] = s[1]; 2668 d[3] = s[0]; 2669 return rv; 2670 } 2671 2672 /* 2673 * swap a quad 2674 */ 2675 private uint64_t 2676 swap8(uint64_t sv) 2677 { 2678 uint64_t rv; 2679 uint8_t *s = (uint8_t *)(void *)&sv; 2680 uint8_t *d = (uint8_t *)(void *)&rv; 2681 #if 0 2682 d[0] = s[3]; 2683 d[1] = s[2]; 2684 d[2] = s[1]; 2685 d[3] = s[0]; 2686 d[4] = s[7]; 2687 d[5] = s[6]; 2688 d[6] = s[5]; 2689 d[7] = s[4]; 2690 #else 2691 d[0] = s[7]; 2692 d[1] = s[6]; 2693 d[2] = s[5]; 2694 d[3] = s[4]; 2695 d[4] = s[3]; 2696 d[5] = s[2]; 2697 d[6] = s[1]; 2698 d[7] = s[0]; 2699 #endif 2700 return rv; 2701 } 2702 2703 /* 2704 * byteswap a single magic entry 2705 */ 2706 private void 2707 bs1(struct magic *m) 2708 { 2709 m->cont_level = swap2(m->cont_level); 2710 m->offset = swap4((uint32_t)m->offset); 2711 m->in_offset = swap4((uint32_t)m->in_offset); 2712 m->lineno = swap4((uint32_t)m->lineno); 2713 if (IS_STRING(m->type)) { 2714 m->str_range = swap4(m->str_range); 2715 m->str_flags = swap4(m->str_flags); 2716 } 2717 else { 2718 m->value.q = swap8(m->value.q); 2719 m->num_mask = swap8(m->num_mask); 2720 } 2721 } 2722 2723 protected size_t 2724 file_pstring_length_size(const struct magic *m) 2725 { 2726 switch (m->str_flags & PSTRING_LEN) { 2727 case PSTRING_1_LE: 2728 return 1; 2729 case PSTRING_2_LE: 2730 case PSTRING_2_BE: 2731 return 2; 2732 case PSTRING_4_LE: 2733 case PSTRING_4_BE: 2734 return 4; 2735 default: 2736 abort(); /* Impossible */ 2737 return 1; 2738 } 2739 } 2740 protected size_t 2741 file_pstring_get_length(const struct magic *m, const char *s) 2742 { 2743 size_t len = 0; 2744 2745 switch (m->str_flags & PSTRING_LEN) { 2746 case PSTRING_1_LE: 2747 len = *s; 2748 break; 2749 case PSTRING_2_LE: 2750 len = (s[1] << 8) | s[0]; 2751 break; 2752 case PSTRING_2_BE: 2753 len = (s[0] << 8) | s[1]; 2754 break; 2755 case PSTRING_4_LE: 2756 len = (s[3] << 24) | (s[2] << 16) | (s[1] << 8) | s[0]; 2757 break; 2758 case PSTRING_4_BE: 2759 len = (s[0] << 24) | (s[1] << 16) | (s[2] << 8) | s[3]; 2760 break; 2761 default: 2762 abort(); /* Impossible */ 2763 } 2764 2765 if (m->str_flags & PSTRING_LENGTH_INCLUDES_ITSELF) 2766 len -= file_pstring_length_size(m); 2767 2768 return len; 2769 } 2770 2771 protected int 2772 file_magicfind(struct magic_set *ms, const char *name, struct mlist *v) 2773 { 2774 uint32_t i, j; 2775 struct mlist *mlist, *ml; 2776 2777 mlist = ms->mlist[1]; 2778 2779 for (ml = mlist->next; ml != mlist; ml = ml->next) { 2780 struct magic *ma = ml->magic; 2781 uint32_t nma = ml->nmagic; 2782 for (i = 0; i < nma; i++) { 2783 if (ma[i].type != FILE_NAME) 2784 continue; 2785 if (strcmp(ma[i].value.s, name) == 0) { 2786 v->magic = &ma[i]; 2787 for (j = i + 1; j < nma; j++) 2788 if (ma[j].cont_level == 0) 2789 break; 2790 v->nmagic = j - i; 2791 return 0; 2792 } 2793 } 2794 } 2795 return -1; 2796 } 2797